From: Kibum Kim Date: Fri, 6 Jan 2012 15:48:52 +0000 (+0900) Subject: Git init X-Git-Tag: 1.0_branch~16 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=363768c2895d199042a36e179eb8a2458eb4c6cd;p=profile%2Fivi%2Flibsoup2.4.git Git init --- diff --git a/AUTHORS b/AUTHORS new file mode 100644 index 0000000..89ad10c --- /dev/null +++ b/AUTHORS @@ -0,0 +1,13 @@ +SOUP Authors +============ + +Original SOUP 0.0 - 0.7: + Alex Graveley + Dick Porter + Miguel De Icaza + +libsoup 1.99.x - 2.2: + Joe Shaw + Dan Winship + +See MAINTAINERS for the current maintainers. diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..eb685a5 --- /dev/null +++ b/COPYING @@ -0,0 +1,481 @@ + GNU LIBRARY GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1991 Free Software Foundation, Inc. + 675 Mass Ave, Cambridge, MA 02139, USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + +[This is the first released version of the library GPL. It is + numbered 2 because it goes with version 2 of the ordinary GPL.] + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +Licenses are intended to guarantee your freedom to share and change +free software--to make sure the software is free for all its users. + + This license, the Library General Public License, applies to some +specially designated Free Software Foundation software, and to any +other libraries whose authors decide to use it. You can use it for +your libraries, too. + + When we speak of free software, we are referring to freedom, not +price. 
Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if +you distribute copies of the library, or if you modify it. + + For example, if you distribute copies of the library, whether gratis +or for a fee, you must give the recipients all the rights that we gave +you. You must make sure that they, too, receive or can get the source +code. If you link a program with the library, you must provide +complete object files to the recipients so that they can relink them +with the library, after making changes to the library and recompiling +it. And you must show them these terms so they know their rights. + + Our method of protecting your rights has two steps: (1) copyright +the library, and (2) offer you this license which gives you legal +permission to copy, distribute and/or modify the library. + + Also, for each distributor's protection, we want to make certain +that everyone understands that there is no warranty for this free +library. If the library is modified by someone else and passed on, we +want its recipients to know that what they have is not the original +version, so that any problems introduced by others will not reflect on +the original authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that companies distributing free +software will individually obtain patent licenses, thus in effect +transforming the program into proprietary software. 
To prevent this, +we have made it clear that any patent must be licensed for everyone's +free use or not licensed at all. + + Most GNU software, including some libraries, is covered by the ordinary +GNU General Public License, which was designed for utility programs. This +license, the GNU Library General Public License, applies to certain +designated libraries. This license is quite different from the ordinary +one; be sure to read it in full, and don't assume that anything in it is +the same as in the ordinary license. + + The reason we have a separate public license for some libraries is that +they blur the distinction we usually make between modifying or adding to a +program and simply using it. Linking a program with a library, without +changing the library, is in some sense simply using the library, and is +analogous to running a utility program or application program. However, in +a textual and legal sense, the linked executable is a combined work, a +derivative of the original library, and the ordinary General Public License +treats it as such. + + Because of this blurred distinction, using the ordinary General +Public License for libraries did not effectively promote software +sharing, because most developers did not use the libraries. We +concluded that weaker conditions might promote sharing better. + + However, unrestricted linking of non-free programs would deprive the +users of those programs of all benefit from the free status of the +libraries themselves. This Library General Public License is intended to +permit developers of non-free programs to use free libraries, while +preserving your freedom as a user of such programs to change the free +libraries that are incorporated in them. (We have not seen how to achieve +this as regards changes in header files, but we have achieved it as regards +changes in the actual functions of the Library.) The hope is that this +will lead to faster development of free libraries. 
+ + The precise terms and conditions for copying, distribution and +modification follow. Pay close attention to the difference between a +"work based on the library" and a "work that uses the library". The +former contains code derived from the library, while the latter only +works together with the library. + + Note that it is possible for a library to be covered by the ordinary +General Public License rather than by this special one. + + GNU LIBRARY GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License Agreement applies to any software library which +contains a notice placed by the copyright holder or other authorized +party saying it may be distributed under the terms of this Library +General Public License (also called "this License"). Each licensee is +addressed as "you". + + A "library" means a collection of software functions and/or data +prepared so as to be conveniently linked with application programs +(which use some of those functions and data) to form executables. + + The "Library", below, refers to any such software library or work +which has been distributed under these terms. A "work based on the +Library" means either the Library or any derivative work under +copyright law: that is to say, a work containing the Library or a +portion of it, either verbatim or with modifications and/or translated +straightforwardly into another language. (Hereinafter, translation is +included without limitation in the term "modification".) + + "Source code" for a work means the preferred form of the work for +making modifications to it. For a library, complete source code means +all the source code for all modules it contains, plus any associated +interface definition files, plus the scripts used to control compilation +and installation of the library. + + Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. 
The act of +running a program using the Library is not restricted, and output from +such a program is covered only if its contents constitute a work based +on the Library (independent of the use of the Library in a tool for +writing it). Whether that is true depends on what the Library does +and what the program that uses the Library does. + + 1. You may copy and distribute verbatim copies of the Library's +complete source code as you receive it, in any medium, provided that +you conspicuously and appropriately publish on each copy an +appropriate copyright notice and disclaimer of warranty; keep intact +all the notices that refer to this License and to the absence of any +warranty; and distribute a copy of this License along with the +Library. + + You may charge a fee for the physical act of transferring a copy, +and you may at your option offer warranty protection in exchange for a +fee. + + 2. You may modify your copy or copies of the Library or any portion +of it, thus forming a work based on the Library, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) The modified work must itself be a software library. + + b) You must cause the files modified to carry prominent notices + stating that you changed the files and the date of any change. + + c) You must cause the whole of the work to be licensed at no + charge to all third parties under the terms of this License. + + d) If a facility in the modified Library refers to a function or a + table of data to be supplied by an application program that uses + the facility, other than as an argument passed when the facility + is invoked, then you must make a good faith effort to ensure that, + in the event an application does not supply such function or + table, the facility still operates, and performs whatever part of + its purpose remains meaningful. 
+ + (For example, a function in a library to compute square roots has + a purpose that is entirely well-defined independent of the + application. Therefore, Subsection 2d requires that any + application-supplied function or table used by this function must + be optional: if the application does not supply it, the square + root function must still compute square roots.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Library, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Library, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Library. + +In addition, mere aggregation of another work not based on the Library +with the Library (or with a work based on the Library) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may opt to apply the terms of the ordinary GNU General Public +License instead of this License to a given copy of the Library. To do +this, you must alter all the notices that refer to this License, so +that they refer to the ordinary GNU General Public License, version 2, +instead of to this License. (If a newer version than version 2 of the +ordinary GNU General Public License has appeared, then you can specify +that version instead if you wish.) Do not make any other change in +these notices. 
+ + Once this change is made in a given copy, it is irreversible for +that copy, so the ordinary GNU General Public License applies to all +subsequent copies and derivative works made from that copy. + + This option is useful when you wish to copy part of the code of +the Library into a program that is not a library. + + 4. You may copy and distribute the Library (or a portion or +derivative of it, under Section 2) in object code or executable form +under the terms of Sections 1 and 2 above provided that you accompany +it with the complete corresponding machine-readable source code, which +must be distributed under the terms of Sections 1 and 2 above on a +medium customarily used for software interchange. + + If distribution of object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the +source code from the same place satisfies the requirement to +distribute the source code, even though third parties are not +compelled to copy the source along with the object code. + + 5. A program that contains no derivative of any portion of the +Library, but is designed to work with the Library by being compiled or +linked with it, is called a "work that uses the Library". Such a +work, in isolation, is not a derivative work of the Library, and +therefore falls outside the scope of this License. + + However, linking a "work that uses the Library" with the Library +creates an executable that is a derivative of the Library (because it +contains portions of the Library), rather than a "work that uses the +library". The executable is therefore covered by this License. +Section 6 states terms for distribution of such executables. + + When a "work that uses the Library" uses material from a header file +that is part of the Library, the object code for the work may be a +derivative work of the Library even though the source code is not. 
+Whether this is true is especially significant if the work can be +linked without the Library, or if the work is itself a library. The +threshold for this to be true is not precisely defined by law. + + If such an object file uses only numerical parameters, data +structure layouts and accessors, and small macros and small inline +functions (ten lines or less in length), then the use of the object +file is unrestricted, regardless of whether it is legally a derivative +work. (Executables containing this object code plus portions of the +Library will still fall under Section 6.) + + Otherwise, if the work is a derivative of the Library, you may +distribute the object code for the work under the terms of Section 6. +Any executables containing that work also fall under Section 6, +whether or not they are linked directly with the Library itself. + + 6. As an exception to the Sections above, you may also compile or +link a "work that uses the Library" with the Library to produce a +work containing portions of the Library, and distribute that work +under terms of your choice, provided that the terms permit +modification of the work for the customer's own use and reverse +engineering for debugging such modifications. + + You must give prominent notice with each copy of the work that the +Library is used in it and that the Library and its use are covered by +this License. You must supply a copy of this License. If the work +during execution displays copyright notices, you must include the +copyright notice for the Library among them, as well as a reference +directing the user to the copy of this License. 
Also, you must do one +of these things: + + a) Accompany the work with the complete corresponding + machine-readable source code for the Library including whatever + changes were used in the work (which must be distributed under + Sections 1 and 2 above); and, if the work is an executable linked + with the Library, with the complete machine-readable "work that + uses the Library", as object code and/or source code, so that the + user can modify the Library and then relink to produce a modified + executable containing the modified Library. (It is understood + that the user who changes the contents of definitions files in the + Library will not necessarily be able to recompile the application + to use the modified definitions.) + + b) Accompany the work with a written offer, valid for at + least three years, to give the same user the materials + specified in Subsection 6a, above, for a charge no more + than the cost of performing this distribution. + + c) If distribution of the work is made by offering access to copy + from a designated place, offer equivalent access to copy the above + specified materials from the same place. + + d) Verify that the user has already received a copy of these + materials or that you have already sent this user a copy. + + For an executable, the required form of the "work that uses the +Library" must include any data and utility programs needed for +reproducing the executable from it. However, as a special exception, +the source code distributed need not include anything that is normally +distributed (in either source or binary form) with the major +components (compiler, kernel, and so on) of the operating system on +which the executable runs, unless that component itself accompanies +the executable. + + It may happen that this requirement contradicts the license +restrictions of other proprietary libraries that do not normally +accompany the operating system. 
Such a contradiction means you cannot +use both them and the Library together in an executable that you +distribute. + + 7. You may place library facilities that are a work based on the +Library side-by-side in a single library together with other library +facilities not covered by this License, and distribute such a combined +library, provided that the separate distribution of the work based on +the Library and of the other library facilities is otherwise +permitted, and provided that you do these two things: + + a) Accompany the combined library with a copy of the same work + based on the Library, uncombined with any other library + facilities. This must be distributed under the terms of the + Sections above. + + b) Give prominent notice with the combined library of the fact + that part of it is a work based on the Library, and explaining + where to find the accompanying uncombined form of the same work. + + 8. You may not copy, modify, sublicense, link with, or distribute +the Library except as expressly provided under this License. Any +attempt otherwise to copy, modify, sublicense, link with, or +distribute the Library is void, and will automatically terminate your +rights under this License. However, parties who have received copies, +or rights, from you under this License will not have their licenses +terminated so long as such parties remain in full compliance. + + 9. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Library or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Library (or any work based on the +Library), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Library or works based on it. + + 10. 
Each time you redistribute the Library (or any work based on the +Library), the recipient automatically receives a license from the +original licensor to copy, distribute, link with or modify the Library +subject to these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 11. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Library at all. For example, if a patent +license would not permit royalty-free redistribution of the Library by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Library. + +If any portion of this section is held invalid or unenforceable under any +particular circumstance, the balance of the section is intended to apply, +and the section as a whole is intended to apply in other circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system which is +implemented by public license practices. 
Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 12. If the distribution and/or use of the Library is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Library under this License may add +an explicit geographical distribution limitation excluding those countries, +so that distribution is permitted only in or among countries not thus +excluded. In such case, this License incorporates the limitation as if +written in the body of this License. + + 13. The Free Software Foundation may publish revised and/or new +versions of the Library General Public License from time to time. +Such new versions will be similar in spirit to the present version, +but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Library +specifies a version number of this License which applies to it and +"any later version", you have the option of following the terms and +conditions either of that version or of any later version published by +the Free Software Foundation. If the Library does not specify a +license version number, you may choose any version ever published by +the Free Software Foundation. + + 14. If you wish to incorporate parts of the Library into other free +programs whose distribution conditions are incompatible with these, +write to the author to ask for permission. For software which is +copyrighted by the Free Software Foundation, write to the Free +Software Foundation; we sometimes make exceptions for this. 
Our +decision will be guided by the two goals of preserving the free status +of all derivatives of our free software and of promoting the sharing +and reuse of software generally. + + NO WARRANTY + + 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO +WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY +KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN +WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY +AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU +FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING +RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF +SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. + + END OF TERMS AND CONDITIONS + + Appendix: How to Apply These Terms to Your New Libraries + + If you develop a new library, and you want it to be of the greatest +possible use to the public, we recommend making it free software that +everyone can redistribute and change. You can do so by permitting +redistribution under these terms (or, alternatively, under the terms of the +ordinary General Public License). + + To apply these terms, attach the following notices to the library. 
It is +safest to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least the +"copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with this library; if not, write to the Free + Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + +Also add information on how to contact you by electronic and paper mail. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the library, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the + library `Frob' (a library for tweaking knobs) written by James Random Hacker. + + , 1 April 1990 + Ty Coon, President of Vice + +That's all there is to it! diff --git a/HACKING b/HACKING new file mode 100644 index 0000000..51269e5 --- /dev/null +++ b/HACKING @@ -0,0 +1,66 @@ +CODE STYLE +---------- + +Please use the style used by the rest of the code. 
Among other things, +this means: + + * Tabs, not spaces, for indentation + + * Put spaces: + * around binary operators + * between if/while/for/switch and "(" + * between function name and "(" + * between ")" and "{" + * after "," + + * if/for/while bodies: + + * Single-line bodies should (a) be on their own line, and (b) + not have braces around them + + * Multi-line bodies should have braces around them, even if + the body is only a single statement and the braces are not + syntactically necessary. + + * Eg: + + for (i = 0; i < len; i++) { + if (find (i, something)) + break; + else { + function_with_big_name (i, something, + something_else); + } + } + + * C89, not C99. (In particular, don't declare variables in the + middle of blocks.) + + * Do not use gint, gchar, glong, and gshort. (Other g-types, such + as gpointer and the unsigned types are fine.) + +CORRECTNESS +----------- + + * libsoup builds with lots of -W options by default, and should + not print any warnings while compiling (unless they're caused by + #included files from other projects, eg, proxy.h). You can use + "make > /dev/null" to do a full compile showing only the + warnings/errors, to make sure your patch does not introduce any + more. + + * There are a number of regression tests in the tests/ directory. + Running "make check" will run all of them (or at least, all of + the ones that it can run based on what software you have + installed. Eg, some tests require apache to be installed.) You + should run "make check" before submitting a patch that could + potentially change libsoup's behavior. ("make check" will warn + you if it was not able to run all of the tests. If you are + making extensive changes, or changing very low-level functions, + you may want to install all of the optional pieces so you can + run all of the regression tests.) 
+ + * libsoup ought to build correctly from outside its source tree, + so if you make large changes to the Makefiles, try a "make + distcheck" to verify that an out-of-source-tree build still + works. diff --git a/Makefile.am b/Makefile.am new file mode 100644 index 0000000..861daff --- /dev/null +++ b/Makefile.am @@ -0,0 +1,21 @@ +## Process this file with automake to produce Makefile.in +ACLOCAL_AMFLAGS = -I m4 + +SUBDIRS = libsoup tests docs + +EXTRA_DIST = \ + libsoup-2.4.pc.in \ + libsoup-gnome-2.4.pc.in \ + gtk-doc.make \ + libsoup-zip.in \ + m4/introspection.m4 + +DISTCHECK_CONFIGURE_FLAGS = --enable-gtk-doc --enable-introspection + +pkgconfigdir = $(libdir)/pkgconfig + +pkgconfig_DATA = libsoup-2.4.pc + +if BUILD_LIBSOUP_GNOME +pkgconfig_DATA += libsoup-gnome-2.4.pc +endif diff --git a/NEWS b/NEWS new file mode 100644 index 0000000..b5da4ec --- /dev/null +++ b/NEWS @@ -0,0 +1,1811 @@ +Changes in libsoup from 2.35.5 to 2.35.90: + + * Added SOUP_MESSAGE_CAN_REBUILD flag, to use with + soup_message_body_set_accumulate(FALSE) on a request body, + to indicate that the caller will recreate the request body + after it has been discarded if the message needs to be + re-sent. [#656650] + + * Fixed the build on mingw-w64 by not using "interface" as + a variable name. [#656402, Erik van Pienbroek] + + * (The multihosted https server regression mentioned in the + 2.35.5 NEWS turned out to be a glib-networking bug, which is + fixed as of 2.29.18.) + +Changes in libsoup from 2.35.4 to 2.35.5: + + * Support NTLM single sign on via samba's /usr/bin/ntlm_auth. + [#650940, Mandy Wu]. This is enabled by default (if NTLM is + enabled for the session and the ntlm_auth binary is + available), but will fall back to the standard + password-based authentication if SSO fails. + + * Default to TLS+extensions for https connections, falling + back to SSLv3-without-extensions (the old default) only if + the server fails to negotiate TLS. 
[#581342] + + * As a result of this change, some users are currently + seeing problems with sites that serve multiple https + hosts from a single IP address (eg, + *.launchpad.net). There is no known workaround at + this time. + + * Fixed a longstanding problem with https pages sometimes not + loading when using a proxy. [#631368, based on patches from + DongJae Kim and Thierry Reding] + + * SoupContentSniffer: don't use gio's sniffing rules, since + the spec now recommends that browsers not do any additional + sniffing beyond what's in the spec. [#648846, "arno"] + + * Fixed SoupRequestHTTP to work properly with alternate + GMainContexts. [#653707] + + * Added some annotations from Vala's vapi files. [#655397, + Evan Nemerson]. Also, removed SoupProxyResolver from the + gir/typelib, since it had been deprecated since before + introspection was available, and it was being scanned wrong + anyway. + +Changes in libsoup from 2.35.3 to 2.35.4: + + * CVE-2011-2524: Fixed a security hole that caused some + SoupServer users to unintentionally allow accessing the + entire local filesystem when they thought they were only + providing access to a single directory. [#653258] + + * Plugged another SoupCache memory leak [Xan] + + * Simplified SoupCache keys, and handle collisions. [#649963, + Sergio] + + * Annotate SoupSession:add-feature, etc, as (skip), so they + don't conflict with the methods of the same name. [#655150, + Jasper St. 
Pierre] + +Changes in libsoup from 2.34.1 to 2.35.3: + + * SoupCache fixes [Sergio]: + + * Don't store hop-by-hop headers [#650094] + + * Fix status code and headers of responses returned + from the cache after being revalidated [#649965] + + * Added versioning support to the cache file format + [#652554] and extended it to keep track of the + status code [#649965] + + * Fixed storage of time values in the cache [#653349] + + * Fixed a use-after-free that could result in bogus + data being written to the cache [#650620] + + * Various leaks [#649309] + + * Work around an Apache bug where it tells browsers to + automatically decode .gz files but still keep the .gz in the + name. [#613361, Dan] + + * Fixed an overflow when sending a response larger than 2^31 + bytes (eg, streaming movies in Rygel). [#638468, Jens Georg] + + * Always send the Keep-Alive header, not just to hosts we + already know to be HTTP/1.0 [#648680, Sergio] + + * Fixed various leaks [#652699, Sergio], [#651643, Milan], + [etc, Xan] + + * Minor build fix for Debian/Ubuntu [#648948] + + * Fixed a docs typo + +Changes in libsoup from 2.34.0 to 2.34.1: + + * Two multipart forms/Content-Disposition fixes [Dan]: + + * UTF-8-encoded header encoding/decoding rules updated + to match RFC 5987. In particular, a UTF-8-encoded + filename parameter in Content-Disposition will now + override an ASCII one. + + * When not using UTF-8-encoded filenames in + Content-Disposition, always quote the filename, + since some servers erroneously fail to handle + non-quoted ones. [#641280] + + * Fixed several memory leaks [Dan, Xan] + + * Fixed decoding base64 data: URLs [#646896, Sergio] + + * Simplified the libsoup-gnome build on Windows by dropping + the gnome-keyring dependency. [Fridrich Štrba] + + * Fixed a bug in soup_uri_to_string() in which (invalid) port + numbers could be output as negative numbers (tripping up a + WebKit "sanity checking" test). [#647767, Sergio] + + * Fixed a cache corruption bug in SoupCache. 
[#648285, Sergio] + + * Fixed a crash in SoupSessionSync when using + soup_session_abort(). + +Changes in libsoup from 2.33.92 to 2.34.0: + + * Fixed the GMainContext handling of the new + SoupProxyResolverDefault (which among other things fixes + gstreamer usage inside epiphany). [#646201, Sebastian Dröge] + + * Tweaked the introspection Makefile rules to fix a problem + building on Debian/Ubuntu. [#645505] + + * Belatedly bumped the shared library versioning to reflect the + API additions since 2.32.0 + +Changes in libsoup from 2.33.90 to 2.33.92: + + * LIBSOUP NO LONGER DEPENDS ON GCONF OR LIBPROXY. + (see below). + + * Added SoupProxyResolverDefault, which uses gio's + GProxyResolver to resolve proxies [#642982, Gustavo Noronha + Silva]. Despite the "default" in the name, it is not used by + default, for compatibility reasons, but it is available in + plain libsoup, not libsoup-gnome. (Of course, it depends on + having glib-networking installed.) + + * Updated SoupProxyResolverGNOME to be based on + SoupProxyResolverDefault, but explicitly requesting the + "gnome" GProxyResolver if it is available [#625898], and + removed the old code that used GConf and libproxy directly. + + * Added soup_server_disconnect(), to explicitly disconnect a + SoupServer, since it is not possible to g_object_unref() it + from memory-managed language bindings. [#638576, Andreas + Rottmann] + + * SoupDate now parses month names case-insensitively [#644048, + Christian Dywan] + + * Avoid a g_return_if_fail() when using + SOUP_COOKIE_JAR_ACCEPT_NO_THIRD_PARTY with non-http URIs + (file:, data:, etc). [#643226] + + * SoupCookieJar now catches overflows when parsing very + distant dates [#643462, based on a patch from Mark + Starovoytov] + + * Fixed a buggy interaction between request body streaming and + restarted requests [David Woodhouse]. Added some new tests + to tests/chunk-test.c to make sure that a specific + (unsupported!) 
way of using those methods would not get + broken in the future. + + * Fixed soup_socket_get_remote_address(), which had been + broken since 2.33.4 (and which in turn caused + soup_client_context_get_address/_get_host to be broken). + [#645227] + +Changes in libsoup from 2.33.6 to 2.33.90: + + * Attempted to make libsoup's dependency on glib-networking + (for TLS support) more explicit, by requiring that + glib-networking be present at compile time unless the + builder explicitly passes "--disable-tls-check", and noting + that packagers should give libsoup a runtime dependency on + glib-networking. + + * Fixed a bug in talking to servers with virtual hosts with + Unicode names (IDNs) [#642075] + + * Added a "Connection: Keep-Alive" header when talking to + HTTP/1.0 hosts, to improve performance. [#640414, Sergio + Villar Senin] + + * Changed SoupCache to not cache multipart/x-mixed-replace + content. [#642028, Sergio Villar Senin] + +Changes in libsoup from 2.33.5 to 2.33.6: + + * Made SoupSessionAsync do idle-connection cleanup more + sanely, resulting in faster load times for pages with lots + of subresources. [#639768, Sergio Villar Senin] + + * Fixed soup_form_decode()'s behavior (and by extension, + SoupServer query string handling) with datasets that contain + multiple values for the same key, and added a test case. + [#639783, Sven Neumann] + + * Fixed warnings pointed out by gcc 4.6, including a bug in + SoupCache that would cause unnecessary revalidations + [#640556]. + + * Belatedly copied a fix from the WebKit copy of soup-cache.c + into ours, and fixed a bug in the local copy of + soup-request-data.c [#641022, Sergio Villar Senin], in + preparation for making WebKit use the libsoup versions. + +Changes in libsoup from 2.33.4 to 2.33.5: + + * Fixed certain cases of soup_session_cancel_message() with + SoupSessionSync that could previously cause warnings or + crashes. 
[#637741] + +Changes in libsoup from 2.32.2 to 2.33.4: + + * SoupSocket now uses GSocketConnection and GTlsConnection + internally rather than making socket calls directly and + using GIOStream, and TLS is handled via glib's APIs rather + than using gnutls directly. + + * The gzip Content-Encoding handler is now implemented using + GZlibDecompressor + + * As a result of the above two changes, libsoup no longer + directly depends on gnutls, libgcrypt, or zlib, though it + still indirectly depends on zlib via glib and libxml2. Also, + although libsoup does not depend on glib-networking as a + build-time dependency, some "make check" tests will be + skipped if it is not installed. + + * The SoupRequest/SoupCache code from WebKit has been + imported, but it is not yet recommended for general use, and + is not necessarily API stable. [#523100, Sergio Villar, with + the SoupRequest parts based on the Summer of Code work by + Gabriel Corvalan and the cache parts based on an earlier + patch by Xan Lopez] + + * Added SoupMessage:tls-certificate and + SoupMessage:tls-errors, which give more information about + the certificate used to authenticate a TLS connection. + + * It is now possible to disable Basic or Digest auth in a + session by using soup_session_remove_feature_by_type() with + SOUP_TYPE_AUTH_BASIC or SOUP_TYPE_AUTH_DIGEST. Likewise, the + right way to enable NTLM support now is to call + soup_session_add_feature_by_type() with SOUP_TYPE_AUTH_NTLM; + SOUP_SESSION_USE_NTLM is now deprecated. + + * Allow setting cookies on file:// URIs, since other browsers + do, and WebKit has a test for it. 
[#603825] + + * .gir/.typelib files now include C header/library information + (needed by vala and some other bindings) [#635395, Evan + Nemerson] + + * Added annotations on soup_message_headers_get_content_type() + [Lucas Rocha] and SoupHTTPVersion [John Palmieri] + + * Fixed a Set-Cookie processing leak [#636741, Jonathan + Jongsma] + +Changes in libsoup from 2.32.1 to 2.32.2: + + * Fixed a regression in 2.32.0 that caused evolution-exchange + to get stuck and stop updating. [#634422] + + * Fixed a regression in 2.32.0 with apps using asynchronous + sessions from multiple threads (in particular, gupnp) + [#635101] + + * Fixed the regression test for #631525 to not cause spurious + "make check" failures on some machines. + +Changes in libsoup from 2.32.0 to 2.32.1: + + * Fixed a regression in 2.32.0 with the use of persistent + connections that caused spurious "Connection terminated + unexpectedly" errors. [#631525, debugged by Sergio Villar] + + * Fixed a regression in 2.32.0 that caused proxy-related + DNS errors to return SOUP_STATUS_CANT_RESOLVE rather than + SOUP_STATUS_CANT_RESOLVE_PROXY. + + * Usernames/passwords specified explicitly in request URIs now + override existing cached auth info. [#631679, Sergio Villar] + + * Changed soup_uri_decode() and soup_uri_normalize() to just + ignore malformed %-encoding rather than returning NULL, + for consistency with soup_uri_new(). [#630540] + + * Fixed soup_form_decode() to ignore invalid parameters, + and soup_form_encode_hash() to just g_return_if_fail() + rather than crashing if there are NULL values in the + hash. [#620220] + + * Added another workaround for stupid servers that close the + connection before returning the full response (in this case, + when using chunked encoding and failing to include the + final 0-length chunk). [#629160] + + * Fixed a bug in SoupCookieJarText that deleted excess cookies + whenever any cookie expired. 
[#631641, Michał Kazior] + + * Fixed a small leak in SoupContentDecoder if you were using + it incorrectly. [pointed out in email by Christophe + Gillette] + + * Added regression tests for passwords-in-URIs [#631679, + Sergio Villar] and SOUP_SESSION_IDLE_TIMEOUT. + +Changes in libsoup from 2.31.92 to 2.32.0: + + * (No changes, just a version bump) + +Changes in libsoup from 2.31.90 to 2.31.92: + + * Updated for gobject-introspection 0.9.5. Also added some new + annotations and removed a bunch of private headers from the + scanning process. + + * Percent-encoded characters in URIs are no longer + automatically normalized to uppercase, since apparently some + servers are stupid. [#628728, Sergio Villar Senin] + + * Fixed a crash when resolving a URI containing both spaces + and non-UTF8 8bit characters. [#629449] + +Changes in libsoup from 2.31.6 to 2.31.90: + + * libsoup now tries to connect to each IP address associated + with a hostname, if the first one fails. In particular, if a + host has both IPv4 and IPv6 addresses, and only one of them + is reachable from the current host, libsoup will now try the + other one rather than failing. (libc is supposed to sort the + IP addresses in the right order, such that, eg, if you don't + have IPv6 connectivity, it will put the IPv4 address first. + For some reason though, this seems to be broken on some + distros.) [#526321]. + + * Fixed Accept-Language header generation in locales where "," + is used as the decimal point. + +Changes in libsoup from 2.31.2 to 2.31.6: + + * Disabled TLS 1.2 in addition to the already-disabled 1.1 and + 1.0 [see below, in libsoup 2.27.2], thus making libsoup + usable with gnutls 2.10. [#622857. This commit, 01a43ad9, + can be applied to libsoup 2.30.x as well if you need that + release to work with gnutls 2.10.] 
+ + * When using libproxy 0.3 or newer, libsoup no longer leaks + proxy-related environment variables into child processes + [#603285] + + * Changed the way message/connection binding works in + SoupSession so that (among other things), when there are + multiple requests queued to a host, and one of them gets a + network error, the other requests are still allowed to try + to succeed, rather than all failing immediately. [#619633] + + * SoupSession now limits the number of times a message can be + redirected, to avoid infinite loops [#604383, José Millán + Soto] + + * Fixed handling of certain messages where the response + headers included "Connection: close" but the server did not + actually close the connection at the end. [#611481] + + * Fixed some incorrect g-i annotations [#621021] + + * Fixed an out-of-bounds memory access when processing certain + Set-Cookie headers [#620288] + + * Improved msg->reason_phrase on network errors [#623274] + + * Fixed gir file disting [#621727, Yaakov Selkowitz] + +Changes in libsoup from 2.30.1 to 2.31.2: + + * gobject-introspection has now been merged in. Use + --enable-introspection to build. [#576595, Andreas Rottmann] + Note that the introspected API is not yet stable, and in + particular, there are numerous values annotated as "utf8" + which are actually not guaranteed to be utf8. (Eg, most + header data.) + + * Added some helper functions for bindings: + soup_buffer_new_take(), + soup_message_body_append_take(), and + soup_buffer_get_data(). [#576595, Andreas Rottmann] + + * Also added properties for several SoupMessage public + fields, and getter methods for various boxed types + (SoupCookie, SoupDate, SoupURI). + + * Added some additional hash-table annotations. + [#619086, Gustavo Noronha Silva] + + * Marked SoupSession abstract. [#617216, Lorenzo Gil, the + first bug filed by someone trying to use libsoup via + introspection!] Likewise for SoupAuth and SoupAuthDomain. 
+ + * Fixed a problem with SoupSessionAsync that would cause + messages to get lost if you aborted a previous message while + it was still looking up the hostname. Fixed several other + problems that were discovered while adding a regression test + for that. [#618641, thanks to Claudio Saavedra for a good + test case] + + * Fixed another connecting-to-lame-http-server problem, and a + getting-stuck-in-a-loop-reconnecting bug that it revealed. + [#615535] + +Changes in libsoup from 2.30.0 to 2.30.1: + + * Fix for https through proxies that close the connection when + returning a "407 Proxy Authentication Required" response, + and add a regression test for that case. [#611663] + + * Fixed multiple forms/multipart-related interoperability + problems reported by Egon Andersen: + + * Don't quote the multipart boundary string if it's + not needed, since RFC 2616 recommends that you + don't, and some servers don't handle quotes there + correctly. (Sigh.) [#614176] + + * Don't put an extra blank line before the first + multipart part, since it's unnecessary and some + servers don't handle a multipart preamble correctly. + (Sigh.) [#614183] + + * Don't put Content-Transfer-Encoding headers in the + multipart/form-data parts, even though the HTML 4 + spec says you must, since no other browsers do, and + some servers don't handle them correctly. (Sigh.) + [#614198] + + * Changed SoupCookieJarSqlite to actually erase deleted + cookies from the database. [#615711, Lukasz Slachciak] + + * Fixed SoupLogger to be more robust against getting passed + bad data by the session. [#611663] + + * Fixed SoupAuthDomain to ignore paths when doing proxy auth + + * Fixed a g_warning when hovering over a javascript link in + WebKit. [#613442, Xan Lopez] + +Changes in libsoup from 2.29.91 to 2.30.0: + + * Fixed a crash in the whitespace-stripping code in + soup_uri_new() [#612644, "arnaud.lb"] + + * Update content-sniffing algorithm to match Chrome and the + soon-to-be-updated sniffing spec. 
[#611502, Gustavo Noronha + Silva] + + * We now handle "Content-Encoding: x-gzip" as well as "gzip" + (even though "x-gzip" has been deprecated for more than 10 + years). [#611476] + + * Fixed leaks found by valgrind + + * Make the "make check" programs only bind to 127.0.0.1, not + any public network interfaces. [#609489, Saleem Absulrasool] + + * Add a test to sniffing-test to make sure that Content-Type + parameters are preserved correctly. [Gustavo Noronha Silva] + +Changes in libsoup from 2.29.90 to 2.29.91: + + * Added SOUP_SESSION_SSL_STRICT and + SOUP_MESSAGE_CERTIFICATE_TRUSTED, to allow callers to + determine if an https response comes from a server with a + recognized/valid or unrecognized/invalid certificate. + [#610374, Gustavo Noronha Silva] + + * Fixed handling of certain badly-formatted URIs [#590524] + +Changes in libsoup from 2.29.6 to 2.29.90: + + * Added soup_cookie_jar_set_accept_policy() and related API + for implementing cookie acceptance policies. [#608353, Xan + Lopez] + + * Fixed the "request-read" signal in SoupServer to actually be + emitted. + +Changes in libsoup from 2.29.5 to 2.29.6: + + * Fixed SoupContentDecoder to ignore trailing junk after the + encoded message body (as other browsers do), rather than + getting stuck in an infinite loop. [#606352] + + * Fixed an invalid read in soup_cookie_applies_to_uri() + [#607024, pointed out by Xan] + + * Fixed linking on OS X [#606959] + + * Removed a harmless warning in SoupServer. [#606645] + +Changes in libsoup from 2.29.3 to 2.29.5: + + * Added SoupContentDecoder, providing support for + Content-Encoding: gzip for WebKitGTK. [#522772] + + * Added "accept-language" and "accept-language-auto" + properties to SoupSession, to support the Accept-Language + header. [#597004, Mario Sanchez Prada] + + * Fixed a bug in SoupPasswordManagerGNOME that could cause + crashes if you typed the wrong password once and then tried + again. 
[#595554, debugged by Gustavo Noronha Silva] + + * Fixed a crash in SoupAuthDigest if the server claims support + for both qop=auth and qop=auth-int. (This was not noticed + sooner because no one actually supports qop=auth-int, and + the server in question here was probably confused. :) + + * Updated cookie parsing/output to more closely match + draft-ietf-httpstate-cookie-00. [Also fixes #603496 (WebKit + unit test), and #604794 (hang parsing malformed Set-Cookie + header)] + + * Fixed https-via-proxy to not hang if there is an error + communicating with the proxy immediately after the TLS + negotiation. [#587528] + + * Fixed a bug that broke gobject-introspection's introspection + of libsoup. [#603696, Vincent Untz] + + * Handle spurious CR/LFs between responses. [#602863, + Alexander V. Butenko] + + * Fixed soup-message-client-io to not erroneously include URI + fragments on the Request-Line when sending via a proxy. + [Related to WebKit bug #28687] + + * Fixed Digest authentication against certain (buggy?) + clients/servers that require you to use quotes in exactly the + same places where the spec uses them. [#582219] + + * Fix ugly gtype-related hack to work with the latest unstable + glib. [Benjamin Otte] + +Changes in libsoup from 2.28.1 to 2.29.3: + + * Fixed a crash in SoupCookieJarSqlite when using cookie + databases not created by libsoup (eg, upgraded epiphany + installations). [Patch from Emilio Pozuelo Monfort] + + * Fixed SoupCookieJar to handle non-http URIs properly (so + that, eg, JavaScript bookmarklets that try to set/read + cookies won't cause crashes). [#602498] + + * HEAD requests that receive a "303 See Other" response will + now do a HEAD, not a GET, on the redirected-to resource. + Fixes gvfs access to some sites, including certain + youtube.com URIs. [#600830] + + * Fixed a g_warning that would always trigger in the + server-side SoupCookie code. 
[#602389] + + * Fixed the server-side SoupMultipart code to be able to parse + multiparts containing binary attachments, rather than + rejecting them as malformed. [#601640] + + * Fixed the Request-Line format in the https-over-proxy case. + Among other things, this fixes access to bugzilla.gnome.org + from WebKitGTK-based browsers. [#598277, #600826] + + * Fixed a leak in SoupSession if a message was cancelled while + the initial socket connection was in progress. [#596074, + debugged by Arnout Vandecappelle] + + * Fixed server-side parsing of Digest auth. [#602898, Chris + Head] + + * Fixed WinSock initialization on Windows. [#600689, Tor + Lillqvist] + + * Fixed a sporadic crash in the SSL code on Windows. [#600748, + Tor Lillqvist] + + * Fixed handling of https connections with timeouts on + Windows. [#600749, Tor Lillqvist] + + * Added soup_session_prepare_for_uri(), to allow DNS + prefetching for faster browsing. [#598948, José Millán Soto] + + * SoupSession now avoids redundant DNS lookups again when + first connecting to a new site, resulting in (probably + imperceptibly) faster loads. + + * Added some debugging APIs to SoupConnection and SoupSession + for use by, eg, epiphany's soup-fly extension. [#589163, + José Millán Soto] + +Changes in libsoup from 2.28.0 to 2.28.1: + + * libsoup will now attempt to make multiple connections to a + server at once when there are multiple messages queued to + that server. The previous behavior (only allowing a single + pending connection to each server) resulted in slow load + times on pages with lots of subresources (images, css, js, + etc) on servers that disallow persistent connections. + [#594768] + + * There should now be fewer (no?) "Connection terminated + unexpectedly" errors in WebKitGTK. + + * Fixed a crash in SoupCookieJarSqlite [#596859, patch from + Alexander Sack]. + + * Fixed soup_address_get_physical() and address-to-name + resolution of SoupAddress [patch from Enrico Tröger]. 
+ + * Fixed a bug in SoupContentSniffer that could cause false + negatives [#597545, patch from Alejandro Castro]. + + * Fixed the configure error if you have gnutls-devel but not + gcrypt-devel installed [#587709]. + +Changes in libsoup from 2.27.92 to 2.28.0: + + * Fixed a handful of leaks found with valgrind, including a + large one in SoupContentSniffer [WebKit bug 28148]. + + * Changed the behavior of SoupCookieJarSqlite to improve + performance. [#584522, patch from Gustavo Noronha Silva] + + * Fixed a crash in SoupSocket that affected gupnp. [#594951, + patch from Olivier Crête] + + * Fixed the type of the SOUP_METHOD_* and SOUP_URI_SCHEME_* + macros to be const char * rather than gpointer. [#594508] + +Changes in libsoup from 2.27.91 to 2.27.92: + + * Removed SoupPasswordManager from the public API until its + problems can be addressed. Although it is still present, you + need to #define a special symbol for it to be visible in the + header files; see #594377 for details. + + * Fixed a bug where empty query components were dropped from + URIs. [#594405] + + * Fixed "make check" to work (but warn) when building with + --disable-ssl. + + * Fixed some small documentation bugs pointed out by Dominik + Bylica and Lucian Langa. + +Changes in libsoup from 2.27.90 to 2.27.91: + + * Added SoupPasswordManager, an interface for managing + persistent password storage, and SoupPasswordManagerGNOME + (in libsoup-gnome), which implements it using gnome-keyring. + tests/get.c provides a minimal example of how to use it. + + * libsoup should now notice when the server closes a + persistent connection, and close its side of the connection + sooner. This should hopefully fix the spurious "Connection + terminated unexpectedly" errors in WebKitGTK. [#578990] + + * Fixed some problems with connection management in + SoupSession that could cause a session to eventually "stall" + and be unable to process new requests. 
[#592084] + + * Fixed an infinite loop that caused 100% CPU usage if the + network went down at exactly the right time while there were + unsent messages in the queue. [#592492] + + * Fixed a crash in SoupLogger. [#591857] + + * Fixed the definition of soup_message_is_keepalive() for + HTTP/1.0 messages, to fix a problem introduced in 2.27.90 + where some messages would load completely but never emit + "finished". + + * Fixed a crash in SoupServer introduced in 2.27.90 when + processing a request with no "Host" header. + +Changes in libsoup from 2.27.5 to 2.27.90: + + * libsoup now uses glib's GResolver rather than its own DNS + code. For 2.27.90, the only visible change should be that + internationalized domain names are now supported. [#548287] + + * Added soup_message_disable_feature(), which allows you to + disable particular features (eg, cookies, proxy, + content-sniffing, etc) on a per-message basis. [#574773] + + * It is now possible to implement "OPTIONS *" in a SoupServer; + you must explicitly register a handler for "*" in order to + do this. [#590751] + + * Ignore Content-Length on EOF-terminated responses, to match + other browsers and therefore cope with broken servers that + send the wrong length. [Patch from Benjamin Otte.] + + * Fixed the status code when trying to fetch an https URI with + a non-gnutls build of libsoup. [#590464] + + * Fixed strict-aliasing warnings introduced in 2.27.4 + [#588771] + + * Fixed some warnings noted by fortify [#591226] and -Wextra. + + * libsoup now uses automake 1.11's silent-rules support by + default (if you are building with automake 1.11). Use + "./configure --disable-silent-rules" or "make V=1" to + disable. + +Changes in libsoup from 2.27.4 to 2.27.5: + + * Fixed a crash when a web server redirected a request to a + non-http URI (eg, "about:blank"). [#528882] + + * Fixed a hang when trying to create an attachment on certain + bugzilla installations from epiphany. 
[#584645] + + * Fixed verification of V1 TLS certificates [#589323, Patrick + Ohly] + + * Fixed compile problems on Windows (in the ssl code), and on + Linux (when the most recent version of gtk-doc was + installed). + +Changes in libsoup from 2.27.2 to 2.27.4: + + * Added SoupContentSniffer and the "content-sniffed" signal on + SoupMessage, to do Content-Type sniffing per the HTML5 / + draft-abarth-mime-sniff algorithm. [#572589, Gustavo Noronha + Silva] + + * Updated the earlier SoupSession timeout fixes ([#574414], + [#578928]) so that async connect() also times out [#588177, + Mark Nauwelaerts] and SSL works on Windows again [#587910, + Fridrich Strba]. + + * Fixed the behavior on a 301 response to a POST to match + real-world usage rather than what the spec says. (We were + doing the right thing on 302 and 303, but had missed 301.) + [#586692] + + * Changed configure so that if GNUTLS isn't found then it + errors out, rather than silently building an SSL-less + libsoup. Configure with --disable-ssl if you actually don't + want SSL. [#584955] + +Changes in libsoup from 2.27.1 to 2.27.2: + + * Replaced SoupProxyResolver with SoupProxyURIResolver, which + is a bit simpler, works with non-HTTP URIs (and so could be + used by gvfsd-ftp) and supports proxy auth correctly. + [#580051] + + * Fixed SoupSession to not try to resolve http server + hostnames when it's just going to pass the hostname off to a + proxy server anyway. This fixes things on hosts that use a + proxy for everything and have no working DNS config + [#577532] and also makes WebKitGTK behave more like other + browsers in terms of per-host connection limits (we now + limit connections based on hostname rather than on IP + address). + + We also no longer set the AI_CANONNAME flag when calling + getaddrinfo(), which saves us a little bit of unnecessary + network traffic. [Pointed out by Christophe Gillette on the + mailing list.] 
+ + * libsoup now always uses SSL 3.0 (not TLS 1.0 or 1.1) for + https URIs, to work around problems with older servers that + don't implement the (apparently quite confusing) TLS/SSL + compatibility rules correctly. Makes a bunch of + previously-inaccessible sites now accessible in WebKitGTK + (notably PayPal) [#581342]. Will eventually be revisited, to + first try TLS 1.1 and fall back if that fails. + + * Fixed Digest auth to (recent) Apple CalDAV servers. + [#583091] + + * Changed the way the SoupSession "authenticate" signal works + a bit. We now never emit "authenticate" before sending a + request, even if we know for sure that it's going to fail, + because this makes the semantics of the authenticate handler + too complicated (and because we'll only get into this + situation if a previous call to the authenticate handler + failed anyway). Fixes problems in WebKitGTK when you cancel + a password dialog, and then later try to load the page + again. [#583462, mostly figured out by Gustavo Noronha + Silva]. + + * Fixed a bug in the CRLF-vs-LF patch (#571283) that caused + libsoup to fail to parse the response headers (returning + SOUP_STATUS_MALFORMED) if a CR LF got split across two + read()s. [#582002] + + * Allow using PUT in soup_form_request_for_data(), to work + with certain broken web APIs. [#581860, Ross Burton]. Also, + fixed a problem with empty POST bodies that made some parts + of gmail not work in WebKitGTK. + + * Applied some minor bugfixes to configure.in and autogen.sh + [#583911, #583942]. Fixed configure.in to not use gcc + warning options that the installed version of gcc doesn't + recognize [#578851]. + + * Added G_GNUC_NULL_TERMINATED and G_GNUC_PRINTF to a few + methods that should have had them. [#581754, Ross Burton] + +Changes in libsoup from 2.26.1 to 2.27.1: + + * SOUP_SESSION_TIMEOUT now works properly with + SoupSessionAsync [#574414] and SSL [#578928]. Added + tests/timeout-test to test this. 
+ + * SoupDate fixes: + + * soup_date_to_string() now handles SOUP_DATE_RFC2822 + [#579055, Enrico Tröger] + + * soup_date_new_from_string() now accepts 24:00 as a + time in ISO8601 timestamps + + * soup_date_to_string() now coerces the date to UTC + for HTTP and cookie dates, and outputs the correct + UTC offset for the other date types. + + * Added regression tests to tests/date + + * soup_headers_parse() now completely ignores + syntactically-incorrect headers, rather than passing them to + soup_message_headers_append() and causing a g_warning. + soup_message_headers_append() now also rejects 0-length + header names. Updated tests/header-parsing to check this. + [#579318] + + * Fix a crash when cancelling a message from a "restarted" + handler, and updated a regression test to notice the + underlying cause. [#580193] + + * Completing the API updates for #576760 from 2.26.1, + soup_message_headers_get() is now marked deprecated in favor + of soup_message_headers_get_one() and _get_list(). + +Changes in libsoup from 2.26.0 to 2.26.1: + + * libsoup uses libproxy for PAC and WPAD proxy resolution + again. However, it arranges to do all communication with + GConf itself, to ensure that libproxy doesn't call it in + non-thread-safe ways. [#571527] Also, fixed a bug in + SoupSessionSync when proxy resolution failed. [#574957, + patch from Milan Crha]. + + (Also fixed three SoupProxyResolverGNOME bugs since the + 2.26.0.9 preview release. [#578746, #578809]) + + * SoupURI now handles unencoded spaces in URIs. In particular, + redirects via Location headers with spaces in them now work. + [#566530] + + * libsoup can now deal with servers (and clients) that + erroneously use LF LF instead of CR LF CR LF to separate + the headers and body. [#571283] + + * Added soup_message_headers_get_one() and + soup_message_headers_get_list(), which will eventually + deprecate soup_message_headers_get(). 
This lets applications + deal correctly with implementations that erroneously send + multiple copies of single-valued headers. [#576760] + + * In particular, soup_message_headers_get_content_type() now + ignores duplicate Content-Type headers [#576760] and also + ignores syntactically-incorrect Content-Type headers. + [#577630] + + * SoupCookieJar can now store multiple cookies with the same + domain and name, but different paths. [#577360] + + * Abnormal SSL connection closes are now treated as ordinary + EOFs, for compatibility with certain sites. [#577386] + + * soup_header_g_string_append_param() now allows NULL values. + [#577728] + + * soup_message_headers_append() now rejects header names and + values with newlines or certain other illegal data in them, + rather than generating syntactically invalid headers. + + * Fixed a small bug in soup_date_new_from_string's ISO 8601 + handling [qv #578369 for g_time_val_from_iso8601]. + + * The regression tests now work correctly on machines where + "localhost" resolves to "::1" instead of "127.0.0.1". + [#576583, patch from Andreas Rottmann] + + * Fixed warnings when a message has a network problem when + many other messages are queued. [#578809] + + * Miscellaneous documentation fixes/clarifications. + +Changes in libsoup from 2.25.91 to 2.26.0: + + * Temporarily disable libproxy support to work around a bug in + its gnome plugin that causes gvfsd-http (and probably + eventually other apps) to crash. [#571527]. For now, + SoupProxyResolverGNOME uses only GConf. To be fixed in + 2.26.1 + + * Fixed a bug that showed up in WebKit, where if many messages + were queued all at once to a server that doesn't support + persistent connections, some of the requests will get lost. + #574365, reported by Xan Lopez. + + * Fixed SoupServer to support using SOUP_ENCODING_EOF, so you + can stream responses of unknown length to HTTP/1.0 clients. + [#572153]. 
Added a regression test for this, and for chunked + and Content-Length-based streaming. + + * Fixed several bugs that prevented SoupCookieJarSqlite from + working. [#572409, patch from Xan Lopez] + + * Added G_{BEGIN,END}_DECLS guards to public headers that were + missing it. (Xan Lopez) + + * Misc gtk-doc improvements + +Changes in libsoup from 2.25.5 to 2.25.91: + + * Fixed a crash in SoupProxyResolverGNOME when the proxy + requires authentication. (This does not make proxy + authentication *work* yet, it just makes it not crash.) + + * Updated documentation + +Changes in libsoup from 2.25.4 to 2.25.5: + + * SoupProxyResolverGConf (which was incomplete) is gone, and + libsoup-gnome now requires libproxy, which is now officially + an external dependency of GNOME. + + * Fixed a bug in SoupCookieJar that was making it send + "Cookie: (null)" when it had no cookies for a site, which + confused some web servers (WebKit bug 23240). + + * Fixed a bug with using SOUP_MEMORY_TEMPORARY buffers and + soup_message_body_set_accumulate(FALSE). (Part of WebKit bug + 18343, noticed by Gustavo Noronha Silva.) + + * Fixed the build with non-gcc compilers + +Changes in libsoup from 2.25.3 to 2.25.4: + + * Added soup_session_get_feature() and + soup_session_get_features(), to query the features currently + available in a session (which is needed by the patch in + https://bugs.webkit.org/show_bug.cgi?id=22624) + +Changes in libsoup from 2.25.2 to 2.25.3: + + * Fixed a crash when using both cookies and a proxy. [#562191, + Mark Lee] + + * Fixed soup_form_decode() to correctly handle forms with + URI-encoded parameter names [#563302, Evan Nemerson] and + added a regression test. + + * Fixed a crash in SoupProxyResolverGConf. [#563145] + +Changes in libsoup from 2.25.1 to 2.25.2: + + * Fixed client behavior when presented with multiple auth + types to choose the *strongest* auth type (eg, Digest) + rather than the *weakest* one [#562339, Pontus Oldberg]. 
+ Added a regression test for this. + + * Moved libsoup-gnome headers to a different directory to make + it easier to split libsoup and libsoup-gnome into separate + packages, and to ensure that things that only want to be + looking at plain libsoup headers (like gir-repository) don't + accidentally see the libsoup-gnome ones. + + * Some minor doc fixes + + * Fixed libsoup-gnome linking with --as-needed. [#559342] + +Changes in libsoup from 2.24.1 to 2.25.1: + + libsoup 2.25.1 introduces a new library, libsoup-gnome, which + will be used for features which are important to GNOME apps, + but which require GNOME-specific libraries that non-GNOME apps + may not want to add dependencies on. + + In 2.25.1, libsoup-gnome contains: + + * SOUP_TYPE_PROXY_RESOLVER_GNOME, a SoupSessionFeature + type that can be added to a SoupSession to provide + automatic proxy handling via the GConf proxy keys. (See + below) The default implementation uses libproxy, which + also handles WPAD, PAC, etc, but if libproxy is not + available it will use GConf directly, supporting only + the basic HTTP proxy functionality. + + * SoupCookieJarSqlite, a SoupSessionFeature that handles + cookies and stores them in a Firefox 3-compatible sqlite + file. (This is not actually a "GNOME-specific" feature, + but I didn't want to make libsoup itself depend on + sqlite, and I didn't want to make the dependency + optional. This might change before 2.26.) + + * SOUP_TYPE_GNOME_FEATURES_2_26: a SoupSessionFeature type + that can be added to a SoupSession to add all + GNOME-integration features that are available for 2.26; + as of 2.25.1, this is just the GNOME proxy resolver, but + by 2.26.0 it may also include gnome-keyring support and + possibly other features. 
+ + Applications/libraries that are currently doing GConf proxy + lookup by hand can be updated as follows: + + * Remove all of the existing code that listens to the + GConf keys and sets SOUP_SESSION_PROXY_URI + + * Change the configure check to require + "libsoup-gnome-2.4 >= 2.25.1" instead of "libsoup-2.4" + + * #include <libsoup/soup-gnome.h> + + * After creating your SoupSession, do: + + soup_session_add_feature_by_type (session, SOUP_TYPE_PROXY_RESOLVER_GNOME); + + (Or alternatively, use SOUP_SESSION_ADD_FEATURE_BY_TYPE + with soup_session_async_new_with_options() or + soup_session_sync_new_with_options().) + + + Other new features and bug fixes in 2.25.1 include: + + * SoupCookieJarText, like SoupCookieJarSqlite, but using the + old-style cookies.txt format, and in the base libsoup rather + than libsoup-gnome. + + * Various bugfixes to SoupCookie and SoupCookieJar to fix the + problems with cookies not working on certain sites. + + * The new SoupMultipart type provides support for multipart + MIME bodies, and soup-form now includes several methods for + generating and parsing multipart form data and file uploads. + + * SoupMessageHeaders now has methods for easy handling of the + Content-Type, Content-Disposition, Range, and Content-Range + headers. The Content-Disposition handling recognizes + RFC2231-encoded UTF-8 filenames. + + * SoupServer now automatically handles partial GET requests; + if your server returns SOUP_STATUS_OK in response to a + partial GET, libsoup will automatically convert it to a + SOUP_STATUS_PARTIAL_CONTENT response with only the requested + portions. + + Thanks to Xan Lopez and Diego Escalante Urrelo for their work + on SoupCookie, SoupCookieJar, SoupCookieJarText, and + SoupCookieJarSqlite. + +Changes in libsoup from 2.24.0.1 to 2.24.1: + + * Fixed a crash when unreffing the session from a + soup_session_queue_message() callback [#533473], and added + regression test.
In particular, this fixes a crash in + seahorse when trying to connect to a non-responsive + keyserver. + + * Fixed an infinite loop when giving a bad password to a site + that uses non-standard capitalization in the + WWW-Authenticate header (eg, gmail.com) [#536285]. + + * Fixed a leak in SoupSessionAsync when using a non-default + GMainContext. [addendum to #498509, Arnout Vandecappelle] + Added additional code to the regression tests to make sure + sessions and servers do not get leaked. + + * Fixed a leak in the XML-RPC code + + * Compile fixes for "gcc -pedantic" [#553976, Sander Dijkhuis] + and -DG_DISABLE_DEPRECATED / -DG_DISABLE_SINGLE_INCLUDES + [#557072, Cosimo Cecchi] + + * Patched xmlrpc-test to accept the incorrect response to + test_echo() that php-xmlrpc gives when it's built against + libxml2 >= 2.7.1 (qv http://bugs.php.net/45996), so that + I can "make distcheck"... + + * Updated generated documentation + +Changes in libsoup from 2.23.92 to 2.24.0.1: + + * Reverted part of the fix for #528882, which caused the DAAP + plugin in rhythmbox to crash. [#553466] + +Changes in libsoup from 2.23.91 to 2.23.92: + + * Fixed the handling of a 302 response to a HEAD request, + which should NOT be treated like a 303 response. [#551190, + Jonathan Matthew] + +Changes in libsoup from 2.23.6 to 2.23.91: + + * Fixed a crash in gvfs [#528882], though there is still an + unknown bug there. As part of this fix, libsoup will now + return an error if you try to do an operation on a non-HTTP + URI. (Previously it was just treating any URI scheme except + "https" as HTTP.) + + * Added soup_date_to_timeval() for gvfs. [#549006, patch from + Bastien Nocera] + +Changes in libsoup from 2.23.1 to 2.23.6: + + * Fixed use of g_idle_add() so that heavy I/O won't end up + blocking libsoup callbacks. [#536676, Benjamin Otte] + + * Allow the caller to override the Host header. [#539803, Marc + Maurer] + + * Properly handle responses larger than 4G. 
[#539861, Peter + Christensen] + + * Fixed the build when using certain LDFLAGS [#541506, Götz + Waschk] + + * Fixed a small bug in Digest auth handling. [#544681, Mads + Chr. Olesen] + + * Fixed multiple Windows bugs [Tor Lillqvist] + +Changes in libsoup from 2.4.1 to 2.23.1: + + * This is the first unstable release leading up to GNOME 2.24. + Bumped the libsoup version number up to 2.23.x to match the + GNOME version; note that the API version is still "2.4", + meaning in particular that you still call it "libsoup-2.4" + when using pkg-config. + + * Added SoupSessionFeature, an interface type that will be + used for several new features. Ported SoupLogger and + SoupAuthManager to use it. + + * Added SoupCookie and SoupCookieJar. This API is already + being used in Epiphany, via WebKit, but it is not yet + complete. + + * Fixed GnuTLS support on Win32. [#528752, Marc Maurer] + +Changes in libsoup from 2.4.0 to 2.4.1: + + * Fixed SoupMessage to not downgrade to HTTP/1.0 for the + second attempt when it receives an HTTP/1.0 redirect or 401. + [#521848, Tommi Komulainen] + + * Fixed Host: header syntax when the host is an IPv6 address + literal. + + * Fixed SoupSession to not emit "authenticate" multiple times + for messages that have been requeued. [#522601, Tommi + Komulainen]. Also added two new signals to SoupSession, + request-queued and request-unqueued, to help simplify + certain session-helpers and avoid bugs like this in the + future. + + * Fixed soup_server_pause_message() to actually work (rather + than *un*pausing the message). + + * Added a property SOUP_SESSION_IDLE_TIMEOUT that can be used + to set a timeout after which idle connections will + automatically be closed. [#518214, Jorn Baayen] + + * Implemented RFC 2069-style Digest auth, and fixed SoupAuth + to compare auth scheme names case-insensitively, to fix + authentication against Apple's calendar server.
[#498484] + + * Fixed a crash in SoupAuthDomainDigest if the client provided + an unrecognized username. [pointed out by Curtis Magyar on + IRC] + + * Fixed a few SoupDate bugs. (In particular, it was outputting + the wrong day of the week when stringifying dates.) + + * Improved the cleanup of idle connections, to fix slow load + times with the libsoup backend of WebKit. + + * Added a new SoupMessage signal "wrote-body-data" that can be + used for progress information when sending a large request + body. Also allow providing the request body in multiple + chunks even when using Content-Length encoding. [#525101, + Christian Kellner] + + * libsoup now ignores SIGPIPE globally, instead of + un-thread-safe-ly ignoring it only around network writes. In + particular, this means it is ignored when the SSL code needs + to unexpectedly do a write when we asked it to do a read. + [#524397, Curtis Magyar] + + * The discard-body-chunks-once-they're-no-longer-needed + behavior, confusingly called SOUP_MESSAGE_OVERWRITE_CHUNKS, + is now controlled by a SoupMessageBody method + (soup_message_body_set_accumulate()), and can be applied to + either the request body or the response body. + (OVERWRITE_CHUNKS is still available for backward + compatibility.) [#522146, Christian Kellner] + + * The DNS cache no longer caches "no such host" results, since + some name servers lie to clients outside their firewall, + which could then cause problems for laptops moved between + networks. [#523269, Jörgen Scheibengruber] + + * Added some new regression tests, fixed some small bugs in + the existing ones. + +Changes in libsoup from 2.3.4 to 2.4.0: + + * Fixed a small memory leak in SoupSession. [#518798, Wouter + Cloetens] + + * Minor fixes to redirect behavior; PROPFINDs can now be + automatically redirected (pointed out by Christian Kellner), + and 302 is treated like 307, not 303. Also fixed to make + sure that redirect-test actually gets run by "make check". 
+ + * The SoupSocket I/O methods now set nread/nwrote even on + error. [Benjamin Otte] + +Changes in libsoup from 2.3.2 to 2.3.4: + + * The documentation should be accessible from devhelp again + [#518384, Mart Raudsepp]. (Also fixed another + documentation-generation bug that affected builds from svn, + but not the 2.3.2 tarball for some reason. [#518317, + Benjamin Otte].) + + * Fixed dependencies in libsoup-2.4.pc file [#517631, + Sebastian Dröge] + +Changes in libsoup from 2.3.0.1 to 2.3.2: + + API changes / Behavior changes: + + * soup_server_add_auth_domain() now refs the auth domain when + adding it. (soup_server_remove_auth_domain() already + unreffed it.) This means existing applications using + SoupAuthDomain will now have a small memory leak. Those + applications should update their libsoup-2.4 requirement to + ">= 2.3.2" at some point before the final GNOME 2.22.0 + release, and then fix the code to unref the auth domain + after adding it to the server. + + * SoupSession's automatic redirect-handling behavior now obeys + RFC 2616 more closely. In particular, status codes 300 and + 304 are no longer mistakenly considered redirects; POSTs + that receive 303 are now redirected into GETs; and POSTs + that receive 301, 302, or 307 are now not redirected. + + Applications that were using the SOUP_MESSAGE_NO_REDIRECT + flag to prevent libsoup from redirecting POSTs incorrectly + before should now be able to remove that if they depend on + libsoup-2.4 >= 2.3.2. + + API additions: + + * Added a SOUP_SESSION_USER_AGENT property to SoupSession, and + SOUP_SERVER_SERVER_HEADER to SoupServer, to support + automatically adding "User-Agent" and "Server" headers to + messages. (The default behavior is to do nothing, as + before.) + + * Added several new methods to soup-forms.h. Applications that + are encoding a fixed set of form fields can now just pass + them to soup_form_encode(), rather than needing to construct + a GHashTable or GData list. 
(Likewise, the new + soup_uri_set_query_from_fields() behaves similarly for + directly updating a URI with form data.) There are also now + soup_form_request_new() and other related methods, to + directly create a GET or POST SoupMessage for submitting a + form query. + + The original soup_form_* methods have all been renamed, + although #defines exist for backward compatibility. + + * Added soup_message_set_chunk_allocator() and + soup_buffer_new_with_owner(), to give applications more + control over memory usage/copying when doing streaming HTTP. + [Wouter Cloetens, #513810]. + + * Added several new methods to soup-value-utils.h for working + with multiple array or hash table values at once: + soup_value_hash_new_with_vals(), + soup_value_hash_insert_vals(), + soup_value_hash_lookup_vals(), + soup_value_array_new_with_vals(), and + soup_value_array_append_vals(). + + This helps to simplify XML-RPC calls that send or receive + structs or arrays. + + * Added soup_date_to_time_t(). + + * Added SoupMessageHeadersIterator, an iterator type for + SoupMessageHeaders that can be used instead of + soup_message_headers_foreach(). + + Bug fixes: + + * Fixed a crash-when-idle in evolution-exchange [#437835] and + rhythmbox [#506552]. + + * Added the API version to the gtk-doc installation dir, to + prevent parallel-installation problems with libsoup 2.2 and + 2.4. [#512810, Daniel Gryniewicz]. + + * Fixed tests/query-test to compile correctly on Solaris. + [#513602, patch from Jeff Cai] + + * Fixed some other minor HTTP conformance issues. + + Python bindings: + + * Although not present in the release tarball, there are now + experimental python bindings for libsoup in GNOME subversion + (in the python/ subdirectory of libsoup trunk). 
These are + not yet stable (and are not built by default or installed + even when building from svn), but comments on them are + welcome at libsoup-list@gnome.org + +Changes in libsoup from the 2.2 series to 2.3.0.1: + + libsoup 2.3.0 is the first beta release of the libsoup 2.4 + series. It is an API break from the earlier 2.2 series that + fixes various bugs and API warts and lays the groundwork for + language bindings and various new features in upcoming + releases. + + (2.3.0.1 is identical to the 2.3.0 release in terms of code, + but includes this updated NEWS file which was accidentally + left out of the 2.3.0 tarball.) + + http://library.gnome.org/devel/libsoup/unstable/libsoup-porting-2.2-2.4.html + goes over the API changes in detail. If you have questions not + answered by the porting document, please send mail to + libsoup-list@gnome.org. + + Specific user-reported bugs fixed in this release: + + * SoupURI now correctly handles URIs with complex encoded + queries [#266516, Jean-Yves Lefort] + + * It is now possible for a SoupServer to use Digest auth + without needing to have the cleartext password available. + [#347108, Anas Nashif] + + * Digest authentication now properly handles "stale=true" and + "nextnonce=..." 
[#471380, Jari Urpalainen] + + * SoupServer is now subclassable [#491653, Mathias Hasselmann] + + * soup_server_run_async and soup_server_quit no longer ref and + unref the server, as that doesn't match ordinary GObject + conventions [#494128, Mathias Hasselmann] + + * The test programs no longer use a symbol name that conflicts + with Cygwin [#501631, Cygwin Ports Maintainer] + + * libsoup can now handle the not-quite-HTTP responses returned + by Shoutcast servers [#502325, Wouter Cloetens] + + * If you use libsoup while disconnected from the network, it + no longer caches the failed DNS results [#508593, Bradley + Worley] + + Items from http://live.gnome.org/LibSoup/ToDo fixed: + + * "Expect: 100-continue" processing now works correctly on + both client and server. + + * SoupSessions are no longer leaked + + * The XML-RPC API is improved. The SOAP API is gone... + + * Added utility functions for HTML form handling + + * Improved message header handling + + * SoupServer now automatically adds a "Date" header + +========== + +Changes in libsoup from 2.2.103 to 2.2.104: + + * soup_message_io_pause() and soup_message_io_unpause() are now + allowed for client messages (and in particular, they don't + mess up when called from the "got_chunk" callback). + [#452280, Marco Barisione] + + * Fixed some bugs in SOUP_SESSION_ASYNC_CONTEXT support that + would cause parts of an operation to run in the default + context rather than the session's context. Also fixed some + leaks and added a regression test. [#498509, Wouter + Cloetens] + + * There is a new test/sample program, tests/pull-api.c, + showing how to implement a pull API using SoupSessionAsync. + (This depends on the fixes for #452280 and #498509, so it + won't work with older versions of libsoup.) + + * Discovered "valgrind --leak-resolution=med" and fixed some + more memory leaks. + +Changes in libsoup from 2.2.102 to 2.2.103: + + * Fix memory corruption in SoupSessionAsync that caused + rhythmbox to crash.
[#484988, patch from Rob Bradford] + + * Fix socket refcounting in SoupServer to fix warnings / + possible crash. [#459896, Emanuele Aina] + +Changes in libsoup from 2.2.101 to 2.2.102: + + * Unbreak the build when building without SSL. Not that you + should be building without SSL anyway, but... (reported by + guenther). + +Changes in libsoup from 2.2.100 to 2.2.101: + + * Fix build on cygwin [384498] + + * Fix SSL rehandshaking on synchronous sockets [415402, Jacob + Berkman] and add a regression test for it. + + * Fix two bugs in https tunnels over proxies that require + authentication (noticed by Varadhan), and add a regression + test for them. + + * Ensure that if you queue multiple messages at once to an + http server that requires authentication but that you + haven't authenticated to yet, that all of the messages get + properly authenticated [271540, James Willcox]. And add a + regression test for it. + + * Fix NTLM authentication, which got broken by the previous + fix. [471389, Varadhan]. Add a basic NTLM regression test + that doesn't really test the crypto/encoding parts, but at + least makes sure that the message flow is correct. + + * Allow trailing whitespace after HTTP version in + Response-Line, for compatibility with broken servers + [475169, Stephane Loeuillet]. Add that case to the + header-parsing regression test. + + * Fix crash when the session's "authenticate" handler returns + a username and no password when using NTLM. [480987, Wendell + MacKenzie] + + * Use "new" glib base64 and iso8601 methods rather than + duplicating them. [337010, patch from Emmanuele Bassi]. + + * Implement soup_session_queue_message() for SoupSessionSync. + + * Add G_BEGIN_DECLS / G_END_DECLS to all headers that were + missing them. [438776, patch from Jonathon Jongsma]. + + * Fix broken definition of SOUP_IS_MESSAGE_FILTER_CLASS. Noted + by "cascardo" on libsoup-list. + + * Remove documentation of non-public MD5 methods [440092, + Jonathon Jongsma]. 
Removed a mysterious half-sentence in the + SoupMessage docs [458116, Marco Barisione]. + +Changes in libsoup from 2.2.99 to 2.2.100: + + * Fixed soup_headers_parse_status_line() so WebDAV response + parsing will work again. [406997] + + * Fixed a bug in the header-parsing regression test that + caused the test to fail sometimes, even though the actual + header-parsing code was fine. + +Changes in libsoup from 2.2.98 to 2.2.99: + + * Fixed header parsing, including a crash in SoupServer with + certain malformed requests [391970]. + + * Fixed redirection to other hosts with SoupSessionAsync. + [382251] + + * Fixed a small memory leak pointed out by Chris Austin. + +Changes in libsoup from 2.2.97 to 2.2.98: + + * The XML-RPC code now correctly interprets <value>foo</value> + as meaning the same thing as + <value><string>foo</string></value>. [364490] Pointed out by + Todd Kulesza. + + * Memory leak fixes from Andrew W. Nosenko. + + * A few symbols that should have been static before now are. + [376387] Patch from Matthias Clasen. + +Changes in libsoup from 2.2.96 to 2.2.97: + + * Fixed SOAP and XML-RPC code to handle whitespace and + comments in the XML better. (Based on a patch from Andrew W. + Nosenko.) + + * Fixed lots of typecasting/constness warnings in the code + (mostly via a patch from Andrew W. Nosenko) + + * Fixed build on Cygwin [321827] + + * Fixed libsoup-2.2.pc fields [343340] and make it get + uninstalled correctly [356809]. (Mikhail Zabaluev and + Matthew Barnes) + + * Fixed some small leaks in SoupServer pointed out by Paolo + Borelli. [351500] + +Changes in libsoup from 2.2.95.1 to 2.2.96: + + * SoupServer now works even if you don't explicitly set an + encoding for the response. (In particular, the automatic 404 + if you request a path with no handlers now works. Problem + pointed out by Dennis Jacobfeuerborn.) + + * WWW-Authenticate and Proxy-Authenticate responses with no + realm parameter are now ignored, as per RFC 2617, fixing a + crash pointed out by Nate Nielsen.
+ + * Added soup_xmlrpc_message_from_string(), from Fernando + Herrera [348532]. + + * simple-httpd and "get" now support HEAD + +Changes in libsoup from 2.2.94 to 2.2.95.1: + + * Even more fixes to XML-RPC, found by the new XML-RPC + regression test. This includes some API changes that I don't + feel guilty about, because the code totally didn't work at + all before. + + * Fixed a bug in soup_mktime_utc() + + * (2.2.95 was identical to 2.2.95.1. The only difference is + that the shared library version was belatedly bumped from + 8.2.0 to 8.3.0 to reflect the API "additions") + +Changes in libsoup from 2.2.93 to 2.2.94: + + * Various fixes to the XML-RPC code (which apparently had not + actually ever worked before) from Brent Smith. [343973, + 344222, 344458] + + * Added client and server API tutorials to the docs + + * auth-test now uses a local Apache 2.2 install, if possible, + rather than depending on files that used to be on an old + Ximian web server but haven't been anywhere for a long time. + [311825] + +Changes in libsoup from 2.2.92 to 2.2.93: + + * Fixed outgoing data corruption caused when SoupServer + started writing out a response a second time after already + having started once. [334469]. Also fixed 342640 and another + bug caused by the workaround for 334469 in 2.2.92. Based on + patches and analysis from William Jon McCann and Armin + Bauer. + + * Fixed a deadlock when changing a session's proxy URI. + [309867 / bnc 174255, based on a patch by Veerapuram + Varadhan]. + + * Fixed https-via-proxies in the synchronous case. [bnc 174255] + + * Fixed a crash in evolution-exchange [342545, fix based on an + analysis by Wang Xin]. + + * Fixed simple-proxy to not crash at startup. Oops. (Alex + Larsson) + +Changes in libsoup from 2.2.91 to 2.2.92: + + * Fixed server-side digest auth to return a valid "algorithm" + value and client-side to not crash if it sees an invalid one + [328615]. 
+ + * Fixed the Request-Line parsing code to not hardcode a + maximum URI length (to allow very long DAAP requests from + iTunes in Rhythmbox). [335040] + + * Fixed some warnings (signed/unsigned mismatch). + +Changes in libsoup from 2.2.7 to 2.2.91: + + * (The large version number bump is because there was an + internal 2.2.90 release for SUSE 10.1 alphas, which was + supposed to be intermediate between 2.2.7 and 2.4.0. But + 2.4.0 didn't end up happening, and I don't want to regress + the version number at this point.) + + * SoupSession, SoupServer, SoupConnection, SoupSocket, and + SoupAddress now have an "async-context" property that allows + you to use the async API in a non-default GMainContext. + [Based on patches from Armin Bauer and Jürg Billeter.] + + * SoupSession, SoupConnection, and SoupSocket now have a + "timeout" property to stop synchronous sockets from hanging + forever if the remote end is unresponsive (from Varadhan). + + * Fixed some bugs in soup_date_iso8601_parse(). [324671, from + Emmanuele Bassi] + + * More Windows build fixes from Tor. + +Changes in libsoup from 2.2.6.1 to 2.2.7: + + * Fixed a crash when using NTLM connections [316313, probably + also 318252]. (Also 321208, which was a bug introduced in + the original fix for 316313.) + + * Fixed a bug that could cause soup to suck up all available + CPU when a connection to a SoupServer was dropped by the + other side [319305, patch from Jonathan Matthew] + + * Fixed the creation of struct elements in XMLRPC messages + [321362, patch from Sebastian Bauer] + + * Plugged a small memory leak in SoupSocket (from Wang Xin). + + * Fixed two compile problems, a gccism [320349, patch from + Roland Illig], and a strict-aliasing warning from gcc 4.1. 
+ +Changes in libsoup from 2.2.6 to 2.2.6.1: + + * Fixed a crash when using SoupSoapMessage + +Changes from 2.2.5 to 2.2.6: + + * Fixed a crash when canceling a message (from Tambet Ingo) + + * Fixed a bug where a connection could be leaked forever in + some circumstances if a request got a 30x, 401, or 407 + response, eventually causing a hang when the session hit its + maximum connection limit. (Dan/Tambet) + + * Fixed a memory leak. (Tambet) + + * Fixed a bug that would sometimes show up when connecting to + a server on localhost [#312540] + + * Added some API to SoupServer and SoupSocket to help fix a + long-standing rcd bug. + +Changes from 2.2.4 to 2.2.5: + + * Win32 support (from Tor Lillqvist) + + * Up-to-date API documentation pretty much everywhere + + * Basic XMLRPC support (from Mariano Suarez-Alvarez, Fernando + Herrera, and Jeff Bailey) + + * New HTTP timestamp-manipulation methods soup_date_parse, + soup_date_generate, and soup_date_iso8601_parse. + + * SoupSession now handles relative URLs in the Location header + (in violation of RFC 2616, but in line with how some servers + behave.) [270688] + +Changes from 2.2.3 to 2.2.4: + + * Fixed a problem with NTLM authentication against + multi-domain servers. [306877] + + * Fixed DNS lookups on Solaris. [254551, 268389] + +Changes from 2.2.2 to 2.2.3: + + * Now compiles against gnutls 1.2.0 [257811] + + * Fixed a bug that could result in 100% CPU usage if an SSL + server closed the connection uncleanly. [273352] + +Changes from 2.2.1 to 2.2.2: + + * The SSL validation fix from 2.2.1 [264414] is now completely + fixed. (Part of the fix didn't actually make it into 2.2.1) + + * HTTPS certificate validation now works when using an HTTP + proxy. [268583] + + * HTTP proxy code deals better with proxies that try to make + the user do HTML-form-based authentication. [268531] + + * 64-bit fixes for NTLM auth code. 
[270323, from Michael + Zucchi] + +Changes from 2.2.0 to 2.2.1: + + * Updated for a libgcrypt API change between 1.1.9x and 1.2.x + that caused a crash at runtime if you compiled against + 1.2.x. [266342] + + * SSL certificate validation failure should now always result + in a status of SOUP_STATUS_SSL_FAILED, rather than getting + turned into SOUP_STATUS_IO_ERROR. [264414] + + +Changes in libsoup from the 2.0 series (1.99.x versions) to 2.2: + + * Most of the libsoup datatypes are now GObjects. (SoupUri + is currently an exception to this.) + + * SoupMessage now emits signals at various stages of + processing. (Eg, "wrote_body", "got_headers".) (You + can also still use soup_message_add_*handler().) + + * SoupContexts are gone; soup_message_new() now takes a URI + string. + + * All formerly global state is now maintained by the + SoupSession object. (This includes the connection pool, + proxy server, cached authentication information, SSL + certificates, etc.) + + * You can create a SoupSessionAsync (for 2.0-like + behavior) or SoupSessionSync (for blocking, + synchronous usage). + + * You can add SoupMessageFilter objects to a session + to have certain processing automatically performed + on every message sent via that session. (Eg, setting + up handlers.) + + * NTLM authentication is no longer supported by + default. You must enable it by setting the + SOUP_SESSION_USE_NTLM flag on the session. + + * The preferred method of handling authentication is + now via the "authenticate" and "reauthenticate" + signals on SoupSession. (The old style, of encoding + the user and password information into the url is + also still supported.) + + * The SOUP_ERROR_* values are now SOUP_STATUS_* (so that we + don't have "SOUP_ERROR_OK" and the like). + + * SOUP_MESSAGE_IS_ERROR() is gone, since some cases + want to include 3xx responses and some don't. 
+ + * SOUP_ERROR_CANT_AUTHENTICATE and + SOUP_ERROR_CANT_AUTHENTICATE_PROXY are now gone, + since they didn't carry any information that + SOUP_STATUS_UNAUTHORIZED and + SOUP_STATUS_PROXY_UNAUTHORIZED don't. + + * DNS errors now show up as the new status code + SOUP_STATUS_CANT_RESOLVE rather than being mixed in + with SOUP_ERROR_CANT_CONNECT. + + * Minimal SOAP support has been added back, via + SoupSoapMessage/SoupSoapResponse + + * The HTTP I/O state machine was completely rewritten, fixing + numerous crashes, leaks, and protocol errors. + + * SoupUri now conforms to RFC 2396. Mostly. + + * Various test programs have been added under tests/ + + * Removed: + + * Support for OpenSSL (which was horribly buggy) and + Mozilla NSS (which was never finished). We only + support GNUTLS for SSL now. + + * SOCKS support + + * CGI support in SoupServer diff --git a/README b/README new file mode 100644 index 0000000..8baf635 --- /dev/null +++ b/README @@ -0,0 +1,29 @@ +libsoup is an HTTP client/server library for GNOME. It uses GObjects +and the glib main loop, to integrate well with GNOME applications. + +Features: + * Both asynchronous (GMainLoop and callback-based) and synchronous APIs + * Automatically caches connections + * SSL support + * Proxy support, including authentication and SSL tunneling + * Client support for Digest, NTLM, and Basic authentication + * Server support for Digest and Basic authentication + * XML-RPC support + +See the documentation in docs/reference/ and the test programs in +tests/ for simple examples of how to use the code. The +evolution-data-server and evolution-exchange modules in GNOME CVS have +some more complicated examples. + +There is a mailing list for libsoup-related questions/discussions at +gnome.org. Visit http://mail.gnome.org/mailman/listinfo/libsoup-list +to subscribe or read the archives. 
+ +Bugs against libsoup can be filed at the GNOME bugzilla: +http://bugzilla.gnome.org/enter_bug.cgi?product=libsoup + +More information (including the libsoup To Do list) is available at +http://live.gnome.org/LibSoup + +Licensing: +libsoup is licensed under the LGPL, see COPYING for more details. diff --git a/autogen.sh b/autogen.sh new file mode 100755 index 0000000..90a6488 --- /dev/null +++ b/autogen.sh @@ -0,0 +1,22 @@ +#!/bin/sh +# Run this to generate all the initial makefiles, etc. +REQUIRED_AUTOMAKE_VERSION=1.9 + +srcdir=`dirname $0` +test -z "$srcdir" && srcdir=. + +PKG_NAME="libsoup" + +(test -f $srcdir/configure.ac \ + && test -f $srcdir/libsoup.doap \ + && test -d $srcdir/libsoup) || { + echo -n "**Error**: Directory "\`$srcdir\'" does not look like the" + echo " top-level $PKG_NAME directory" + exit 1 +} + +which gnome-autogen.sh || { + echo "You need to install gnome-common from the GNOME CVS" + exit 1 +} +USE_GNOME2_MACROS=1 . gnome-autogen.sh diff --git a/configure.ac b/configure.ac new file mode 100644 index 0000000..a016ba2 --- /dev/null +++ b/configure.ac @@ -0,0 +1,393 @@ +dnl ******************************************* +dnl *** Initialize automake and set version *** +dnl ******************************************* + +AC_PREREQ(2.63) +AC_INIT([libsoup],[2.35.90],[http://bugzilla.gnome.org/enter_bug.cgi?product=libsoup]) +AC_CONFIG_SRCDIR([libsoup-2.4.pc.in]) +AC_CONFIG_MACRO_DIR([m4]) + +AM_INIT_AUTOMAKE([foreign 1.9]) +m4_ifdef([AM_SILENT_RULES],[AM_SILENT_RULES([yes])],) + +AC_PROG_MAKE_SET + +SOUP_API_VERSION=2.4 +AC_SUBST(SOUP_API_VERSION) + +# Increment on interface addition. Reset on removal. +SOUP_AGE=4 + +# Increment on interface add, remove, or change. +SOUP_CURRENT=5 + +# Increment on source change. Reset when CURRENT changes. 
+SOUP_REVISION=0 + +AC_SUBST(SOUP_CURRENT) +AC_SUBST(SOUP_REVISION) +AC_SUBST(SOUP_AGE) + +dnl *************************** +dnl *** Set debugging flags *** +dnl *************************** + +debug_default=minimum + +# Declare --enable-* args and collect ac_help strings +AC_ARG_ENABLE(debug, + AS_HELP_STRING([--enable-debug=[no/minimum/yes]], + [turn on debugging (default=$debug_default)]),, + enable_debug=$debug_default) + +# Set the debug flags +if test "x$enable_debug" = "xyes"; then + test "$cflags_set" = set || CFLAGS="$CFLAGS -g" + SOUP_DEBUG_FLAGS="-DG_ENABLE_DEBUG" +else + if test "x$enable_debug" = "xno"; then + SOUP_DEBUG_FLAGS="-DG_DISABLE_ASSERT -DG_DISABLE_CHECKS" + fi +fi + +AC_SUBST(SOUP_DEBUG_FLAGS) + +# Set the maintainer flags +if test -d .git; then + SOUP_MAINTAINER_FLAGS="-DG_DISABLE_DEPRECATED" +fi +AC_SUBST(SOUP_MAINTAINER_FLAGS) + +dnl *************************** +dnl *** Checks for programs *** +dnl *************************** + +AC_PROG_CC +AC_PROG_INSTALL + +# Initialize libtool +LT_INIT([win32-dll]) + +dnl *********************** +dnl *** Checks for glib *** +dnl *********************** + +AM_PATH_GLIB_2_0(2.27.5,,,gobject gthread gio) +if test "$GLIB_LIBS" = ""; then + AC_MSG_ERROR(GLIB 2.27.5 or later is required to build libsoup) +fi +GLIB_CFLAGS="$GLIB_CFLAGS -DG_DISABLE_SINGLE_INCLUDES" + +PKG_CHECK_MODULES(XML, libxml-2.0) +AC_SUBST(XML_CFLAGS) +AC_SUBST(XML_LIBS) + +dnl *********************** +dnl *** Check for Win32 *** +dnl *********************** + +AC_MSG_CHECKING([for Win32]) +case "$host" in + *-*-mingw*) + os_win32=yes + CFLAGS="$CFLAGS -D_REENTRANT" + ;; + *) + os_win32=no + ;; +esac +AC_MSG_RESULT([$os_win32]) +AM_CONDITIONAL(OS_WIN32, [test $os_win32 = yes]) + +dnl ******************* +dnl *** Misc checks *** +dnl ******************* +AC_CHECK_FUNCS(gmtime_r) +AC_CHECK_FUNCS(mmap) +AC_CHECK_FUNC(socket, , AC_CHECK_LIB(socket, socket)) + +dnl ************************* +dnl *** SQL Lite support *** +dnl 
************************* + +AC_ARG_ENABLE(sqllite, + AS_HELP_STRING([--enable-sqllite], [Enable SQLite support]), , + enable_sqllite=no) + +if test "$enable_sqllite" != "no"; then + PKG_CHECK_MODULES(SQLITE, sqlite3, :, [AC_MSG_ERROR(dnl +[Could not find sqlite3 devel files: + +$SQLITE_PKG_ERRORS + +Pass "--disable-sqllite" to configure if you want to build libsoup +without SQLite support.])]) + +fi + +AC_SUBST(SQLITE_CFLAGS) +AC_SUBST(SQLITE_LIBS) + +AM_CONDITIONAL(SQLLITE_SUPPORT, [test $enable_sqllite = yes]) + +dnl ********************* +dnl *** GNOME support *** +dnl ********************* +AC_MSG_CHECKING(whether to build libsoup-gnome) +AC_ARG_WITH(gnome, + AS_HELP_STRING([--without-gnome], [Do not build libsoup-gnome]), + :, [if test $os_win32 = yes; then with_gnome=no; else with_gnome=yes; fi]) +AC_MSG_RESULT($with_gnome) + +if test $with_gnome != no -a $os_win32 != yes; then + PKG_CHECK_MODULES(GNOME_KEYRING, gnome-keyring-1, :, + AC_MSG_ERROR( +[Could not find gnome-keyring devel files. 
+Configure with --without-gnome if you wish to build only libsoup +without GNOME-specific features.])) +fi +AC_SUBST(GNOME_KEYRING_CFLAGS) +AC_SUBST(GNOME_KEYRING_LIBS) + +AM_CONDITIONAL(BUILD_LIBSOUP_GNOME, test $with_gnome != no) + +if test $with_gnome != no; then + AC_DEFINE(HAVE_GNOME, 1, [Defined if GNOME support is enabled]) + + PKG_CHECK_MODULES(SQLITE, sqlite3, :, [AC_MSG_ERROR(dnl +[Could not find sqlite3 devel files: + +$SQLITE_PKG_ERRORS + +Pass "--without-gnome" to configure if you want to build libsoup +without GNOME support.])]) +fi +AC_SUBST(HAVE_GNOME) +AC_SUBST(SQLITE_CFLAGS) +AC_SUBST(SQLITE_LIBS) + + +dnl *************** +dnl *** gtk-doc *** +dnl *************** +GTK_DOC_CHECK([1.10]) + +dnl ***************************** +dnl *** gobject-introspection *** +dnl ***************************** +GOBJECT_INTROSPECTION_CHECK([0.9.5]) + +dnl ************************************* +dnl *** Warnings to show if using GCC *** +dnl ************************************* + +AC_ARG_ENABLE(more-warnings, + AS_HELP_STRING([--disable-more-warnings], [Inhibit compiler warnings]), + set_more_warnings=no) + +if test "$GCC" = "yes" -a "$set_more_warnings" != "no"; then + CFLAGS="$CFLAGS \ + -Wall -Wstrict-prototypes -Wmissing-declarations \ + -Wmissing-prototypes -Wnested-externs -Wpointer-arith \ + -Wdeclaration-after-statement -Wformat=2 -Winit-self \ + -Waggregate-return -Wmissing-format-attribute" + + for option in -Wmissing-include-dirs -Wundef; do + SAVE_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS $option" + AC_MSG_CHECKING([whether gcc understands $option]) + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [])], + [has_option=yes], + [has_option=no]) + AC_MSG_RESULT($has_option) + if test $has_option = no; then + CFLAGS="$SAVE_CFLAGS" + fi + done +fi + +AC_MSG_CHECKING([for glib-networking (glib TLS implementation)]) +save_CFLAGS="$CFLAGS" +save_LIBS="$LIBS" +CFLAGS="$CFLAGS $GLIB_CFLAGS" +LIBS="$LIBS $GLIB_LIBS" +AC_RUN_IFELSE([AC_LANG_PROGRAM([#include <gio/gio.h>], + [g_type_init (); 
return !g_tls_backend_supports_tls (g_tls_backend_get_default ());])], + [have_glib_networking=yes], + [have_glib_networking=no], + [have_glib_networking="unknown (cross-compiling)"]) +CFLAGS="$save_CFLAGS" +LIBS="$save_LIBS" +AC_MSG_RESULT($have_glib_networking) + +AC_ARG_ENABLE(tls-check, + AS_HELP_STRING([--disable-tls-check], [Don't error out if glib-networking is unavailable])) +if test "$enable_tls_check" != "no" -a "$have_glib_networking" = "no"; then + AC_MSG_ERROR([libsoup requires glib-networking for TLS support. + +If you are building a package, you can pass --disable-tls-check to +allow building libsoup anyway (since glib-networking is not actually +required at compile time), but you should be sure to add a runtime +dependency on it.]) +fi + +dnl ****************************** +dnl *** Stuff for regression tests +dnl ****************************** +AC_MSG_NOTICE([checking for programs needed for regression tests]) +MISSING_REGRESSION_TEST_PACKAGES="" + +if test "$have_glib_networking" = "no"; then + MISSING_REGRESSION_TEST_PACKAGES="$MISSING_REGRESSION_TEST_PACKAGES glib-networking" +fi + +AC_ARG_WITH(apache-httpd, + AS_HELP_STRING([--with-apache-httpd], [Path to apache httpd (for tests)]), + APACHE_HTTPD="$withval", + [AC_PATH_PROGS(APACHE_HTTPD, httpd2 httpd apache2 apache, no, ${PATH}:/usr/sbin)]) +if test "$APACHE_HTTPD" != "no"; then + AC_MSG_CHECKING([Apache version]) + apache_version=`$APACHE_HTTPD -v 2>/dev/null | sed -ne 's/Server version: Apache\///p'` + case $apache_version in + 2.2.*) + AC_MSG_RESULT([$apache_version (ok)]) + ;; + *) + AC_MSG_RESULT([$apache_version (ignoring)]) + APACHE_HTTPD="no" + ;; + esac +fi +AC_SUBST(APACHE_HTTPD) +AC_DEFINE_UNQUOTED(APACHE_HTTPD, "$APACHE_HTTPD", [Apache httpd]) + +if test "$APACHE_HTTPD" != "no"; then + AC_MSG_CHECKING([for Apache module directory]) + AC_ARG_WITH(apache-module-dir, + AS_HELP_STRING([--with-apache-module-dir], [Apache modules dirs (for tests)]), + apache_module_dirs="$withval", + 
[apache_prefix=`dirname \`dirname $APACHE_HTTPD\`` + mpm=`$APACHE_HTTPD -V | sed -ne 's/^Server MPM: */-/p' | tr 'A-Z' 'a-z'` + # This only works with bash, but should fail harmlessly in sh + apache_module_dirs=`echo $apache_prefix/lib{64,}/{apache,apache2,http,http2,httpd}{$mpm,}{/modules,}`]) + for dir in $apache_module_dirs; do + if test -f $dir/mod_auth_digest.so; then + APACHE_MODULE_DIR="$dir" + fi + if test -f $dir/mod_ssl.so; then + APACHE_SSL_MODULE_DIR="$dir" + IF_HAVE_APACHE_SSL="" + fi + if test -f $dir/mod_php5.so; then + APACHE_PHP_MODULE_DIR="$dir" + APACHE_PHP_MODULE=mod_php5.so + fi + if test -f $dir/libphp5.so; then + APACHE_PHP_MODULE_DIR="$dir" + APACHE_PHP_MODULE=libphp5.so + fi + done + AC_MSG_RESULT($APACHE_MODULE_DIR) + AC_SUBST(APACHE_MODULE_DIR) + AC_SUBST(APACHE_SSL_MODULE_DIR) + AC_SUBST(APACHE_PHP_MODULE_DIR) + AC_SUBST(APACHE_PHP_MODULE) +fi + +if test "$APACHE_HTTPD" != "no" -a -n "$APACHE_MODULE_DIR" -a -n "$APACHE_SSL_MODULE_DIR"; then + AC_DEFINE(HAVE_APACHE, 1, [Whether or not apache can be used for tests]) + have_apache=1 + if test -z "$APACHE_PHP_MODULE_DIR"; then + MISSING_REGRESSION_TEST_PACKAGES="$MISSING_REGRESSION_TEST_PACKAGES mod_php5" + fi +else + have_apache=0 + if test "$APACHE_HTTPD" = "no" -o -z "$APACHE_MODULE_DIR"; then + MISSING_REGRESSION_TEST_PACKAGES="$MISSING_REGRESSION_TEST_PACKAGES apache" + else + MISSING_REGRESSION_TEST_PACKAGES="$MISSING_REGRESSION_TEST_PACKAGES mod_ssl" + fi +fi +AM_CONDITIONAL(HAVE_APACHE, test $have_apache = 1) + +if test "$have_apache" = 1; then + AC_CHECK_PROGS(PHP, php php5) + + AC_MSG_CHECKING([for Apache PHP module]) + if test -f $APACHE_PHP_MODULE_DIR/$APACHE_PHP_MODULE; then + have_php=yes + IF_HAVE_PHP="" + else + have_php=no + IF_HAVE_PHP="#" + MISSING_REGRESSION_TEST_PACKAGES="$MISSING_REGRESSION_TEST_PACKAGES php5" + fi + AC_MSG_RESULT($have_php) + + if test "$have_php" = yes; then + AC_MSG_CHECKING([for xmlrpc-epi-php]) + if $PHP --rf xmlrpc_server_create | grep -q "does 
not exist"; then + have_xmlrpc_epi_php=no + MISSING_REGRESSION_TEST_PACKAGES="$MISSING_REGRESSION_TEST_PACKAGES php-xmlrpc" + else + have_xmlrpc_epi_php=yes + fi + AC_MSG_RESULT($have_xmlrpc_epi_php) + fi +fi + +AC_SUBST(IF_HAVE_PHP) +AM_CONDITIONAL(HAVE_XMLRPC_EPI_PHP, test "$have_xmlrpc_epi_php" = yes) + +AC_PATH_PROG(CURL, curl, no) +if test "$CURL" != no; then + AC_DEFINE(HAVE_CURL, 1, [Whether or not curl can be used for tests]) +else + MISSING_REGRESSION_TEST_PACKAGES="$MISSING_REGRESSION_TEST_PACKAGES curl" +fi +AM_CONDITIONAL(HAVE_CURL, test "$CURL" != no) + +AC_SUBST(MISSING_REGRESSION_TEST_PACKAGES) +AM_CONDITIONAL(MISSING_REGRESSION_TEST_PACKAGES, test -n "$MISSING_REGRESSION_TEST_PACKAGES") + +dnl ********************************************************** +dnl *** path of NTLM single-sign-on helper ntlm_auth +dnl ********************************************************** +AC_ARG_WITH(ntlm-auth, + AC_HELP_STRING([--with-ntlm-auth=PATH],[Where to look for ntlm_auth, path points to ntlm_auth installation (default: /usr/bin/ntlm_auth)]), + ntlm_auth="$withval", + [if test $os_win32 = yes; then ntlm_auth="no"; else ntlm_auth="/usr/bin/ntlm_auth"; fi]) +if test "$ntlm_auth" != "no"; then + AC_DEFINE(USE_NTLM_AUTH, 1, [Whether or not use Samba's 'winbind' daemon helper 'ntlm_auth' for NTLM single-sign-on]) + if test "$ntlm_auth" = "yes"; then + dnl --with-ntlm-auth (without path) used, use default path + ntlm_auth="/usr/bin/ntlm_auth" + fi +fi +AC_SUBST(ntlm_auth) +AC_DEFINE_UNQUOTED(NTLM_AUTH, "$ntlm_auth", [Samba's 'winbind' daemon helper 'ntlm_auth' which can be used for NTLM single-sign-on]) + +dnl ************************* +dnl *** Output Everything *** +dnl ************************* + +AC_CONFIG_HEADERS([config.h]) +AC_CONFIG_FILES([ + libsoup-2.4.pc + libsoup-gnome-2.4.pc + Makefile + libsoup-zip + libsoup/Makefile + tests/Makefile + tests/httpd.conf + docs/Makefile + docs/reference/Makefile + ]) +AC_OUTPUT + +if test -n 
"$MISSING_REGRESSION_TEST_PACKAGES"; then + echo "" + echo Some regression tests will not be run due to missing packages: + echo $MISSING_REGRESSION_TEST_PACKAGES + echo "" +fi diff --git a/debian/changelog b/debian/changelog new file mode 100644 index 0000000..e450d47 --- /dev/null +++ b/debian/changelog @@ -0,0 +1,483 @@ +libsoup2.4 (2.35.90-0) unstable; urgency=low + + * Git : pkgs/l/libsoup2.4 + * Tag : libsoup2.4_2.35.90-0 + + * [Kwangtae Ko] Change OS(SLP) with OS(TIZEN) + + -- seung hak lee Tue, 27 Dec 2011 16:24:57 +0900 + +libsoup2.4 (2.35.90-1slp2+5) unstable; urgency=low + + * Git : 165.213.180.234:slp/pkgs/l/libsoup2.4 + * Tag : libsoup2.4_2.35.90-1slp2+5 + + * [DongJae KIM] Change 'use-ssl3' param value to TRUE :: Rollback open source patch to fix paypal.com issue. + * [Kwangtae Ko] Add null checking code in soup_socket_is_ssl() to fix timesofindia.indiatimes.com issue. + + -- seung hak lee Wed, 12 Oct 2011 21:31:02 +0900 + +libsoup2.4 (2.35.90-1slp2+4) unstable; urgency=low + + * Git : 165.213.180.234:slp/pkgs/l/libsoup2.4 + * Tag : libsoup2.4_2.35.90-1slp2+4 + + * Remove SLP.h frome Makefile.am + * Add samsung specific private function declarations to SLP.h + + -- seung hak lee Fri, 30 Sep 2011 10:30:09 +0900 + +libsoup2.4 (2.35.90-1slp2+3) unstable; urgency=low + + * Git : 165.213.180.234:slp/pkgs/l/libsoup2.4 + * Tag : libsoup2.4_2.35.90-1slp2+3 + + * ADD SLP.h in makefile.am + + -- seung hak lee Thu, 29 Sep 2011 10:00:09 +0900 + +libsoup2.4 (2.35.90-1slp2+2) unstable; urgency=low + + * Git : 165.213.180.234:slp/pkgs/l/libsoup2.4 + * Tag : libsoup2.4_2.35.90-1slp2+2 + + * libsoup version up 2.23.92 >> 2.25.90 + * Samsung patchs are merged + + -- seung hak lee Wed, 28 Sep 2011 21:32:27 +0900 + +libsoup2.4 (2.35.90-1slp2+1) unstable; urgency=low + + * Git : 165.213.180.234:slp/pkgs/l/libsoup2.4 + * Tag : libsoup2.4_2.35.90-1slp2+1 + + * libsoup version up 2.23.92 >> 2.25.90 + + -- seung hak lee Thu, 08 Sep 2011 14:20:57 +0900 + +libsoup2.4 
(2.33.92-1slp2+12) unstable; urgency=low + + * Git : 165.213.180.234:slp/pkgs/l/libsoup2.4 + * Tag : libsoup2.4_2.33.92-1slp2+12 + + * DonJae KIM FIX: do not append data to existing files + * DonJae KIM FIX: Change soup-date-is-past checking value, using Current System Year + + -- seung hak lee Wed, 07 Sep 2011 15:58:10 +0900 + +libsoup2.4 (2.33.92-1slp2+11) unstable; urgency=low + + * Git : 165.213.180.234:slp/pkgs/l/libsoup2.4 + * Tag : libsoup2.4_2.33.92-1slp2+11 + + * Remove package dependency on libgnutlsxx27 and libgnutls-iopenssl27 + + -- Dongwook Lee Tue, 6 Sep 2011 16:38:30 +0900 + +libsoup2.4 (2.33.92-1slp2+10) unstable; urgency=low + + * Git : 165.213.180.234:slp/pkgs/l/libsoup2.4 + * Tag : libsoup2.4_2.33.92-1slp2+10 + + * Kwangtae Ko Fix to disable limiting the number of pending connections + * Kwangtae Ko Fix to play a PluginStream Video + + -- seung hak lee Wed, 10 Aug 2011 17:59:30 +0900 + +libsoup2.4 (2.33.92-1slp2+9) unstable; urgency=low + + * Git : 165.213.180.234:slp/pkgs/l/libsoup2.4 + * Tag : libsoup2.4_2.33.92-1slp2+9 + + * DongJae KIM Rollback not to accept all certificate which has unknown CA + * Kwangtae Ko Fix to a crash problem when the browser is closed + + -- seung hak lee Wed, 13 Jul 2011 15:17:32 +0900 + +libsoup2.4 (2.33.92-1slp2+8) unstable; urgency=low + + * Git : 165.213.180.234:slp/pkgs/l/libsoup2.4 + * Tag : libsoup2.4_2.33.92-1slp2+8 + + * Keunsoon Lee Fix to accept all certificate which has unknown CA + * Jaehun Lim Fix to the encoding probolm of data protocol + + -- seung hak lee Mon, 13 Jun 2011 17:31:57 +0900 + +libsoup2.4 (2.33.92-1slp2+7) unstable; urgency=low + + * Git : 165.213.180.234:slp/pkgs/l/libsoup2.4 + * Tag : libsoup2.4_2.33.92-1slp2+7 + + * DongJae KIM Fix to the soup redirection when using proxy and https. + * Kwangtae Ko Fix to the browser touch freezing issue on http://news.mt.co.kr. 
+ + -- seung hak lee Mon, 30 May 2011 15:39:25 +0900 + +libsoup2.4 (2.33.92-1slp2+6) unstable; urgency=low + + * lucid --> unstable + * Git : 165.213.180.234:slp/pkgs/l/libsoup2.4 + * Tag : libsoup2.4_2.33.92-1slp2+6 + + -- Taesoo Jun Wed, 25 May 2011 15:39:01 +0900 + +libsoup2.4 (2.33.92-1slp2+5) lucid; urgency=low + + * patch for UTF-8 encoding patch for cachekey + * Git : 165.213.180.234:slp/pkgs/l/libsoup2.4 + * Tag : libsoup2.4_2.33.92-1slp2+5 + + -- Taesoo Jun Wed, 25 May 2011 15:20:56 +0900 + +libsoup2.4 (2.33.92-1slp2+4) unstable; urgency=low + + * Git : 165.213.180.234:slp/pkgs/l/libsoup2.4 + * Tag : libsoup2.4_2.33.92-1slp2+4 + + * Kwangtae Ko Fix to a long-delay and overcharge CPU when closing... + * Taesoo Jun patch for contents sniff by SLP + + -- Jaehun Lim Mon, 23 May 2011 17:30:54 +0900 + +libsoup2.4 (2.33.92-1slp2+3) unstable; urgency=low + + * patch for cache entry UTF-8 encoding in pack_entry() + * Git : 165.213.180.234:slp/pkgs/l/libsoup2.4 + * Tag : libsoup2.4_2.33.92-1slp2+3 + + -- Taesoo Jun Mon, 16 May 2011 20:53:22 +0900 + +libsoup2.4 (2.33.92-1slp2+2) unstable; urgency=low + + * add gnutls and glib-networking dependency + * Git : 165.213.180.234:slp/pkgs/l/libsoup2.4 + * Tag : libsoup2.4_2.33.92-1slp2+2 + + -- JaeHyun Kim Mon, 25 Apr 2011 17:14:29 +0900 + +libsoup2.4 (2.33.92-1slp2+1) unstable; urgency=low + + * Upgrade to 2.33.92 + * Git : 165.213.180.234:slp/pkgs/l/libsoup2.4 + * Tag : libsoup2.4_2.33.92-1slp2+1 + + -- JaeHyun Kim Tue, 04 Jan 2011 14:27:51 +0900 + +libsoup2.4 (2.31.6-1slp2+2) unstable; urgency=low + + * Apply SEC_CERTIFICATE patch from browser part + * Git : 165.213.180.234:slp/pkgs/l/libsoup2.4 + * Tag : libsoup2.4_2.31.6-1slp2+2 + + -- Taesoo Jun Tue, 04 Jan 2011 14:27:51 +0900 + +libsoup2.4 (2.31.6-1slp2+1) unstable; urgency=low + + * Roll back to version, 2.31.6 + * Apply the ActiveSync crash patch + * Git : 165.213.180.234:slp/pkgs/l/libsoup2.4 + * Tag : libsoup2.4_2.31.6-1slp2+1 + + -- Taesoo Jun Thu, 30 Dec 2010 
11:21:36 +0900 + +libsoup2.4 (2.32.2-1slp2+6) unstable; urgency=low + + * Push patched files by sungwon chung + * Git : 165.213.180.234:slp/pkgs/l/libsoup2.4 + * Tag : libsoup2.4_2.32.2-1slp2+6 + + -- Taesoo Jun Wed, 22 Dec 2010 17:14:22 +0900 + +libsoup2.4 (2.32.2-1slp2+5) unstable; urgency=low + + * Fix build break by certificate patch + * Git : 165.213.180.234:slp/pkgs/l/libsoup2.4 + * Tag : libsoup2.4_2.32.2-1slp2+5 + + -- Taesoo Jun Thu, 16 Dec 2010 16:31:56 +0900 + +libsoup2.4 (2.32.2-1slp2+4) unstable; urgency=low + + * Push patched files + * Git : 165.213.180.234:slp/pkgs/l/libsoup2.4 + * Tag : libsoup2.4_2.32.2-1slp2+4 + + -- Taesoo Jun Thu, 16 Dec 2010 11:37:18 +0900 + +libsoup2.4 (2.32.2-1slp2+3) unstable; urgency=low + + * Apply a patch about a device binding to libsoup + * Git : 165.213.180.234:slp/pkgs/l/libsoup2.4 + * Tag : libsoup2.4_2.32.2-1slp2+3 + + -- Taesoo Jun Wed, 15 Dec 2010 16:53:17 +0900 + +libsoup2.4 (2.32.2-1slp2+2) unstable; urgency=low + + * Add soup-debug.h into dev package + * Git : 165.213.180.234:slp/pkgs/l/libsoup2.4 + * Tag : libsoup2.4_2.32.2-1slp2+2 + + -- Sungwon Chung Fri, 10 Dec 2010 21:09:39 +0900 + +libsoup2.4 (2.32.2-1slp2+1) unstable; urgency=low + + * Upgrade libsoup to 2.32.2 + * Git : 165.213.180.234:slp/pkgs/l/libsoup2.4 + * Tag : libsoup2.4_2.32.2-1slp2+1 + + -- Sungwon Chung Fri, 10 Dec 2010 15:01:52 +0900 + +libsoup2.4 (2.30.0-9slp2+1) unstable; urgency=low + + * Commit fixing dailymotion.com problem with flash applied + * Git : 165.213.180.234:/git/slp/pkgs/libsoup2.4 + * Tag : libsoup2.4_2.30.0-9slp2+1 + + -- Lukasz Slachciak Mon, 29 Nov 2010 17:24:19 +0100 + +libsoup2.4 (2.30.0-8slp2+2) unstable; urgency=low + + * Fix for crash after sign in on mobile version of twitter.com + * Git : 165.213.180.234:/git/slp/pkgs/libsoup2.4 + * Tag : libsoup2.4_2.30.0-8slp2+2 + + -- Lukasz Slachciak Tue, 09 Nov 2010 17:36:49 +0100 + +libsoup2.4 (2.30.0-8slp2+1) unstable; urgency=low + + * Fix for problems with 
gnutls_certificate_get_peers + * Git : 165.213.180.234:/git/slp/pkgs/libsoup2.4 + * Tag : libsoup2.4_2.30.0-8slp2+1 + + -- Lukasz Slachciak Fri, 05 Nov 2010 17:45:11 +0100 + +libsoup2.4 (2.30.0-7slp2+3) unstable; urgency=low + + * Additional gnutls session checks added for certificate handling + * Git : 165.213.180.234:/git/slp/pkgs/libsoup2.4 + * Tag : libsoup2.4_2.30.0-7slp2+3 + + -- Lukasz Slachciak Fri, 22 Oct 2010 16:55:21 +0200 + +libsoup2.4 (2.30.0-7slp2+2) unstable; urgency=low + + * Added new SoupMessage property with server certificates list + * Git : 165.213.180.234:/git/slp/pkgs/libsoup2.4 + * Tag : libsoup2.4_2.30.0-7slp2+2 + + -- Lukasz Slachciak Mon, 18 Oct 2010 11:59:34 +0200 + +libsoup2.4 (2.30.0-7slp2+1) unstable; urgency=low + + * Added new SoupMessage property with server certificates list + * Git : 165.213.180.234:/git/slp/pkgs/libsoup2.4 + * Tag : libsoup2.4_2.30.0-7slp2+1 + + -- Jongmin Lee Mon, 18 Oct 2010 11:37:19 +0200 + +libsoup2.4 (2.30.0-6slp2+3) unstable; urgency=low + + * Add "Replaces" field in debian control file + * Git : 165.213.180.234:/git/slp/pkgs/libsoup2.4 + * Tag : libsoup2.4_2.30.0-6slp2+3 + + -- Jongmin Lee Thu, 26 Aug 2010 10:41:47 +0900 + +libsoup2.4 (2.30.0-6slp2+2) unstable; urgency=low + + * Remove "Replaces" field in debian control file + * Git : 165.213.180.234:/git/slp/pkgs/libsoup2.4 + * Tag : libsoup2.4_2.30.0-6slp2+2 + + -- Jongmin Lee Wed, 25 Aug 2010 14:37:06 +0900 + +libsoup2.4 (2.30.0-6slp2+1) unstable; urgency=low + + * fix for livejournal.com incorrect response headers + * Git : 165.213.180.234:/git/slp/pkgs/libsoup2.4 + * Tag : libsoup2.4_2.30.0-6slp2+1 + + -- Lukasz Slachciak Thu, 22 Jul 2010 10:36:13 +0200 + +libsoup2.4 (2.30.0-5slp2+3) unstable; urgency=low + + * Fix flashplayer's build break + * Git : 165.213.180.234:/git/slp2.0/slp2.0-pkgs/libsoup-2.4 + * Tag : libsoup2.4_2.30.0-5slp2+3 + + -- Jaehyun Kim Fri, 09 Jul 2010 11:06:53 +0900 + +libsoup2.4 (2.30.0-5slp2+2) unstable; urgency=low + + * 
Maintainer/Uploader updated + * Git : 165.213.180.234:/git/slp2.0/slp2.0-pkgs/libsoup-2.4 + * Tag : libsoup2.4_2.30.0-5slp2+2 + + -- Jaehyun Kim Thu, 08 Jul 2010 15:48:05 +0900 + +libsoup2.4 (2.30.0-5slp2+1) unstable; urgency=low + + * unneeded logging removed + * Git : 165.213.180.234:/git/slp2.0/slp2.0-pkgs/libsoup-2.4 + * Tag : libsoup2.4_2.30.0-5slp2+1 + + -- Lukasz Slachciak Mon, 07 Jun 2010 10:40:49 +0200 + +libsoup2.4 (2.30.0-4slp2+1) unstable; urgency=low + + * fix for crash when connection address is invalid + * Git : 165.213.180.234:/git/slp2.0/slp2.0-pkgs/libsoup-2.4 + * Tag : libsoup2.4_2.30.0-4slp2+1 + + -- Lukasz Slachciak Fri, 28 May 2010 17:07:07 +0200 + +libsoup2.4 (2.30.0-3slp2+2) unstable; urgency=low + + * tag description added + * Git : 165.213.180.234:/git/slp2.0/slp2.0-pkgs/libsoup-2.4 + * Tag : libsoup2.4_2.30.0-3slp2+2 + + -- Lukasz Slachciak Mon, 24 May 2010 14:21:57 +0200 + +libsoup2.4 (2.30.0-3slp2+1) unstable; urgency=low + + * changelog updated + + -- Lukasz Slachciak Mon, 24 May 2010 14:04:22 +0200 + +libsoup2.4 (2.30.0-2slp2+4) unstable; urgency=low + + * unneded files removed + * Git : 165.213.180.234:/git/slp2.0/slp2.0-pkgs/libsoup-2.4 + * Tag : libsoup2.4_2.30.0-3slp2+1 + + -- Lukasz Slachciak Mon, 24 May 2010 13:58:54 +0200 + +libsoup2.4 (2.30.0-2slp2+3) unstable; urgency=low + + * fixing tag again. 
upsss + * Git : 165.213.180.234:/git/slp2.0/slp2.0-pkgs/libsoup-2.4 + * Tag : libsoup2.4_2.30.0-2slp2+3 + + -- Lukasz Slachciak Fri, 21 May 2010 15:56:27 +0900 + +libsoup2.4 (2.30.0-2slp2+2) unstable; urgency=low + + * adding tag description + * Git : 165.213.180.234:/git/slp2.0/slp2.0-pkgs/libsoup-2.4 + * Tag : 2.30.0-2slp2+2 + + -- Lukasz Slachciak Fri, 21 May 2010 15:40:45 +0900 + +libsoup2.4 (2.30.0-2slp2+1) unstable; urgency=low + + * just upversioning due to build problems + + -- Lukasz Slachciak Fri, 21 May 2010 15:19:39 +0900 + +libsoup2.4 (2.30.0-2slp2) unstable; urgency=low + + * Fix for Samsung Proxy problem in following situations: + - when running wikipedia.org from tests/get + - when using Samsung3G and opening abc.go.com site + - when using CJP01 router + proxy value set and abc.go.com + + -- Lukasz Slachciak Fri, 21 May 2010 14:47:58 +0900 + +libsoup2.4 (2.30.0-1slp2) unstable; urgency=low + + * libsoup upversioned to 2.30.0 + + -- Lukasz Slachciak Wed, 14 Apr 2010 16:29:42 +0200 + +libsoup2.4 (2.29.5-12slp2) unstable; urgency=low + + * Added PRAGMA secure_delete, when db is initialized, to clear cookie file content when cookies are deleted + + -- Lukasz Slachciak Thu, 08 Apr 2010 11:53:08 +0200 + +libsoup2.4 (2.29.5-11slp2+14) unstable; urgency=low + + * Added missing dependency + + -- Lukasz Slachciak Thu, 25 Mar 2010 12:36:01 +0100 + +libsoup2.4 (2.29.5-11slp2+13) unstable; urgency=low + + * Recompilation because of toolchain upgrade + + -- Lukasz Slachciak Thu, 25 Mar 2010 09:42:28 +0100 + +libsoup2.4 (2.29.5-11slp2+12) unstable; urgency=low + + * added option --enable-sqllite + + -- Lukasz Slachciak Wed, 24 Mar 2010 16:52:36 +0100 + +libsoup2.4 (2.29.5-11slp2+11) unstable; urgency=low + + * reversion for new package policy + + -- Jaehun Lim Fri, 19 Mar 2010 17:26:50 +0900 + +libsoup2.4 (2.29.5-10) unstable; urgency=low + + * memory leaks and logging fixes + + -- Lukasz Slachciak Thu, 04 Mar 2010 14:40:54 +0100 + +libsoup2.4 (2.29.5-9) unstable; 
urgency=low + + * logging fixes + + -- Lukasz Slachciak Thu, 25 Feb 2010 16:45:08 +0100 + +libsoup2.4 (2.29.5-8) unstable; urgency=low + + * Soup initialization fix for binding property + + -- Lukasz Slachciak Tue, 09 Feb 2010 11:10:05 +0100 + +libsoup2.4 (2.29.5-7) unstable; urgency=low + + * Setting binding property for session and connection + + -- Lukasz Slachciak Wed, 03 Feb 2010 11:37:40 +0100 + +libsoup2.4 (2.29.5-6) unstable; urgency=low + + * Add a gnutle dependency + + -- Gyuyoung Kim Thu, 28 Jan 2010 12:10:23 +0900 + +libsoup2.4 (2.29.5-5) unstable; urgency=low + + * Conflicts removed from debian/control + + -- Lukasz Slachciak Wed, 27 Jan 2010 11:02:11 +0100 + +libsoup2.4 (2.29.5-4) unstable; urgency=low + + * Fixed package files (conflict with 2.4-1): + + -- Lukasz Slachciak Wed, 27 Jan 2010 09:31:56 +0100 + +libsoup2.4 (2.29.5-3) unstable; urgency=low + + * Fixed package files + + -- Lukasz Slachciak Wed, 27 Jan 2010 08:20:40 +0100 + +libsoup2.4 (2.29.5-2) unstable; urgency=low + + * Fixed package files. + + -- Jaroslaw Staniek Tue, 26 Jan 2010 16:49:59 +0100 + +libsoup2.4 (2.29.5-1) UNRELEASED; urgency=low + + * Initial release. 
+ + -- Lukasz Slachciak Mon, 25 Jan 2010 15:24:00 +0100 diff --git a/debian/compat b/debian/compat new file mode 100644 index 0000000..7ed6ff8 --- /dev/null +++ b/debian/compat @@ -0,0 +1 @@ +5 diff --git a/debian/control b/debian/control new file mode 100644 index 0000000..47b40e2 --- /dev/null +++ b/debian/control @@ -0,0 +1,26 @@ +Source: libsoup2.4 +Section: net +Priority: extra +Maintainer: Jaehyun Kim , Taesoo Jun +Uploaders: Lukasz Slachciak , Jaroslaw Staniek , Lukasz Slachciak , Gyuyoung Kim , Jaehun Lim , Jihye Lim , Jongmin Lee , Sungwon Chung , Taesoo Jun , Taeksu Shin , Seonghyeon Kim , Dongwook Lee , seung hak lee +Build-Depends: debhelper (>= 5), autotools-dev, libglib2.0-dev (>= 2.21.3), pkg-config, libxml2-dev, zlib1g-dev, libsqlite3-dev, glib-networking +Standards-Version: 0.1.0 + +Package: libsoup2.4 +Section: libs +Architecture: any +Depends: zlib1g, libsqlite3-0, libglib2.0-0, libxml2, glib-networking +Replaces: libsoup2.4-1 +Description: an HTTP library implementation in C (shared libs) + +Package: libsoup2.4-dev +Section: libdevel +Architecture: any +Depends: libsoup2.4 (= ${binary:Version}), libglib2.0-dev (>= 2.18.1), libxml2-dev, pkg-config, zlib1g-dev, libsqlite3-dev +Description: an HTTP library implementation in C (development files) + +Package: libsoup2.4-dbg +Section: debug +Architecture: any +Depends: ${shlibs:Depends}, ${misc:Depends}, libsoup2.4 (= ${Source-Version}) +Description: an HTTP library implementation in C (debugging symbols) diff --git a/debian/copyright b/debian/copyright new file mode 100644 index 0000000..e69de29 diff --git a/debian/dirs b/debian/dirs new file mode 100644 index 0000000..e69de29 diff --git a/debian/docs b/debian/docs new file mode 100644 index 0000000..6f12db5 --- /dev/null +++ b/debian/docs @@ -0,0 +1,2 @@ +AUTHORS +README diff --git a/debian/libsoup2.4-dev.install.in b/debian/libsoup2.4-dev.install.in new file mode 100644 index 0000000..ca2c259 --- /dev/null +++ b/debian/libsoup2.4-dev.install.in @@ 
-0,0 +1,4 @@ +@PREFIX@/include/libsoup-2.4/libsoup/* +@PREFIX@/lib/libsoup*.la +@PREFIX@/lib/libsoup*.a +@PREFIX@/lib/pkgconfig/libsoup* diff --git a/debian/libsoup2.4.install.in b/debian/libsoup2.4.install.in new file mode 100644 index 0000000..4bc5b2b --- /dev/null +++ b/debian/libsoup2.4.install.in @@ -0,0 +1 @@ +@PREFIX@/lib/libsoup-2.4.so* diff --git a/debian/rules b/debian/rules new file mode 100755 index 0000000..7d274d7 --- /dev/null +++ b/debian/rules @@ -0,0 +1,120 @@ +#!/usr/bin/make -f +# -*- makefile -*- +# Sample debian/rules that uses debhelper. +# This file was originally written by Joey Hess and Craig Small. +# As a special exception, when this file is copied by dh-make into a +# dh-make output file, you may use that output file without restriction. +# This special exception was added by Craig Small in version 0.37 of dh-make. + +# Uncomment this to turn on verbose mode. +export DH_VERBOSE=1 + + +# These are used for cross-compiling and for saving the configure script +# from having to guess our platform (since we know it already) +DEB_HOST_GNU_TYPE ?= $(shell dpkg-architecture -qDEB_HOST_GNU_TYPE) +DEB_BUILD_GNU_TYPE ?= $(shell dpkg-architecture -qDEB_BUILD_GNU_TYPE) + +CFLAGS += -Wall -g +LDFLAGS ?= +PREFIX ?= /usr +DATADIR ?= /opt + +ifneq (,$(findstring noopt,$(DEB_BUILD_OPTIONS))) + CFLAGS += -O0 +else + CFLAGS += -O2 +endif + +LDFLAGS += -Wl,--rpath=$(PREFIX)/lib -Wl,--as-needed + +config.status: + dh_testdir + # Add here commands to configure the package. +# CFLAGS="$(CFLAGS)" LDFLAGS="$(LDFLAGS)" ./autogen.sh --prefix=$(PREFIX) --without-gnome --enable-sqllite=yes + CFLAGS="$(CFLAGS)" LDFLAGS="$(LDFLAGS)" ./autogen.sh --prefix=$(PREFIX) --without-gnome --enable-sqllite=yes --disable-tls-check + +build: build-stamp + +build-stamp: config.status + dh_testdir + + # Add here commands to compile the package. 
+ $(MAKE) + #docbook-to-man debian/ncurses.sgml > ncurses.1 + + for f in `find $(CURDIR)/debian/ -name "*.in"`; do \ + cat $$f > $${f%.in}; \ + sed -i -e "s#@PREFIX@#$(PREFIX)#g" $${f%.in}; \ + sed -i -e "s#@DATADIR@#$(DATADIR)#g" $${f%.in}; \ + done + + touch $@ + +clean: + dh_testdir + dh_testroot + rm -f build-stamp + + # Add here commands to clean up after the build process. + -$(MAKE) distclean +ifneq "$(wildcard /usr/share/misc/config.sub)" "" + cp -f /usr/share/misc/config.sub config.sub +endif +ifneq "$(wildcard /usr/share/misc/config.guess)" "" + cp -f /usr/share/misc/config.guess config.guess +endif + + for f in `find $(CURDIR)/debian/ -name "*.in"`; do \ + rm -f $${f%.in}; \ + done + + dh_clean + +install: build + dh_testdir + dh_testroot + dh_clean -k + dh_installdirs + + # Add here commands to install the package into debian/ncurses. + $(MAKE) DESTDIR=$(CURDIR)/debian/tmp install + + +# Build architecture-independent files here. +binary-indep: build install +# We have nothing to do by default. + +# Build architecture-dependent files here. 
+binary-arch: build install + dh_testdir + dh_testroot + dh_installchangelogs + dh_installdocs + dh_installexamples + dh_install --sourcedir=debian/tmp +# dh_installmenu +# dh_installdebconf +# dh_installlogrotate +# dh_installemacsen +# dh_installpam +# dh_installmime +# dh_python +# dh_installinit +# dh_installcron +# dh_installinfo + dh_installman + dh_link + dh_strip --dbg-package=libsoup2.4-dbg + dh_compress + dh_fixperms +# dh_perl + dh_makeshlibs + dh_installdeb + dh_shlibdeps + dh_gencontrol + dh_md5sums + dh_builddeb + +binary: binary-indep binary-arch +.PHONY: build clean binary-indep binary-arch binary install diff --git a/docs/Makefile.am b/docs/Makefile.am new file mode 100644 index 0000000..f3ddc22 --- /dev/null +++ b/docs/Makefile.am @@ -0,0 +1 @@ +SUBDIRS = reference diff --git a/docs/reference/Makefile.am b/docs/reference/Makefile.am new file mode 100644 index 0000000..43b568d --- /dev/null +++ b/docs/reference/Makefile.am @@ -0,0 +1,73 @@ +## Process this file with automake to produce Makefile.in +AUTOMAKE_OPTIONS = 1.6 + +# The name of the module +DOC_MODULE=libsoup-2.4 + +# The top-level SGML file. +DOC_MAIN_SGML_FILE=$(DOC_MODULE)-docs.sgml + +# The directory containing the source code. Relative to $(srcdir). +# gtk-doc will search all .c & .h files beneath here for inline comments +# documenting functions and macros. +DOC_SOURCE_DIR=../../libsoup + +# Extra options to supply to gtkdoc-scan. +SCAN_OPTIONS=--deprecated-guards=LIBSOUP_DISABLE_DEPRECATED --rebuild-types + +# Extra options to supply to gtkdoc-scangobj. +SCANGOBJ_OPTIONS=--type-init-func 'g_type_init(); g_thread_init(NULL)' + +# Extra options to supply to gtkdoc-mkdb. +MKDB_OPTIONS=--sgml-mode --output-format=xml + +# Extra options to supply to gtkdoc-fixref. +FIXXREF_OPTIONS= + +# Used for dependencies. +HFILE_GLOB= +CFILE_GLOB= + +# Header files to ignore when scanning. 
+IGNORE_HFILES= soup.h soup-marshal.h soup-enum-types.h \ + soup-message-private.h soup-session-private.h \ + soup-auth-basic.h soup-auth-digest.h soup-auth-ntlm.h \ + soup-connection.h soup-connection-ntlm.h \ + soup-dns.h soup-auth-manager.h soup-auth-manager-ntlm.h \ + soup-message-queue.h soup-path-map.h soup-ssl.h \ + soup-proxy-resolver.h soup-proxy-resolver-gnome.h \ + soup-proxy-resolver-static.h + +# Images to copy into HTML directory. +HTML_IMAGES = + +# Extra XML files that are included by $(DOC_MAIN_SGML_FILE). +content_files = \ + build-howto.xml \ + client-howto.xml \ + server-howto.xml \ + porting-2.2-2.4.xml + +# Other files to distribute. +extra_files = + + +# CFLAGS and LDFLAGS for compiling scan program. Only needed +# if $(DOC_MODULE).types is non-empty. +GTKDOC_CFLAGS = \ + -I$(top_srcdir) \ + -I$(top_builddir) \ + $(GLIB_CFLAGS) \ + $(XML_CFLAGS) \ + $(GNUTLS_CFLAGS) + +GTKDOC_LIBS = $(top_builddir)/libsoup/libsoup-gnome-2.4.la + +# include common portion ... +include $(top_srcdir)/gtk-doc.make + +# kludges +$(srcdir)/tmpl/*.sgml: + +clean: clean-am + rm -rf tmpl diff --git a/docs/reference/build-howto.xml b/docs/reference/build-howto.xml new file mode 100644 index 0000000..975dfd5 --- /dev/null +++ b/docs/reference/build-howto.xml @@ -0,0 +1,105 @@ + + + + +Compiling with libsoup +3 +LIBSOUP Library + + + +Compiling with libsoupNotes on compiling + + + +Using pkg-config + + +Like other GNOME libraries, libsoup uses +pkg-config to provide compiler options. The +package name is "libsoup-2.4". So in your +configure script, you might specify something like: + + + +PKG_CHECK_MODULES(LIBSOUP, [libsoup-2.4 >= 2.26]) +AC_SUBST(LIBSOUP_CFLAGS) +AC_SUBST(LIBSOUP_LIBS) + + + +The "2.4" in the package name is the "API version" +(indicating "the version of the libsoup API +that first appeared in version 2.4") and is essentially just part of +the package name. 
+ + + +If you are using any of the GNOME-specific features of +libsoup (such as automatic proxy +configuration), you must require +"libsoup-gnome-2.4" instead: + + + +PKG_CHECK_MODULES(LIBSOUP, [libsoup-gnome-2.4 >= 2.26]) +AC_SUBST(LIBSOUP_CFLAGS) +AC_SUBST(LIBSOUP_LIBS) + + + +You can also make libsoup-gnome an optional +dependency: + + + +PKG_CHECK_MODULES(LIBSOUP_GNOME, + [libsoup-gnome-2.4 >= 2.26], + [LIBSOUP_CFLAGS="$LIBSOUP_GNOME_CFLAGS" + LIBSOUP_LIBS="$LIBSOUP_GNOME_LIBS" + AC_DEFINE(HAVE_LIBSOUP_GNOME, 1, [Have libsoup-gnome])], + [PKG_CHECK_MODULES(LIBSOUP, [libsoup-2.4 >= 2.26])]) +AC_SUBST(LIBSOUP_CFLAGS) +AC_SUBST(LIBSOUP_LIBS) + + + +This will allow the application to be built with either plain +libsoup or with +libsoup-gnome, and it will define the C +preprocessor symbol HAVE_LIBSOUP_GNOME if +libsoup-gnome features are available. + + + + + +Headers + + +Code using libsoup should do: + + + +#include <libsoup/soup.h> + + + +or, for libsoup-gnome: + + + +#include <libsoup/soup-gnome.h> + + + +Including individual headers besides the two main header files is not +recommended. You may include both soup.h and +soup-gnome.h (though this is not required; the +latter automatically includes the former). + + + + + diff --git a/docs/reference/client-howto.xml b/docs/reference/client-howto.xml new file mode 100644 index 0000000..e87f901 --- /dev/null +++ b/docs/reference/client-howto.xml @@ -0,0 +1,563 @@ + + + + +Soup Client Basics +3 +LIBSOUP Library + + + +Soup Client BasicsClient-side tutorial + + + +Creating a <type>SoupSession</type> + + +The first step in using the client API is to create a SoupSession. The session object +encapsulates all of the state that libsoup +is keeping on behalf of your program; cached HTTP connections, +authentication information, etc. 
+ + + +There are two subclasses of SoupSession that you can use, with +slightly different behavior: + + + + + SoupSessionAsync, + which uses callbacks and the glib main loop to provide + asynchronous I/O. + + + + SoupSessionSync, + which uses blocking I/O rather than callbacks, making it more + suitable for threaded applications. + + + + +If you want to do a mix of mainloop-based and blocking I/O, you will +need to create two different session objects. + + + +When you create the session (with soup_session_async_new_with_options +or soup_session_sync_new_with_options), +you can specify various additional options: + + + + + SOUP_SESSION_MAX_CONNS + + Allows you to set the maximum total number of connections + the session will have open at one time. (Once it reaches + this limit, it will either close idle connections, or + wait for existing connections to free up before starting + new requests.) + + + + SOUP_SESSION_MAX_CONNS_PER_HOST + + Allows you to set the maximum total number of connections + the session will have open to a single + host at one time. + + + + SOUP_SESSION_USE_NTLM + + If TRUE, then Microsoft NTLM + authentication will be used if available (and will be + preferred to HTTP Basic or Digest authentication). + If FALSE, NTLM authentication won't be + used, even if it's the only authentication type available. + (NTLM works differently from the standard HTTP + authentication types, so it needs to be handled + specially.) + + + + SOUP_SESSION_SSL_CA_FILE + + Points to a file containing certificates for recognized + SSL Certificate Authorities. If this is set, then HTTPS + connections will be checked against these authorities, and + rejected if they can't be verified. (Otherwise all SSL + certificates will be accepted automatically.) + + + + SOUP_SESSION_ASYNC_CONTEXT + + A GMainContext + which the session will use for asynchronous operations. + This can be set if you want to use a + SoupSessionAsync in a thread other than the + main thread. 
+ + + + SOUP_SESSION_ADD_FEATURE and SOUP_SESSION_ADD_FEATURE_BY_TYPE + + These allow you to specify SoupSessionFeatures + (discussed below) + to add at construct-time. + + + + + +If you don't need to specify any options, you can just use soup_session_async_new or +soup_session_sync_new, +which take no arguments. + + + + + +Session features + + +Additional session functionality is provided as SoupSessionFeatures, +which can be added to a session, via the SOUP_SESSION_ADD_FEATURE +and SOUP_SESSION_ADD_FEATURE_BY_TYPE +options at session-construction-time, or afterward via the soup_session_add_feature +and soup_session_add_feature_by_type +functions. Some of the features available in +libsoup are: + + + + + SoupLogger + + A debugging aid, which logs all of libsoup's HTTP traffic + to stdout (or another place you specify). + + + + SoupCookieJar and SoupCookieJarText + + Support for HTTP cookies. SoupCookieJar + provides non-persistent cookie storage, while + SoupCookieJarText uses a text file to keep + track of cookies between sessions. + + + + + +And in libsoup-gnome: + + + + + SoupProxyResolverGNOME + + A feature that automatically determines the correct HTTP + proxy to use for requests. + + + + SoupCookieJarSqlite + + Support for HTTP cookies stored in an + SQLite database. + + + + + +Use the "add_feature_by_type" property/function to add features that +don't require any configuration (such as SoupProxyResolverGNOME), +and the "add_feature" property/function to add features that must be +constructed first (such as SoupLogger). 
For example, an +application might do something like the following: + + + + session = soup_session_async_new_with_options ( +#ifdef HAVE_LIBSOUP_GNOME + SOUP_SESSION_ADD_FEATURE_BY_TYPE, SOUP_TYPE_PROXY_RESOLVER_GNOME, +#endif + NULL); + if (debug_level) { + SoupLogger *logger; + + logger = soup_logger_new (debug_level, -1); + soup_session_add_feature (session, SOUP_SESSION_FEATURE (logger)); + g_object_unref (logger); + } + + + + + +Creating and Sending SoupMessages + + +Once you have a session, you do HTTP traffic using SoupMessage. In the simplest +case, you only need to create the message and it's ready to send: + + + + SoupMessage *msg; + + msg = soup_message_new ("GET", "http://example.com/"); + + + +In more complicated cases, you can use various SoupMessage, SoupMessageHeaders, and SoupMessageBody methods to set the +request headers and body of the message: + + + + SoupMessage *msg; + + msg = soup_message_new ("POST", "http://example.com/form.cgi"); + soup_message_set_request (msg, "application/x-www-form-urlencoded", + SOUP_MEMORY_COPY, formdata, strlen (formdata)); + soup_message_headers_append (msg->request_headers, "Referer", referring_url); + + + +(Although this is a bad example, because +libsoup actually has convenience methods +for dealing with HTML +forms, as well as XML-RPC.) + + + +You can also use soup_message_set_flags +to change some default behaviors. For example, by default, +SoupSession automatically handles responses from the +server that redirect to another URL. If you would like to handle these +yourself, you can set the SOUP_MESSAGE_NO_REDIRECT +flag. + + + +Sending a Message Synchronously + + +To send a message and wait for the response, use soup_session_send_message: + + + + guint status; + + status = soup_session_send_message (session, msg); + + + +(If you use soup_session_send_message with a +SoupSessionAsync, +it will run the main loop itself until the message is complete.) 
+ + + +The return value from soup_session_send_message +is a libsoup status code, +indicating either a transport error that prevented the message from +being sent, or the HTTP status that was returned by the server in +response to the message. (The status is also available as +msg->status_code.) + + + + + +Sending a Message Asynchronously + + +To send a message asynchronously, use soup_session_queue_message: + + + + ... + soup_session_queue_message (session, msg, my_callback, my_callback_data); + ... +} + +static void +my_callback (SoupSession *session, SoupMessage *msg, gpointer user_data) +{ + /* Handle the response here */ +} + + + +The message will be added to the session's queue, and eventually (when +control is returned back to the main loop), it will be sent and the +response will be read. When the message is complete, +callback will be invoked, along with the data you +passed to soup_session_queue_message. + + + +soup_session_queue_message +steals a reference to the message object, and unrefs it after the last +callback is invoked on it. So in the usual case, messages sent +asynchronously will be automatically freed for you without you needing +to do anything. (Of course, this wouldn't work when using the synchronous +API, since you will usually need to continue working with the message +after calling soup_session_send_message, +so in that case, you must unref it explicitly when you are done with +it.) + + + +(If you use soup_session_queue_message +with a SoupSessionSync, the +message will be sent in another thread, with the callback eventually +being invoked in the session's SOUP_SESSION_ASYNC_CONTEXT.) + + + + + + + +Processing the Response + + +Once you have received the response from the server, synchronously or +asynchronously, you can look at the response fields in the +SoupMessage to decide what to do next. The +status_code and +reason_phrase fields contain the numeric +status and textual status response from the server. 
+response_headers contains the response +headers, which you can investigate using soup_message_headers_get and +soup_message_headers_foreach. +The response body (if any) is in the +response_body field. + + + +SoupMessageHeaders +automatically parses several important headers in +response_headers for you and provides +specialized accessors for them. Eg, soup_message_headers_get_content_type. +There are several generic methods such as soup_header_parse_param_list +(for parsing an attribute-list-type header) and soup_header_contains +(for quickly testing if a list-type header contains a particular +token). These handle the various syntactical oddities of parsing HTTP +headers much better than functions like +g_strsplit or strstr. + + + + + +Intermediate/Automatic Processing + + +You can also connect to various SoupMessage signals to do +processing at intermediate stages of HTTP I/O. Eg, the got-chunk +signal is emitted as each piece of the response body is read (allowing +you to provide progress information when receiving a large response, +for example). SoupMessage also provides two convenience +methods, soup_message_add_header_handler, +and soup_message_add_status_code_handler, +which allow you to set up a signal handler that will only be invoked +for messages with certain response headers or status codes. +SoupSession uses this internally to handle authentication +and redirection. + + + +When using the synchronous API, the callbacks and signal handlers will +be invoked during the call to soup_session_send_message. + + + +To automatically set up handlers on all messages sent via a session, +you can connect to the session's request_started +signal, and add handlers to each message from there. + + + + + +Handling Authentication + + +SoupSession handles most of the details of HTTP +authentication for you. 
If it receives a 401 ("Unauthorized") or 407 +("Proxy Authentication Required") response, the session will emit the +authenticate signal, +providing you with a SoupAuth object indicating the +authentication type ("Basic", "Digest", or "NTLM") and the realm name +provided by the server. If you have a username and password available +(or can generate one), call soup_auth_authenticate +to give the information to libsoup. The session will automatically +requeue the message and try it again with that authentication +information. (If you don't call +soup_auth_authenticate, the session will just +return the message to the application with its 401 or 407 status.) + + + +If the server doesn't accept the username and password provided, the +session will emit authenticate again, with the +retrying parameter set to TRUE. This lets the +application know that the information it provided earlier was +incorrect, and gives it a chance to try again. If this +username/password pair also doesn't work, the session will continue to +emit authenticate again and again until the +provided username/password successfully authenticates, or until the +signal handler fails to call soup_auth_authenticate, +at which point libsoup will allow the +message to fail (with status 401 or 407). + + + +If you need to handle authentication asynchronously (eg, to pop up a +password dialog without recursively entering the main loop), you can +do that as well. Just call soup_session_pause_message +on the message before returning from the signal handler, and +g_object_ref the SoupAuth. Then, +later on, after calling soup_auth_authenticate +(or deciding not to), call soup_session_unpause_message +to resume the paused message. + + + + + +Multi-threaded usage + + +The only explicitly thread-safe operations in +libsoup are SoupSessionSync's +implementations of the SoupSession methods. 
So +after creating a SoupSessionSync, you can call soup_session_send_message +and soup_session_cancel_message +on it from any thread. But, eg, while the session is processing a +message, you should not call any SoupMessage methods on it +from any thread other than the one in which it is being sent. (That +is, you should not call any SoupMessage methods on it +except from a message or session callback or signal handler.) + + + +All other objects (including SoupSessionAsync) +should only be used from a single thread, with objects that are also +only used from that thread. (And in particular, if you set a +non-default GMainContext on a session, +socket, etc, then you can only use that object from the thread in +which that GMainContext is running.) + + + + + +Sample Programs + + +A few sample programs are available in the +libsoup sources: + + + + + get is a simple command-line + HTTP GET utility using the asynchronous API. + + + + getbug is a trivial + demonstration of the XMLRPC interface. + (xmlrpc-test provides + a slightly more complicated example.) + + + + auth-test shows how to use + authentication handlers and status-code handlers, although in + a fairly unusual way. + + + + simple-proxy uses both the + client and server APIs to create a simple (and not very + RFC-compliant) proxy server. It shows how to use the SOUP_MESSAGE_OVERWRITE_CHUNKS + flag when reading a message to save memory by processing each + chunk of the message as it is read, rather than accumulating + them all into a single buffer to process all at the end. + + + + +More complicated examples are available in GNOME CVS. The libsoup +pages on the GNOME wiki include a list of applications using +libsoup. 
+ + + + + diff --git a/docs/reference/libsoup-2.4-docs.sgml b/docs/reference/libsoup-2.4-docs.sgml new file mode 100644 index 0000000..c52208f --- /dev/null +++ b/docs/reference/libsoup-2.4-docs.sgml @@ -0,0 +1,75 @@ + + + + + libsoup Reference Manual + + + + Tutorial + + + + + + + + Core API + + + + + + + + + + + + + + + + + + + + + Additional Features + + + + + + + + + + Web Services APIs + + + + + + + GNOME integration + + + + + + Low-level Networking API + + + + + + Index + + + + + + + diff --git a/docs/reference/libsoup-2.4-overrides.txt b/docs/reference/libsoup-2.4-overrides.txt new file mode 100644 index 0000000..06826ca --- /dev/null +++ b/docs/reference/libsoup-2.4-overrides.txt @@ -0,0 +1,6 @@ + +soup_address_get_sockaddr +struct sockaddr * +SoupAddress *addr, +int *len + diff --git a/docs/reference/libsoup-2.4-sections.txt b/docs/reference/libsoup-2.4-sections.txt new file mode 100644 index 0000000..3980677 --- /dev/null +++ b/docs/reference/libsoup-2.4-sections.txt @@ -0,0 +1,896 @@ +libsoup/soup.h +
+soup-message +SoupMessage +SoupMessage + +soup_message_new +soup_message_new_from_uri +soup_message_set_request +soup_message_set_response + +SoupHTTPVersion +soup_message_set_http_version +soup_message_get_http_version +soup_message_get_uri +soup_message_set_uri +soup_message_get_address + +soup_message_set_status +soup_message_set_status_full +soup_message_is_keepalive + +soup_message_add_header_handler +soup_message_add_status_code_handler + +SoupMessageFlags +soup_message_set_flags +soup_message_get_flags +SoupChunkAllocator +soup_message_set_chunk_allocator + +soup_message_disable_feature + +SOUP_MESSAGE_METHOD +SOUP_MESSAGE_URI +SOUP_MESSAGE_HTTP_VERSION +SOUP_MESSAGE_FLAGS +SOUP_MESSAGE_STATUS_CODE +SOUP_MESSAGE_REASON_PHRASE +SOUP_MESSAGE_SERVER_SIDE + +SOUP_MESSAGE +SOUP_IS_MESSAGE +SOUP_TYPE_MESSAGE +soup_message_get_type +SOUP_MESSAGE_CLASS +SOUP_IS_MESSAGE_CLASS +SOUP_MESSAGE_GET_CLASS +SoupMessageClass + +soup_message_wrote_informational +soup_message_wrote_headers +soup_message_wrote_chunk +soup_message_wrote_body_data +soup_message_wrote_body +soup_message_got_informational +soup_message_got_headers +soup_message_content_sniffed +soup_message_got_chunk +soup_message_got_body +soup_message_finished +soup_message_restarted +
+ +
+soup-method +SOUP_METHOD_OPTIONS +SOUP_METHOD_GET +SOUP_METHOD_HEAD +SOUP_METHOD_PUT +SOUP_METHOD_POST +SOUP_METHOD_DELETE +SOUP_METHOD_TRACE +SOUP_METHOD_CONNECT + +SOUP_METHOD_PROPFIND +SOUP_METHOD_PROPPATCH +SOUP_METHOD_MKCOL +SOUP_METHOD_COPY +SOUP_METHOD_MOVE +SOUP_METHOD_LOCK +SOUP_METHOD_UNLOCK +
+ +
+soup-message-headers +SoupMessageHeaders +SoupMessageHeaders +SoupMessageHeadersType +soup_message_headers_new +soup_message_headers_free + +soup_message_headers_append +soup_message_headers_replace +soup_message_headers_remove +soup_message_headers_clear +soup_message_headers_get_one +soup_message_headers_get_list +soup_message_headers_get + +SoupMessageHeadersForeachFunc +soup_message_headers_foreach + +SoupMessageHeadersIter +soup_message_headers_iter_init +soup_message_headers_iter_next + +SoupEncoding +soup_message_headers_get_encoding +soup_message_headers_set_encoding +soup_message_headers_get_content_length +soup_message_headers_set_content_length + +SoupExpectation +soup_message_headers_get_expectations +soup_message_headers_set_expectations + +soup_message_headers_get_content_type +soup_message_headers_set_content_type + +soup_message_headers_get_content_disposition +soup_message_headers_set_content_disposition + +SoupRange +soup_message_headers_get_ranges +soup_message_headers_set_ranges +soup_message_headers_set_range +soup_message_headers_free_ranges +soup_message_headers_get_content_range +soup_message_headers_set_content_range + +SOUP_TYPE_MESSAGE_HEADERS +soup_message_headers_get_type +
+ +
+soup-message-body +SoupMessageBody +SoupBuffer +SoupMemoryUse +soup_buffer_new +soup_buffer_new_subbuffer +soup_buffer_new_with_owner +soup_buffer_get_owner +soup_buffer_copy +soup_buffer_free + +SoupMessageBody +soup_message_body_new +soup_message_body_free + +soup_message_body_set_accumulate +soup_message_body_get_accumulate + +soup_message_body_append +soup_message_body_append_buffer +soup_message_body_truncate +soup_message_body_complete +soup_message_body_flatten +soup_message_body_get_chunk + +soup_message_body_got_chunk +soup_message_body_wrote_chunk + +SOUP_TYPE_BUFFER +soup_buffer_get_type +SOUP_TYPE_MESSAGE_BODY +soup_message_body_get_type +
+ +
+soup-status +SOUP_STATUS_IS_TRANSPORT_ERROR +SOUP_STATUS_IS_INFORMATIONAL +SOUP_STATUS_IS_SUCCESSFUL +SOUP_STATUS_IS_REDIRECTION +SOUP_STATUS_IS_CLIENT_ERROR +SOUP_STATUS_IS_SERVER_ERROR +SoupKnownStatusCode +soup_status_get_phrase +soup_status_proxify + +SOUP_HTTP_ERROR + +soup_http_error_quark +
+ +
+soup-server +SoupServer +SoupServer +soup_server_new +soup_server_is_https +soup_server_get_port +soup_server_get_listener +soup_server_run +soup_server_run_async +soup_server_quit +soup_server_get_async_context + +SoupServerCallback +soup_server_add_handler +soup_server_remove_handler + +SoupClientContext +soup_client_context_get_socket +soup_client_context_get_address +soup_client_context_get_host +soup_client_context_get_auth_domain +soup_client_context_get_auth_user + +soup_server_add_auth_domain +soup_server_remove_auth_domain + +soup_server_pause_message +soup_server_unpause_message + +SOUP_SERVER_PORT +SOUP_SERVER_INTERFACE +SOUP_SERVER_SSL_CERT_FILE +SOUP_SERVER_SSL_KEY_FILE +SOUP_SERVER_ASYNC_CONTEXT +SOUP_SERVER_RAW_PATHS +SOUP_SERVER_SERVER_HEADER + +SOUP_SERVER +SOUP_IS_SERVER +SOUP_TYPE_SERVER +soup_server_get_type +SOUP_SERVER_CLASS +SOUP_IS_SERVER_CLASS +SOUP_SERVER_GET_CLASS +SoupServerClass +SOUP_TYPE_CLIENT_CONTEXT +soup_client_context_get_type +
+ +
+soup-auth-domain +SoupAuthDomain +SoupAuthDomain + +soup_auth_domain_add_path +soup_auth_domain_remove_path +SoupAuthDomainFilter +soup_auth_domain_set_filter +soup_auth_domain_get_realm + +SoupAuthDomainGenericAuthCallback +soup_auth_domain_set_generic_auth_callback +soup_auth_domain_check_password + +soup_auth_domain_covers +soup_auth_domain_accepts +soup_auth_domain_challenge + +SOUP_AUTH_DOMAIN_REALM +SOUP_AUTH_DOMAIN_PROXY +SOUP_AUTH_DOMAIN_ADD_PATH +SOUP_AUTH_DOMAIN_REMOVE_PATH +SOUP_AUTH_DOMAIN_FILTER +SOUP_AUTH_DOMAIN_FILTER_DATA +SOUP_AUTH_DOMAIN_GENERIC_AUTH_CALLBACK +SOUP_AUTH_DOMAIN_GENERIC_AUTH_DATA + +soup_auth_domain_try_generic_auth_callback + +SOUP_AUTH_DOMAIN +SOUP_IS_AUTH_DOMAIN +SOUP_TYPE_AUTH_DOMAIN +soup_auth_domain_get_type +SOUP_AUTH_DOMAIN_CLASS +SOUP_IS_AUTH_DOMAIN_CLASS +SOUP_AUTH_DOMAIN_GET_CLASS +SoupAuthDomainClass +
+ +
+soup-auth-domain-basic +SoupAuthDomainBasic +SoupAuthDomainBasic +soup_auth_domain_basic_new + +SoupAuthDomainBasicAuthCallback +soup_auth_domain_basic_set_auth_callback + +SOUP_AUTH_DOMAIN_BASIC_AUTH_CALLBACK +SOUP_AUTH_DOMAIN_BASIC_AUTH_DATA + +SOUP_AUTH_DOMAIN_BASIC +SOUP_IS_AUTH_DOMAIN_BASIC +SOUP_TYPE_AUTH_DOMAIN_BASIC +soup_auth_domain_basic_get_type +SOUP_AUTH_DOMAIN_BASIC_CLASS +SOUP_IS_AUTH_DOMAIN_BASIC_CLASS +SOUP_AUTH_DOMAIN_BASIC_GET_CLASS +SoupAuthDomainBasicClass +
+ +
+soup-auth-domain-digest +SoupAuthDomainDigest +SoupAuthDomainDigest +soup_auth_domain_digest_new + +SoupAuthDomainDigestAuthCallback +soup_auth_domain_digest_set_auth_callback +soup_auth_domain_digest_encode_password + +SOUP_AUTH_DOMAIN_DIGEST_AUTH_CALLBACK +SOUP_AUTH_DOMAIN_DIGEST_AUTH_DATA + +SOUP_AUTH_DOMAIN_DIGEST +SOUP_IS_AUTH_DOMAIN_DIGEST +SOUP_TYPE_AUTH_DOMAIN_DIGEST +soup_auth_domain_digest_get_type +SOUP_AUTH_DOMAIN_DIGEST_CLASS +SOUP_IS_AUTH_DOMAIN_DIGEST_CLASS +SOUP_AUTH_DOMAIN_DIGEST_GET_CLASS +SoupAuthDomainDigestClass +
+ +
+soup-address +SoupAddress +SoupAddress +SoupAddressFamily +SOUP_ADDRESS_ANY_PORT +soup_address_new +soup_address_new_from_sockaddr +soup_address_new_any + +SoupAddressCallback +soup_address_resolve_async +soup_address_resolve_sync +soup_address_is_resolved + +soup_address_get_name +soup_address_get_sockaddr +soup_address_get_physical +soup_address_get_port + +soup_address_equal_by_name +soup_address_hash_by_name +soup_address_equal_by_ip +soup_address_hash_by_ip + +SOUP_ADDRESS_FAMILY +SOUP_ADDRESS_NAME +SOUP_ADDRESS_PHYSICAL +SOUP_ADDRESS_PORT +SOUP_ADDRESS_SOCKADDR + +SOUP_ADDRESS +SOUP_IS_ADDRESS +SOUP_TYPE_ADDRESS +soup_address_get_type +SOUP_ADDRESS_CLASS +SOUP_IS_ADDRESS_CLASS +SOUP_ADDRESS_GET_CLASS +SoupAddressClass + +AF_INET6 +
+ +
+soup-session +SoupSession +SoupSession + +SoupSessionCallback +soup_session_queue_message +soup_session_requeue_message +soup_session_send_message +soup_session_cancel_message +soup_session_abort + +soup_session_pause_message +soup_session_unpause_message + +soup_session_get_async_context + +soup_session_add_feature +soup_session_add_feature_by_type +soup_session_remove_feature +soup_session_remove_feature_by_type +soup_session_get_features +soup_session_get_feature +soup_session_get_feature_for_message + +SOUP_SESSION_PROXY_URI +SOUP_SESSION_MAX_CONNS +SOUP_SESSION_MAX_CONNS_PER_HOST +SOUP_SESSION_USE_NTLM +SOUP_SESSION_SSL_CA_FILE +SOUP_SESSION_ASYNC_CONTEXT +SOUP_SESSION_TIMEOUT +SOUP_SESSION_IDLE_TIMEOUT +SOUP_SESSION_USER_AGENT +SOUP_SESSION_ADD_FEATURE +SOUP_SESSION_ADD_FEATURE_BY_TYPE +SOUP_SESSION_REMOVE_FEATURE_BY_TYPE + +SOUP_IS_SESSION +SOUP_IS_SESSION_CLASS +SOUP_SESSION +SOUP_SESSION_CLASS +SOUP_SESSION_GET_CLASS +SOUP_TYPE_SESSION +SoupSessionClass +soup_session_get_type +
+ +
+soup-session-async +SoupSessionAsync +SoupSessionAsync +soup_session_async_new +soup_session_async_new_with_options + +SOUP_IS_SESSION_ASYNC +SOUP_IS_SESSION_ASYNC_CLASS +SOUP_SESSION_ASYNC +SOUP_SESSION_ASYNC_CLASS +SOUP_SESSION_ASYNC_GET_CLASS +SOUP_TYPE_SESSION_ASYNC +SoupSessionAsyncClass +soup_session_async_get_type +
+ +
+soup-session-sync +SoupSessionSync +SoupSessionSync +soup_session_sync_new +soup_session_sync_new_with_options + +SOUP_IS_SESSION_SYNC +SOUP_IS_SESSION_SYNC_CLASS +SOUP_SESSION_SYNC +SOUP_SESSION_SYNC_CLASS +SOUP_SESSION_SYNC_GET_CLASS +SOUP_TYPE_SESSION_SYNC +SoupSessionSyncClass +soup_session_sync_get_type +
+ +
+soup-session-feature +SoupSessionFeature +SoupSessionFeature +SoupSessionFeatureInterface + +soup_session_feature_get_type +SOUP_SESSION_FEATURE +SOUP_SESSION_FEATURE_CLASS +SOUP_SESSION_FEATURE_GET_CLASS +SOUP_IS_SESSION_FEATURE +SOUP_IS_SESSION_FEATURE_CLASS +SOUP_TYPE_SESSION_FEATURE + +soup_session_feature_attach +soup_session_feature_detach +
+ +
+soup-auth +SoupAuth +SoupAuth +soup_auth_new +soup_auth_update + +soup_auth_is_for_proxy +soup_auth_get_scheme_name +soup_auth_get_host +soup_auth_get_realm +soup_auth_get_info + +soup_auth_authenticate +soup_auth_is_authenticated + +soup_auth_get_authorization +soup_auth_get_protection_space +soup_auth_free_protection_space + +SOUP_AUTH_SCHEME_NAME +SOUP_AUTH_REALM +SOUP_AUTH_HOST +SOUP_AUTH_IS_FOR_PROXY +SOUP_AUTH_IS_AUTHENTICATED + +SOUP_AUTH +SOUP_IS_AUTH +SOUP_TYPE_AUTH +soup_auth_get_type +SOUP_AUTH_CLASS +SOUP_IS_AUTH_CLASS +SOUP_AUTH_GET_CLASS +SoupAuthClass +
+ +
+soup-socket +SoupSocket +SoupSocket +soup_socket_new + +SoupSocketCallback +soup_socket_connect_async +soup_socket_connect_sync + +soup_socket_listen + +soup_socket_start_ssl +soup_socket_start_proxy_ssl +soup_socket_is_ssl + +soup_socket_disconnect +soup_socket_is_connected + +soup_socket_get_local_address +soup_socket_get_remote_address + +SoupSocketIOStatus +soup_socket_read +soup_socket_read_until +soup_socket_write + +SOUP_SSL_ERROR +SoupSSLError + +SOUP_SOCKET_LOCAL_ADDRESS +SOUP_SOCKET_REMOTE_ADDRESS +SOUP_SOCKET_FLAG_NONBLOCKING +SOUP_SOCKET_IS_SERVER +SOUP_SOCKET_SSL_CREDENTIALS +SOUP_SOCKET_ASYNC_CONTEXT +SOUP_SOCKET_TIMEOUT + +SOUP_SOCKET +SOUP_IS_SOCKET +SOUP_TYPE_SOCKET +soup_socket_get_type +SOUP_SOCKET_CLASS +SOUP_IS_SOCKET_CLASS +SOUP_SOCKET_GET_CLASS +SoupSocketClass + +soup_ssl_error_quark +
+ +
+soup-uri +SoupURI +SoupURI +soup_uri_new_with_base +soup_uri_new +soup_uri_to_string + +soup_uri_copy +soup_uri_equal +soup_uri_free + +soup_uri_encode +soup_uri_decode +soup_uri_normalize + +SOUP_URI_SCHEME_HTTP +SOUP_URI_SCHEME_HTTPS +soup_uri_uses_default_port +SOUP_URI_VALID_FOR_HTTP + +soup_uri_set_scheme +soup_uri_set_user +soup_uri_set_password +soup_uri_set_host +soup_uri_set_port +soup_uri_set_path +soup_uri_set_query +soup_uri_set_query_from_form +soup_uri_set_query_from_fields +soup_uri_set_fragment + +SOUP_TYPE_URI +soup_uri_get_type +
+ +
+soup-misc +Soup Miscellaneous Utilities +SoupDate +SoupDateFormat +soup_date_new +soup_date_new_from_string +soup_date_new_from_time_t +soup_date_new_from_now +soup_date_to_string +soup_date_to_time_t +soup_date_to_timeval +soup_date_is_past +soup_date_free + +soup_headers_parse_request +soup_headers_parse_response +soup_headers_parse_status_line +soup_headers_parse + +soup_header_parse_list +soup_header_parse_quality_list +soup_header_free_list +soup_header_contains +soup_header_parse_param_list +soup_header_parse_semi_param_list +soup_header_free_param_list +soup_header_g_string_append_param + +soup_str_case_equal +soup_str_case_hash + +soup_add_completion +soup_add_idle +soup_add_io_watch +soup_add_timeout + +soup_ssl_supported + +soup_date_copy +SOUP_TYPE_DATE +soup_date_get_type +soup_form_decode_urlencoded +soup_form_encode_urlencoded +soup_form_encode_urlencoded_list +
+ +
+soup-forms +HTML Form Support + +SOUP_FORM_MIME_TYPE_MULTIPART +SOUP_FORM_MIME_TYPE_URLENCODED +soup_form_decode +soup_form_decode_multipart +soup_form_encode +soup_form_encode_datalist +soup_form_encode_hash +soup_form_encode_valist +soup_form_request_new +soup_form_request_new_from_datalist +soup_form_request_new_from_hash +soup_form_request_new_from_multipart +
+ +
+soup-xmlrpc +XMLRPC Support + +soup_xmlrpc_build_method_call +soup_xmlrpc_request_new +soup_xmlrpc_parse_method_response +soup_xmlrpc_extract_method_response + +soup_xmlrpc_parse_method_call +soup_xmlrpc_extract_method_call +soup_xmlrpc_build_method_response +soup_xmlrpc_build_fault +soup_xmlrpc_set_response +soup_xmlrpc_set_fault + +SOUP_XMLRPC_ERROR +SoupXMLRPCError +SOUP_XMLRPC_FAULT +SoupXMLRPCFault + +soup_xmlrpc_error_quark +soup_xmlrpc_fault_quark +
+ +
+soup-value-utils +GValue Support +soup_value_hash_new +soup_value_hash_new_with_vals +soup_value_hash_insert_value +soup_value_hash_insert +soup_value_hash_insert_vals +soup_value_hash_lookup +soup_value_hash_lookup_vals + +soup_value_array_from_args +soup_value_array_to_args +soup_value_array_new +soup_value_array_new_with_vals +soup_value_array_insert +soup_value_array_append +soup_value_array_append_vals +soup_value_array_get_nth + +SOUP_VALUE_SETV +SOUP_VALUE_GETV + +SOUP_TYPE_BYTE_ARRAY + +soup_byte_array_get_type +
+ +
+soup-logger +SoupLogger +SoupLogger +SoupLoggerLogLevel +soup_logger_new +soup_logger_attach +soup_logger_detach + +SoupLoggerFilter +soup_logger_set_request_filter +soup_logger_set_response_filter + +SoupLoggerPrinter +soup_logger_set_printer + +SoupLoggerClass +soup_logger_get_type +SOUP_IS_LOGGER +SOUP_IS_LOGGER_CLASS +SOUP_LOGGER +SOUP_LOGGER_CLASS +SOUP_LOGGER_GET_CLASS +SOUP_TYPE_LOGGER +
+ +
+soup-cookie +SoupCookie +SoupCookie +soup_cookie_new +soup_cookie_parse +soup_cookie_copy +soup_cookie_equal +soup_cookie_free + +soup_cookie_set_name +soup_cookie_set_value +soup_cookie_set_domain +soup_cookie_set_path +soup_cookie_set_max_age +SOUP_COOKIE_MAX_AGE_ONE_HOUR +SOUP_COOKIE_MAX_AGE_ONE_DAY +SOUP_COOKIE_MAX_AGE_ONE_WEEK +SOUP_COOKIE_MAX_AGE_ONE_YEAR +soup_cookie_set_expires +soup_cookie_set_secure +soup_cookie_set_http_only + +soup_cookie_applies_to_uri + +soup_cookie_to_cookie_header +soup_cookie_to_set_cookie_header + +soup_cookies_from_request +soup_cookies_from_response +soup_cookies_to_request +soup_cookies_to_response +soup_cookies_to_cookie_header +soup_cookies_free + +SOUP_TYPE_COOKIE +soup_cookie_get_type +
+ +
+soup-cookie-jar +SoupCookieJar +SoupCookieJar +soup_cookie_jar_new +soup_cookie_jar_save +soup_cookie_jar_get_cookies +soup_cookie_jar_set_cookie + +soup_cookie_jar_add_cookie +soup_cookie_jar_delete_cookie +soup_cookie_jar_all_cookies + +SOUP_COOKIE_JAR_READ_ONLY + +SoupCookieJarClass +SOUP_COOKIE_JAR +SOUP_COOKIE_JAR_CLASS +SOUP_COOKIE_JAR_GET_CLASS +SOUP_IS_COOKIE_JAR +SOUP_IS_COOKIE_JAR_CLASS +SOUP_TYPE_COOKIE_JAR +soup_cookie_jar_get_type +
+ +
+soup-multipart +SoupMultipart +SoupMultipart +soup_multipart_new +soup_multipart_new_from_message +soup_multipart_free + +soup_multipart_get_length +soup_multipart_get_part +soup_multipart_append_part +soup_multipart_append_form_string +soup_multipart_append_form_file +soup_multipart_to_message + +SOUP_TYPE_MULTIPART +soup_multipart_get_type +
+ +
+soup-cookie-jar-text +SoupCookieJarText +SoupCookieJarText +soup_cookie_jar_text_new + +SOUP_COOKIE_JAR_TEXT_FILENAME + +SoupCookieJarTextClass +SOUP_COOKIE_JAR_TEXT +SOUP_COOKIE_JAR_TEXT_CLASS +SOUP_COOKIE_JAR_TEXT_GET_CLASS +SOUP_TYPE_COOKIE_JAR_TEXT +SOUP_IS_COOKIE_JAR_TEXT +SOUP_IS_COOKIE_JAR_TEXT_CLASS +soup_cookie_jar_text_get_type +
+ +
+libsoup/soup-gnome.h +soup-cookie-jar-sqlite +SoupCookieJarSqlite +SoupCookieJarSqlite +soup_cookie_jar_sqlite_new + +SOUP_COOKIE_JAR_SQLITE_FILENAME + +SoupCookieJarSqliteClass +SOUP_COOKIE_JAR_SQLITE +SOUP_COOKIE_JAR_SQLITE_CLASS +SOUP_COOKIE_JAR_SQLITE_GET_CLASS +SOUP_TYPE_COOKIE_JAR_SQLITE +SOUP_IS_COOKIE_JAR_SQLITE +SOUP_IS_COOKIE_JAR_SQLITE_CLASS +soup_cookie_jar_sqlite_get_type +
+ +
+soup-proxy-uri-resolver +SoupProxyURIResolver +SoupProxyURIResolver +SoupProxyURIResolverCallback +soup_proxy_uri_resolver_get_proxy_uri_async +soup_proxy_uri_resolver_get_proxy_uri_sync + +SoupProxyURIResolverInterface +SOUP_IS_PROXY_URI_RESOLVER +SOUP_IS_PROXY_URI_RESOLVER_CLASS +SOUP_PROXY_URI_RESOLVER +SOUP_PROXY_URI_RESOLVER_CLASS +SOUP_PROXY_URI_RESOLVER_GET_CLASS +SOUP_TYPE_PROXY_URI_RESOLVER +soup_proxy_uri_resolver_get_type +
+ +
+libsoup/soup-gnome.h +soup-gnome-features +SOUP_TYPE_GNOME_FEATURES_2_26 +SOUP_TYPE_PROXY_RESOLVER_GNOME + +soup_proxy_resolver_gnome_get_type +soup_gnome_features_2_26_get_type +
+ +
+soup-content-sniffer +SoupContentSniffer +SoupContentSniffer +soup_content_sniffer_new +soup_content_sniffer_sniff + +SOUP_CONTENT_SNIFFER +SOUP_CONTENT_SNIFFER_CLASS +SOUP_CONTENT_SNIFFER_GET_CLASS +SOUP_IS_CONTENT_SNIFFER +SOUP_IS_CONTENT_SNIFFER_CLASS +SOUP_TYPE_CONTENT_SNIFFER +SoupContentSnifferClass +SoupContentSnifferPrivate +soup_content_sniffer_get_type +
diff --git a/docs/reference/porting-2.2-2.4.xml b/docs/reference/porting-2.2-2.4.xml new file mode 100644 index 0000000..37632bf --- /dev/null +++ b/docs/reference/porting-2.2-2.4.xml @@ -0,0 +1,878 @@ + + + + +libsoup 2.2 to 2.4 porting notes +3 +LIBSOUP Library + + + +Porting notesNotes on porting from libsoup 2.2 to 2.4 + + + +Overview + + +After many API-compatible releases in the 2.2 series, +libsoup has now changed its API and bumped +its version number to 2.4. Changes were made for a variety of reasons: + + + + + To fix bugs and add features that couldn't be done ABI-compatibly. + + + + To make it easier to generate bindings for libsoup for + languages other than C. + + + + To clean up ugly/confusing old APIs + + + + To be more glib/gobject/gtk-like in general. + + + + + + +SoupMessage + + +SoupMessage has had a +number of API changes made, mostly to increase its +language-bindability. + + + +SoupMessageHeaders + + + SoupMessage's + request_headers and + response_headers fields are now an + opaque type (SoupMessageHeaders) + rather than being GHashTables. The method names have + changed slightly to reflect this: + + + + + soup_message_add_header + + → soup_message_headers_append + + + + soup_message_get_header + + → soup_message_headers_get + + + + soup_message_foreach_header + + → soup_message_headers_foreach + + + + soup_message_remove_header + + → soup_message_headers_remove + + + + soup_message_clear_headers + + → soup_message_headers_clear + + + + + + soup_message_get_header_list has no equivalent; + if multiple copies of a header are present, + soup_message_headers_get will return all of + them, concatenated together and separated by commas; RFC 2616 says + that the two forms (multiple headers, and a single header with + comma-separated values) are equivalent; this change to libsoup + ensures that applications will treat them as equivalent. 
+ + + + In addition, certain important header fields now have + dedicated get/set methods: + + + + + soup_message_headers_get_encoding / soup_message_headers_set_encoding + + + + soup_message_headers_get_content_length / soup_message_headers_set_content_length + + + + soup_message_headers_get_expectations / soup_message_headers_set_expectations + + + + + (soup_message_headers_set_expectations(msg, SOUP_EXPECTATION_CONTINUE) + replaces the SOUP_MESSAGE_EXPECT_CONTINUE + message flag). + + + + + +SoupMessageBody + + + Similarly, the request_body and + response_body fields (renamed from + request and response) are + now a new type, SoupMessageBody, + implemented in terms of SoupBuffer, a refcounted + memory buffer type with clearer semantics than the old + SoupDataBuffer/SoupOwnership. + + + + + SOUP_BUFFER_STATIC + + → SOUP_MEMORY_STATIC + + + + SOUP_BUFFER_SYSTEM_OWNED + + → SOUP_MEMORY_TAKE + (meaning libsoup + should take ownership of the memory from you). + + + + SOUP_BUFFER_USER_OWNED + + → SOUP_MEMORY_COPY + (meaning libsoup + should make a copy of the memory, because you + can't make any guarantees about how long it will + last.) + + + + + + A fourth SoupMemoryUse value is also available: SOUP_MEMORY_TEMPORARY, + which helps to avoid extra copies in some cases. + SOUP_MEMORY_TEMPORARY means that the memory + will last at least as long as the object you are handing it to (a + SoupBuffer, SoupMessageBody, or + SoupMessage), and so doesn't need to be copied right + away, but that if anyone makes a copy of the buffer, + libsoup needs to make a new copy of the + memory for them at that point, since the original pointer may not + remain valid for the lifetime of the new copy. + + + + (In the future, there may be additional SoupBuffer + and SoupMessageBody methods to work directly with + mmapped memory, splicing to file descriptors, etc.) + + + + soup_message_set_request + and soup_message_set_response + still work roughly like they used to.
+ + + + Unlike the old request and + response fields, the new + request_body and + response_body fields are not guaranteed + to be filled in at all times. (In particular, the + response_body is not filled in until it + has been fully read, although you can use soup_message_body_get_chunk + to iterate through the chunks before that point if you need to.) + + + + When request_body and + response_body are + filled in, they are '\0'-terminated for your + processing convenience. (The terminating 0 byte is not included in + their length.) + + + + + +Chunked encoding + + + The prototype of the SoupMessage::got_chunk + signal has been changed; it now includes the chunk as a + SoupBuffer parameter (rather than storing the chunk + data in msg->response as in 2.2). SOUP_MESSAGE_OVERWRITE_CHUNKS + is now somewhat poorly named, but still has essentially the same + semantics: if you set it, each chunk will be discarded after it is + read, and msg->response_body will not be filled + in with the complete response at the end of message processing. + + + + The API for sending chunked responses from a + SoupServer is also slightly different now: + + + + + soup_server_message_set_encoding + + → soup_message_headers_set_encoding + + + + soup_message_add_chunk + + → soup_message_body_append + or soup_message_body_append_buffer + + + + soup_message_add_final_chunk + + → soup_message_body_complete + + + + + + Since the new chunk-sending APIs require you to explicitly pass + the + request_headers/request_body + fields, rather than just assuming you're talking about the + response body, in theory it is now possible to use chunked + encoding with the request as well. As of the 2.3.0 release this + has not yet been tested. + + + + + +Methods + + + SoupMessage's + method field is now an interned + string, and you can compare the method directly against + the defines such as SOUP_METHOD_GET + (eg, in a SoupServer request handler). 
+ soup_method_get_id and the + SOUP_METHOD_ID_* macros are now gone. + + + + +Handlers + + + soup_message_add_header_handler + and soup_message_add_status_code_handler + are now just clever wrappers around + g_signal_connect. In particular, you now pass + a signal name to them rather than a SoupHandlerPhase, + and you remove them with the normal signal handler remove methods. + However, they still retain the special behavior that if the + message has been cancelled or requeued when the time comes for the + handler to run, then the handler will be skipped. (Use plain + g_signal_connect if you don't want that + behavior.) + + + + +I/O-related <type>SoupMessage</type> methods + + + soup_message_io_pause and + soup_message_io_unpause have been moved to + SoupSession and SoupServer, to better + reflect the fact that the session/server control the I/O, and + SoupMessage is merely acted-upon by them. + + + + + soup_message_io_pause + + → soup_session_pause_message / soup_server_pause_message + + + + soup_message_io_unpause + + → soup_session_unpause_message / soup_server_unpause_message + + + + + + msg->status (the I/O status) is now + gone as well, because (a) it's really an internal state of + SoupSession, and (b) it's too easy to confuse + with msg->status_code (the HTTP status) + anyway. Code that used to check if status was + SOUP_MESSAGE_STATUS_FINISHED needs to + be rewritten to track whether or not the finished + signal has been emitted. + + + + + +HTTP-Version + + + SoupHttpVersion is now SoupHTTPVersion + + + + + + +SoupSession + + +<function>soup_session_queue_message</function> callback + + + soup_session_queue_message's + callback parameter now includes the SoupSession as a + parameter, reflecting the fact that it is a + SoupSession callback, not a SoupMessage + callback. (It has also been renamed, from + SoupMessageCallbackFn to SoupSessionCallback.) 
+ + + + +Authentication + + + SoupSession's authenticate and + reauthenticate signals have been merged into a + single authenticate + signal with a retrying parameter to indicate if + it's the second (or later) try. Also, the signal now includes a + SoupAuth directly, + and you authenticate by calling soup_auth_authenticate + on the auth (rather than passing back a username and password from + the signal handler). + + + + +<type>SoupLogger</type> + + +SoupLogger is a +new object that copies the behavior of +evolution-exchange's +E2K_DEBUG and its clones. That is, it causes a +SoupSession to start logging some or all of its HTTP +traffic to stdout, for debugging purposes. + + + + +<type>SoupMessageFilter</type> + + + SoupMessageFilter is gone; code that used to use it + can now connect to the SoupSession::request-started + signal to get a chance to act on each message as it is sent. + (This is how SoupLogger works.) + + + + +Internal types + + + The SoupConnection and SoupMessageQueue + types (which should always have been internal to + SoupSession) have been removed from the public API. + + + + + + +SoupURI + +SoupUri has been renamed SoupURI, and its behavior has +changed in a few ways: + + + + + It no longer fully-decodes %-encoded URI components. This + is necessary to ensure that complicated URIs (eg, URIs + that include other URIs as query parameters) can be + round-tripped correctly. This corresponds to the old + broken_encoding behavior, but + that flag no longer exists, since it is the default and + there's no way to turn it off. + + + + In theory, this is an ABI-breaking change, especially for + SoupServers. + However, it is unlikely to actually break anything. (And + in the SoupServer case, servers now + fully-decode the path component + themselves unless you set the SOUP_SERVER_RAW_PATHS + flag on the server, so the behavior should still be the + same.) + + + + + It uses the RFC3986 parsing rules, including support for IPv6 literal + addresses.
+ + + + + The field formerly called + protocol is now + scheme, to match the spec, and + it's an interned string rather than a quark. The names of + the predefined values have changed to match: + + + + + SOUP_PROTOCOL_HTTP + + → SOUP_URI_SCHEME_HTTP + + + + SOUP_PROTOCOL_HTTPS + + → SOUP_URI_SCHEME_HTTPS + + + + + + + +soup_uri_decode +now returns a new string rather than modifying its input string in +place. The new method soup_uri_normalize, +which removes some, but not all, %-encoding, behaves similarly. + + + +Finally, SoupURI (as well as most other struct types in +libsoup) now uses the glib "slice" +allocator, so any code that uses g_new to create +SoupURIs is wrong. If you want to create a URI "by hand", +you can call soup_uri_new, +passing NULL, and you will get back an empty +SoupURI. There are also now methods that can be used to +set its fields (eg, soup_uri_set_scheme, +soup_uri_set_path, +etc) rather than mucking with the fields directly. + + + +Forms + + +Related to SoupURI, there are some new helper methods for +dealing with HTML forms. soup_form_decode_urlencoded +decodes a URI query component (or an +application/x-www-form-urlencoded request body) +into a GHashTable. soup_form_encode_urlencoded +reverses the process, allowing you to fill in a +uri->query with a properly-encoded form dataset. +(SoupURI also provides soup_uri_set_query_from_form +to help with this.) + + + + + + + +XML-RPC and SOAP + + +SOAP + +SOAP support has been removed; the existing methods covered only a +teeny tiny subset of SOAP, which was really only useful to a single +application. (The code that was formerly in libsoup has been moved to +that application.) If you were using this code, you can resurrect a +libsoup-2.4-compatible version of it from revision 1016 of libsoup +svn. + + + + +XML-RPC + +The XML-RPC code has been completely rewritten to make it simpler to +implement XML-RPC clients and servers. (Note: the server-side code has +not been heavily tested yet.)
The new XML-RPC API makes use of +GValues, with the following type mappings: + + + + + int + + → int (G_TYPE_INT) + + + + boolean + + → gboolean (G_TYPE_BOOLEAN) + + + + string + + → char * (G_TYPE_STRING) + + + + double + + → double (G_TYPE_DOUBLE) + + + + dateTime.iso8601 + + → SoupDate (SOUP_TYPE_DATE) + + + + base64 + + → GByteArray (SOUP_TYPE_BYTE_ARRAY) + + + + struct + + → GHashTable (G_TYPE_HASH_TABLE) + + + + array + + → GValueArray (G_TYPE_VALUE_ARRAY) + + + + + +SoupDate is discussed below. +SOUP_TYPE_BYTE_ARRAY is just a new +GType value defined by libsoup +to represent GByteArrays, which glib does not define a +GType for. + + + +libsoup provides some additional GValue support +methods for working with +GValueArrays, and GHashTables of +GValues, for the XML-RPC struct and +array types. Eg, you can use soup_value_hash_new +to create a GHashTable to use with the XML-RPC methods, +and soup_value_hash_insert +to add values to it without needing to muck with GValues +directly. + + + +The getbug and xmlrpc-test +programs in the libsoup sources provide +examples of how to use the new API. (Beware that +xmlrpc-test's use of the API is a little +complicated because of the way it sends all calls through a single +do_xmlrpc method.) + + + + + + +SoupServer + + +SoupServer handlers + + + The prototypes for soup_server_add_handler, + and for the SoupServer + handlers themselves have changed: + + + +typedef void (*SoupServerCallback) (SoupServer *server, + SoupMessage *msg, + const char *path, + GHashTable *query, + SoupClientContext *client, + gpointer user_data); + +void soup_server_add_handler (SoupServer *server, + const char *path, + SoupServerCallback callback, + gpointer data, + GDestroyNotify destroy); + + + + soup_server_add_handler no longer takes a + SoupServerAuthContext (see the discussion of server + authentication below), and the order of the final two arguments + has been swapped. 
(Additionally, SoupServerCallbackFn + has been renamed to SoupServerCallback, and the old + unregister parameter of type + SoupServerUnregisterFn is now a standard + GDestroyNotify. The change to + GDestroyNotify and the swapping of the final two + arguments is to make the method conform to standard glib/gtk + practices.) + + + + In SoupServerCallback, several bits of data that used + to be part of the context argument are now + provided directly, and context specifically + only contains more specifically-client-related information (such + as the SoupSocket that the request arrived on, and + information about authentication). + + + + path is the fully %-decoded path component + of msg's URI, and + query is a hash table containing + msg's URI's + query component decoded with soup_form_decode_urlencoded. + These are provided for your convenience; if you need the raw + query, you can get it out of msg's URI + directly. If you need the raw path, you'll need to set the SOUP_SERVER_RAW_PATHS + property on the server, which actually changes the behavior of the + server with respect to how paths are matched; see the + documentation for details. + + + + +Server-side authentication + + + SoupServer authentication has been completely + rewritten, with SoupServerAuthContext being replaced + with SoupAuthDomain. Among + other improvements, you no longer need to have the cleartext + password available to check against. See the + SoupAuthDomain documentation, the server tutorial, and + tests/server-auth-test.c. + + + + +<literal>Expect: 100-continue</literal> and other early <type>SoupMessage</type> processing + + + SoupServer now handles + "Expect: 100-continue" correctly. In + particular, if the client passes that header, and your server + requires authentication, then authentication will be checked + before reading the request body. 
+ + + + If you want to do additional pre-request-body handling, you can + connect to SoupServer's request_started + signal, and connect to the request's got_headers + signal from there. (See the description of + request_started for information about other + related SoupServer signals.) + + + + +Date header + + + SoupServer now automatically sets the + Date header on all responses, as required by + RFC 2616. + + + + +SoupServerMessage + + + SoupServerMessage is now merged into + SoupMessage. + soup_server_message_set_encoding is replaced + with soup_message_headers_set_encoding + as described in the section on SoupMessage above. + + + + +<function>soup_server_run</function> / <function>soup_server_quit</function> + + + soup_server_run + and soup_server_run_async + no longer g_object_ref the server, and + soup_server_quit + no longer unrefs it. + + + + + + +Miscellaneous + + +SoupDate + + + The new SoupDate type + replaces the old soup_date_* methods, and has + an improved (more liberal) date parser. + + + + +Header parsing + + + soup-headers.h now has a few additional methods + for parsing list-type headers. + + + + +SoupAddress, SoupSocket + + + SoupSocket has had various simplifications made to + reflect the fact that this is specifically libsoup's socket + implementation, not some random generic socket API. + + + + Various SoupAddress and SoupSocket + methods now take arguments of the new GCancellable type, from + libgio. When porting old code, you can just pass + NULL for these. (soup_address_resolve_async + also takes another new argument, a GMainContext that + you'll want to pass NULL for.) If you pass a + GCancellable, you can use it to cleanly cancel the + address resolution / socket operation. + + + + + +Base64 methods + + + The deprecated base64 methods are now gone; use glib's base64 + methods instead. 
+ + + + + + diff --git a/docs/reference/server-howto.xml b/docs/reference/server-howto.xml new file mode 100644 index 0000000..0a9a53d --- /dev/null +++ b/docs/reference/server-howto.xml @@ -0,0 +1,370 @@ + + + + +Soup Server Basics +3 +LIBSOUP Library + + + +Soup Server BasicsServer-side tutorial + + + +Creating a SoupServer + + +As with the client API, there is a single object that will encapsulate +most of your interactions with libsoup. In this case, SoupServer. + + + +You create the server with soup_server_new, +and as with the SoupSession constructor, you can specify +various additional options: + + + + + SOUP_SERVER_PORT + + The TCP port to listen on. If 0 (or + left unspecified), some unused port will be selected for + you. (You can find out what port by calling soup_server_get_port.) + + + + SOUP_SERVER_INTERFACE + + A SoupAddress, + specifying the IP address of the network interface to run + the server on. If NULL (or left + unspecified), the server will listen on all interfaces. + + + + SOUP_SERVER_SSL_CERT_FILE + + Points to a file containing an SSL certificate to use. If + this is set, then the server will speak HTTPS; otherwise + it will speak HTTP. + + + + SOUP_SERVER_SSL_KEY_FILE + + Points to a file containing the private key for the + SOUP_SERVER_SSL_CERT_FILE. (It may + point to the same file.) + + + + SOUP_SERVER_ASYNC_CONTEXT + + A GMainContext which + the server will use for asynchronous operations. This can + be set if you want to use a SoupServer in a thread + other than the main thread. + + + + SOUP_SERVER_RAW_PATHS + + Set this to TRUE if you don't want + libsoup to decode %-encoding + in the Request-URI. (Eg, because you need to treat + "/foo/bar" and + "/foo%2Fbar" as different paths.) + + + + + + + +Adding Handlers + + +By default, SoupServer +returns "404 Not Found" in response to all requests (except ones that +it can't parse, which get "400 Bad Request").
To override this +behavior, call soup_server_add_handler +to set a callback to handle certain URI paths. + + + + soup_server_add_handler (server, "/foo", server_callback, + data, destroy_notify); + + + +The "/foo" indicates the base path for this +handler. When a request comes in, if there is a handler registered for +exactly the path in the request's Request-URI, then +that handler will be called. Otherwise +libsoup will strip path components one by +one until it finds a matching handler. So for example, a request of +the form +"GET /foo/bar/baz.html?a=1&b=2 HTTP/1.1" +would look for handlers for "/foo/bar/baz.html", +"/foo/bar", and "/foo". If a +handler has been registered with a NULL base path, +then it is used as the default handler for any request that doesn't +match any other handler. + + + + + +Responding to Requests + + +A handler callback looks something like this: + + + +static void +server_callback (SoupServer *server, + SoupMessage *msg, + const char *path, + GHashTable *query, + SoupClientContext *client, + gpointer user_data) +{ + ... +} + + + +msg is the request that has been received and +user_data is the data that was passed to soup_server_add_handler. +path is the path (from msg's +URI), and query contains the result of parsing the +URI query field. (It is NULL if there was no +query.) client is a SoupClientContext, +which contains additional information about the client (including its +IP address, and whether or not it used HTTP authentication). + + + +By default, libsoup assumes that you have +completely finished processing the message when you return from the +callback, and that it can therefore begin sending the response. If you +are not ready to send a response immediately (eg, you have to contact +another server, or wait for data from a database), you must call soup_server_pause_message +on the message before returning from the callback. This will delay +sending a response until you call soup_server_unpause_message. 
+(You must also connect to the finished signal on the message +in this case, so that you can break off processing if the client +unexpectedly disconnects before you start sending the data.) + + + +To set the response status, call soup_message_set_status +or soup_message_set_status_full. +If the response requires a body, you must decide whether to use +Content-Length encoding (the default), or +chunked encoding. + + + +Responding with <literal>Content-Length</literal> +Encoding + + +This is the simpler way to set a response body, if you have all of the +data available at once. + + + +static void +server_callback (SoupServer *server, + SoupMessage *msg, + const char *path, + GHashTable *query, + SoupClientContext *client, + gpointer user_data) +{ + MyServerData *server_data = user_data; + const char *mime_type; + GByteArray *body; + + if (msg->method != SOUP_METHOD_GET) { + soup_message_set_status (msg, SOUP_STATUS_NOT_IMPLEMENTED); + return; + } + + /* This is somewhat silly. Presumably your server will do + * something more interesting. + */ + body = g_hash_table_lookup (server_data->bodies, path); + mime_type = g_hash_table_lookup (server_data->mime_types, path); + if (!body || !mime_type) { + soup_message_set_status (msg, SOUP_STATUS_NOT_FOUND); + return; + } + + soup_message_set_status (msg, SOUP_STATUS_OK); + soup_message_set_response (msg, mime_type, SOUP_MEMORY_COPY, + body->data, body->len); +} + + + + + +Responding with <literal>chunked</literal> Encoding + + +If you want to supply the response body in chunks as it becomes +available, use chunked encoding instead. In this +case, first call soup_message_headers_set_encoding (msg->response_headers, SOUP_ENCODING_CHUNKED) +to tell libsoup that you'll be using +chunked encoding. Then call soup_message_body_append +(or soup_message_body_append_buffer) +on msg->response_body with each chunk of the +response body as it becomes available, and call soup_message_body_complete +when the response is complete. 
After each of these calls, you must +also call soup_server_unpause_message +to cause the chunk to be sent. (You do not normally need to call soup_server_pause_message, +because I/O is automatically paused when doing a +chunked transfer if no chunks are available.) + + + +When using chunked encoding, you must also connect to the finished signal on the message, +so that you will be notified if the client disconnects between two +chunks; SoupServer will unref the message if that +happens, so you must stop adding new chunks to the response at that +point. (An alternate possibility is to write each new chunk only when +the wrote_chunk signal +is emitted indicating that the previous one was written successfully.) + + + +The simple-proxy +example in the tests/ directory gives an example of +using chunked encoding. + + + + + + + +Handling Authentication + + +To have SoupServer +handle HTTP authentication for you, create a SoupAuthDomainBasic +or SoupAuthDomainDigest, +and pass it to soup_server_add_auth_domain: + + + + SoupAuthDomain *domain; + + domain = soup_auth_domain_basic_new ( + SOUP_AUTH_DOMAIN_REALM, "My Realm", + SOUP_AUTH_DOMAIN_BASIC_AUTH_CALLBACK, auth_callback, + SOUP_AUTH_DOMAIN_BASIC_AUTH_DATA, auth_data, + SOUP_AUTH_DOMAIN_ADD_PATH, "/foo", + SOUP_AUTH_DOMAIN_ADD_PATH, "/bar/private", + NULL); + soup_server_add_auth_domain (server, domain); + g_object_unref (domain); + + + +Then, every request under one of the auth domain's paths will be +passed to the auth_callback first before being +passed to the server_callback: + + + +static gboolean +auth_callback (SoupAuthDomain *domain, SoupMessage *msg, + const char *username, const char *password, + gpointer user_data) +{ + MyServerData *server_data = user_data; + MyUserData *user; + + user = my_server_data_lookup_user (server_data, username); + if (!user) + return FALSE; + + /* FIXME: Don't do this. Keeping a cleartext password database + * is bad. 
+ */ + return strcmp (password, user->password) == 0; +} + + + +The SoupAuthDomainBasicAuthCallback +is given the username and password from the +Authorization header and must determine, in some +server-specific manner, whether or not to accept them. (In this +example we compare the password against a cleartext password database, +but it would be better to store the password somehow encoded, as in +the UNIX password database. Alternatively, you may need to delegate +the password check to PAM or some other service.) + + + +If you are using Digest authentication, note that SoupAuthDomainDigestAuthCallback +works completely differently (since the server doesn't receive the +cleartext password from the client in that case, so there's no way to +compare it directly). See the documentation for SoupAuthDomainDigest +for more details. + + + +You can have multiple SoupAuthDomains attached to a +SoupServer, either in separate parts of the path +hierarchy, or overlapping. (Eg, you might want to accept either Basic +or Digest authentication for a given path.) When more than one auth +domain covers a given path, the request will be accepted if the user +authenticates successfully against any of the +domains. + + + +If you want to require authentication for some requests under a +certain path, but not all of them (eg, you want to authenticate +PUTs, but not GETs), use a +SoupAuthDomainFilter. 
+ + + + + diff --git a/docs/specs/README b/docs/specs/README new file mode 100644 index 0000000..0dee62d --- /dev/null +++ b/docs/specs/README @@ -0,0 +1,13 @@ +rfc1945 - HTTP/1.0 +rfc2068 - HTTP/1.1 (mostly obsoleted original specification) +rfc2109 - HTTP State Management Mechanism +rfc2145 - Use and Interpretation of HTTP Version Numbers +rfc2324 - Hyper Text Coffee Pot Control Protocol (HTCPCP/1.0) +rfc2388 - Returning Values from Forms: multipart/form-data +rfc2518 - HTTP Extensions for Distributed Authoring -- WEBDAV +rfc2616 - HTTP/1.1 (revised) [plus errata] +rfc2617 - HTTP Authentication: Basic and Digest Access Authentication [plus errata] +rfc2817 - Upgrading to TLS Within HTTP/1.1 +rfc2818 - HTTP Over TLS +rfc2965 - HTTP State Management Mechanism (allegedly obsoletes 2109) +rfc3986 - Uniform Resource Identifiers (URI): Generic Syntax diff --git a/docs/specs/rfc1945.txt b/docs/specs/rfc1945.txt new file mode 100644 index 0000000..37f3f23 --- /dev/null +++ b/docs/specs/rfc1945.txt @@ -0,0 +1,3363 @@ + + + + + + +Network Working Group T. Berners-Lee +Request for Comments: 1945 MIT/LCS +Category: Informational R. Fielding + UC Irvine + H. Frystyk + MIT/LCS + May 1996 + + + Hypertext Transfer Protocol -- HTTP/1.0 + +Status of This Memo + + This memo provides information for the Internet community. This memo + does not specify an Internet standard of any kind. Distribution of + this memo is unlimited. + +IESG Note: + + The IESG has concerns about this protocol, and expects this document + to be replaced relatively soon by a standards track document. + +Abstract + + The Hypertext Transfer Protocol (HTTP) is an application-level + protocol with the lightness and speed necessary for distributed, + collaborative, hypermedia information systems. It is a generic, + stateless, object-oriented protocol which can be used for many tasks, + such as name servers and distributed object management systems, + through extension of its request methods (commands). 
A feature of + HTTP is the typing of data representation, allowing systems to be + built independently of the data being transferred. + + HTTP has been in use by the World-Wide Web global information + initiative since 1990. This specification reflects common usage of + the protocol referred to as "HTTP/1.0". + +Table of Contents + + 1. Introduction .............................................. 4 + 1.1 Purpose .............................................. 4 + 1.2 Terminology .......................................... 4 + 1.3 Overall Operation .................................... 6 + 1.4 HTTP and MIME ........................................ 8 + 2. Notational Conventions and Generic Grammar ................ 8 + 2.1 Augmented BNF ........................................ 8 + 2.2 Basic Rules .......................................... 10 + 3. Protocol Parameters ....................................... 12 + + + +Berners-Lee, et al Informational [Page 1] + +RFC 1945 HTTP/1.0 May 1996 + + + 3.1 HTTP Version ......................................... 12 + 3.2 Uniform Resource Identifiers ......................... 14 + 3.2.1 General Syntax ................................ 14 + 3.2.2 http URL ...................................... 15 + 3.3 Date/Time Formats .................................... 15 + 3.4 Character Sets ....................................... 17 + 3.5 Content Codings ...................................... 18 + 3.6 Media Types .......................................... 19 + 3.6.1 Canonicalization and Text Defaults ............ 19 + 3.6.2 Multipart Types ............................... 20 + 3.7 Product Tokens ....................................... 20 + 4. HTTP Message .............................................. 21 + 4.1 Message Types ........................................ 21 + 4.2 Message Headers ...................................... 22 + 4.3 General Header Fields ................................ 23 + 5. 
Request ................................................... 23 + 5.1 Request-Line ......................................... 23 + 5.1.1 Method ........................................ 24 + 5.1.2 Request-URI ................................... 24 + 5.2 Request Header Fields ................................ 25 + 6. Response .................................................. 25 + 6.1 Status-Line .......................................... 26 + 6.1.1 Status Code and Reason Phrase ................. 26 + 6.2 Response Header Fields ............................... 28 + 7. Entity .................................................... 28 + 7.1 Entity Header Fields ................................. 29 + 7.2 Entity Body .......................................... 29 + 7.2.1 Type .......................................... 29 + 7.2.2 Length ........................................ 30 + 8. Method Definitions ........................................ 30 + 8.1 GET .................................................. 31 + 8.2 HEAD ................................................. 31 + 8.3 POST ................................................. 31 + 9. Status Code Definitions ................................... 32 + 9.1 Informational 1xx .................................... 32 + 9.2 Successful 2xx ....................................... 32 + 9.3 Redirection 3xx ...................................... 34 + 9.4 Client Error 4xx ..................................... 35 + 9.5 Server Error 5xx ..................................... 37 + 10. Header Field Definitions .................................. 37 + 10.1 Allow ............................................... 38 + 10.2 Authorization ....................................... 38 + 10.3 Content-Encoding .................................... 39 + 10.4 Content-Length ...................................... 39 + 10.5 Content-Type ........................................ 40 + 10.6 Date ................................................ 
40 + 10.7 Expires ............................................. 41 + 10.8 From ................................................ 42 + + + +Berners-Lee, et al Informational [Page 2] + +RFC 1945 HTTP/1.0 May 1996 + + + 10.9 If-Modified-Since ................................... 42 + 10.10 Last-Modified ....................................... 43 + 10.11 Location ............................................ 44 + 10.12 Pragma .............................................. 44 + 10.13 Referer ............................................. 44 + 10.14 Server .............................................. 45 + 10.15 User-Agent .......................................... 46 + 10.16 WWW-Authenticate .................................... 46 + 11. Access Authentication ..................................... 47 + 11.1 Basic Authentication Scheme ......................... 48 + 12. Security Considerations ................................... 49 + 12.1 Authentication of Clients ........................... 49 + 12.2 Safe Methods ........................................ 49 + 12.3 Abuse of Server Log Information ..................... 50 + 12.4 Transfer of Sensitive Information ................... 50 + 12.5 Attacks Based On File and Path Names ................ 51 + 13. Acknowledgments ........................................... 51 + 14. References ................................................ 52 + 15. Authors' Addresses ........................................ 54 + Appendix A. Internet Media Type message/http ................ 55 + Appendix B. Tolerant Applications ........................... 55 + Appendix C. Relationship to MIME ............................ 56 + C.1 Conversion to Canonical Form ......................... 56 + C.2 Conversion of Date Formats ........................... 57 + C.3 Introduction of Content-Encoding ..................... 57 + C.4 No Content-Transfer-Encoding ......................... 57 + C.5 HTTP Header Fields in Multipart Body-Parts ........... 57 + Appendix D. 
Additional Features ............................. 57 + D.1 Additional Request Methods ........................... 58 + D.1.1 PUT ........................................... 58 + D.1.2 DELETE ........................................ 58 + D.1.3 LINK .......................................... 58 + D.1.4 UNLINK ........................................ 58 + D.2 Additional Header Field Definitions .................. 58 + D.2.1 Accept ........................................ 58 + D.2.2 Accept-Charset ................................ 59 + D.2.3 Accept-Encoding ............................... 59 + D.2.4 Accept-Language ............................... 59 + D.2.5 Content-Language .............................. 59 + D.2.6 Link .......................................... 59 + D.2.7 MIME-Version .................................. 59 + D.2.8 Retry-After ................................... 60 + D.2.9 Title ......................................... 60 + D.2.10 URI ........................................... 60 + + + + + + + +Berners-Lee, et al Informational [Page 3] + +RFC 1945 HTTP/1.0 May 1996 + + +1. Introduction + +1.1 Purpose + + The Hypertext Transfer Protocol (HTTP) is an application-level + protocol with the lightness and speed necessary for distributed, + collaborative, hypermedia information systems. HTTP has been in use + by the World-Wide Web global information initiative since 1990. This + specification reflects common usage of the protocol referred too as + "HTTP/1.0". This specification describes the features that seem to be + consistently implemented in most HTTP/1.0 clients and servers. The + specification is split into two sections. Those features of HTTP for + which implementations are usually consistent are described in the + main body of this document. Those features which have few or + inconsistent implementations are listed in Appendix D. 
+ + Practical information systems require more functionality than simple + retrieval, including search, front-end update, and annotation. HTTP + allows an open-ended set of methods to be used to indicate the + purpose of a request. It builds on the discipline of reference + provided by the Uniform Resource Identifier (URI) [2], as a location + (URL) [4] or name (URN) [16], for indicating the resource on which a + method is to be applied. Messages are passed in a format similar to + that used by Internet Mail [7] and the Multipurpose Internet Mail + Extensions (MIME) [5]. + + HTTP is also used as a generic protocol for communication between + user agents and proxies/gateways to other Internet protocols, such as + SMTP [12], NNTP [11], FTP [14], Gopher [1], and WAIS [8], allowing + basic hypermedia access to resources available from diverse + applications and simplifying the implementation of user agents. + +1.2 Terminology + + This specification uses a number of terms to refer to the roles + played by participants in, and objects of, the HTTP communication. + + connection + + A transport layer virtual circuit established between two + application programs for the purpose of communication. + + message + + The basic unit of HTTP communication, consisting of a structured + sequence of octets matching the syntax defined in Section 4 and + transmitted via the connection. + + + + +Berners-Lee, et al Informational [Page 4] + +RFC 1945 HTTP/1.0 May 1996 + + + request + + An HTTP request message (as defined in Section 5). + + response + + An HTTP response message (as defined in Section 6). + + resource + + A network data object or service which can be identified by a + URI (Section 3.2). + + entity + + A particular representation or rendition of a data resource, or + reply from a service resource, that may be enclosed within a + request or response message. An entity consists of + metainformation in the form of entity headers and content in the + form of an entity body. 
+ + client + + An application program that establishes connections for the + purpose of sending requests. + + user agent + + The client which initiates a request. These are often browsers, + editors, spiders (web-traversing robots), or other end user + tools. + + server + + An application program that accepts connections in order to + service requests by sending back responses. + + origin server + + The server on which a given resource resides or is to be created. + + proxy + + An intermediary program which acts as both a server and a client + for the purpose of making requests on behalf of other clients. + Requests are serviced internally or by passing them, with + possible translation, on to other servers. A proxy must + interpret and, if necessary, rewrite a request message before + + + +Berners-Lee, et al Informational [Page 5] + +RFC 1945 HTTP/1.0 May 1996 + + + forwarding it. Proxies are often used as client-side portals + through network firewalls and as helper applications for + handling requests via protocols not implemented by the user + agent. + + gateway + + A server which acts as an intermediary for some other server. + Unlike a proxy, a gateway receives requests as if it were the + origin server for the requested resource; the requesting client + may not be aware that it is communicating with a gateway. + Gateways are often used as server-side portals through network + firewalls and as protocol translators for access to resources + stored on non-HTTP systems. + + tunnel + + A tunnel is an intermediary program which is acting as a blind + relay between two connections. Once active, a tunnel is not + considered a party to the HTTP communication, though the tunnel + may have been initiated by an HTTP request. The tunnel ceases to + exist when both ends of the relayed connections are closed. + Tunnels are used when a portal is necessary and the intermediary + cannot, or should not, interpret the relayed communication. 
+ + cache + + A program's local store of response messages and the subsystem + that controls its message storage, retrieval, and deletion. A + cache stores cachable responses in order to reduce the response + time and network bandwidth consumption on future, equivalent + requests. Any client or server may include a cache, though a + cache cannot be used by a server while it is acting as a tunnel. + + Any given program may be capable of being both a client and a server; + our use of these terms refers only to the role being performed by the + program for a particular connection, rather than to the program's + capabilities in general. Likewise, any server may act as an origin + server, proxy, gateway, or tunnel, switching behavior based on the + nature of each request. + +1.3 Overall Operation + + The HTTP protocol is based on a request/response paradigm. A client + establishes a connection with a server and sends a request to the + server in the form of a request method, URI, and protocol version, + followed by a MIME-like message containing request modifiers, client + information, and possible body content. The server responds with a + + + +Berners-Lee, et al Informational [Page 6] + +RFC 1945 HTTP/1.0 May 1996 + + + status line, including the message's protocol version and a success + or error code, followed by a MIME-like message containing server + information, entity metainformation, and possible body content. + + Most HTTP communication is initiated by a user agent and consists of + a request to be applied to a resource on some origin server. In the + simplest case, this may be accomplished via a single connection (v) + between the user agent (UA) and the origin server (O). + + request chain ------------------------> + UA -------------------v------------------- O + <----------------------- response chain + + A more complicated situation occurs when one or more intermediaries + are present in the request/response chain. 
There are three common + forms of intermediary: proxy, gateway, and tunnel. A proxy is a + forwarding agent, receiving requests for a URI in its absolute form, + rewriting all or parts of the message, and forwarding the reformatted + request toward the server identified by the URI. A gateway is a + receiving agent, acting as a layer above some other server(s) and, if + necessary, translating the requests to the underlying server's + protocol. A tunnel acts as a relay point between two connections + without changing the messages; tunnels are used when the + communication needs to pass through an intermediary (such as a + firewall) even when the intermediary cannot understand the contents + of the messages. + + request chain --------------------------------------> + UA -----v----- A -----v----- B -----v----- C -----v----- O + <------------------------------------- response chain + + The figure above shows three intermediaries (A, B, and C) between the + user agent and origin server. A request or response message that + travels the whole chain must pass through four separate connections. + This distinction is important because some HTTP communication options + may apply only to the connection with the nearest, non-tunnel + neighbor, only to the end-points of the chain, or to all connections + along the chain. Although the diagram is linear, each participant may + be engaged in multiple, simultaneous communications. For example, B + may be receiving requests from many clients other than A, and/or + forwarding requests to servers other than C, at the same time that it + is handling A's request. + + Any party to the communication which is not acting as a tunnel may + employ an internal cache for handling requests. The effect of a cache + is that the request/response chain is shortened if one of the + participants along the chain has a cached response applicable to that + request. 
The following illustrates the resulting chain if B has a + + + +Berners-Lee, et al Informational [Page 7] + +RFC 1945 HTTP/1.0 May 1996 + + + cached copy of an earlier response from O (via C) for a request which + has not been cached by UA or A. + + request chain ----------> + UA -----v----- A -----v----- B - - - - - - C - - - - - - O + <--------- response chain + + Not all responses are cachable, and some requests may contain + modifiers which place special requirements on cache behavior. Some + HTTP/1.0 applications use heuristics to describe what is or is not a + "cachable" response, but these rules are not standardized. + + On the Internet, HTTP communication generally takes place over TCP/IP + connections. The default port is TCP 80 [15], but other ports can be + used. This does not preclude HTTP from being implemented on top of + any other protocol on the Internet, or on other networks. HTTP only + presumes a reliable transport; any protocol that provides such + guarantees can be used, and the mapping of the HTTP/1.0 request and + response structures onto the transport data units of the protocol in + question is outside the scope of this specification. + + Except for experimental applications, current practice requires that + the connection be established by the client prior to each request and + closed by the server after sending the response. Both clients and + servers should be aware that either party may close the connection + prematurely, due to user action, automated time-out, or program + failure, and should handle such closing in a predictable fashion. In + any case, the closing of the connection by either or both parties + always terminates the current request, regardless of its status. + +1.4 HTTP and MIME + + HTTP/1.0 uses many of the constructs defined for MIME, as defined in + RFC 1521 [5]. 
Appendix C describes the ways in which the context of + HTTP allows for different use of Internet Media Types than is + typically found in Internet mail, and gives the rationale for those + differences. + +2. Notational Conventions and Generic Grammar + +2.1 Augmented BNF + + All of the mechanisms specified in this document are described in + both prose and an augmented Backus-Naur Form (BNF) similar to that + used by RFC 822 [7]. Implementors will need to be familiar with the + notation in order to understand this specification. The augmented BNF + includes the following constructs: + + + + +Berners-Lee, et al Informational [Page 8] + +RFC 1945 HTTP/1.0 May 1996 + + + name = definition + + The name of a rule is simply the name itself (without any + enclosing "<" and ">") and is separated from its definition by + the equal character "=". Whitespace is only significant in that + indentation of continuation lines is used to indicate a rule + definition that spans more than one line. Certain basic rules + are in uppercase, such as SP, LWS, HT, CRLF, DIGIT, ALPHA, etc. + Angle brackets are used within definitions whenever their + presence will facilitate discerning the use of rule names. + + "literal" + + Quotation marks surround literal text. Unless stated otherwise, + the text is case-insensitive. + + rule1 | rule2 + + Elements separated by a bar ("|") are alternatives, + e.g., "yes | no" will accept yes or no. + + (rule1 rule2) + + Elements enclosed in parentheses are treated as a single + element. Thus, "(elem (foo | bar) elem)" allows the token + sequences "elem foo elem" and "elem bar elem". + + <n>*<m>rule + + The character "*" preceding an element indicates repetition. The + full form is "<n>*<m>element" indicating at least <n> and at + most <m> occurrences of element. Default values are 0 and + infinity so that "*(element)" allows any number, including zero; + "1*element" requires at least one; and "1*2element" allows one + or two.
+ + [rule] + + Square brackets enclose optional elements; "[foo bar]" is + equivalent to "*1(foo bar)". + + <n>rule + + Specific repetition: "<n>(element)" is equivalent to + "<n>*<n>(element)"; that is, exactly <n> occurrences of + (element). Thus 2DIGIT is a 2-digit number, and 3ALPHA is a + string of three alphabetic characters. + + + + +Berners-Lee, et al Informational [Page 9] + +RFC 1945 HTTP/1.0 May 1996 + + + <n>#<m>rule + + A construct "#" is defined, similar to "*", for defining lists + of elements. The full form is "<n>#<m>element" indicating at + least <n> and at most <m> elements, each separated by one or + more commas (",") and optional linear whitespace (LWS). This + makes the usual form of lists very easy; a rule such as + "( *LWS element *( *LWS "," *LWS element ))" can be shown as + "1#element". Wherever this construct is used, null elements are + allowed, but do not contribute to the count of elements present. + That is, "(element), , (element)" is permitted, but counts as + only two elements. Therefore, where at least one element is + required, at least one non-null element must be present. Default + values are 0 and infinity so that "#(element)" allows any + number, including zero; "1#element" requires at least one; and + "1#2element" allows one or two. + + ; comment + + A semi-colon, set off some distance to the right of rule text, + starts a comment that continues to the end of line. This is a + simple way of including useful notes in parallel with the + specifications. + + implied *LWS + + The grammar described by this specification is word-based. + Except where noted otherwise, linear whitespace (LWS) can be + included between any two adjacent words (token or + quoted-string), and between adjacent tokens and delimiters + (tspecials), without changing the interpretation of a field. At + least one delimiter (tspecials) must exist between any two + tokens, since they would otherwise be interpreted as a single + token.
However, applications should attempt to follow "common + form" when generating HTTP constructs, since there exist some + implementations that fail to accept anything beyond the common + forms. + +2.2 Basic Rules + + The following rules are used throughout this specification to + describe basic parsing constructs. The US-ASCII coded character set + is defined by [17]. + + OCTET = <any 8-bit sequence of data> + CHAR = <any US-ASCII character (octets 0 - 127)> + UPALPHA = <any US-ASCII uppercase letter "A".."Z"> + LOALPHA = <any US-ASCII lowercase letter "a".."z"> + + + +Berners-Lee, et al Informational [Page 10] + +RFC 1945 HTTP/1.0 May 1996 + + + ALPHA = UPALPHA | LOALPHA + DIGIT = <any US-ASCII digit "0".."9"> + CTL = <any US-ASCII control character + (octets 0 - 31) and DEL (127)> + CR = <US-ASCII CR, carriage return (13)> + LF = <US-ASCII LF, linefeed (10)> + SP = <US-ASCII SP, space (32)> + HT = <US-ASCII HT, horizontal-tab (9)> + <"> = <US-ASCII double-quote mark (34)> + + HTTP/1.0 defines the octet sequence CR LF as the end-of-line marker + for all protocol elements except the Entity-Body (see Appendix B for + tolerant applications). The end-of-line marker within an Entity-Body + is defined by its associated media type, as described in Section 3.6. + + CRLF = CR LF + + HTTP/1.0 headers may be folded onto multiple lines if each + continuation line begins with a space or horizontal tab. All linear + whitespace, including folding, has the same semantics as SP. + + LWS = [CRLF] 1*( SP | HT ) + + However, folding of header lines is not expected by some + applications, and should not be generated by HTTP/1.0 applications. + + The TEXT rule is only used for descriptive field contents and values + that are not intended to be interpreted by the message parser. Words + of *TEXT may contain octets from character sets other than US-ASCII. + + TEXT = <any OCTET except CTLs, + but including LWS> + + Recipients of header field TEXT containing octets outside the US- + ASCII character set may assume that they represent ISO-8859-1 + characters. + + Hexadecimal numeric characters are used in several protocol elements. + + HEX = "A" | "B" | "C" | "D" | "E" | "F" + | "a" | "b" | "c" | "d" | "e" | "f" | DIGIT + + Many HTTP/1.0 header field values consist of words separated by LWS + or special characters. These special characters must be in a quoted + string to be used within a parameter value.
+ + word = token | quoted-string + + + + +Berners-Lee, et al Informational [Page 11] + +RFC 1945 HTTP/1.0 May 1996 + + + token = 1*<any CHAR except CTLs or tspecials> + + tspecials = "(" | ")" | "<" | ">" | "@" + | "," | ";" | ":" | "\" | <"> + | "/" | "[" | "]" | "?" | "=" + | "{" | "}" | SP | HT + + Comments may be included in some HTTP header fields by surrounding + the comment text with parentheses. Comments are only allowed in + fields containing "comment" as part of their field value definition. + In all other fields, parentheses are considered part of the field + value. + + comment = "(" *( ctext | comment ) ")" + ctext = <any TEXT excluding "(" and ")"> + + A string of text is parsed as a single word if it is quoted using + double-quote marks. + + quoted-string = ( <"> *(qdtext) <"> ) + + qdtext = <any CHAR except <"> and CTLs, + but including LWS> + + Single-character quoting using the backslash ("\") character is not + permitted in HTTP/1.0. + +3. Protocol Parameters + +3.1 HTTP Version + + HTTP uses a "<major>.<minor>" numbering scheme to indicate versions + of the protocol. The protocol versioning policy is intended to allow + the sender to indicate the format of a message and its capacity for + understanding further HTTP communication, rather than the features + obtained via that communication. No change is made to the version + number for the addition of message components which do not affect + communication behavior or which only add to extensible field values. + The <minor> number is incremented when the changes made to the + protocol add features which do not change the general message parsing + algorithm, but which may add to the message semantics and imply + additional capabilities of the sender. The <major> number is + incremented when the format of a message within the protocol is + changed. + + The version of an HTTP message is indicated by an HTTP-Version field + in the first line of the message.
If the protocol version is not + specified, the recipient must assume that the message is in the + + + +Berners-Lee, et al Informational [Page 12] + +RFC 1945 HTTP/1.0 May 1996 + + + simple HTTP/0.9 format. + + HTTP-Version = "HTTP" "/" 1*DIGIT "." 1*DIGIT + + Note that the major and minor numbers should be treated as separate + integers and that each may be incremented higher than a single digit. + Thus, HTTP/2.4 is a lower version than HTTP/2.13, which in turn is + lower than HTTP/12.3. Leading zeros should be ignored by recipients + and never generated by senders. + + This document defines both the 0.9 and 1.0 versions of the HTTP + protocol. Applications sending Full-Request or Full-Response + messages, as defined by this specification, must include an HTTP- + Version of "HTTP/1.0". + + HTTP/1.0 servers must: + + o recognize the format of the Request-Line for HTTP/0.9 and + HTTP/1.0 requests; + + o understand any valid request in the format of HTTP/0.9 or + HTTP/1.0; + + o respond appropriately with a message in the same protocol + version used by the client. + + HTTP/1.0 clients must: + + o recognize the format of the Status-Line for HTTP/1.0 responses; + + o understand any valid response in the format of HTTP/0.9 or + HTTP/1.0. + + Proxy and gateway applications must be careful in forwarding requests + that are received in a format different than that of the + application's native HTTP version. Since the protocol version + indicates the protocol capability of the sender, a proxy/gateway must + never send a message with a version indicator which is greater than + its native version; if a higher version request is received, the + proxy/gateway must either downgrade the request version or respond + with an error. Requests with a version lower than that of the + application's native format may be upgraded before being forwarded; + the proxy/gateway's response to that request must follow the server + requirements listed above. 
+ + + + + + + +Berners-Lee, et al Informational [Page 13] + +RFC 1945 HTTP/1.0 May 1996 + + +3.2 Uniform Resource Identifiers + + URIs have been known by many names: WWW addresses, Universal Document + Identifiers, Universal Resource Identifiers [2], and finally the + combination of Uniform Resource Locators (URL) [4] and Names (URN) + [16]. As far as HTTP is concerned, Uniform Resource Identifiers are + simply formatted strings which identify--via name, location, or any + other characteristic--a network resource. + +3.2.1 General Syntax + + URIs in HTTP can be represented in absolute form or relative to some + known base URI [9], depending upon the context of their use. The two + forms are differentiated by the fact that absolute URIs always begin + with a scheme name followed by a colon. + + URI = ( absoluteURI | relativeURI ) [ "#" fragment ] + + absoluteURI = scheme ":" *( uchar | reserved ) + + relativeURI = net_path | abs_path | rel_path + + net_path = "//" net_loc [ abs_path ] + abs_path = "/" rel_path + rel_path = [ path ] [ ";" params ] [ "?" query ] + + path = fsegment *( "/" segment ) + fsegment = 1*pchar + segment = *pchar + + params = param *( ";" param ) + param = *( pchar | "/" ) + + scheme = 1*( ALPHA | DIGIT | "+" | "-" | "." ) + net_loc = *( pchar | ";" | "?" ) + query = *( uchar | reserved ) + fragment = *( uchar | reserved ) + + pchar = uchar | ":" | "@" | "&" | "=" | "+" + uchar = unreserved | escape + unreserved = ALPHA | DIGIT | safe | extra | national + + escape = "%" HEX HEX + reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" + extra = "!" | "*" | "'" | "(" | ")" | "," + safe = "$" | "-" | "_" | "." + unsafe = CTL | SP | <"> | "#" | "%" | "<" | ">" + national = <any OCTET excluding ALPHA, DIGIT, + + + +Berners-Lee, et al Informational [Page 14] + +RFC 1945 HTTP/1.0 May 1996 + + + reserved, extra, safe, and unsafe> + + For definitive information on URL syntax and semantics, see RFC 1738 + [4] and RFC 1808 [9].
The BNF above includes national characters not + allowed in valid URLs as specified by RFC 1738, since HTTP servers + are not restricted in the set of unreserved characters allowed to + represent the rel_path part of addresses, and HTTP proxies may + receive requests for URIs not defined by RFC 1738. + +3.2.2 http URL + + The "http" scheme is used to locate network resources via the HTTP + protocol. This section defines the scheme-specific syntax and + semantics for http URLs. + + http_URL = "http:" "//" host [ ":" port ] [ abs_path ] + + host = <A legal Internet host domain name + or IP address (in dotted-decimal form), + as defined by Section 2.1 of RFC 1123> + + port = *DIGIT + + If the port is empty or not given, port 80 is assumed. The semantics + are that the identified resource is located at the server listening + for TCP connections on that port of that host, and the Request-URI + for the resource is abs_path. If the abs_path is not present in the + URL, it must be given as "/" when used as a Request-URI (Section + 5.1.2). + + Note: Although the HTTP protocol is independent of the transport + layer protocol, the http URL only identifies resources by their + TCP location, and thus non-TCP resources must be identified by + some other URI scheme. + + The canonical form for "http" URLs is obtained by converting any + UPALPHA characters in host to their LOALPHA equivalent (hostnames are + case-insensitive), eliding the [ ":" port ] if the port is 80, and + replacing an empty abs_path with "/". + +3.3 Date/Time Formats + + HTTP/1.0 applications have historically allowed three different + formats for the representation of date/time stamps: + + Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123 + Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036 + Sun Nov 6 08:49:37 1994 ; ANSI C's asctime() format + + + +Berners-Lee, et al Informational [Page 15] + +RFC 1945 HTTP/1.0 May 1996 + + + The first format is preferred as an Internet standard and represents + a fixed-length subset of that defined by RFC 1123 [6] (an update to + RFC 822 [7]).
The second format is in common use, but is based on the + obsolete RFC 850 [10] date format and lacks a four-digit year. + HTTP/1.0 clients and servers that parse the date value should accept + all three formats, though they must never generate the third + (asctime) format. + + Note: Recipients of date values are encouraged to be robust in + accepting date values that may have been generated by non-HTTP + applications, as is sometimes the case when retrieving or posting + messages via proxies/gateways to SMTP or NNTP. + + All HTTP/1.0 date/time stamps must be represented in Universal Time + (UT), also known as Greenwich Mean Time (GMT), without exception. + This is indicated in the first two formats by the inclusion of "GMT" + as the three-letter abbreviation for time zone, and should be assumed + when reading the asctime format. + + HTTP-date = rfc1123-date | rfc850-date | asctime-date + + rfc1123-date = wkday "," SP date1 SP time SP "GMT" + rfc850-date = weekday "," SP date2 SP time SP "GMT" + asctime-date = wkday SP date3 SP time SP 4DIGIT + + date1 = 2DIGIT SP month SP 4DIGIT + ; day month year (e.g., 02 Jun 1982) + date2 = 2DIGIT "-" month "-" 2DIGIT + ; day-month-year (e.g., 02-Jun-82) + date3 = month SP ( 2DIGIT | ( SP 1DIGIT )) + ; month day (e.g., Jun 2) + + time = 2DIGIT ":" 2DIGIT ":" 2DIGIT + ; 00:00:00 - 23:59:59 + + wkday = "Mon" | "Tue" | "Wed" + | "Thu" | "Fri" | "Sat" | "Sun" + + weekday = "Monday" | "Tuesday" | "Wednesday" + | "Thursday" | "Friday" | "Saturday" | "Sunday" + + month = "Jan" | "Feb" | "Mar" | "Apr" + | "May" | "Jun" | "Jul" | "Aug" + | "Sep" | "Oct" | "Nov" | "Dec" + + Note: HTTP requirements for the date/time stamp format apply + only to their usage within the protocol stream. Clients and + servers are not required to use these formats for user + + + +Berners-Lee, et al Informational [Page 16] + +RFC 1945 HTTP/1.0 May 1996 + + + presentation, request logging, etc. 
+ +3.4 Character Sets + + HTTP uses the same definition of the term "character set" as that + described for MIME: + + The term "character set" is used in this document to refer to a + method used with one or more tables to convert a sequence of + octets into a sequence of characters. Note that unconditional + conversion in the other direction is not required, in that not all + characters may be available in a given character set and a + character set may provide more than one sequence of octets to + represent a particular character. This definition is intended to + allow various kinds of character encodings, from simple single- + table mappings such as US-ASCII to complex table switching methods + such as those that use ISO 2022's techniques. However, the + definition associated with a MIME character set name must fully + specify the mapping to be performed from octets to characters. In + particular, use of external profiling information to determine the + exact mapping is not permitted. + + Note: This use of the term "character set" is more commonly + referred to as a "character encoding." However, since HTTP and + MIME share the same registry, it is important that the terminology + also be shared. + + HTTP character sets are identified by case-insensitive tokens. The + complete set of tokens are defined by the IANA Character Set registry + [15]. However, because that registry does not define a single, + consistent token for each character set, we define here the preferred + names for those character sets most likely to be used with HTTP + entities. These character sets include those registered by RFC 1521 + [5] -- the US-ASCII [17] and ISO-8859 [18] character sets -- and + other names specifically recommended for use within MIME charset + parameters. 
+ + charset = "US-ASCII" + | "ISO-8859-1" | "ISO-8859-2" | "ISO-8859-3" + | "ISO-8859-4" | "ISO-8859-5" | "ISO-8859-6" + | "ISO-8859-7" | "ISO-8859-8" | "ISO-8859-9" + | "ISO-2022-JP" | "ISO-2022-JP-2" | "ISO-2022-KR" + | "UNICODE-1-1" | "UNICODE-1-1-UTF-7" | "UNICODE-1-1-UTF-8" + | token + + Although HTTP allows an arbitrary token to be used as a charset + value, any token that has a predefined value within the IANA + Character Set registry [15] must represent the character set defined + + + +Berners-Lee, et al Informational [Page 17] + +RFC 1945 HTTP/1.0 May 1996 + + + by that registry. Applications should limit their use of character + sets to those defined by the IANA registry. + + The character set of an entity body should be labelled as the lowest + common denominator of the character codes used within that body, with + the exception that no label is preferred over the labels US-ASCII or + ISO-8859-1. + +3.5 Content Codings + + Content coding values are used to indicate an encoding transformation + that has been applied to a resource. Content codings are primarily + used to allow a document to be compressed or encrypted without losing + the identity of its underlying media type. Typically, the resource is + stored in this encoding and only decoded before rendering or + analogous usage. + + content-coding = "x-gzip" | "x-compress" | token + + Note: For future compatibility, HTTP/1.0 applications should + consider "gzip" and "compress" to be equivalent to "x-gzip" + and "x-compress", respectively. + + All content-coding values are case-insensitive. HTTP/1.0 uses + content-coding values in the Content-Encoding (Section 10.3) header + field. Although the value describes the content-coding, what is more + important is that it indicates what decoding mechanism will be + required to remove the encoding. Note that a single program may be + capable of decoding multiple content-coding formats. 
Two values are + defined by this specification: + + x-gzip + An encoding format produced by the file compression program + "gzip" (GNU zip) developed by Jean-loup Gailly. This format is + typically a Lempel-Ziv coding (LZ77) with a 32 bit CRC. + + x-compress + The encoding format produced by the file compression program + "compress". This format is an adaptive Lempel-Ziv-Welch coding + (LZW). + + Note: Use of program names for the identification of + encoding formats is not desirable and should be discouraged + for future encodings. Their use here is representative of + historical practice, not good design. + + + + + + +Berners-Lee, et al Informational [Page 18] + +RFC 1945 HTTP/1.0 May 1996 + + +3.6 Media Types + + HTTP uses Internet Media Types [13] in the Content-Type header field + (Section 10.5) in order to provide open and extensible data typing. + + media-type = type "/" subtype *( ";" parameter ) + type = token + subtype = token + + Parameters may follow the type/subtype in the form of attribute/value + pairs. + + parameter = attribute "=" value + attribute = token + value = token | quoted-string + + The type, subtype, and parameter attribute names are case- + insensitive. Parameter values may or may not be case-sensitive, + depending on the semantics of the parameter name. LWS must not be + generated between the type and subtype, nor between an attribute and + its value. Upon receipt of a media type with an unrecognized + parameter, a user agent should treat the media type as if the + unrecognized parameter and its value were not present. + + Some older HTTP applications do not recognize media type parameters. + HTTP/1.0 applications should only use media type parameters when they + are necessary to define the content of a message. + + Media-type values are registered with the Internet Assigned Number + Authority (IANA [15]). The media type registration process is + outlined in RFC 1590 [13]. Use of non-registered media types is + discouraged. 
+ +3.6.1 Canonicalization and Text Defaults + + Internet media types are registered with a canonical form. In + general, an Entity-Body transferred via HTTP must be represented in + the appropriate canonical form prior to its transmission. If the body + has been encoded with a Content-Encoding, the underlying data should + be in canonical form prior to being encoded. + + Media subtypes of the "text" type use CRLF as the text line break + when in canonical form. However, HTTP allows the transport of text + media with plain CR or LF alone representing a line break when used + consistently within the Entity-Body. HTTP applications must accept + CRLF, bare CR, and bare LF as being representative of a line break in + text media received via HTTP. + + + + +Berners-Lee, et al Informational [Page 19] + +RFC 1945 HTTP/1.0 May 1996 + + + In addition, if the text media is represented in a character set that + does not use octets 13 and 10 for CR and LF respectively, as is the + case for some multi-byte character sets, HTTP allows the use of + whatever octet sequences are defined by that character set to + represent the equivalent of CR and LF for line breaks. This + flexibility regarding line breaks applies only to text media in the + Entity-Body; a bare CR or LF should not be substituted for CRLF + within any of the HTTP control structures (such as header fields and + multipart boundaries). + + The "charset" parameter is used with some media types to define the + character set (Section 3.4) of the data. When no explicit charset + parameter is provided by the sender, media subtypes of the "text" + type are defined to have a default charset value of "ISO-8859-1" when + received via HTTP. Data in character sets other than "ISO-8859-1" or + its subsets must be labelled with an appropriate charset value in + order to be consistently interpreted by the recipient. + + Note: Many current HTTP servers provide data using charsets other + than "ISO-8859-1" without proper labelling. 
This situation reduces + interoperability and is not recommended. To compensate for this, + some HTTP user agents provide a configuration option to allow the + user to change the default interpretation of the media type + character set when no charset parameter is given. + +3.6.2 Multipart Types + + MIME provides for a number of "multipart" types -- encapsulations of + several entities within a single message's Entity-Body. The multipart + types registered by IANA [15] do not have any special meaning for + HTTP/1.0, though user agents may need to understand each type in + order to correctly interpret the purpose of each body-part. An HTTP + user agent should follow the same or similar behavior as a MIME user + agent does upon receipt of a multipart type. HTTP servers should not + assume that all HTTP clients are prepared to handle multipart types. + + All multipart types share a common syntax and must include a boundary + parameter as part of the media type value. The message body is itself + a protocol element and must therefore use only CRLF to represent line + breaks between body-parts. Multipart body-parts may contain HTTP + header fields which are significant to the meaning of that part. + +3.7 Product Tokens + + Product tokens are used to allow communicating applications to + identify themselves via a simple product token, with an optional + slash and version designator. Most fields using product tokens also + allow subproducts which form a significant part of the application to + + + +Berners-Lee, et al Informational [Page 20] + +RFC 1945 HTTP/1.0 May 1996 + + + be listed, separated by whitespace. By convention, the products are + listed in order of their significance for identifying the + application. 
+ + product = token ["/" product-version] + product-version = token + + Examples: + + User-Agent: CERN-LineMode/2.15 libwww/2.17b3 + + Server: Apache/0.8.4 + + Product tokens should be short and to the point -- use of them for + advertizing or other non-essential information is explicitly + forbidden. Although any token character may appear in a product- + version, this token should only be used for a version identifier + (i.e., successive versions of the same product should only differ in + the product-version portion of the product value). + +4. HTTP Message + +4.1 Message Types + + HTTP messages consist of requests from client to server and responses + from server to client. + + HTTP-message = Simple-Request ; HTTP/0.9 messages + | Simple-Response + | Full-Request ; HTTP/1.0 messages + | Full-Response + + Full-Request and Full-Response use the generic message format of RFC + 822 [7] for transferring entities. Both messages may include optional + header fields (also known as "headers") and an entity body. The + entity body is separated from the headers by a null line (i.e., a + line with nothing preceding the CRLF). + + Full-Request = Request-Line ; Section 5.1 + *( General-Header ; Section 4.3 + | Request-Header ; Section 5.2 + | Entity-Header ) ; Section 7.1 + CRLF + [ Entity-Body ] ; Section 7.2 + + Full-Response = Status-Line ; Section 6.1 + *( General-Header ; Section 4.3 + | Response-Header ; Section 6.2 + + + +Berners-Lee, et al Informational [Page 21] + +RFC 1945 HTTP/1.0 May 1996 + + + | Entity-Header ) ; Section 7.1 + CRLF + [ Entity-Body ] ; Section 7.2 + + Simple-Request and Simple-Response do not allow the use of any header + information and are limited to a single request method (GET). + + Simple-Request = "GET" SP Request-URI CRLF + + Simple-Response = [ Entity-Body ] + + Use of the Simple-Request format is discouraged because it prevents + the server from identifying the media type of the returned entity. 
+ +4.2 Message Headers + + HTTP header fields, which include General-Header (Section 4.3), + Request-Header (Section 5.2), Response-Header (Section 6.2), and + Entity-Header (Section 7.1) fields, follow the same generic format as + that given in Section 3.1 of RFC 822 [7]. Each header field consists + of a name followed immediately by a colon (":"), a single space (SP) + character, and the field value. Field names are case-insensitive. + Header fields can be extended over multiple lines by preceding each + extra line with at least one SP or HT, though this is not + recommended. + + HTTP-header = field-name ":" [ field-value ] CRLF + + field-name = token + field-value = *( field-content | LWS ) + + field-content = <the OCTETs making up the field-value + and consisting of either *TEXT or combinations + of token, tspecials, and quoted-string> + + The order in which header fields are received is not significant. + However, it is "good practice" to send General-Header fields first, + followed by Request-Header or Response-Header fields prior to the + Entity-Header fields. + + Multiple HTTP-header fields with the same field-name may be present + in a message if and only if the entire field-value for that header + field is defined as a comma-separated list [i.e., #(values)]. It must + be possible to combine the multiple header fields into one "field- + name: field-value" pair, without changing the semantics of the + message, by appending each subsequent field-value to the first, each + separated by a comma. + + + + +Berners-Lee, et al Informational [Page 22] + +RFC 1945 HTTP/1.0 May 1996 + + +4.3 General Header Fields + + There are a few header fields which have general applicability for + both request and response messages, but which do not apply to the + entity being transferred. These headers apply only to the message + being transmitted. + + General-Header = Date ; Section 10.6 + | Pragma ; Section 10.12 + + General header field names can be extended reliably only in + combination with a change in the protocol version.
However, new or + experimental header fields may be given the semantics of general + header fields if all parties in the communication recognize them to + be general header fields. Unrecognized header fields are treated as + Entity-Header fields. + +5. Request + + A request message from a client to a server includes, within the + first line of that message, the method to be applied to the resource, + the identifier of the resource, and the protocol version in use. For + backwards compatibility with the more limited HTTP/0.9 protocol, + there are two valid formats for an HTTP request: + + Request = Simple-Request | Full-Request + + Simple-Request = "GET" SP Request-URI CRLF + + Full-Request = Request-Line ; Section 5.1 + *( General-Header ; Section 4.3 + | Request-Header ; Section 5.2 + | Entity-Header ) ; Section 7.1 + CRLF + [ Entity-Body ] ; Section 7.2 + + If an HTTP/1.0 server receives a Simple-Request, it must respond with + an HTTP/0.9 Simple-Response. An HTTP/1.0 client capable of receiving + a Full-Response should never generate a Simple-Request. + +5.1 Request-Line + + The Request-Line begins with a method token, followed by the + Request-URI and the protocol version, and ending with CRLF. The + elements are separated by SP characters. No CR or LF are allowed + except in the final CRLF sequence. + + Request-Line = Method SP Request-URI SP HTTP-Version CRLF + + + +Berners-Lee, et al Informational [Page 23] + +RFC 1945 HTTP/1.0 May 1996 + + + Note that the difference between a Simple-Request and the Request- + Line of a Full-Request is the presence of the HTTP-Version field and + the availability of methods other than GET. + +5.1.1 Method + + The Method token indicates the method to be performed on the resource + identified by the Request-URI. The method is case-sensitive. 
+ + Method = "GET" ; Section 8.1 + | "HEAD" ; Section 8.2 + | "POST" ; Section 8.3 + | extension-method + + extension-method = token + + The list of methods acceptable by a specific resource can change + dynamically; the client is notified through the return code of the + response if a method is not allowed on a resource. Servers should + return the status code 501 (not implemented) if the method is + unrecognized or not implemented. + + The methods commonly used by HTTP/1.0 applications are fully defined + in Section 8. + +5.1.2 Request-URI + + The Request-URI is a Uniform Resource Identifier (Section 3.2) and + identifies the resource upon which to apply the request. + + Request-URI = absoluteURI | abs_path + + The two options for Request-URI are dependent on the nature of the + request. + + The absoluteURI form is only allowed when the request is being made + to a proxy. The proxy is requested to forward the request and return + the response. If the request is GET or HEAD and a prior response is + cached, the proxy may use the cached message if it passes any + restrictions in the Expires header field. Note that the proxy may + forward the request on to another proxy or directly to the server + specified by the absoluteURI. In order to avoid request loops, a + proxy must be able to recognize all of its server names, including + any aliases, local variations, and the numeric IP address. An example + Request-Line would be: + + GET http://www.w3.org/pub/WWW/TheProject.html HTTP/1.0 + + + + +Berners-Lee, et al Informational [Page 24] + +RFC 1945 HTTP/1.0 May 1996 + + + The most common form of Request-URI is that used to identify a + resource on an origin server or gateway. In this case, only the + absolute path of the URI is transmitted (see Section 3.2.1, + abs_path). 
For example, a client wishing to retrieve the resource + above directly from the origin server would create a TCP connection + to port 80 of the host "www.w3.org" and send the line: + + GET /pub/WWW/TheProject.html HTTP/1.0 + + followed by the remainder of the Full-Request. Note that the absolute + path cannot be empty; if none is present in the original URI, it must + be given as "/" (the server root). + + The Request-URI is transmitted as an encoded string, where some + characters may be escaped using the "% HEX HEX" encoding defined by + RFC 1738 [4]. The origin server must decode the Request-URI in order + to properly interpret the request. + +5.2 Request Header Fields + + The request header fields allow the client to pass additional + information about the request, and about the client itself, to the + server. These fields act as request modifiers, with semantics + equivalent to the parameters on a programming language method + (procedure) invocation. + + Request-Header = Authorization ; Section 10.2 + | From ; Section 10.8 + | If-Modified-Since ; Section 10.9 + | Referer ; Section 10.13 + | User-Agent ; Section 10.15 + + Request-Header field names can be extended reliably only in + combination with a change in the protocol version. However, new or + experimental header fields may be given the semantics of request + header fields if all parties in the communication recognize them to + be request header fields. Unrecognized header fields are treated as + Entity-Header fields. + +6. Response + + After receiving and interpreting a request message, a server responds + in the form of an HTTP response message. 
+ + Response = Simple-Response | Full-Response + + Simple-Response = [ Entity-Body ] + + + + +Berners-Lee, et al Informational [Page 25] + +RFC 1945 HTTP/1.0 May 1996 + + + Full-Response = Status-Line ; Section 6.1 + *( General-Header ; Section 4.3 + | Response-Header ; Section 6.2 + | Entity-Header ) ; Section 7.1 + CRLF + [ Entity-Body ] ; Section 7.2 + + A Simple-Response should only be sent in response to an HTTP/0.9 + Simple-Request or if the server only supports the more limited + HTTP/0.9 protocol. If a client sends an HTTP/1.0 Full-Request and + receives a response that does not begin with a Status-Line, it should + assume that the response is a Simple-Response and parse it + accordingly. Note that the Simple-Response consists only of the + entity body and is terminated by the server closing the connection. + +6.1 Status-Line + + The first line of a Full-Response message is the Status-Line, + consisting of the protocol version followed by a numeric status code + and its associated textual phrase, with each element separated by SP + characters. No CR or LF is allowed except in the final CRLF sequence. + + Status-Line = HTTP-Version SP Status-Code SP Reason-Phrase CRLF + + Since a status line always begins with the protocol version and + status code + + "HTTP/" 1*DIGIT "." 1*DIGIT SP 3DIGIT SP + + (e.g., "HTTP/1.0 200 "), the presence of that expression is + sufficient to differentiate a Full-Response from a Simple-Response. + Although the Simple-Response format may allow such an expression to + occur at the beginning of an entity body, and thus cause a + misinterpretation of the message if it was given in response to a + Full-Request, most HTTP/0.9 servers are limited to responses of type + "text/html" and therefore would never generate such a response. + +6.1.1 Status Code and Reason Phrase + + The Status-Code element is a 3-digit integer result code of the + attempt to understand and satisfy the request. 
The Reason-Phrase is + intended to give a short textual description of the Status-Code. The + Status-Code is intended for use by automata and the Reason-Phrase is + intended for the human user. The client is not required to examine or + display the Reason-Phrase. + + + + + + +Berners-Lee, et al Informational [Page 26] + +RFC 1945 HTTP/1.0 May 1996 + + + The first digit of the Status-Code defines the class of response. The + last two digits do not have any categorization role. There are 5 + values for the first digit: + + o 1xx: Informational - Not used, but reserved for future use + + o 2xx: Success - The action was successfully received, + understood, and accepted. + + o 3xx: Redirection - Further action must be taken in order to + complete the request + + o 4xx: Client Error - The request contains bad syntax or cannot + be fulfilled + + o 5xx: Server Error - The server failed to fulfill an apparently + valid request + + The individual values of the numeric status codes defined for + HTTP/1.0, and an example set of corresponding Reason-Phrase's, are + presented below. The reason phrases listed here are only recommended + -- they may be replaced by local equivalents without affecting the + protocol. These codes are fully defined in Section 9. + + Status-Code = "200" ; OK + | "201" ; Created + | "202" ; Accepted + | "204" ; No Content + | "301" ; Moved Permanently + | "302" ; Moved Temporarily + | "304" ; Not Modified + | "400" ; Bad Request + | "401" ; Unauthorized + | "403" ; Forbidden + | "404" ; Not Found + | "500" ; Internal Server Error + | "501" ; Not Implemented + | "502" ; Bad Gateway + | "503" ; Service Unavailable + | extension-code + + extension-code = 3DIGIT + + Reason-Phrase = *<TEXT, excluding CR, LF> + + HTTP status codes are extensible, but the above codes are the only + ones generally recognized in current practice.
HTTP applications are + not required to understand the meaning of all registered status + + + +Berners-Lee, et al Informational [Page 27] + +RFC 1945 HTTP/1.0 May 1996 + + + codes, though such understanding is obviously desirable. However, + applications must understand the class of any status code, as + indicated by the first digit, and treat any unrecognized response as + being equivalent to the x00 status code of that class, with the + exception that an unrecognized response must not be cached. For + example, if an unrecognized status code of 431 is received by the + client, it can safely assume that there was something wrong with its + request and treat the response as if it had received a 400 status + code. In such cases, user agents should present to the user the + entity returned with the response, since that entity is likely to + include human-readable information which will explain the unusual + status. + +6.2 Response Header Fields + + The response header fields allow the server to pass additional + information about the response which cannot be placed in the Status- + Line. These header fields give information about the server and about + further access to the resource identified by the Request-URI. + + Response-Header = Location ; Section 10.11 + | Server ; Section 10.14 + | WWW-Authenticate ; Section 10.16 + + Response-Header field names can be extended reliably only in + combination with a change in the protocol version. However, new or + experimental header fields may be given the semantics of response + header fields if all parties in the communication recognize them to + be response header fields. Unrecognized header fields are treated as + Entity-Header fields. + +7. Entity + + Full-Request and Full-Response messages may transfer an entity within + some requests and responses. An entity consists of Entity-Header + fields and (usually) an Entity-Body. 
In this section, both sender and + recipient refer to either the client or the server, depending on who + sends and who receives the entity. + + + + + + + + + + + + + +Berners-Lee, et al Informational [Page 28] + +RFC 1945 HTTP/1.0 May 1996 + + +7.1 Entity Header Fields + + Entity-Header fields define optional metainformation about the + Entity-Body or, if no body is present, about the resource identified + by the request. + + Entity-Header = Allow ; Section 10.1 + | Content-Encoding ; Section 10.3 + | Content-Length ; Section 10.4 + | Content-Type ; Section 10.5 + | Expires ; Section 10.7 + | Last-Modified ; Section 10.10 + | extension-header + + extension-header = HTTP-header + + The extension-header mechanism allows additional Entity-Header fields + to be defined without changing the protocol, but these fields cannot + be assumed to be recognizable by the recipient. Unrecognized header + fields should be ignored by the recipient and forwarded by proxies. + +7.2 Entity Body + + The entity body (if any) sent with an HTTP request or response is in + a format and encoding defined by the Entity-Header fields. + + Entity-Body = *OCTET + + An entity body is included with a request message only when the + request method calls for one. The presence of an entity body in a + request is signaled by the inclusion of a Content-Length header field + in the request message headers. HTTP/1.0 requests containing an + entity body must include a valid Content-Length header field. + + For response messages, whether or not an entity body is included with + a message is dependent on both the request method and the response + code. All responses to the HEAD request method must not include a + body, even though the presence of entity header fields may lead one + to believe they do. All 1xx (informational), 204 (no content), and + 304 (not modified) responses must not include a body. 
All other + responses must include an entity body or a Content-Length header + field defined with a value of zero (0). + +7.2.1 Type + + When an Entity-Body is included with a message, the data type of that + body is determined via the header fields Content-Type and Content- + Encoding. These define a two-layer, ordered encoding model: + + + +Berners-Lee, et al Informational [Page 29] + +RFC 1945 HTTP/1.0 May 1996 + + + entity-body := Content-Encoding( Content-Type( data ) ) + + A Content-Type specifies the media type of the underlying data. A + Content-Encoding may be used to indicate any additional content + coding applied to the type, usually for the purpose of data + compression, that is a property of the resource requested. The + default for the content encoding is none (i.e., the identity + function). + + Any HTTP/1.0 message containing an entity body should include a + Content-Type header field defining the media type of that body. If + and only if the media type is not given by a Content-Type header, as + is the case for Simple-Response messages, the recipient may attempt + to guess the media type via inspection of its content and/or the name + extension(s) of the URL used to identify the resource. If the media + type remains unknown, the recipient should treat it as type + "application/octet-stream". + +7.2.2 Length + + When an Entity-Body is included with a message, the length of that + body may be determined in one of two ways. If a Content-Length header + field is present, its value in bytes represents the length of the + Entity-Body. Otherwise, the body length is determined by the closing + of the connection by the server. + + Closing the connection cannot be used to indicate the end of a + request body, since it leaves no possibility for the server to send + back a response. Therefore, HTTP/1.0 requests containing an entity + body must include a valid Content-Length header field. 
If a request + contains an entity body and Content-Length is not specified, and the + server does not recognize or cannot calculate the length from other + fields, then the server should send a 400 (bad request) response. + + Note: Some older servers supply an invalid Content-Length when + sending a document that contains server-side includes dynamically + inserted into the data stream. It must be emphasized that this + will not be tolerated by future versions of HTTP. Unless the + client knows that it is receiving a response from a compliant + server, it should not depend on the Content-Length value being + correct. + +8. Method Definitions + + The set of common methods for HTTP/1.0 is defined below. Although + this set can be expanded, additional methods cannot be assumed to + share the same semantics for separately extended clients and servers. + + + + +Berners-Lee, et al Informational [Page 30] + +RFC 1945 HTTP/1.0 May 1996 + + +8.1 GET + + The GET method means retrieve whatever information (in the form of an + entity) is identified by the Request-URI. If the Request-URI refers + to a data-producing process, it is the produced data which shall be + returned as the entity in the response and not the source text of the + process, unless that text happens to be the output of the process. + + The semantics of the GET method changes to a "conditional GET" if the + request message includes an If-Modified-Since header field. A + conditional GET method requests that the identified resource be + transferred only if it has been modified since the date given by the + If-Modified-Since header, as described in Section 10.9. The + conditional GET method is intended to reduce network usage by + allowing cached entities to be refreshed without requiring multiple + requests or transferring unnecessary data. + +8.2 HEAD + + The HEAD method is identical to GET except that the server must not + return any Entity-Body in the response. 
The metainformation contained + in the HTTP headers in response to a HEAD request should be identical + to the information sent in response to a GET request. This method can + be used for obtaining metainformation about the resource identified + by the Request-URI without transferring the Entity-Body itself. This + method is often used for testing hypertext links for validity, + accessibility, and recent modification. + + There is no "conditional HEAD" request analogous to the conditional + GET. If an If-Modified-Since header field is included with a HEAD + request, it should be ignored. + +8.3 POST + + The POST method is used to request that the destination server accept + the entity enclosed in the request as a new subordinate of the + resource identified by the Request-URI in the Request-Line. POST is + designed to allow a uniform method to cover the following functions: + + o Annotation of existing resources; + + o Posting a message to a bulletin board, newsgroup, mailing list, + or similar group of articles; + + o Providing a block of data, such as the result of submitting a + form [3], to a data-handling process; + + o Extending a database through an append operation. + + + +Berners-Lee, et al Informational [Page 31] + +RFC 1945 HTTP/1.0 May 1996 + + + The actual function performed by the POST method is determined by the + server and is usually dependent on the Request-URI. The posted entity + is subordinate to that URI in the same way that a file is subordinate + to a directory containing it, a news article is subordinate to a + newsgroup to which it is posted, or a record is subordinate to a + database. + + A successful POST does not require that the entity be created as a + resource on the origin server or made accessible for future + reference. That is, the action performed by the POST method might not + result in a resource that can be identified by a URI. 
In this case, + either 200 (ok) or 204 (no content) is the appropriate response + status, depending on whether or not the response includes an entity + that describes the result. + + If a resource has been created on the origin server, the response + should be 201 (created) and contain an entity (preferably of type + "text/html") which describes the status of the request and refers to + the new resource. + + A valid Content-Length is required on all HTTP/1.0 POST requests. An + HTTP/1.0 server should respond with a 400 (bad request) message if it + cannot determine the length of the request message's content. + + Applications must not cache responses to a POST request because the + application has no way of knowing that the server would return an + equivalent response on some future request. + +9. Status Code Definitions + + Each Status-Code is described below, including a description of which + method(s) it can follow and any metainformation required in the + response. + +9.1 Informational 1xx + + This class of status code indicates a provisional response, + consisting only of the Status-Line and optional headers, and is + terminated by an empty line. HTTP/1.0 does not define any 1xx status + codes and they are not a valid response to a HTTP/1.0 request. + However, they may be useful for experimental applications which are + outside the scope of this specification. + +9.2 Successful 2xx + + This class of status code indicates that the client's request was + successfully received, understood, and accepted. + + + + +Berners-Lee, et al Informational [Page 32] + +RFC 1945 HTTP/1.0 May 1996 + + + 200 OK + + The request has succeeded. 
The information returned with the + response is dependent on the method used in the request, as follows: + + GET an entity corresponding to the requested resource is sent + in the response; + + HEAD the response must only contain the header information and + no Entity-Body; + + POST an entity describing or containing the result of the action. + + 201 Created + + The request has been fulfilled and resulted in a new resource being + created. The newly created resource can be referenced by the URI(s) + returned in the entity of the response. The origin server should + create the resource before using this Status-Code. If the action + cannot be carried out immediately, the server must include in the + response body a description of when the resource will be available; + otherwise, the server should respond with 202 (accepted). + + Of the methods defined by this specification, only POST can create a + resource. + + 202 Accepted + + The request has been accepted for processing, but the processing + has not been completed. The request may or may not eventually be + acted upon, as it may be disallowed when processing actually takes + place. There is no facility for re-sending a status code from an + asynchronous operation such as this. + + The 202 response is intentionally non-committal. Its purpose is to + allow a server to accept a request for some other process (perhaps + a batch-oriented process that is only run once per day) without + requiring that the user agent's connection to the server persist + until the process is completed. The entity returned with this + response should include an indication of the request's current + status and either a pointer to a status monitor or some estimate of + when the user can expect the request to be fulfilled. + + 204 No Content + + The server has fulfilled the request but there is no new + information to send back. 
If the client is a user agent, it should + not change its document view from that which caused the request to + + + +Berners-Lee, et al Informational [Page 33] + +RFC 1945 HTTP/1.0 May 1996 + + + be generated. This response is primarily intended to allow input + for scripts or other actions to take place without causing a change + to the user agent's active document view. The response may include + new metainformation in the form of entity headers, which should + apply to the document currently in the user agent's active view. + +9.3 Redirection 3xx + + This class of status code indicates that further action needs to be + taken by the user agent in order to fulfill the request. The action + required may be carried out by the user agent without interaction + with the user if and only if the method used in the subsequent + request is GET or HEAD. A user agent should never automatically + redirect a request more than 5 times, since such redirections usually + indicate an infinite loop. + + 300 Multiple Choices + + This response code is not directly used by HTTP/1.0 applications, + but serves as the default for interpreting the 3xx class of + responses. + + The requested resource is available at one or more locations. + Unless it was a HEAD request, the response should include an entity + containing a list of resource characteristics and locations from + which the user or user agent can choose the one most appropriate. + If the server has a preferred choice, it should include the URL in + a Location field; user agents may use this field value for + automatic redirection. + + 301 Moved Permanently + + The requested resource has been assigned a new permanent URL and + any future references to this resource should be done using that + URL. Clients with link editing capabilities should automatically + relink references to the Request-URI to the new reference returned + by the server, where possible. + + The new URL must be given by the Location field in the response. 
+ Unless it was a HEAD request, the Entity-Body of the response + should contain a short note with a hyperlink to the new URL. + + If the 301 status code is received in response to a request using + the POST method, the user agent must not automatically redirect the + request unless it can be confirmed by the user, since this might + change the conditions under which the request was issued. + + + + + +Berners-Lee, et al Informational [Page 34] + +RFC 1945 HTTP/1.0 May 1996 + + + Note: When automatically redirecting a POST request after + receiving a 301 status code, some existing user agents will + erroneously change it into a GET request. + + 302 Moved Temporarily + + The requested resource resides temporarily under a different URL. + Since the redirection may be altered on occasion, the client should + continue to use the Request-URI for future requests. + + The URL must be given by the Location field in the response. Unless + it was a HEAD request, the Entity-Body of the response should + contain a short note with a hyperlink to the new URI(s). + + If the 302 status code is received in response to a request using + the POST method, the user agent must not automatically redirect the + request unless it can be confirmed by the user, since this might + change the conditions under which the request was issued. + + Note: When automatically redirecting a POST request after + receiving a 302 status code, some existing user agents will + erroneously change it into a GET request. + + 304 Not Modified + + If the client has performed a conditional GET request and access is + allowed, but the document has not been modified since the date and + time specified in the If-Modified-Since field, the server must + respond with this status code and not send an Entity-Body to the + client. Header fields contained in the response should only include + information which is relevant to cache managers or which may have + changed independently of the entity's Last-Modified date. 
Examples + of relevant header fields include: Date, Server, and Expires. A + cache should update its cached entity to reflect any new field + values given in the 304 response. + +9.4 Client Error 4xx + + The 4xx class of status code is intended for cases in which the + client seems to have erred. If the client has not completed the + request when a 4xx code is received, it should immediately cease + sending data to the server. Except when responding to a HEAD request, + the server should include an entity containing an explanation of the + error situation, and whether it is a temporary or permanent + condition. These status codes are applicable to any request method. + + + + + + +Berners-Lee, et al Informational [Page 35] + +RFC 1945 HTTP/1.0 May 1996 + + + Note: If the client is sending data, server implementations on TCP + should be careful to ensure that the client acknowledges receipt + of the packet(s) containing the response prior to closing the + input connection. If the client continues sending data to the + server after the close, the server's controller will send a reset + packet to the client, which may erase the client's unacknowledged + input buffers before they can be read and interpreted by the HTTP + application. + + 400 Bad Request + + The request could not be understood by the server due to malformed + syntax. The client should not repeat the request without + modifications. + + 401 Unauthorized + + The request requires user authentication. The response must include + a WWW-Authenticate header field (Section 10.16) containing a + challenge applicable to the requested resource. The client may + repeat the request with a suitable Authorization header field + (Section 10.2). If the request already included Authorization + credentials, then the 401 response indicates that authorization has + been refused for those credentials. 
If the 401 response contains + the same challenge as the prior response, and the user agent has + already attempted authentication at least once, then the user + should be presented the entity that was given in the response, + since that entity may include relevant diagnostic information. HTTP + access authentication is explained in Section 11. + + 403 Forbidden + + The server understood the request, but is refusing to fulfill it. + Authorization will not help and the request should not be repeated. + If the request method was not HEAD and the server wishes to make + public why the request has not been fulfilled, it should describe + the reason for the refusal in the entity body. This status code is + commonly used when the server does not wish to reveal exactly why + the request has been refused, or when no other response is + applicable. + + 404 Not Found + + The server has not found anything matching the Request-URI. No + indication is given of whether the condition is temporary or + permanent. If the server does not wish to make this information + available to the client, the status code 403 (forbidden) can be + used instead. + + + +Berners-Lee, et al Informational [Page 36] + +RFC 1945 HTTP/1.0 May 1996 + + +9.5 Server Error 5xx + + Response status codes beginning with the digit "5" indicate cases in + which the server is aware that it has erred or is incapable of + performing the request. If the client has not completed the request + when a 5xx code is received, it should immediately cease sending data + to the server. Except when responding to a HEAD request, the server + should include an entity containing an explanation of the error + situation, and whether it is a temporary or permanent condition. + These response codes are applicable to any request method and there + are no required header fields. + + 500 Internal Server Error + + The server encountered an unexpected condition which prevented it + from fulfilling the request. 
+ + 501 Not Implemented + + The server does not support the functionality required to fulfill + the request. This is the appropriate response when the server does + not recognize the request method and is not capable of supporting + it for any resource. + + 502 Bad Gateway + + The server, while acting as a gateway or proxy, received an invalid + response from the upstream server it accessed in attempting to + fulfill the request. + + 503 Service Unavailable + + The server is currently unable to handle the request due to a + temporary overloading or maintenance of the server. The implication + is that this is a temporary condition which will be alleviated + after some delay. + + Note: The existence of the 503 status code does not imply + that a server must use it when becoming overloaded. Some + servers may wish to simply refuse the connection. + +10. Header Field Definitions + + This section defines the syntax and semantics of all commonly used + HTTP/1.0 header fields. For general and entity header fields, both + sender and recipient refer to either the client or the server, + depending on who sends and who receives the message. + + + + +Berners-Lee, et al Informational [Page 37] + +RFC 1945 HTTP/1.0 May 1996 + + +10.1 Allow + + The Allow entity-header field lists the set of methods supported by + the resource identified by the Request-URI. The purpose of this field + is strictly to inform the recipient of valid methods associated with + the resource. The Allow header field is not permitted in a request + using the POST method, and thus should be ignored if it is received + as part of a POST entity. + + Allow = "Allow" ":" 1#method + + Example of use: + + Allow: GET, HEAD + + This field cannot prevent a client from trying other methods. + However, the indications given by the Allow header field value should + be followed. The actual set of allowed methods is defined by the + origin server at the time of each request. 
+ + A proxy must not modify the Allow header field even if it does not + understand all the methods specified, since the user agent may have + other means of communicating with the origin server. + + The Allow header field does not indicate what methods are implemented + by the server. + +10.2 Authorization + + A user agent that wishes to authenticate itself with a server-- + usually, but not necessarily, after receiving a 401 response--may do + so by including an Authorization request-header field with the + request. The Authorization field value consists of credentials + containing the authentication information of the user agent for the + realm of the resource being requested. + + Authorization = "Authorization" ":" credentials + + HTTP access authentication is described in Section 11. If a request + is authenticated and a realm specified, the same credentials should + be valid for all other requests within this realm. + + Responses to requests containing an Authorization field are not + cachable. + + + + + + + +Berners-Lee, et al Informational [Page 38] + +RFC 1945 HTTP/1.0 May 1996 + + +10.3 Content-Encoding + + The Content-Encoding entity-header field is used as a modifier to the + media-type. When present, its value indicates what additional content + coding has been applied to the resource, and thus what decoding + mechanism must be applied in order to obtain the media-type + referenced by the Content-Type header field. The Content-Encoding is + primarily used to allow a document to be compressed without losing + the identity of its underlying media type. + + Content-Encoding = "Content-Encoding" ":" content-coding + + Content codings are defined in Section 3.5. An example of its use is + + Content-Encoding: x-gzip + + The Content-Encoding is a characteristic of the resource identified + by the Request-URI. Typically, the resource is stored with this + encoding and is only decoded before rendering or analogous usage. 
+ +10.4 Content-Length + + The Content-Length entity-header field indicates the size of the + Entity-Body, in decimal number of octets, sent to the recipient or, + in the case of the HEAD method, the size of the Entity-Body that + would have been sent had the request been a GET. + + Content-Length = "Content-Length" ":" 1*DIGIT + + An example is + + Content-Length: 3495 + + Applications should use this field to indicate the size of the + Entity-Body to be transferred, regardless of the media type of the + entity. A valid Content-Length field value is required on all + HTTP/1.0 request messages containing an entity body. + + Any Content-Length greater than or equal to zero is a valid value. + Section 7.2.2 describes how to determine the length of a response + entity body if a Content-Length is not given. + + Note: The meaning of this field is significantly different from + the corresponding definition in MIME, where it is an optional + field used within the "message/external-body" content-type. In + HTTP, it should be used whenever the entity's length can be + determined prior to being transferred. + + + + +Berners-Lee, et al Informational [Page 39] + +RFC 1945 HTTP/1.0 May 1996 + + +10.5 Content-Type + + The Content-Type entity-header field indicates the media type of the + Entity-Body sent to the recipient or, in the case of the HEAD method, + the media type that would have been sent had the request been a GET. + + Content-Type = "Content-Type" ":" media-type + + Media types are defined in Section 3.6. An example of the field is + + Content-Type: text/html + + Further discussion of methods for identifying the media type of an + entity is provided in Section 7.2.1. + +10.6 Date + + The Date general-header field represents the date and time at which + the message was originated, having the same semantics as orig-date in + RFC 822. The field value is an HTTP-date, as described in Section + 3.3. 
+ + Date = "Date" ":" HTTP-date + + An example is + + Date: Tue, 15 Nov 1994 08:12:31 GMT + + If a message is received via direct connection with the user agent + (in the case of requests) or the origin server (in the case of + responses), then the date can be assumed to be the current date at + the receiving end. However, since the date--as it is believed by the + origin--is important for evaluating cached responses, origin servers + should always include a Date header. Clients should only send a Date + header field in messages that include an entity body, as in the case + of the POST request, and even then it is optional. A received message + which does not have a Date header field should be assigned one by the + recipient if the message will be cached by that recipient or + gatewayed via a protocol which requires a Date. + + In theory, the date should represent the moment just before the + entity is generated. In practice, the date can be generated at any + time during the message origination without affecting its semantic + value. + + Note: An earlier version of this document incorrectly specified + that this field should contain the creation date of the enclosed + Entity-Body. This has been changed to reflect actual (and proper) + + + +Berners-Lee, et al Informational [Page 40] + +RFC 1945 HTTP/1.0 May 1996 + + + usage. + +10.7 Expires + + The Expires entity-header field gives the date/time after which the + entity should be considered stale. This allows information providers + to suggest the volatility of the resource, or a date after which the + information may no longer be valid. Applications must not cache this + entity beyond the date given. The presence of an Expires field does + not imply that the original resource will change or cease to exist + at, before, or after that time. However, information providers that + know or even suspect that a resource will change by a certain date + should include an Expires header with that date. 
The format is an + absolute date and time as defined by HTTP-date in Section 3.3. + + Expires = "Expires" ":" HTTP-date + + An example of its use is + + Expires: Thu, 01 Dec 1994 16:00:00 GMT + + If the date given is equal to or earlier than the value of the Date + header, the recipient must not cache the enclosed entity. If a + resource is dynamic by nature, as is the case with many data- + producing processes, entities from that resource should be given an + appropriate Expires value which reflects that dynamism. + + The Expires field cannot be used to force a user agent to refresh its + display or reload a resource; its semantics apply only to caching + mechanisms, and such mechanisms need only check a resource's + expiration status when a new request for that resource is initiated. + + User agents often have history mechanisms, such as "Back" buttons and + history lists, which can be used to redisplay an entity retrieved + earlier in a session. By default, the Expires field does not apply to + history mechanisms. If the entity is still in storage, a history + mechanism should display it even if the entity has expired, unless + the user has specifically configured the agent to refresh expired + history documents. + + Note: Applications are encouraged to be tolerant of bad or + misinformed implementations of the Expires header. A value of zero + (0) or an invalid date format should be considered equivalent to + an "expires immediately." Although these values are not legitimate + for HTTP/1.0, a robust implementation is always desirable. + + + + + + +Berners-Lee, et al Informational [Page 41] + +RFC 1945 HTTP/1.0 May 1996 + + +10.8 From + + The From request-header field, if given, should contain an Internet + e-mail address for the human user who controls the requesting user + agent. 
The address should be machine-usable, as defined by mailbox in + RFC 822 [7] (as updated by RFC 1123 [6]): + + From = "From" ":" mailbox + + An example is: + + From: webmaster@w3.org + + This header field may be used for logging purposes and as a means for + identifying the source of invalid or unwanted requests. It should not + be used as an insecure form of access protection. The interpretation + of this field is that the request is being performed on behalf of the + person given, who accepts responsibility for the method performed. In + particular, robot agents should include this header so that the + person responsible for running the robot can be contacted if problems + occur on the receiving end. + + The Internet e-mail address in this field may be separate from the + Internet host which issued the request. For example, when a request + is passed through a proxy, the original issuer's address should be + used. + + Note: The client should not send the From header field without the + user's approval, as it may conflict with the user's privacy + interests or their site's security policy. It is strongly + recommended that the user be able to disable, enable, and modify + the value of this field at any time prior to a request. + +10.9 If-Modified-Since + + The If-Modified-Since request-header field is used with the GET + method to make it conditional: if the requested resource has not been + modified since the time specified in this field, a copy of the + resource will not be returned from the server; instead, a 304 (not + modified) response will be returned without any Entity-Body. 
+ + If-Modified-Since = "If-Modified-Since" ":" HTTP-date + + An example of the field is: + + If-Modified-Since: Sat, 29 Oct 1994 19:43:31 GMT + + + + + +Berners-Lee, et al Informational [Page 42] + +RFC 1945 HTTP/1.0 May 1996 + + + A conditional GET method requests that the identified resource be + transferred only if it has been modified since the date given by the + If-Modified-Since header. The algorithm for determining this includes + the following cases: + + a) If the request would normally result in anything other than + a 200 (ok) status, or if the passed If-Modified-Since date + is invalid, the response is exactly the same as for a + normal GET. A date which is later than the server's current + time is invalid. + + b) If the resource has been modified since the + If-Modified-Since date, the response is exactly the same as + for a normal GET. + + c) If the resource has not been modified since a valid + If-Modified-Since date, the server shall return a 304 (not + modified) response. + + The purpose of this feature is to allow efficient updates of cached + information with a minimum amount of transaction overhead. + +10.10 Last-Modified + + The Last-Modified entity-header field indicates the date and time at + which the sender believes the resource was last modified. The exact + semantics of this field are defined in terms of how the recipient + should interpret it: if the recipient has a copy of this resource + which is older than the date given by the Last-Modified field, that + copy should be considered stale. + + Last-Modified = "Last-Modified" ":" HTTP-date + + An example of its use is + + Last-Modified: Tue, 15 Nov 1994 12:45:26 GMT + + The exact meaning of this header field depends on the implementation + of the sender and the nature of the original resource. For files, it + may be just the file system last-modified time. For entities with + dynamically included parts, it may be the most recent of the set of + last-modify times for its component parts. 
For database gateways, it + may be the last-update timestamp of the record. For virtual objects, + it may be the last time the internal state changed. + + An origin server must not send a Last-Modified date which is later + than the server's time of message origination. In such cases, where + the resource's last modification would indicate some time in the + + + +Berners-Lee, et al Informational [Page 43] + +RFC 1945 HTTP/1.0 May 1996 + + + future, the server must replace that date with the message + origination date. + +10.11 Location + + The Location response-header field defines the exact location of the + resource that was identified by the Request-URI. For 3xx responses, + the location must indicate the server's preferred URL for automatic + redirection to the resource. Only one absolute URL is allowed. + + Location = "Location" ":" absoluteURI + + An example is + + Location: http://www.w3.org/hypertext/WWW/NewLocation.html + +10.12 Pragma + + The Pragma general-header field is used to include implementation- + specific directives that may apply to any recipient along the + request/response chain. All pragma directives specify optional + behavior from the viewpoint of the protocol; however, some systems + may require that behavior be consistent with the directives. + + Pragma = "Pragma" ":" 1#pragma-directive + + pragma-directive = "no-cache" | extension-pragma + extension-pragma = token [ "=" word ] + + When the "no-cache" directive is present in a request message, an + application should forward the request toward the origin server even + if it has a cached copy of what is being requested. This allows a + client to insist upon receiving an authoritative response to its + request. It also allows a client to refresh a cached copy which is + known to be corrupted or stale. 
+ + Pragma directives must be passed through by a proxy or gateway + application, regardless of their significance to that application, + since the directives may be applicable to all recipients along the + request/response chain. It is not possible to specify a pragma for a + specific recipient; however, any pragma directive not relevant to a + recipient should be ignored by that recipient. + +10.13 Referer + + The Referer request-header field allows the client to specify, for + the server's benefit, the address (URI) of the resource from which + the Request-URI was obtained. This allows a server to generate lists + + + +Berners-Lee, et al Informational [Page 44] + +RFC 1945 HTTP/1.0 May 1996 + + + of back-links to resources for interest, logging, optimized caching, + etc. It also allows obsolete or mistyped links to be traced for + maintenance. The Referer field must not be sent if the Request-URI + was obtained from a source that does not have its own URI, such as + input from the user keyboard. + + Referer = "Referer" ":" ( absoluteURI | relativeURI ) + + Example: + + Referer: http://www.w3.org/hypertext/DataSources/Overview.html + + If a partial URI is given, it should be interpreted relative to the + Request-URI. The URI must not include a fragment. + + Note: Because the source of a link may be private information or + may reveal an otherwise private information source, it is strongly + recommended that the user be able to select whether or not the + Referer field is sent. For example, a browser client could have a + toggle switch for browsing openly/anonymously, which would + respectively enable/disable the sending of Referer and From + information. + +10.14 Server + + The Server response-header field contains information about the + software used by the origin server to handle the request. The field + can contain multiple product tokens (Section 3.7) and comments + identifying the server and any significant subproducts. 
By + convention, the product tokens are listed in order of their + significance for identifying the application. + + Server = "Server" ":" 1*( product | comment ) + + Example: + + Server: CERN/3.0 libwww/2.17 + + If the response is being forwarded through a proxy, the proxy + application must not add its data to the product list. + + Note: Revealing the specific software version of the server may + allow the server machine to become more vulnerable to attacks + against software that is known to contain security holes. Server + implementors are encouraged to make this field a configurable + option. + + + + + +Berners-Lee, et al Informational [Page 45] + +RFC 1945 HTTP/1.0 May 1996 + + + Note: Some existing servers fail to restrict themselves to the + product token syntax within the Server field. + +10.15 User-Agent + + The User-Agent request-header field contains information about the + user agent originating the request. This is for statistical purposes, + the tracing of protocol violations, and automated recognition of user + agents for the sake of tailoring responses to avoid particular user + agent limitations. Although it is not required, user agents should + include this field with requests. The field can contain multiple + product tokens (Section 3.7) and comments identifying the agent and + any subproducts which form a significant part of the user agent. By + convention, the product tokens are listed in order of their + significance for identifying the application. + + User-Agent = "User-Agent" ":" 1*( product | comment ) + + Example: + + User-Agent: CERN-LineMode/2.15 libwww/2.17b3 + + Note: Some current proxy applications append their product + information to the list in the User-Agent field. This is not + recommended, since it makes machine interpretation of these + fields ambiguous. + + Note: Some existing clients fail to restrict themselves to + the product token syntax within the User-Agent field. 
+ +10.16 WWW-Authenticate + + The WWW-Authenticate response-header field must be included in 401 + (unauthorized) response messages. The field value consists of at + least one challenge that indicates the authentication scheme(s) and + parameters applicable to the Request-URI. + + WWW-Authenticate = "WWW-Authenticate" ":" 1#challenge + + The HTTP access authentication process is described in Section 11. + User agents must take special care in parsing the WWW-Authenticate + field value if it contains more than one challenge, or if more than + one WWW-Authenticate header field is provided, since the contents of + a challenge may itself contain a comma-separated list of + authentication parameters. + + + + + + +Berners-Lee, et al Informational [Page 46] + +RFC 1945 HTTP/1.0 May 1996 + + +11. Access Authentication + + HTTP provides a simple challenge-response authentication mechanism + which may be used by a server to challenge a client request and by a + client to provide authentication information. It uses an extensible, + case-insensitive token to identify the authentication scheme, + followed by a comma-separated list of attribute-value pairs which + carry the parameters necessary for achieving authentication via that + scheme. + + auth-scheme = token + + auth-param = token "=" quoted-string + + The 401 (unauthorized) response message is used by an origin server + to challenge the authorization of a user agent. This response must + include a WWW-Authenticate header field containing at least one + challenge applicable to the requested resource. + + challenge = auth-scheme 1*SP realm *( "," auth-param ) + + realm = "realm" "=" realm-value + realm-value = quoted-string + + The realm attribute (case-insensitive) is required for all + authentication schemes which issue a challenge. The realm value + (case-sensitive), in combination with the canonical root URL of the + server being accessed, defines the protection space. 
These realms + allow the protected resources on a server to be partitioned into a + set of protection spaces, each with its own authentication scheme + and/or authorization database. The realm value is a string, generally + assigned by the origin server, which may have additional semantics + specific to the authentication scheme. + + A user agent that wishes to authenticate itself with a server-- + usually, but not necessarily, after receiving a 401 response--may do + so by including an Authorization header field with the request. The + Authorization field value consists of credentials containing the + authentication information of the user agent for the realm of the + resource being requested. + + credentials = basic-credentials + | ( auth-scheme #auth-param ) + + The domain over which credentials can be automatically applied by a + user agent is determined by the protection space. If a prior request + has been authorized, the same credentials may be reused for all other + requests within that protection space for a period of time determined + + + +Berners-Lee, et al Informational [Page 47] + +RFC 1945 HTTP/1.0 May 1996 + + + by the authentication scheme, parameters, and/or user preference. + Unless otherwise defined by the authentication scheme, a single + protection space cannot extend outside the scope of its server. + + If the server does not wish to accept the credentials sent with a + request, it should return a 403 (forbidden) response. + + The HTTP protocol does not restrict applications to this simple + challenge-response mechanism for access authentication. Additional + mechanisms may be used, such as encryption at the transport level or + via message encapsulation, and with additional header fields + specifying authentication information. However, these additional + mechanisms are not defined by this specification. + + Proxies must be completely transparent regarding user agent + authentication. 
That is, they must forward the WWW-Authenticate and + Authorization headers untouched, and must not cache the response to a + request containing Authorization. HTTP/1.0 does not provide a means + for a client to be authenticated with a proxy. + +11.1 Basic Authentication Scheme + + The "basic" authentication scheme is based on the model that the user + agent must authenticate itself with a user-ID and a password for each + realm. The realm value should be considered an opaque string which + can only be compared for equality with other realms on that server. + The server will authorize the request only if it can validate the + user-ID and password for the protection space of the Request-URI. + There are no optional authentication parameters. + + Upon receipt of an unauthorized request for a URI within the + protection space, the server should respond with a challenge like the + following: + + WWW-Authenticate: Basic realm="WallyWorld" + + where "WallyWorld" is the string assigned by the server to identify + the protection space of the Request-URI. + + To receive authorization, the client sends the user-ID and password, + separated by a single colon (":") character, within a base64 [5] + encoded string in the credentials. + + basic-credentials = "Basic" SP basic-cookie + + basic-cookie = <base64 [5] encoding of userid-password, + except not limited to 76 char/line> + + + + +Berners-Lee, et al Informational [Page 48] + +RFC 1945 HTTP/1.0 May 1996 + + + userid-password = [ token ] ":" *TEXT + + If the user agent wishes to send the user-ID "Aladdin" and password + "open sesame", it would use the following header field: + + Authorization: Basic QWxhZGRpbjpvcGVuIHNlc2FtZQ== + + The basic authentication scheme is a non-secure method of filtering + unauthorized access to resources on an HTTP server. It is based on + the assumption that the connection between the client and the server + can be regarded as a trusted carrier. As this is not generally true + on an open network, the basic authentication scheme should be used + accordingly.
In spite of this, clients should implement the scheme in + order to communicate with servers that use it. + +12. Security Considerations + + This section is meant to inform application developers, information + providers, and users of the security limitations in HTTP/1.0 as + described by this document. The discussion does not include + definitive solutions to the problems revealed, though it does make + some suggestions for reducing security risks. + +12.1 Authentication of Clients + + As mentioned in Section 11.1, the Basic authentication scheme is not + a secure method of user authentication, nor does it prevent the + Entity-Body from being transmitted in clear text across the physical + network used as the carrier. HTTP/1.0 does not prevent additional + authentication schemes and encryption mechanisms from being employed + to increase security. + +12.2 Safe Methods + + The writers of client software should be aware that the software + represents the user in their interactions over the Internet, and + should be careful to allow the user to be aware of any actions they + may take which may have an unexpected significance to themselves or + others. + + In particular, the convention has been established that the GET and + HEAD methods should never have the significance of taking an action + other than retrieval. These methods should be considered "safe." This + allows user agents to represent other methods, such as POST, in a + special way, so that the user is made aware of the fact that a + possibly unsafe action is being requested. + + + + + +Berners-Lee, et al Informational [Page 49] + +RFC 1945 HTTP/1.0 May 1996 + + + Naturally, it is not possible to ensure that the server does not + generate side-effects as a result of performing a GET request; in + fact, some dynamic resources consider that a feature. The important + distinction here is that the user did not request the side-effects, + so therefore cannot be held accountable for them. 
+ +12.3 Abuse of Server Log Information + + A server is in the position to save personal data about a user's + requests which may identify their reading patterns or subjects of + interest. This information is clearly confidential in nature and its + handling may be constrained by law in certain countries. People using + the HTTP protocol to provide data are responsible for ensuring that + such material is not distributed without the permission of any + individuals that are identifiable by the published results. + +12.4 Transfer of Sensitive Information + + Like any generic data transfer protocol, HTTP cannot regulate the + content of the data that is transferred, nor is there any a priori + method of determining the sensitivity of any particular piece of + information within the context of any given request. Therefore, + applications should supply as much control over this information as + possible to the provider of that information. Three header fields are + worth special mention in this context: Server, Referer and From. + + Revealing the specific software version of the server may allow the + server machine to become more vulnerable to attacks against software + that is known to contain security holes. Implementors should make the + Server header field a configurable option. + + The Referer field allows reading patterns to be studied and reverse + links drawn. Although it can be very useful, its power can be abused + if user details are not separated from the information contained in + the Referer. Even when the personal information has been removed, the + Referer field may indicate a private document's URI whose publication + would be inappropriate. + + The information sent in the From field might conflict with the user's + privacy interests or their site's security policy, and hence it + should not be transmitted without the user being able to disable, + enable, and modify the contents of the field. 
The user must be able + to set the contents of this field within a user preference or + application defaults configuration. + + We suggest, though do not require, that a convenient toggle interface + be provided for the user to enable or disable the sending of From and + Referer information. + + + +Berners-Lee, et al Informational [Page 50] + +RFC 1945 HTTP/1.0 May 1996 + + +12.5 Attacks Based On File and Path Names + + Implementations of HTTP origin servers should be careful to restrict + the documents returned by HTTP requests to be only those that were + intended by the server administrators. If an HTTP server translates + HTTP URIs directly into file system calls, the server must take + special care not to serve files that were not intended to be + delivered to HTTP clients. For example, Unix, Microsoft Windows, and + other operating systems use ".." as a path component to indicate a + directory level above the current one. On such a system, an HTTP + server must disallow any such construct in the Request-URI if it + would otherwise allow access to a resource outside those intended to + be accessible via the HTTP server. Similarly, files intended for + reference only internally to the server (such as access control + files, configuration files, and script code) must be protected from + inappropriate retrieval, since they might contain sensitive + information. Experience has shown that minor bugs in such HTTP server + implementations have turned into security risks. + +13. Acknowledgments + + This specification makes heavy use of the augmented BNF and generic + constructs defined by David H. Crocker for RFC 822 [7]. Similarly, it + reuses many of the definitions provided by Nathaniel Borenstein and + Ned Freed for MIME [5]. We hope that their inclusion in this + specification will help reduce past confusion over the relationship + between HTTP/1.0 and Internet mail message formats. + + The HTTP protocol has evolved considerably over the past four years. 
+ It has benefited from a large and active developer community--the + many people who have participated on the www-talk mailing list--and + it is that community which has been most responsible for the success + of HTTP and of the World-Wide Web in general. Marc Andreessen, Robert + Cailliau, Daniel W. Connolly, Bob Denny, Jean-Francois Groff, Phillip + M. Hallam-Baker, Hakon W. Lie, Ari Luotonen, Rob McCool, Lou + Montulli, Dave Raggett, Tony Sanders, and Marc VanHeyningen deserve + special recognition for their efforts in defining aspects of the + protocol for early versions of this specification. + + Paul Hoffman contributed sections regarding the informational status + of this document and Appendices C and D. + + + + + + + + + + +Berners-Lee, et al Informational [Page 51] + +RFC 1945 HTTP/1.0 May 1996 + + + This document has benefited greatly from the comments of all those + participating in the HTTP-WG. In addition to those already mentioned, + the following individuals have contributed to this specification: + + Gary Adams Harald Tveit Alvestrand + Keith Ball Brian Behlendorf + Paul Burchard Maurizio Codogno + Mike Cowlishaw Roman Czyborra + Michael A. Dolan John Franks + Jim Gettys Marc Hedlund + Koen Holtman Alex Hopmann + Bob Jernigan Shel Kaphan + Martijn Koster Dave Kristol + Daniel LaLiberte Paul Leach + Albert Lunde John C. Mallery + Larry Masinter Mitra + Jeffrey Mogul Gavin Nicol + Bill Perry Jeffrey Perry + Owen Rees Luigi Rizzo + David Robinson Marc Salomon + Rich Salz Jim Seidman + Chuck Shotton Eric W. Sink + Simon E. Spero Robert S. Thau + Francois Yergeau Mary Ellen Zurko + Jean-Philippe Martin-Flatin + +14. References + + [1] Anklesaria, F., McCahill, M., Lindner, P., Johnson, D., + Torrey, D., and B. Alberti, "The Internet Gopher Protocol: A + Distributed Document Search and Retrieval Protocol", RFC 1436, + University of Minnesota, March 1993. 
+ + [2] Berners-Lee, T., "Universal Resource Identifiers in WWW: A + Unifying Syntax for the Expression of Names and Addresses of + Objects on the Network as used in the World-Wide Web", + RFC 1630, CERN, June 1994. + + [3] Berners-Lee, T., and D. Connolly, "Hypertext Markup Language - + 2.0", RFC 1866, MIT/W3C, November 1995. + + [4] Berners-Lee, T., Masinter, L., and M. McCahill, "Uniform + Resource Locators (URL)", RFC 1738, CERN, Xerox PARC, + University of Minnesota, December 1994. + + + + + + + +Berners-Lee, et al Informational [Page 52] + +RFC 1945 HTTP/1.0 May 1996 + + + [5] Borenstein, N., and N. Freed, "MIME (Multipurpose Internet Mail + Extensions) Part One: Mechanisms for Specifying and Describing + the Format of Internet Message Bodies", RFC 1521, Bellcore, + Innosoft, September 1993. + + [6] Braden, R., "Requirements for Internet hosts - Application and + Support", STD 3, RFC 1123, IETF, October 1989. + + [7] Crocker, D., "Standard for the Format of ARPA Internet Text + Messages", STD 11, RFC 822, UDEL, August 1982. + + [8] F. Davis, B. Kahle, H. Morris, J. Salem, T. Shen, R. Wang, + J. Sui, and M. Grinbaum. "WAIS Interface Protocol Prototype + Functional Specification." (v1.5), Thinking Machines + Corporation, April 1990. + + [9] Fielding, R., "Relative Uniform Resource Locators", RFC 1808, + UC Irvine, June 1995. + + [10] Horton, M., and R. Adams, "Standard for interchange of USENET + Messages", RFC 1036 (Obsoletes RFC 850), AT&T Bell + Laboratories, Center for Seismic Studies, December 1987. + + [11] Kantor, B., and P. Lapsley, "Network News Transfer Protocol: + A Proposed Standard for the Stream-Based Transmission of News", + RFC 977, UC San Diego, UC Berkeley, February 1986. + + [12] Postel, J., "Simple Mail Transfer Protocol." STD 10, RFC 821, + USC/ISI, August 1982. + + [13] Postel, J., "Media Type Registration Procedure." RFC 1590, + USC/ISI, March 1994. + + [14] Postel, J., and J. 
Reynolds, "File Transfer Protocol (FTP)", + STD 9, RFC 959, USC/ISI, October 1985. + + [15] Reynolds, J., and J. Postel, "Assigned Numbers", STD 2, RFC + 1700, USC/ISI, October 1994. + + [16] Sollins, K., and L. Masinter, "Functional Requirements for + Uniform Resource Names", RFC 1737, MIT/LCS, Xerox Corporation, + December 1994. + + [17] US-ASCII. Coded Character Set - 7-Bit American Standard Code + for Information Interchange. Standard ANSI X3.4-1986, ANSI, + 1986. + + + + + +Berners-Lee, et al Informational [Page 53] + +RFC 1945 HTTP/1.0 May 1996 + + + [18] ISO-8859. International Standard -- Information Processing -- + 8-bit Single-Byte Coded Graphic Character Sets -- + Part 1: Latin alphabet No. 1, ISO 8859-1:1987. + Part 2: Latin alphabet No. 2, ISO 8859-2, 1987. + Part 3: Latin alphabet No. 3, ISO 8859-3, 1988. + Part 4: Latin alphabet No. 4, ISO 8859-4, 1988. + Part 5: Latin/Cyrillic alphabet, ISO 8859-5, 1988. + Part 6: Latin/Arabic alphabet, ISO 8859-6, 1987. + Part 7: Latin/Greek alphabet, ISO 8859-7, 1987. + Part 8: Latin/Hebrew alphabet, ISO 8859-8, 1988. + Part 9: Latin alphabet No. 5, ISO 8859-9, 1990. + +15. Authors' Addresses + + Tim Berners-Lee + Director, W3 Consortium + MIT Laboratory for Computer Science + 545 Technology Square + Cambridge, MA 02139, U.S.A. + + Fax: +1 (617) 258 8682 + EMail: timbl@w3.org + + + Roy T. Fielding + Department of Information and Computer Science + University of California + Irvine, CA 92717-3425, U.S.A. + + Fax: +1 (714) 824-4056 + EMail: fielding@ics.uci.edu + + + Henrik Frystyk Nielsen + W3 Consortium + MIT Laboratory for Computer Science + 545 Technology Square + Cambridge, MA 02139, U.S.A. + + Fax: +1 (617) 258 8682 + EMail: frystyk@w3.org + + + + + + + + + + +Berners-Lee, et al Informational [Page 54] + +RFC 1945 HTTP/1.0 May 1996 + + +Appendices + + These appendices are provided for informational reasons only -- they + do not form a part of the HTTP/1.0 specification. + +A. 
Internet Media Type message/http + + In addition to defining the HTTP/1.0 protocol, this document serves + as the specification for the Internet media type "message/http". The + following is to be registered with IANA [13]. + + Media Type name: message + + Media subtype name: http + + Required parameters: none + + Optional parameters: version, msgtype + + version: The HTTP-Version number of the enclosed message + (e.g., "1.0"). If not present, the version can be + determined from the first line of the body. + + msgtype: The message type -- "request" or "response". If + not present, the type can be determined from the + first line of the body. + + Encoding considerations: only "7bit", "8bit", or "binary" are + permitted + + Security considerations: none + +B. Tolerant Applications + + Although this document specifies the requirements for the generation + of HTTP/1.0 messages, not all applications will be correct in their + implementation. We therefore recommend that operational applications + be tolerant of deviations whenever those deviations can be + interpreted unambiguously. + + Clients should be tolerant in parsing the Status-Line and servers + tolerant when parsing the Request-Line. In particular, they should + accept any amount of SP or HT characters between fields, even though + only a single SP is required. + + The line terminator for HTTP-header fields is the sequence CRLF. + However, we recommend that applications, when parsing such headers, + recognize a single LF as a line terminator and ignore the leading CR. + + + +Berners-Lee, et al Informational [Page 55] + +RFC 1945 HTTP/1.0 May 1996 + + +C. Relationship to MIME + + HTTP/1.0 uses many of the constructs defined for Internet Mail (RFC + 822 [7]) and the Multipurpose Internet Mail Extensions (MIME [5]) to + allow entities to be transmitted in an open variety of + representations and with extensible mechanisms. 
However, RFC 1521 + discusses mail, and HTTP has a few features that are different than + those described in RFC 1521. These differences were carefully chosen + to optimize performance over binary connections, to allow greater + freedom in the use of new media types, to make date comparisons + easier, and to acknowledge the practice of some early HTTP servers + and clients. + + At the time of this writing, it is expected that RFC 1521 will be + revised. The revisions may include some of the practices found in + HTTP/1.0 but not in RFC 1521. + + This appendix describes specific areas where HTTP differs from RFC + 1521. Proxies and gateways to strict MIME environments should be + aware of these differences and provide the appropriate conversions + where necessary. Proxies and gateways from MIME environments to HTTP + also need to be aware of the differences because some conversions may + be required. + +C.1 Conversion to Canonical Form + + RFC 1521 requires that an Internet mail entity be converted to + canonical form prior to being transferred, as described in Appendix G + of RFC 1521 [5]. Section 3.6.1 of this document describes the forms + allowed for subtypes of the "text" media type when transmitted over + HTTP. + + RFC 1521 requires that content with a Content-Type of "text" + represent line breaks as CRLF and forbids the use of CR or LF outside + of line break sequences. HTTP allows CRLF, bare CR, and bare LF to + indicate a line break within text content when a message is + transmitted over HTTP. + + Where it is possible, a proxy or gateway from HTTP to a strict RFC + 1521 environment should translate all line breaks within the text + media types described in Section 3.6.1 of this document to the RFC + 1521 canonical form of CRLF. 
Note, however, that this may be + complicated by the presence of a Content-Encoding and by the fact + that HTTP allows the use of some character sets which do not use + octets 13 and 10 to represent CR and LF, as is the case for some + multi-byte character sets. + + + + + +Berners-Lee, et al Informational [Page 56] + +RFC 1945 HTTP/1.0 May 1996 + + +C.2 Conversion of Date Formats + + HTTP/1.0 uses a restricted set of date formats (Section 3.3) to + simplify the process of date comparison. Proxies and gateways from + other protocols should ensure that any Date header field present in a + message conforms to one of the HTTP/1.0 formats and rewrite the date + if necessary. + +C.3 Introduction of Content-Encoding + + RFC 1521 does not include any concept equivalent to HTTP/1.0's + Content-Encoding header field. Since this acts as a modifier on the + media type, proxies and gateways from HTTP to MIME-compliant + protocols must either change the value of the Content-Type header + field or decode the Entity-Body before forwarding the message. (Some + experimental applications of Content-Type for Internet mail have used + a media-type parameter of ";conversions=<content-coding>" to perform + an equivalent function as Content-Encoding. However, this parameter + is not part of RFC 1521.) + +C.4 No Content-Transfer-Encoding + + HTTP does not use the Content-Transfer-Encoding (CTE) field of RFC + 1521. Proxies and gateways from MIME-compliant protocols to HTTP must + remove any non-identity CTE ("quoted-printable" or "base64") encoding + prior to delivering the response message to an HTTP client. + + Proxies and gateways from HTTP to MIME-compliant protocols are + responsible for ensuring that the message is in the correct format + and encoding for safe transport on that protocol, where "safe + transport" is defined by the limitations of the protocol being used.
+ Such a proxy or gateway should label the data with an appropriate + Content-Transfer-Encoding if doing so will improve the likelihood of + safe transport over the destination protocol. + +C.5 HTTP Header Fields in Multipart Body-Parts + + In RFC 1521, most header fields in multipart body-parts are generally + ignored unless the field name begins with "Content-". In HTTP/1.0, + multipart body-parts may contain any HTTP header fields which are + significant to the meaning of that part. + +D. Additional Features + + This appendix documents protocol elements used by some existing HTTP + implementations, but not consistently and correctly across most + HTTP/1.0 applications. Implementors should be aware of these + features, but cannot rely upon their presence in, or interoperability + + + +Berners-Lee, et al Informational [Page 57] + +RFC 1945 HTTP/1.0 May 1996 + + + with, other HTTP/1.0 applications. + +D.1 Additional Request Methods + +D.1.1 PUT + + The PUT method requests that the enclosed entity be stored under the + supplied Request-URI. If the Request-URI refers to an already + existing resource, the enclosed entity should be considered as a + modified version of the one residing on the origin server. If the + Request-URI does not point to an existing resource, and that URI is + capable of being defined as a new resource by the requesting user + agent, the origin server can create the resource with that URI. + + The fundamental difference between the POST and PUT requests is + reflected in the different meaning of the Request-URI. The URI in a + POST request identifies the resource that will handle the enclosed + entity as data to be processed. That resource may be a data-accepting + process, a gateway to some other protocol, or a separate entity that + accepts annotations. 
In contrast, the URI in a PUT request identifies + the entity enclosed with the request -- the user agent knows what URI + is intended and the server should not apply the request to some other + resource. + +D.1.2 DELETE + + The DELETE method requests that the origin server delete the resource + identified by the Request-URI. + +D.1.3 LINK + + The LINK method establishes one or more Link relationships between + the existing resource identified by the Request-URI and other + existing resources. + +D.1.4 UNLINK + + The UNLINK method removes one or more Link relationships from the + existing resource identified by the Request-URI. + +D.2 Additional Header Field Definitions + +D.2.1 Accept + + The Accept request-header field can be used to indicate a list of + media ranges which are acceptable as a response to the request. The + asterisk "*" character is used to group media types into ranges, with + "*/*" indicating all media types and "type/*" indicating all subtypes + + + +Berners-Lee, et al Informational [Page 58] + +RFC 1945 HTTP/1.0 May 1996 + + + of that type. The set of ranges given by the client should represent + what types are acceptable given the context of the request. + +D.2.2 Accept-Charset + + The Accept-Charset request-header field can be used to indicate a + list of preferred character sets other than the default US-ASCII and + ISO-8859-1. This field allows clients capable of understanding more + comprehensive or special-purpose character sets to signal that + capability to a server which is capable of representing documents in + those character sets. + +D.2.3 Accept-Encoding + + The Accept-Encoding request-header field is similar to Accept, but + restricts the content-coding values which are acceptable in the + response. + +D.2.4 Accept-Language + + The Accept-Language request-header field is similar to Accept, but + restricts the set of natural languages that are preferred as a + response to the request. 
+ +D.2.5 Content-Language + + The Content-Language entity-header field describes the natural + language(s) of the intended audience for the enclosed entity. Note + that this may not be equivalent to all the languages used within the + entity. + +D.2.6 Link + + The Link entity-header field provides a means for describing a + relationship between the entity and some other resource. An entity + may include multiple Link values. Links at the metainformation level + typically indicate relationships like hierarchical structure and + navigation paths. + +D.2.7 MIME-Version + + HTTP messages may include a single MIME-Version general-header field + to indicate what version of the MIME protocol was used to construct + the message. Use of the MIME-Version header field, as defined by RFC + 1521 [5], should indicate that the message is MIME-conformant. + Unfortunately, some older HTTP/1.0 servers send it indiscriminately, + and thus this field should be ignored. + + + + +Berners-Lee, et al Informational [Page 59] + +RFC 1945 HTTP/1.0 May 1996 + + +D.2.8 Retry-After + + The Retry-After response-header field can be used with a 503 (service + unavailable) response to indicate how long the service is expected to + be unavailable to the requesting client. The value of this field can + be either an HTTP-date or an integer number of seconds (in decimal) + after the time of the response. + +D.2.9 Title + + The Title entity-header field indicates the title of the entity. + +D.2.10 URI + + The URI entity-header field may contain some or all of the Uniform + Resource Identifiers (Section 3.2) by which the Request-URI resource + can be identified. There is no guarantee that the resource can be + accessed using the URI(s) specified. 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Berners-Lee, et al Informational [Page 60] + diff --git a/docs/specs/rfc2068.txt b/docs/specs/rfc2068.txt new file mode 100644 index 0000000..e16e4fd --- /dev/null +++ b/docs/specs/rfc2068.txt @@ -0,0 +1,9075 @@ + + + + + + +Network Working Group R. Fielding +Request for Comments: 2068 UC Irvine +Category: Standards Track J. Gettys + J. Mogul + DEC + H. Frystyk + T. Berners-Lee + MIT/LCS + January 1997 + + + Hypertext Transfer Protocol -- HTTP/1.1 + +Status of this Memo + + This document specifies an Internet standards track protocol for the + Internet community, and requests discussion and suggestions for + improvements. Please refer to the current edition of the "Internet + Official Protocol Standards" (STD 1) for the standardization state + and status of this protocol. Distribution of this memo is unlimited. + +Abstract + + The Hypertext Transfer Protocol (HTTP) is an application-level + protocol for distributed, collaborative, hypermedia information + systems. It is a generic, stateless, object-oriented protocol which + can be used for many tasks, such as name servers and distributed + object management systems, through extension of its request methods. + A feature of HTTP is the typing and negotiation of data + representation, allowing systems to be built independently of the + data being transferred. + + HTTP has been in use by the World-Wide Web global information + initiative since 1990. This specification defines the protocol + referred to as "HTTP/1.1". 
+ +Table of Contents + + 1 Introduction.............................................7 + 1.1 Purpose ..............................................7 + 1.2 Requirements .........................................7 + 1.3 Terminology ..........................................8 + 1.4 Overall Operation ...................................11 + 2 Notational Conventions and Generic Grammar..............13 + 2.1 Augmented BNF .......................................13 + 2.2 Basic Rules .........................................15 + 3 Protocol Parameters.....................................17 + 3.1 HTTP Version ........................................17 + + + +Fielding, et. al. Standards Track [Page 1] + +RFC 2068 HTTP/1.1 January 1997 + + + 3.2 Uniform Resource Identifiers ........................18 + 3.2.1 General Syntax ...................................18 + 3.2.2 http URL .........................................19 + 3.2.3 URI Comparison ...................................20 + 3.3 Date/Time Formats ...................................21 + 3.3.1 Full Date ........................................21 + 3.3.2 Delta Seconds ....................................22 + 3.4 Character Sets ......................................22 + 3.5 Content Codings .....................................23 + 3.6 Transfer Codings ....................................24 + 3.7 Media Types .........................................25 + 3.7.1 Canonicalization and Text Defaults ...............26 + 3.7.2 Multipart Types ..................................27 + 3.8 Product Tokens ......................................28 + 3.9 Quality Values ......................................28 + 3.10 Language Tags ......................................28 + 3.11 Entity Tags ........................................29 + 3.12 Range Units ........................................30 + 4 HTTP Message............................................30 + 4.1 Message Types .......................................30 + 4.2 Message Headers 
.....................................31 + 4.3 Message Body ........................................32 + 4.4 Message Length ......................................32 + 4.5 General Header Fields ...............................34 + 5 Request.................................................34 + 5.1 Request-Line ........................................34 + 5.1.1 Method ...........................................35 + 5.1.2 Request-URI ......................................35 + 5.2 The Resource Identified by a Request ................37 + 5.3 Request Header Fields ...............................37 + 6 Response................................................38 + 6.1 Status-Line .........................................38 + 6.1.1 Status Code and Reason Phrase ....................39 + 6.2 Response Header Fields ..............................41 + 7 Entity..................................................41 + 7.1 Entity Header Fields ................................41 + 7.2 Entity Body .........................................42 + 7.2.1 Type .............................................42 + 7.2.2 Length ...........................................43 + 8 Connections.............................................43 + 8.1 Persistent Connections ..............................43 + 8.1.1 Purpose ..........................................43 + 8.1.2 Overall Operation ................................44 + 8.1.3 Proxy Servers ....................................45 + 8.1.4 Practical Considerations .........................45 + 8.2 Message Transmission Requirements ...................46 + 9 Method Definitions......................................48 + 9.1 Safe and Idempotent Methods .........................48 + + + +Fielding, et. al. 
Standards Track [Page 2] + +RFC 2068 HTTP/1.1 January 1997 + + + 9.1.1 Safe Methods .....................................48 + 9.1.2 Idempotent Methods ...............................49 + 9.2 OPTIONS .............................................49 + 9.3 GET .................................................50 + 9.4 HEAD ................................................50 + 9.5 POST ................................................51 + 9.6 PUT .................................................52 + 9.7 DELETE ..............................................53 + 9.8 TRACE ...............................................53 + 10 Status Code Definitions................................53 + 10.1 Informational 1xx ..................................54 + 10.1.1 100 Continue ....................................54 + 10.1.2 101 Switching Protocols .........................54 + 10.2 Successful 2xx .....................................54 + 10.2.1 200 OK ..........................................54 + 10.2.2 201 Created .....................................55 + 10.2.3 202 Accepted ....................................55 + 10.2.4 203 Non-Authoritative Information ...............55 + 10.2.5 204 No Content ..................................55 + 10.2.6 205 Reset Content ...............................56 + 10.2.7 206 Partial Content .............................56 + 10.3 Redirection 3xx ....................................56 + 10.3.1 300 Multiple Choices ............................57 + 10.3.2 301 Moved Permanently ...........................57 + 10.3.3 302 Moved Temporarily ...........................58 + 10.3.4 303 See Other ...................................58 + 10.3.5 304 Not Modified ................................58 + 10.3.6 305 Use Proxy ...................................59 + 10.4 Client Error 4xx ...................................59 + 10.4.1 400 Bad Request .................................60 + 10.4.2 401 Unauthorized ................................60 + 10.4.3 402 Payment Required 
............................60 + 10.4.4 403 Forbidden ...................................60 + 10.4.5 404 Not Found ...................................60 + 10.4.6 405 Method Not Allowed ..........................61 + 10.4.7 406 Not Acceptable ..............................61 + 10.4.8 407 Proxy Authentication Required ...............61 + 10.4.9 408 Request Timeout .............................62 + 10.4.10 409 Conflict ...................................62 + 10.4.11 410 Gone .......................................62 + 10.4.12 411 Length Required ............................63 + 10.4.13 412 Precondition Failed ........................63 + 10.4.14 413 Request Entity Too Large ...................63 + 10.4.15 414 Request-URI Too Long .......................63 + 10.4.16 415 Unsupported Media Type .....................63 + 10.5 Server Error 5xx ...................................64 + 10.5.1 500 Internal Server Error .......................64 + 10.5.2 501 Not Implemented .............................64 + + + +Fielding, et. al. 
Standards Track [Page 3] + +RFC 2068 HTTP/1.1 January 1997 + + + 10.5.3 502 Bad Gateway .................................64 + 10.5.4 503 Service Unavailable .........................64 + 10.5.5 504 Gateway Timeout .............................64 + 10.5.6 505 HTTP Version Not Supported ..................65 + 11 Access Authentication..................................65 + 11.1 Basic Authentication Scheme ........................66 + 11.2 Digest Authentication Scheme .......................67 + 12 Content Negotiation....................................67 + 12.1 Server-driven Negotiation ..........................68 + 12.2 Agent-driven Negotiation ...........................69 + 12.3 Transparent Negotiation ............................70 + 13 Caching in HTTP........................................70 + 13.1.1 Cache Correctness ...............................72 + 13.1.2 Warnings ........................................73 + 13.1.3 Cache-control Mechanisms ........................74 + 13.1.4 Explicit User Agent Warnings ....................74 + 13.1.5 Exceptions to the Rules and Warnings ............75 + 13.1.6 Client-controlled Behavior ......................75 + 13.2 Expiration Model ...................................75 + 13.2.1 Server-Specified Expiration .....................75 + 13.2.2 Heuristic Expiration ............................76 + 13.2.3 Age Calculations ................................77 + 13.2.4 Expiration Calculations .........................79 + 13.2.5 Disambiguating Expiration Values ................80 + 13.2.6 Disambiguating Multiple Responses ...............80 + 13.3 Validation Model ...................................81 + 13.3.1 Last-modified Dates .............................82 + 13.3.2 Entity Tag Cache Validators .....................82 + 13.3.3 Weak and Strong Validators ......................82 + 13.3.4 Rules for When to Use Entity Tags and Last- + modified Dates..........................................85 + 13.3.5 Non-validating Conditionals 
.....................86 + 13.4 Response Cachability ...............................86 + 13.5 Constructing Responses From Caches .................87 + 13.5.1 End-to-end and Hop-by-hop Headers ...............88 + 13.5.2 Non-modifiable Headers ..........................88 + 13.5.3 Combining Headers ...............................89 + 13.5.4 Combining Byte Ranges ...........................90 + 13.6 Caching Negotiated Responses .......................90 + 13.7 Shared and Non-Shared Caches .......................91 + 13.8 Errors or Incomplete Response Cache Behavior .......91 + 13.9 Side Effects of GET and HEAD .......................92 + 13.10 Invalidation After Updates or Deletions ...........92 + 13.11 Write-Through Mandatory ...........................93 + 13.12 Cache Replacement .................................93 + 13.13 History Lists .....................................93 + 14 Header Field Definitions...............................94 + 14.1 Accept .............................................95 + + + +Fielding, et. al. 
Standards Track [Page 4] + +RFC 2068 HTTP/1.1 January 1997 + + + 14.2 Accept-Charset .....................................97 + 14.3 Accept-Encoding ....................................97 + 14.4 Accept-Language ....................................98 + 14.5 Accept-Ranges ......................................99 + 14.6 Age ................................................99 + 14.7 Allow .............................................100 + 14.8 Authorization .....................................100 + 14.9 Cache-Control .....................................101 + 14.9.1 What is Cachable ...............................103 + 14.9.2 What May be Stored by Caches ...................103 + 14.9.3 Modifications of the Basic Expiration Mechanism 104 + 14.9.4 Cache Revalidation and Reload Controls .........105 + 14.9.5 No-Transform Directive .........................107 + 14.9.6 Cache Control Extensions .......................108 + 14.10 Connection .......................................109 + 14.11 Content-Base .....................................109 + 14.12 Content-Encoding .................................110 + 14.13 Content-Language .................................110 + 14.14 Content-Length ...................................111 + 14.15 Content-Location .................................112 + 14.16 Content-MD5 ......................................113 + 14.17 Content-Range ....................................114 + 14.18 Content-Type .....................................116 + 14.19 Date .............................................116 + 14.20 ETag .............................................117 + 14.21 Expires ..........................................117 + 14.22 From .............................................118 + 14.23 Host .............................................119 + 14.24 If-Modified-Since ................................119 + 14.25 If-Match .........................................121 + 14.26 If-None-Match ....................................122 + 14.27 If-Range 
.........................................123 + 14.28 If-Unmodified-Since ..............................124 + 14.29 Last-Modified ....................................124 + 14.30 Location .........................................125 + 14.31 Max-Forwards .....................................125 + 14.32 Pragma ...........................................126 + 14.33 Proxy-Authenticate ...............................127 + 14.34 Proxy-Authorization ..............................127 + 14.35 Public ...........................................127 + 14.36 Range ............................................128 + 14.36.1 Byte Ranges ...................................128 + 14.36.2 Range Retrieval Requests ......................130 + 14.37 Referer ..........................................131 + 14.38 Retry-After ......................................131 + 14.39 Server ...........................................132 + 14.40 Transfer-Encoding ................................132 + 14.41 Upgrade ..........................................132 + + + +Fielding, et. al. 
Standards Track [Page 5] + +RFC 2068 HTTP/1.1 January 1997 + + + 14.42 User-Agent .......................................134 + 14.43 Vary .............................................134 + 14.44 Via ..............................................135 + 14.45 Warning ..........................................137 + 14.46 WWW-Authenticate .................................139 + 15 Security Considerations...............................139 + 15.1 Authentication of Clients .........................139 + 15.2 Offering a Choice of Authentication Schemes .......140 + 15.3 Abuse of Server Log Information ...................141 + 15.4 Transfer of Sensitive Information .................141 + 15.5 Attacks Based On File and Path Names ..............142 + 15.6 Personal Information ..............................143 + 15.7 Privacy Issues Connected to Accept Headers ........143 + 15.8 DNS Spoofing ......................................144 + 15.9 Location Headers and Spoofing .....................144 + 16 Acknowledgments.......................................144 + 17 References............................................146 + 18 Authors' Addresses....................................149 + 19 Appendices............................................150 + 19.1 Internet Media Type message/http ..................150 + 19.2 Internet Media Type multipart/byteranges ..........150 + 19.3 Tolerant Applications .............................151 + 19.4 Differences Between HTTP Entities and + MIME Entities...........................................152 + 19.4.1 Conversion to Canonical Form ...................152 + 19.4.2 Conversion of Date Formats .....................153 + 19.4.3 Introduction of Content-Encoding ...............153 + 19.4.4 No Content-Transfer-Encoding ...................153 + 19.4.5 HTTP Header Fields in Multipart Body-Parts .....153 + 19.4.6 Introduction of Transfer-Encoding ..............154 + 19.4.7 MIME-Version ...................................154 + 19.5 Changes from HTTP/1.0 
.............................154 + 19.5.1 Changes to Simplify Multi-homed Web Servers and + Conserve IP Addresses .................................155 + 19.6 Additional Features ...............................156 + 19.6.1 Additional Request Methods .....................156 + 19.6.2 Additional Header Field Definitions ............156 + 19.7 Compatibility with Previous Versions ..............160 + 19.7.1 Compatibility with HTTP/1.0 Persistent + Connections............................................161 + + + + + + + + + + + +Fielding, et. al. Standards Track [Page 6] + +RFC 2068 HTTP/1.1 January 1997 + + +1 Introduction + +1.1 Purpose + + The Hypertext Transfer Protocol (HTTP) is an application-level + protocol for distributed, collaborative, hypermedia information + systems. HTTP has been in use by the World-Wide Web global + information initiative since 1990. The first version of HTTP, + referred to as HTTP/0.9, was a simple protocol for raw data transfer + across the Internet. HTTP/1.0, as defined by RFC 1945 [6], improved + the protocol by allowing messages to be in the format of MIME-like + messages, containing metainformation about the data transferred and + modifiers on the request/response semantics. However, HTTP/1.0 does + not sufficiently take into consideration the effects of hierarchical + proxies, caching, the need for persistent connections, and virtual + hosts. In addition, the proliferation of incompletely-implemented + applications calling themselves "HTTP/1.0" has necessitated a + protocol version change in order for two communicating applications + to determine each other's true capabilities. + + This specification defines the protocol referred to as "HTTP/1.1". + This protocol includes more stringent requirements than HTTP/1.0 in + order to ensure reliable implementation of its features. + + Practical information systems require more functionality than simple + retrieval, including search, front-end update, and annotation. 
HTTP + allows an open-ended set of methods that indicate the purpose of a + request. It builds on the discipline of reference provided by the + Uniform Resource Identifier (URI) [3][20], as a location (URL) [4] or + name (URN) , for indicating the resource to which a method is to be + applied. Messages are passed in a format similar to that used by + Internet mail as defined by the Multipurpose Internet Mail Extensions + (MIME). + + HTTP is also used as a generic protocol for communication between + user agents and proxies/gateways to other Internet systems, including + those supported by the SMTP [16], NNTP [13], FTP [18], Gopher [2], + and WAIS [10] protocols. In this way, HTTP allows basic hypermedia + access to resources available from diverse applications. + +1.2 Requirements + + This specification uses the same words as RFC 1123 [8] for defining + the significance of each particular requirement. These words are: + + MUST + This word or the adjective "required" means that the item is an + absolute requirement of the specification. + + + +Fielding, et. al. Standards Track [Page 7] + +RFC 2068 HTTP/1.1 January 1997 + + + SHOULD + This word or the adjective "recommended" means that there may + exist valid reasons in particular circumstances to ignore this + item, but the full implications should be understood and the case + carefully weighed before choosing a different course. + + MAY + This word or the adjective "optional" means that this item is + truly optional. One vendor may choose to include the item because + a particular marketplace requires it or because it enhances the + product, for example; another vendor may omit the same item. + + An implementation is not compliant if it fails to satisfy one or more + of the MUST requirements for the protocols it implements. 
An + implementation that satisfies all the MUST and all the SHOULD + requirements for its protocols is said to be "unconditionally + compliant"; one that satisfies all the MUST requirements but not all + the SHOULD requirements for its protocols is said to be + "conditionally compliant." + +1.3 Terminology + + This specification uses a number of terms to refer to the roles + played by participants in, and objects of, the HTTP communication. + + connection + A transport layer virtual circuit established between two programs + for the purpose of communication. + + message + The basic unit of HTTP communication, consisting of a structured + sequence of octets matching the syntax defined in section 4 and + transmitted via the connection. + + request + An HTTP request message, as defined in section 5. + + response + An HTTP response message, as defined in section 6. + + resource + A network data object or service that can be identified by a URI, + as defined in section 3.2. Resources may be available in multiple + representations (e.g. multiple languages, data formats, size, + resolutions) or vary in other ways. + + + + + + +Fielding, et. al. Standards Track [Page 8] + +RFC 2068 HTTP/1.1 January 1997 + + + entity + The information transferred as the payload of a request or + response. An entity consists of metainformation in the form of + entity-header fields and content in the form of an entity-body, as + described in section 7. + + representation + An entity included with a response that is subject to content + negotiation, as described in section 12. There may exist multiple + representations associated with a particular response status. + + content negotiation + The mechanism for selecting the appropriate representation when + servicing a request, as described in section 12. The + representation of entities in any response can be negotiated + (including error responses). 
+ + variant + A resource may have one, or more than one, representation(s) + associated with it at any given instant. Each of these + representations is termed a `variant.' Use of the term `variant' + does not necessarily imply that the resource is subject to content + negotiation. + + client + A program that establishes connections for the purpose of sending + requests. + + user agent + The client which initiates a request. These are often browsers, + editors, spiders (web-traversing robots), or other end user tools. + + server + An application program that accepts connections in order to + service requests by sending back responses. Any given program may + be capable of being both a client and a server; our use of these + terms refers only to the role being performed by the program for a + particular connection, rather than to the program's capabilities + in general. Likewise, any server may act as an origin server, + proxy, gateway, or tunnel, switching behavior based on the nature + of each request. + + origin server + The server on which a given resource resides or is to be created. + + + + + + + +Fielding, et. al. Standards Track [Page 9] + +RFC 2068 HTTP/1.1 January 1997 + + + proxy + An intermediary program which acts as both a server and a client + for the purpose of making requests on behalf of other clients. + Requests are serviced internally or by passing them on, with + possible translation, to other servers. A proxy must implement + both the client and server requirements of this specification. + + gateway + A server which acts as an intermediary for some other server. + Unlike a proxy, a gateway receives requests as if it were the + origin server for the requested resource; the requesting client + may not be aware that it is communicating with a gateway. + + tunnel + An intermediary program which is acting as a blind relay between + two connections. 
Once active, a tunnel is not considered a party + to the HTTP communication, though the tunnel may have been + initiated by an HTTP request. The tunnel ceases to exist when both + ends of the relayed connections are closed. + + cache + A program's local store of response messages and the subsystem + that controls its message storage, retrieval, and deletion. A + cache stores cachable responses in order to reduce the response + time and network bandwidth consumption on future, equivalent + requests. Any client or server may include a cache, though a cache + cannot be used by a server that is acting as a tunnel. + + cachable + A response is cachable if a cache is allowed to store a copy of + the response message for use in answering subsequent requests. The + rules for determining the cachability of HTTP responses are + defined in section 13. Even if a resource is cachable, there may + be additional constraints on whether a cache can use the cached + copy for a particular request. + + first-hand + A response is first-hand if it comes directly and without + unnecessary delay from the origin server, perhaps via one or more + proxies. A response is also first-hand if its validity has just + been checked directly with the origin server. + + explicit expiration time + The time at which the origin server intends that an entity should + no longer be returned by a cache without further validation. + + + + + + +Fielding, et. al. Standards Track [Page 10] + +RFC 2068 HTTP/1.1 January 1997 + + + heuristic expiration time + An expiration time assigned by a cache when no explicit expiration + time is available. + + age + The age of a response is the time since it was sent by, or + successfully validated with, the origin server. + + freshness lifetime + The length of time between the generation of a response and its + expiration time. + + fresh + A response is fresh if its age has not yet exceeded its freshness + lifetime. 
+ + stale + A response is stale if its age has passed its freshness lifetime. + + semantically transparent + A cache behaves in a "semantically transparent" manner, with + respect to a particular response, when its use affects neither the + requesting client nor the origin server, except to improve + performance. When a cache is semantically transparent, the client + receives exactly the same response (except for hop-by-hop headers) + that it would have received had its request been handled directly + by the origin server. + + validator + A protocol element (e.g., an entity tag or a Last-Modified time) + that is used to find out whether a cache entry is an equivalent + copy of an entity. + +1.4 Overall Operation + + The HTTP protocol is a request/response protocol. A client sends a + request to the server in the form of a request method, URI, and + protocol version, followed by a MIME-like message containing request + modifiers, client information, and possible body content over a + connection with a server. The server responds with a status line, + including the message's protocol version and a success or error code, + followed by a MIME-like message containing server information, entity + metainformation, and possible entity-body content. The relationship + between HTTP and MIME is described in appendix 19.4. + + + + + + + +Fielding, et. al. Standards Track [Page 11] + +RFC 2068 HTTP/1.1 January 1997 + + + Most HTTP communication is initiated by a user agent and consists of + a request to be applied to a resource on some origin server. In the + simplest case, this may be accomplished via a single connection (v) + between the user agent (UA) and the origin server (O). + + request chain ------------------------> + UA -------------------v------------------- O + <----------------------- response chain + + A more complicated situation occurs when one or more intermediaries + are present in the request/response chain. 
There are three common + forms of intermediary: proxy, gateway, and tunnel. A proxy is a + forwarding agent, receiving requests for a URI in its absolute form, + rewriting all or part of the message, and forwarding the reformatted + request toward the server identified by the URI. A gateway is a + receiving agent, acting as a layer above some other server(s) and, if + necessary, translating the requests to the underlying server's + protocol. A tunnel acts as a relay point between two connections + without changing the messages; tunnels are used when the + communication needs to pass through an intermediary (such as a + firewall) even when the intermediary cannot understand the contents + of the messages. + + request chain --------------------------------------> + UA -----v----- A -----v----- B -----v----- C -----v----- O + <------------------------------------- response chain + + The figure above shows three intermediaries (A, B, and C) between the + user agent and origin server. A request or response message that + travels the whole chain will pass through four separate connections. + This distinction is important because some HTTP communication options + may apply only to the connection with the nearest, non-tunnel + neighbor, only to the end-points of the chain, or to all connections + along the chain. Although the diagram is linear, each participant + may be engaged in multiple, simultaneous communications. For example, + B may be receiving requests from many clients other than A, and/or + forwarding requests to servers other than C, at the same time that it + is handling A's request. + + Any party to the communication which is not acting as a tunnel may + employ an internal cache for handling requests. The effect of a cache + is that the request/response chain is shortened if one of the + participants along the chain has a cached response applicable to that + request. 
The following illustrates the resulting chain if B has a + cached copy of an earlier response from O (via C) for a request which + has not been cached by UA or A. + + + + + +Fielding, et. al. Standards Track [Page 12] + +RFC 2068 HTTP/1.1 January 1997 + + + request chain ----------> + UA -----v----- A -----v----- B - - - - - - C - - - - - - O + <--------- response chain + + Not all responses are usefully cachable, and some requests may + contain modifiers which place special requirements on cache behavior. + HTTP requirements for cache behavior and cachable responses are + defined in section 13. + + In fact, there are a wide variety of architectures and configurations + of caches and proxies currently being experimented with or deployed + across the World Wide Web; these systems include national hierarchies + of proxy caches to save transoceanic bandwidth, systems that + broadcast or multicast cache entries, organizations that distribute + subsets of cached data via CD-ROM, and so on. HTTP systems are used + in corporate intranets over high-bandwidth links, and for access via + PDAs with low-power radio links and intermittent connectivity. The + goal of HTTP/1.1 is to support the wide diversity of configurations + already deployed while introducing protocol constructs that meet the + needs of those who build web applications that require high + reliability and, failing that, at least reliable indications of + failure. + + HTTP communication usually takes place over TCP/IP connections. The + default port is TCP 80, but other ports can be used. This does not + preclude HTTP from being implemented on top of any other protocol on + the Internet, or on other networks. HTTP only presumes a reliable + transport; any protocol that provides such guarantees can be used; + the mapping of the HTTP/1.1 request and response structures onto the + transport data units of the protocol in question is outside the scope + of this specification. 
+ + In HTTP/1.0, most implementations used a new connection for each + request/response exchange. In HTTP/1.1, a connection may be used for + one or more request/response exchanges, although connections may be + closed for a variety of reasons (see section 8.1). + +2 Notational Conventions and Generic Grammar + +2.1 Augmented BNF + + All of the mechanisms specified in this document are described in + both prose and an augmented Backus-Naur Form (BNF) similar to that + used by RFC 822 [9]. Implementers will need to be familiar with the + notation in order to understand this specification. The augmented BNF + includes the following constructs: + + + + + +Fielding, et. al. Standards Track [Page 13] + +RFC 2068 HTTP/1.1 January 1997 + + +name = definition + The name of a rule is simply the name itself (without any enclosing + "<" and ">") and is separated from its definition by the equal "=" + character. Whitespace is only significant in that indentation of + continuation lines is used to indicate a rule definition that spans + more than one line. Certain basic rules are in uppercase, such as + SP, LWS, HT, CRLF, DIGIT, ALPHA, etc. Angle brackets are used + within definitions whenever their presence will facilitate + discerning the use of rule names. + +"literal" + Quotation marks surround literal text. Unless stated otherwise, the + text is case-insensitive. + +rule1 | rule2 + Elements separated by a bar ("|") are alternatives, e.g., "yes | + no" will accept yes or no. + +(rule1 rule2) + Elements enclosed in parentheses are treated as a single element. + Thus, "(elem (foo | bar) elem)" allows the token sequences "elem + foo elem" and "elem bar elem". + +*rule + The character "*" preceding an element indicates repetition. The + full form is "<n>*<m>element" indicating at least <n> and at most <m> + occurrences of element.
Default values are 0 and infinity so + that "*(element)" allows any number, including zero; "1*element" + requires at least one; and "1*2element" allows one or two. + +[rule] + Square brackets enclose optional elements; "[foo bar]" is + equivalent to "*1(foo bar)". + +N rule + Specific repetition: "<n>(element)" is equivalent to + "<n>*<n>(element)"; that is, exactly <n> occurrences of (element). + Thus 2DIGIT is a 2-digit number, and 3ALPHA is a string of three + alphabetic characters. + +#rule + A construct "#" is defined, similar to "*", for defining lists of + elements. The full form is "<n>#<m>element " indicating at least + <n> and at most <m> elements, each separated by one or more commas + (",") and optional linear whitespace (LWS). This makes the usual + form of lists very easy; a rule such as "( *LWS element *( *LWS "," + *LWS element )) " can be shown as "1#element". Wherever this + construct is used, null elements are allowed, but do not contribute + + + +Fielding, et. al. Standards Track [Page 14] + +RFC 2068 HTTP/1.1 January 1997 + + + to the count of elements present. That is, "(element), , (element) + " is permitted, but counts as only two elements. Therefore, where + at least one element is required, at least one non-null element + must be present. Default values are 0 and infinity so that + "#element" allows any number, including zero; "1#element" requires + at least one; and "1#2element" allows one or two. + +; comment + A semi-colon, set off some distance to the right of rule text, + starts a comment that continues to the end of line. This is a + simple way of including useful notes in parallel with the + specifications. + +implied *LWS + The grammar described by this specification is word-based. Except + where noted otherwise, linear whitespace (LWS) can be included + between any two adjacent words (token or quoted-string), and + between adjacent tokens and delimiters (tspecials), without + changing the interpretation of a field.
At least one delimiter + (tspecials) must exist between any two tokens, since they would + otherwise be interpreted as a single token. + +2.2 Basic Rules + + The following rules are used throughout this specification to + describe basic parsing constructs. The US-ASCII coded character set + is defined by ANSI X3.4-1986 [21]. + + OCTET = <any 8-bit sequence of data> + CHAR = <any US-ASCII character (octets 0 - 127)> + UPALPHA = <any US-ASCII uppercase letter "A".."Z"> + LOALPHA = <any US-ASCII lowercase letter "a".."z"> + ALPHA = UPALPHA | LOALPHA + DIGIT = <any US-ASCII digit "0".."9"> + CTL = <any US-ASCII control character + (octets 0 - 31) and DEL (127)> + CR = <US-ASCII CR, carriage return (13)> + LF = <US-ASCII LF, linefeed (10)> + SP = <US-ASCII SP, space (32)> + HT = <US-ASCII HT, horizontal-tab (9)> + <"> = <US-ASCII double-quote mark (34)> + + + + + + + + + + +Fielding, et. al. Standards Track [Page 15] + +RFC 2068 HTTP/1.1 January 1997 + + + HTTP/1.1 defines the sequence CR LF as the end-of-line marker for all + protocol elements except the entity-body (see appendix 19.3 for + tolerant applications). The end-of-line marker within an entity-body + is defined by its associated media type, as described in section 3.7. + + CRLF = CR LF + + HTTP/1.1 headers can be folded onto multiple lines if the + continuation line begins with a space or horizontal tab. All linear + white space, including folding, has the same semantics as SP. + + LWS = [CRLF] 1*( SP | HT ) + + The TEXT rule is only used for descriptive field contents and values + that are not intended to be interpreted by the message parser. Words + of *TEXT may contain characters from character sets other than ISO + 8859-1 [22] only when encoded according to the rules of RFC 1522 + [14]. + + TEXT = <any OCTET except CTLs, + but including LWS> + + Hexadecimal numeric characters are used in several protocol elements. + + HEX = "A" | "B" | "C" | "D" | "E" | "F" + | "a" | "b" | "c" | "d" | "e" | "f" | DIGIT + + Many HTTP/1.1 header field values consist of words separated by LWS + or special characters. These special characters MUST be in a quoted + string to be used within a parameter value. + + token = 1*<any CHAR except CTLs or tspecials> + + tspecials = "(" | ")" | "<" | ">" | "@" + | "," | ";" | ":" | "\" | <"> + | "/" | "[" | "]" | "?" | "=" + | "{" | "}" | SP | HT + + Comments can be included in some HTTP header fields by surrounding + the comment text with parentheses.
Comments are only allowed in + fields containing "comment" as part of their field value definition. + In all other fields, parentheses are considered part of the field + value. + + comment = "(" *( ctext | comment ) ")" + ctext = <any TEXT excluding "(" and ")"> + + + + + +Fielding, et. al. Standards Track [Page 16] + +RFC 2068 HTTP/1.1 January 1997 + + + A string of text is parsed as a single word if it is quoted using + double-quote marks. + + quoted-string = ( <"> *(qdtext) <"> ) + + qdtext = <any TEXT except <">> + + The backslash character ("\") may be used as a single-character quoting + mechanism only within quoted-string and comment constructs. + + quoted-pair = "\" CHAR + +3 Protocol Parameters + +3.1 HTTP Version + + HTTP uses a "<major>.<minor>" numbering scheme to indicate versions + of the protocol. The protocol versioning policy is intended to allow + the sender to indicate the format of a message and its capacity for + understanding further HTTP communication, rather than the features + obtained via that communication. No change is made to the version + number for the addition of message components which do not affect + communication behavior or which only add to extensible field values. + The <minor> number is incremented when the changes made to the + protocol add features which do not change the general message parsing + algorithm, but which may add to the message semantics and imply + additional capabilities of the sender. The <major> number is + incremented when the format of a message within the protocol is + changed. + + The version of an HTTP message is indicated by an HTTP-Version field + in the first line of the message. + + HTTP-Version = "HTTP" "/" 1*DIGIT "." 1*DIGIT + + Note that the major and minor numbers MUST be treated as separate + integers and that each may be incremented higher than a single digit. + Thus, HTTP/2.4 is a lower version than HTTP/2.13, which in turn is + lower than HTTP/12.3. Leading zeros MUST be ignored by recipients and + MUST NOT be sent.
+ + Applications sending Request or Response messages, as defined by this + specification, MUST include an HTTP-Version of "HTTP/1.1". Use of + this version number indicates that the sending application is at + least conditionally compliant with this specification. + + The HTTP version of an application is the highest HTTP version for + which the application is at least conditionally compliant. + + + +Fielding, et. al. Standards Track [Page 17] + +RFC 2068 HTTP/1.1 January 1997 + + + Proxy and gateway applications must be careful when forwarding + messages in protocol versions different from that of the application. + Since the protocol version indicates the protocol capability of the + sender, a proxy/gateway MUST never send a message with a version + indicator which is greater than its actual version; if a higher + version request is received, the proxy/gateway MUST either downgrade + the request version, respond with an error, or switch to tunnel + behavior. Requests with a version lower than that of the + proxy/gateway's version MAY be upgraded before being forwarded; the + proxy/gateway's response to that request MUST be in the same major + version as the request. + + Note: Converting between versions of HTTP may involve modification + of header fields required or forbidden by the versions involved. + +3.2 Uniform Resource Identifiers + + URIs have been known by many names: WWW addresses, Universal Document + Identifiers, Universal Resource Identifiers , and finally the + combination of Uniform Resource Locators (URL) and Names (URN). As + far as HTTP is concerned, Uniform Resource Identifiers are simply + formatted strings which identify--via name, location, or any other + characteristic--a resource. + +3.2.1 General Syntax + + URIs in HTTP can be represented in absolute form or relative to some + known base URI, depending upon the context of their use. 
The two + forms are differentiated by the fact that absolute URIs always begin + with a scheme name followed by a colon. + + URI = ( absoluteURI | relativeURI ) [ "#" fragment ] + + absoluteURI = scheme ":" *( uchar | reserved ) + + relativeURI = net_path | abs_path | rel_path + + net_path = "//" net_loc [ abs_path ] + abs_path = "/" rel_path + rel_path = [ path ] [ ";" params ] [ "?" query ] + + path = fsegment *( "/" segment ) + fsegment = 1*pchar + segment = *pchar + + params = param *( ";" param ) + param = *( pchar | "/" ) + + + + +Fielding, et. al. Standards Track [Page 18] + +RFC 2068 HTTP/1.1 January 1997 + + + scheme = 1*( ALPHA | DIGIT | "+" | "-" | "." ) + net_loc = *( pchar | ";" | "?" ) + + query = *( uchar | reserved ) + fragment = *( uchar | reserved ) + + pchar = uchar | ":" | "@" | "&" | "=" | "+" + uchar = unreserved | escape + unreserved = ALPHA | DIGIT | safe | extra | national + + escape = "%" HEX HEX + reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" + extra = "!" | "*" | "'" | "(" | ")" | "," + safe = "$" | "-" | "_" | "." + unsafe = CTL | SP | <"> | "#" | "%" | "<" | ">" + national = <any OCTET excluding ALPHA, DIGIT, + reserved, extra, safe, and unsafe> + + For definitive information on URL syntax and semantics, see RFC 1738 + [4] and RFC 1808 [11]. The BNF above includes national characters not + allowed in valid URLs as specified by RFC 1738, since HTTP servers + are not restricted in the set of unreserved characters allowed to + represent the rel_path part of addresses, and HTTP proxies may + receive requests for URIs not defined by RFC 1738. + + The HTTP protocol does not place any a priori limit on the length of + a URI. Servers MUST be able to handle the URI of any resource they + serve, and SHOULD be able to handle URIs of unbounded length if they + provide GET-based forms that could generate such URIs. A server + SHOULD return 414 (Request-URI Too Long) status if a URI is longer + than the server can handle (see section 10.4.15).
+ + Note: Servers should be cautious about depending on URI lengths + above 255 bytes, because some older client or proxy implementations + may not properly support these lengths. + +3.2.2 http URL + + The "http" scheme is used to locate network resources via the HTTP + protocol. This section defines the scheme-specific syntax and + semantics for http URLs. + + + + + + + + + + +Fielding, et. al. Standards Track [Page 19] + +RFC 2068 HTTP/1.1 January 1997 + + + http_URL = "http:" "//" host [ ":" port ] [ abs_path ] + + host = <A legal Internet host domain name + or IP address (in dotted-decimal form), + as defined by Section 2.1 of RFC 1123> + + port = *DIGIT + + If the port is empty or not given, port 80 is assumed. The semantics + are that the identified resource is located at the server listening + for TCP connections on that port of that host, and the Request-URI + for the resource is abs_path. The use of IP addresses in URL's SHOULD + be avoided whenever possible (see RFC 1900 [24]). If the abs_path is + not present in the URL, it MUST be given as "/" when used as a + Request-URI for a resource (section 5.1.2). + +3.2.3 URI Comparison + + When comparing two URIs to decide if they match or not, a client + SHOULD use a case-sensitive octet-by-octet comparison of the entire + URIs, with these exceptions: + + o A port that is empty or not given is equivalent to the default + port for that URI; + + o Comparisons of host names MUST be case-insensitive; + + o Comparisons of scheme names MUST be case-insensitive; + + o An empty abs_path is equivalent to an abs_path of "/". + + Characters other than those in the "reserved" and "unsafe" sets (see + section 3.2) are equivalent to their ""%" HEX HEX" encodings. + + For example, the following three URIs are equivalent: + + http://abc.com:80/~smith/home.html + http://ABC.com/%7Esmith/home.html + http://ABC.com:/%7esmith/home.html + + + + + + + + + + + + +Fielding, et. al.
Standards Track [Page 20] + +RFC 2068 HTTP/1.1 January 1997 + + +3.3 Date/Time Formats + +3.3.1 Full Date + + HTTP applications have historically allowed three different formats + for the representation of date/time stamps: + + Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123 + Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036 + Sun Nov 6 08:49:37 1994 ; ANSI C's asctime() format + + The first format is preferred as an Internet standard and represents + a fixed-length subset of that defined by RFC 1123 (an update to RFC + 822). The second format is in common use, but is based on the + obsolete RFC 850 [12] date format and lacks a four-digit year. + HTTP/1.1 clients and servers that parse the date value MUST accept + all three formats (for compatibility with HTTP/1.0), though they MUST + only generate the RFC 1123 format for representing HTTP-date values + in header fields. + + Note: Recipients of date values are encouraged to be robust in + accepting date values that may have been sent by non-HTTP + applications, as is sometimes the case when retrieving or posting + messages via proxies/gateways to SMTP or NNTP. + + All HTTP date/time stamps MUST be represented in Greenwich Mean Time + (GMT), without exception. This is indicated in the first two formats + by the inclusion of "GMT" as the three-letter abbreviation for time + zone, and MUST be assumed when reading the asctime format. 
+ + HTTP-date = rfc1123-date | rfc850-date | asctime-date + + rfc1123-date = wkday "," SP date1 SP time SP "GMT" + rfc850-date = weekday "," SP date2 SP time SP "GMT" + asctime-date = wkday SP date3 SP time SP 4DIGIT + + date1 = 2DIGIT SP month SP 4DIGIT + ; day month year (e.g., 02 Jun 1982) + date2 = 2DIGIT "-" month "-" 2DIGIT + ; day-month-year (e.g., 02-Jun-82) + date3 = month SP ( 2DIGIT | ( SP 1DIGIT )) + ; month day (e.g., Jun 2) + + time = 2DIGIT ":" 2DIGIT ":" 2DIGIT + ; 00:00:00 - 23:59:59 + + wkday = "Mon" | "Tue" | "Wed" + | "Thu" | "Fri" | "Sat" | "Sun" + + + +Fielding, et. al. Standards Track [Page 21] + +RFC 2068 HTTP/1.1 January 1997 + + + weekday = "Monday" | "Tuesday" | "Wednesday" + | "Thursday" | "Friday" | "Saturday" | "Sunday" + + month = "Jan" | "Feb" | "Mar" | "Apr" + | "May" | "Jun" | "Jul" | "Aug" + | "Sep" | "Oct" | "Nov" | "Dec" + + Note: HTTP requirements for the date/time stamp format apply only + to their usage within the protocol stream. Clients and servers are + not required to use these formats for user presentation, request + logging, etc. + +3.3.2 Delta Seconds + + Some HTTP header fields allow a time value to be specified as an + integer number of seconds, represented in decimal, after the time + that the message was received. + + delta-seconds = 1*DIGIT + +3.4 Character Sets + + HTTP uses the same definition of the term "character set" as that + described for MIME: + + The term "character set" is used in this document to refer to a + method used with one or more tables to convert a sequence of octets + into a sequence of characters. Note that unconditional conversion + in the other direction is not required, in that not all characters + may be available in a given character set and a character set may + provide more than one sequence of octets to represent a particular + character. 
This definition is intended to allow various kinds of + character encodings, from simple single-table mappings such as US- + ASCII to complex table switching methods such as those that use ISO + 2022's techniques. However, the definition associated with a MIME + character set name MUST fully specify the mapping to be performed + from octets to characters. In particular, use of external profiling + information to determine the exact mapping is not permitted. + + Note: This use of the term "character set" is more commonly + referred to as a "character encoding." However, since HTTP and MIME + share the same registry, it is important that the terminology also + be shared. + + + + + + + + +Fielding, et. al. Standards Track [Page 22] + +RFC 2068 HTTP/1.1 January 1997 + + + HTTP character sets are identified by case-insensitive tokens. The + complete set of tokens is defined by the IANA Character Set registry + [19]. + + charset = token + + Although HTTP allows an arbitrary token to be used as a charset + value, any token that has a predefined value within the IANA + Character Set registry MUST represent the character set defined by + that registry. Applications SHOULD limit their use of character sets + to those defined by the IANA registry. + +3.5 Content Codings + + Content coding values indicate an encoding transformation that has + been or can be applied to an entity. Content codings are primarily + used to allow a document to be compressed or otherwise usefully + transformed without losing the identity of its underlying media type + and without loss of information. Frequently, the entity is stored in + coded form, transmitted directly, and only decoded by the recipient. + + content-coding = token + + All content-coding values are case-insensitive. HTTP/1.1 uses + content-coding values in the Accept-Encoding (section 14.3) and + Content-Encoding (section 14.12) header fields. 
Although the value + describes the content-coding, what is more important is that it + indicates what decoding mechanism will be required to remove the + encoding. + + The Internet Assigned Numbers Authority (IANA) acts as a registry for + content-coding value tokens. Initially, the registry contains the + following tokens: + + gzip An encoding format produced by the file compression program "gzip" + (GNU zip) as described in RFC 1952 [25]. This format is a Lempel- + Ziv coding (LZ77) with a 32 bit CRC. + + compress + The encoding format produced by the common UNIX file compression + program "compress". This format is an adaptive Lempel-Ziv-Welch + coding (LZW). + + + + + + + + + +Fielding, et. al. Standards Track [Page 23] + +RFC 2068 HTTP/1.1 January 1997 + + + Note: Use of program names for the identification of encoding + formats is not desirable and should be discouraged for future + encodings. Their use here is representative of historical practice, + not good design. For compatibility with previous implementations of + HTTP, applications should consider "x-gzip" and "x-compress" to be + equivalent to "gzip" and "compress" respectively. + + deflate The "zlib" format defined in RFC 1950[31] in combination with + the "deflate" compression mechanism described in RFC 1951[29]. + + New content-coding value tokens should be registered; to allow + interoperability between clients and servers, specifications of the + content coding algorithms needed to implement a new value should be + publicly available and adequate for independent implementation, and + conform to the purpose of content coding defined in this section. + +3.6 Transfer Codings + + Transfer coding values are used to indicate an encoding + transformation that has been, can be, or may need to be applied to an + entity-body in order to ensure "safe transport" through the network. + This differs from a content coding in that the transfer coding is a + property of the message, not of the original entity. 
+ + transfer-coding = "chunked" | transfer-extension + + transfer-extension = token + + All transfer-coding values are case-insensitive. HTTP/1.1 uses + transfer coding values in the Transfer-Encoding header field (section + 14.40). + + Transfer codings are analogous to the Content-Transfer-Encoding + values of MIME , which were designed to enable safe transport of + binary data over a 7-bit transport service. However, safe transport + has a different focus for an 8bit-clean transfer protocol. In HTTP, + the only unsafe characteristic of message-bodies is the difficulty in + determining the exact body length (section 7.2.2), or the desire to + encrypt data over a shared transport. + + The chunked encoding modifies the body of a message in order to + transfer it as a series of chunks, each with its own size indicator, + followed by an optional footer containing entity-header fields. This + allows dynamically-produced content to be transferred along with the + information necessary for the recipient to verify that it has + received the full message. + + + + + +Fielding, et. al. Standards Track [Page 24] + +RFC 2068 HTTP/1.1 January 1997 + + + Chunked-Body = *chunk + "0" CRLF + footer + CRLF + + chunk = chunk-size [ chunk-ext ] CRLF + chunk-data CRLF + + hex-no-zero = <HEX excluding "0"> + + chunk-size = hex-no-zero *HEX + chunk-ext = *( ";" chunk-ext-name [ "=" chunk-ext-value ] ) + chunk-ext-name = token + chunk-ext-val = token | quoted-string + chunk-data = chunk-size(OCTET) + + footer = *entity-header + + The chunked encoding is ended by a zero-sized chunk followed by the + footer, which is terminated by an empty line. The purpose of the + footer is to provide an efficient way to supply information about an + entity that is generated dynamically; applications MUST NOT send + header fields in the footer which are not explicitly defined as being + appropriate for the footer, such as Content-MD5 or future extensions + to HTTP for digital signatures or other facilities. 
+ + An example process for decoding a Chunked-Body is presented in + appendix 19.4.6. + + All HTTP/1.1 applications MUST be able to receive and decode the + "chunked" transfer coding, and MUST ignore transfer coding extensions + they do not understand. A server which receives an entity-body with a + transfer-coding it does not understand SHOULD return 501 + (Unimplemented), and close the connection. A server MUST NOT send + transfer-codings to an HTTP/1.0 client. + +3.7 Media Types + + HTTP uses Internet Media Types in the Content-Type (section 14.18) + and Accept (section 14.1) header fields in order to provide open and + extensible data typing and type negotiation. + + media-type = type "/" subtype *( ";" parameter ) + type = token + subtype = token + + Parameters may follow the type/subtype in the form of attribute/value + pairs. + + + +Fielding, et. al. Standards Track [Page 25] + +RFC 2068 HTTP/1.1 January 1997 + + + parameter = attribute "=" value + attribute = token + value = token | quoted-string + + The type, subtype, and parameter attribute names are case- + insensitive. Parameter values may or may not be case-sensitive, + depending on the semantics of the parameter name. Linear white space + (LWS) MUST NOT be used between the type and subtype, nor between an + attribute and its value. User agents that recognize the media-type + MUST process (or arrange to be processed by any external applications + used to process that type/subtype by the user agent) the parameters + for that MIME type as described by that type/subtype definition to + the and inform the user of any problems discovered. + + Note: some older HTTP applications do not recognize media type + parameters. When sending data to older HTTP applications, + implementations should only use media type parameters when they are + required by that type/subtype definition. + + Media-type values are registered with the Internet Assigned Number + Authority (IANA). 
The media type registration process is outlined in + RFC 2048 [17]. Use of non-registered media types is discouraged. + +3.7.1 Canonicalization and Text Defaults + + Internet media types are registered with a canonical form. In + general, an entity-body transferred via HTTP messages MUST be + represented in the appropriate canonical form prior to its + transmission; the exception is "text" types, as defined in the next + paragraph. + + When in canonical form, media subtypes of the "text" type use CRLF as + the text line break. HTTP relaxes this requirement and allows the + transport of text media with plain CR or LF alone representing a line + break when it is done consistently for an entire entity-body. HTTP + applications MUST accept CRLF, bare CR, and bare LF as being + representative of a line break in text media received via HTTP. In + addition, if the text is represented in a character set that does not + use octets 13 and 10 for CR and LF respectively, as is the case for + some multi-byte character sets, HTTP allows the use of whatever octet + sequences are defined by that character set to represent the + equivalent of CR and LF for line breaks. This flexibility regarding + line breaks applies only to text media in the entity-body; a bare CR + or LF MUST NOT be substituted for CRLF within any of the HTTP control + structures (such as header fields and multipart boundaries). + + If an entity-body is encoded with a Content-Encoding, the underlying + data MUST be in a form defined above prior to being encoded. + + + +Fielding, et. al. Standards Track [Page 26] + +RFC 2068 HTTP/1.1 January 1997 + + + The "charset" parameter is used with some media types to define the + character set (section 3.4) of the data. When no explicit charset + parameter is provided by the sender, media subtypes of the "text" + type are defined to have a default charset value of "ISO-8859-1" when + received via HTTP. 
Data in character sets other than "ISO-8859-1" or + its subsets MUST be labeled with an appropriate charset value. + + Some HTTP/1.0 software has interpreted a Content-Type header without + charset parameter incorrectly to mean "recipient should guess." + Senders wishing to defeat this behavior MAY include a charset + parameter even when the charset is ISO-8859-1 and SHOULD do so when + it is known that it will not confuse the recipient. + + Unfortunately, some older HTTP/1.0 clients did not deal properly with + an explicit charset parameter. HTTP/1.1 recipients MUST respect the + charset label provided by the sender; and those user agents that have + a provision to "guess" a charset MUST use the charset from the + content-type field if they support that charset, rather than the + recipient's preference, when initially displaying a document. + +3.7.2 Multipart Types + + MIME provides for a number of "multipart" types -- encapsulations of + one or more entities within a single message-body. All multipart + types share a common syntax, as defined in MIME [7], and MUST + include a boundary parameter as part of the media type value. The + message body is itself a protocol element and MUST therefore use only + CRLF to represent line breaks between body-parts. Unlike in MIME, the + epilogue of any multipart message MUST be empty; HTTP applications + MUST NOT transmit the epilogue (even if the original multipart + contains an epilogue). + + In HTTP, multipart body-parts MAY contain header fields which are + significant to the meaning of that part. A Content-Location header + field (section 14.15) SHOULD be included in the body-part of each + enclosed entity that can be identified by a URL. + + In general, an HTTP user agent SHOULD follow the same or similar + behavior as a MIME user agent would upon receipt of a multipart type. + If an application receives an unrecognized multipart subtype, the + application MUST treat it as being equivalent to "multipart/mixed". 
+ + Note: The "multipart/form-data" type has been specifically defined + for carrying form data suitable for processing via the POST request + method, as described in RFC 1867 [15]. + + + + + + +Fielding, et. al. Standards Track [Page 27] + +RFC 2068 HTTP/1.1 January 1997 + + +3.8 Product Tokens + + Product tokens are used to allow communicating applications to + identify themselves by software name and version. Most fields using + product tokens also allow sub-products which form a significant part + of the application to be listed, separated by whitespace. By + convention, the products are listed in order of their significance + for identifying the application. + + product = token ["/" product-version] + product-version = token + + Examples: + + User-Agent: CERN-LineMode/2.15 libwww/2.17b3 + Server: Apache/0.8.4 + + Product tokens should be short and to the point -- use of them for + advertising or other non-essential information is explicitly + forbidden. Although any token character may appear in a product- + version, this token SHOULD only be used for a version identifier + (i.e., successive versions of the same product SHOULD only differ in + the product-version portion of the product value). + +3.9 Quality Values + + HTTP content negotiation (section 12) uses short "floating point" + numbers to indicate the relative importance ("weight") of various + negotiable parameters. A weight is normalized to a real number in the + range 0 through 1, where 0 is the minimum and 1 the maximum value. + HTTP/1.1 applications MUST NOT generate more than three digits after + the decimal point. User configuration of these values SHOULD also be + limited in this fashion. + + qvalue = ( "0" [ "." 0*3DIGIT ] ) + | ( "1" [ "." 0*3("0") ] ) + + "Quality values" is a misnomer, since these values merely represent + relative degradation in desired quality. 
+ +3.10 Language Tags + + A language tag identifies a natural language spoken, written, or + otherwise conveyed by human beings for communication of information + to other human beings. Computer languages are explicitly excluded. + HTTP uses language tags within the Accept-Language and Content- + Language fields. + + + + +Fielding, et. al. Standards Track [Page 28] + +RFC 2068 HTTP/1.1 January 1997 + + + The syntax and registry of HTTP language tags is the same as that + defined by RFC 1766 [1]. In summary, a language tag is composed of 1 + or more parts: A primary language tag and a possibly empty series of + subtags: + + language-tag = primary-tag *( "-" subtag ) + + primary-tag = 1*8ALPHA + subtag = 1*8ALPHA + + Whitespace is not allowed within the tag and all tags are case- + insensitive. The name space of language tags is administered by the + IANA. Example tags include: + + en, en-US, en-cockney, i-cherokee, x-pig-latin + + where any two-letter primary-tag is an ISO 639 language abbreviation + and any two-letter initial subtag is an ISO 3166 country code. (The + last three tags above are not registered tags; all but the last are + examples of tags which could be registered in future.) + +3.11 Entity Tags + + Entity tags are used for comparing two or more entities from the same + requested resource. HTTP/1.1 uses entity tags in the ETag (section + 14.20), If-Match (section 14.25), If-None-Match (section 14.26), and + If-Range (section 14.27) header fields. The definition of how they + are used and compared as cache validators is in section 13.3.3. An + entity tag consists of an opaque quoted string, possibly prefixed by + a weakness indicator. + + entity-tag = [ weak ] opaque-tag + + weak = "W/" + opaque-tag = quoted-string + + A "strong entity tag" may be shared by two entities of a resource + only if they are equivalent by octet equality. 
+ + A "weak entity tag," indicated by the "W/" prefix, may be shared by + two entities of a resource only if the entities are equivalent and + could be substituted for each other with no significant change in + semantics. A weak entity tag can only be used for weak comparison. + + An entity tag MUST be unique across all versions of all entities + associated with a particular resource. A given entity tag value may + be used for entities obtained by requests on different URIs without + implying anything about the equivalence of those entities. + + + +Fielding, et. al. Standards Track [Page 29] + +RFC 2068 HTTP/1.1 January 1997 + + +3.12 Range Units + + HTTP/1.1 allows a client to request that only part (a range of) the + response entity be included within the response. HTTP/1.1 uses range + units in the Range (section 14.36) and Content-Range (section 14.17) + header fields. An entity may be broken down into subranges according + to various structural units. + + range-unit = bytes-unit | other-range-unit + + bytes-unit = "bytes" + other-range-unit = token + +The only range unit defined by HTTP/1.1 is "bytes". HTTP/1.1 + implementations may ignore ranges specified using other units. + HTTP/1.1 has been designed to allow implementations of applications + that do not depend on knowledge of ranges. + +4 HTTP Message + +4.1 Message Types + + HTTP messages consist of requests from client to server and responses + from server to client. + + HTTP-message = Request | Response ; HTTP/1.1 messages + + Request (section 5) and Response (section 6) messages use the generic + message format of RFC 822 [9] for transferring entities (the payload + of the message). Both types of message consist of a start-line, one + or more header fields (also known as "headers"), an empty line (i.e., + a line with nothing preceding the CRLF) indicating the end of the + header fields, and an optional message-body. 
+ + generic-message = start-line + *message-header + CRLF + [ message-body ] + + start-line = Request-Line | Status-Line + + In the interest of robustness, servers SHOULD ignore any empty + line(s) received where a Request-Line is expected. In other words, if + the server is reading the protocol stream at the beginning of a + message and receives a CRLF first, it should ignore the CRLF. + + + + + + +Fielding, et. al. Standards Track [Page 30] + +RFC 2068 HTTP/1.1 January 1997 + + + Note: certain buggy HTTP/1.0 client implementations generate an + extra CRLF's after a POST request. To restate what is explicitly + forbidden by the BNF, an HTTP/1.1 client must not preface or follow + a request with an extra CRLF. + +4.2 Message Headers + + HTTP header fields, which include general-header (section 4.5), + request-header (section 5.3), response-header (section 6.2), and + entity-header (section 7.1) fields, follow the same generic format as + that given in Section 3.1 of RFC 822 [9]. Each header field consists + of a name followed by a colon (":") and the field value. Field names + are case-insensitive. The field value may be preceded by any amount + of LWS, though a single SP is preferred. Header fields can be + extended over multiple lines by preceding each extra line with at + least one SP or HT. Applications SHOULD follow "common form" when + generating HTTP constructs, since there might exist some + implementations that fail to accept anything beyond the common forms. + + message-header = field-name ":" [ field-value ] CRLF + + field-name = token + field-value = *( field-content | LWS ) + + field-content = <the OCTETs making up the field-value + and consisting of either *TEXT or combinations + of token, tspecials, and quoted-string> + + The order in which header fields with differing field names are + received is not significant. However, it is "good practice" to send + general-header fields first, followed by request-header or response- + header fields, and ending with the entity-header fields. 
+ + Multiple message-header fields with the same field-name may be + present in a message if and only if the entire field-value for that + header field is defined as a comma-separated list [i.e., #(values)]. + It MUST be possible to combine the multiple header fields into one + "field-name: field-value" pair, without changing the semantics of the + message, by appending each subsequent field-value to the first, each + separated by a comma. The order in which header fields with the same + field-name are received is therefore significant to the + interpretation of the combined field value, and thus a proxy MUST NOT + change the order of these field values when a message is forwarded. + + + + + + + + +Fielding, et. al. Standards Track [Page 31] + +RFC 2068 HTTP/1.1 January 1997 + + +4.3 Message Body + + The message-body (if any) of an HTTP message is used to carry the + entity-body associated with the request or response. The message-body + differs from the entity-body only when a transfer coding has been + applied, as indicated by the Transfer-Encoding header field (section + 14.40). + + message-body = entity-body + | <entity-body encoded as per Transfer-Encoding> + + Transfer-Encoding MUST be used to indicate any transfer codings + applied by an application to ensure safe and proper transfer of the + message. Transfer-Encoding is a property of the message, not of the + entity, and thus can be added or removed by any application along the + request/response chain. + + The rules for when a message-body is allowed in a message differ for + requests and responses. + + The presence of a message-body in a request is signaled by the + inclusion of a Content-Length or Transfer-Encoding header field in + the request's message-headers. A message-body MAY be included in a + request only when the request method (section 5.1.1) allows an + entity-body. + + For response messages, whether or not a message-body is included with + a message is dependent on both the request method and the response + status code (section 6.1.1). 
All responses to the HEAD request method + MUST NOT include a message-body, even though the presence of entity- + header fields might lead one to believe they do. All 1xx + (informational), 204 (no content), and 304 (not modified) responses + MUST NOT include a message-body. All other responses do include a + message-body, although it may be of zero length. + +4.4 Message Length + + When a message-body is included with a message, the length of that + body is determined by one of the following (in order of precedence): + + 1. Any response message which MUST NOT include a message-body + (such as the 1xx, 204, and 304 responses and any response to a HEAD + request) is always terminated by the first empty line after the + header fields, regardless of the entity-header fields present in the + message. + + 2. If a Transfer-Encoding header field (section 14.40) is present and + indicates that the "chunked" transfer coding has been applied, then + + + +Fielding, et. al. Standards Track [Page 32] + +RFC 2068 HTTP/1.1 January 1997 + + + the length is defined by the chunked encoding (section 3.6). + + 3. If a Content-Length header field (section 14.14) is present, its + value in bytes represents the length of the message-body. + + 4. If the message uses the media type "multipart/byteranges", which is + self-delimiting, then that defines the length. This media type MUST + NOT be used unless the sender knows that the recipient can parse it; + the presence in a request of a Range header with multiple byte-range + specifiers implies that the client can parse multipart/byteranges + responses. + + 5. By the server closing the connection. (Closing the connection + cannot be used to indicate the end of a request body, since that + would leave no possibility for the server to send back a response.) 
+ + For compatibility with HTTP/1.0 applications, HTTP/1.1 requests + containing a message-body MUST include a valid Content-Length header + field unless the server is known to be HTTP/1.1 compliant. If a + request contains a message-body and a Content-Length is not given, + the server SHOULD respond with 400 (bad request) if it cannot + determine the length of the message, or with 411 (length required) if + it wishes to insist on receiving a valid Content-Length. + + All HTTP/1.1 applications that receive entities MUST accept the + "chunked" transfer coding (section 3.6), thus allowing this mechanism + to be used for messages when the message length cannot be determined + in advance. + + Messages MUST NOT include both a Content-Length header field and the + "chunked" transfer coding. If both are received, the Content-Length + MUST be ignored. + + When a Content-Length is given in a message where a message-body is + allowed, its field value MUST exactly match the number of OCTETs in + the message-body. HTTP/1.1 user agents MUST notify the user when an + invalid length is received and detected. + + + + + + + + + + + + + + +Fielding, et. al. Standards Track [Page 33] + +RFC 2068 HTTP/1.1 January 1997 + + +4.5 General Header Fields + + There are a few header fields which have general applicability for + both request and response messages, but which do not apply to the + entity being transferred. These header fields apply only to the + message being transmitted. + + general-header = Cache-Control ; Section 14.9 + | Connection ; Section 14.10 + | Date ; Section 14.19 + | Pragma ; Section 14.32 + | Transfer-Encoding ; Section 14.40 + | Upgrade ; Section 14.41 + | Via ; Section 14.44 + + General-header field names can be extended reliably only in + combination with a change in the protocol version. 
However, new or + experimental header fields may be given the semantics of general + header fields if all parties in the communication recognize them to + be general-header fields. Unrecognized header fields are treated as + entity-header fields. + +5 Request + + A request message from a client to a server includes, within the + first line of that message, the method to be applied to the resource, + the identifier of the resource, and the protocol version in use. + + Request = Request-Line ; Section 5.1 + *( general-header ; Section 4.5 + | request-header ; Section 5.3 + | entity-header ) ; Section 7.1 + CRLF + [ message-body ] ; Section 7.2 + +5.1 Request-Line + + The Request-Line begins with a method token, followed by the + Request-URI and the protocol version, and ending with CRLF. The + elements are separated by SP characters. No CR or LF are allowed + except in the final CRLF sequence. + + Request-Line = Method SP Request-URI SP HTTP-Version CRLF + + + + + + + + +Fielding, et. al. Standards Track [Page 34] + +RFC 2068 HTTP/1.1 January 1997 + + +5.1.1 Method + + The Method token indicates the method to be performed on the resource + identified by the Request-URI. The method is case-sensitive. + + Method = "OPTIONS" ; Section 9.2 + | "GET" ; Section 9.3 + | "HEAD" ; Section 9.4 + | "POST" ; Section 9.5 + | "PUT" ; Section 9.6 + | "DELETE" ; Section 9.7 + | "TRACE" ; Section 9.8 + | extension-method + + extension-method = token + + The list of methods allowed by a resource can be specified in an + Allow header field (section 14.7). The return code of the response + always notifies the client whether a method is currently allowed on a + resource, since the set of allowed methods can change dynamically. + Servers SHOULD return the status code 405 (Method Not Allowed) if the + method is known by the server but not allowed for the requested + resource, and 501 (Not Implemented) if the method is unrecognized or + not implemented by the server. 
The list of methods known by a server + can be listed in a Public response-header field (section 14.35). + + The methods GET and HEAD MUST be supported by all general-purpose + servers. All other methods are optional; however, if the above + methods are implemented, they MUST be implemented with the same + semantics as those specified in section 9. + +5.1.2 Request-URI + + The Request-URI is a Uniform Resource Identifier (section 3.2) and + identifies the resource upon which to apply the request. + + Request-URI = "*" | absoluteURI | abs_path + + The three options for Request-URI are dependent on the nature of the + request. The asterisk "*" means that the request does not apply to a + particular resource, but to the server itself, and is only allowed + when the method used does not necessarily apply to a resource. One + example would be + + OPTIONS * HTTP/1.1 + + The absoluteURI form is required when the request is being made to a + proxy. The proxy is requested to forward the request or service it + + + +Fielding, et. al. Standards Track [Page 35] + +RFC 2068 HTTP/1.1 January 1997 + + + from a valid cache, and return the response. Note that the proxy MAY + forward the request on to another proxy or directly to the server + specified by the absoluteURI. In order to avoid request loops, a + proxy MUST be able to recognize all of its server names, including + any aliases, local variations, and the numeric IP address. An example + Request-Line would be: + + GET http://www.w3.org/pub/WWW/TheProject.html HTTP/1.1 + + To allow for transition to absoluteURIs in all requests in future + versions of HTTP, all HTTP/1.1 servers MUST accept the absoluteURI + form in requests, even though HTTP/1.1 clients will only generate + them in requests to proxies. + + The most common form of Request-URI is that used to identify a + resource on an origin server or gateway. 
In this case the absolute + path of the URI MUST be transmitted (see section 3.2.1, abs_path) as + the Request-URI, and the network location of the URI (net_loc) MUST + be transmitted in a Host header field. For example, a client wishing + to retrieve the resource above directly from the origin server would + create a TCP connection to port 80 of the host "www.w3.org" and send + the lines: + + GET /pub/WWW/TheProject.html HTTP/1.1 + Host: www.w3.org + + followed by the remainder of the Request. Note that the absolute path + cannot be empty; if none is present in the original URI, it MUST be + given as "/" (the server root). + + If a proxy receives a request without any path in the Request-URI and + the method specified is capable of supporting the asterisk form of + request, then the last proxy on the request chain MUST forward the + request with "*" as the final Request-URI. For example, the request + + OPTIONS http://www.ics.uci.edu:8001 HTTP/1.1 + + would be forwarded by the proxy as + + OPTIONS * HTTP/1.1 + Host: www.ics.uci.edu:8001 + + after connecting to port 8001 of host "www.ics.uci.edu". + + The Request-URI is transmitted in the format specified in section + 3.2.1. The origin server MUST decode the Request-URI in order to + properly interpret the request. Servers SHOULD respond to invalid + Request-URIs with an appropriate status code. + + + +Fielding, et. al. Standards Track [Page 36] + +RFC 2068 HTTP/1.1 January 1997 + + + In requests that they forward, proxies MUST NOT rewrite the + "abs_path" part of a Request-URI in any way except as noted above to + replace a null abs_path with "*", no matter what the proxy does in + its internal implementation. + + Note: The "no rewrite" rule prevents the proxy from changing the + meaning of the request when the origin server is improperly using a + non-reserved URL character for a reserved purpose. Implementers + should be aware that some pre-HTTP/1.1 proxies have been known to + rewrite the Request-URI. 
+ +5.2 The Resource Identified by a Request + + HTTP/1.1 origin servers SHOULD be aware that the exact resource + identified by an Internet request is determined by examining both the + Request-URI and the Host header field. + + An origin server that does not allow resources to differ by the + requested host MAY ignore the Host header field value. (But see + section 19.5.1 for other requirements on Host support in HTTP/1.1.) + + An origin server that does differentiate resources based on the host + requested (sometimes referred to as virtual hosts or vanity + hostnames) MUST use the following rules for determining the requested + resource on an HTTP/1.1 request: + + 1. If Request-URI is an absoluteURI, the host is part of the + Request-URI. Any Host header field value in the request MUST be + ignored. + + 2. If the Request-URI is not an absoluteURI, and the request + includes a Host header field, the host is determined by the Host + header field value. + + 3. If the host as determined by rule 1 or 2 is not a valid host on + the server, the response MUST be a 400 (Bad Request) error + message. + + Recipients of an HTTP/1.0 request that lacks a Host header field MAY + attempt to use heuristics (e.g., examination of the URI path for + something unique to a particular host) in order to determine what + exact resource is being requested. + +5.3 Request Header Fields + + The request-header fields allow the client to pass additional + information about the request, and about the client itself, to the + server. These fields act as request modifiers, with semantics + + + +Fielding, et. al. Standards Track [Page 37] + +RFC 2068 HTTP/1.1 January 1997 + + + equivalent to the parameters on a programming language method + invocation. 
+ + request-header = Accept ; Section 14.1 + | Accept-Charset ; Section 14.2 + | Accept-Encoding ; Section 14.3 + | Accept-Language ; Section 14.4 + | Authorization ; Section 14.8 + | From ; Section 14.22 + | Host ; Section 14.23 + | If-Modified-Since ; Section 14.24 + | If-Match ; Section 14.25 + | If-None-Match ; Section 14.26 + | If-Range ; Section 14.27 + | If-Unmodified-Since ; Section 14.28 + | Max-Forwards ; Section 14.31 + | Proxy-Authorization ; Section 14.34 + | Range ; Section 14.36 + | Referer ; Section 14.37 + | User-Agent ; Section 14.42 + + Request-header field names can be extended reliably only in + combination with a change in the protocol version. However, new or + experimental header fields MAY be given the semantics of request- + header fields if all parties in the communication recognize them to + be request-header fields. Unrecognized header fields are treated as + entity-header fields. + +6 Response + + After receiving and interpreting a request message, a server responds + with an HTTP response message. + + Response = Status-Line ; Section 6.1 + *( general-header ; Section 4.5 + | response-header ; Section 6.2 + | entity-header ) ; Section 7.1 + CRLF + [ message-body ] ; Section 7.2 + +6.1 Status-Line + + The first line of a Response message is the Status-Line, consisting + of the protocol version followed by a numeric status code and its + associated textual phrase, with each element separated by SP + characters. No CR or LF is allowed except in the final CRLF + sequence. + + + + +Fielding, et. al. Standards Track [Page 38] + +RFC 2068 HTTP/1.1 January 1997 + + + Status-Line = HTTP-Version SP Status-Code SP Reason-Phrase CRLF + +6.1.1 Status Code and Reason Phrase + + The Status-Code element is a 3-digit integer result code of the + attempt to understand and satisfy the request. These codes are fully + defined in section 10. The Reason-Phrase is intended to give a short + textual description of the Status-Code. 
The Status-Code is intended + for use by automata and the Reason-Phrase is intended for the human + user. The client is not required to examine or display the Reason- + Phrase. + + The first digit of the Status-Code defines the class of response. The + last two digits do not have any categorization role. There are 5 + values for the first digit: + + o 1xx: Informational - Request received, continuing process + + o 2xx: Success - The action was successfully received, understood, + and accepted + + o 3xx: Redirection - Further action must be taken in order to + complete the request + + o 4xx: Client Error - The request contains bad syntax or cannot be + fulfilled + + o 5xx: Server Error - The server failed to fulfill an apparently + valid request + + The individual values of the numeric status codes defined for + HTTP/1.1, and an example set of corresponding Reason-Phrase's, are + presented below. The reason phrases listed here are only recommended + -- they may be replaced by local equivalents without affecting the + protocol. + + Status-Code = "100" ; Continue + | "101" ; Switching Protocols + | "200" ; OK + | "201" ; Created + | "202" ; Accepted + | "203" ; Non-Authoritative Information + | "204" ; No Content + | "205" ; Reset Content + | "206" ; Partial Content + | "300" ; Multiple Choices + | "301" ; Moved Permanently + | "302" ; Moved Temporarily + + + +Fielding, et. al. 
Standards Track [Page 39] + +RFC 2068 HTTP/1.1 January 1997 + + + | "303" ; See Other + | "304" ; Not Modified + | "305" ; Use Proxy + | "400" ; Bad Request + | "401" ; Unauthorized + | "402" ; Payment Required + | "403" ; Forbidden + | "404" ; Not Found + | "405" ; Method Not Allowed + | "406" ; Not Acceptable + | "407" ; Proxy Authentication Required + | "408" ; Request Time-out + | "409" ; Conflict + | "410" ; Gone + | "411" ; Length Required + | "412" ; Precondition Failed + | "413" ; Request Entity Too Large + | "414" ; Request-URI Too Large + | "415" ; Unsupported Media Type + | "500" ; Internal Server Error + | "501" ; Not Implemented + | "502" ; Bad Gateway + | "503" ; Service Unavailable + | "504" ; Gateway Time-out + | "505" ; HTTP Version not supported + | extension-code + + extension-code = 3DIGIT + + Reason-Phrase = *<TEXT, excluding CR, LF> + + HTTP status codes are extensible. HTTP applications are not required + to understand the meaning of all registered status codes, though such + understanding is obviously desirable. However, applications MUST + understand the class of any status code, as indicated by the first + digit, and treat any unrecognized response as being equivalent to the + x00 status code of that class, with the exception that an + unrecognized response MUST NOT be cached. For example, if an + unrecognized status code of 431 is received by the client, it can + safely assume that there was something wrong with its request and + treat the response as if it had received a 400 status code. In such + cases, user agents SHOULD present to the user the entity returned + with the response, since that entity is likely to include human- + readable information which will explain the unusual status. + + + + + + + +Fielding, et. al. Standards Track [Page 40] + +RFC 2068 HTTP/1.1 January 1997 + + +6.2 Response Header Fields + + The response-header fields allow the server to pass additional + information about the response which cannot be placed in the Status- + Line.
These header fields give information about the server and about + further access to the resource identified by the Request-URI. + + response-header = Age ; Section 14.6 + | Location ; Section 14.30 + | Proxy-Authenticate ; Section 14.33 + | Public ; Section 14.35 + | Retry-After ; Section 14.38 + | Server ; Section 14.39 + | Vary ; Section 14.43 + | Warning ; Section 14.45 + | WWW-Authenticate ; Section 14.46 + + Response-header field names can be extended reliably only in + combination with a change in the protocol version. However, new or + experimental header fields MAY be given the semantics of response- + header fields if all parties in the communication recognize them to + be response-header fields. Unrecognized header fields are treated as + entity-header fields. + +7 Entity + + Request and Response messages MAY transfer an entity if not otherwise + restricted by the request method or response status code. An entity + consists of entity-header fields and an entity-body, although some + responses will only include the entity-headers. + + In this section, both sender and recipient refer to either the client + or the server, depending on who sends and who receives the entity. + +7.1 Entity Header Fields + + Entity-header fields define optional metainformation about the + entity-body or, if no body is present, about the resource identified + by the request. + + + + + + + + + + + + +Fielding, et. al. 
Standards Track [Page 41] + +RFC 2068 HTTP/1.1 January 1997 + + + entity-header = Allow ; Section 14.7 + | Content-Base ; Section 14.11 + | Content-Encoding ; Section 14.12 + | Content-Language ; Section 14.13 + | Content-Length ; Section 14.14 + | Content-Location ; Section 14.15 + | Content-MD5 ; Section 14.16 + | Content-Range ; Section 14.17 + | Content-Type ; Section 14.18 + | ETag ; Section 14.20 + | Expires ; Section 14.21 + | Last-Modified ; Section 14.29 + | extension-header + + extension-header = message-header + + The extension-header mechanism allows additional entity-header fields + to be defined without changing the protocol, but these fields cannot + be assumed to be recognizable by the recipient. Unrecognized header + fields SHOULD be ignored by the recipient and forwarded by proxies. + +7.2 Entity Body + + The entity-body (if any) sent with an HTTP request or response is in + a format and encoding defined by the entity-header fields. + + entity-body = *OCTET + + An entity-body is only present in a message when a message-body is + present, as described in section 4.3. The entity-body is obtained + from the message-body by decoding any Transfer-Encoding that may have + been applied to ensure safe and proper transfer of the message. + +7.2.1 Type + + When an entity-body is included with a message, the data type of that + body is determined via the header fields Content-Type and Content- + Encoding. These define a two-layer, ordered encoding model: + + entity-body := Content-Encoding( Content-Type( data ) ) + + Content-Type specifies the media type of the underlying data. + Content-Encoding may be used to indicate any additional content + codings applied to the data, usually for the purpose of data + compression, that are a property of the requested resource. There is + no default encoding. + + + + + +Fielding, et. al. 
Standards Track [Page 42] + +RFC 2068 HTTP/1.1 January 1997 + + + Any HTTP/1.1 message containing an entity-body SHOULD include a + Content-Type header field defining the media type of that body. If + and only if the media type is not given by a Content-Type field, the + recipient MAY attempt to guess the media type via inspection of its + content and/or the name extension(s) of the URL used to identify the + resource. If the media type remains unknown, the recipient SHOULD + treat it as type "application/octet-stream". + +7.2.2 Length + + The length of an entity-body is the length of the message-body after + any transfer codings have been removed. Section 4.4 defines how the + length of a message-body is determined. + +8 Connections + +8.1 Persistent Connections + +8.1.1 Purpose + + Prior to persistent connections, a separate TCP connection was + established to fetch each URL, increasing the load on HTTP servers + and causing congestion on the Internet. The use of inline images and + other associated data often requires a client to make multiple + requests of the same server in a short amount of time. Analyses of + these performance problems are available [30][27]; analysis and + results from a prototype implementation are in [26]. + + Persistent HTTP connections have a number of advantages: + + o By opening and closing fewer TCP connections, CPU time is saved, + and memory used for TCP protocol control blocks is also saved. + o HTTP requests and responses can be pipelined on a connection. + Pipelining allows a client to make multiple requests without + waiting for each response, allowing a single TCP connection to be + used much more efficiently, with much lower elapsed time. + o Network congestion is reduced by reducing the number of packets + caused by TCP opens, and by allowing TCP sufficient time to + determine the congestion state of the network. 
+ o HTTP can evolve more gracefully; since errors can be reported + without the penalty of closing the TCP connection. Clients using + future versions of HTTP might optimistically try a new feature, but + if communicating with an older server, retry with old semantics + after an error is reported. + + HTTP implementations SHOULD implement persistent connections. + + + + + +Fielding, et. al. Standards Track [Page 43] + +RFC 2068 HTTP/1.1 January 1997 + + +8.1.2 Overall Operation + + A significant difference between HTTP/1.1 and earlier versions of + HTTP is that persistent connections are the default behavior of any + HTTP connection. That is, unless otherwise indicated, the client may + assume that the server will maintain a persistent connection. + + Persistent connections provide a mechanism by which a client and a + server can signal the close of a TCP connection. This signaling takes + place using the Connection header field. Once a close has been + signaled, the client MUST NOT send any more requests on that + connection. + +8.1.2.1 Negotiation + + An HTTP/1.1 server MAY assume that a HTTP/1.1 client intends to + maintain a persistent connection unless a Connection header including + the connection-token "close" was sent in the request. If the server + chooses to close the connection immediately after sending the + response, it SHOULD send a Connection header including the + connection-token close. + + An HTTP/1.1 client MAY expect a connection to remain open, but would + decide to keep it open based on whether the response from a server + contains a Connection header with the connection-token close. In case + the client does not want to maintain a connection for more than that + request, it SHOULD send a Connection header including the + connection-token close. + + If either the client or the server sends the close token in the + Connection header, that request becomes the last one for the + connection.
+ + Clients and servers SHOULD NOT assume that a persistent connection is + maintained for HTTP versions less than 1.1 unless it is explicitly + signaled. See section 19.7.1 for more information on backwards + compatibility with HTTP/1.0 clients. + + In order to remain persistent, all messages on the connection must + have a self-defined message length (i.e., one not defined by closure + of the connection), as described in section 4.4. + +8.1.2.2 Pipelining + + A client that supports persistent connections MAY "pipeline" its + requests (i.e., send multiple requests without waiting for each + response). A server MUST send its responses to those requests in the + same order that the requests were received. + + + +Fielding, et. al. Standards Track [Page 44] + +RFC 2068 HTTP/1.1 January 1997 + + + Clients which assume persistent connections and pipeline immediately + after connection establishment SHOULD be prepared to retry their + connection if the first pipelined attempt fails. If a client does + such a retry, it MUST NOT pipeline before it knows the connection is + persistent. Clients MUST also be prepared to resend their requests if + the server closes the connection before sending all of the + corresponding responses. + +8.1.3 Proxy Servers + + It is especially important that proxies correctly implement the + properties of the Connection header field as specified in 14.2.1. + + The proxy server MUST signal persistent connections separately with + its clients and the origin servers (or other proxy servers) that it + connects to. Each persistent connection applies to only one transport + link. + + A proxy server MUST NOT establish a persistent connection with an + HTTP/1.0 client. + +8.1.4 Practical Considerations + + Servers will usually have some time-out value beyond which they will + no longer maintain an inactive connection. 
Proxy servers might make + this a higher value since it is likely that the client will be making + more connections through the same server. The use of persistent + connections places no requirements on the length of this time-out for + either the client or the server. + + When a client or server wishes to time-out it SHOULD issue a graceful + close on the transport connection. Clients and servers SHOULD both + constantly watch for the other side of the transport close, and + respond to it as appropriate. If a client or server does not detect + the other side's close promptly it could cause unnecessary resource + drain on the network. + + A client, server, or proxy MAY close the transport connection at any + time. For example, a client MAY have started to send a new request at + the same time that the server has decided to close the "idle" + connection. From the server's point of view, the connection is being + closed while it was idle, but from the client's point of view, a + request is in progress. + + This means that clients, servers, and proxies MUST be able to recover + from asynchronous close events. Client software SHOULD reopen the + transport connection and retransmit the aborted request without user + interaction so long as the request method is idempotent (see section + + + +Fielding, et. al. Standards Track [Page 45] + +RFC 2068 HTTP/1.1 January 1997 + + + 9.1.2); other methods MUST NOT be automatically retried, although + user agents MAY offer a human operator the choice of retrying the + request. + + However, this automatic retry SHOULD NOT be repeated if the second + request fails. + + Servers SHOULD always respond to at least one request per connection, + if at all possible. Servers SHOULD NOT close a connection in the + middle of transmitting a response, unless a network or client failure + is suspected. + + Clients that use persistent connections SHOULD limit the number of + simultaneous connections that they maintain to a given server. 
A + single-user client SHOULD maintain AT MOST 2 connections with any + server or proxy. A proxy SHOULD use up to 2*N connections to another + server or proxy, where N is the number of simultaneously active + users. These guidelines are intended to improve HTTP response times + and avoid congestion of the Internet or other networks. + +8.2 Message Transmission Requirements + +General requirements: + +o HTTP/1.1 servers SHOULD maintain persistent connections and use + TCP's flow control mechanisms to resolve temporary overloads, + rather than terminating connections with the expectation that + clients will retry. The latter technique can exacerbate network + congestion. + +o An HTTP/1.1 (or later) client sending a message-body SHOULD monitor + the network connection for an error status while it is transmitting + the request. If the client sees an error status, it SHOULD + immediately cease transmitting the body. If the body is being sent + using a "chunked" encoding (section 3.6), a zero length chunk and + empty footer MAY be used to prematurely mark the end of the + message. If the body was preceded by a Content-Length header, the + client MUST close the connection. + +o An HTTP/1.1 (or later) client MUST be prepared to accept a 100 + (Continue) status followed by a regular response. + +o An HTTP/1.1 (or later) server that receives a request from a + HTTP/1.0 (or earlier) client MUST NOT transmit the 100 (continue) + response; it SHOULD either wait for the request to be completed + normally (thus avoiding an interrupted request) or close the + connection prematurely. + + + + +Fielding, et. al. Standards Track [Page 46] + +RFC 2068 HTTP/1.1 January 1997 + + + Upon receiving a method subject to these requirements from an + HTTP/1.1 (or later) client, an HTTP/1.1 (or later) server MUST either + respond with 100 (Continue) status and continue to read from the + input stream, or respond with an error status. 
If it responds with an + error status, it MAY close the transport (TCP) connection or it MAY + continue to read and discard the rest of the request. It MUST NOT + perform the requested method if it returns an error status. + + Clients SHOULD remember the version number of at least the most + recently used server; if an HTTP/1.1 client has seen an HTTP/1.1 or + later response from the server, and it sees the connection close + before receiving any status from the server, the client SHOULD retry + the request without user interaction so long as the request method is + idempotent (see section 9.1.2); other methods MUST NOT be + automatically retried, although user agents MAY offer a human + operator the choice of retrying the request. If the client does + retry the request, the client + + o MUST first send the request header fields, and then + + o MUST wait for the server to respond with either a 100 (Continue) + response, in which case the client should continue, or with an + error status. + + If an HTTP/1.1 client has not seen an HTTP/1.1 or later response from + the server, it should assume that the server implements HTTP/1.0 or + older and will not use the 100 (Continue) response. If in this case + the client sees the connection close before receiving any status from + the server, the client SHOULD retry the request. If the client does + retry the request to this HTTP/1.0 server, it should use the + following "binary exponential backoff" algorithm to be assured of + obtaining a reliable response: + + 1. Initiate a new connection to the server + + 2. Transmit the request-headers + + 3. Initialize a variable R to the estimated round-trip time to the + server (e.g., based on the time it took to establish the + connection), or to a constant value of 5 seconds if the round-trip + time is not available. + + 4. Compute T = R * (2**N), where N is the number of previous retries + of this request. + + 5.
Wait either for an error response from the server, or for T seconds + (whichever comes first) + + + + +Fielding, et. al. Standards Track [Page 47] + +RFC 2068 HTTP/1.1 January 1997 + + + 6. If no error response is received, after T seconds transmit the body + of the request. + + 7. If client sees that the connection is closed prematurely, repeat + from step 1 until the request is accepted, an error response is + received, or the user becomes impatient and terminates the retry + process. + + No matter what the server version, if an error status is received, + the client + + o MUST NOT continue and + + o MUST close the connection if it has not completed sending the + message. + + An HTTP/1.1 (or later) client that sees the connection close after + receiving a 100 (Continue) but before receiving any other status + SHOULD retry the request, and need not wait for 100 (Continue) + response (but MAY do so if this simplifies the implementation). + +9 Method Definitions + + The set of common methods for HTTP/1.1 is defined below. Although + this set can be expanded, additional methods cannot be assumed to + share the same semantics for separately extended clients and servers. + + The Host request-header field (section 14.23) MUST accompany all + HTTP/1.1 requests. + +9.1 Safe and Idempotent Methods + +9.1.1 Safe Methods + + Implementers should be aware that the software represents the user in + their interactions over the Internet, and should be careful to allow + the user to be aware of any actions they may take which may have an + unexpected significance to themselves or others. + + In particular, the convention has been established that the GET and + HEAD methods should never have the significance of taking an action + other than retrieval. These methods should be considered "safe." 
This + allows user agents to represent other methods, such as POST, PUT and + DELETE, in a special way, so that the user is made aware of the fact + that a possibly unsafe action is being requested. + + Naturally, it is not possible to ensure that the server does not + generate side-effects as a result of performing a GET request; in + + + +Fielding, et. al. Standards Track [Page 48] + +RFC 2068 HTTP/1.1 January 1997 + + + fact, some dynamic resources consider that a feature. The important + distinction here is that the user did not request the side-effects, + so therefore cannot be held accountable for them. + +9.1.2 Idempotent Methods + + Methods may also have the property of "idempotence" in that (aside + from error or expiration issues) the side-effects of N > 0 identical + requests is the same as for a single request. The methods GET, HEAD, + PUT and DELETE share this property. + +9.2 OPTIONS + + The OPTIONS method represents a request for information about the + communication options available on the request/response chain + identified by the Request-URI. This method allows the client to + determine the options and/or requirements associated with a resource, + or the capabilities of a server, without implying a resource action + or initiating a resource retrieval. + + Unless the server's response is an error, the response MUST NOT + include entity information other than what can be considered as + communication options (e.g., Allow is appropriate, but Content-Type + is not). Responses to this method are not cachable. + + If the Request-URI is an asterisk ("*"), the OPTIONS request is + intended to apply to the server as a whole. A 200 response SHOULD + include any header fields which indicate optional features + implemented by the server (e.g., Public), including any extensions + not defined by this specification, in addition to any applicable + general or response-header fields. 
As described in section 5.1.2, an + "OPTIONS *" request can be applied through a proxy by specifying the + destination server in the Request-URI without any path information. + + If the Request-URI is not an asterisk, the OPTIONS request applies + only to the options that are available when communicating with that + resource. A 200 response SHOULD include any header fields which + indicate optional features implemented by the server and applicable + to that resource (e.g., Allow), including any extensions not defined + by this specification, in addition to any applicable general or + response-header fields. If the OPTIONS request passes through a + proxy, the proxy MUST edit the response to exclude those options + which apply to a proxy's capabilities and which are known to be + unavailable through that proxy. + + + + + + + +Fielding, et. al. Standards Track [Page 49] + +RFC 2068 HTTP/1.1 January 1997 + + +9.3 GET + + The GET method means retrieve whatever information (in the form of an + entity) is identified by the Request-URI. If the Request-URI refers + to a data-producing process, it is the produced data which shall be + returned as the entity in the response and not the source text of the + process, unless that text happens to be the output of the process. + + The semantics of the GET method change to a "conditional GET" if the + request message includes an If-Modified-Since, If-Unmodified-Since, + If-Match, If-None-Match, or If-Range header field. A conditional GET + method requests that the entity be transferred only under the + circumstances described by the conditional header field(s). The + conditional GET method is intended to reduce unnecessary network + usage by allowing cached entities to be refreshed without requiring + multiple requests or transferring data already held by the client. + + The semantics of the GET method change to a "partial GET" if the + request message includes a Range header field. 
A partial GET requests + that only part of the entity be transferred, as described in section + 14.36. The partial GET method is intended to reduce unnecessary + network usage by allowing partially-retrieved entities to be + completed without transferring data already held by the client. + + The response to a GET request is cachable if and only if it meets the + requirements for HTTP caching described in section 13. + +9.4 HEAD + + The HEAD method is identical to GET except that the server MUST NOT + return a message-body in the response. The metainformation contained + in the HTTP headers in response to a HEAD request SHOULD be identical + to the information sent in response to a GET request. This method can + be used for obtaining metainformation about the entity implied by the + request without transferring the entity-body itself. This method is + often used for testing hypertext links for validity, accessibility, + and recent modification. + + The response to a HEAD request may be cachable in the sense that the + information contained in the response may be used to update a + previously cached entity from that resource. If the new field values + indicate that the cached entity differs from the current entity (as + would be indicated by a change in Content-Length, Content-MD5, ETag + or Last-Modified), then the cache MUST treat the cache entry as + stale. + + + + + + +Fielding, et. al. Standards Track [Page 50] + +RFC 2068 HTTP/1.1 January 1997 + + +9.5 POST + + The POST method is used to request that the destination server accept + the entity enclosed in the request as a new subordinate of the + resource identified by the Request-URI in the Request-Line. 
POST is + designed to allow a uniform method to cover the following functions: + + o Annotation of existing resources; + + o Posting a message to a bulletin board, newsgroup, mailing list, + or similar group of articles; + + o Providing a block of data, such as the result of submitting a + form, to a data-handling process; + + o Extending a database through an append operation. + + The actual function performed by the POST method is determined by the + server and is usually dependent on the Request-URI. The posted entity + is subordinate to that URI in the same way that a file is subordinate + to a directory containing it, a news article is subordinate to a + newsgroup to which it is posted, or a record is subordinate to a + database. + + The action performed by the POST method might not result in a + resource that can be identified by a URI. In this case, either 200 + (OK) or 204 (No Content) is the appropriate response status, + depending on whether or not the response includes an entity that + describes the result. + + If a resource has been created on the origin server, the response + SHOULD be 201 (Created) and contain an entity which describes the + status of the request and refers to the new resource, and a Location + header (see section 14.30). + + Responses to this method are not cachable, unless the response + includes appropriate Cache-Control or Expires header fields. However, + the 303 (See Other) response can be used to direct the user agent to + retrieve a cachable resource. + + POST requests must obey the message transmission requirements set out + in section 8.2. + + + + + + + + + +Fielding, et. al. Standards Track [Page 51] + +RFC 2068 HTTP/1.1 January 1997 + + +9.6 PUT + + The PUT method requests that the enclosed entity be stored under the + supplied Request-URI. If the Request-URI refers to an already + existing resource, the enclosed entity SHOULD be considered as a + modified version of the one residing on the origin server. 
If the + Request-URI does not point to an existing resource, and that URI is + capable of being defined as a new resource by the requesting user + agent, the origin server can create the resource with that URI. If a + new resource is created, the origin server MUST inform the user agent + via the 201 (Created) response. If an existing resource is modified, + either the 200 (OK) or 204 (No Content) response codes SHOULD be sent + to indicate successful completion of the request. If the resource + could not be created or modified with the Request-URI, an appropriate + error response SHOULD be given that reflects the nature of the + problem. The recipient of the entity MUST NOT ignore any Content-* + (e.g. Content-Range) headers that it does not understand or implement + and MUST return a 501 (Not Implemented) response in such cases. + + If the request passes through a cache and the Request-URI identifies + one or more currently cached entities, those entries should be + treated as stale. Responses to this method are not cachable. + + The fundamental difference between the POST and PUT requests is + reflected in the different meaning of the Request-URI. The URI in a + POST request identifies the resource that will handle the enclosed + entity. That resource may be a data-accepting process, a gateway to + some other protocol, or a separate entity that accepts annotations. + In contrast, the URI in a PUT request identifies the entity enclosed + with the request -- the user agent knows what URI is intended and the + server MUST NOT attempt to apply the request to some other resource. + If the server desires that the request be applied to a different URI, + it MUST send a 301 (Moved Permanently) response; the user agent MAY + then make its own decision regarding whether or not to redirect the + request. + + A single resource MAY be identified by many different URIs. 
For + example, an article may have a URI for identifying "the current + version" which is separate from the URI identifying each particular + version. In this case, a PUT request on a general URI may result in + several other URIs being defined by the origin server. + + HTTP/1.1 does not define how a PUT method affects the state of an + origin server. + + PUT requests must obey the message transmission requirements set out + in section 8.2. + + + + +Fielding, et. al. Standards Track [Page 52] + +RFC 2068 HTTP/1.1 January 1997 + + +9.7 DELETE + + The DELETE method requests that the origin server delete the resource + identified by the Request-URI. This method MAY be overridden by human + intervention (or other means) on the origin server. The client cannot + be guaranteed that the operation has been carried out, even if the + status code returned from the origin server indicates that the action + has been completed successfully. However, the server SHOULD NOT + indicate success unless, at the time the response is given, it + intends to delete the resource or move it to an inaccessible + location. + + A successful response SHOULD be 200 (OK) if the response includes an + entity describing the status, 202 (Accepted) if the action has not + yet been enacted, or 204 (No Content) if the response is OK but does + not include an entity. + + If the request passes through a cache and the Request-URI identifies + one or more currently cached entities, those entries should be + treated as stale. Responses to this method are not cachable. + +9.8 TRACE + + The TRACE method is used to invoke a remote, application-layer loop- + back of the request message. The final recipient of the request + SHOULD reflect the message received back to the client as the + entity-body of a 200 (OK) response. The final recipient is either the + origin server or the first proxy or gateway to receive a Max-Forwards + value of zero (0) in the request (see section 14.31).
A TRACE request + MUST NOT include an entity. + + TRACE allows the client to see what is being received at the other + end of the request chain and use that data for testing or diagnostic + information. The value of the Via header field (section 14.44) is of + particular interest, since it acts as a trace of the request chain. + Use of the Max-Forwards header field allows the client to limit the + length of the request chain, which is useful for testing a chain of + proxies forwarding messages in an infinite loop. + + If successful, the response SHOULD contain the entire request message + in the entity-body, with a Content-Type of "message/http". Responses + to this method MUST NOT be cached. + +10 Status Code Definitions + + Each Status-Code is described below, including a description of which + method(s) it can follow and any metainformation required in the + response. + + + +Fielding, et. al. Standards Track [Page 53] + +RFC 2068 HTTP/1.1 January 1997 + + +10.1 Informational 1xx + + This class of status code indicates a provisional response, + consisting only of the Status-Line and optional headers, and is + terminated by an empty line. Since HTTP/1.0 did not define any 1xx + status codes, servers MUST NOT send a 1xx response to an HTTP/1.0 + client except under experimental conditions. + +10.1.1 100 Continue + + The client may continue with its request. This interim response is + used to inform the client that the initial part of the request has + been received and has not yet been rejected by the server. The client + SHOULD continue by sending the remainder of the request or, if the + request has already been completed, ignore this response. The server + MUST send a final response after the request has been completed. + +10.1.2 101 Switching Protocols + + The server understands and is willing to comply with the client's + request, via the Upgrade message header field (section 14.41), for a + change in the application protocol being used on this connection. 
The + server will switch protocols to those defined by the response's + Upgrade header field immediately after the empty line which + terminates the 101 response. + + The protocol should only be switched when it is advantageous to do + so. For example, switching to a newer version of HTTP is + advantageous over older versions, and switching to a real-time, + synchronous protocol may be advantageous when delivering resources + that use such features. + +10.2 Successful 2xx + + This class of status code indicates that the client's request was + successfully received, understood, and accepted. + +10.2.1 200 OK + + The request has succeeded. The information returned with the response + is dependent on the method used in the request, for example: + + GET an entity corresponding to the requested resource is sent in the + response; + + HEAD the entity-header fields corresponding to the requested resource + are sent in the response without any message-body; + + + + +Fielding, et. al. Standards Track [Page 54] + +RFC 2068 HTTP/1.1 January 1997 + + + POST an entity describing or containing the result of the action; + + TRACE an entity containing the request message as received by the end + server. + +10.2.2 201 Created + + The request has been fulfilled and resulted in a new resource being + created. The newly created resource can be referenced by the URI(s) + returned in the entity of the response, with the most specific URL + for the resource given by a Location header field. The origin server + MUST create the resource before returning the 201 status code. If the + action cannot be carried out immediately, the server should respond + with 202 (Accepted) response instead. + +10.2.3 202 Accepted + + The request has been accepted for processing, but the processing has + not been completed. The request MAY or MAY NOT eventually be acted + upon, as it MAY be disallowed when processing actually takes place. 
+ There is no facility for re-sending a status code from an + asynchronous operation such as this. + + The 202 response is intentionally non-committal. Its purpose is to + allow a server to accept a request for some other process (perhaps a + batch-oriented process that is only run once per day) without + requiring that the user agent's connection to the server persist + until the process is completed. The entity returned with this + response SHOULD include an indication of the request's current status + and either a pointer to a status monitor or some estimate of when the + user can expect the request to be fulfilled. + +10.2.4 203 Non-Authoritative Information + + The returned metainformation in the entity-header is not the + definitive set as available from the origin server, but is gathered + from a local or a third-party copy. The set presented MAY be a subset + or superset of the original version. For example, including local + annotation information about the resource MAY result in a superset of + the metainformation known by the origin server. Use of this response + code is not required and is only appropriate when the response would + otherwise be 200 (OK). + +10.2.5 204 No Content + + The server has fulfilled the request but there is no new information + to send back. If the client is a user agent, it SHOULD NOT change its + document view from that which caused the request to be sent. This + + + +Fielding, et. al. Standards Track [Page 55] + +RFC 2068 HTTP/1.1 January 1997 + + + response is primarily intended to allow input for actions to take + place without causing a change to the user agent's active document + view. The response MAY include new metainformation in the form of + entity-headers, which SHOULD apply to the document currently in the + user agent's active view. + + The 204 response MUST NOT include a message-body, and thus is always + terminated by the first empty line after the header fields. 
+ +10.2.6 205 Reset Content + + The server has fulfilled the request and the user agent SHOULD reset + the document view which caused the request to be sent. This response + is primarily intended to allow input for actions to take place via + user input, followed by a clearing of the form in which the input is + given so that the user can easily initiate another input action. The + response MUST NOT include an entity. + +10.2.7 206 Partial Content + + The server has fulfilled the partial GET request for the resource. + The request must have included a Range header field (section 14.36) + indicating the desired range. The response MUST include either a + Content-Range header field (section 14.17) indicating the range + included with this response, or a multipart/byteranges Content-Type + including Content-Range fields for each part. If multipart/byteranges + is not used, the Content-Length header field in the response MUST + match the actual number of OCTETs transmitted in the message-body. + + A cache that does not support the Range and Content-Range headers + MUST NOT cache 206 (Partial) responses. + +10.3 Redirection 3xx + + This class of status code indicates that further action needs to be + taken by the user agent in order to fulfill the request. The action + required MAY be carried out by the user agent without interaction + with the user if and only if the method used in the second request is + GET or HEAD. A user agent SHOULD NOT automatically redirect a request + more than 5 times, since such redirections usually indicate an + infinite loop. + + + + + + + + + + +Fielding, et. al. 
Standards Track [Page 56] + +RFC 2068 HTTP/1.1 January 1997 + + +10.3.1 300 Multiple Choices + + The requested resource corresponds to any one of a set of + representations, each with its own specific location, and agent- + driven negotiation information (section 12) is being provided so that + the user (or user agent) can select a preferred representation and + redirect its request to that location. + + Unless it was a HEAD request, the response SHOULD include an entity + containing a list of resource characteristics and location(s) from + which the user or user agent can choose the one most appropriate. The + entity format is specified by the media type given in the Content- + Type header field. Depending upon the format and the capabilities of + the user agent, selection of the most appropriate choice may be + performed automatically. However, this specification does not define + any standard for such automatic selection. + + If the server has a preferred choice of representation, it SHOULD + include the specific URL for that representation in the Location + field; user agents MAY use the Location field value for automatic + redirection. This response is cachable unless indicated otherwise. + +10.3.2 301 Moved Permanently + + The requested resource has been assigned a new permanent URI and any + future references to this resource SHOULD be done using one of the + returned URIs. Clients with link editing capabilities SHOULD + automatically re-link references to the Request-URI to one or more of + the new references returned by the server, where possible. This + response is cachable unless indicated otherwise. + + If the new URI is a location, its URL SHOULD be given by the Location + field in the response. Unless the request method was HEAD, the entity + of the response SHOULD contain a short hypertext note with a + hyperlink to the new URI(s). 
+ + If the 301 status code is received in response to a request other + than GET or HEAD, the user agent MUST NOT automatically redirect the + request unless it can be confirmed by the user, since this might + change the conditions under which the request was issued. + + Note: When automatically redirecting a POST request after receiving + a 301 status code, some existing HTTP/1.0 user agents will + erroneously change it into a GET request. + + + + + + + +Fielding, et. al. Standards Track [Page 57] + +RFC 2068 HTTP/1.1 January 1997 + + +10.3.3 302 Moved Temporarily + + The requested resource resides temporarily under a different URI. + Since the redirection may be altered on occasion, the client SHOULD + continue to use the Request-URI for future requests. This response is + only cachable if indicated by a Cache-Control or Expires header + field. + + If the new URI is a location, its URL SHOULD be given by the Location + field in the response. Unless the request method was HEAD, the entity + of the response SHOULD contain a short hypertext note with a + hyperlink to the new URI(s). + + If the 302 status code is received in response to a request other + than GET or HEAD, the user agent MUST NOT automatically redirect the + request unless it can be confirmed by the user, since this might + change the conditions under which the request was issued. + + Note: When automatically redirecting a POST request after receiving + a 302 status code, some existing HTTP/1.0 user agents will + erroneously change it into a GET request. + +10.3.4 303 See Other + + The response to the request can be found under a different URI and + SHOULD be retrieved using a GET method on that resource. This method + exists primarily to allow the output of a POST-activated script to + redirect the user agent to a selected resource. The new URI is not a + substitute reference for the originally requested resource. 
The 303 + response is not cachable, but the response to the second (redirected) + request MAY be cachable. + + If the new URI is a location, its URL SHOULD be given by the Location + field in the response. Unless the request method was HEAD, the entity + of the response SHOULD contain a short hypertext note with a + hyperlink to the new URI(s). + +10.3.5 304 Not Modified + + If the client has performed a conditional GET request and access is + allowed, but the document has not been modified, the server SHOULD + respond with this status code. The response MUST NOT contain a + message-body. + + + + + + + + +Fielding, et. al. Standards Track [Page 58] + +RFC 2068 HTTP/1.1 January 1997 + + + The response MUST include the following header fields: + + o Date + + o ETag and/or Content-Location, if the header would have been sent in + a 200 response to the same request + + o Expires, Cache-Control, and/or Vary, if the field-value might + differ from that sent in any previous response for the same variant + + If the conditional GET used a strong cache validator (see section + 13.3.3), the response SHOULD NOT include other entity-headers. + Otherwise (i.e., the conditional GET used a weak validator), the + response MUST NOT include other entity-headers; this prevents + inconsistencies between cached entity-bodies and updated headers. + + If a 304 response indicates an entity not currently cached, then the + cache MUST disregard the response and repeat the request without the + conditional. + + If a cache uses a received 304 response to update a cache entry, the + cache MUST update the entry to reflect any new field values given in + the response. + + The 304 response MUST NOT include a message-body, and thus is always + terminated by the first empty line after the header fields. + +10.3.6 305 Use Proxy + + The requested resource MUST be accessed through the proxy given by + the Location field. The Location field gives the URL of the proxy. 
+ The recipient is expected to repeat the request via the proxy. + +10.4 Client Error 4xx + + The 4xx class of status code is intended for cases in which the + client seems to have erred. Except when responding to a HEAD request, + the server SHOULD include an entity containing an explanation of the + error situation, and whether it is a temporary or permanent + condition. These status codes are applicable to any request method. + User agents SHOULD display any included entity to the user. + + Note: If the client is sending data, a server implementation using + TCP should be careful to ensure that the client acknowledges + receipt of the packet(s) containing the response, before the server + closes the input connection. If the client continues sending data + to the server after the close, the server's TCP stack will send a + reset packet to the client, which may erase the client's + + + +Fielding, et. al. Standards Track [Page 59] + +RFC 2068 HTTP/1.1 January 1997 + + + unacknowledged input buffers before they can be read and + interpreted by the HTTP application. + +10.4.1 400 Bad Request + + The request could not be understood by the server due to malformed + syntax. The client SHOULD NOT repeat the request without + modifications. + +10.4.2 401 Unauthorized + + The request requires user authentication. The response MUST include a + WWW-Authenticate header field (section 14.46) containing a challenge + applicable to the requested resource. The client MAY repeat the + request with a suitable Authorization header field (section 14.8). If + the request already included Authorization credentials, then the 401 + response indicates that authorization has been refused for those + credentials. 
If the 401 response contains the same challenge as the + prior response, and the user agent has already attempted + authentication at least once, then the user SHOULD be presented the + entity that was given in the response, since that entity MAY include + relevant diagnostic information. HTTP access authentication is + explained in section 11. + +10.4.3 402 Payment Required + + This code is reserved for future use. + +10.4.4 403 Forbidden + + The server understood the request, but is refusing to fulfill it. + Authorization will not help and the request SHOULD NOT be repeated. + If the request method was not HEAD and the server wishes to make + public why the request has not been fulfilled, it SHOULD describe the + reason for the refusal in the entity. This status code is commonly + used when the server does not wish to reveal exactly why the request + has been refused, or when no other response is applicable. + +10.4.5 404 Not Found + + The server has not found anything matching the Request-URI. No + indication is given of whether the condition is temporary or + permanent. + + + + + + + + +Fielding, et. al. Standards Track [Page 60] + +RFC 2068 HTTP/1.1 January 1997 + + + If the server does not wish to make this information available to the + client, the status code 403 (Forbidden) can be used instead. The 410 + (Gone) status code SHOULD be used if the server knows, through some + internally configurable mechanism, that an old resource is + permanently unavailable and has no forwarding address. + +10.4.6 405 Method Not Allowed + + The method specified in the Request-Line is not allowed for the + resource identified by the Request-URI. The response MUST include an + Allow header containing a list of valid methods for the requested + resource. + +10.4.7 406 Not Acceptable + + The resource identified by the request is only capable of generating + response entities which have content characteristics not acceptable + according to the accept headers sent in the request. 
+ + Unless it was a HEAD request, the response SHOULD include an entity + containing a list of available entity characteristics and location(s) + from which the user or user agent can choose the one most + appropriate. The entity format is specified by the media type given + in the Content-Type header field. Depending upon the format and the + capabilities of the user agent, selection of the most appropriate + choice may be performed automatically. However, this specification + does not define any standard for such automatic selection. + + Note: HTTP/1.1 servers are allowed to return responses which are + not acceptable according to the accept headers sent in the request. + In some cases, this may even be preferable to sending a 406 + response. User agents are encouraged to inspect the headers of an + incoming response to determine if it is acceptable. If the response + could be unacceptable, a user agent SHOULD temporarily stop receipt + of more data and query the user for a decision on further actions. + +10.4.8 407 Proxy Authentication Required + + This code is similar to 401 (Unauthorized), but indicates that the + client MUST first authenticate itself with the proxy. The proxy MUST + return a Proxy-Authenticate header field (section 14.33) containing a + challenge applicable to the proxy for the requested resource. The + client MAY repeat the request with a suitable Proxy-Authorization + header field (section 14.34). HTTP access authentication is explained + in section 11. + + + + + + +Fielding, et. al. Standards Track [Page 61] + +RFC 2068 HTTP/1.1 January 1997 + + +10.4.9 408 Request Timeout + + The client did not produce a request within the time that the server + was prepared to wait. The client MAY repeat the request without + modifications at any later time. + +10.4.10 409 Conflict + + The request could not be completed due to a conflict with the current + state of the resource. 
This code is only allowed in situations where + it is expected that the user might be able to resolve the conflict + and resubmit the request. The response body SHOULD include enough + information for the user to recognize the source of the conflict. + Ideally, the response entity would include enough information for the + user or user agent to fix the problem; however, that may not be + possible and is not required. + + Conflicts are most likely to occur in response to a PUT request. If + versioning is being used and the entity being PUT includes changes to + a resource which conflict with those made by an earlier (third-party) + request, the server MAY use the 409 response to indicate that it + can't complete the request. In this case, the response entity SHOULD + contain a list of the differences between the two versions in a + format defined by the response Content-Type. + +10.4.11 410 Gone + + The requested resource is no longer available at the server and no + forwarding address is known. This condition SHOULD be considered + permanent. Clients with link editing capabilities SHOULD delete + references to the Request-URI after user approval. If the server does + not know, or has no facility to determine, whether or not the + condition is permanent, the status code 404 (Not Found) SHOULD be + used instead. This response is cachable unless indicated otherwise. + + The 410 response is primarily intended to assist the task of web + maintenance by notifying the recipient that the resource is + intentionally unavailable and that the server owners desire that + remote links to that resource be removed. Such an event is common for + limited-time, promotional services and for resources belonging to + individuals no longer working at the server's site. It is not + necessary to mark all permanently unavailable resources as "gone" or + to keep the mark for any length of time -- that is left to the + discretion of the server owner. + + + + + + + +Fielding, et. al. 
Standards Track [Page 62] + +RFC 2068 HTTP/1.1 January 1997 + + +10.4.12 411 Length Required + + The server refuses to accept the request without a defined Content- + Length. The client MAY repeat the request if it adds a valid + Content-Length header field containing the length of the message-body + in the request message. + +10.4.13 412 Precondition Failed + + The precondition given in one or more of the request-header fields + evaluated to false when it was tested on the server. This response + code allows the client to place preconditions on the current resource + metainformation (header field data) and thus prevent the requested + method from being applied to a resource other than the one intended. + +10.4.14 413 Request Entity Too Large + + The server is refusing to process a request because the request + entity is larger than the server is willing or able to process. The + server may close the connection to prevent the client from continuing + the request. + + If the condition is temporary, the server SHOULD include a Retry- + After header field to indicate that it is temporary and after what + time the client may try again. + +10.4.15 414 Request-URI Too Long + + The server is refusing to service the request because the Request-URI + is longer than the server is willing to interpret. This rare + condition is only likely to occur when a client has improperly + converted a POST request to a GET request with long query + information, when the client has descended into a URL "black hole" of + redirection (e.g., a redirected URL prefix that points to a suffix of + itself), or when the server is under attack by a client attempting to + exploit security holes present in some servers using fixed-length + buffers for reading or manipulating the Request-URI. + +10.4.16 415 Unsupported Media Type + + The server is refusing to service the request because the entity of + the request is in a format not supported by the requested resource + for the requested method. 
+ + + + + + + + +Fielding, et. al. Standards Track [Page 63] + +RFC 2068 HTTP/1.1 January 1997 + + +10.5 Server Error 5xx + + Response status codes beginning with the digit "5" indicate cases in + which the server is aware that it has erred or is incapable of + performing the request. Except when responding to a HEAD request, the + server SHOULD include an entity containing an explanation of the + error situation, and whether it is a temporary or permanent + condition. User agents SHOULD display any included entity to the + user. These response codes are applicable to any request method. + +10.5.1 500 Internal Server Error + + The server encountered an unexpected condition which prevented it + from fulfilling the request. + +10.5.2 501 Not Implemented + + The server does not support the functionality required to fulfill the + request. This is the appropriate response when the server does not + recognize the request method and is not capable of supporting it for + any resource. + +10.5.3 502 Bad Gateway + + The server, while acting as a gateway or proxy, received an invalid + response from the upstream server it accessed in attempting to + fulfill the request. + +10.5.4 503 Service Unavailable + + The server is currently unable to handle the request due to a + temporary overloading or maintenance of the server. The implication + is that this is a temporary condition which will be alleviated after + some delay. If known, the length of the delay may be indicated in a + Retry-After header. If no Retry-After is given, the client SHOULD + handle the response as it would for a 500 response. + + Note: The existence of the 503 status code does not imply that a + server must use it when becoming overloaded. Some servers may wish + to simply refuse the connection. + +10.5.5 504 Gateway Timeout + + The server, while acting as a gateway or proxy, did not receive a + timely response from the upstream server it accessed in attempting to + complete the request. 
+ + + + + +Fielding, et. al. Standards Track [Page 64] + +RFC 2068 HTTP/1.1 January 1997 + + +10.5.6 505 HTTP Version Not Supported + + The server does not support, or refuses to support, the HTTP protocol + version that was used in the request message. The server is + indicating that it is unable or unwilling to complete the request + using the same major version as the client, as described in section + 3.1, other than with this error message. The response SHOULD contain + an entity describing why that version is not supported and what other + protocols are supported by that server. + +11 Access Authentication + + HTTP provides a simple challenge-response authentication mechanism + which MAY be used by a server to challenge a client request and by a + client to provide authentication information. It uses an extensible, + case-insensitive token to identify the authentication scheme, + followed by a comma-separated list of attribute-value pairs which + carry the parameters necessary for achieving authentication via that + scheme. + + auth-scheme = token + + auth-param = token "=" quoted-string + + The 401 (Unauthorized) response message is used by an origin server + to challenge the authorization of a user agent. This response MUST + include a WWW-Authenticate header field containing at least one + challenge applicable to the requested resource. + + challenge = auth-scheme 1*SP realm *( "," auth-param ) + + realm = "realm" "=" realm-value + realm-value = quoted-string + + The realm attribute (case-insensitive) is required for all + authentication schemes which issue a challenge. The realm value + (case-sensitive), in combination with the canonical root URL (see + section 5.1.2) of the server being accessed, defines the protection + space. These realms allow the protected resources on a server to be + partitioned into a set of protection spaces, each with its own + authentication scheme and/or authorization database. 
The realm value + is a string, generally assigned by the origin server, which may have + additional semantics specific to the authentication scheme. + + A user agent that wishes to authenticate itself with a server-- + usually, but not necessarily, after receiving a 401 or 411 response- + -MAY do so by including an Authorization header field with the + request. The Authorization field value consists of credentials + + + +Fielding, et. al. Standards Track [Page 65] + +RFC 2068 HTTP/1.1 January 1997 + + + containing the authentication information of the user agent for the + realm of the resource being requested. + + credentials = basic-credentials + | auth-scheme #auth-param + + The domain over which credentials can be automatically applied by a + user agent is determined by the protection space. If a prior request + has been authorized, the same credentials MAY be reused for all other + requests within that protection space for a period of time determined + by the authentication scheme, parameters, and/or user preference. + Unless otherwise defined by the authentication scheme, a single + protection space cannot extend outside the scope of its server. + + If the server does not wish to accept the credentials sent with a + request, it SHOULD return a 401 (Unauthorized) response. The response + MUST include a WWW-Authenticate header field containing the (possibly + new) challenge applicable to the requested resource and an entity + explaining the refusal. + + The HTTP protocol does not restrict applications to this simple + challenge-response mechanism for access authentication. Additional + mechanisms MAY be used, such as encryption at the transport level or + via message encapsulation, and with additional header fields + specifying authentication information. However, these additional + mechanisms are not defined by this specification. + + Proxies MUST be completely transparent regarding user agent + authentication. 
That is, they MUST forward the WWW-Authenticate and + Authorization headers untouched, and follow the rules found in + section 14.8. + + HTTP/1.1 allows a client to pass authentication information to and + from a proxy via the Proxy-Authenticate and Proxy-Authorization + headers. + +11.1 Basic Authentication Scheme + + The "basic" authentication scheme is based on the model that the user + agent must authenticate itself with a user-ID and a password for each + realm. The realm value should be considered an opaque string which + can only be compared for equality with other realms on that server. + The server will service the request only if it can validate the + user-ID and password for the protection space of the Request-URI. + There are no optional authentication parameters. + + + + + + +Fielding, et. al. Standards Track [Page 66] + +RFC 2068 HTTP/1.1 January 1997 + + + Upon receipt of an unauthorized request for a URI within the + protection space, the server MAY respond with a challenge like the + following: + + WWW-Authenticate: Basic realm="WallyWorld" + + where "WallyWorld" is the string assigned by the server to identify + the protection space of the Request-URI. + + To receive authorization, the client sends the userid and password, + separated by a single colon (":") character, within a base64 encoded + string in the credentials. + + basic-credentials = "Basic" SP basic-cookie + + basic-cookie = <base64 [7] encoding of user-pass, + except not limited to 76 char/line> + + user-pass = userid ":" password + + userid = *<TEXT excluding ":"> + + password = *TEXT + + Userids might be case sensitive. + + If the user agent wishes to send the userid "Aladdin" and password + "open sesame", it would use the following header field: + + Authorization: Basic QWxhZGRpbjpvcGVuIHNlc2FtZQ== + + See section 15 for security considerations associated with Basic + authentication. + +11.2 Digest Authentication Scheme + + A digest authentication for HTTP is specified in RFC 2069 [32]. 
+ +12 Content Negotiation + + Most HTTP responses include an entity which contains information for + interpretation by a human user. Naturally, it is desirable to supply + the user with the "best available" entity corresponding to the + request. Unfortunately for servers and caches, not all users have + the same preferences for what is "best," and not all user agents are + equally capable of rendering all entity types. For that reason, HTTP + has provisions for several mechanisms for "content negotiation" -- + the process of selecting the best representation for a given response + + + +Fielding, et. al. Standards Track [Page 67] + +RFC 2068 HTTP/1.1 January 1997 + + + when there are multiple representations available. + + Note: This is not called "format negotiation" because the alternate + representations may be of the same media type, but use different + capabilities of that type, be in different languages, etc. + + Any response containing an entity-body MAY be subject to negotiation, + including error responses. + + There are two kinds of content negotiation which are possible in + HTTP: server-driven and agent-driven negotiation. These two kinds of + negotiation are orthogonal and thus may be used separately or in + combination. One method of combination, referred to as transparent + negotiation, occurs when a cache uses the agent-driven negotiation + information provided by the origin server in order to provide + server-driven negotiation for subsequent requests. + +12.1 Server-driven Negotiation + + If the selection of the best representation for a response is made by + an algorithm located at the server, it is called server-driven + negotiation. Selection is based on the available representations of + the response (the dimensions over which it can vary; e.g. language, + content-coding, etc.) and the contents of particular header fields in + the request message or on other information pertaining to the request + (such as the network address of the client). 
+ + Server-driven negotiation is advantageous when the algorithm for + selecting from among the available representations is difficult to + describe to the user agent, or when the server desires to send its + "best guess" to the client along with the first response (hoping to + avoid the round-trip delay of a subsequent request if the "best + guess" is good enough for the user). In order to improve the server's + guess, the user agent MAY include request header fields (Accept, + Accept-Language, Accept-Encoding, etc.) which describe its + preferences for such a response. + + Server-driven negotiation has disadvantages: + +1. It is impossible for the server to accurately determine what might be + "best" for any given user, since that would require complete + knowledge of both the capabilities of the user agent and the intended + use for the response (e.g., does the user want to view it on screen + or print it on paper?). + +2. Having the user agent describe its capabilities in every request can + be both very inefficient (given that only a small percentage of + responses have multiple representations) and a potential violation of + + + +Fielding, et. al. Standards Track [Page 68] + +RFC 2068 HTTP/1.1 January 1997 + + + the user's privacy. + +3. It complicates the implementation of an origin server and the + algorithms for generating responses to a request. + +4. It may limit a public cache's ability to use the same response for + multiple user's requests. + + HTTP/1.1 includes the following request-header fields for enabling + server-driven negotiation through description of user agent + capabilities and user preferences: Accept (section 14.1), Accept- + Charset (section 14.2), Accept-Encoding (section 14.3), Accept- + Language (section 14.4), and User-Agent (section 14.42). 
However, an + origin server is not limited to these dimensions and MAY vary the + response based on any aspect of the request, including information + outside the request-header fields or within extension header fields + not defined by this specification. + + HTTP/1.1 origin servers MUST include an appropriate Vary header field + (section 14.43) in any cachable response based on server-driven + negotiation. The Vary header field describes the dimensions over + which the response might vary (i.e. the dimensions over which the + origin server picks its "best guess" response from multiple + representations). + + HTTP/1.1 public caches MUST recognize the Vary header field when it + is included in a response and obey the requirements described in + section 13.6 that describes the interactions between caching and + content negotiation. + +12.2 Agent-driven Negotiation + + With agent-driven negotiation, selection of the best representation + for a response is performed by the user agent after receiving an + initial response from the origin server. Selection is based on a list + of the available representations of the response included within the + header fields (this specification reserves the field-name Alternates, + as described in appendix 19.6.2.1) or entity-body of the initial + response, with each representation identified by its own URI. + Selection from among the representations may be performed + automatically (if the user agent is capable of doing so) or manually + by the user selecting from a generated (possibly hypertext) menu. + + Agent-driven negotiation is advantageous when the response would vary + over commonly-used dimensions (such as type, language, or encoding), + when the origin server is unable to determine a user agent's + capabilities from examining the request, and generally when public + caches are used to distribute server load and reduce network usage. + + + +Fielding, et. al. 
Standards Track [Page 69] + +RFC 2068 HTTP/1.1 January 1997 + + + Agent-driven negotiation suffers from the disadvantage of needing a + second request to obtain the best alternate representation. This + second request is only efficient when caching is used. In addition, + this specification does not define any mechanism for supporting + automatic selection, though it also does not prevent any such + mechanism from being developed as an extension and used within + HTTP/1.1. + + HTTP/1.1 defines the 300 (Multiple Choices) and 406 (Not Acceptable) + status codes for enabling agent-driven negotiation when the server is + unwilling or unable to provide a varying response using server-driven + negotiation. + +12.3 Transparent Negotiation + + Transparent negotiation is a combination of both server-driven and + agent-driven negotiation. When a cache is supplied with a form of the + list of available representations of the response (as in agent-driven + negotiation) and the dimensions of variance are completely understood + by the cache, then the cache becomes capable of performing server- + driven negotiation on behalf of the origin server for subsequent + requests on that resource. + + Transparent negotiation has the advantage of distributing the + negotiation work that would otherwise be required of the origin + server and also removing the second request delay of agent-driven + negotiation when the cache is able to correctly guess the right + response. + + This specification does not define any mechanism for transparent + negotiation, though it also does not prevent any such mechanism from + being developed as an extension and used within HTTP/1.1. An HTTP/1.1 + cache performing transparent negotiation MUST include a Vary header + field in the response (defining the dimensions of its variance) if it + is cachable to ensure correct interoperation with all HTTP/1.1 + clients. 
The agent-driven negotiation information supplied by the + origin server SHOULD be included with the transparently negotiated + response. + +13 Caching in HTTP + + HTTP is typically used for distributed information systems, where + performance can be improved by the use of response caches. The + HTTP/1.1 protocol includes a number of elements intended to make + caching work as well as possible. Because these elements are + inextricable from other aspects of the protocol, and because they + interact with each other, it is useful to describe the basic caching + design of HTTP separately from the detailed descriptions of methods, + + + +Fielding, et. al. Standards Track [Page 70] + +RFC 2068 HTTP/1.1 January 1997 + + + headers, response codes, etc. + + Caching would be useless if it did not significantly improve + performance. The goal of caching in HTTP/1.1 is to eliminate the need + to send requests in many cases, and to eliminate the need to send + full responses in many other cases. The former reduces the number of + network round-trips required for many operations; we use an + "expiration" mechanism for this purpose (see section 13.2). The + latter reduces network bandwidth requirements; we use a "validation" + mechanism for this purpose (see section 13.3). + + Requirements for performance, availability, and disconnected + operation require us to be able to relax the goal of semantic + transparency. The HTTP/1.1 protocol allows origin servers, caches, + and clients to explicitly reduce transparency when necessary. 
+ However, because non-transparent operation may confuse non-expert + users, and may be incompatible with certain server applications (such + as those for ordering merchandise), the protocol requires that + transparency be relaxed + + o only by an explicit protocol-level request when relaxed by client + or origin server + + o only with an explicit warning to the end user when relaxed by cache + or client + + + + + + + + + + + + + + + + + + + + + + + + + + +Fielding, et. al. Standards Track [Page 71] + +RFC 2068 HTTP/1.1 January 1997 + + + Therefore, the HTTP/1.1 protocol provides these important elements: + + 1. Protocol features that provide full semantic transparency when this + is required by all parties. + + 2. Protocol features that allow an origin server or user agent to + explicitly request and control non-transparent operation. + + 3. Protocol features that allow a cache to attach warnings to + responses that do not preserve the requested approximation of + semantic transparency. + + A basic principle is that it must be possible for the clients to + detect any potential relaxation of semantic transparency. + + Note: The server, cache, or client implementer may be faced with + design decisions not explicitly discussed in this specification. If + a decision may affect semantic transparency, the implementer ought + to err on the side of maintaining transparency unless a careful and + complete analysis shows significant benefits in breaking + transparency. + +13.1.1 Cache Correctness + + A correct cache MUST respond to a request with the most up-to-date + response held by the cache that is appropriate to the request (see + sections 13.2.5, 13.2.6, and 13.12) which meets one of the following + conditions: + + 1. It has been checked for equivalence with what the origin server + would have returned by revalidating the response with the origin + server (section 13.3); + + 2. It is "fresh enough" (see section 13.2). 
In the default case, this + means it meets the least restrictive freshness requirement of the + client, server, and cache (see section 14.9); if the origin server + so specifies, it is the freshness requirement of the origin server + alone. + + 3. It includes a warning if the freshness demand of the client or the + origin server is violated (see section 13.1.5 and 14.45). + + 4. It is an appropriate 304 (Not Modified), 305 (Proxy Redirect), or + error (4xx or 5xx) response message. + + If the cache can not communicate with the origin server, then a + correct cache SHOULD respond as above if the response can be + correctly served from the cache; if not it MUST return an error or + + + +Fielding, et. al. Standards Track [Page 72] + +RFC 2068 HTTP/1.1 January 1997 + + + warning indicating that there was a communication failure. + + If a cache receives a response (either an entire response, or a 304 + (Not Modified) response) that it would normally forward to the + requesting client, and the received response is no longer fresh, the + cache SHOULD forward it to the requesting client without adding a new + Warning (but without removing any existing Warning headers). A cache + SHOULD NOT attempt to revalidate a response simply because that + response became stale in transit; this might lead to an infinite + loop. A user agent that receives a stale response without a Warning + MAY display a warning indication to the user. + +13.1.2 Warnings + + Whenever a cache returns a response that is neither first-hand nor + "fresh enough" (in the sense of condition 2 in section 13.1.1), it + must attach a warning to that effect, using a Warning response- + header. This warning allows clients to take appropriate action. + + Warnings may be used for other purposes, both cache-related and + otherwise. The use of a warning, rather than an error status code, + distinguishes these responses from true failures. 
+ + Warnings are always cachable, because they never weaken the + transparency of a response. This means that warnings can be passed to + HTTP/1.0 caches without danger; such caches will simply pass the + warning along as an entity-header in the response. + + Warnings are assigned numbers between 0 and 99. This specification + defines the code numbers and meanings of each currently assigned + warning, allowing a client or cache to take automated action in some + (but not all) cases. + + Warnings also carry a warning text. The text may be in any + appropriate natural language (perhaps based on the client's Accept + headers), and include an optional indication of what character set is + used. + + Multiple warnings may be attached to a response (either by the origin + server or by a cache), including multiple warnings with the same code + number. For example, a server may provide the same warning with texts + in both English and Basque. + + When multiple warnings are attached to a response, it may not be + practical or reasonable to display all of them to the user. This + version of HTTP does not specify strict priority rules for deciding + which warnings to display and in what order, but does suggest some + heuristics. + + + +Fielding, et. al. Standards Track [Page 73] + +RFC 2068 HTTP/1.1 January 1997 + + + The Warning header and the currently defined warnings are described + in section 14.45. + +13.1.3 Cache-control Mechanisms + + The basic cache mechanisms in HTTP/1.1 (server-specified expiration + times and validators) are implicit directives to caches. In some + cases, a server or client may need to provide explicit directives to + the HTTP caches. We use the Cache-Control header for this purpose. + + The Cache-Control header allows a client or server to transmit a + variety of directives in either requests or responses. These + directives typically override the default caching algorithms. 
As a + general rule, if there is any apparent conflict between header + values, the most restrictive interpretation should be applied (that + is, the one that is most likely to preserve semantic transparency). + However, in some cases, Cache-Control directives are explicitly + specified as weakening the approximation of semantic transparency + (for example, "max-stale" or "public"). + + The Cache-Control directives are described in detail in section 14.9. + +13.1.4 Explicit User Agent Warnings + + Many user agents make it possible for users to override the basic + caching mechanisms. For example, the user agent may allow the user to + specify that cached entities (even explicitly stale ones) are never + validated. Or the user agent might habitually add "Cache-Control: + max-stale=3600" to every request. The user should have to explicitly + request either non-transparent behavior, or behavior that results in + abnormally ineffective caching. + + If the user has overridden the basic caching mechanisms, the user + agent should explicitly indicate to the user whenever this results in + the display of information that might not meet the server's + transparency requirements (in particular, if the displayed entity is + known to be stale). Since the protocol normally allows the user agent + to determine if responses are stale or not, this indication need only + be displayed when this actually happens. The indication need not be a + dialog box; it could be an icon (for example, a picture of a rotting + fish) or some other visual indicator. + + If the user has overridden the caching mechanisms in a way that would + abnormally reduce the effectiveness of caches, the user agent should + continually display an indication (for example, a picture of currency + in flames) so that the user does not inadvertently consume excess + resources or suffer from excessive latency. + + + + +Fielding, et. al. 
Standards Track [Page 74] + +RFC 2068 HTTP/1.1 January 1997 + + +13.1.5 Exceptions to the Rules and Warnings + + In some cases, the operator of a cache may choose to configure it to + return stale responses even when not requested by clients. This + decision should not be made lightly, but may be necessary for reasons + of availability or performance, especially when the cache is poorly + connected to the origin server. Whenever a cache returns a stale + response, it MUST mark it as such (using a Warning header). This + allows the client software to alert the user that there may be a + potential problem. + + It also allows the user agent to take steps to obtain a first-hand or + fresh response. For this reason, a cache SHOULD NOT return a stale + response if the client explicitly requests a first-hand or fresh one, + unless it is impossible to comply for technical or policy reasons. + +13.1.6 Client-controlled Behavior + + While the origin server (and to a lesser extent, intermediate caches, + by their contribution to the age of a response) are the primary + source of expiration information, in some cases the client may need + to control a cache's decision about whether to return a cached + response without validating it. Clients do this using several + directives of the Cache-Control header. + + A client's request may specify the maximum age it is willing to + accept of an unvalidated response; specifying a value of zero forces + the cache(s) to revalidate all responses. A client may also specify + the minimum time remaining before a response expires. Both of these + options increase constraints on the behavior of caches, and so cannot + further relax the cache's approximation of semantic transparency. + + A client may also specify that it will accept stale responses, up to + some maximum amount of staleness. 
This loosens the constraints on the + caches, and so may violate the origin server's specified constraints + on semantic transparency, but may be necessary to support + disconnected operation, or high availability in the face of poor + connectivity. + +13.2 Expiration Model + +13.2.1 Server-Specified Expiration + + HTTP caching works best when caches can entirely avoid making + requests to the origin server. The primary mechanism for avoiding + requests is for an origin server to provide an explicit expiration + time in the future, indicating that a response may be used to satisfy + subsequent requests. In other words, a cache can return a fresh + + + +Fielding, et. al. Standards Track [Page 75] + +RFC 2068 HTTP/1.1 January 1997 + + + response without first contacting the server. + + Our expectation is that servers will assign future explicit + expiration times to responses in the belief that the entity is not + likely to change, in a semantically significant way, before the + expiration time is reached. This normally preserves semantic + transparency, as long as the server's expiration times are carefully + chosen. + + The expiration mechanism applies only to responses taken from a cache + and not to first-hand responses forwarded immediately to the + requesting client. + + If an origin server wishes to force a semantically transparent cache + to validate every request, it may assign an explicit expiration time + in the past. This means that the response is always stale, and so the + cache SHOULD validate it before using it for subsequent requests. See + section 14.9.4 for a more restrictive way to force revalidation. + + If an origin server wishes to force any HTTP/1.1 cache, no matter how + it is configured, to validate every request, it should use the + "must-revalidate" Cache-Control directive (see section 14.9). + + Servers specify explicit expiration times using either the Expires + header, or the max-age directive of the Cache-Control header. 
+ + An expiration time cannot be used to force a user agent to refresh + its display or reload a resource; its semantics apply only to caching + mechanisms, and such mechanisms need only check a resource's + expiration status when a new request for that resource is initiated. + See section 13.13 for explanation of the difference between caches + and history mechanisms. + +13.2.2 Heuristic Expiration + + Since origin servers do not always provide explicit expiration times, + HTTP caches typically assign heuristic expiration times, employing + algorithms that use other header values (such as the Last-Modified + time) to estimate a plausible expiration time. The HTTP/1.1 + specification does not provide specific algorithms, but does impose + worst-case constraints on their results. Since heuristic expiration + times may compromise semantic transparency, they should be used + cautiously, and we encourage origin servers to provide explicit + expiration times as much as possible. + + + + + + + +Fielding, et. al. Standards Track [Page 76] + +RFC 2068 HTTP/1.1 January 1997 + + +13.2.3 Age Calculations + + In order to know if a cached entry is fresh, a cache needs to know if + its age exceeds its freshness lifetime. We discuss how to calculate + the latter in section 13.2.4; this section describes how to calculate + the age of a response or cache entry. + + In this discussion, we use the term "now" to mean "the current value + of the clock at the host performing the calculation." Hosts that use + HTTP, but especially hosts running origin servers and caches, should + use NTP [28] or some similar protocol to synchronize their clocks to + a globally accurate time standard. + + Also note that HTTP/1.1 requires origin servers to send a Date header + with every response, giving the time at which the response was + generated. We use the term "date_value" to denote the value of the + Date header, in a form appropriate for arithmetic operations. 
+ + HTTP/1.1 uses the Age response-header to help convey age information + between caches. The Age header value is the sender's estimate of the + amount of time since the response was generated at the origin server. + In the case of a cached response that has been revalidated with the + origin server, the Age value is based on the time of revalidation, + not of the original response. + + In essence, the Age value is the sum of the time that the response + has been resident in each of the caches along the path from the + origin server, plus the amount of time it has been in transit along + network paths. + + We use the term "age_value" to denote the value of the Age header, in + a form appropriate for arithmetic operations. + + A response's age can be calculated in two entirely independent ways: + + 1. now minus date_value, if the local clock is reasonably well + synchronized to the origin server's clock. If the result is + negative, the result is replaced by zero. + + 2. age_value, if all of the caches along the response path + implement HTTP/1.1. + + Given that we have two independent ways to compute the age of a + response when it is received, we can combine these as + + corrected_received_age = max(now - date_value, age_value) + + and as long as we have either nearly synchronized clocks or all- + + + +Fielding, et. al. Standards Track [Page 77] + +RFC 2068 HTTP/1.1 January 1997 + + + HTTP/1.1 paths, one gets a reliable (conservative) result. + + Note that this correction is applied at each HTTP/1.1 cache along the + path, so that if there is an HTTP/1.0 cache in the path, the correct + received age is computed as long as the receiving cache's clock is + nearly in sync. We don't need end-to-end clock synchronization + (although it is good to have), and there is no explicit clock + synchronization step. 
+ + Because of network-imposed delays, some significant interval may pass + from the time that a server generates a response and the time it is + received at the next outbound cache or client. If uncorrected, this + delay could result in improperly low ages. + + Because the request that resulted in the returned Age value must have + been initiated prior to that Age value's generation, we can correct + for delays imposed by the network by recording the time at which the + request was initiated. Then, when an Age value is received, it MUST + be interpreted relative to the time the request was initiated, not + the time that the response was received. This algorithm results in + conservative behavior no matter how much delay is experienced. So, we + compute: + + corrected_initial_age = corrected_received_age + + (now - request_time) + + where "request_time" is the time (according to the local clock) when + the request that elicited this response was sent. + + Summary of age calculation algorithm, when a cache receives a + response: + + /* + * age_value + * is the value of Age: header received by the cache with + * this response. + * date_value + * is the value of the origin server's Date: header + * request_time + * is the (local) time when the cache made the request + * that resulted in this cached response + * response_time + * is the (local) time when the cache received the + * response + * now + * is the current (local) time + */ + apparent_age = max(0, response_time - date_value); + + + +Fielding, et. al. 
Standards Track [Page 78] + +RFC 2068 HTTP/1.1 January 1997 + + + corrected_received_age = max(apparent_age, age_value); + response_delay = response_time - request_time; + corrected_initial_age = corrected_received_age + response_delay; + resident_time = now - response_time; + current_age = corrected_initial_age + resident_time; + + When a cache sends a response, it must add to the + corrected_initial_age the amount of time that the response was + resident locally. It must then transmit this total age, using the Age + header, to the next recipient cache. + + Note that a client cannot reliably tell that a response is first- + hand, but the presence of an Age header indicates that a response + is definitely not first-hand. Also, if the Date in a response is + earlier than the client's local request time, the response is + probably not first-hand (in the absence of serious clock skew). + +13.2.4 Expiration Calculations + + In order to decide whether a response is fresh or stale, we need to + compare its freshness lifetime to its age. The age is calculated as + described in section 13.2.3; this section describes how to calculate + the freshness lifetime, and to determine if a response has expired. + In the discussion below, the values can be represented in any form + appropriate for arithmetic operations. + + We use the term "expires_value" to denote the value of the Expires + header. We use the term "max_age_value" to denote an appropriate + value of the number of seconds carried by the max-age directive of + the Cache-Control header in a response (see section 14.9). 
+ + The max-age directive takes priority over Expires, so if max-age is + present in a response, the calculation is simply: + + freshness_lifetime = max_age_value + + Otherwise, if Expires is present in the response, the calculation is: + + freshness_lifetime = expires_value - date_value + + Note that neither of these calculations is vulnerable to clock skew, + since all of the information comes from the origin server. + + If neither Expires nor Cache-Control: max-age appears in the + response, and the response does not include other restrictions on + caching, the cache MAY compute a freshness lifetime using a + heuristic. If the value is greater than 24 hours, the cache must + attach Warning 13 to any response whose age is more than 24 hours if + + + +Fielding, et. al. Standards Track [Page 79] + +RFC 2068 HTTP/1.1 January 1997 + + + such warning has not already been added. + + Also, if the response does have a Last-Modified time, the heuristic + expiration value SHOULD be no more than some fraction of the interval + since that time. A typical setting of this fraction might be 10%. + + The calculation to determine if a response has expired is quite + simple: + + response_is_fresh = (freshness_lifetime > current_age) + +13.2.5 Disambiguating Expiration Values + + Because expiration values are assigned optimistically, it is possible + for two caches to contain fresh values for the same resource that are + different. + + If a client performing a retrieval receives a non-first-hand response + for a request that was already fresh in its own cache, and the Date + header in its existing cache entry is newer than the Date on the new + response, then the client MAY ignore the response. If so, it MAY + retry the request with a "Cache-Control: max-age=0" directive (see + section 14.9), to force a check with the origin server. + + If a cache has two fresh responses for the same representation with + different validators, it MUST use the one with the more recent Date + header. 
This situation may arise because the cache is pooling + responses from other caches, or because a client has asked for a + reload or a revalidation of an apparently fresh cache entry. + +13.2.6 Disambiguating Multiple Responses + + Because a client may be receiving responses via multiple paths, so + that some responses flow through one set of caches and other + responses flow through a different set of caches, a client may + receive responses in an order different from that in which the origin + server sent them. We would like the client to use the most recently + generated response, even if older responses are still apparently + fresh. + + Neither the entity tag nor the expiration value can impose an + ordering on responses, since it is possible that a later response + intentionally carries an earlier expiration time. However, the + HTTP/1.1 specification requires the transmission of Date headers on + every response, and the Date values are ordered to a granularity of + one second. + + + + + +Fielding, et. al. Standards Track [Page 80] + +RFC 2068 HTTP/1.1 January 1997 + + + When a client tries to revalidate a cache entry, and the response it + receives contains a Date header that appears to be older than the one + for the existing entry, then the client SHOULD repeat the request + unconditionally, and include + + Cache-Control: max-age=0 + + to force any intermediate caches to validate their copies directly + with the origin server, or + + Cache-Control: no-cache + + to force any intermediate caches to obtain a new copy from the origin + server. + + If the Date values are equal, then the client may use either response + (or may, if it is being extremely prudent, request a new response). + Servers MUST NOT depend on clients being able to choose + deterministically between responses generated during the same second, + if their expiration times overlap. 
+ +13.3 Validation Model + + When a cache has a stale entry that it would like to use as a + response to a client's request, it first has to check with the origin + server (or possibly an intermediate cache with a fresh response) to + see if its cached entry is still usable. We call this "validating" + the cache entry. Since we do not want to have to pay the overhead of + retransmitting the full response if the cached entry is good, and we + do not want to pay the overhead of an extra round trip if the cached + entry is invalid, the HTTP/1.1 protocol supports the use of + conditional methods. + + The key protocol features for supporting conditional methods are + those concerned with "cache validators." When an origin server + generates a full response, it attaches some sort of validator to it, + which is kept with the cache entry. When a client (user agent or + proxy cache) makes a conditional request for a resource for which it + has a cache entry, it includes the associated validator in the + request. + + The server then checks that validator against the current validator + for the entity, and, if they match, it responds with a special status + code (usually, 304 (Not Modified)) and no entity-body. Otherwise, it + returns a full response (including entity-body). Thus, we avoid + transmitting the full response if the validator matches, and we avoid + an extra round trip if it does not match. + + + + +Fielding, et. al. Standards Track [Page 81] + +RFC 2068 HTTP/1.1 January 1997 + + + Note: the comparison functions used to decide if validators match + are defined in section 13.3.3. + + In HTTP/1.1, a conditional request looks exactly the same as a normal + request for the same resource, except that it carries a special + header (which includes the validator) that implicitly turns the + method (usually, GET) into a conditional. + + The protocol includes both positive and negative senses of cache- + validating conditions. 
That is, it is possible to request either that + a method be performed if and only if a validator matches or if and + only if no validators match. + + Note: a response that lacks a validator may still be cached, and + served from cache until it expires, unless this is explicitly + prohibited by a Cache-Control directive. However, a cache cannot do + a conditional retrieval if it does not have a validator for the + entity, which means it will not be refreshable after it expires. + +13.3.1 Last-modified Dates + + The Last-Modified entity-header field value is often used as a cache + validator. In simple terms, a cache entry is considered to be valid + if the entity has not been modified since the Last-Modified value. + +13.3.2 Entity Tag Cache Validators + + The ETag entity-header field value, an entity tag, provides for an + "opaque" cache validator. This may allow more reliable validation in + situations where it is inconvenient to store modification dates, + where the one-second resolution of HTTP date values is not + sufficient, or where the origin server wishes to avoid certain + paradoxes that may arise from the use of modification dates. + + Entity Tags are described in section 3.11. The headers used with + entity tags are described in sections 14.20, 14.25, 14.26 and 14.43. + +13.3.3 Weak and Strong Validators + + Since both origin servers and caches will compare two validators to + decide if they represent the same or different entities, one normally + would expect that if the entity (the entity-body or any entity- + headers) changes in any way, then the associated validator would + change as well. If this is true, then we call this validator a + "strong validator." + + However, there may be cases when a server prefers to change the + validator only on semantically significant changes, and not when + + + +Fielding, et. al. Standards Track [Page 82] + +RFC 2068 HTTP/1.1 January 1997 + + + insignificant aspects of the entity change. 
A validator that does not + always change when the resource changes is a "weak validator." + + Entity tags are normally "strong validators," but the protocol + provides a mechanism to tag an entity tag as "weak." One can think of + a strong validator as one that changes whenever the bits of an entity + change, while a weak validator changes whenever the meaning of an entity + changes. Alternatively, one can think of a strong validator as part + of an identifier for a specific entity, while a weak validator is + part of an identifier for a set of semantically equivalent entities. + + Note: One example of a strong validator is an integer that is + incremented in stable storage every time an entity is changed. + + An entity's modification time, if represented with one-second + resolution, could be a weak validator, since it is possible that + the resource may be modified twice during a single second. + + Support for weak validators is optional; however, weak validators + allow for more efficient caching of equivalent objects; for + example, a hit counter on a site is probably good enough if it is + updated every few days or weeks, and any value during that period + is likely "good enough" to be equivalent. + + A "use" of a validator is either when a client generates a request + and includes the validator in a validating header field, or when a + server compares two validators. + + Strong validators are usable in any context. Weak validators are only + usable in contexts that do not depend on exact equality of an entity. + For example, either kind is usable for a conditional GET of a full + entity. However, only a strong validator is usable for a sub-range + retrieval, since otherwise the client may end up with an internally + inconsistent entity. + + The only function that the HTTP/1.1 protocol defines on validators is + comparison.
There are two validator comparison functions, depending + on whether the comparison context allows the use of weak validators + or not: + + o The strong comparison function: in order to be considered equal, + both validators must be identical in every way, and neither may be + weak. + o The weak comparison function: in order to be considered equal, both + validators must be identical in every way, but either or both of + them may be tagged as "weak" without affecting the result. + + The weak comparison function MAY be used for simple (non-subrange) + + + +Fielding, et. al. Standards Track [Page 83] + +RFC 2068 HTTP/1.1 January 1997 + + + GET requests. The strong comparison function MUST be used in all + other cases. + + An entity tag is strong unless it is explicitly tagged as weak. + Section 3.11 gives the syntax for entity tags. + + A Last-Modified time, when used as a validator in a request, is + implicitly weak unless it is possible to deduce that it is strong, + using the following rules: + + o The validator is being compared by an origin server to the actual + current validator for the entity and, + o That origin server reliably knows that the associated entity did + not change twice during the second covered by the presented + validator. +or + + o The validator is about to be used by a client in an If-Modified- + Since or If-Unmodified-Since header, because the client has a cache + entry for the associated entity, and + o That cache entry includes a Date value, which gives the time when + the origin server sent the original response, and + o The presented Last-Modified time is at least 60 seconds before the + Date value. +or + + o The validator is being compared by an intermediate cache to the + validator stored in its cache entry for the entity, and + o That cache entry includes a Date value, which gives the time when + the origin server sent the original response, and + o The presented Last-Modified time is at least 60 seconds before the + Date value. 
+ + This method relies on the fact that if two different responses were + sent by the origin server during the same second, but both had the + same Last-Modified time, then at least one of those responses would + have a Date value equal to its Last-Modified time. The arbitrary 60- + second limit guards against the possibility that the Date and Last- + Modified values are generated from different clocks, or at somewhat + different times during the preparation of the response. An + implementation may use a value larger than 60 seconds, if it is + believed that 60 seconds is too short. + + If a client wishes to perform a sub-range retrieval on a value for + which it has only a Last-Modified time and no opaque validator, it + may do this only if the Last-Modified time is strong in the sense + described here. + + + + +Fielding, et. al. Standards Track [Page 84] + +RFC 2068 HTTP/1.1 January 1997 + + + A cache or origin server receiving a cache-conditional request, other + than a full-body GET request, MUST use the strong comparison function + to evaluate the condition. + + These rules allow HTTP/1.1 caches and clients to safely perform sub- + range retrievals on values that have been obtained from HTTP/1.0 + servers. + +13.3.4 Rules for When to Use Entity Tags and Last-modified Dates + + We adopt a set of rules and recommendations for origin servers, + clients, and caches regarding when various validator types should be + used, and for what purposes. + + HTTP/1.1 origin servers: + + o SHOULD send an entity tag validator unless it is not feasible to + generate one. + o MAY send a weak entity tag instead of a strong entity tag, if + performance considerations support the use of weak entity tags, or + if it is unfeasible to send a strong entity tag. 
+ o SHOULD send a Last-Modified value if it is feasible to send one, + unless the risk of a breakdown in semantic transparency that could + result from using this date in an If-Modified-Since header would + lead to serious problems. + + In other words, the preferred behavior for an HTTP/1.1 origin server + is to send both a strong entity tag and a Last-Modified value. + + In order to be legal, a strong entity tag MUST change whenever the + associated entity value changes in any way. A weak entity tag SHOULD + change whenever the associated entity changes in a semantically + significant way. + + Note: in order to provide semantically transparent caching, an + origin server must avoid reusing a specific strong entity tag value + for two different entities, or reusing a specific weak entity tag + value for two semantically different entities. Cache entries may + persist for arbitrarily long periods, regardless of expiration + times, so it may be inappropriate to expect that a cache will never + again attempt to validate an entry using a validator that it + obtained at some point in the past. + + HTTP/1.1 clients: + + o If an entity tag has been provided by the origin server, MUST + use that entity tag in any cache-conditional request (using + If-Match or If-None-Match). + + + +Fielding, et. al. Standards Track [Page 85] + +RFC 2068 HTTP/1.1 January 1997 + + + o If only a Last-Modified value has been provided by the origin + server, SHOULD use that value in non-subrange cache-conditional + requests (using If-Modified-Since). + o If only a Last-Modified value has been provided by an HTTP/1.0 + origin server, MAY use that value in subrange cache-conditional + requests (using If-Unmodified-Since:). The user agent should + provide a way to disable this, in case of difficulty. + o If both an entity tag and a Last-Modified value have been + provided by the origin server, SHOULD use both validators in + cache-conditional requests. 
This allows both HTTP/1.0 and + HTTP/1.1 caches to respond appropriately. + + An HTTP/1.1 cache, upon receiving a request, MUST use the most + restrictive validator when deciding whether the client's cache entry + matches the cache's own cache entry. This is only an issue when the + request contains both an entity tag and a last-modified-date + validator (If-Modified-Since or If-Unmodified-Since). + + A note on rationale: The general principle behind these rules is + that HTTP/1.1 servers and clients should transmit as much non- + redundant information as is available in their responses and + requests. HTTP/1.1 systems receiving this information will make the + most conservative assumptions about the validators they receive. + + HTTP/1.0 clients and caches will ignore entity tags. Generally, + last-modified values received or used by these systems will support + transparent and efficient caching, and so HTTP/1.1 origin servers + should provide Last-Modified values. In those rare cases where the + use of a Last-Modified value as a validator by an HTTP/1.0 system + could result in a serious problem, then HTTP/1.1 origin servers + should not provide one. + +13.3.5 Non-validating Conditionals + + The principle behind entity tags is that only the service author + knows the semantics of a resource well enough to select an + appropriate cache validation mechanism, and the specification of any + validator comparison function more complex than byte-equality would + open up a can of worms. Thus, comparisons of any other headers + (except Last-Modified, for compatibility with HTTP/1.0) are never + used for purposes of validating a cache entry. + +13.4 Response Cachability + + Unless specifically constrained by a Cache-Control (section 14.9) + directive, a caching system may always store a successful response + (see section 13.8) as a cache entry, may return it without validation + if it is fresh, and may return it after successful validation. If + + + +Fielding, et. al. 
Standards Track [Page 86] + +RFC 2068 HTTP/1.1 January 1997 + + + there is neither a cache validator nor an explicit expiration time + associated with a response, we do not expect it to be cached, but + certain caches may violate this expectation (for example, when little + or no network connectivity is available). A client can usually detect + that such a response was taken from a cache by comparing the Date + header to the current time. + + Note that some HTTP/1.0 caches are known to violate this + expectation without providing any Warning. + + However, in some cases it may be inappropriate for a cache to retain + an entity, or to return it in response to a subsequent request. This + may be because absolute semantic transparency is deemed necessary by + the service author, or because of security or privacy considerations. + Certain Cache-Control directives are therefore provided so that the + server can indicate that certain resource entities, or portions + thereof, may not be cached regardless of other considerations. + + Note that section 14.8 normally prevents a shared cache from saving + and returning a response to a previous request if that request + included an Authorization header. + + A response received with a status code of 200, 203, 206, 300, 301 or + 410 may be stored by a cache and used in reply to a subsequent + request, subject to the expiration mechanism, unless a Cache-Control + directive prohibits caching. However, a cache that does not support + the Range and Content-Range headers MUST NOT cache 206 (Partial + Content) responses. + + A response received with any other status code MUST NOT be returned + in a reply to a subsequent request unless there are Cache-Control + directives or another header(s) that explicitly allow it. For + example, these include the following: an Expires header (section + 14.21); a "max-age", "must-revalidate", "proxy-revalidate", "public" + or "private" Cache-Control directive (section 14.9). 
+ +13.5 Constructing Responses From Caches + + The purpose of an HTTP cache is to store information received in + response to requests, for use in responding to future requests. In + many cases, a cache simply returns the appropriate parts of a + response to the requester. However, if the cache holds a cache entry + based on a previous response, it may have to combine parts of a new + response with what is held in the cache entry. + + + + + + + +Fielding, et. al. Standards Track [Page 87] + +RFC 2068 HTTP/1.1 January 1997 + + +13.5.1 End-to-end and Hop-by-hop Headers + + For the purpose of defining the behavior of caches and non-caching + proxies, we divide HTTP headers into two categories: + + o End-to-end headers, which must be transmitted to the + ultimate recipient of a request or response. End-to-end + headers in responses must be stored as part of a cache entry + and transmitted in any response formed from a cache entry. + o Hop-by-hop headers, which are meaningful only for a single + transport-level connection, and are not stored by caches or + forwarded by proxies. + + The following HTTP/1.1 headers are hop-by-hop headers: + + o Connection + o Keep-Alive + o Public + o Proxy-Authenticate + o Transfer-Encoding + o Upgrade + + All other headers defined by HTTP/1.1 are end-to-end headers. + + Hop-by-hop headers introduced in future versions of HTTP MUST be + listed in a Connection header, as described in section 14.10. + +13.5.2 Non-modifiable Headers + + Some features of the HTTP/1.1 protocol, such as Digest + Authentication, depend on the value of certain end-to-end headers. A + cache or non-caching proxy SHOULD NOT modify an end-to-end header + unless the definition of that header requires or specifically allows + that. 
+ + A cache or non-caching proxy MUST NOT modify any of the following + fields in a request or response, nor may it add any of these fields + if not already present: + + o Content-Location + o ETag + o Expires + o Last-Modified + + + + + + + + +Fielding, et. al. Standards Track [Page 88] + +RFC 2068 HTTP/1.1 January 1997 + + + A cache or non-caching proxy MUST NOT modify or add any of the + following fields in a response that contains the no-transform Cache- + Control directive, or in any request: + + o Content-Encoding + o Content-Length + o Content-Range + o Content-Type + + A cache or non-caching proxy MAY modify or add these fields in a + response that does not include no-transform, but if it does so, it + MUST add a Warning 14 (Transformation applied) if one does not + already appear in the response. + + Warning: unnecessary modification of end-to-end headers may cause + authentication failures if stronger authentication mechanisms are + introduced in later versions of HTTP. Such authentication + mechanisms may rely on the values of header fields not listed here. + +13.5.3 Combining Headers + + When a cache makes a validating request to a server, and the server + provides a 304 (Not Modified) response, the cache must construct a + response to send to the requesting client. The cache uses the + entity-body stored in the cache entry as the entity-body of this + outgoing response. The end-to-end headers stored in the cache entry + are used for the constructed response, except that any end-to-end + headers provided in the 304 response MUST replace the corresponding + headers from the cache entry. Unless the cache decides to remove the + cache entry, it MUST also replace the end-to-end headers stored with + the cache entry with corresponding headers received in the incoming + response. + + In other words, the set of end-to-end headers received in the + incoming response overrides all corresponding end-to-end headers + stored with the cache entry. 
The cache may add Warning headers (see + section 14.45) to this set. + + If a header field-name in the incoming response matches more than one + header in the cache entry, all such old headers are replaced. + + Note: this rule allows an origin server to use a 304 (Not Modified) + response to update any header associated with a previous response + for the same entity, although it might not always be meaningful or + correct to do so. This rule does not allow an origin server to use + a 304 (Not Modified) response to entirely delete a header that it + had provided with a previous response. + + + + +Fielding, et. al. Standards Track [Page 89] + +RFC 2068 HTTP/1.1 January 1997 + + +13.5.4 Combining Byte Ranges + + A response may transfer only a subrange of the bytes of an entity- + body, either because the request included one or more Range + specifications, or because a connection was broken prematurely. After + several such transfers, a cache may have received several ranges of + the same entity-body. + + If a cache has a stored non-empty set of subranges for an entity, and + an incoming response transfers another subrange, the cache MAY + combine the new subrange with the existing set if both the following + conditions are met: + + o Both the incoming response and the cache entry must have a cache + validator. + o The two cache validators must match using the strong comparison + function (see section 13.3.3). + + If either requirement is not met, the cache must use only the most + recent partial response (based on the Date values transmitted with + every response, and using the incoming response if these values are + equal or missing), and must discard the other partial information. + +13.6 Caching Negotiated Responses + + Use of server-driven content negotiation (section 12), as indicated + by the presence of a Vary header field in a response, alters the + conditions and procedure by which a cache can use the response for + subsequent requests.
+ + A server MUST use the Vary header field (section 14.43) to inform a + cache of what header field dimensions are used to select among + multiple representations of a cachable response. A cache may use the + selected representation (the entity included with that particular + response) for replying to subsequent requests on that resource only + when the subsequent requests have the same or equivalent values for + all header fields specified in the Vary response-header. Requests + with a different value for one or more of those header fields would + be forwarded toward the origin server. + + If an entity tag was assigned to the representation, the forwarded + request SHOULD be conditional and include the entity tags in an If- + None-Match header field from all its cache entries for the Request- + URI. This conveys to the server the set of entities currently held by + the cache, so that if any one of these entities matches the requested + entity, the server can use the ETag header in its 304 (Not Modified) + response to tell the cache which entry is appropriate. If the + entity-tag of the new response matches that of an existing entry, the + + + +Fielding, et. al. Standards Track [Page 90] + +RFC 2068 HTTP/1.1 January 1997 + + + new response SHOULD be used to update the header fields of the + existing entry, and the result MUST be returned to the client. + + The Vary header field may also inform the cache that the + representation was selected using criteria not limited to the + request-headers; in this case, a cache MUST NOT use the response in a + reply to a subsequent request unless the cache relays the new request + to the origin server in a conditional request and the server responds + with 304 (Not Modified), including an entity tag or Content-Location + that indicates which entity should be used. 
+ + If any of the existing cache entries contains only partial content + for the associated entity, its entity-tag SHOULD NOT be included in + the If-None-Match header unless the request is for a range that would + be fully satisfied by that entry. + + If a cache receives a successful response whose Content-Location + field matches that of an existing cache entry for the same Request- + URI, whose entity-tag differs from that of the existing entry, and + whose Date is more recent than that of the existing entry, the + existing entry SHOULD NOT be returned in response to future requests, + and should be deleted from the cache. + +13.7 Shared and Non-Shared Caches + + For reasons of security and privacy, it is necessary to make a + distinction between "shared" and "non-shared" caches. A non-shared + cache is one that is accessible only to a single user. Accessibility + in this case SHOULD be enforced by appropriate security mechanisms. + All other caches are considered to be "shared." Other sections of + this specification place certain constraints on the operation of + shared caches in order to prevent loss of privacy or failure of + access controls. + +13.8 Errors or Incomplete Response Cache Behavior + + A cache that receives an incomplete response (for example, with fewer + bytes of data than specified in a Content-Length header) may store + the response. However, the cache MUST treat this as a partial + response. Partial responses may be combined as described in section + 13.5.4; the result might be a full response or might still be + partial. A cache MUST NOT return a partial response to a client + without explicitly marking it as such, using the 206 (Partial + Content) status code. A cache MUST NOT return a partial response + using a status code of 200 (OK). + + If a cache receives a 5xx response while attempting to revalidate an + entry, it may either forward this response to the requesting client, + + + +Fielding, et. al. 
Standards Track [Page 91] + +RFC 2068 HTTP/1.1 January 1997 + + + or act as if the server failed to respond. In the latter case, it MAY + return a previously received response unless the cached entry + includes the "must-revalidate" Cache-Control directive (see section + 14.9). + +13.9 Side Effects of GET and HEAD + + Unless the origin server explicitly prohibits the caching of their + responses, the application of GET and HEAD methods to any resources + SHOULD NOT have side effects that would lead to erroneous behavior if + these responses are taken from a cache. They may still have side + effects, but a cache is not required to consider such side effects in + its caching decisions. Caches are always expected to observe an + origin server's explicit restrictions on caching. + + We note one exception to this rule: since some applications have + traditionally used GETs and HEADs with query URLs (those containing a + "?" in the rel_path part) to perform operations with significant side + effects, caches MUST NOT treat responses to such URLs as fresh unless + the server provides an explicit expiration time. This specifically + means that responses from HTTP/1.0 servers for such URIs should not + be taken from a cache. See section 9.1.1 for related information. + +13.10 Invalidation After Updates or Deletions + + The effect of certain methods at the origin server may cause one or + more existing cache entries to become non-transparently invalid. That + is, although they may continue to be "fresh," they do not accurately + reflect what the origin server would return for a new request. + + There is no way for the HTTP protocol to guarantee that all such + cache entries are marked invalid. For example, the request that + caused the change at the origin server may not have gone through the + proxy where a cache entry is stored. However, several rules help + reduce the likelihood of erroneous behavior. 
+ + In this section, the phrase "invalidate an entity" means that the + cache should either remove all instances of that entity from its + storage, or should mark these as "invalid" and in need of a mandatory + revalidation before they can be returned in response to a subsequent + request. + + + + + + + + + + +Fielding, et. al. Standards Track [Page 92] + +RFC 2068 HTTP/1.1 January 1997 + + + Some HTTP methods may invalidate an entity. This is either the entity + referred to by the Request-URI, or by the Location or Content- + Location response-headers (if present). These methods are: + + o PUT + o DELETE + o POST + + In order to prevent denial of service attacks, an invalidation based + on the URI in a Location or Content-Location header MUST only be + performed if the host part is the same as in the Request-URI. + +13.11 Write-Through Mandatory + + All methods that may be expected to cause modifications to the origin + server's resources MUST be written through to the origin server. This + currently includes all methods except for GET and HEAD. A cache MUST + NOT reply to such a request from a client before having transmitted + the request to the inbound server, and having received a + corresponding response from the inbound server. This does not prevent + a cache from sending a 100 (Continue) response before the inbound + server has replied. + + The alternative (known as "write-back" or "copy-back" caching) is not + allowed in HTTP/1.1, due to the difficulty of providing consistent + updates and the problems arising from server, cache, or network + failure prior to write-back. + +13.12 Cache Replacement + + If a new cachable (see sections 14.9.2, 13.2.5, 13.2.6 and 13.8) + response is received from a resource while any existing responses for + the same resource are cached, the cache SHOULD use the new response + to reply to the current request. 
It may insert it into cache storage + and may, if it meets all other requirements, use it to respond to any + future requests that would previously have caused the old response to + be returned. If it inserts the new response into cache storage it + should follow the rules in section 13.5.3. + + Note: a new response that has an older Date header value than + existing cached responses is not cachable. + +13.13 History Lists + + User agents often have history mechanisms, such as "Back" buttons and + history lists, which can be used to redisplay an entity retrieved + earlier in a session. + + + + +Fielding, et. al. Standards Track [Page 93] + +RFC 2068 HTTP/1.1 January 1997 + + + History mechanisms and caches are different. In particular history + mechanisms SHOULD NOT try to show a semantically transparent view of + the current state of a resource. Rather, a history mechanism is meant + to show exactly what the user saw at the time when the resource was + retrieved. + + By default, an expiration time does not apply to history mechanisms. + If the entity is still in storage, a history mechanism should display + it even if the entity has expired, unless the user has specifically + configured the agent to refresh expired history documents. + + This should not be construed to prohibit the history mechanism from + telling the user that a view may be stale. + + Note: if history list mechanisms unnecessarily prevent users from + viewing stale resources, this will tend to force service authors to + avoid using HTTP expiration controls and cache controls when they + would otherwise like to. Service authors may consider it important + that users not be presented with error messages or warning messages + when they use navigation controls (such as BACK) to view previously + fetched resources. 
Even though sometimes such resources ought not + to be cached, or ought to expire quickly, user interface + considerations may force service authors to resort to other means + of preventing caching (e.g. "once-only" URLs) in order not to + suffer the effects of improperly functioning history mechanisms. + +14 Header Field Definitions + + This section defines the syntax and semantics of all standard + HTTP/1.1 header fields. For entity-header fields, both sender and + recipient refer to either the client or the server, depending on who + sends and who receives the entity. + + + + + + + + + + + + + + + + + + + +Fielding, et. al. Standards Track [Page 94] + +RFC 2068 HTTP/1.1 January 1997 + + +14.1 Accept + + The Accept request-header field can be used to specify certain media + types which are acceptable for the response. Accept headers can be + used to indicate that the request is specifically limited to a small + set of desired types, as in the case of a request for an in-line + image. + + Accept = "Accept" ":" + #( media-range [ accept-params ] ) + + media-range = ( "*/*" + | ( type "/" "*" ) + | ( type "/" subtype ) + ) *( ";" parameter ) + + accept-params = ";" "q" "=" qvalue *( accept-extension ) + + accept-extension = ";" token [ "=" ( token | quoted-string ) ] + + The asterisk "*" character is used to group media types into ranges, + with "*/*" indicating all media types and "type/*" indicating all + subtypes of that type. The media-range MAY include media type + parameters that are applicable to that range. + + Each media-range MAY be followed by one or more accept-params, + beginning with the "q" parameter for indicating a relative quality + factor. The first "q" parameter (if any) separates the media-range + parameter(s) from the accept-params. Quality factors allow the user + or user agent to indicate the relative degree of preference for that + media-range, using the qvalue scale from 0 to 1 (section 3.9). The + default value is q=1.
+ + Note: Use of the "q" parameter name to separate media type + parameters from Accept extension parameters is due to historical + practice. Although this prevents any media type parameter named + "q" from being used with a media range, such an event is believed + to be unlikely given the lack of any "q" parameters in the IANA + media type registry and the rare usage of any media type parameters + in Accept. Future media types should be discouraged from + registering any parameter named "q". + + The example + + Accept: audio/*; q=0.2, audio/basic + + SHOULD be interpreted as "I prefer audio/basic, but send me any audio + type if it is the best available after an 80% mark-down in quality." + + + +Fielding, et. al. Standards Track [Page 95] + +RFC 2068 HTTP/1.1 January 1997 + + + If no Accept header field is present, then it is assumed that the + client accepts all media types. If an Accept header field is present, + and if the server cannot send a response which is acceptable + according to the combined Accept field value, then the server SHOULD + send a 406 (not acceptable) response. + + A more elaborate example is + + Accept: text/plain; q=0.5, text/html, + text/x-dvi; q=0.8, text/x-c + + Verbally, this would be interpreted as "text/html and text/x-c are + the preferred media types, but if they do not exist, then send the + text/x-dvi entity, and if that does not exist, send the text/plain + entity." + + Media ranges can be overridden by more specific media ranges or + specific media types. If more than one media range applies to a given + type, the most specific reference has precedence. For example, + + Accept: text/*, text/html, text/html;level=1, */* + + have the following precedence: + + 1) text/html;level=1 + 2) text/html + 3) text/* + 4) */* + + The media type quality factor associated with a given type is + determined by finding the media range with the highest precedence + which matches that type. 
For example, + + Accept: text/*;q=0.3, text/html;q=0.7, text/html;level=1, + text/html;level=2;q=0.4, */*;q=0.5 + + would cause the following values to be associated: + + text/html;level=1 = 1 + text/html = 0.7 + text/plain = 0.3 + image/jpeg = 0.5 + text/html;level=2 = 0.4 + text/html;level=3 = 0.7 + + Note: A user agent may be provided with a default set of quality + values for certain media ranges. However, unless the user agent is + a closed system which cannot interact with other rendering agents, + + + +Fielding, et. al. Standards Track [Page 96] + +RFC 2068 HTTP/1.1 January 1997 + + + this default set should be configurable by the user. + +14.2 Accept-Charset + + The Accept-Charset request-header field can be used to indicate what + character sets are acceptable for the response. This field allows + clients capable of understanding more comprehensive or special- + purpose character sets to signal that capability to a server which is + capable of representing documents in those character sets. The ISO- + 8859-1 character set can be assumed to be acceptable to all user + agents. + + Accept-Charset = "Accept-Charset" ":" + 1#( charset [ ";" "q" "=" qvalue ] ) + + Character set values are described in section 3.4. Each charset may + be given an associated quality value which represents the user's + preference for that charset. The default value is q=1. An example is + + Accept-Charset: iso-8859-5, unicode-1-1;q=0.8 + + If no Accept-Charset header is present, the default is that any + character set is acceptable. If an Accept-Charset header is present, + and if the server cannot send a response which is acceptable + according to the Accept-Charset header, then the server SHOULD send + an error response with the 406 (not acceptable) status code, though + the sending of an unacceptable response is also allowed. 
+ +14.3 Accept-Encoding + + The Accept-Encoding request-header field is similar to Accept, but + restricts the content-coding values (section 14.12) which are + acceptable in the response. + + Accept-Encoding = "Accept-Encoding" ":" + #( content-coding ) + + An example of its use is + + Accept-Encoding: compress, gzip + + If no Accept-Encoding header is present in a request, the server MAY + assume that the client will accept any content coding. If an Accept- + Encoding header is present, and if the server cannot send a response + which is acceptable according to the Accept-Encoding header, then the + server SHOULD send an error response with the 406 (Not Acceptable) + status code. + + + + +Fielding, et. al. Standards Track [Page 97] + +RFC 2068 HTTP/1.1 January 1997 + + + An empty Accept-Encoding value indicates none are acceptable. + +14.4 Accept-Language + + The Accept-Language request-header field is similar to Accept, but + restricts the set of natural languages that are preferred as a + response to the request. + + Accept-Language = "Accept-Language" ":" + 1#( language-range [ ";" "q" "=" qvalue ] ) + + language-range = ( ( 1*8ALPHA *( "-" 1*8ALPHA ) ) | "*" ) + + Each language-range MAY be given an associated quality value which + represents an estimate of the user's preference for the languages + specified by that range. The quality value defaults to "q=1". For + example, + + Accept-Language: da, en-gb;q=0.8, en;q=0.7 + + would mean: "I prefer Danish, but will accept British English and + other types of English." A language-range matches a language-tag if + it exactly equals the tag, or if it exactly equals a prefix of the + tag such that the first tag character following the prefix is "-". + The special range "*", if present in the Accept-Language field, + matches every tag not matched by any other range present in the + Accept-Language field. 
+ + Note: This use of a prefix matching rule does not imply that + language tags are assigned to languages in such a way that it is + always true that if a user understands a language with a certain + tag, then this user will also understand all languages with tags + for which this tag is a prefix. The prefix rule simply allows the + use of prefix tags if this is the case. + + The language quality factor assigned to a language-tag by the + Accept-Language field is the quality value of the longest language- + range in the field that matches the language-tag. If no language- + range in the field matches the tag, the language quality factor + assigned is 0. If no Accept-Language header is present in the + request, the server SHOULD assume that all languages are equally + acceptable. If an Accept-Language header is present, then all + languages which are assigned a quality factor greater than 0 are + acceptable. + + It may be contrary to the privacy expectations of the user to send an + Accept-Language header with the complete linguistic preferences of + the user in every request. For a discussion of this issue, see + + + +Fielding, et. al. Standards Track [Page 98] + +RFC 2068 HTTP/1.1 January 1997 + + + section 15.7. + + Note: As intelligibility is highly dependent on the individual + user, it is recommended that client applications make the choice of + linguistic preference available to the user. If the choice is not + made available, then the Accept-Language header field must not be + given in the request. + +14.5 Accept-Ranges + + The Accept-Ranges response-header field allows the server to indicate + its acceptance of range requests for a resource: + + Accept-Ranges = "Accept-Ranges" ":" acceptable-ranges + + acceptable-ranges = 1#range-unit | "none" + + Origin servers that accept byte-range requests MAY send + + Accept-Ranges: bytes + + but are not required to do so. 
Clients MAY generate byte-range + requests without having received this header for the resource + involved. + + Servers that do not accept any kind of range request for a resource + MAY send + + Accept-Ranges: none + + to advise the client not to attempt a range request. + +14.6 Age + + The Age response-header field conveys the sender's estimate of the + amount of time since the response (or its revalidation) was generated + at the origin server. A cached response is "fresh" if its age does + not exceed its freshness lifetime. Age values are calculated as + specified in section 13.2.3. + + Age = "Age" ":" age-value + + age-value = delta-seconds + + Age values are non-negative decimal integers, representing time in + seconds. + + + + + +Fielding, et. al. Standards Track [Page 99] + +RFC 2068 HTTP/1.1 January 1997 + + + If a cache receives a value larger than the largest positive integer + it can represent, or if any of its age calculations overflows, it + MUST transmit an Age header with a value of 2147483648 (2^31). + HTTP/1.1 caches MUST send an Age header in every response. Caches + SHOULD use an arithmetic type of at least 31 bits of range. + +14.7 Allow + + The Allow entity-header field lists the set of methods supported by + the resource identified by the Request-URI. The purpose of this field + is strictly to inform the recipient of valid methods associated with + the resource. An Allow header field MUST be present in a 405 (Method + Not Allowed) response. + + Allow = "Allow" ":" 1#method + + Example of use: + + Allow: GET, HEAD, PUT + + This field cannot prevent a client from trying other methods. + However, the indications given by the Allow header field value SHOULD + be followed. The actual set of allowed methods is defined by the + origin server at the time of each request. + + The Allow header field MAY be provided with a PUT request to + recommend the methods to be supported by the new or modified + resource. 
The server is not required to support these methods and + SHOULD include an Allow header in the response giving the actual + supported methods. + + A proxy MUST NOT modify the Allow header field even if it does not + understand all the methods specified, since the user agent MAY have + other means of communicating with the origin server. + + The Allow header field does not indicate what methods are implemented + at the server level. Servers MAY use the Public response-header field + (section 14.35) to describe what methods are implemented on the + server as a whole. + +14.8 Authorization + + A user agent that wishes to authenticate itself with a server-- + usually, but not necessarily, after receiving a 401 response--MAY do + so by including an Authorization request-header field with the + request. The Authorization field value consists of credentials + containing the authentication information of the user agent for the + realm of the resource being requested. + + + +Fielding, et. al. Standards Track [Page 100] + +RFC 2068 HTTP/1.1 January 1997 + + + Authorization = "Authorization" ":" credentials + + HTTP access authentication is described in section 11. If a request + is authenticated and a realm specified, the same credentials SHOULD + be valid for all other requests within this realm. + + When a shared cache (see section 13.7) receives a request containing + an Authorization field, it MUST NOT return the corresponding response + as a reply to any other request, unless one of the following specific + exceptions holds: + + 1. If the response includes the "proxy-revalidate" Cache-Control + directive, the cache MAY use that response in replying to a + subsequent request, but a proxy cache MUST first revalidate it with + the origin server, using the request-headers from the new request + to allow the origin server to authenticate the new request. + 2. 
If the response includes the "must-revalidate" Cache-Control + directive, the cache MAY use that response in replying to a + subsequent request, but all caches MUST first revalidate it with + the origin server, using the request-headers from the new request + to allow the origin server to authenticate the new request. + 3. If the response includes the "public" Cache-Control directive, it + may be returned in reply to any subsequent request. + +14.9 Cache-Control + + The Cache-Control general-header field is used to specify directives + that MUST be obeyed by all caching mechanisms along the + request/response chain. The directives specify behavior intended to + prevent caches from adversely interfering with the request or + response. These directives typically override the default caching + algorithms. Cache directives are unidirectional in that the presence + of a directive in a request does not imply that the same directive + should be given in the response. + + Note that HTTP/1.0 caches may not implement Cache-Control and may + only implement Pragma: no-cache (see section 14.32). + + Cache directives must be passed through by a proxy or gateway + application, regardless of their significance to that application, + since the directives may be applicable to all recipients along the + request/response chain. It is not possible to specify a cache- + directive for a specific cache. + + Cache-Control = "Cache-Control" ":" 1#cache-directive + + cache-directive = cache-request-directive + | cache-response-directive + + + +Fielding, et. al. 
Standards Track [Page 101] + +RFC 2068 HTTP/1.1 January 1997 + + + cache-request-directive = + "no-cache" [ "=" <"> 1#field-name <"> ] + | "no-store" + | "max-age" "=" delta-seconds + | "max-stale" [ "=" delta-seconds ] + | "min-fresh" "=" delta-seconds + | "only-if-cached" + | cache-extension + + cache-response-directive = + "public" + | "private" [ "=" <"> 1#field-name <"> ] + | "no-cache" [ "=" <"> 1#field-name <"> ] + | "no-store" + | "no-transform" + | "must-revalidate" + | "proxy-revalidate" + | "max-age" "=" delta-seconds + | cache-extension + + cache-extension = token [ "=" ( token | quoted-string ) ] + + When a directive appears without any 1#field-name parameter, the + directive applies to the entire request or response. When such a + directive appears with a 1#field-name parameter, it applies only to + the named field or fields, and not to the rest of the request or + response. This mechanism supports extensibility; implementations of + future versions of the HTTP protocol may apply these directives to + header fields not defined in HTTP/1.1. + + The cache-control directives can be broken down into these general + categories: + + o Restrictions on what is cachable; these may only be imposed by the + origin server. + o Restrictions on what may be stored by a cache; these may be imposed + by either the origin server or the user agent. + o Modifications of the basic expiration mechanism; these may be + imposed by either the origin server or the user agent. + o Controls over cache revalidation and reload; these may only be + imposed by a user agent. + o Control over transformation of entities. + o Extensions to the caching system. + + + + + + + + +Fielding, et. al. Standards Track [Page 102] + +RFC 2068 HTTP/1.1 January 1997 + + +14.9.1 What is Cachable + + By default, a response is cachable if the requirements of the request + method, request header fields, and the response status indicate that + it is cachable. 
Section 13.4 summarizes these defaults for + cachability. The following Cache-Control response directives allow an + origin server to override the default cachability of a response: + +public + Indicates that the response is cachable by any cache, even if it + would normally be non-cachable or cachable only within a non-shared + cache. (See also Authorization, section 14.8, for additional + details.) + +private + Indicates that all or part of the response message is intended for a + single user and MUST NOT be cached by a shared cache. This allows an + origin server to state that the specified parts of the response are + intended for only one user and are not a valid response for requests + by other users. A private (non-shared) cache may cache the response. + + Note: This usage of the word private only controls where the + response may be cached, and cannot ensure the privacy of the + message content. + +no-cache + Indicates that all or part of the response message MUST NOT be cached + anywhere. This allows an origin server to prevent caching even by + caches that have been configured to return stale responses to client + requests. + + Note: Most HTTP/1.0 caches will not recognize or obey this + directive. + +14.9.2 What May be Stored by Caches + + The purpose of the no-store directive is to prevent the inadvertent + release or retention of sensitive information (for example, on backup + tapes). The no-store directive applies to the entire message, and may + be sent either in a response or in a request. If sent in a request, a + cache MUST NOT store any part of either this request or any response + to it. If sent in a response, a cache MUST NOT store any part of + either this response or the request that elicited it. This directive + applies to both non-shared and shared caches. 
"MUST NOT store" in + this context means that the cache MUST NOT intentionally store the + information in non-volatile storage, and MUST make a best-effort + attempt to remove the information from volatile storage as promptly + as possible after forwarding it. + + + +Fielding, et. al. Standards Track [Page 103] + +RFC 2068 HTTP/1.1 January 1997 + + + Even when this directive is associated with a response, users may + explicitly store such a response outside of the caching system (e.g., + with a "Save As" dialog). History buffers may store such responses as + part of their normal operation. + + The purpose of this directive is to meet the stated requirements of + certain users and service authors who are concerned about accidental + releases of information via unanticipated accesses to cache data + structures. While the use of this directive may improve privacy in + some cases, we caution that it is NOT in any way a reliable or + sufficient mechanism for ensuring privacy. In particular, malicious + or compromised caches may not recognize or obey this directive; and + communications networks may be vulnerable to eavesdropping. + +14.9.3 Modifications of the Basic Expiration Mechanism + + The expiration time of an entity may be specified by the origin + server using the Expires header (see section 14.21). Alternatively, + it may be specified using the max-age directive in a response. + + If a response includes both an Expires header and a max-age + directive, the max-age directive overrides the Expires header, even + if the Expires header is more restrictive. This rule allows an origin + server to provide, for a given response, a longer expiration time to + an HTTP/1.1 (or later) cache than to an HTTP/1.0 cache. This may be + useful if certain HTTP/1.0 caches improperly calculate ages or + expiration times, perhaps due to desynchronized clocks. + + Note: most older caches, not compliant with this specification, do + not implement any Cache-Control directives. 
An origin server + wishing to use a Cache-Control directive that restricts, but does + not prevent, caching by an HTTP/1.1-compliant cache may exploit the + requirement that the max-age directive overrides the Expires + header, and the fact that non-HTTP/1.1-compliant caches do not + observe the max-age directive. + + Other directives allow a user agent to modify the basic expiration + mechanism. These directives may be specified on a request: + + max-age + Indicates that the client is willing to accept a response whose age + is no greater than the specified time in seconds. Unless the max-stale + directive is also included, the client is not willing to accept a + stale response. + + min-fresh + Indicates that the client is willing to accept a response whose + freshness lifetime is no less than its current age plus the + + + +Fielding, et. al. Standards Track [Page 104] + +RFC 2068 HTTP/1.1 January 1997 + + + specified time in seconds. That is, the client wants a response + that will still be fresh for at least the specified number of + seconds. + + max-stale + Indicates that the client is willing to accept a response that has + exceeded its expiration time. If max-stale is assigned a value, + then the client is willing to accept a response that has exceeded + its expiration time by no more than the specified number of + seconds. If no value is assigned to max-stale, then the client is + willing to accept a stale response of any age. + + If a cache returns a stale response, either because of a max-stale + directive on a request, or because the cache is configured to + override the expiration time of a response, the cache MUST attach a + Warning header to the stale response, using Warning 10 (Response is + stale).
+ +14.9.4 Cache Revalidation and Reload Controls + + Sometimes a user agent may want or need to insist that a cache + revalidate its cache entry with the origin server (and not just with + the next cache along the path to the origin server), or to reload its + cache entry from the origin server. End-to-end revalidation may be + necessary if either the cache or the origin server has overestimated + the expiration time of the cached response. End-to-end reload may be + necessary if the cache entry has become corrupted for some reason. + + End-to-end revalidation may be requested either when the client does + not have its own local cached copy, in which case we call it + "unspecified end-to-end revalidation", or when the client does have a + local cached copy, in which case we call it "specific end-to-end + revalidation." + + The client can specify these three kinds of action using Cache- + Control request directives: + + End-to-end reload + The request includes a "no-cache" Cache-Control directive or, for + compatibility with HTTP/1.0 clients, "Pragma: no-cache". No field + names may be included with the no-cache directive in a request. The + server MUST NOT use a cached copy when responding to such a + request. + + Specific end-to-end revalidation + The request includes a "max-age=0" Cache-Control directive, which + forces each cache along the path to the origin server to revalidate + its own entry, if any, with the next cache or server. The initial + + + +Fielding, et. al. Standards Track [Page 105] + +RFC 2068 HTTP/1.1 January 1997 + + + request includes a cache-validating conditional with the client's + current validator. + + Unspecified end-to-end revalidation + The request includes a "max-age=0" Cache-Control directive, which + forces each cache along the path to the origin server to revalidate + its own entry, if any, with the next cache or server.
The initial + request does not include a cache-validating conditional; the first + cache along the path (if any) that holds a cache entry for this + resource includes a cache-validating conditional with its current + validator. + + When an intermediate cache is forced, by means of a max-age=0 + directive, to revalidate its own cache entry, and the client has + supplied its own validator in the request, the supplied validator may + differ from the validator currently stored with the cache entry. In + this case, the cache may use either validator in making its own + request without affecting semantic transparency. + + However, the choice of validator may affect performance. The best + approach is for the intermediate cache to use its own validator when + making its request. If the server replies with 304 (Not Modified), + then the cache should return its now validated copy to the client + with a 200 (OK) response. If the server replies with a new entity and + cache validator, however, the intermediate cache should compare the + returned validator with the one provided in the client's request, + using the strong comparison function. If the client's validator is + equal to the origin server's, then the intermediate cache simply + returns 304 (Not Modified). Otherwise, it returns the new entity with + a 200 (OK) response. + + If a request includes the no-cache directive, it should not include + min-fresh, max-stale, or max-age. + + In some cases, such as times of extremely poor network connectivity, + a client may want a cache to return only those responses that it + currently has stored, and not to reload or revalidate with the origin + server. To do this, the client may include the only-if-cached + directive in a request. If it receives this directive, a cache SHOULD + either respond using a cached entry that is consistent with the other + constraints of the request, or respond with a 504 (Gateway Timeout) + status. 
However, if a group of caches is being operated as a unified + system with good internal connectivity, such a request MAY be + forwarded within that group of caches. + + Because a cache may be configured to ignore a server's specified + expiration time, and because a client request may include a max-stale + directive (which has a similar effect), the protocol also includes a + + + +Fielding, et. al. Standards Track [Page 106] + +RFC 2068 HTTP/1.1 January 1997 + + + mechanism for the origin server to require revalidation of a cache + entry on any subsequent use. When the must-revalidate directive is + present in a response received by a cache, that cache MUST NOT use + the entry after it becomes stale to respond to a subsequent request + without first revalidating it with the origin server. (I.e., the + cache must do an end-to-end revalidation every time, if, based solely + on the origin server's Expires or max-age value, the cached response + is stale.) + + The must-revalidate directive is necessary to support reliable + operation for certain protocol features. In all circumstances an + HTTP/1.1 cache MUST obey the must-revalidate directive; in + particular, if the cache cannot reach the origin server for any + reason, it MUST generate a 504 (Gateway Timeout) response. + + Servers should send the must-revalidate directive if and only if + failure to revalidate a request on the entity could result in + incorrect operation, such as a silently unexecuted financial + transaction. Recipients MUST NOT take any automated action that + violates this directive, and MUST NOT automatically provide an + unvalidated copy of the entity if revalidation fails. + + Although this is not recommended, user agents operating under severe + connectivity constraints may violate this directive but, if so, MUST + explicitly warn the user that an unvalidated response has been + provided. The warning MUST be provided on each unvalidated access, + and SHOULD require explicit user confirmation. 
+ + The proxy-revalidate directive has the same meaning as the must- + revalidate directive, except that it does not apply to non-shared + user agent caches. It can be used on a response to an authenticated + request to permit the user's cache to store and later return the + response without needing to revalidate it (since it has already been + authenticated once by that user), while still requiring proxies that + service many users to revalidate each time (in order to make sure + that each user has been authenticated). Note that such authenticated + responses also need the public cache control directive in order to + allow them to be cached at all. + +14.9.5 No-Transform Directive + + Implementers of intermediate caches (proxies) have found it useful to + convert the media type of certain entity bodies. A proxy might, for + example, convert between image formats in order to save cache space + or to reduce the amount of traffic on a slow link. HTTP has to date + been silent on these transformations. + + + + + +Fielding, et. al. Standards Track [Page 107] + +RFC 2068 HTTP/1.1 January 1997 + + + Serious operational problems have already occurred, however, when + these transformations have been applied to entity bodies intended for + certain kinds of applications. For example, applications for medical + imaging, scientific data analysis and those using end-to-end + authentication, all depend on receiving an entity body that is bit + for bit identical to the original entity-body. + + Therefore, if a response includes the no-transform directive, an + intermediate cache or proxy MUST NOT change those headers that are + listed in section 13.5.2 as being subject to the no-transform + directive. This implies that the cache or proxy must not change any + aspect of the entity-body that is specified by these headers. 
+ +14.9.6 Cache Control Extensions + + The Cache-Control header field can be extended through the use of one + or more cache-extension tokens, each with an optional assigned value. + Informational extensions (those which do not require a change in + cache behavior) may be added without changing the semantics of other + directives. Behavioral extensions are designed to work by acting as + modifiers to the existing base of cache directives. Both the new + directive and the standard directive are supplied, such that + applications which do not understand the new directive will default + to the behavior specified by the standard directive, and those that + understand the new directive will recognize it as modifying the + requirements associated with the standard directive. In this way, + extensions to the Cache-Control directives can be made without + requiring changes to the base protocol. + + This extension mechanism depends on a HTTP cache obeying all of the + cache-control directives defined for its native HTTP-version, obeying + certain extensions, and ignoring all directives that it does not + understand. + + For example, consider a hypothetical new response directive called + "community" which acts as a modifier to the "private" directive. We + define this new directive to mean that, in addition to any non-shared + cache, any cache which is shared only by members of the community + named within its value may cache the response. An origin server + wishing to allow the "UCI" community to use an otherwise private + response in their shared cache(s) may do so by including + + Cache-Control: private, community="UCI" + + A cache seeing this header field will act correctly even if the cache + does not understand the "community" cache-extension, since it will + also see and understand the "private" directive and thus default to + the safe behavior. + + + +Fielding, et. al. 
Standards Track [Page 108] + +RFC 2068 HTTP/1.1 January 1997 + + + Unrecognized cache-directives MUST be ignored; it is assumed that any + cache-directive likely to be unrecognized by an HTTP/1.1 cache will + be combined with standard directives (or the response's default + cachability) such that the cache behavior will remain minimally + correct even if the cache does not understand the extension(s). + +14.10 Connection + + The Connection general-header field allows the sender to specify + options that are desired for that particular connection and MUST NOT + be communicated by proxies over further connections. + + The Connection header has the following grammar: + + Connection-header = "Connection" ":" 1#(connection-token) + connection-token = token + + HTTP/1.1 proxies MUST parse the Connection header field before a + message is forwarded and, for each connection-token in this field, + remove any header field(s) from the message with the same name as the + connection-token. Connection options are signaled by the presence of + a connection-token in the Connection header field, not by any + corresponding additional header field(s), since the additional header + field may not be sent if there are no parameters associated with that + connection option. HTTP/1.1 defines the "close" connection option + for the sender to signal that the connection will be closed after + completion of the response. For example, + + Connection: close + + in either the request or the response header fields indicates that + the connection should not be considered `persistent' (section 8.1) + after the current request/response is complete. + + HTTP/1.1 applications that do not support persistent connections MUST + include the "close" connection option in every message. + +14.11 Content-Base + + The Content-Base entity-header field may be used to specify the base + URI for resolving relative URLs within the entity. 
This header field + is described as Base in RFC 1808, which is expected to be revised. + + Content-Base = "Content-Base" ":" absoluteURI + + If no Content-Base field is present, the base URI of an entity is + defined either by its Content-Location (if that Content-Location URI + is an absolute URI) or the URI used to initiate the request, in that + + + +Fielding, et. al. Standards Track [Page 109] + +RFC 2068 HTTP/1.1 January 1997 + + + order of precedence. Note, however, that the base URI of the contents + within the entity-body may be redefined within that entity-body. + +14.12 Content-Encoding + + The Content-Encoding entity-header field is used as a modifier to the + media-type. When present, its value indicates what additional content + codings have been applied to the entity-body, and thus what decoding + mechanisms MUST be applied in order to obtain the media-type + referenced by the Content-Type header field. Content-Encoding is + primarily used to allow a document to be compressed without losing + the identity of its underlying media type. + + Content-Encoding = "Content-Encoding" ":" 1#content-coding + + Content codings are defined in section 3.5. An example of its use is + + Content-Encoding: gzip + + The Content-Encoding is a characteristic of the entity identified by + the Request-URI. Typically, the entity-body is stored with this + encoding and is only decoded before rendering or analogous usage. + + If multiple encodings have been applied to an entity, the content + codings MUST be listed in the order in which they were applied. + + Additional information about the encoding parameters MAY be provided + by other entity-header fields not defined by this specification. + +14.13 Content-Language + + The Content-Language entity-header field describes the natural + language(s) of the intended audience for the enclosed entity. Note + that this may not be equivalent to all the languages used within the + entity-body. 
+ + Content-Language = "Content-Language" ":" 1#language-tag + + Language tags are defined in section 3.10. The primary purpose of + Content-Language is to allow a user to identify and differentiate + entities according to the user's own preferred language. Thus, if the + body content is intended only for a Danish-literate audience, the + appropriate field is + + Content-Language: da + + If no Content-Language is specified, the default is that the content + is intended for all language audiences. This may mean that the sender + + + +Fielding, et. al. Standards Track [Page 110] + +RFC 2068 HTTP/1.1 January 1997 + + + does not consider it to be specific to any natural language, or that + the sender does not know for which language it is intended. + + Multiple languages MAY be listed for content that is intended for + multiple audiences. For example, a rendition of the "Treaty of + Waitangi," presented simultaneously in the original Maori and English + versions, would call for + + Content-Language: mi, en + + However, just because multiple languages are present within an entity + does not mean that it is intended for multiple linguistic audiences. + An example would be a beginner's language primer, such as "A First + Lesson in Latin," which is clearly intended to be used by an + English-literate audience. In this case, the Content-Language should + only include "en". + + Content-Language may be applied to any media type -- it is not + limited to textual documents. + +14.14 Content-Length + + The Content-Length entity-header field indicates the size of the + message-body, in decimal number of octets, sent to the recipient or, + in the case of the HEAD method, the size of the entity-body that + would have been sent had the request been a GET. 
+ + Content-Length = "Content-Length" ":" 1*DIGIT + + An example is + + Content-Length: 3495 + + Applications SHOULD use this field to indicate the size of the + message-body to be transferred, regardless of the media type of the + entity. It must be possible for the recipient to reliably determine + the end of HTTP/1.1 requests containing an entity-body, e.g., because + the request has a valid Content-Length field, uses Transfer-Encoding: + chunked or a multipart body. + + Any Content-Length greater than or equal to zero is a valid value. + Section 4.4 describes how to determine the length of a message-body + if a Content-Length is not given. + + + + + + + + +Fielding, et. al. Standards Track [Page 111] + +RFC 2068 HTTP/1.1 January 1997 + + + Note: The meaning of this field is significantly different from the + corresponding definition in MIME, where it is an optional field + used within the "message/external-body" content-type. In HTTP, it + SHOULD be sent whenever the message's length can be determined + prior to being transferred. + +14.15 Content-Location + + The Content-Location entity-header field may be used to supply the + resource location for the entity enclosed in the message. In the case + where a resource has multiple entities associated with it, and those + entities actually have separate locations by which they might be + individually accessed, the server should provide a Content-Location + for the particular variant which is returned. In addition, a server + SHOULD provide a Content-Location for the resource corresponding to + the response entity. + + Content-Location = "Content-Location" ":" + ( absoluteURI | relativeURI ) + + If no Content-Base header field is present, the value of Content- + Location also defines the base URL for the entity (see section + 14.11). 
+ + The Content-Location value is not a replacement for the original + requested URI; it is only a statement of the location of the resource + corresponding to this particular entity at the time of the request. + Future requests MAY use the Content-Location URI if the desire is to + identify the source of that particular entity. + + A cache cannot assume that an entity with a Content-Location + different from the URI used to retrieve it can be used to respond to + later requests on that Content-Location URI. However, the Content- + Location can be used to differentiate between multiple entities + retrieved from a single requested resource, as described in section + 13.6. + + If the Content-Location is a relative URI, the URI is interpreted + relative to any Content-Base URI provided in the response. If no + Content-Base is provided, the relative URI is interpreted relative to + the Request-URI. + + + + + + + + + + +Fielding, et. al. Standards Track [Page 112] + +RFC 2068 HTTP/1.1 January 1997 + + +14.16 Content-MD5 + + The Content-MD5 entity-header field, as defined in RFC 1864 [23], is + an MD5 digest of the entity-body for the purpose of providing an + end-to-end message integrity check (MIC) of the entity-body. (Note: a + MIC is good for detecting accidental modification of the entity-body + in transit, but is not proof against malicious attacks.) + + Content-MD5 = "Content-MD5" ":" md5-digest + + md5-digest = <base64 of 128 bit MD5 digest as per RFC 1864> + + The Content-MD5 header field may be generated by an origin server to + function as an integrity check of the entity-body. Only origin + servers may generate the Content-MD5 header field; proxies and + gateways MUST NOT generate it, as this would defeat its value as an + end-to-end integrity check. Any recipient of the entity-body, + including gateways and proxies, MAY check that the digest value in + this header field matches that of the entity-body as received.
+ + The MD5 digest is computed based on the content of the entity-body, + including any Content-Encoding that has been applied, but not + including any Transfer-Encoding that may have been applied to the + message-body. If the message is received with a Transfer-Encoding, + that encoding must be removed prior to checking the Content-MD5 value + against the received entity. + + This has the result that the digest is computed on the octets of the + entity-body exactly as, and in the order that, they would be sent if + no Transfer-Encoding were being applied. + + HTTP extends RFC 1864 to permit the digest to be computed for MIME + composite media-types (e.g., multipart/* and message/rfc822), but + this does not change how the digest is computed as defined in the + preceding paragraph. + + Note: There are several consequences of this. The entity-body for + composite types may contain many body-parts, each with its own MIME + and HTTP headers (including Content-MD5, Content-Transfer-Encoding, + and Content-Encoding headers). If a body-part has a Content- + Transfer-Encoding or Content-Encoding header, it is assumed that + the content of the body-part has had the encoding applied, and the + body-part is included in the Content-MD5 digest as is -- i.e., + after the application. The Transfer-Encoding header field is not + allowed within body-parts. + + Note: while the definition of Content-MD5 is exactly the same for + HTTP as in RFC 1864 for MIME entity-bodies, there are several ways + + + +Fielding, et. al. Standards Track [Page 113] + +RFC 2068 HTTP/1.1 January 1997 + + + in which the application of Content-MD5 to HTTP entity-bodies + differs from its application to MIME entity-bodies. One is that + HTTP, unlike MIME, does not use Content-Transfer-Encoding, and does + use Transfer-Encoding and Content-Encoding. 
Another is that HTTP + more frequently uses binary content types than MIME, so it is worth + noting that, in such cases, the byte order used to compute the + digest is the transmission byte order defined for the type. Lastly, + HTTP allows transmission of text types with any of several line + break conventions and not just the canonical form using CRLF. + Conversion of all line breaks to CRLF should not be done before + computing or checking the digest: the line break convention used in + the text actually transmitted should be left unaltered when + computing the digest. + +14.17 Content-Range + + The Content-Range entity-header is sent with a partial entity-body to + specify where in the full entity-body the partial body should be + inserted. It also indicates the total size of the full entity-body. + When a server returns a partial response to a client, it must + describe both the extent of the range covered by the response, and + the length of the entire entity-body. + + Content-Range = "Content-Range" ":" content-range-spec + + content-range-spec = byte-content-range-spec + + byte-content-range-spec = bytes-unit SP first-byte-pos "-" + last-byte-pos "/" entity-length + + entity-length = 1*DIGIT + + Unlike byte-ranges-specifier values, a byte-content-range-spec may + only specify one range, and must contain absolute byte positions for + both the first and last byte of the range. + + A byte-content-range-spec whose last-byte-pos value is less than its + first-byte-pos value, or whose entity-length value is less than or + equal to its last-byte-pos value, is invalid. The recipient of an + invalid byte-content-range-spec MUST ignore it and any content + transferred along with it. + + + + + + + + + + +Fielding, et. al. 
Standards Track [Page 114] + +RFC 2068 HTTP/1.1 January 1997 + + + Examples of byte-content-range-spec values, assuming that the entity + contains a total of 1234 bytes: + + o The first 500 bytes: + + bytes 0-499/1234 + + o The second 500 bytes: + + bytes 500-999/1234 + + o All except for the first 500 bytes: + + bytes 500-1233/1234 + + o The last 500 bytes: + + bytes 734-1233/1234 + + When an HTTP message includes the content of a single range (for + example, a response to a request for a single range, or to a request + for a set of ranges that overlap without any holes), this content is + transmitted with a Content-Range header, and a Content-Length header + showing the number of bytes actually transferred. For example, + + HTTP/1.1 206 Partial content + Date: Wed, 15 Nov 1995 06:25:24 GMT + Last-modified: Wed, 15 Nov 1995 04:58:08 GMT + Content-Range: bytes 21010-47021/47022 + Content-Length: 26012 + Content-Type: image/gif + + When an HTTP message includes the content of multiple ranges (for + example, a response to a request for multiple non-overlapping + ranges), these are transmitted as a multipart MIME message. The + multipart MIME content-type used for this purpose is defined in this + specification to be "multipart/byteranges". See appendix 19.2 for its + definition. + + A client that cannot decode a MIME multipart/byteranges message + should not ask for multiple byte-ranges in a single request. + + When a client requests multiple byte-ranges in one request, the + server SHOULD return them in the order that they appeared in the + request. + + If the server ignores a byte-range-spec because it is invalid, the + server should treat the request as if the invalid Range header field + + + +Fielding, et. al. Standards Track [Page 115] + +RFC 2068 HTTP/1.1 January 1997 + + + did not exist. (Normally, this means return a 200 response containing + the full entity). 
The reason is that the only time a client will make + such an invalid request is when the entity is smaller than the entity + retrieved by a prior request. + +14.18 Content-Type + + The Content-Type entity-header field indicates the media type of the + entity-body sent to the recipient or, in the case of the HEAD method, + the media type that would have been sent had the request been a GET. + + Content-Type = "Content-Type" ":" media-type + Media types are defined in section 3.7. An example of the field is + + Content-Type: text/html; charset=ISO-8859-4 + + Further discussion of methods for identifying the media type of an + entity is provided in section 7.2.1. + +14.19 Date + + The Date general-header field represents the date and time at which + the message was originated, having the same semantics as orig-date in + RFC 822. The field value is an HTTP-date, as described in section + 3.3.1. + + Date = "Date" ":" HTTP-date + + An example is + + Date: Tue, 15 Nov 1994 08:12:31 GMT + + If a message is received via direct connection with the user agent + (in the case of requests) or the origin server (in the case of + responses), then the date can be assumed to be the current date at + the receiving end. However, since the date--as it is believed by the + origin--is important for evaluating cached responses, origin servers + MUST include a Date header field in all responses. Clients SHOULD + only send a Date header field in messages that include an entity- + body, as in the case of the PUT and POST requests, and even then it + is optional. A received message which does not have a Date header + field SHOULD be assigned one by the recipient if the message will be + cached by that recipient or gatewayed via a protocol which requires a + Date. + + + + + + + +Fielding, et. al. Standards Track [Page 116] + +RFC 2068 HTTP/1.1 January 1997 + + + In theory, the date SHOULD represent the moment just before the + entity is generated. 
In practice, the date can be generated at any + time during the message origination without affecting its semantic + value. + + The format of the Date is an absolute date and time as defined by + HTTP-date in section 3.3; it MUST be sent in RFC1123 [8]-date format. + +14.20 ETag + + The ETag entity-header field defines the entity tag for the + associated entity. The headers used with entity tags are described in + sections 14.20, 14.25, 14.26 and 14.43. The entity tag may be used + for comparison with other entities from the same resource (see + section 13.3.2). + + ETag = "ETag" ":" entity-tag + + Examples: + + ETag: "xyzzy" + ETag: W/"xyzzy" + ETag: "" + +14.21 Expires + + The Expires entity-header field gives the date/time after which the + response should be considered stale. A stale cache entry may not + normally be returned by a cache (either a proxy cache or a user + agent cache) unless it is first validated with the origin server (or + with an intermediate cache that has a fresh copy of the entity). See + section 13.2 for further discussion of the expiration model. + + The presence of an Expires field does not imply that the original + resource will change or cease to exist at, before, or after that + time. + + The format is an absolute date and time as defined by HTTP-date in + section 3.3; it MUST be in RFC1123-date format: + + Expires = "Expires" ":" HTTP-date + + + + + + + + + + +Fielding, et. al. Standards Track [Page 117] + +RFC 2068 HTTP/1.1 January 1997 + + + An example of its use is + + Expires: Thu, 01 Dec 1994 16:00:00 GMT + + Note: if a response includes a Cache-Control field with the max-age + directive, that directive overrides the Expires field. + + HTTP/1.1 clients and caches MUST treat other invalid date formats, + especially including the value "0", as in the past (i.e., "already + expired"). + + To mark a response as "already expired," an origin server should use + an Expires date that is equal to the Date header value.
(See the + rules for expiration calculations in section 13.2.4.) + + To mark a response as "never expires," an origin server should use an + Expires date approximately one year from the time the response is + sent. HTTP/1.1 servers should not send Expires dates more than one + year in the future. + + The presence of an Expires header field with a date value of some + time in the future on a response that otherwise would by default be + non-cacheable indicates that the response is cachable, unless + indicated otherwise by a Cache-Control header field (section 14.9). + +14.22 From + + The From request-header field, if given, SHOULD contain an Internet + e-mail address for the human user who controls the requesting user + agent. The address SHOULD be machine-usable, as defined by mailbox + in RFC 822 (as updated by RFC 1123 ): + + From = "From" ":" mailbox + + An example is: + + From: webmaster@w3.org + + This header field MAY be used for logging purposes and as a means for + identifying the source of invalid or unwanted requests. It SHOULD NOT + be used as an insecure form of access protection. The interpretation + of this field is that the request is being performed on behalf of the + person given, who accepts responsibility for the method performed. In + particular, robot agents SHOULD include this header so that the + person responsible for running the robot can be contacted if problems + occur on the receiving end. + + + + + +Fielding, et. al. Standards Track [Page 118] + +RFC 2068 HTTP/1.1 January 1997 + + + The Internet e-mail address in this field MAY be separate from the + Internet host which issued the request. For example, when a request + is passed through a proxy the original issuer's address SHOULD be + used. + + Note: The client SHOULD NOT send the From header field without the + user's approval, as it may conflict with the user's privacy + interests or their site's security policy.
It is strongly + recommended that the user be able to disable, enable, and modify + the value of this field at any time prior to a request. + +14.23 Host + + The Host request-header field specifies the Internet host and port + number of the resource being requested, as obtained from the original + URL given by the user or referring resource (generally an HTTP URL, + as described in section 3.2.2). The Host field value MUST represent + the network location of the origin server or gateway given by the + original URL. This allows the origin server or gateway to + differentiate between internally-ambiguous URLs, such as the root "/" + URL of a server for multiple host names on a single IP address. + + Host = "Host" ":" host [ ":" port ] ; Section 3.2.2 + + A "host" without any trailing port information implies the default + port for the service requested (e.g., "80" for an HTTP URL). For + example, a request on the origin server for + <http://www.w3.org/pub/WWW/> MUST include: + + GET /pub/WWW/ HTTP/1.1 + Host: www.w3.org + + A client MUST include a Host header field in all HTTP/1.1 request + messages on the Internet (i.e., on any message corresponding to a + request for a URL which includes an Internet host address for the + service being requested). If the Host field is not already present, + an HTTP/1.1 proxy MUST add a Host field to the request message prior + to forwarding it on the Internet. All Internet-based HTTP/1.1 servers + MUST respond with a 400 status code to any HTTP/1.1 request message + which lacks a Host header field. + + See sections 5.2 and 19.5.1 for other requirements relating to Host. + +14.24 If-Modified-Since + + The If-Modified-Since request-header field is used with the GET + method to make it conditional: if the requested variant has not been + modified since the time specified in this field, an entity will not + + + +Fielding, et. al.
Standards Track [Page 119] + +RFC 2068 HTTP/1.1 January 1997 + + + be returned from the server; instead, a 304 (not modified) response + will be returned without any message-body. + + If-Modified-Since = "If-Modified-Since" ":" HTTP-date + + An example of the field is: + + If-Modified-Since: Sat, 29 Oct 1994 19:43:31 GMT + + A GET method with an If-Modified-Since header and no Range header + requests that the identified entity be transferred only if it has + been modified since the date given by the If-Modified-Since header. + The algorithm for determining this includes the following cases: + + a)If the request would normally result in anything other than a 200 + (OK) status, or if the passed If-Modified-Since date is invalid, the + response is exactly the same as for a normal GET. A date which is + later than the server's current time is invalid. + + b)If the variant has been modified since the If-Modified-Since date, + the response is exactly the same as for a normal GET. + + c)If the variant has not been modified since a valid If-Modified-Since + date, the server MUST return a 304 (Not Modified) response. + + The purpose of this feature is to allow efficient updates of cached + information with a minimum amount of transaction overhead. + + Note that the Range request-header field modifies the meaning of + If-Modified-Since; see section 14.36 for full details. + + Note that If-Modified-Since times are interpreted by the server, + whose clock may not be synchronized with the client. + + Note that if a client uses an arbitrary date in the If-Modified-Since + header instead of a date taken from the Last-Modified header for the + same request, the client should be aware of the fact that this date + is interpreted in the server's understanding of time. The client + should consider unsynchronized clocks and rounding problems due to + the different encodings of time between the client and server. 
This + includes the possibility of race conditions if the document has + changed between the time it was first requested and the If-Modified- + Since date of a subsequent request, and the possibility of clock- + skew-related problems if the If-Modified-Since date is derived from + the client's clock without correction to the server's clock. + Corrections for different time bases between client and server are at + best approximate due to network latency. + + + + +Fielding, et. al. Standards Track [Page 120] + +RFC 2068 HTTP/1.1 January 1997 + + +14.25 If-Match + + The If-Match request-header field is used with a method to make it + conditional. A client that has one or more entities previously + obtained from the resource can verify that one of those entities is + current by including a list of their associated entity tags in the + If-Match header field. The purpose of this feature is to allow + efficient updates of cached information with a minimum amount of + transaction overhead. It is also used, on updating requests, to + prevent inadvertent modification of the wrong version of a resource. + As a special case, the value "*" matches any current entity of the + resource. + + If-Match = "If-Match" ":" ( "*" | 1#entity-tag ) + + If any of the entity tags match the entity tag of the entity that + would have been returned in the response to a similar GET request + (without the If-Match header) on that resource, or if "*" is given + and any current entity exists for that resource, then the server MAY + perform the requested method as if the If-Match header field did not + exist. + + A server MUST use the strong comparison function (see section 3.11) + to compare the entity tags in If-Match. + + If none of the entity tags match, or if "*" is given and no current + entity exists, the server MUST NOT perform the requested method, and + MUST return a 412 (Precondition Failed) response. 
This behavior is + most useful when the client wants to prevent an updating method, such + as PUT, from modifying a resource that has changed since the client + last retrieved it. + + If the request would, without the If-Match header field, result in + anything other than a 2xx status, then the If-Match header MUST be + ignored. + + The meaning of "If-Match: *" is that the method SHOULD be performed + if the representation selected by the origin server (or by a cache, + possibly using the Vary mechanism, see section 14.43) exists, and + MUST NOT be performed if the representation does not exist. + + + + + + + + + + + +Fielding, et. al. Standards Track [Page 121] + +RFC 2068 HTTP/1.1 January 1997 + + + A request intended to update a resource (e.g., a PUT) MAY include an + If-Match header field to signal that the request method MUST NOT be + applied if the entity corresponding to the If-Match value (a single + entity tag) is no longer a representation of that resource. This + allows the user to indicate that they do not wish the request to be + successful if the resource has been changed without their knowledge. + Examples: + + If-Match: "xyzzy" + If-Match: "xyzzy", "r2d2xxxx", "c3piozzzz" + If-Match: * + +14.26 If-None-Match + + The If-None-Match request-header field is used with a method to make + it conditional. A client that has one or more entities previously + obtained from the resource can verify that none of those entities is + current by including a list of their associated entity tags in the + If-None-Match header field. The purpose of this feature is to allow + efficient updates of cached information with a minimum amount of + transaction overhead. It is also used, on updating requests, to + prevent inadvertent modification of a resource which was not known to + exist. + + As a special case, the value "*" matches any current entity of the + resource. 
+ + If-None-Match = "If-None-Match" ":" ( "*" | 1#entity-tag ) + + If any of the entity tags match the entity tag of the entity that + would have been returned in the response to a similar GET request + (without the If-None-Match header) on that resource, or if "*" is + given and any current entity exists for that resource, then the + server MUST NOT perform the requested method. Instead, if the request + method was GET or HEAD, the server SHOULD respond with a 304 (Not + Modified) response, including the cache-related entity-header fields + (particularly ETag) of one of the entities that matched. For all + other request methods, the server MUST respond with a status of 412 + (Precondition Failed). + + See section 13.3.3 for rules on how to determine if two entity tags + match. The weak comparison function can only be used with GET or HEAD + requests. + + If none of the entity tags match, or if "*" is given and no current + entity exists, then the server MAY perform the requested method as if + the If-None-Match header field did not exist. + + + + +Fielding, et. al. Standards Track [Page 122] + +RFC 2068 HTTP/1.1 January 1997 + + + If the request would, without the If-None-Match header field, result + in anything other than a 2xx status, then the If-None-Match header + MUST be ignored. + + The meaning of "If-None-Match: *" is that the method MUST NOT be + performed if the representation selected by the origin server (or by + a cache, possibly using the Vary mechanism, see section 14.43) + exists, and SHOULD be performed if the representation does not exist. + This feature may be useful in preventing races between PUT + operations. 
+ + Examples: + + If-None-Match: "xyzzy" + If-None-Match: W/"xyzzy" + If-None-Match: "xyzzy", "r2d2xxxx", "c3piozzzz" + If-None-Match: W/"xyzzy", W/"r2d2xxxx", W/"c3piozzzz" + If-None-Match: * + +14.27 If-Range + + If a client has a partial copy of an entity in its cache, and wishes + to have an up-to-date copy of the entire entity in its cache, it + could use the Range request-header with a conditional GET (using + either or both of If-Unmodified-Since and If-Match.) However, if the + condition fails because the entity has been modified, the client + would then have to make a second request to obtain the entire current + entity-body. + + The If-Range header allows a client to "short-circuit" the second + request. Informally, its meaning is `if the entity is unchanged, send + me the part(s) that I am missing; otherwise, send me the entire new + entity.' + + If-Range = "If-Range" ":" ( entity-tag | HTTP-date ) + + If the client has no entity tag for an entity, but does have a Last- + Modified date, it may use that date in a If-Range header. (The server + can distinguish between a valid HTTP-date and any form of entity-tag + by examining no more than two characters.) The If-Range header should + only be used together with a Range header, and must be ignored if the + request does not include a Range header, or if the server does not + support the sub-range operation. + + + + + + + + +Fielding, et. al. Standards Track [Page 123] + +RFC 2068 HTTP/1.1 January 1997 + + + If the entity tag given in the If-Range header matches the current + entity tag for the entity, then the server should provide the + specified sub-range of the entity using a 206 (Partial content) + response. If the entity tag does not match, then the server should + return the entire entity using a 200 (OK) response. + +14.28 If-Unmodified-Since + + The If-Unmodified-Since request-header field is used with a method to + make it conditional. 
If the requested resource has not been modified + since the time specified in this field, the server should perform the + requested operation as if the If-Unmodified-Since header were not + present. + + If the requested variant has been modified since the specified time, + the server MUST NOT perform the requested operation, and MUST return + a 412 (Precondition Failed). + + If-Unmodified-Since = "If-Unmodified-Since" ":" HTTP-date + + An example of the field is: + + If-Unmodified-Since: Sat, 29 Oct 1994 19:43:31 GMT + + If the request normally (i.e., without the If-Unmodified-Since + header) would result in anything other than a 2xx status, the If- + Unmodified-Since header should be ignored. + + If the specified date is invalid, the header is ignored. + +14.29 Last-Modified + + The Last-Modified entity-header field indicates the date and time at + which the origin server believes the variant was last modified. + + Last-Modified = "Last-Modified" ":" HTTP-date + + An example of its use is + + Last-Modified: Tue, 15 Nov 1994 12:45:26 GMT + + The exact meaning of this header field depends on the implementation + of the origin server and the nature of the original resource. For + files, it may be just the file system last-modified time. For + entities with dynamically included parts, it may be the most recent + of the set of last-modify times for its component parts. For database + gateways, it may be the last-update time stamp of the record. For + virtual objects, it may be the last time the internal state changed. + + + +Fielding, et. al. Standards Track [Page 124] + +RFC 2068 HTTP/1.1 January 1997 + + + An origin server MUST NOT send a Last-Modified date which is later + than the server's time of message origination. In such cases, where + the resource's last modification would indicate some time in the + future, the server MUST replace that date with the message + origination date. 
+ + An origin server should obtain the Last-Modified value of the entity + as close as possible to the time that it generates the Date value of + its response. This allows a recipient to make an accurate assessment + of the entity's modification time, especially if the entity changes + near the time that the response is generated. + + HTTP/1.1 servers SHOULD send Last-Modified whenever feasible. + +14.30 Location + + The Location response-header field is used to redirect the recipient + to a location other than the Request-URI for completion of the + request or identification of a new resource. For 201 (Created) + responses, the Location is that of the new resource which was created + by the request. For 3xx responses, the location SHOULD indicate the + server's preferred URL for automatic redirection to the resource. The + field value consists of a single absolute URL. + + Location = "Location" ":" absoluteURI + + An example is + + Location: http://www.w3.org/pub/WWW/People.html + + Note: The Content-Location header field (section 14.15) differs + from Location in that the Content-Location identifies the original + location of the entity enclosed in the request. It is therefore + possible for a response to contain header fields for both Location + and Content-Location. Also see section 13.10 for cache requirements + of some methods. + +14.31 Max-Forwards + + The Max-Forwards request-header field may be used with the TRACE + method (section 9.8) to limit the number of proxies or gateways + that can forward the request to the next inbound server. This can be + useful when the client is attempting to trace a request chain which + appears to be failing or looping in mid-chain. + + Max-Forwards = "Max-Forwards" ":" 1*DIGIT + + + + + +Fielding, et. al. Standards Track [Page 125] + +RFC 2068 HTTP/1.1 January 1997 + + + The Max-Forwards value is a decimal integer indicating the remaining + number of times this request message may be forwarded.
+ + Each proxy or gateway recipient of a TRACE request containing a Max- + Forwards header field SHOULD check and update its value prior to + forwarding the request. If the received value is zero (0), the + recipient SHOULD NOT forward the request; instead, it SHOULD respond + as the final recipient with a 200 (OK) response containing the + received request message as the response entity-body (as described in + section 9.8). If the received Max-Forwards value is greater than + zero, then the forwarded message SHOULD contain an updated Max- + Forwards field with a value decremented by one (1). + + The Max-Forwards header field SHOULD be ignored for all other methods + defined by this specification and for any extension methods for which + it is not explicitly referred to as part of that method definition. + +14.32 Pragma + + The Pragma general-header field is used to include implementation- + specific directives that may apply to any recipient along the + request/response chain. All pragma directives specify optional + behavior from the viewpoint of the protocol; however, some systems + MAY require that behavior be consistent with the directives. + + Pragma = "Pragma" ":" 1#pragma-directive + + pragma-directive = "no-cache" | extension-pragma + extension-pragma = token [ "=" ( token | quoted-string ) ] + + When the no-cache directive is present in a request message, an + application SHOULD forward the request toward the origin server even + if it has a cached copy of what is being requested. This pragma + directive has the same semantics as the no-cache cache-directive (see + section 14.9) and is defined here for backwards compatibility with + HTTP/1.0. Clients SHOULD include both header fields when a no-cache + request is sent to a server not known to be HTTP/1.1 compliant. 
+ + Pragma directives MUST be passed through by a proxy or gateway + application, regardless of their significance to that application, + since the directives may be applicable to all recipients along the + request/response chain. It is not possible to specify a pragma for a + specific recipient; however, any pragma directive not relevant to a + recipient SHOULD be ignored by that recipient. + + + + + + + +Fielding, et. al. Standards Track [Page 126] + +RFC 2068 HTTP/1.1 January 1997 + + + HTTP/1.1 clients SHOULD NOT send the Pragma request-header. HTTP/1.1 + caches SHOULD treat "Pragma: no-cache" as if the client had sent + "Cache-Control: no-cache". No new Pragma directives will be defined + in HTTP. + +14.33 Proxy-Authenticate + + The Proxy-Authenticate response-header field MUST be included as part + of a 407 (Proxy Authentication Required) response. The field value + consists of a challenge that indicates the authentication scheme and + parameters applicable to the proxy for this Request-URI. + + Proxy-Authenticate = "Proxy-Authenticate" ":" challenge + + The HTTP access authentication process is described in section 11. + Unlike WWW-Authenticate, the Proxy-Authenticate header field applies + only to the current connection and SHOULD NOT be passed on to + downstream clients. However, an intermediate proxy may need to obtain + its own credentials by requesting them from the downstream client, + which in some circumstances will appear as if the proxy is forwarding + the Proxy-Authenticate header field. + +14.34 Proxy-Authorization + + The Proxy-Authorization request-header field allows the client to + identify itself (or its user) to a proxy which requires + authentication. The Proxy-Authorization field value consists of + credentials containing the authentication information of the user + agent for the proxy and/or realm of the resource being requested. 
+ + Proxy-Authorization = "Proxy-Authorization" ":" credentials + + The HTTP access authentication process is described in section 11. + Unlike Authorization, the Proxy-Authorization header field applies + only to the next outbound proxy that demanded authentication using + the Proxy-Authenticate field. When multiple proxies are used in a + chain, the Proxy-Authorization header field is consumed by the first + outbound proxy that was expecting to receive credentials. A proxy MAY + relay the credentials from the client request to the next proxy if + that is the mechanism by which the proxies cooperatively authenticate + a given request. + +14.35 Public + + The Public response-header field lists the set of methods supported + by the server. The purpose of this field is strictly to inform the + recipient of the capabilities of the server regarding unusual + methods. The methods listed may or may not be applicable to the + + + +Fielding, et. al. Standards Track [Page 127] + +RFC 2068 HTTP/1.1 January 1997 + + + Request-URI; the Allow header field (section 14.7) MAY be used to + indicate methods allowed for a particular URI. + + Public = "Public" ":" 1#method + + Example of use: + + Public: OPTIONS, MGET, MHEAD, GET, HEAD + + This header field applies only to the server directly connected to + the client (i.e., the nearest neighbor in a chain of connections). If + the response passes through a proxy, the proxy MUST either remove the + Public header field or replace it with one applicable to its own + capabilities. + +14.36 Range + +14.36.1 Byte Ranges + + Since all HTTP entities are represented in HTTP messages as sequences + of bytes, the concept of a byte range is meaningful for any HTTP + entity. (However, not all clients and servers need to support byte- + range operations.) + + Byte range specifications in HTTP apply to the sequence of bytes in + the entity-body (not necessarily the same as the message-body). 
+ + A byte range operation may specify a single range of bytes, or a set + of ranges within a single entity. + + ranges-specifier = byte-ranges-specifier + + byte-ranges-specifier = bytes-unit "=" byte-range-set + + byte-range-set = 1#( byte-range-spec | suffix-byte-range-spec ) + + byte-range-spec = first-byte-pos "-" [last-byte-pos] + + first-byte-pos = 1*DIGIT + + last-byte-pos = 1*DIGIT + + The first-byte-pos value in a byte-range-spec gives the byte-offset + of the first byte in a range. The last-byte-pos value gives the + byte-offset of the last byte in the range; that is, the byte + positions specified are inclusive. Byte offsets start at zero. + + + + + +Fielding, et. al. Standards Track [Page 128] + +RFC 2068 HTTP/1.1 January 1997 + + + If the last-byte-pos value is present, it must be greater than or + equal to the first-byte-pos in that byte-range-spec, or the byte- + range-spec is invalid. The recipient of an invalid byte-range-spec + must ignore it. + + If the last-byte-pos value is absent, or if the value is greater than + or equal to the current length of the entity-body, last-byte-pos is + taken to be equal to one less than the current length of the entity- + body in bytes. + + By its choice of last-byte-pos, a client can limit the number of + bytes retrieved without knowing the size of the entity. + + suffix-byte-range-spec = "-" suffix-length + + suffix-length = 1*DIGIT + + A suffix-byte-range-spec is used to specify the suffix of the + entity-body, of a length given by the suffix-length value. (That is, + this form specifies the last N bytes of an entity-body.) If the + entity is shorter than the specified suffix-length, the entire + entity-body is used. 
+ + Examples of byte-ranges-specifier values (assuming an entity-body of + length 10000): + + o The first 500 bytes (byte offsets 0-499, inclusive): + + bytes=0-499 + + o The second 500 bytes (byte offsets 500-999, inclusive): + + bytes=500-999 + + o The final 500 bytes (byte offsets 9500-9999, inclusive): + + bytes=-500 + + o Or + + bytes=9500- + + o The first and last bytes only (bytes 0 and 9999): + + bytes=0-0,-1 + + + + + + +Fielding, et. al. Standards Track [Page 129] + +RFC 2068 HTTP/1.1 January 1997 + + + o Several legal but not canonical specifications of the second + 500 bytes (byte offsets 500-999, inclusive): + + bytes=500-600,601-999 + + bytes=500-700,601-999 + +14.36.2 Range Retrieval Requests + + HTTP retrieval requests using conditional or unconditional GET + methods may request one or more sub-ranges of the entity, instead of + the entire entity, using the Range request header, which applies to + the entity returned as the result of the request: + + Range = "Range" ":" ranges-specifier + + A server MAY ignore the Range header. However, HTTP/1.1 origin + servers and intermediate caches SHOULD support byte ranges when + possible, since Range supports efficient recovery from partially + failed transfers, and supports efficient partial retrieval of large + entities. + + If the server supports the Range header and the specified range or + ranges are appropriate for the entity: + + o The presence of a Range header in an unconditional GET modifies + what is returned if the GET is otherwise successful. In other + words, the response carries a status code of 206 (Partial + Content) instead of 200 (OK). + + o The presence of a Range header in a conditional GET (a request + using one or both of If-Modified-Since and If-None-Match, or + one or both of If-Unmodified-Since and If-Match) modifies what + is returned if the GET is otherwise successful and the condition + is true. 
It does not affect the 304 (Not Modified) response + returned if the conditional is false. + + In some cases, it may be more appropriate to use the If-Range header + (see section 14.27) in addition to the Range header. + + If a proxy that supports ranges receives a Range request, forwards + the request to an inbound server, and receives an entire entity in + reply, it SHOULD only return the requested range to its client. It + SHOULD store the entire received response in its cache, if that is + consistent with its cache allocation policies. + + + + + + +Fielding, et. al. Standards Track [Page 130] + +RFC 2068 HTTP/1.1 January 1997 + + +14.37 Referer + + The Referer[sic] request-header field allows the client to specify, + for the server's benefit, the address (URI) of the resource from + which the Request-URI was obtained (the "referrer", although the + header field is misspelled.) The Referer request-header allows a + server to generate lists of back-links to resources for interest, + logging, optimized caching, etc. It also allows obsolete or mistyped + links to be traced for maintenance. The Referer field MUST NOT be + sent if the Request-URI was obtained from a source that does not have + its own URI, such as input from the user keyboard. + + Referer = "Referer" ":" ( absoluteURI | relativeURI ) + + Example: + + Referer: http://www.w3.org/hypertext/DataSources/Overview.html + + If the field value is a partial URI, it SHOULD be interpreted + relative to the Request-URI. The URI MUST NOT include a fragment. + + Note: Because the source of a link may be private information or + may reveal an otherwise private information source, it is strongly + recommended that the user be able to select whether or not the + Referer field is sent. For example, a browser client could have a + toggle switch for browsing openly/anonymously, which would + respectively enable/disable the sending of Referer and From + information. 
+ +14.38 Retry-After + + The Retry-After response-header field can be used with a 503 (Service + Unavailable) response to indicate how long the service is expected to + be unavailable to the requesting client. The value of this field can + be either an HTTP-date or an integer number of seconds (in decimal) + after the time of the response. + + Retry-After = "Retry-After" ":" ( HTTP-date | delta-seconds ) + + Two examples of its use are + + Retry-After: Fri, 31 Dec 1999 23:59:59 GMT + Retry-After: 120 + + In the latter example, the delay is 2 minutes. + + + + + + +Fielding, et. al. Standards Track [Page 131] + +RFC 2068 HTTP/1.1 January 1997 + + +14.39 Server + + The Server response-header field contains information about the + software used by the origin server to handle the request. The field + can contain multiple product tokens (section 3.8) and comments + identifying the server and any significant subproducts. The product + tokens are listed in order of their significance for identifying the + application. + + Server = "Server" ":" 1*( product | comment ) + + Example: + + Server: CERN/3.0 libwww/2.17 + + If the response is being forwarded through a proxy, the proxy + application MUST NOT modify the Server response-header. Instead, it + SHOULD include a Via field (as described in section 14.44). + + Note: Revealing the specific software version of the server may + allow the server machine to become more vulnerable to attacks + against software that is known to contain security holes. Server + implementers are encouraged to make this field a configurable + option. + +14.40 Transfer-Encoding + + The Transfer-Encoding general-header field indicates what (if any) + type of transformation has been applied to the message body in order + to safely transfer it between the sender and the recipient. This + differs from the Content-Encoding in that the transfer coding is a + property of the message, not of the entity. 
+ + Transfer-Encoding = "Transfer-Encoding" ":" 1#transfer- + coding + + Transfer codings are defined in section 3.6. An example is: + + Transfer-Encoding: chunked + + Many older HTTP/1.0 applications do not understand the Transfer- + Encoding header. + +14.41 Upgrade + + The Upgrade general-header allows the client to specify what + additional communication protocols it supports and would like to use + if the server finds it appropriate to switch protocols. The server + + + +Fielding, et. al. Standards Track [Page 132] + +RFC 2068 HTTP/1.1 January 1997 + + + MUST use the Upgrade header field within a 101 (Switching Protocols) + response to indicate which protocol(s) are being switched. + + Upgrade = "Upgrade" ":" 1#product + + For example, + + Upgrade: HTTP/2.0, SHTTP/1.3, IRC/6.9, RTA/x11 + + The Upgrade header field is intended to provide a simple mechanism + for transition from HTTP/1.1 to some other, incompatible protocol. It + does so by allowing the client to advertise its desire to use another + protocol, such as a later version of HTTP with a higher major version + number, even though the current request has been made using HTTP/1.1. + This eases the difficult transition between incompatible protocols by + allowing the client to initiate a request in the more commonly + supported protocol while indicating to the server that it would like + to use a "better" protocol if available (where "better" is determined + by the server, possibly according to the nature of the method and/or + resource being requested). + + The Upgrade header field only applies to switching application-layer + protocols upon the existing transport-layer connection. Upgrade + cannot be used to insist on a protocol change; its acceptance and use + by the server is optional. 
The capabilities and nature of the + application-layer communication after the protocol change is entirely + dependent upon the new protocol chosen, although the first action + after changing the protocol MUST be a response to the initial HTTP + request containing the Upgrade header field. + + The Upgrade header field only applies to the immediate connection. + Therefore, the upgrade keyword MUST be supplied within a Connection + header field (section 14.10) whenever Upgrade is present in an + HTTP/1.1 message. + + The Upgrade header field cannot be used to indicate a switch to a + protocol on a different connection. For that purpose, it is more + appropriate to use a 301, 302, 303, or 305 redirection response. + + This specification only defines the protocol name "HTTP" for use by + the family of Hypertext Transfer Protocols, as defined by the HTTP + version rules of section 3.1 and future updates to this + specification. Any token can be used as a protocol name; however, it + will only be useful if both the client and server associate the name + with the same protocol. + + + + + + +Fielding, et. al. Standards Track [Page 133] + +RFC 2068 HTTP/1.1 January 1997 + + +14.42 User-Agent + + The User-Agent request-header field contains information about the + user agent originating the request. This is for statistical purposes, + the tracing of protocol violations, and automated recognition of user + agents for the sake of tailoring responses to avoid particular user + agent limitations. User agents SHOULD include this field with + requests. The field can contain multiple product tokens (section 3.8) + and comments identifying the agent and any subproducts which form a + significant part of the user agent. By convention, the product tokens + are listed in order of their significance for identifying the + application. 
+ + User-Agent = "User-Agent" ":" 1*( product | comment ) + + Example: + + User-Agent: CERN-LineMode/2.15 libwww/2.17b3 + +14.43 Vary + + The Vary response-header field is used by a server to signal that the + response entity was selected from the available representations of + the response using server-driven negotiation (section 12). Field- + names listed in Vary headers are those of request-headers. The Vary + field value indicates either that the given set of header fields + encompass the dimensions over which the representation might vary, or + that the dimensions of variance are unspecified ("*") and thus may + vary over any aspect of future requests. + + Vary = "Vary" ":" ( "*" | 1#field-name ) + + An HTTP/1.1 server MUST include an appropriate Vary header field with + any cachable response that is subject to server-driven negotiation. + Doing so allows a cache to properly interpret future requests on that + resource and informs the user agent about the presence of negotiation + on that resource. A server SHOULD include an appropriate Vary header + field with a non-cachable response that is subject to server-driven + negotiation, since this might provide the user agent with useful + information about the dimensions over which the response might vary. + + The set of header fields named by the Vary field value is known as + the "selecting" request-headers. + + When the cache receives a subsequent request whose Request-URI + specifies one or more cache entries including a Vary header, the + cache MUST NOT use such a cache entry to construct a response to the + new request unless all of the headers named in the cached Vary header + + + +Fielding, et. al. Standards Track [Page 134] + +RFC 2068 HTTP/1.1 January 1997 + + + are present in the new request, and all of the stored selecting + request-headers from the previous request match the corresponding + headers in the new request. 
+ + The selecting request-headers from two requests are defined to match + if and only if the selecting request-headers in the first request can + be transformed to the selecting request-headers in the second request + by adding or removing linear whitespace (LWS) at places where this is + allowed by the corresponding BNF, and/or combining multiple message- + header fields with the same field name following the rules about + message headers in section 4.2. + + A Vary field value of "*" signals that unspecified parameters, + possibly other than the contents of request-header fields (e.g., the + network address of the client), play a role in the selection of the + response representation. Subsequent requests on that resource can + only be properly interpreted by the origin server, and thus a cache + MUST forward a (possibly conditional) request even when it has a + fresh response cached for the resource. See section 13.6 for use of + the Vary header by caches. + + A Vary field value consisting of a list of field-names signals that + the representation selected for the response is based on a selection + algorithm which considers ONLY the listed request-header field values + in selecting the most appropriate representation. A cache MAY assume + that the same selection will be made for future requests with the + same values for the listed field names, for the duration of time in + which the response is fresh. + + The field-names given are not limited to the set of standard + request-header fields defined by this specification. Field names are + case-insensitive. + +14.44 Via + + The Via general-header field MUST be used by gateways and proxies to + indicate the intermediate protocols and recipients between the user + agent and the server on requests, and between the origin server and + the client on responses. 
It is analogous to the "Received" field of + RFC 822 and is intended to be used for tracking message forwards, + avoiding request loops, and identifying the protocol capabilities of + all senders along the request/response chain. + + + + + + + + + +Fielding, et. al. Standards Track [Page 135] + +RFC 2068 HTTP/1.1 January 1997 + + + Via = "Via" ":" 1#( received-protocol received-by [ comment ] ) + + received-protocol = [ protocol-name "/" ] protocol-version + protocol-name = token + protocol-version = token + received-by = ( host [ ":" port ] ) | pseudonym + pseudonym = token + + The received-protocol indicates the protocol version of the message + received by the server or client along each segment of the + request/response chain. The received-protocol version is appended to + the Via field value when the message is forwarded so that information + about the protocol capabilities of upstream applications remains + visible to all recipients. + + The protocol-name is optional if and only if it would be "HTTP". The + received-by field is normally the host and optional port number of a + recipient server or client that subsequently forwarded the message. + However, if the real host is considered to be sensitive information, + it MAY be replaced by a pseudonym. If the port is not given, it MAY + be assumed to be the default port of the received-protocol. + + Multiple Via field values represent each proxy or gateway that has + forwarded the message. Each recipient MUST append its information + such that the end result is ordered according to the sequence of + forwarding applications. + + Comments MAY be used in the Via header field to identify the software + of the recipient proxy or gateway, analogous to the User-Agent and + Server header fields. However, all comments in the Via field are + optional and MAY be removed by any recipient prior to forwarding the + message. 
+ + For example, a request message could be sent from an HTTP/1.0 user + agent to an internal proxy code-named "fred", which uses HTTP/1.1 to + forward the request to a public proxy at nowhere.com, which completes + the request by forwarding it to the origin server at www.ics.uci.edu. + The request received by www.ics.uci.edu would then have the following + Via header field: + + Via: 1.0 fred, 1.1 nowhere.com (Apache/1.1) + + Proxies and gateways used as a portal through a network firewall + SHOULD NOT, by default, forward the names and ports of hosts within + the firewall region. This information SHOULD only be propagated if + explicitly enabled. If not enabled, the received-by host of any host + behind the firewall SHOULD be replaced by an appropriate pseudonym + for that host. + + + +Fielding, et. al. Standards Track [Page 136] + +RFC 2068 HTTP/1.1 January 1997 + + + For organizations that have strong privacy requirements for hiding + internal structures, a proxy MAY combine an ordered subsequence of + Via header field entries with identical received-protocol values into + a single such entry. For example, + + Via: 1.0 ricky, 1.1 ethel, 1.1 fred, 1.0 lucy + + could be collapsed to + + Via: 1.0 ricky, 1.1 mertz, 1.0 lucy + + Applications SHOULD NOT combine multiple entries unless they are all + under the same organizational control and the hosts have already been + replaced by pseudonyms. Applications MUST NOT combine entries which + have different received-protocol values. + +14.45 Warning + + The Warning response-header field is used to carry additional + information about the status of a response which may not be reflected + by the response status code. This information is typically, though + not exclusively, used to warn about a possible lack of semantic + transparency from caching operations. 
+ + Warning headers are sent with responses using: + + Warning = "Warning" ":" 1#warning-value + + warning-value = warn-code SP warn-agent SP warn-text + warn-code = 2DIGIT + warn-agent = ( host [ ":" port ] ) | pseudonym + ; the name or pseudonym of the server adding + ; the Warning header, for use in debugging + warn-text = quoted-string + + A response may carry more than one Warning header. + + The warn-text should be in a natural language and character set that + is most likely to be intelligible to the human user receiving the + response. This decision may be based on any available knowledge, + such as the location of the cache or user, the Accept-Language field + in a request, the Content-Language field in a response, etc. The + default language is English and the default character set is ISO- + 8859-1. + + If a character set other than ISO-8859-1 is used, it MUST be encoded + in the warn-text using the method described in RFC 1522 [14]. + + + + +Fielding, et. al. Standards Track [Page 137] + +RFC 2068 HTTP/1.1 January 1997 + + + Any server or cache may add Warning headers to a response. New + Warning headers should be added after any existing Warning headers. A + cache MUST NOT delete any Warning header that it received with a + response. However, if a cache successfully validates a cache entry, + it SHOULD remove any Warning headers previously attached to that + entry except as specified for specific Warning codes. It MUST then + add any Warning headers received in the validating response. In other + words, Warning headers are those that would be attached to the most + recent relevant response. + + When multiple Warning headers are attached to a response, the user + agent SHOULD display as many of them as possible, in the order that + they appear in the response. 
If it is not possible to display all of + the warnings, the user agent should follow these heuristics: + + o Warnings that appear early in the response take priority over those + appearing later in the response. + o Warnings in the user's preferred character set take priority over + warnings in other character sets but with identical warn-codes and + warn-agents. + + Systems that generate multiple Warning headers should order them with + this user agent behavior in mind. + + This is a list of the currently-defined warn-codes, each with a + recommended warn-text in English, and a description of its meaning. + +10 Response is stale + MUST be included whenever the returned response is stale. A cache may + add this warning to any response, but may never remove it until the + response is known to be fresh. + +11 Revalidation failed + MUST be included if a cache returns a stale response because an + attempt to revalidate the response failed, due to an inability to + reach the server. A cache may add this warning to any response, but + may never remove it until the response is successfully revalidated. + +12 Disconnected operation + SHOULD be included if the cache is intentionally disconnected from + the rest of the network for a period of time. + +13 Heuristic expiration + MUST be included if the cache heuristically chose a freshness + lifetime greater than 24 hours and the response's age is greater than + 24 hours. + + + + + +Fielding, et. al. Standards Track [Page 138] + +RFC 2068 HTTP/1.1 January 1997 + + +14 Transformation applied + MUST be added by an intermediate cache or proxy if it applies any + transformation changing the content-coding (as specified in the + Content-Encoding header) or media-type (as specified in the + Content-Type header) of the response, unless this Warning code + already appears in the response. MUST NOT be deleted from a response + even after revalidation. 
+ +99 Miscellaneous warning + The warning text may include arbitrary information to be presented to + a human user, or logged. A system receiving this warning MUST NOT + take any automated action. + +14.46 WWW-Authenticate + + The WWW-Authenticate response-header field MUST be included in 401 + (Unauthorized) response messages. The field value consists of at + least one challenge that indicates the authentication scheme(s) and + parameters applicable to the Request-URI. + + WWW-Authenticate = "WWW-Authenticate" ":" 1#challenge + + The HTTP access authentication process is described in section 11. + User agents MUST take special care in parsing the WWW-Authenticate + field value if it contains more than one challenge, or if more than + one WWW-Authenticate header field is provided, since the contents of + a challenge may itself contain a comma-separated list of + authentication parameters. + +15 Security Considerations + + This section is meant to inform application developers, information + providers, and users of the security limitations in HTTP/1.1 as + described by this document. The discussion does not include + definitive solutions to the problems revealed, though it does make + some suggestions for reducing security risks. + +15.1 Authentication of Clients + + The Basic authentication scheme is not a secure method of user + authentication, nor does it in any way protect the entity, which is + transmitted in clear text across the physical network used as the + carrier. HTTP does not prevent additional authentication schemes and + encryption mechanisms from being employed to increase security or the + addition of enhancements (such as schemes to use one-time passwords) + to Basic authentication. + + + + + +Fielding, et. al. Standards Track [Page 139] + +RFC 2068 HTTP/1.1 January 1997 + + + The most serious flaw in Basic authentication is that it results in + the essentially clear text transmission of the user's password over + the physical network. 
It is this problem which Digest Authentication + attempts to address. + + Because Basic authentication involves the clear text transmission of + passwords it SHOULD never be used (without enhancements) to protect + sensitive or valuable information. + + A common use of Basic authentication is for identification purposes + -- requiring the user to provide a user name and password as a means + of identification, for example, for purposes of gathering accurate + usage statistics on a server. When used in this way it is tempting to + think that there is no danger in its use if illicit access to the + protected documents is not a major concern. This is only correct if + the server issues both user name and password to the users and in + particular does not allow the user to choose his or her own password. + The danger arises because naive users frequently reuse a single + password to avoid the task of maintaining multiple passwords. + + If a server permits users to select their own passwords, then the + threat is not only illicit access to documents on the server but also + illicit access to the accounts of all users who have chosen to use + their account password. If users are allowed to choose their own + password that also means the server must maintain files containing + the (presumably encrypted) passwords. Many of these may be the + account passwords of users perhaps at distant sites. The owner or + administrator of such a system could conceivably incur liability if + this information is not maintained in a secure fashion. + + Basic Authentication is also vulnerable to spoofing by counterfeit + servers. If a user can be led to believe that he is connecting to a + host containing information protected by basic authentication when in + fact he is connecting to a hostile server or gateway then the + attacker can request a password, store it for later use, and feign an + error. This type of attack is not possible with Digest Authentication + [32]. 
Server implementers SHOULD guard against the possibility of + this sort of counterfeiting by gateways or CGI scripts. In particular + it is very dangerous for a server to simply turn over a connection to + a gateway since that gateway can then use the persistent connection + mechanism to engage in multiple transactions with the client while + impersonating the original server in a way that is not detectable by + the client. + +15.2 Offering a Choice of Authentication Schemes + + An HTTP/1.1 server may return multiple challenges with a 401 + (Unauthorized) response, and each challenge may use a different + + + +Fielding, et. al. Standards Track [Page 140] + +RFC 2068 HTTP/1.1 January 1997 + + + scheme. The order of the challenges returned to the user agent is in + the order that the server would prefer they be chosen. The server + should order its challenges with the "most secure" authentication + scheme first. A user agent should choose as the challenge to be made + to the user the first one that the user agent understands. + + When the server offers choices of authentication schemes using the + WWW-Authenticate header, the "security" of the authentication is only + as good as its weakest scheme: a malicious user could capture the set + of challenges and try to authenticate him/herself using the weakest of + the authentication schemes. Thus, the ordering serves more to protect + the user's credentials than the server's information. + + A possible man-in-the-middle (MITM) attack would be to add a weak + authentication scheme to the set of choices, hoping that the client + will use one that exposes the user's credentials (e.g. password). For + this reason, the client should always use the strongest scheme that + it understands from the choices accepted. + + An even better MITM attack would be to remove all offered choices, + and to insert a challenge that requests Basic authentication.
For + this reason, user agents that are concerned about this kind of attack + could remember the strongest authentication scheme ever requested by + a server and produce a warning message that requires user + confirmation before using a weaker one. A particularly insidious way + to mount such a MITM attack would be to offer a "free" proxy caching + service to gullible users. + +15.3 Abuse of Server Log Information + + A server is in the position to save personal data about a user's + requests which may identify their reading patterns or subjects of + interest. This information is clearly confidential in nature and its + handling may be constrained by law in certain countries. People using + the HTTP protocol to provide data are responsible for ensuring that + such material is not distributed without the permission of any + individuals that are identifiable by the published results. + +15.4 Transfer of Sensitive Information + + Like any generic data transfer protocol, HTTP cannot regulate the + content of the data that is transferred, nor is there any a priori + method of determining the sensitivity of any particular piece of + information within the context of any given request. Therefore, + applications SHOULD supply as much control over this information as + possible to the provider of that information. Four header fields are + worth special mention in this context: Server, Via, Referer and From. + + + + +Fielding, et. al. Standards Track [Page 141] + +RFC 2068 HTTP/1.1 January 1997 + + + Revealing the specific software version of the server may allow the + server machine to become more vulnerable to attacks against software + that is known to contain security holes. Implementers SHOULD make the + Server header field a configurable option. + + Proxies which serve as a portal through a network firewall SHOULD + take special precautions regarding the transfer of header information + that identifies the hosts behind the firewall. 
In particular, they + SHOULD remove, or replace with sanitized versions, any Via fields + generated behind the firewall. + + The Referer field allows reading patterns to be studied and reverse + links drawn. Although it can be very useful, its power can be abused + if user details are not separated from the information contained in + the Referer. Even when the personal information has been removed, the + Referer field may indicate a private document's URI whose publication + would be inappropriate. + + The information sent in the From field might conflict with the user's + privacy interests or their site's security policy, and hence it + SHOULD NOT be transmitted without the user being able to disable, + enable, and modify the contents of the field. The user MUST be able + to set the contents of this field within a user preference or + application defaults configuration. + + We suggest, though do not require, that a convenient toggle interface + be provided for the user to enable or disable the sending of From and + Referer information. + +15.5 Attacks Based On File and Path Names + + Implementations of HTTP origin servers SHOULD be careful to restrict + the documents returned by HTTP requests to be only those that were + intended by the server administrators. If an HTTP server translates + HTTP URIs directly into file system calls, the server MUST take + special care not to serve files that were not intended to be + delivered to HTTP clients. For example, UNIX, Microsoft Windows, and + other operating systems use ".." as a path component to indicate a + directory level above the current one. On such a system, an HTTP + server MUST disallow any such construct in the Request-URI if it + would otherwise allow access to a resource outside those intended to + be accessible via the HTTP server. 
Similarly, files intended for + reference only internally to the server (such as access control + files, configuration files, and script code) MUST be protected from + inappropriate retrieval, since they might contain sensitive + information. Experience has shown that minor bugs in such HTTP server + implementations have turned into security risks. + + + + +Fielding, et. al. Standards Track [Page 142] + +RFC 2068 HTTP/1.1 January 1997 + + +15.6 Personal Information + + HTTP clients are often privy to large amounts of personal information + (e.g. the user's name, location, mail address, passwords, encryption + keys, etc.), and SHOULD be very careful to prevent unintentional + leakage of this information via the HTTP protocol to other sources. + We very strongly recommend that a convenient interface be provided + for the user to control dissemination of such information, and that + designers and implementers be particularly careful in this area. + History shows that errors in this area are often both serious + security and/or privacy problems, and often generate highly adverse + publicity for the implementer's company. + +15.7 Privacy Issues Connected to Accept Headers + + Accept request-headers can reveal information about the user to all + servers which are accessed. The Accept-Language header in particular + can reveal information the user would consider to be of a private + nature, because the understanding of particular languages is often + strongly correlated to the membership of a particular ethnic group. + User agents which offer the option to configure the contents of an + Accept-Language header to be sent in every request are strongly + encouraged to let the configuration process include a message which + makes the user aware of the loss of privacy involved. 
+ + An approach that limits the loss of privacy would be for a user agent + to omit the sending of Accept-Language headers by default, and to ask + the user whether it should start sending Accept-Language headers to a + server if it detects, by looking for any Vary response-header fields + generated by the server, that such sending could improve the quality + of service. + + Elaborate user-customized accept header fields sent in every request, + in particular if these include quality values, can be used by servers + as relatively reliable and long-lived user identifiers. Such user + identifiers would allow content providers to do click-trail tracking, + and would allow collaborating content providers to match cross-server + click-trails or form submissions of individual users. Note that for + many users not behind a proxy, the network address of the host + running the user agent will also serve as a long-lived user + identifier. In environments where proxies are used to enhance + privacy, user agents should be conservative in offering accept header + configuration options to end users. As an extreme privacy measure, + proxies could filter the accept headers in relayed requests. General + purpose user agents which provide a high degree of header + configurability should warn users about the loss of privacy which can + be involved. + + + + +Fielding, et. al. Standards Track [Page 143] + +RFC 2068 HTTP/1.1 January 1997 + + +15.8 DNS Spoofing + + Clients using HTTP rely heavily on the Domain Name Service, and are + thus generally prone to security attacks based on the deliberate + mis-association of IP addresses and DNS names. Clients need to be + cautious in assuming the continuing validity of an IP number/DNS name + association. + + In particular, HTTP clients SHOULD rely on their name resolver for + confirmation of an IP number/DNS name association, rather than + caching the result of previous host name lookups. 
Many platforms + already can cache host name lookups locally when appropriate, and + they SHOULD be configured to do so. These lookups should be cached, + however, only when the TTL (Time To Live) information reported by the + name server makes it likely that the cached information will remain + useful. + + If HTTP clients cache the results of host name lookups in order to + achieve a performance improvement, they MUST observe the TTL + information reported by DNS. + + If HTTP clients do not observe this rule, they could be spoofed when + a previously-accessed server's IP address changes. As network + renumbering is expected to become increasingly common, the + possibility of this form of attack will grow. Observing this + requirement thus reduces this potential security vulnerability. + + This requirement also improves the load-balancing behavior of clients + for replicated servers using the same DNS name and reduces the + likelihood of a user's experiencing failure in accessing sites which + use that strategy. + +15.9 Location Headers and Spoofing + + If a single server supports multiple organizations that do not trust + one another, then it must check the values of Location and Content- + Location headers in responses that are generated under control of + said organizations to make sure that they do not attempt to + invalidate resources over which they have no authority. + +16 Acknowledgments + + This specification makes heavy use of the augmented BNF and generic + constructs defined by David H. Crocker for RFC 822. Similarly, it + reuses many of the definitions provided by Nathaniel Borenstein and + Ned Freed for MIME. We hope that their inclusion in this + specification will help reduce past confusion over the relationship + between HTTP and Internet mail message formats. + + + +Fielding, et. al. Standards Track [Page 144] + +RFC 2068 HTTP/1.1 January 1997 + + + The HTTP protocol has evolved considerably over the past four years. 
+ It has benefited from a large and active developer community--the + many people who have participated on the www-talk mailing list--and + it is that community which has been most responsible for the success + of HTTP and of the World-Wide Web in general. Marc Andreessen, Robert + Cailliau, Daniel W. Connolly, Bob Denny, John Franks, Jean-Francois + Groff, Phillip M. Hallam-Baker, Hakon W. Lie, Ari Luotonen, Rob + McCool, Lou Montulli, Dave Raggett, Tony Sanders, and Marc + VanHeyningen deserve special recognition for their efforts in + defining early aspects of the protocol. + + This document has benefited greatly from the comments of all those + participating in the HTTP-WG. In addition to those already mentioned, + the following individuals have contributed to this specification: + + Gary Adams Albert Lunde + Harald Tveit Alvestrand John C. Mallery + Keith Ball Jean-Philippe Martin-Flatin + Brian Behlendorf Larry Masinter + Paul Burchard Mitra + Maurizio Codogno David Morris + Mike Cowlishaw Gavin Nicol + Roman Czyborra Bill Perry + Michael A. Dolan Jeffrey Perry + David J. Fiander Scott Powers + Alan Freier Owen Rees + Marc Hedlund Luigi Rizzo + Greg Herlihy David Robinson + Koen Holtman Marc Salomon + Alex Hopmann Rich Salz + Bob Jernigan Allan M. Schiffman + Shel Kaphan Jim Seidman + Rohit Khare Chuck Shotton + John Klensin Eric W. Sink + Martijn Koster Simon E. Spero + Alexei Kosut Richard N. Taylor + David M. Kristol Robert S. Thau + Daniel LaLiberte Bill (BearHeart) Weinman + Ben Laurie Francois Yergeau + Paul J. Leach Mary Ellen Zurko + Daniel DuBois + + Much of the content and presentation of the caching design is due to + suggestions and comments from individuals including: Shel Kaphan, + Paul Leach, Koen Holtman, David Morris, and Larry Masinter. + + + + + + +Fielding, et. al. 
Standards Track [Page 145] + +RFC 2068 HTTP/1.1 January 1997 + + + Most of the specification of ranges is based on work originally done + by Ari Luotonen and John Franks, with additional input from Steve + Zilles. + + Thanks to the "cave men" of Palo Alto. You know who you are. + + Jim Gettys (the current editor of this document) wishes particularly + to thank Roy Fielding, the previous editor of this document, along + with John Klensin, Jeff Mogul, Paul Leach, Dave Kristol, Koen + Holtman, John Franks, Alex Hopmann, and Larry Masinter for their + help. + +17 References + + [1] Alvestrand, H., "Tags for the identification of languages", RFC + 1766, UNINETT, March 1995. + + [2] Anklesaria, F., McCahill, M., Lindner, P., Johnson, D., Torrey, + D., and B. Alberti. "The Internet Gopher Protocol: (a distributed + document search and retrieval protocol)", RFC 1436, University of + Minnesota, March 1993. + + [3] Berners-Lee, T., "Universal Resource Identifiers in WWW: A + Unifying Syntax for the Expression of Names and Addresses of Objects + on the Network as used in the World-Wide Web", RFC 1630, CERN, June + 1994. + + [4] Berners-Lee, T., Masinter, L., and M. McCahill, "Uniform Resource + Locators (URL)", RFC 1738, CERN, Xerox PARC, University of Minnesota, + December 1994. + + [5] Berners-Lee, T., and D. Connolly, "HyperText Markup Language + Specification - 2.0", RFC 1866, MIT/LCS, November 1995. + + [6] Berners-Lee, T., Fielding, R., and H. Frystyk, "Hypertext + Transfer Protocol -- HTTP/1.0.", RFC 1945 MIT/LCS, UC Irvine, May + 1996. + + [7] Freed, N., and N. Borenstein, "Multipurpose Internet Mail + Extensions (MIME) Part One: Format of Internet Message Bodies", RFC + 2045, Innosoft, First Virtual, November 1996. + + [8] Braden, R., "Requirements for Internet hosts - application and + support", STD 3, RFC 1123, IETF, October 1989. + + [9] Crocker, D., "Standard for the Format of ARPA Internet Text + Messages", STD 11, RFC 822, UDEL, August 1982.
+ + + + +Fielding, et. al. Standards Track [Page 146] + +RFC 2068 HTTP/1.1 January 1997 + + + [10] Davis, F., Kahle, B., Morris, H., Salem, J., Shen, T., Wang, R., + Sui, J., and M. Grinbaum. "WAIS Interface Protocol Prototype + Functional Specification", (v1.5), Thinking Machines Corporation, + April 1990. + + [11] Fielding, R., "Relative Uniform Resource Locators", RFC 1808, UC + Irvine, June 1995. + + [12] Horton, M., and R. Adams. "Standard for interchange of USENET + messages", RFC 1036, AT&T Bell Laboratories, Center for Seismic + Studies, December 1987. + + [13] Kantor, B., and P. Lapsley. "Network News Transfer Protocol: A + Proposed Standard for the Stream-Based Transmission of News", RFC + 977, UC San Diego, UC Berkeley, February 1986. + + [14] Moore, K., "MIME (Multipurpose Internet Mail Extensions) Part + Three: Message Header Extensions for Non-ASCII Text", RFC 2047, + University of Tennessee, November 1996. + + [15] Nebel, E., and L. Masinter. "Form-based File Upload in HTML", + RFC 1867, Xerox Corporation, November 1995. + + [16] Postel, J., "Simple Mail Transfer Protocol", STD 10, RFC 821, + USC/ISI, August 1982. + + [17] Postel, J., "Media Type Registration Procedure", RFC 2048, + USC/ISI, November 1996. + + [18] Postel, J., and J. Reynolds, "File Transfer Protocol (FTP)", STD + 9, RFC 959, USC/ISI, October 1985. + + [19] Reynolds, J., and J. Postel, "Assigned Numbers", STD 2, RFC + 1700, USC/ISI, October 1994. + + [20] Sollins, K., and L. Masinter, "Functional Requirements for + Uniform Resource Names", RFC 1737, MIT/LCS, Xerox Corporation, + December 1994. + + [21] US-ASCII. Coded Character Set - 7-Bit American Standard Code for + Information Interchange. Standard ANSI X3.4-1986, ANSI, 1986. + + [22] ISO-8859. International Standard -- Information Processing -- + 8-bit Single-Byte Coded Graphic Character Sets -- + Part 1: Latin alphabet No. 1, ISO 8859-1:1987. + Part 2: Latin alphabet No. 2, ISO 8859-2, 1987. + Part 3: Latin alphabet No.
3, ISO 8859-3, 1988. + Part 4: Latin alphabet No. 4, ISO 8859-4, 1988. + + + +Fielding, et. al. Standards Track [Page 147] + +RFC 2068 HTTP/1.1 January 1997 + + + Part 5: Latin/Cyrillic alphabet, ISO 8859-5, 1988. + Part 6: Latin/Arabic alphabet, ISO 8859-6, 1987. + Part 7: Latin/Greek alphabet, ISO 8859-7, 1987. + Part 8: Latin/Hebrew alphabet, ISO 8859-8, 1988. + Part 9: Latin alphabet No. 5, ISO 8859-9, 1990. + + [23] Meyers, J., and M. Rose "The Content-MD5 Header Field", RFC + 1864, Carnegie Mellon, Dover Beach Consulting, October, 1995. + + [24] Carpenter, B., and Y. Rekhter, "Renumbering Needs Work", RFC + 1900, IAB, February 1996. + + [25] Deutsch, P., "GZIP file format specification version 4.3." RFC + 1952, Aladdin Enterprises, May 1996. + + [26] Venkata N. Padmanabhan and Jeffrey C. Mogul. Improving HTTP + Latency. Computer Networks and ISDN Systems, v. 28, pp. 25-35, Dec. + 1995. Slightly revised version of paper in Proc. 2nd International + WWW Conf. '94: Mosaic and the Web, Oct. 1994, which is available at + http://www.ncsa.uiuc.edu/SDG/IT94/Proceedings/DDay/mogul/ + HTTPLatency.html. + + [27] Joe Touch, John Heidemann, and Katia Obraczka, "Analysis of HTTP + Performance", <URL: http://www.isi.edu/lsam/ib/http-perf/>, + USC/Information Sciences Institute, June 1996 + + [28] Mills, D., "Network Time Protocol, Version 3, Specification, + Implementation and Analysis", RFC 1305, University of Delaware, March + 1992. + + [29] Deutsch, P., "DEFLATE Compressed Data Format Specification + version 1.3." RFC 1951, Aladdin Enterprises, May 1996. + + [30] Spero, S., "Analysis of HTTP Performance Problems" + <URL:http://sunsite.unc.edu/mdma-release/http-prob.html>. + + [31] Deutsch, P., and J-L. Gailly, "ZLIB Compressed Data Format + Specification version 3.3", RFC 1950, Aladdin Enterprises, Info-ZIP, + May 1996. + + [32] Franks, J., Hallam-Baker, P., Hostetler, J., Leach, P., + Luotonen, A., Sink, E., and L. Stewart, "An Extension to HTTP : + Digest Access Authentication", RFC 2069, January 1997. + + + + + + + + +Fielding, et. al.
Standards Track [Page 148] + +RFC 2068 HTTP/1.1 January 1997 + + +18 Authors' Addresses + + Roy T. Fielding + Department of Information and Computer Science + University of California + Irvine, CA 92717-3425, USA + + Fax: +1 (714) 824-4056 + EMail: fielding@ics.uci.edu + + + Jim Gettys + MIT Laboratory for Computer Science + 545 Technology Square + Cambridge, MA 02139, USA + + Fax: +1 (617) 258 8682 + EMail: jg@w3.org + + + Jeffrey C. Mogul + Western Research Laboratory + Digital Equipment Corporation + 250 University Avenue + Palo Alto, California, 94305, USA + + EMail: mogul@wrl.dec.com + + + Henrik Frystyk Nielsen + W3 Consortium + MIT Laboratory for Computer Science + 545 Technology Square + Cambridge, MA 02139, USA + + Fax: +1 (617) 258 8682 + EMail: frystyk@w3.org + + + Tim Berners-Lee + Director, W3 Consortium + MIT Laboratory for Computer Science + 545 Technology Square + Cambridge, MA 02139, USA + + Fax: +1 (617) 258 8682 + EMail: timbl@w3.org + + + + +Fielding, et. al. Standards Track [Page 149] + +RFC 2068 HTTP/1.1 January 1997 + + +19 Appendices + +19.1 Internet Media Type message/http + + In addition to defining the HTTP/1.1 protocol, this document serves + as the specification for the Internet media type "message/http". The + following is to be registered with IANA. + + Media Type name: message + Media subtype name: http + Required parameters: none + Optional parameters: version, msgtype + + version: The HTTP-Version number of the enclosed message + (e.g., "1.1"). If not present, the version can be + determined from the first line of the body. + + msgtype: The message type -- "request" or "response". If not + present, the type can be determined from the first + line of the body. 
+ + Encoding considerations: only "7bit", "8bit", or "binary" are + permitted + + Security considerations: none + +19.2 Internet Media Type multipart/byteranges + + When an HTTP message includes the content of multiple ranges (for + example, a response to a request for multiple non-overlapping + ranges), these are transmitted as a multipart MIME message. The + multipart media type for this purpose is called + "multipart/byteranges". + + The multipart/byteranges media type includes two or more parts, each + with its own Content-Type and Content-Range fields. The parts are + separated using a MIME boundary parameter. + + Media Type name: multipart + Media subtype name: byteranges + Required parameters: boundary + Optional parameters: none + + Encoding considerations: only "7bit", "8bit", or "binary" are + permitted + + Security considerations: none + + + + +Fielding, et. al. Standards Track [Page 150] + +RFC 2068 HTTP/1.1 January 1997 + + +For example: + + HTTP/1.1 206 Partial content + Date: Wed, 15 Nov 1995 06:25:24 GMT + Last-modified: Wed, 15 Nov 1995 04:58:08 GMT + Content-type: multipart/byteranges; boundary=THIS_STRING_SEPARATES + + --THIS_STRING_SEPARATES + Content-type: application/pdf + Content-range: bytes 500-999/8000 + + ...the first range... + --THIS_STRING_SEPARATES + Content-type: application/pdf + Content-range: bytes 7000-7999/8000 + + ...the second range + --THIS_STRING_SEPARATES-- + +19.3 Tolerant Applications + + Although this document specifies the requirements for the generation + of HTTP/1.1 messages, not all applications will be correct in their + implementation. We therefore recommend that operational applications + be tolerant of deviations whenever those deviations can be + interpreted unambiguously. + + Clients SHOULD be tolerant in parsing the Status-Line and servers + tolerant when parsing the Request-Line. In particular, they SHOULD + accept any amount of SP or HT characters between fields, even though + only a single SP is required. 
+ + The line terminator for message-header fields is the sequence CRLF. + However, we recommend that applications, when parsing such headers, + recognize a single LF as a line terminator and ignore the leading CR. + + The character set of an entity-body should be labeled as the lowest + common denominator of the character codes used within that body, with + the exception that no label is preferred over the labels US-ASCII or + ISO-8859-1. + + Additional rules for requirements on parsing and encoding of dates + and other potential problems with date encodings include: + + o HTTP/1.1 clients and caches should assume that an RFC-850 date + which appears to be more than 50 years in the future is in fact + in the past (this helps solve the "year 2000" problem). + + + + +Fielding, et. al. Standards Track [Page 151] + +RFC 2068 HTTP/1.1 January 1997 + + + o An HTTP/1.1 implementation may internally represent a parsed + Expires date as earlier than the proper value, but MUST NOT + internally represent a parsed Expires date as later than the + proper value. + + o All expiration-related calculations must be done in GMT. The + local time zone MUST NOT influence the calculation or comparison + of an age or expiration time. + + o If an HTTP header incorrectly carries a date value with a time + zone other than GMT, it must be converted into GMT using the most + conservative possible conversion. + +19.4 Differences Between HTTP Entities and MIME Entities + + HTTP/1.1 uses many of the constructs defined for Internet Mail (RFC + 822 [9]) and the Multipurpose Internet Mail Extensions (MIME [7]) to allow + entities to be transmitted in an open variety of representations and + with extensible mechanisms. However, MIME [7] discusses mail, and + HTTP has a few features that are different from those described in + MIME.
These differences were carefully chosen to optimize + performance over binary connections, to allow greater freedom in the + use of new media types, to make date comparisons easier, and to + acknowledge the practice of some early HTTP servers and clients. + + This appendix describes specific areas where HTTP differs from MIME. + Proxies and gateways to strict MIME environments SHOULD be aware of + these differences and provide the appropriate conversions where + necessary. Proxies and gateways from MIME environments to HTTP also + need to be aware of the differences because some conversions may be + required. + +19.4.1 Conversion to Canonical Form + + MIME requires that an Internet mail entity be converted to canonical + form prior to being transferred. Section 3.7.1 of this document + describes the forms allowed for subtypes of the "text" media type + when transmitted over HTTP. MIME requires that content with a type of + "text" represent line breaks as CRLF and forbids the use of CR or LF + outside of line break sequences. HTTP allows CRLF, bare CR, and bare + LF to indicate a line break within text content when a message is + transmitted over HTTP. + + Where it is possible, a proxy or gateway from HTTP to a strict MIME + environment SHOULD translate all line breaks within the text media + types described in section 3.7.1 of this document to the MIME + canonical form of CRLF. Note, however, that this may be complicated + by the presence of a Content-Encoding and by the fact that HTTP + + + +Fielding, et. al. Standards Track [Page 152] + +RFC 2068 HTTP/1.1 January 1997 + + + allows the use of some character sets which do not use octets 13 and + 10 to represent CR and LF, as is the case for some multi-byte + character sets. + +19.4.2 Conversion of Date Formats + + HTTP/1.1 uses a restricted set of date formats (section 3.3.1) to + simplify the process of date comparison. 
Proxies and gateways from + other protocols SHOULD ensure that any Date header field present in a + message conforms to one of the HTTP/1.1 formats and rewrite the date + if necessary. + +19.4.3 Introduction of Content-Encoding + + MIME does not include any concept equivalent to HTTP/1.1's Content- + Encoding header field. Since this acts as a modifier on the media + type, proxies and gateways from HTTP to MIME-compliant protocols MUST + either change the value of the Content-Type header field or decode + the entity-body before forwarding the message. (Some experimental + applications of Content-Type for Internet mail have used a media-type + parameter of ";conversions=<content-coding>" to perform an equivalent + function as Content-Encoding. However, this parameter is not part of + MIME.) + +19.4.4 No Content-Transfer-Encoding + + HTTP does not use the Content-Transfer-Encoding (CTE) field of MIME. + Proxies and gateways from MIME-compliant protocols to HTTP MUST + remove any non-identity CTE ("quoted-printable" or "base64") encoding + prior to delivering the response message to an HTTP client. + + Proxies and gateways from HTTP to MIME-compliant protocols are + responsible for ensuring that the message is in the correct format + and encoding for safe transport on that protocol, where "safe + transport" is defined by the limitations of the protocol being used. + Such a proxy or gateway SHOULD label the data with an appropriate + Content-Transfer-Encoding if doing so will improve the likelihood of + safe transport over the destination protocol. + +19.4.5 HTTP Header Fields in Multipart Body-Parts + + In MIME, most header fields in multipart body-parts are generally + ignored unless the field name begins with "Content-". In HTTP/1.1, + multipart body-parts may contain any HTTP header fields which are + significant to the meaning of that part. + + + + + + +Fielding, et. al.
Standards Track [Page 153] + +RFC 2068 HTTP/1.1 January 1997 + + +19.4.6 Introduction of Transfer-Encoding + + HTTP/1.1 introduces the Transfer-Encoding header field (section + 14.40). Proxies/gateways MUST remove any transfer coding prior to + forwarding a message via a MIME-compliant protocol. + + A process for decoding the "chunked" transfer coding (section 3.6) + can be represented in pseudo-code as: + + length := 0 + read chunk-size, chunk-ext (if any) and CRLF + while (chunk-size > 0) { + read chunk-data and CRLF + append chunk-data to entity-body + length := length + chunk-size + read chunk-size and CRLF + } + read entity-header + while (entity-header not empty) { + append entity-header to existing header fields + read entity-header + } + Content-Length := length + Remove "chunked" from Transfer-Encoding + +19.4.7 MIME-Version + + HTTP is not a MIME-compliant protocol (see appendix 19.4). However, + HTTP/1.1 messages may include a single MIME-Version general-header + field to indicate what version of the MIME protocol was used to + construct the message. Use of the MIME-Version header field indicates + that the message is in full compliance with the MIME protocol. + Proxies/gateways are responsible for ensuring full compliance (where + possible) when exporting HTTP messages to strict MIME environments. + + MIME-Version = "MIME-Version" ":" 1*DIGIT "." 1*DIGIT + + MIME version "1.0" is the default for use in HTTP/1.1. However, + HTTP/1.1 message parsing and semantics are defined by this document + and not the MIME specification. + +19.5 Changes from HTTP/1.0 + + This section summarizes major differences between versions HTTP/1.0 + and HTTP/1.1. + + + + + + +Fielding, et. al. 
Standards Track [Page 154] + +RFC 2068 HTTP/1.1 January 1997 + + +19.5.1 Changes to Simplify Multi-homed Web Servers and Conserve IP + Addresses + + The requirements that clients and servers support the Host request- + header, report an error if the Host request-header (section 14.23) is + missing from an HTTP/1.1 request, and accept absolute URIs (section + 5.1.2) are among the most important changes defined by this + specification. + + Older HTTP/1.0 clients assumed a one-to-one relationship of IP + addresses and servers; there was no other established mechanism for + distinguishing the intended server of a request than the IP address + to which that request was directed. The changes outlined above will + allow the Internet, once older HTTP clients are no longer common, to + support multiple Web sites from a single IP address, greatly + simplifying large operational Web servers, where allocation of many + IP addresses to a single host has created serious problems. The + Internet will also be able to recover the IP addresses that have been + allocated for the sole purpose of allowing special-purpose domain + names to be used in root-level HTTP URLs. Given the rate of growth of + the Web, and the number of servers already deployed, it is extremely + important that all implementations of HTTP (including updates to + existing HTTP/1.0 applications) correctly implement these + requirements: + + o Both clients and servers MUST support the Host request-header. + + o Host request-headers are required in HTTP/1.1 requests. + + o Servers MUST report a 400 (Bad Request) error if an HTTP/1.1 + request does not include a Host request-header. + + o Servers MUST accept absolute URIs. + + + + + + + + + + + + + + + + + + +Fielding, et. al. 
Standards Track [Page 155] + +RFC 2068 HTTP/1.1 January 1997 + + +19.6 Additional Features + + This appendix documents protocol elements used by some existing HTTP + implementations, but not consistently and correctly across most + HTTP/1.1 applications. Implementers should be aware of these + features, but cannot rely upon their presence in, or interoperability + with, other HTTP/1.1 applications. Some of these describe proposed + experimental features, and some describe features that experimental + deployment found lacking that are now addressed in the base HTTP/1.1 + specification. + +19.6.1 Additional Request Methods + +19.6.1.1 PATCH + + The PATCH method is similar to PUT except that the entity contains a + list of differences between the original version of the resource + identified by the Request-URI and the desired content of the resource + after the PATCH action has been applied. The list of differences is + in a format defined by the media type of the entity (e.g., + "application/diff") and MUST include sufficient information to allow + the server to recreate the changes necessary to convert the original + version of the resource to the desired version. + + If the request passes through a cache and the Request-URI identifies + a currently cached entity, that entity MUST be removed from the + cache. Responses to this method are not cachable. + + The actual method for determining how the patched resource is placed, + and what happens to its predecessor, is defined entirely by the + origin server. If the original version of the resource being patched + included a Content-Version header field, the request entity MUST + include a Derived-From header field corresponding to the value of the + original Content-Version header field. Applications are encouraged to + use these fields for constructing versioning relationships and + resolving version conflicts. + + PATCH requests must obey the message transmission requirements set + out in section 8.2. 
+ + Caches that implement PATCH should invalidate cached responses as + defined in section 13.10 for PUT. + +19.6.1.2 LINK + + The LINK method establishes one or more Link relationships between + the existing resource identified by the Request-URI and other + existing resources. The difference between LINK and other methods + + + +Fielding, et. al. Standards Track [Page 156] + +RFC 2068 HTTP/1.1 January 1997 + + + allowing links to be established between resources is that the LINK + method does not allow any message-body to be sent in the request and + does not directly result in the creation of new resources. + + If the request passes through a cache and the Request-URI identifies + a currently cached entity, that entity MUST be removed from the + cache. Responses to this method are not cachable. + + Caches that implement LINK should invalidate cached responses as + defined in section 13.10 for PUT. + +19.6.1.3 UNLINK + + The UNLINK method removes one or more Link relationships from the + existing resource identified by the Request-URI. These relationships + may have been established using the LINK method or by any other + method supporting the Link header. The removal of a link to a + resource does not imply that the resource ceases to exist or becomes + inaccessible for future references. + + If the request passes through a cache and the Request-URI identifies + a currently cached entity, that entity MUST be removed from the + cache. Responses to this method are not cachable. + + Caches that implement UNLINK should invalidate cached responses as + defined in section 13.10 for PUT. 
+ +19.6.2 Additional Header Field Definitions + +19.6.2.1 Alternates + + The Alternates response-header field has been proposed as a means for + the origin server to inform the client about other available + representations of the requested resource, along with their + distinguishing attributes, and thus providing a more reliable means + for a user agent to perform subsequent selection of another + representation which better fits the desires of its user (described + as agent-driven negotiation in section 12). + + + + + + + + + + + + + +Fielding, et. al. Standards Track [Page 157] + +RFC 2068 HTTP/1.1 January 1997 + + + The Alternates header field is orthogonal to the Vary header field in + that both may coexist in a message without affecting the + interpretation of the response or the available representations. It + is expected that Alternates will provide a significant improvement + over the server-driven negotiation provided by the Vary field for + those resources that vary over common dimensions like type and + language. + + The Alternates header field will be defined in a future + specification. + +19.6.2.2 Content-Version + + The Content-Version entity-header field defines the version tag + associated with a rendition of an evolving entity. Together with the + Derived-From field described in section 19.6.2.3, it allows a group + of people to work simultaneously on the creation of a work as an + iterative process. The field should be used to allow evolution of a + particular work along a single path rather than derived works or + renditions in different representations. 
+ + Content-Version = "Content-Version" ":" quoted-string + + Examples of the Content-Version field include: + + Content-Version: "2.1.2" + Content-Version: "Fred 19950116-12:26:48" + Content-Version: "2.5a4-omega7" + +19.6.2.3 Derived-From + + The Derived-From entity-header field can be used to indicate the + version tag of the resource from which the enclosed entity was + derived before modifications were made by the sender. This field is + used to help manage the process of merging successive changes to a + resource, particularly when such changes are being made in parallel + and from multiple sources. + + Derived-From = "Derived-From" ":" quoted-string + + An example use of the field is: + + Derived-From: "2.1.1" + + The Derived-From field is required for PUT and PATCH requests if the + entity being sent was previously retrieved from the same URI and a + Content-Version header was included with the entity when it was last + retrieved. + + + +Fielding, et. al. Standards Track [Page 158] + +RFC 2068 HTTP/1.1 January 1997 + + +19.6.2.4 Link + + The Link entity-header field provides a means for describing a + relationship between two resources, generally between the requested + resource and some other resource. An entity MAY include multiple Link + values. Links at the metainformation level typically indicate + relationships like hierarchical structure and navigation paths. The + Link field is semantically equivalent to the <LINK> element in + HTML.[5] + + Link = "Link" ":" #("<" URI ">" *( ";" link-param ) ) + + link-param = ( ( "rel" "=" relationship ) + | ( "rev" "=" relationship ) + | ( "title" "=" quoted-string ) + | ( "anchor" "=" <"> URI <"> ) + | ( link-extension ) ) + + link-extension = token [ "=" ( token | quoted-string ) ] + + relationship = sgml-name + | ( <"> sgml-name *( SP sgml-name) <"> ) + + sgml-name = ALPHA *( ALPHA | DIGIT | "." | "-" ) + + Relationship values are case-insensitive and MAY be extended within + the constraints of the sgml-name syntax.
The title parameter MAY be + used to label the destination of a link such that it can be used as + identification within a human-readable menu. The anchor parameter MAY + be used to indicate a source anchor other than the entire current + resource, such as a fragment of this resource or a third resource. + + Examples of usage include: + + Link: <http://www.cern.ch/TheBook/chapter2>; rel="Previous" + + Link: <mailto:timbl@w3.org>; rev="Made"; title="Tim Berners-Lee" + + The first example indicates that chapter2 is previous to this + resource in a logical navigation path. The second indicates that the + person responsible for making the resource available is identified by + the given e-mail address. + +19.6.2.5 URI + + The URI header field has, in past versions of this specification, + been used as a combination of the existing Location, Content- + Location, and Vary header fields as well as the future Alternates + + + +Fielding, et. al. Standards Track [Page 159] + +RFC 2068 HTTP/1.1 January 1997 + + + field (above). Its primary purpose has been to include a list of + additional URIs for the resource, including names and mirror + locations. However, it has become clear that the combination of many + different functions within this single field has been a barrier to + consistently and correctly implementing any of those functions. + Furthermore, we believe that the identification of names and mirror + locations would be better performed via the Link header field. The + URI header field is therefore deprecated in favor of those other + fields. + + URI-header = "URI" ":" 1#( "<" URI ">" ) + +19.7 Compatibility with Previous Versions + + It is beyond the scope of a protocol specification to mandate + compliance with previous versions. HTTP/1.1 was deliberately + designed, however, to make supporting previous versions easy.
It is + worth noting that at the time of composing this specification, we + would expect commercial HTTP/1.1 servers to: + + o recognize the format of the Request-Line for HTTP/0.9, 1.0, and 1.1 + requests; + + o understand any valid request in the format of HTTP/0.9, 1.0, or + 1.1; + + o respond appropriately with a message in the same major version used + by the client. + + And we would expect HTTP/1.1 clients to: + + o recognize the format of the Status-Line for HTTP/1.0 and 1.1 + responses; + + o understand any valid response in the format of HTTP/0.9, 1.0, or + 1.1. + + For most implementations of HTTP/1.0, each connection is established + by the client prior to the request and closed by the server after + sending the response. A few implementations implement the Keep-Alive + version of persistent connections described in section 19.7.1.1. + + + + + + + + + + +Fielding, et. al. Standards Track [Page 160] + +RFC 2068 HTTP/1.1 January 1997 + + +19.7.1 Compatibility with HTTP/1.0 Persistent Connections + + Some clients and servers may wish to be compatible with some previous + implementations of persistent connections in HTTP/1.0 clients and + servers. Persistent connections in HTTP/1.0 must be explicitly + negotiated as they are not the default behavior. HTTP/1.0 + experimental implementations of persistent connections are faulty, + and the new facilities in HTTP/1.1 are designed to rectify these + problems. The problem was that some existing 1.0 clients may be + sending Keep-Alive to a proxy server that doesn't understand + Connection, which would then erroneously forward it to the next + inbound server, which would establish the Keep-Alive connection and + result in a hung HTTP/1.0 proxy waiting for the close on the + response. The result is that HTTP/1.0 clients must be prevented from + using Keep-Alive when talking to proxies. + + However, talking to proxies is the most important use of persistent + connections, so that prohibition is clearly unacceptable. 
Therefore, + we need some other mechanism for indicating a persistent connection + is desired, which is safe to use even when talking to an old proxy + that ignores Connection. Persistent connections are the default for + HTTP/1.1 messages; we introduce a new keyword (Connection: close) for + declaring non-persistence. + + The following describes the original HTTP/1.0 form of persistent + connections. + + When it connects to an origin server, an HTTP client MAY send the + Keep-Alive connection-token in addition to the Persist connection- + token: + + Connection: Keep-Alive + + An HTTP/1.0 server would then respond with the Keep-Alive connection + token and the client may proceed with an HTTP/1.0 (or Keep-Alive) + persistent connection. + + An HTTP/1.1 server may also establish persistent connections with + HTTP/1.0 clients upon receipt of a Keep-Alive connection token. + However, a persistent connection with an HTTP/1.0 client cannot make + use of the chunked transfer-coding, and therefore MUST use a + Content-Length for marking the ending boundary of each message. + + A client MUST NOT send the Keep-Alive connection token to a proxy + server as HTTP/1.0 proxy servers do not obey the rules of HTTP/1.1 + for parsing the Connection header field. + + + + + +Fielding, et. al. Standards Track [Page 161] + +RFC 2068 HTTP/1.1 January 1997 + + +19.7.1.1 The Keep-Alive Header + + When the Keep-Alive connection-token has been transmitted with a + request or a response, a Keep-Alive header field MAY also be + included. The Keep-Alive header field takes the following form: + + Keep-Alive-header = "Keep-Alive" ":" 0# keepalive-param + + keepalive-param = param-name "=" value + + The Keep-Alive header itself is optional, and is used only if a + parameter is being sent. HTTP/1.1 does not define any parameters. + + If the Keep-Alive header is sent, the corresponding connection token + MUST be transmitted. 
The Keep-Alive header MUST be ignored if + received without the connection token. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Fielding, et. al. Standards Track [Page 162] + diff --git a/docs/specs/rfc2109.txt b/docs/specs/rfc2109.txt new file mode 100644 index 0000000..432fdcc --- /dev/null +++ b/docs/specs/rfc2109.txt @@ -0,0 +1,1179 @@ + + + + + + +Network Working Group D. Kristol +Request for Comments: 2109 Bell Laboratories, Lucent Technologies +Category: Standards Track L. Montulli + Netscape Communications + February 1997 + + + HTTP State Management Mechanism + +Status of this Memo + + This document specifies an Internet standards track protocol for the + Internet community, and requests discussion and suggestions for + improvements. Please refer to the current edition of the "Internet + Official Protocol Standards" (STD 1) for the standardization state + and status of this protocol. Distribution of this memo is unlimited. + +1. ABSTRACT + + This document specifies a way to create a stateful session with HTTP + requests and responses. It describes two new headers, Cookie and + Set-Cookie, which carry state information between participating + origin servers and user agents. The method described here differs + from Netscape's Cookie proposal, but it can interoperate with + HTTP/1.0 user agents that use Netscape's method. (See the HISTORICAL + section.) + +2. TERMINOLOGY + + The terms user agent, client, server, proxy, and origin server have + the same meaning as in the HTTP/1.0 specification. + + Fully-qualified host name (FQHN) means either the fully-qualified + domain name (FQDN) of a host (i.e., a completely specified domain + name ending in a top-level domain such as .com or .uk), or the + numeric Internet Protocol (IP) address of a host. The fully + qualified domain name is preferred; use of numeric IP addresses is + strongly discouraged. 
+ + The terms request-host and request-URI refer to the values the client + would send to the server as, respectively, the host (but not port) + and abs_path portions of the absoluteURI (http_URL) of the HTTP + request line. Note that request-host must be a FQHN. + + + + + + + + +Kristol & Montulli Standards Track [Page 1] + +RFC 2109 HTTP State Management Mechanism February 1997 + + + Host names can be specified either as an IP address or a FQHN + string. Sometimes we compare one host name with another. Host A's + name domain-matches host B's if + + * both host names are IP addresses and their host name strings match + exactly; or + + * both host names are FQDN strings and their host name strings match + exactly; or + + * A is a FQDN string and has the form NB, where N is a non-empty name + string, B has the form .B', and B' is a FQDN string. (So, x.y.com + domain-matches .y.com but not y.com.) + + Note that domain-match is not a commutative operation: a.b.c.com + domain-matches .c.com, but not the reverse. + + Because it was used in Netscape's original implementation of state + management, we will use the term cookie to refer to the state + information that passes between an origin server and user agent, and + that gets stored by the user agent. + +3. STATE AND SESSIONS + + This document describes a way to create stateful sessions with HTTP + requests and responses. Currently, HTTP servers respond to each + client request without relating that request to previous or + subsequent requests; the technique allows clients and servers that + wish to exchange state information to place HTTP requests and + responses within a larger context, which we term a "session". This + context might be used to create, for example, a "shopping cart", in + which user selections can be aggregated before purchase, or a + magazine browsing system, in which a user's previous reading affects + which offerings are presented.
+ + There are, of course, many different potential contexts and thus many + different potential types of session. The designers' paradigm for + sessions created by the exchange of cookies has these key attributes: + + 1. Each session has a beginning and an end. + + 2. Each session is relatively short-lived. + + 3. Either the user agent or the origin server may terminate a + session. + + 4. The session is implicit in the exchange of state information. + + + + +Kristol & Montulli Standards Track [Page 2] + +RFC 2109 HTTP State Management Mechanism February 1997 + + +4. OUTLINE + + We outline here a way for an origin server to send state information + to the user agent, and for the user agent to return the state + information to the origin server. The goal is to have a minimal + impact on HTTP and user agents. Only origin servers that need to + maintain sessions would suffer any significant impact, and that + impact can largely be confined to Common Gateway Interface (CGI) + programs, unless the server provides more sophisticated state + management support. (See Implementation Considerations, below.) + +4.1 Syntax: General + + The two state management headers, Set-Cookie and Cookie, have common + syntactic properties involving attribute-value pairs. The following + grammar uses the notation, and tokens DIGIT (decimal digits) and + token (informally, a sequence of non-special, non-white space + characters) from the HTTP/1.1 specification [RFC 2068] to describe + their syntax. + + av-pairs = av-pair *(";" av-pair) + av-pair = attr ["=" value] ; optional value + attr = token + value = word + word = token | quoted-string + + Attributes (names) (attr) are case-insensitive. White space is + permitted between tokens. Note that while the above syntax + description shows value as optional, most attrs require them. + + NOTE: The syntax above allows whitespace between the attribute and + the = sign. 
+ +4.2 Origin Server Role + +4.2.1 General + + The origin server initiates a session, if it so desires. (Note that + "session" here does not refer to a persistent network connection but + to a logical session created from HTTP requests and responses. The + presence or absence of a persistent connection should have no effect + on the use of cookie-derived sessions). To initiate a session, the + origin server returns an extra response header to the client, Set- + Cookie. (The details follow later.) + + A user agent returns a Cookie request header (see below) to the + origin server if it chooses to continue a session. The origin server + may ignore it or use it to determine the current state of the + + + +Kristol & Montulli Standards Track [Page 3] + +RFC 2109 HTTP State Management Mechanism February 1997 + + + session. It may send back to the client a Set-Cookie response header + with the same or different information, or it may send no Set-Cookie + header at all. The origin server effectively ends a session by + sending the client a Set-Cookie header with Max-Age=0. + + Servers may return Set-Cookie response headers with any response. + User agents should send Cookie request headers, subject to other + rules detailed below, with every request. + + An origin server may include multiple Set-Cookie headers in a + response. Note that an intervening gateway could fold multiple such + headers into a single header. + +4.2.2 Set-Cookie Syntax + + The syntax for the Set-Cookie response header is + + set-cookie = "Set-Cookie:" cookies + cookies = 1#cookie + cookie = NAME "=" VALUE *(";" cookie-av) + NAME = attr + VALUE = value + cookie-av = "Comment" "=" value + | "Domain" "=" value + | "Max-Age" "=" value + | "Path" "=" value + | "Secure" + | "Version" "=" 1*DIGIT + + Informally, the Set-Cookie response header comprises the token Set- + Cookie:, followed by a comma-separated list of one or more cookies.
+ Each cookie begins with a NAME=VALUE pair, followed by zero or more + semi-colon-separated attribute-value pairs. The syntax for + attribute-value pairs was shown earlier. The specific attributes and + the semantics of their values follow. The NAME=VALUE attribute- + value pair must come first in each cookie. The others, if present, + can occur in any order. If an attribute appears more than once in a + cookie, the behavior is undefined. + + NAME=VALUE + Required. The name of the state information ("cookie") is NAME, + and its value is VALUE. NAMEs that begin with $ are reserved for + other uses and must not be used by applications. + + + + + + + + +Kristol & Montulli Standards Track [Page 4] + +RFC 2109 HTTP State Management Mechanism February 1997 + + + The VALUE is opaque to the user agent and may be anything the + origin server chooses to send, possibly in a server-selected + printable ASCII encoding. "Opaque" implies that the content is of + interest and relevance only to the origin server. The content + may, in fact, be readable by anyone that examines the Set-Cookie + header. + + Comment=comment + Optional. Because cookies can contain private information about a + user, the Comment attribute allows an origin server to document its + intended use of a cookie. The user can inspect the information to + decide whether to initiate or continue a session with this cookie. + + Domain=domain + Optional. The Domain attribute specifies the domain for which the + cookie is valid. An explicitly specified domain must always start + with a dot. + + Max-Age=delta-seconds + Optional. The Max-Age attribute defines the lifetime of the + cookie, in seconds. The delta-seconds value is a decimal non- + negative integer. After delta-seconds seconds elapse, the client + should discard the cookie. A value of zero means the cookie + should be discarded immediately. + + Path=path + Optional. The Path attribute specifies the subset of URLs to + which this cookie applies.
+ + Secure + Optional. The Secure attribute (with no value) directs the user + agent to use only (unspecified) secure means to contact the origin + server whenever it sends back this cookie. + + The user agent (possibly under the user's control) may determine + what level of security it considers appropriate for "secure" + cookies. The Secure attribute should be considered security + advice from the server to the user agent, indicating that it is in + the session's interest to protect the cookie contents. + + Version=version + Required. The Version attribute, a decimal integer, identifies to + which version of the state management specification the cookie + conforms. For this specification, Version=1 applies. + + + + + + + +Kristol & Montulli Standards Track [Page 5] + +RFC 2109 HTTP State Management Mechanism February 1997 + + +4.2.3 Controlling Caching + + An origin server must be cognizant of the effect of possible caching + of both the returned resource and the Set-Cookie header. Caching + "public" documents is desirable. For example, if the origin server + wants to use a public document such as a "front door" page as a + sentinel to indicate the beginning of a session for which a Set- + Cookie response header must be generated, the page should be stored + in caches "pre-expired" so that the origin server will see further + requests. "Private documents", for example those that contain + information strictly private to a session, should not be cached in + shared caches. + + If the cookie is intended for use by a single user, the Set-cookie + header should not be cached. A Set-cookie header that is intended to + be shared by multiple users may be cached. + + The origin server should send the following additional HTTP/1.1 + response headers, depending on circumstances: + + * To suppress caching of the Set-Cookie header: Cache-control: no- + cache="set-cookie". 
+ + and one of the following: + + * To suppress caching of a private document in shared caches: Cache- + control: private. + + * To allow caching of a document and require that it be validated + before returning it to the client: Cache-control: must-revalidate. + + * To allow caching of a document, but to require that proxy caches + (not user agent caches) validate it before returning it to the + client: Cache-control: proxy-revalidate. + + * To allow caching of a document and request that it be validated + before returning it to the client (by "pre-expiring" it): + Cache-control: max-age=0. Not all caches will revalidate the + document in every case. + + HTTP/1.1 servers must send Expires: old-date (where old-date is a + date long in the past) on responses containing Set-Cookie response + headers unless they know for certain (by out of band means) that + there are no downstream HTTP/1.0 proxies. HTTP/1.1 servers may send + other Cache-Control directives that permit caching by HTTP/1.1 + proxies in addition to the Expires: old-date directive; the Cache- + Control directive will override the Expires: old-date for HTTP/1.1 + proxies. + + + +Kristol & Montulli Standards Track [Page 6] + +RFC 2109 HTTP State Management Mechanism February 1997 + + +4.3 User Agent Role + +4.3.1 Interpreting Set-Cookie + + The user agent keeps separate track of state information that arrives + via Set-Cookie response headers from each origin server (as + distinguished by name or IP address and port). The user agent + applies these defaults for optional attributes that are missing: + + Version Defaults to "old cookie" behavior as originally specified by + Netscape. See the HISTORICAL section. + + Domain Defaults to the request-host. (Note that there is no dot at + the beginning of request-host.) + + Max-Age The default behavior is to discard the cookie when the user + agent exits.
+ + Path Defaults to the path of the request URL that generated the + Set-Cookie response, up to, but not including, the + right-most /. + + Secure If absent, the user agent may send the cookie over an + insecure channel. + +4.3.2 Rejecting Cookies + + To prevent possible security or privacy violations, a user agent + rejects a cookie (shall not store its information) if any of the + following is true: + + * The value for the Path attribute is not a prefix of the request- + URI. + + * The value for the Domain attribute contains no embedded dots or + does not start with a dot. + + * The value for the request-host does not domain-match the Domain + attribute. + + * The request-host is a FQDN (not IP address) and has the form HD, + where D is the value of the Domain attribute, and H is a string + that contains one or more dots. + + Examples: + + * A Set-Cookie from request-host y.x.foo.com for Domain=.foo.com + would be rejected, because H is y.x and contains a dot. + + + +Kristol & Montulli Standards Track [Page 7] + +RFC 2109 HTTP State Management Mechanism February 1997 + + + * A Set-Cookie from request-host x.foo.com for Domain=.foo.com would + be accepted. + + * A Set-Cookie with Domain=.com or Domain=.com., will always be + rejected, because there is no embedded dot. + + * A Set-Cookie with Domain=ajax.com will be rejected because the + value for Domain does not begin with a dot. + +4.3.3 Cookie Management + + If a user agent receives a Set-Cookie response header whose NAME is + the same as a pre-existing cookie, and whose Domain and Path + attribute values exactly (string) match those of a pre-existing + cookie, the new cookie supersedes the old. However, if the Set- + Cookie has a value for Max-Age of zero, the (old and new) cookie is + discarded. Otherwise cookies accumulate until they expire (resources + permitting), at which time they are discarded. 
+ + Because user agents have finite space in which to store cookies, they + may also discard older cookies to make space for newer ones, using, + for example, a least-recently-used algorithm, along with constraints + on the maximum number of cookies that each origin server may set. + + If a Set-Cookie response header includes a Comment attribute, the + user agent should store that information in a human-readable form + with the cookie and should display the comment text as part of a + cookie inspection user interface. + + User agents should allow the user to control cookie destruction. An + infrequently-used cookie may function as a "preferences file" for + network applications, and a user may wish to keep it even if it is + the least-recently-used cookie. One possible implementation would be + an interface that allows the permanent storage of a cookie through a + checkbox (or, conversely, its immediate destruction). + + Privacy considerations dictate that the user have considerable + control over cookie management. The PRIVACY section contains more + information. + +4.3.4 Sending Cookies to the Origin Server + + When it sends a request to an origin server, the user agent sends a + Cookie request header to the origin server if it has cookies that are + applicable to the request, based on + + * the request-host; + + + + +Kristol & Montulli Standards Track [Page 8] + +RFC 2109 HTTP State Management Mechanism February 1997 + + + * the request-URI; + + * the cookie's age. + + The syntax for the header is: + + cookie = "Cookie:" cookie-version + 1*((";" | ",") cookie-value) + cookie-value = NAME "=" VALUE [";" path] [";" domain] + cookie-version = "$Version" "=" value + NAME = attr + VALUE = value + path = "$Path" "=" value + domain = "$Domain" "=" value + + The value of the cookie-version attribute must be the value from the + Version attribute, if any, of the corresponding Set-Cookie response + header. Otherwise the value for cookie-version is 0. 
The value for + the path attribute must be the value from the Path attribute, if any, + of the corresponding Set-Cookie response header. Otherwise the + attribute should be omitted from the Cookie request header. The + value for the domain attribute must be the value from the Domain + attribute, if any, of the corresponding Set-Cookie response header. + Otherwise the attribute should be omitted from the Cookie request + header. + + Note that there is no Comment attribute in the Cookie request header + corresponding to the one in the Set-Cookie response header. The user + agent does not return the comment information to the origin server. + + The following rules apply to choosing applicable cookie-values from + among all the cookies the user agent has. + + Domain Selection + The origin server's fully-qualified host name must domain-match + the Domain attribute of the cookie. + + Path Selection + The Path attribute of the cookie must match a prefix of the + request-URI. + + Max-Age Selection + Cookies that have expired should have been discarded and thus + are not forwarded to an origin server. + + + + + + + +Kristol & Montulli Standards Track [Page 9] + +RFC 2109 HTTP State Management Mechanism February 1997 + + + If multiple cookies satisfy the criteria above, they are ordered in + the Cookie header such that those with more specific Path attributes + precede those with less specific. Ordering with respect to other + attributes (e.g., Domain) is unspecified. + + Note: For backward compatibility, the separator in the Cookie header + is semi-colon (;) everywhere. A server should also accept comma (,) + as the separator between cookie-values for future compatibility. + +4.3.5 Sending Cookies in Unverifiable Transactions + + Users must have control over sessions in order to ensure privacy. + (See PRIVACY section below.) 
To simplify implementation and to + prevent an additional layer of complexity where adequate safeguards + exist, however, this document distinguishes between transactions that + are verifiable and those that are unverifiable. A transaction is + verifiable if the user has the option to review the request-URI prior + to its use in the transaction. A transaction is unverifiable if the + user does not have that option. Unverifiable transactions typically + arise when a user agent automatically requests inlined or embedded + entities or when it resolves redirection (3xx) responses from an + origin server. Typically the origin transaction, the transaction + that the user initiates, is verifiable, and that transaction may + directly or indirectly induce the user agent to make unverifiable + transactions. + + When it makes an unverifiable transaction, a user agent must enable a + session only if a cookie with a domain attribute D was sent or + received in its origin transaction, such that the host name in the + Request-URI of the unverifiable transaction domain-matches D. + + This restriction prevents a malicious service author from using + unverifiable transactions to induce a user agent to start or continue + a session with a server in a different domain. The starting or + continuation of such sessions could be contrary to the privacy + expectations of the user, and could also be a security problem. + + User agents may offer configurable options that allow the user agent, + or any autonomous programs that the user agent executes, to ignore + the above rule, so long as these override options default to "off". + + Many current user agents already provide a review option that would + render many links verifiable. For instance, some user agents display + the URL that would be referenced for a particular link when the mouse + pointer is placed over that link. The user can therefore determine + whether to visit that site before causing the browser to do so. 
+ (Though not implemented on current user agents, a similar technique + could be used for a button used to submit a form -- the user agent + + + +Kristol & Montulli Standards Track [Page 10] + +RFC 2109 HTTP State Management Mechanism February 1997 + + + could display the action to be taken if the user were to select that + button.) However, even this would not make all links verifiable; for + example, links to automatically loaded images would not normally be + subject to "mouse pointer" verification. + + Many user agents also provide the option for a user to view the HTML + source of a document, or to save the source to an external file where + it can be viewed by another application. While such an option does + provide a crude review mechanism, some users might not consider it + acceptable for this purpose. + +4.4 How an Origin Server Interprets the Cookie Header + + A user agent returns much of the information in the Set-Cookie header + to the origin server when the Path attribute matches that of a new + request. When it receives a Cookie header, the origin server should + treat cookies with NAMEs whose prefix is $ specially, as an attribute + for the adjacent cookie. The value for such a NAME is to be + interpreted as applying to the lexically (left-to-right) most recent + cookie whose name does not have the $ prefix. If there is no + previous cookie, the value applies to the cookie mechanism as a + whole. For example, consider the cookie + + Cookie: $Version="1"; Customer="WILE_E_COYOTE"; + $Path="/acme" + + $Version applies to the cookie mechanism as a whole (and gives the + version number for the cookie mechanism). $Path is an attribute + whose value (/acme) defines the Path attribute that was used when the + Customer cookie was defined in a Set-Cookie response header. + +4.5 Caching Proxy Role + + One reason for separating state information from both a URL and + document content is to facilitate the scaling that caching permits. 
+ To support cookies, a caching proxy must obey these rules already in + the HTTP specification: + + * Honor requests from the cache, if possible, based on cache validity + rules. + + * Pass along a Cookie request header in any request that the proxy + must make of another server. + + * Return the response to the client. Include any Set-Cookie response + header. + + + + + +Kristol & Montulli Standards Track [Page 11] + +RFC 2109 HTTP State Management Mechanism February 1997 + + + * Cache the received response subject to the control of the usual + headers, such as Expires, Cache-control: no-cache, and Cache- + control: private, + + * Cache the Set-Cookie subject to the control of the usual header, + Cache-control: no-cache="set-cookie". (The Set-Cookie header + should usually not be cached.) + + Proxies must not introduce Set-Cookie (Cookie) headers of their own + in proxy responses (requests). + +5. EXAMPLES + +5.1 Example 1 + + Most detail of request and response headers has been omitted. Assume + the user agent has no stored cookies. + + 1. User Agent -> Server + + POST /acme/login HTTP/1.1 + [form data] + + User identifies self via a form. + + 2. Server -> User Agent + + HTTP/1.1 200 OK + Set-Cookie: Customer="WILE_E_COYOTE"; Version="1"; Path="/acme" + + Cookie reflects user's identity. + + 3. User Agent -> Server + + POST /acme/pickitem HTTP/1.1 + Cookie: $Version="1"; Customer="WILE_E_COYOTE"; $Path="/acme" + [form data] + + User selects an item for "shopping basket." + + 4. Server -> User Agent + + HTTP/1.1 200 OK + Set-Cookie: Part_Number="Rocket_Launcher_0001"; Version="1"; + Path="/acme" + + Shopping basket contains an item. + + + + +Kristol & Montulli Standards Track [Page 12] + +RFC 2109 HTTP State Management Mechanism February 1997 + + + 5. 
User Agent -> Server + + POST /acme/shipping HTTP/1.1 + Cookie: $Version="1"; + Customer="WILE_E_COYOTE"; $Path="/acme"; + Part_Number="Rocket_Launcher_0001"; $Path="/acme" + [form data] + + User selects shipping method from form. + + 6. Server -> User Agent + + HTTP/1.1 200 OK + Set-Cookie: Shipping="FedEx"; Version="1"; Path="/acme" + + New cookie reflects shipping method. + + 7. User Agent -> Server + + POST /acme/process HTTP/1.1 + Cookie: $Version="1"; + Customer="WILE_E_COYOTE"; $Path="/acme"; + Part_Number="Rocket_Launcher_0001"; $Path="/acme"; + Shipping="FedEx"; $Path="/acme" + [form data] + + User chooses to process order. + + 8. Server -> User Agent + + HTTP/1.1 200 OK + + Transaction is complete. + + The user agent makes a series of requests on the origin server, after + each of which it receives a new cookie. All the cookies have the + same Path attribute and (default) domain. Because the request URLs + all have /acme as a prefix, and that matches the Path attribute, each + request contains all the cookies received so far. + +5.2 Example 2 + + This example illustrates the effect of the Path attribute. All + detail of request and response headers has been omitted. Assume the + user agent has no stored cookies. + + Imagine the user agent has received, in response to earlier requests, + the response headers + + + +Kristol & Montulli Standards Track [Page 13] + +RFC 2109 HTTP State Management Mechanism February 1997 + + + Set-Cookie: Part_Number="Rocket_Launcher_0001"; Version="1"; + Path="/acme" + + and + + Set-Cookie: Part_Number="Riding_Rocket_0023"; Version="1"; + Path="/acme/ammo" + + A subsequent request by the user agent to the (same) server for URLs + of the form /acme/ammo/... 
would include the following request + header: + + Cookie: $Version="1"; + Part_Number="Riding_Rocket_0023"; $Path="/acme/ammo"; + Part_Number="Rocket_Launcher_0001"; $Path="/acme" + + Note that the NAME=VALUE pair for the cookie with the more specific + Path attribute, /acme/ammo, comes before the one with the less + specific Path attribute, /acme. Further note that the same cookie + name appears more than once. + + A subsequent request by the user agent to the (same) server for a URL + of the form /acme/parts/ would include the following request header: + + Cookie: $Version="1"; Part_Number="Rocket_Launcher_0001"; $Path="/acme" + + Here, the second cookie's Path attribute /acme/ammo is not a prefix + of the request URL, /acme/parts/, so the cookie does not get + forwarded to the server. + +6. IMPLEMENTATION CONSIDERATIONS + + Here we speculate on likely or desirable details for an origin server + that implements state management. + +6.1 Set-Cookie Content + + An origin server's content should probably be divided into disjoint + application areas, some of which require the use of state + information. The application areas can be distinguished by their + request URLs. The Set-Cookie header can incorporate information + about the application areas by setting the Path attribute for each + one. + + The session information can obviously be clear or encoded text that + describes state. However, if it grows too large, it can become + unwieldy. Therefore, an implementor might choose for the session + information to be a key to a server-side resource. Of course, using + + + +Kristol & Montulli Standards Track [Page 14] + +RFC 2109 HTTP State Management Mechanism February 1997 + + + a database creates some problems that this state management + specification was meant to avoid, namely: + + 1. keeping real state on the server side; + + 2. how and when to garbage-collect the database entry, in case the + user agent terminates the session by, for example, exiting. 
+ +6.2 Stateless Pages + + Caching benefits the scalability of WWW. Therefore it is important + to reduce the number of documents that have state embedded in them + inherently. For example, if a shopping-basket-style application + always displays a user's current basket contents on each page, those + pages cannot be cached, because each user's basket's contents would + be different. On the other hand, if each page contains just a link + that allows the user to "Look at My Shopping Basket", the page can be + cached. + +6.3 Implementation Limits + + Practical user agent implementations have limits on the number and + size of cookies that they can store. In general, user agents' cookie + support should have no fixed limits. They should strive to store as + many frequently-used cookies as possible. Furthermore, general-use + user agents should provide each of the following minimum capabilities + individually, although not necessarily simultaneously: + + * at least 300 cookies + + * at least 4096 bytes per cookie (as measured by the size of the + characters that comprise the cookie non-terminal in the syntax + description of the Set-Cookie header) + + * at least 20 cookies per unique host or domain name + + User agents created for specific purposes or for limited-capacity + devices should provide at least 20 cookies of 4096 bytes, to ensure + that the user can interact with a session-based origin server. + + The information in a Set-Cookie response header must be retained in + its entirety. If for some reason there is inadequate space to store + the cookie, it must be discarded, not truncated. + + Applications should use as few and as small cookies as possible, and + they should cope gracefully with the loss of a cookie. 
+ + + + + +Kristol & Montulli Standards Track [Page 15] + +RFC 2109 HTTP State Management Mechanism February 1997 + + +6.3.1 Denial of Service Attacks + + User agents may choose to set an upper bound on the number of cookies + to be stored from a given host or domain name or on the size of the + cookie information. Otherwise a malicious server could attempt to + flood a user agent with many cookies, or large cookies, on successive + responses, which would force out cookies the user agent had received + from other servers. However, the minima specified above should still + be supported. + +7. PRIVACY + +7.1 User Agent Control + + An origin server could create a Set-Cookie header to track the path + of a user through the server. Users may object to this behavior as + an intrusive accumulation of information, even if their identity is + not evident. (Identity might become evident if a user subsequently + fills out a form that contains identifying information.) This state + management specification therefore requires that a user agent give + the user control over such a possible intrusion, although the + interface through which the user is given this control is left + unspecified. However, the control mechanisms provided shall at least + allow the user + + * to completely disable the sending and saving of cookies. + + * to determine whether a stateful session is in progress. + + * to control the saving of a cookie on the basis of the cookie's + Domain attribute. + + Such control could be provided by, for example, mechanisms + + * to notify the user when the user agent is about to send a cookie + to the origin server, offering the option not to begin a session. + + * to display a visual indication that a stateful session is in + progress. + + * to let the user decide which cookies, if any, should be saved + when the user concludes a window or user agent session. + + * to let the user examine the contents of a cookie at any time. 
+ + A user agent usually begins execution with no remembered state + information. It should be possible to configure a user agent never + to send Cookie headers, in which case it can never sustain state with + + + +Kristol & Montulli Standards Track [Page 16] + +RFC 2109 HTTP State Management Mechanism February 1997 + + + an origin server. (The user agent would then behave like one that is + unaware of how to handle Set-Cookie response headers.) + + When the user agent terminates execution, it should let the user + discard all state information. Alternatively, the user agent may ask + the user whether state information should be retained; the default + should be "no". If the user chooses to retain state information, it + would be restored the next time the user agent runs. + + NOTE: User agents should probably be cautious about using files to + store cookies long-term. If a user runs more than one instance of + the user agent, the cookies could be commingled or otherwise messed + up. + +7.2 Protocol Design + + The restrictions on the value of the Domain attribute, and the rules + concerning unverifiable transactions, are meant to reduce the ways + that cookies can "leak" to the "wrong" site. The intent is to + restrict cookies to one, or a closely related set of hosts. + Therefore a request-host is limited as to what values it can set for + Domain. We consider it acceptable for hosts host1.foo.com and + host2.foo.com to share cookies, but not a.com and b.com. + + Similarly, a server can only set a Path for cookies that are related + to the request-URI. + +8. SECURITY CONSIDERATIONS + +8.1 Clear Text + + The information in the Set-Cookie and Cookie headers is unprotected. + Two consequences are: + + 1. Any sensitive information that is conveyed in them is exposed + to intruders. + + 2. A malicious intermediary could alter the headers as they travel + in either direction, with unpredictable results. 
+ + These facts imply that information of a personal and/or financial + nature should only be sent over a secure channel. For less sensitive + information, or when the content of the header is a database key, an + origin server should be vigilant to prevent a bad Cookie value from + causing failures. + + + + + + +Kristol & Montulli Standards Track [Page 17] + +RFC 2109 HTTP State Management Mechanism February 1997 + + +8.2 Cookie Spoofing + + Proper application design can avoid spoofing attacks from related + domains. Consider: + + 1. User agent makes request to victim.cracker.edu, gets back + cookie session_id="1234" and sets the default domain + victim.cracker.edu. + + 2. User agent makes request to spoof.cracker.edu, gets back + cookie session-id="1111", with Domain=".cracker.edu". + + 3. User agent makes request to victim.cracker.edu again, and + passes + + Cookie: $Version="1"; + session_id="1234"; + session_id="1111"; $Domain=".cracker.edu" + + The server at victim.cracker.edu should detect that the second + cookie was not one it originated by noticing that the Domain + attribute is not for itself and ignore it. + +8.3 Unexpected Cookie Sharing + + A user agent should make every attempt to prevent the sharing of + session information between hosts that are in different domains. + Embedded or inlined objects may cause particularly severe privacy + problems if they can be used to share cookies between disparate + hosts. For example, a malicious server could embed cookie + information for host a.com in a URI for a CGI on host b.com. User + agent implementors are strongly encouraged to prevent this sort of + exchange whenever possible. + +9. OTHER, SIMILAR, PROPOSALS + + Three other proposals have been made to accomplish similar goals. + This specification is an amalgam of Kristol's State-Info proposal and + Netscape's Cookie proposal. 
+ + Brian Behlendorf proposed a Session-ID header that would be user- + agent-initiated and could be used by an origin server to track + "clicktrails". It would not carry any origin-server-defined state, + however. Phillip Hallam-Baker has proposed another client-defined + session ID mechanism for similar purposes. + + + + + + +Kristol & Montulli Standards Track [Page 18] + +RFC 2109 HTTP State Management Mechanism February 1997 + + + While both session IDs and cookies can provide a way to sustain + stateful sessions, their intended purpose is different, and, + consequently, the privacy requirements for them are different. A + user initiates session IDs to allow servers to track progress through + them, or to distinguish multiple users on a shared machine. Cookies + are server-initiated, so the cookie mechanism described here gives + users control over something that would otherwise take place without + the users' awareness. Furthermore, cookies convey rich, server- + selected information, whereas session IDs comprise user-selected, + simple information. + +10. HISTORICAL + +10.1 Compatibility With Netscape's Implementation + + HTTP/1.0 clients and servers may use Set-Cookie and Cookie headers + that reflect Netscape's original cookie proposal. These notes cover + inter-operation between "old" and "new" cookies. + +10.1.1 Extended Cookie Header + + This proposal adds attribute-value pairs to the Cookie request header + in a compatible way. An "old" client that receives a "new" cookie + will ignore attributes it does not understand; it returns what it + does understand to the origin server. A "new" client always sends + cookies in the new form. + + An "old" server that receives a "new" cookie will see what it thinks + are many cookies with names that begin with a $, and it will ignore + them. (The "old" server expects these cookies to be separated by + semi-colon, not comma.) 
A "new" server can detect cookies that have + passed through an "old" client, because they lack a $Version + attribute. + +10.1.2 Expires and Max-Age + + Netscape's original proposal defined an Expires header that took a + date value in a fixed-length variant format in place of Max-Age: + + Wdy, DD-Mon-YY HH:MM:SS GMT + + Note that the Expires date format contains embedded spaces, and that + "old" cookies did not have quotes around values. Clients that + implement to this specification should be aware of "old" cookies and + Expires. + + + + + + +Kristol & Montulli Standards Track [Page 19] + +RFC 2109 HTTP State Management Mechanism February 1997 + + +10.1.3 Punctuation + + In Netscape's original proposal, the values in attribute-value pairs + did not accept "-quoted strings. Origin servers should be cautious + about sending values that require quotes unless they know the + receiving user agent understands them (i.e., "new" cookies). A + ("new") user agent should only use quotes around values in Cookie + headers when the cookie's version(s) is (are) all compliant with this + specification or later. + + In Netscape's original proposal, no whitespace was permitted around + the = that separates attribute-value pairs. Therefore such + whitespace should be used with caution in new implementations. + +10.2 Caching and HTTP/1.0 + + Some caches, such as those conforming to HTTP/1.0, will inevitably + cache the Set-Cookie header, because there was no mechanism to + suppress caching of headers prior to HTTP/1.1. This caching can lead + to security problems. Documents transmitted by an origin server + along with Set-Cookie headers will usually either be uncachable, or + will be "pre-expired". As long as caches obey instructions not to + cache documents (following Expires: <a date in the past> or Pragma: + no-cache (HTTP/1.0), or Cache-control: no-cache (HTTP/1.1)) + uncachable documents present no problem. However, pre-expired + documents may be stored in caches.
They require validation (a + conditional GET) on each new request, but some cache operators loosen + the rules for their caches, and sometimes serve expired documents + without first validating them. This combination of factors can lead + to cookies meant for one user later being sent to another user. The + Set-Cookie header is stored in the cache, and, although the document + is stale (expired), the cache returns the document in response to + later requests, including cached headers. + +11. ACKNOWLEDGEMENTS + + This document really represents the collective efforts of the + following people, in addition to the authors: Roy Fielding, Marc + Hedlund, Ted Hardie, Koen Holtman, Shel Kaphan, Rohit Khare. + + + + + + + + + + + + +Kristol & Montulli Standards Track [Page 20] + +RFC 2109 HTTP State Management Mechanism February 1997 + + +12. AUTHORS' ADDRESSES + + David M. Kristol + Bell Laboratories, Lucent Technologies + 600 Mountain Ave. Room 2A-227 + Murray Hill, NJ 07974 + + Phone: (908) 582-2250 + Fax: (908) 582-5809 + EMail: dmk@bell-labs.com + + + Lou Montulli + Netscape Communications Corp. + 501 E. Middlefield Rd. + Mountain View, CA 94043 + + Phone: (415) 528-2600 + EMail: montulli@netscape.com + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Kristol & Montulli Standards Track [Page 21] + diff --git a/docs/specs/rfc2145.txt b/docs/specs/rfc2145.txt new file mode 100644 index 0000000..b6db4d5 --- /dev/null +++ b/docs/specs/rfc2145.txt @@ -0,0 +1,395 @@ + + + + + + +Network Working Group J. C. Mogul +Request for Comments: 2145 DEC +Category: Informational R. Fielding + UC Irvine + J. Gettys + DEC + H. Frystyk + MIT/LCS + May 1997 + + Use and Interpretation of + HTTP Version Numbers + +Status of this Memo + + This memo provides information for the Internet community. This memo + does not specify an Internet standard of any kind. Distribution of + this memo is unlimited. + + Distribution of this document is unlimited. 
Please send comments to + the HTTP working group at <http-wg@cuckoo.hpl.hp.com>. Discussions + of the working group are archived at + <URL:http://www.ics.uci.edu/pub/ietf/http/>. General discussions + about HTTP and the applications which use HTTP should take place on + the <www-talk@w3.org> mailing list. + +Abstract + + HTTP request and response messages include an HTTP protocol version + number. Some confusion exists concerning the proper use and + interpretation of HTTP version numbers, and concerning + interoperability of HTTP implementations of different protocol + versions. This document is an attempt to clarify the situation. It + is not a modification of the intended meaning of the existing + HTTP/1.0 and HTTP/1.1 documents, but it does describe the intention + of the authors of those documents, and can be considered definitive + when there is any ambiguity in those documents concerning HTTP + version numbers, for all versions of HTTP. + + + + + + + + + + + + + +Mogul, et. al. Informational [Page 1] + +RFC 2145 HTTP Version Numbers May 1997 + + +TABLE OF CONTENTS + + 1 Introduction. . . . . . . . . . . . . . . . . . . . . . . . . . 2 + 1.1 Robustness Principle . . . . . . . . . . . . . . . . . . 3 + 2 HTTP version numbers. . . . . . . . . . . . . . . . . . . . . . 3 + 2.1 Proxy behavior. . . . . . . . . . . . . . . . . . . . . . . . 4 + 2.2 Compatibility between minor versions of the same major + version. . . . . . . . . . . . . . . . . . . . . . . . 4 + 2.3 Which version number to send in a message. . . . . . . . 5 + 3 Security Considerations . . . . . . . . . . . . . . . . . . . . 6 + 4 References. . . . . . . . . . . . . . . . . . . . . . . . . . . 6 + 5 Authors' addresses. . . . . . . . . . . . . . . . . . . . . . . 6 + +1 Introduction + + HTTP request and response messages include an HTTP protocol version + number. According to section 3.1 of the HTTP/1.1 specification [2], + + HTTP uses a "<major>.<minor>" numbering scheme to indicate + versions of the protocol.
The protocol versioning policy is + intended to allow the sender to indicate the format of a message + and its capacity for understanding further HTTP communication, + rather than the features obtained via that communication. No + change is made to the version number for the addition of message + components which do not affect communication behavior or which + only add to extensible field values. The <minor> number is + incremented when the changes made to the protocol add features + which do not change the general message parsing algorithm, but + which may add to the message semantics and imply additional + capabilities of the sender. The <major> number is incremented when + the format of a message within the protocol is changed. + + The same language appears in the description of HTTP/1.0 [1]. + + Many readers of these documents have expressed some confusion about + the intended meaning of this policy. Also, some people who wrote + HTTP implementations before RFC1945 [1] was issued were not aware of + the intentions behind the introduction of version numbers in + HTTP/1.0. This has led to debate and inconsistency regarding the use + and interpretation of HTTP version numbers, and has led to + interoperability problems in certain cases. + + + + + + + + + + +Mogul, et. al. Informational [Page 2] + +RFC 2145 HTTP Version Numbers May 1997 + + + This document is an attempt to clarify the situation. It is not a + modification of the intended meaning of the existing HTTP/1.0 and + HTTP/1.1 documents, but it does describe the intention of the authors + of those documents. In any case where either of those two documents + is ambiguous regarding the use and interpretation of HTTP version + numbers, this document should be considered the definitive as to the + intentions of the designers of HTTP. + + The specification described in this document is not part of the + specification of any individual version of HTTP, such as HTTP/1.0 or + HTTP/1.1.
Rather, this document describes the use of HTTP version + numbers in any version of HTTP (except for HTTP/0.9, which did not + include version numbers). + + No vendor or other provider of an HTTP implementation should claim + any compliance with any IETF HTTP specification unless the + implementation conditionally complies with the rules in this + document. + +1.1 Robustness Principle + + RFC791 [4] defines the "robustness principle" in section 3.2: + + an implementation must be conservative in its sending + behavior, and liberal in its receiving behavior. + + This principle applies to HTTP, as well. It is the fundamental basis + for interpreting any part of the HTTP specification that might still + be ambiguous. In particular, implementations of HTTP SHOULD NOT + reject messages or generate errors unnecessarily. + +2 HTTP version numbers + + We start by restating the language quoted above from section 3.1 of + the HTTP/1.1 specification [2]: + + It is, and has always been, the explicit intent of the + HTTP specification that the interpretation of an HTTP message + header does not change between minor versions of the same major + version. + + It is, and has always been, the explicit intent of the + HTTP specification that an implementation receiving a message + header that it does not understand MUST ignore that header. (The + word "ignore" has a special meaning for proxies; see section 2.1 + below.) + + + + + +Mogul, et. al. Informational [Page 3] + +RFC 2145 HTTP Version Numbers May 1997 + + + To make this as clear as possible: The major version sent in a + message MAY indicate the interpretation of other header fields. The + minor version sent in a message MUST NOT indicate the interpretation + of other header fields. This reflects the principle that the minor + version labels the capability of the sender, not the interpretation + of the message. 
+ + Note: In a future version of HTTP, we may introduce a mechanism + that explicitly requires a receiving implementation to reject a + message if it does not understand certain headers. For example, + this might be implemented by means of a header that lists a set of + other message headers that must be understood by the recipient. + Any implementation claiming at least conditional compliance with + this future version of HTTP would have to implement this + mechanism. However, no implementation claiming compliance with a + lower HTTP version (in particular, HTTP/1.1) will have to + implement this mechanism. + + This future change may be required to support the Protocol + Extension Protocol (PEP) [3]. + + One consequence of these rules is that an HTTP/1.1 message sent to an + HTTP/1.0 recipient (or a recipient whose version is unknown) MUST be + constructed so that it remains a valid HTTP/1.0 message when all + headers not defined in the HTTP/1.0 specification [1] are removed. + +2.1 Proxy behavior + + A proxy MUST forward an unknown header, unless it is protected by a + Connection header. A proxy implementing an HTTP version >= 1.1 MUST + NOT forward unknown headers that are protected by a Connection + header, as described in section 14.10 of the HTTP/1.1 specification + [2]. + + We remind the reader that that HTTP version numbers are hop-by-hop + components of HTTP messages, and are not end-to-end. That is, an + HTTP proxy never "forwards" an HTTP version number in either a + request or response. + +2.2 Compatibility between minor versions of the same major version + + An implementation of HTTP/x.b sending a message to a recipient whose + version is known to be HTTP/x.a, a < b, MAY send a header that is not + defined in the specification for HTTP/x.a. For example, an HTTP/1.1 + server may send a "Cache-control" header to an HTTP/1.0 client; this + may be useful if the immediate recipient is an HTTP/1.0 proxy, but + the ultimate recipient is an HTTP/1.1 client. 
+ + + + +Mogul, et. al. Informational [Page 4] + +RFC 2145 HTTP Version Numbers May 1997 + + + An implementation of HTTP/x.b sending a message to a recipient whose + version is known to be HTTP/x.a, a < b, MUST NOT depend on the + recipient understanding a header not defined in the specification for + HTTP/x.a. For example, HTTP/1.0 clients cannot be expected to + understand chunked encodings, and so an HTTP/1.1 server must never + send "Transfer-Encoding: chunked" in response to an HTTP/1.0 request. + +2.3 Which version number to send in a message + + The most strenuous debate over the use of HTTP version numbers has + centered on the problem of implementations that do not follow the + robustness principle, and which fail to produce useful results when + they receive a message with an HTTP minor version higher than the + minor version they implement. We consider these implementations + buggy, but we recognize that the robustness principle also implies + that message senders should make concessions to buggy implementations + when this is truly necessary for interoperation. + + An HTTP client SHOULD send a request version equal to the highest + version for which the client is at least conditionally compliant, and + whose major version is no higher than the highest version supported + by the server, if this is known. An HTTP client MUST NOT send a + version for which it is not at least conditionally compliant. + + An HTTP client MAY send a lower request version, if it is known that + the server incorrectly implements the HTTP specification, but only + after the client has determined that the server is actually buggy. + + An HTTP server SHOULD send a response version equal to the highest + version for which the server is at least conditionally compliant, and + whose major version is less than or equal to the one received in the + request. An HTTP server MUST NOT send a version for which it is not + at least conditionally compliant. 
A server MAY send a 505 (HTTP + Version Not Supported) response if cannot send a response using the + major version used in the client's request. + + An HTTP server MAY send a lower response version, if it is known or + suspected that the client incorrectly implements the HTTP + specification, but this should not be the default, and this SHOULD + NOT be done if the request version is HTTP/1.1 or greater. + + + + + + + + + + + +Mogul, et. al. Informational [Page 5] + +RFC 2145 HTTP Version Numbers May 1997 + + +3 Security Considerations + + None, except to the extent that security mechanisms introduced in one + version of HTTP might depend on the proper interpretation of HTTP + version numbers in older implementations. + +4 References + + 1. Berners-Lee, T., R. Fielding, and H. Frystyk. Hypertext + Transfer Protocol -- HTTP/1.0. RFC 1945, HTTP Working Group, May, + 1996. + + 2. Fielding, Roy T., Jim Gettys, Jeffrey C. Mogul, Henrik Frystyk + Nielsen, and Tim Berners-Lee. Hypertext Transfer Protocol -- + HTTP/1.1. RFC 2068, HTTP Working Group, January, 1997. + + 3. Khare, Rohit. HTTP/1.2 Extension Protocol (PEP). HTTP Working + Group, Work in Progress. + + 4. Postel, Jon. Internet Protocol. RFC 791, NIC, September, 1981. + +5 Authors' addresses + + Jeffrey C. Mogul + Western Research Laboratory + Digital Equipment Corporation + 250 University Avenue + Palo Alto, California, 94305, USA + Email: mogul@wrl.dec.com + + Roy T. Fielding + Department of Information and Computer Science + University of California + Irvine, CA 92717-3425, USA + Fax: +1 (714) 824-4056 + Email: fielding@ics.uci.edu + + Jim Gettys + MIT Laboratory for Computer Science + 545 Technology Square + Cambridge, MA 02139, USA + Fax: +1 (617) 258 8682 + Email: jg@w3.org + + + + + + + + +Mogul, et. al. 
Informational [Page 6] + +RFC 2145 HTTP Version Numbers May 1997 + + + Henrik Frystyk Nielsen + W3 Consortium + MIT Laboratory for Computer Science + 545 Technology Square + Cambridge, MA 02139, USA + Fax: +1 (617) 258 8682 + Email: frystyk@w3.org + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Mogul, et. al. Informational [Page 7] + diff --git a/docs/specs/rfc2324.txt b/docs/specs/rfc2324.txt new file mode 100644 index 0000000..a85921a --- /dev/null +++ b/docs/specs/rfc2324.txt @@ -0,0 +1,563 @@ + + + + + + +Network Working Group L. Masinter +Request for Comments: 2324 1 April 1998 +Category: Informational + + + Hyper Text Coffee Pot Control Protocol (HTCPCP/1.0) + +Status of this Memo + + This memo provides information for the Internet community. It does + not specify an Internet standard of any kind. Distribution of this + memo is unlimited. + +Copyright Notice + + Copyright (C) The Internet Society (1998). All Rights Reserved. + +Abstract + + This document describes HTCPCP, a protocol for controlling, + monitoring, and diagnosing coffee pots. + +1. Rationale and Scope + + There is coffee all over the world. Increasingly, in a world in which + computing is ubiquitous, the computists want to make coffee. Coffee + brewing is an art, but the distributed intelligence of the web- + connected world transcends art. Thus, there is a strong, dark, rich + requirement for a protocol designed espressoly for the brewing of + coffee. Coffee is brewed using coffee pots. Networked coffee pots + require a control protocol if they are to be controlled. + + Increasingly, home and consumer devices are being connected to the + Internet. Early networking experiments demonstrated vending devices + connected to the Internet for status monitoring [COKE]. One of the + first remotely _operated_ machine to be hooked up to the Internet, + the Internet Toaster, (controlled via SNMP) was debuted in 1990 + [RFC2235]. 
+ + The demand for ubiquitous appliance connectivity that is causing the + consumption of the IPv4 address space. Consumers want remote control + of devices such as coffee pots so that they may wake up to freshly + brewed coffee, or cause coffee to be prepared at a precise time after + the completion of dinner preparations. + + + + + + + +Masinter Informational [Page 1] + +RFC 2324 HTCPCP/1.0 1 April 1998 + + + This document specifies a Hyper Text Coffee Pot Control Protocol + (HTCPCP), which permits the full request and responses necessary to + control all devices capable of making the popular caffeinated hot + beverages. + + HTTP 1.1 ([RFC2068]) permits the transfer of web objects from origin + servers to clients. The web is world-wide. HTCPCP is based on HTTP. + This is because HTTP is everywhere. It could not be so pervasive + without being good. Therefore, HTTP is good. If you want good coffee, + HTCPCP needs to be good. To make HTCPCP good, it is good to base + HTCPCP on HTTP. + + Future versions of this protocol may include extensions for espresso + machines and similar devices. + +2. HTCPCP Protocol + + The HTCPCP protocol is built on top of HTTP, with the addition of a + few new methods, header fields and return codes. All HTCPCP servers + should be referred to with the "coffee:" URI scheme (Section 4). + +2.1 HTCPCP Added Methods + +2.1.1 The BREW method, and the use of POST + + Commands to control a coffee pot are sent from client to coffee + server using either the BREW or POST method, and a message body with + Content-Type set to "application/coffee-pot-command". + + A coffee pot server MUST accept both the BREW and POST method + equivalently. However, the use of POST for causing actions to happen + is deprecated. + + Coffee pots heat water using electronic mechanisms, so there is no + fire. Thus, no firewalls are necessary, and firewall control policy + is irrelevant. 
However, POST may be a trademark for coffee, and so + the BREW method has been added. The BREW method may be used with + other HTTP-based protocols (e.g., the Hyper Text Brewery Control + Protocol). + +2.1.2 GET method + + In HTTP, the GET method is used to mean "retrieve whatever + information (in the form of an entity) identified by the Request- + URI." If the Request-URI refers to a data-producing process, it is + the produced data which shall be returned as the entity in the + response and not the source text of the process, unless that text + happens to be the output of the process. + + + +Masinter Informational [Page 2] + +RFC 2324 HTCPCP/1.0 1 April 1998 + + + In HTCPCP, the resources associated with a coffee pot are physical, + and not information resources. The "data" for most coffee URIs + contain no caffeine. + +2.1.3 PROPFIND method + + If a cup of coffee is data, metadata about the brewed resource is + discovered using the PROPFIND method [WEBDAV]. + +2.1.4 WHEN method + + When coffee is poured, and milk is offered, it is necessary for the + holder of the recipient of milk to say "when" at the time when + sufficient milk has been introduced into the coffee. For this + purpose, the "WHEN" method has been added to HTCPCP. Enough? Say + WHEN. + +2.2 Coffee Pot Header fields + + HTCPCP recommends several HTTP header fields and defines some new + ones. + +2.2.1 Recommended header fields + +2.2.1.1 The "safe" response header field. + + [SAFE] defines a HTTP response header field, "Safe", which can be + used to indicate that repeating a HTTP request is safe. The inclusion + of a "Safe: Yes" header field allows a client to repeat a previous + request if the result of the request might be repeated. + + The actual safety of devices for brewing coffee varies widely, and + may depend, in fact, on conditions in the client rather than just in + the server. 
Thus, this protocol includes an extension to the "Safe" + response header: + + Safe = "Safe" ":" safe-nature + safe-nature = "yes" | "no" | conditionally-safe + conditionally-safe = "if-" safe-condition + safe-condition = "user-awake" | token + + indication will allow user agents to handle retries of some safe + requests, in particular safe POST requests, in a more user-friendly + way. + + + + + + + +Masinter Informational [Page 3] + +RFC 2324 HTCPCP/1.0 1 April 1998 + + +2.2.2 New header fields + +2.2.2.1 The Accept-Additions header field + + In HTTP, the "Accept" request-header field is used to specify media + types which are acceptable for the response. However, in HTCPCP, the + response may result in additional actions on the part of the + automated pot. For this reason, HTCPCP adds a new header field, + "Accept-Additions": + + + Accept-Additions = "Accept-Additions" ":" + #( addition-range [ accept-params ] ) + + addition-type = ( "*" + | milk-type + | syrup-type + | sweetener-type + | spice-type + | alcohol-type + ) *( ";" parameter ) + milk-type = ( "Cream" | "Half-and-half" | "Whole-milk" + | "Part-Skim" | "Skim" | "Non-Dairy" ) + syrup-type = ( "Vanilla" | "Almond" | "Raspberry" + | "Chocolate" ) + alcohol-type = ( "Whisky" | "Rum" | "Kahlua" | "Aquavit" ) + +2.2.3 Omitted Header Fields + + No options were given for decaffeinated coffee. What's the point? + +2.3 HTCPCP return codes + + Normal HTTP return codes are used to indicate difficulties of the + HTCPCP server. This section identifies special interpretations and + new return codes. + +2.3.1 406 Not Acceptable + + This return code is normally interpreted as "The resource identified + by the request is only capable of generating response entities which + have content characteristics not acceptable according to the accept + headers sent in the request. In HTCPCP, this response code MAY be + returned if the operator of the coffee pot cannot comply with the + Accept-Addition request. 
Unless the request was a HEAD request, the + response SHOULD include an entity containing a list of available + coffee additions. + + + + +Masinter Informational [Page 4] + +RFC 2324 HTCPCP/1.0 1 April 1998 + + + In practice, most automated coffee pots cannot currently provide + additions. + +2.3.2 418 I'm a teapot + + Any attempt to brew coffee with a teapot should result in the error + code "418 I'm a teapot". The resulting entity body MAY be short and + stout. + +3. The "coffee" URI scheme + + Because coffee is international, there are international coffee URI + schemes. All coffee URL schemes are written with URL encoding of the + UTF-8 encoding of the characters that spell the word for "coffee" in + any of 29 languages, following the conventions for + internationalization in URIs [URLI18N]. + +coffee-url = coffee-scheme ":" [ "//" host ] + ["/" pot-designator ] ["?" additions-list ] + +coffee-scheme = ( "koffie" ; Afrikaans, Dutch + | "q%C3%A6hv%C3%A6" ; Azerbaijani + | "%D9%82%D9%87%D9%88%D8%A9" ; Arabic + | "akeita" ; Basque + | "koffee" ; Bengali + | "kahva" ; Bosnian + | "kafe" ; Bulgarian, Czech + | "caf%C3%E8" ; Catalan, French, Galician + | "%E5%92%96%E5%95%A1" ; Chinese + | "kava" ; Croatian + | "k%C3%A1va ; Czech + | "kaffe" ; Danish, Norwegian, Swedish + | "coffee" ; English + | "kafo" ; Esperanto + | "kohv" ; Estonian + | "kahvi" ; Finnish + | "%4Baffee" ; German + | "%CE%BA%CE%B1%CF%86%CE%AD" ; Greek + | "%E0%A4%95%E0%A5%8C%E0%A4%AB%E0%A5%80" ; Hindi + | "%E3%82%B3%E3%83%BC%E3%83%92%E3%83%BC" ; Japanese + | "%EC%BB%A4%ED%94%BC" ; Korean + | "%D0%BA%D0%BE%D1%84%D0%B5" ; Russian + | "%E0%B8%81%E0%B8%B2%E0%B9%81%E0%B8%9F" ; Thai + ) + + pot-designator = "pot-" integer ; for machines with multiple pots + additions-list = #( addition ) + + + + +Masinter Informational [Page 5] + +RFC 2324 HTCPCP/1.0 1 April 1998 + + + All alternative coffee-scheme forms are equivalent. 
However, the use + of coffee-scheme in various languages MAY be interpreted as an + indication of the kind of coffee produced by the coffee pot. Note + that while URL scheme names are case-independent, capitalization is + important for German and thus the initial "K" must be encoded. + +4. The "message/coffeepot" media type + + The entity body of a POST or BREW request MUST be of Content-Type + "message/coffeepot". Since most of the information for controlling + the coffee pot is conveyed by the additional headers, the content of + "message/coffeepot" contains only a coffee-message-body: + + coffee-message-body = "start" | "stop" + +5. Operational constraints + + This section lays out some of the operational issues with deployment + of HTCPCP ubiquitously. + +5.1 Timing Considerations + + A robust quality of service is required between the coffee pot user + and the coffee pot service. Coffee pots SHOULD use the Network Time + Protocol [NTP] to synchronize their clocks to a globally accurate + time standard. + + Telerobotics has been an expensive technology. However, with the + advent of the Cambridge Coffee Pot [CAM], the use of the web (rather + than SNMP) for remote system monitoring and management has been + proven. Additional coffee pot maintenance tasks might be + accomplished by remote robotics. + + Web data is normally static. Therefore to save data transmission and + time, Web browser programs store each Web page retrieved by a user on + the user's computer. Thus, if the user wants to return to that page, + it is now stored locally and does not need to be requested again from + the server. An image used for robot control or for monitoring a + changing scene is dynamic. A fresh version needs to be retrieved from + the server each time it is accessed. + +5.2 Crossing firewalls + + In most organizations HTTP traffic crosses firewalls fairly easily. + Modern coffee pots do not use fire. 
However, a "firewall" is useful + for protection of any source from any manner of heat, and not just + fire. Every home computer network SHOULD be protected by a firewall + from sources of heat. However, remote control of coffee pots is + + + +Masinter Informational [Page 6] + +RFC 2324 HTCPCP/1.0 1 April 1998 + + + important from outside the home. Thus, it is important that HTCPCP + cross firewalls easily. + + By basing HTCPCP on HTTP and using port 80, it will get all of HTTP's + firewall-crossing virtues. Of course, the home firewalls will require + reconfiguration or new versions in order to accommodate HTCPCP- + specific methods, headers and trailers, but such upgrades will be + easily accommodated. Most home network system administrators drink + coffee, and are willing to accommodate the needs of tunnelling + HTCPCP. + +6. System management considerations + + Coffee pot monitoring using HTTP protocols has been an early + application of the web. In the earliest instance, coffee pot + monitoring was an early (and appropriate) use of ATM networks [CAM]. + + The traditional technique [CAM] was to attach a frame-grabber to a + video camera, and feed the images to a web server. This was an + appropriate application of ATM networks. In this coffee pot + installation, the Trojan Room of Cambridge University laboratories + was used to give a web interface to monitor a common coffee pot. of + us involved in related research and, being poor, impoverished + academics, we only had one coffee filter machine between us, which + lived in the corridor just outside the Trojan Room. However, being + highly dedicated and hard-working academics, we got through a lot of + coffee, and when a fresh pot was brewed, it often didn't last long. + + This service was created as the first application to use a new RPC + mechanism designed in the Cambridge Computer Laboratory - MSRPC2. It + runs over MSNL (Multi-Service Network Layer) - a network layer + protocol designed for ATM networks. 
+ + Coffee pots on the Internet may be managed using the Coffee Pot MIB + [CPMIB]. + +7. Security Considerations + + Anyone who gets in between me and my morning coffee should be + insecure. + + Unmoderated access to unprotected coffee pots from Internet users + might lead to several kinds of "denial of coffee service" attacks. + The improper use of filtration devices might admit trojan grounds. + Filtration is not a good virus protection method. + + + + + + +Masinter Informational [Page 7] + +RFC 2324 HTCPCP/1.0 1 April 1998 + + + Putting coffee grounds into Internet plumbing may result in clogged + plumbing, which would entail the services of an Internet Plumber + [PLUMB], who would, in turn, require an Internet Plumber's Helper. + + Access authentication will be discussed in a separate memo. + +8. Acknowledgements + + Many thanks to the many contributors to this standard, including Roy + Fielding, Mark Day, Keith Moore, Carl Uno-Manros, Michael Slavitch, + and Martin Duerst. The inspiration of the Prancing Pony, the CMU + Coke Machine, the Cambridge Coffee Pot, the Internet Toaster, and + other computer controlled remote devices have led to this valuable + creation. + +9. References + + [RFC2068] Fielding, R., Gettys, J., Mogul, J., Frystyk, H., and T. + Berners-Lee, "Hypertext Transfer Protocol -- HTTP/1.1", RFC 2068, + January 1997. + + [RFC2186] Wessels, D., and K. Claffy, "Internet Cache Protocol (ICP), + version 2," RFC 2186, September 1997 + + [CPMIB] Slavitch, M., "Definitions of Managed Objects for Drip-Type + Heated Beverage Hardware Devices using SMIv2", RFC 2325, 1 April + 1998. + + [HTSVMP] Q. Stafford-Fraser, "Hyper Text Sandwich Van Monitoring + Protocol, Version 3.2". In preparation. + + [RFC2295] Holtman, K., and A. Mutz, "Transparent Content Negotiation + in HTTP", RFC 2295, March 1998. + + [SAFE] K. Holtman. "The Safe Response Header Field", September 1997. + + [CAM] "The Trojan Room Coffee Machine", D. Gordon and M. 
Johnson, + University of Cambridge Computer Lab, + <http://www.cl.cam.ac.uk/coffee/coffee.html> + + [CBIO] "The Trojan Room Coffee Pot, a (non-technical) biography", Q. + Stafford-Fraser, University of Cambridge Computer Lab, + <http://www.cl.cam.ac.uk/coffee/qsf/coffee.html>. + + [RFC2235] Zakon, R., "Hobbes' Internet Timeline", FYI 32, RFC 2230, + November 1997. See also + <http://www.internode.com.au/images/toaster2.jpg> + + + + +Masinter Informational [Page 8] + +RFC 2324 HTCPCP/1.0 1 April 1998 + + + [NTP] Mills, D., "Network Time Protocol (Version 3) Specification, + Implementation and Analysis", RFC 1305, March 1992. + + [URLI18N] Masinter, L., "Using UTF8 for non-ASCII Characters in + Extended URIs" Work in Progress. + + [PLUMB] B. Metcalfe, "Internet Plumber of the Year: Jim Gettys", + Infoworld, February 2, 1998. + + [COKE] D. Nichols, "Coke machine history", C. Everhart, "Interesting + uses of networking", <http://www- + cse.ucsd.edu/users/bsy/coke.history.txt>. + +10. Author's Address + + Larry Masinter + Xerox Palo Alto Research Center + 3333 Coyote Hill Road + Palo Alto, CA 94304 + + EMail: masinter@parc.xerox.com + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Masinter Informational [Page 9] + +RFC 2324 HTCPCP/1.0 1 April 1998 + + +11. Full Copyright Statement + + Copyright (C) The Internet Society (1998). All Rights Reserved. + + This document and translations of it may be copied and furnished to + others, and derivative works that comment on or otherwise explain it + or assist in its implementation may be prepared, copied, published + and distributed, in whole or in part, without restriction of any + kind, provided that the above copyright notice and this paragraph are + included on all such copies and derivative works.
However, this + document itself may not be modified in any way, such as by removing + the copyright notice or references to the Internet Society or other + Internet organizations, except as needed for the purpose of + developing Internet standards in which case the procedures for + copyrights defined in the Internet Standards process must be + followed, or as required to translate it into languages other than + English. + + The limited permissions granted above are perpetual and will not be + revoked by the Internet Society or its successors or assigns. + + This document and the information contained herein is provided on an + "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING + TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING + BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION + HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF + MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + + + + + + + + + + + + + + + + + + + + + + + + +Masinter Informational [Page 10] + diff --git a/docs/specs/rfc2388.txt b/docs/specs/rfc2388.txt new file mode 100644 index 0000000..ffb9b6c --- /dev/null +++ b/docs/specs/rfc2388.txt @@ -0,0 +1,507 @@ + + + + + + +Network Working Group L. Masinter +Request for Comments: 2388 Xerox Corporation +Category: Standards Track August 1998 + + + Returning Values from Forms: multipart/form-data + +Status of this Memo + + This document specifies an Internet standards track protocol for the + Internet community, and requests discussion and suggestions for + improvements. Please refer to the current edition of the "Internet + Official Protocol Standards" (STD 1) for the standardization state + and status of this protocol. Distribution of this memo is unlimited. + +Copyright Notice + + Copyright (C) The Internet Society (1998). All Rights Reserved. + +1. 
Abstract + + This specification defines an Internet Media Type, multipart/form- + data, which can be used by a wide variety of applications and + transported by a wide variety of protocols as a way of returning a + set of values as the result of a user filling out a form. + +2. Introduction + + In many applications, it is possible for a user to be presented with + a form. The user will fill out the form, including information that + is typed, generated by user input, or included from files that the + user has selected. When the form is filled out, the data from the + form is sent from the user to the receiving application. + + The definition of MultiPart/Form-Data is derived from one of those + applications, originally set out in [RFC1867] and subsequently + incorporated into [HTML40], where forms are expressed in HTML, and in + which the form values are sent via HTTP or electronic mail. This + representation is widely implemented in numerous web browsers and web + servers. + + However, multipart/form-data can be used for forms that are presented + using representations other than HTML (spreadsheets, Portable + Document Format, etc), and for transport using other means than + electronic mail or HTTP. This document defines the representation of + form values independently of the application for which it is used. + + + + + +Masinter Standards Track [Page 1] + +RFC 2388 multipart/form-data August 1998 + + +3. Definition of multipart/form-data + + The media-type multipart/form-data follows the rules of all multipart + MIME data streams as outlined in [RFC 2046]. In forms, there are a + series of fields to be supplied by the user who fills out the form. + Each field has a name. Within a given form, the names are unique. + + "multipart/form-data" contains a series of parts. 
Each part is + expected to contain a content-disposition header [RFC 2183] where the + disposition type is "form-data", and where the disposition contains + an (additional) parameter of "name", where the value of that + parameter is the original field name in the form. For example, a part + might contain a header: + + Content-Disposition: form-data; name="user" + + with the value corresponding to the entry of the "user" field. + + Field names originally in non-ASCII character sets may be encoded + within the value of the "name" parameter using the standard method + described in RFC 2047. + + As with all multipart MIME types, each part has an optional + "Content-Type", which defaults to text/plain. If the contents of a + file are returned via filling out a form, then the file input is + identified as the appropriate media type, if known, or + "application/octet-stream". If multiple files are to be returned as + the result of a single form entry, they should be represented as a + "multipart/mixed" part embedded within the "multipart/form-data". + + Each part may be encoded and the "content-transfer-encoding" header + supplied if the value of that part does not conform to the default + encoding. + +4. Use of multipart/form-data + +4.1 Boundary + + As with other multipart types, a boundary is selected that does not + occur in any of the data. Each field of the form is sent, in the + order defined by the sending application and form, as a part of the + multipart stream. Each part identifies the INPUT name within the + original form. Each part should be labelled with an appropriate + content-type if the media type is known (e.g., inferred from the file + extension or operating system typing information) or as + "application/octet-stream".
+ + + + + +Masinter Standards Track [Page 2] + +RFC 2388 multipart/form-data August 1998 + + +4.2 Sets of files + + If the value of a form field is a set of files rather than a single + file, that value can be transferred together using the + "multipart/mixed" format. + +4.3 Encoding + + While the HTTP protocol can transport arbitrary binary data, the + default for mail transport is the 7BIT encoding. The value supplied + for a part may need to be encoded and the "content-transfer-encoding" + header supplied if the value does not conform to the default + encoding. [See section 5 of RFC 2046 for more details.] + +4.4 Other attributes + + Forms may request file inputs from the user; the form software may + include the file name and other file attributes, as specified in [RFC + 2184]. + + The original local file name may be supplied as well, either as a + "filename" parameter either of the "content-disposition: form-data" + header or, in the case of multiple files, in a "content-disposition: + file" header of the subpart. The sending application MAY supply a + file name; if the file name of the sender's operating system is not + in US-ASCII, the file name might be approximated, or encoded using + the method of RFC 2231. + + This is a convenience for those cases where the files supplied by the + form might contain references to each other, e.g., a TeX file and its + .sty auxiliary style description. + +4.5 Charset of text in form data + + Each part of a multipart/form-data is supposed to have a content- + type. In the case where a field element is text, the charset + parameter for the text indicates the character encoding used. 
+ + For example, a form with a text field in which a user typed 'Joe owes + <eu>100' where <eu> is the Euro symbol might have form data returned + as: + + --AaB03x + content-disposition: form-data; name="field1" + content-type: text/plain;charset=windows-1250 + content-transfer-encoding: quoted-printable + + + + + +Masinter Standards Track [Page 3] + +RFC 2388 multipart/form-data August 1998 + + + Joe owes =80100. + --AaB03x + +5. Operability considerations + +5.1 Compression, encryption + + Some of the data in forms may be compressed or encrypted, using other + MIME mechanisms. This is a function of the application that is + generating the form-data. + +5.2 Other data encodings rather than multipart + + Various people have suggested using new mime top-level type + "aggregate", e.g., aggregate/mixed or a content-transfer-encoding of + "packet" to express indeterminate-length binary data, rather than + relying on the multipart-style boundaries. While this would be + useful, the "multipart" mechanisms are well established, simple to + implement on both the sending client and receiving server, and as + efficient as other methods of dealing with multiple combinations of + binary data. + + The multipart/form-data encoding has a high overhead and performance + impact if there are many fields with short values. However, in + practice, for the forms in use, for example, in HTML, the average + overhead is not significant. + +5.3 Remote files with third-party transfer + + In some scenarios, the user operating the form software might want to + specify a URL for remote data rather than a local file. In this case, + is there a way to allow the browser to send to the client a pointer + to the external data rather than the entire contents? This capability + could be implemented, for example, by having the client send to the + server data of type "message/external-body" with "access-type" set + to, say, "uri", and the URL of the remote data in the body of the + message.
+ +5.4 Non-ASCII field names + + Note that MIME headers are generally required to consist only of 7- + bit data in the US-ASCII character set. Hence field names should be + encoded according to the method in RFC 2047 if they contain + characters outside of that set. + + + + + + + +Masinter Standards Track [Page 4] + +RFC 2388 multipart/form-data August 1998 + + +5.5 Ordered fields and duplicated field names + + The relationship of the ordering of fields within a form and the + ordering of returned values within "multipart/form-data" is not + defined by this specification, nor is the handling of the case where + a form has multiple fields with the same name. While HTML-based forms + may send back results in the order received, and intermediaries + should not reorder the results, there are some systems which might + not define a natural order for form fields. + +5.6 Interoperability with web applications + + Many web applications use the "application/x-url-encoded" method for + returning data from forms. This format is quite compact, e.g.: + + name=Xavier+Xantico&verdict=Yes&colour=Blue&happy=sad&Utf%F6r=Send + + however, there is no opportunity to label the enclosed data with + content type, apply a charset, or use other encoding mechanisms. + + Many form-interpreting programs (primarily web browsers) now implement + and generate multipart/form-data, but an existing application might + need to optionally support both the application/x-url-encoded format + as well. + +5.7 Correlating form data with the original form + + This specification provides no specific mechanism by which + multipart/form-data can be associated with the form that caused it to + be transmitted. This separation is intentional; many different forms + might be used for transmitting the same data. In practice, + applications may supply a specific form processing resource (in HTML, + the ACTION attribute in a FORM tag) for each different form.
+ Alternatively, data about the form might be encoded in a "hidden + field" (a field which is part of the form but which has a fixed value + to be transmitted back to the form-data processor.) + +6. Security Considerations + + The data format described in this document introduces no new security + considerations outside of those introduced by the protocols that use + it and of the component elements. It is important when interpreting + content-disposition to not overwrite files in the recipients address + space inadvertently. + + User applications that request form information from users must be + careful not to cause a user to send information to the requestor or a + third party unwillingly or unwittingly. For example, a form might + + + +Masinter Standards Track [Page 5] + +RFC 2388 multipart/form-data August 1998 + + + request 'spam' information to be sent to an unintended third party, + or private information to be sent to someone that the user might not + actually intend. While this is primarily an issue for the + representation and interpretation of forms themselves, rather than + the data representation of the result of form transmission, the + transportation of private information must be done in a way that does + not expose it to unwanted prying. + + With the introduction of form-data that can reasonably send back the + content of files from user's file space, the possibility that a user + might be sent an automated script that fills out a form and then + sends the user's local file to another address arises. Thus, + additional caution is required when executing automated scripting + where form-data might include user's files. + +7. Author's Address + + Larry Masinter + Xerox Palo Alto Research Center + 3333 Coyote Hill Road + Palo Alto, CA 94304 + + Fax: +1 650 812 4333 + EMail: masinter@parc.xerox.com + + + + + + + + + + + + + + + + + + + + + + + + + + + +Masinter Standards Track [Page 6] + +RFC 2388 multipart/form-data August 1998 + + +Appendix A. 
Media type registration for multipart/form-data + + Media Type name: + multipart + + Media subtype name: + form-data + + Required parameters: + none + + Optional parameters: + none + + Encoding considerations: + No additional considerations other than as for other multipart + types. + + Security Considerations + Applications which receive forms and process them must be careful + not to supply data back to the requesting form processing site that + was not intended to be sent by the recipient. This is a + consideration for any application that generates a multipart/form- + data. + + The multipart/form-data type introduces no new security + considerations for recipients beyond what might occur with any of + the enclosed parts. + + + + + + + + + + + + + + + + + + + + + + + +Masinter Standards Track [Page 7] + +RFC 2388 multipart/form-data August 1998 + + +References + + [RFC 2046] Freed, N., and N. Borenstein, "Multipurpose Internet Mail + Extensions (MIME) Part Two: Media Types", RFC 2046, + November 1996. + + [RFC 2047] Moore, K., "MIME (Multipurpose Internet Mail Extensions) + Part Three: Message Header Extensions for Non-ASCII Text", + RFC 2047, November 1996. + + [RFC 2231] Freed, N., and K. Moore, "MIME Parameter Value and Encoded + Word Extensions: Character Sets, Languages, and + Continuations", RFC 2231, November 1997. + + [RFC 1806] Troost, R., and S. Dorner, "Communicating Presentation + Information in Internet Messages: The Content-Disposition + Header", RFC 1806, June 1995. + + [RFC 1867] Nebel, E., and L. Masinter, "Form-based File Upload in + HTML", RFC 1867, November 1995. + + [RFC 2183] Troost, R., Dorner, S., and K. Moore, "Communicating + Presentation Information in Internet Messages: The + Content-Disposition Header Field", RFC 2183, August 1997. + + [RFC 2184] Freed, N., and K. Moore, "MIME Parameter Value and Encoded + Word Extensions: Character Sets, Languages, and + Continuations", RFC 2184, August 1997. + + [HTML40] D. Raggett, A. Le Hors, I. 
Jacobs. "HTML 4.0 + Specification", World Wide Web Consortium Technical Report + "REC-html40", December, 1997. + + + + + + + + + + + + + + + + + + +Masinter Standards Track [Page 8] + +RFC 2388 multipart/form-data August 1998 + + +Full Copyright Statement + + Copyright (C) The Internet Society (1998). All Rights Reserved. + + This document and translations of it may be copied and furnished to + others, and derivative works that comment on or otherwise explain it + or assist in its implementation may be prepared, copied, published + and distributed, in whole or in part, without restriction of any + kind, provided that the above copyright notice and this paragraph are + included on all such copies and derivative works. However, this + document itself may not be modified in any way, such as by removing + the copyright notice or references to the Internet Society or other + Internet organizations, except as needed for the purpose of + developing Internet standards in which case the procedures for + copyrights defined in the Internet Standards process must be + followed, or as required to translate it into languages other than + English. + + The limited permissions granted above are perpetual and will not be + revoked by the Internet Society or its successors or assigns. + + This document and the information contained herein is provided on an + "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING + TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING + BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION + HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF + MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + + + + + + + + + + + + + + + + + + + + + + + + +Masinter Standards Track [Page 9] + diff --git a/docs/specs/rfc2518.txt b/docs/specs/rfc2518.txt new file mode 100644 index 0000000..81d4038 --- /dev/null +++ b/docs/specs/rfc2518.txt @@ -0,0 +1,5267 @@ + + + + + + +Network Working Group Y. 
Goland +Request for Comments: 2518 Microsoft +Category: Standards Track E. Whitehead + UC Irvine + A. Faizi + Netscape + S. Carter + Novell + D. Jensen + Novell + February 1999 + + + HTTP Extensions for Distributed Authoring -- WEBDAV + +Status of this Memo + + This document specifies an Internet standards track protocol for the + Internet community, and requests discussion and suggestions for + improvements. Please refer to the current edition of the "Internet + Official Protocol Standards" (STD 1) for the standardization state + and status of this protocol. Distribution of this memo is unlimited. + +Copyright Notice + + Copyright (C) The Internet Society (1999). All Rights Reserved. + +Abstract + + This document specifies a set of methods, headers, and content-types + ancillary to HTTP/1.1 for the management of resource properties, + creation and management of resource collections, namespace + manipulation, and resource locking (collision avoidance). + +Table of Contents + + ABSTRACT............................................................1 + 1 INTRODUCTION .....................................................5 + 2 NOTATIONAL CONVENTIONS ...........................................7 + 3 TERMINOLOGY ......................................................7 + 4 DATA MODEL FOR RESOURCE PROPERTIES ...............................8 + 4.1 The Resource Property Model ...................................8 + 4.2 Existing Metadata Proposals ...................................8 + 4.3 Properties and HTTP Headers ...................................9 + 4.4 Property Values ...............................................9 + 4.5 Property Names ...............................................10 + 4.6 Media Independent Links ......................................10 + 5 COLLECTIONS OF WEB RESOURCES ....................................11 + + + +Goland, et al. 
Standards Track [Page 1] + +RFC 2518 WEBDAV February 1999 + + + 5.1 HTTP URL Namespace Model .....................................11 + 5.2 Collection Resources .........................................11 + 5.3 Creation and Retrieval of Collection Resources ...............12 + 5.4 Source Resources and Output Resources ........................13 + 6 LOCKING .........................................................14 + 6.1 Exclusive Vs. Shared Locks ...................................14 + 6.2 Required Support .............................................16 + 6.3 Lock Tokens ..................................................16 + 6.4 opaquelocktoken Lock Token URI Scheme ........................16 + 6.4.1 Node Field Generation Without the IEEE 802 Address ........17 + 6.5 Lock Capability Discovery ....................................19 + 6.6 Active Lock Discovery ........................................19 + 6.7 Usage Considerations .........................................19 + 7 WRITE LOCK ......................................................20 + 7.1 Methods Restricted by Write Locks ............................20 + 7.2 Write Locks and Lock Tokens ..................................20 + 7.3 Write Locks and Properties ...................................20 + 7.4 Write Locks and Null Resources ...............................21 + 7.5 Write Locks and Collections ..................................21 + 7.6 Write Locks and the If Request Header ........................22 + 7.6.1 Example - Write Lock ......................................22 + 7.7 Write Locks and COPY/MOVE ....................................23 + 7.8 Refreshing Write Locks .......................................23 + 8 HTTP METHODS FOR DISTRIBUTED AUTHORING ..........................23 + 8.1 PROPFIND .....................................................24 + 8.1.1 Example - Retrieving Named Properties .....................25 + 8.1.2 Example - Using allprop to Retrieve All Properties ........26 + 8.1.3 Example - 
Using propname to Retrieve all Property Names ...29 + 8.2 PROPPATCH ....................................................31 + 8.2.1 Status Codes for use with 207 (Multi-Status) ..............31 + 8.2.2 Example - PROPPATCH .......................................32 + 8.3 MKCOL Method .................................................33 + 8.3.1 Request ...................................................33 + 8.3.2 Status Codes ..............................................33 + 8.3.3 Example - MKCOL ...........................................34 + 8.4 GET, HEAD for Collections ....................................34 + 8.5 POST for Collections .........................................35 + 8.6 DELETE .......................................................35 + 8.6.1 DELETE for Non-Collection Resources .......................35 + 8.6.2 DELETE for Collections ....................................36 + 8.7 PUT ..........................................................36 + 8.7.1 PUT for Non-Collection Resources ..........................36 + 8.7.2 PUT for Collections .......................................37 + 8.8 COPY Method ..................................................37 + 8.8.1 COPY for HTTP/1.1 resources ...............................37 + 8.8.2 COPY for Properties .......................................38 + 8.8.3 COPY for Collections ......................................38 + 8.8.4 COPY and the Overwrite Header .............................39 + + + +Goland, et al. 
Standards Track [Page 2] + +RFC 2518 WEBDAV February 1999 + + + 8.8.5 Status Codes ..............................................39 + 8.8.6 Example - COPY with Overwrite .............................40 + 8.8.7 Example - COPY with No Overwrite ..........................40 + 8.8.8 Example - COPY of a Collection ............................41 + 8.9 MOVE Method ..................................................42 + 8.9.1 MOVE for Properties .......................................42 + 8.9.2 MOVE for Collections ......................................42 + 8.9.3 MOVE and the Overwrite Header .............................43 + 8.9.4 Status Codes ..............................................43 + 8.9.5 Example - MOVE of a Non-Collection ........................44 + 8.9.6 Example - MOVE of a Collection ............................44 + 8.10 LOCK Method ..................................................45 + 8.10.1 Operation .................................................46 + 8.10.2 The Effect of Locks on Properties and Collections .........46 + 8.10.3 Locking Replicated Resources ..............................46 + 8.10.4 Depth and Locking .........................................46 + 8.10.5 Interaction with other Methods ............................47 + 8.10.6 Lock Compatibility Table ..................................47 + 8.10.7 Status Codes ..............................................48 + 8.10.8 Example - Simple Lock Request .............................48 + 8.10.9 Example - Refreshing a Write Lock .........................49 + 8.10.10 Example - Multi-Resource Lock Request ....................50 + 8.11 UNLOCK Method ................................................51 + 8.11.1 Example - UNLOCK ..........................................52 + 9 HTTP HEADERS FOR DISTRIBUTED AUTHORING ..........................52 + 9.1 DAV Header ...................................................52 + 9.2 Depth Header .................................................52 + 9.3 Destination Header 
...........................................54 + 9.4 If Header ....................................................54 + 9.4.1 No-tag-list Production ....................................55 + 9.4.2 Tagged-list Production ....................................55 + 9.4.3 not Production ............................................56 + 9.4.4 Matching Function .........................................56 + 9.4.5 If Header and Non-DAV Compliant Proxies ...................57 + 9.5 Lock-Token Header ............................................57 + 9.6 Overwrite Header .............................................57 + 9.7 Status-URI Response Header ...................................57 + 9.8 Timeout Request Header .......................................58 + 10 STATUS CODE EXTENSIONS TO HTTP/1.1 ............................59 + 10.1 102 Processing ...............................................59 + 10.2 207 Multi-Status .............................................59 + 10.3 422 Unprocessable Entity .....................................60 + 10.4 423 Locked ...................................................60 + 10.5 424 Failed Dependency ........................................60 + 10.6 507 Insufficient Storage .....................................60 + 11 MULTI-STATUS RESPONSE .........................................60 + 12 XML ELEMENT DEFINITIONS .......................................61 + 12.1 activelock XML Element .......................................61 + + + +Goland, et al. 
Standards Track [Page 3] + +RFC 2518 WEBDAV February 1999 + + + 12.1.1 depth XML Element .........................................61 + 12.1.2 locktoken XML Element .....................................61 + 12.1.3 timeout XML Element .......................................61 + 12.2 collection XML Element .......................................62 + 12.3 href XML Element .............................................62 + 12.4 link XML Element .............................................62 + 12.4.1 dst XML Element ...........................................62 + 12.4.2 src XML Element ...........................................62 + 12.5 lockentry XML Element ........................................63 + 12.6 lockinfo XML Element .........................................63 + 12.7 lockscope XML Element ........................................63 + 12.7.1 exclusive XML Element .....................................63 + 12.7.2 shared XML Element ........................................63 + 12.8 locktype XML Element .........................................64 + 12.8.1 write XML Element .........................................64 + 12.9 multistatus XML Element ......................................64 + 12.9.1 response XML Element ......................................64 + 12.9.2 responsedescription XML Element ...........................65 + 12.10 owner XML Element ...........................................65 + 12.11 prop XML element ............................................66 + 12.12 propertybehavior XML element ................................66 + 12.12.1 keepalive XML element ....................................66 + 12.12.2 omit XML element .........................................67 + 12.13 propertyupdate XML element ..................................67 + 12.13.1 remove XML element .......................................67 + 12.13.2 set XML element ..........................................67 + 12.14 propfind XML Element ........................................68 + 
12.14.1 allprop XML Element ......................................68 + 12.14.2 propname XML Element .....................................68 + 13 DAV PROPERTIES ................................................68 + 13.1 creationdate Property ........................................69 + 13.2 displayname Property .........................................69 + 13.3 getcontentlanguage Property ..................................69 + 13.4 getcontentlength Property ....................................69 + 13.5 getcontenttype Property ......................................70 + 13.6 getetag Property .............................................70 + 13.7 getlastmodified Property .....................................70 + 13.8 lockdiscovery Property .......................................71 + 13.8.1 Example - Retrieving the lockdiscovery Property ...........71 + 13.9 resourcetype Property ........................................72 + 13.10 source Property .............................................72 + 13.10.1 Example - A source Property ..............................72 + 13.11 supportedlock Property ......................................73 + 13.11.1 Example - Retrieving the supportedlock Property ..........73 + 14 INSTRUCTIONS FOR PROCESSING XML IN DAV ........................74 + 15 DAV COMPLIANCE CLASSES ........................................75 + 15.1 Class 1 ......................................................75 + 15.2 Class 2 ......................................................75 + + + +Goland, et al. 
Standards Track [Page 4] + +RFC 2518 WEBDAV February 1999 + + + 16 INTERNATIONALIZATION CONSIDERATIONS ...........................76 + 17 SECURITY CONSIDERATIONS .......................................77 + 17.1 Authentication of Clients ....................................77 + 17.2 Denial of Service ............................................78 + 17.3 Security through Obscurity ...................................78 + 17.4 Privacy Issues Connected to Locks ............................78 + 17.5 Privacy Issues Connected to Properties .......................79 + 17.6 Reduction of Security due to Source Link .....................79 + 17.7 Implications of XML External Entities ........................79 + 17.8 Risks Connected with Lock Tokens .............................80 + 18 IANA CONSIDERATIONS ...........................................80 + 19 INTELLECTUAL PROPERTY .........................................81 + 20 ACKNOWLEDGEMENTS ..............................................82 + 21 REFERENCES ....................................................82 + 21.1 Normative References .........................................82 + 21.2 Informational References .....................................83 + 22 AUTHORS' ADDRESSES ............................................84 + 23 APPENDICES ....................................................86 + 23.1 Appendix 1 - WebDAV Document Type Definition .................86 + 23.2 Appendix 2 - ISO 8601 Date and Time Profile ..................88 + 23.3 Appendix 3 - Notes on Processing XML Elements ................89 + 23.3.1 Notes on Empty XML Elements ...............................89 + 23.3.2 Notes on Illegal XML Processing ...........................89 + 23.4 Appendix 4 -- XML Namespaces for WebDAV ......................92 + 23.4.1 Introduction ..............................................92 + 23.4.2 Meaning of Qualified Names ................................92 + 24 FULL COPYRIGHT STATEMENT ......................................94 + + + 
+1 Introduction + + This document describes an extension to the HTTP/1.1 protocol that + allows clients to perform remote web content authoring operations. + This extension provides a coherent set of methods, headers, request + entity body formats, and response entity body formats that provide + operations for: + + Properties: The ability to create, remove, and query information + about Web pages, such as their authors, creation dates, etc. Also, + the ability to link pages of any media type to related pages. + + Collections: The ability to create sets of documents and to retrieve + a hierarchical membership listing (like a directory listing in a file + system). + + + + + + +Goland, et al. Standards Track [Page 5] + +RFC 2518 WEBDAV February 1999 + + + Locking: The ability to keep more than one person from working on a + document at the same time. This prevents the "lost update problem," + in which modifications are lost as first one author then another + writes changes without merging the other author's changes. + + Namespace Operations: The ability to instruct the server to copy and + move Web resources. + + Requirements and rationale for these operations are described in a + companion document, "Requirements for a Distributed Authoring and + Versioning Protocol for the World Wide Web" [RFC2291]. + + The sections below provide a detailed introduction to resource + properties (section 4), collections of resources (section 5), and + locking operations (section 6). These sections introduce the + abstractions manipulated by the WebDAV-specific HTTP methods + described in section 8, "HTTP Methods for Distributed Authoring". + + In HTTP/1.1, method parameter information was exclusively encoded in + HTTP headers. Unlike HTTP/1.1, WebDAV encodes method parameter + information either in an Extensible Markup Language (XML) [REC-XML] + request entity body, or in an HTTP header. 
The use of XML to encode + method parameters was motivated by the ability to add extra XML + elements to existing structures, providing extensibility; and by + XML's ability to encode information in ISO 10646 character sets, + providing internationalization support. As a rule of thumb, + parameters are encoded in XML entity bodies when they have unbounded + length, or when they may be shown to a human user and hence require + encoding in an ISO 10646 character set. Otherwise, parameters are + encoded within HTTP headers. Section 9 describes the new HTTP + headers used with WebDAV methods. + + In addition to encoding method parameters, XML is used in WebDAV to + encode the responses from methods, providing the extensibility and + internationalization advantages of XML for method output, as well as + input. + + XML elements used in this specification are defined in section 12. + + The XML namespace extension (Appendix 4) is also used in this + specification in order to allow for new XML elements to be added + without fear of colliding with other element names. + + While the status codes provided by HTTP/1.1 are sufficient to + describe most error conditions encountered by WebDAV methods, there + are some errors that do not fall neatly into the existing categories. + New status codes developed for the WebDAV methods are defined in + section 10. Since some WebDAV methods may operate over many + + + +Goland, et al. Standards Track [Page 6] + +RFC 2518 WEBDAV February 1999 + + + resources, the Multi-Status response has been introduced to return + status information for multiple resources. The Multi-Status response + is described in section 11. + + WebDAV employs the property mechanism to store information about the + current state of the resource. For example, when a lock is taken out + on a resource, a lock information property describes the current + state of the lock. Section 13 defines the properties used within the + WebDAV specification. 
+ + Finishing off the specification are sections on what it means to be + compliant with this specification (section 15), on + internationalization support (section 16), and on security (section + 17). + +2 Notational Conventions + + Since this document describes a set of extensions to the HTTP/1.1 + protocol, the augmented BNF used herein to describe protocol elements + is exactly the same as described in section 2.1 of [RFC2068]. Since + this augmented BNF uses the basic production rules provided in + section 2.2 of [RFC2068], these rules apply to this document as well. + + The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", + "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this + document are to be interpreted as described in RFC 2119 [RFC2119]. + +3 Terminology + + URI/URL - A Uniform Resource Identifier and Uniform Resource Locator, + respectively. These terms (and the distinction between them) are + defined in [RFC2396]. + + Collection - A resource that contains a set of URIs, termed member + URIs, which identify member resources and meets the requirements in + section 5 of this specification. + + Member URI - A URI which is a member of the set of URIs contained by + a collection. + + Internal Member URI - A Member URI that is immediately relative to + the URI of the collection (the definition of immediately relative is + given in section 5.2). + + Property - A name/value pair that contains descriptive information + about a resource. + + + + + +Goland, et al. Standards Track [Page 7] + +RFC 2518 WEBDAV February 1999 + + + Live Property - A property whose semantics and syntax are enforced by + the server. For example, the live "getcontentlength" property has + its value, the length of the entity returned by a GET request, + automatically calculated by the server. + + Dead Property - A property whose semantics and syntax are not + enforced by the server.
The server only records the value of a dead + property; the client is responsible for maintaining the consistency + of the syntax and semantics of a dead property. + + Null Resource - A resource which responds with a 404 (Not Found) to + any HTTP/1.1 or DAV method except for PUT, MKCOL, OPTIONS and LOCK. + A NULL resource MUST NOT appear as a member of its parent collection. + +4 Data Model for Resource Properties + +4.1 The Resource Property Model + + Properties are pieces of data that describe the state of a resource. + Properties are data about data. + + Properties are used in distributed authoring environments to provide + for efficient discovery and management of resources. For example, a + 'subject' property might allow for the indexing of all resources by + their subject, and an 'author' property might allow for the discovery + of what authors have written which documents. + + The DAV property model consists of name/value pairs. The name of a + property identifies the property's syntax and semantics, and provides + an address by which to refer to its syntax and semantics. + + There are two categories of properties: "live" and "dead". A live + property has its syntax and semantics enforced by the server. Live + properties include cases where a) the value of a property is read- + only, maintained by the server, and b) the value of the property is + maintained by the client, but the server performs syntax checking on + submitted values. All instances of a given live property MUST comply + with the definition associated with that property name. A dead + property has its syntax and semantics enforced by the client; the + server merely records the value of the property verbatim. + +4.2 Existing Metadata Proposals + + Properties have long played an essential role in the maintenance of + large document repositories, and many current proposals contain some + notion of a property, or discuss web metadata more generally. 
These + include PICS [REC-PICS], PICS-NG, XML, Web Collections, and several + proposals on representing relationships within HTML. Work on PICS-NG + + + +Goland, et al. Standards Track [Page 8] + +RFC 2518 WEBDAV February 1999 + + + and Web Collections has been subsumed by the Resource Description + Framework (RDF) metadata activity of the World Wide Web Consortium. + RDF consists of a network-based data model and an XML representation + of that model. + + Some proposals come from a digital library perspective. These + include the Dublin Core [RFC2413] metadata set and the Warwick + Framework [WF], a container architecture for different metadata + schemas. The literature includes many examples of metadata, + including MARC [USMARC], a bibliographic metadata format, and a + technical report bibliographic format employed by the Dienst system + [RFC1807]. Additionally, the proceedings from the first IEEE Metadata + conference describe many community-specific metadata sets. + + Participants of the 1996 Metadata II Workshop in Warwick, UK [WF], + noted that "new metadata sets will develop as the networked + infrastructure matures" and "different communities will propose, + design, and be responsible for different types of metadata." These + observations can be corroborated by noting that many community- + specific sets of metadata already exist, and there is significant + motivation for the development of new forms of metadata as many + communities increasingly make their data available in digital form, + requiring a metadata format to assist data location and cataloging. + +4.3 Properties and HTTP Headers + + Properties already exist, in a limited sense, in HTTP message + headers. However, in distributed authoring environments a relatively + large number of properties are needed to describe the state of a + resource, and setting/returning them all through HTTP headers is + inefficient. 
Thus a mechanism is needed which allows a principal to + identify a set of properties in which the principal is interested and + to set or retrieve just those properties. + +4.4 Property Values + + The value of a property when expressed in XML MUST be well formed. + + XML has been chosen because it is a flexible, self-describing, + structured data format that supports rich schema definitions, and + because of its support for multiple character sets. XML's self- + describing nature allows any property's value to be extended by + adding new elements. Older clients will not break when they + encounter extensions because they will still have the data specified + in the original schema and will ignore elements they do not + understand. XML's support for multiple character sets allows any + human-readable property to be encoded and read in a character set + familiar to the user. XML's support for multiple human languages, + + + +Goland, et al. Standards Track [Page 9] + +RFC 2518 WEBDAV February 1999 + + + using the "xml:lang" attribute, handles cases where the same + character set is employed by multiple human languages. + +4.5 Property Names + + A property name is a universally unique identifier that is associated + with a schema that provides information about the syntax and + semantics of the property. + + Because a property's name is universally unique, clients can depend + upon consistent behavior for a particular property across multiple + resources, on the same and across different servers, so long as that + property is "live" on the resources in question, and the + implementation of the live property is faithful to its definition. + + The XML namespace mechanism, which is based on URIs [RFC2396], is + used to name properties because it prevents namespace collisions and + provides for varying degrees of administrative control. + + The property namespace is flat; that is, no hierarchy of properties + is explicitly recognized. 
Thus, if a property A and a property A/B + exist on a resource, there is no recognition of any relationship + between the two properties. It is expected that a separate + specification will eventually be produced which will address issues + relating to hierarchical properties. + + Finally, it is not possible to define the same property twice on a + single resource, as this would cause a collision in the resource's + property namespace. + +4.6 Media Independent Links + + Although HTML resources support links to other resources, the Web + needs more general support for links between resources of any media + type (media types are also known as MIME types, or content types). + WebDAV provides such links. A WebDAV link is a special type of + property value, formally defined in section 12.4, that allows typed + connections to be established between resources of any media type. + The property value consists of source and destination Uniform + Resource Identifiers (URIs); the property name identifies the link + type. + + + + + + + + + + +Goland, et al. Standards Track [Page 10] + +RFC 2518 WEBDAV February 1999 + + +5 Collections of Web Resources + + This section provides a description of a new type of Web resource, + the collection, and discusses its interactions with the HTTP URL + namespace. The purpose of a collection resource is to model + collection-like objects (e.g., file system directories) within a + server's namespace. + + All DAV compliant resources MUST support the HTTP URL namespace model + specified herein. + +5.1 HTTP URL Namespace Model + + The HTTP URL namespace is a hierarchical namespace where the + hierarchy is delimited with the "/" character. + + An HTTP URL namespace is said to be consistent if it meets the + following conditions: for every URL in the HTTP hierarchy there + exists a collection that contains that URL as an internal member. + The root, or top-level collection of the namespace under + consideration is exempt from the previous rule. 
+ + Neither HTTP/1.1 nor WebDAV requires that the entire HTTP URL + namespace be consistent. However, certain WebDAV methods are + prohibited from producing results that cause namespace + inconsistencies. + + Although implicit in [RFC2068] and [RFC2396], any resource, including + collection resources, MAY be identified by more than one URI. For + example, a resource could be identified by multiple HTTP URLs. + +5.2 Collection Resources + + A collection is a resource whose state consists of at least a list of + internal member URIs and a set of properties, but which may have + additional state such as entity bodies returned by GET. An internal + member URI MUST be immediately relative to a base URI of the + collection. That is, the internal member URI is equal to a + containing collection's URI plus an additional segment for non- + collection resources, or additional segment plus trailing slash "/" + for collection resources, where segment is defined in section 3.3 of + [RFC2396]. + + Any given internal member URI MUST only belong to the collection + once, i.e., it is illegal to have multiple instances of the same URI + in a collection. Properties defined on collections behave exactly as + do properties on non-collection resources. + + + + +Goland, et al. Standards Track [Page 11] + +RFC 2518 WEBDAV February 1999 + + + For all WebDAV compliant resources A and B, identified by URIs U and + V, for which U is immediately relative to V, B MUST be a collection + that has U as an internal member URI. So, if the resource with URL + http://foo.com/bar/blah is WebDAV compliant and if the resource with + URL http://foo.com/bar/ is WebDAV compliant then the resource with + URL http://foo.com/bar/ must be a collection and must contain URL + http://foo.com/bar/blah as an internal member. + + Collection resources MAY list the URLs of non-WebDAV compliant + children in the HTTP URL namespace hierarchy as internal members but + are not required to do so.
For example, if the resource with URL + http://foo.com/bar/blah is not WebDAV compliant and the URL + http://foo.com/bar/ identifies a collection then URL + http://foo.com/bar/blah may or may not be an internal member of the + collection with URL http://foo.com/bar/. + + If a WebDAV compliant resource has no WebDAV compliant children in + the HTTP URL namespace hierarchy then the WebDAV compliant resource + is not required to be a collection. + + There is a standing convention that when a collection is referred to + by its name without a trailing slash, the trailing slash is + automatically appended. Due to this, a resource may accept a URI + without a trailing "/" to point to a collection. In this case it + SHOULD return a content-location header in the response pointing to + the URI ending with the "/". For example, if a client invokes a + method on http://foo.bar/blah (no trailing slash), the resource + http://foo.bar/blah/ (trailing slash) may respond as if the operation + were invoked on it, and should return a content-location header with + http://foo.bar/blah/ in it. In general clients SHOULD use the "/" + form of collection names. + + A resource MAY be a collection but not be WebDAV compliant. That is, + the resource may comply with all the rules set out in this + specification regarding how a collection is to behave without + necessarily supporting all methods that a WebDAV compliant resource + is required to support. In such a case the resource may return the + DAV:resourcetype property with the value DAV:collection but MUST NOT + return a DAV header containing the value "1" on an OPTIONS response. + +5.3 Creation and Retrieval of Collection Resources + + This document specifies the MKCOL method to create new collection + resources, rather than using the existing HTTP/1.1 PUT or POST + method, for the following reasons: + + + + + + +Goland, et al. 
Standards Track [Page 12] + +RFC 2518 WEBDAV February 1999 + + + In HTTP/1.1, the PUT method is defined to store the request body at + the location specified by the Request-URI. While a description + format for a collection can readily be constructed for use with PUT, + the implications of sending such a description to the server are + undesirable. For example, if a description of a collection that + omitted some existing resources were PUT to a server, this might be + interpreted as a command to remove those members. This would extend + PUT to perform DELETE functionality, which is undesirable since it + changes the semantics of PUT, and makes it difficult to control + DELETE functionality with an access control scheme based on methods. + + While the POST method is sufficiently open-ended that a "create a + collection" POST command could be constructed, this is undesirable + because it would be difficult to separate access control for + collection creation from other uses of POST. + + The exact definition of the behavior of GET and PUT on collections is + defined later in this document. + +5.4 Source Resources and Output Resources + + For many resources, the entity returned by a GET method exactly + matches the persistent state of the resource, for example, a GIF file + stored on a disk. For this simple case, the URI at which a resource + is accessed is identical to the URI at which the source (the + persistent state) of the resource is accessed. This is also the case + for HTML source files that are not processed by the server prior to + transmission. + + However, the server can sometimes process HTML resources before they + are transmitted as a return entity body. For example, a server- + side-include directive within an HTML file might instruct a server to + replace the directive with another value, such as the current date. + In this case, what is returned by GET (HTML plus date) differs from + the persistent state of the resource (HTML plus directive). 
+ Typically there is no way to access the HTML resource containing the + unprocessed directive. + + Sometimes the entity returned by GET is the output of a data- + producing process that is described by one or more source resources + (that may not even have a location in the URI namespace). A single + data-producing process may dynamically generate the state of a + potentially large number of output resources. An example of this is + a CGI script that describes a "finger" gateway process that maps part + of the namespace of a server into finger requests, such as + http://www.foo.bar.org/finger_gateway/user@host. + + + + + +Goland, et al. Standards Track [Page 13] + +RFC 2518 WEBDAV February 1999 + + + In the absence of distributed authoring capabilities, it is + acceptable to have no mapping of source resource(s) to the URI + namespace. In fact, preventing access to the source resource(s) has + desirable security benefits. However, if remote editing of the + source resource(s) is desired, the source resource(s) should be given + a location in the URI namespace. This source location should not be + one of the locations at which the generated output is retrievable, + since in general it is impossible for the server to differentiate + requests for source resources from requests for process output + resources. There is often a many-to-many relationship between source + resources and output resources. + + On WebDAV compliant servers the URI of the source resource(s) may be + stored in a link on the output resource with type DAV:source (see + section 13.10 for a description of the source link property). + Storing the source URIs in links on the output resources places the + burden of discovering the source on the authoring client. Note that + the value of a source link is not guaranteed to point to the correct + source. Source links may break or incorrect values may be entered. + Also note that not all servers will allow the client to set the + source link value. 
For example a server which generates source links + on the fly for its CGI files will most likely not allow a client to + set the source link value. + +6 Locking + + The ability to lock a resource provides a mechanism for serializing + access to that resource. Using a lock, an authoring client can + provide a reasonable guarantee that another principal will not modify + a resource while it is being edited. In this way, a client can + prevent the "lost update" problem. + + This specification allows locks to vary over two client-specified + parameters, the number of principals involved (exclusive vs. shared) + and the type of access to be granted. This document defines locking + for only one access type, write. However, the syntax is extensible, + and permits the eventual specification of locking for other access + types. + +6.1 Exclusive Vs. Shared Locks + + The most basic form of lock is an exclusive lock. This is a lock + where the access right in question is only granted to a single + principal. The need for this arbitration results from a desire to + avoid having to merge results. + + + + + + +Goland, et al. Standards Track [Page 14] + +RFC 2518 WEBDAV February 1999 + + + However, there are times when the goal of a lock is not to exclude + others from exercising an access right but rather to provide a + mechanism for principals to indicate that they intend to exercise + their access rights. Shared locks are provided for this case. A + shared lock allows multiple principals to receive a lock. Hence any + principal with appropriate access can get the lock. + + With shared locks there are two trust sets that affect a resource. + The first trust set is created by access permissions. Principals who + are trusted, for example, may have permission to write to the + resource. 
Among those who have access permission to write to the + resource, the set of principals who have taken out a shared lock also + must trust each other, creating a (typically) smaller trust set + within the access permission write set. + + Starting with every possible principal on the Internet, in most + situations the vast majority of these principals will not have write + access to a given resource. Of the small number who do have write + access, some principals may decide to guarantee their edits are free + from overwrite conflicts by using exclusive write locks. Others may + decide they trust their collaborators will not overwrite their work + (the potential set of collaborators being the set of principals who + have write permission) and use a shared lock, which informs their + collaborators that a principal may be working on the resource. + + The WebDAV extensions to HTTP do not need to provide all of the + communications paths necessary for principals to coordinate their + activities. When using shared locks, principals may use any out of + band communication channel to coordinate their work (e.g., face-to- + face interaction, written notes, post-it notes on the screen, + telephone conversation, Email, etc.) The intent of a shared lock is + to let collaborators know who else may be working on a resource. + + Shared locks are included because experience from web distributed + authoring systems has indicated that exclusive locks are often too + rigid. An exclusive lock is used to enforce a particular editing + process: take out an exclusive lock, read the resource, perform + edits, write the resource, release the lock. This editing process + has the problem that locks are not always properly released, for + example when a program crashes, or when a lock owner leaves without + unlocking a resource. 
While both timeouts and administrative action + can be used to remove an offending lock, neither mechanism may be + available when needed; the timeout may be long or the administrator + may not be available. + + + + + + + +Goland, et al. Standards Track [Page 15] + +RFC 2518 WEBDAV February 1999 + + +6.2 Required Support + + A WebDAV compliant server is not required to support locking in any + form. If the server does support locking it may choose to support + any combination of exclusive and shared locks for any access types. + + The reason for this flexibility is that locking policy strikes to the + very heart of the resource management and versioning systems employed + by various storage repositories. These repositories require control + over what sort of locking will be made available. For example, some + repositories only support shared write locks while others only + provide support for exclusive write locks while yet others use no + locking at all. As each system is sufficiently different to merit + exclusion of certain locking features, this specification leaves + locking as the sole axis of negotiation within WebDAV. + +6.3 Lock Tokens + + A lock token is a type of state token, represented as a URI, which + identifies a particular lock. A lock token is returned by every + successful LOCK operation in the lockdiscovery property in the + response body, and can also be found through lock discovery on a + resource. + + Lock token URIs MUST be unique across all resources for all time. + This uniqueness constraint allows lock tokens to be submitted across + resources and servers without fear of confusion. + + This specification provides a lock token URI scheme called + opaquelocktoken that meets the uniqueness requirements. However + resources are free to return any URI scheme so long as it meets the + uniqueness requirements. + + Having a lock token provides no special access rights. Anyone can + find out anyone else's lock token by performing lock discovery. 
+ Locks MUST be enforced based upon whatever authentication mechanism + is used by the server, not based on the secrecy of the token values. + +6.4 opaquelocktoken Lock Token URI Scheme + + The opaquelocktoken URI scheme is designed to be unique across all + resources for all time. Due to this uniqueness quality, a client may + submit an opaque lock token in an If header on a resource other than + the one that returned it. + + All resources MUST recognize the opaquelocktoken scheme and, at + minimum, recognize that the lock token does not refer to an + outstanding lock on the resource. + + + +Goland, et al. Standards Track [Page 16] + +RFC 2518 WEBDAV February 1999 + + + In order to guarantee uniqueness across all resources for all time + the opaquelocktoken requires the use of the Universal Unique + Identifier (UUID) mechanism, as described in [ISO-11578]. + + Opaquelocktoken generators, however, have a choice of how they create + these tokens. They can either generate a new UUID for every lock + token they create or they can create a single UUID and then add + extension characters. If the second method is selected then the + program generating the extensions MUST guarantee that the same + extension will never be used twice with the associated UUID. + + OpaqueLockToken-URI = "opaquelocktoken:" UUID [Extension] ; The UUID + production is the string representation of a UUID, as defined in + [ISO-11578]. Note that white space (LWS) is not allowed between + elements of this production. + + Extension = path ; path is defined in section 3.2.1 of RFC 2068 + [RFC2068] + +6.4.1 Node Field Generation Without the IEEE 802 Address + + UUIDs, as defined in [ISO-11578], contain a "node" field that + contains one of the IEEE 802 addresses for the server machine. As + noted in section 17.8, there are several security risks associated + with exposing a machine's IEEE 802 address. 
This section provides an + alternate mechanism for generating the "node" field of a UUID which + does not employ an IEEE 802 address. WebDAV servers MAY use this + algorithm for creating the node field when generating UUIDs. The + text in this section is originally from an Internet-Draft by Paul + Leach and Rich Salz, who are noted here to properly attribute their + work. + + The ideal solution is to obtain a 47 bit cryptographic quality random + number, and use it as the low 47 bits of the node ID, with the most + significant bit of the first octet of the node ID set to 1. This bit + is the unicast/multicast bit, which will never be set in IEEE 802 + addresses obtained from network cards; hence, there can never be a + conflict between UUIDs generated by machines with and without network + cards. + + If a system does not have a primitive to generate cryptographic + quality random numbers, then in most systems there are usually a + fairly large number of sources of randomness available from which one + can be generated. Such sources are system specific, but often + include: + + + + + + +Goland, et al. Standards Track [Page 17] + +RFC 2518 WEBDAV February 1999 + + + - the percent of memory in use + - the size of main memory in bytes + - the amount of free main memory in bytes + - the size of the paging or swap file in bytes + - free bytes of paging or swap file + - the total size of user virtual address space in bytes + - the total available user address space bytes + - the size of boot disk drive in bytes + - the free disk space on boot drive in bytes + - the current time + - the amount of time since the system booted + - the individual sizes of files in various system directories + - the creation, last read, and modification times of files in + various system directories + - the utilization factors of various system resources (heap, etc.) 
+ - current mouse cursor position + - current caret position + - current number of running processes, threads + - handles or IDs of the desktop window and the active window + - the value of stack pointer of the caller + - the process and thread ID of caller + - various processor architecture specific performance counters + (instructions executed, cache misses, TLB misses) + + (Note that it is precisely the above kinds of sources of randomness + that are used to seed cryptographic quality random number generators + on systems without special hardware for their construction.) + + In addition, items such as the computer's name and the name of the + operating system, while not strictly speaking random, will help + differentiate the results from those obtained by other systems. + + The exact algorithm to generate a node ID using these data is system + specific, because both the data available and the functions to obtain + them are often very system specific. However, assuming that one can + concatenate all the values from the randomness sources into a buffer, + and that a cryptographic hash function such as MD5 is available, then + any 6 bytes of the MD5 hash of the buffer, with the multicast bit + (the high bit of the first byte) set will be an appropriately random + node ID. + + Other hash functions, such as SHA-1, can also be used. The only + requirement is that the result be suitably random -- in the sense that + the outputs from a set of uniformly distributed inputs are themselves + uniformly distributed, and that a single bit change in the input can + be expected to cause half of the output bits to change. + + + + + +Goland, et al. Standards Track [Page 18] + +RFC 2518 WEBDAV February 1999 + + +6.5 Lock Capability Discovery + + Since server lock support is optional, a client trying to lock a + resource on a server can either try the lock and hope for the best, + or perform some form of discovery to determine what lock capabilities + the server supports.
This is known as lock capability discovery. + Lock capability discovery differs from discovery of supported access + control types, since there may be access control types without + corresponding lock types. A client can determine what lock types the + server supports by retrieving the supportedlock property. + + Any DAV compliant resource that supports the LOCK method MUST support + the supportedlock property. + +6.6 Active Lock Discovery + + If another principal locks a resource that a principal wishes to + access, it is useful for the second principal to be able to find out + who the first principal is. For this purpose the lockdiscovery + property is provided. This property lists all outstanding locks, + describes their type, and where available, provides their lock token. + + Any DAV compliant resource that supports the LOCK method MUST support + the lockdiscovery property. + +6.7 Usage Considerations + + Although the locking mechanisms specified here provide some help in + preventing lost updates, they cannot guarantee that updates will + never be lost. Consider the following scenario: + + Two clients A and B are interested in editing the resource ' + index.html'. Client A is an HTTP client rather than a WebDAV client, + and so does not know how to perform locking. + Client A doesn't lock the document, but does a GET and begins + editing. + Client B does LOCK, performs a GET and begins editing. + Client B finishes editing, performs a PUT, then an UNLOCK. + Client A performs a PUT, overwriting and losing all of B's changes. + + There are several reasons why the WebDAV protocol itself cannot + prevent this situation. First, it cannot force all clients to use + locking because it must be compatible with HTTP clients that do not + comprehend locking. Second, it cannot require servers to support + locking because of the variety of repository implementations, some of + which rely on reservations and merging rather than on locking. 
+ Finally, being stateless, it cannot enforce a sequence of operations + like LOCK / GET / PUT / UNLOCK. + + + +Goland, et al. Standards Track [Page 19] + +RFC 2518 WEBDAV February 1999 + + + WebDAV servers that support locking can reduce the likelihood that + clients will accidentally overwrite each other's changes by requiring + clients to lock resources before modifying them. Such servers would + effectively prevent HTTP 1.0 and HTTP 1.1 clients from modifying + resources. + + WebDAV clients can be good citizens by using a lock / retrieve / + write /unlock sequence of operations (at least by default) whenever + they interact with a WebDAV server that supports locking. + + HTTP 1.1 clients can be good citizens, avoiding overwriting other + clients' changes, by using entity tags in If-Match headers with any + requests that would modify resources. + + Information managers may attempt to prevent overwrites by + implementing client-side procedures requiring locking before + modifying WebDAV resources. + +7 Write Lock + + This section describes the semantics specific to the write lock type. + The write lock is a specific instance of a lock type, and is the only + lock type described in this specification. + +7.1 Methods Restricted by Write Locks + + A write lock MUST prevent a principal without the lock from + successfully executing a PUT, POST, PROPPATCH, LOCK, UNLOCK, MOVE, + DELETE, or MKCOL on the locked resource. All other current methods, + GET in particular, function independently of the lock. + + Note, however, that as new methods are created it will be necessary + to specify how they interact with a write lock. + +7.2 Write Locks and Lock Tokens + + A successful request for an exclusive or shared write lock MUST + result in the generation of a unique lock token associated with the + requesting principal. Thus if five principals have a shared write + lock on the same resource there will be five lock tokens, one for + each principal. 
+ +7.3 Write Locks and Properties + + While those without a write lock may not alter a property on a + resource it is still possible for the values of live properties to + change, even while locked, due to the requirements of their schemas. + + + + +Goland, et al. Standards Track [Page 20] + +RFC 2518 WEBDAV February 1999 + + + Only dead properties and live properties defined to respect locks are + guaranteed not to change while write locked. + +7.4 Write Locks and Null Resources + + It is possible to assert a write lock on a null resource in order to + lock the name. + + A write locked null resource, referred to as a lock-null resource, + MUST respond with a 404 (Not Found) or 405 (Method Not Allowed) to + any HTTP/1.1 or DAV methods except for PUT, MKCOL, OPTIONS, PROPFIND, + LOCK, and UNLOCK. A lock-null resource MUST appear as a member of + its parent collection. Additionally the lock-null resource MUST have + defined on it all mandatory DAV properties. Most of these + properties, such as all the get* properties, will have no value as a + lock-null resource does not support the GET method. Lock-Null + resources MUST have defined values for lockdiscovery and + supportedlock properties. + + Until a method such as PUT or MKCOL is successfully executed on the + lock-null resource the resource MUST stay in the lock-null state. + However, once a PUT or MKCOL is successfully executed on a lock-null + resource the resource ceases to be in the lock-null state. + + If the resource is unlocked, for any reason, without a PUT, MKCOL, or + similar method having been successfully executed upon it then the + resource MUST return to the null state. + +7.5 Write Locks and Collections + + A write lock on a collection, whether created by a "Depth: 0" or + "Depth: infinity" lock request, prevents the addition or removal of + member URIs of the collection by non-lock owners. 
As a consequence, + when a principal issues a PUT or POST request to create a new + resource under a URI which needs to be an internal member of a write + locked collection to maintain HTTP namespace consistency, or issues a + DELETE to remove a resource which has a URI which is an existing + internal member URI of a write locked collection, this request MUST + fail if the principal does not have a write lock on the collection. + + However, if a write lock request is issued to a collection containing + member URIs identifying resources that are currently locked in a + manner which conflicts with the write lock, the request MUST fail + with a 423 (Locked) status code. + + If a lock owner causes the URI of a resource to be added as an + internal member URI of a locked collection then the new resource MUST + be automatically added to the lock. This is the only mechanism that + + + +Goland, et al. Standards Track [Page 21] + +RFC 2518 WEBDAV February 1999 + + + allows a resource to be added to a write lock. Thus, for example, if + the collection /a/b/ is write locked and the resource /c is moved to + /a/b/c then resource /a/b/c will be added to the write lock. + +7.6 Write Locks and the If Request Header + + If a user agent is not required to have knowledge about a lock when + requesting an operation on a locked resource, the following scenario + might occur. Program A, run by User A, takes out a write lock on a + resource. Program B, also run by User A, has no knowledge of the + lock taken out by Program A, yet performs a PUT to the locked + resource. In this scenario, the PUT succeeds because locks are + associated with a principal, not a program, and thus program B, + because it is acting with principal A's credential, is allowed to + perform the PUT. However, had program B known about the lock, it + would not have overwritten the resource, preferring instead to + present a dialog box describing the conflict to the user. 
Due to + this scenario, a mechanism is needed to prevent different programs + from accidentally ignoring locks taken out by other programs with the + same authorization. + + In order to prevent these collisions a lock token MUST be submitted + by an authorized principal in the If header for all locked resources + that a method may interact with or the method MUST fail. For + example, if a resource is to be moved and both the source and + destination are locked then two lock tokens must be submitted, one + for the source and the other for the destination. + +7.6.1 Example - Write Lock + + >>Request + + COPY /~fielding/index.html HTTP/1.1 + Host: www.ics.uci.edu + Destination: http://www.ics.uci.edu/users/f/fielding/index.html + If: <http://www.ics.uci.edu/users/f/fielding/index.html> + (<opaquelocktoken:f81d4fae-7dec-11d0-a765-00a0c91e6bf6>) + + >>Response + + HTTP/1.1 204 No Content + + In this example, even though both the source and destination are + locked, only one lock token must be submitted, for the lock on the + destination. This is because the source resource is not modified by + a COPY, and hence unaffected by the write lock. In this example, user + agent authentication has previously occurred via a mechanism outside + the scope of the HTTP protocol, in the underlying transport layer. + + + +Goland, et al. Standards Track [Page 22] + +RFC 2518 WEBDAV February 1999 + + +7.7 Write Locks and COPY/MOVE + + A COPY method invocation MUST NOT duplicate any write locks active on + the source. However, as previously noted, if the COPY copies the + resource into a collection that is locked with "Depth: infinity", + then the resource will be added to the lock. + + A successful MOVE request on a write locked resource MUST NOT move + the write lock with the resource. However, the resource is subject to + being added to an existing lock at the destination, as specified in + section 7.5. For example, if the MOVE makes the resource a child of a + collection that is locked with "Depth: infinity", then the resource + will be added to that collection's lock.
Additionally, if a resource + locked with "Depth: infinity" is moved to a destination that is + within the scope of the same lock (e.g., within the namespace tree + covered by the lock), the moved resource will again be added to the + lock. In both these examples, as specified in section 7.6, an If + header must be submitted containing a lock token for both the source + and destination. + +7.8 Refreshing Write Locks + + A client MUST NOT submit the same write lock request twice. Note + that a client is always aware it is resubmitting the same lock + request because it must include the lock token in the If header in + order to make the request for a resource that is already locked. + + However, a client may submit a LOCK method with an If header but + without a body. This form of LOCK MUST only be used to "refresh" a + lock. Meaning, at minimum, that any timers associated with the lock + MUST be re-set. + + A server may return a Timeout header with a lock refresh that is + different than the Timeout header returned when the lock was + originally requested. Additionally clients may submit Timeout + headers of arbitrary value with their lock refresh requests. + Servers, as always, may ignore Timeout headers submitted by the + client. + + If an error is received in response to a refresh LOCK request the + client SHOULD assume that the lock was not refreshed. + +8 HTTP Methods for Distributed Authoring + + The following new HTTP methods use XML as a request and response + format. All DAV compliant clients and resources MUST use XML parsers + that are compliant with [REC-XML]. All XML used in either requests + or responses MUST be, at minimum, well formed. If a server receives + + + +Goland, et al. Standards Track [Page 23] + +RFC 2518 WEBDAV February 1999 + + + ill-formed XML in a request it MUST reject the entire request with a + 400 (Bad Request).
If a client receives ill-formed XML in a response + then it MUST NOT assume anything about the outcome of the executed + method and SHOULD treat the server as malfunctioning. + +8.1 PROPFIND + + The PROPFIND method retrieves properties defined on the resource + identified by the Request-URI, if the resource does not have any + internal members, or on the resource identified by the Request-URI + and potentially its member resources, if the resource is a collection + that has internal member URIs. All DAV compliant resources MUST + support the PROPFIND method and the propfind XML element (section + 12.14) along with all XML elements defined for use with that element. + + A client may submit a Depth header with a value of "0", "1", or + "infinity" with a PROPFIND on a collection resource with internal + member URIs. DAV compliant servers MUST support the "0", "1" and + "infinity" behaviors. By default, the PROPFIND method without a Depth + header MUST act as if a "Depth: infinity" header was included. + + A client may submit a propfind XML element in the body of the request + method describing what information is being requested. It is + possible to request particular property values, all property values, + or a list of the names of the resource's properties. A client may + choose not to submit a request body. An empty PROPFIND request body + MUST be treated as a request for the names and values of all + properties. + + All servers MUST support returning a response of content type + text/xml or application/xml that contains a multistatus XML element + that describes the results of the attempts to retrieve the various + properties. + + If there is an error retrieving a property then a proper error result + MUST be included in the response. A request to retrieve the value of + a property which does not exist is an error and MUST be noted, if the + response uses a multistatus XML element, with a response XML element + which contains a 404 (Not Found) status value. 
+ + Consequently, the multistatus XML element for a collection resource + with member URIs MUST include a response XML element for each member + URI of the collection, to whatever depth was requested. Each response + XML element MUST contain an href XML element that gives the URI of + the resource on which the properties in the prop XML element are + defined. Results for a PROPFIND on a collection resource with + internal member URIs are returned as a flat list whose order of + entries is not significant. + + + +Goland, et al. Standards Track [Page 24] + +RFC 2518 WEBDAV February 1999 + + + In the case of allprop and propname, if a principal does not have the + right to know whether a particular property exists then the property + should be silently excluded from the response. + + The results of this method SHOULD NOT be cached. + +8.1.1 Example - Retrieving Named Properties + + >>Request + + PROPFIND /file HTTP/1.1 + Host: www.foo.bar + Content-type: text/xml; charset="utf-8" + Content-Length: xxxx + + + + + + + + + + + + >>Response + + HTTP/1.1 207 Multi-Status + Content-Type: text/xml; charset="utf-8" + Content-Length: xxxx + + + + + http://www.foo.bar/file + + + + Box type A + + + J.J. Johnson + + + HTTP/1.1 200 OK + + + + + + +Goland, et al. Standards Track [Page 25] + +RFC 2518 WEBDAV February 1999 + + + HTTP/1.1 403 Forbidden + The user does not have access to + the DingALing property. + + + + There has been an access violation error. + + + + In this example, PROPFIND is executed on a non-collection resource + http://www.foo.bar/file. The propfind XML element specifies the name + of four properties whose values are being requested. In this case + only two properties were returned, since the principal issuing the + request did not have sufficient access rights to see the third and + fourth properties. 
+ +8.1.2 Example - Using allprop to Retrieve All Properties + + >>Request + + PROPFIND /container/ HTTP/1.1 + Host: www.foo.bar + Depth: 1 + Content-Type: text/xml; charset="utf-8" + Content-Length: xxxx + + + + + + + >>Response + + HTTP/1.1 207 Multi-Status + Content-Type: text/xml; charset="utf-8" + Content-Length: xxxx + + + + + http://www.foo.bar/container/ + + + + Box type A + + + + + +Goland, et al. Standards Track [Page 26] + +RFC 2518 WEBDAV February 1999 + + + Hadrian + + + 1997-12-01T17:42:21-08:00 + + + Example collection + + + + + + + + + + + + + + HTTP/1.1 200 OK + + + + http://www.foo.bar/container/front.html + + + + Box type B + + + 1997-12-01T18:27:21-08:00 + + + Example HTML resource + + + 4525 + + + text/html + + + zzyzx + + + Monday, 12-Jan-98 09:25:56 GMT + + + + +Goland, et al. Standards Track [Page 27] + +RFC 2518 WEBDAV February 1999 + + + + + + + + + + + + + + + HTTP/1.1 200 OK + + + + + In this example, PROPFIND was invoked on the resource + http://www.foo.bar/container/ with a Depth header of 1, meaning the + request applies to the resource and its children, and a propfind XML + element containing the allprop XML element, meaning the request + should return the name and value of all properties defined on each + resource. + + The resource http://www.foo.bar/container/ has six properties defined + on it: + + http://www.foo.bar/boxschema/bigbox, + http://www.foo.bar/boxschema/author, DAV:creationdate, + DAV:displayname, DAV:resourcetype, and DAV:supportedlock. + + The last four properties are WebDAV-specific, defined in section 13. + Since GET is not supported on this resource, the get* properties + (e.g., getcontentlength) are not defined on this resource. 
The DAV- + specific properties assert that "container" was created on December + 1, 1997, at 5:42:21PM, in a time zone 8 hours west of GMT + (creationdate), has a name of "Example collection" (displayname), a + collection resource type (resourcetype), and supports exclusive write + and shared write locks (supportedlock). + + The resource http://www.foo.bar/container/front.html has nine + properties defined on it: + + http://www.foo.bar/boxschema/bigbox (another instance of the "bigbox" + property type), DAV:creationdate, DAV:displayname, + DAV:getcontentlength, DAV:getcontenttype, DAV:getetag, + DAV:getlastmodified, DAV:resourcetype, and DAV:supportedlock. + + + + +Goland, et al. Standards Track [Page 28] + +RFC 2518 WEBDAV February 1999 + + + The DAV-specific properties assert that "front.html" was created on + December 1, 1997, at 6:27:21PM, in a time zone 8 hours west of GMT + (creationdate), has a name of "Example HTML resource" (displayname), + a content length of 4525 bytes (getcontentlength), a MIME type of + "text/html" (getcontenttype), an entity tag of "zzyzx" (getetag), was + last modified on Monday, January 12, 1998, at 09:25:56 GMT + (getlastmodified), has an empty resource type, meaning that it is not + a collection (resourcetype), and supports both exclusive write and + shared write locks (supportedlock). + +8.1.3 Example - Using propname to Retrieve all Property Names + + >>Request + + PROPFIND /container/ HTTP/1.1 + Host: www.foo.bar + Content-Type: text/xml; charset="utf-8" + Content-Length: xxxx + + + + + + + >>Response + + HTTP/1.1 207 Multi-Status + Content-Type: text/xml; charset="utf-8" + Content-Length: xxxx + + + + + http://www.foo.bar/container/ + + + + + + + + + + HTTP/1.1 200 OK + + + + http://www.foo.bar/container/front.html + + + +Goland, et al. 
Standards Track [Page 29] + +RFC 2518 WEBDAV February 1999 + + + + + + + + + + + + + + + HTTP/1.1 200 OK + + + + + + In this example, PROPFIND is invoked on the collection resource + http://www.foo.bar/container/, with a propfind XML element containing + the propname XML element, meaning the name of all properties should + be returned. Since no Depth header is present, it assumes its + default value of "infinity", meaning the name of the properties on + the collection and all its progeny should be returned. + + Consistent with the previous example, resource + http://www.foo.bar/container/ has six properties defined on it, + http://www.foo.bar/boxschema/bigbox, + http://www.foo.bar/boxschema/author, DAV:creationdate, + DAV:displayname, DAV:resourcetype, and DAV:supportedlock. + + The resource http://www.foo.bar/container/index.html, a member of the + "container" collection, has nine properties defined on it, + http://www.foo.bar/boxschema/bigbox, DAV:creationdate, + DAV:displayname, DAV:getcontentlength, DAV:getcontenttype, + DAV:getetag, DAV:getlastmodified, DAV:resourcetype, and + DAV:supportedlock. + + This example also demonstrates the use of XML namespace scoping, and + the default namespace. Since the "xmlns" attribute does not contain + an explicit "shorthand name" (prefix) letter, the namespace applies + by default to all enclosed elements. Hence, all elements which do + not explicitly state the namespace to which they belong are members + of the "DAV:" namespace schema. + + + + + + + +Goland, et al. Standards Track [Page 30] + +RFC 2518 WEBDAV February 1999 + + +8.2 PROPPATCH + + The PROPPATCH method processes instructions specified in the request + body to set and/or remove properties defined on the resource + identified by the Request-URI. + + All DAV compliant resources MUST support the PROPPATCH method and + MUST process instructions that are specified using the + propertyupdate, set, and remove XML elements of the DAV schema. 
+ Execution of the directives in this method is, of course, subject to + access control constraints. DAV compliant resources SHOULD support + the setting of arbitrary dead properties. + + The request message body of a PROPPATCH method MUST contain the + propertyupdate XML element. Instruction processing MUST occur in the + order instructions are received (i.e., from top to bottom). + Instructions MUST either all be executed or none executed. Thus if + any error occurs during processing all executed instructions MUST be + undone and a proper error result returned. Instruction processing + details can be found in the definition of the set and remove + instructions in section 12.13. + +8.2.1 Status Codes for use with 207 (Multi-Status) + + The following are examples of response codes one would expect to be + used in a 207 (Multi-Status) response for this method. Note, + however, that unless explicitly prohibited any 2/3/4/5xx series + response code may be used in a 207 (Multi-Status) response. + + 200 (OK) - The command succeeded. As there can be a mixture of sets + and removes in a body, a 201 (Created) seems inappropriate. + + 403 (Forbidden) - The client, for reasons the server chooses not to + specify, cannot alter one of the properties. + + 409 (Conflict) - The client has provided a value whose semantics are + not appropriate for the property. This includes trying to set read- + only properties. + + 423 (Locked) - The specified resource is locked and the client either + is not a lock owner or the lock type requires a lock token to be + submitted and the client did not submit it. + + 507 (Insufficient Storage) - The server did not have sufficient space + to record the property. + + + + + + +Goland, et al. 
Standards Track [Page 31] + +RFC 2518 WEBDAV February 1999 + + +8.2.2 Example - PROPPATCH + + >>Request + + PROPPATCH /bar.html HTTP/1.1 + Host: www.foo.com + Content-Type: text/xml; charset="utf-8" + Content-Length: xxxx + + + + + + + Jim Whitehead + Roy Fielding + + + + + + + + + >>Response + + HTTP/1.1 207 Multi-Status + Content-Type: text/xml; charset="utf-8" + Content-Length: xxxx + + + + + http://www.foo.com/bar.html + + + HTTP/1.1 424 Failed Dependency + + + + HTTP/1.1 409 Conflict + + Copyright Owner can not be deleted or + altered. + + + + + +Goland, et al. Standards Track [Page 32] + +RFC 2518 WEBDAV February 1999 + + + In this example, the client requests the server to set the value of + the http://www.w3.com/standards/z39.50/Authors property, and to + remove the property http://www.w3.com/standards/z39.50/Copyright- + Owner. Since the Copyright-Owner property could not be removed, no + property modifications occur. The 424 (Failed Dependency) status + code for the Authors property indicates this action would have + succeeded if it were not for the conflict with removing the + Copyright-Owner property. + +8.3 MKCOL Method + + The MKCOL method is used to create a new collection. All DAV + compliant resources MUST support the MKCOL method. + +8.3.1 Request + + MKCOL creates a new collection resource at the location specified by + the Request-URI. If the resource identified by the Request-URI is + non-null then the MKCOL MUST fail. During MKCOL processing, a server + MUST make the Request-URI a member of its parent collection, unless + the Request-URI is "/". If no such ancestor exists, the method MUST + fail. When the MKCOL operation creates a new collection resource, + all ancestors MUST already exist, or the method MUST fail with a 409 + (Conflict) status code. For example, if a request to create + collection /a/b/c/d/ is made, and neither /a/b/ nor /a/b/c/ exists, + the request must fail. 
+ + When MKCOL is invoked without a request body, the newly created + collection SHOULD have no members. + + A MKCOL request message may contain a message body. The behavior of + a MKCOL request when the body is present is limited to creating + collections, members of a collection, bodies of members and + properties on the collections or members. If the server receives a + MKCOL request entity type it does not support or understand it MUST + respond with a 415 (Unsupported Media Type) status code. The exact + behavior of MKCOL for various request media types is undefined in + this document, and will be specified in separate documents. + +8.3.2 Status Codes + + Responses from a MKCOL request MUST NOT be cached as MKCOL has non- + idempotent semantics. + + 201 (Created) - The collection or structured resource was created in + its entirety. + + + + + +Goland, et al. Standards Track [Page 33] + +RFC 2518 WEBDAV February 1999 + + + 403 (Forbidden) - This indicates at least one of two conditions: 1) + the server does not allow the creation of collections at the given + location in its namespace, or 2) the parent collection of the + Request-URI exists but cannot accept members. + + 405 (Method Not Allowed) - MKCOL can only be executed on a + deleted/non-existent resource. + + 409 (Conflict) - A collection cannot be made at the Request-URI until + one or more intermediate collections have been created. + + 415 (Unsupported Media Type)- The server does not support the request + type of the body. + + 507 (Insufficient Storage) - The resource does not have sufficient + space to record the state of the resource after the execution of this + method. + +8.3.3 Example - MKCOL + + This example creates a collection called /webdisc/xfiles/ on the + server www.server.org. 
+ + >>Request + + MKCOL /webdisc/xfiles/ HTTP/1.1 + Host: www.server.org + + >>Response + + HTTP/1.1 201 Created + +8.4 GET, HEAD for Collections + + The semantics of GET are unchanged when applied to a collection, + since GET is defined as, "retrieve whatever information (in the form + of an entity) is identified by the Request-URI" [RFC2068]. GET when + applied to a collection may return the contents of an "index.html" + resource, a human-readable view of the contents of the collection, or + something else altogether. Hence it is possible that the result of a + GET on a collection will bear no correlation to the membership of the + collection. + + Similarly, since the definition of HEAD is a GET without a response + message body, the semantics of HEAD are unmodified when applied to + collection resources. + + + + + +Goland, et al. Standards Track [Page 34] + +RFC 2518 WEBDAV February 1999 + + +8.5 POST for Collections + + Since by definition the actual function performed by POST is + determined by the server and often depends on the particular + resource, the behavior of POST when applied to collections cannot be + meaningfully modified because it is largely undefined. Thus the + semantics of POST are unmodified when applied to a collection. + +8.6 DELETE + + 8.6.1 DELETE for Non-Collection Resources + + If the DELETE method is issued to a non-collection resource whose + URIs are an internal member of one or more collections, then during + DELETE processing a server MUST remove any URI for the resource + identified by the Request-URI from collections which contain it as a + member. + +8.6.2 DELETE for Collections + + The DELETE method on a collection MUST act as if a "Depth: infinity" + header was used on it. A client MUST NOT submit a Depth header with + a DELETE on a collection with any value but infinity. + + DELETE instructs that the collection specified in the Request-URI and + all resources identified by its internal member URIs are to be + deleted. 
+ + If any resource identified by a member URI cannot be deleted then all + of the member's ancestors MUST NOT be deleted, so as to maintain + namespace consistency. + + Any headers included with DELETE MUST be applied in processing every + resource to be deleted. + + When the DELETE method has completed processing it MUST result in a + consistent namespace. + + If an error occurs with a resource other than the resource identified + in the Request-URI then the response MUST be a 207 (Multi-Status). + 424 (Failed Dependency) errors SHOULD NOT be in the 207 (Multi- + Status). They can be safely left out because the client will know + that the ancestors of a resource could not be deleted when the client + receives an error for the ancestor's progeny. Additionally 204 (No + Content) errors SHOULD NOT be returned in the 207 (Multi-Status). + The reason for this prohibition is that 204 (No Content) is the + default success code. + + + + +Goland, et al. Standards Track [Page 35] + +RFC 2518 WEBDAV February 1999 + + +8.6.2.1 Example - DELETE + + >>Request + + DELETE /container/ HTTP/1.1 + Host: www.foo.bar + + >>Response + + HTTP/1.1 207 Multi-Status + Content-Type: text/xml; charset="utf-8" + Content-Length: xxxx + + + + + http://www.foo.bar/container/resource3 + HTTP/1.1 423 Locked + + + + In this example the attempt to delete + http://www.foo.bar/container/resource3 failed because it is locked, + and no lock token was submitted with the request. Consequently, the + attempt to delete http://www.foo.bar/container/ also failed. Thus the + client knows that the attempt to delete http://www.foo.bar/container/ + must have also failed since the parent can not be deleted unless its + child has also been deleted. Even though a Depth header has not been + included, a depth of infinity is assumed because the method is on a + collection. 
+ +8.7 PUT + +8.7.1 PUT for Non-Collection Resources + + A PUT performed on an existing resource replaces the GET response + entity of the resource. Properties defined on the resource may be + recomputed during PUT processing but are not otherwise affected. For + example, if a server recognizes the content type of the request body, + it may be able to automatically extract information that could be + profitably exposed as properties. + + A PUT that would result in the creation of a resource without an + appropriately scoped parent collection MUST fail with a 409 + (Conflict). + + + + + + +Goland, et al. Standards Track [Page 36] + +RFC 2518 WEBDAV February 1999 + + +8.7.2 PUT for Collections + + As defined in the HTTP/1.1 specification [RFC2068], the "PUT method + requests that the enclosed entity be stored under the supplied + Request-URI." Since submission of an entity representing a + collection would implicitly encode creation and deletion of + resources, this specification intentionally does not define a + transmission format for creating a collection using PUT. Instead, + the MKCOL method is defined to create collections. + + When the PUT operation creates a new non-collection resource all + ancestors MUST already exist. If all ancestors do not exist, the + method MUST fail with a 409 (Conflict) status code. For example, if + resource /a/b/c/d.html is to be created and /a/b/c/ does not exist, + then the request must fail. + +8.8 COPY Method + + The COPY method creates a duplicate of the source resource, + identified by the Request-URI, in the destination resource, + identified by the URI in the Destination header. The Destination + header MUST be present. The exact behavior of the COPY method + depends on the type of the source resource. + + All WebDAV compliant resources MUST support the COPY method. + However, support for the COPY method does not guarantee the ability + to copy a resource. 
For example, separate programs may control + resources on the same server. As a result, it may not be possible to + copy a resource to a location that appears to be on the same server. + +8.8.1 COPY for HTTP/1.1 resources + + When the source resource is not a collection the result of the COPY + method is the creation of a new resource at the destination whose + state and behavior match that of the source resource as closely as + possible. After a successful COPY invocation, all properties on the + source resource MUST be duplicated on the destination resource, + subject to modifying headers and XML elements, following the + definition for copying properties. Since the environment at the + destination may be different than at the source due to factors + outside the scope of control of the server, such as the absence of + resources required for correct operation, it may not be possible to + completely duplicate the behavior of the resource at the destination. + Subsequent alterations to the destination resource will not modify + the source resource. Subsequent alterations to the source resource + will not modify the destination resource. + + + + + +Goland, et al. Standards Track [Page 37] + +RFC 2518 WEBDAV February 1999 + + +8.8.2. COPY for Properties + + The following section defines how properties on a resource are + handled during a COPY operation. + + Live properties SHOULD be duplicated as identically behaving live + properties at the destination resource. If a property cannot be + copied live, then its value MUST be duplicated, octet-for-octet, in + an identically named, dead property on the destination resource + subject to the effects of the propertybehavior XML element. + + The propertybehavior XML element can specify that properties are + copied on best effort, that all live properties must be successfully + copied or the method must fail, or that a specified list of live + properties must be successfully copied or the method must fail. 
 The + propertybehavior XML element is defined in section 12.12. + +8.8.3 COPY for Collections + + The COPY method on a collection without a Depth header MUST act as if + a Depth header with value "infinity" was included. A client may + submit a Depth header on a COPY on a collection with a value of "0" + or "infinity". DAV compliant servers MUST support the "0" and + "infinity" Depth header behaviors. + + A COPY of depth infinity instructs that the collection resource + identified by the Request-URI is to be copied to the location + identified by the URI in the Destination header, and all its internal + member resources are to be copied to a location relative to it, + recursively through all levels of the collection hierarchy. + + A COPY of "Depth: 0" only instructs that the collection and its + properties but not resources identified by its internal member URIs, + are to be copied. + + Any headers included with a COPY MUST be applied in processing every + resource to be copied with the exception of the Destination header. + + The Destination header only specifies the destination URI for the + Request-URI. When applied to members of the collection identified by + the Request-URI the value of Destination is to be modified to reflect + the current location in the hierarchy. So, if the Request-URI is + /a/ with Host header value http://fun.com/ and the Destination is + http://fun.com/b/ then when http://fun.com/a/c/d is processed it must + use a Destination of http://fun.com/b/c/d. + + + + + + +Goland, et al. Standards Track [Page 38] + +RFC 2518 WEBDAV February 1999 + + + When the COPY method has completed processing it MUST have created a + consistent namespace at the destination (see section 5.1 for the + definition of namespace consistency). 
However, if an error occurs + while copying an internal collection, the server MUST NOT copy any + resources identified by members of this collection (i.e., the server + must skip this subtree), as this would create an inconsistent + namespace. After detecting an error, the COPY operation SHOULD try to + finish as much of the original copy operation as possible (i.e., the + server should still attempt to copy other subtrees and their members, + that are not descendents of an error-causing collection). So, for + example, if an infinite depth copy operation is performed on + collection /a/, which contains collections /a/b/ and /a/c/, and an + error occurs copying /a/b/, an attempt should still be made to copy + /a/c/. Similarly, after encountering an error copying a non- + collection resource as part of an infinite depth copy, the server + SHOULD try to finish as much of the original copy operation as + possible. + + If an error in executing the COPY method occurs with a resource other + than the resource identified in the Request-URI then the response + MUST be a 207 (Multi-Status). + + The 424 (Failed Dependency) status code SHOULD NOT be returned in the + 207 (Multi-Status) response from a COPY method. These responses can + be safely omitted because the client will know that the progeny of a + resource could not be copied when the client receives an error for + the parent. Additionally 201 (Created)/204 (No Content) status codes + SHOULD NOT be returned as values in 207 (Multi-Status) responses from + COPY methods. They, too, can be safely omitted because they are the + default success codes. + +8.8.4 COPY and the Overwrite Header + + If a resource exists at the destination and the Overwrite header is + "T" then prior to performing the copy the server MUST perform a + DELETE with "Depth: infinity" on the destination resource. If the + Overwrite header is set to "F" then the operation will fail. 
+ +8.8.5 Status Codes + + 201 (Created) - The source resource was successfully copied. The + copy operation resulted in the creation of a new resource. + + 204 (No Content) - The source resource was successfully copied to a + pre-existing destination resource. + + 403 (Forbidden) - The source and destination URIs are the same. + + + + +Goland, et al. Standards Track [Page 39] + +RFC 2518 WEBDAV February 1999 + + + 409 (Conflict) - A resource cannot be created at the destination + until one or more intermediate collections have been created. + + 412 (Precondition Failed) - The server was unable to maintain the + liveness of the properties listed in the propertybehavior XML element + or the Overwrite header is "F" and the state of the destination + resource is non-null. + + 423 (Locked) - The destination resource was locked. + + 502 (Bad Gateway) - This may occur when the destination is on another + server and the destination server refuses to accept the resource. + + 507 (Insufficient Storage) - The destination resource does not have + sufficient space to record the state of the resource after the + execution of this method. + +8.8.6 Example - COPY with Overwrite + + This example shows resource + http://www.ics.uci.edu/~fielding/index.html being copied to the + location http://www.ics.uci.edu/users/f/fielding/index.html. The 204 + (No Content) status code indicates the existing resource at the + destination was overwritten. + + >>Request + + COPY /~fielding/index.html HTTP/1.1 + Host: www.ics.uci.edu + Destination: http://www.ics.uci.edu/users/f/fielding/index.html + + >>Response + + HTTP/1.1 204 No Content + +8.8.7 Example - COPY with No Overwrite + + The following example shows the same copy operation being performed, + but with the Overwrite header set to "F." A response of 412 + (Precondition Failed) is returned because the destination resource + has a non-null state. 
+ + >>Request + + COPY /~fielding/index.html HTTP/1.1 + Host: www.ics.uci.edu + Destination: http://www.ics.uci.edu/users/f/fielding/index.html + Overwrite: F + + + +Goland, et al. Standards Track [Page 40] + +RFC 2518 WEBDAV February 1999 + + + >>Response + + HTTP/1.1 412 Precondition Failed + +8.8.8 Example - COPY of a Collection + + >>Request + + COPY /container/ HTTP/1.1 + Host: www.foo.bar + Destination: http://www.foo.bar/othercontainer/ + Depth: infinity + Content-Type: text/xml; charset="utf-8" + Content-Length: xxxx + + + + * + + + >>Response + + HTTP/1.1 207 Multi-Status + Content-Type: text/xml; charset="utf-8" + Content-Length: xxxx + + + + + http://www.foo.bar/othercontainer/R2/ + HTTP/1.1 412 Precondition Failed + + + + The Depth header is unnecessary as the default behavior of COPY on a + collection is to act as if a "Depth: infinity" header had been + submitted. In this example most of the resources, along with the + collection, were copied successfully. However the collection R2 + failed, most likely due to a problem with maintaining the liveness of + properties (this is specified by the propertybehavior XML element). + Because there was an error copying R2, none of R2's members were + copied. However no errors were listed for those members due to the + error minimization rules given in section 8.8.3. + + + + + + + + +Goland, et al. Standards Track [Page 41] + +RFC 2518 WEBDAV February 1999 + + +8.9 MOVE Method + + The MOVE operation on a non-collection resource is the logical + equivalent of a copy (COPY), followed by consistency maintenance + processing, followed by a delete of the source, where all three + actions are performed atomically. The consistency maintenance step + allows the server to perform updates caused by the move, such as + updating all URIs other than the Request-URI which identify the + source resource, to point to the new destination resource. 
+ Consequently, the Destination header MUST be present on all MOVE + methods and MUST follow all COPY requirements for the COPY part of + the MOVE method. All DAV compliant resources MUST support the MOVE + method. However, support for the MOVE method does not guarantee the + ability to move a resource to a particular destination. + + For example, separate programs may actually control different sets of + resources on the same server. Therefore, it may not be possible to + move a resource within a namespace that appears to belong to the same + server. + + If a resource exists at the destination, the destination resource + will be DELETEd as a side-effect of the MOVE operation, subject to + the restrictions of the Overwrite header. + +8.9.1 MOVE for Properties + + The behavior of properties on a MOVE, including the effects of the + propertybehavior XML element, MUST be the same as specified in + section 8.8.2. + +8.9.2 MOVE for Collections + + A MOVE with "Depth: infinity" instructs that the collection + identified by the Request-URI be moved to the URI specified in the + Destination header, and all resources identified by its internal + member URIs are to be moved to locations relative to it, recursively + through all levels of the collection hierarchy. + + The MOVE method on a collection MUST act as if a "Depth: infinity" + header was used on it. A client MUST NOT submit a Depth header on a + MOVE on a collection with any value but "infinity". + + Any headers included with MOVE MUST be applied in processing every + resource to be moved with the exception of the Destination header. + + The behavior of the Destination header is the same as given for COPY + on collections. + + + + +Goland, et al. Standards Track [Page 42] + +RFC 2518 WEBDAV February 1999 + + + When the MOVE method has completed processing it MUST have created a + consistent namespace at both the source and destination (see section + 5.1 for the definition of namespace consistency). 
However, if an + error occurs while moving an internal collection, the server MUST NOT + move any resources identified by members of the failed collection + (i.e., the server must skip the error-causing subtree), as this would + create an inconsistent namespace. In this case, after detecting the + error, the move operation SHOULD try to finish as much of the + original move as possible (i.e., the server should still attempt to + move other subtrees and the resources identified by their members, + that are not descendents of an error-causing collection). So, for + example, if an infinite depth move is performed on collection /a/, + which contains collections /a/b/ and /a/c/, and an error occurs + moving /a/b/, an attempt should still be made to try moving /a/c/. + Similarly, after encountering an error moving a non-collection + resource as part of an infinite depth move, the server SHOULD try to + finish as much of the original move operation as possible. + + If an error occurs with a resource other than the resource identified + in the Request-URI then the response MUST be a 207 (Multi-Status). + + The 424 (Failed Dependency) status code SHOULD NOT be returned in the + 207 (Multi-Status) response from a MOVE method. These errors can be + safely omitted because the client will know that the progeny of a + resource could not be moved when the client receives an error for the + parent. Additionally 201 (Created)/204 (No Content) responses SHOULD + NOT be returned as values in 207 (Multi-Status) responses from a + MOVE. These responses can be safely omitted because they are the + default success codes. + +8.9.3 MOVE and the Overwrite Header + + If a resource exists at the destination and the Overwrite header is + "T" then prior to performing the move the server MUST perform a + DELETE with "Depth: infinity" on the destination resource. If the + Overwrite header is set to "F" then the operation will fail. 
+ +8.9.4 Status Codes + + 201 (Created) - The source resource was successfully moved, and a new + resource was created at the destination. + + 204 (No Content) - The source resource was successfully moved to a + pre-existing destination resource. + + 403 (Forbidden) - The source and destination URIs are the same. + + + + + +Goland, et al. Standards Track [Page 43] + +RFC 2518 WEBDAV February 1999 + + + 409 (Conflict) - A resource cannot be created at the destination + until one or more intermediate collections have been created. + + 412 (Precondition Failed) - The server was unable to maintain the + liveness of the properties listed in the propertybehavior XML element + or the Overwrite header is "F" and the state of the destination + resource is non-null. + + 423 (Locked) - The source or the destination resource was locked. + + 502 (Bad Gateway) - This may occur when the destination is on another + server and the destination server refuses to accept the resource. + +8.9.5 Example - MOVE of a Non-Collection + + This example shows resource + http://www.ics.uci.edu/~fielding/index.html being moved to the + location http://www.ics.uci.edu/users/f/fielding/index.html. The + contents of the destination resource would have been overwritten if + the destination resource had been non-null. In this case, since + there was nothing at the destination resource, the response code is + 201 (Created). + + >>Request + + MOVE /~fielding/index.html HTTP/1.1 + Host: www.ics.uci.edu + Destination: http://www.ics.uci.edu/users/f/fielding/index.html + + >>Response + + HTTP/1.1 201 Created + Location: http://www.ics.uci.edu/users/f/fielding/index.html + + +8.9.6 Example - MOVE of a Collection + + >>Request + + MOVE /container/ HTTP/1.1 + Host: www.foo.bar + Destination: http://www.foo.bar/othercontainer/ + Overwrite: F + If: () + () + Content-Type: text/xml; charset="utf-8" + Content-Length: xxxx + + + + +Goland, et al.
 Standards Track [Page 44] + +RFC 2518 WEBDAV February 1999 + + + + + * + + + >>Response + + HTTP/1.1 207 Multi-Status + Content-Type: text/xml; charset="utf-8" + Content-Length: xxxx + + + + + http://www.foo.bar/othercontainer/C2/ + HTTP/1.1 423 Locked + + + + In this example the client has submitted a number of lock tokens with + the request. A lock token will need to be submitted for every + resource, both source and destination, anywhere in the scope of the + method, that is locked. In this case the proper lock token was not + submitted for the destination http://www.foo.bar/othercontainer/C2/. + This means that the resource /container/C2/ could not be moved. + Because there was an error moving /container/C2/, none of + /container/C2's members were moved. However no errors were listed + for those members due to the error minimization rules given in + section 8.8.3. User agent authentication has previously occurred via + a mechanism outside the scope of the HTTP protocol, in an underlying + transport layer. + +8.10 LOCK Method + + The following sections describe the LOCK method, which is used to + take out a lock of any access type. These sections on the LOCK + method describe only those semantics that are specific to the LOCK + method and are independent of the access type of the lock being + requested. + + Any resource which supports the LOCK method MUST, at minimum, support + the XML request and response formats defined herein. + + + + + + + + + +Goland, et al. Standards Track [Page 45] + +RFC 2518 WEBDAV February 1999 + + +8.10.1 Operation + + A LOCK method invocation creates the lock specified by the lockinfo + XML element on the Request-URI. Lock method requests SHOULD have a + XML request body which contains an owner XML element for this lock + request, unless this is a refresh request. The LOCK request may have + a Timeout header. 
+ + Clients MUST assume that locks may arbitrarily disappear at any time, + regardless of the value given in the Timeout header. The Timeout + header only indicates the behavior of the server if "extraordinary" + circumstances do not occur. For example, an administrator may remove + a lock at any time or the system may crash in such a way that it + loses the record of the lock's existence. The response MUST contain + the value of the lockdiscovery property in a prop XML element. + + In order to indicate the lock token associated with a newly created + lock, a Lock-Token response header MUST be included in the response + for every successful LOCK request for a new lock. Note that the + Lock-Token header would not be returned in the response for a + successful refresh LOCK request because a new lock was not created. + +8.10.2 The Effect of Locks on Properties and Collections + + The scope of a lock is the entire state of the resource, including + its body and associated properties. As a result, a lock on a + resource MUST also lock the resource's properties. + + For collections, a lock also affects the ability to add or remove + members. The nature of the effect depends upon the type of access + control involved. + +8.10.3 Locking Replicated Resources + + A resource may be made available through more than one URI. However + locks apply to resources, not URIs. Therefore a LOCK request on a + resource MUST NOT succeed if it can not be honored by all the URIs + through which the resource is addressable. + +8.10.4 Depth and Locking + + The Depth header may be used with the LOCK method. Values other than + 0 or infinity MUST NOT be used with the Depth header on a LOCK + method. All resources that support the LOCK method MUST support the + Depth header. + + A Depth header of value 0 means to just lock the resource specified + by the Request-URI. + + + +Goland, et al.
Standards Track [Page 46] + +RFC 2518 WEBDAV February 1999 + + + If the Depth header is set to infinity then the resource specified in + the Request-URI along with all its internal members, all the way down + the hierarchy, are to be locked. A successful result MUST return a + single lock token which represents all the resources that have been + locked. If an UNLOCK is successfully executed on this token, all + associated resources are unlocked. If the lock cannot be granted to + all resources, a 409 (Conflict) status code MUST be returned with a + response entity body containing a multistatus XML element describing + which resource(s) prevented the lock from being granted. Hence, + partial success is not an option. Either the entire hierarchy is + locked or no resources are locked. + + If no Depth header is submitted on a LOCK request then the request + MUST act as if a "Depth:infinity" had been submitted. + +8.10.5 Interaction with other Methods + + The interaction of a LOCK with various methods is dependent upon the + lock type. However, independent of lock type, a successful DELETE of + a resource MUST cause all of its locks to be removed. + +8.10.6 Lock Compatibility Table + + The table below describes the behavior that occurs when a lock + request is made on a resource. + + Current lock state/ | Shared Lock | Exclusive + Lock request | | Lock + =====================+=================+============== + None | True | True + ---------------------+-----------------+-------------- + Shared Lock | True | False + ---------------------+-----------------+-------------- + Exclusive Lock | False | False* + ------------------------------------------------------ + + Legend: True = lock may be granted. False = lock MUST NOT be + granted. *=It is illegal for a principal to request the same lock + twice. + + The current lock state of a resource is given in the leftmost column, + and lock requests are listed in the first row. 
The intersection of a + row and column gives the result of a lock request. For example, if a + shared lock is held on a resource, and an exclusive lock is + requested, the table entry is "false", indicating the lock must not + be granted. + + + + + +Goland, et al. Standards Track [Page 47] + +RFC 2518 WEBDAV February 1999 + + +8.10.7 Status Codes + + 200 (OK) - The lock request succeeded and the value of the + lockdiscovery property is included in the body. + + 412 (Precondition Failed) - The included lock token was not + enforceable on this resource or the server could not satisfy the + request in the lockinfo XML element. + + 423 (Locked) - The resource is locked, so the method has been + rejected. + +8.10.8 Example - Simple Lock Request + + >>Request + + LOCK /workspace/webdav/proposal.doc HTTP/1.1 + Host: webdav.sb.aol.com + Timeout: Infinite, Second-4100000000 + Content-Type: text/xml; charset="utf-8" + Content-Length: xxxx + Authorization: Digest username="ejw", + realm="ejw@webdav.sb.aol.com", nonce="...", + uri="/workspace/webdav/proposal.doc", + response="...", opaque="..." + + + + + + + http://www.ics.uci.edu/~ejw/contact.html + + + + >>Response + + HTTP/1.1 200 OK + Content-Type: text/xml; charset="utf-8" + Content-Length: xxxx + + + + + + + + Infinity + + + +Goland, et al. Standards Track [Page 48] + +RFC 2518 WEBDAV February 1999 + + + + + http://www.ics.uci.edu/~ejw/contact.html + + + Second-604800 + + + opaquelocktoken:e71d4fae-5dec-22d6-fea5-00a0c91e6be4 + + + + + + + This example shows the successful creation of an exclusive write lock + on resource http://webdav.sb.aol.com/workspace/webdav/proposal.doc. + The resource http://www.ics.uci.edu/~ejw/contact.html contains + contact information for the owner of the lock. The server has an + activity-based timeout policy in place on this resource, which causes + the lock to automatically be removed after 1 week (604800 seconds). 
+ Note that the nonce, response, and opaque fields have not been + calculated in the Authorization request header. + +8.10.9 Example - Refreshing a Write Lock + + >>Request + + LOCK /workspace/webdav/proposal.doc HTTP/1.1 + Host: webdav.sb.aol.com + Timeout: Infinite, Second-4100000000 + If: (<opaquelocktoken:e71d4fae-5dec-22d6-fea5-00a0c91e6be4>) + Authorization: Digest username="ejw", + realm="ejw@webdav.sb.aol.com", nonce="...", + uri="/workspace/webdav/proposal.doc", + response="...", opaque="..." + + >>Response + + HTTP/1.1 200 OK + Content-Type: text/xml; charset="utf-8" + Content-Length: xxxx + + + + + + + + + +Goland, et al. Standards Track [Page 49] + +RFC 2518 WEBDAV February 1999 + + + + Infinity + + + http://www.ics.uci.edu/~ejw/contact.html + + + Second-604800 + + + opaquelocktoken:e71d4fae-5dec-22d6-fea5-00a0c91e6be4 + + + + + + + This request would refresh the lock, resetting any time outs. Notice + that the client asked for an infinite time out but the server chose + to ignore the request. In this example, the nonce, response, and + opaque fields have not been calculated in the Authorization request + header. + +8.10.10 Example - Multi-Resource Lock Request + + >>Request + + LOCK /webdav/ HTTP/1.1 + Host: webdav.sb.aol.com + Timeout: Infinite, Second-4100000000 + Depth: infinity + Content-Type: text/xml; charset="utf-8" + Content-Length: xxxx + Authorization: Digest username="ejw", + realm="ejw@webdav.sb.aol.com", nonce="...", + uri="/workspace/webdav/proposal.doc", + response="...", opaque="..." + + + + + + + http://www.ics.uci.edu/~ejw/contact.html + + + + >>Response + + + +Goland, et al. Standards Track [Page 50] + +RFC 2518 WEBDAV February 1999 + + + HTTP/1.1 207 Multi-Status + Content-Type: text/xml; charset="utf-8" + Content-Length: xxxx + + + + + http://webdav.sb.aol.com/webdav/secret + HTTP/1.1 403 Forbidden + + + http://webdav.sb.aol.com/webdav/ + + + HTTP/1.1 424 Failed Dependency + + + + + This example shows a request for an exclusive write lock on a + collection and all its children.
In this request, the client has + specified that it desires an infinite length lock, if available, + otherwise a timeout of 4.1 billion seconds, if available. The request + entity body contains the contact information for the principal taking + out the lock, in this case a web page URL. + + The error is a 403 (Forbidden) response on the resource + http://webdav.sb.aol.com/webdav/secret. Because this resource could + not be locked, none of the resources were locked. Note also that the + lockdiscovery property for the Request-URI has been included as + required. In this example the lockdiscovery property is empty which + means that there are no outstanding locks on the resource. + + In this example, the nonce, response, and opaque fields have not been + calculated in the Authorization request header. + +8.11 UNLOCK Method + + The UNLOCK method removes the lock identified by the lock token in + the Lock-Token request header from the Request-URI, and all other + resources included in the lock. If all resources which have been + locked under the submitted lock token can not be unlocked then the + UNLOCK request MUST fail. + + Any DAV compliant resource which supports the LOCK method MUST + support the UNLOCK method. + + + + + +Goland, et al. Standards Track [Page 51] + +RFC 2518 WEBDAV February 1999 + + +8.11.1 Example - UNLOCK + + >>Request + + UNLOCK /workspace/webdav/info.doc HTTP/1.1 + Host: webdav.sb.aol.com + Lock-Token: <opaquelocktoken:a515cfa4-5da4-22e1-f5b5-00a0451e6bf7> + Authorization: Digest username="ejw", + realm="ejw@webdav.sb.aol.com", nonce="...", + uri="/workspace/webdav/proposal.doc", + response="...", opaque="..." + + >>Response + + HTTP/1.1 204 No Content + + In this example, the lock identified by the lock token + "opaquelocktoken:a515cfa4-5da4-22e1-f5b5-00a0451e6bf7" is + successfully removed from the resource + http://webdav.sb.aol.com/workspace/webdav/info.doc. If this lock + included more than just one resource, the lock is removed from all + resources included in the lock.
The 204 (No Content) status code is + used instead of 200 (OK) because there is no response entity body. + + In this example, the nonce, response, and opaque fields have not been + calculated in the Authorization request header. + +9 HTTP Headers for Distributed Authoring + +9.1 DAV Header + + DAV = "DAV" ":" "1" ["," "2"] ["," 1#extend] + + This header indicates that the resource supports the DAV schema and + protocol as specified. All DAV compliant resources MUST return the + DAV header on all OPTIONS responses. + + The value is a list of all compliance classes that the resource + supports. Note that above a comma has already been added to the 2. + This is because a resource can not be level 2 compliant unless it is + also level 1 compliant. Please refer to section 15 for more details. + In general, however, support for one compliance class does not entail + support for any other. + +9.2 Depth Header + + Depth = "Depth" ":" ("0" | "1" | "infinity") + + + + +Goland, et al. Standards Track [Page 52] + +RFC 2518 WEBDAV February 1999 + + + The Depth header is used with methods executed on resources which + could potentially have internal members to indicate whether the + method is to be applied only to the resource ("Depth: 0"), to the + resource and its immediate children, ("Depth: 1"), or the resource + and all its progeny ("Depth: infinity"). + + The Depth header is only supported if a method's definition + explicitly provides for such support. + + The following rules are the default behavior for any method that + supports the Depth header. A method may override these defaults by + defining different behavior in its definition. + + Methods which support the Depth header may choose not to support all + of the header's values and may define, on a case by case basis, the + behavior of the method if a Depth header is not present. 
For example, + the MOVE method only supports "Depth: infinity" and if a Depth header + is not present will act as if a "Depth: infinity" header had been + applied. + + Clients MUST NOT rely upon methods executing on members of their + hierarchies in any particular order or on the execution being atomic + unless the particular method explicitly provides such guarantees. + + Upon execution, a method with a Depth header will perform as much of + its assigned task as possible and then return a response specifying + what it was able to accomplish and what it failed to do. + + So, for example, an attempt to COPY a hierarchy may result in some of + the members being copied and some not. + + Any headers on a method that has a defined interaction with the Depth + header MUST be applied to all resources in the scope of the method + except where alternative behavior is explicitly defined. For example, + an If-Match header will have its value applied against every resource + in the method's scope and will cause the method to fail if the header + fails to match. + + If a resource, source or destination, within the scope of the method + with a Depth header is locked in such a way as to prevent the + successful execution of the method, then the lock token for that + resource MUST be submitted with the request in the If request header. + + The Depth header only specifies the behavior of the method with + regards to internal children. If a resource does not have internal + children then the Depth header MUST be ignored. + + + + + +Goland, et al. Standards Track [Page 53] + +RFC 2518 WEBDAV February 1999 + + + Please note, however, that it is always an error to submit a value + for the Depth header that is not allowed by the method's definition. + Thus submitting a "Depth: 1" on a COPY, even if the resource does not + have internal members, will result in a 400 (Bad Request). 
The method + should fail not because the resource doesn't have internal members, + but because of the illegal value in the header. + +9.3 Destination Header + + Destination = "Destination" ":" absoluteURI + + The Destination header specifies the URI which identifies a + destination resource for methods such as COPY and MOVE, which take + two URIs as parameters. Note that the absoluteURI production is + defined in [RFC2396]. + +9.4 If Header + + If = "If" ":" ( 1*No-tag-list | 1*Tagged-list) + No-tag-list = List + Tagged-list = Resource 1*List + Resource = Coded-URL + List = "(" 1*(["Not"](State-token | "[" entity-tag "]")) ")" + State-token = Coded-URL + Coded-URL = "<" absoluteURI ">" + + The If header is intended to have similar functionality to the If- + Match header defined in section 14.25 of [RFC2068]. However the If + header is intended for use with any URI which represents state + information, referred to as a state token, about a resource as well + as ETags. A typical example of a state token is a lock token, and + lock tokens are the only state tokens defined in this specification. + + All DAV compliant resources MUST honor the If header. + + The If header's purpose is to describe a series of state lists. If + the state of the resource to which the header is applied does not + match any of the specified state lists then the request MUST fail + with a 412 (Precondition Failed). If one of the described state + lists matches the state of the resource then the request may succeed. + + Note that the absoluteURI production is defined in [RFC2396]. + + + + + + + + + +Goland, et al. Standards Track [Page 54] + +RFC 2518 WEBDAV February 1999 + + +9.4.1 No-tag-list Production + + The No-tag-list production describes a series of state tokens and + ETags. If multiple No-tag-list productions are used then one only + needs to match the state of the resource for the method to be allowed + to continue. 
+ + If a method, due to the presence of a Depth or Destination header, is + applied to multiple resources then the No-tag-list production MUST be + applied to each resource the method is applied to. + +9.4.1.1 Example - No-tag-list If Header + + If: (<locktoken:a-write-lock-token> ["I am an ETag"]) (["I am another + ETag"]) + + The previous header would require that any resources within the scope + of the method must either be locked with the specified lock token and + in the state identified by the "I am an ETag" ETag or in the state + identified by the second ETag "I am another ETag". To put the matter + more plainly one can think of the previous If header as being in the + form (or (and <locktoken:a-write-lock-token> ["I am an ETag"]) (and + ["I am another ETag"])). + +9.4.2 Tagged-list Production + + The tagged-list production scopes a list production. That is, it + specifies that the lists following the resource specification only + apply to the specified resource. The scope of the resource + production begins with the list production immediately following the + resource production and ends with the next resource production, if + any. + + When the If header is applied to a particular resource, the Tagged- + list productions MUST be searched to determine if any of the listed + resources match the operand resource(s) for the current method. If + none of the resource productions match the current resource then the + header MUST be ignored. If one of the resource productions does + match the name of the resource under consideration then the list + productions following the resource production MUST be applied to the + resource in the manner specified in the previous section. + + The same URI MUST NOT appear more than once in a resource production + in an If header. + + + + + + + +Goland, et al.
Standards Track [Page 55] + +RFC 2518 WEBDAV February 1999 + + +9.4.2.1 Example - Tagged List If header + + COPY /resource1 HTTP/1.1 + Host: www.foo.bar + Destination: http://www.foo.bar/resource2 + If: <http://www.foo.bar/resource1> (<locktoken:a-write-lock-token> + [W/"A weak ETag"]) (["strong ETag"]) + <http://www.bar.bar/random>(["another strong ETag"]) + + In this example http://www.foo.bar/resource1 is being copied to + http://www.foo.bar/resource2. When the method is first applied to + http://www.foo.bar/resource1, resource1 must be in the state + specified by "(<locktoken:a-write-lock-token> [W/"A weak ETag"]) + (["strong ETag"])", that is, it either must be locked with a lock + token of "locktoken:a-write-lock-token" and have a weak entity tag + W/"A weak ETag" or it must have a strong entity tag "strong ETag". + + That is the only success condition since the resource + http://www.bar.bar/random never has the method applied to it (the + only other resource listed in the If header) and + http://www.foo.bar/resource2 is not listed in the If header. + +9.4.3 not Production + + Every state token or ETag is either current, and hence describes the + state of a resource, or is not current, and does not describe the + state of a resource. The boolean operation of matching a state token + or ETag to the current state of a resource thus resolves to a true or + false value. The not production is used to reverse that value. The + scope of the not production is the state-token or entity-tag + immediately following it. + + If: (Not <locktoken:write1> <locktoken:write2>) + + When submitted with a request, this If header requires that all + operand resources must not be locked with locktoken:write1 and must + be locked with locktoken:write2. + +9.4.4 Matching Function + + When performing If header processing, the definition of a matching + state token or entity tag is as follows. + + Matching entity tag: Where the entity tag matches an entity tag + associated with that resource. + + Matching state token: Where there is an exact match between the state + token in the If header and any state token on the resource.
+ + + +Goland, et al. Standards Track [Page 56] + +RFC 2518 WEBDAV February 1999 + + +9.4.5 If Header and Non-DAV Compliant Proxies + + Non-DAV compliant proxies will not honor the If header, since they + will not understand the If header, and HTTP requires non-understood + headers to be ignored. When communicating with HTTP/1.1 proxies, the + "Cache-Control: no-cache" request header MUST be used so as to + prevent the proxy from improperly trying to service the request from + its cache. When dealing with HTTP/1.0 proxies the "Pragma: no-cache" + request header MUST be used for the same reason. + +9.5 Lock-Token Header + + Lock-Token = "Lock-Token" ":" Coded-URL + + The Lock-Token request header is used with the UNLOCK method to + identify the lock to be removed. The lock token in the Lock-Token + request header MUST identify a lock that contains the resource + identified by Request-URI as a member. + + The Lock-Token response header is used with the LOCK method to + indicate the lock token created as a result of a successful LOCK + request to create a new lock. + +9.6 Overwrite Header + + Overwrite = "Overwrite" ":" ("T" | "F") + + The Overwrite header specifies whether the server should overwrite + the state of a non-null destination resource during a COPY or MOVE. + A value of "F" states that the server must not perform the COPY or + MOVE operation if the state of the destination resource is non-null. + If the overwrite header is not included in a COPY or MOVE request + then the resource MUST treat the request as if it has an overwrite + header of value "T". While the Overwrite header appears to duplicate + the functionality of the If-Match: * header of HTTP/1.1, If-Match + applies only to the Request-URI, and not to the Destination of a COPY + or MOVE. + + If a COPY or MOVE is not performed due to the value of the Overwrite + header, the method MUST fail with a 412 (Precondition Failed) status + code. 
+ + All DAV compliant resources MUST support the Overwrite header. + +9.7 Status-URI Response Header + + The Status-URI response header may be used with the 102 (Processing) + status code to inform the client as to the status of a method. + + + +Goland, et al. Standards Track [Page 57] + +RFC 2518 WEBDAV February 1999 + + + Status-URI = "Status-URI" ":" *(Status-Code Coded-URL) ; Status-Code + is defined in 6.1.1 of [RFC2068] + + The URIs listed in the header are source resources which have been + affected by the outstanding method. The status code indicates the + resolution of the method on the identified resource. So, for + example, if a MOVE method on a collection is outstanding and a 102 + (Processing) response with a Status-URI response header is returned, + the included URIs will indicate resources that have had move + attempted on them and what the result was. + +9.8 Timeout Request Header + + TimeOut = "Timeout" ":" 1#TimeType + TimeType = ("Second-" DAVTimeOutVal | "Infinite" | Other) + DAVTimeOutVal = 1*digit + Other = "Extend" field-value ; See section 4.2 of [RFC2068] + + Clients may include Timeout headers in their LOCK requests. However, + the server is not required to honor or even consider these requests. + Clients MUST NOT submit a Timeout request header with any method + other than a LOCK method. + + A Timeout request header MUST contain at least one TimeType and may + contain multiple TimeType entries. The purpose of listing multiple + TimeType entries is to indicate multiple different values and value + types that are acceptable to the client. The client lists the + TimeType entries in order of preference. + + Timeout response values MUST use a Second value, Infinite, or a + TimeType the client has indicated familiarity with. The server may + assume a client is familiar with any TimeType submitted in a Timeout + header. 
+ + The "Second" TimeType specifies the number of seconds that will + elapse between granting of the lock at the server, and the automatic + removal of the lock. The timeout value for TimeType "Second" MUST + NOT be greater than 2^32-1. + + The timeout counter SHOULD be restarted any time an owner of the lock + sends a method to any member of the lock, including unsupported + methods, or methods which are unsuccessful. However the lock MUST be + refreshed if a refresh LOCK method is successfully received. + + If the timeout expires then the lock may be lost. Specifically, if + the server wishes to harvest the lock upon time-out, the server + SHOULD act as if an UNLOCK method was executed by the server on the + resource using the lock token of the timed-out lock, performed with + + + +Goland, et al. Standards Track [Page 58] + +RFC 2518 WEBDAV February 1999 + + + its override authority. Thus logs should be updated with the + disposition of the lock, notifications should be sent, etc., just as + they would be for an UNLOCK request. + + Servers are advised to pay close attention to the values submitted by + clients, as they will be indicative of the type of activity the + client intends to perform. For example, an applet running in a + browser may need to lock a resource, but because of the instability + of the environment within which the applet is running, the applet may + be turned off without warning. As a result, the applet is likely to + ask for a relatively small timeout value so that if the applet dies, + the lock can be quickly harvested. However, a document management + system is likely to ask for an extremely long timeout because its + user may be planning on going off-line. + + A client MUST NOT assume that just because the time-out has expired + the lock has been lost. + +10 Status Code Extensions to HTTP/1.1 + + The following status codes are added to those defined in HTTP/1.1 + [RFC2068]. 
+ +10.1 102 Processing + + The 102 (Processing) status code is an interim response used to + inform the client that the server has accepted the complete request, + but has not yet completed it. This status code SHOULD only be sent + when the server has a reasonable expectation that the request will + take significant time to complete. As guidance, if a method is taking + longer than 20 seconds (a reasonable, but arbitrary value) to process + the server SHOULD return a 102 (Processing) response. The server MUST + send a final response after the request has been completed. + + Methods can potentially take a long period of time to process, + especially methods that support the Depth header. In such cases the + client may time-out the connection while waiting for a response. To + prevent this the server may return a 102 (Processing) status code to + indicate to the client that the server is still processing the + method. + +10.2 207 Multi-Status + + The 207 (Multi-Status) status code provides status for multiple + independent operations (see section 11 for more information). + + + + + + +Goland, et al. Standards Track [Page 59] + +RFC 2518 WEBDAV February 1999 + + +10.3 422 Unprocessable Entity + + The 422 (Unprocessable Entity) status code means the server + understands the content type of the request entity (hence a + 415(Unsupported Media Type) status code is inappropriate), and the + syntax of the request entity is correct (thus a 400 (Bad Request) + status code is inappropriate) but was unable to process the contained + instructions. For example, this error condition may occur if an XML + request body contains well-formed (i.e., syntactically correct), but + semantically erroneous XML instructions. + +10.4 423 Locked + + The 423 (Locked) status code means the source or destination resource + of a method is locked. 
+ +10.5 424 Failed Dependency + + The 424 (Failed Dependency) status code means that the method could + not be performed on the resource because the requested action + depended on another action and that action failed. For example, if a + command in a PROPPATCH method fails then, at minimum, the rest of the + commands will also fail with 424 (Failed Dependency). + +10.6 507 Insufficient Storage + + The 507 (Insufficient Storage) status code means the method could not + be performed on the resource because the server is unable to store + the representation needed to successfully complete the request. This + condition is considered to be temporary. If the request which + received this status code was the result of a user action, the + request MUST NOT be repeated until it is requested by a separate user + action. + +11 Multi-Status Response + + The default 207 (Multi-Status) response body is a text/xml or + application/xml HTTP entity that contains a single XML element called + multistatus, which contains a set of XML elements called response + which contain 200, 300, 400, and 500 series status codes generated + during the method invocation. 100 series status codes SHOULD NOT be + recorded in a response XML element. + + + + + + + + + +Goland, et al. Standards Track [Page 60] + +RFC 2518 WEBDAV February 1999 + + +12 XML Element Definitions + + In the section below, the final line of each section gives the + element type declaration using the format defined in [REC-XML]. The + "Value" field, where present, specifies further restrictions on the + allowable contents of the XML element using BNF (i.e., to further + restrict the values of a PCDATA element). + +12.1 activelock XML Element + + Name: activelock + Namespace: DAV: + Purpose: Describes a lock on a resource. + + + +12.1.1 depth XML Element + + Name: depth + Namespace: DAV: + Purpose: The value of the Depth header. 
+ Value: "0" | "1" | "infinity" + + + +12.1.2 locktoken XML Element + + Name: locktoken + Namespace: DAV: + Purpose: The lock token associated with a lock. + Description: The href contains one or more opaque lock token URIs + which all refer to the same lock (i.e., the OpaqueLockToken-URI + production in section 6.4). + + + +12.1.3 timeout XML Element + + Name: timeout + Namespace: DAV: + Purpose: The timeout associated with a lock + Value: TimeType ;Defined in section 9.8 + + + + + + + + +Goland, et al. Standards Track [Page 61] + +RFC 2518 WEBDAV February 1999 + + +12.2 collection XML Element + + Name: collection + Namespace: DAV: + Purpose: Identifies the associated resource as a collection. The + resourcetype property of a collection resource MUST have this value. + + + +12.3 href XML Element + + Name: href + Namespace: DAV: + Purpose: Identifies the content of the element as a URI. + Value: URI ; See section 3.2.1 of [RFC2068] + + + +12.4 link XML Element + + Name: link + Namespace: DAV: + Purpose: Identifies the property as a link and contains the source + and destination of that link. + Description: The link XML element is used to provide the sources and + destinations of a link. The name of the property containing the link + XML element provides the type of the link. Link is a multi-valued + element, so multiple links may be used together to indicate multiple + links with the same type. The values in the href XML elements inside + the src and dst XML elements of the link XML element MUST NOT be + rejected if they point to resources which do not exist. + + + +12.4.1 dst XML Element + + Name: dst + Namespace: DAV: + Purpose: Indicates the destination of a link + Value: URI + + + +12.4.2 src XML Element + + Name: src + Namespace: DAV: + Purpose: Indicates the source of a link. + + + +Goland, et al. 
Standards Track [Page 62] + +RFC 2518 WEBDAV February 1999 + + + Value: URI + + + +12.5 lockentry XML Element + + Name: lockentry + Namespace: DAV: + Purpose: Defines the types of locks that can be used with the + resource. + + + +12.6 lockinfo XML Element + + Name: lockinfo + Namespace: DAV: + Purpose: The lockinfo XML element is used with a LOCK method to + specify the type of lock the client wishes to have created. + + + +12.7 lockscope XML Element + + Name: lockscope + Namespace: DAV: + Purpose: Specifies whether a lock is an exclusive lock, or a + shared lock. + + + +12.7.1 exclusive XML Element + + Name: exclusive + Namespace: DAV: + Purpose: Specifies an exclusive lock + + + +12.7.2 shared XML Element + + Name: shared + Namespace: DAV: + Purpose: Specifies a shared lock + + + + + + + +Goland, et al. Standards Track [Page 63] + +RFC 2518 WEBDAV February 1999 + + +12.8 locktype XML Element + + Name: locktype + Namespace: DAV: + Purpose: Specifies the access type of a lock. At present, this + specification only defines one lock type, the write lock. + + + +12.8.1 write XML Element + + Name: write + Namespace: DAV: + Purpose: Specifies a write lock. + + + +12.9 multistatus XML Element + + Name: multistatus + Namespace: DAV: + Purpose: Contains multiple response messages. + Description: The responsedescription at the top level is used to + provide a general message describing the overarching nature of the + response. If this value is available an application may use it + instead of presenting the individual response descriptions contained + within the responses. + + + +12.9.1 response XML Element + + Name: response + Namespace: DAV: + Purpose: Holds a single response describing the effect of a + method on resource and/or its properties. + Description: A particular href MUST NOT appear more than once as the + child of a response XML element under a multistatus XML element. 
+ This requirement is necessary in order to keep processing costs for a + response to linear time. Essentially, this prevents having to search + in order to group together all the responses by href. There are, + however, no requirements regarding ordering based on href values. + + + + + + + + +Goland, et al. Standards Track [Page 64] + +RFC 2518 WEBDAV February 1999 + + +12.9.1.1 propstat XML Element + + Name: propstat + Namespace: DAV: + Purpose: Groups together a prop and status element that is + associated with a particular href element. + Description: The propstat XML element MUST contain one prop XML + element and one status XML element. The contents of the prop XML + element MUST only list the names of properties to which the result in + the status element applies. + + + +12.9.1.2 status XML Element + + Name: status + Namespace: DAV: + Purpose: Holds a single HTTP status-line + Value: status-line ;status-line defined in [RFC2068] + + + +12.9.2 responsedescription XML Element + + Name: responsedescription + Namespace: DAV: + Purpose: Contains a message that can be displayed to the user + explaining the nature of the response. + Description: This XML element provides information suitable to be + presented to a user. + + + +12.10 owner XML Element + + Name: owner + Namespace: DAV: + Purpose: Provides information about the principal taking out a + lock. + Description: The owner XML element provides information sufficient + for either directly contacting a principal (such as a telephone + number or Email URI), or for discovering the principal (such as the + URL of a homepage) who owns a lock. + + + + + + + + +Goland, et al. Standards Track [Page 65] + +RFC 2518 WEBDAV February 1999 + + +12.11 prop XML element + + Name: prop + Namespace: DAV: + Purpose: Contains properties related to a resource. + Description: The prop XML element is a generic container for + properties defined on resources. 
All elements inside a prop XML + element MUST define properties related to the resource. No other + elements may be used inside of a prop element. + + + +12.12 propertybehavior XML element + + Name: propertybehavior Namespace: DAV: Purpose: Specifies + how properties are handled during a COPY or MOVE. + Description: The propertybehavior XML element specifies how + properties are handled during a COPY or MOVE. If this XML element is + not included in the request body then the server is expected to act + as defined by the default property handling behavior of the + associated method. All WebDAV compliant resources MUST support the + propertybehavior XML element. + + + +12.12.1 keepalive XML element + + Name: keepalive + Namespace: DAV: + Purpose: Specifies requirements for the copying/moving of live + properties. + Description: If a list of URIs is included as the value of keepalive + then the named properties MUST be "live" after they are copied + (moved) to the destination resource of a COPY (or MOVE). If the + value "*" is given for the keepalive XML element, this designates + that all live properties on the source resource MUST be live on the + destination. If the requirements specified by the keepalive element + can not be honored then the method MUST fail with a 412 (Precondition + Failed). All DAV compliant resources MUST support the keepalive XML + element for use with the COPY and MOVE methods. + Value: "*" ; #PCDATA value can only be "*" + + + + + + + + + + +Goland, et al. Standards Track [Page 66] + +RFC 2518 WEBDAV February 1999 + + +12.12.2 omit XML element + + Name: omit + Namespace: DAV: + Purpose: The omit XML element instructs the server that it should + use best effort to copy properties but a failure to copy a property + MUST NOT cause the method to fail. Description: The default behavior + for a COPY or MOVE is to copy/move all properties or fail the method. 
+ In certain circumstances, such as when a server copies a resource + over another protocol such as FTP, it may not be possible to + copy/move the properties associated with the resource. Thus any + attempt to copy/move over FTP would always have to fail because + properties could not be moved over, even as dead properties. All DAV + compliant resources MUST support the omit XML element on COPY/MOVE + methods. + + + +12.13 propertyupdate XML element + + Name: propertyupdate + Namespace: DAV: + Purpose: Contains a request to alter the properties on a + resource. + Description: This XML element is a container for the information + required to modify the properties on the resource. This XML element + is multi-valued. + + + +12.13.1 remove XML element + + Name: remove + Namespace: DAV: + Purpose: Lists the DAV properties to be removed from a resource. + Description: Remove instructs that the properties specified in prop + should be removed. Specifying the removal of a property that does + not exist is not an error. All the XML elements in a prop XML + element inside of a remove XML element MUST be empty, as only the + names of properties to be removed are required. + + + +12.13.2 set XML element + + Name: set + Namespace: DAV: + Purpose: Lists the DAV property values to be set for a resource. + + + +Goland, et al. Standards Track [Page 67] + +RFC 2518 WEBDAV February 1999 + + + Description: The set XML element MUST contain only a prop XML + element. The elements contained by the prop XML element inside the + set XML element MUST specify the name and value of properties that + are set on the resource identified by Request-URI. If a property + already exists then its value is replaced. Language tagging + information in the property's value (in the "xml:lang" attribute, if + present) MUST be persistently stored along with the property, and + MUST be subsequently retrievable using PROPFIND. 
+ + + +12.14 propfind XML Element + + Name: propfind + Namespace: DAV: + Purpose: Specifies the properties to be returned from a PROPFIND + method. Two special elements are specified for use with propfind, + allprop and propname. If prop is used inside propfind it MUST only + contain property names, not values. + + + +12.14.1 allprop XML Element + + Name: allprop Namespace: DAV: Purpose: The allprop XML + element specifies that all property names and values on the resource + are to be returned. + + + +12.14.2 propname XML Element + + Name: propname Namespace: DAV: Purpose: The propname XML + element specifies that only a list of property names on the resource + is to be returned. + + + +13 DAV Properties + + For DAV properties, the name of the property is also the same as the + name of the XML element that contains its value. In the section + below, the final line of each section gives the element type + declaration using the format defined in [REC-XML]. The "Value" field, + where present, specifies further restrictions on the allowable + contents of the XML element using BNF (i.e., to further restrict the + values of a PCDATA element). + + + + +Goland, et al. Standards Track [Page 68] + +RFC 2518 WEBDAV February 1999 + + +13.1 creationdate Property + + Name: creationdate + Namespace: DAV: + Purpose: Records the time and date the resource was created. + Value: date-time ; See Appendix 2 + Description: The creationdate property should be defined on all DAV + compliant resources. If present, it contains a timestamp of the + moment when the resource was created (i.e., the moment it had non- + null state). + + + +13.2 displayname Property + + Name: displayname + Namespace: DAV: + Purpose: Provides a name for the resource that is suitable for + presentation to a user. + Description: The displayname property should be defined on all DAV + compliant resources. If present, the property contains a description + of the resource that is suitable for presentation to a user. 
+ + + +13.3 getcontentlanguage Property + + Name: getcontentlanguage + Namespace: DAV: + Purpose: Contains the Content-Language header returned by a GET + without accept headers + Description: The getcontentlanguage property MUST be defined on any + DAV compliant resource that returns the Content-Language header on a + GET. + Value: language-tag ;language-tag is defined in section 14.13 + of [RFC2068] + + + +13.4 getcontentlength Property + + Name: getcontentlength + Namespace: DAV: + Purpose: Contains the Content-Length header returned by a GET + without accept headers. + Description: The getcontentlength property MUST be defined on any + DAV compliant resource that returns the Content-Length header in + response to a GET. + + + +Goland, et al. Standards Track [Page 69] + +RFC 2518 WEBDAV February 1999 + + + Value: content-length ; see section 14.14 of [RFC2068] + + + +13.5 getcontenttype Property + + Name: getcontenttype + Namespace: DAV: + Purpose: Contains the Content-Type header returned by a GET + without accept headers. + Description: This getcontenttype property MUST be defined on any DAV + compliant resource that returns the Content-Type header in response + to a GET. + Value: media-type ; defined in section 3.7 of [RFC2068] + + + +13.6 getetag Property + + Name: getetag + Namespace: DAV: + Purpose: Contains the ETag header returned by a GET without + accept headers. + Description: The getetag property MUST be defined on any DAV + compliant resource that returns the Etag header. + Value: entity-tag ; defined in section 3.11 of [RFC2068] + + + +13.7 getlastmodified Property + + Name: getlastmodified + Namespace: DAV: + Purpose: Contains the Last-Modified header returned by a GET + method without accept headers. + Description: Note that the last-modified date on a resource may + reflect changes in any part of the state of the resource, not + necessarily just a change to the response to the GET method. 
For + example, a change in a property may cause the last-modified date to + change. The getlastmodified property MUST be defined on any DAV + compliant resource that returns the Last-Modified header in response + to a GET. + Value: HTTP-date ; defined in section 3.3.1 of [RFC2068] + + + + + + + + +Goland, et al. Standards Track [Page 70] + +RFC 2518 WEBDAV February 1999 + + +13.8 lockdiscovery Property + + Name: lockdiscovery + Namespace: DAV: + Purpose: Describes the active locks on a resource + Description: The lockdiscovery property returns a listing of who has + a lock, what type of lock he has, the timeout type and the time + remaining on the timeout, and the associated lock token. The server + is free to withhold any or all of this information if the requesting + principal does not have sufficient access rights to see the requested + data. + + + +13.8.1 Example - Retrieving the lockdiscovery Property + + >>Request + + PROPFIND /container/ HTTP/1.1 + Host: www.foo.bar + Content-Length: xxxx + Content-Type: text/xml; charset="utf-8" + + + + + + + >>Response + + HTTP/1.1 207 Multi-Status + Content-Type: text/xml; charset="utf-8" + Content-Length: xxxx + + + + + http://www.foo.bar/container/ + + + + + + + 0 + Jane Smith + Infinite + + + + +Goland, et al. Standards Track [Page 71] + +RFC 2518 WEBDAV February 1999 + + + + opaquelocktoken:f81de2ad-7f3d-a1b2-4f3c-00a0c91a9d76 + + + + + + HTTP/1.1 200 OK + + + + + This resource has a single exclusive write lock on it, with an + infinite timeout. + +13.9 resourcetype Property + + Name: resourcetype + Namespace: DAV: + Purpose: Specifies the nature of the resource. + Description: The resourcetype property MUST be defined on all DAV + compliant resources. The default value is empty. + + + +13.10 source Property + + Name: source + Namespace: DAV: + Purpose: The destination of the source link identifies the + resource that contains the unprocessed source of the link's source. 
+ Description: The source of the link (src) is typically the URI of the + output resource on which the link is defined, and there is typically + only one destination (dst) of the link, which is the URI where the + unprocessed source of the resource may be accessed. When more than + one link destination exists, this specification asserts no policy on + ordering. + + + +13.10.1 Example - A source Property + + + + + + Source + http://foo.bar/program + + + +Goland, et al. Standards Track [Page 72] + +RFC 2518 WEBDAV February 1999 + + + http://foo.bar/src/main.c + + + Library + http://foo.bar/program + http://foo.bar/src/main.lib + + + Makefile + http://foo.bar/program + http://foo.bar/src/makefile + + + + + In this example the resource http://foo.bar/program has a source + property that contains three links. Each link contains three + elements, two of which, src and dst, are part of the DAV schema + defined in this document, and one which is defined by the schema + http://www.foocorp.com/project/ (Source, Library, and Makefile). A + client which only implements the elements in the DAV spec will not + understand the foocorp elements and will ignore them, thus seeing the + expected source and destination links. An enhanced client may know + about the foocorp elements and be able to present the user with + additional information about the links. This example demonstrates + the power of XML markup, allowing element values to be enhanced + without breaking older clients. + +13.11 supportedlock Property + + Name: supportedlock + Namespace: DAV: + Purpose: To provide a listing of the lock capabilities supported + by the resource. + Description: The supportedlock property of a resource returns a + listing of the combinations of scope and access types which may be + specified in a lock request on the resource. Note that the actual + contents are themselves controlled by access controls so a server is + not required to provide information the client is not authorized to + see. 
+ + + +13.11.1 Example - Retrieving the supportedlock Property + + >>Request + + PROPFIND /container/ HTTP/1.1 + + + +Goland, et al. Standards Track [Page 73] + +RFC 2518 WEBDAV February 1999 + + + Host: www.foo.bar + Content-Length: xxxx + Content-Type: text/xml; charset="utf-8" + + + + + + + >>Response + + HTTP/1.1 207 Multi-Status + Content-Type: text/xml; charset="utf-8" + Content-Length: xxxx + + + + + http://www.foo.bar/container/ + + + + + + + + + + + + + + HTTP/1.1 200 OK + + + + +14 Instructions for Processing XML in DAV + + All DAV compliant resources MUST ignore any unknown XML element and + all its children encountered while processing a DAV method that uses + XML as its command language. + + This restriction also applies to the processing, by clients, of DAV + property values where unknown XML elements SHOULD be ignored unless + the property's schema declares otherwise. + + + + + +Goland, et al. Standards Track [Page 74] + +RFC 2518 WEBDAV February 1999 + + + This restriction does not apply to setting dead DAV properties on the + server where the server MUST record unknown XML elements. + + Additionally, this restriction does not apply to the use of XML where + XML happens to be the content type of the entity body, for example, + when used as the body of a PUT. + + Since XML can be transported as text/xml or application/xml, a DAV + server MUST accept DAV method requests with XML parameters + transported as either text/xml or application/xml, and DAV client + MUST accept XML responses using either text/xml or application/xml. + +15 DAV Compliance Classes + + A DAV compliant resource can choose from two classes of compliance. + A client can discover the compliance classes of a resource by + executing OPTIONS on the resource, and examining the "DAV" header + which is returned. + + Since this document describes extensions to the HTTP/1.1 protocol, + minimally all DAV compliant resources, clients, and proxies MUST be + compliant with [RFC2068]. 
+ + Compliance classes are not necessarily sequential. A resource that is + class 2 compliant must also be class 1 compliant; but if additional + compliance classes are defined later, a resource that is class 1, 2, + and 4 compliant might not be class 3 compliant. Also note that + identifiers other than numbers may be used as compliance class + identifiers. + +15.1 Class 1 + + A class 1 compliant resource MUST meet all "MUST" requirements in all + sections of this document. + + Class 1 compliant resources MUST return, at minimum, the value "1" in + the DAV header on all responses to the OPTIONS method. + +15.2 Class 2 + + A class 2 compliant resource MUST meet all class 1 requirements and + support the LOCK method, the supportedlock property, the + lockdiscovery property, the Time-Out response header and the Lock- + Token request header. A class "2" compliant resource SHOULD also + support the Time-Out request header and the owner XML element. + + Class 2 compliant resources MUST return, at minimum, the values "1" + and "2" in the DAV header on all responses to the OPTIONS method. + + + +Goland, et al. Standards Track [Page 75] + +RFC 2518 WEBDAV February 1999 + + +16 Internationalization Considerations + + In the realm of internationalization, this specification complies + with the IETF Character Set Policy [RFC2277]. In this specification, + human-readable fields can be found either in the value of a property, + or in an error message returned in a response entity body. In both + cases, the human-readable content is encoded using XML, which has + explicit provisions for character set tagging and encoding, and + requires that XML processors read XML elements encoded, at minimum, + using the UTF-8 [UTF-8] encoding of the ISO 10646 multilingual plane. 
+ XML examples in this specification demonstrate use of the charset + parameter of the Content-Type header, as defined in [RFC2376], as + well as the XML "encoding" attribute, which together provide charset + identification information for MIME and XML processors. + + XML also provides a language tagging capability for specifying the + language of the contents of a particular XML element. XML uses + either IANA registered language tags (see [RFC1766]) or ISO 639 + language tags [ISO-639] in the "xml:lang" attribute of an XML element + to identify the language of its content and attributes. + + WebDAV applications MUST support the character set tagging, character + set encoding, and the language tagging functionality of the XML + specification. Implementors of WebDAV applications are strongly + encouraged to read "XML Media Types" [RFC2376] for instruction on + which MIME media type to use for XML transport, and on use of the + charset parameter of the Content-Type header. + + Names used within this specification fall into three categories: + names of protocol elements such as methods and headers, names of XML + elements, and names of properties. Naming of protocol elements + follows the precedent of HTTP, using English names encoded in US-ASCII + for methods and headers. Since these protocol elements are not + visible to users, and are in fact simply long token identifiers, they + do not need to support encoding in multiple character sets. + Similarly, though the names of XML elements used in this + specification are English names encoded in UTF-8, these names are not + visible to the user, and hence do not need to support multiple + character set encodings. + + The name of a property defined on a resource is a URI.
Although some + applications (e.g., a generic property viewer) will display property + URIs directly to their users, it is expected that the typical + application will use a fixed set of properties, and will provide a + mapping from the property name URI to a human-readable field when + displaying the property name to a user. It is only in the case where + + + + + +Goland, et al. Standards Track [Page 76] + +RFC 2518 WEBDAV February 1999 + + + the set of properties is not known ahead of time that an application + need display a property name URI to a user. We recommend that + applications provide human-readable property names wherever feasible. + + For error reporting, we follow the convention of HTTP/1.1 status + codes, including with each status code a short, English description + of the code (e.g., 423 (Locked)). While the possibility exists that + a poorly crafted user agent would display this message to a user, + internationalized applications will ignore this message, and display + an appropriate message in the user's language and character set. + + Since interoperation of clients and servers does not require locale + information, this specification does not specify any mechanism for + transmission of this information. + +17 Security Considerations + + This section is provided to detail issues concerning security + implications of which WebDAV applications need to be aware. + + All of the security considerations of HTTP/1.1 (discussed in + [RFC2068]) and XML (discussed in [RFC2376]) also apply to WebDAV. In + addition, the security risks inherent in remote authoring require + stronger authentication technology, introduce several new privacy + concerns, and may increase the hazards from poor server design. + These issues are detailed below. + +17.1 Authentication of Clients + + Due to their emphasis on authoring, WebDAV servers need to use + authentication technology to protect not just access to a network + resource, but the integrity of the resource as well. 
Furthermore, + the introduction of locking functionality requires support for + authentication. + + A password sent in the clear over an insecure channel is an + inadequate means for protecting the accessibility and integrity of a + resource as the password may be intercepted. Since Basic + authentication for HTTP/1.1 performs essentially clear text + transmission of a password, Basic authentication MUST NOT be used to + authenticate a WebDAV client to a server unless the connection is + secure. Furthermore, a WebDAV server MUST NOT send Basic + authentication credentials in a WWW-Authenticate header unless the + connection is secure. Examples of secure connections include a + Transport Layer Security (TLS) connection employing a strong cipher + suite with mutual authentication of client and server, or a + connection over a network which is physically secure, for example, an + isolated network in a building with restricted access. + + + +Goland, et al. Standards Track [Page 77] + +RFC 2518 WEBDAV February 1999 + + + WebDAV applications MUST support the Digest authentication scheme + [RFC2069]. Since Digest authentication verifies that both parties to + a communication know a shared secret, a password, without having to + send that secret in the clear, Digest authentication avoids the + security problems inherent in Basic authentication while providing a + level of authentication which is useful in a wide range of scenarios. + +17.2 Denial of Service + + Denial of service attacks are of special concern to WebDAV servers. + WebDAV plus HTTP enables denial of service attacks on every part of a + system's resources. + + The underlying storage can be attacked by PUTting extremely large + files. + + Asking for recursive operations on large collections can attack + processing time. + + Making multiple pipelined requests on multiple connections can attack + network connections. 
+ + WebDAV servers need to be aware of the possibility of a denial of + service attack at all levels. + +17.3 Security through Obscurity + + WebDAV provides, through the PROPFIND method, a mechanism for listing + the member resources of a collection. This greatly diminishes the + effectiveness of security or privacy techniques that rely only on the + difficulty of discovering the names of network resources. Users of + WebDAV servers are encouraged to use access control techniques to + prevent unwanted access to resources, rather than depending on the + relative obscurity of their resource names. + +17.4 Privacy Issues Connected to Locks + + When submitting a lock request a user agent may also submit an owner + XML field giving contact information for the person taking out the + lock (for those cases where a person, rather than a robot, is taking + out the lock). This contact information is stored in a lockdiscovery + property on the resource, and can be used by other collaborators to + begin negotiation over access to the resource. However, in many + cases this contact information can be very private, and should not be + widely disseminated. Servers SHOULD limit read access to the + lockdiscovery property as appropriate. Furthermore, user agents + + + + + +Goland, et al. Standards Track [Page 78] + +RFC 2518 WEBDAV February 1999 + + + SHOULD provide control over whether contact information is sent at + all, and if contact information is sent, control over exactly what + information is sent. + +17.5 Privacy Issues Connected to Properties + + Since property values are typically used to hold information such as + the author of a document, there is the possibility that privacy + concerns could arise stemming from widespread access to a resource's + property data. 
To reduce the risk of inadvertent release of private + information via properties, servers are encouraged to develop access + control mechanisms that separate read access to the resource body and + read access to the resource's properties. This allows a user to + control the dissemination of their property data without overly + restricting access to the resource's contents. + +17.6 Reduction of Security due to Source Link + + HTTP/1.1 warns against providing read access to script code because + it may contain sensitive information. Yet WebDAV, via its source + link facility, can potentially provide a URI for script resources so + they may be authored. For HTTP/1.1, a server could reasonably + prevent access to source resources due to the predominance of read- + only access. WebDAV, with its emphasis on authoring, encourages read + and write access to source resources, and provides the source link + facility to identify the source. This reduces the security benefits + of eliminating access to source resources. Users and administrators + of WebDAV servers should be very cautious when allowing remote + authoring of scripts, limiting read and write access to the source + resources to authorized principals. + +17.7 Implications of XML External Entities + + XML supports a facility known as "external entities", defined in + section 4.2.2 of [REC-XML], which instruct an XML processor to + retrieve and perform an inline include of XML located at a particular + URI. An external XML entity can be used to append or modify the + document type declaration (DTD) associated with an XML document. An + external XML entity can also be used to include XML within the + content of an XML document. For non-validating XML, such as the XML + used in this specification, including an external XML entity is not + required by [REC-XML]. However, [REC-XML] does state that an XML + processor may, at its discretion, include the external XML entity. 
+ + External XML entities have no inherent trustworthiness and are + subject to all the attacks that are endemic to any HTTP GET request. + Furthermore, it is possible for an external XML entity to modify the + DTD, and hence affect the final form of an XML document, in the worst + + + +Goland, et al. Standards Track [Page 79] + +RFC 2518 WEBDAV February 1999 + + + case significantly modifying its semantics, or exposing the XML + processor to the security risks discussed in [RFC2376]. Therefore, + implementers must be aware that external XML entities should be + treated as untrustworthy. + + There is also the scalability risk that would accompany a widely + deployed application which made use of external XML entities. In + this situation, it is possible that there would be significant + numbers of requests for one external XML entity, potentially + overloading any server which fields requests for the resource + containing the external XML entity. + +17.8 Risks Connected with Lock Tokens + + This specification, in section 6.4, requires the use of Universal + Unique Identifiers (UUIDs) for lock tokens, in order to guarantee + their uniqueness across space and time. UUIDs, as defined in [ISO- + 11578], contain a "node" field which "consists of the IEEE address, + usually the host address. For systems with multiple IEEE 802 nodes, + any available node address can be used." Since a WebDAV server will + issue many locks over its lifetime, the implication is that it will + also be publicly exposing its IEEE 802 address. + + There are several risks associated with exposure of IEEE 802 + addresses. Using the IEEE 802 address: + + * It is possible to track the movement of hardware from subnet to + subnet. + + * It may be possible to identify the manufacturer of the hardware + running a WebDAV server. + + * It may be possible to determine the number of each type of computer + running WebDAV. 
+ + Section 6.4.1 of this specification details an alternate mechanism + for generating the "node" field of a UUID without using an IEEE 802 + address, which alleviates the risks associated with exposure of IEEE + 802 addresses by using an alternate source of uniqueness. + +18 IANA Considerations + + This document defines two namespaces, the namespace of property + names, and the namespace of WebDAV-specific XML elements used within + property values. + + + + + + +Goland, et al. Standards Track [Page 80] + +RFC 2518 WEBDAV February 1999 + + + URIs are used for both names, for several reasons. Assignment of a + URI does not require a request to a central naming authority, and + hence allows WebDAV property names and XML elements to be quickly + defined by any WebDAV user or application. URIs also provide a + unique address space, ensuring that the distributed users of WebDAV + will not have collisions among the property names and XML elements + they create. + + This specification defines a distinguished set of property names and + XML elements that are understood by all WebDAV applications. The + property names and XML elements in this specification are all derived + from the base URI DAV: by adding a suffix to this URI, for example, + DAV:creationdate for the "creationdate" property. + + This specification also defines a URI scheme for the encoding of lock + tokens, the opaquelocktoken URI scheme described in section 6.4. + + To ensure correct interoperation based on this specification, IANA + must reserve the URI namespaces starting with "DAV:" and with + "opaquelocktoken:" for use by this specification, its revisions, and + related WebDAV specifications. + +19 Intellectual Property + + The following notice is copied from RFC 2026 [RFC2026], section 10.4, + and describes the position of the IETF concerning intellectual + property claims made against this document.
+ + The IETF takes no position regarding the validity or scope of any + intellectual property or other rights that might be claimed to + pertain to the implementation or use of the technology described in + this document or the extent to which any license under such rights + might or might not be available; neither does it represent that it + has made any effort to identify any such rights. Information on the + IETF's procedures with respect to rights in standards-track and + standards-related documentation can be found in BCP-11. Copies of + claims of rights made available for publication and any assurances of + licenses to be made available, or the result of an attempt made to + obtain a general license or permission for the use of such + proprietary rights by implementors or users of this specification can + be obtained from the IETF Secretariat. + + The IETF invites any interested party to bring to its attention any + copyrights, patents or patent applications, or other proprietary + rights which may cover technology that may be required to practice + this standard. Please address the information to the IETF Executive + Director. + + + + +Goland, et al. Standards Track [Page 81] + +RFC 2518 WEBDAV February 1999 + + +20 Acknowledgements + + A specification such as this thrives on piercing critical review and + withers from apathetic neglect. The authors gratefully acknowledge + the contributions of the following people, whose insights were so + valuable at every stage of our work.
+ + Terry Allen, Harald Alvestrand, Jim Amsden, Becky Anderson, Alan + Babich, Sanford Barr, Dylan Barrell, Bernard Chester, Tim Berners- + Lee, Dan Connolly, Jim Cunningham, Ron Daniel, Jr., Jim Davis, Keith + Dawson, Mark Day, Brian Deen, Martin Duerst, David Durand, Lee + Farrell, Chuck Fay, Wesley Felter, Roy Fielding, Mark Fisher, Alan + Freier, George Florentine, Jim Gettys, Phill Hallam-Baker, Dennis + Hamilton, Steve Henning, Mead Himelstein, Alex Hopmann, Andre van der + Hoek, Ben Laurie, Paul Leach, Ora Lassila, Karen MacArthur, Steven + Martin, Larry Masinter, Michael Mealling, Keith Moore, Thomas Narten, + Henrik Nielsen, Kenji Ota, Bob Parker, Glenn Peterson, Jon Radoff, + Saveen Reddy, Henry Sanders, Christopher Seiwald, Judith Slein, Mike + Spreitzer, Einar Stefferud, Greg Stein, Ralph Swick, Kenji Takahashi, + Richard N. Taylor, Robert Thau, John Turner, Sankar Virdhagriswaran, + Fabio Vitali, Gregory Woodhouse, and Lauren Wood. + + Two from this list deserve special mention. The contributions by + Larry Masinter have been invaluable, both in helping the formation of + the working group and in patiently coaching the authors along the + way. In so many ways he has set high standards we have toiled to + meet. The contributions of Judith Slein in clarifying the + requirements, and in patiently reviewing draft after draft, both + improved this specification and expanded our minds on document + management. + + We would also like to thank John Turner for developing the XML DTD. + +21 References + +21.1 Normative References + + [RFC1766] Alvestrand, H., "Tags for the Identification of + Languages", RFC 1766, March 1995. + + [RFC2277] Alvestrand, H., "IETF Policy on Character Sets and + Languages", BCP 18, RFC 2277, January 1998. + + [RFC2119] Bradner, S., "Key words for use in RFCs to Indicate + Requirement Levels", BCP 14, RFC 2119, March 1997. + + + + + + +Goland, et al. 
Standards Track [Page 82] + +RFC 2518 WEBDAV February 1999 + + + [RFC2396] Berners-Lee, T., Fielding, R. and L. Masinter, + "Uniform Resource Identifiers (URI): Generic Syntax", + RFC 2396, August 1998. + + [REC-XML] T. Bray, J. Paoli, C. M. Sperberg-McQueen, + "Extensible Markup Language (XML)." World Wide Web + Consortium Recommendation REC-xml-19980210. + http://www.w3.org/TR/1998/REC-xml-19980210. + + [REC-XML-NAMES] T. Bray, D. Hollander, A. Layman, "Namespaces in + XML". World Wide Web Consortium Recommendation REC- + xml-names-19990114. http://www.w3.org/TR/1999/REC- + xml-names-19990114/ + + [RFC2069] Franks, J., Hallam-Baker, P., Hostetler, J., Leach, + P, Luotonen, A., Sink, E. and L. Stewart, "An + Extension to HTTP : Digest Access Authentication", + RFC 2069, January 1997. + + [RFC2068] Fielding, R., Gettys, J., Mogul, J., Frystyk, H. and + T. Berners-Lee, "Hypertext Transfer Protocol -- + HTTP/1.1", RFC 2068, January 1997. + + [ISO-639] ISO (International Organization for Standardization). + ISO 639:1988. "Code for the representation of names + of languages." + + [ISO-8601] ISO (International Organization for Standardization). + ISO 8601:1988. "Data elements and interchange formats + - Information interchange - Representation of dates + and times." + + [ISO-11578] ISO (International Organization for Standardization). + ISO/IEC 11578:1996. "Information technology - Open + Systems Interconnection - Remote Procedure Call + (RPC)" + + [RFC2141] Moats, R., "URN Syntax", RFC 2141, May 1997. + + [UTF-8] Yergeau, F., "UTF-8, a transformation format of + Unicode and ISO 10646", RFC 2279, January 1998. + +21.2 Informational References + + [RFC2026] Bradner, S., "The Internet Standards Process - Revision + 3", BCP 9, RFC 2026, October 1996. + + + + + +Goland, et al. Standards Track [Page 83] + +RFC 2518 WEBDAV February 1999 + + + [RFC1807] Lasher, R. and D. Cohen, "A Format for Bibliographic + Records", RFC 1807, June 1995. + + [WF] C. 
Lagoze, "The Warwick Framework: A Container + Architecture for Diverse Sets of Metadata", D-Lib + Magazine, July/August 1996. + http://www.dlib.org/dlib/july96/lagoze/07lagoze.html + + [USMARC] Network Development and MARC Standards, Office, ed. 1994. + "USMARC Format for Bibliographic Data", 1994. Washington, + DC: Cataloging Distribution Service, Library of Congress. + + [REC-PICS] J. Miller, T. Krauskopf, P. Resnick, W. Treese, "PICS + Label Distribution Label Syntax and Communication + Protocols" Version 1.1, World Wide Web Consortium + Recommendation REC-PICS-labels-961031. + http://www.w3.org/pub/WWW/TR/REC-PICS-labels-961031.html. + + [RFC2291] Slein, J., Vitali, F., Whitehead, E. and D. Durand, + "Requirements for Distributed Authoring and Versioning + Protocol for the World Wide Web", RFC 2291, February 1998. + + [RFC2413] Weibel, S., Kunze, J., Lagoze, C. and M. Wolf, "Dublin + Core Metadata for Resource Discovery", RFC 2413, September + 1998. + + [RFC2376] Whitehead, E. and M. Murata, "XML Media Types", RFC 2376, + July 1998. + +22 Authors' Addresses + + Y. Y. Goland + Microsoft Corporation + One Microsoft Way + Redmond, WA 98052-6399 + + EMail: yarong@microsoft.com + + + E. J. Whitehead, Jr. + Dept. Of Information and Computer Science + University of California, Irvine + Irvine, CA 92697-3425 + + EMail: ejw@ics.uci.edu + + + + + + +Goland, et al. Standards Track [Page 84] + +RFC 2518 WEBDAV February 1999 + + + A. Faizi + Netscape + 685 East Middlefield Road + Mountain View, CA 94043 + + EMail: asad@netscape.com + + + S. R. Carter + Novell + 1555 N. Technology Way + M/S ORM F111 + Orem, UT 84097-2399 + + EMail: srcarter@novell.com + + + D. Jensen + Novell + 1555 N. Technology Way + M/S ORM F111 + Orem, UT 84097-2399 + + EMail: dcjensen@novell.com + + + + + + + + + + + + + + + + + + + + + + + + + + + +Goland, et al. 
Standards Track [Page 85] + +RFC 2518 WEBDAV February 1999 + + +23 Appendices + +23.1 Appendix 1 - WebDAV Document Type Definition + + This section provides a document type definition, following the rules + in [REC-XML], for the XML elements used in the protocol stream and in + the values of properties. It collects the element definitions given + in sections 12 and 13. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Goland, et al. Standards Track [Page 86] + +RFC 2518 WEBDAV February 1999 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ]> + + + + + + + + + + + + + + + + + + + + + +Goland, et al. Standards Track [Page 87] + +RFC 2518 WEBDAV February 1999 + + +23.2 Appendix 2 - ISO 8601 Date and Time Profile + + The creationdate property specifies the use of the ISO 8601 date + format [ISO-8601]. This section defines a profile of the ISO 8601 + date format for use with this specification. This profile is quoted + from an Internet-Draft by Chris Newman, and is mentioned here to + properly attribute his work. + + date-time = full-date "T" full-time + + full-date = date-fullyear "-" date-month "-" date-mday + full-time = partial-time time-offset + + date-fullyear = 4DIGIT + date-month = 2DIGIT ; 01-12 + date-mday = 2DIGIT ; 01-28, 01-29, 01-30, 01-31 based on + month/year + time-hour = 2DIGIT ; 00-23 + time-minute = 2DIGIT ; 00-59 + time-second = 2DIGIT ; 00-59, 00-60 based on leap second rules + time-secfrac = "." 1*DIGIT + time-numoffset = ("+" / "-") time-hour ":" time-minute + time-offset = "Z" / time-numoffset + + partial-time = time-hour ":" time-minute ":" time-second + [time-secfrac] + + Numeric offsets are calculated as local time minus UTC (Coordinated + Universal Time). So the equivalent time in UTC can be determined by + subtracting the offset from the local time. For example, 18:50:00- + 04:00 is the same time as 22:50:00Z.
+ + If the time in UTC is known, but the offset to local time is unknown, + this can be represented with an offset of "-00:00". This differs + from an offset of "Z" which implies that UTC is the preferred + reference point for the specified time. + + + + + + + + + + + + + + + +Goland, et al. Standards Track [Page 88] + +RFC 2518 WEBDAV February 1999 + + +23.3 Appendix 3 - Notes on Processing XML Elements + +23.3.1 Notes on Empty XML Elements + + XML supports two mechanisms for indicating that an XML element does + not have any content. The first is to declare an XML element of the + form <A></A>. The second is to declare an XML element of the form + <A/>. The two XML elements are semantically identical. + + It is a violation of the XML specification to use the <A></A> form if + the associated DTD declares the element to be EMPTY (e.g., <!ELEMENT A + EMPTY>). If such a statement is included, then the empty element + format, <A/> must be used. If the element is not declared to be + EMPTY, then either form <A></A> or <A/> may be used for empty + elements. + +23.3.2 Notes on Illegal XML Processing + + XML is a flexible data format that makes it easy to submit data that + appears legal but in fact is not. The philosophy of "Be flexible in + what you accept and strict in what you send" still applies, but it + must not be applied inappropriately. XML is extremely flexible in + dealing with issues of white space, element ordering, inserting new + elements, etc. This flexibility does not require extension, + especially not in the area of the meaning of elements. + + There is no kindness in accepting illegal combinations of XML + elements. At best it will cause an unwanted result and at worst it + can cause real damage. + +23.3.2.1 Example - XML Syntax Error + + The following request body for a PROPFIND method is illegal. + + <?xml version="1.0" encoding="utf-8" ?> + <D:propfind xmlns:D="DAV:"> + <D:allprop/> + <D:propname/> + </D:propfind> + + The definition of the propfind element only allows for the allprop or + the propname element, not both. Thus the above is an error and must + be responded to with a 400 (Bad Request).
+ + + + + + + + +Goland, et al. Standards Track [Page 89] + +RFC 2518 WEBDAV February 1999 + + + Imagine, however, that a server wanted to be "kind" and decided to + pick the allprop element as the true element and respond to it. A + client running over a bandwidth limited line who intended to execute + a propname would be in for a big surprise if the server treated the + command as an allprop. + + Additionally, if a server were lenient and decided to reply to this + request, the results would vary randomly from server to server, with + some servers executing the allprop directive, and others executing + the propname directive. This reduces interoperability rather than + increasing it. + +23.3.2.2 Example - Unknown XML Element + + The previous example was illegal because it contained two elements + that were explicitly banned from appearing together in the propfind + element. However, XML is an extensible language, so one can imagine + new elements being defined for use with propfind. Below is the + request body of a PROPFIND and, like the previous example, must be + rejected with a 400 (Bad Request) by a server that does not + understand the expired-props element. + + + + + + + To understand why a 400 (Bad Request) is returned let us look at the + request body as the server unfamiliar with expired-props sees it. + + + + + + As the server does not understand the expired-props element, + according to the WebDAV-specific XML processing rules specified in + section 14, it must ignore it. Thus the server sees an empty + propfind, which by the definition of the propfind element is illegal. + + Please note that had the extension been additive it would not + necessarily have resulted in a 400 (Bad Request). For example, + imagine the following request body for a PROPFIND: + + + + + + +Goland, et al. Standards Track [Page 90] + +RFC 2518 WEBDAV February 1999 + + + + *boss* + + + The previous example contains the fictitious element leave-out. 
Its + purpose is to prevent the return of any property whose name matches + the submitted pattern. If the previous example were submitted to a + server unfamiliar with leave-out, the only result would be that the + leave-out element would be ignored and a propname would be executed. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Goland, et al. Standards Track [Page 91] + +RFC 2518 WEBDAV February 1999 + + +23.4 Appendix 4 -- XML Namespaces for WebDAV + +23.4.1 Introduction + + All DAV compliant systems MUST support the XML namespace extensions + as specified in [REC-XML-NAMES]. + +23.4.2 Meaning of Qualified Names + + [Note to the reader: This section does not appear in [REC-XML-NAMES], + but is necessary to avoid ambiguity for WebDAV XML processors.] + + WebDAV compliant XML processors MUST interpret a qualified name as a + URI constructed by appending the LocalPart to the namespace name URI. + + Example + + + + Johnny Updraft + + + + + In this example, the qualified element name "del:glider" is + interpreted as the URL "http://www.del.jensen.org/glider". + + + + Johnny Updraft + + + + + Even though this example is syntactically different from the previous + example, it is semantically identical. Each instance of the + namespace name "bar" is replaced with "http://www.del.jensen.org/" + and then appended to the local name for each element tag. The + resulting tag names in this example are exactly the same as for the + previous example. + + + + Johnny Updraft + + + + + + + +Goland, et al. Standards Track [Page 92] + +RFC 2518 WEBDAV February 1999 + + + This example is semantically identical to the two previous ones. + Each instance of the namespace name "foo" is replaced with + "http://www.del.jensen.org/glide" which is then appended to the local + name for each element tag, the resulting tag names are identical to + those in the previous examples. 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Goland, et al. Standards Track [Page 93] + +RFC 2518 WEBDAV February 1999 + + +24. Full Copyright Statement + + Copyright (C) The Internet Society (1999). All Rights Reserved. + + This document and translations of it may be copied and furnished to + others, and derivative works that comment on or otherwise explain it + or assist in its implementation may be prepared, copied, published + and distributed, in whole or in part, without restriction of any + kind, provided that the above copyright notice and this paragraph are + included on all such copies and derivative works. However, this + document itself may not be modified in any way, such as by removing + the copyright notice or references to the Internet Society or other + Internet organizations, except as needed for the purpose of + developing Internet standards in which case the procedures for + copyrights defined in the Internet Standards process must be + followed, or as required to translate it into languages other than + English. + + The limited permissions granted above are perpetual and will not be + revoked by the Internet Society or its successors or assigns. + + This document and the information contained herein is provided on an + "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING + TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING + BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION + HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF + MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + + + + + + + + + + + + + + + + + + + + + + + + +Goland, et al. 
Standards Track [Page 94] + diff --git a/docs/specs/rfc2616.txt b/docs/specs/rfc2616.txt new file mode 100644 index 0000000..32f6f69 --- /dev/null +++ b/docs/specs/rfc2616.txt @@ -0,0 +1,9934 @@ + +[[ Text in double brackets is from the unofficial errata at ]] +[[ http://skrb.org/ietf/http_errata.html ]] + + +Network Working Group R. Fielding +Request for Comments: 2616 UC Irvine +Obsoletes: 2068 J. Gettys +Category: Standards Track Compaq/W3C + J. Mogul + Compaq + H. Frystyk + W3C/MIT + L. Masinter + Xerox + P. Leach + Microsoft + T. Berners-Lee + W3C/MIT + June 1999 + + + Hypertext Transfer Protocol -- HTTP/1.1 + +Status of this Memo + + This document specifies an Internet standards track protocol for the + Internet community, and requests discussion and suggestions for + improvements. Please refer to the current edition of the "Internet + Official Protocol Standards" (STD 1) for the standardization state + and status of this protocol. Distribution of this memo is unlimited. + +Copyright Notice + + Copyright (C) The Internet Society (1999). All Rights Reserved. + +Abstract + + The Hypertext Transfer Protocol (HTTP) is an application-level + protocol for distributed, collaborative, hypermedia information + systems. It is a generic, stateless, protocol which can be used for + many tasks beyond its use for hypertext, such as name servers and + distributed object management systems, through extension of its + request methods, error codes and headers [47]. A feature of HTTP is + the typing and negotiation of data representation, allowing systems + to be built independently of the data being transferred. + + HTTP has been in use by the World-Wide Web global information + initiative since 1990. This specification defines the protocol + referred to as "HTTP/1.1", and is an update to RFC 2068 [33]. + + + + + + +Fielding, et al. 
Standards Track [Page 1] + +RFC 2616 HTTP/1.1 June 1999 + + +Table of Contents + + 1 Introduction ...................................................7 + 1.1 Purpose......................................................7 + 1.2 Requirements .................................................8 + 1.3 Terminology ..................................................8 + 1.4 Overall Operation ...........................................12 + 2 Notational Conventions and Generic Grammar ....................14 + 2.1 Augmented BNF ...............................................14 + 2.2 Basic Rules .................................................15 + 3 Protocol Parameters ...........................................17 + 3.1 HTTP Version ................................................17 + 3.2 Uniform Resource Identifiers ................................18 + 3.2.1 General Syntax ...........................................19 + 3.2.2 http URL .................................................19 + 3.2.3 URI Comparison ...........................................20 + 3.3 Date/Time Formats ...........................................20 + 3.3.1 Full Date ................................................20 + 3.3.2 Delta Seconds ............................................21 + 3.4 Character Sets ..............................................21 + 3.4.1 Missing Charset ..........................................22 + 3.5 Content Codings .............................................23 + 3.6 Transfer Codings ............................................24 + 3.6.1 Chunked Transfer Coding ..................................25 + 3.7 Media Types .................................................26 + 3.7.1 Canonicalization and Text Defaults .......................27 + 3.7.2 Multipart Types ..........................................27 + 3.8 Product Tokens ..............................................28 + 3.9 Quality Values ..............................................29 + 3.10 Language Tags 
...............................................29 + 3.11 Entity Tags .................................................30 + 3.12 Range Units .................................................30 + 4 HTTP Message ..................................................31 + 4.1 Message Types ...............................................31 + 4.2 Message Headers .............................................31 + 4.3 Message Body ................................................32 + 4.4 Message Length ..............................................33 + 4.5 General Header Fields .......................................34 + 5 Request .......................................................35 + 5.1 Request-Line ................................................35 + 5.1.1 Method ...................................................36 + 5.1.2 Request-URI ..............................................36 + 5.2 The Resource Identified by a Request ........................38 + 5.3 Request Header Fields .......................................38 + 6 Response ......................................................39 + 6.1 Status-Line .................................................39 + 6.1.1 Status Code and Reason Phrase ............................39 + 6.2 Response Header Fields ......................................41 + + + +Fielding, et al. 
Standards Track [Page 2] + +RFC 2616 HTTP/1.1 June 1999 + + + 7 Entity ........................................................42 + 7.1 Entity Header Fields ........................................42 + 7.2 Entity Body .................................................43 + 7.2.1 Type .....................................................43 + 7.2.2 Entity Length ............................................43 + 8 Connections ...................................................44 + 8.1 Persistent Connections ......................................44 + 8.1.1 Purpose ..................................................44 + 8.1.2 Overall Operation ........................................45 + 8.1.3 Proxy Servers ............................................46 + 8.1.4 Practical Considerations .................................46 + 8.2 Message Transmission Requirements ...........................47 + 8.2.1 Persistent Connections and Flow Control ..................47 + 8.2.2 Monitoring Connections for Error Status Messages .........48 + 8.2.3 Use of the 100 (Continue) Status .........................48 + 8.2.4 Client Behavior if Server Prematurely Closes Connection ..50 + 9 Method Definitions ............................................51 + 9.1 Safe and Idempotent Methods .................................51 + 9.1.1 Safe Methods .............................................51 + 9.1.2 Idempotent Methods .......................................51 + 9.2 OPTIONS .....................................................52 + 9.3 GET .........................................................53 + 9.4 HEAD ........................................................54 + 9.5 POST ........................................................54 + 9.6 PUT .........................................................55 + 9.7 DELETE ......................................................56 + 9.8 TRACE .......................................................56 + 9.9 CONNECT 
.....................................................57 + 10 Status Code Definitions ......................................57 + 10.1 Informational 1xx ...........................................57 + 10.1.1 100 Continue .............................................58 + 10.1.2 101 Switching Protocols ..................................58 + 10.2 Successful 2xx ..............................................58 + 10.2.1 200 OK ...................................................58 + 10.2.2 201 Created ..............................................59 + 10.2.3 202 Accepted .............................................59 + 10.2.4 203 Non-Authoritative Information ........................59 + 10.2.5 204 No Content ...........................................60 + 10.2.6 205 Reset Content ........................................60 + 10.2.7 206 Partial Content ......................................60 + 10.3 Redirection 3xx .............................................61 + 10.3.1 300 Multiple Choices .....................................61 + 10.3.2 301 Moved Permanently ....................................62 + 10.3.3 302 Found ................................................62 + 10.3.4 303 See Other ............................................63 + 10.3.5 304 Not Modified .........................................63 + 10.3.6 305 Use Proxy ............................................64 + 10.3.7 306 (Unused) .............................................64 + + + +Fielding, et al. 
Standards Track [Page 3] + +RFC 2616 HTTP/1.1 June 1999 + + + 10.3.8 307 Temporary Redirect ...................................65 + 10.4 Client Error 4xx ............................................65 + 10.4.1 400 Bad Request .........................................65 + 10.4.2 401 Unauthorized ........................................66 + 10.4.3 402 Payment Required ....................................66 + 10.4.4 403 Forbidden ...........................................66 + 10.4.5 404 Not Found ...........................................66 + 10.4.6 405 Method Not Allowed ..................................66 + 10.4.7 406 Not Acceptable ......................................67 + 10.4.8 407 Proxy Authentication Required .......................67 + 10.4.9 408 Request Timeout .....................................67 + 10.4.10 409 Conflict ............................................67 + 10.4.11 410 Gone ................................................68 + 10.4.12 411 Length Required .....................................68 + 10.4.13 412 Precondition Failed .................................68 + 10.4.14 413 Request Entity Too Large ............................69 + 10.4.15 414 Request-URI Too Long ................................69 + 10.4.16 415 Unsupported Media Type ..............................69 + 10.4.17 416 Requested Range Not Satisfiable .....................69 + 10.4.18 417 Expectation Failed ..................................70 + 10.5 Server Error 5xx ............................................70 + 10.5.1 500 Internal Server Error ................................70 + 10.5.2 501 Not Implemented ......................................70 + 10.5.3 502 Bad Gateway ..........................................70 + 10.5.4 503 Service Unavailable ..................................70 + 10.5.5 504 Gateway Timeout ......................................71 + 10.5.6 505 HTTP Version Not Supported ...........................71 + 11 Access Authentication 
........................................71 + 12 Content Negotiation ..........................................71 + 12.1 Server-driven Negotiation ...................................72 + 12.2 Agent-driven Negotiation ....................................73 + 12.3 Transparent Negotiation .....................................74 + 13 Caching in HTTP ..............................................74 + 13.1.1 Cache Correctness ........................................75 + 13.1.2 Warnings .................................................76 + 13.1.3 Cache-control Mechanisms .................................77 + 13.1.4 Explicit User Agent Warnings .............................78 + 13.1.5 Exceptions to the Rules and Warnings .....................78 + 13.1.6 Client-controlled Behavior ...............................79 + 13.2 Expiration Model ............................................79 + 13.2.1 Server-Specified Expiration ..............................79 + 13.2.2 Heuristic Expiration .....................................80 + 13.2.3 Age Calculations .........................................80 + 13.2.4 Expiration Calculations ..................................83 + 13.2.5 Disambiguating Expiration Values .........................84 + 13.2.6 Disambiguating Multiple Responses ........................84 + 13.3 Validation Model ............................................85 + 13.3.1 Last-Modified Dates ......................................86 + + + +Fielding, et al. 
Standards Track [Page 4] + +RFC 2616 HTTP/1.1 June 1999 + + + 13.3.2 Entity Tag Cache Validators ..............................86 + 13.3.3 Weak and Strong Validators ...............................86 + 13.3.4 Rules for When to Use Entity Tags and Last-Modified Dates.89 + 13.3.5 Non-validating Conditionals ..............................90 + 13.4 Response Cacheability .......................................91 + 13.5 Constructing Responses From Caches ..........................92 + 13.5.1 End-to-end and Hop-by-hop Headers ........................92 + 13.5.2 Non-modifiable Headers ...................................92 + 13.5.3 Combining Headers ........................................94 + 13.5.4 Combining Byte Ranges ....................................95 + 13.6 Caching Negotiated Responses ................................95 + 13.7 Shared and Non-Shared Caches ................................96 + 13.8 Errors or Incomplete Response Cache Behavior ................97 + 13.9 Side Effects of GET and HEAD ................................97 + 13.10 Invalidation After Updates or Deletions ...................97 + 13.11 Write-Through Mandatory ...................................98 + 13.12 Cache Replacement .........................................99 + 13.13 History Lists .............................................99 + 14 Header Field Definitions ....................................100 + 14.1 Accept .....................................................100 + 14.2 Accept-Charset .............................................102 + 14.3 Accept-Encoding ............................................102 + 14.4 Accept-Language ............................................104 + 14.5 Accept-Ranges ..............................................105 + 14.6 Age ........................................................106 + 14.7 Allow ......................................................106 + 14.8 Authorization ..............................................107 + 14.9 Cache-Control 
..............................................108 + 14.9.1 What is Cacheable .......................................109 + 14.9.2 What May be Stored by Caches ............................110 + 14.9.3 Modifications of the Basic Expiration Mechanism .........111 + 14.9.4 Cache Revalidation and Reload Controls ..................113 + 14.9.5 No-Transform Directive ..................................115 + 14.9.6 Cache Control Extensions ................................116 + 14.10 Connection ...............................................117 + 14.11 Content-Encoding .........................................118 + 14.12 Content-Language .........................................118 + 14.13 Content-Length ...........................................119 + 14.14 Content-Location .........................................120 + 14.15 Content-MD5 ..............................................121 + 14.16 Content-Range ............................................122 + 14.17 Content-Type .............................................124 + 14.18 Date .....................................................124 + 14.18.1 Clockless Origin Server Operation ......................125 + 14.19 ETag .....................................................126 + 14.20 Expect ...................................................126 + 14.21 Expires ..................................................127 + 14.22 From .....................................................128 + + + +Fielding, et al. 
Standards Track [Page 5] + +RFC 2616 HTTP/1.1 June 1999 + + + 14.23 Host .....................................................128 + 14.24 If-Match .................................................129 + 14.25 If-Modified-Since ........................................130 + 14.26 If-None-Match ............................................132 + 14.27 If-Range .................................................133 + 14.28 If-Unmodified-Since ......................................134 + 14.29 Last-Modified ............................................134 + 14.30 Location .................................................135 + 14.31 Max-Forwards .............................................136 + 14.32 Pragma ...................................................136 + 14.33 Proxy-Authenticate .......................................137 + 14.34 Proxy-Authorization ......................................137 + 14.35 Range ....................................................138 + 14.35.1 Byte Ranges ...........................................138 + 14.35.2 Range Retrieval Requests ..............................139 + 14.36 Referer ..................................................140 + 14.37 Retry-After ..............................................141 + 14.38 Server ...................................................141 + 14.39 TE .......................................................142 + 14.40 Trailer ..................................................143 + 14.41 Transfer-Encoding..........................................143 + 14.42 Upgrade ..................................................144 + 14.43 User-Agent ...............................................145 + 14.44 Vary .....................................................145 + 14.45 Via ......................................................146 + 14.46 Warning ..................................................148 + 14.47 WWW-Authenticate .........................................150 + 15 Security Considerations 
.......................................150 + 15.1 Personal Information....................................151 + 15.1.1 Abuse of Server Log Information .........................151 + 15.1.2 Transfer of Sensitive Information .......................151 + 15.1.3 Encoding Sensitive Information in URI's .................152 + 15.1.4 Privacy Issues Connected to Accept Headers ..............152 + 15.2 Attacks Based On File and Path Names .......................153 + 15.3 DNS Spoofing ...............................................154 + 15.4 Location Headers and Spoofing ..............................154 + 15.5 Content-Disposition Issues .................................154 + 15.6 Authentication Credentials and Idle Clients ................155 + 15.7 Proxies and Caching ........................................155 + 15.7.1 Denial of Service Attacks on Proxies....................156 + 16 Acknowledgments .............................................156 + 17 References ..................................................158 + 18 Authors' Addresses ..........................................162 + 19 Appendices ..................................................164 + 19.1 Internet Media Type message/http and application/http ......164 + 19.2 Internet Media Type multipart/byteranges ...................165 + 19.3 Tolerant Applications ......................................166 + 19.4 Differences Between HTTP Entities and RFC 2045 Entities ....167 + + + +Fielding, et al. 
Standards Track [Page 6] + +RFC 2616 HTTP/1.1 June 1999 + + + 19.4.1 MIME-Version ............................................167 + 19.4.2 Conversion to Canonical Form ............................167 + 19.4.3 Conversion of Date Formats ..............................168 + 19.4.4 Introduction of Content-Encoding ........................168 + 19.4.5 No Content-Transfer-Encoding ............................168 + 19.4.6 Introduction of Transfer-Encoding .......................169 + 19.4.7 MHTML and Line Length Limitations .......................169 + 19.5 Additional Features ........................................169 + 19.5.1 Content-Disposition .....................................170 + 19.6 Compatibility with Previous Versions .......................170 + 19.6.1 Changes from HTTP/1.0 ...................................171 + 19.6.2 Compatibility with HTTP/1.0 Persistent Connections ......172 + 19.6.3 Changes from RFC 2068 ...................................172 + 20 Index .......................................................175 + 21 Full Copyright Statement ....................................176 + +1 Introduction + +1.1 Purpose + + The Hypertext Transfer Protocol (HTTP) is an application-level + protocol for distributed, collaborative, hypermedia information + systems. HTTP has been in use by the World-Wide Web global + information initiative since 1990. The first version of HTTP, + referred to as HTTP/0.9, was a simple protocol for raw data transfer + across the Internet. HTTP/1.0, as defined by RFC 1945 [6], improved + the protocol by allowing messages to be in the format of MIME-like + messages, containing metainformation about the data transferred and + modifiers on the request/response semantics. However, HTTP/1.0 does + not sufficiently take into consideration the effects of hierarchical + proxies, caching, the need for persistent connections, or virtual + hosts. 
In addition, the proliferation of incompletely-implemented + applications calling themselves "HTTP/1.0" has necessitated a + protocol version change in order for two communicating applications + to determine each other's true capabilities. + + This specification defines the protocol referred to as "HTTP/1.1". + This protocol includes more stringent requirements than HTTP/1.0 in + order to ensure reliable implementation of its features. + + Practical information systems require more functionality than simple + retrieval, including search, front-end update, and annotation. HTTP + allows an open-ended set of methods and headers that indicate the + purpose of a request [47]. It builds on the discipline of reference + provided by the Uniform Resource Identifier (URI) [3], as a location + (URL) [4] or name (URN) [20], for indicating the resource to which a + + + + + +Fielding, et al. Standards Track [Page 7] + +RFC 2616 HTTP/1.1 June 1999 + + + method is to be applied. Messages are passed in a format similar to + that used by Internet mail [9] as defined by the Multipurpose + Internet Mail Extensions (MIME) [7]. + + HTTP is also used as a generic protocol for communication between + user agents and proxies/gateways to other Internet systems, including + those supported by the SMTP [16], NNTP [13], FTP [18], Gopher [2], + and WAIS [10] protocols. In this way, HTTP allows basic hypermedia + access to resources available from diverse applications. + +1.2 Requirements + + The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", + "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this + document are to be interpreted as described in RFC 2119 [34]. + + An implementation is not compliant if it fails to satisfy one or more + of the MUST or REQUIRED level requirements for the protocols it + implements. 
An implementation that satisfies all the MUST or REQUIRED + level and all the SHOULD level requirements for its protocols is said + to be "unconditionally compliant"; one that satisfies all the MUST + level requirements but not all the SHOULD level requirements for its + protocols is said to be "conditionally compliant." + +1.3 Terminology + + This specification uses a number of terms to refer to the roles + played by participants in, and objects of, the HTTP communication. + + connection + A transport layer virtual circuit established between two programs + for the purpose of communication. + + message + The basic unit of HTTP communication, consisting of a structured + sequence of octets matching the syntax defined in section 4 and + transmitted via the connection. + + request + An HTTP request message, as defined in section 5. + + response + An HTTP response message, as defined in section 6. + + + + + + + + +Fielding, et al. Standards Track [Page 8] + +RFC 2616 HTTP/1.1 June 1999 + + + resource + A network data object or service that can be identified by a URI, + as defined in section 3.2. Resources may be available in multiple + representations (e.g. multiple languages, data formats, size, and + resolutions) or vary in other ways. + + entity + The information transferred as the payload of a request or + response. An entity consists of metainformation in the form of + entity-header fields and content in the form of an entity-body, as + described in section 7. + + representation + An entity included with a response that is subject to content + negotiation, as described in section 12. There may exist multiple + representations associated with a particular response status. + + content negotiation + The mechanism for selecting the appropriate representation when + servicing a request, as described in section 12. The + representation of entities in any response can be negotiated + (including error responses). 
variant + A resource may have one, or more than one, representation(s) + associated with it at any given instant. Each of these + representations is termed a `variant'. Use of the term `variant' + does not necessarily imply that the resource is subject to content + negotiation.
Except + where either transparent or non-transparent behavior is explicitly + stated, the HTTP proxy requirements apply to both types of + proxies. + + gateway + A server which acts as an intermediary for some other server. + Unlike a proxy, a gateway receives requests as if it were the + origin server for the requested resource; the requesting client + may not be aware that it is communicating with a gateway. + + tunnel + An intermediary program which is acting as a blind relay between + two connections. Once active, a tunnel is not considered a party + to the HTTP communication, though the tunnel may have been + initiated by an HTTP request. The tunnel ceases to exist when both + ends of the relayed connections are closed. + + cache + A program's local store of response messages and the subsystem + that controls its message storage, retrieval, and deletion. A + cache stores cacheable responses in order to reduce the response + time and network bandwidth consumption on future, equivalent + requests. Any client or server may include a cache, though a cache + cannot be used by a server that is acting as a tunnel. + + cacheable + A response is cacheable if a cache is allowed to store a copy of + the response message for use in answering subsequent requests. The + rules for determining the cacheability of HTTP responses are + defined in section 13. Even if a resource is cacheable, there may + be additional constraints on whether a cache can use the cached + copy for a particular request. + + + + +Fielding, et al. Standards Track [Page 10] + +RFC 2616 HTTP/1.1 June 1999 + + + first-hand + A response is first-hand if it comes directly and without + unnecessary delay from the origin server, perhaps via one or more + proxies. A response is also first-hand if its validity has just + been checked directly with the origin server. 
+ + explicit expiration time + The time at which the origin server intends that an entity should + no longer be returned by a cache without further validation. + + heuristic expiration time + An expiration time assigned by a cache when no explicit expiration + time is available. + + age + The age of a response is the time since it was sent by, or + successfully validated with, the origin server. + + freshness lifetime + The length of time between the generation of a response and its + expiration time. + + fresh + A response is fresh if its age has not yet exceeded its freshness + lifetime. + + stale + A response is stale if its age has passed its freshness lifetime. + + semantically transparent + A cache behaves in a "semantically transparent" manner, with + respect to a particular response, when its use affects neither the + requesting client nor the origin server, except to improve + performance. When a cache is semantically transparent, the client + receives exactly the same response (except for hop-by-hop headers) + that it would have received had its request been handled directly + by the origin server. + + validator + A protocol element (e.g., an entity tag or a Last-Modified time) + that is used to find out whether a cache entry is an equivalent + copy of an entity. + + upstream/downstream + Upstream and downstream describe the flow of a message: all + messages flow from upstream to downstream. + + + + + +Fielding, et al. Standards Track [Page 11] + +RFC 2616 HTTP/1.1 June 1999 + + + inbound/outbound + Inbound and outbound refer to the request and response paths for + messages: "inbound" means "traveling toward the origin server", + and "outbound" means "traveling toward the user agent" + +1.4 Overall Operation + + The HTTP protocol is a request/response protocol. 
A client sends a + request to the server in the form of a request method, URI, and + protocol version, followed by a MIME-like message containing request + modifiers, client information, and possible body content over a + connection with a server. The server responds with a status line, + including the message's protocol version and a success or error code, + followed by a MIME-like message containing server information, entity + metainformation, and possible entity-body content. The relationship + between HTTP and MIME is described in appendix 19.4. + + Most HTTP communication is initiated by a user agent and consists of + a request to be applied to a resource on some origin server. In the + simplest case, this may be accomplished via a single connection (v) + between the user agent (UA) and the origin server (O). + + request chain ------------------------> + UA -------------------v------------------- O + <----------------------- response chain + + A more complicated situation occurs when one or more intermediaries + are present in the request/response chain. There are three common + forms of intermediary: proxy, gateway, and tunnel. A proxy is a + forwarding agent, receiving requests for a URI in its absolute form, + rewriting all or part of the message, and forwarding the reformatted + request toward the server identified by the URI. A gateway is a + receiving agent, acting as a layer above some other server(s) and, if + necessary, translating the requests to the underlying server's + protocol. A tunnel acts as a relay point between two connections + without changing the messages; tunnels are used when the + communication needs to pass through an intermediary (such as a + firewall) even when the intermediary cannot understand the contents + of the messages. 
+ + request chain --------------------------------------> + UA -----v----- A -----v----- B -----v----- C -----v----- O + <------------------------------------- response chain + + The figure above shows three intermediaries (A, B, and C) between the + user agent and origin server. A request or response message that + travels the whole chain will pass through four separate connections. + This distinction is important because some HTTP communication options + + + +Fielding, et al. Standards Track [Page 12] + +RFC 2616 HTTP/1.1 June 1999 + + + may apply only to the connection with the nearest, non-tunnel + neighbor, only to the end-points of the chain, or to all connections + along the chain. Although the diagram is linear, each participant may + be engaged in multiple, simultaneous communications. For example, B + may be receiving requests from many clients other than A, and/or + forwarding requests to servers other than C, at the same time that it + is handling A's request. + + Any party to the communication which is not acting as a tunnel may + employ an internal cache for handling requests. The effect of a cache + is that the request/response chain is shortened if one of the + participants along the chain has a cached response applicable to that + request. The following illustrates the resulting chain if B has a + cached copy of an earlier response from O (via C) for a request which + has not been cached by UA or A. + + request chain ----------> + UA -----v----- A -----v----- B - - - - - - C - - - - - - O + <--------- response chain + + Not all responses are usefully cacheable, and some requests may + contain modifiers which place special requirements on cache behavior. + HTTP requirements for cache behavior and cacheable responses are + defined in section 13. + + In fact, there are a wide variety of architectures and configurations + of caches and proxies currently being experimented with or deployed + across the World Wide Web. 
These systems include national hierarchies + of proxy caches to save transoceanic bandwidth, systems that + broadcast or multicast cache entries, organizations that distribute + subsets of cached data via CD-ROM, and so on. HTTP systems are used + in corporate intranets over high-bandwidth links, and for access via + PDAs with low-power radio links and intermittent connectivity. The + goal of HTTP/1.1 is to support the wide diversity of configurations + already deployed while introducing protocol constructs that meet the + needs of those who build web applications that require high + reliability and, failing that, at least reliable indications of + failure. + + HTTP communication usually takes place over TCP/IP connections. The + default port is TCP 80 [19], but other ports can be used. This does + not preclude HTTP from being implemented on top of any other protocol + on the Internet, or on other networks. HTTP only presumes a reliable + transport; any protocol that provides such guarantees can be used; + the mapping of the HTTP/1.1 request and response structures onto the + transport data units of the protocol in question is outside the scope + of this specification. + + + + +Fielding, et al. Standards Track [Page 13] + +RFC 2616 HTTP/1.1 June 1999 + + + In HTTP/1.0, most implementations used a new connection for each + request/response exchange. In HTTP/1.1, a connection may be used for + one or more request/response exchanges, although connections may be + closed for a variety of reasons (see section 8.1). + +2 Notational Conventions and Generic Grammar + +2.1 Augmented BNF + + All of the mechanisms specified in this document are described in + both prose and an augmented Backus-Naur Form (BNF) similar to that + used by RFC 822 [9]. Implementors will need to be familiar with the + notation in order to understand this specification. 
The augmented BNF + includes the following constructs: + + name = definition + The name of a rule is simply the name itself (without any + enclosing "<" and ">") and is separated from its definition by the + equal "=" character. White space is only significant in that + indentation of continuation lines is used to indicate a rule + definition that spans more than one line. Certain basic rules are + in uppercase, such as SP, LWS, HT, CRLF, DIGIT, ALPHA, etc. Angle + brackets are used within definitions whenever their presence will + facilitate discerning the use of rule names. + + "literal" + Quotation marks surround literal text. Unless stated otherwise, + the text is case-insensitive. + + rule1 | rule2 + Elements separated by a bar ("|") are alternatives, e.g., "yes | + no" will accept yes or no. + + (rule1 rule2) + Elements enclosed in parentheses are treated as a single element. + Thus, "(elem (foo | bar) elem)" allows the token sequences "elem + foo elem" and "elem bar elem". + + *rule + The character "*" preceding an element indicates repetition. The + full form is "*element" indicating at least and at most + occurrences of element. Default values are 0 and infinity so + that "*(element)" allows any number, including zero; "1*element" + requires at least one; and "1*2element" allows one or two. + + [rule] + Square brackets enclose optional elements; "[foo bar]" is + equivalent to "*1(foo bar)". + + + +Fielding, et al. Standards Track [Page 14] + +RFC 2616 HTTP/1.1 June 1999 + + + N rule + Specific repetition: "(element)" is equivalent to + "*(element)"; that is, exactly occurrences of (element). + Thus 2DIGIT is a 2-digit number, and 3ALPHA is a string of three + alphabetic characters. + + #rule + A construct "#" is defined, similar to "*", for defining lists of + elements. The full form is "#element" indicating at least + and at most elements, each separated by one or more commas + (",") and OPTIONAL linear white space (LWS). 
This makes the usual + form of lists very easy; a rule such as + ( *LWS element *( *LWS "," *LWS element )) + can be shown as + 1#element + Wherever this construct is used, null elements are allowed, but do + not contribute to the count of elements present. That is, + "(element), , (element) " is permitted, but counts as only two + elements. Therefore, where at least one element is required, at + least one non-null element MUST be present. Default values are 0 + and infinity so that "#element" allows any number, including zero; + "1#element" requires at least one; and "1#2element" allows one or + two. + + ; comment + A semi-colon, set off some distance to the right of rule text, + starts a comment that continues to the end of line. This is a + simple way of including useful notes in parallel with the + specifications. + + implied *LWS + The grammar described by this specification is word-based. Except + where noted otherwise, linear white space (LWS) can be included + between any two adjacent words (token or quoted-string), and + between adjacent words and separators, without changing the + interpretation of a field. At least one delimiter (LWS and/or + + separators) MUST exist between any two tokens (for the definition + of "token" below), since they would otherwise be interpreted as a + single token. + +2.2 Basic Rules + + The following rules are used throughout this specification to + describe basic parsing constructs. The US-ASCII coded character set + is defined by ANSI X3.4-1986 [21]. + + + + + +Fielding, et al. Standards Track [Page 15] + +RFC 2616 HTTP/1.1 June 1999 + + + OCTET = + CHAR = + UPALPHA = + LOALPHA = + ALPHA = UPALPHA | LOALPHA + DIGIT = + CTL = + CR = + LF = + SP = + HT = + <"> = + + HTTP/1.1 defines the sequence CR LF as the end-of-line marker for all + protocol elements except the entity-body (see appendix 19.3 for + tolerant applications). 
The end-of-line marker within an entity-body + is defined by its associated media type, as described in section 3.7. + + CRLF = CR LF + + HTTP/1.1 header field values can be folded onto multiple lines if the + continuation line begins with a space or horizontal tab. All linear + white space, including folding, has the same semantics as SP. A + recipient MAY replace any linear white space with a single SP before + interpreting the field value or forwarding the message downstream. + + LWS = [CRLF] 1*( SP | HT ) + + The TEXT rule is only used for descriptive field contents and values + that are not intended to be interpreted by the message parser. Words + of *TEXT MAY contain characters from character sets other than ISO- + 8859-1 [22] only when encoded according to the rules of RFC 2047 + [14]. + + TEXT = + + A CRLF is allowed in the definition of TEXT only as part of a header + field continuation. It is expected that the folding LWS will be + replaced with a single SP before interpretation of the TEXT value. + + Hexadecimal numeric characters are used in several protocol elements. + + HEX = "A" | "B" | "C" | "D" | "E" | "F" + | "a" | "b" | "c" | "d" | "e" | "f" | DIGIT + + + + + +Fielding, et al. Standards Track [Page 16] + +RFC 2616 HTTP/1.1 June 1999 + + + Many HTTP/1.1 header field values consist of words separated by LWS + or special characters. These special characters MUST be in a quoted + string to be used within a parameter value (as defined in section + 3.6). + + token = 1* + separators = "(" | ")" | "<" | ">" | "@" + | "," | ";" | ":" | "\" | <"> + | "/" | "[" | "]" | "?" | "=" + | "{" | "}" | SP | HT + + Comments can be included in some HTTP header fields by surrounding + the comment text with parentheses. Comments are only allowed in + fields containing "comment" as part of their field value definition. + In all other fields, parentheses are considered part of the field + value. 
+ + comment = "(" *( ctext | quoted-pair | comment ) ")" + ctext = + + A string of text is parsed as a single word if it is quoted using + double-quote marks. + + quoted-string = ( <"> *(qdtext | quoted-pair ) <"> ) + qdtext = > + + The backslash character ("\") MAY be used as a single-character + quoting mechanism only within quoted-string and comment constructs. + + quoted-pair = "\" CHAR + +3 Protocol Parameters + +3.1 HTTP Version + + HTTP uses a "." numbering scheme to indicate versions + of the protocol. The protocol versioning policy is intended to allow + the sender to indicate the format of a message and its capacity for + understanding further HTTP communication, rather than the features + obtained via that communication. No change is made to the version + number for the addition of message components which do not affect + communication behavior or which only add to extensible field values. + The number is incremented when the changes made to the + protocol add features which do not change the general message parsing + algorithm, but which may add to the message semantics and imply + additional capabilities of the sender. The number is + incremented when the format of a message within the protocol is + changed. See RFC 2145 [36] for a fuller explanation. + + + +Fielding, et al. Standards Track [Page 17] + +RFC 2616 HTTP/1.1 June 1999 + + + The version of an HTTP message is indicated by an HTTP-Version field + in the first line of the message. [[HTTP-Version is case-sensitive.]] + + HTTP-Version = "HTTP" "/" 1*DIGIT "." 1*DIGIT + + Note that the major and minor numbers MUST be treated as separate + integers and that each MAY be incremented higher than a single digit. + Thus, HTTP/2.4 is a lower version than HTTP/2.13, which in turn is + lower than HTTP/12.3. Leading zeros MUST be ignored by recipients and + MUST NOT be sent. 
+ + An application that sends a request or response message that includes + HTTP-Version of "HTTP/1.1" MUST be at least conditionally compliant + with this specification. Applications that are at least conditionally + compliant with this specification SHOULD use an HTTP-Version of + "HTTP/1.1" in their messages, and MUST do so for any message that is + not compatible with HTTP/1.0. For more details on when to send + specific HTTP-Version values, see RFC 2145 [36]. + + The HTTP version of an application is the highest HTTP version for + which the application is at least conditionally compliant. + + Proxy and gateway applications need to be careful when forwarding + messages in protocol versions different from that of the application. + Since the protocol version indicates the protocol capability of the + sender, a proxy/gateway MUST NOT send a message with a version + indicator which is greater than its actual version. If a higher + version request is received, the proxy/gateway MUST either downgrade + the request version, or respond with an error, or switch to tunnel + behavior. + + Due to interoperability problems with HTTP/1.0 proxies discovered + since the publication of RFC 2068[33], caching proxies MUST, gateways + MAY, and tunnels MUST NOT upgrade the request to the highest version + they support. The proxy/gateway's response to that request MUST be in + the same major version as the request. + + Note: Converting between versions of HTTP may involve modification + of header fields required or forbidden by the versions involved. + +3.2 Uniform Resource Identifiers + + URIs have been known by many names: WWW addresses, Universal Document + Identifiers, Universal Resource Identifiers [3], and finally the + combination of Uniform Resource Locators (URL) [4] and Names (URN) + [20]. As far as HTTP is concerned, Uniform Resource Identifiers are + simply formatted strings which identify--via name, location, or any + other characteristic--a resource. 
+ + + +Fielding, et al. Standards Track [Page 18] + +RFC 2616 HTTP/1.1 June 1999 + + +3.2.1 General Syntax + + URIs in HTTP can be represented in absolute form or relative to some + known base URI [11], depending upon the context of their use. The two + forms are differentiated by the fact that absolute URIs always begin + with a scheme name followed by a colon. For definitive information on + URL syntax and semantics, see "Uniform Resource Identifiers (URI): + Generic Syntax and Semantics," RFC 2396 [42] (which replaces RFCs + 1738 [4] and RFC 1808 [11]). This specification adopts the + definitions of "URI-reference", "absoluteURI", "relativeURI", "port", + "host","abs_path", "rel_path", and "authority" from that + specification. + + The HTTP protocol does not place any a priori limit on the length of + a URI. Servers MUST be able to handle the URI of any resource they + serve, and SHOULD be able to handle URIs of unbounded length if they + provide GET-based forms that could generate such URIs. A server + SHOULD return 414 (Request-URI Too Long) status if a URI is longer + than the server can handle (see section 10.4.15). + + Note: Servers ought to be cautious about depending on URI lengths + above 255 bytes, because some older client or proxy + implementations might not properly support these lengths. + +3.2.2 http URL + + The "http" scheme is used to locate network resources via the HTTP + protocol. This section defines the scheme-specific syntax and + semantics for http URLs. + + http_URL = "http:" "//" host [ ":" port ] [ abs_path [ "?" query ]] + + If the port is empty or not given, port 80 is assumed. The semantics + are that the identified resource is located at the server listening + for TCP connections on that port of that host, and the Request-URI + for the resource is abs_path (section 5.1.2). The use of IP addresses + in URLs SHOULD be avoided whenever possible (see RFC 1900 [24]). 
If + the abs_path is not present in the URL, it MUST be given as "/" when + used as a Request-URI for a resource (section 5.1.2). If a proxy + receives a host name which is not a fully qualified domain name, it + MAY add its domain to the host name it received. If a proxy receives + a fully qualified domain name, the proxy MUST NOT change the host + name. + + + + + + + + +Fielding, et al. Standards Track [Page 19] + +RFC 2616 HTTP/1.1 June 1999 + + +3.2.3 URI Comparison + + When comparing two URIs to decide if they match or not, a client + SHOULD use a case-sensitive octet-by-octet comparison of the entire + URIs, with these exceptions: + + - A port that is empty or not given is equivalent to the default + port for that URI-reference; + + - Comparisons of host names MUST be case-insensitive; + + - Comparisons of scheme names MUST be case-insensitive; + + - An empty abs_path is equivalent to an abs_path of "/". + + Characters other than those in the "reserved" and "unsafe" sets (see + RFC 2396 [42]) are equivalent to their ""%" HEX HEX" encoding. + [[ Ignore reference to "unsafe" set. ]] + + For example, the following three URIs are equivalent: + + http://abc.com:80/~smith/home.html + http://ABC.com/%7Esmith/home.html + http://ABC.com:/%7esmith/home.html + +3.3 Date/Time Formats + +3.3.1 Full Date + + HTTP applications have historically allowed three different formats + for the representation of date/time stamps: + + Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123 + Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036 + Sun Nov 6 08:49:37 1994 ; ANSI C's asctime() format + + The first format is preferred as an Internet standard and represents + a fixed-length subset of that defined by RFC 1123 [8] (an update to + RFC 822 [9]). The second format is in common use, but is based on the + obsolete RFC 850 [12] date format and lacks a four-digit year. 
+ HTTP/1.1 clients and servers that parse the date value MUST accept + all three formats (for compatibility with HTTP/1.0), though they MUST + only generate the RFC 1123 format for representing HTTP-date values + in header fields. See section 19.3 for further information. + + Note: Recipients of date values are encouraged to be robust in + accepting date values that may have been sent by non-HTTP + applications, as is sometimes the case when retrieving or posting + messages via proxies/gateways to SMTP or NNTP. + + + +Fielding, et al. Standards Track [Page 20] + +RFC 2616 HTTP/1.1 June 1999 + + + All HTTP date/time stamps MUST be represented in Greenwich Mean Time + (GMT), without exception. For the purposes of HTTP, GMT is exactly + equal to UTC (Coordinated Universal Time). This is indicated in the + first two formats by the inclusion of "GMT" as the three-letter + abbreviation for time zone, and MUST be assumed when reading the + asctime format. HTTP-date is case sensitive and MUST NOT include + additional LWS beyond that specifically included as SP in the + grammar. + + HTTP-date = rfc1123-date | rfc850-date | asctime-date + rfc1123-date = wkday "," SP date1 SP time SP "GMT" + rfc850-date = weekday "," SP date2 SP time SP "GMT" + asctime-date = wkday SP date3 SP time SP 4DIGIT + date1 = 2DIGIT SP month SP 4DIGIT + ; day month year (e.g., 02 Jun 1982) + date2 = 2DIGIT "-" month "-" 2DIGIT + ; day-month-year (e.g., 02-Jun-82) + date3 = month SP ( 2DIGIT | ( SP 1DIGIT )) + ; month day (e.g., Jun 2) + time = 2DIGIT ":" 2DIGIT ":" 2DIGIT + ; 00:00:00 - 23:59:59 + wkday = "Mon" | "Tue" | "Wed" + | "Thu" | "Fri" | "Sat" | "Sun" + weekday = "Monday" | "Tuesday" | "Wednesday" + | "Thursday" | "Friday" | "Saturday" | "Sunday" + month = "Jan" | "Feb" | "Mar" | "Apr" + | "May" | "Jun" | "Jul" | "Aug" + | "Sep" | "Oct" | "Nov" | "Dec" + + Note: HTTP requirements for the date/time stamp format apply only + to their usage within the protocol stream. 
Clients and servers are + not required to use these formats for user presentation, request + logging, etc. + +3.3.2 Delta Seconds + + Some HTTP header fields allow a time value to be specified as an + integer number of seconds, represented in decimal, after the time + that the message was received. + + delta-seconds = 1*DIGIT + +3.4 Character Sets + + HTTP uses the same definition of the term "character set" as that + described for MIME: + + + + + +Fielding, et al. Standards Track [Page 21] + +RFC 2616 HTTP/1.1 June 1999 + + + The term "character set" is used in this document to refer to a + method used with one or more tables to convert a sequence of octets + into a sequence of characters. Note that unconditional conversion in + the other direction is not required, in that not all characters may + be available in a given character set and a character set may provide + more than one sequence of octets to represent a particular character. + This definition is intended to allow various kinds of character + encoding, from simple single-table mappings such as US-ASCII to + complex table switching methods such as those that use ISO-2022's + techniques. However, the definition associated with a MIME character + set name MUST fully specify the mapping to be performed from octets + to characters. In particular, use of external profiling information + to determine the exact mapping is not permitted. + + Note: This use of the term "character set" is more commonly + referred to as a "character encoding." However, since HTTP and + MIME share the same registry, it is important that the terminology + also be shared. + + HTTP character sets are identified by case-insensitive tokens. The + complete set of tokens is defined by the IANA Character Set registry + [19]. 
+ + charset = token + +[[ HTTP uses charset in two contexts: within an Accept-Charset request ]] +[[ header (in which the charset value is an unquoted token) and as the ]] +[[ value of a parameter in a Content-type header (within a request or ]] +[[ response), in which case the parameter value of the charset ]] +[[ parameter may be quoted. ]] + + Although HTTP allows an arbitrary token to be used as a charset + value, any token that has a predefined value within the IANA + Character Set registry [19] MUST represent the character set defined + by that registry. Applications SHOULD limit their use of character + sets to those defined by the IANA registry. + + Implementors should be aware of IETF character set requirements [38] + [41]. + +3.4.1 Missing Charset + + Some HTTP/1.0 software has interpreted a Content-Type header without + charset parameter incorrectly to mean "recipient should guess." + Senders wishing to defeat this behavior MAY include a charset + parameter even when the charset is ISO-8859-1 and SHOULD do so when + it is known that it will not confuse the recipient. + + Unfortunately, some older HTTP/1.0 clients did not deal properly with + an explicit charset parameter. HTTP/1.1 recipients MUST respect the + charset label provided by the sender; and those user agents that have + a provision to "guess" a charset MUST use the charset from the + + + + + +Fielding, et al. Standards Track [Page 22] + +RFC 2616 HTTP/1.1 June 1999 + + + content-type field if they support that charset, rather than the + recipient's preference, when initially displaying a document. See + section 3.7.1. + +3.5 Content Codings + + Content coding values indicate an encoding transformation that has + been or can be applied to an entity. Content codings are primarily + used to allow a document to be compressed or otherwise usefully + transformed without losing the identity of its underlying media type + and without loss of information. 
Frequently, the entity is stored in + coded form, transmitted directly, and only decoded by the recipient. + + content-coding = token + + All content-coding values are case-insensitive. HTTP/1.1 uses + content-coding values in the Accept-Encoding (section 14.3) and + Content-Encoding (section 14.11) header fields. Although the value + describes the content-coding, what is more important is that it + indicates what decoding mechanism will be required to remove the + encoding. + + The Internet Assigned Numbers Authority (IANA) acts as a registry for + content-coding value tokens. Initially, the registry contains the + following tokens: + + gzip An encoding format produced by the file compression program + "gzip" (GNU zip) as described in RFC 1952 [25]. This format is a + Lempel-Ziv coding (LZ77) with a 32 bit CRC. + + compress + The encoding format produced by the common UNIX file compression + program "compress". This format is an adaptive Lempel-Ziv-Welch + coding (LZW). + + Use of program names for the identification of encoding formats + is not desirable and is discouraged for future encodings. Their + use here is representative of historical practice, not good + design. For compatibility with previous implementations of HTTP, + applications SHOULD consider "x-gzip" and "x-compress" to be + equivalent to "gzip" and "compress" respectively. + + deflate + The "zlib" format defined in RFC 1950 [31] in combination with + the "deflate" compression mechanism described in RFC 1951 [29]. + + + + + + +Fielding, et al. Standards Track [Page 23] + +RFC 2616 HTTP/1.1 June 1999 + + + identity + The default (identity) encoding; the use of no transformation + whatsoever. This content-coding is used only in the Accept- + Encoding header, and SHOULD NOT be used in the Content-Encoding + header. 
+ + New content-coding value tokens SHOULD be registered; to allow + interoperability between clients and servers, specifications of the + content coding algorithms needed to implement a new value SHOULD be + publicly available and adequate for independent implementation, and + conform to the purpose of content coding defined in this section. + +3.6 Transfer Codings + + Transfer-coding values are used to indicate an encoding + transformation that has been, can be, or may need to be applied to an + entity-body in order to ensure "safe transport" through the network. + This differs from a content coding in that the transfer-coding is a + property of the message, not of the original entity. + + transfer-coding = "chunked" | transfer-extension + transfer-extension = token *( ";" parameter ) + + Parameters are in the form of attribute/value pairs. + + parameter = attribute "=" value + attribute = token + value = token | quoted-string + + All transfer-coding values are case-insensitive. HTTP/1.1 uses + transfer-coding values in the TE header field (section 14.39) and in + the Transfer-Encoding header field (section 14.41). + + Whenever a transfer-coding is applied to a message-body, the set of + transfer-codings MUST include "chunked", unless the message is + terminated by closing the connection. When the "chunked" transfer- + coding is used, it MUST be the last transfer-coding applied to the + message-body. The "chunked" transfer-coding MUST NOT be applied more + than once to a message-body. These rules allow the recipient to + determine the transfer-length of the message (section 4.4). + + Transfer-codings are analogous to the Content-Transfer-Encoding + values of MIME [7], which were designed to enable safe transport of + binary data over a 7-bit transport service. However, safe transport + has a different focus for an 8bit-clean transfer protocol. 
In HTTP, + the only unsafe characteristic of message-bodies is the difficulty in + determining the exact body length (section 7.2.2), or the desire to + encrypt data over a shared transport. + + + +Fielding, et al. Standards Track [Page 24] + +RFC 2616 HTTP/1.1 June 1999 + + + The Internet Assigned Numbers Authority (IANA) acts as a registry for + transfer-coding value tokens. Initially, the registry contains the + following tokens: "chunked" (section 3.6.1), "identity" (section + 3.6.2), "gzip" (section 3.5), "compress" (section 3.5), and "deflate" + (section 3.5). + + [[ Remove reference to "identity" token ]] + + New transfer-coding value tokens SHOULD be registered in the same way + as new content-coding value tokens (section 3.5). + + A server which receives an entity-body with a transfer-coding it does + not understand SHOULD return 501 (Unimplemented), and close the + connection. A server MUST NOT send transfer-codings to an HTTP/1.0 + client. + +3.6.1 Chunked Transfer Coding + + The chunked encoding modifies the body of a message in order to + transfer it as a series of chunks, each with its own size indicator, + followed by an OPTIONAL trailer containing entity-header fields. This + allows dynamically produced content to be transferred along with the + information necessary for the recipient to verify that it has + received the full message. + + Chunked-Body = *chunk + last-chunk + trailer + CRLF + + chunk = chunk-size [ chunk-extension ] CRLF + chunk-data CRLF + chunk-size = 1*HEX + last-chunk = 1*("0") [ chunk-extension ] CRLF + + chunk-extension= *( ";" chunk-ext-name [ "=" chunk-ext-val ] ) + chunk-ext-name = token + chunk-ext-val = token | quoted-string + chunk-data = chunk-size(OCTET) + trailer = *(entity-header CRLF) + + The chunk-size field is a string of hex digits indicating the size of + the chunk. The chunked encoding is ended by any chunk whose size is + zero, followed by the trailer, which is terminated by an empty line. 
+ + [[ "the size of the chunk" means "the size of the chunk-data in ]] + [[ octets" ]] + + The trailer allows the sender to include additional HTTP header + fields at the end of the message. The Trailer header field can be + used to indicate which header fields are included in a trailer (see + section 14.40). + + + + +Fielding, et al. Standards Track [Page 25] + +RFC 2616 HTTP/1.1 June 1999 + + + A server using chunked transfer-coding in a response MUST NOT use the + trailer for any header fields unless at least one of the following is + true: + + a)the request included a TE header field that indicates "trailers" is + acceptable in the transfer-coding of the response, as described in + section 14.39; or, + + b)the server is the origin server for the response, the trailer + fields consist entirely of optional metadata, and the recipient + could use the message (in a manner acceptable to the origin server) + without receiving this metadata. In other words, the origin server + is willing to accept the possibility that the trailer fields might + be silently discarded along the path to the client. + + This requirement prevents an interoperability failure when the + message is being received by an HTTP/1.1 (or later) proxy and + forwarded to an HTTP/1.0 recipient. It avoids a situation where + compliance with the protocol would have necessitated a possibly + infinite buffer on the proxy. + + An example process for decoding a Chunked-Body is presented in + appendix 19.4.6. + + All HTTP/1.1 applications MUST be able to receive and decode the + "chunked" transfer-coding, and MUST ignore chunk-extension extensions + they do not understand. + +3.7 Media Types + + HTTP uses Internet Media Types [17] in the Content-Type (section + 14.17) and Accept (section 14.1) header fields in order to provide + open and extensible data typing and type negotiation. 
+ + media-type = type "/" subtype *( ";" parameter ) + type = token + subtype = token + + Parameters MAY follow the type/subtype in the form of attribute/value + pairs (as defined in section 3.6). + + The type, subtype, and parameter attribute names are case- + insensitive. Parameter values might or might not be case-sensitive, + depending on the semantics of the parameter name. Linear white space + (LWS) MUST NOT be used between the type and subtype, nor between an + attribute and its value. The presence or absence of a parameter might + be significant to the processing of a media-type, depending on its + definition within the media type registry. + + + +Fielding, et al. Standards Track [Page 26] + +RFC 2616 HTTP/1.1 June 1999 + + + Note that some older HTTP applications do not recognize media type + parameters. When sending data to older HTTP applications, + implementations SHOULD only use media type parameters when they are + required by that type/subtype definition. + + Media-type values are registered with the Internet Assigned Number + Authority (IANA [19]). The media type registration process is + outlined in RFC 1590 [17]. Use of non-registered media types is + discouraged. + + [[ "RFC 1590" should be "RFC 2048" ]] + +3.7.1 Canonicalization and Text Defaults + + Internet media types are registered with a canonical form. An + entity-body transferred via HTTP messages MUST be represented in the + appropriate canonical form prior to its transmission except for + "text" types, as defined in the next paragraph. + + When in canonical form, media subtypes of the "text" type use CRLF as + the text line break. HTTP relaxes this requirement and allows the + transport of text media with plain CR or LF alone representing a line + break when it is done consistently for an entire entity-body. HTTP + applications MUST accept CRLF, bare CR, and bare LF as being + representative of a line break in text media received via HTTP. 
In + addition, if the text is represented in a character set that does not + use octets 13 and 10 for CR and LF respectively, as is the case for + some multi-byte character sets, HTTP allows the use of whatever octet + sequences are defined by that character set to represent the + equivalent of CR and LF for line breaks. This flexibility regarding + line breaks applies only to text media in the entity-body; a bare CR + or LF MUST NOT be substituted for CRLF within any of the HTTP control + structures (such as header fields and multipart boundaries). + + If an entity-body is encoded with a content-coding, the underlying + data MUST be in a form defined above prior to being encoded. + + The "charset" parameter is used with some media types to define the + character set (section 3.4) of the data. When no explicit charset + parameter is provided by the sender, media subtypes of the "text" + type are defined to have a default charset value of "ISO-8859-1" when + received via HTTP. Data in character sets other than "ISO-8859-1" or + its subsets MUST be labeled with an appropriate charset value. See + section 3.4.1 for compatibility problems. + +3.7.2 Multipart Types + + MIME provides for a number of "multipart" types -- encapsulations of + one or more entities within a single message-body. All multipart + types share a common syntax, as defined in section 5.1.1 of RFC 2046 + + + +Fielding, et al. Standards Track [Page 27] + +RFC 2616 HTTP/1.1 June 1999 + + + [40], and MUST include a boundary parameter as part of the media type + value. The message body is itself a protocol element and MUST + therefore use only CRLF to represent line breaks between body-parts. + Unlike in RFC 2046, the epilogue of any multipart message MUST be + empty; HTTP applications MUST NOT transmit the epilogue (even if the + original multipart contains an epilogue). 
These restrictions exist in + order to preserve the self-delimiting nature of a multipart message- + body, wherein the "end" of the message-body is indicated by the + ending multipart boundary. + + In general, HTTP treats a multipart message-body no differently than + any other media type: strictly as payload. The one exception is the + "multipart/byteranges" type (appendix 19.2) when it appears in a 206 + (Partial Content) response, which will be interpreted by some HTTP + caching mechanisms as described in sections 13.5.4 and 14.16. In all + other cases, an HTTP user agent SHOULD follow the same or similar + behavior as a MIME user agent would upon receipt of a multipart type. + The MIME header fields within each body-part of a multipart message- + body do not have any significance to HTTP beyond that defined by + their MIME semantics. + + In general, an HTTP user agent SHOULD follow the same or similar + behavior as a MIME user agent would upon receipt of a multipart type. + If an application receives an unrecognized multipart subtype, the + application MUST treat it as being equivalent to "multipart/mixed". + + Note: The "multipart/form-data" type has been specifically defined + for carrying form data suitable for processing via the POST + request method, as described in RFC 1867 [15]. + +3.8 Product Tokens + + Product tokens are used to allow communicating applications to + identify themselves by software name and version. Most fields using + product tokens also allow sub-products which form a significant part + of the application to be listed, separated by white space. By + convention, the products are listed in order of their significance + for identifying the application. + + product = token ["/" product-version] + product-version = token + + Examples: + + User-Agent: CERN-LineMode/2.15 libwww/2.17b3 + Server: Apache/0.8.4 + + + + + +Fielding, et al. 
Standards Track [Page 28] + +RFC 2616 HTTP/1.1 June 1999 + + + Product tokens SHOULD be short and to the point. They MUST NOT be + used for advertising or other non-essential information. Although any + token character MAY appear in a product-version, this token SHOULD + only be used for a version identifier (i.e., successive versions of + the same product SHOULD only differ in the product-version portion of + the product value). + +3.9 Quality Values + + HTTP content negotiation (section 12) uses short "floating point" + numbers to indicate the relative importance ("weight") of various + negotiable parameters. A weight is normalized to a real number in + the range 0 through 1, where 0 is the minimum and 1 the maximum + value. If a parameter has a quality value of 0, then content with + this parameter is `not acceptable' for the client. HTTP/1.1 + applications MUST NOT generate more than three digits after the + decimal point. User configuration of these values SHOULD also be + limited in this fashion. + + qvalue = ( "0" [ "." 0*3DIGIT ] ) + | ( "1" [ "." 0*3("0") ] ) + + "Quality values" is a misnomer, since these values merely represent + relative degradation in desired quality. + +3.10 Language Tags + + A language tag identifies a natural language spoken, written, or + otherwise conveyed by human beings for communication of information + to other human beings. Computer languages are explicitly excluded. + HTTP uses language tags within the Accept-Language and Content- + Language fields. + + The syntax and registry of HTTP language tags is the same as that + defined by RFC 1766 [1]. In summary, a language tag is composed of 1 + or more parts: A primary language tag and a possibly empty series of + subtags: + + language-tag = primary-tag *( "-" subtag ) + primary-tag = 1*8ALPHA + subtag = 1*8ALPHA + + [[ Updated by RFC 3066: subtags may now contain digits ]] + + White space is not allowed within the tag and all tags are case- + insensitive. 
The name space of language tags is administered by the + IANA. Example tags include: + + en, en-US, en-cockney, i-cherokee, x-pig-latin + + + + +Fielding, et al. Standards Track [Page 29] + +RFC 2616 HTTP/1.1 June 1999 + + + where any two-letter primary-tag is an ISO-639 language abbreviation + and any two-letter initial subtag is an ISO-3166 country code. (The + last three tags above are not registered tags; all but the last are + examples of tags which could be registered in future.) + +3.11 Entity Tags + + Entity tags are used for comparing two or more entities from the same + requested resource. HTTP/1.1 uses entity tags in the ETag (section + 14.19), If-Match (section 14.24), If-None-Match (section 14.26), and + If-Range (section 14.27) header fields. The definition of how they + are used and compared as cache validators is in section 13.3.3. An + entity tag consists of an opaque quoted string, possibly prefixed by + a weakness indicator. + + entity-tag = [ weak ] opaque-tag + weak = "W/" + opaque-tag = quoted-string + + A "strong entity tag" MAY be shared by two entities of a resource + only if they are equivalent by octet equality. + + A "weak entity tag," indicated by the "W/" prefix, MAY be shared by + two entities of a resource only if the entities are equivalent and + could be substituted for each other with no significant change in + semantics. A weak entity tag can only be used for weak comparison. + + An entity tag MUST be unique across all versions of all entities + associated with a particular resource. A given entity tag value MAY + be used for entities obtained by requests on different URIs. The use + of the same entity tag value in conjunction with entities obtained by + requests on different URIs does not imply the equivalence of those + entities. + +3.12 Range Units + + HTTP/1.1 allows a client to request that only part (a range of) the + response entity be included within the response. 
HTTP/1.1 uses range + units in the Range (section 14.35) and Content-Range (section 14.16) + header fields. An entity can be broken down into subranges according + to various structural units. + + range-unit = bytes-unit | other-range-unit + bytes-unit = "bytes" + other-range-unit = token + + The only range unit defined by HTTP/1.1 is "bytes". HTTP/1.1 + implementations MAY ignore ranges specified using other units. + + + +Fielding, et al. Standards Track [Page 30] + +RFC 2616 HTTP/1.1 June 1999 + + + HTTP/1.1 has been designed to allow implementations of applications + that do not depend on knowledge of ranges. + +4 HTTP Message + +4.1 Message Types + + HTTP messages consist of requests from client to server and responses + from server to client. + + HTTP-message = Request | Response ; HTTP/1.1 messages + + Request (section 5) and Response (section 6) messages use the generic + message format of RFC 822 [9] for transferring entities (the payload + of the message). Both types of message consist of a start-line, zero + or more header fields (also known as "headers"), an empty line (i.e., + a line with nothing preceding the CRLF) indicating the end of the + header fields, and possibly a message-body. + + generic-message = start-line + *(message-header CRLF) + CRLF + [ message-body ] + start-line = Request-Line | Status-Line + + In the interest of robustness, servers SHOULD ignore any empty + line(s) received where a Request-Line is expected. In other words, if + the server is reading the protocol stream at the beginning of a + message and receives a CRLF first, it should ignore the CRLF. + + Certain buggy HTTP/1.0 client implementations generate extra CRLF's + after a POST request. To restate what is explicitly forbidden by the + BNF, an HTTP/1.1 client MUST NOT preface or follow a request with an + extra CRLF. 
+ +4.2 Message Headers + + HTTP header fields, which include general-header (section 4.5), + request-header (section 5.3), response-header (section 6.2), and + entity-header (section 7.1) fields, follow the same generic format as + that given in Section 3.1 of RFC 822 [9]. Each header field consists + of a name followed by a colon (":") and the field value. Field names + are case-insensitive. The field value MAY be preceded by any amount + of LWS, though a single SP is preferred. Header fields can be + extended over multiple lines by preceding each extra line with at + least one SP or HT. Applications ought to follow "common form", where + one is known or indicated, when generating HTTP constructs, since + there might exist some implementations that fail to accept anything + + + +Fielding, et al. Standards Track [Page 31] + +RFC 2616 HTTP/1.1 June 1999 + + + beyond the common forms. + + message-header = field-name ":" [ field-value ] + field-name = token + field-value = *( field-content | LWS ) + field-content = <the OCTETs making up the field-value + and consisting of either *TEXT or combinations + of token, separators, and quoted-string> + + The field-content does not include any leading or trailing LWS: + linear white space occurring before the first non-whitespace + character of the field-value or after the last non-whitespace + character of the field-value. Such leading or trailing LWS MAY be + removed without changing the semantics of the field value. Any LWS + that occurs between field-content MAY be replaced with a single SP + before interpreting the field value or forwarding the message + downstream. + + The order in which header fields with differing field names are + received is not significant. However, it is "good practice" to send + general-header fields first, followed by request-header or response- + header fields, and ending with the entity-header fields. + + Multiple message-header fields with the same field-name MAY be + present in a message if and only if the entire field-value for that + header field is defined as a comma-separated list [i.e., #(values)].
+ It MUST be possible to combine the multiple header fields into one + "field-name: field-value" pair, without changing the semantics of the + message, by appending each subsequent field-value to the first, each + separated by a comma. The order in which header fields with the same + field-name are received is therefore significant to the + interpretation of the combined field value, and thus a proxy MUST NOT + change the order of these field values when a message is forwarded. + +4.3 Message Body + + The message-body (if any) of an HTTP message is used to carry the + entity-body associated with the request or response. The message-body + differs from the entity-body only when a transfer-coding has been + applied, as indicated by the Transfer-Encoding header field (section + 14.41). + + message-body = entity-body + | <entity-body encoded as per Transfer-Encoding> + + Transfer-Encoding MUST be used to indicate any transfer-codings + applied by an application to ensure safe and proper transfer of the + message. Transfer-Encoding is a property of the message, not of the + + + +Fielding, et al. Standards Track [Page 32] + +RFC 2616 HTTP/1.1 June 1999 + + + entity, and thus MAY be added or removed by any application along the + request/response chain. (However, section 3.6 places restrictions on + when certain transfer-codings may be used.) + + The rules for when a message-body is allowed in a message differ for + requests and responses. + + The presence of a message-body in a request is signaled by the + inclusion of a Content-Length or Transfer-Encoding header field in + the request's message-headers. A message-body MUST NOT be included in + a request if the specification of the request method (section 5.1.1) + does not allow sending an entity-body in requests. A server SHOULD + read and forward a message-body on any request; if the request method + does not include defined semantics for an entity-body, then the + message-body SHOULD be ignored when handling the request.
+ + For response messages, whether or not a message-body is included with + a message is dependent on both the request method and the response + status code (section 6.1.1). All responses to the HEAD request method + MUST NOT include a message-body, even though the presence of entity- + header fields might lead one to believe they do. All 1xx + (informational), 204 (no content), and 304 (not modified) responses + MUST NOT include a message-body. All other responses do include a + message-body, although it MAY be of zero length. + +4.4 Message Length + + The transfer-length of a message is the length of the message-body as + it appears in the message; that is, after any transfer-codings have + been applied. When a message-body is included with a message, the + transfer-length of that body is determined by one of the following + (in order of precedence): + + 1.Any response message which "MUST NOT" include a message-body (such + as the 1xx, 204, and 304 responses and any response to a HEAD + request) is always terminated by the first empty line after the + header fields, regardless of the entity-header fields present in + the message. + + 2.If a Transfer-Encoding header field (section 14.41) is present and + has any value other than "identity", then the transfer-length is + defined by use of the "chunked" transfer-coding (section 3.6), + unless the message is terminated by closing the connection. + + [[ Remove 'and has any value other than "identity"' ]] + + 3.If a Content-Length header field (section 14.13) is present, its + decimal value in OCTETs represents both the entity-length and the + transfer-length. The Content-Length header field MUST NOT be sent + if these two lengths are different (i.e., if a Transfer-Encoding + + + +Fielding, et al. Standards Track [Page 33] + +RFC 2616 HTTP/1.1 June 1999 + + + header field is present). If a message is received with both a + Transfer-Encoding header field and a Content-Length header field, + the latter MUST be ignored. 
+ + 4.If the message uses the media type "multipart/byteranges", and the + transfer-length is not otherwise specified, then this self- + delimiting media type defines the transfer-length. This media type + MUST NOT be used unless the sender knows that the recipient can parse + it; the presence in a request of a Range header with multiple byte- + range specifiers from a 1.1 client implies that the client can parse + multipart/byteranges responses. + + A range header might be forwarded by a 1.0 proxy that does not + understand multipart/byteranges; in this case the server MUST + delimit the message using methods defined in items 1,3 or 5 of + this section. + + 5.By the server closing the connection. (Closing the connection + cannot be used to indicate the end of a request body, since that + would leave no possibility for the server to send back a response.) + + For compatibility with HTTP/1.0 applications, HTTP/1.1 requests + containing a message-body MUST include a valid Content-Length header + field unless the server is known to be HTTP/1.1 compliant. If a + request contains a message-body and a Content-Length is not given, + the server SHOULD respond with 400 (bad request) if it cannot + determine the length of the message, or with 411 (length required) if + it wishes to insist on receiving a valid Content-Length. + + All HTTP/1.1 applications that receive entities MUST accept the + "chunked" transfer-coding (section 3.6), thus allowing this mechanism + to be used for messages when the message length cannot be determined + in advance. + + Messages MUST NOT include both a Content-Length header field and a + non-identity transfer-coding. If the message does include a non- + identity transfer-coding, the Content-Length MUST be ignored. + + [[ Remove "non-identity" both times ]] + + When a Content-Length is given in a message where a message-body is + allowed, its field value MUST exactly match the number of OCTETs in + the message-body.
HTTP/1.1 user agents MUST notify the user when an + invalid length is received and detected. + +4.5 General Header Fields + + There are a few header fields which have general applicability for + both request and response messages, but which do not apply to the + entity being transferred. These header fields apply only to the + + + +Fielding, et al. Standards Track [Page 34] + +RFC 2616 HTTP/1.1 June 1999 + + + message being transmitted. + + general-header = Cache-Control ; Section 14.9 + | Connection ; Section 14.10 + | Date ; Section 14.18 + | Pragma ; Section 14.32 + | Trailer ; Section 14.40 + | Transfer-Encoding ; Section 14.41 + | Upgrade ; Section 14.42 + | Via ; Section 14.45 + | Warning ; Section 14.46 + + General-header field names can be extended reliably only in + combination with a change in the protocol version. However, new or + experimental header fields may be given the semantics of general + header fields if all parties in the communication recognize them to + be general-header fields. Unrecognized header fields are treated as + entity-header fields. + +5 Request + + A request message from a client to a server includes, within the + first line of that message, the method to be applied to the resource, + the identifier of the resource, and the protocol version in use. + + Request = Request-Line ; Section 5.1 + *(( general-header ; Section 4.5 + | request-header ; Section 5.3 + | entity-header ) CRLF) ; Section 7.1 + CRLF + [ message-body ] ; Section 4.3 + +5.1 Request-Line + + The Request-Line begins with a method token, followed by the + Request-URI and the protocol version, and ending with CRLF. The + elements are separated by SP characters. No CR or LF is allowed + except in the final CRLF sequence. + + Request-Line = Method SP Request-URI SP HTTP-Version CRLF + + + + + + + + + + + +Fielding, et al. 
Standards Track [Page 35] + +RFC 2616 HTTP/1.1 June 1999 + + +5.1.1 Method + + The Method token indicates the method to be performed on the + resource identified by the Request-URI. The method is case-sensitive. + + Method = "OPTIONS" ; Section 9.2 + | "GET" ; Section 9.3 + | "HEAD" ; Section 9.4 + | "POST" ; Section 9.5 + | "PUT" ; Section 9.6 + | "DELETE" ; Section 9.7 + | "TRACE" ; Section 9.8 + | "CONNECT" ; Section 9.9 + | extension-method + extension-method = token + + The list of methods allowed by a resource can be specified in an + Allow header field (section 14.7). The return code of the response + always notifies the client whether a method is currently allowed on a + resource, since the set of allowed methods can change dynamically. An + origin server SHOULD return the status code 405 (Method Not Allowed) + if the method is known by the origin server but not allowed for the + requested resource, and 501 (Not Implemented) if the method is + unrecognized or not implemented by the origin server. The methods GET + and HEAD MUST be supported by all general-purpose servers. All other + methods are OPTIONAL; however, if the above methods are implemented, + they MUST be implemented with the same semantics as those specified + in section 9. + +5.1.2 Request-URI + + The Request-URI is a Uniform Resource Identifier (section 3.2) and + identifies the resource upon which to apply the request. + + Request-URI = "*" | absoluteURI | abs_path | authority + [[ Request-URI = "*" | absoluteURI | abs_path [ "?" query ] | authority ]] + + The four options for Request-URI are dependent on the nature of the + request. The asterisk "*" means that the request does not apply to a + particular resource, but to the server itself, and is only allowed + when the method used does not necessarily apply to a resource. One + example would be + + OPTIONS * HTTP/1.1 + + The absoluteURI form is REQUIRED when the request is being made to a + proxy. 
The proxy is requested to forward the request or service it + from a valid cache, and return the response. Note that the proxy MAY + forward the request on to another proxy or directly to the server + + + +Fielding, et al. Standards Track [Page 36] + +RFC 2616 HTTP/1.1 June 1999 + + + specified by the absoluteURI. In order to avoid request loops, a + proxy MUST be able to recognize all of its server names, including + any aliases, local variations, and the numeric IP address. An example + Request-Line would be: + + GET http://www.w3.org/pub/WWW/TheProject.html HTTP/1.1 + + To allow for transition to absoluteURIs in all requests in future + versions of HTTP, all HTTP/1.1 servers MUST accept the absoluteURI + form in requests, even though HTTP/1.1 clients will only generate + them in requests to proxies. + + The authority form is only used by the CONNECT method (section 9.9). + + The most common form of Request-URI is that used to identify a + resource on an origin server or gateway. In this case the absolute + path of the URI MUST be transmitted (see section 3.2.1, abs_path) as + the Request-URI, and the network location of the URI (authority) MUST + be transmitted in a Host header field. For example, a client wishing + to retrieve the resource above directly from the origin server would + create a TCP connection to port 80 of the host "www.w3.org" and send + the lines: + + GET /pub/WWW/TheProject.html HTTP/1.1 + Host: www.w3.org + + followed by the remainder of the Request. Note that the absolute path + cannot be empty; if none is present in the original URI, it MUST be + given as "/" (the server root). + + The Request-URI is transmitted in the format specified in section + 3.2.1. If the Request-URI is encoded using the "% HEX HEX" encoding + [42], the origin server MUST decode the Request-URI in order to + properly interpret the request. Servers SHOULD respond to invalid + Request-URIs with an appropriate status code. 
+ + A transparent proxy MUST NOT rewrite the "abs_path" part of the + received Request-URI when forwarding it to the next inbound server, + except as noted above to replace a null abs_path with "/". + + Note: The "no rewrite" rule prevents the proxy from changing the + meaning of the request when the origin server is improperly using + a non-reserved URI character for a reserved purpose. Implementors + should be aware that some pre-HTTP/1.1 proxies have been known to + rewrite the Request-URI. + + + + + + +Fielding, et al. Standards Track [Page 37] + +RFC 2616 HTTP/1.1 June 1999 + + +5.2 The Resource Identified by a Request + + The exact resource identified by an Internet request is determined by + examining both the Request-URI and the Host header field. + + An origin server that does not allow resources to differ by the + requested host MAY ignore the Host header field value when + determining the resource identified by an HTTP/1.1 request. (But see + section 19.6.1.1 for other requirements on Host support in HTTP/1.1.) + + An origin server that does differentiate resources based on the host + requested (sometimes referred to as virtual hosts or vanity host + names) MUST use the following rules for determining the requested + resource on an HTTP/1.1 request: + + 1. If Request-URI is an absoluteURI, the host is part of the + Request-URI. Any Host header field value in the request MUST be + ignored. + + 2. If the Request-URI is not an absoluteURI, and the request includes + a Host header field, the host is determined by the Host header + field value. + + 3. If the host as determined by rule 1 or 2 is not a valid host on + the server, the response MUST be a 400 (Bad Request) error message. + + Recipients of an HTTP/1.0 request that lacks a Host header field MAY + attempt to use heuristics (e.g., examination of the URI path for + something unique to a particular host) in order to determine what + exact resource is being requested. 
+ +5.3 Request Header Fields + + The request-header fields allow the client to pass additional + information about the request, and about the client itself, to the + server. These fields act as request modifiers, with semantics + equivalent to the parameters on a programming language method + invocation. + + request-header = Accept ; Section 14.1 + | Accept-Charset ; Section 14.2 + | Accept-Encoding ; Section 14.3 + | Accept-Language ; Section 14.4 + | Authorization ; Section 14.8 + | Expect ; Section 14.20 + | From ; Section 14.22 + | Host ; Section 14.23 + | If-Match ; Section 14.24 + + + +Fielding, et al. Standards Track [Page 38] + +RFC 2616 HTTP/1.1 June 1999 + + + | If-Modified-Since ; Section 14.25 + | If-None-Match ; Section 14.26 + | If-Range ; Section 14.27 + | If-Unmodified-Since ; Section 14.28 + | Max-Forwards ; Section 14.31 + | Proxy-Authorization ; Section 14.34 + | Range ; Section 14.35 + | Referer ; Section 14.36 + | TE ; Section 14.39 + | User-Agent ; Section 14.43 + + Request-header field names can be extended reliably only in + combination with a change in the protocol version. However, new or + experimental header fields MAY be given the semantics of request- + header fields if all parties in the communication recognize them to + be request-header fields. Unrecognized header fields are treated as + entity-header fields. + +6 Response + + After receiving and interpreting a request message, a server responds + with an HTTP response message. + + Response = Status-Line ; Section 6.1 + *(( general-header ; Section 4.5 + | response-header ; Section 6.2 + | entity-header ) CRLF) ; Section 7.1 + CRLF + [ message-body ] ; Section 7.2 + +6.1 Status-Line + + The first line of a Response message is the Status-Line, consisting + of the protocol version followed by a numeric status code and its + associated textual phrase, with each element separated by SP + characters. No CR or LF is allowed except in the final CRLF sequence. 
+ + Status-Line = HTTP-Version SP Status-Code SP Reason-Phrase CRLF + +6.1.1 Status Code and Reason Phrase + + The Status-Code element is a 3-digit integer result code of the + attempt to understand and satisfy the request. These codes are fully + defined in section 10. The Reason-Phrase is intended to give a short + textual description of the Status-Code. The Status-Code is intended + for use by automata and the Reason-Phrase is intended for the human + user. The client is not required to examine or display the Reason- + Phrase. + + + +Fielding, et al. Standards Track [Page 39] + +RFC 2616 HTTP/1.1 June 1999 + + + The first digit of the Status-Code defines the class of response. The + last two digits do not have any categorization role. There are 5 + values for the first digit: + + - 1xx: Informational - Request received, continuing process + + - 2xx: Success - The action was successfully received, + understood, and accepted + + - 3xx: Redirection - Further action must be taken in order to + complete the request + + - 4xx: Client Error - The request contains bad syntax or cannot + be fulfilled + + - 5xx: Server Error - The server failed to fulfill an apparently + valid request + + The individual values of the numeric status codes defined for + HTTP/1.1, and an example set of corresponding Reason-Phrase's, are + presented below. The reason phrases listed here are only + recommendations -- they MAY be replaced by local equivalents without + affecting the protocol. 
+ + Status-Code = + "100" ; Section 10.1.1: Continue + | "101" ; Section 10.1.2: Switching Protocols + | "200" ; Section 10.2.1: OK + | "201" ; Section 10.2.2: Created + | "202" ; Section 10.2.3: Accepted + | "203" ; Section 10.2.4: Non-Authoritative Information + | "204" ; Section 10.2.5: No Content + | "205" ; Section 10.2.6: Reset Content + | "206" ; Section 10.2.7: Partial Content + | "300" ; Section 10.3.1: Multiple Choices + | "301" ; Section 10.3.2: Moved Permanently + | "302" ; Section 10.3.3: Found + | "303" ; Section 10.3.4: See Other + | "304" ; Section 10.3.5: Not Modified + | "305" ; Section 10.3.6: Use Proxy + | "307" ; Section 10.3.8: Temporary Redirect + | "400" ; Section 10.4.1: Bad Request + | "401" ; Section 10.4.2: Unauthorized + | "402" ; Section 10.4.3: Payment Required + | "403" ; Section 10.4.4: Forbidden + | "404" ; Section 10.4.5: Not Found + | "405" ; Section 10.4.6: Method Not Allowed + | "406" ; Section 10.4.7: Not Acceptable + + + +Fielding, et al. Standards Track [Page 40] + +RFC 2616 HTTP/1.1 June 1999 + + + | "407" ; Section 10.4.8: Proxy Authentication Required + | "408" ; Section 10.4.9: Request Time-out + | "409" ; Section 10.4.10: Conflict + | "410" ; Section 10.4.11: Gone + | "411" ; Section 10.4.12: Length Required + | "412" ; Section 10.4.13: Precondition Failed + | "413" ; Section 10.4.14: Request Entity Too Large + | "414" ; Section 10.4.15: Request-URI Too Large + | "415" ; Section 10.4.16: Unsupported Media Type + | "416" ; Section 10.4.17: Requested range not satisfiable + | "417" ; Section 10.4.18: Expectation Failed + | "500" ; Section 10.5.1: Internal Server Error + | "501" ; Section 10.5.2: Not Implemented + | "502" ; Section 10.5.3: Bad Gateway + | "503" ; Section 10.5.4: Service Unavailable + | "504" ; Section 10.5.5: Gateway Time-out + | "505" ; Section 10.5.6: HTTP Version not supported + | extension-code + + extension-code = 3DIGIT + Reason-Phrase = *<TEXT, excluding CR, LF> + + HTTP status codes are extensible.
HTTP applications are not required + to understand the meaning of all registered status codes, though such + understanding is obviously desirable. However, applications MUST + understand the class of any status code, as indicated by the first + digit, and treat any unrecognized response as being equivalent to the + x00 status code of that class, with the exception that an + unrecognized response MUST NOT be cached. For example, if an + unrecognized status code of 431 is received by the client, it can + safely assume that there was something wrong with its request and + treat the response as if it had received a 400 status code. In such + cases, user agents SHOULD present to the user the entity returned + with the response, since that entity is likely to include human- + readable information which will explain the unusual status. + +6.2 Response Header Fields + + The response-header fields allow the server to pass additional + information about the response which cannot be placed in the Status- + Line. These header fields give information about the server and about + further access to the resource identified by the Request-URI. + + response-header = Accept-Ranges ; Section 14.5 + | Age ; Section 14.6 + | ETag ; Section 14.19 + | Location ; Section 14.30 + | Proxy-Authenticate ; Section 14.33 + + + +Fielding, et al. Standards Track [Page 41] + +RFC 2616 HTTP/1.1 June 1999 + + + | Retry-After ; Section 14.37 + | Server ; Section 14.38 + | Vary ; Section 14.44 + | WWW-Authenticate ; Section 14.47 + + Response-header field names can be extended reliably only in + combination with a change in the protocol version. However, new or + experimental header fields MAY be given the semantics of response- + header fields if all parties in the communication recognize them to + be response-header fields. Unrecognized header fields are treated as + entity-header fields. 
+ +7 Entity + + Request and Response messages MAY transfer an entity if not otherwise + restricted by the request method or response status code. An entity + consists of entity-header fields and an entity-body, although some + responses will only include the entity-headers. + + In this section, both sender and recipient refer to either the client + or the server, depending on who sends and who receives the entity. + +7.1 Entity Header Fields + + Entity-header fields define metainformation about the entity-body or, + if no body is present, about the resource identified by the request. + Some of this metainformation is OPTIONAL; some might be REQUIRED by + portions of this specification. + + entity-header = Allow ; Section 14.7 + | Content-Encoding ; Section 14.11 + | Content-Language ; Section 14.12 + | Content-Length ; Section 14.13 + | Content-Location ; Section 14.14 + | Content-MD5 ; Section 14.15 + | Content-Range ; Section 14.16 + | Content-Type ; Section 14.17 + | Expires ; Section 14.21 + | Last-Modified ; Section 14.29 + | extension-header + + extension-header = message-header + + The extension-header mechanism allows additional entity-header fields + to be defined without changing the protocol, but these fields cannot + be assumed to be recognizable by the recipient. Unrecognized header + fields SHOULD be ignored by the recipient and MUST be forwarded by + transparent proxies. + + + +Fielding, et al. Standards Track [Page 42] + +RFC 2616 HTTP/1.1 June 1999 + + +7.2 Entity Body + + The entity-body (if any) sent with an HTTP request or response is in + a format and encoding defined by the entity-header fields. + + entity-body = *OCTET + + An entity-body is only present in a message when a message-body is + present, as described in section 4.3. The entity-body is obtained + from the message-body by decoding any Transfer-Encoding that might + have been applied to ensure safe and proper transfer of the message. 
+ +7.2.1 Type + + When an entity-body is included with a message, the data type of that + body is determined via the header fields Content-Type and Content- + Encoding. These define a two-layer, ordered encoding model: + + entity-body := Content-Encoding( Content-Type( data ) ) + + Content-Type specifies the media type of the underlying data. + Content-Encoding may be used to indicate any additional content + codings applied to the data, usually for the purpose of data + compression, that are a property of the requested resource. There is + no default encoding. + + Any HTTP/1.1 message containing an entity-body SHOULD include a + Content-Type header field defining the media type of that body. If + and only if the media type is not given by a Content-Type field, the + recipient MAY attempt to guess the media type via inspection of its + content and/or the name extension(s) of the URI used to identify the + resource. If the media type remains unknown, the recipient SHOULD + treat it as type "application/octet-stream". + +7.2.2 Entity Length + + The entity-length of a message is the length of the message-body + before any transfer-codings have been applied. Section 4.4 defines + how the transfer-length of a message-body is determined. + + + + + + + + + + + + +Fielding, et al. Standards Track [Page 43] + +RFC 2616 HTTP/1.1 June 1999 + + +8 Connections + +8.1 Persistent Connections + +8.1.1 Purpose + + Prior to persistent connections, a separate TCP connection was + established to fetch each URL, increasing the load on HTTP servers + and causing congestion on the Internet. The use of inline images and + other associated data often require a client to make multiple + requests of the same server in a short amount of time. Analysis of + these performance problems and results from a prototype + implementation are available [26] [30]. Implementation experience and + measurements of actual HTTP/1.1 (RFC 2068) implementations show good + results [39]. 
Alternatives have also been explored, for example, + T/TCP [27]. + + Persistent HTTP connections have a number of advantages: + + - By opening and closing fewer TCP connections, CPU time is saved + in routers and hosts (clients, servers, proxies, gateways, + tunnels, or caches), and memory used for TCP protocol control + blocks can be saved in hosts. + + - HTTP requests and responses can be pipelined on a connection. + Pipelining allows a client to make multiple requests without + waiting for each response, allowing a single TCP connection to + be used much more efficiently, with much lower elapsed time. + + - Network congestion is reduced by reducing the number of packets + caused by TCP opens, and by allowing TCP sufficient time to + determine the congestion state of the network. + + - Latency on subsequent requests is reduced since there is no time + spent in TCP's connection opening handshake. + + - HTTP can evolve more gracefully, since errors can be reported + without the penalty of closing the TCP connection. Clients using + future versions of HTTP might optimistically try a new feature, + but if communicating with an older server, retry with old + semantics after an error is reported. + + HTTP implementations SHOULD implement persistent connections. + + + + + + + + +Fielding, et al. Standards Track [Page 44] + +RFC 2616 HTTP/1.1 June 1999 + + +8.1.2 Overall Operation + + A significant difference between HTTP/1.1 and earlier versions of + HTTP is that persistent connections are the default behavior of any + HTTP connection. That is, unless otherwise indicated, the client + SHOULD assume that the server will maintain a persistent connection, + even after error responses from the server. + + Persistent connections provide a mechanism by which a client and a + server can signal the close of a TCP connection. This signaling takes + place using the Connection header field (section 14.10). 
Once a close + has been signaled, the client MUST NOT send any more requests on that + connection. + +8.1.2.1 Negotiation + + An HTTP/1.1 server MAY assume that a HTTP/1.1 client intends to + maintain a persistent connection unless a Connection header including + the connection-token "close" was sent in the request. If the server + chooses to close the connection immediately after sending the + response, it SHOULD send a Connection header including the + connection-token close. + + An HTTP/1.1 client MAY expect a connection to remain open, but would + decide to keep it open based on whether the response from a server + contains a Connection header with the connection-token close. In case + the client does not want to maintain a connection for more than that + request, it SHOULD send a Connection header including the + connection-token close. + + If either the client or the server sends the close token in the + Connection header, that request becomes the last one for the + connection. + + Clients and servers SHOULD NOT assume that a persistent connection is + maintained for HTTP versions less than 1.1 unless it is explicitly + signaled. See section 19.6.2 for more information on backward + compatibility with HTTP/1.0 clients. + + In order to remain persistent, all messages on the connection MUST + have a self-defined message length (i.e., one not defined by closure + of the connection), as described in section 4.4. + + + + + + + + + +Fielding, et al. Standards Track [Page 45] + +RFC 2616 HTTP/1.1 June 1999 + + +8.1.2.2 Pipelining + + A client that supports persistent connections MAY "pipeline" its + requests (i.e., send multiple requests without waiting for each + response). A server MUST send its responses to those requests in the + same order that the requests were received. + + Clients which assume persistent connections and pipeline immediately + after connection establishment SHOULD be prepared to retry their + connection if the first pipelined attempt fails. 
If a client does + such a retry, it MUST NOT pipeline before it knows the connection is + persistent. Clients MUST also be prepared to resend their requests if + the server closes the connection before sending all of the + corresponding responses. + + Clients SHOULD NOT pipeline requests using non-idempotent methods or + non-idempotent sequences of methods (see section 9.1.2). Otherwise, a + premature termination of the transport connection could lead to + indeterminate results. A client wishing to send a non-idempotent + request SHOULD wait to send that request until it has received the + response status for the previous request. + +8.1.3 Proxy Servers + + It is especially important that proxies correctly implement the + properties of the Connection header field as specified in section + 14.10. + + The proxy server MUST signal persistent connections separately with + its clients and the origin servers (or other proxy servers) that it + connects to. Each persistent connection applies to only one transport + link. + + A proxy server MUST NOT establish a HTTP/1.1 persistent connection + with an HTTP/1.0 client (but see RFC 2068 [33] for information and + discussion of the problems with the Keep-Alive header implemented by + many HTTP/1.0 clients). + +8.1.4 Practical Considerations + + Servers will usually have some time-out value beyond which they will + no longer maintain an inactive connection. Proxy servers might make + this a higher value since it is likely that the client will be making + more connections through the same server. The use of persistent + connections places no requirements on the length (or existence) of + this time-out for either the client or the server. + + + + + +Fielding, et al. Standards Track [Page 46] + +RFC 2616 HTTP/1.1 June 1999 + + + When a client or server wishes to time-out it SHOULD issue a graceful + close on the transport connection. 
Clients and servers SHOULD both + constantly watch for the other side of the transport close, and + respond to it as appropriate. If a client or server does not detect + the other side's close promptly it could cause unnecessary resource + drain on the network. + + A client, server, or proxy MAY close the transport connection at any + time. For example, a client might have started to send a new request + at the same time that the server has decided to close the "idle" + connection. From the server's point of view, the connection is being + closed while it was idle, but from the client's point of view, a + request is in progress. + + This means that clients, servers, and proxies MUST be able to recover + from asynchronous close events. Client software SHOULD reopen the + transport connection and retransmit the aborted sequence of requests + without user interaction so long as the request sequence is + idempotent (see section 9.1.2). Non-idempotent methods or sequences + MUST NOT be automatically retried, although user agents MAY offer a + human operator the choice of retrying the request(s). Confirmation by + user-agent software with semantic understanding of the application + MAY substitute for user confirmation. The automatic retry SHOULD NOT + be repeated if the second sequence of requests fails. + + Servers SHOULD always respond to at least one request per connection, + if at all possible. Servers SHOULD NOT close a connection in the + middle of transmitting a response, unless a network or client failure + is suspected. + + Clients that use persistent connections SHOULD limit the number of + simultaneous connections that they maintain to a given server. A + single-user client SHOULD NOT maintain more than 2 connections with + any server or proxy. A proxy SHOULD use up to 2*N connections to + another server or proxy, where N is the number of simultaneously + active users. These guidelines are intended to improve HTTP response + times and avoid congestion. 
+ +8.2 Message Transmission Requirements + +8.2.1 Persistent Connections and Flow Control + + HTTP/1.1 servers SHOULD maintain persistent connections and use TCP's + flow control mechanisms to resolve temporary overloads, rather than + terminating connections with the expectation that clients will retry. + The latter technique can exacerbate network congestion. + + + + + +Fielding, et al. Standards Track [Page 47] + +RFC 2616 HTTP/1.1 June 1999 + + +8.2.2 Monitoring Connections for Error Status Messages + + An HTTP/1.1 (or later) client sending a message-body SHOULD monitor + the network connection for an error status while it is transmitting + the request. If the client sees an error status, it SHOULD + immediately cease transmitting the body. If the body is being sent + using a "chunked" encoding (section 3.6), a zero length chunk and + empty trailer MAY be used to prematurely mark the end of the message. + If the body was preceded by a Content-Length header, the client MUST + close the connection. + +8.2.3 Use of the 100 (Continue) Status + + The purpose of the 100 (Continue) status (see section 10.1.1) is to + allow a client that is sending a request message with a request body + to determine if the origin server is willing to accept the request + (based on the request headers) before the client sends the request + body. In some cases, it might either be inappropriate or highly + inefficient for the client to send the body if the server will reject + the message without looking at the body. + + Requirements for HTTP/1.1 clients: + + - If a client will wait for a 100 (Continue) response before + sending the request body, it MUST send an Expect request-header + field (section 14.20) with the "100-continue" expectation. + + - A client MUST NOT send an Expect request-header field (section + 14.20) with the "100-continue" expectation if it does not intend + to send a request body. 
+ + Because of the presence of older implementations, the protocol allows + ambiguous situations in which a client may send "Expect: 100- + continue" without receiving either a 417 (Expectation Failed) status + or a 100 (Continue) status. Therefore, when a client sends this + header field to an origin server (possibly via a proxy) from which it + has never seen a 100 (Continue) status, the client SHOULD NOT wait + for an indefinite period before sending the request body. + + Requirements for HTTP/1.1 origin servers: + + - Upon receiving a request which includes an Expect request-header + field with the "100-continue" expectation, an origin server MUST + either respond with 100 (Continue) status and continue to read + from the input stream, or respond with a final status code. The + origin server MUST NOT wait for the request body before sending + the 100 (Continue) response. If it responds with a final status + code, it MAY close the transport connection or it MAY continue + + + +Fielding, et al. Standards Track [Page 48] + +RFC 2616 HTTP/1.1 June 1999 + + + to read and discard the rest of the request. It MUST NOT + perform the requested method if it returns a final status code. + + - An origin server SHOULD NOT send a 100 (Continue) response if + the request message does not include an Expect request-header + field with the "100-continue" expectation, and MUST NOT send a + 100 (Continue) response if such a request comes from an HTTP/1.0 + (or earlier) client. There is an exception to this rule: for + compatibility with RFC 2068, a server MAY send a 100 (Continue) + status in response to an HTTP/1.1 PUT or POST request that does + not include an Expect request-header field with the "100- + continue" expectation. This exception, the purpose of which is + to minimize any client processing delays associated with an + undeclared wait for 100 (Continue) status, applies only to + HTTP/1.1 requests, and not to requests with any other HTTP- + version value. 
+ + - An origin server MAY omit a 100 (Continue) response if it has + already received some or all of the request body for the + corresponding request. + + - An origin server that sends a 100 (Continue) response MUST + ultimately send a final status code, once the request body is + received and processed, unless it terminates the transport + connection prematurely. + + - If an origin server receives a request that does not include an + Expect request-header field with the "100-continue" expectation, + the request includes a request body, and the server responds + with a final status code before reading the entire request body + from the transport connection, then the server SHOULD NOT close + the transport connection until it has read the entire request, + or until the client closes the connection. Otherwise, the client + might not reliably receive the response message. However, this + requirement is not to be construed as preventing a server from + defending itself against denial-of-service attacks, or from + badly broken client implementations. + + Requirements for HTTP/1.1 proxies: + + - If a proxy receives a request that includes an Expect request- + header field with the "100-continue" expectation, and the proxy + either knows that the next-hop server complies with HTTP/1.1 or + higher, or does not know the HTTP version of the next-hop + server, it MUST forward the request, including the Expect header + field. + + + + + +Fielding, et al. Standards Track [Page 49] + +RFC 2616 HTTP/1.1 June 1999 + + + - If the proxy knows that the version of the next-hop server is + HTTP/1.0 or lower, it MUST NOT forward the request, and it MUST + respond with a 417 (Expectation Failed) status. + + - Proxies SHOULD maintain a cache recording the HTTP version + numbers received from recently-referenced next-hop servers.
+ + - A proxy MUST NOT forward a 100 (Continue) response if the + request message was received from an HTTP/1.0 (or earlier) + client and did not include an Expect request-header field with + the "100-continue" expectation. This requirement overrides the + general rule for forwarding of 1xx responses (see section 10.1). + +8.2.4 Client Behavior if Server Prematurely Closes Connection + + If an HTTP/1.1 client sends a request which includes a request body, + but which does not include an Expect request-header field with the + "100-continue" expectation, and if the client is not directly + connected to an HTTP/1.1 origin server, and if the client sees the + connection close before receiving any status from the server, the + client SHOULD retry the request. If the client does retry this + request, it MAY use the following "binary exponential backoff" + algorithm to be assured of obtaining a reliable response: + + 1. Initiate a new connection to the server + + 2. Transmit the request-headers + + 3. Initialize a variable R to the estimated round-trip time to the + server (e.g., based on the time it took to establish the + connection), or to a constant value of 5 seconds if the round- + trip time is not available. + + 4. Compute T = R * (2**N), where N is the number of previous + retries of this request. + + 5. Wait either for an error response from the server, or for T + seconds (whichever comes first) + + 6. If no error response is received, after T seconds transmit the + body of the request. + + 7. If client sees that the connection is closed prematurely, + repeat from step 1 until the request is accepted, an error + response is received, or the user becomes impatient and + terminates the retry process. + + + + + +Fielding, et al. Standards Track [Page 50] + +RFC 2616 HTTP/1.1 June 1999 + + + If at any point an error status is received, the client + + - SHOULD NOT continue and + + - SHOULD close the connection if it has not completed sending the + request message. 
+ +9 Method Definitions + + The set of common methods for HTTP/1.1 is defined below. Although + this set can be expanded, additional methods cannot be assumed to + share the same semantics for separately extended clients and servers. + + The Host request-header field (section 14.23) MUST accompany all + HTTP/1.1 requests. + +9.1 Safe and Idempotent Methods + +9.1.1 Safe Methods + + Implementors should be aware that the software represents the user in + their interactions over the Internet, and should be careful to allow + the user to be aware of any actions they might take which may have an + unexpected significance to themselves or others. + + In particular, the convention has been established that the GET and + HEAD methods SHOULD NOT have the significance of taking an action + other than retrieval. These methods ought to be considered "safe". + This allows user agents to represent other methods, such as POST, PUT + and DELETE, in a special way, so that the user is made aware of the + fact that a possibly unsafe action is being requested. + + Naturally, it is not possible to ensure that the server does not + generate side-effects as a result of performing a GET request; in + fact, some dynamic resources consider that a feature. The important + distinction here is that the user did not request the side-effects, + so therefore cannot be held accountable for them. + +9.1.2 Idempotent Methods + + Methods can also have the property of "idempotence" in that (aside + from error or expiration issues) the side-effects of N > 0 identical + requests are the same as for a single request. The methods GET, HEAD, + PUT and DELETE share this property. Also, the methods OPTIONS and + TRACE SHOULD NOT have side effects, and so are inherently idempotent. + + + + + + +Fielding, et al.
Standards Track [Page 51] + +RFC 2616 HTTP/1.1 June 1999 + + + However, it is possible that a sequence of several requests is non- + idempotent, even if all of the methods executed in that sequence are + idempotent. (A sequence is idempotent if a single execution of the + entire sequence always yields a result that is not changed by a + reexecution of all, or part, of that sequence.) For example, a + sequence is non-idempotent if its result depends on a value that is + later modified in the same sequence. + + A sequence that never has side effects is idempotent, by definition + (provided that no concurrent operations are being executed on the + same set of resources). + +9.2 OPTIONS + + The OPTIONS method represents a request for information about the + communication options available on the request/response chain + identified by the Request-URI. This method allows the client to + determine the options and/or requirements associated with a resource, + or the capabilities of a server, without implying a resource action + or initiating a resource retrieval. + + Responses to this method are not cacheable. + + If the OPTIONS request includes an entity-body (as indicated by the + presence of Content-Length or Transfer-Encoding), then the media type + MUST be indicated by a Content-Type field. Although this + specification does not define any use for such a body, future + extensions to HTTP might use the OPTIONS body to make more detailed + queries on the server. A server that does not support such an + extension MAY discard the request body. + + If the Request-URI is an asterisk ("*"), the OPTIONS request is + intended to apply to the server in general rather than to a specific + resource. Since a server's communication options typically depend on + the resource, the "*" request is only useful as a "ping" or "no-op" + type of method; it does nothing beyond allowing the client to test + the capabilities of the server. 
For example, this can be used to test + a proxy for HTTP/1.1 compliance (or lack thereof). + + If the Request-URI is not an asterisk, the OPTIONS request applies + only to the options that are available when communicating with that + resource. + + A 200 response SHOULD include any header fields that indicate + optional features implemented by the server and applicable to that + resource (e.g., Allow), possibly including extensions not defined by + this specification. The response body, if any, SHOULD also include + information about the communication options. The format for such a + + + +Fielding, et al. Standards Track [Page 52] + +RFC 2616 HTTP/1.1 June 1999 + + + body is not defined by this specification, but might be defined by + future extensions to HTTP. Content negotiation MAY be used to select + the appropriate response format. If no response body is included, the + response MUST include a Content-Length field with a field-value of + "0". + + The Max-Forwards request-header field MAY be used to target a + specific proxy in the request chain. When a proxy receives an OPTIONS + request on an absoluteURI for which request forwarding is permitted, + the proxy MUST check for a Max-Forwards field. If the Max-Forwards + field-value is zero ("0"), the proxy MUST NOT forward the message; + instead, the proxy SHOULD respond with its own communication options. + If the Max-Forwards field-value is an integer greater than zero, the + proxy MUST decrement the field-value when it forwards the request. If + no Max-Forwards field is present in the request, then the forwarded + request MUST NOT include a Max-Forwards field. + +9.3 GET + + The GET method means retrieve whatever information (in the form of an + entity) is identified by the Request-URI. 
If the Request-URI refers + to a data-producing process, it is the produced data which shall be + returned as the entity in the response and not the source text of the + process, unless that text happens to be the output of the process. + + The semantics of the GET method change to a "conditional GET" if the + request message includes an If-Modified-Since, If-Unmodified-Since, + If-Match, If-None-Match, or If-Range header field. A conditional GET + method requests that the entity be transferred only under the + circumstances described by the conditional header field(s). The + conditional GET method is intended to reduce unnecessary network + usage by allowing cached entities to be refreshed without requiring + multiple requests or transferring data already held by the client. + + The semantics of the GET method change to a "partial GET" if the + request message includes a Range header field. A partial GET requests + that only part of the entity be transferred, as described in section + 14.35. The partial GET method is intended to reduce unnecessary + network usage by allowing partially-retrieved entities to be + completed without transferring data already held by the client. + + The response to a GET request is cacheable if and only if it meets + the requirements for HTTP caching described in section 13. + + See section 15.1.3 for security considerations when used for forms. + + + + + + +Fielding, et al. Standards Track [Page 53] + +RFC 2616 HTTP/1.1 June 1999 + + +9.4 HEAD + + The HEAD method is identical to GET except that the server MUST NOT + return a message-body in the response. The metainformation contained + in the HTTP headers in response to a HEAD request SHOULD be identical + to the information sent in response to a GET request. This method can + be used for obtaining metainformation about the entity implied by the + request without transferring the entity-body itself. 
This method is + often used for testing hypertext links for validity, accessibility, + and recent modification. + + The response to a HEAD request MAY be cacheable in the sense that the + information contained in the response MAY be used to update a + previously cached entity from that resource. If the new field values + indicate that the cached entity differs from the current entity (as + would be indicated by a change in Content-Length, Content-MD5, ETag + or Last-Modified), then the cache MUST treat the cache entry as + stale. + +9.5 POST + + The POST method is used to request that the origin server accept the + entity enclosed in the request as a new subordinate of the resource + identified by the Request-URI in the Request-Line. POST is designed + to allow a uniform method to cover the following functions: + +[[ Should be: ]] +[[ The POST method is used to request that the origin server accept the ]] +[[ entity enclosed in the request as data to be processed by the resource ]] +[[ identified by the Request-URI in the Request-Line. POST is designed ]] +[[ to allow a uniform method to cover the following functions: ]] + + - Annotation of existing resources; + + - Posting a message to a bulletin board, newsgroup, mailing list, + or similar group of articles; + + - Providing a block of data, such as the result of submitting a + form, to a data-handling process; + + - Extending a database through an append operation. + + The actual function performed by the POST method is determined by the + server and is usually dependent on the Request-URI. The posted entity + is subordinate to that URI in the same way that a file is subordinate + to a directory containing it, a news article is subordinate to a + newsgroup to which it is posted, or a record is subordinate to a + database. + + [[ Remove second sentence ("The posted entity is subordinate") above ]] + + The action performed by the POST method might not result in a + resource that can be identified by a URI. 
In this case, either 200 + (OK) or 204 (No Content) is the appropriate response status, + depending on whether or not the response includes an entity that + describes the result. + + + +Fielding, et al. Standards Track [Page 54] + +RFC 2616 HTTP/1.1 June 1999 + + + If a resource has been created on the origin server, the response + SHOULD be 201 (Created) and contain an entity which describes the + status of the request and refers to the new resource, and a Location + header (see section 14.30). + + Responses to this method are not cacheable, unless the response + includes appropriate Cache-Control or Expires header fields. However, + the 303 (See Other) response can be used to direct the user agent to + retrieve a cacheable resource. + + POST requests MUST obey the message transmission requirements set out + in section 8.2. + + See section 15.1.3 for security considerations. + +9.6 PUT + + The PUT method requests that the enclosed entity be stored under the + supplied Request-URI. If the Request-URI refers to an already + existing resource, the enclosed entity SHOULD be considered as a + modified version of the one residing on the origin server. If the + Request-URI does not point to an existing resource, and that URI is + capable of being defined as a new resource by the requesting user + agent, the origin server can create the resource with that URI. If a + new resource is created, the origin server MUST inform the user agent + via the 201 (Created) response. If an existing resource is modified, + either the 200 (OK) or 204 (No Content) response codes SHOULD be sent + to indicate successful completion of the request. If the resource + could not be created or modified with the Request-URI, an appropriate + error response SHOULD be given that reflects the nature of the + problem. The recipient of the entity MUST NOT ignore any Content-* + (e.g. 
Content-Range) headers that it does not understand or implement + and MUST return a 501 (Not Implemented) response in such cases. + + If the request passes through a cache and the Request-URI identifies + one or more currently cached entities, those entries SHOULD be + treated as stale. Responses to this method are not cacheable. + + The fundamental difference between the POST and PUT requests is + reflected in the different meaning of the Request-URI. The URI in a + POST request identifies the resource that will handle the enclosed + entity. That resource might be a data-accepting process, a gateway to + some other protocol, or a separate entity that accepts annotations. + In contrast, the URI in a PUT request identifies the entity enclosed + with the request -- the user agent knows what URI is intended and the + server MUST NOT attempt to apply the request to some other resource. + If the server desires that the request be applied to a different URI, + + + + +Fielding, et al. Standards Track [Page 55] + +RFC 2616 HTTP/1.1 June 1999 + + + it MUST send a 301 (Moved Permanently) response; the user agent MAY + then make its own decision regarding whether or not to redirect the + request. + + A single resource MAY be identified by many different URIs. For + example, an article might have a URI for identifying "the current + version" which is separate from the URI identifying each particular + version. In this case, a PUT request on a general URI might result in + several other URIs being defined by the origin server. + + HTTP/1.1 does not define how a PUT method affects the state of an + origin server. + + PUT requests MUST obey the message transmission requirements set out + in section 8.2. + + Unless otherwise specified for a particular entity-header, the + entity-headers in the PUT request SHOULD be applied to the resource + created or modified by the PUT. 
+ +9.7 DELETE + + The DELETE method requests that the origin server delete the resource + identified by the Request-URI. This method MAY be overridden by human + intervention (or other means) on the origin server. The client cannot + be guaranteed that the operation has been carried out, even if the + status code returned from the origin server indicates that the action + has been completed successfully. However, the server SHOULD NOT + indicate success unless, at the time the response is given, it + intends to delete the resource or move it to an inaccessible + location. + + A successful response SHOULD be 200 (OK) if the response includes an + entity describing the status, 202 (Accepted) if the action has not + yet been enacted, or 204 (No Content) if the action has been enacted + but the response does not include an entity. + + If the request passes through a cache and the Request-URI identifies + one or more currently cached entities, those entries SHOULD be + treated as stale. Responses to this method are not cacheable. + +9.8 TRACE + + The TRACE method is used to invoke a remote, application-layer loop- + back of the request message. The final recipient of the request + SHOULD reflect the message received back to the client as the + entity-body of a 200 (OK) response. The final recipient is either the + + + + +Fielding, et al. Standards Track [Page 56] + +RFC 2616 HTTP/1.1 June 1999 + + + origin server or the first proxy or gateway to receive a Max-Forwards + value of zero (0) in the request (see section 14.31). A TRACE request + MUST NOT include an entity. + + TRACE allows the client to see what is being received at the other + end of the request chain and use that data for testing or diagnostic + information. The value of the Via header field (section 14.45) is of + particular interest, since it acts as a trace of the request chain. 
+ Use of the Max-Forwards header field allows the client to limit the + length of the request chain, which is useful for testing a chain of + proxies forwarding messages in an infinite loop. + + If the request is valid, the response SHOULD contain the entire + request message in the entity-body, with a Content-Type of + "message/http". Responses to this method MUST NOT be cached. + +9.9 CONNECT + + This specification reserves the method name CONNECT for use with a + proxy that can dynamically switch to being a tunnel (e.g. SSL + tunneling [44]). + +10 Status Code Definitions + + Each Status-Code is described below, including a description of which + method(s) it can follow and any metainformation required in the + response. + +10.1 Informational 1xx + + This class of status code indicates a provisional response, + consisting only of the Status-Line and optional headers, and is + terminated by an empty line. There are no required headers for this + class of status code. Since HTTP/1.0 did not define any 1xx status + codes, servers MUST NOT send a 1xx response to an HTTP/1.0 client + except under experimental conditions. + + A client MUST be prepared to accept one or more 1xx status responses + prior to a regular response, even if the client does not expect a 100 + (Continue) status message. Unexpected 1xx status responses MAY be + ignored by a user agent. + + Proxies MUST forward 1xx responses, unless the connection between the + proxy and its client has been closed, or unless the proxy itself + requested the generation of the 1xx response. (For example, if a + + + + + + +Fielding, et al. Standards Track [Page 57] + +RFC 2616 HTTP/1.1 June 1999 + + + proxy adds an "Expect: 100-continue" field when it forwards a request, + then it need not forward the corresponding 100 (Continue) + response(s).) + +10.1.1 100 Continue + + The client SHOULD continue with its request.
This interim response is + used to inform the client that the initial part of the request has + been received and has not yet been rejected by the server. The client + SHOULD continue by sending the remainder of the request or, if the + request has already been completed, ignore this response. The server + MUST send a final response after the request has been completed. See + section 8.2.3 for detailed discussion of the use and handling of this + status code. + +10.1.2 101 Switching Protocols + + The server understands and is willing to comply with the client's + request, via the Upgrade message header field (section 14.42), for a + change in the application protocol being used on this connection. The + server will switch protocols to those defined by the response's + Upgrade header field immediately after the empty line which + terminates the 101 response. + + The protocol SHOULD be switched only when it is advantageous to do + so. For example, switching to a newer version of HTTP is advantageous + over older versions, and switching to a real-time, synchronous + protocol might be advantageous when delivering resources that use + such features. + +10.2 Successful 2xx + + This class of status code indicates that the client's request was + successfully received, understood, and accepted. + +10.2.1 200 OK + + The request has succeeded. The information returned with the response + is dependent on the method used in the request, for example: + + GET an entity corresponding to the requested resource is sent in + the response; + + HEAD the entity-header fields corresponding to the requested + resource are sent in the response without any message-body; + + POST an entity describing or containing the result of the action; + + + + +Fielding, et al. Standards Track [Page 58] + +RFC 2616 HTTP/1.1 June 1999 + + + TRACE an entity containing the request message as received by the + end server. 
+ +10.2.2 201 Created + + The request has been fulfilled and resulted in a new resource being + created. The newly created resource can be referenced by the URI(s) + returned in the entity of the response, with the most specific URI + for the resource given by a Location header field. The response + SHOULD include an entity containing a list of resource + characteristics and location(s) from which the user or user agent can + choose the one most appropriate. The entity format is specified by + the media type given in the Content-Type header field. The origin + server MUST create the resource before returning the 201 status code. + If the action cannot be carried out immediately, the server SHOULD + respond with 202 (Accepted) response instead. + + A 201 response MAY contain an ETag response header field indicating + the current value of the entity tag for the requested variant just + created, see section 14.19. + +10.2.3 202 Accepted + + The request has been accepted for processing, but the processing has + not been completed. The request might or might not eventually be + acted upon, as it might be disallowed when processing actually takes + place. There is no facility for re-sending a status code from an + asynchronous operation such as this. + + The 202 response is intentionally non-committal. Its purpose is to + allow a server to accept a request for some other process (perhaps a + batch-oriented process that is only run once per day) without + requiring that the user agent's connection to the server persist + until the process is completed. The entity returned with this + response SHOULD include an indication of the request's current status + and either a pointer to a status monitor or some estimate of when the + user can expect the request to be fulfilled. 
+ +10.2.4 203 Non-Authoritative Information + + The returned metainformation in the entity-header is not the + definitive set as available from the origin server, but is gathered + from a local or a third-party copy. The set presented MAY be a subset + or superset of the original version. For example, including local + annotation information about the resource might result in a superset + of the metainformation known by the origin server. Use of this + response code is not required and is only appropriate when the + response would otherwise be 200 (OK). + + + +Fielding, et al. Standards Track [Page 59] + +RFC 2616 HTTP/1.1 June 1999 + + +10.2.5 204 No Content + + The server has fulfilled the request but does not need to return an + entity-body, and might want to return updated metainformation. The + response MAY include new or updated metainformation in the form of + entity-headers, which if present SHOULD be associated with the + requested variant. + + If the client is a user agent, it SHOULD NOT change its document view + from that which caused the request to be sent. This response is + primarily intended to allow input for actions to take place without + causing a change to the user agent's active document view, although + any new or updated metainformation SHOULD be applied to the document + currently in the user agent's active view. + + The 204 response MUST NOT include a message-body, and thus is always + terminated by the first empty line after the header fields. + +10.2.6 205 Reset Content + + The server has fulfilled the request and the user agent SHOULD reset + the document view which caused the request to be sent. This response + is primarily intended to allow input for actions to take place via + user input, followed by a clearing of the form in which the input is + given so that the user can easily initiate another input action. The + response MUST NOT include an entity. 
+ +10.2.7 206 Partial Content + + The server has fulfilled the partial GET request for the resource. + The request MUST have included a Range header field (section 14.35) + indicating the desired range, and MAY have included an If-Range + header field (section 14.27) to make the request conditional. + + The response MUST include the following header fields: + + - Either a Content-Range header field (section 14.16) indicating + the range included with this response, or a multipart/byteranges + Content-Type including Content-Range fields for each part. If a + Content-Length header field is present in the response, its + value MUST match the actual number of OCTETs transmitted in the + message-body. + + - Date + + - ETag and/or Content-Location, if the header would have been sent + in a 200 response to the same request + + + + +Fielding, et al. Standards Track [Page 60] + +RFC 2616 HTTP/1.1 June 1999 + + + - Expires, Cache-Control, and/or Vary, if the field-value might + differ from that sent in any previous response for the same + variant + + If the 206 response is the result of an If-Range request that used a + strong cache validator (see section 13.3.3), the response SHOULD NOT + include other entity-headers. If the response is the result of an + If-Range request that used a weak validator, the response MUST NOT + include other entity-headers; this prevents inconsistencies between + cached entity-bodies and updated headers. Otherwise, the response + MUST include all of the entity-headers that would have been returned + with a 200 (OK) response to the same request. + +[[ Should be: ]] +[[ If the 206 response is the result of an If-Range request, the ]] +[[ response SHOULD NOT include other entity-headers. Otherwise, the ]] +[[ response MUST include all of the entity-headers that would have ]] +[[ been returned with a 200 (OK) response to the same request. 
]] + + A cache MUST NOT combine a 206 response with other previously cached + content if the ETag or Last-Modified headers do not match exactly, + see 13.5.4. + + A cache that does not support the Range and Content-Range headers + MUST NOT cache 206 (Partial) responses. + +10.3 Redirection 3xx + + This class of status code indicates that further action needs to be + taken by the user agent in order to fulfill the request. The action + required MAY be carried out by the user agent without interaction + with the user if and only if the method used in the second request is + GET or HEAD. A client SHOULD detect infinite redirection loops, since + such loops generate network traffic for each redirection. + + Note: previous versions of this specification recommended a + maximum of five redirections. Content developers should be aware + that there might be clients that implement such a fixed + limitation. + +10.3.1 300 Multiple Choices + + The requested resource corresponds to any one of a set of + representations, each with its own specific location, and agent- + driven negotiation information (section 12) is being provided so that + the user (or user agent) can select a preferred representation and + redirect its request to that location. + + Unless it was a HEAD request, the response SHOULD include an entity + containing a list of resource characteristics and location(s) from + which the user or user agent can choose the one most appropriate. The + entity format is specified by the media type given in the Content- + Type header field. Depending upon the format and the capabilities of + + + + +Fielding, et al. Standards Track [Page 61] + +RFC 2616 HTTP/1.1 June 1999 + + + the user agent, selection of the most appropriate choice MAY be + performed automatically. However, this specification does not define + any standard for such automatic selection. 
+ + If the server has a preferred choice of representation, it SHOULD + include the specific URI for that representation in the Location + field; user agents MAY use the Location field value for automatic + redirection. This response is cacheable unless indicated otherwise. + +10.3.2 301 Moved Permanently + + The requested resource has been assigned a new permanent URI and any + future references to this resource SHOULD use one of the returned + URIs. Clients with link editing capabilities ought to automatically + re-link references to the Request-URI to one or more of the new + references returned by the server, where possible. This response is + cacheable unless indicated otherwise. + + The new permanent URI SHOULD be given by the Location field in the + response. Unless the request method was HEAD, the entity of the + response SHOULD contain a short hypertext note with a hyperlink to + the new URI(s). + + If the 301 status code is received in response to a request other + than GET or HEAD, the user agent MUST NOT automatically redirect the + request unless it can be confirmed by the user, since this might + change the conditions under which the request was issued. + +[[ Should be: ]] +[[ If the 301 status code is received in response to a request method ]] +[[ that is known to be "safe", as defined in section 9.1.1, then the ]] +[[ request MAY be automatically redirected by the user agent without ]] +[[ confirmation. Otherwise, the user agent MUST NOT automatically ]] +[[ redirect the request unless it is confirmed by the user, since the ]] +[[ new URI might change the conditions under which the request was ]] +[[ issued. ]] + + Note: When automatically redirecting a POST request after + receiving a 301 status code, some existing HTTP/1.0 user agents + will erroneously change it into a GET request. + +10.3.3 302 Found + + The requested resource resides temporarily under a different URI. 
+ Since the redirection might be altered on occasion, the client SHOULD + continue to use the Request-URI for future requests. This response + is only cacheable if indicated by a Cache-Control or Expires header + field. + + The temporary URI SHOULD be given by the Location field in the + response. Unless the request method was HEAD, the entity of the + response SHOULD contain a short hypertext note with a hyperlink to + the new URI(s). + + + + + + + +Fielding, et al. Standards Track [Page 62] + +RFC 2616 HTTP/1.1 June 1999 + + + If the 302 status code is received in response to a request other + than GET or HEAD, the user agent MUST NOT automatically redirect the + request unless it can be confirmed by the user, since this might + change the conditions under which the request was issued. + + [[ See errata to 10.3.3 ]] + + Note: RFC 1945 and RFC 2068 specify that the client is not allowed + to change the method on the redirected request. However, most + existing user agent implementations treat 302 as if it were a 303 + response, performing a GET on the Location field-value regardless + of the original request method. The status codes 303 and 307 have + been added for servers that wish to make unambiguously clear which + kind of reaction is expected of the client. + +10.3.4 303 See Other + + The response to the request can be found under a different URI and + SHOULD be retrieved using a GET method on that resource. This method + exists primarily to allow the output of a POST-activated script to + redirect the user agent to a selected resource. The new URI is not a + substitute reference for the originally requested resource. The 303 + response MUST NOT be cached, but the response to the second + (redirected) request might be cacheable. + + The different URI SHOULD be given by the Location field in the + response. Unless the request method was HEAD, the entity of the + response SHOULD contain a short hypertext note with a hyperlink to + the new URI(s). 
+ + Note: Many pre-HTTP/1.1 user agents do not understand the 303 + status. When interoperability with such clients is a concern, the + 302 status code may be used instead, since most user agents react + to a 302 response as described here for 303. + +10.3.5 304 Not Modified + + If the client has performed a conditional GET request and access is + allowed, but the document has not been modified, the server SHOULD + respond with this status code. The 304 response MUST NOT contain a + message-body, and thus is always terminated by the first empty line + after the header fields. + + The response MUST include the following header fields: + + - Date, unless its omission is required by section 14.18.1 + + + + + + + +Fielding, et al. Standards Track [Page 63] + +RFC 2616 HTTP/1.1 June 1999 + + + If a clockless origin server obeys these rules, and proxies and + clients add their own Date to any response received without one (as + already specified by [RFC 2068], section 14.19), caches will operate + correctly. + + - ETag and/or Content-Location, if the header would have been sent + in a 200 response to the same request + + - Expires, Cache-Control, and/or Vary, if the field-value might + differ from that sent in any previous response for the same + variant + + If the conditional GET used a strong cache validator (see section + 13.3.3), the response SHOULD NOT include other entity-headers. + Otherwise (i.e., the conditional GET used a weak validator), the + response MUST NOT include other entity-headers; this prevents + inconsistencies between cached entity-bodies and updated headers. + + If a 304 response indicates an entity not currently cached, then the + cache MUST disregard the response and repeat the request without the + conditional. + + If a cache uses a received 304 response to update a cache entry, the + cache MUST update the entry to reflect any new field values given in + the response. 
+ +10.3.6 305 Use Proxy + + The requested resource MUST be accessed through the proxy given by + the Location field. The Location field gives the URI of the proxy. + The recipient is expected to repeat this single request via the + proxy. 305 responses MUST only be generated by origin servers. + + Note: RFC 2068 was not clear that 305 was intended to redirect a + single request, and to be generated by origin servers only. Not + observing these limitations has significant security consequences. + +10.3.7 306 (Unused) + + The 306 status code was used in a previous version of the + specification, is no longer used, and the code is reserved. + + + + + + + + + + +Fielding, et al. Standards Track [Page 64] + +RFC 2616 HTTP/1.1 June 1999 + + +10.3.8 307 Temporary Redirect + + The requested resource resides temporarily under a different URI. + Since the redirection MAY be altered on occasion, the client SHOULD + continue to use the Request-URI for future requests. This response + is only cacheable if indicated by a Cache-Control or Expires header + field. + + The temporary URI SHOULD be given by the Location field in the + response. Unless the request method was HEAD, the entity of the + response SHOULD contain a short hypertext note with a hyperlink to + the new URI(s) , since many pre-HTTP/1.1 user agents do not + understand the 307 status. Therefore, the note SHOULD contain the + information necessary for a user to repeat the original request on + the new URI. + + If the 307 status code is received in response to a request other + than GET or HEAD, the user agent MUST NOT automatically redirect the + request unless it can be confirmed by the user, since this might + change the conditions under which the request was issued. + + [[ See errata to 10.3.3 ]] + +10.4 Client Error 4xx + + The 4xx class of status code is intended for cases in which the + client seems to have erred. 
Except when responding to a HEAD request, + the server SHOULD include an entity containing an explanation of the + error situation, and whether it is a temporary or permanent + condition. These status codes are applicable to any request method. + User agents SHOULD display any included entity to the user. + + If the client is sending data, a server implementation using TCP + SHOULD be careful to ensure that the client acknowledges receipt of + the packet(s) containing the response, before the server closes the + input connection. If the client continues sending data to the server + after the close, the server's TCP stack will send a reset packet to + the client, which may erase the client's unacknowledged input buffers + before they can be read and interpreted by the HTTP application. + +10.4.1 400 Bad Request + + The request could not be understood by the server due to malformed + syntax. The client SHOULD NOT repeat the request without + modifications. + + + + + + + + +Fielding, et al. Standards Track [Page 65] + +RFC 2616 HTTP/1.1 June 1999 + + +10.4.2 401 Unauthorized + + The request requires user authentication. The response MUST include a + WWW-Authenticate header field (section 14.47) containing a challenge + applicable to the requested resource. The client MAY repeat the + request with a suitable Authorization header field (section 14.8). If + the request already included Authorization credentials, then the 401 + response indicates that authorization has been refused for those + credentials. If the 401 response contains the same challenge as the + prior response, and the user agent has already attempted + authentication at least once, then the user SHOULD be presented the + entity that was given in the response, since that entity might + include relevant diagnostic information. HTTP access authentication + is explained in "HTTP Authentication: Basic and Digest Access + Authentication" [43]. 
+ +10.4.3 402 Payment Required + + This code is reserved for future use. + +10.4.4 403 Forbidden + + The server understood the request, but is refusing to fulfill it. + Authorization will not help and the request SHOULD NOT be repeated. + If the request method was not HEAD and the server wishes to make + public why the request has not been fulfilled, it SHOULD describe the + reason for the refusal in the entity. If the server does not wish to + make this information available to the client, the status code 404 + (Not Found) can be used instead. + +10.4.5 404 Not Found + + The server has not found anything matching the Request-URI. No + indication is given of whether the condition is temporary or + permanent. The 410 (Gone) status code SHOULD be used if the server + knows, through some internally configurable mechanism, that an old + resource is permanently unavailable and has no forwarding address. + This status code is commonly used when the server does not wish to + reveal exactly why the request has been refused, or when no other + response is applicable. + +10.4.6 405 Method Not Allowed + + The method specified in the Request-Line is not allowed for the + resource identified by the Request-URI. The response MUST include an + Allow header containing a list of valid methods for the requested + resource. + + + + +Fielding, et al. Standards Track [Page 66] + +RFC 2616 HTTP/1.1 June 1999 + + +10.4.7 406 Not Acceptable + + The resource identified by the request is only capable of generating + response entities which have content characteristics not acceptable + according to the accept headers sent in the request. + + Unless it was a HEAD request, the response SHOULD include an entity + containing a list of available entity characteristics and location(s) + from which the user or user agent can choose the one most + appropriate. The entity format is specified by the media type given + in the Content-Type header field. 
Depending upon the format and the + capabilities of the user agent, selection of the most appropriate + choice MAY be performed automatically. However, this specification + does not define any standard for such automatic selection. + + Note: HTTP/1.1 servers are allowed to return responses which are + not acceptable according to the accept headers sent in the + request. In some cases, this may even be preferable to sending a + 406 response. User agents are encouraged to inspect the headers of + an incoming response to determine if it is acceptable. + + If the response could be unacceptable, a user agent SHOULD + temporarily stop receipt of more data and query the user for a + decision on further actions. + +10.4.8 407 Proxy Authentication Required + + This code is similar to 401 (Unauthorized), but indicates that the + client must first authenticate itself with the proxy. The proxy MUST + return a Proxy-Authenticate header field (section 14.33) containing a + challenge applicable to the proxy for the requested resource. The + client MAY repeat the request with a suitable Proxy-Authorization + header field (section 14.34). HTTP access authentication is explained + in "HTTP Authentication: Basic and Digest Access Authentication" + [43]. + +10.4.9 408 Request Timeout + + The client did not produce a request within the time that the server + was prepared to wait. The client MAY repeat the request without + modifications at any later time. + +10.4.10 409 Conflict + + The request could not be completed due to a conflict with the current + state of the resource. This code is only allowed in situations where + it is expected that the user might be able to resolve the conflict + and resubmit the request. The response body SHOULD include enough + + + +Fielding, et al. Standards Track [Page 67] + +RFC 2616 HTTP/1.1 June 1999 + + + information for the user to recognize the source of the conflict. 
+ Ideally, the response entity would include enough information for the + user or user agent to fix the problem; however, that might not be + possible and is not required. + + Conflicts are most likely to occur in response to a PUT request. For + example, if versioning were being used and the entity being PUT + included changes to a resource which conflict with those made by an + earlier (third-party) request, the server might use the 409 response + to indicate that it can't complete the request. In this case, the + response entity would likely contain a list of the differences + between the two versions in a format defined by the response + Content-Type. + +10.4.11 410 Gone + + The requested resource is no longer available at the server and no + forwarding address is known. This condition is expected to be + considered permanent. Clients with link editing capabilities SHOULD + delete references to the Request-URI after user approval. If the + server does not know, or has no facility to determine, whether or not + the condition is permanent, the status code 404 (Not Found) SHOULD be + used instead. This response is cacheable unless indicated otherwise. + + The 410 response is primarily intended to assist the task of web + maintenance by notifying the recipient that the resource is + intentionally unavailable and that the server owners desire that + remote links to that resource be removed. Such an event is common for + limited-time, promotional services and for resources belonging to + individuals no longer working at the server's site. It is not + necessary to mark all permanently unavailable resources as "gone" or + to keep the mark for any length of time -- that is left to the + discretion of the server owner. + +10.4.12 411 Length Required + + The server refuses to accept the request without a defined Content- + Length. 
The client MAY repeat the request if it adds a valid + Content-Length header field containing the length of the message-body + in the request message. + +10.4.13 412 Precondition Failed + + The precondition given in one or more of the request-header fields + evaluated to false when it was tested on the server. This response + code allows the client to place preconditions on the current resource + metainformation (header field data) and thus prevent the requested + method from being applied to a resource other than the one intended. + + + +Fielding, et al. Standards Track [Page 68] + +RFC 2616 HTTP/1.1 June 1999 + + +10.4.14 413 Request Entity Too Large + + The server is refusing to process a request because the request + entity is larger than the server is willing or able to process. The + server MAY close the connection to prevent the client from continuing + the request. + + If the condition is temporary, the server SHOULD include a Retry- + After header field to indicate that it is temporary and after what + time the client MAY try again. + +10.4.15 414 Request-URI Too Long + + The server is refusing to service the request because the Request-URI + is longer than the server is willing to interpret. This rare + condition is only likely to occur when a client has improperly + converted a POST request to a GET request with long query + information, when the client has descended into a URI "black hole" of + redirection (e.g., a redirected URI prefix that points to a suffix of + itself), or when the server is under attack by a client attempting to + exploit security holes present in some servers using fixed-length + buffers for reading or manipulating the Request-URI. + +10.4.16 415 Unsupported Media Type + + The server is refusing to service the request because the entity of + the request is in a format not supported by the requested resource + for the requested method. 
+ +10.4.17 416 Requested Range Not Satisfiable + + A server SHOULD return a response with this status code if a request + included a Range request-header field (section 14.35), and none of + the range-specifier values in this field overlap the current extent + of the selected resource, and the request did not include an If-Range + request-header field. (For byte-ranges, this means that the first- + byte-pos of all of the byte-range-spec values were greater than the + current length of the selected resource.) + + When this status code is returned for a byte-range request, the + response SHOULD include a Content-Range entity-header field + specifying the current length of the selected resource (see section + 14.16). This response MUST NOT use the multipart/byteranges content- + type. + + + + + + + +Fielding, et al. Standards Track [Page 69] + +RFC 2616 HTTP/1.1 June 1999 + + +10.4.18 417 Expectation Failed + + The expectation given in an Expect request-header field (see section + 14.20) could not be met by this server, or, if the server is a proxy, + the server has unambiguous evidence that the request could not be met + by the next-hop server. + +10.5 Server Error 5xx + + Response status codes beginning with the digit "5" indicate cases in + which the server is aware that it has erred or is incapable of + performing the request. Except when responding to a HEAD request, the + server SHOULD include an entity containing an explanation of the + error situation, and whether it is a temporary or permanent + condition. User agents SHOULD display any included entity to the + user. These response codes are applicable to any request method. + +10.5.1 500 Internal Server Error + + The server encountered an unexpected condition which prevented it + from fulfilling the request. + +10.5.2 501 Not Implemented + + The server does not support the functionality required to fulfill the + request. 
This is the appropriate response when the server does not + recognize the request method and is not capable of supporting it for + any resource. + +10.5.3 502 Bad Gateway + + The server, while acting as a gateway or proxy, received an invalid + response from the upstream server it accessed in attempting to + fulfill the request. + +10.5.4 503 Service Unavailable + + The server is currently unable to handle the request due to a + temporary overloading or maintenance of the server. The implication + is that this is a temporary condition which will be alleviated after + some delay. If known, the length of the delay MAY be indicated in a + Retry-After header. If no Retry-After is given, the client SHOULD + handle the response as it would for a 500 response. + + Note: The existence of the 503 status code does not imply that a + server must use it when becoming overloaded. Some servers may wish + to simply refuse the connection. + + + + +Fielding, et al. Standards Track [Page 70] + +RFC 2616 HTTP/1.1 June 1999 + + +10.5.5 504 Gateway Timeout + + The server, while acting as a gateway or proxy, did not receive a + timely response from the upstream server specified by the URI (e.g. + HTTP, FTP, LDAP) or some other auxiliary server (e.g. DNS) it needed + to access in attempting to complete the request. + + Note: Note to implementors: some deployed proxies are known to + return 400 or 500 when DNS lookups time out. + +10.5.6 505 HTTP Version Not Supported + + The server does not support, or refuses to support, the HTTP protocol + version that was used in the request message. The server is + indicating that it is unable or unwilling to complete the request + using the same major version as the client, as described in section + 3.1, other than with this error message. The response SHOULD contain + an entity describing why that version is not supported and what other + protocols are supported by that server. 
+ +11 Access Authentication + + HTTP provides several OPTIONAL challenge-response authentication + mechanisms which can be used by a server to challenge a client + request and by a client to provide authentication information. The + general framework for access authentication, and the specification of + "basic" and "digest" authentication, are specified in "HTTP + Authentication: Basic and Digest Access Authentication" [43]. This + specification adopts the definitions of "challenge" and "credentials" + from that specification. + +12 Content Negotiation + + Most HTTP responses include an entity which contains information for + interpretation by a human user. Naturally, it is desirable to supply + the user with the "best available" entity corresponding to the + request. Unfortunately for servers and caches, not all users have the + same preferences for what is "best," and not all user agents are + equally capable of rendering all entity types. For that reason, HTTP + has provisions for several mechanisms for "content negotiation" -- + the process of selecting the best representation for a given response + when there are multiple representations available. + + Note: This is not called "format negotiation" because the + alternate representations may be of the same media type, but use + different capabilities of that type, be in different languages, + etc. + + + + +Fielding, et al. Standards Track [Page 71] + +RFC 2616 HTTP/1.1 June 1999 + + + Any response containing an entity-body MAY be subject to negotiation, + including error responses. + + There are two kinds of content negotiation which are possible in + HTTP: server-driven and agent-driven negotiation. These two kinds of + negotiation are orthogonal and thus may be used separately or in + combination. 
One method of combination, referred to as transparent + negotiation, occurs when a cache uses the agent-driven negotiation + information provided by the origin server in order to provide + server-driven negotiation for subsequent requests. + +12.1 Server-driven Negotiation + + If the selection of the best representation for a response is made by + an algorithm located at the server, it is called server-driven + negotiation. Selection is based on the available representations of + the response (the dimensions over which it can vary; e.g. language, + content-coding, etc.) and the contents of particular header fields in + the request message or on other information pertaining to the request + (such as the network address of the client). + + Server-driven negotiation is advantageous when the algorithm for + selecting from among the available representations is difficult to + describe to the user agent, or when the server desires to send its + "best guess" to the client along with the first response (hoping to + avoid the round-trip delay of a subsequent request if the "best + guess" is good enough for the user). In order to improve the server's + guess, the user agent MAY include request header fields (Accept, + Accept-Language, Accept-Encoding, etc.) which describe its + preferences for such a response. + + Server-driven negotiation has disadvantages: + + 1. It is impossible for the server to accurately determine what + might be "best" for any given user, since that would require + complete knowledge of both the capabilities of the user agent + and the intended use for the response (e.g., does the user want + to view it on screen or print it on paper?). + + 2. Having the user agent describe its capabilities in every + request can be both very inefficient (given that only a small + percentage of responses have multiple representations) and a + potential violation of the user's privacy. + + 3. 
It complicates the implementation of an origin server and the + algorithms for generating responses to a request. + + + + + +Fielding, et al. Standards Track [Page 72] + +RFC 2616 HTTP/1.1 June 1999 + + + 4. It may limit a public cache's ability to use the same response + for multiple user's requests. + + HTTP/1.1 includes the following request-header fields for enabling + server-driven negotiation through description of user agent + capabilities and user preferences: Accept (section 14.1), Accept- + Charset (section 14.2), Accept-Encoding (section 14.3), Accept- + Language (section 14.4), and User-Agent (section 14.43). However, an + origin server is not limited to these dimensions and MAY vary the + response based on any aspect of the request, including information + outside the request-header fields or within extension header fields + not defined by this specification. + + The Vary header field can be used to express the parameters the + server uses to select a representation that is subject to server- + driven negotiation. See section 13.6 for use of the Vary header field + by caches and section 14.44 for use of the Vary header field by + servers. + +12.2 Agent-driven Negotiation + + With agent-driven negotiation, selection of the best representation + for a response is performed by the user agent after receiving an + initial response from the origin server. Selection is based on a list + of the available representations of the response included within the + header fields or entity-body of the initial response, with each + representation identified by its own URI. Selection from among the + representations may be performed automatically (if the user agent is + capable of doing so) or manually by the user selecting from a + generated (possibly hypertext) menu. 
+ + Agent-driven negotiation is advantageous when the response would vary + over commonly-used dimensions (such as type, language, or encoding), + when the origin server is unable to determine a user agent's + capabilities from examining the request, and generally when public + caches are used to distribute server load and reduce network usage. + + Agent-driven negotiation suffers from the disadvantage of needing a + second request to obtain the best alternate representation. This + second request is only efficient when caching is used. In addition, + this specification does not define any mechanism for supporting + automatic selection, though it also does not prevent any such + mechanism from being developed as an extension and used within + HTTP/1.1. + + + + + + + +Fielding, et al. Standards Track [Page 73] + +RFC 2616 HTTP/1.1 June 1999 + + + HTTP/1.1 defines the 300 (Multiple Choices) and 406 (Not Acceptable) + status codes for enabling agent-driven negotiation when the server is + unwilling or unable to provide a varying response using server-driven + negotiation. + +12.3 Transparent Negotiation + + Transparent negotiation is a combination of both server-driven and + agent-driven negotiation. When a cache is supplied with a form of the + list of available representations of the response (as in agent-driven + negotiation) and the dimensions of variance are completely understood + by the cache, then the cache becomes capable of performing server- + driven negotiation on behalf of the origin server for subsequent + requests on that resource. + + Transparent negotiation has the advantage of distributing the + negotiation work that would otherwise be required of the origin + server and also removing the second request delay of agent-driven + negotiation when the cache is able to correctly guess the right + response. 
+ + This specification does not define any mechanism for transparent + negotiation, though it also does not prevent any such mechanism from + being developed as an extension that could be used within HTTP/1.1. + +13 Caching in HTTP + + HTTP is typically used for distributed information systems, where + performance can be improved by the use of response caches. The + HTTP/1.1 protocol includes a number of elements intended to make + caching work as well as possible. Because these elements are + inextricable from other aspects of the protocol, and because they + interact with each other, it is useful to describe the basic caching + design of HTTP separately from the detailed descriptions of methods, + headers, response codes, etc. + + Caching would be useless if it did not significantly improve + performance. The goal of caching in HTTP/1.1 is to eliminate the need + to send requests in many cases, and to eliminate the need to send + full responses in many other cases. The former reduces the number of + network round-trips required for many operations; we use an + "expiration" mechanism for this purpose (see section 13.2). The + latter reduces network bandwidth requirements; we use a "validation" + mechanism for this purpose (see section 13.3). + + Requirements for performance, availability, and disconnected + operation require us to be able to relax the goal of semantic + transparency. The HTTP/1.1 protocol allows origin servers, caches, + + + +Fielding, et al. Standards Track [Page 74] + +RFC 2616 HTTP/1.1 June 1999 + + + and clients to explicitly reduce transparency when necessary. 
+ However, because non-transparent operation may confuse non-expert + users, and might be incompatible with certain server applications + (such as those for ordering merchandise), the protocol requires that + transparency be relaxed + + - only by an explicit protocol-level request when relaxed by + client or origin server + + - only with an explicit warning to the end user when relaxed by + cache or client + + Therefore, the HTTP/1.1 protocol provides these important elements: + + 1. Protocol features that provide full semantic transparency when + this is required by all parties. + + 2. Protocol features that allow an origin server or user agent to + explicitly request and control non-transparent operation. + + 3. Protocol features that allow a cache to attach warnings to + responses that do not preserve the requested approximation of + semantic transparency. + + A basic principle is that it must be possible for the clients to + detect any potential relaxation of semantic transparency. + + Note: The server, cache, or client implementor might be faced with + design decisions not explicitly discussed in this specification. + If a decision might affect semantic transparency, the implementor + ought to err on the side of maintaining transparency unless a + careful and complete analysis shows significant benefits in + breaking transparency. + +13.1.1 Cache Correctness + + A correct cache MUST respond to a request with the most up-to-date + response held by the cache that is appropriate to the request (see + sections 13.2.5, 13.2.6, and 13.12) which meets one of the following + conditions: + + 1. It has been checked for equivalence with what the origin server + would have returned by revalidating the response with the + origin server (section 13.3); + + + + + + + +Fielding, et al. Standards Track [Page 75] + +RFC 2616 HTTP/1.1 June 1999 + + + 2. It is "fresh enough" (see section 13.2). 
In the default case, + this means it meets the least restrictive freshness requirement + of the client, origin server, and cache (see section 14.9); if + the origin server so specifies, it is the freshness requirement + of the origin server alone. + + If a stored response is not "fresh enough" by the most + restrictive freshness requirement of both the client and the + origin server, in carefully considered circumstances the cache + MAY still return the response with the appropriate Warning + header (see section 13.1.5 and 14.46), unless such a response + is prohibited (e.g., by a "no-store" cache-directive, or by a + "no-cache" cache-request-directive; see section 14.9). + + 3. It is an appropriate 304 (Not Modified), 305 (Use Proxy), + or error (4xx or 5xx) response message. + + If the cache can not communicate with the origin server, then a + correct cache SHOULD respond as above if the response can be + correctly served from the cache; if not it MUST return an error or + warning indicating that there was a communication failure. + + If a cache receives a response (either an entire response, or a 304 + (Not Modified) response) that it would normally forward to the + requesting client, and the received response is no longer fresh, the + cache SHOULD forward it to the requesting client without adding a new + Warning (but without removing any existing Warning headers). A cache + SHOULD NOT attempt to revalidate a response simply because that + response became stale in transit; this might lead to an infinite + loop. A user agent that receives a stale response without a Warning + MAY display a warning indication to the user. + +13.1.2 Warnings + + Whenever a cache returns a response that is neither first-hand nor + "fresh enough" (in the sense of condition 2 in section 13.1.1), it + MUST attach a warning to that effect, using a Warning general-header. + The Warning header and the currently defined warnings are described + in section 14.46.
The warning allows clients to take appropriate + action. + + Warnings MAY be used for other purposes, both cache-related and + otherwise. The use of a warning, rather than an error status code, + distinguishes these responses from true failures. + + Warnings are assigned three digit warn-codes. The first digit + indicates whether the Warning MUST or MUST NOT be deleted from a + stored cache entry after a successful revalidation: + + + +Fielding, et al. Standards Track [Page 76] + +RFC 2616 HTTP/1.1 June 1999 + + + 1xx Warnings that describe the freshness or revalidation status of + the response, and so MUST be deleted after a successful + revalidation. 1XX warn-codes MAY be generated by a cache only when + validating a cached entry. It MUST NOT be generated by clients. + + 2xx Warnings that describe some aspect of the entity body or entity + headers that is not rectified by a revalidation (for example, a + lossy compression of the entity bodies) and which MUST NOT be + deleted after a successful revalidation. + + See section 14.46 for the definitions of the codes themselves. + + HTTP/1.0 caches will cache all Warnings in responses, without + deleting the ones in the first category. Warnings in responses that + are passed to HTTP/1.0 caches carry an extra warning-date field, + which prevents a future HTTP/1.1 recipient from believing an + erroneously cached Warning. + + Warnings also carry a warning text. The text MAY be in any + appropriate natural language (perhaps based on the client's Accept + headers), and include an OPTIONAL indication of what character set is + used. + + Multiple warnings MAY be attached to a response (either by the origin + server or by a cache), including multiple warnings with the same code + number. For example, a server might provide the same warning with + texts in both English and Basque. + + When multiple warnings are attached to a response, it might not be + practical or reasonable to display all of them to the user.
This + version of HTTP does not specify strict priority rules for deciding + which warnings to display and in what order, but does suggest some + heuristics. + +13.1.3 Cache-control Mechanisms + + The basic cache mechanisms in HTTP/1.1 (server-specified expiration + times and validators) are implicit directives to caches. In some + cases, a server or client might need to provide explicit directives + to the HTTP caches. We use the Cache-Control header for this purpose. + + The Cache-Control header allows a client or server to transmit a + variety of directives in either requests or responses. These + directives typically override the default caching algorithms. As a + general rule, if there is any apparent conflict between header + values, the most restrictive interpretation is applied (that is, the + one that is most likely to preserve semantic transparency). However, + + + + +Fielding, et al. Standards Track [Page 77] + +RFC 2616 HTTP/1.1 June 1999 + + + in some cases, cache-control directives are explicitly specified as + weakening the approximation of semantic transparency (for example, + "max-stale" or "public"). + + The cache-control directives are described in detail in section 14.9. + +13.1.4 Explicit User Agent Warnings + + Many user agents make it possible for users to override the basic + caching mechanisms. For example, the user agent might allow the user + to specify that cached entities (even explicitly stale ones) are + never validated. Or the user agent might habitually add "Cache- + Control: max-stale=3600" to every request. The user agent SHOULD NOT + default to either non-transparent behavior, or behavior that results + in abnormally ineffective caching, but MAY be explicitly configured + to do so by an explicit action of the user. 
+ + If the user has overridden the basic caching mechanisms, the user + agent SHOULD explicitly indicate to the user whenever this results in + the display of information that might not meet the server's + transparency requirements (in particular, if the displayed entity is + known to be stale). Since the protocol normally allows the user agent + to determine if responses are stale or not, this indication need only + be displayed when this actually happens. The indication need not be a + dialog box; it could be an icon (for example, a picture of a rotting + fish) or some other indicator. + + If the user has overridden the caching mechanisms in a way that would + abnormally reduce the effectiveness of caches, the user agent SHOULD + continually indicate this state to the user (for example, by a + display of a picture of currency in flames) so that the user does not + inadvertently consume excess resources or suffer from excessive + latency. + +13.1.5 Exceptions to the Rules and Warnings + + In some cases, the operator of a cache MAY choose to configure it to + return stale responses even when not requested by clients. This + decision ought not be made lightly, but may be necessary for reasons + of availability or performance, especially when the cache is poorly + connected to the origin server. Whenever a cache returns a stale + response, it MUST mark it as such (using a Warning header) enabling + the client software to alert the user that there might be a potential + problem. + + + + + + + +Fielding, et al. Standards Track [Page 78] + +RFC 2616 HTTP/1.1 June 1999 + + + It also allows the user agent to take steps to obtain a first-hand or + fresh response. For this reason, a cache SHOULD NOT return a stale + response if the client explicitly requests a first-hand or fresh one, + unless it is impossible to comply for technical or policy reasons. 
+ +13.1.6 Client-controlled Behavior + + While the origin server (and to a lesser extent, intermediate caches, + by their contribution to the age of a response) are the primary + source of expiration information, in some cases the client might need + to control a cache's decision about whether to return a cached + response without validating it. Clients do this using several + directives of the Cache-Control header. + + A client's request MAY specify the maximum age it is willing to + accept of an unvalidated response; specifying a value of zero forces + the cache(s) to revalidate all responses. A client MAY also specify + the minimum time remaining before a response expires. Both of these + options increase constraints on the behavior of caches, and so cannot + further relax the cache's approximation of semantic transparency. + + A client MAY also specify that it will accept stale responses, up to + some maximum amount of staleness. This loosens the constraints on the + caches, and so might violate the origin server's specified + constraints on semantic transparency, but might be necessary to + support disconnected operation, or high availability in the face of + poor connectivity. + +13.2 Expiration Model + +13.2.1 Server-Specified Expiration + + HTTP caching works best when caches can entirely avoid making + requests to the origin server. The primary mechanism for avoiding + requests is for an origin server to provide an explicit expiration + time in the future, indicating that a response MAY be used to satisfy + subsequent requests. In other words, a cache can return a fresh + response without first contacting the server. + + Our expectation is that servers will assign future explicit + expiration times to responses in the belief that the entity is not + likely to change, in a semantically significant way, before the + expiration time is reached. This normally preserves semantic + transparency, as long as the server's expiration times are carefully + chosen. 
+ + + + + + +Fielding, et al. Standards Track [Page 79] + +RFC 2616 HTTP/1.1 June 1999 + + + The expiration mechanism applies only to responses taken from a cache + and not to first-hand responses forwarded immediately to the + requesting client. + + If an origin server wishes to force a semantically transparent cache + to validate every request, it MAY assign an explicit expiration time + in the past. This means that the response is always stale, and so the + cache SHOULD validate it before using it for subsequent requests. See + section 14.9.4 for a more restrictive way to force revalidation. + + If an origin server wishes to force any HTTP/1.1 cache, no matter how + it is configured, to validate every request, it SHOULD use the "must- + revalidate" cache-control directive (see section 14.9). + + Servers specify explicit expiration times using either the Expires + header, or the max-age directive of the Cache-Control header. + + An expiration time cannot be used to force a user agent to refresh + its display or reload a resource; its semantics apply only to caching + mechanisms, and such mechanisms need only check a resource's + expiration status when a new request for that resource is initiated. + See section 13.13 for an explanation of the difference between caches + and history mechanisms. + +13.2.2 Heuristic Expiration + + Since origin servers do not always provide explicit expiration times, + HTTP caches typically assign heuristic expiration times, employing + algorithms that use other header values (such as the Last-Modified + time) to estimate a plausible expiration time. The HTTP/1.1 + specification does not provide specific algorithms, but does impose + worst-case constraints on their results. Since heuristic expiration + times might compromise semantic transparency, they ought to used [[should be "ought to be used"]] + cautiously, and we encourage origin servers to provide explicit + expiration times as much as possible.
+ +13.2.3 Age Calculations + + In order to know if a cached entry is fresh, a cache needs to know if + its age exceeds its freshness lifetime. We discuss how to calculate + the latter in section 13.2.4; this section describes how to calculate + the age of a response or cache entry. + + In this discussion, we use the term "now" to mean "the current value + of the clock at the host performing the calculation." Hosts that use + HTTP, but especially hosts running origin servers and caches, SHOULD + use NTP [28] or some similar protocol to synchronize their clocks to + a globally accurate time standard. + + + +Fielding, et al. Standards Track [Page 80] + +RFC 2616 HTTP/1.1 June 1999 + + + HTTP/1.1 requires origin servers to send a Date header, if possible, + with every response, giving the time at which the response was + generated (see section 14.18). We use the term "date_value" to denote + the value of the Date header, in a form appropriate for arithmetic + operations. + + HTTP/1.1 uses the Age response-header to convey the estimated age of + the response message when obtained from a cache. The Age field value + is the cache's estimate of the amount of time since the response was + generated or revalidated by the origin server. + + In essence, the Age value is the sum of the time that the response + has been resident in each of the caches along the path from the + origin server, plus the amount of time it has been in transit along + network paths. + + We use the term "age_value" to denote the value of the Age header, in + a form appropriate for arithmetic operations. + + A response's age can be calculated in two entirely independent ways: + + 1. now minus date_value, if the local clock is reasonably well + synchronized to the origin server's clock. If the result is + negative, the result is replaced by zero. + + 2. age_value, if all of the caches along the response path + implement HTTP/1.1. 
+ + Given that we have two independent ways to compute the age of a + response when it is received, we can combine these as + + corrected_received_age = max(now - date_value, age_value) + + and as long as we have either nearly synchronized clocks or all- + HTTP/1.1 paths, one gets a reliable (conservative) result. + + Because of network-imposed delays, some significant interval might + pass between the time that a server generates a response and the time + it is received at the next outbound cache or client. If uncorrected, + this delay could result in improperly low ages. + + Because the request that resulted in the returned Age value must have + been initiated prior to that Age value's generation, we can correct + for delays imposed by the network by recording the time at which the + request was initiated. Then, when an Age value is received, it MUST + be interpreted relative to the time the request was initiated, not + + + + + +Fielding, et al. Standards Track [Page 81] + +RFC 2616 HTTP/1.1 June 1999 + + + the time that the response was received. This algorithm results in + conservative behavior no matter how much delay is experienced. So, we + compute: + + corrected_initial_age = corrected_received_age + + (now - request_time) + + where "request_time" is the time (according to the local clock) when + the request that elicited this response was sent. + + Summary of age calculation algorithm, when a cache receives a + response: + + /* + * age_value + * is the value of Age: header received by the cache with + * this response. 
+ * date_value + * is the value of the origin server's Date: header + * request_time + * is the (local) time when the cache made the request + * that resulted in this cached response + * response_time + * is the (local) time when the cache received the + * response + * now + * is the current (local) time + */ + + apparent_age = max(0, response_time - date_value); + corrected_received_age = max(apparent_age, age_value); + response_delay = response_time - request_time; + corrected_initial_age = corrected_received_age + response_delay; + resident_time = now - response_time; + current_age = corrected_initial_age + resident_time; + + The current_age of a cache entry is calculated by adding the amount + of time (in seconds) since the cache entry was last validated by the + origin server to the corrected_initial_age. When a response is + generated from a cache entry, the cache MUST include a single Age + header field in the response with a value equal to the cache entry's + current_age. + + The presence of an Age header field in a response implies that a + response is not first-hand. However, the converse is not true, since + the lack of an Age header field in a response does not imply that the + + + + + +Fielding, et al. Standards Track [Page 82] + +RFC 2616 HTTP/1.1 June 1999 + + + response is first-hand unless all caches along the request path are + compliant with HTTP/1.1 (i.e., older HTTP caches did not implement + the Age header field). + +13.2.4 Expiration Calculations + + In order to decide whether a response is fresh or stale, we need to + compare its freshness lifetime to its age. The age is calculated as + described in section 13.2.3; this section describes how to calculate + the freshness lifetime, and to determine if a response has expired. + In the discussion below, the values can be represented in any form + appropriate for arithmetic operations. + + We use the term "expires_value" to denote the value of the Expires + header. 
We use the term "max_age_value" to denote an appropriate + value of the number of seconds carried by the "max-age" directive of + the Cache-Control header in a response (see section 14.9.3). + + The max-age directive takes priority over Expires, so if max-age is + present in a response, the calculation is simply: + + freshness_lifetime = max_age_value + + Otherwise, if Expires is present in the response, the calculation is: + + freshness_lifetime = expires_value - date_value + + Note that neither of these calculations is vulnerable to clock skew, + since all of the information comes from the origin server. + + If none of Expires, Cache-Control: max-age, or Cache-Control: s- + maxage (see section 14.9.3) appears in the response, and the response + does not include other restrictions on caching, the cache MAY compute + a freshness lifetime using a heuristic. The cache MUST attach Warning + 113 to any response whose age is more than 24 hours if such warning + has not already been added. + + Also, if the response does have a Last-Modified time, the heuristic + expiration value SHOULD be no more than some fraction of the interval + since that time. A typical setting of this fraction might be 10%. + + The calculation to determine if a response has expired is quite + simple: + + response_is_fresh = (freshness_lifetime > current_age) + + + + + + +Fielding, et al. Standards Track [Page 83] + +RFC 2616 HTTP/1.1 June 1999 + + +13.2.5 Disambiguating Expiration Values + + Because expiration values are assigned optimistically, it is possible + for two caches to contain fresh values for the same resource that are + different. + + If a client performing a retrieval receives a non-first-hand response + for a request that was already fresh in its own cache, and the Date + header in its existing cache entry is newer than the Date on the new + response, then the client MAY ignore the response. 
If so, it MAY + retry the request with a "Cache-Control: max-age=0" directive (see + section 14.9), to force a check with the origin server. + + If a cache has two fresh responses for the same representation with + different validators, it MUST use the one with the more recent Date + header. This situation might arise because the cache is pooling + responses from other caches, or because a client has asked for a + reload or a revalidation of an apparently fresh cache entry. + +13.2.6 Disambiguating Multiple Responses + + Because a client might be receiving responses via multiple paths, so + that some responses flow through one set of caches and other + responses flow through a different set of caches, a client might + receive responses in an order different from that in which the origin + server sent them. We would like the client to use the most recently + generated response, even if older responses are still apparently + fresh. + + Neither the entity tag nor the expiration value can impose an + ordering on responses, since it is possible that a later response + intentionally carries an earlier expiration time. The Date values are + ordered to a granularity of one second. + + When a client tries to revalidate a cache entry, and the response it + receives contains a Date header that appears to be older than the one + for the existing entry, then the client SHOULD repeat the request + unconditionally, and include + + Cache-Control: max-age=0 + + to force any intermediate caches to validate their copies directly + with the origin server, or + + Cache-Control: no-cache + + to force any intermediate caches to obtain a new copy from the origin + server. + + + +Fielding, et al. Standards Track [Page 84] + +RFC 2616 HTTP/1.1 June 1999 + + + If the Date values are equal, then the client MAY use either response + (or MAY, if it is being extremely prudent, request a new response). 
+ Servers MUST NOT depend on clients being able to choose + deterministically between responses generated during the same second, + if their expiration times overlap. + +13.3 Validation Model + + When a cache has a stale entry that it would like to use as a + response to a client's request, it first has to check with the origin + server (or possibly an intermediate cache with a fresh response) to + see if its cached entry is still usable. We call this "validating" + the cache entry. Since we do not want to have to pay the overhead of + retransmitting the full response if the cached entry is good, and we + do not want to pay the overhead of an extra round trip if the cached + entry is invalid, the HTTP/1.1 protocol supports the use of + conditional methods. + + The key protocol features for supporting conditional methods are + those concerned with "cache validators." When an origin server + generates a full response, it attaches some sort of validator to it, + which is kept with the cache entry. When a client (user agent or + proxy cache) makes a conditional request for a resource for which it + has a cache entry, it includes the associated validator in the + request. + + The server then checks that validator against the current validator + for the entity, and, if they match (see section 13.3.3), it responds + with a special status code (usually, 304 (Not Modified)) and no + entity-body. Otherwise, it returns a full response (including + entity-body). Thus, we avoid transmitting the full response if the + validator matches, and we avoid an extra round trip if it does not + match. + + In HTTP/1.1, a conditional request looks exactly the same as a normal + request for the same resource, except that it carries a special + header (which includes the validator) that implicitly turns the + method (usually, GET) into a conditional. + + The protocol includes both positive and negative senses of cache- + validating conditions. 
That is, it is possible to request either that + a method be performed if and only if a validator matches or if and + only if no validators match. + + + + + + + + +Fielding, et al. Standards Track [Page 85] + +RFC 2616 HTTP/1.1 June 1999 + + + Note: a response that lacks a validator may still be cached, and + served from cache until it expires, unless this is explicitly + prohibited by a cache-control directive. However, a cache cannot + do a conditional retrieval if it does not have a validator for the + entity, which means it will not be refreshable after it expires. + +13.3.1 Last-Modified Dates + + The Last-Modified entity-header field value is often used as a cache + validator. In simple terms, a cache entry is considered to be valid + if the entity has not been modified since the Last-Modified value. + +13.3.2 Entity Tag Cache Validators + + The ETag response-header field value, an entity tag, provides for an + "opaque" cache validator. This might allow more reliable validation + in situations where it is inconvenient to store modification dates, + where the one-second resolution of HTTP date values is not + sufficient, or where the origin server wishes to avoid certain + paradoxes that might arise from the use of modification dates. + + Entity Tags are described in section 3.11. The headers used with + entity tags are described in sections 14.19, 14.24, 14.26 and 14.44. + +13.3.3 Weak and Strong Validators + + Since both origin servers and caches will compare two validators to + decide if they represent the same or different entities, one normally + would expect that if the entity (the entity-body or any entity- + headers) changes in any way, then the associated validator would + change as well. If this is true, then we call this validator a + "strong validator." + + However, there might be cases when a server prefers to change the + validator only on semantically significant changes, and not when + insignificant aspects of the entity change. 
A validator that does not + always change when the resource changes is a "weak validator." + + Entity tags are normally "strong validators," but the protocol + provides a mechanism to tag an entity tag as "weak." One can think of + a strong validator as one that changes whenever the bits of an entity + changes, while a weak value changes whenever the meaning of an entity + changes. Alternatively, one can think of a strong validator as part + of an identifier for a specific entity, while a weak validator is + part of an identifier for a set of semantically equivalent entities. + + Note: One example of a strong validator is an integer that is + incremented in stable storage every time an entity is changed. + + + +Fielding, et al. Standards Track [Page 86] + +RFC 2616 HTTP/1.1 June 1999 + + + An entity's modification time, if represented with one-second + resolution, could be a weak validator, since it is possible that + the resource might be modified twice during a single second. + + Support for weak validators is optional. However, weak validators + allow for more efficient caching of equivalent objects; for + example, a hit counter on a site is probably good enough if it is + updated every few days or weeks, and any value during that period + is likely "good enough" to be equivalent. + + A "use" of a validator is either when a client generates a request + and includes the validator in a validating header field, or when a + server compares two validators. + + Strong validators are usable in any context. Weak validators are only + usable in contexts that do not depend on exact equality of an entity. + For example, either kind is usable for a conditional GET of a full + entity. However, only a strong validator is usable for a sub-range + retrieval, since otherwise the client might end up with an internally + inconsistent entity. + + Clients MAY issue simple (non-subrange) GET requests with either weak + validators or strong validators. 
Clients MUST NOT use weak validators + in other forms of request. + + The only function that the HTTP/1.1 protocol defines on validators is + comparison. There are two validator comparison functions, depending + on whether the comparison context allows the use of weak validators + or not: + + - The strong comparison function: in order to be considered equal, + both validators MUST be identical in every way, and both MUST + NOT be weak. + + - The weak comparison function: in order to be considered equal, + both validators MUST be identical in every way, but either or + both of them MAY be tagged as "weak" without affecting the + result. + + An entity tag is strong unless it is explicitly tagged as weak. + Section 3.11 gives the syntax for entity tags. + + A Last-Modified time, when used as a validator in a request, is + implicitly weak unless it is possible to deduce that it is strong, + using the following rules: + + - The validator is being compared by an origin server to the + actual current validator for the entity and, + + + +Fielding, et al. Standards Track [Page 87] + +RFC 2616 HTTP/1.1 June 1999 + + + - That origin server reliably knows that the associated entity did + not change twice during the second covered by the presented + validator. + + or + + - The validator is about to be used by a client in an If- + Modified-Since or If-Unmodified-Since header, because the client + has a cache entry for the associated entity, and + + - That cache entry includes a Date value, which gives the time + when the origin server sent the original response, and + + - The presented Last-Modified time is at least 60 seconds before + the Date value. 
+ + or + + - The validator is being compared by an intermediate cache to the + validator stored in its cache entry for the entity, and + + - That cache entry includes a Date value, which gives the time + when the origin server sent the original response, and + + - The presented Last-Modified time is at least 60 seconds before + the Date value. + + This method relies on the fact that if two different responses were + sent by the origin server during the same second, but both had the + same Last-Modified time, then at least one of those responses would + have a Date value equal to its Last-Modified time. The arbitrary 60- + second limit guards against the possibility that the Date and Last- + Modified values are generated from different clocks, or at somewhat + different times during the preparation of the response. An + implementation MAY use a value larger than 60 seconds, if it is + believed that 60 seconds is too short. + + If a client wishes to perform a sub-range retrieval on a value for + which it has only a Last-Modified time and no opaque validator, it + MAY do this only if the Last-Modified time is strong in the sense + described here. + + A cache or origin server receiving a conditional request, other than + a full-body GET request, MUST use the strong comparison function to + evaluate the condition. + + These rules allow HTTP/1.1 caches and clients to safely perform sub- + range retrievals on values that have been obtained from HTTP/1.0 + + + +Fielding, et al. Standards Track [Page 88] + +RFC 2616 HTTP/1.1 June 1999 + + + servers. + +13.3.4 Rules for When to Use Entity Tags and Last-Modified Dates + + We adopt a set of rules and recommendations for origin servers, + clients, and caches regarding when various validator types ought to + be used, and for what purposes. + + HTTP/1.1 origin servers: + + - SHOULD send an entity tag validator unless it is not feasible to + generate one. 
+ + - MAY send a weak entity tag instead of a strong entity tag, if + performance considerations support the use of weak entity tags, + or if it is unfeasible to send a strong entity tag. + + - SHOULD send a Last-Modified value if it is feasible to send one, + unless the risk of a breakdown in semantic transparency that + could result from using this date in an If-Modified-Since header + would lead to serious problems. + + In other words, the preferred behavior for an HTTP/1.1 origin server + is to send both a strong entity tag and a Last-Modified value. + + In order to be legal, a strong entity tag MUST change whenever the + associated entity value changes in any way. A weak entity tag SHOULD + change whenever the associated entity changes in a semantically + significant way. + + Note: in order to provide semantically transparent caching, an + origin server must avoid reusing a specific strong entity tag + value for two different entities, or reusing a specific weak + entity tag value for two semantically different entities. Cache + entries might persist for arbitrarily long periods, regardless of + expiration times, so it might be inappropriate to expect that a + cache will never again attempt to validate an entry using a + validator that it obtained at some point in the past. + + HTTP/1.1 clients: + + - If an entity tag has been provided by the origin server, MUST + use that entity tag in any cache-conditional request (using If- + Match or If-None-Match). + + - If only a Last-Modified value has been provided by the origin + server, SHOULD use that value in non-subrange cache-conditional + requests (using If-Modified-Since). + + + +Fielding, et al. Standards Track [Page 89] + +RFC 2616 HTTP/1.1 June 1999 + + + - If only a Last-Modified value has been provided by an HTTP/1.0 + origin server, MAY use that value in subrange cache-conditional + requests (using If-Unmodified-Since:). The user agent SHOULD + provide a way to disable this, in case of difficulty. 
+ + - If both an entity tag and a Last-Modified value have been + provided by the origin server, SHOULD use both validators in + cache-conditional requests. This allows both HTTP/1.0 and + HTTP/1.1 caches to respond appropriately. + + An HTTP/1.1 origin server, upon receiving a conditional request that + includes both a Last-Modified date (e.g., in an If-Modified-Since or + If-Unmodified-Since header field) and one or more entity tags (e.g., + in an If-Match, If-None-Match, or If-Range header field) as cache + validators, MUST NOT return a response status of 304 (Not Modified) + unless doing so is consistent with all of the conditional header + fields in the request. + + An HTTP/1.1 caching proxy, upon receiving a conditional request that + includes both a Last-Modified date and one or more entity tags as + cache validators, MUST NOT return a locally cached response to the + client unless that cached response is consistent with all of the + conditional header fields in the request. + + Note: The general principle behind these rules is that HTTP/1.1 + servers and clients should transmit as much non-redundant + information as is available in their responses and requests. + HTTP/1.1 systems receiving this information will make the most + conservative assumptions about the validators they receive. + + HTTP/1.0 clients and caches will ignore entity tags. Generally, + last-modified values received or used by these systems will + support transparent and efficient caching, and so HTTP/1.1 origin + servers should provide Last-Modified values. In those rare cases + where the use of a Last-Modified value as a validator by an + HTTP/1.0 system could result in a serious problem, then HTTP/1.1 + origin servers should not provide one. 
+ +13.3.5 Non-validating Conditionals + + The principle behind entity tags is that only the service author + knows the semantics of a resource well enough to select an + appropriate cache validation mechanism, and the specification of any + validator comparison function more complex than byte-equality would + open up a can of worms. Thus, comparisons of any other headers + (except Last-Modified, for compatibility with HTTP/1.0) are never + used for purposes of validating a cache entry. + + + + +Fielding, et al. Standards Track [Page 90] + +RFC 2616 HTTP/1.1 June 1999 + + +13.4 Response Cacheability + + Unless specifically constrained by a cache-control (section 14.9) + directive, a caching system MAY always store a successful response + (see section 13.8) as a cache entry, MAY return it without validation + if it is fresh, and MAY return it after successful validation. If + there is neither a cache validator nor an explicit expiration time + associated with a response, we do not expect it to be cached, but + certain caches MAY violate this expectation (for example, when little + or no network connectivity is available). A client can usually detect + that such a response was taken from a cache by comparing the Date + header to the current time. + + Note: some HTTP/1.0 caches are known to violate this expectation + without providing any Warning. + + However, in some cases it might be inappropriate for a cache to + retain an entity, or to return it in response to a subsequent + request. This might be because absolute semantic transparency is + deemed necessary by the service author, or because of security or + privacy considerations. Certain cache-control directives are + therefore provided so that the server can indicate that certain + resource entities, or portions thereof, are not to be cached + regardless of other considerations. 
+ + Note that section 14.8 normally prevents a shared cache from saving + and returning a response to a previous request if that request + included an Authorization header. + + A response received with a status code of 200, 203, 206, 300, 301 or + 410 MAY be stored by a cache and used in reply to a subsequent + request, subject to the expiration mechanism, unless a cache-control + directive prohibits caching. However, a cache that does not support + the Range and Content-Range headers MUST NOT cache 206 (Partial + Content) responses. + + A response received with any other status code (e.g. status codes 302 + and 307) MUST NOT be returned in a reply to a subsequent request + unless there are cache-control directives or another header(s) that + explicitly allow it. For example, these include the following: an + Expires header (section 14.21); a "max-age", "s-maxage", "must- + revalidate", "proxy-revalidate", "public" or "private" cache-control + directive (section 14.9). + + + + + + + + +Fielding, et al. Standards Track [Page 91] + +RFC 2616 HTTP/1.1 June 1999 + + +13.5 Constructing Responses From Caches + + The purpose of an HTTP cache is to store information received in + response to requests for use in responding to future requests. In + many cases, a cache simply returns the appropriate parts of a + response to the requester. However, if the cache holds a cache entry + based on a previous response, it might have to combine parts of a new + response with what is held in the cache entry. + +13.5.1 End-to-end and Hop-by-hop Headers + + For the purpose of defining the behavior of caches and non-caching + proxies, we divide HTTP headers into two categories: + + - End-to-end headers, which are transmitted to the ultimate + recipient of a request or response. End-to-end headers in + responses MUST be stored as part of a cache entry and MUST be + transmitted in any response formed from a cache entry. 
+ + - Hop-by-hop headers, which are meaningful only for a single + transport-level connection, and are not stored by caches or + forwarded by proxies. + + The following HTTP/1.1 headers are hop-by-hop headers: + + - Connection + - Keep-Alive + - Proxy-Authenticate + - Proxy-Authorization + - TE + - Trailers [[should be "Trailer"]] + - Transfer-Encoding + - Upgrade + + All other headers defined by HTTP/1.1 are end-to-end headers. + + Other hop-by-hop headers MUST be listed in a Connection header, + (section 14.10) to be introduced into HTTP/1.1 (or later). + +13.5.2 Non-modifiable Headers + + Some features of the HTTP/1.1 protocol, such as Digest + Authentication, depend on the value of certain end-to-end headers. A + transparent proxy SHOULD NOT modify an end-to-end header unless the + definition of that header requires or specifically allows that. + + + + + + +Fielding, et al. Standards Track [Page 92] + +RFC 2616 HTTP/1.1 June 1999 + + + A transparent proxy MUST NOT modify any of the following fields in a + request or response, and it MUST NOT add any of these fields if not + already present: + + - Content-Location + + - Content-MD5 + + - ETag + + - Last-Modified + + A transparent proxy MUST NOT modify any of the following fields in a + response: + + - Expires + + but it MAY add any of these fields if not already present. If an + Expires header is added, it MUST be given a field-value identical to + that of the Date header in that response. + + A proxy MUST NOT modify or add any of the following fields in a + message that contains the no-transform cache-control directive, or in + any request: + + - Content-Encoding + + - Content-Range + + - Content-Type + + A non-transparent proxy MAY modify or add these fields to a message + that does not include no-transform, but if it does so, it MUST add a + Warning 214 (Transformation applied) if one does not already appear + in the message (see section 14.46). 
+ + Warning: unnecessary modification of end-to-end headers might + cause authentication failures if stronger authentication + mechanisms are introduced in later versions of HTTP. Such + authentication mechanisms MAY rely on the values of header fields + not listed here. + + The Content-Length field of a request or response is added or deleted + according to the rules in section 4.4. A transparent proxy MUST + preserve the entity-length (section 7.2.2) of the entity-body, + although it MAY change the transfer-length (section 4.4). + + + + + +Fielding, et al. Standards Track [Page 93] + +RFC 2616 HTTP/1.1 June 1999 + + +13.5.3 Combining Headers + + When a cache makes a validating request to a server, and the server + provides a 304 (Not Modified) response or a 206 (Partial Content) + response, the cache then constructs a response to send to the + requesting client. + + If the status code is 304 (Not Modified), the cache uses the entity- + body stored in the cache entry as the entity-body of this outgoing + response. If the status code is 206 (Partial Content) and the ETag or + Last-Modified headers match exactly, the cache MAY combine the + contents stored in the cache entry with the new contents received in + the response and use the result as the entity-body of this outgoing + response, (see 13.5.4). + + The end-to-end headers stored in the cache entry are used for the + constructed response, except that + + - any stored Warning headers with warn-code 1xx (see section + 14.46) MUST be deleted from the cache entry and the forwarded + response. + + - any stored Warning headers with warn-code 2xx MUST be retained + in the cache entry and the forwarded response. + + - any end-to-end headers provided in the 304 or 206 response MUST + replace the corresponding headers from the cache entry. 
+ + Unless the cache decides to remove the cache entry, it MUST also + replace the end-to-end headers stored with the cache entry with + corresponding headers received in the incoming response, except for + Warning headers as described immediately above. If a header field- + name in the incoming response matches more than one header in the + cache entry, all such old headers MUST be replaced. + + In other words, the set of end-to-end headers received in the + incoming response overrides all corresponding end-to-end headers + stored with the cache entry (except for stored Warning headers with + warn-code 1xx, which are deleted even if not overridden). + + Note: this rule allows an origin server to use a 304 (Not + Modified) or a 206 (Partial Content) response to update any header + associated with a previous response for the same entity or sub- + ranges thereof, although it might not always be meaningful or + correct to do so. This rule does not allow an origin server to use + a 304 (Not Modified) or a 206 (Partial Content) response to + entirely delete a header that it had provided with a previous + response. + + + +Fielding, et al. Standards Track [Page 94] + +RFC 2616 HTTP/1.1 June 1999 + + +13.5.4 Combining Byte Ranges + + A response might transfer only a subrange of the bytes of an entity- + body, either because the request included one or more Range + specifications, or because a connection was broken prematurely. After + several such transfers, a cache might have received several ranges of + the same entity-body. + + If a cache has a stored non-empty set of subranges for an entity, and + an incoming response transfers another subrange, the cache MAY + combine the new subrange with the existing set if both the following + conditions are met: + + - Both the incoming response and the cache entry have a cache + validator. + + - The two cache validators match using the strong comparison + function (see section 13.3.3). 
+ + If either requirement is not met, the cache MUST use only the most + recent partial response (based on the Date values transmitted with + every response, and using the incoming response if these values are + equal or missing), and MUST discard the other partial information. + +13.6 Caching Negotiated Responses + + Use of server-driven content negotiation (section 12.1), as indicated + by the presence of a Vary header field in a response, alters the + conditions and procedure by which a cache can use the response for + subsequent requests. See section 14.44 for use of the Vary header + field by servers. + + A server SHOULD use the Vary header field to inform a cache of what + request-header fields were used to select among multiple + representations of a cacheable response subject to server-driven + negotiation. The set of header fields named by the Vary field value + is known as the "selecting" request-headers. + + When the cache receives a subsequent request whose Request-URI + specifies one or more cache entries including a Vary header field, + the cache MUST NOT use such a cache entry to construct a response to + the new request unless all of the selecting request-headers present + in the new request match the corresponding stored request-headers in + the original request. + + The selecting request-headers from two requests are defined to match + if and only if the selecting request-headers in the first request can + be transformed to the selecting request-headers in the second request + + + +Fielding, et al. Standards Track [Page 95] + +RFC 2616 HTTP/1.1 June 1999 + + + by adding or removing linear white space (LWS) at places where this + is allowed by the corresponding BNF, and/or combining multiple + message-header fields with the same field name following the rules + about message headers in section 4.2. 
+ + A Vary header field-value of "*" always fails to match and subsequent + requests on that resource can only be properly interpreted by the + origin server. + + If the selecting request header fields for the cached entry do not + match the selecting request header fields of the new request, then + the cache MUST NOT use a cached entry to satisfy the request unless + it first relays the new request to the origin server in a conditional + request and the server responds with 304 (Not Modified), including an + entity tag or Content-Location that indicates the entity to be used. + + If an entity tag was assigned to a cached representation, the + forwarded request SHOULD be conditional and include the entity tags + in an If-None-Match header field from all its cache entries for the + resource. This conveys to the server the set of entities currently + held by the cache, so that if any one of these entities matches the + requested entity, the server can use the ETag header field in its 304 + (Not Modified) response to tell the cache which entry is appropriate. + If the entity-tag of the new response matches that of an existing + entry, the new response SHOULD be used to update the header fields of + the existing entry, and the result MUST be returned to the client. + + If any of the existing cache entries contains only partial content + for the associated entity, its entity-tag SHOULD NOT be included in + the If-None-Match header field unless the request is for a range that + would be fully satisfied by that entry. + + If a cache receives a successful response whose Content-Location + field matches that of an existing cache entry for the same Request- + URI, whose entity-tag differs from that of the existing entry, and + whose Date is more recent than that of the existing entry, the + existing entry SHOULD NOT be returned in response to future requests + and SHOULD be deleted from the cache.
+ +13.7 Shared and Non-Shared Caches + + For reasons of security and privacy, it is necessary to make a + distinction between "shared" and "non-shared" caches. A non-shared + cache is one that is accessible only to a single user. Accessibility + in this case SHOULD be enforced by appropriate security mechanisms. + All other caches are considered to be "shared." Other sections of + + + + + +Fielding, et al. Standards Track [Page 96] + +RFC 2616 HTTP/1.1 June 1999 + + + this specification place certain constraints on the operation of + shared caches in order to prevent loss of privacy or failure of + access controls. + +13.8 Errors or Incomplete Response Cache Behavior + + A cache that receives an incomplete response (for example, with fewer + bytes of data than specified in a Content-Length header) MAY store + the response. However, the cache MUST treat this as a partial + response. Partial responses MAY be combined as described in section + 13.5.4; the result might be a full response or might still be + partial. A cache MUST NOT return a partial response to a client + without explicitly marking it as such, using the 206 (Partial + Content) status code. A cache MUST NOT return a partial response + using a status code of 200 (OK). + + If a cache receives a 5xx response while attempting to revalidate an + entry, it MAY either forward this response to the requesting client, + or act as if the server failed to respond. In the latter case, it MAY + return a previously received response unless the cached entry + includes the "must-revalidate" cache-control directive (see section + 14.9). + +13.9 Side Effects of GET and HEAD + + Unless the origin server explicitly prohibits the caching of their + responses, the application of GET and HEAD methods to any resources + SHOULD NOT have side effects that would lead to erroneous behavior if + these responses are taken from a cache. 
They MAY still have side + effects, but a cache is not required to consider such side effects in + its caching decisions. Caches are always expected to observe an + origin server's explicit restrictions on caching. + + We note one exception to this rule: since some applications have + traditionally used GETs and HEADs with query URLs (those containing a + "?" in the rel_path part) to perform operations with significant side + effects, caches MUST NOT treat responses to such URIs as fresh unless + the server provides an explicit expiration time. This specifically + means that responses from HTTP/1.0 servers for such URIs SHOULD NOT + be taken from a cache. See section 9.1.1 for related information. + +13.10 Invalidation After Updates or Deletions + + The effect of certain methods performed on a resource at the origin + server might cause one or more existing cache entries to become non- + transparently invalid. That is, although they might continue to be + "fresh," they do not accurately reflect what the origin server would + return for a new request on that resource. + + + +Fielding, et al. Standards Track [Page 97] + +RFC 2616 HTTP/1.1 June 1999 + + + There is no way for the HTTP protocol to guarantee that all such + cache entries are marked invalid. For example, the request that + caused the change at the origin server might not have gone through + the proxy where a cache entry is stored. However, several rules help + reduce the likelihood of erroneous behavior. + + In this section, the phrase "invalidate an entity" means that the + cache will either remove all instances of that entity from its + storage, or will mark these as "invalid" and in need of a mandatory + revalidation before they can be returned in response to a subsequent + request. + + Some HTTP methods MUST cause a cache to invalidate an entity. This is + either the entity referred to by the Request-URI, or by the Location + or Content-Location headers (if present). 
These methods are: + + - PUT + + - DELETE + + - POST + + In order to prevent denial of service attacks, an invalidation based + on the URI in a Location or Content-Location header MUST only be + performed if the host part is the same as in the Request-URI. + +[[ Should be: ]] +[[ An invalidation based on the URI in a Location or Content-Location ]] +[[ header MUST NOT be performed if the host part of that URI differs ]] +[[ from the host part in the Request-URI. This helps prevent denial of ]] +[[ service attacks. ]] + + A cache that passes through requests for methods it does not + understand SHOULD invalidate any entities referred to by the + Request-URI. + +13.11 Write-Through Mandatory + + All methods that might be expected to cause modifications to the + origin server's resources MUST be written through to the origin + server. This currently includes all methods except for GET and HEAD. + A cache MUST NOT reply to such a request from a client before having + transmitted the request to the inbound server, and having received a + corresponding response from the inbound server. This does not prevent + a proxy cache from sending a 100 (Continue) response before the + inbound server has sent its final reply. + + The alternative (known as "write-back" or "copy-back" caching) is not + allowed in HTTP/1.1, due to the difficulty of providing consistent + updates and the problems arising from server, cache, or network + failure prior to write-back. + + + + + + +Fielding, et al. Standards Track [Page 98] + +RFC 2616 HTTP/1.1 June 1999 + + +13.12 Cache Replacement + + If a new cacheable (see sections 14.9.2, 13.2.5, 13.2.6 and 13.8) + response is received from a resource while any existing responses for + the same resource are cached, the cache SHOULD use the new response + to reply to the current request. 
It MAY insert it into cache storage + and MAY, if it meets all other requirements, use it to respond to any + future requests that would previously have caused the old response to + be returned. If it inserts the new response into cache storage the + rules in section 13.5.3 apply. + + Note: a new response that has an older Date header value than + existing cached responses is not cacheable. + +13.13 History Lists + + User agents often have history mechanisms, such as "Back" buttons and + history lists, which can be used to redisplay an entity retrieved + earlier in a session. + + History mechanisms and caches are different. In particular history + mechanisms SHOULD NOT try to show a semantically transparent view of + the current state of a resource. Rather, a history mechanism is meant + to show exactly what the user saw at the time when the resource was + retrieved. + + By default, an expiration time does not apply to history mechanisms. + If the entity is still in storage, a history mechanism SHOULD display + it even if the entity has expired, unless the user has specifically + configured the agent to refresh expired history documents. + + This is not to be construed to prohibit the history mechanism from + telling the user that a view might be stale. + + Note: if history list mechanisms unnecessarily prevent users from + viewing stale resources, this will tend to force service authors + to avoid using HTTP expiration controls and cache controls when + they would otherwise like to. Service authors may consider it + important that users not be presented with error messages or + warning messages when they use navigation controls (such as BACK) + to view previously fetched resources. Even though sometimes such + resources ought not to be cached, or ought to expire quickly, user + interface considerations may force service authors to resort to + other means of preventing caching (e.g.
"once-only" URLs) in order + not to suffer the effects of improperly functioning history + mechanisms. + + + + + +Fielding, et al. Standards Track [Page 99] + +RFC 2616 HTTP/1.1 June 1999 + + +14 Header Field Definitions + + This section defines the syntax and semantics of all standard + HTTP/1.1 header fields. For entity-header fields, both sender and + recipient refer to either the client or the server, depending on who + sends and who receives the entity. + +14.1 Accept + + The Accept request-header field can be used to specify certain media + types which are acceptable for the response. Accept headers can be + used to indicate that the request is specifically limited to a small + set of desired types, as in the case of a request for an in-line + image. + + Accept = "Accept" ":" + #( media-range [ accept-params ] ) + + media-range = ( "*/*" + | ( type "/" "*" ) + | ( type "/" subtype ) + ) *( ";" parameter ) + accept-params = ";" "q" "=" qvalue *( accept-extension ) + accept-extension = ";" token [ "=" ( token | quoted-string ) ] + + The asterisk "*" character is used to group media types into ranges, + with "*/*" indicating all media types and "type/*" indicating all + subtypes of that type. The media-range MAY include media type + parameters that are applicable to that range. + + Each media-range MAY be followed by one or more accept-params, + beginning with the "q" parameter for indicating a relative quality + factor. The first "q" parameter (if any) separates the media-range + parameter(s) from the accept-params. Quality factors allow the user + or user agent to indicate the relative degree of preference for that + media-range, using the qvalue scale from 0 to 1 (section 3.9). The + default value is q=1. + + Note: Use of the "q" parameter name to separate media type + parameters from Accept extension parameters is due to historical + practice. 
Although this prevents any media type parameter named + "q" from being used with a media range, such an event is believed + to be unlikely given the lack of any "q" parameters in the IANA + media type registry and the rare usage of any media type + parameters in Accept. Future media types are discouraged from + registering any parameter named "q". + + + + + +Fielding, et al. Standards Track [Page 100] + +RFC 2616 HTTP/1.1 June 1999 + + + The example + + Accept: audio/*; q=0.2, audio/basic + + SHOULD be interpreted as "I prefer audio/basic, but send me any audio + type if it is the best available after an 80% mark-down in quality." + + If no Accept header field is present, then it is assumed that the + client accepts all media types. If an Accept header field is present, + and if the server cannot send a response which is acceptable + according to the combined Accept field value, then the server SHOULD + send a 406 (not acceptable) response. + + A more elaborate example is + + Accept: text/plain; q=0.5, text/html, + text/x-dvi; q=0.8, text/x-c + + Verbally, this would be interpreted as "text/html and text/x-c are + the preferred media types, but if they do not exist, then send the + text/x-dvi entity, and if that does not exist, send the text/plain + entity." + + Media ranges can be overridden by more specific media ranges or + specific media types. If more than one media range applies to a given + type, the most specific reference has precedence. For example, + + Accept: text/*, text/html, text/html;level=1, */* + + have the following precedence: + + 1) text/html;level=1 + 2) text/html + 3) text/* + 4) */* + + The media type quality factor associated with a given type is + determined by finding the media range with the highest precedence + which matches that type. 
For example, + + Accept: text/*;q=0.3, text/html;q=0.7, text/html;level=1, + text/html;level=2;q=0.4, */*;q=0.5 + + would cause the following values to be associated: + + text/html;level=1 = 1 + text/html = 0.7 + text/plain = 0.3 + + + +Fielding, et al. Standards Track [Page 101] + +RFC 2616 HTTP/1.1 June 1999 + + + image/jpeg = 0.5 + text/html;level=2 = 0.4 + text/html;level=3 = 0.7 + + Note: A user agent might be provided with a default set of quality + values for certain media ranges. However, unless the user agent is + a closed system which cannot interact with other rendering agents, + this default set ought to be configurable by the user. + +14.2 Accept-Charset + + The Accept-Charset request-header field can be used to indicate what + character sets are acceptable for the response. This field allows + clients capable of understanding more comprehensive or special- + purpose character sets to signal that capability to a server which is + capable of representing documents in those character sets. + + Accept-Charset = "Accept-Charset" ":" + 1#( ( charset | "*" )[ ";" "q" "=" qvalue ] ) + + + Character set values are described in section 3.4. Each charset MAY + be given an associated quality value which represents the user's + preference for that charset. The default value is q=1. An example is + + Accept-Charset: iso-8859-5, unicode-1-1;q=0.8 + + The special value "*", if present in the Accept-Charset field, + matches every character set (including ISO-8859-1) which is not + mentioned elsewhere in the Accept-Charset field. If no "*" is present + in an Accept-Charset field, then all character sets not explicitly + mentioned get a quality value of 0, except for ISO-8859-1, which gets + a quality value of 1 if not explicitly mentioned. + + If no Accept-Charset header is present, the default is that any + character set is acceptable. 
If an Accept-Charset header is present, + and if the server cannot send a response which is acceptable + according to the Accept-Charset header, then the server SHOULD send + an error response with the 406 (not acceptable) status code, though + the sending of an unacceptable response is also allowed. + +14.3 Accept-Encoding + + The Accept-Encoding request-header field is similar to Accept, but + restricts the content-codings (section 3.5) that are acceptable in + the response. + + Accept-Encoding = "Accept-Encoding" ":" + + + +Fielding, et al. Standards Track [Page 102] + +RFC 2616 HTTP/1.1 June 1999 + + + 1#( codings [ ";" "q" "=" qvalue ] ) + codings = ( content-coding | "*" ) + + [[ http://lists.w3.org/Archives/Public/ietf-http-wg/2005AprJun/0029.html ]] + [[ points out that the "1#" must be "#" to make the examples below and ]] + [[ the text of rule 4 correct. ]] + + Examples of its use are: + + Accept-Encoding: compress, gzip + Accept-Encoding: + Accept-Encoding: * + Accept-Encoding: compress;q=0.5, gzip;q=1.0 + Accept-Encoding: gzip;q=1.0, identity; q=0.5, *;q=0 + + A server tests whether a content-coding is acceptable, according to + an Accept-Encoding field, using these rules: + + 1. If the content-coding is one of the content-codings listed in + the Accept-Encoding field, then it is acceptable, unless it is + accompanied by a qvalue of 0. (As defined in section 3.9, a + qvalue of 0 means "not acceptable.") + + 2. The special "*" symbol in an Accept-Encoding field matches any + available content-coding not explicitly listed in the header + field. + + 3. If multiple content-codings are acceptable, then the acceptable + content-coding with the highest non-zero qvalue is preferred. + + 4. The "identity" content-coding is always acceptable, unless + specifically refused because the Accept-Encoding field includes + "identity;q=0", or because the field includes "*;q=0" and does + not explicitly include the "identity" content-coding. 
If the + Accept-Encoding field-value is empty, then only the "identity" + encoding is acceptable. + + If an Accept-Encoding field is present in a request, and if the + server cannot send a response which is acceptable according to the + Accept-Encoding header, then the server SHOULD send an error response + with the 406 (Not Acceptable) status code. + + If no Accept-Encoding field is present in a request, the server MAY + assume that the client will accept any content coding. In this case, + if "identity" is one of the available content-codings, then the + server SHOULD use the "identity" content-coding, unless it has + additional information that a different content-coding is meaningful + to the client. + + Note: If the request does not include an Accept-Encoding field, + and if the "identity" content-coding is unavailable, then + content-codings commonly understood by HTTP/1.0 clients (i.e., + + + +Fielding, et al. Standards Track [Page 103] + +RFC 2616 HTTP/1.1 June 1999 + + + "gzip" and "compress") are preferred; some older clients + improperly display messages sent with other content-codings. The + server might also make this decision based on information about + the particular user-agent or client. + + Note: Most HTTP/1.0 applications do not recognize or obey qvalues + associated with content-codings. This means that qvalues will not + work and are not permitted with x-gzip or x-compress. + +14.4 Accept-Language + + The Accept-Language request-header field is similar to Accept, but + restricts the set of natural languages that are preferred as a + response to the request. Language tags are defined in section 3.10. + + Accept-Language = "Accept-Language" ":" + 1#( language-range [ ";" "q" "=" qvalue ] ) + language-range = ( ( 1*8ALPHA *( "-" 1*8ALPHA ) ) | "*" ) + + Each language-range MAY be given an associated quality value which + represents an estimate of the user's preference for the languages + specified by that range. 
The quality value defaults to "q=1". For + example, + + Accept-Language: da, en-gb;q=0.8, en;q=0.7 + + would mean: "I prefer Danish, but will accept British English and + other types of English." A language-range matches a language-tag if + it exactly equals the tag, or if it exactly equals a prefix of the + tag such that the first tag character following the prefix is "-". + The special range "*", if present in the Accept-Language field, + matches every tag not matched by any other range present in the + Accept-Language field. + + Note: This use of a prefix matching rule does not imply that + language tags are assigned to languages in such a way that it is + always true that if a user understands a language with a certain + tag, then this user will also understand all languages with tags + for which this tag is a prefix. The prefix rule simply allows the + use of prefix tags if this is the case. + + The language quality factor assigned to a language-tag by the + Accept-Language field is the quality value of the longest language- + range in the field that matches the language-tag. If no language- + range in the field matches the tag, the language quality factor + assigned is 0. If no Accept-Language header is present in the + request, the server + + + + +Fielding, et al. Standards Track [Page 104] + +RFC 2616 HTTP/1.1 June 1999 + + + SHOULD assume that all languages are equally acceptable. If an + Accept-Language header is present, then all languages which are + assigned a quality factor greater than 0 are acceptable. + + It might be contrary to the privacy expectations of the user to send + an Accept-Language header with the complete linguistic preferences of + the user in every request. For a discussion of this issue, see + section 15.1.4. + + As intelligibility is highly dependent on the individual user, it is + recommended that client applications make the choice of linguistic + preference available to the user. 
If the choice is not made + available, then the Accept-Language header field MUST NOT be given in + the request. + + Note: When making the choice of linguistic preference available to + the user, we remind implementors of the fact that users are not + familiar with the details of language matching as described above, + and should provide appropriate guidance. As an example, users + might assume that on selecting "en-gb", they will be served any + kind of English document if British English is not available. A + user agent might suggest in such a case to add "en" to get the + best matching behavior. + +14.5 Accept-Ranges + + The Accept-Ranges response-header field allows the server to + indicate its acceptance of range requests for a resource: + + Accept-Ranges = "Accept-Ranges" ":" acceptable-ranges + acceptable-ranges = 1#range-unit | "none" + + Origin servers that accept byte-range requests MAY send + + Accept-Ranges: bytes + + but are not required to do so. Clients MAY generate byte-range + requests without having received this header for the resource + involved. Range units are defined in section 3.12. + + Servers that do not accept any kind of range request for a + resource MAY send + + Accept-Ranges: none + + to advise the client not to attempt a range request. + + + + + +Fielding, et al. Standards Track [Page 105] + +RFC 2616 HTTP/1.1 June 1999 + + +14.6 Age + + The Age response-header field conveys the sender's estimate of the + amount of time since the response (or its revalidation) was + generated at the origin server. A cached response is "fresh" if + its age does not exceed its freshness lifetime. Age values are + calculated as specified in section 13.2.3. + + Age = "Age" ":" age-value + age-value = delta-seconds + + Age values are non-negative decimal integers, representing time in + seconds. 
+ + If a cache receives a value larger than the largest positive + integer it can represent, or if any of its age calculations + overflows, it MUST transmit an Age header with a value of + 2147483648 (2^31). An HTTP/1.1 server that includes a cache MUST + include an Age header field in every response generated from its + own cache. Caches SHOULD use an arithmetic type of at least 31 + bits of range. + +14.7 Allow + + The Allow entity-header field lists the set of methods supported + by the resource identified by the Request-URI. The purpose of this + field is strictly to inform the recipient of valid methods + associated with the resource. An Allow header field MUST be + present in a 405 (Method Not Allowed) response. + + Allow = "Allow" ":" #Method + + Example of use: + + Allow: GET, HEAD, PUT + + This field cannot prevent a client from trying other methods. + However, the indications given by the Allow header field value + SHOULD be followed. The actual set of allowed methods is defined + by the origin server at the time of each request. + + The Allow header field MAY be provided with a PUT request to + recommend the methods to be supported by the new or modified + resource. The server is not required to support these methods and + SHOULD include an Allow header in the response giving the actual + supported methods. + + + + + +Fielding, et al. Standards Track [Page 106] + +RFC 2616 HTTP/1.1 June 1999 + + + A proxy MUST NOT modify the Allow header field even if it does not + understand all the methods specified, since the user agent might + have other means of communicating with the origin server. + +14.8 Authorization + + A user agent that wishes to authenticate itself with a server-- + usually, but not necessarily, after receiving a 401 response--does + so by including an Authorization request-header field with the + request. 
The Authorization field value consists of credentials + containing the authentication information of the user agent for + the realm of the resource being requested. + + Authorization = "Authorization" ":" credentials + + HTTP access authentication is described in "HTTP Authentication: + Basic and Digest Access Authentication" [43]. If a request is + authenticated and a realm specified, the same credentials SHOULD + be valid for all other requests within this realm (assuming that + the authentication scheme itself does not require otherwise, such + as credentials that vary according to a challenge value or using + synchronized clocks). + + When a shared cache (see section 13.7) receives a request + containing an Authorization field, it MUST NOT return the + corresponding response as a reply to any other request, unless one + of the following specific exceptions holds: + + 1. If the response includes the "s-maxage" cache-control + directive, the cache MAY use that response in replying to a + subsequent request. But (if the specified maximum age has + passed) a proxy cache MUST first revalidate it with the origin + server, using the request-headers from the new request to allow + the origin server to authenticate the new request. (This is the + defined behavior for s-maxage.) If the response includes "s- + maxage=0", the proxy MUST always revalidate it before re-using + it. + + 2. If the response includes the "must-revalidate" cache-control + directive, the cache MAY use that response in replying to a + subsequent request. But if the response is stale, all caches + MUST first revalidate it with the origin server, using the + request-headers from the new request to allow the origin server + to authenticate the new request. + + 3. If the response includes the "public" cache-control directive, + it MAY be returned in reply to any subsequent request. + + + + +Fielding, et al. 
Standards Track [Page 107] + +RFC 2616 HTTP/1.1 June 1999 + + +14.9 Cache-Control + + The Cache-Control general-header field is used to specify directives + that MUST be obeyed by all caching mechanisms along the + request/response chain. The directives specify behavior intended to + prevent caches from adversely interfering with the request or + response. These directives typically override the default caching + algorithms. Cache directives are unidirectional in that the presence + of a directive in a request does not imply that the same directive is + to be given in the response. + + Note that HTTP/1.0 caches might not implement Cache-Control and + might only implement Pragma: no-cache (see section 14.32). + + Cache directives MUST be passed through by a proxy or gateway + application, regardless of their significance to that application, + since the directives might be applicable to all recipients along the + request/response chain. It is not possible to specify a cache- + directive for a specific cache. 
+ + Cache-Control = "Cache-Control" ":" 1#cache-directive + + cache-directive = cache-request-directive + | cache-response-directive + + cache-request-directive = + "no-cache" ; Section 14.9.1 + | "no-store" ; Section 14.9.2 + | "max-age" "=" delta-seconds ; Section 14.9.3, 14.9.4 + | "max-stale" [ "=" delta-seconds ] ; Section 14.9.3 + | "min-fresh" "=" delta-seconds ; Section 14.9.3 + | "no-transform" ; Section 14.9.5 + | "only-if-cached" ; Section 14.9.4 + | cache-extension ; Section 14.9.6 + + cache-response-directive = + "public" ; Section 14.9.1 + | "private" [ "=" <"> 1#field-name <"> ] ; Section 14.9.1 + | "no-cache" [ "=" <"> 1#field-name <"> ]; Section 14.9.1 + | "no-store" ; Section 14.9.2 + | "no-transform" ; Section 14.9.5 + | "must-revalidate" ; Section 14.9.4 + | "proxy-revalidate" ; Section 14.9.4 + | "max-age" "=" delta-seconds ; Section 14.9.3 + | "s-maxage" "=" delta-seconds ; Section 14.9.3 + | cache-extension ; Section 14.9.6 + + cache-extension = token [ "=" ( token | quoted-string ) ] + + + +Fielding, et al. Standards Track [Page 108] + +RFC 2616 HTTP/1.1 June 1999 + + + When a directive appears without any 1#field-name parameter, the + directive applies to the entire request or response. When such a + directive appears with a 1#field-name parameter, it applies only to + the named field or fields, and not to the rest of the request or + response. This mechanism supports extensibility; implementations of + future versions of the HTTP protocol might apply these directives to + header fields not defined in HTTP/1.1. + + The cache-control directives can be broken down into these general + categories: + + - Restrictions on what are cacheable; these may only be imposed by + the origin server. + + - Restrictions on what may be stored by a cache; these may be + imposed by either the origin server or the user agent. + + - Modifications of the basic expiration mechanism; these may be + imposed by either the origin server or the user agent. 
+ + - Controls over cache revalidation and reload; these may only be + imposed by a user agent. + + - Control over transformation of entities. + + - Extensions to the caching system. + +14.9.1 What is Cacheable + + By default, a response is cacheable if the requirements of the + request method, request header fields, and the response status + indicate that it is cacheable. Section 13.4 summarizes these defaults + for cacheability. The following Cache-Control response directives + allow an origin server to override the default cacheability of a + response: + + public + Indicates that the response MAY be cached by any cache, even if it + would normally be non-cacheable or cacheable only within a non- + shared cache. (See also Authorization, section 14.8, for + additional details.) + + private + Indicates that all or part of the response message is intended for + a single user and MUST NOT be cached by a shared cache. This + allows an origin server to state that the specified parts of the + + + + + +Fielding, et al. Standards Track [Page 109] + +RFC 2616 HTTP/1.1 June 1999 + + + response are intended for only one user and are not a valid + response for requests by other users. A private (non-shared) cache + MAY cache the response. + + Note: This usage of the word private only controls where the + response may be cached, and cannot ensure the privacy of the + message content. + + no-cache + If the no-cache directive does not specify a field-name, then a + cache MUST NOT use the response to satisfy a subsequent request + without successful revalidation with the origin server. This + allows an origin server to prevent caching even by caches that + have been configured to return stale responses to client requests. + + If the no-cache directive does specify one or more field-names, + then a cache MAY use the response to satisfy a subsequent request, + subject to any other restrictions on caching. 
However, the + specified field-name(s) MUST NOT be sent in the response to a + subsequent request without successful revalidation with the origin + server. This allows an origin server to prevent the re-use of + certain header fields in a response, while still allowing caching + of the rest of the response. + + Note: Most HTTP/1.0 caches will not recognize or obey this + directive. + +14.9.2 What May be Stored by Caches + + no-store + The purpose of the no-store directive is to prevent the + inadvertent release or retention of sensitive information (for + example, on backup tapes). The no-store directive applies to the + entire message, and MAY be sent either in a response or in a + request. If sent in a request, a cache MUST NOT store any part of + either this request or any response to it. If sent in a response, + a cache MUST NOT store any part of either this response or the + request that elicited it. This directive applies to both non- + shared and shared caches. "MUST NOT store" in this context means + that the cache MUST NOT intentionally store the information in + non-volatile storage, and MUST make a best-effort attempt to + remove the information from volatile storage as promptly as + possible after forwarding it. + + Even when this directive is associated with a response, users + might explicitly store such a response outside of the caching + system (e.g., with a "Save As" dialog). History buffers MAY store + such responses as part of their normal operation. + + + +Fielding, et al. Standards Track [Page 110] + +RFC 2616 HTTP/1.1 June 1999 + + + The purpose of this directive is to meet the stated requirements + of certain users and service authors who are concerned about + accidental releases of information via unanticipated accesses to + cache data structures. While the use of this directive might + improve privacy in some cases, we caution that it is NOT in any + way a reliable or sufficient mechanism for ensuring privacy. 
In + particular, malicious or compromised caches might not recognize or + obey this directive, and communications networks might be + vulnerable to eavesdropping. + +14.9.3 Modifications of the Basic Expiration Mechanism + + The expiration time of an entity MAY be specified by the origin + server using the Expires header (see section 14.21). Alternatively, + it MAY be specified using the max-age directive in a response. When + the max-age cache-control directive is present in a cached response, + the response is stale if its current age is greater than the age + value given (in seconds) at the time of a new request for that + resource. The max-age directive on a response implies that the + response is cacheable (i.e., "public") unless some other, more + restrictive cache directive is also present. + + If a response includes both an Expires header and a max-age + directive, the max-age directive overrides the Expires header, even + if the Expires header is more restrictive. This rule allows an origin + server to provide, for a given response, a longer expiration time to + an HTTP/1.1 (or later) cache than to an HTTP/1.0 cache. This might be + useful if certain HTTP/1.0 caches improperly calculate ages or + expiration times, perhaps due to desynchronized clocks. + + Many HTTP/1.0 cache implementations will treat an Expires value that + is less than or equal to the response Date value as being equivalent + to the Cache-Control response directive "no-cache". If an HTTP/1.1 + cache receives such a response, and the response does not include a + Cache-Control header field, it SHOULD consider the response to be + non-cacheable in order to retain compatibility with HTTP/1.0 servers. + + Note: An origin server might wish to use a relatively new HTTP + cache control feature, such as the "private" directive, on a + network including older caches that do not understand that + feature. 
The origin server will need to combine the new feature + with an Expires field whose value is less than or equal to the + Date value. This will prevent older caches from improperly + caching the response. + + + + + + + +Fielding, et al. Standards Track [Page 111] + +RFC 2616 HTTP/1.1 June 1999 + + + s-maxage + If a response includes an s-maxage directive, then for a shared + cache (but not for a private cache), the maximum age specified by + this directive overrides the maximum age specified by either the + max-age directive or the Expires header. The s-maxage directive + also implies the semantics of the proxy-revalidate directive (see + section 14.9.4), i.e., that the shared cache must not use the + entry after it becomes stale to respond to a subsequent request + without first revalidating it with the origin server. The s- + maxage directive is always ignored by a private cache. + + Note that most older caches, not compliant with this specification, + do not implement any cache-control directives. An origin server + wishing to use a cache-control directive that restricts, but does not + prevent, caching by an HTTP/1.1-compliant cache MAY exploit the + requirement that the max-age directive overrides the Expires header, + and the fact that pre-HTTP/1.1-compliant caches do not observe the + max-age directive. + + Other directives allow a user agent to modify the basic expiration + mechanism. These directives MAY be specified on a request: + + max-age + Indicates that the client is willing to accept a response whose + age is no greater than the specified time in seconds. Unless max- + stale directive is also included, the client is not willing to + accept a stale response. + + min-fresh + Indicates that the client is willing to accept a response whose + freshness lifetime is no less than its current age plus the + specified time in seconds. That is, the client wants a response + that will still be fresh for at least the specified number of + seconds. 
+ + max-stale + Indicates that the client is willing to accept a response that has + exceeded its expiration time. If max-stale is assigned a value, + then the client is willing to accept a response that has exceeded + its expiration time by no more than the specified number of + seconds. If no value is assigned to max-stale, then the client is + willing to accept a stale response of any age. + + If a cache returns a stale response, either because of a max-stale + directive on a request, or because the cache is configured to + override the expiration time of a response, the cache MUST attach a + Warning header to the stale response, using Warning 110 (Response is + stale). + + + +Fielding, et al. Standards Track [Page 112] + +RFC 2616 HTTP/1.1 June 1999 + + + A cache MAY be configured to return stale responses without + validation, but only if this does not conflict with any "MUST"-level + requirements concerning cache validation (e.g., a "must-revalidate" + cache-control directive). + + If both the new request and the cached entry include "max-age" + directives, then the lesser of the two values is used for determining + the freshness of the cached entry for that request. + +14.9.4 Cache Revalidation and Reload Controls + + Sometimes a user agent might want or need to insist that a cache + revalidate its cache entry with the origin server (and not just with + the next cache along the path to the origin server), or to reload its + cache entry from the origin server. End-to-end revalidation might be + necessary if either the cache or the origin server has overestimated + the expiration time of the cached response. End-to-end reload may be + necessary if the cache entry has become corrupted for some reason. 
+ + End-to-end revalidation may be requested either when the client does + not have its own local cached copy, in which case we call it + "unspecified end-to-end revalidation", or when the client does have a + local cached copy, in which case we call it "specific end-to-end + revalidation." + + The client can specify these three kinds of action using Cache- + Control request directives: + + End-to-end reload + The request includes a "no-cache" cache-control directive or, for + compatibility with HTTP/1.0 clients, "Pragma: no-cache". Field + names MUST NOT be included with the no-cache directive in a + request. The server MUST NOT use a cached copy when responding to + such a request. + + Specific end-to-end revalidation + The request includes a "max-age=0" cache-control directive, which + forces each cache along the path to the origin server to + revalidate its own entry, if any, with the next cache or server. + The initial request includes a cache-validating conditional with + the client's current validator. + + Unspecified end-to-end revalidation + The request includes "max-age=0" cache-control directive, which + forces each cache along the path to the origin server to + revalidate its own entry, if any, with the next cache or server. + The initial request does not include a cache-validating + + + + +Fielding, et al. Standards Track [Page 113] + +RFC 2616 HTTP/1.1 June 1999 + + + conditional; the first cache along the path (if any) that holds a + cache entry for this resource includes a cache-validating + conditional with its current validator. + + max-age + When an intermediate cache is forced, by means of a max-age=0 + directive, to revalidate its own cache entry, and the client has + supplied its own validator in the request, the supplied validator + might differ from the validator currently stored with the cache + entry. In this case, the cache MAY use either validator in making + its own request without affecting semantic transparency. 
+ + However, the choice of validator might affect performance. The + best approach is for the intermediate cache to use its own + validator when making its request. If the server replies with 304 + (Not Modified), then the cache can return its now validated copy + to the client with a 200 (OK) response. If the server replies with + a new entity and cache validator, however, the intermediate cache + can compare the returned validator with the one provided in the + client's request, using the strong comparison function. If the + client's validator is equal to the origin server's, then the + intermediate cache simply returns 304 (Not Modified). Otherwise, + it returns the new entity with a 200 (OK) response. + + If a request includes the no-cache directive, it SHOULD NOT + include min-fresh, max-stale, or max-age. + + only-if-cached + In some cases, such as times of extremely poor network + connectivity, a client may want a cache to return only those + responses that it currently has stored, and not to reload or + revalidate with the origin server. To do this, the client may + include the only-if-cached directive in a request. If it receives + this directive, a cache SHOULD either respond using a cached entry + that is consistent with the other constraints of the request, or + respond with a 504 (Gateway Timeout) status. However, if a group + of caches is being operated as a unified system with good internal + connectivity, such a request MAY be forwarded within that group of + caches. + + must-revalidate + Because a cache MAY be configured to ignore a server's specified + expiration time, and because a client request MAY include a max- + stale directive (which has a similar effect), the protocol also + includes a mechanism for the origin server to require revalidation + of a cache entry on any subsequent use. 
When the must-revalidate + directive is present in a response received by a cache, that cache + MUST NOT use the entry after it becomes stale to respond to a + + + +Fielding, et al. Standards Track [Page 114] + +RFC 2616 HTTP/1.1 June 1999 + + + subsequent request without first revalidating it with the origin + server. (I.e., the cache MUST do an end-to-end revalidation every + time, if, based solely on the origin server's Expires or max-age + value, the cached response is stale.) + + The must-revalidate directive is necessary to support reliable + operation for certain protocol features. In all circumstances an + HTTP/1.1 cache MUST obey the must-revalidate directive; in + particular, if the cache cannot reach the origin server for any + reason, it MUST generate a 504 (Gateway Timeout) response. + + Servers SHOULD send the must-revalidate directive if and only if + failure to revalidate a request on the entity could result in + incorrect operation, such as a silently unexecuted financial + transaction. Recipients MUST NOT take any automated action that + violates this directive, and MUST NOT automatically provide an + unvalidated copy of the entity if revalidation fails. + + Although this is not recommended, user agents operating under + severe connectivity constraints MAY violate this directive but, if + so, MUST explicitly warn the user that an unvalidated response has + been provided. The warning MUST be provided on each unvalidated + access, and SHOULD require explicit user confirmation. + + proxy-revalidate + The proxy-revalidate directive has the same meaning as the must- + revalidate directive, except that it does not apply to non-shared + user agent caches. 
It can be used on a response to an + authenticated request to permit the user's cache to store and + later return the response without needing to revalidate it (since + it has already been authenticated once by that user), while still + requiring proxies that service many users to revalidate each time + (in order to make sure that each user has been authenticated). + Note that such authenticated responses also need the public cache + control directive in order to allow them to be cached at all. + +14.9.5 No-Transform Directive + + no-transform + Implementors of intermediate caches (proxies) have found it useful + to convert the media type of certain entity bodies. A non- + transparent proxy might, for example, convert between image + formats in order to save cache space or to reduce the amount of + traffic on a slow link. + + Serious operational problems occur, however, when these + transformations are applied to entity bodies intended for certain + kinds of applications. For example, applications for medical + + + +Fielding, et al. Standards Track [Page 115] + +RFC 2616 HTTP/1.1 June 1999 + + + imaging, scientific data analysis and those using end-to-end + authentication, all depend on receiving an entity body that is bit + for bit identical to the original entity-body. + + Therefore, if a message includes the no-transform directive, an + intermediate cache or proxy MUST NOT change those headers that are + listed in section 13.5.2 as being subject to the no-transform + directive. This implies that the cache or proxy MUST NOT change + any aspect of the entity-body that is specified by these headers, + including the value of the entity-body itself. + +14.9.6 Cache Control Extensions + + The Cache-Control header field can be extended through the use of one + or more cache-extension tokens, each with an optional assigned value. 
+ Informational extensions (those which do not require a change in + cache behavior) MAY be added without changing the semantics of other + directives. Behavioral extensions are designed to work by acting as + modifiers to the existing base of cache directives. Both the new + directive and the standard directive are supplied, such that + applications which do not understand the new directive will default + to the behavior specified by the standard directive, and those that + understand the new directive will recognize it as modifying the + requirements associated with the standard directive. In this way, + extensions to the cache-control directives can be made without + requiring changes to the base protocol. + + This extension mechanism depends on an HTTP cache obeying all of the + cache-control directives defined for its native HTTP-version, obeying + certain extensions, and ignoring all directives that it does not + understand. + + For example, consider a hypothetical new response directive called + community which acts as a modifier to the private directive. We + define this new directive to mean that, in addition to any non-shared + cache, any cache which is shared only by members of the community + named within its value may cache the response. An origin server + wishing to allow the UCI community to use an otherwise private + response in their shared cache(s) could do so by including + + Cache-Control: private, community="UCI" + + A cache seeing this header field will act correctly even if the cache + does not understand the community cache-extension, since it will also + see and understand the private directive and thus default to the safe + behavior. + + + + + +Fielding, et al. 
Standards Track [Page 116] + +RFC 2616 HTTP/1.1 June 1999 + + + Unrecognized cache-directives MUST be ignored; it is assumed that any + cache-directive likely to be unrecognized by an HTTP/1.1 cache will + be combined with standard directives (or the response's default + cacheability) such that the cache behavior will remain minimally + correct even if the cache does not understand the extension(s). + +14.10 Connection + + The Connection general-header field allows the sender to specify + options that are desired for that particular connection and MUST NOT + be communicated by proxies over further connections. + + The Connection header has the following grammar: + + Connection = "Connection" ":" 1#(connection-token) + connection-token = token + + HTTP/1.1 proxies MUST parse the Connection header field before a + message is forwarded and, for each connection-token in this field, + remove any header field(s) from the message with the same name as the + connection-token. Connection options are signaled by the presence of + a connection-token in the Connection header field, not by any + corresponding additional header field(s), since the additional header + field may not be sent if there are no parameters associated with that + connection option. + + Message headers listed in the Connection header MUST NOT include + end-to-end headers, such as Cache-Control. + + HTTP/1.1 defines the "close" connection option for the sender to + signal that the connection will be closed after completion of the + response. For example, + + Connection: close + + in either the request or the response header fields indicates that + the connection SHOULD NOT be considered `persistent' (section 8.1) + after the current request/response is complete. + + HTTP/1.1 applications that do not support persistent connections MUST + include the "close" connection option in every message. 
+ +[[ Should say: ]] +[[ An HTTP/1.1 client that does not support persistent connections ]] +[[ MUST include the "close" connection option in every request message. ]] +[[ ]] +[[ An HTTP/1.1 server that does not support persistent connections ]] +[[ MUST include the "close" connection option in every response ]] +[[ message that does not have a 1xx (informational) status code. ]] + + A system receiving an HTTP/1.0 (or lower-version) message that + includes a Connection header MUST, for each connection-token in this + field, remove and ignore any header field(s) from the message with + the same name as the connection-token. This protects against mistaken + forwarding of such header fields by pre-HTTP/1.1 proxies. See section + 19.6.2. + + + +Fielding, et al. Standards Track [Page 117] + +RFC 2616 HTTP/1.1 June 1999 + + +14.11 Content-Encoding + + The Content-Encoding entity-header field is used as a modifier to the + media-type. When present, its value indicates what additional content + codings have been applied to the entity-body, and thus what decoding + mechanisms must be applied in order to obtain the media-type + referenced by the Content-Type header field. Content-Encoding is + primarily used to allow a document to be compressed without losing + the identity of its underlying media type. + + Content-Encoding = "Content-Encoding" ":" 1#content-coding + + Content codings are defined in section 3.5. An example of its use is + + Content-Encoding: gzip + + The content-coding is a characteristic of the entity identified by + the Request-URI. Typically, the entity-body is stored with this + encoding and is only decoded before rendering or analogous usage. + However, a non-transparent proxy MAY modify the content-coding if the + new coding is known to be acceptable to the recipient, unless the + "no-transform" cache-control directive is present in the message. 
+ + If the content-coding of an entity is not "identity", then the + response MUST include a Content-Encoding entity-header (section + 14.11) that lists the non-identity content-coding(s) used. + + If the content-coding of an entity in a request message is not + acceptable to the origin server, the server SHOULD respond with a + status code of 415 (Unsupported Media Type). + + If multiple encodings have been applied to an entity, the content + codings MUST be listed in the order in which they were applied. + Additional information about the encoding parameters MAY be provided + by other entity-header fields not defined by this specification. + +14.12 Content-Language + + The Content-Language entity-header field describes the natural + language(s) of the intended audience for the enclosed entity. Note + that this might not be equivalent to all the languages used within + the entity-body. + + Content-Language = "Content-Language" ":" 1#language-tag + + + + + + + +Fielding, et al. Standards Track [Page 118] + +RFC 2616 HTTP/1.1 June 1999 + + + Language tags are defined in section 3.10. The primary purpose of + Content-Language is to allow a user to identify and differentiate + entities according to the user's own preferred language. Thus, if the + body content is intended only for a Danish-literate audience, the + appropriate field is + + Content-Language: da + + If no Content-Language is specified, the default is that the content + is intended for all language audiences. This might mean that the + sender does not consider it to be specific to any natural language, + or that the sender does not know for which language it is intended. + + Multiple languages MAY be listed for content that is intended for + multiple audiences. 
For example, a rendition of the "Treaty of + Waitangi," presented simultaneously in the original Maori and English + versions, would call for + + Content-Language: mi, en + + However, just because multiple languages are present within an entity + does not mean that it is intended for multiple linguistic audiences. + An example would be a beginner's language primer, such as "A First + Lesson in Latin," which is clearly intended to be used by an + English-literate audience. In this case, the Content-Language would + properly only include "en". + + Content-Language MAY be applied to any media type -- it is not + limited to textual documents. + +14.13 Content-Length + + The Content-Length entity-header field indicates the size of the + entity-body, in decimal number of OCTETs, sent to the recipient or, + in the case of the HEAD method, the size of the entity-body that + would have been sent had the request been a GET. + + Content-Length = "Content-Length" ":" 1*DIGIT + + An example is + + Content-Length: 3495 + + Applications SHOULD use this field to indicate the transfer-length of + the message-body, unless this is prohibited by the rules in section + 4.4. + + + + + +Fielding, et al. Standards Track [Page 119] + +RFC 2616 HTTP/1.1 June 1999 + + + Any Content-Length greater than or equal to zero is a valid value. + Section 4.4 describes how to determine the length of a message-body + if a Content-Length is not given. + + Note that the meaning of this field is significantly different from + the corresponding definition in MIME, where it is an optional field + used within the "message/external-body" content-type. In HTTP, it + SHOULD be sent whenever the message's length can be determined prior + to being transferred, unless this is prohibited by the rules in + section 4.4. 
+ +14.14 Content-Location + + The Content-Location entity-header field MAY be used to supply the + resource location for the entity enclosed in the message when that + entity is accessible from a location separate from the requested + resource's URI. A server SHOULD provide a Content-Location for the + variant corresponding to the response entity; especially in the case + where a resource has multiple entities associated with it, and those + entities actually have separate locations by which they might be + individually accessed, the server SHOULD provide a Content-Location + for the particular variant which is returned. + + Content-Location = "Content-Location" ":" + ( absoluteURI | relativeURI ) + + The value of Content-Location also defines the base URI for the + entity. + + The Content-Location value is not a replacement for the original + requested URI; it is only a statement of the location of the resource + corresponding to this particular entity at the time of the request. + Future requests MAY specify the Content-Location URI as the request- + URI if the desire is to identify the source of that particular + entity. + + A cache cannot assume that an entity with a Content-Location + different from the URI used to retrieve it can be used to respond to + later requests on that Content-Location URI. However, the Content- + Location can be used to differentiate between multiple entities + retrieved from a single requested resource, as described in section + 13.6. + + If the Content-Location is a relative URI, the relative URI is + interpreted relative to the Request-URI. + + The meaning of the Content-Location header in PUT or POST requests is + undefined; servers are free to ignore it in those cases. + + + +Fielding, et al. 
 Standards Track [Page 120] + +RFC 2616 HTTP/1.1 June 1999 + + +14.15 Content-MD5 + + The Content-MD5 entity-header field, as defined in RFC 1864 [23], is + an MD5 digest of the entity-body for the purpose of providing an + end-to-end message integrity check (MIC) of the entity-body. (Note: a + MIC is good for detecting accidental modification of the entity-body + in transit, but is not proof against malicious attacks.) + + Content-MD5 = "Content-MD5" ":" md5-digest + md5-digest = <base64 of 128 bit MD5 digest as per RFC 1864> + + The Content-MD5 header field MAY be generated by an origin server or + client to function as an integrity check of the entity-body. Only + origin servers or clients MAY generate the Content-MD5 header field; + proxies and gateways MUST NOT generate it, as this would defeat its + value as an end-to-end integrity check. Any recipient of the entity- + body, including gateways and proxies, MAY check that the digest value + in this header field matches that of the entity-body as received. + + The MD5 digest is computed based on the content of the entity-body, + including any content-coding that has been applied, but not including + any transfer-encoding applied to the message-body. If the message is + received with a transfer-encoding, that encoding MUST be removed + prior to checking the Content-MD5 value against the received entity. + + This has the result that the digest is computed on the octets of the + entity-body exactly as, and in the order that, they would be sent if + no transfer-encoding were being applied. + + HTTP extends RFC 1864 to permit the digest to be computed for MIME + composite media-types (e.g., multipart/* and message/rfc822), but + this does not change how the digest is computed as defined in the + preceding paragraph. + + There are several consequences of this. The entity-body for composite + types MAY contain many body-parts, each with its own MIME and HTTP + headers (including Content-MD5, Content-Transfer-Encoding, and + Content-Encoding headers). 
If a body-part has a Content-Transfer- + Encoding or Content-Encoding header, it is assumed that the content + of the body-part has had the encoding applied, and the body-part is + included in the Content-MD5 digest as is -- i.e., after the + application. The Transfer-Encoding header field is not allowed within + body-parts. + + Conversion of all line breaks to CRLF MUST NOT be done before + computing or checking the digest: the line break convention used in + the text actually transmitted MUST be left unaltered when computing + the digest. + + + +Fielding, et al. Standards Track [Page 121] + +RFC 2616 HTTP/1.1 June 1999 + + + Note: while the definition of Content-MD5 is exactly the same for + HTTP as in RFC 1864 for MIME entity-bodies, there are several ways + in which the application of Content-MD5 to HTTP entity-bodies + differs from its application to MIME entity-bodies. One is that + HTTP, unlike MIME, does not use Content-Transfer-Encoding, and + does use Transfer-Encoding and Content-Encoding. Another is that + HTTP more frequently uses binary content types than MIME, so it is + worth noting that, in such cases, the byte order used to compute + the digest is the transmission byte order defined for the type. + Lastly, HTTP allows transmission of text types with any of several + line break conventions and not just the canonical form using CRLF. + +14.16 Content-Range + + The Content-Range entity-header is sent with a partial entity-body to + specify where in the full entity-body the partial body should be + applied. Range units are defined in section 3.12. 
+ + Content-Range = "Content-Range" ":" content-range-spec + + content-range-spec = byte-content-range-spec + byte-content-range-spec = bytes-unit SP + byte-range-resp-spec "/" + ( instance-length | "*" ) + + byte-range-resp-spec = (first-byte-pos "-" last-byte-pos) + | "*" + instance-length = 1*DIGIT + + The header SHOULD indicate the total length of the full entity-body, + unless this length is unknown or difficult to determine. The asterisk + "*" character means that the instance-length is unknown at the time + when the response was generated. + + Unlike byte-ranges-specifier values (see section 14.35.1), a byte- + range-resp-spec MUST only specify one range, and MUST contain + absolute byte positions for both the first and last byte of the + range. + + A byte-content-range-spec with a byte-range-resp-spec whose last- + byte-pos value is less than its first-byte-pos value, or whose + instance-length value is less than or equal to its last-byte-pos + value, is invalid. The recipient of an invalid byte-content-range- + spec MUST ignore it and any content transferred along with it. + + A server sending a response with status code 416 (Requested range not + satisfiable) SHOULD include a Content-Range field with a byte-range- + resp-spec of "*". The instance-length specifies the current length of + + + +Fielding, et al. Standards Track [Page 122] + +RFC 2616 HTTP/1.1 June 1999 + + + the selected resource. A response with status code 206 (Partial + Content) MUST NOT include a Content-Range field with a byte-range- + resp-spec of "*". + + Examples of byte-content-range-spec values, assuming that the entity + contains a total of 1234 bytes: + + . The first 500 bytes: + bytes 0-499/1234 + + . The second 500 bytes: + bytes 500-999/1234 + + . All except for the first 500 bytes: + bytes 500-1233/1234 + + . 
The last 500 bytes: + bytes 734-1233/1234 + + When an HTTP message includes the content of a single range (for + example, a response to a request for a single range, or to a request + for a set of ranges that overlap without any holes), this content is + transmitted with a Content-Range header, and a Content-Length header + showing the number of bytes actually transferred. For example, + + HTTP/1.1 206 Partial content + Date: Wed, 15 Nov 1995 06:25:24 GMT + Last-Modified: Wed, 15 Nov 1995 04:58:08 GMT + Content-Range: bytes 21010-47021/47022 + Content-Length: 26012 + Content-Type: image/gif + + When an HTTP message includes the content of multiple ranges (for + example, a response to a request for multiple non-overlapping + ranges), these are transmitted as a multipart message. The multipart + media type used for this purpose is "multipart/byteranges" as defined + in appendix 19.2. See appendix 19.6.3 for a compatibility issue. + + A response to a request for a single range MUST NOT be sent using the + multipart/byteranges media type. A response to a request for + multiple ranges, whose result is a single range, MAY be sent as a + multipart/byteranges media type with one part. A client that cannot + decode a multipart/byteranges message MUST NOT ask for multiple + byte-ranges in a single request. + + When a client requests multiple byte-ranges in one request, the + server SHOULD return them in the order that they appeared in the + request. + + + +Fielding, et al. Standards Track [Page 123] + +RFC 2616 HTTP/1.1 June 1999 + + + If the server ignores a byte-range-spec because it is syntactically + invalid, the server SHOULD treat the request as if the invalid Range + header field did not exist. (Normally, this means return a 200 + response containing the full entity). 
+ + If the server receives a request (other than one including an If- + Range request-header field) with an unsatisfiable Range request- + header field (that is, all of whose byte-range-spec values have a + first-byte-pos value greater than the current length of the selected + resource), it SHOULD return a response code of 416 (Requested range + not satisfiable) (section 10.4.17). + + Note: clients cannot depend on servers to send a 416 (Requested + range not satisfiable) response instead of a 200 (OK) response for + an unsatisfiable Range request-header, since not all servers + implement this request-header. + +14.17 Content-Type + + The Content-Type entity-header field indicates the media type of the + entity-body sent to the recipient or, in the case of the HEAD method, + the media type that would have been sent had the request been a GET. + + Content-Type = "Content-Type" ":" media-type + + Media types are defined in section 3.7. An example of the field is + + Content-Type: text/html; charset=ISO-8859-4 + + Further discussion of methods for identifying the media type of an + entity is provided in section 7.2.1. + +14.18 Date + + The Date general-header field represents the date and time at which + the message was originated, having the same semantics as orig-date in + RFC 822. The field value is an HTTP-date, as described in section + 3.3.1; it MUST be sent in RFC 1123 [8]-date format. + + Date = "Date" ":" HTTP-date + + An example is + + Date: Tue, 15 Nov 1994 08:12:31 GMT + + Origin servers MUST include a Date header field in all responses, + except in these cases: + + + + +Fielding, et al. Standards Track [Page 124] + +RFC 2616 HTTP/1.1 June 1999 + + + 1. If the response status code is 100 (Continue) or 101 (Switching + Protocols), the response MAY include a Date header field, at + the server's option. + + 2. If the response status code conveys a server error, e.g. 
500 + (Internal Server Error) or 503 (Service Unavailable), and it is + inconvenient or impossible to generate a valid Date. + + 3. If the server does not have a clock that can provide a + reasonable approximation of the current time, its responses + MUST NOT include a Date header field. In this case, the rules + in section 14.18.1 MUST be followed. + + A received message that does not have a Date header field MUST be + assigned one by the recipient if the message will be cached by that + recipient or gatewayed via a protocol which requires a Date. An HTTP + implementation without a clock MUST NOT cache responses without + revalidating them on every use. An HTTP cache, especially a shared + cache, SHOULD use a mechanism, such as NTP [28], to synchronize its + clock with a reliable external standard. + + Clients SHOULD only send a Date header field in messages that include + an entity-body, as in the case of the PUT and POST requests, and even + then it is optional. A client without a clock MUST NOT send a Date + header field in a request. + + The HTTP-date sent in a Date header SHOULD NOT represent a date and + time subsequent to the generation of the message. It SHOULD represent + the best available approximation of the date and time of message + generation, unless the implementation has no means of generating a + reasonably accurate date and time. In theory, the date ought to + represent the moment just before the entity is generated. In + practice, the date can be generated at any time during the message + origination without affecting its semantic value. + +14.18.1 Clockless Origin Server Operation + + Some origin server implementations might not have a clock available. + An origin server without a clock MUST NOT assign Expires or Last- + Modified values to a response, unless these values were associated + with the resource by a system or user with a reliable clock. 
It MAY + assign an Expires value that is known, at or before server + configuration time, to be in the past (this allows "pre-expiration" + of responses without storing separate Expires values for each + resource). + + + + + + +Fielding, et al. Standards Track [Page 125] + +RFC 2616 HTTP/1.1 June 1999 + + +14.19 ETag + + The ETag response-header field provides the current value of the + entity tag for the requested variant. The headers used with entity + tags are described in sections 14.24, 14.26 and 14.44. The entity tag + MAY be used for comparison with other entities from the same resource + (see section 13.3.3). + + ETag = "ETag" ":" entity-tag + + Examples: + + ETag: "xyzzy" + ETag: W/"xyzzy" + ETag: "" + +14.20 Expect + + The Expect request-header field is used to indicate that particular + server behaviors are required by the client. + + Expect = "Expect" ":" 1#expectation + + expectation = "100-continue" | expectation-extension + expectation-extension = token [ "=" ( token | quoted-string ) + *expect-params ] + expect-params = ";" token [ "=" ( token | quoted-string ) ] + + + A server that does not understand or is unable to comply with any of + the expectation values in the Expect field of a request MUST respond + with appropriate error status. The server MUST respond with a 417 + (Expectation Failed) status if any of the expectations cannot be met + or, if there are other problems with the request, some other 4xx + status. + + This header field is defined with extensible syntax to allow for + future extensions. If a server receives a request containing an + Expect field that includes an expectation-extension that it does not + support, it MUST respond with a 417 (Expectation Failed) status. + + Comparison of expectation values is case-insensitive for unquoted + tokens (including the 100-continue token), and is case-sensitive for + quoted-string expectation-extensions. + + + + + + + +Fielding, et al. 
Standards Track [Page 126] + +RFC 2616 HTTP/1.1 June 1999 + + + The Expect mechanism is hop-by-hop: that is, an HTTP/1.1 proxy MUST + return a 417 (Expectation Failed) status if it receives a request + with an expectation that it cannot meet. However, the Expect + request-header itself is end-to-end; it MUST be forwarded if the + request is forwarded. + + Many older HTTP/1.0 and HTTP/1.1 applications do not understand the + Expect header. + + See section 8.2.3 for the use of the 100 (continue) status. + +14.21 Expires + + The Expires entity-header field gives the date/time after which the + response is considered stale. A stale cache entry may not normally be + returned by a cache (either a proxy cache or a user agent cache) + unless it is first validated with the origin server (or with an + intermediate cache that has a fresh copy of the entity). See section + 13.2 for further discussion of the expiration model. + + The presence of an Expires field does not imply that the original + resource will change or cease to exist at, before, or after that + time. + + The format is an absolute date and time as defined by HTTP-date in + section 3.3.1; it MUST be in RFC 1123 date format: + + Expires = "Expires" ":" HTTP-date + + An example of its use is + + Expires: Thu, 01 Dec 1994 16:00:00 GMT + + Note: if a response includes a Cache-Control field with the max- + age directive (see section 14.9.3), that directive overrides the + Expires field. + + HTTP/1.1 clients and caches MUST treat other invalid date formats, + especially including the value "0", as in the past (i.e., "already + expired"). + + To mark a response as "already expired," an origin server sends an + Expires date that is equal to the Date header value. (See the rules + for expiration calculations in section 13.2.4.) + + + + + + + +Fielding, et al. 
Standards Track [Page 127] + +RFC 2616 HTTP/1.1 June 1999 + + + To mark a response as "never expires," an origin server sends an + Expires date approximately one year from the time the response is + sent. HTTP/1.1 servers SHOULD NOT send Expires dates more than one + year in the future. + + The presence of an Expires header field with a date value of some + time in the future on a response that otherwise would by default be + non-cacheable indicates that the response is cacheable, unless + indicated otherwise by a Cache-Control header field (section 14.9). + +14.22 From + + The From request-header field, if given, SHOULD contain an Internet + e-mail address for the human user who controls the requesting user + agent. The address SHOULD be machine-usable, as defined by "mailbox" + in RFC 822 [9] as updated by RFC 1123 [8]: + + From = "From" ":" mailbox + + An example is: + + From: webmaster@w3.org + + This header field MAY be used for logging purposes and as a means for + identifying the source of invalid or unwanted requests. It SHOULD NOT + be used as an insecure form of access protection. The interpretation + of this field is that the request is being performed on behalf of the + person given, who accepts responsibility for the method performed. In + particular, robot agents SHOULD include this header so that the + person responsible for running the robot can be contacted if problems + occur on the receiving end. + + The Internet e-mail address in this field MAY be separate from the + Internet host which issued the request. For example, when a request + is passed through a proxy the original issuer's address SHOULD be + used. + + The client SHOULD NOT send the From header field without the user's + approval, as it might conflict with the user's privacy interests or + their site's security policy. It is strongly recommended that the + user be able to disable, enable, and modify the value of this field + at any time prior to a request. 
+ +14.23 Host + + The Host request-header field specifies the Internet host and port + number of the resource being requested, as obtained from the original + URI given by the user or referring resource (generally an HTTP URL, + + + +Fielding, et al. Standards Track [Page 128] + +RFC 2616 HTTP/1.1 June 1999 + + + as described in section 3.2.2). The Host field value MUST represent + the naming authority of the origin server or gateway given by the + original URL. This allows the origin server or gateway to + differentiate between internally-ambiguous URLs, such as the root "/" + URL of a server for multiple host names on a single IP address. + + Host = "Host" ":" host [ ":" port ] ; Section 3.2.2 + + A "host" without any trailing port information implies the default + port for the service requested (e.g., "80" for an HTTP URL). For + example, a request on the origin server for + <http://www.w3.org/pub/WWW/> would properly include: + + GET /pub/WWW/ HTTP/1.1 + Host: www.w3.org + + A client MUST include a Host header field in all HTTP/1.1 request + messages. If the requested URI does not include an Internet host + name for the service being requested, then the Host header field MUST + be given with an empty value. An HTTP/1.1 proxy MUST ensure that any + request message it forwards does contain an appropriate Host header + field that identifies the service being requested by the proxy. All + Internet-based HTTP/1.1 servers MUST respond with a 400 (Bad Request) + status code to any HTTP/1.1 request message which lacks a Host header + field. + + See sections 5.2 and 19.6.1.1 for other requirements relating to + Host. + +14.24 If-Match + + The If-Match request-header field is used with a method to make it + conditional. A client that has one or more entities previously + obtained from the resource can verify that one of those entities is + current by including a list of their associated entity tags in the + If-Match header field. Entity tags are defined in section 3.11. 
The + purpose of this feature is to allow efficient updates of cached + information with a minimum amount of transaction overhead. It is also + used, on updating requests, to prevent inadvertent modification of + the wrong version of a resource. As a special case, the value "*" + matches any current entity of the resource. + + If-Match = "If-Match" ":" ( "*" | 1#entity-tag ) + + If any of the entity tags match the entity tag of the entity that + would have been returned in the response to a similar GET request + (without the If-Match header) on that resource, or if "*" is given + + + + +Fielding, et al. Standards Track [Page 129] + +RFC 2616 HTTP/1.1 June 1999 + + + and any current entity exists for that resource, then the server MAY + perform the requested method as if the If-Match header field did not + exist. + + A server MUST use the strong comparison function (see section 13.3.3) + to compare the entity tags in If-Match. + + If none of the entity tags match, or if "*" is given and no current + entity exists, the server MUST NOT perform the requested method, and + MUST return a 412 (Precondition Failed) response. This behavior is + most useful when the client wants to prevent an updating method, such + as PUT, from modifying a resource that has changed since the client + last retrieved it. + + If the request would, without the If-Match header field, result in + anything other than a 2xx or 412 status, then the If-Match header + MUST be ignored. + + The meaning of "If-Match: *" is that the method SHOULD be performed + if the representation selected by the origin server (or by a cache, + possibly using the Vary mechanism, see section 14.44) exists, and + MUST NOT be performed if the representation does not exist. 
+ + A request intended to update a resource (e.g., a PUT) MAY include an + If-Match header field to signal that the request method MUST NOT be + applied if the entity corresponding to the If-Match value (a single + entity tag) is no longer a representation of that resource. This + allows the user to indicate that they do not wish the request to be + successful if the resource has been changed without their knowledge. + Examples: + + If-Match: "xyzzy" + If-Match: "xyzzy", "r2d2xxxx", "c3piozzzz" + If-Match: * + + The result of a request having both an If-Match header field and + either an If-None-Match or an If-Modified-Since header fields is + undefined by this specification. + +14.25 If-Modified-Since + + The If-Modified-Since request-header field is used with a method to + make it conditional: if the requested variant has not been modified + since the time specified in this field, an entity will not be + returned from the server; instead, a 304 (not modified) response will + be returned without any message-body. + + If-Modified-Since = "If-Modified-Since" ":" HTTP-date + + + +Fielding, et al. Standards Track [Page 130] + +RFC 2616 HTTP/1.1 June 1999 + + + An example of the field is: + + If-Modified-Since: Sat, 29 Oct 1994 19:43:31 GMT + + A GET method with an If-Modified-Since header and no Range header + requests that the identified entity be transferred only if it has + been modified since the date given by the If-Modified-Since header. + The algorithm for determining this includes the following cases: + + a) If the request would normally result in anything other than a + 200 (OK) status, or if the passed If-Modified-Since date is + invalid, the response is exactly the same as for a normal GET. + A date which is later than the server's current time is + invalid. + + b) If the variant has been modified since the If-Modified-Since + date, the response is exactly the same as for a normal GET. 
+ + c) If the variant has not been modified since a valid If- + Modified-Since date, the server SHOULD return a 304 (Not + Modified) response. + + The purpose of this feature is to allow efficient updates of cached + information with a minimum amount of transaction overhead. + + Note: The Range request-header field modifies the meaning of If- + Modified-Since; see section 14.35 for full details. + + Note: If-Modified-Since times are interpreted by the server, whose + clock might not be synchronized with the client. + + Note: When handling an If-Modified-Since header field, some + servers will use an exact date comparison function, rather than a + less-than function, for deciding whether to send a 304 (Not + Modified) response. To get best results when sending an If- + Modified-Since header field for cache validation, clients are + advised to use the exact date string received in a previous Last- + Modified header field whenever possible. + + Note: If a client uses an arbitrary date in the If-Modified-Since + header instead of a date taken from the Last-Modified header for + the same request, the client should be aware of the fact that this + date is interpreted in the server's understanding of time. The + client should consider unsynchronized clocks and rounding problems + due to the different encodings of time between the client and + server. This includes the possibility of race conditions if the + document has changed between the time it was first requested and + the If-Modified-Since date of a subsequent request, and the + + + +Fielding, et al. Standards Track [Page 131] + +RFC 2616 HTTP/1.1 June 1999 + + + possibility of clock-skew-related problems if the If-Modified- + Since date is derived from the client's clock without correction + to the server's clock. Corrections for different time bases + between client and server are at best approximate due to network + latency. 
+ + The result of a request having both an If-Modified-Since header field + and either an If-Match or an If-Unmodified-Since header fields is + undefined by this specification. + +14.26 If-None-Match + + The If-None-Match request-header field is used with a method to make + it conditional. A client that has one or more entities previously + obtained from the resource can verify that none of those entities is + current by including a list of their associated entity tags in the + If-None-Match header field. The purpose of this feature is to allow + efficient updates of cached information with a minimum amount of + transaction overhead. It is also used to prevent a method (e.g. PUT) + from inadvertently modifying an existing resource when the client + believes that the resource does not exist. + + As a special case, the value "*" matches any current entity of the + resource. + + If-None-Match = "If-None-Match" ":" ( "*" | 1#entity-tag ) + + If any of the entity tags match the entity tag of the entity that + would have been returned in the response to a similar GET request + (without the If-None-Match header) on that resource, or if "*" is + given and any current entity exists for that resource, then the + server MUST NOT perform the requested method, unless required to do + so because the resource's modification date fails to match that + supplied in an If-Modified-Since header field in the request. + Instead, if the request method was GET or HEAD, the server SHOULD + respond with a 304 (Not Modified) response, including the cache- + related header fields (particularly ETag) of one of the entities that + matched. For all other request methods, the server MUST respond with + a status of 412 (Precondition Failed). + + See section 13.3.3 for rules on how to determine if two entity tags + match. The weak comparison function can only be used with GET or HEAD + requests. + + + + + + + + +Fielding, et al. 
Standards Track [Page 132] + +RFC 2616 HTTP/1.1 June 1999 + + + If none of the entity tags match, then the server MAY perform the + requested method as if the If-None-Match header field did not exist, + but MUST also ignore any If-Modified-Since header field(s) in the + request. That is, if no entity tags match, then the server MUST NOT + return a 304 (Not Modified) response. + + If the request would, without the If-None-Match header field, result + in anything other than a 2xx or 304 status, then the If-None-Match + header MUST be ignored. (See section 13.3.4 for a discussion of + server behavior when both If-Modified-Since and If-None-Match appear + in the same request.) + + The meaning of "If-None-Match: *" is that the method MUST NOT be + performed if the representation selected by the origin server (or by + a cache, possibly using the Vary mechanism, see section 14.44) + exists, and SHOULD be performed if the representation does not exist. + This feature is intended to be useful in preventing races between PUT + operations. + + Examples: + + If-None-Match: "xyzzy" + If-None-Match: W/"xyzzy" + If-None-Match: "xyzzy", "r2d2xxxx", "c3piozzzz" + If-None-Match: W/"xyzzy", W/"r2d2xxxx", W/"c3piozzzz" + If-None-Match: * + + The result of a request having both an If-None-Match header field and + either an If-Match or an If-Unmodified-Since header fields is + undefined by this specification. + +14.27 If-Range + + If a client has a partial copy of an entity in its cache, and wishes + to have an up-to-date copy of the entire entity in its cache, it + could use the Range request-header with a conditional GET (using + either or both of If-Unmodified-Since and If-Match.) However, if the + condition fails because the entity has been modified, the client + would then have to make a second request to obtain the entire current + entity-body. + + The If-Range header allows a client to "short-circuit" the second + request. 
Informally, its meaning is `if the entity is unchanged, send + me the part(s) that I am missing; otherwise, send me the entire new + entity'. + + If-Range = "If-Range" ":" ( entity-tag | HTTP-date ) + + + + +Fielding, et al. Standards Track [Page 133] + +RFC 2616 HTTP/1.1 June 1999 + + + If the client has no entity tag for an entity, but does have a Last- + Modified date, it MAY use that date in an If-Range header. (The + server can distinguish between a valid HTTP-date and any form of + entity-tag by examining no more than two characters.) The If-Range + header SHOULD only be used together with a Range header, and MUST be + ignored if the request does not include a Range header, or if the + server does not support the sub-range operation. + + If the entity tag given in the If-Range header matches the current + entity tag for the entity, then the server SHOULD provide the + specified sub-range of the entity using a 206 (Partial content) + response. If the entity tag does not match, then the server SHOULD + return the entire entity using a 200 (OK) response. + +14.28 If-Unmodified-Since + + The If-Unmodified-Since request-header field is used with a method to + make it conditional. If the requested resource has not been modified + since the time specified in this field, the server SHOULD perform the + requested operation as if the If-Unmodified-Since header were not + present. + + If the requested variant has been modified since the specified time, + the server MUST NOT perform the requested operation, and MUST return + a 412 (Precondition Failed). + + If-Unmodified-Since = "If-Unmodified-Since" ":" HTTP-date + + An example of the field is: + + If-Unmodified-Since: Sat, 29 Oct 1994 19:43:31 GMT + + If the request normally (i.e., without the If-Unmodified-Since + header) would result in anything other than a 2xx or 412 status, the + If-Unmodified-Since header SHOULD be ignored. + + If the specified date is invalid, the header is ignored. 
+ + The result of a request having both an If-Unmodified-Since header + field and either an If-None-Match or an If-Modified-Since header + fields is undefined by this specification. + +14.29 Last-Modified + + The Last-Modified entity-header field indicates the date and time at + which the origin server believes the variant was last modified. + + Last-Modified = "Last-Modified" ":" HTTP-date + + + +Fielding, et al. Standards Track [Page 134] + +RFC 2616 HTTP/1.1 June 1999 + + + An example of its use is + + Last-Modified: Tue, 15 Nov 1994 12:45:26 GMT + + The exact meaning of this header field depends on the implementation + of the origin server and the nature of the original resource. For + files, it may be just the file system last-modified time. For + entities with dynamically included parts, it may be the most recent + of the set of last-modify times for its component parts. For database + gateways, it may be the last-update time stamp of the record. For + virtual objects, it may be the last time the internal state changed. + + An origin server MUST NOT send a Last-Modified date which is later + than the server's time of message origination. In such cases, where + the resource's last modification would indicate some time in the + future, the server MUST replace that date with the message + origination date. + + An origin server SHOULD obtain the Last-Modified value of the entity + as close as possible to the time that it generates the Date value of + its response. This allows a recipient to make an accurate assessment + of the entity's modification time, especially if the entity changes + near the time that the response is generated. + + HTTP/1.1 servers SHOULD send Last-Modified whenever feasible. + +14.30 Location + + The Location response-header field is used to redirect the recipient + to a location other than the Request-URI for completion of the + request or identification of a new resource. 
For 201 (Created) + responses, the Location is that of the new resource which was created + by the request. For 3xx responses, the location SHOULD indicate the + server's preferred URI for automatic redirection to the resource. The + field value consists of a single absolute URI. + + Location = "Location" ":" absoluteURI + [[ [ "#" fragment ] ]] + + An example is: + + Location: http://www.w3.org/pub/WWW/People.html + + Note: The Content-Location header field (section 14.14) differs + from Location in that the Content-Location identifies the original + location of the entity enclosed in the request. It is therefore + possible for a response to contain header fields for both Location + and Content-Location. Also see section 13.10 for cache + requirements of some methods. + + + +Fielding, et al. Standards Track [Page 135] + +RFC 2616 HTTP/1.1 June 1999 + + +14.31 Max-Forwards + + The Max-Forwards request-header field provides a mechanism with the + TRACE (section 9.8) and OPTIONS (section 9.2) methods to limit the + number of proxies or gateways that can forward the request to the + next inbound server. This can be useful when the client is attempting + to trace a request chain which appears to be failing or looping in + mid-chain. + + Max-Forwards = "Max-Forwards" ":" 1*DIGIT + + The Max-Forwards value is a decimal integer indicating the remaining + number of times this request message may be forwarded. + + Each proxy or gateway recipient of a TRACE or OPTIONS request + containing a Max-Forwards header field MUST check and update its + value prior to forwarding the request. If the received value is zero + (0), the recipient MUST NOT forward the request; instead, it MUST + respond as the final recipient. If the received Max-Forwards value is + greater than zero, then the forwarded message MUST contain an updated + Max-Forwards field with a value decremented by one (1). 
+ + The Max-Forwards header field MAY be ignored for all other methods + defined by this specification and for any extension methods for which + it is not explicitly referred to as part of that method definition. + +14.32 Pragma + + The Pragma general-header field is used to include implementation- + specific directives that might apply to any recipient along the + request/response chain. All pragma directives specify optional + behavior from the viewpoint of the protocol; however, some systems + MAY require that behavior be consistent with the directives. + + Pragma = "Pragma" ":" 1#pragma-directive + pragma-directive = "no-cache" | extension-pragma + extension-pragma = token [ "=" ( token | quoted-string ) ] + + When the no-cache directive is present in a request message, an + application SHOULD forward the request toward the origin server even + if it has a cached copy of what is being requested. This pragma + directive has the same semantics as the no-cache cache-directive (see + section 14.9) and is defined here for backward compatibility with + HTTP/1.0. Clients SHOULD include both header fields when a no-cache + request is sent to a server not known to be HTTP/1.1 compliant. + + + + + + +Fielding, et al. Standards Track [Page 136] + +RFC 2616 HTTP/1.1 June 1999 + + + Pragma directives MUST be passed through by a proxy or gateway + application, regardless of their significance to that application, + since the directives might be applicable to all recipients along the + request/response chain. It is not possible to specify a pragma for a + specific recipient; however, any pragma directive not relevant to a + recipient SHOULD be ignored by that recipient. + + HTTP/1.1 caches SHOULD treat "Pragma: no-cache" as if the client had + sent "Cache-Control: no-cache". No new Pragma directives will be + defined in HTTP. 
+ + Note: because the meaning of "Pragma: no-cache" as a response + header field is not actually specified, it does not provide a + reliable replacement for "Cache-Control: no-cache" in a response. + +14.33 Proxy-Authenticate + + The Proxy-Authenticate response-header field MUST be included as part + of a 407 (Proxy Authentication Required) response. The field value + consists of a challenge that indicates the authentication scheme and + parameters applicable to the proxy for this Request-URI. + + Proxy-Authenticate = "Proxy-Authenticate" ":" 1#challenge + + The HTTP access authentication process is described in "HTTP + Authentication: Basic and Digest Access Authentication" [43]. Unlike + WWW-Authenticate, the Proxy-Authenticate header field applies only to + the current connection and SHOULD NOT be passed on to downstream + clients. However, an intermediate proxy might need to obtain its own + credentials by requesting them from the downstream client, which in + some circumstances will appear as if the proxy is forwarding the + Proxy-Authenticate header field. + +14.34 Proxy-Authorization + + The Proxy-Authorization request-header field allows the client to + identify itself (or its user) to a proxy which requires + authentication. The Proxy-Authorization field value consists of + credentials containing the authentication information of the user + agent for the proxy and/or realm of the resource being requested. + + Proxy-Authorization = "Proxy-Authorization" ":" credentials + + The HTTP access authentication process is described in "HTTP + Authentication: Basic and Digest Access Authentication" [43]. Unlike + Authorization, the Proxy-Authorization header field applies only to + the next outbound proxy that demanded authentication using the Proxy- + Authenticate field. When multiple proxies are used in a chain, the + + + +Fielding, et al.
Standards Track [Page 137] + +RFC 2616 HTTP/1.1 June 1999 + + + Proxy-Authorization header field is consumed by the first outbound + proxy that was expecting to receive credentials. A proxy MAY relay + the credentials from the client request to the next proxy if that is + the mechanism by which the proxies cooperatively authenticate a given + request. + +14.35 Range + +14.35.1 Byte Ranges + + Since all HTTP entities are represented in HTTP messages as sequences + of bytes, the concept of a byte range is meaningful for any HTTP + entity. (However, not all clients and servers need to support byte- + range operations.) + + Byte range specifications in HTTP apply to the sequence of bytes in + the entity-body (not necessarily the same as the message-body). + + A byte range operation MAY specify a single range of bytes, or a set + of ranges within a single entity. + + ranges-specifier = byte-ranges-specifier + byte-ranges-specifier = bytes-unit "=" byte-range-set + byte-range-set = 1#( byte-range-spec | suffix-byte-range-spec ) + byte-range-spec = first-byte-pos "-" [last-byte-pos] + first-byte-pos = 1*DIGIT + last-byte-pos = 1*DIGIT + + The first-byte-pos value in a byte-range-spec gives the byte-offset + of the first byte in a range. The last-byte-pos value gives the + byte-offset of the last byte in the range; that is, the byte + positions specified are inclusive. Byte offsets start at zero. + + If the last-byte-pos value is present, it MUST be greater than or + equal to the first-byte-pos in that byte-range-spec, or the byte- + range-spec is syntactically invalid. The recipient of a byte-range- + set that includes one or more syntactically invalid byte-range-spec + values MUST ignore the header field that includes that byte-range- + set. + + If the last-byte-pos value is absent, or if the value is greater than + or equal to the current length of the entity-body, last-byte-pos is + taken to be equal to one less than the current length of the entity- + body in bytes. 
+ + By its choice of last-byte-pos, a client can limit the number of + bytes retrieved without knowing the size of the entity. + + + + +Fielding, et al. Standards Track [Page 138] + +RFC 2616 HTTP/1.1 June 1999 + + + suffix-byte-range-spec = "-" suffix-length + suffix-length = 1*DIGIT + + A suffix-byte-range-spec is used to specify the suffix of the + entity-body, of a length given by the suffix-length value. (That is, + this form specifies the last N bytes of an entity-body.) If the + entity is shorter than the specified suffix-length, the entire + entity-body is used. + + If a syntactically valid byte-range-set includes at least one byte- + range-spec whose first-byte-pos is less than the current length of + the entity-body, or at least one suffix-byte-range-spec with a non- + zero suffix-length, then the byte-range-set is satisfiable. + Otherwise, the byte-range-set is unsatisfiable. If the byte-range-set + is unsatisfiable, the server SHOULD return a response with a status + of 416 (Requested range not satisfiable). Otherwise, the server + SHOULD return a response with a status of 206 (Partial Content) + containing the satisfiable ranges of the entity-body. 
+ + Examples of byte-ranges-specifier values (assuming an entity-body of + length 10000): + + - The first 500 bytes (byte offsets 0-499, inclusive): bytes=0- + 499 + + - The second 500 bytes (byte offsets 500-999, inclusive): + bytes=500-999 + + - The final 500 bytes (byte offsets 9500-9999, inclusive): + bytes=-500 + + - Or bytes=9500- + + - The first and last bytes only (bytes 0 and 9999): bytes=0-0,-1 + + - Several legal but not canonical specifications of the second 500 + bytes (byte offsets 500-999, inclusive): + bytes=500-600,601-999 + bytes=500-700,601-999 + +14.35.2 Range Retrieval Requests + + HTTP retrieval requests using conditional or unconditional GET + methods MAY request one or more sub-ranges of the entity, instead of + the entire entity, using the Range request header, which applies to + the entity returned as the result of the request: + + Range = "Range" ":" ranges-specifier + + + +Fielding, et al. Standards Track [Page 139] + +RFC 2616 HTTP/1.1 June 1999 + + + A server MAY ignore the Range header. However, HTTP/1.1 origin + servers and intermediate caches ought to support byte ranges when + possible, since Range supports efficient recovery from partially + failed transfers, and supports efficient partial retrieval of large + entities. + + If the server supports the Range header and the specified range or + ranges are appropriate for the entity: + + - The presence of a Range header in an unconditional GET modifies + what is returned if the GET is otherwise successful. In other + words, the response carries a status code of 206 (Partial + Content) instead of 200 (OK). + + - The presence of a Range header in a conditional GET (a request + using one or both of If-Modified-Since and If-None-Match, or + one or both of If-Unmodified-Since and If-Match) modifies what + is returned if the GET is otherwise successful and the + condition is true. It does not affect the 304 (Not Modified) + response returned if the conditional is false. 
+ + In some cases, it might be more appropriate to use the If-Range + header (see section 14.27) in addition to the Range header. + + If a proxy that supports ranges receives a Range request, forwards + the request to an inbound server, and receives an entire entity in + reply, it SHOULD only return the requested range to its client. It + SHOULD store the entire received response in its cache if that is + consistent with its cache allocation policies. + +14.36 Referer + + The Referer[sic] request-header field allows the client to specify, + for the server's benefit, the address (URI) of the resource from + which the Request-URI was obtained (the "referrer", although the + header field is misspelled.) The Referer request-header allows a + server to generate lists of back-links to resources for interest, + logging, optimized caching, etc. It also allows obsolete or mistyped + links to be traced for maintenance. The Referer field MUST NOT be + sent if the Request-URI was obtained from a source that does not have + its own URI, such as input from the user keyboard. + + Referer = "Referer" ":" ( absoluteURI | relativeURI ) + + Example: + + Referer: http://www.w3.org/hypertext/DataSources/Overview.html + + + + +Fielding, et al. Standards Track [Page 140] + +RFC 2616 HTTP/1.1 June 1999 + + + If the field value is a relative URI, it SHOULD be interpreted + relative to the Request-URI. The URI MUST NOT include a fragment. See + section 15.1.3 for security considerations. + +14.37 Retry-After + + The Retry-After response-header field can be used with a 503 (Service + Unavailable) response to indicate how long the service is expected to + be unavailable to the requesting client. This field MAY also be used + with any 3xx (Redirection) response to indicate the minimum time the + user-agent is asked to wait before issuing the redirected request. The + value of this field can be either an HTTP-date or an integer number + of seconds (in decimal) after the time of the response.
+ + Retry-After = "Retry-After" ":" ( HTTP-date | delta-seconds ) + + Two examples of its use are + + Retry-After: Fri, 31 Dec 1999 23:59:59 GMT + Retry-After: 120 + + In the latter example, the delay is 2 minutes. + +14.38 Server + + The Server response-header field contains information about the + software used by the origin server to handle the request. The field + can contain multiple product tokens (section 3.8) and comments + identifying the server and any significant subproducts. The product + tokens are listed in order of their significance for identifying the + application. + + Server = "Server" ":" 1*( product | comment ) + + Example: + + Server: CERN/3.0 libwww/2.17 + + If the response is being forwarded through a proxy, the proxy + application MUST NOT modify the Server response-header. Instead, it + SHOULD include a Via field (as described in section 14.45). + [[ Actually, it MUST ]] + + Note: Revealing the specific software version of the server might + allow the server machine to become more vulnerable to attacks + against software that is known to contain security holes. Server + implementors are encouraged to make this field a configurable + option. + + + + +Fielding, et al. Standards Track [Page 141] + +RFC 2616 HTTP/1.1 June 1999 + + +14.39 TE + + The TE request-header field indicates what extension transfer-codings + it is willing to accept in the response and whether or not it is + willing to accept trailer fields in a chunked transfer-coding. Its + value may consist of the keyword "trailers" and/or a comma-separated + list of extension transfer-coding names with optional accept + parameters (as described in section 3.6). + + TE = "TE" ":" #( t-codings ) + t-codings = "trailers" | ( transfer-extension [ accept-params ] ) + + The presence of the keyword "trailers" indicates that the client is + willing to accept trailer fields in a chunked transfer-coding, as + defined in section 3.6.1. 
This keyword is reserved for use with + transfer-coding values even though it does not itself represent a + transfer-coding. + + Examples of its use are: + + TE: deflate + TE: + TE: trailers, deflate;q=0.5 + + The TE header field only applies to the immediate connection. + Therefore, the keyword MUST be supplied within a Connection header + field (section 14.10) whenever TE is present in an HTTP/1.1 message. + + A server tests whether a transfer-coding is acceptable, according to + a TE field, using these rules: + + 1. The "chunked" transfer-coding is always acceptable. If the + keyword "trailers" is listed, the client indicates that it is + willing to accept trailer fields in the chunked response on + behalf of itself and any downstream clients. The implication is + that, if given, the client is stating that either all + downstream clients are willing to accept trailer fields in the + forwarded response, or that it will attempt to buffer the + response on behalf of downstream recipients. + + Note: HTTP/1.1 does not define any means to limit the size of a + chunked response such that a client can be assured of buffering + the entire response. + + 2. If the transfer-coding being tested is one of the transfer- + codings listed in the TE field, then it is acceptable unless it + is accompanied by a qvalue of 0. (As defined in section 3.9, a + qvalue of 0 means "not acceptable.") + + + +Fielding, et al. Standards Track [Page 142] + +RFC 2616 HTTP/1.1 June 1999 + + + 3. If multiple transfer-codings are acceptable, then the + acceptable transfer-coding with the highest non-zero qvalue is + preferred. The "chunked" transfer-coding always has a qvalue + of 1. + + If the TE field-value is empty or if no TE field is present, the only + transfer-coding is "chunked". A message with no transfer-coding is + always acceptable. 
+ +14.40 Trailer + + The Trailer general field value indicates that the given set of + header fields is present in the trailer of a message encoded with + chunked transfer-coding. + + Trailer = "Trailer" ":" 1#field-name + + An HTTP/1.1 message SHOULD include a Trailer header field in a + message using chunked transfer-coding with a non-empty trailer. Doing + so allows the recipient to know which header fields to expect in the + trailer. + + If no Trailer header field is present, the trailer SHOULD NOT include + any header fields. See section 3.6.1 for restrictions on the use of + trailer fields in a "chunked" transfer-coding. + + Message header fields listed in the Trailer header field MUST NOT + include the following header fields: + + . Transfer-Encoding + + . Content-Length + + . Trailer + +14.41 Transfer-Encoding + + The Transfer-Encoding general-header field indicates what (if any) + type of transformation has been applied to the message body in order + to safely transfer it between the sender and the recipient. This + differs from the content-coding in that the transfer-coding is a + property of the message, not of the entity. + + Transfer-Encoding = "Transfer-Encoding" ":" 1#transfer-coding + + Transfer-codings are defined in section 3.6. An example is: + + Transfer-Encoding: chunked + + + +Fielding, et al. Standards Track [Page 143] + +RFC 2616 HTTP/1.1 June 1999 + + + If multiple encodings have been applied to an entity, the transfer- + codings MUST be listed in the order in which they were applied. + Additional information about the encoding parameters MAY be provided + by other entity-header fields not defined by this specification. + + Many older HTTP/1.0 applications do not understand the Transfer- + Encoding header. + +14.42 Upgrade + + The Upgrade general-header allows the client to specify what + additional communication protocols it supports and would like to use + if the server finds it appropriate to switch protocols. 
The server + MUST use the Upgrade header field within a 101 (Switching Protocols) + response to indicate which protocol(s) are being switched. + + Upgrade = "Upgrade" ":" 1#product + + For example, + + Upgrade: HTTP/2.0, SHTTP/1.3, IRC/6.9, RTA/x11 + + The Upgrade header field is intended to provide a simple mechanism + for transition from HTTP/1.1 to some other, incompatible protocol. It + does so by allowing the client to advertise its desire to use another + protocol, such as a later version of HTTP with a higher major version + number, even though the current request has been made using HTTP/1.1. + This eases the difficult transition between incompatible protocols by + allowing the client to initiate a request in the more commonly + supported protocol while indicating to the server that it would like + to use a "better" protocol if available (where "better" is determined + by the server, possibly according to the nature of the method and/or + resource being requested). + + The Upgrade header field only applies to switching application-layer + protocols upon the existing transport-layer connection. Upgrade + cannot be used to insist on a protocol change; its acceptance and use + by the server is optional. The capabilities and nature of the + application-layer communication after the protocol change is entirely + dependent upon the new protocol chosen, although the first action + after changing the protocol MUST be a response to the initial HTTP + request containing the Upgrade header field. + + The Upgrade header field only applies to the immediate connection. + Therefore, the upgrade keyword MUST be supplied within a Connection + header field (section 14.10) whenever Upgrade is present in an + HTTP/1.1 message. + + + + +Fielding, et al. Standards Track [Page 144] + +RFC 2616 HTTP/1.1 June 1999 + + + The Upgrade header field cannot be used to indicate a switch to a + protocol on a different connection. 
For that purpose, it is more + appropriate to use a 301, 302, 303, or 305 redirection response. + + This specification only defines the protocol name "HTTP" for use by + the family of Hypertext Transfer Protocols, as defined by the HTTP + version rules of section 3.1 and future updates to this + specification. Any token can be used as a protocol name; however, it + will only be useful if both the client and server associate the name + with the same protocol. + +14.43 User-Agent + + The User-Agent request-header field contains information about the + user agent originating the request. This is for statistical purposes, + the tracing of protocol violations, and automated recognition of user + agents for the sake of tailoring responses to avoid particular user + agent limitations. User agents SHOULD include this field with + requests. The field can contain multiple product tokens (section 3.8) + and comments identifying the agent and any subproducts which form a + significant part of the user agent. By convention, the product tokens + are listed in order of their significance for identifying the + application. + + User-Agent = "User-Agent" ":" 1*( product | comment ) + + Example: + + User-Agent: CERN-LineMode/2.15 libwww/2.17b3 + +14.44 Vary + + The Vary field value indicates the set of request-header fields that + fully determines, while the response is fresh, whether a cache is + permitted to use the response to reply to a subsequent request + without revalidation. For uncacheable or stale responses, the Vary + field value advises the user agent about the criteria that were used + to select the representation. A Vary field value of "*" implies that + a cache cannot determine from the request headers of a subsequent + request whether this response is the appropriate representation. See + section 13.6 for use of the Vary header field by caches. 
+ + Vary = "Vary" ":" ( "*" | 1#field-name ) + + An HTTP/1.1 server SHOULD include a Vary header field with any + cacheable response that is subject to server-driven negotiation. + Doing so allows a cache to properly interpret future requests on that + resource and informs the user agent about the presence of negotiation + + + +Fielding, et al. Standards Track [Page 145] + +RFC 2616 HTTP/1.1 June 1999 + + + on that resource. A server MAY include a Vary header field with a + non-cacheable response that is subject to server-driven negotiation, + since this might provide the user agent with useful information about + the dimensions over which the response varies at the time of the + response. + + A Vary field value consisting of a list of field-names signals that + the representation selected for the response is based on a selection + algorithm which considers ONLY the listed request-header field values + in selecting the most appropriate representation. A cache MAY assume + that the same selection will be made for future requests with the + same values for the listed field names, for the duration of time for + which the response is fresh. + + The field-names given are not limited to the set of standard + request-header fields defined by this specification. Field names are + case-insensitive. + + A Vary field value of "*" signals that unspecified parameters not + limited to the request-headers (e.g., the network address of the + client), play a role in the selection of the response representation. + The "*" value MUST NOT be generated by a proxy server; it may only be + generated by an origin server. + +14.45 Via + + The Via general-header field MUST be used by gateways and proxies to + indicate the intermediate protocols and recipients between the user + agent and the server on requests, and between the origin server and + the client on responses. 
It is analogous to the "Received" field of + RFC 822 [9] and is intended to be used for tracking message forwards, + avoiding request loops, and identifying the protocol capabilities of + all senders along the request/response chain. + + Via = "Via" ":" 1#( received-protocol received-by [ comment ] ) + received-protocol = [ protocol-name "/" ] protocol-version + protocol-name = token + protocol-version = token + received-by = ( host [ ":" port ] ) | pseudonym + pseudonym = token + + The received-protocol indicates the protocol version of the message + received by the server or client along each segment of the + request/response chain. The received-protocol version is appended to + the Via field value when the message is forwarded so that information + about the protocol capabilities of upstream applications remains + visible to all recipients. + + + + +Fielding, et al. Standards Track [Page 146] + +RFC 2616 HTTP/1.1 June 1999 + + + The protocol-name is optional if and only if it would be "HTTP". The + received-by field is normally the host and optional port number of a + recipient server or client that subsequently forwarded the message. + However, if the real host is considered to be sensitive information, + it MAY be replaced by a pseudonym. If the port is not given, it MAY + be assumed to be the default port of the received-protocol. + + Multiple Via field values represent each proxy or gateway that has + forwarded the message. Each recipient MUST append its information + such that the end result is ordered according to the sequence of + forwarding applications. + + Comments MAY be used in the Via header field to identify the software + of the recipient proxy or gateway, analogous to the User-Agent and + Server header fields. However, all comments in the Via field are + optional and MAY be removed by any recipient prior to forwarding the + message.
+ + For example, a request message could be sent from an HTTP/1.0 user + agent to an internal proxy code-named "fred", which uses HTTP/1.1 to + forward the request to a public proxy at nowhere.com, which completes + the request by forwarding it to the origin server at www.ics.uci.edu. + The request received by www.ics.uci.edu would then have the following + Via header field: + + Via: 1.0 fred, 1.1 nowhere.com (Apache/1.1) + + Proxies and gateways used as a portal through a network firewall + SHOULD NOT, by default, forward the names and ports of hosts within + the firewall region. This information SHOULD only be propagated if + explicitly enabled. If not enabled, the received-by host of any host + behind the firewall SHOULD be replaced by an appropriate pseudonym + for that host. + + For organizations that have strong privacy requirements for hiding + internal structures, a proxy MAY combine an ordered subsequence of + Via header field entries with identical received-protocol values into + a single such entry. For example, + + Via: 1.0 ricky, 1.1 ethel, 1.1 fred, 1.0 lucy + + could be collapsed to + + Via: 1.0 ricky, 1.1 mertz, 1.0 lucy + + + + + + + +Fielding, et al. Standards Track [Page 147] + +RFC 2616 HTTP/1.1 June 1999 + + + Applications SHOULD NOT combine multiple entries unless they are all + under the same organizational control and the hosts have already been + replaced by pseudonyms. Applications MUST NOT combine entries which + have different received-protocol values. + +14.46 Warning + + The Warning general-header field is used to carry additional + information about the status or transformation of a message which + might not be reflected in the message. This information is typically + used to warn about a possible lack of semantic transparency from + caching operations or transformations applied to the entity body of + the message. 
+ + Warning headers are sent with responses using: + + Warning = "Warning" ":" 1#warning-value + + warning-value = warn-code SP warn-agent SP warn-text + [SP warn-date] + + warn-code = 3DIGIT + warn-agent = ( host [ ":" port ] ) | pseudonym + ; the name or pseudonym of the server adding + ; the Warning header, for use in debugging + warn-text = quoted-string + warn-date = <"> HTTP-date <"> + + A response MAY carry more than one Warning header. + + The warn-text SHOULD be in a natural language and character set that + is most likely to be intelligible to the human user receiving the + response. This decision MAY be based on any available knowledge, such + as the location of the cache or user, the Accept-Language field in a + request, the Content-Language field in a response, etc. The default + language is English and the default character set is ISO-8859-1. + + If a character set other than ISO-8859-1 is used, it MUST be encoded + in the warn-text using the method described in RFC 2047 [14]. + + Warning headers can in general be applied to any message, however + some specific warn-codes are specific to caches and can only be + applied to response messages. New Warning headers SHOULD be added + after any existing Warning headers. A cache MUST NOT delete any + Warning header that it received with a message. However, if a cache + successfully validates a cache entry, it SHOULD remove any Warning + headers previously attached to that entry except as specified for + + + + +Fielding, et al. Standards Track [Page 148] + +RFC 2616 HTTP/1.1 June 1999 + + + specific Warning codes. It MUST then add any Warning headers received + in the validating response. In other words, Warning headers are those + that would be attached to the most recent relevant response. + + When multiple Warning headers are attached to a response, the user + agent ought to inform the user of as many of them as possible, in the + order that they appear in the response. 
If it is not possible to + inform the user of all of the warnings, the user agent SHOULD follow + these heuristics: + + - Warnings that appear early in the response take priority over + those appearing later in the response. + + - Warnings in the user's preferred character set take priority + over warnings in other character sets but with identical warn- + codes and warn-agents. + + Systems that generate multiple Warning headers SHOULD order them with + this user agent behavior in mind. + + Requirements for the behavior of caches with respect to Warnings are + stated in section 13.1.2. + + This is a list of the currently-defined warn-codes, each with a + recommended warn-text in English, and a description of its meaning. + + 110 Response is stale + MUST be included whenever the returned response is stale. + + 111 Revalidation failed + MUST be included if a cache returns a stale response because an + attempt to revalidate the response failed, due to an inability to + reach the server. + + 112 Disconnected operation + SHOULD be included if the cache is intentionally disconnected from + the rest of the network for a period of time. + + 113 Heuristic expiration + MUST be included if the cache heuristically chose a freshness + lifetime greater than 24 hours and the response's age is greater + than 24 hours. + + 199 Miscellaneous warning + The warning text MAY include arbitrary information to be presented + to a human user, or logged. A system receiving this warning MUST + NOT take any automated action, besides presenting the warning to + the user. + + + +Fielding, et al. 
Standards Track [Page 149] + +RFC 2616 HTTP/1.1 June 1999 + + + 214 Transformation applied + MUST be added by an intermediate cache or proxy if it applies any + transformation changing the content-coding (as specified in the + Content-Encoding header) or media-type (as specified in the + Content-Type header) of the response, or the entity-body of the + response, unless this Warning code already appears in the response. + + 299 Miscellaneous persistent warning + The warning text MAY include arbitrary information to be presented + to a human user, or logged. A system receiving this warning MUST + NOT take any automated action. + + If an implementation sends a message with one or more Warning headers + whose version is HTTP/1.0 or lower, then the sender MUST include in + each warning-value a warn-date that matches the date in the response. + + If an implementation receives a message with a warning-value that + includes a warn-date, and that warn-date is different from the Date + value in the response, then that warning-value MUST be deleted from + the message before storing, forwarding, or using it. (This prevents + bad consequences of naive caching of Warning header fields.) If all + of the warning-values are deleted for this reason, the Warning header + MUST be deleted as well. + +14.47 WWW-Authenticate + + The WWW-Authenticate response-header field MUST be included in 401 + (Unauthorized) response messages. The field value consists of at + least one challenge that indicates the authentication scheme(s) and + parameters applicable to the Request-URI. + + WWW-Authenticate = "WWW-Authenticate" ":" 1#challenge + + The HTTP access authentication process is described in "HTTP + Authentication: Basic and Digest Access Authentication" [43]. 
User + agents are advised to take special care in parsing the WWW- + Authenticate field value as it might contain more than one challenge, + or if more than one WWW-Authenticate header field is provided, the + contents of a challenge itself can contain a comma-separated list of + authentication parameters. + +15 Security Considerations + + This section is meant to inform application developers, information + providers, and users of the security limitations in HTTP/1.1 as + described by this document. The discussion does not include + definitive solutions to the problems revealed, though it does make + some suggestions for reducing security risks. + + + +Fielding, et al. Standards Track [Page 150] + +RFC 2616 HTTP/1.1 June 1999 + + +15.1 Personal Information + + HTTP clients are often privy to large amounts of personal information + (e.g. the user's name, location, mail address, passwords, encryption + keys, etc.), and SHOULD be very careful to prevent unintentional + leakage of this information via the HTTP protocol to other sources. + We very strongly recommend that a convenient interface be provided + for the user to control dissemination of such information, and that + designers and implementors be particularly careful in this area. + History shows that errors in this area often create serious security + and/or privacy problems and generate highly adverse publicity for the + implementor's company. + +15.1.1 Abuse of Server Log Information + + A server is in the position to save personal data about a user's + requests which might identify their reading patterns or subjects of + interest. This information is clearly confidential in nature and its + handling can be constrained by law in certain countries. People using + the HTTP protocol to provide data are responsible for ensuring that + such material is not distributed without the permission of any + individuals that are identifiable by the published results. 
+ +15.1.2 Transfer of Sensitive Information + + Like any generic data transfer protocol, HTTP cannot regulate the + content of the data that is transferred, nor is there any a priori + method of determining the sensitivity of any particular piece of + information within the context of any given request. Therefore, + applications SHOULD supply as much control over this information as + possible to the provider of that information. Four header fields are + worth special mention in this context: Server, Via, Referer and From. + + Revealing the specific software version of the server might allow the + server machine to become more vulnerable to attacks against software + that is known to contain security holes. Implementors SHOULD make the + Server header field a configurable option. + + Proxies which serve as a portal through a network firewall SHOULD + take special precautions regarding the transfer of header information + that identifies the hosts behind the firewall. In particular, they + SHOULD remove, or replace with sanitized versions, any Via fields + generated behind the firewall. + + The Referer header allows reading patterns to be studied and reverse + links drawn. Although it can be very useful, its power can be abused + if user details are not separated from the information contained in + + + + +Fielding, et al. Standards Track [Page 151] + +RFC 2616 HTTP/1.1 June 1999 + + + the Referer. Even when the personal information has been removed, the + Referer header might indicate a private document's URI whose + publication would be inappropriate. + + The information sent in the From field might conflict with the user's + privacy interests or their site's security policy, and hence it + SHOULD NOT be transmitted without the user being able to disable, + enable, and modify the contents of the field. The user MUST be able + to set the contents of this field within a user preference or + application defaults configuration. 
+ + We suggest, though do not require, that a convenient toggle interface + be provided for the user to enable or disable the sending of From and + Referer information. + + The User-Agent (section 14.43) or Server (section 14.38) header + fields can sometimes be used to determine that a specific client or + server have a particular security hole which might be exploited. + Unfortunately, this same information is often used for other valuable + purposes for which HTTP currently has no better mechanism. + +15.1.3 Encoding Sensitive Information in URI's + + Because the source of a link might be private information or might + reveal an otherwise private information source, it is strongly + recommended that the user be able to select whether or not the + Referer field is sent. For example, a browser client could have a + toggle switch for browsing openly/anonymously, which would + respectively enable/disable the sending of Referer and From + information. + + Clients SHOULD NOT include a Referer header field in a (non-secure) + HTTP request if the referring page was transferred with a secure + protocol. + + Authors of services which use the HTTP protocol SHOULD NOT use GET + based forms for the submission of sensitive data, because this will + cause this data to be encoded in the Request-URI. Many existing + servers, proxies, and user agents will log the request URI in some + place where it might be visible to third parties. Servers can use + POST-based form submission instead. + +15.1.4 Privacy Issues Connected to Accept Headers + + Accept request-headers can reveal information about the user to all + servers which are accessed. The Accept-Language header in particular + can reveal information the user would consider to be of a private + nature, because the understanding of particular languages is often + + + +Fielding, et al. Standards Track [Page 152] + +RFC 2616 HTTP/1.1 June 1999 + + + strongly correlated to the membership of a particular ethnic group.
+ User agents which offer the option to configure the contents of an + Accept-Language header to be sent in every request are strongly + encouraged to let the configuration process include a message which + makes the user aware of the loss of privacy involved. + + An approach that limits the loss of privacy would be for a user agent + to omit the sending of Accept-Language headers by default, and to ask + the user whether or not to start sending Accept-Language headers to a + server if it detects, by looking for any Vary response-header fields + generated by the server, that such sending could improve the quality + of service. + + Elaborate user-customized accept header fields sent in every request, + in particular if these include quality values, can be used by servers + as relatively reliable and long-lived user identifiers. Such user + identifiers would allow content providers to do click-trail tracking, + and would allow collaborating content providers to match cross-server + click-trails or form submissions of individual users. Note that for + many users not behind a proxy, the network address of the host + running the user agent will also serve as a long-lived user + identifier. In environments where proxies are used to enhance + privacy, user agents ought to be conservative in offering accept + header configuration options to end users. As an extreme privacy + measure, proxies could filter the accept headers in relayed requests. + General purpose user agents which provide a high degree of header + configurability SHOULD warn users about the loss of privacy which can + be involved. + +15.2 Attacks Based On File and Path Names + + Implementations of HTTP origin servers SHOULD be careful to restrict + the documents returned by HTTP requests to be only those that were + intended by the server administrators. 
If an HTTP server translates + HTTP URIs directly into file system calls, the server MUST take + special care not to serve files that were not intended to be + delivered to HTTP clients. For example, UNIX, Microsoft Windows, and + other operating systems use ".." as a path component to indicate a + directory level above the current one. On such a system, an HTTP + server MUST disallow any such construct in the Request-URI if it + would otherwise allow access to a resource outside those intended to + be accessible via the HTTP server. Similarly, files intended for + reference only internally to the server (such as access control + files, configuration files, and script code) MUST be protected from + inappropriate retrieval, since they might contain sensitive + information. Experience has shown that minor bugs in such HTTP server + implementations have turned into security risks. + + + + +Fielding, et al. Standards Track [Page 153] + +RFC 2616 HTTP/1.1 June 1999 + + +15.3 DNS Spoofing + + Clients using HTTP rely heavily on the Domain Name Service, and are + thus generally prone to security attacks based on the deliberate + mis-association of IP addresses and DNS names. Clients need to be + cautious in assuming the continuing validity of an IP number/DNS name + association. + + In particular, HTTP clients SHOULD rely on their name resolver for + confirmation of an IP number/DNS name association, rather than + caching the result of previous host name lookups. Many platforms + already can cache host name lookups locally when appropriate, and + they SHOULD be configured to do so. It is proper for these lookups to + be cached, however, only when the TTL (Time To Live) information + reported by the name server makes it likely that the cached + information will remain useful. + + If HTTP clients cache the results of host name lookups in order to + achieve a performance improvement, they MUST observe the TTL + information reported by DNS. 
+ + If HTTP clients do not observe this rule, they could be spoofed when + a previously-accessed server's IP address changes. As network + renumbering is expected to become increasingly common [24], the + possibility of this form of attack will grow. Observing this + requirement thus reduces this potential security vulnerability. + + This requirement also improves the load-balancing behavior of clients + for replicated servers using the same DNS name and reduces the + likelihood of a user's experiencing failure in accessing sites which + use that strategy. + +15.4 Location Headers and Spoofing + + If a single server supports multiple organizations that do not trust + one another, then it MUST check the values of Location and Content- + Location headers in responses that are generated under control of + said organizations to make sure that they do not attempt to + invalidate resources over which they have no authority. + +15.5 Content-Disposition Issues + + RFC 1806 [35], from which the often implemented Content-Disposition + (see section 19.5.1) header in HTTP is derived, has a number of very + serious security considerations. Content-Disposition is not part of + the HTTP standard, but since it is widely implemented, we are + documenting its use and risks for implementors. See RFC 2183 [49] + (which updates RFC 1806) for details. + + + +Fielding, et al. Standards Track [Page 154] + +RFC 2616 HTTP/1.1 June 1999 + + +15.6 Authentication Credentials and Idle Clients + + Existing HTTP clients and user agents typically retain authentication + information indefinitely. HTTP/1.1 does not provide a method for a + server to direct clients to discard these cached credentials. This is + a significant defect that requires further extensions to HTTP.
+ Circumstances under which credential caching can interfere with the + application's security model include but are not limited to: + + - Clients which have been idle for an extended period following + which the server might wish to cause the client to reprompt the + user for credentials. + + - Applications which include a session termination indication + (such as a `logout' or `commit' button on a page) after which + the server side of the application `knows' that there is no + further reason for the client to retain the credentials. + + This is currently under separate study. There are a number of work- + arounds to parts of this problem, and we encourage the use of + password protection in screen savers, idle time-outs, and other + methods which mitigate the security problems inherent in this + problem. In particular, user agents which cache credentials are + encouraged to provide a readily accessible mechanism for discarding + cached credentials under user control. + +15.7 Proxies and Caching + + By their very nature, HTTP proxies are men-in-the-middle, and + represent an opportunity for man-in-the-middle attacks. Compromise of + the systems on which the proxies run can result in serious security + and privacy problems. Proxies have access to security-related + information, personal information about individual users and + organizations, and proprietary information belonging to users and + content providers. A compromised proxy, or a proxy implemented or + configured without regard to security and privacy considerations, + might be used in the commission of a wide range of potential attacks. + + Proxy operators should protect the systems on which proxies run as + they would protect any system that contains or transports sensitive + information. In particular, log information gathered at proxies often + contains highly sensitive personal information, and/or information + about organizations. 
Log information should be carefully guarded, and + appropriate guidelines for use developed and followed. (Section + 15.1.1). + + + + + + +Fielding, et al. Standards Track [Page 155] + +RFC 2616 HTTP/1.1 June 1999 + + + Caching proxies provide additional potential vulnerabilities, since + the contents of the cache represent an attractive target for + malicious exploitation. Because cache contents persist after an HTTP + request is complete, an attack on the cache can reveal information + long after a user believes that the information has been removed from + the network. Therefore, cache contents should be protected as + sensitive information. + + Proxy implementors should consider the privacy and security + implications of their design and coding decisions, and of the + configuration options they provide to proxy operators (especially the + default configuration). + + Users of a proxy need to be aware that they are no trustworthier than + the people who run the proxy; HTTP itself cannot solve this problem. + + The judicious use of cryptography, when appropriate, may suffice to + protect against a broad range of security and privacy attacks. Such + cryptography is beyond the scope of the HTTP/1.1 specification. + +15.7.1 Denial of Service Attacks on Proxies + + They exist. They are hard to defend against. Research continues. + Beware. + +16 Acknowledgments + + This specification makes heavy use of the augmented BNF and generic + constructs defined by David H. Crocker for RFC 822 [9]. Similarly, it + reuses many of the definitions provided by Nathaniel Borenstein and + Ned Freed for MIME [7]. We hope that their inclusion in this + specification will help reduce past confusion over the relationship + between HTTP and Internet mail message formats. + + The HTTP protocol has evolved considerably over the years. 
It has + benefited from a large and active developer community--the many + people who have participated on the www-talk mailing list--and it is + that community which has been most responsible for the success of + HTTP and of the World-Wide Web in general. Marc Andreessen, Robert + Cailliau, Daniel W. Connolly, Bob Denny, John Franks, Jean-Francois + Groff, Phillip M. Hallam-Baker, Hakon W. Lie, Ari Luotonen, Rob + McCool, Lou Montulli, Dave Raggett, Tony Sanders, and Marc + VanHeyningen deserve special recognition for their efforts in + defining early aspects of the protocol. + + This document has benefited greatly from the comments of all those + participating in the HTTP-WG. In addition to those already mentioned, + the following individuals have contributed to this specification: + + + +Fielding, et al. Standards Track [Page 156] + +RFC 2616 HTTP/1.1 June 1999 + + + Gary Adams Ross Patterson + Harald Tveit Alvestrand Albert Lunde + Keith Ball John C. Mallery + Brian Behlendorf Jean-Philippe Martin-Flatin + Paul Burchard Mitra + Maurizio Codogno David Morris + Mike Cowlishaw Gavin Nicol + Roman Czyborra Bill Perry + Michael A. Dolan Jeffrey Perry + David J. Fiander Scott Powers + Alan Freier Owen Rees + Marc Hedlund Luigi Rizzo + Greg Herlihy David Robinson + Koen Holtman Marc Salomon + Alex Hopmann Rich Salz + Bob Jernigan Allan M. Schiffman + Shel Kaphan Jim Seidman + Rohit Khare Chuck Shotton + John Klensin Eric W. Sink + Martijn Koster Simon E. Spero + Alexei Kosut Richard N. Taylor + David M. Kristol Robert S. Thau + Daniel LaLiberte Bill (BearHeart) Weinman + Ben Laurie Francois Yergeau + Paul J. Leach Mary Ellen Zurko + Daniel DuBois Josh Cohen + + + Much of the content and presentation of the caching design is due to + suggestions and comments from individuals including: Shel Kaphan, + Paul Leach, Koen Holtman, David Morris, and Larry Masinter. 
+ + Most of the specification of ranges is based on work originally done + by Ari Luotonen and John Franks, with additional input from Steve + Zilles. + + Thanks to the "cave men" of Palo Alto. You know who you are. + + Jim Gettys (the current editor of this document) wishes particularly + to thank Roy Fielding, the previous editor of this document, along + with John Klensin, Jeff Mogul, Paul Leach, Dave Kristol, Koen + Holtman, John Franks, Josh Cohen, Alex Hopmann, Scott Lawrence, and + Larry Masinter for their help. And thanks go particularly to Jeff + Mogul and Scott Lawrence for performing the "MUST/MAY/SHOULD" audit. + + + + + + + +Fielding, et al. Standards Track [Page 157] + +RFC 2616 HTTP/1.1 June 1999 + + + The Apache Group, Anselm Baird-Smith, author of Jigsaw, and Henrik + Frystyk implemented RFC 2068 early, and we wish to thank them for the + discovery of many of the problems that this document attempts to + rectify. + +17 References + + [1] Alvestrand, H., "Tags for the Identification of Languages", RFC + 1766, March 1995. + + [2] Anklesaria, F., McCahill, M., Lindner, P., Johnson, D., Torrey, + D. and B. Alberti, "The Internet Gopher Protocol (a distributed + document search and retrieval protocol)", RFC 1436, March 1993. + + [3] Berners-Lee, T., "Universal Resource Identifiers in WWW", RFC + 1630, June 1994. + + [4] Berners-Lee, T., Masinter, L. and M. McCahill, "Uniform Resource + Locators (URL)", RFC 1738, December 1994. + + [5] Berners-Lee, T. and D. Connolly, "Hypertext Markup Language - + 2.0", RFC 1866, November 1995. + + [6] Berners-Lee, T., Fielding, R. and H. Frystyk, "Hypertext Transfer + Protocol -- HTTP/1.0", RFC 1945, May 1996. + + [7] Freed, N. and N. Borenstein, "Multipurpose Internet Mail + Extensions (MIME) Part One: Format of Internet Message Bodies", + RFC 2045, November 1996. + + [8] Braden, R., "Requirements for Internet Hosts -- Communication + Layers", STD 3, RFC 1123, October 1989. 
+ + [9] Crocker, D., "Standard for The Format of ARPA Internet Text + Messages", STD 11, RFC 822, August 1982. + + [10] Davis, F., Kahle, B., Morris, H., Salem, J., Shen, T., Wang, R., + Sui, J., and M. Grinbaum, "WAIS Interface Protocol Prototype + Functional Specification," (v1.5), Thinking Machines + Corporation, April 1990. + + [11] Fielding, R., "Relative Uniform Resource Locators", RFC 1808, + June 1995. + + [12] Horton, M. and R. Adams, "Standard for Interchange of USENET + Messages", RFC 1036, December 1987. + + + + + +Fielding, et al. Standards Track [Page 158] + +RFC 2616 HTTP/1.1 June 1999 + + + [13] Kantor, B. and P. Lapsley, "Network News Transfer Protocol", RFC + 977, February 1986. + + [14] Moore, K., "MIME (Multipurpose Internet Mail Extensions) Part + Three: Message Header Extensions for Non-ASCII Text", RFC 2047, + November 1996. + + [15] Nebel, E. and L. Masinter, "Form-based File Upload in HTML", RFC + 1867, November 1995. + + [16] Postel, J., "Simple Mail Transfer Protocol", STD 10, RFC 821, + August 1982. + + [17] Postel, J., "Media Type Registration Procedure", RFC 1590, + November 1996. + +[[ Should be: ]] +[[ [17] Freed, N., Klensin, J., and Postel, J., "Multipurpose Internet ]] +[[ Mail Extensions (MIME) Part Four: "Registration Procedure", ]] +[[ RFC 2048, November 1996. ]] + + [18] Postel, J. and J. Reynolds, "File Transfer Protocol", STD 9, RFC + 959, October 1985. + + [19] Reynolds, J. and J. Postel, "Assigned Numbers", STD 2, RFC 1700, + October 1994. + + [20] Sollins, K. and L. Masinter, "Functional Requirements for + Uniform Resource Names", RFC 1737, December 1994. + + [21] US-ASCII. Coded Character Set - 7-Bit American Standard Code for + Information Interchange. Standard ANSI X3.4-1986, ANSI, 1986. + + [22] ISO-8859. International Standard -- Information Processing -- + 8-bit Single-Byte Coded Graphic Character Sets -- + Part 1: Latin alphabet No. 1, ISO-8859-1:1987. + Part 2: Latin alphabet No. 2, ISO-8859-2, 1987. 
+ Part 3: Latin alphabet No. 3, ISO-8859-3, 1988. + Part 4: Latin alphabet No. 4, ISO-8859-4, 1988. + Part 5: Latin/Cyrillic alphabet, ISO-8859-5, 1988. + Part 6: Latin/Arabic alphabet, ISO-8859-6, 1987. + Part 7: Latin/Greek alphabet, ISO-8859-7, 1987. + Part 8: Latin/Hebrew alphabet, ISO-8859-8, 1988. + Part 9: Latin alphabet No. 5, ISO-8859-9, 1990. + + [23] Meyers, J. and M. Rose, "The Content-MD5 Header Field", RFC + 1864, October 1995. + + [24] Carpenter, B. and Y. Rekhter, "Renumbering Needs Work", RFC + 1900, February 1996. + + [25] Deutsch, P., "GZIP file format specification version 4.3", RFC + 1952, May 1996. + + + +Fielding, et al. Standards Track [Page 159] + +RFC 2616 HTTP/1.1 June 1999 + + + [26] Venkata N. Padmanabhan, and Jeffrey C. Mogul. "Improving HTTP + Latency", Computer Networks and ISDN Systems, v. 28, pp. 25-35, + Dec. 1995. Slightly revised version of paper in Proc. 2nd + International WWW Conference '94: Mosaic and the Web, Oct. 1994, + which is available at + http://www.ncsa.uiuc.edu/SDG/IT94/Proceedings/DDay/mogul/HTTPLat + ency.html. + + [27] Joe Touch, John Heidemann, and Katia Obraczka. "Analysis of HTTP + Performance", <URL: http://www.isi.edu/touch/pubs/http-perf96/>, + ISI Research Report ISI/RR-98-463, (original report dated Aug. + 1996), USC/Information Sciences Institute, August 1998. + + [28] Mills, D., "Network Time Protocol (Version 3) Specification, + Implementation and Analysis", RFC 1305, March 1992. + + [29] Deutsch, P., "DEFLATE Compressed Data Format Specification + version 1.3", RFC 1951, May 1996. + + [30] S. Spero, "Analysis of HTTP Performance Problems," + http://sunsite.unc.edu/mdma-release/http-prob.html. + + [31] Deutsch, P. and J. Gailly, "ZLIB Compressed Data Format + Specification version 3.3", RFC 1950, May 1996. + + [32] Franks, J., Hallam-Baker, P., Hostetler, J., Leach, P., + Luotonen, A., Sink, E. and L. Stewart, "An Extension to HTTP: + Digest Access Authentication", RFC 2069, January 1997. + + [33] Fielding, R., Gettys, J., Mogul, J., Frystyk, H.
and T. + Berners-Lee, "Hypertext Transfer Protocol -- HTTP/1.1", RFC + 2068, January 1997. + + [34] Bradner, S., "Key words for use in RFCs to Indicate Requirement + Levels", BCP 14, RFC 2119, March 1997. + + [35] Troost, R. and Dorner, S., "Communicating Presentation + Information in Internet Messages: The Content-Disposition + Header", RFC 1806, June 1995. + + [36] Mogul, J., Fielding, R., Gettys, J. and H. Frystyk, "Use and + Interpretation of HTTP Version Numbers", RFC 2145, May 1997. + [jg639] + + [37] Palme, J., "Common Internet Message Headers", RFC 2076, February + 1997. [jg640] + + + + + +Fielding, et al. Standards Track [Page 160] + +RFC 2616 HTTP/1.1 June 1999 + + + [38] Yergeau, F., "UTF-8, a transformation format of Unicode and + ISO-10646", RFC 2279, January 1998. [jg641] + + [39] Nielsen, H.F., Gettys, J., Baird-Smith, A., Prud'hommeaux, E., + Lie, H., and C. Lilley. "Network Performance Effects of + HTTP/1.1, CSS1, and PNG," Proceedings of ACM SIGCOMM '97, Cannes + France, September 1997.[jg642] + + [40] Freed, N. and N. Borenstein, "Multipurpose Internet Mail + Extensions (MIME) Part Two: Media Types", RFC 2046, November + 1996. [jg643] + + [41] Alvestrand, H., "IETF Policy on Character Sets and Languages", + BCP 18, RFC 2277, January 1998. [jg644] + + [42] Berners-Lee, T., Fielding, R. and L. Masinter, "Uniform Resource + Identifiers (URI): Generic Syntax and Semantics", RFC 2396, + August 1998. [jg645] + + [43] Franks, J., Hallam-Baker, P., Hostetler, J., Lawrence, S., + Leach, P., Luotonen, A., Sink, E. and L. Stewart, "HTTP + Authentication: Basic and Digest Access Authentication", RFC + 2617, June 1999. [jg646] + + [44] Luotonen, A., "Tunneling TCP based protocols through Web proxy + servers," Work in Progress. [jg647] + + [45] Palme, J. and A. Hopmann, "MIME E-mail Encapsulation of + Aggregate Documents, such as HTML (MHTML)", RFC 2110, March + 1997. 
+ + [46] Bradner, S., "The Internet Standards Process -- Revision 3", BCP + 9, RFC 2026, October 1996. + + [47] Masinter, L., "Hyper Text Coffee Pot Control Protocol + (HTCPCP/1.0)", RFC 2324, 1 April 1998. + + [48] Freed, N. and N. Borenstein, "Multipurpose Internet Mail + Extensions (MIME) Part Five: Conformance Criteria and Examples", + RFC 2049, November 1996. + + [49] Troost, R., Dorner, S. and K. Moore, "Communicating Presentation + Information in Internet Messages: The Content-Disposition Header + Field", RFC 2183, August 1997. + + + + + + + +Fielding, et al. Standards Track [Page 161] + +RFC 2616 HTTP/1.1 June 1999 + + +18 Authors' Addresses + + Roy T. Fielding + Information and Computer Science + University of California, Irvine + Irvine, CA 92697-3425, USA + + Fax: +1 (949) 824-1715 + EMail: fielding@ics.uci.edu + + + James Gettys + World Wide Web Consortium + MIT Laboratory for Computer Science + 545 Technology Square + Cambridge, MA 02139, USA + + Fax: +1 (617) 258 8682 + EMail: jg@w3.org + + + Jeffrey C. Mogul + Western Research Laboratory + Compaq Computer Corporation + 250 University Avenue + Palo Alto, California, 94305, USA + + EMail: mogul@wrl.dec.com + + + Henrik Frystyk Nielsen + World Wide Web Consortium + MIT Laboratory for Computer Science + 545 Technology Square + Cambridge, MA 02139, USA + + Fax: +1 (617) 258 8682 + EMail: frystyk@w3.org + + + Larry Masinter + Xerox Corporation + 3333 Coyote Hill Road + Palo Alto, CA 94034, USA + + EMail: masinter@parc.xerox.com + + + + + +Fielding, et al. Standards Track [Page 162] + +RFC 2616 HTTP/1.1 June 1999 + + + Paul J. 
Leach + Microsoft Corporation + 1 Microsoft Way + Redmond, WA 98052, USA + + EMail: paulle@microsoft.com + + + Tim Berners-Lee + Director, World Wide Web Consortium + MIT Laboratory for Computer Science + 545 Technology Square + Cambridge, MA 02139, USA + + Fax: +1 (617) 258 8682 + EMail: timbl@w3.org + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Fielding, et al. Standards Track [Page 163] + +RFC 2616 HTTP/1.1 June 1999 + + +19 Appendices + +19.1 Internet Media Type message/http and application/http + + In addition to defining the HTTP/1.1 protocol, this document serves + as the specification for the Internet media type "message/http" and + "application/http". The message/http type can be used to enclose a + single HTTP request or response message, provided that it obeys the + MIME restrictions for all "message" types regarding line length and + encodings. The application/http type can be used to enclose a + pipeline of one or more HTTP request or response messages (not + intermixed). The following is to be registered with IANA [17]. + + Media Type name: message + Media subtype name: http + Required parameters: none + Optional parameters: version, msgtype + version: The HTTP-Version number of the enclosed message + (e.g., "1.1"). If not present, the version can be + determined from the first line of the body. + msgtype: The message type -- "request" or "response". If not + present, the type can be determined from the first + line of the body. + Encoding considerations: only "7bit", "8bit", or "binary" are + permitted + Security considerations: none + + Media Type name: application + Media subtype name: http + Required parameters: none + Optional parameters: version, msgtype + version: The HTTP-Version number of the enclosed messages + (e.g., "1.1"). If not present, the version can be + determined from the first line of the body. + msgtype: The message type -- "request" or "response". 
If not + present, the type can be determined from the first + line of the body. + Encoding considerations: HTTP messages enclosed by this type + are in "binary" format; use of an appropriate + Content-Transfer-Encoding is required when + transmitted via E-mail. + Security considerations: none + + + + + + + + + +Fielding, et al. Standards Track [Page 164] + +RFC 2616 HTTP/1.1 June 1999 + + +19.2 Internet Media Type multipart/byteranges + + When an HTTP 206 (Partial Content) response message includes the + content of multiple ranges (a response to a request for multiple + non-overlapping ranges), these are transmitted as a multipart + message-body. The media type for this purpose is called + "multipart/byteranges". + + The multipart/byteranges media type includes two or more parts, each + with its own Content-Type and Content-Range fields. The required + boundary parameter specifies the boundary string used to separate + each body-part. + + Media Type name: multipart + Media subtype name: byteranges + Required parameters: boundary + Optional parameters: none + Encoding considerations: only "7bit", "8bit", or "binary" are + permitted + Security considerations: none + + + For example: + + HTTP/1.1 206 Partial Content + Date: Wed, 15 Nov 1995 06:25:24 GMT + Last-Modified: Wed, 15 Nov 1995 04:58:08 GMT + Content-type: multipart/byteranges; boundary=THIS_STRING_SEPARATES + + --THIS_STRING_SEPARATES + Content-type: application/pdf + Content-range: bytes 500-999/8000 + + ...the first range... + --THIS_STRING_SEPARATES + Content-type: application/pdf + Content-range: bytes 7000-7999/8000 + + ...the second range + --THIS_STRING_SEPARATES-- + + Notes: + + 1) Additional CRLFs may precede the first boundary string in the + entity. + + + + + + +Fielding, et al. Standards Track [Page 165] + +RFC 2616 HTTP/1.1 June 1999 + + + 2) Although RFC 2046 [40] permits the boundary string to be + quoted, some existing implementations handle a quoted boundary + string incorrectly. 
+ + 3) A number of browsers and servers were coded to an early draft + of the byteranges specification to use a media type of + multipart/x-byteranges, which is almost, but not quite + compatible with the version documented in HTTP/1.1. + +19.3 Tolerant Applications + + Although this document specifies the requirements for the generation + of HTTP/1.1 messages, not all applications will be correct in their + implementation. We therefore recommend that operational applications + be tolerant of deviations whenever those deviations can be + interpreted unambiguously. + + Clients SHOULD be tolerant in parsing the Status-Line and servers + tolerant when parsing the Request-Line. In particular, they SHOULD + accept any amount of SP or HT characters between fields, even though + only a single SP is required. + + The line terminator for message-header fields is the sequence CRLF. + However, we recommend that applications, when parsing such headers, + recognize a single LF as a line terminator and ignore the leading CR. + + The character set of an entity-body SHOULD be labeled as the lowest + common denominator of the character codes used within that body, with + the exception that not labeling the entity is preferred over labeling + the entity with the labels US-ASCII or ISO-8859-1. See section 3.7.1 + and 3.4.1. + + Additional rules for requirements on parsing and encoding of dates + and other potential problems with date encodings include: + + - HTTP/1.1 clients and caches SHOULD assume that an RFC-850 date + which appears to be more than 50 years in the future is in fact + in the past (this helps solve the "year 2000" problem). + + - An HTTP/1.1 implementation MAY internally represent a parsed + Expires date as earlier than the proper value, but MUST NOT + internally represent a parsed Expires date as later than the + proper value. + + - All expiration-related calculations MUST be done in GMT. 
The + local time zone MUST NOT influence the calculation or comparison + of an age or expiration time. + + + + +Fielding, et al. Standards Track [Page 166] + +RFC 2616 HTTP/1.1 June 1999 + + + - If an HTTP header incorrectly carries a date value with a time + zone other than GMT, it MUST be converted into GMT using the + most conservative possible conversion. + +19.4 Differences Between HTTP Entities and RFC 2045 Entities + + HTTP/1.1 uses many of the constructs defined for Internet Mail (RFC + 822 [9]) and the Multipurpose Internet Mail Extensions (MIME [7]) to + allow entities to be transmitted in an open variety of + representations and with extensible mechanisms. However, RFC 2045 + discusses mail, and HTTP has a few features that are different from + those described in RFC 2045. These differences were carefully chosen + to optimize performance over binary connections, to allow greater + freedom in the use of new media types, to make date comparisons + easier, and to acknowledge the practice of some early HTTP servers + and clients. + + This appendix describes specific areas where HTTP differs from RFC + 2045. Proxies and gateways to strict MIME environments SHOULD be + aware of these differences and provide the appropriate conversions + where necessary. Proxies and gateways from MIME environments to HTTP + also need to be aware of the differences because some conversions + might be required. + +19.4.1 MIME-Version + + HTTP is not a MIME-compliant protocol. However, HTTP/1.1 messages MAY + include a single MIME-Version general-header field to indicate what + version of the MIME protocol was used to construct the message. Use + of the MIME-Version header field indicates that the message is in + full compliance with the MIME protocol (as defined in RFC 2045[7]). + Proxies/gateways are responsible for ensuring full compliance (where + possible) when exporting HTTP messages to strict MIME environments. + + MIME-Version = "MIME-Version" ":" 1*DIGIT "." 
1*DIGIT + + MIME version "1.0" is the default for use in HTTP/1.1. However, + HTTP/1.1 message parsing and semantics are defined by this document + and not the MIME specification. + +19.4.2 Conversion to Canonical Form + + RFC 2045 [7] requires that an Internet mail entity be converted to + canonical form prior to being transferred, as described in section 4 + of RFC 2049 [48]. Section 3.7.1 of this document describes the forms + allowed for subtypes of the "text" media type when transmitted over + HTTP. RFC 2046 requires that content with a type of "text" represent + line breaks as CRLF and forbids the use of CR or LF outside of line + + + +Fielding, et al. Standards Track [Page 167] + +RFC 2616 HTTP/1.1 June 1999 + + + break sequences. HTTP allows CRLF, bare CR, and bare LF to indicate a + line break within text content when a message is transmitted over + HTTP. + + Where it is possible, a proxy or gateway from HTTP to a strict MIME + environment SHOULD translate all line breaks within the text media + types described in section 3.7.1 of this document to the RFC 2049 + canonical form of CRLF. Note, however, that this might be complicated + by the presence of a Content-Encoding and by the fact that HTTP + allows the use of some character sets which do not use octets 13 and + 10 to represent CR and LF, as is the case for some multi-byte + character sets. + + Implementors should note that conversion will break any cryptographic + checksums applied to the original content unless the original content + is already in canonical form. Therefore, the canonical form is + recommended for any content that uses such checksums in HTTP. + +19.4.3 Conversion of Date Formats + + HTTP/1.1 uses a restricted set of date formats (section 3.3.1) to + simplify the process of date comparison. Proxies and gateways from + other protocols SHOULD ensure that any Date header field present in a + message conforms to one of the HTTP/1.1 formats and rewrite the date + if necessary. 
+ +19.4.4 Introduction of Content-Encoding + + RFC 2045 does not include any concept equivalent to HTTP/1.1's + Content-Encoding header field. Since this acts as a modifier on the + media type, proxies and gateways from HTTP to MIME-compliant + protocols MUST either change the value of the Content-Type header + field or decode the entity-body before forwarding the message. (Some + experimental applications of Content-Type for Internet mail have used + a media-type parameter of ";conversions=<content-coding>" to perform + a function equivalent to Content-Encoding. However, this parameter is + not part of RFC 2045.) + +19.4.5 No Content-Transfer-Encoding + + HTTP does not use the Content-Transfer-Encoding (CTE) field of RFC + 2045. Proxies and gateways from MIME-compliant protocols to HTTP MUST + remove any non-identity CTE ("quoted-printable" or "base64") encoding + prior to delivering the response message to an HTTP client. + + [[ "MUST remove any CTE encoding prior to delivering the response ]] + [[ message to an HTTP client." ]] + + Proxies and gateways from HTTP to MIME-compliant protocols are + responsible for ensuring that the message is in the correct format + and encoding for safe transport on that protocol, where "safe + + + +Fielding, et al. Standards Track [Page 168] + +RFC 2616 HTTP/1.1 June 1999 + + + transport" is defined by the limitations of the protocol being used. + Such a proxy or gateway SHOULD label the data with an appropriate + Content-Transfer-Encoding if doing so will improve the likelihood of + safe transport over the destination protocol. + +19.4.6 Introduction of Transfer-Encoding + + HTTP/1.1 introduces the Transfer-Encoding header field (section + 14.41). Proxies/gateways MUST remove any transfer-coding prior to + forwarding a message via a MIME-compliant protocol.
+ + A process for decoding the "chunked" transfer-coding (section 3.6) + can be represented in pseudo-code as: + + length := 0 + read chunk-size, chunk-extension (if any) and CRLF + while (chunk-size > 0) { + read chunk-data and CRLF + append chunk-data to entity-body + length := length + chunk-size + read chunk-size and CRLF + } + read entity-header + while (entity-header not empty) { + append entity-header to existing header fields + read entity-header + } + Content-Length := length + Remove "chunked" from Transfer-Encoding + +19.4.7 MHTML and Line Length Limitations + + HTTP implementations which share code with MHTML [45] implementations + need to be aware of MIME line length limitations. Since HTTP does not + have this limitation, HTTP does not fold long lines. MHTML messages + being transported by HTTP follow all conventions of MHTML, including + line length limitations and folding, canonicalization, etc., since + HTTP transports all message-bodies as payload (see section 3.7.2) and + does not interpret the content or any MIME header lines that might be + contained therein. + +19.5 Additional Features + + RFC 1945 and RFC 2068 document protocol elements used by some + existing HTTP implementations, but not consistently and correctly + across most HTTP/1.1 applications. Implementors are advised to be + aware of these features, but cannot rely upon their presence in, or + interoperability with, other HTTP/1.1 applications. Some of these + + + +Fielding, et al. Standards Track [Page 169] + +RFC 2616 HTTP/1.1 June 1999 + + + describe proposed experimental features, and some describe features + that experimental deployment found lacking that are now addressed in + the base HTTP/1.1 specification. + + A number of other headers, such as Content-Disposition and Title, + from SMTP and MIME are also often implemented (see RFC 2076 [37]). 
+ +19.5.1 Content-Disposition + + The Content-Disposition response-header field has been proposed as a + means for the origin server to suggest a default filename if the user + requests that the content is saved to a file. This usage is derived + from the definition of Content-Disposition in RFC 1806 [35]. + + content-disposition = "Content-Disposition" ":" + disposition-type *( ";" disposition-parm ) + disposition-type = "attachment" | disp-extension-token + disposition-parm = filename-parm | disp-extension-parm + filename-parm = "filename" "=" quoted-string + disp-extension-token = token + disp-extension-parm = token "=" ( token | quoted-string ) + + An example is + + Content-Disposition: attachment; filename="fname.ext" + + The receiving user agent SHOULD NOT respect any directory path + information present in the filename-parm parameter, which is the only + parameter believed to apply to HTTP implementations at this time. The + filename SHOULD be treated as a terminal component only. + + If this header is used in a response with the application/octet- + stream content-type, the implied suggestion is that the user agent + should not display the response, but directly enter a `save response + as...' dialog. + + See section 15.5 for Content-Disposition security issues. + +19.6 Compatibility with Previous Versions + + It is beyond the scope of a protocol specification to mandate + compliance with previous versions. HTTP/1.1 was deliberately + designed, however, to make supporting previous versions easy. It is + worth noting that, at the time of composing this specification + (1996), we would expect commercial HTTP/1.1 servers to: + + - recognize the format of the Request-Line for HTTP/0.9, 1.0, and + 1.1 requests; + + + +Fielding, et al. Standards Track [Page 170] + +RFC 2616 HTTP/1.1 June 1999 + + + - understand any valid request in the format of HTTP/0.9, 1.0, or + 1.1; + + - respond appropriately with a message in the same major version + used by the client. 
+ + And we would expect HTTP/1.1 clients to: + + - recognize the format of the Status-Line for HTTP/1.0 and 1.1 + responses; + + - understand any valid response in the format of HTTP/0.9, 1.0, or + 1.1. + + For most implementations of HTTP/1.0, each connection is established + by the client prior to the request and closed by the server after + sending the response. Some implementations implement the Keep-Alive + version of persistent connections described in section 19.7.1 of RFC + 2068 [33]. + +19.6.1 Changes from HTTP/1.0 + + This section summarizes major differences between versions HTTP/1.0 + and HTTP/1.1. + +19.6.1.1 Changes to Simplify Multi-homed Web Servers and Conserve IP + Addresses + + The requirements that clients and servers support the Host request- + header, report an error if the Host request-header (section 14.23) is + missing from an HTTP/1.1 request, and accept absolute URIs (section + 5.1.2) are among the most important changes defined by this + specification. + + Older HTTP/1.0 clients assumed a one-to-one relationship of IP + addresses and servers; there was no other established mechanism for + distinguishing the intended server of a request than the IP address + to which that request was directed. The changes outlined above will + allow the Internet, once older HTTP clients are no longer common, to + support multiple Web sites from a single IP address, greatly + simplifying large operational Web servers, where allocation of many + IP addresses to a single host has created serious problems. The + Internet will also be able to recover the IP addresses that have been + allocated for the sole purpose of allowing special-purpose domain + names to be used in root-level HTTP URLs. Given the rate of growth of + the Web, and the number of servers already deployed, it is extremely + + + + + +Fielding, et al. 
Standards Track [Page 171] + +RFC 2616 HTTP/1.1 June 1999 + + + important that all implementations of HTTP (including updates to + existing HTTP/1.0 applications) correctly implement these + requirements: + + - Both clients and servers MUST support the Host request-header. + + - A client that sends an HTTP/1.1 request MUST send a Host header. + + - Servers MUST report a 400 (Bad Request) error if an HTTP/1.1 + request does not include a Host request-header. + + - Servers MUST accept absolute URIs. + +19.6.2 Compatibility with HTTP/1.0 Persistent Connections + + Some clients and servers might wish to be compatible with some + previous implementations of persistent connections in HTTP/1.0 + clients and servers. Persistent connections in HTTP/1.0 are + explicitly negotiated as they are not the default behavior. HTTP/1.0 + experimental implementations of persistent connections are faulty, + and the new facilities in HTTP/1.1 are designed to rectify these + problems. The problem was that some existing 1.0 clients may be + sending Keep-Alive to a proxy server that doesn't understand + Connection, which would then erroneously forward it to the next + inbound server, which would establish the Keep-Alive connection and + result in a hung HTTP/1.0 proxy waiting for the close on the + response. The result is that HTTP/1.0 clients must be prevented from + using Keep-Alive when talking to proxies. + + However, talking to proxies is the most important use of persistent + connections, so that prohibition is clearly unacceptable. Therefore, + we need some other mechanism for indicating a persistent connection + is desired, which is safe to use even when talking to an old proxy + that ignores Connection. Persistent connections are the default for + HTTP/1.1 messages; we introduce a new keyword (Connection: close) for + declaring non-persistence. See section 14.10. 
+ + The original HTTP/1.0 form of persistent connections (the Connection: + Keep-Alive and Keep-Alive header) is documented in RFC 2068. [33] + +19.6.3 Changes from RFC 2068 + + This specification has been carefully audited to correct and + disambiguate key word usage; RFC 2068 had many problems in respect to + the conventions laid out in RFC 2119 [34]. + + Clarified which error code should be used for inbound server failures + (e.g. DNS failures). (Section 10.5.5). + + + +Fielding, et al. Standards Track [Page 172] + +RFC 2616 HTTP/1.1 June 1999 + + + CREATE had a race that required an Etag be sent when a resource is + first created. (Section 10.2.2). + + Content-Base was deleted from the specification: it was not + implemented widely, and there is no simple, safe way to introduce it + without a robust extension mechanism. In addition, it is used in a + similar, but not identical fashion in MHTML [45]. + + Transfer-coding and message lengths all interact in ways that + required fixing exactly when chunked encoding is used (to allow for + transfer encoding that may not be self delimiting); it was important + to straighten out exactly how message lengths are computed. (Sections + 3.6, 4.4, 7.2.2, 13.5.2, 14.13, 14.16) + + A content-coding of "identity" was introduced, to solve problems + discovered in caching. (section 3.5) + + Quality Values of zero should indicate that "I don't want something" + to allow clients to refuse a representation. (Section 3.9) + + The use and interpretation of HTTP version numbers has been clarified + by RFC 2145. Require proxies to upgrade requests to highest protocol + version they support to deal with problems discovered in HTTP/1.0 + implementations (Section 3.1) + + Charset wildcarding is introduced to avoid explosion of character set + names in accept headers. (Section 14.2) + + A case was missed in the Cache-Control model of HTTP/1.1; s-maxage + was introduced to add this missing case. 
(Sections 13.4, 14.8, 14.9, + 14.9.3) + + The Cache-Control: max-age directive was not properly defined for + responses. (Section 14.9.3) + + There are situations where a server (especially a proxy) does not + know the full length of a response but is capable of serving a + byterange request. We therefore need a mechanism to allow byteranges + with a content-range not indicating the full length of the message. + (Section 14.16) + + Range request responses would become very verbose if all meta-data + were always returned; by allowing the server to only send needed + headers in a 206 response, this problem can be avoided. (Section + 10.2.7, 13.5.3, and 14.27) + + + + + + +Fielding, et al. Standards Track [Page 173] + +RFC 2616 HTTP/1.1 June 1999 + + + Fix problem with unsatisfiable range requests; there are two cases: + syntactic problems, and range doesn't exist in the document. The 416 + status code was needed to resolve this ambiguity needed to indicate + an error for a byte range request that falls outside of the actual + contents of a document. (Section 10.4.17, 14.16) + + Rewrite of message transmission requirements to make it much harder + for implementors to get it wrong, as the consequences of errors here + can have significant impact on the Internet, and to deal with the + following problems: + + 1. Changing "HTTP/1.1 or later" to "HTTP/1.1", in contexts where + this was incorrectly placing a requirement on the behavior of + an implementation of a future version of HTTP/1.x + + 2. Made it clear that user-agents should retry requests, not + "clients" in general. + + 3. Converted requirements for clients to ignore unexpected 100 + (Continue) responses, and for proxies to forward 100 responses, + into a general requirement for 1xx responses. + + 4. Modified some TCP-specific language, to make it clearer that + non-TCP transports are possible for HTTP. + + 5. 
Require that the origin server MUST NOT wait for the request + body before it sends a required 100 (Continue) response. + + 6. Allow, rather than require, a server to omit 100 (Continue) if + it has already seen some of the request body. + + 7. Allow servers to defend against denial-of-service attacks and + broken clients. + + This change adds the Expect header and 417 status code. The message + transmission requirements fixes are in sections 8.2, 10.4.18, + 8.1.2.2, 13.11, and 14.20. + + Proxies should be able to add Content-Length when appropriate. + (Section 13.5.2) + + Clean up confusion between 403 and 404 responses. (Section 10.4.4, + 10.4.5, and 10.4.11) + + Warnings could be cached incorrectly, or not updated appropriately. + (Section 13.1.2, 13.2.4, 13.5.2, 13.5.3, 14.9.3, and 14.46) Warning + also needed to be a general header, as PUT or other methods may have + need for it in requests. + + + +Fielding, et al. Standards Track [Page 174] + +RFC 2616 HTTP/1.1 June 1999 + + + Transfer-coding had significant problems, particularly with + interactions with chunked encoding. The solution is that transfer- + codings become as full fledged as content-codings. This involves + adding an IANA registry for transfer-codings (separate from content + codings), a new header field (TE) and enabling trailer headers in the + future. Transfer encoding is a major performance benefit, so it was + worth fixing [39]. TE also solves another, obscure, downward + interoperability problem that could have occurred due to interactions + between authentication trailers, chunked encoding and HTTP/1.0 + clients.(Section 3.6, 3.6.1, and 14.39) + + The PATCH, LINK, UNLINK methods were defined but not commonly + implemented in previous versions of this specification. See RFC 2068 + [33]. + + The Alternates, Content-Version, Derived-From, Link, URI, Public and + Content-Base header fields were defined in previous versions of this + specification, but not commonly implemented. 
See RFC 2068 [33]. + +20 Index + + Please see the PostScript version of this RFC for the INDEX. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Fielding, et al. Standards Track [Page 175] + +RFC 2616 HTTP/1.1 June 1999 + + +21. Full Copyright Statement + + Copyright (C) The Internet Society (1999). All Rights Reserved. + + This document and translations of it may be copied and furnished to + others, and derivative works that comment on or otherwise explain it + or assist in its implementation may be prepared, copied, published + and distributed, in whole or in part, without restriction of any + kind, provided that the above copyright notice and this paragraph are + included on all such copies and derivative works. However, this + document itself may not be modified in any way, such as by removing + the copyright notice or references to the Internet Society or other + Internet organizations, except as needed for the purpose of + developing Internet standards in which case the procedures for + copyrights defined in the Internet Standards process must be + followed, or as required to translate it into languages other than + English. + + The limited permissions granted above are perpetual and will not be + revoked by the Internet Society or its successors or assigns. + + This document and the information contained herein is provided on an + "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING + TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING + BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION + HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF + MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + +Acknowledgement + + Funding for the RFC Editor function is currently provided by the + Internet Society. + + + + + + + + + + + + + + + + + + + +Fielding, et al. 
Standards Track [Page 176] + diff --git a/docs/specs/rfc2617.txt b/docs/specs/rfc2617.txt new file mode 100644 index 0000000..b8fdf59 --- /dev/null +++ b/docs/specs/rfc2617.txt @@ -0,0 +1,1909 @@ + +[[ Text in double brackets is from the unofficial errata at ]] +[[ http://skrb.org/ietf/http_errata.html ]] + +Network Working Group J. Franks +Request for Comments: 2617 Northwestern University +Obsoletes: 2069 P. Hallam-Baker +Category: Standards Track Verisign, Inc. + J. Hostetler + AbiSource, Inc. + S. Lawrence + Agranat Systems, Inc. + P. Leach + Microsoft Corporation + A. Luotonen + Netscape Communications Corporation + L. Stewart + Open Market, Inc. + June 1999 + + + HTTP Authentication: Basic and Digest Access Authentication + +Status of this Memo + + This document specifies an Internet standards track protocol for the + Internet community, and requests discussion and suggestions for + improvements. Please refer to the current edition of the "Internet + Official Protocol Standards" (STD 1) for the standardization state + and status of this protocol. Distribution of this memo is unlimited. + +Copyright Notice + + Copyright (C) The Internet Society (1999). All Rights Reserved. + +Abstract + + "HTTP/1.0", includes the specification for a Basic Access + Authentication scheme. This scheme is not considered to be a secure + method of user authentication (unless used in conjunction with some + external secure system such as SSL [5]), as the user name and + password are passed over the network as cleartext. + + This document also provides the specification for HTTP's + authentication framework, the original Basic authentication scheme + and a scheme based on cryptographic hashes, referred to as "Digest + Access Authentication". It is therefore also intended to serve as a + replacement for RFC 2069 [6]. 
Some optional elements specified by + RFC 2069 have been removed from this specification due to problems + found since its publication; other new elements have been added for + compatibility, those new elements have been made optional, but are + strongly recommended. + + + +Franks, et al. Standards Track [Page 1] + +RFC 2617 HTTP Authentication June 1999 + + + Like Basic, Digest access authentication verifies that both parties + to a communication know a shared secret (a password); unlike Basic, + this verification can be done without sending the password in the + clear, which is Basic's biggest weakness. As with most other + authentication protocols, the greatest sources of risks are usually + found not in the core protocol itself but in policies and procedures + surrounding its use. + +Table of Contents + + 1 Access Authentication................................ 3 + 1.1 Reliance on the HTTP/1.1 Specification............ 3 + 1.2 Access Authentication Framework................... 3 + 2 Basic Authentication Scheme.......................... 5 + 3 Digest Access Authentication Scheme.................. 6 + 3.1 Introduction...................................... 6 + 3.1.1 Purpose......................................... 6 + 3.1.2 Overall Operation............................... 6 + 3.1.3 Representation of digest values................. 7 + 3.1.4 Limitations..................................... 7 + 3.2 Specification of Digest Headers................... 7 + 3.2.1 The WWW-Authenticate Response Header............ 8 + 3.2.2 The Authorization Request Header................ 11 + 3.2.3 The Authentication-Info Header.................. 15 + 3.3 Digest Operation.................................. 17 + 3.4 Security Protocol Negotiation..................... 18 + 3.5 Example........................................... 18 + 3.6 Proxy-Authentication and Proxy-Authorization...... 19 + 4 Security Considerations.............................. 
19 + 4.1 Authentication of Clients using Basic + Authentication.................................... 19 + 4.2 Authentication of Clients using Digest + Authentication.................................... 20 + 4.3 Limited Use Nonce Values.......................... 21 + 4.4 Comparison of Digest with Basic Authentication.... 22 + 4.5 Replay Attacks.................................... 22 + 4.6 Weakness Created by Multiple Authentication + Schemes........................................... 23 + 4.7 Online dictionary attacks......................... 23 + 4.8 Man in the Middle................................. 24 + 4.9 Chosen plaintext attacks.......................... 24 + 4.10 Precomputed dictionary attacks.................... 25 + 4.11 Batch brute force attacks......................... 25 + 4.12 Spoofing by Counterfeit Servers................... 25 + 4.13 Storing passwords................................. 26 + 4.14 Summary........................................... 26 + 5 Sample implementation................................ 27 + 6 Acknowledgments...................................... 31 + + + +Franks, et al. Standards Track [Page 2] + +RFC 2617 HTTP Authentication June 1999 + + + 7 References........................................... 31 + 8 Authors' Addresses................................... 32 + 9 Full Copyright Statement............................. 34 + +1 Access Authentication + +1.1 Reliance on the HTTP/1.1 Specification + + This specification is a companion to the HTTP/1.1 specification [2]. + It uses the augmented BNF section 2.1 of that document, and relies on + both the non-terminals defined in that document and other aspects of + the HTTP/1.1 specification. + +1.2 Access Authentication Framework + + HTTP provides a simple challenge-response authentication mechanism + that MAY be used by a server to challenge a client request and by a + client to provide authentication information. 
It uses an extensible, + case-insensitive token to identify the authentication scheme, + followed by a comma-separated list of attribute-value pairs which + carry the parameters necessary for achieving authentication via that + scheme. + + auth-scheme = token + auth-param = token "=" ( token | quoted-string ) + + The 401 (Unauthorized) response message is used by an origin server + to challenge the authorization of a user agent. This response MUST + include a WWW-Authenticate header field containing at least one + challenge applicable to the requested resource. The 407 (Proxy + Authentication Required) response message is used by a proxy to + challenge the authorization of a client and MUST include a Proxy- + Authenticate header field containing at least one challenge + applicable to the proxy for the requested resource. + + challenge = auth-scheme 1*SP 1#auth-param + + Note: User agents will need to take special care in parsing the WWW- + Authenticate or Proxy-Authenticate header field value if it contains + more than one challenge, or if more than one WWW-Authenticate header + field is provided, since the contents of a challenge may itself + contain a comma-separated list of authentication parameters. + + The authentication parameter realm is defined for all authentication + schemes: + + realm = "realm" "=" realm-value + realm-value = quoted-string + + + +Franks, et al. Standards Track [Page 3] + +RFC 2617 HTTP Authentication June 1999 + + + The realm directive (case-insensitive) is required for all + authentication schemes that issue a challenge. The realm value + (case-sensitive), in combination with the canonical root URL (the + absoluteURI for the server whose abs_path is empty; see section 5.1.2 + of [2]) of the server being accessed, defines the protection space. + These realms allow the protected resources on a server to be + partitioned into a set of protection spaces, each with its own + authentication scheme and/or authorization database. 
The realm value + is a string, generally assigned by the origin server, which may have + additional semantics specific to the authentication scheme. Note that + there may be multiple challenges with the same auth-scheme but + different realms. + + A user agent that wishes to authenticate itself with an origin + server--usually, but not necessarily, after receiving a 401 + (Unauthorized)--MAY do so by including an Authorization header field + with the request. A client that wishes to authenticate itself with a + proxy--usually, but not necessarily, after receiving a 407 (Proxy + Authentication Required)--MAY do so by including a Proxy- + Authorization header field with the request. Both the Authorization + field value and the Proxy-Authorization field value consist of + credentials containing the authentication information of the client + for the realm of the resource being requested. The user agent MUST + choose to use one of the challenges with the strongest auth-scheme it + understands and request credentials from the user based upon that + challenge. + + credentials = auth-scheme #auth-param + + Note that many browsers will only recognize Basic and will require + that it be the first auth-scheme presented. Servers should only + include Basic if it is minimally acceptable. + + The protection space determines the domain over which credentials can + be automatically applied. If a prior request has been authorized, the + same credentials MAY be reused for all other requests within that + protection space for a period of time determined by the + authentication scheme, parameters, and/or user preference. Unless + otherwise defined by the authentication scheme, a single protection + space cannot extend outside the scope of its server. + + If the origin server does not wish to accept the credentials sent + with a request, it SHOULD return a 401 (Unauthorized) response. 
The + response MUST include a WWW-Authenticate header field containing at + least one (possibly new) challenge applicable to the requested + resource. If a proxy does not accept the credentials sent with a + request, it SHOULD return a 407 (Proxy Authentication Required). The + response MUST include a Proxy-Authenticate header field containing a + + + +Franks, et al. Standards Track [Page 4] + +RFC 2617 HTTP Authentication June 1999 + + + (possibly new) challenge applicable to the proxy for the requested + resource. + + The HTTP protocol does not restrict applications to this simple + challenge-response mechanism for access authentication. Additional + mechanisms MAY be used, such as encryption at the transport level or + via message encapsulation, and with additional header fields + specifying authentication information. However, these additional + mechanisms are not defined by this specification. + + Proxies MUST be completely transparent regarding user agent + authentication by origin servers. That is, they must forward the + WWW-Authenticate and Authorization headers untouched, and follow the + rules found in section 14.8 of [2]. Both the Proxy-Authenticate and + the Proxy-Authorization header fields are hop-by-hop headers (see + section 13.5.1 of [2]). + +2 Basic Authentication Scheme + + The "basic" authentication scheme is based on the model that the + client must authenticate itself with a user-ID and a password for + each realm. The realm value should be considered an opaque string + which can only be compared for equality with other realms on that + server. The server will service the request only if it can validate + the user-ID and password for the protection space of the Request-URI. + There are no optional authentication parameters. 
+ + For Basic, the framework above is utilized as follows: + + challenge = "Basic" realm + credentials = "Basic" basic-credentials + + Upon receipt of an unauthorized request for a URI within the + protection space, the origin server MAY respond with a challenge like + the following: + + WWW-Authenticate: Basic realm="WallyWorld" + + where "WallyWorld" is the string assigned by the server to identify + the protection space of the Request-URI. A proxy may respond with the + same challenge using the Proxy-Authenticate header field. + + To receive authorization, the client sends the userid and password, + separated by a single colon (":") character, within a base64 [7] + encoded string in the credentials. + + basic-credentials = base64-user-pass + base64-user-pass = <base64 [4] encoding of user-pass, + + + +Franks, et al. Standards Track [Page 5] + +RFC 2617 HTTP Authentication June 1999 + + + except not limited to 76 char/line> + user-pass = userid ":" password + userid = *<TEXT excluding ":"> + password = *TEXT + + Userids might be case sensitive. + + If the user agent wishes to send the userid "Aladdin" and password + "open sesame", it would use the following header field: + + Authorization: Basic QWxhZGRpbjpvcGVuIHNlc2FtZQ== + + A client SHOULD assume that all paths at or deeper than the depth of + the last symbolic element in the path field of the Request-URI also + are within the protection space specified by the Basic realm value of + the current challenge. A client MAY preemptively send the + corresponding Authorization header with requests for resources in + that space without receipt of another challenge from the server. + Similarly, when a client sends a request to a proxy, it may reuse a + userid and password in the Proxy-Authorization header field without + receiving another challenge from the proxy server. See section 4 for + security considerations associated with Basic authentication. + +3 Digest Access Authentication Scheme + +3.1 Introduction + +3.1.1 Purpose + + The protocol referred to as "HTTP/1.0" includes the specification for + a Basic Access Authentication scheme[1].
That scheme is not + considered to be a secure method of user authentication, as the user + name and password are passed over the network in an unencrypted form. + This section provides the specification for a scheme that does not + send the password in cleartext, referred to as "Digest Access + Authentication". + + The Digest Access Authentication scheme is not intended to be a + complete answer to the need for security in the World Wide Web. This + scheme provides no encryption of message content. The intent is + simply to create an access authentication method that avoids the most + serious flaws of Basic authentication. + +3.1.2 Overall Operation + + Like Basic Access Authentication, the Digest scheme is based on a + simple challenge-response paradigm. The Digest scheme challenges + using a nonce value. A valid response contains a checksum (by + + + +Franks, et al. Standards Track [Page 6] + +RFC 2617 HTTP Authentication June 1999 + + + default, the MD5 checksum) of the username, the password, the given + nonce value, the HTTP method, and the requested URI. In this way, the + password is never sent in the clear. Just as with the Basic scheme, + the username and password must be prearranged in some fashion not + addressed by this document. + +3.1.3 Representation of digest values + + An optional header allows the server to specify the algorithm used to + create the checksum or digest. By default the MD5 algorithm is used + and that is the only algorithm described in this document. + + For the purposes of this document, an MD5 digest of 128 bits is + represented as 32 ASCII printable characters. The bits in the 128 bit + digest are converted from most significant to least significant bit, + four bits at a time to their ASCII presentation as follows. Each four + bits is represented by its familiar hexadecimal notation from the + characters 0123456789abcdef. 
That is, binary 0000 gets represented by + the character '0', 0001, by '1', and so on up to the representation + of 1111 as 'f'. + +3.1.4 Limitations + + The Digest authentication scheme described in this document suffers + from many known limitations. It is intended as a replacement for + Basic authentication and nothing more. It is a password-based system + and (on the server side) suffers from all the same problems of any + password system. In particular, no provision is made in this protocol + for the initial secure arrangement between user and server to + establish the user's password. + + Users and implementors should be aware that this protocol is not as + secure as Kerberos, and not as secure as any client-side private-key + scheme. Nevertheless it is better than nothing, better than what is + commonly used with telnet and ftp, and better than Basic + authentication. + +3.2 Specification of Digest Headers + + The Digest Access Authentication scheme is conceptually similar to + the Basic scheme. The formats of the modified WWW-Authenticate header + line and the Authorization header line are specified below. In + addition, a new header, Authentication-Info, is specified. + + + + + + + + +Franks, et al. 
Standards Track [Page 7] + +RFC 2617 HTTP Authentication June 1999 + + +3.2.1 The WWW-Authenticate Response Header + + If a server receives a request for an access-protected object, and an + acceptable Authorization header is not sent, the server responds with + a "401 Unauthorized" status code, and a WWW-Authenticate header as + per the framework defined above, which for the digest scheme is + utilized as follows: + + challenge = "Digest" digest-challenge + + digest-challenge = 1#( realm | [ domain ] | nonce | + [ opaque ] |[ stale ] | [ algorithm ] | + [ qop-options ] | [auth-param] ) + + + domain = "domain" "=" <"> URI ( 1*SP URI ) <"> + [[ Should be: ]] + [[ domain = "domain" "=" <"> URI *( 1*SP URI ) <"> ]] + URI = absoluteURI | abs_path + nonce = "nonce" "=" nonce-value + nonce-value = quoted-string + opaque = "opaque" "=" quoted-string + stale = "stale" "=" ( "true" | "false" ) + algorithm = "algorithm" "=" ( "MD5" | "MD5-sess" | + token ) + qop-options = "qop" "=" <"> 1#qop-value <"> + qop-value = "auth" | "auth-int" | token + + The meanings of the values of the directives used above are as + follows: + + realm + A string to be displayed to users so they know which username and + password to use. This string should contain at least the name of + the host performing the authentication and might additionally + indicate the collection of users who might have access. An example + might be "registered_users@gotham.news.com". + + domain + A quoted, space-separated list of URIs, as specified in RFC XURI + [7], that define the protection space. If a URI is an abs_path, it + is relative to the canonical root URL (see section 1.2 above) of + the server being accessed. An absoluteURI in this list may refer to + a different server than the one being accessed. 
The client can use + this list to determine the set of URIs for which the same + authentication information may be sent: any URI that has a URI in + this list as a prefix (after both have been made absolute) may be + assumed to be in the same protection space. If this directive is + omitted or its value is empty, the client should assume that the + protection space consists of all URIs on the responding server. + + + +Franks, et al. Standards Track [Page 8] + +RFC 2617 HTTP Authentication June 1999 + + + This directive is not meaningful in Proxy-Authenticate headers, for + which the protection space is always the entire proxy; if present + it should be ignored. + + nonce + A server-specified data string which should be uniquely generated + each time a 401 response is made. It is recommended that this + string be base64 or hexadecimal data. Specifically, since the + string is passed in the header lines as a quoted string, the + double-quote character is not allowed. + + The contents of the nonce are implementation dependent. The quality + of the implementation depends on a good choice. A nonce might, for + example, be constructed as the base 64 encoding of + + time-stamp H(time-stamp ":" ETag ":" private-key) + + where time-stamp is a server-generated time or other non-repeating + value, ETag is the value of the HTTP ETag header associated with + the requested entity, and private-key is data known only to the + server. With a nonce of this form a server would recalculate the + hash portion after receiving the client authentication header and + reject the request if it did not match the nonce from that header + or if the time-stamp value is not recent enough. In this way the + server can limit the time of the nonce's validity. The inclusion of + the ETag prevents a replay request for an updated version of the + resource. 
(Note: including the IP address of the client in the + nonce would appear to offer the server the ability to limit the + reuse of the nonce to the same client that originally got it. + However, that would break proxy farms, where requests from a single + user often go through different proxies in the farm. Also, IP + address spoofing is not that hard.) + + An implementation might choose not to accept a previously used + nonce or a previously used digest, in order to protect against a + replay attack. Or, an implementation might choose to use one-time + nonces or digests for POST or PUT requests and a time-stamp for GET + requests. For more details on the issues involved see section 4. + of this document. + + The nonce is opaque to the client. + + opaque + A string of data, specified by the server, which should be returned + by the client unchanged in the Authorization header of subsequent + requests with URIs in the same protection space. It is recommended + that this string be base64 or hexadecimal data. + + + + +Franks, et al. Standards Track [Page 9] + +RFC 2617 HTTP Authentication June 1999 + + + stale + A flag, indicating that the previous request from the client was + rejected because the nonce value was stale. If stale is TRUE + (case-insensitive), the client may wish to simply retry the request + with a new encrypted response, without reprompting the user for a + new username and password. The server should only set stale to TRUE + if it receives a request for which the nonce is invalid but with a + valid digest for that nonce (indicating that the client knows the + correct username/password). If stale is FALSE, or anything other + than TRUE, or the stale directive is not present, the username + and/or password are invalid, and new values must be obtained. + + algorithm + A string indicating a pair of algorithms used to produce the digest + and a checksum. If this is not present it is assumed to be "MD5". 
+ If the algorithm is not understood, the challenge should be ignored + (and a different one used, if there is more than one). + + In this document the string obtained by applying the digest + algorithm to the data "data" with secret "secret" will be denoted + by KD(secret, data), and the string obtained by applying the + checksum algorithm to the data "data" will be denoted H(data). The + notation unq(X) means the value of the quoted-string X without the + surrounding quotes. + + For the "MD5" and "MD5-sess" algorithms + + H(data) = MD5(data) + + and + + KD(secret, data) = H(concat(secret, ":", data)) + + i.e., the digest is the MD5 of the secret concatenated with a colon + concatenated with the data. The "MD5-sess" algorithm is intended to + allow efficient 3rd party authentication servers; for the + difference in usage, see the description in section 3.2.2.2. + + qop-options + This directive is optional, but is made so only for backward + compatibility with RFC 2069 [6]; it SHOULD be used by all + implementations compliant with this version of the Digest scheme. + If present, it is a quoted string of one or more tokens indicating + the "quality of protection" values supported by the server. The + value "auth" indicates authentication; the value "auth-int" + indicates authentication with integrity protection; see the + + + + + +Franks, et al. Standards Track [Page 10] + +RFC 2617 HTTP Authentication June 1999 + + + descriptions below for calculating the response directive value for + the application of this choice. Unrecognized options MUST be + ignored. + + auth-param + This directive allows for future extensions. Any unrecognized + directive MUST be ignored. + +3.2.2 The Authorization Request Header + + The client is expected to retry the request, passing an Authorization + header line, which is defined according to the framework above, + utilized as follows. 
+ + credentials = "Digest" digest-response + digest-response = 1#( username | realm | nonce | digest-uri + | response | [ algorithm ] | [cnonce] | + [opaque] | [message-qop] | + [nonce-count] | [auth-param] ) + + username = "username" "=" username-value + username-value = quoted-string + digest-uri = "uri" "=" digest-uri-value + digest-uri-value = request-uri ; As specified by HTTP/1.1 + message-qop = "qop" "=" qop-value + cnonce = "cnonce" "=" cnonce-value + cnonce-value = nonce-value + nonce-count = "nc" "=" nc-value + nc-value = 8LHEX + response = "response" "=" request-digest + request-digest = <"> 32LHEX <"> + LHEX = "0" | "1" | "2" | "3" | + "4" | "5" | "6" | "7" | + "8" | "9" | "a" | "b" | + "c" | "d" | "e" | "f" + + The values of the opaque and algorithm fields must be those supplied + in the WWW-Authenticate response header for the entity being + requested. + + response + A string of 32 hex digits computed as defined below, which proves + that the user knows a password + + username + The user's name in the specified realm. + + + + + +Franks, et al. Standards Track [Page 11] + +RFC 2617 HTTP Authentication June 1999 + + + digest-uri + The URI from Request-URI of the Request-Line; duplicated here + because proxies are allowed to change the Request-Line in transit. + + qop + Indicates what "quality of protection" the client has applied to + the message. If present, its value MUST be one of the alternatives + the server indicated it supports in the WWW-Authenticate header. + These values affect the computation of the request-digest. Note + that this is a single token, not a quoted list of alternatives as + in WWW- Authenticate. This directive is optional in order to + preserve backward compatibility with a minimal implementation of + RFC 2069 [6], but SHOULD be used if the server indicated that qop + is supported by providing a qop directive in the WWW-Authenticate + header field. 
+ + cnonce + This MUST be specified if a qop directive is sent (see above), and + MUST NOT be specified if the server did not send a qop directive in + the WWW-Authenticate header field. The cnonce-value is an opaque + quoted string value provided by the client and used by both client + and server to avoid chosen plaintext attacks, to provide mutual + authentication, and to provide some message integrity protection. + See the descriptions below of the calculation of the response- + digest and request-digest values. + + nonce-count + This MUST be specified if a qop directive is sent (see above), and + MUST NOT be specified if the server did not send a qop directive in + the WWW-Authenticate header field. The nc-value is the hexadecimal + count of the number of requests (including the current request) + that the client has sent with the nonce value in this request. For + example, in the first request sent in response to a given nonce + value, the client sends "nc=00000001". The purpose of this + directive is to allow the server to detect request replays by + maintaining its own copy of this count - if the same nc-value is + seen twice, then the request is a replay. See the description + below of the construction of the request-digest value. + + auth-param + This directive allows for future extensions. Any unrecognized + directive MUST be ignored. + + If a directive or its value is improper, or required directives are + missing, the proper response is 400 Bad Request. If the request- + digest is invalid, then a login failure should be logged, since + repeated login failures from a single client may indicate an attacker + attempting to guess passwords. + + + +Franks, et al. Standards Track [Page 12] + +RFC 2617 HTTP Authentication June 1999 + + + The definition of request-digest above indicates the encoding for its + value. The following definitions show how the value is computed. 
+ +3.2.2.1 Request-Digest + + If the "qop" value is "auth" or "auth-int": + + request-digest = <"> < KD ( H(A1), unq(nonce-value) + ":" nc-value + ":" unq(cnonce-value) + ":" unq(qop-value) + ":" H(A2) + ) <"> + + If the "qop" directive is not present (this construction is for + compatibility with RFC 2069): + + request-digest = + <"> < KD ( H(A1), unq(nonce-value) ":" H(A2) ) > + <"> + + See below for the definitions for A1 and A2. + +3.2.2.2 A1 + + If the "algorithm" directive's value is "MD5" or is unspecified, then + A1 is: + + A1 = unq(username-value) ":" unq(realm-value) ":" passwd + + where + + passwd = < user's password > + + If the "algorithm" directive's value is "MD5-sess", then A1 is + calculated only once - on the first request by the client following + receipt of a WWW-Authenticate challenge from the server. It uses the + server nonce from that challenge, and the first client nonce value to + construct A1 as follows: + + A1 = H( unq(username-value) ":" unq(realm-value) + ":" passwd ) + ":" unq(nonce-value) ":" unq(cnonce-value) + + This creates a 'session key' for the authentication of subsequent + requests and responses which is different for each "authentication + session", thus limiting the amount of material hashed with any one + key. (Note: see further discussion of the authentication session in + + + +Franks, et al. Standards Track [Page 13] + +RFC 2617 HTTP Authentication June 1999 + + + section 3.3.) Because the server need only use the hash of the user + credentials in order to create the A1 value, this construction could + be used in conjunction with a third party authentication service so + that the web server would not need the actual password value. The + specification of such a protocol is beyond the scope of this + specification. 
+ +3.2.2.3 A2 + + If the "qop" directive's value is "auth" or is unspecified, then A2 + is: + + A2 = Method ":" digest-uri-value + + If the "qop" value is "auth-int", then A2 is: + + A2 = Method ":" digest-uri-value ":" H(entity-body) + +3.2.2.4 Directive values and quoted-string + + Note that the value of many of the directives, such as "username- + value", are defined as a "quoted-string". However, the "unq" notation + indicates that surrounding quotation marks are removed in forming the + string A1. Thus if the Authorization header includes the fields + + username="Mufasa", realm=myhost@testrealm.com + + and the user Mufasa has password "Circle Of Life" then H(A1) would be + H(Mufasa:myhost@testrealm.com:Circle Of Life) with no quotation marks + in the digested string. + + No white space is allowed in any of the strings to which the digest + function H() is applied unless that white space exists in the quoted + strings or entity body whose contents make up the string to be + digested. For example, the string A1 illustrated above must be + + Mufasa:myhost@testrealm.com:Circle Of Life + + with no white space on either side of the colons, but with the white + space between the words used in the password value. Likewise, the + other strings digested by H() must not have white space on either + side of the colons which delimit their fields unless that white space + was in the quoted strings or entity body being digested. + + Also note that if integrity protection is applied (qop=auth-int), the + H(entity-body) is the hash of the entity body, not the message body - + it is computed before any transfer encoding is applied by the sender + + + + +Franks, et al. Standards Track [Page 14] + +RFC 2617 HTTP Authentication June 1999 + + + and after it has been removed by the recipient. Note that this + includes multipart boundaries and embedded headers in each part of + any multipart content-type. 
+ +3.2.2.5 Various considerations + + The "Method" value is the HTTP request method as specified in section + 5.1.1 of [2]. The "request-uri" value is the Request-URI from the + request line as specified in section 5.1.2 of [2]. This may be "*", + an "absoluteURL" or an "abs_path" as specified in section 5.1.2 of + [2], but it MUST agree with the Request-URI. In particular, it MUST + be an "absoluteURL" if the Request-URI is an "absoluteURL". The + "cnonce-value" is an optional client-chosen value whose purpose is + to foil chosen plaintext attacks. + + The authenticating server must assure that the resource designated by + the "uri" directive is the same as the resource specified in the + Request-Line; if they are not, the server SHOULD return a 400 Bad + Request error. (Since this may be a symptom of an attack, server + implementers may want to consider logging such errors.) The purpose + of duplicating information from the request URL in this field is to + deal with the possibility that an intermediate proxy may alter the + client's Request-Line. This altered (but presumably semantically + equivalent) request would not result in the same digest as that + calculated by the client. + + Implementers should be aware of how authenticated transactions + interact with shared caches. The HTTP/1.1 protocol specifies that + when a shared cache (see section 13.7 of [2]) has received a request + containing an Authorization header and a response from relaying that + request, it MUST NOT return that response as a reply to any other + request, unless one of two Cache-Control (see section 14.9 of [2]) + directives was present in the response. If the original response + included the "must-revalidate" Cache-Control directive, the cache MAY + use the entity of that response in replying to a subsequent request, + but MUST first revalidate it with the origin server, using the + request headers from the new request to allow the origin server to + authenticate the new request. 
Alternatively, if the original response + included the "public" Cache-Control directive, the response entity + MAY be returned in reply to any subsequent request. + +3.2.3 The Authentication-Info Header + + The Authentication-Info header is used by the server to communicate + some information regarding the successful authentication in the + response. + + + + + +Franks, et al. Standards Track [Page 15] + +RFC 2617 HTTP Authentication June 1999 + + + AuthenticationInfo = "Authentication-Info" ":" auth-info + auth-info = 1#(nextnonce | [ message-qop ] + | [ response-auth ] | [ cnonce ] + | [nonce-count] ) + nextnonce = "nextnonce" "=" nonce-value + response-auth = "rspauth" "=" response-digest + response-digest = <"> *LHEX <"> + + The value of the nextnonce directive is the nonce the server wishes + the client to use for a future authentication response. The server + may send the Authentication-Info header with a nextnonce field as a + means of implementing one-time or otherwise changing nonces. If the + nextnonce field is present the client SHOULD use it when constructing + the Authorization header for its next request. Failure of the client + to do so may result in a request to re-authenticate from the server + with the "stale=TRUE". + + Server implementations should carefully consider the performance + implications of the use of this mechanism; pipelined requests will + not be possible if every response includes a nextnonce directive + that must be used on the next request received by the server. + Consideration should be given to the performance vs. security + tradeoffs of allowing an old nonce value to be used for a limited + time to permit request pipelining. Use of the nonce-count can + retain most of the security advantages of a new server nonce + without the deleterious affects on pipelining. + + message-qop + Indicates the "quality of protection" options applied to the + response by the server. 
The value "auth" indicates authentication; + the value "auth-int" indicates authentication with integrity + protection. The server SHOULD use the same value for the message- + qop directive in the response as was sent by the client in the + corresponding request. + + The optional response digest in the "response-auth" directive + supports mutual authentication -- the server proves that it knows the + user's secret, and with qop=auth-int also provides limited integrity + protection of the response. The "response-digest" value is calculated + as for the "request-digest" in the Authorization header, except that + if "qop=auth" or is not specified in the Authorization header for the + request, A2 is + + A2 = ":" digest-uri-value + + and if "qop=auth-int", then A2 is + + A2 = ":" digest-uri-value ":" H(entity-body) + + + +Franks, et al. Standards Track [Page 16] + +RFC 2617 HTTP Authentication June 1999 + + + where "digest-uri-value" is the value of the "uri" directive on the + Authorization header in the request. The "cnonce-value" and "nc- + value" MUST be the ones for the client request to which this message + is the response. The "response-auth", "cnonce", and "nonce-count" + directives MUST BE present if "qop=auth" or "qop=auth-int" is + specified. + + The Authentication-Info header is allowed in the trailer of an HTTP + message transferred via chunked transfer-coding. + +3.3 Digest Operation + + Upon receiving the Authorization header, the server may check its + validity by looking up the password that corresponds to the submitted + username. Then, the server must perform the same digest operation + (e.g., MD5) performed by the client, and compare the result to the + given request-digest value. + + Note that the HTTP server does not actually need to know the user's + cleartext password. As long as H(A1) is available to the server, the + validity of an Authorization header may be verified. 
+ + The client response to a WWW-Authenticate challenge for a protection + space starts an authentication session with that protection space. + The authentication session lasts until the client receives another + WWW-Authenticate challenge from any server in the protection space. A + client should remember the username, password, nonce, nonce count and + opaque values associated with an authentication session to use to + construct the Authorization header in future requests within that + protection space. The Authorization header may be included + preemptively; doing so improves server efficiency and avoids extra + round trips for authentication challenges. The server may choose to + accept the old Authorization header information, even though the + nonce value included might not be fresh. Alternatively, the server + may return a 401 response with a new nonce value, causing the client + to retry the request; by specifying stale=TRUE with this response, + the server tells the client to retry with the new nonce, but without + prompting for a new username and password. + + Because the client is required to return the value of the opaque + directive given to it by the server for the duration of a session, + the opaque data may be used to transport authentication session state + information. (Note that any such use can also be accomplished more + easily and safely by including the state in the nonce.) For example, + a server could be responsible for authenticating content that + actually sits on another server. It would achieve this by having the + first 401 response include a domain directive whose value includes a + URI on the second server, and an opaque directive whose value + + + +Franks, et al. Standards Track [Page 17] + +RFC 2617 HTTP Authentication June 1999 + + + contains the state information. The client will retry the request, at + which time the server might respond with a 301/302 redirection, + pointing to the URI on the second server. 
The client will follow the + redirection, and pass an Authorization header , including the + <opaque> data. + + As with the basic scheme, proxies must be completely transparent in + the Digest access authentication scheme. That is, they must forward + the WWW-Authenticate, Authentication-Info and Authorization headers + untouched. If a proxy wants to authenticate a client before a request + is forwarded to the server, it can be done using the Proxy- + Authenticate and Proxy-Authorization headers described in section 3.6 + below. + +3.4 Security Protocol Negotiation + + It is useful for a server to be able to know which security schemes a + client is capable of handling. + + It is possible that a server may want to require Digest as its + authentication method, even if the server does not know that the + client supports it. A client is encouraged to fail gracefully if the + server specifies only authentication schemes it cannot handle. + +3.5 Example + + The following example assumes that an access-protected document is + being requested from the server via a GET request. The URI of the + document is "http://www.nowhere.org/dir/index.html". Both client and + server know that the username for this document is "Mufasa", and the + password is "Circle Of Life" (with one space between each of the + three words). + + The first time the client requests the document, no Authorization + header is sent, so the server responds with: + + HTTP/1.1 401 Unauthorized + WWW-Authenticate: Digest + realm="testrealm@host.com", + qop="auth,auth-int", + nonce="dcd98b7102dd2f0e8b11d0f600bfb0c093", + opaque="5ccc069c403ebaf9f0171e9517f40e41" + + The client may prompt the user for the username and password, after + which it will respond with a new request, including the following + Authorization header: + + + + + +Franks, et al.
Standards Track [Page 18] + +RFC 2617 HTTP Authentication June 1999 + + + Authorization: Digest username="Mufasa", + realm="testrealm@host.com", + nonce="dcd98b7102dd2f0e8b11d0f600bfb0c093", + uri="/dir/index.html", + qop=auth, + nc=00000001, + cnonce="0a4f113b", + response="6629fae49393a05397450978507c4ef1", + opaque="5ccc069c403ebaf9f0171e9517f40e41" + +3.6 Proxy-Authentication and Proxy-Authorization + + The digest authentication scheme may also be used for authenticating + users to proxies, proxies to proxies, or proxies to origin servers by + use of the Proxy-Authenticate and Proxy-Authorization headers. These + headers are instances of the Proxy-Authenticate and Proxy- + Authorization headers specified in sections 10.33 and 10.34 of the + HTTP/1.1 specification [2] and their behavior is subject to + restrictions described there. The transactions for proxy + authentication are very similar to those already described. Upon + receiving a request which requires authentication, the proxy/server + must issue the "407 Proxy Authentication Required" response with a + "Proxy-Authenticate" header. The digest-challenge used in the + Proxy-Authenticate header is the same as that for the WWW- + Authenticate header as defined above in section 3.2.1. + + The client/proxy must then re-issue the request with a Proxy- + Authorization header, with directives as specified for the + Authorization header in section 3.2.2 above. + + On subsequent responses, the server sends Proxy-Authentication-Info + with directives the same as those for the Authentication-Info header + field. + + Note that in principle a client could be asked to authenticate itself + to both a proxy and an end-server, but never in the same response. 
+ +4 Security Considerations + +4.1 Authentication of Clients using Basic Authentication + + The Basic authentication scheme is not a secure method of user + authentication, nor does it in any way protect the entity, which is + transmitted in cleartext across the physical network used as the + carrier. HTTP does not prevent additional authentication schemes and + encryption mechanisms from being employed to increase security or the + addition of enhancements (such as schemes to use one-time passwords) + to Basic authentication. + + + +Franks, et al. Standards Track [Page 19] + +RFC 2617 HTTP Authentication June 1999 + + + The most serious flaw in Basic authentication is that it results in + the essentially cleartext transmission of the user's password over + the physical network. It is this problem which Digest Authentication + attempts to address. + + Because Basic authentication involves the cleartext transmission of + passwords it SHOULD NOT be used (without enhancements) to protect + sensitive or valuable information. + + A common use of Basic authentication is for identification purposes + -- requiring the user to provide a user name and password as a means + of identification, for example, for purposes of gathering accurate + usage statistics on a server. When used in this way it is tempting to + think that there is no danger in its use if illicit access to the + protected documents is not a major concern. This is only correct if + the server issues both user name and password to the users and in + particular does not allow the user to choose his or her own password. + The danger arises because naive users frequently reuse a single + password to avoid the task of maintaining multiple passwords. + + If a server permits users to select their own passwords, then the + threat is not only unauthorized access to documents on the server but + also unauthorized access to any other resources on other systems that + the user protects with the same password. 
Furthermore, in the + server's password database, many of the passwords may also be users' + passwords for other sites. The owner or administrator of such a + system could therefore expose all users of the system to the risk of + unauthorized access to all those sites if this information is not + maintained in a secure fashion. + + Basic Authentication is also vulnerable to spoofing by counterfeit + servers. If a user can be led to believe that he is connecting to a + host containing information protected by Basic authentication when, + in fact, he is connecting to a hostile server or gateway, then the + attacker can request a password, store it for later use, and feign an + error. This type of attack is not possible with Digest + Authentication. Server implementers SHOULD guard against the + possibility of this sort of counterfeiting by gateways or CGI + scripts. In particular it is very dangerous for a server to simply + turn over a connection to a gateway. That gateway can then use the + persistent connection mechanism to engage in multiple transactions + with the client while impersonating the original server in a way that + is not detectable by the client. + +4.2 Authentication of Clients using Digest Authentication + + Digest Authentication does not provide a strong authentication + mechanism, when compared to public key based mechanisms, for example. + + + +Franks, et al. Standards Track [Page 20] + +RFC 2617 HTTP Authentication June 1999 + + + However, it is significantly stronger than (e.g.) CRAM-MD5, which has + been proposed for use with LDAP [10], POP and IMAP (see RFC 2195 + [9]). It is intended to replace the much weaker and even more + dangerous Basic mechanism. + + Digest Authentication offers no confidentiality protection beyond + protecting the actual password. All of the rest of the request and + response are available to an eavesdropper. + + Digest Authentication offers only limited integrity protection for + the messages in either direction. 
If qop=auth-int mechanism is used, + those parts of the message used in the calculation of the WWW- + Authenticate and Authorization header field response directive values + (see section 3.2 above) are protected. Most header fields and their + values could be modified as a part of a man-in-the-middle attack. + + Many needs for secure HTTP transactions cannot be met by Digest + Authentication. For those needs TLS or SHTTP are more appropriate + protocols. In particular Digest authentication cannot be used for any + transaction requiring confidentiality protection. Nevertheless many + functions remain for which Digest authentication is both useful and + appropriate. Any service in present use that uses Basic should be + switched to Digest as soon as practical. + +4.3 Limited Use Nonce Values + + The Digest scheme uses a server-specified nonce to seed the + generation of the request-digest value (as specified in section + 3.2.2.1 above). As shown in the example nonce in section 3.2.1, the + server is free to construct the nonce such that it may only be used + from a particular client, for a particular resource, for a limited + period of time or number of uses, or any other restrictions. Doing + so strengthens the protection provided against, for example, replay + attacks (see 4.5). However, it should be noted that the method + chosen for generating and checking the nonce also has performance and + resource implications. For example, a server may choose to allow + each nonce value to be used only once by maintaining a record of + whether or not each recently issued nonce has been returned and + sending a next-nonce directive in the Authentication-Info header + field of every response. This protects against even an immediate + replay attack, but has a high cost checking nonce values, and perhaps + more important will cause authentication failures for any pipelined + requests (presumably returning a stale nonce indication). 
Similarly, + incorporating a request-specific element such as the Etag value for a + resource limits the use of the nonce to that version of the resource + and also defeats pipelining. Thus it may be useful to do so for + methods with side effects but have unacceptable performance for those + that do not. + + + +Franks, et al. Standards Track [Page 21] + +RFC 2617 HTTP Authentication June 1999 + + +4.4 Comparison of Digest with Basic Authentication + + Both Digest and Basic Authentication are very much on the weak end of + the security strength spectrum. But a comparison between the two + points out the utility, even necessity, of replacing Basic by Digest. + + The greatest threat to the type of transactions for which these + protocols are used is network snooping. This kind of transaction + might involve, for example, online access to a database whose use is + restricted to paying subscribers. With Basic authentication an + eavesdropper can obtain the password of the user. This not only + permits him to access anything in the database, but, often worse, + will permit access to anything else the user protects with the same + password. + + By contrast, with Digest Authentication the eavesdropper only gets + access to the transaction in question and not to the user's password. + The information gained by the eavesdropper would permit a replay + attack, but only with a request for the same document, and even that + may be limited by the server's choice of nonce. + +4.5 Replay Attacks + + A replay attack against Digest authentication would usually be + pointless for a simple GET request since an eavesdropper would + already have seen the only document he could obtain with a replay. + This is because the URI of the requested document is digested in the + client request and the server will only deliver that document. By + contrast under Basic Authentication once the eavesdropper has the + user's password, any document protected by that password is open to + him. 
+ + Thus, for some purposes, it is necessary to protect against replay + attacks. A good Digest implementation can do this in various ways. + The server created "nonce" value is implementation dependent, but if + it contains a digest of the client IP, a time-stamp, the resource + ETag, and a private server key (as recommended above) then a replay + attack is not simple. An attacker must convince the server that the + request is coming from a false IP address and must cause the server + to deliver the document to an IP address different from the address + to which it believes it is sending the document. An attack can only + succeed in the period before the time-stamp expires. Digesting the + client IP and time-stamp in the nonce permits an implementation which + does not maintain state between transactions. + + For applications where no possibility of replay attack can be + tolerated the server can use one-time nonce values which will not be + honored for a second use. This requires the overhead of the server + + + +Franks, et al. Standards Track [Page 22] + +RFC 2617 HTTP Authentication June 1999 + + + remembering which nonce values have been used until the nonce time- + stamp (and hence the digest built with it) has expired, but it + effectively protects against replay attacks. + + An implementation must give special attention to the possibility of + replay attacks with POST and PUT requests. Unless the server employs + one-time or otherwise limited-use nonces and/or insists on the use of + the integrity protection of qop=auth-int, an attacker could replay + valid credentials from a successful request with counterfeit form + data or other message body. Even with the use of integrity protection + most metadata in header fields is not protected. Proper nonce + generation and checking provides some protection against replay of + previously used valid credentials, but see 4.8. 
+ +4.6 Weakness Created by Multiple Authentication Schemes + + An HTTP/1.1 server may return multiple challenges with a 401 + (Authenticate) response, and each challenge may use a different + auth-scheme. A user agent MUST choose to use the strongest auth- + scheme it understands and request credentials from the user based + upon that challenge. + + Note that many browsers will only recognize Basic and will require + that it be the first auth-scheme presented. Servers should only + include Basic if it is minimally acceptable. + + When the server offers choices of authentication schemes using the + WWW-Authenticate header, the strength of the resulting authentication + is only as good as that of the weakest of the authentication + schemes. See section 4.8 below for discussion of particular attack + scenarios that exploit multiple authentication schemes. + +4.7 Online dictionary attacks + + If the attacker can eavesdrop, then it can test any overheard + nonce/response pairs against a list of common words. Such a list is + usually much smaller than the total number of possible passwords. The + cost of computing the response for each password on the list is paid + once for each challenge. + + The server can mitigate this attack by not allowing users to select + passwords that are in a dictionary. + + + + + + + + + +Franks, et al. Standards Track [Page 23] + +RFC 2617 HTTP Authentication June 1999 + + +4.8 Man in the Middle + + Both Basic and Digest authentication are vulnerable to "man in the + middle" (MITM) attacks, for example, from a hostile or compromised + proxy. Clearly, this would present all the problems of eavesdropping. + But it also offers some additional opportunities to the attacker. + + A possible man-in-the-middle attack would be to add a weak + authentication scheme to the set of choices, hoping that the client + will use one that exposes the user's credentials (e.g. password).
For + this reason, the client should always use the strongest scheme that + it understands from the choices offered. + + An even better MITM attack would be to remove all offered choices, + replacing them with a challenge that requests only Basic + authentication, then uses the cleartext credentials from the Basic + authentication to authenticate to the origin server using the + stronger scheme it requested. A particularly insidious way to mount + such a MITM attack would be to offer a "free" proxy caching service + to gullible users. + + User agents should consider measures such as presenting a visual + indication at the time of the credentials request of what + authentication scheme is to be used, or remembering the strongest + authentication scheme ever requested by a server and produce a + warning message before using a weaker one. It might also be a good + idea for the user agent to be configured to demand Digest + authentication in general, or from specific sites. + + Or, a hostile proxy might spoof the client into making a request the + attacker wanted rather than one the client wanted. Of course, this is + still much harder than a comparable attack against Basic + Authentication. + +4.9 Chosen plaintext attacks + + With Digest authentication, a MITM or a malicious server can + arbitrarily choose the nonce that the client will use to compute the + response. This is called a "chosen plaintext" attack. The ability to + choose the nonce is known to make cryptanalysis much easier [8]. + + However, no way to analyze the MD5 one-way function used by Digest + using chosen plaintext is currently known. + + The countermeasure against this attack is for clients to be + configured to require the use of the optional "cnonce" directive; + this allows the client to vary the input to the hash in a way not + chosen by the attacker. + + + +Franks, et al. 
Standards Track [Page 24] + +RFC 2617 HTTP Authentication June 1999 + + +4.10 Precomputed dictionary attacks + + With Digest authentication, if the attacker can execute a chosen + plaintext attack, the attacker can precompute the response for many + common words to a nonce of its choice, and store a dictionary of + (response, password) pairs. Such precomputation can often be done in + parallel on many machines. It can then use the chosen plaintext + attack to acquire a response corresponding to that challenge, and + just look up the password in the dictionary. Even if most passwords + are not in the dictionary, some might be. Since the attacker gets to + pick the challenge, the cost of computing the response for each + password on the list can be amortized over finding many passwords. A + dictionary with 100 million password/response pairs would take about + 3.2 gigabytes of disk storage. + + The countermeasure against this attack is for clients to be + configured to require the use of the optional "cnonce" directive. + +4.11 Batch brute force attacks + + With Digest authentication, a MITM can execute a chosen plaintext + attack, and can gather responses from many users to the same nonce. + It can then find all the passwords within any subset of password + space that would generate one of the nonce/response pairs in a single + pass over that space. It also reduces the time to find the first + password by a factor equal to the number of nonce/response pairs + gathered. This search of the password space can often be done in + parallel on many machines, and even a single machine can search large + subsets of the password space very quickly -- reports exist of + searching all passwords with six or fewer letters in a few hours. + + The countermeasure against this attack is for clients to be + configured to require the use of the optional "cnonce" directive.
+ +4.12 Spoofing by Counterfeit Servers + + Basic Authentication is vulnerable to spoofing by counterfeit + servers. If a user can be led to believe that she is connecting to a + host containing information protected by a password she knows, when + in fact she is connecting to a hostile server, then the hostile + server can request a password, store it away for later use, and feign + an error. This type of attack is more difficult with Digest + Authentication -- but the client must know to demand that Digest + authentication be used, perhaps using some of the techniques + described above to counter "man-in-the-middle" attacks. Again, the + user can be helped in detecting this attack by a visual indication of + the authentication mechanism in use with appropriate guidance in + interpreting the implications of each scheme. + + + +Franks, et al. Standards Track [Page 25] + +RFC 2617 HTTP Authentication June 1999 + + +4.13 Storing passwords + + Digest authentication requires that the authenticating agent (usually + the server) store some data derived from the user's name and password + in a "password file" associated with a given realm. Normally this + might contain pairs consisting of username and H(A1), where H(A1) is + the digested value of the username, realm, and password as described + above. + + The security implications of this are that if this password file is + compromised, then an attacker gains immediate access to documents on + the server using this realm. Unlike, say a standard UNIX password + file, this information need not be decrypted in order to access + documents in the server realm associated with this file. On the other + hand, decryption, or more likely a brute force attack, would be + necessary to obtain the user's password. This is the reason that the + realm is part of the digested data stored in the password file. 
It + means that if one Digest authentication password file is compromised, + it does not automatically compromise others with the same username + and password (though it does expose them to brute force attack). + + There are two important security consequences of this. First the + password file must be protected as if it contained unencrypted + passwords, because for the purpose of accessing documents in its + realm, it effectively does. + + A second consequence of this is that the realm string should be + unique among all realms which any single user is likely to use. In + particular a realm string should include the name of the host doing + the authentication. The inability of the client to authenticate the + server is a weakness of Digest Authentication. + +4.14 Summary + + By modern cryptographic standards Digest Authentication is weak. But + for a large range of purposes it is valuable as a replacement for + Basic Authentication. It remedies some, but not all, weaknesses of + Basic Authentication. Its strength may vary depending on the + implementation. In particular the structure of the nonce (which is + dependent on the server implementation) may affect the ease of + mounting a replay attack. A range of server options is appropriate + since, for example, some implementations may be willing to accept the + server overhead of one-time nonces or digests to eliminate the + possibility of replay. Others may be satisfied with a nonce like the one + recommended above restricted to a single IP address and a single ETag + or with a limited lifetime. + + + + + +Franks, et al. Standards Track [Page 26] + +RFC 2617 HTTP Authentication June 1999 + + + The bottom line is that *any* compliant implementation will be + relatively weak by cryptographic standards, but *any* compliant + implementation will be far superior to Basic Authentication.
+ +5 Sample implementation + + [[ WARNING: DigestCalcHA1 IS WRONG ]] + + The following code implements the calculations of H(A1), H(A2), + request-digest and response-digest, and a test program which computes + the values used in the example of section 3.5. It uses the MD5 + implementation from RFC 1321. + + File "digcalc.h": + +#define HASHLEN 16 +typedef char HASH[HASHLEN]; +#define HASHHEXLEN 32 +typedef char HASHHEX[HASHHEXLEN+1]; +#define IN +#define OUT + +/* calculate H(A1) as per HTTP Digest spec */ +void DigestCalcHA1( + IN char * pszAlg, + IN char * pszUserName, + IN char * pszRealm, + IN char * pszPassword, + IN char * pszNonce, + IN char * pszCNonce, + OUT HASHHEX SessionKey + ); + +/* calculate request-digest/response-digest as per HTTP Digest spec */ +void DigestCalcResponse( + IN HASHHEX HA1, /* H(A1) */ + IN char * pszNonce, /* nonce from server */ + IN char * pszNonceCount, /* 8 hex digits */ + IN char * pszCNonce, /* client nonce */ + IN char * pszQop, /* qop-value: "", "auth", "auth-int" */ + IN char * pszMethod, /* method from the request */ + IN char * pszDigestUri, /* requested URL */ + IN HASHHEX HEntity, /* H(entity body) if qop="auth-int" */ + OUT HASHHEX Response /* request-digest or response-digest */ + ); + +File "digcalc.c": + +#include +#include + + + +Franks, et al. 
Standards Track [Page 27] + +RFC 2617 HTTP Authentication June 1999 + + +#include +#include "digcalc.h" + +void CvtHex( + IN HASH Bin, + OUT HASHHEX Hex + ) +{ + unsigned short i; + unsigned char j; + + for (i = 0; i < HASHLEN; i++) { + j = (Bin[i] >> 4) & 0xf; + if (j <= 9) + Hex[i*2] = (j + '0'); + else + Hex[i*2] = (j + 'a' - 10); + j = Bin[i] & 0xf; + if (j <= 9) + Hex[i*2+1] = (j + '0'); + else + Hex[i*2+1] = (j + 'a' - 10); + }; + Hex[HASHHEXLEN] = '\0'; +}; + +/* calculate H(A1) as per spec */ +void DigestCalcHA1( + IN char * pszAlg, + IN char * pszUserName, + IN char * pszRealm, + IN char * pszPassword, + IN char * pszNonce, + IN char * pszCNonce, + OUT HASHHEX SessionKey + ) +{ + MD5_CTX Md5Ctx; + HASH HA1; + + MD5Init(&Md5Ctx); + MD5Update(&Md5Ctx, pszUserName, strlen(pszUserName)); + MD5Update(&Md5Ctx, ":", 1); + MD5Update(&Md5Ctx, pszRealm, strlen(pszRealm)); + MD5Update(&Md5Ctx, ":", 1); + MD5Update(&Md5Ctx, pszPassword, strlen(pszPassword)); + MD5Final(HA1, &Md5Ctx); + if (stricmp(pszAlg, "md5-sess") == 0) { + + + +Franks, et al. 
Standards Track [Page 28] + +RFC 2617 HTTP Authentication June 1999 + + + MD5Init(&Md5Ctx); + MD5Update(&Md5Ctx, HA1, HASHLEN); + MD5Update(&Md5Ctx, ":", 1); + MD5Update(&Md5Ctx, pszNonce, strlen(pszNonce)); + MD5Update(&Md5Ctx, ":", 1); + MD5Update(&Md5Ctx, pszCNonce, strlen(pszCNonce)); + MD5Final(HA1, &Md5Ctx); + }; + CvtHex(HA1, SessionKey); +}; + +/* calculate request-digest/response-digest as per HTTP Digest spec */ +void DigestCalcResponse( + IN HASHHEX HA1, /* H(A1) */ + IN char * pszNonce, /* nonce from server */ + IN char * pszNonceCount, /* 8 hex digits */ + IN char * pszCNonce, /* client nonce */ + IN char * pszQop, /* qop-value: "", "auth", "auth-int" */ + IN char * pszMethod, /* method from the request */ + IN char * pszDigestUri, /* requested URL */ + IN HASHHEX HEntity, /* H(entity body) if qop="auth-int" */ + OUT HASHHEX Response /* request-digest or response-digest */ + ) +{ + MD5_CTX Md5Ctx; + HASH HA2; + HASH RespHash; + HASHHEX HA2Hex; + + // calculate H(A2) + MD5Init(&Md5Ctx); + MD5Update(&Md5Ctx, pszMethod, strlen(pszMethod)); + MD5Update(&Md5Ctx, ":", 1); + MD5Update(&Md5Ctx, pszDigestUri, strlen(pszDigestUri)); + if (stricmp(pszQop, "auth-int") == 0) { + MD5Update(&Md5Ctx, ":", 1); + MD5Update(&Md5Ctx, HEntity, HASHHEXLEN); + }; + MD5Final(HA2, &Md5Ctx); + CvtHex(HA2, HA2Hex); + + // calculate response + MD5Init(&Md5Ctx); + MD5Update(&Md5Ctx, HA1, HASHHEXLEN); + MD5Update(&Md5Ctx, ":", 1); + MD5Update(&Md5Ctx, pszNonce, strlen(pszNonce)); + MD5Update(&Md5Ctx, ":", 1); + if (*pszQop) { + + + +Franks, et al. 
Standards Track [Page 29] + +RFC 2617 HTTP Authentication June 1999 + + + MD5Update(&Md5Ctx, pszNonceCount, strlen(pszNonceCount)); + MD5Update(&Md5Ctx, ":", 1); + MD5Update(&Md5Ctx, pszCNonce, strlen(pszCNonce)); + MD5Update(&Md5Ctx, ":", 1); + MD5Update(&Md5Ctx, pszQop, strlen(pszQop)); + MD5Update(&Md5Ctx, ":", 1); + }; + MD5Update(&Md5Ctx, HA2Hex, HASHHEXLEN); + MD5Final(RespHash, &Md5Ctx); + CvtHex(RespHash, Response); +}; + +File "digtest.c": + + +#include +#include "digcalc.h" + +void main(int argc, char ** argv) { + + char * pszNonce = "dcd98b7102dd2f0e8b11d0f600bfb0c093"; + char * pszCNonce = "0a4f113b"; + char * pszUser = "Mufasa"; + char * pszRealm = "testrealm@host.com"; + char * pszPass = "Circle Of Life"; + char * pszAlg = "md5"; + char szNonceCount[9] = "00000001"; + char * pszMethod = "GET"; + char * pszQop = "auth"; + char * pszURI = "/dir/index.html"; + HASHHEX HA1; + HASHHEX HA2 = ""; + HASHHEX Response; + + DigestCalcHA1(pszAlg, pszUser, pszRealm, pszPass, pszNonce, +pszCNonce, HA1); + DigestCalcResponse(HA1, pszNonce, szNonceCount, pszCNonce, pszQop, + pszMethod, pszURI, HA2, Response); + printf("Response = %s\n", Response); +}; + + + + + + + + + + + +Franks, et al. Standards Track [Page 30] + +RFC 2617 HTTP Authentication June 1999 + + +6 Acknowledgments + + Eric W. Sink, of AbiSource, Inc., was one of the original authors + before the specification underwent substantial revision. + + In addition to the authors, valuable discussion instrumental in + creating this document has come from Peter J. Churchyard, Ned Freed, + and David M. Kristol. + + Jim Gettys and Larry Masinter edited this document for update. + +7 References + + [1] Berners-Lee, T., Fielding, R. and H. Frystyk, "Hypertext + Transfer Protocol -- HTTP/1.0", RFC 1945, May 1996. + + [2] Fielding, R., Gettys, J., Mogul, J., Frysyk, H., Masinter, L., + Leach, P. and T. Berners-Lee, "Hypertext Transfer Protocol -- + HTTP/1.1", RFC 2616, June 1999. 
+ + [3] Rivest, R., "The MD5 Message-Digest Algorithm", RFC 1321, April + 1992. + + [4] Freed, N. and N. Borenstein. "Multipurpose Internet Mail + Extensions (MIME) Part One: Format of Internet Message Bodies", + RFC 2045, November 1996. + + [5] Dierks, T. and C. Allen "The TLS Protocol, Version 1.0", RFC + 2246, January 1999. + + [6] Franks, J., Hallam-Baker, P., Hostetler, J., Leach, P., + Luotonen, A., Sink, E. and L. Stewart, "An Extension to HTTP : + Digest Access Authentication", RFC 2069, January 1997. + + [7] Berners Lee, T, Fielding, R. and L. Masinter, "Uniform Resource + Identifiers (URI): Generic Syntax", RFC 2396, August 1998. + + [8] Kaliski, B.,Robshaw, M., "Message Authentication with MD5", + CryptoBytes, Spring 1995, RSA Inc, + (http://www.rsa.com/rsalabs/pubs/cryptobytes/spring95/md5.htm) + + [9] Klensin, J., Catoe, R. and P. Krumviede, "IMAP/POP AUTHorize + Extension for Simple Challenge/Response", RFC 2195, September + 1997. + + [10] Morgan, B., Alvestrand, H., Hodges, J., Wahl, M., + "Authentication Methods for LDAP", Work in Progress. + + + + +Franks, et al. Standards Track [Page 31] + +RFC 2617 HTTP Authentication June 1999 + + +8 Authors' Addresses + + John Franks + Professor of Mathematics + Department of Mathematics + Northwestern University + Evanston, IL 60208-2730, USA + + EMail: john@math.nwu.edu + + + Phillip M. Hallam-Baker + Principal Consultant + Verisign Inc. + 301 Edgewater Place + Suite 210 + Wakefield MA 01880, USA + + EMail: pbaker@verisign.com + + + Jeffery L. Hostetler + Software Craftsman + AbiSource, Inc. + 6 Dunlap Court + Savoy, IL 61874 + + EMail: jeff@AbiSource.com + + + Scott D. Lawrence + Agranat Systems, Inc. + 5 Clocktower Place, Suite 400 + Maynard, MA 01754, USA + + EMail: lawrence@agranat.com + + + Paul J. Leach + Microsoft Corporation + 1 Microsoft Way + Redmond, WA 98052, USA + + EMail: paulle@microsoft.com + + + + + + + +Franks, et al.
Standards Track [Page 32] + +RFC 2617 HTTP Authentication June 1999 + + + Ari Luotonen + Member of Technical Staff + Netscape Communications Corporation + 501 East Middlefield Road + Mountain View, CA 94043, USA + + + Lawrence C. Stewart + Open Market, Inc. + 215 First Street + Cambridge, MA 02142, USA + + EMail: stewart@OpenMarket.com + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Franks, et al. Standards Track [Page 33] + +RFC 2617 HTTP Authentication June 1999 + + +9. Full Copyright Statement + + Copyright (C) The Internet Society (1999). All Rights Reserved. + + This document and translations of it may be copied and furnished to + others, and derivative works that comment on or otherwise explain it + or assist in its implementation may be prepared, copied, published + and distributed, in whole or in part, without restriction of any + kind, provided that the above copyright notice and this paragraph are + included on all such copies and derivative works. However, this + document itself may not be modified in any way, such as by removing + the copyright notice or references to the Internet Society or other + Internet organizations, except as needed for the purpose of + developing Internet standards in which case the procedures for + copyrights defined in the Internet Standards process must be + followed, or as required to translate it into languages other than + English. + + The limited permissions granted above are perpetual and will not be + revoked by the Internet Society or its successors or assigns. + + This document and the information contained herein is provided on an + "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING + TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING + BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION + HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF + MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. 
+ +Acknowledgement + + Funding for the RFC Editor function is currently provided by the + Internet Society. + + + + + + + + + + + + + + + + + + + +Franks, et al. Standards Track [Page 34] + diff --git a/docs/specs/rfc2817.txt b/docs/specs/rfc2817.txt new file mode 100644 index 0000000..d7b7e70 --- /dev/null +++ b/docs/specs/rfc2817.txt @@ -0,0 +1,731 @@ + + + + + + +Network Working Group R. Khare +Request for Comments: 2817 4K Associates / UC Irvine +Updates: 2616 S. Lawrence +Category: Standards Track Agranat Systems, Inc. + May 2000 + + + Upgrading to TLS Within HTTP/1.1 + +Status of this Memo + + This document specifies an Internet standards track protocol for the + Internet community, and requests discussion and suggestions for + improvements. Please refer to the current edition of the "Internet + Official Protocol Standards" (STD 1) for the standardization state + and status of this protocol. Distribution of this memo is unlimited. + +Copyright Notice + + Copyright (C) The Internet Society (2000). All Rights Reserved. + +Abstract + + This memo explains how to use the Upgrade mechanism in HTTP/1.1 to + initiate Transport Layer Security (TLS) over an existing TCP + connection. This allows unsecured and secured HTTP traffic to share + the same well known port (in this case, http: at 80 rather than + https: at 443). It also enables "virtual hosting", so a single HTTP + + TLS server can disambiguate traffic intended for several hostnames at + a single IP address. + + Since HTTP/1.1 [1] defines Upgrade as a hop-by-hop mechanism, this + memo also documents the HTTP CONNECT method for establishing end-to- + end tunnels across HTTP proxies. Finally, this memo establishes new + IANA registries for public HTTP status codes, as well as public or + private Upgrade product tokens. + + This memo does NOT affect the current definition of the 'https' URI + scheme, which already defines a separate namespace + (http://example.org/ and https://example.org/ are not equivalent). 
+ + + + + + + + + + + +Khare & Lawrence Standards Track [Page 1] + +RFC 2817 HTTP Upgrade to TLS May 2000 + + +Table of Contents + + 1. Motivation . . . . . . . . . . . . . . . . . . . . . . . . . . 2 + 2. Introduction . . . . . . . . . . . . . . . . . . . . . . . . . 3 + 2.1 Requirements Terminology . . . . . . . . . . . . . . . . . . . 4 + 3. Client Requested Upgrade to HTTP over TLS . . . . . . . . . . 4 + 3.1 Optional Upgrade . . . . . . . . . . . . . . . . . . . . . . . 4 + 3.2 Mandatory Upgrade . . . . . . . . . . . . . . . . . . . . . . 4 + 3.3 Server Acceptance of Upgrade Request . . . . . . . . . . . . . 4 + 4. Server Requested Upgrade to HTTP over TLS . . . . . . . . . . 5 + 4.1 Optional Advertisement . . . . . . . . . . . . . . . . . . . . 5 + 4.2 Mandatory Advertisement . . . . . . . . . . . . . . . . . . . 5 + 5. Upgrade across Proxies . . . . . . . . . . . . . . . . . . . . 6 + 5.1 Implications of Hop By Hop Upgrade . . . . . . . . . . . . . . 6 + 5.2 Requesting a Tunnel with CONNECT . . . . . . . . . . . . . . . 6 + 5.3 Establishing a Tunnel with CONNECT . . . . . . . . . . . . . . 7 + 6. Rationale for the use of a 4xx (client error) Status Code . . 7 + 7. IANA Considerations . . . . . . . . . . . . . . . . . . . . . 8 + 7.1 HTTP Status Code Registry . . . . . . . . . . . . . . . . . . 8 + 7.2 HTTP Upgrade Token Registry . . . . . . . . . . . . . . . . . 8 + 8. Security Considerations . . . . . . . . . . . . . . . . . . . 9 + 8.1 Implications for the https: URI Scheme . . . . . . . . . . . . 10 + 8.2 Security Considerations for CONNECT . . . . . . . . . . . . . 10 + References . . . . . . . . . . . . . . . . . . . . . . . . . . 10 + Authors' Addresses . . . . . . . . . . . . . . . . . . . . . . 11 + A. Acknowledgments . . . . . . . . . . . . . . . . . . . . . . . 12 + Full Copyright Statement . . . . . . . . . . . . . . . . . . . 13 + +1. 
Motivation + + The historical practice of deploying HTTP over SSL3 [3] has + distinguished the combination from HTTP alone by a unique URI scheme + and the TCP port number. The scheme 'http' meant the HTTP protocol + alone on port 80, while 'https' meant the HTTP protocol over SSL on + port 443. Parallel well-known port numbers have similarly been + requested -- and in some cases, granted -- to distinguish between + secured and unsecured use of other application protocols (e.g. + snews, ftps). This approach effectively halves the number of + available well known ports. + + At the Washington DC IETF meeting in December 1997, the Applications + Area Directors and the IESG reaffirmed that the practice of issuing + parallel "secure" port numbers should be deprecated. The HTTP/1.1 + Upgrade mechanism can apply Transport Layer Security [6] to an open + HTTP connection. + + + + + + +Khare & Lawrence Standards Track [Page 2] + +RFC 2817 HTTP Upgrade to TLS May 2000 + + + In the nearly two years since, there has been broad acceptance of the + concept behind this proposal, but little interest in implementing + alternatives to port 443 for generic Web browsing. In fact, nothing + in this memo affects the current interpretation of https: URIs. + However, new application protocols built atop HTTP, such as the + Internet Printing Protocol [7], call for just such a mechanism in + order to move ahead in the IETF standards process. + + The Upgrade mechanism also solves the "virtual hosting" problem. + Rather than allocating multiple IP addresses to a single host, an + HTTP/1.1 server will use the Host: header to disambiguate the + intended web service. As HTTP/1.1 usage has grown more prevalent, + more ISPs are offering name-based virtual hosting, thus delaying IP + address space exhaustion. + + TLS (and SSL) have been hobbled by the same limitation as earlier + versions of HTTP: the initial handshake does not specify the intended + hostname, relying exclusively on the IP address. 
Using a cleartext + HTTP/1.1 Upgrade: preamble to the TLS handshake -- choosing the + certificates based on the initial Host: header -- will allow ISPs to + provide secure name-based virtual hosting as well. + +2. Introduction + + TLS, a.k.a., SSL (Secure Sockets Layer), establishes a private end- + to-end connection, optionally including strong mutual authentication, + using a variety of cryptosystems. Initially, a handshake phase uses + three subprotocols to set up a record layer, authenticate endpoints, + set parameters, as well as report errors. Then, there is an ongoing + layered record protocol that handles encryption, compression, and + reassembly for the remainder of the connection. The latter is + intended to be completely transparent. For example, there is no + dependency between TLS's record markers and/or certificates and + HTTP/1.1's chunked encoding or authentication. + + Either the client or server can use the HTTP/1.1 [1] Upgrade + mechanism (Section 14.42) to indicate that a TLS-secured connection + is desired or necessary. This memo defines the "TLS/1.0" Upgrade + token, and a new HTTP Status Code, "426 Upgrade Required". + + Section 3 and Section 4 describe the operation of a directly + connected client and server. Intermediate proxies must establish an + end-to-end tunnel before applying those operations, as explained in + Section 5. + + + + + + + +Khare & Lawrence Standards Track [Page 3] + +RFC 2817 HTTP Upgrade to TLS May 2000 + + +2.1 Requirements Terminology + + Keywords "MUST", "MUST NOT", "REQUIRED", "SHOULD", "SHOULD NOT" and + "MAY" that appear in this document are to be interpreted as described + in RFC 2119 [11]. + +3. Client Requested Upgrade to HTTP over TLS + + When the client sends an HTTP/1.1 request with an Upgrade header + field containing the token "TLS/1.0", it is requesting the server to + complete the current HTTP/1.1 request after switching to TLS/1.0.
+ +3.1 Optional Upgrade + + A client MAY offer to switch to secured operation during any clear + HTTP request when an unsecured response would be acceptable: + + GET http://example.bank.com/acct_stat.html?749394889300 HTTP/1.1 + Host: example.bank.com + Upgrade: TLS/1.0 + Connection: Upgrade + + In this case, the server MAY respond to the clear HTTP operation + normally, OR switch to secured operation (as detailed in the next + section). + + Note that HTTP/1.1 [1] specifies "the upgrade keyword MUST be + supplied within a Connection header field (section 14.10) whenever + Upgrade is present in an HTTP/1.1 message". + +3.2 Mandatory Upgrade + + If an unsecured response would be unacceptable, a client MUST send an + OPTIONS request first to complete the switch to TLS/1.0 (if + possible). + + OPTIONS * HTTP/1.1 + Host: example.bank.com + Upgrade: TLS/1.0 + Connection: Upgrade + +3.3 Server Acceptance of Upgrade Request + + As specified in HTTP/1.1 [1], if the server is prepared to initiate + the TLS handshake, it MUST send the intermediate "101 Switching + Protocol" and MUST include an Upgrade response header specifying the + tokens of the protocol stack it is switching to: + + + + +Khare & Lawrence Standards Track [Page 4] + +RFC 2817 HTTP Upgrade to TLS May 2000 + + + HTTP/1.1 101 Switching Protocols + Upgrade: TLS/1.0, HTTP/1.1 + Connection: Upgrade + + Note that the protocol tokens listed in the Upgrade header of a 101 + Switching Protocols response specify an ordered 'bottom-up' stack. + + As specified in HTTP/1.1 [1], Section 10.1.2: "The server will + switch protocols to those defined by the response's Upgrade header + field immediately after the empty line which terminates the 101 + response". + + Once the TLS handshake completes successfully, the server MUST + continue with the response to the original request. Any TLS handshake + failure MUST lead to disconnection, per the TLS error alert + specification. + +4. 
Server Requested Upgrade to HTTP over TLS + + The Upgrade response header field advertises possible protocol + upgrades a server MAY accept. In conjunction with the "426 Upgrade + Required" status code, a server can advertise the exact protocol + upgrade(s) that a client MUST accept to complete the request. + +4.1 Optional Advertisement + + As specified in HTTP/1.1 [1], the server MAY include an Upgrade + header in any response other than 101 or 426 to indicate a + willingness to switch to any (combination) of the protocols listed. + +4.2 Mandatory Advertisement + + A server MAY indicate that a client request can not be completed + without TLS using the "426 Upgrade Required" status code, which MUST + include an Upgrade header field specifying the token of the + required TLS version. + + HTTP/1.1 426 Upgrade Required + Upgrade: TLS/1.0, HTTP/1.1 + Connection: Upgrade + + The server SHOULD include a message body in the 426 response which + indicates in human readable form the reason for the error and + describes any alternative courses which may be available to the user. + + Note that even if a client is willing to use TLS, it must use the + operations in Section 3 to proceed; the TLS handshake cannot begin + immediately after the 426 response. + + + +Khare & Lawrence Standards Track [Page 5] + +RFC 2817 HTTP Upgrade to TLS May 2000 + + +5. Upgrade across Proxies + + As a hop-by-hop header, Upgrade is negotiated between each pair of + HTTP counterparties. If a User Agent sends a request with an Upgrade + header to a proxy, it is requesting a change to the protocol between + itself and the proxy, not an end-to-end change. + + Since TLS, in particular, requires end-to-end connectivity to provide + authentication and prevent man-in-the-middle attacks, this memo + specifies the CONNECT method to establish a tunnel across proxies. + + Once a tunnel is established, any of the operations in Section 3 can + be used to establish a TLS connection. 
+ +5.1 Implications of Hop By Hop Upgrade + + If an origin server receives an Upgrade header from a proxy and + responds with a 101 Switching Protocols response, it is changing the + protocol only on the connection between the proxy and itself. + Similarly, a proxy might return a 101 response to its client to + change the protocol on that connection independently of the protocols + it is using to communicate toward the origin server. + + These scenarios also complicate diagnosis of a 426 response. Since + Upgrade is a hop-by-hop header, a proxy that does not recognize 426 + might remove the accompanying Upgrade header and prevent the client + from determining the required protocol switch. If a client receives + a 426 status without an accompanying Upgrade header, it will need to + request an end to end tunnel connection as described in Section 5.2 + and repeat the request in order to obtain the required upgrade + information. + + This hop-by-hop definition of Upgrade was a deliberate choice. It + allows for incremental deployment on either side of proxies, and for + optimized protocols between cascaded proxies without the knowledge of + the parties that are not a part of the change. + +5.2 Requesting a Tunnel with CONNECT + + A CONNECT method requests that a proxy establish a tunnel connection + on its behalf. The Request-URI portion of the Request-Line is always + an 'authority' as defined by URI Generic Syntax [2], which is to say + the host name and port number destination of the requested connection + separated by a colon: + + CONNECT server.example.com:80 HTTP/1.1 + Host: server.example.com:80 + + + + +Khare & Lawrence Standards Track [Page 6] + +RFC 2817 HTTP Upgrade to TLS May 2000 + + + Other HTTP mechanisms can be used normally with the CONNECT method -- + except end-to-end protocol Upgrade requests, of course, since the + tunnel must be established first. 
+ + For example, proxy authentication might be used to establish the + authority to create a tunnel: + + CONNECT server.example.com:80 HTTP/1.1 + Host: server.example.com:80 + Proxy-Authorization: basic aGVsbG86d29ybGQ= + + Like any other pipelined HTTP/1.1 request, data to be tunneled may be + sent immediately after the blank line. The usual caveats also apply: + data may be discarded if the eventual response is negative, and the + connection may be reset with no response if more than one TCP segment + is outstanding. + +5.3 Establishing a Tunnel with CONNECT + + Any successful (2xx) response to a CONNECT request indicates that the + proxy has established a connection to the requested host and port, + and has switched to tunneling the current connection to that server + connection. + + It may be the case that the proxy itself can only reach the requested + origin server through another proxy. In this case, the first proxy + SHOULD make a CONNECT request of that next proxy, requesting a tunnel + to the authority. A proxy MUST NOT respond with any 2xx status code + unless it has either a direct or tunnel connection established to the + authority. + + An origin server which receives a CONNECT request for itself MAY + respond with a 2xx status code to indicate that a connection is + established. + + If at any point either one of the peers gets disconnected, any + outstanding data that came from that peer will be passed to the other + one, and after that also the other connection will be terminated by + the proxy. If there is outstanding data to that peer undelivered, + that data will be discarded. + +6. Rationale for the use of a 4xx (client error) Status Code + + Reliable, interoperable negotiation of Upgrade features requires an + unambiguous failure signal. The 426 Upgrade Required status code + allows a server to definitively state the precise protocol extensions + a given resource must be served with. 
+ + + + +Khare & Lawrence Standards Track [Page 7] + +RFC 2817 HTTP Upgrade to TLS May 2000 + + + It might at first appear that the response should have been some form + of redirection (a 3xx code), by analogy to an old-style redirection + to an https: URI. User agents that do not understand Upgrade: + preclude this. + + Suppose that a 3xx code had been assigned for "Upgrade Required"; a + user agent that did not recognize it would treat it as 300. It would + then properly look for a "Location" header in the response and + attempt to repeat the request at the URL in that header field. Since + it did not know to Upgrade to incorporate the TLS layer, it would at + best fail again at the new URL. + +7. IANA Considerations + + IANA shall create registries for two name spaces, as described in BCP + 26 [10]: + + o HTTP Status Codes + o HTTP Upgrade Tokens + +7.1 HTTP Status Code Registry + + The HTTP Status Code Registry defines the name space for the Status- + Code token in the Status line of an HTTP response. The initial + values for this name space are those specified by: + + 1. Draft Standard for HTTP/1.1 [1] + 2. Web Distributed Authoring and Versioning [4] [defines 420-424] + 3. WebDAV Advanced Collections [5] (Work in Progress) [defines 425] + 4. Section 6 [defines 426] + + Values to be added to this name space SHOULD be subject to review in + the form of a standards track document within the IETF Applications + Area. Any such document SHOULD be traceable through statuses of + either 'Obsoletes' or 'Updates' to the Draft Standard for + HTTP/1.1 [1]. + +7.2 HTTP Upgrade Token Registry + + The HTTP Upgrade Token Registry defines the name space for product + tokens used to identify protocols in the Upgrade HTTP header field. + Each registered token should be associated with one or a set of + specifications, and with contact information. 
+ + The Draft Standard for HTTP/1.1 [1] specifies that these tokens obey + the production for 'product': + + + + + +Khare & Lawrence Standards Track [Page 8] + +RFC 2817 HTTP Upgrade to TLS May 2000 + + + product = token ["/" product-version] + product-version = token + + Registrations should be allowed on a First Come First Served basis as + described in BCP 26 [10]. These specifications need not be IETF + documents or be subject to IESG review, but should obey the following + rules: + + 1. A token, once registered, stays registered forever. + 2. The registration MUST name a responsible party for the + registration. + 3. The registration MUST name a point of contact. + 4. The registration MAY name the documentation required for the + token. + 5. The responsible party MAY change the registration at any time. + The IANA will keep a record of all such changes, and make them + available upon request. + 6. The responsible party for the first registration of a "product" + token MUST approve later registrations of a "version" token + together with that "product" token before they can be registered. + 7. If absolutely required, the IESG MAY reassign the responsibility + for a token. This will normally only be used in the case when a + responsible party cannot be contacted. + + This specification defines the protocol token "TLS/1.0" as the + identifier for the protocol specified by The TLS Protocol [6]. + + It is NOT required that specifications for upgrade tokens be made + publicly available, but the contact information for the registration + SHOULD be. + +8. Security Considerations + + The potential for a man-in-the-middle attack (deleting the Upgrade + header) remains the same as current, mixed http/https practice: + + o Removing the Upgrade header is similar to rewriting web pages to + change https:// links to http:// links. + o The risk is only present if the server is willing to vend such + information over both a secure and an insecure channel in the + first place. 
+ o If the client knows for a fact that a server is TLS-compliant, it + can insist on it by only sending an Upgrade request with a no-op + method like OPTIONS. + o Finally, as the https: specification warns, "users should + carefully examine the certificate presented by the server to + determine if it meets their expectations". + + + + +Khare & Lawrence Standards Track [Page 9] + +RFC 2817 HTTP Upgrade to TLS May 2000 + + + Furthermore, for clients that do not explicitly try to invoke TLS, + servers can use the Upgrade header in any response other than 101 or + 426 to advertise TLS compliance. Since TLS compliance should be + considered a feature of the server and not the resource at hand, it + should be sufficient to send it once, and let clients cache that + fact. + +8.1 Implications for the https: URI Scheme + + While nothing in this memo affects the definition of the 'https' URI + scheme, widespread adoption of this mechanism for HyperText content + could use 'http' to identify both secure and non-secure resources. + + The choice of what security characteristics are required on the + connection is left to the client and server. This allows either + party to use any information available in making this determination. + For example, user agents may rely on user preference settings or + information about the security of the network such as 'TLS required + on all POST operations not on my local net', or servers may apply + resource access rules such as 'the FORM on this page must be served + and submitted using TLS'. + +8.2 Security Considerations for CONNECT + + A generic TCP tunnel is fraught with security risks. First, such + authorization should be limited to a small number of known ports. + The Upgrade: mechanism defined here only requires onward tunneling at + port 80. Second, since tunneled data is opaque to the proxy, there + are additional risks to tunneling to other well-known or reserved + ports. 
A putative HTTP client CONNECTing to port 25 could relay spam + via SMTP, for example. + +References + + [1] Fielding, R., Gettys, J., Mogul, J., Frystyk, H., Masinter, L., + Leach, P. and T. Berners-Lee, "Hypertext Transfer Protocol -- + HTTP/1.1", RFC 2616, June 1999. + + [2] Berners-Lee, T., Fielding, R. and L. Masinter, "URI Generic + Syntax", RFC 2396, August 1998. + + [3] Rescorla, E., "HTTP Over TLS", RFC 2818, May 2000. + + [4] Goland, Y., Whitehead, E., Faizi, A., Carter, S. and D. Jensen, + "Web Distributed Authoring and Versioning", RFC 2518, February + 1999. + + + + + +Khare & Lawrence Standards Track [Page 10] + +RFC 2817 HTTP Upgrade to TLS May 2000 + + + [5] Slein, J., Whitehead, E.J., et al., "WebDAV Advanced Collections + Protocol", Work In Progress. + + [6] Dierks, T. and C. Allen, "The TLS Protocol", RFC 2246, January + 1999. + + [7] Herriot, R., Butler, S., Moore, P. and R. Turner, "Internet + Printing Protocol/1.0: Encoding and Transport", RFC 2565, April + 1999. + + [8] Luotonen, A., "Tunneling TCP based protocols through Web proxy + servers", Work In Progress. (Also available in: Luotonen, Ari. + Web Proxy Servers, Prentice-Hall, 1997 ISBN:0136806120.) + + [9] Rose, M., "Writing I-Ds and RFCs using XML", RFC 2629, June + 1999. + + [10] Narten, T. and H. Alvestrand, "Guidelines for Writing an IANA + Considerations Section in RFCs", BCP 26, RFC 2434, October 1998. + + [11] Bradner, S., "Key words for use in RFCs to Indicate Requirement + Levels", BCP 14, RFC 2119, March 1997. + +Authors' Addresses + + Rohit Khare + 4K Associates / UC Irvine + 3207 Palo Verde + Irvine, CA 92612 + US + + Phone: +1 626 806 7574 + EMail: rohit@4K-associates.com + URI: http://www.4K-associates.com/ + + + Scott Lawrence + Agranat Systems, Inc. 
+ 5 Clocktower Place + Suite 400 + Maynard, MA 01754 + US + + Phone: +1 978 461 0888 + EMail: lawrence@agranat.com + URI: http://www.agranat.com/ + + + + + +Khare & Lawrence Standards Track [Page 11] + +RFC 2817 HTTP Upgrade to TLS May 2000 + + +Appendix A. Acknowledgments + + The CONNECT method was originally described in a Work in Progress + titled, "Tunneling TCP based protocols through Web proxy servers", + [8] by Ari Luotonen of Netscape Communications Corporation. It was + widely implemented by HTTP proxies, but was never made a part of any + IETF Standards Track document. The method name CONNECT was reserved, + but not defined in [1]. + + The definition provided here is derived directly from that earlier + memo, with some editorial changes and conformance to the stylistic + conventions since established in other HTTP specifications. + + Additional Thanks to: + + o Paul Hoffman for his work on the STARTTLS command extension for + ESMTP. + o Roy Fielding for assistance with the rationale behind Upgrade: + and its interaction with OPTIONS. + o Eric Rescorla for his work on standardizing the existing https: + practice to compare with. + o Marshall Rose, for the xml2rfc document type description and tools + [9]. + o Jim Whitehead, for sorting out the current range of available HTTP + status codes. + o Henrik Frystyk Nielsen, whose work on the Mandatory extension + mechanism pointed out a hop-by-hop Upgrade still requires + tunneling. + o Harald Alvestrand for improvements to the token registration + rules. + + + + + + + + + + + + + + + + + + + + + +Khare & Lawrence Standards Track [Page 12] + +RFC 2817 HTTP Upgrade to TLS May 2000 + + +Full Copyright Statement + + Copyright (C) The Internet Society (2000). All Rights Reserved. 
+ + This document and translations of it may be copied and furnished to + others, and derivative works that comment on or otherwise explain it + or assist in its implementation may be prepared, copied, published + and distributed, in whole or in part, without restriction of any + kind, provided that the above copyright notice and this paragraph are + included on all such copies and derivative works. However, this + document itself may not be modified in any way, such as by removing + the copyright notice or references to the Internet Society or other + Internet organizations, except as needed for the purpose of + developing Internet standards in which case the procedures for + copyrights defined in the Internet Standards process must be + followed, or as required to translate it into languages other than + English. + + The limited permissions granted above are perpetual and will not be + revoked by the Internet Society or its successors or assigns. + + This document and the information contained herein is provided on an + "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING + TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING + BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION + HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF + MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + +Acknowledgement + + Funding for the RFC Editor function is currently provided by the + Internet Society. + + + + + + + + + + + + + + + + + + + +Khare & Lawrence Standards Track [Page 13] + diff --git a/docs/specs/rfc2818.txt b/docs/specs/rfc2818.txt new file mode 100644 index 0000000..219a1c4 --- /dev/null +++ b/docs/specs/rfc2818.txt @@ -0,0 +1,395 @@ + + + + + + +Network Working Group E. Rescorla +Request for Comments: 2818 RTFM, Inc. +Category: Informational May 2000 + + + HTTP Over TLS + +Status of this Memo + + This memo provides information for the Internet community. 
It does + not specify an Internet standard of any kind. Distribution of this + memo is unlimited. + +Copyright Notice + + Copyright (C) The Internet Society (2000). All Rights Reserved. + +Abstract + + This memo describes how to use TLS to secure HTTP connections over + the Internet. Current practice is to layer HTTP over SSL (the + predecessor to TLS), distinguishing secured traffic from insecure + traffic by the use of a different server port. This document + documents that practice using TLS. A companion document describes a + method for using HTTP/TLS over the same port as normal HTTP + [RFC2817]. + +Table of Contents + + 1. Introduction . . . . . . . . . . . . . . . . . . . . . . 2 + 1.1. Requirements Terminology . . . . . . . . . . . . . . . 2 + 2. HTTP Over TLS . . . . . . . . . . . . . . . . . . . . . . 2 + 2.1. Connection Initiation . . . . . . . . . . . . . . . . . 2 + 2.2. Connection Closure . . . . . . . . . . . . . . . . . . 2 + 2.2.1. Client Behavior . . . . . . . . . . . . . . . . . . . 3 + 2.2.2. Server Behavior . . . . . . . . . . . . . . . . . . . 3 + 2.3. Port Number . . . . . . . . . . . . . . . . . . . . . . 4 + 2.4. URI Format . . . . . . . . . . . . . . . . . . . . . . 4 + 3. Endpoint Identification . . . . . . . . . . . . . . . . . 4 + 3.1. Server Identity . . . . . . . . . . . . . . . . . . . . 4 + 3.2. Client Identity . . . . . . . . . . . . . . . . . . . . 5 + References . . . . . . . . . . . . . . . . . . . . . . . . . 6 + Security Considerations . . . . . . . . . . . . . . . . . . 6 + Author's Address . . . . . . . . . . . . . . . . . . . . . . 6 + Full Copyright Statement . . . . . . . . . . . . . . . . . . 7 + + + + + + +Rescorla Informational [Page 1] + +RFC 2818 HTTP Over TLS May 2000 + + +1. Introduction + + HTTP [RFC2616] was originally used in the clear on the Internet. + However, increased use of HTTP for sensitive applications has + required security measures. 
SSL, and its successor TLS [RFC2246] were + designed to provide channel-oriented security. This document + describes how to use HTTP over TLS. + +1.1. Requirements Terminology + + Keywords "MUST", "MUST NOT", "REQUIRED", "SHOULD", "SHOULD NOT" and + "MAY" that appear in this document are to be interpreted as described + in [RFC2119]. + +2. HTTP Over TLS + + Conceptually, HTTP/TLS is very simple. Simply use HTTP over TLS + precisely as you would use HTTP over TCP. + +2.1. Connection Initiation + + The agent acting as the HTTP client should also act as the TLS + client. It should initiate a connection to the server on the + appropriate port and then send the TLS ClientHello to begin the TLS + handshake. When the TLS handshake has finished, the client may then + initiate the first HTTP request. All HTTP data MUST be sent as TLS + "application data". Normal HTTP behavior, including retained + connections, should be followed. + +2.2. Connection Closure + + TLS provides a facility for secure connection closure. When a valid + closure alert is received, an implementation can be assured that no + further data will be received on that connection. TLS + implementations MUST initiate an exchange of closure alerts before + closing a connection. A TLS implementation MAY, after sending a + closure alert, close the connection without waiting for the peer to + send its closure alert, generating an "incomplete close". Note that + an implementation which does this MAY choose to reuse the session. + This SHOULD only be done when the application knows (typically + through detecting HTTP message boundaries) that it has received all + the message data that it cares about. + + As specified in [RFC2246], any implementation which receives a + connection close without first receiving a valid closure alert (a + "premature close") MUST NOT reuse that session. 
Note that a + premature close does not call into question the security of the data + already received, but simply indicates that subsequent data might + + + +Rescorla Informational [Page 2] + +RFC 2818 HTTP Over TLS May 2000 + + + have been truncated. Because TLS is oblivious to HTTP + request/response boundaries, it is necessary to examine the HTTP data + itself (specifically the Content-Length header) to determine whether + the truncation occurred inside a message or between messages. + +2.2.1. Client Behavior + + Because HTTP uses connection closure to signal end of server data, + client implementations MUST treat any premature closes as errors and + the data received as potentially truncated. While in some cases the + HTTP protocol allows the client to find out whether truncation took + place so that, if it received the complete reply, it may tolerate + such errors following the principle to "[be] strict when sending and + tolerant when receiving" [RFC1958], often truncation does not show in + the HTTP protocol data; two cases in particular deserve special note: + + A HTTP response without a Content-Length header. Since data length + in this situation is signalled by connection close a premature + close generated by the server cannot be distinguished from a + spurious close generated by an attacker. + + A HTTP response with a valid Content-Length header closed before + all data has been read. Because TLS does not provide document + oriented protection, it is impossible to determine whether the + server has miscomputed the Content-Length or an attacker has + truncated the connection. + + There is one exception to the above rule. When encountering a + premature close, a client SHOULD treat as completed all requests for + which it has received as much data as specified in the Content-Length + header. + + A client detecting an incomplete close SHOULD recover gracefully. It + MAY resume a TLS session closed in this fashion. 
+ + Clients MUST send a closure alert before closing the connection. + Clients which are unprepared to receive any more data MAY choose not + to wait for the server's closure alert and simply close the + connection, thus generating an incomplete close on the server side. + +2.2.2. Server Behavior + + RFC 2616 permits an HTTP client to close the connection at any time, + and requires servers to recover gracefully. In particular, servers + SHOULD be prepared to receive an incomplete close from the client, + since the client can often determine when the end of server data is. + Servers SHOULD be willing to resume TLS sessions closed in this + fashion. + + + +Rescorla Informational [Page 3] + +RFC 2818 HTTP Over TLS May 2000 + + + Implementation note: In HTTP implementations which do not use + persistent connections, the server ordinarily expects to be able to + signal end of data by closing the connection. When Content-Length is + used, however, the client may have already sent the closure alert and + dropped the connection. + + Servers MUST attempt to initiate an exchange of closure alerts with + the client before closing the connection. Servers MAY close the + connection after sending the closure alert, thus generating an + incomplete close on the client side. + +2.3. Port Number + + The first data that an HTTP server expects to receive from the client + is the Request-Line production. The first data that a TLS server (and + hence an HTTP/TLS server) expects to receive is the ClientHello. + Consequently, common practice has been to run HTTP/TLS over a + separate port in order to distinguish which protocol is being used. + When HTTP/TLS is being run over a TCP/IP connection, the default port + is 443. This does not preclude HTTP/TLS from being run over another + transport. TLS only presumes a reliable connection-oriented data + stream. + +2.4. 
URI Format + + HTTP/TLS is differentiated from HTTP URIs by using the 'https' + protocol identifier in place of the 'http' protocol identifier. An + example URI specifying HTTP/TLS is: + + https://www.example.com/~smith/home.html + +3. Endpoint Identification + +3.1. Server Identity + + In general, HTTP/TLS requests are generated by dereferencing a URI. + As a consequence, the hostname for the server is known to the client. + If the hostname is available, the client MUST check it against the + server's identity as presented in the server's Certificate message, + in order to prevent man-in-the-middle attacks. + + If the client has external information as to the expected identity of + the server, the hostname check MAY be omitted. (For instance, a + client may be connecting to a machine whose address and hostname are + dynamic but the client knows the certificate that the server will + present.) In such cases, it is important to narrow the scope of + acceptable certificates as much as possible in order to prevent man + + + + +Rescorla Informational [Page 4] + +RFC 2818 HTTP Over TLS May 2000 + + + in the middle attacks. In special cases, it may be appropriate for + the client to simply ignore the server's identity, but it must be + understood that this leaves the connection open to active attack. + + If a subjectAltName extension of type dNSName is present, that MUST + be used as the identity. Otherwise, the (most specific) Common Name + field in the Subject field of the certificate MUST be used. Although + the use of the Common Name is existing practice, it is deprecated and + Certification Authorities are encouraged to use the dNSName instead. + + Matching is performed using the matching rules specified by + [RFC2459]. If more than one identity of a given type is present in + the certificate (e.g., more than one dNSName name, a match in any one + of the set is considered acceptable.) 
Names may contain the wildcard + character * which is considered to match any single domain name + component or component fragment. E.g., *.a.com matches foo.a.com but + not bar.foo.a.com. f*.com matches foo.com but not bar.com. + + In some cases, the URI is specified as an IP address rather than a + hostname. In this case, the iPAddress subjectAltName must be present + in the certificate and must exactly match the IP in the URI. + + If the hostname does not match the identity in the certificate, user + oriented clients MUST either notify the user (clients MAY give the + user the opportunity to continue with the connection in any case) or + terminate the connection with a bad certificate error. Automated + clients MUST log the error to an appropriate audit log (if available) + and SHOULD terminate the connection (with a bad certificate error). + Automated clients MAY provide a configuration setting that disables + this check, but MUST provide a setting which enables it. + + Note that in many cases the URI itself comes from an untrusted + source. The above-described check provides no protection against + attacks where this source is compromised. For example, if the URI was + obtained by clicking on an HTML page which was itself obtained + without using HTTP/TLS, a man in the middle could have replaced the + URI. In order to prevent this form of attack, users should carefully + examine the certificate presented by the server to determine if it + meets their expectations. + +3.2. Client Identity + + Typically, the server has no external knowledge of what the client's + identity ought to be and so checks (other than that the client has a + certificate chain rooted in an appropriate CA) are not possible. If a + server has such knowledge (typically from some source external to + HTTP or TLS) it SHOULD check the identity as described above. 
+ + + + +Rescorla Informational [Page 5] + +RFC 2818 HTTP Over TLS May 2000 + + +References + + [RFC2459] Housley, R., Ford, W., Polk, W. and D. Solo, "Internet + Public Key Infrastructure: Part I: X.509 Certificate and + CRL Profile", RFC 2459, January 1999. + + [RFC2616] Fielding, R., Gettys, J., Mogul, J., Frystyk, H., Masinter, + L., Leach, P. and T. Berners-Lee, "Hypertext Transfer + Protocol, HTTP/1.1", RFC 2616, June 1999. + + [RFC2119] Bradner, S., "Key Words for use in RFCs to indicate + Requirement Levels", BCP 14, RFC 2119, March 1997. + + [RFC2246] Dierks, T. and C. Allen, "The TLS Protocol", RFC 2246, + January 1999. + + [RFC2817] Khare, R. and S. Lawrence, "Upgrading to TLS Within + HTTP/1.1", RFC 2817, May 2000. + +Security Considerations + + This entire document is about security. + +Author's Address + + Eric Rescorla + RTFM, Inc. + 30 Newell Road, #16 + East Palo Alto, CA 94303 + + Phone: (650) 328-8631 + EMail: ekr@rtfm.com + + + + + + + + + + + + + + + + + + + +Rescorla Informational [Page 6] + +RFC 2818 HTTP Over TLS May 2000 + + +Full Copyright Statement + + Copyright (C) The Internet Society (2000). All Rights Reserved. + + This document and translations of it may be copied and furnished to + others, and derivative works that comment on or otherwise explain it + or assist in its implementation may be prepared, copied, published + and distributed, in whole or in part, without restriction of any + kind, provided that the above copyright notice and this paragraph are + included on all such copies and derivative works. However, this + document itself may not be modified in any way, such as by removing + the copyright notice or references to the Internet Society or other + Internet organizations, except as needed for the purpose of + developing Internet standards in which case the procedures for + copyrights defined in the Internet Standards process must be + followed, or as required to translate it into languages other than + English. 
+ + The limited permissions granted above are perpetual and will not be + revoked by the Internet Society or its successors or assigns. + + This document and the information contained herein is provided on an + "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING + TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING + BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION + HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF + MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + +Acknowledgement + + Funding for the RFC Editor function is currently provided by the + Internet Society. + + + + + + + + + + + + + + + + + + + +Rescorla Informational [Page 7] + diff --git a/docs/specs/rfc2965.txt b/docs/specs/rfc2965.txt new file mode 100644 index 0000000..8a4d02b --- /dev/null +++ b/docs/specs/rfc2965.txt @@ -0,0 +1,1459 @@ + + + + + + +Network Working Group D. Kristol +Request for Comments: 2965 Bell Laboratories, Lucent Technologies +Obsoletes: 2109 L. Montulli +Category: Standards Track Epinions.com, Inc. + October 2000 + + + HTTP State Management Mechanism + +Status of this Memo + + This document specifies an Internet standards track protocol for the + Internet community, and requests discussion and suggestions for + improvements. Please refer to the current edition of the "Internet + Official Protocol Standards" (STD 1) for the standardization state + and status of this protocol. Distribution of this memo is unlimited. + +Copyright Notice + + Copyright (C) The Internet Society (2000). All Rights Reserved. + +IESG Note + + The IESG notes that this mechanism makes use of the .local top-level + domain (TLD) internally when handling host names that don't contain + any dots, and that this mechanism might not work in the expected way + should an actual .local TLD ever be registered. 
+ +Abstract + + This document specifies a way to create a stateful session with + Hypertext Transfer Protocol (HTTP) requests and responses. It + describes three new headers, Cookie, Cookie2, and Set-Cookie2, which + carry state information between participating origin servers and user + agents. The method described here differs from Netscape's Cookie + proposal [Netscape], but it can interoperate with HTTP/1.0 user + agents that use Netscape's method. (See the HISTORICAL section.) + + This document reflects implementation experience with RFC 2109 and + obsoletes it. + +1. TERMINOLOGY + + The terms user agent, client, server, proxy, origin server, and + http_URL have the same meaning as in the HTTP/1.1 specification + [RFC2616]. The terms abs_path and absoluteURI have the same meaning + as in the URI Syntax specification [RFC2396]. + + + + +Kristol & Montulli Standards Track [Page 1] + +RFC 2965 HTTP State Management Mechanism October 2000 + + + Host name (HN) means either the host domain name (HDN) or the numeric + Internet Protocol (IP) address of a host. The fully qualified domain + name is preferred; use of numeric IP addresses is strongly + discouraged. + + The terms request-host and request-URI refer to the values the client + would send to the server as, respectively, the host (but not port) + and abs_path portions of the absoluteURI (http_URL) of the HTTP + request line. Note that request-host is a HN. + + The term effective host name is related to host name. If a host name + contains no dots, the effective host name is that name with the + string .local appended to it. Otherwise the effective host name is + the same as the host name. Note that all effective host names + contain at least one dot. + + The term request-port refers to the port portion of the absoluteURI + (http_URL) of the HTTP request line. If the absoluteURI has no + explicit port, the request-port is the HTTP default, 80. 
The + request-port of a cookie is the request-port of the request in which + a Set-Cookie2 response header was returned to the user agent. + + Host names can be specified either as an IP address or a HDN string. + Sometimes we compare one host name with another. (Such comparisons + SHALL be case-insensitive.) Host A's name domain-matches host B's if + + * their host name strings string-compare equal; or + + * A is a HDN string and has the form NB, where N is a non-empty + name string, B has the form .B', and B' is a HDN string. (So, + x.y.com domain-matches .Y.com but not Y.com.) + + Note that domain-match is not a commutative operation: a.b.c.com + domain-matches .c.com, but not the reverse. + + The reach R of a host name H is defined as follows: + + * If + + - H is the host domain name of a host; and, + + - H has the form A.B; and + + - A has no embedded (that is, interior) dots; and + + - B has at least one embedded dot, or B is the string "local". + then the reach of H is .B. + + + + +Kristol & Montulli Standards Track [Page 2] + +RFC 2965 HTTP State Management Mechanism October 2000 + + + * Otherwise, the reach of H is H. + + For two strings that represent paths, P1 and P2, P1 path-matches P2 + if P2 is a prefix of P1 (including the case where P1 and P2 string- + compare equal). Thus, the string /tec/waldo path-matches /tec. + + Because it was used in Netscape's original implementation of state + management, we will use the term cookie to refer to the state + information that passes between an origin server and user agent, and + that gets stored by the user agent. + +1.1 Requirements + + The key words "MAY", "MUST", "MUST NOT", "OPTIONAL", "RECOMMENDED", + "REQUIRED", "SHALL", "SHALL NOT", "SHOULD", "SHOULD NOT" in this + document are to be interpreted as described in RFC 2119 [RFC2119]. + +2. STATE AND SESSIONS + + This document describes a way to create stateful sessions with HTTP + requests and responses. 
Currently, HTTP servers respond to each + client request without relating that request to previous or + subsequent requests; the state management mechanism allows clients + and servers that wish to exchange state information to place HTTP + requests and responses within a larger context, which we term a + "session". This context might be used to create, for example, a + "shopping cart", in which user selections can be aggregated before + purchase, or a magazine browsing system, in which a user's previous + reading affects which offerings are presented. + + Neither clients nor servers are required to support cookies. A + server MAY refuse to provide content to a client that does not return + the cookies it sends. + +3. DESCRIPTION + + We describe here a way for an origin server to send state information + to the user agent, and for the user agent to return the state + information to the origin server. The goal is to have a minimal + impact on HTTP and user agents. + +3.1 Syntax: General + + The two state management headers, Set-Cookie2 and Cookie, have common + syntactic properties involving attribute-value pairs. The following + grammar uses the notation, and tokens DIGIT (decimal digits), token + + + + + +Kristol & Montulli Standards Track [Page 3] + +RFC 2965 HTTP State Management Mechanism October 2000 + + + (informally, a sequence of non-special, non-white space characters), + and http_URL from the HTTP/1.1 specification [RFC2616] to describe + their syntax. + + av-pairs = av-pair *(";" av-pair) + av-pair = attr ["=" value] ; optional value + attr = token + value = token | quoted-string + + Attributes (names) (attr) are case-insensitive. White space is + permitted between tokens. Note that while the above syntax + description shows value as optional, most attrs require them. + + NOTE: The syntax above allows whitespace between the attribute and + the = sign. + +3.2 Origin Server Role + + 3.2.1 General The origin server initiates a session, if it so + desires. 
To do so, it returns an extra response header to the + client, Set-Cookie2. (The details follow later.) + + A user agent returns a Cookie request header (see below) to the + origin server if it chooses to continue a session. The origin server + MAY ignore it or use it to determine the current state of the + session. It MAY send back to the client a Set-Cookie2 response + header with the same or different information, or it MAY send no + Set-Cookie2 header at all. The origin server effectively ends a + session by sending the client a Set-Cookie2 header with Max-Age=0. + + Servers MAY return Set-Cookie2 response headers with any response. + User agents SHOULD send Cookie request headers, subject to other + rules detailed below, with every request. + + An origin server MAY include multiple Set-Cookie2 headers in a + response. Note that an intervening gateway could fold multiple such + headers into a single header. + + + + + + + + + + + + + + +Kristol & Montulli Standards Track [Page 4] + +RFC 2965 HTTP State Management Mechanism October 2000 + + + 3.2.2 Set-Cookie2 Syntax The syntax for the Set-Cookie2 response + header is + + set-cookie = "Set-Cookie2:" cookies + cookies = 1#cookie + cookie = NAME "=" VALUE *(";" set-cookie-av) + NAME = attr + VALUE = value + set-cookie-av = "Comment" "=" value + | "CommentURL" "=" <"> http_URL <"> + | "Discard" + | "Domain" "=" value + | "Max-Age" "=" value + | "Path" "=" value + | "Port" [ "=" <"> portlist <"> ] + | "Secure" + | "Version" "=" 1*DIGIT + portlist = 1#portnum + portnum = 1*DIGIT + + Informally, the Set-Cookie2 response header comprises the token Set- + Cookie2:, followed by a comma-separated list of one or more cookies. + Each cookie begins with a NAME=VALUE pair, followed by zero or more + semi-colon-separated attribute-value pairs. The syntax for + attribute-value pairs was shown earlier. The specific attributes and + the semantics of their values follows. 
The NAME=VALUE attribute- + value pair MUST come first in each cookie. The others, if present, + can occur in any order. If an attribute appears more than once in a + cookie, the client SHALL use only the value associated with the first + appearance of the attribute; a client MUST ignore values after the + first. + + The NAME of a cookie MAY be the same as one of the attributes in this + specification. However, because the cookie's NAME must come first in + a Set-Cookie2 response header, the NAME and its VALUE cannot be + confused with an attribute-value pair. + + NAME=VALUE + REQUIRED. The name of the state information ("cookie") is NAME, + and its value is VALUE. NAMEs that begin with $ are reserved and + MUST NOT be used by applications. + + The VALUE is opaque to the user agent and may be anything the + origin server chooses to send, possibly in a server-selected + printable ASCII encoding. "Opaque" implies that the content is of + interest and relevance only to the origin server. The content + may, in fact, be readable by anyone that examines the Set-Cookie2 + header. + + + +Kristol & Montulli Standards Track [Page 5] + +RFC 2965 HTTP State Management Mechanism October 2000 + + + Comment=value + OPTIONAL. Because cookies can be used to derive or store private + information about a user, the value of the Comment attribute + allows an origin server to document how it intends to use the + cookie. The user can inspect the information to decide whether to + initiate or continue a session with this cookie. Characters in + value MUST be in UTF-8 encoding. [RFC2279] + + CommentURL="http_URL" + OPTIONAL. Because cookies can be used to derive or store private + information about a user, the CommentURL attribute allows an + origin server to document how it intends to use the cookie. The + user can inspect the information identified by the URL to decide + whether to initiate or continue a session with this cookie. + + Discard + OPTIONAL. 
The Discard attribute instructs the user agent to + discard the cookie unconditionally when the user agent terminates. + + Domain=value + OPTIONAL. The value of the Domain attribute specifies the domain + for which the cookie is valid. If an explicitly specified value + does not start with a dot, the user agent supplies a leading dot. + + Max-Age=value + OPTIONAL. The value of the Max-Age attribute is delta-seconds, + the lifetime of the cookie in seconds, a decimal non-negative + integer. To handle cached cookies correctly, a client SHOULD + calculate the age of the cookie according to the age calculation + rules in the HTTP/1.1 specification [RFC2616]. When the age is + greater than delta-seconds seconds, the client SHOULD discard the + cookie. A value of zero means the cookie SHOULD be discarded + immediately. + + Path=value + OPTIONAL. The value of the Path attribute specifies the subset of + URLs on the origin server to which this cookie applies. + + Port[="portlist"] + OPTIONAL. The Port attribute restricts the port to which a cookie + may be returned in a Cookie request header. Note that the syntax + REQUIREs quotes around the OPTIONAL portlist even if there is only + one portnum in portlist. + + + + + + + + +Kristol & Montulli Standards Track [Page 6] + +RFC 2965 HTTP State Management Mechanism October 2000 + + + Secure + OPTIONAL. The Secure attribute (with no value) directs the user + agent to use only (unspecified) secure means to contact the origin + server whenever it sends back this cookie, to protect the + confidentiality and authenticity of the information in the cookie. + + The user agent (possibly with user interaction) MAY determine what + level of security it considers appropriate for "secure" cookies. + The Secure attribute should be considered security advice from the + server to the user agent, indicating that it is in the session's + interest to protect the cookie contents.
When it sends a "secure" + cookie back to a server, the user agent SHOULD use no less than + the same level of security as was used when it received the cookie + from the server. + + Version=value + REQUIRED. The value of the Version attribute, a decimal integer, + identifies the version of the state management specification to + which the cookie conforms. For this specification, Version=1 + applies. + + 3.2.3 Controlling Caching An origin server must be cognizant of the + effect of possible caching of both the returned resource and the + Set-Cookie2 header. Caching "public" documents is desirable. For + example, if the origin server wants to use a public document such as + a "front door" page as a sentinel to indicate the beginning of a + session for which a Set-Cookie2 response header must be generated, + the page SHOULD be stored in caches "pre-expired" so that the origin + server will see further requests. "Private documents", for example + those that contain information strictly private to a session, SHOULD + NOT be cached in shared caches. + + If the cookie is intended for use by a single user, the Set-Cookie2 + header SHOULD NOT be cached. A Set-Cookie2 header that is intended + to be shared by multiple users MAY be cached. 
+ + The origin server SHOULD send the following additional HTTP/1.1 + response headers, depending on circumstances: + + * To suppress caching of the Set-Cookie2 header: + + Cache-control: no-cache="set-cookie2" + + and one of the following: + + * To suppress caching of a private document in shared caches: + + Cache-control: private + + + +Kristol & Montulli Standards Track [Page 7] + +RFC 2965 HTTP State Management Mechanism October 2000 + + + * To allow caching of a document and require that it be validated + before returning it to the client: + + Cache-Control: must-revalidate, max-age=0 + + * To allow caching of a document, but to require that proxy + caches (not user agent caches) validate it before returning it + to the client: + + Cache-Control: proxy-revalidate, max-age=0 + + * To allow caching of a document and request that it be validated + before returning it to the client (by "pre-expiring" it): + + Cache-control: max-age=0 + + Not all caches will revalidate the document in every case. + + HTTP/1.1 servers MUST send Expires: old-date (where old-date is a + date long in the past) on responses containing Set-Cookie2 response + headers unless they know for certain (by out of band means) that + there are no HTTP/1.0 proxies in the response chain. HTTP/1.1 + servers MAY send other Cache-Control directives that permit caching + by HTTP/1.1 proxies in addition to the Expires: old-date directive; + the Cache-Control directive will override the Expires: old-date for + HTTP/1.1 proxies. + +3.3 User Agent Role + + 3.3.1 Interpreting Set-Cookie2 The user agent keeps separate track + of state information that arrives via Set-Cookie2 response headers + from each origin server (as distinguished by name or IP address and + port). The user agent MUST ignore attribute-value pairs whose + attribute it does not recognize. 
The user agent applies these + defaults for optional attributes that are missing: + + Discard The default behavior is dictated by the presence or absence + of a Max-Age attribute. + + Domain Defaults to the effective request-host. (Note that because + there is no dot at the beginning of effective request-host, + the default Domain can only domain-match itself.) + + Max-Age The default behavior is to discard the cookie when the user + agent exits. + + Path Defaults to the path of the request URL that generated the + Set-Cookie2 response, up to and including the right-most /. + + + +Kristol & Montulli Standards Track [Page 8] + +RFC 2965 HTTP State Management Mechanism October 2000 + + + Port The default behavior is that a cookie MAY be returned to any + request-port. + + Secure If absent, the user agent MAY send the cookie over an + insecure channel. + + 3.3.2 Rejecting Cookies To prevent possible security or privacy + violations, a user agent rejects a cookie according to rules below. + The goal of the rules is to try to limit the set of servers for which + a cookie is valid, based on the values of the Path, Domain, and Port + attributes and the request-URI, request-host and request-port. + + A user agent rejects (SHALL NOT store its information) if the Version + attribute is missing. Moreover, a user agent rejects (SHALL NOT + store its information) if any of the following is true of the + attributes explicitly present in the Set-Cookie2 response header: + + * The value for the Path attribute is not a prefix of the + request-URI. + + * The value for the Domain attribute contains no embedded dots, + and the value is not .local. + + * The effective host name that derives from the request-host does + not domain-match the Domain attribute. + + * The request-host is a HDN (not IP address) and has the form HD, + where D is the value of the Domain attribute, and H is a string + that contains one or more dots. 
+ + * The Port attribute has a "port-list", and the request-port was + not in the list. + + Examples: + + * A Set-Cookie2 from request-host y.x.foo.com for Domain=.foo.com + would be rejected, because H is y.x and contains a dot. + + * A Set-Cookie2 from request-host x.foo.com for Domain=.foo.com + would be accepted. + + * A Set-Cookie2 with Domain=.com or Domain=.com., will always be + rejected, because there is no embedded dot. + + * A Set-Cookie2 with Domain=ajax.com will be accepted, and the + value for Domain will be taken to be .ajax.com, because a dot + gets prepended to the value. + + + + +Kristol & Montulli Standards Track [Page 9] + +RFC 2965 HTTP State Management Mechanism October 2000 + + + * A Set-Cookie2 with Port="80,8000" will be accepted if the + request was made to port 80 or 8000 and will be rejected + otherwise. + + * A Set-Cookie2 from request-host example for Domain=.local will + be accepted, because the effective host name for the request- + host is example.local, and example.local domain-matches .local. + + 3.3.3 Cookie Management If a user agent receives a Set-Cookie2 + response header whose NAME is the same as that of a cookie it has + previously stored, the new cookie supersedes the old when: the old + and new Domain attribute values compare equal, using a case- + insensitive string-compare; and, the old and new Path attribute + values string-compare equal (case-sensitive). However, if the Set- + Cookie2 has a value for Max-Age of zero, the (old and new) cookie is + discarded. Otherwise a cookie persists (resources permitting) until + whichever happens first, then gets discarded: its Max-Age lifetime is + exceeded; or, if the Discard attribute is set, the user agent + terminates the session. 
+ + Because user agents have finite space in which to store cookies, they + MAY also discard older cookies to make space for newer ones, using, + for example, a least-recently-used algorithm, along with constraints + on the maximum number of cookies that each origin server may set. + + If a Set-Cookie2 response header includes a Comment attribute, the + user agent SHOULD store that information in a human-readable form + with the cookie and SHOULD display the comment text as part of a + cookie inspection user interface. + + If a Set-Cookie2 response header includes a CommentURL attribute, the + user agent SHOULD store that information in a human-readable form + with the cookie, or, preferably, SHOULD allow the user to follow the + http_URL link as part of a cookie inspection user interface. + + The cookie inspection user interface may include a facility whereby a + user can decide, at the time the user agent receives the Set-Cookie2 + response header, whether or not to accept the cookie. A potentially + confusing situation could arise if the following sequence occurs: + + * the user agent receives a cookie that contains a CommentURL + attribute; + + * the user agent's cookie inspection interface is configured so + that it presents a dialog to the user before the user agent + accepts the cookie; + + + + + +Kristol & Montulli Standards Track [Page 10] + +RFC 2965 HTTP State Management Mechanism October 2000 + + + * the dialog allows the user to follow the CommentURL link when + the user agent receives the cookie; and, + + * when the user follows the CommentURL link, the origin server + (or another server, via other links in the returned content) + returns another cookie. + + The user agent SHOULD NOT send any cookies in this context. The user + agent MAY discard any cookie it receives in this context that the + user has not, through some user agent mechanism, deemed acceptable. 
+ + User agents SHOULD allow the user to control cookie destruction, but + they MUST NOT extend the cookie's lifetime beyond that controlled by + the Discard and Max-Age attributes. An infrequently-used cookie may + function as a "preferences file" for network applications, and a user + may wish to keep it even if it is the least-recently-used cookie. One + possible implementation would be an interface that allows the + permanent storage of a cookie through a checkbox (or, conversely, its + immediate destruction). + + Privacy considerations dictate that the user have considerable + control over cookie management. The PRIVACY section contains more + information. + + 3.3.4 Sending Cookies to the Origin Server When it sends a request + to an origin server, the user agent includes a Cookie request header + if it has stored cookies that are applicable to the request, based on + + * the request-host and request-port; + + * the request-URI; + + * the cookie's age. + + The syntax for the header is: + +cookie = "Cookie:" cookie-version 1*((";" | ",") cookie-value) +cookie-value = NAME "=" VALUE [";" path] [";" domain] [";" port] +cookie-version = "$Version" "=" value +NAME = attr +VALUE = value +path = "$Path" "=" value +domain = "$Domain" "=" value +port = "$Port" [ "=" <"> value <"> ] + + The value of the cookie-version attribute MUST be the value from the + Version attribute of the corresponding Set-Cookie2 response header. + Otherwise the value for cookie-version is 0. The value for the path + + + +Kristol & Montulli Standards Track [Page 11] + +RFC 2965 HTTP State Management Mechanism October 2000 + + + attribute MUST be the value from the Path attribute, if one was + present, of the corresponding Set-Cookie2 response header. Otherwise + the attribute SHOULD be omitted from the Cookie request header. The + value for the domain attribute MUST be the value from the Domain + attribute, if one was present, of the corresponding Set-Cookie2 + response header. 
Otherwise the attribute SHOULD be omitted from the + Cookie request header. + + The port attribute of the Cookie request header MUST mirror the Port + attribute, if one was present, in the corresponding Set-Cookie2 + response header. That is, the port attribute MUST be present if the + Port attribute was present in the Set-Cookie2 header, and it MUST + have the same value, if any. Otherwise, if the Port attribute was + absent from the Set-Cookie2 header, the attribute likewise MUST be + omitted from the Cookie request header. + + Note that there is neither a Comment nor a CommentURL attribute in + the Cookie request header corresponding to the ones in the Set- + Cookie2 response header. The user agent does not return the comment + information to the origin server. + + The user agent applies the following rules to choose applicable + cookie-values to send in Cookie request headers from among all the + cookies it has received. + + Domain Selection + The origin server's effective host name MUST domain-match the + Domain attribute of the cookie. + + Port Selection + There are three possible behaviors, depending on the Port + attribute in the Set-Cookie2 response header: + + 1. By default (no Port attribute), the cookie MAY be sent to any + port. + + 2. If the attribute is present but has no value (e.g., Port), the + cookie MUST only be sent to the request-port it was received + from. + + 3. If the attribute has a port-list, the cookie MUST only be + returned if the new request-port is one of those listed in + port-list. + + Path Selection + The request-URI MUST path-match the Path attribute of the cookie. + + + + + +Kristol & Montulli Standards Track [Page 12] + +RFC 2965 HTTP State Management Mechanism October 2000 + + + Max-Age Selection + Cookies that have expired should have been discarded and thus are + not forwarded to an origin server. 
+ + If multiple cookies satisfy the criteria above, they are ordered in + the Cookie header such that those with more specific Path attributes + precede those with less specific. Ordering with respect to other + attributes (e.g., Domain) is unspecified. + + Note: For backward compatibility, the separator in the Cookie header + is semi-colon (;) everywhere. A server SHOULD also accept comma (,) + as the separator between cookie-values for future compatibility. + + 3.3.5 Identifying What Version is Understood: Cookie2 The Cookie2 + request header facilitates interoperation between clients and servers + that understand different versions of the cookie specification. When + the client sends one or more cookies to an origin server, if at least + one of those cookies contains a $Version attribute whose value is + different from the version that the client understands, then the + client MUST also send a Cookie2 request header, the syntax for which + is + + cookie2 = "Cookie2:" cookie-version + + Here the value for cookie-version is the highest version of cookie + specification (currently 1) that the client understands. The client + needs to send at most one such request header per request. + + 3.3.6 Sending Cookies in Unverifiable Transactions Users MUST have + control over sessions in order to ensure privacy. (See PRIVACY + section below.) To simplify implementation and to prevent an + additional layer of complexity where adequate safeguards exist, + however, this document distinguishes between transactions that are + verifiable and those that are unverifiable. A transaction is + verifiable if the user, or a user-designated agent, has the option to + review the request-URI prior to its use in the transaction. A + transaction is unverifiable if the user does not have that option. + Unverifiable transactions typically arise when a user agent + automatically requests inlined or embedded entities or when it + resolves redirection (3xx) responses from an origin server. 
+ Typically the origin transaction, the transaction that the user + initiates, is verifiable, and that transaction may directly or + indirectly induce the user agent to make unverifiable transactions. + + An unverifiable transaction is to a third-party host if its request- + host U does not domain-match the reach R of the request-host O in the + origin transaction. + + + + +Kristol & Montulli Standards Track [Page 13] + +RFC 2965 HTTP State Management Mechanism October 2000 + + + When it makes an unverifiable transaction, a user agent MUST disable + all cookie processing (i.e., MUST NOT send cookies, and MUST NOT + accept any received cookies) if the transaction is to a third-party + host. + + This restriction prevents a malicious service author from using + unverifiable transactions to induce a user agent to start or continue + a session with a server in a different domain. The starting or + continuation of such sessions could be contrary to the privacy + expectations of the user, and could also be a security problem. + + User agents MAY offer configurable options that allow the user agent, + or any autonomous programs that the user agent executes, to ignore + the above rule, so long as these override options default to "off". + + (N.B. Mechanisms may be proposed that will automate overriding the + third-party restrictions under controlled conditions.) + + Many current user agents already provide a review option that would + render many links verifiable. For instance, some user agents display + the URL that would be referenced for a particular link when the mouse + pointer is placed over that link. The user can therefore determine + whether to visit that site before causing the browser to do so. + (Though not implemented on current user agents, a similar technique + could be used for a button used to submit a form -- the user agent + could display the action to be taken if the user were to select that + button.) 
However, even this would not make all links verifiable; for + example, links to automatically loaded images would not normally be + subject to "mouse pointer" verification. + + Many user agents also provide the option for a user to view the HTML + source of a document, or to save the source to an external file where + it can be viewed by another application. While such an option does + provide a crude review mechanism, some users might not consider it + acceptable for this purpose. + +3.4 How an Origin Server Interprets the Cookie Header + + A user agent returns much of the information in the Set-Cookie2 + header to the origin server when the request-URI path-matches the + Path attribute of the cookie. When it receives a Cookie header, the + origin server SHOULD treat cookies with NAMEs whose prefix is $ + specially, as an attribute for the cookie. + + + + + + + + +Kristol & Montulli Standards Track [Page 14] + +RFC 2965 HTTP State Management Mechanism October 2000 + + +3.5 Caching Proxy Role + + One reason for separating state information from both a URL and + document content is to facilitate the scaling that caching permits. + To support cookies, a caching proxy MUST obey these rules already in + the HTTP specification: + + * Honor requests from the cache, if possible, based on cache + validity rules. + + * Pass along a Cookie request header in any request that the + proxy must make of another server. + + * Return the response to the client. Include any Set-Cookie2 + response header. + + * Cache the received response subject to the control of the usual + headers, such as Expires, + + Cache-control: no-cache + + and + + Cache-control: private + + * Cache the Set-Cookie2 subject to the control of the usual + header, + + Cache-control: no-cache="set-cookie2" + + (The Set-Cookie2 header should usually not be cached.) + + Proxies MUST NOT introduce Set-Cookie2 (Cookie) headers of their own + in proxy responses (requests). + +4. 
EXAMPLES + +4.1 Example 1 + + Most detail of request and response headers has been omitted. Assume + the user agent has no stored cookies. + + 1. User Agent -> Server + + POST /acme/login HTTP/1.1 + [form data] + + User identifies self via a form. + + + +Kristol & Montulli Standards Track [Page 15] + +RFC 2965 HTTP State Management Mechanism October 2000 + + + 2. Server -> User Agent + + HTTP/1.1 200 OK + Set-Cookie2: Customer="WILE_E_COYOTE"; Version="1"; Path="/acme" + + Cookie reflects user's identity. + + 3. User Agent -> Server + + POST /acme/pickitem HTTP/1.1 + Cookie: $Version="1"; Customer="WILE_E_COYOTE"; $Path="/acme" + [form data] + + User selects an item for "shopping basket". + + 4. Server -> User Agent + + HTTP/1.1 200 OK + Set-Cookie2: Part_Number="Rocket_Launcher_0001"; Version="1"; + Path="/acme" + + Shopping basket contains an item. + + 5. User Agent -> Server + + POST /acme/shipping HTTP/1.1 + Cookie: $Version="1"; + Customer="WILE_E_COYOTE"; $Path="/acme"; + Part_Number="Rocket_Launcher_0001"; $Path="/acme" + [form data] + + User selects shipping method from form. + + 6. Server -> User Agent + + HTTP/1.1 200 OK + Set-Cookie2: Shipping="FedEx"; Version="1"; Path="/acme" + + New cookie reflects shipping method. + + 7. User Agent -> Server + + POST /acme/process HTTP/1.1 + Cookie: $Version="1"; + Customer="WILE_E_COYOTE"; $Path="/acme"; + Part_Number="Rocket_Launcher_0001"; $Path="/acme"; + Shipping="FedEx"; $Path="/acme" + [form data] + + + +Kristol & Montulli Standards Track [Page 16] + +RFC 2965 HTTP State Management Mechanism October 2000 + + + User chooses to process order. + + 8. Server -> User Agent + + HTTP/1.1 200 OK + + Transaction is complete. + + The user agent makes a series of requests on the origin server, after + each of which it receives a new cookie. All the cookies have the + same Path attribute and (default) domain. 
Because the request-URIs + all path-match /acme, the Path attribute of each cookie, each request + contains all the cookies received so far. + +4.2 Example 2 + + This example illustrates the effect of the Path attribute. All + detail of request and response headers has been omitted. Assume the + user agent has no stored cookies. + + Imagine the user agent has received, in response to earlier requests, + the response headers + + Set-Cookie2: Part_Number="Rocket_Launcher_0001"; Version="1"; + Path="/acme" + + and + + Set-Cookie2: Part_Number="Riding_Rocket_0023"; Version="1"; + Path="/acme/ammo" + + A subsequent request by the user agent to the (same) server for URLs + of the form /acme/ammo/... would include the following request + header: + + Cookie: $Version="1"; + Part_Number="Riding_Rocket_0023"; $Path="/acme/ammo"; + Part_Number="Rocket_Launcher_0001"; $Path="/acme" + + Note that the NAME=VALUE pair for the cookie with the more specific + Path attribute, /acme/ammo, comes before the one with the less + specific Path attribute, /acme. Further note that the same cookie + name appears more than once. + + A subsequent request by the user agent to the (same) server for a URL + of the form /acme/parts/ would include the following request header: + + + + + +Kristol & Montulli Standards Track [Page 17] + +RFC 2965 HTTP State Management Mechanism October 2000 + + + Cookie: $Version="1"; Part_Number="Rocket_Launcher_0001"; + $Path="/acme" + + Here, the second cookie's Path attribute /acme/ammo is not a prefix + of the request URL, /acme/parts/, so the cookie does not get + forwarded to the server. + +5. IMPLEMENTATION CONSIDERATIONS + + Here we provide guidance on likely or desirable details for an origin + server that implements state management. + +5.1 Set-Cookie2 Content + + An origin server's content should probably be divided into disjoint + application areas, some of which require the use of state + information. 
The application areas can be distinguished by their + request URLs. The Set-Cookie2 header can incorporate information + about the application areas by setting the Path attribute for each + one. + + The session information can obviously be clear or encoded text that + describes state. However, if it grows too large, it can become + unwieldy. Therefore, an implementor might choose for the session + information to be a key to a server-side resource. Of course, using + a database creates some problems that this state management + specification was meant to avoid, namely: + + 1. keeping real state on the server side; + + 2. how and when to garbage-collect the database entry, in case the + user agent terminates the session by, for example, exiting. + +5.2 Stateless Pages + + Caching benefits the scalability of WWW. Therefore it is important + to reduce the number of documents that have state embedded in them + inherently. For example, if a shopping-basket-style application + always displays a user's current basket contents on each page, those + pages cannot be cached, because each user's basket's contents would + be different. On the other hand, if each page contains just a link + that allows the user to "Look at My Shopping Basket", the page can be + cached. + + + + + + + + +Kristol & Montulli Standards Track [Page 18] + +RFC 2965 HTTP State Management Mechanism October 2000 + + +5.3 Implementation Limits + + Practical user agent implementations have limits on the number and + size of cookies that they can store. In general, user agents' cookie + support should have no fixed limits. They should strive to store as + many frequently-used cookies as possible. 
Furthermore, general-use + user agents SHOULD provide each of the following minimum capabilities + individually, although not necessarily simultaneously: + + * at least 300 cookies + + * at least 4096 bytes per cookie (as measured by the characters + that comprise the cookie non-terminal in the syntax description + of the Set-Cookie2 header, and as received in the Set-Cookie2 + header) + + * at least 20 cookies per unique host or domain name + + User agents created for specific purposes or for limited-capacity + devices SHOULD provide at least 20 cookies of 4096 bytes, to ensure + that the user can interact with a session-based origin server. + + The information in a Set-Cookie2 response header MUST be retained in + its entirety. If for some reason there is inadequate space to store + the cookie, it MUST be discarded, not truncated. + + Applications should use as few and as small cookies as possible, and + they should cope gracefully with the loss of a cookie. + + 5.3.1 Denial of Service Attacks User agents MAY choose to set an + upper bound on the number of cookies to be stored from a given host + or domain name or on the size of the cookie information. Otherwise a + malicious server could attempt to flood a user agent with many + cookies, or large cookies, on successive responses, which would force + out cookies the user agent had received from other servers. However, + the minima specified above SHOULD still be supported. + +6. PRIVACY + + Informed consent should guide the design of systems that use cookies. + A user should be able to find out how a web site plans to use + information in a cookie and should be able to choose whether or not + those policies are acceptable. Both the user agent and the origin + server must assist informed consent. 
+ + + + + + + +Kristol & Montulli Standards Track [Page 19] + +RFC 2965 HTTP State Management Mechanism October 2000 + + +6.1 User Agent Control + + An origin server could create a Set-Cookie2 header to track the path + of a user through the server. Users may object to this behavior as + an intrusive accumulation of information, even if their identity is + not evident. (Identity might become evident, for example, if a user + subsequently fills out a form that contains identifying information.) + This state management specification therefore requires that a user + agent give the user control over such a possible intrusion, although + the interface through which the user is given this control is left + unspecified. However, the control mechanisms provided SHALL at least + allow the user + + * to completely disable the sending and saving of cookies. + + * to determine whether a stateful session is in progress. + + * to control the saving of a cookie on the basis of the cookie's + Domain attribute. + + Such control could be provided, for example, by mechanisms + + * to notify the user when the user agent is about to send a + cookie to the origin server, to offer the option not to begin a + session. + + * to display a visual indication that a stateful session is in + progress. + + * to let the user decide which cookies, if any, should be saved + when the user concludes a window or user agent session. + + * to let the user examine and delete the contents of a cookie at + any time. + + A user agent usually begins execution with no remembered state + information. It SHOULD be possible to configure a user agent never + to send Cookie headers, in which case it can never sustain state with + an origin server. (The user agent would then behave like one that is + unaware of how to handle Set-Cookie2 response headers.) + + When the user agent terminates execution, it SHOULD let the user + discard all state information. 
Alternatively, the user agent MAY ask + the user whether state information should be retained; the default + should be "no". If the user chooses to retain state information, it + would be restored the next time the user agent runs. + + + + + +Kristol & Montulli Standards Track [Page 20] + +RFC 2965 HTTP State Management Mechanism October 2000 + + + NOTE: User agents should probably be cautious about using files to + store cookies long-term. If a user runs more than one instance of + the user agent, the cookies could be commingled or otherwise + corrupted. + +6.2 Origin Server Role + + An origin server SHOULD promote informed consent by adding CommentURL + or Comment information to the cookies it sends. CommentURL is + preferred because of the opportunity to provide richer information in + a multiplicity of languages. + +6.3 Clear Text + + The information in the Set-Cookie2 and Cookie headers is unprotected. + As a consequence: + + 1. Any sensitive information that is conveyed in them is exposed + to intruders. + + 2. A malicious intermediary could alter the headers as they travel + in either direction, with unpredictable results. + + These facts imply that information of a personal and/or financial + nature should only be sent over a secure channel. For less sensitive + information, or when the content of the header is a database key, an + origin server should be vigilant to prevent a bad Cookie value from + causing failures. + + A user agent in a shared user environment poses a further risk. + Using a cookie inspection interface, User B could examine the + contents of cookies that were saved when User A used the machine. + +7. SECURITY CONSIDERATIONS + +7.1 Protocol Design + + The restrictions on the value of the Domain attribute, and the rules + concerning unverifiable transactions, are meant to reduce the ways + that cookies can "leak" to the "wrong" site. The intent is to + restrict cookies to one host, or a closely related set of hosts. 
+ Therefore a request-host is limited as to what values it can set for + Domain. We consider it acceptable for hosts host1.foo.com and + host2.foo.com to share cookies, but not a.com and b.com. + + Similarly, a server can set a Path only for cookies that are related + to the request-URI. + + + + +Kristol & Montulli Standards Track [Page 21] + +RFC 2965 HTTP State Management Mechanism October 2000 + + +7.2 Cookie Spoofing + + Proper application design can avoid spoofing attacks from related + domains. Consider: + + 1. User agent makes request to victim.cracker.edu, gets back + cookie session_id="1234" and sets the default domain + victim.cracker.edu. + + 2. User agent makes request to spoof.cracker.edu, gets back cookie + session-id="1111", with Domain=".cracker.edu". + + 3. User agent makes request to victim.cracker.edu again, and + passes + + Cookie: $Version="1"; session_id="1234", + $Version="1"; session_id="1111"; $Domain=".cracker.edu" + + The server at victim.cracker.edu should detect that the second + cookie was not one it originated by noticing that the Domain + attribute is not for itself and ignore it. + +7.3 Unexpected Cookie Sharing + + A user agent SHOULD make every attempt to prevent the sharing of + session information between hosts that are in different domains. + Embedded or inlined objects may cause particularly severe privacy + problems if they can be used to share cookies between disparate + hosts. For example, a malicious server could embed cookie + information for host a.com in a URI for a CGI on host b.com. User + agent implementors are strongly encouraged to prevent this sort of + exchange whenever possible. + +7.4 Cookies For Account Information + + While it is common practice to use them this way, cookies are not + designed or intended to be used to hold authentication information, + such as account names and passwords. 
Unless such cookies are + exchanged over an encrypted path, the account information they + contain is highly vulnerable to perusal and theft. + +8. OTHER, SIMILAR, PROPOSALS + + Apart from RFC 2109, three other proposals have been made to + accomplish similar goals. This specification began as an amalgam of + Kristol's State-Info proposal [DMK95] and Netscape's Cookie proposal + [Netscape]. + + + + +Kristol & Montulli Standards Track [Page 22] + +RFC 2965 HTTP State Management Mechanism October 2000 + + + Brian Behlendorf proposed a Session-ID header that would be user- + agent-initiated and could be used by an origin server to track + "clicktrails". It would not carry any origin-server-defined state, + however. Phillip Hallam-Baker has proposed another client-defined + session ID mechanism for similar purposes. + + While both session IDs and cookies can provide a way to sustain + stateful sessions, their intended purpose is different, and, + consequently, the privacy requirements for them are different. A + user initiates session IDs to allow servers to track progress through + them, or to distinguish multiple users on a shared machine. Cookies + are server-initiated, so the cookie mechanism described here gives + users control over something that would otherwise take place without + the users' awareness. Furthermore, cookies convey rich, server- + selected information, whereas session IDs comprise user-selected, + simple information. + +9. HISTORICAL + +9.1 Compatibility with Existing Implementations + + Existing cookie implementations, based on the Netscape specification, + use the Set-Cookie (not Set-Cookie2) header. User agents that + receive in the same response both a Set-Cookie and Set-Cookie2 + response header for the same cookie MUST discard the Set-Cookie + information and use only the Set-Cookie2 information. 
Furthermore, a + user agent MUST assume, if it received a Set-Cookie2 response header, + that the sending server complies with this document and will + understand Cookie request headers that also follow this + specification. + + New cookies MUST replace both equivalent old- and new-style cookies. + That is, if a user agent that follows both this specification and + Netscape's original specification receives a Set-Cookie2 response + header, and the NAME and the Domain and Path attributes match (per + the Cookie Management section) a Netscape-style cookie, the + Netscape-style cookie MUST be discarded, and the user agent MUST + retain only the cookie adhering to this specification. + + Older user agents that do not understand this specification, but that + do understand Netscape's original specification, will not recognize + the Set-Cookie2 response header and will receive and send cookies + according to the older specification. + + + + + + + + +Kristol & Montulli Standards Track [Page 23] + +RFC 2965 HTTP State Management Mechanism October 2000 + + + A user agent that supports both this specification and Netscape-style + cookies SHOULD send a Cookie request header that follows the older + Netscape specification if it received the cookie in a Set-Cookie + response header and not in a Set-Cookie2 response header. However, + it SHOULD send the following request header as well: + + Cookie2: $Version="1" + + The Cookie2 header advises the server that the user agent understands + new-style cookies. If the server understands new-style cookies, as + well, it SHOULD continue the stateful session by sending a Set- + Cookie2 response header, rather than Set-Cookie. A server that does + not understand new-style cookies will simply ignore the Cookie2 + request header. 
+ +9.2 Caching and HTTP/1.0 + + Some caches, such as those conforming to HTTP/1.0, will inevitably + cache the Set-Cookie2 and Set-Cookie headers, because there was no + mechanism to suppress caching of headers prior to HTTP/1.1. This + caching can lead to security problems. Documents transmitted by an + origin server along with Set-Cookie2 and Set-Cookie headers usually + either will be uncachable, or will be "pre-expired". As long as + caches obey instructions not to cache documents (following Expires: + <a time in the past> or Pragma: no-cache (HTTP/1.0), or Cache- + control: no-cache (HTTP/1.1)) uncachable documents present no + problem. However, pre-expired documents may be stored in caches. + They require validation (a conditional GET) on each new request, but + some cache operators loosen the rules for their caches, and sometimes + serve expired documents without first validating them. This + combination of factors can lead to cookies meant for one user later + being sent to another user. The Set-Cookie2 and Set-Cookie headers + are stored in the cache, and, although the document is stale + (expired), the cache returns the document in response to later + requests, including cached headers. + +10. ACKNOWLEDGEMENTS + + This document really represents the collective efforts of the HTTP + Working Group of the IETF and, particularly, the following people, in + addition to the authors: Roy Fielding, Yaron Goland, Marc Hedlund, + Ted Hardie, Koen Holtman, Shel Kaphan, Rohit Khare, Foteos Macrides, + David W. Morris. + + + + + + + + +Kristol & Montulli Standards Track [Page 24] + +RFC 2965 HTTP State Management Mechanism October 2000 + + +11. AUTHORS' ADDRESSES + + David M. Kristol + Bell Laboratories, Lucent Technologies + 600 Mountain Ave. Room 2A-333 + Murray Hill, NJ 07974 + + Phone: (908) 582-2250 + Fax: (908) 582-1239 + EMail: dmk@bell-labs.com + + + Lou Montulli + Epinions.com, Inc. + 2037 Landings Dr. + Mountain View, CA 94301 + + EMail: lou@montulli.org + +12.
REFERENCES + + [DMK95] Kristol, D.M., "Proposed HTTP State-Info Mechanism", + available at <http://portal.research.bell-labs.com/~dmk/state-info.html>, September, 1995. + + [Netscape] "Persistent Client State -- HTTP Cookies", available at + <http://www.netscape.com/newsref/std/cookie_spec.html>, + undated. + + [RFC2109] Kristol, D. and L. Montulli, "HTTP State Management + Mechanism", RFC 2109, February 1997. + + [RFC2119] Bradner, S., "Key words for use in RFCs to Indicate + Requirement Levels", BCP 14, RFC 2119, March 1997. + + [RFC2279] Yergeau, F., "UTF-8, a transformation format of Unicode + and ISO-10646", RFC 2279, January 1998. + + [RFC2396] Berners-Lee, T., Fielding, R. and L. Masinter, "Uniform + Resource Identifiers (URI): Generic Syntax", RFC 2396, + August 1998. + + [RFC2616] Fielding, R., Gettys, J., Mogul, J., Frystyk, H. and T. + Berners-Lee, "Hypertext Transfer Protocol -- HTTP/1.1", + RFC 2616, June 1999. + + + + + + +Kristol & Montulli Standards Track [Page 25] + +RFC 2965 HTTP State Management Mechanism October 2000 + + +13. Full Copyright Statement + + Copyright (C) The Internet Society (2000). All Rights Reserved. + + This document and translations of it may be copied and furnished to + others, and derivative works that comment on or otherwise explain it + or assist in its implementation may be prepared, copied, published + and distributed, in whole or in part, without restriction of any + kind, provided that the above copyright notice and this paragraph are + included on all such copies and derivative works. However, this + document itself may not be modified in any way, such as by removing + the copyright notice or references to the Internet Society or other + Internet organizations, except as needed for the purpose of + developing Internet standards in which case the procedures for + copyrights defined in the Internet Standards process must be + followed, or as required to translate it into languages other than + English.
+ + The limited permissions granted above are perpetual and will not be + revoked by the Internet Society or its successors or assigns. + + This document and the information contained herein is provided on an + "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING + TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING + BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION + HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF + MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + +Acknowledgement + + Funding for the RFC Editor function is currently provided by the + Internet Society. + + + + + + + + + + + + + + + + + + + +Kristol & Montulli Standards Track [Page 26] + diff --git a/docs/specs/rfc3986.txt b/docs/specs/rfc3986.txt new file mode 100644 index 0000000..c56ed4e --- /dev/null +++ b/docs/specs/rfc3986.txt @@ -0,0 +1,3419 @@ + + + + + + +Network Working Group T. Berners-Lee +Request for Comments: 3986 W3C/MIT +STD: 66 R. Fielding +Updates: 1738 Day Software +Obsoletes: 2732, 2396, 1808 L. Masinter +Category: Standards Track Adobe Systems + January 2005 + + + Uniform Resource Identifier (URI): Generic Syntax + +Status of This Memo + + This document specifies an Internet standards track protocol for the + Internet community, and requests discussion and suggestions for + improvements. Please refer to the current edition of the "Internet + Official Protocol Standards" (STD 1) for the standardization state + and status of this protocol. Distribution of this memo is unlimited. + +Copyright Notice + + Copyright (C) The Internet Society (2005). + +Abstract + + A Uniform Resource Identifier (URI) is a compact sequence of + characters that identifies an abstract or physical resource. This + specification defines the generic URI syntax and a process for + resolving URI references that might be in relative form, along with + guidelines and security considerations for the use of URIs on the + Internet. 
The URI syntax defines a grammar that is a superset of all + valid URIs, allowing an implementation to parse the common components + of a URI reference without knowing the scheme-specific requirements + of every possible identifier. This specification does not define a + generative grammar for URIs; that task is performed by the individual + specifications of each URI scheme. + + + + + + + + + + + + + + + +Berners-Lee, et al. Standards Track [Page 1] + +RFC 3986 URI Generic Syntax January 2005 + + +Table of Contents + + 1. Introduction . . . . . . . . . . . . . . . . . . . . . . . . . 4 + 1.1. Overview of URIs . . . . . . . . . . . . . . . . . . . . 4 + 1.1.1. Generic Syntax . . . . . . . . . . . . . . . . . 6 + 1.1.2. Examples . . . . . . . . . . . . . . . . . . . . 7 + 1.1.3. URI, URL, and URN . . . . . . . . . . . . . . . 7 + 1.2. Design Considerations . . . . . . . . . . . . . . . . . 8 + 1.2.1. Transcription . . . . . . . . . . . . . . . . . 8 + 1.2.2. Separating Identification from Interaction . . . 9 + 1.2.3. Hierarchical Identifiers . . . . . . . . . . . . 10 + 1.3. Syntax Notation . . . . . . . . . . . . . . . . . . . . 11 + 2. Characters . . . . . . . . . . . . . . . . . . . . . . . . . . 11 + 2.1. Percent-Encoding . . . . . . . . . . . . . . . . . . . . 12 + 2.2. Reserved Characters . . . . . . . . . . . . . . . . . . 12 + 2.3. Unreserved Characters . . . . . . . . . . . . . . . . . 13 + 2.4. When to Encode or Decode . . . . . . . . . . . . . . . . 14 + 2.5. Identifying Data . . . . . . . . . . . . . . . . . . . . 14 + 3. Syntax Components . . . . . . . . . . . . . . . . . . . . . . 16 + 3.1. Scheme . . . . . . . . . . . . . . . . . . . . . . . . . 17 + 3.2. Authority . . . . . . . . . . . . . . . . . . . . . . . 17 + 3.2.1. User Information . . . . . . . . . . . . . . . . 18 + 3.2.2. Host . . . . . . . . . . . . . . . . . . . . . . 18 + 3.2.3. Port . . . . . . . . . . . . . . . . . . . . . . 22 + 3.3. Path . . . . . . . . . . . . . . . . . . . . . . . . 
. . 22 + 3.4. Query . . . . . . . . . . . . . . . . . . . . . . . . . 23 + 3.5. Fragment . . . . . . . . . . . . . . . . . . . . . . . . 24 + 4. Usage . . . . . . . . . . . . . . . . . . . . . . . . . . . . 25 + 4.1. URI Reference . . . . . . . . . . . . . . . . . . . . . 25 + 4.2. Relative Reference . . . . . . . . . . . . . . . . . . . 26 + 4.3. Absolute URI . . . . . . . . . . . . . . . . . . . . . . 27 + 4.4. Same-Document Reference . . . . . . . . . . . . . . . . 27 + 4.5. Suffix Reference . . . . . . . . . . . . . . . . . . . . 27 + 5. Reference Resolution . . . . . . . . . . . . . . . . . . . . . 28 + 5.1. Establishing a Base URI . . . . . . . . . . . . . . . . 28 + 5.1.1. Base URI Embedded in Content . . . . . . . . . . 29 + 5.1.2. Base URI from the Encapsulating Entity . . . . . 29 + 5.1.3. Base URI from the Retrieval URI . . . . . . . . 30 + 5.1.4. Default Base URI . . . . . . . . . . . . . . . . 30 + 5.2. Relative Resolution . . . . . . . . . . . . . . . . . . 30 + 5.2.1. Pre-parse the Base URI . . . . . . . . . . . . . 31 + 5.2.2. Transform References . . . . . . . . . . . . . . 31 + 5.2.3. Merge Paths . . . . . . . . . . . . . . . . . . 32 + 5.2.4. Remove Dot Segments . . . . . . . . . . . . . . 33 + 5.3. Component Recomposition . . . . . . . . . . . . . . . . 35 + 5.4. Reference Resolution Examples . . . . . . . . . . . . . 35 + 5.4.1. Normal Examples . . . . . . . . . . . . . . . . 36 + 5.4.2. Abnormal Examples . . . . . . . . . . . . . . . 36 + + + +Berners-Lee, et al. Standards Track [Page 2] + +RFC 3986 URI Generic Syntax January 2005 + + + 6. Normalization and Comparison . . . . . . . . . . . . . . . . . 38 + 6.1. Equivalence . . . . . . . . . . . . . . . . . . . . . . 38 + 6.2. Comparison Ladder . . . . . . . . . . . . . . . . . . . 39 + 6.2.1. Simple String Comparison . . . . . . . . . . . . 39 + 6.2.2. Syntax-Based Normalization . . . . . . . . . . . 40 + 6.2.3. Scheme-Based Normalization . . . . . . . . . . . 41 + 6.2.4. 
Protocol-Based Normalization . . . . . . . . . . 42 + 7. Security Considerations . . . . . . . . . . . . . . . . . . . 43 + 7.1. Reliability and Consistency . . . . . . . . . . . . . . 43 + 7.2. Malicious Construction . . . . . . . . . . . . . . . . . 43 + 7.3. Back-End Transcoding . . . . . . . . . . . . . . . . . . 44 + 7.4. Rare IP Address Formats . . . . . . . . . . . . . . . . 45 + 7.5. Sensitive Information . . . . . . . . . . . . . . . . . 45 + 7.6. Semantic Attacks . . . . . . . . . . . . . . . . . . . . 45 + 8. IANA Considerations . . . . . . . . . . . . . . . . . . . . . 46 + 9. Acknowledgements . . . . . . . . . . . . . . . . . . . . . . . 46 + 10. References . . . . . . . . . . . . . . . . . . . . . . . . . . 46 + 10.1. Normative References . . . . . . . . . . . . . . . . . . 46 + 10.2. Informative References . . . . . . . . . . . . . . . . . 47 + A. Collected ABNF for URI . . . . . . . . . . . . . . . . . . . . 49 + B. Parsing a URI Reference with a Regular Expression . . . . . . 50 + C. Delimiting a URI in Context . . . . . . . . . . . . . . . . . 51 + D. Changes from RFC 2396 . . . . . . . . . . . . . . . . . . . . 53 + D.1. Additions . . . . . . . . . . . . . . . . . . . . . . . 53 + D.2. Modifications . . . . . . . . . . . . . . . . . . . . . 53 + Index . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 56 + Authors' Addresses . . . . . . . . . . . . . . . . . . . . . . . . 60 + Full Copyright Statement . . . . . . . . . . . . . . . . . . . . . 61 + + + + + + + + + + + + + + + + + + + + + + + +Berners-Lee, et al. Standards Track [Page 3] + +RFC 3986 URI Generic Syntax January 2005 + + +1. Introduction + + A Uniform Resource Identifier (URI) provides a simple and extensible + means for identifying a resource. 
This specification of URI syntax + and semantics is derived from concepts introduced by the World Wide + Web global information initiative, whose use of these identifiers + dates from 1990 and is described in "Universal Resource Identifiers + in WWW" [RFC1630]. The syntax is designed to meet the + recommendations laid out in "Functional Recommendations for Internet + Resource Locators" [RFC1736] and "Functional Requirements for Uniform + Resource Names" [RFC1737]. + + This document obsoletes [RFC2396], which merged "Uniform Resource + Locators" [RFC1738] and "Relative Uniform Resource Locators" + [RFC1808] in order to define a single, generic syntax for all URIs. + It obsoletes [RFC2732], which introduced syntax for an IPv6 address. + It excludes portions of RFC 1738 that defined the specific syntax of + individual URI schemes; those portions will be updated as separate + documents. The process for registration of new URI schemes is + defined separately by [BCP35]. Advice for designers of new URI + schemes can be found in [RFC2718]. All significant changes from RFC + 2396 are noted in Appendix D. + + This specification uses the terms "character" and "coded character + set" in accordance with the definitions provided in [BCP19], and + "character encoding" in place of what [BCP19] refers to as a + "charset". + +1.1. Overview of URIs + + URIs are characterized as follows: + + Uniform + + Uniformity provides several benefits. It allows different types + of resource identifiers to be used in the same context, even when + the mechanisms used to access those resources may differ. It + allows uniform semantic interpretation of common syntactic + conventions across different types of resource identifiers. It + allows introduction of new types of resource identifiers without + interfering with the way that existing identifiers are used. 
It + allows the identifiers to be reused in many different contexts, + thus permitting new applications or protocols to leverage a pre- + existing, large, and widely used set of resource identifiers. + + + + + + + +Berners-Lee, et al. Standards Track [Page 4] + +RFC 3986 URI Generic Syntax January 2005 + + + Resource + + This specification does not limit the scope of what might be a + resource; rather, the term "resource" is used in a general sense + for whatever might be identified by a URI. Familiar examples + include an electronic document, an image, a source of information + with a consistent purpose (e.g., "today's weather report for Los + Angeles"), a service (e.g., an HTTP-to-SMS gateway), and a + collection of other resources. A resource is not necessarily + accessible via the Internet; e.g., human beings, corporations, and + bound books in a library can also be resources. Likewise, + abstract concepts can be resources, such as the operators and + operands of a mathematical equation, the types of a relationship + (e.g., "parent" or "employee"), or numeric values (e.g., zero, + one, and infinity). + + Identifier + + An identifier embodies the information required to distinguish + what is being identified from all other things within its scope of + identification. Our use of the terms "identify" and "identifying" + refer to this purpose of distinguishing one resource from all + other resources, regardless of how that purpose is accomplished + (e.g., by name, address, or context). These terms should not be + mistaken as an assumption that an identifier defines or embodies + the identity of what is referenced, though that may be the case + for some identifiers. Nor should it be assumed that a system + using URIs will access the resource identified: in many cases, + URIs are used to denote resources without any intention that they + be accessed. 
Likewise, the "one" resource identified might not be + singular in nature (e.g., a resource might be a named set or a + mapping that varies over time). + + A URI is an identifier consisting of a sequence of characters + matching the syntax rule named <URI> in Section 3. It enables + uniform identification of resources via a separately defined + extensible set of naming schemes (Section 3.1). How that + identification is accomplished, assigned, or enabled is delegated to + each scheme specification. + + This specification does not place any limits on the nature of a + resource, the reasons why an application might seek to refer to a + resource, or the kinds of systems that might use URIs for the sake of + identifying resources. This specification does not require that a + URI persists in identifying the same resource over time, though that + is a common goal of all URI schemes. Nevertheless, nothing in this + + + + + +Berners-Lee, et al. Standards Track [Page 5] + +RFC 3986 URI Generic Syntax January 2005 + + + specification prevents an application from limiting itself to + particular types of resources, or to a subset of URIs that maintains + characteristics desired by that application. + + URIs have a global scope and are interpreted consistently regardless + of context, though the result of that interpretation may be in + relation to the end-user's context. For example, "http://localhost/" + has the same interpretation for every user of that reference, even + though the network interface corresponding to "localhost" may be + different for each end-user: interpretation is independent of access. + However, an action made on the basis of that reference will take + place in relation to the end-user's context, which implies that an + action intended to refer to a globally unique thing must use a URI + that distinguishes that resource from all other things.
URIs that + identify in relation to the end-user's local context should only be + used when the context itself is a defining aspect of the resource, + such as when an on-line help manual refers to a file on the end- + user's file system (e.g., "file:///etc/hosts"). + +1.1.1. Generic Syntax + + Each URI begins with a scheme name, as defined in Section 3.1, that + refers to a specification for assigning identifiers within that + scheme. As such, the URI syntax is a federated and extensible naming + system wherein each scheme's specification may further restrict the + syntax and semantics of identifiers using that scheme. + + This specification defines those elements of the URI syntax that are + required of all URI schemes or are common to many URI schemes. It + thus defines the syntax and semantics needed to implement a scheme- + independent parsing mechanism for URI references, by which the + scheme-dependent handling of a URI can be postponed until the + scheme-dependent semantics are needed. Likewise, protocols and data + formats that make use of URI references can refer to this + specification as a definition for the range of syntax allowed for all + URIs, including those schemes that have yet to be defined. This + decouples the evolution of identification schemes from the evolution + of protocols, data formats, and implementations that make use of + URIs. + + A parser of the generic URI syntax can parse any URI reference into + its major components. Once the scheme is determined, further + scheme-specific parsing can be performed on the components. In other + words, the URI generic syntax is a superset of the syntax of all URI + schemes. + + + + + + +Berners-Lee, et al. Standards Track [Page 6] + +RFC 3986 URI Generic Syntax January 2005 + + +1.1.2. 
Examples + + The following example URIs illustrate several URI schemes and + variations in their common syntax components: + + ftp://ftp.is.co.za/rfc/rfc1808.txt + + http://www.ietf.org/rfc/rfc2396.txt + + ldap://[2001:db8::7]/c=GB?objectClass?one + + mailto:John.Doe@example.com + + news:comp.infosystems.www.servers.unix + + tel:+1-816-555-1212 + + telnet://192.0.2.16:80/ + + urn:oasis:names:specification:docbook:dtd:xml:4.1.2 + + +1.1.3. URI, URL, and URN + + A URI can be further classified as a locator, a name, or both. The + term "Uniform Resource Locator" (URL) refers to the subset of URIs + that, in addition to identifying a resource, provide a means of + locating the resource by describing its primary access mechanism + (e.g., its network "location"). The term "Uniform Resource Name" + (URN) has been used historically to refer to both URIs under the + "urn" scheme [RFC2141], which are required to remain globally unique + and persistent even when the resource ceases to exist or becomes + unavailable, and to any other URI with the properties of a name. + + An individual scheme does not have to be classified as being just one + of "name" or "locator". Instances of URIs from any given scheme may + have the characteristics of names or locators or both, often + depending on the persistence and care in the assignment of + identifiers by the naming authority, rather than on any quality of + the scheme. Future specifications and related documentation should + use the general term "URI" rather than the more restrictive terms + "URL" and "URN" [RFC3305]. + + + + + + + + + +Berners-Lee, et al. Standards Track [Page 7] + +RFC 3986 URI Generic Syntax January 2005 + + +1.2. Design Considerations + +1.2.1. Transcription + + The URI syntax has been designed with global transcription as one of + its main considerations. A URI is a sequence of characters from a + very limited set: the letters of the basic Latin alphabet, digits, + and a few special characters. 
A URI may be represented in a variety + of ways; e.g., ink on paper, pixels on a screen, or a sequence of + character encoding octets. The interpretation of a URI depends only + on the characters used and not on how those characters are + represented in a network protocol. + + The goal of transcription can be described by a simple scenario. + Imagine two colleagues, Sam and Kim, sitting in a pub at an + international conference and exchanging research ideas. Sam asks Kim + for a location to get more information, so Kim writes the URI for the + research site on a napkin. Upon returning home, Sam takes out the + napkin and types the URI into a computer, which then retrieves the + information to which Kim referred. + + There are several design considerations revealed by the scenario: + + o A URI is a sequence of characters that is not always represented + as a sequence of octets. + + o A URI might be transcribed from a non-network source and thus + should consist of characters that are most likely able to be + entered into a computer, within the constraints imposed by + keyboards (and related input devices) across languages and + locales. + + o A URI often has to be remembered by people, and it is easier for + people to remember a URI when it consists of meaningful or + familiar components. + + These design considerations are not always in alignment. For + example, it is often the case that the most meaningful name for a URI + component would require characters that cannot be typed into some + systems. The ability to transcribe a resource identifier from one + medium to another has been considered more important than having a + URI consist of the most meaningful of components. + + In local or regional contexts and with improving technology, users + might benefit from being able to use a wider range of characters; + such use is not defined by this specification. 
Percent-encoded + octets (Section 2.1) may be used within a URI to represent characters + outside the range of the US-ASCII coded character set if this + + + +Berners-Lee, et al. Standards Track [Page 8] + +RFC 3986 URI Generic Syntax January 2005 + + + representation is allowed by the scheme or by the protocol element in + which the URI is referenced. Such a definition should specify the + character encoding used to map those characters to octets prior to + being percent-encoded for the URI. + +1.2.2. Separating Identification from Interaction + + A common misunderstanding of URIs is that they are only used to refer + to accessible resources. The URI itself only provides + identification; access to the resource is neither guaranteed nor + implied by the presence of a URI. Instead, any operation associated + with a URI reference is defined by the protocol element, data format + attribute, or natural language text in which it appears. + + Given a URI, a system may attempt to perform a variety of operations + on the resource, as might be characterized by words such as "access", + "update", "replace", or "find attributes". Such operations are + defined by the protocols that make use of URIs, not by this + specification. However, we do use a few general terms for describing + common operations on URIs. URI "resolution" is the process of + determining an access mechanism and the appropriate parameters + necessary to dereference a URI; this resolution may require several + iterations. To use that access mechanism to perform an action on the + URI's resource is to "dereference" the URI. + + When URIs are used within information retrieval systems to identify + sources of information, the most common form of URI dereference is + "retrieval": making use of a URI in order to retrieve a + representation of its associated resource. 
A "representation" is a + sequence of octets, along with representation metadata describing + those octets, that constitutes a record of the state of the resource + at the time when the representation is generated. Retrieval is + achieved by a process that might include using the URI as a cache key + to check for a locally cached representation, resolution of the URI + to determine an appropriate access mechanism (if any), and + dereference of the URI for the sake of applying a retrieval + operation. Depending on the protocols used to perform the retrieval, + additional information might be supplied about the resource (resource + metadata) and its relation to other resources. + + URI references in information retrieval systems are designed to be + late-binding: the result of an access is generally determined when it + is accessed and may vary over time or due to other aspects of the + interaction. These references are created in order to be used in the + future: what is being identified is not some specific result that was + obtained in the past, but rather some characteristic that is expected + to be true for future results. In such cases, the resource referred + to by the URI is actually a sameness of characteristics as observed + + + +Berners-Lee, et al. Standards Track [Page 9] + +RFC 3986 URI Generic Syntax January 2005 + + + over time, perhaps elucidated by additional comments or assertions + made by the resource provider. + + Although many URI schemes are named after protocols, this does not + imply that use of these URIs will result in access to the resource + via the named protocol. URIs are often used simply for the sake of + identification. Even when a URI is used to retrieve a representation + of a resource, that access might be through gateways, proxies, + caches, and name resolution services that are independent of the + protocol associated with the scheme name. 
The resolution of some + URIs may require the use of more than one protocol (e.g., both DNS + and HTTP are typically used to access an "http" URI's origin server + when a representation isn't found in a local cache). + +1.2.3. Hierarchical Identifiers + + The URI syntax is organized hierarchically, with components listed in + order of decreasing significance from left to right. For some URI + schemes, the visible hierarchy is limited to the scheme itself: + everything after the scheme component delimiter (":") is considered + opaque to URI processing. Other URI schemes make the hierarchy + explicit and visible to generic parsing algorithms. + + The generic syntax uses the slash ("/"), question mark ("?"), and + number sign ("#") characters to delimit components that are + significant to the generic parser's hierarchical interpretation of an + identifier. In addition to aiding the readability of such + identifiers through the consistent use of familiar syntax, this + uniform representation of hierarchy across naming schemes allows + scheme-independent references to be made relative to that hierarchy. + + It is often the case that a group or "tree" of documents has been + constructed to serve a common purpose, wherein the vast majority of + URI references in these documents point to resources within the tree + rather than outside it. Similarly, documents located at a particular + site are much more likely to refer to other resources at that site + than to resources at remote sites. Relative referencing of URIs + allows document trees to be partially independent of their location + and access scheme. For instance, it is possible for a single set of + hypertext documents to be simultaneously accessible and traversable + via each of the "file", "http", and "ftp" schemes if the documents + refer to each other with relative references. Furthermore, such + document trees can be moved, as a whole, without changing any of the + relative references. 
+ + A relative reference (Section 4.2) refers to a resource by describing + the difference within a hierarchical name space between the reference + context and the target URI. The reference resolution algorithm, + + + +Berners-Lee, et al. Standards Track [Page 10] + +RFC 3986 URI Generic Syntax January 2005 + + + presented in Section 5, defines how such a reference is transformed + to the target URI. As relative references can only be used within + the context of a hierarchical URI, designers of new URI schemes + should use a syntax consistent with the generic syntax's hierarchical + components unless there are compelling reasons to forbid relative + referencing within that scheme. + + NOTE: Previous specifications used the terms "partial URI" and + "relative URI" to denote a relative reference to a URI. As some + readers misunderstood those terms to mean that relative URIs are a + subset of URIs rather than a method of referencing URIs, this + specification simply refers to them as relative references. + + All URI references are parsed by generic syntax parsers when used. + However, because hierarchical processing has no effect on an absolute + URI used in a reference unless it contains one or more dot-segments + (complete path segments of "." or "..", as described in Section 3.3), + URI scheme specifications can define opaque identifiers by + disallowing use of slash characters, question mark characters, and + the URIs "scheme:." and "scheme:..". + +1.3. Syntax Notation + + This specification uses the Augmented Backus-Naur Form (ABNF) + notation of [RFC2234], including the following core ABNF syntax rules + defined by that specification: ALPHA (letters), CR (carriage return), + DIGIT (decimal digits), DQUOTE (double quote), HEXDIG (hexadecimal + digits), LF (line feed), and SP (space). The complete URI syntax is + collected in Appendix A. + +2. 
Characters + + The URI syntax provides a method of encoding data, presumably for the + sake of identifying a resource, as a sequence of characters. The URI + characters are, in turn, frequently encoded as octets for transport + or presentation. This specification does not mandate any particular + character encoding for mapping between URI characters and the octets + used to store or transmit those characters. When a URI appears in a + protocol element, the character encoding is defined by that protocol; + without such a definition, a URI is assumed to be in the same + character encoding as the surrounding text. + + The ABNF notation defines its terminal values to be non-negative + integers (codepoints) based on the US-ASCII coded character set + [ASCII]. Because a URI is a sequence of characters, we must invert + that relation in order to understand the URI syntax. Therefore, the + + + + + +Berners-Lee, et al. Standards Track [Page 11] + +RFC 3986 URI Generic Syntax January 2005 + + + integer values used by the ABNF must be mapped back to their + corresponding characters via US-ASCII in order to complete the syntax + rules. + + A URI is composed from a limited set of characters consisting of + digits, letters, and a few graphic symbols. A reserved subset of + those characters may be used to delimit syntax components within a + URI while the remaining characters, including both the unreserved set + and those reserved characters not acting as delimiters, define each + component's identifying data. + +2.1. Percent-Encoding + + A percent-encoding mechanism is used to represent a data octet in a + component when that octet's corresponding character is outside the + allowed set or is being used as a delimiter of, or within, the + component. A percent-encoded octet is encoded as a character + triplet, consisting of the percent character "%" followed by the two + hexadecimal digits representing that octet's numeric value. 
For + example, "%20" is the percent-encoding for the binary octet + "00100000" (ABNF: %x20), which in US-ASCII corresponds to the space + character (SP). Section 2.4 describes when percent-encoding and + decoding is applied. + + pct-encoded = "%" HEXDIG HEXDIG + + The uppercase hexadecimal digits 'A' through 'F' are equivalent to + the lowercase digits 'a' through 'f', respectively. If two URIs + differ only in the case of hexadecimal digits used in percent-encoded + octets, they are equivalent. For consistency, URI producers and + normalizers should use uppercase hexadecimal digits for all percent- + encodings. + +2.2. Reserved Characters + + URIs include components and subcomponents that are delimited by + characters in the "reserved" set. These characters are called + "reserved" because they may (or may not) be defined as delimiters by + the generic syntax, by each scheme-specific syntax, or by the + implementation-specific syntax of a URI's dereferencing algorithm. + If data for a URI component would conflict with a reserved + character's purpose as a delimiter, then the conflicting data must be + percent-encoded before the URI is formed. + + + + + + + + +Berners-Lee, et al. Standards Track [Page 12] + +RFC 3986 URI Generic Syntax January 2005 + + + reserved = gen-delims / sub-delims + + gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" + + sub-delims = "!" / "$" / "&" / "'" / "(" / ")" + / "*" / "+" / "," / ";" / "=" + + The purpose of reserved characters is to provide a set of delimiting + characters that are distinguishable from other data within a URI. + URIs that differ in the replacement of a reserved character with its + corresponding percent-encoded octet are not equivalent. Percent- + encoding a reserved character, or decoding a percent-encoded octet + that corresponds to a reserved character, will change how the URI is + interpreted by most applications. 
Thus, characters in the reserved + set are protected from normalization and are therefore safe to be + used by scheme-specific and producer-specific algorithms for + delimiting data subcomponents within a URI. + + A subset of the reserved characters (gen-delims) is used as + delimiters of the generic URI components described in Section 3. A + component's ABNF syntax rule will not use the reserved or gen-delims + rule names directly; instead, each syntax rule lists the characters + allowed within that component (i.e., not delimiting it), and any of + those characters that are also in the reserved set are "reserved" for + use as subcomponent delimiters within the component. Only the most + common subcomponents are defined by this specification; other + subcomponents may be defined by a URI scheme's specification, or by + the implementation-specific syntax of a URI's dereferencing + algorithm, provided that such subcomponents are delimited by + characters in the reserved set allowed within that component. + + URI producing applications should percent-encode data octets that + correspond to characters in the reserved set unless these characters + are specifically allowed by the URI scheme to represent data in that + component. If a reserved character is found in a URI component and + no delimiting role is known for that character, then it must be + interpreted as representing the data octet corresponding to that + character's encoding in US-ASCII. + +2.3. Unreserved Characters + + Characters that are allowed in a URI but do not have a reserved + purpose are called unreserved. These include uppercase and lowercase + letters, decimal digits, hyphen, period, underscore, and tilde. + + unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" + + + + + +Berners-Lee, et al. 
Standards Track [Page 13] + +RFC 3986 URI Generic Syntax January 2005 + + + URIs that differ in the replacement of an unreserved character with + its corresponding percent-encoded US-ASCII octet are equivalent: they + identify the same resource. However, URI comparison implementations + do not always perform normalization prior to comparison (see Section + 6). For consistency, percent-encoded octets in the ranges of ALPHA + (%41-%5A and %61-%7A), DIGIT (%30-%39), hyphen (%2D), period (%2E), + underscore (%5F), or tilde (%7E) should not be created by URI + producers and, when found in a URI, should be decoded to their + corresponding unreserved characters by URI normalizers. + +2.4. When to Encode or Decode + + Under normal circumstances, the only time when octets within a URI + are percent-encoded is during the process of producing the URI from + its component parts. This is when an implementation determines which + of the reserved characters are to be used as subcomponent delimiters + and which can be safely used as data. Once produced, a URI is always + in its percent-encoded form. + + When a URI is dereferenced, the components and subcomponents + significant to the scheme-specific dereferencing process (if any) + must be parsed and separated before the percent-encoded octets within + those components can be safely decoded, as otherwise the data may be + mistaken for component delimiters. The only exception is for + percent-encoded octets corresponding to characters in the unreserved + set, which can be decoded at any time. For example, the octet + corresponding to the tilde ("~") character is often encoded as "%7E" + by older URI processing implementations; the "%7E" can be replaced by + "~" without changing its interpretation. + + Because the percent ("%") character serves as the indicator for + percent-encoded octets, it must be percent-encoded as "%25" for that + octet to be used as data within a URI. 
Implementations must not + percent-encode or decode the same string more than once, as decoding + an already decoded string might lead to misinterpreting a percent + data octet as the beginning of a percent-encoding, or vice versa in + the case of percent-encoding an already percent-encoded string. + +2.5. Identifying Data + + URI characters provide identifying data for each of the URI + components, serving as an external interface for identification + between systems. Although the presence and nature of the URI + production interface is hidden from clients that use its URIs (and is + thus beyond the scope of the interoperability requirements defined by + this specification), it is a frequent source of confusion and errors + in the interpretation of URI character issues. Implementers have to + be aware that there are multiple character encodings involved in the + + + +Berners-Lee, et al. Standards Track [Page 14] + +RFC 3986 URI Generic Syntax January 2005 + + + production and transmission of URIs: local name and data encoding, + public interface encoding, URI character encoding, data format + encoding, and protocol encoding. + + Local names, such as file system names, are stored with a local + character encoding. URI producing applications (e.g., origin + servers) will typically use the local encoding as the basis for + producing meaningful names. The URI producer will transform the + local encoding to one that is suitable for a public interface and + then transform the public interface encoding into the restricted set + of URI characters (reserved, unreserved, and percent-encodings). + Those characters are, in turn, encoded as octets to be used as a + reference within a data format (e.g., a document charset), and such + data formats are often subsequently encoded for transmission over + Internet protocols. 
+ + For most systems, an unreserved character appearing within a URI + component is interpreted as representing the data octet corresponding + to that character's encoding in US-ASCII. Consumers of URIs assume + that the letter "X" corresponds to the octet "01011000", and even + when that assumption is incorrect, there is no harm in making it. A + system that internally provides identifiers in the form of a + different character encoding, such as EBCDIC, will generally perform + character translation of textual identifiers to UTF-8 [STD63] (or + some other superset of the US-ASCII character encoding) at an + internal interface, thereby providing more meaningful identifiers + than those resulting from simply percent-encoding the original + octets. + + For example, consider an information service that provides data, + stored locally using an EBCDIC-based file system, to clients on the + Internet through an HTTP server. When an author creates a file with + the name "Laguna Beach" on that file system, the "http" URI + corresponding to that resource is expected to contain the meaningful + string "Laguna%20Beach". If, however, that server produces URIs by + using an overly simplistic raw octet mapping, then the result would + be a URI containing "%D3%81%87%A4%95%81@%C2%85%81%83%88". An + internal transcoding interface fixes this problem by transcoding the + local name to a superset of US-ASCII prior to producing the URI. + Naturally, proper interpretation of an incoming URI on such an + interface requires that percent-encoded octets be decoded (e.g., + "%20" to SP) before the reverse transcoding is applied to obtain the + local name. + + In some cases, the internal interface between a URI component and the + identifying data that it has been crafted to represent is much less + direct than a character encoding translation. For example, portions + of a URI might reflect a query on non-ASCII data, or numeric + + + +Berners-Lee, et al. 
Standards Track [Page 15] + +RFC 3986 URI Generic Syntax January 2005 + + + coordinates on a map. Likewise, a URI scheme may define components + with additional encoding requirements that are applied prior to + forming the component and producing the URI. + + When a new URI scheme defines a component that represents textual + data consisting of characters from the Universal Character Set [UCS], + the data should first be encoded as octets according to the UTF-8 + character encoding [STD63]; then only those octets that do not + correspond to characters in the unreserved set should be percent- + encoded. For example, the character A would be represented as "A", + the character LATIN CAPITAL LETTER A WITH GRAVE would be represented + as "%C3%80", and the character KATAKANA LETTER A would be represented + as "%E3%82%A2". + +3. Syntax Components + + The generic URI syntax consists of a hierarchical sequence of + components referred to as the scheme, authority, path, query, and + fragment. + + URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] + + hier-part = "//" authority path-abempty + / path-absolute + / path-rootless + / path-empty + + The scheme and path components are required, though the path may be + empty (no characters). When authority is present, the path must + either be empty or begin with a slash ("/") character. When + authority is not present, the path cannot begin with two slash + characters ("//"). These restrictions result in five different ABNF + rules for a path (Section 3.3), only one of which will match any + given URI reference. + + The following are two example URIs and their component parts: + + foo://example.com:8042/over/there?name=ferret#nose + \_/ \______________/\_________/ \_________/ \__/ + | | | | | + scheme authority path query fragment + | _____________________|__ + / \ / \ + urn:example:animal:ferret:nose + + + + + + + +Berners-Lee, et al. Standards Track [Page 16] + +RFC 3986 URI Generic Syntax January 2005 + + +3.1. 
Scheme + + Each URI begins with a scheme name that refers to a specification for + assigning identifiers within that scheme. As such, the URI syntax is + a federated and extensible naming system wherein each scheme's + specification may further restrict the syntax and semantics of + identifiers using that scheme. + + Scheme names consist of a sequence of characters beginning with a + letter and followed by any combination of letters, digits, plus + ("+"), period ("."), or hyphen ("-"). Although schemes are case- + insensitive, the canonical form is lowercase and documents that + specify schemes must do so with lowercase letters. An implementation + should accept uppercase letters as equivalent to lowercase in scheme + names (e.g., allow "HTTP" as well as "http") for the sake of + robustness but should only produce lowercase scheme names for + consistency. + + scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) + + Individual schemes are not specified by this document. The process + for registration of new URI schemes is defined separately by [BCP35]. + The scheme registry maintains the mapping between scheme names and + their specifications. Advice for designers of new URI schemes can be + found in [RFC2718]. URI scheme specifications must define their own + syntax so that all strings matching their scheme-specific syntax will + also match the <absolute-URI> grammar, as described in Section 4.3. + + When presented with a URI that violates one or more scheme-specific + restrictions, the scheme-specific resolution process should flag the + reference as an error rather than ignore the unused parts; doing so + reduces the number of equivalent URIs and helps detect abuses of the + generic syntax, which might indicate that the URI has been + constructed to mislead the user (Section 7.6). + +3.2.
Authority + + Many URI schemes include a hierarchical element for a naming + authority so that governance of the name space defined by the + remainder of the URI is delegated to that authority (which may, in + turn, delegate it further). The generic syntax provides a common + means for distinguishing an authority based on a registered name or + server address, along with optional port and user information. + + The authority component is preceded by a double slash ("//") and is + terminated by the next slash ("/"), question mark ("?"), or number + sign ("#") character, or by the end of the URI. + + + + +Berners-Lee, et al. Standards Track [Page 17] + +RFC 3986 URI Generic Syntax January 2005 + + + authority = [ userinfo "@" ] host [ ":" port ] + + URI producers and normalizers should omit the ":" delimiter that + separates host from port if the port component is empty. Some + schemes do not allow the userinfo and/or port subcomponents. + + If a URI contains an authority component, then the path component + must either be empty or begin with a slash ("/") character. Non- + validating parsers (those that merely separate a URI reference into + its major components) will often ignore the subcomponent structure of + authority, treating it as an opaque string from the double-slash to + the first terminating delimiter, until such time as the URI is + dereferenced. + +3.2.1. User Information + + The userinfo subcomponent may consist of a user name and, optionally, + scheme-specific information about how to gain authorization to access + the resource. The user information, if present, is followed by a + commercial at-sign ("@") that delimits it from the host. + + userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) + + Use of the format "user:password" in the userinfo field is + deprecated. 
Applications should not render as clear text any data + after the first colon (":") character found within a userinfo + subcomponent unless the data after the colon is the empty string + (indicating no password). Applications may choose to ignore or + reject such data when it is received as part of a reference and + should reject the storage of such data in unencrypted form. The + passing of authentication information in clear text has proven to be + a security risk in almost every case where it has been used. + + Applications that render a URI for the sake of user feedback, such as + in graphical hypertext browsing, should render userinfo in a way that + is distinguished from the rest of a URI, when feasible. Such + rendering will assist the user in cases where the userinfo has been + misleadingly crafted to look like a trusted domain name + (Section 7.6). + +3.2.2. Host + + The host subcomponent of authority is identified by an IP literal + encapsulated within square brackets, an IPv4 address in dotted- + decimal form, or a registered name. The host subcomponent is case- + insensitive. The presence of a host subcomponent within a URI does + not imply that the scheme requires access to the given host on the + Internet. In many cases, the host syntax is used only for the sake + + + +Berners-Lee, et al. Standards Track [Page 18] + +RFC 3986 URI Generic Syntax January 2005 + + + of reusing the existing registration process created and deployed for + DNS, thus obtaining a globally unique name without the cost of + deploying another registry. However, such use comes with its own + costs: domain name ownership may change over time for reasons not + anticipated by the URI producer. In other cases, the data within the + host component identifies a registered name that has nothing to do + with an Internet host. We use the name "host" for the ABNF rule + because that is its most common purpose, not its only purpose. 
+ + host = IP-literal / IPv4address / reg-name + + The syntax rule for host is ambiguous because it does not completely + distinguish between an IPv4address and a reg-name. In order to + disambiguate the syntax, we apply the "first-match-wins" algorithm: + If host matches the rule for IPv4address, then it should be + considered an IPv4 address literal and not a reg-name. Although host + is case-insensitive, producers and normalizers should use lowercase + for registered names and hexadecimal addresses for the sake of + uniformity, while only using uppercase letters for percent-encodings. + + A host identified by an Internet Protocol literal address, version 6 + [RFC3513] or later, is distinguished by enclosing the IP literal + within square brackets ("[" and "]"). This is the only place where + square bracket characters are allowed in the URI syntax. In + anticipation of future, as-yet-undefined IP literal address formats, + an implementation may use an optional version flag to indicate such a + format explicitly rather than rely on heuristic determination. + + IP-literal = "[" ( IPv6address / IPvFuture ) "]" + + IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ) + + The version flag does not indicate the IP version; rather, it + indicates future versions of the literal format. As such, + implementations must not provide the version flag for the existing + IPv4 and IPv6 literal address forms described below. If a URI + containing an IP-literal that starts with "v" (case-insensitive), + indicating that the version flag is present, is dereferenced by an + application that does not know the meaning of that version flag, then + the application should return an appropriate error for "address + mechanism not supported". + + A host identified by an IPv6 literal address is represented inside + the square brackets without a preceding version flag. 
The ABNF + provided here is a translation of the text definition of an IPv6 + literal address provided in [RFC3513]. This syntax does not support + IPv6 scoped addressing zone identifiers. + + + + +Berners-Lee, et al. Standards Track [Page 19] + +RFC 3986 URI Generic Syntax January 2005 + + + A 128-bit IPv6 address is divided into eight 16-bit pieces. Each + piece is represented numerically in case-insensitive hexadecimal, + using one to four hexadecimal digits (leading zeroes are permitted). + The eight encoded pieces are given most-significant first, separated + by colon characters. Optionally, the least-significant two pieces + may instead be represented in IPv4 address textual format. A + sequence of one or more consecutive zero-valued 16-bit pieces within + the address may be elided, omitting all their digits and leaving + exactly two consecutive colons in their place to mark the elision. + + IPv6address = 6( h16 ":" ) ls32 + / "::" 5( h16 ":" ) ls32 + / [ h16 ] "::" 4( h16 ":" ) ls32 + / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32 + / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32 + / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32 + / [ *4( h16 ":" ) h16 ] "::" ls32 + / [ *5( h16 ":" ) h16 ] "::" h16 + / [ *6( h16 ":" ) h16 ] "::" + + ls32 = ( h16 ":" h16 ) / IPv4address + ; least-significant 32 bits of address + + h16 = 1*4HEXDIG + ; 16 bits of address represented in hexadecimal + + A host identified by an IPv4 literal address is represented in + dotted-decimal notation (a sequence of four decimal numbers in the + range 0 to 255, separated by "."), as described in [RFC1123] by + reference to [RFC0952]. Note that other forms of dotted notation may + be interpreted on some platforms, as described in Section 7.4, but + only the dotted-decimal form of four octets is allowed by this + grammar. + + IPv4address = dec-octet "." dec-octet "." dec-octet "." 
dec-octet + + dec-octet = DIGIT ; 0-9 + / %x31-39 DIGIT ; 10-99 + / "1" 2DIGIT ; 100-199 + / "2" %x30-34 DIGIT ; 200-249 + / "25" %x30-35 ; 250-255 + + A host identified by a registered name is a sequence of characters + usually intended for lookup within a locally defined host or service + name registry, though the URI's scheme-specific semantics may require + that a specific registry (or fixed name table) be used instead. The + most common name registry mechanism is the Domain Name System (DNS). + A registered name intended for lookup in the DNS uses the syntax + + + +Berners-Lee, et al. Standards Track [Page 20] + +RFC 3986 URI Generic Syntax January 2005 + + + defined in Section 3.5 of [RFC1034] and Section 2.1 of [RFC1123]. + Such a name consists of a sequence of domain labels separated by ".", + each domain label starting and ending with an alphanumeric character + and possibly also containing "-" characters. The rightmost domain + label of a fully qualified domain name in DNS may be followed by a + single "." and should be if it is necessary to distinguish between + the complete domain name and some local domain. + + reg-name = *( unreserved / pct-encoded / sub-delims ) + + If the URI scheme defines a default for host, then that default + applies when the host subcomponent is undefined or when the + registered name is empty (zero length). For example, the "file" URI + scheme is defined so that no authority, an empty host, and + "localhost" all mean the end-user's machine, whereas the "http" + scheme considers a missing authority or empty host invalid. + + This specification does not mandate a particular registered name + lookup technology and therefore does not restrict the syntax of reg- + name beyond what is necessary for interoperability. 
Instead, it + delegates the issue of registered name syntax conformance to the + operating system of each application performing URI resolution, and + that operating system decides what it will allow for the purpose of + host identification. A URI resolution implementation might use DNS, + host tables, yellow pages, NetInfo, WINS, or any other system for + lookup of registered names. However, a globally scoped naming + system, such as DNS fully qualified domain names, is necessary for + URIs intended to have global scope. URI producers should use names + that conform to the DNS syntax, even when use of DNS is not + immediately apparent, and should limit these names to no more than + 255 characters in length. + + The reg-name syntax allows percent-encoded octets in order to + represent non-ASCII registered names in a uniform way that is + independent of the underlying name resolution technology. Non-ASCII + characters must first be encoded according to UTF-8 [STD63], and then + each octet of the corresponding UTF-8 sequence must be percent- + encoded to be represented as URI characters. URI producing + applications must not use percent-encoding in host unless it is used + to represent a UTF-8 character sequence. When a non-ASCII registered + name represents an internationalized domain name intended for + resolution via the DNS, the name must be transformed to the IDNA + encoding [RFC3490] prior to name lookup. URI producers should + provide these registered names in the IDNA encoding, rather than a + percent-encoding, if they wish to maximize interoperability with + legacy URI resolvers. + + + + + +Berners-Lee, et al. Standards Track [Page 21] + +RFC 3986 URI Generic Syntax January 2005 + + +3.2.3. Port + + The port subcomponent of authority is designated by an optional port + number in decimal following the host and delimited from it by a + single colon (":") character. + + port = *DIGIT + + A scheme may define a default port. 
For example, the "http" scheme + defines a default port of "80", corresponding to its reserved TCP + port number. The type of port designated by the port number (e.g., + TCP, UDP, SCTP) is defined by the URI scheme. URI producers and + normalizers should omit the port component and its ":" delimiter if + port is empty or if its value would be the same as that of the + scheme's default. + +3.3. Path + + The path component contains data, usually organized in hierarchical + form, that, along with data in the non-hierarchical query component + (Section 3.4), serves to identify a resource within the scope of the + URI's scheme and naming authority (if any). The path is terminated + by the first question mark ("?") or number sign ("#") character, or + by the end of the URI. + + If a URI contains an authority component, then the path component + must either be empty or begin with a slash ("/") character. If a URI + does not contain an authority component, then the path cannot begin + with two slash characters ("//"). In addition, a URI reference + (Section 4.1) may be a relative-path reference, in which case the + first path segment cannot contain a colon (":") character. The ABNF + requires five separate rules to disambiguate these cases, only one of + which will match the path substring within a given URI reference. We + use the generic term "path component" to describe the URI substring + matched by the parser to one of these rules. + + path = path-abempty ; begins with "/" or is empty + / path-absolute ; begins with "/" but not "//" + / path-noscheme ; begins with a non-colon segment + / path-rootless ; begins with a segment + / path-empty ; zero characters + + path-abempty = *( "/" segment ) + path-absolute = "/" [ segment-nz *( "/" segment ) ] + path-noscheme = segment-nz-nc *( "/" segment ) + path-rootless = segment-nz *( "/" segment ) + path-empty = 0<pchar> + + + + +Berners-Lee, et al.
Standards Track [Page 22] + +RFC 3986 URI Generic Syntax January 2005 + + + segment = *pchar + segment-nz = 1*pchar + segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) + ; non-zero-length segment without any colon ":" + + pchar = unreserved / pct-encoded / sub-delims / ":" / "@" + + A path consists of a sequence of path segments separated by a slash + ("/") character. A path is always defined for a URI, though the + defined path may be empty (zero length). Use of the slash character + to indicate hierarchy is only required when a URI will be used as the + context for relative references. For example, the URI + <mailto:fred@example.com> has a path of "fred@example.com", whereas + the URI <foo://info.example.com?fred> has an empty path. + + The path segments "." and "..", also known as dot-segments, are + defined for relative reference within the path name hierarchy. They + are intended for use at the beginning of a relative-path reference + (Section 4.2) to indicate relative position within the hierarchical + tree of names. This is similar to their role within some operating + systems' file directory structures to indicate the current directory + and parent directory, respectively. However, unlike in a file + system, these dot-segments are only interpreted within the URI path + hierarchy and are removed as part of the resolution process (Section + 5.2). + + Aside from dot-segments in hierarchical paths, a path segment is + considered opaque by the generic syntax. URI producing applications + often use the reserved characters allowed in a segment to delimit + scheme-specific or dereference-handler-specific subcomponents. For + example, the semicolon (";") and equals ("=") reserved characters are + often used to delimit parameters and parameter values applicable to + that segment. The comma (",") reserved character is often used for + similar purposes.
For example, one URI producer might use a segment + such as "name;v=1.1" to indicate a reference to version 1.1 of + "name", whereas another might use a segment such as "name,1.1" to + indicate the same. Parameter types may be defined by scheme-specific + semantics, but in most cases the syntax of a parameter is specific to + the implementation of the URI's dereferencing algorithm. + +3.4. Query + + The query component contains non-hierarchical data that, along with + data in the path component (Section 3.3), serves to identify a + resource within the scope of the URI's scheme and naming authority + (if any). The query component is indicated by the first question + mark ("?") character and terminated by a number sign ("#") character + or by the end of the URI. + + + +Berners-Lee, et al. Standards Track [Page 23] + +RFC 3986 URI Generic Syntax January 2005 + + + query = *( pchar / "/" / "?" ) + + The characters slash ("/") and question mark ("?") may represent data + within the query component. Beware that some older, erroneous + implementations may not handle such data correctly when it is used as + the base URI for relative references (Section 5.1), apparently + because they fail to distinguish query data from path data when + looking for hierarchical separators. However, as query components + are often used to carry identifying information in the form of + "key=value" pairs and one frequently used value is a reference to + another URI, it is sometimes better for usability to avoid percent- + encoding those characters. + +3.5. Fragment + + The fragment identifier component of a URI allows indirect + identification of a secondary resource by reference to a primary + resource and additional identifying information. The identified + secondary resource may be some portion or subset of the primary + resource, some view on representations of the primary resource, or + some other resource defined or described by those representations. 
A + fragment identifier component is indicated by the presence of a + number sign ("#") character and terminated by the end of the URI. + + fragment = *( pchar / "/" / "?" ) + + The semantics of a fragment identifier are defined by the set of + representations that might result from a retrieval action on the + primary resource. The fragment's format and resolution is therefore + dependent on the media type [RFC2046] of a potentially retrieved + representation, even though such a retrieval is only performed if the + URI is dereferenced. If no such representation exists, then the + semantics of the fragment are considered unknown and are effectively + unconstrained. Fragment identifier semantics are independent of the + URI scheme and thus cannot be redefined by scheme specifications. + + Individual media types may define their own restrictions on or + structures within the fragment identifier syntax for specifying + different types of subsets, views, or external references that are + identifiable as secondary resources by that media type. If the + primary resource has multiple representations, as is often the case + for resources whose representation is selected based on attributes of + the retrieval request (a.k.a., content negotiation), then whatever is + identified by the fragment should be consistent across all of those + representations. Each representation should either define the + fragment so that it corresponds to the same secondary resource, + regardless of how it is represented, or should leave the fragment + undefined (i.e., not found). + + + +Berners-Lee, et al. Standards Track [Page 24] + +RFC 3986 URI Generic Syntax January 2005 + + + As with any URI, use of a fragment identifier component does not + imply that a retrieval action will take place. A URI with a fragment + identifier may be used to refer to the secondary resource without any + implication that the primary resource is accessible or will ever be + accessed. 
+ + Fragment identifiers have a special role in information retrieval + systems as the primary form of client-side indirect referencing, + allowing an author to specifically identify aspects of an existing + resource that are only indirectly provided by the resource owner. As + such, the fragment identifier is not used in the scheme-specific + processing of a URI; instead, the fragment identifier is separated + from the rest of the URI prior to a dereference, and thus the + identifying information within the fragment itself is dereferenced + solely by the user agent, regardless of the URI scheme. Although + this separate handling is often perceived to be a loss of + information, particularly for accurate redirection of references as + resources move over time, it also serves to prevent information + providers from denying reference authors the right to refer to + information within a resource selectively. Indirect referencing also + provides additional flexibility and extensibility to systems that use + URIs, as new media types are easier to define and deploy than new + schemes of identification. + + The characters slash ("/") and question mark ("?") are allowed to + represent data within the fragment identifier. Beware that some + older, erroneous implementations may not handle this data correctly + when it is used as the base URI for relative references (Section + 5.1). + +4. Usage + + When applications make reference to a URI, they do not always use the + full form of reference defined by the "URI" syntax rule. To save + space and take advantage of hierarchical locality, many Internet + protocol elements and media type formats allow an abbreviation of a + URI, whereas others restrict the syntax to a particular form of URI. + We define the most common forms of reference syntax in this + specification because they impact and depend upon the design of the + generic syntax, requiring a uniform parsing algorithm in order to be + interpreted consistently. + +4.1. 
URI Reference + + URI-reference is used to denote the most common usage of a resource + identifier. + + URI-reference = URI / relative-ref + + + +Berners-Lee, et al. Standards Track [Page 25] + +RFC 3986 URI Generic Syntax January 2005 + + + A URI-reference is either a URI or a relative reference. If the + URI-reference's prefix does not match the syntax of a scheme followed + by its colon separator, then the URI-reference is a relative + reference. + + A URI-reference is typically parsed first into the five URI + components, in order to determine what components are present and + whether the reference is relative. Then, each component is parsed + for its subparts and their validation. The ABNF of URI-reference, + along with the "first-match-wins" disambiguation rule, is sufficient + to define a validating parser for the generic syntax. Readers + familiar with regular expressions should see Appendix B for an + example of a non-validating URI-reference parser that will take any + given string and extract the URI components. + +4.2. Relative Reference + + A relative reference takes advantage of the hierarchical syntax + (Section 1.2.3) to express a URI reference relative to the name space + of another hierarchical URI. + + relative-ref = relative-part [ "?" query ] [ "#" fragment ] + + relative-part = "//" authority path-abempty + / path-absolute + / path-noscheme + / path-empty + + The URI referred to by a relative reference, also known as the target + URI, is obtained by applying the reference resolution algorithm of + Section 5. + + A relative reference that begins with two slash characters is termed + a network-path reference; such references are rarely used. A + relative reference that begins with a single slash character is + termed an absolute-path reference. A relative reference that does + not begin with a slash character is termed a relative-path reference. 
+ + A path segment that contains a colon character (e.g., "this:that") + cannot be used as the first segment of a relative-path reference, as + it would be mistaken for a scheme name. Such a segment must be + preceded by a dot-segment (e.g., "./this:that") to make a relative- + path reference. + + + + + + + + +Berners-Lee, et al. Standards Track [Page 26] + +RFC 3986 URI Generic Syntax January 2005 + + +4.3. Absolute URI + + Some protocol elements allow only the absolute form of a URI without + a fragment identifier. For example, defining a base URI for later + use by relative references calls for an absolute-URI syntax rule that + does not allow a fragment. + + absolute-URI = scheme ":" hier-part [ "?" query ] + + URI scheme specifications must define their own syntax so that all + strings matching their scheme-specific syntax will also match the + <absolute-URI> grammar. Scheme specifications will not define + fragment identifier syntax or usage, regardless of its applicability + to resources identifiable via that scheme, as fragment identification + is orthogonal to scheme definition. However, scheme specifications + are encouraged to include a wide range of examples, including + examples that show use of the scheme's URIs with fragment identifiers + when such usage is appropriate. + +4.4. Same-Document Reference + + When a URI reference refers to a URI that is, aside from its fragment + component (if any), identical to the base URI (Section 5.1), that + reference is called a "same-document" reference. The most frequent + examples of same-document references are relative references that are + empty or include only the number sign ("#") separator followed by a + fragment identifier. + + When a same-document reference is dereferenced for a retrieval + action, the target of that reference is defined to be within the same + entity (representation, document, or message) as the reference; + therefore, a dereference should not result in a new retrieval action.
+ + Normalization of the base and target URIs prior to their comparison, + as described in Sections 6.2.2 and 6.2.3, is allowed but rarely + performed in practice. Normalization may increase the set of same- + document references, which may be of benefit to some caching + applications. As such, reference authors should not assume that a + slightly different, though equivalent, reference URI will (or will + not) be interpreted as a same-document reference by any given + application. + +4.5. Suffix Reference + + The URI syntax is designed for unambiguous reference to resources and + extensibility via the URI scheme. However, as URI identification and + usage have become commonplace, traditional media (television, radio, + newspapers, billboards, etc.) have increasingly used a suffix of the + + + +Berners-Lee, et al. Standards Track [Page 27] + +RFC 3986 URI Generic Syntax January 2005 + + + URI as a reference, consisting of only the authority and path + portions of the URI, such as + + www.w3.org/Addressing/ + + or simply a DNS registered name on its own. Such references are + primarily intended for human interpretation rather than for machines, + with the assumption that context-based heuristics are sufficient to + complete the URI (e.g., most registered names beginning with "www" + are likely to have a URI prefix of "http://"). Although there is no + standard set of heuristics for disambiguating a URI suffix, many + client implementations allow them to be entered by the user and + heuristically resolved. + + Although this practice of using suffix references is common, it + should be avoided whenever possible and should never be used in + situations where long-term references are expected. The heuristics + noted above will change over time, particularly when a new URI scheme + becomes popular, and are often incorrect when used out of context. + Furthermore, they can lead to security issues along the lines of + those described in [RFC1535]. 
+ + As a URI suffix has the same syntax as a relative-path reference, a + suffix reference cannot be used in contexts where a relative + reference is expected. As a result, suffix references are limited to + places where there is no defined base URI, such as dialog boxes and + off-line advertisements. + +5. Reference Resolution + + This section defines the process of resolving a URI reference within + a context that allows relative references so that the result is a + string matching the <URI> syntax rule of Section 3. + +5.1. Establishing a Base URI + + The term "relative" implies that a "base URI" exists against which + the relative reference is applied. Aside from fragment-only + references (Section 4.4), relative references are only usable when a + base URI is known. A base URI must be established by the parser + prior to parsing URI references that might be relative. A base URI + must conform to the <absolute-URI> syntax rule (Section 4.3). If the + base URI is obtained from a URI reference, then that reference must + be converted to absolute form and stripped of any fragment component + prior to its use as a base URI. + + + + + + +Berners-Lee, et al. Standards Track [Page 28] + +RFC 3986 URI Generic Syntax January 2005 + + + The base URI of a reference can be established in one of four ways, + discussed below in order of precedence. The order of precedence can + be thought of in terms of layers, where the innermost defined base + URI has the highest precedence. This can be visualized graphically + as follows: + + .----------------------------------------------------------. + | .----------------------------------------------------. | + | | .----------------------------------------------. | | + | | | .----------------------------------------. | | | + | | | | .----------------------------------.
| | | | + | | | | | <relative-reference> | | | | | + | | | | `----------------------------------' | | | | + | | | | (5.1.1) Base URI embedded in content | | | | + | | | `----------------------------------------' | | | + | | | (5.1.2) Base URI of the encapsulating entity | | | + | | | (message, representation, or none) | | | + | | `----------------------------------------------' | | + | | (5.1.3) URI used to retrieve the entity | | + | `----------------------------------------------------' | + | (5.1.4) Default Base URI (application-dependent) | + `----------------------------------------------------------' + +5.1.1. Base URI Embedded in Content + + Within certain media types, a base URI for relative references can be + embedded within the content itself so that it can be readily obtained + by a parser. This can be useful for descriptive documents, such as + tables of contents, which may be transmitted to others through + protocols other than their usual retrieval context (e.g., email or + USENET news). + + It is beyond the scope of this specification to specify how, for each + media type, a base URI can be embedded. The appropriate syntax, when + available, is described by the data format specification associated + with each media type. + +5.1.2. Base URI from the Encapsulating Entity + + If no base URI is embedded, the base URI is defined by the + representation's retrieval context. For a document that is enclosed + within another entity, such as a message or archive, the retrieval + context is that entity. Thus, the default base URI of a + representation is the base URI of the entity in which the + representation is encapsulated. + + + + + + +Berners-Lee, et al. Standards Track [Page 29] + +RFC 3986 URI Generic Syntax January 2005 + + + A mechanism for embedding a base URI within MIME container types + (e.g., the message and multipart types) is defined by MHTML + [RFC2557].
Protocols that do not use the MIME message header syntax, + but that do allow some form of tagged metadata to be included within + messages, may define their own syntax for defining a base URI as part + of a message. + +5.1.3. Base URI from the Retrieval URI + + If no base URI is embedded and the representation is not encapsulated + within some other entity, then, if a URI was used to retrieve the + representation, that URI shall be considered the base URI. Note that + if the retrieval was the result of a redirected request, the last URI + used (i.e., the URI that resulted in the actual retrieval of the + representation) is the base URI. + +5.1.4. Default Base URI + + If none of the conditions described above apply, then the base URI is + defined by the context of the application. As this definition is + necessarily application-dependent, failing to define a base URI by + using one of the other methods may result in the same content being + interpreted differently by different types of applications. + + A sender of a representation containing relative references is + responsible for ensuring that a base URI for those references can be + established. Aside from fragment-only references, relative + references can only be used reliably in situations where the base URI + is well defined. + +5.2. Relative Resolution + + This section describes an algorithm for converting a URI reference + that might be relative to a given base URI into the parsed components + of the reference's target. The components can then be recomposed, as + described in Section 5.3, to form the target URI. This algorithm + provides definitive results that can be used to test the output of + other implementations. Applications may implement relative reference + resolution by using some other algorithm, provided that the results + match what would be given by this one. + + + + + + + + + + + +Berners-Lee, et al. Standards Track [Page 30] + +RFC 3986 URI Generic Syntax January 2005 + + +5.2.1. 
Pre-parse the Base URI + + The base URI (Base) is established according to the procedure of + Section 5.1 and parsed into the five main components described in + Section 3. Note that only the scheme component is required to be + present in a base URI; the other components may be empty or + undefined. A component is undefined if its associated delimiter does + not appear in the URI reference; the path component is never + undefined, though it may be empty. + + Normalization of the base URI, as described in Sections 6.2.2 and + 6.2.3, is optional. A URI reference must be transformed to its + target URI before it can be normalized. + +5.2.2. Transform References + + For each URI reference (R), the following pseudocode describes an + algorithm for transforming R into its target URI (T): + + -- The URI reference is parsed into the five URI components + -- + (R.scheme, R.authority, R.path, R.query, R.fragment) = parse(R); + + -- A non-strict parser may ignore a scheme in the reference + -- if it is identical to the base URI's scheme. + -- + if ((not strict) and (R.scheme == Base.scheme)) then + undefine(R.scheme); + endif; + + + + + + + + + + + + + + + + + + + + + + +Berners-Lee, et al. 
Standards Track [Page 31] + +RFC 3986 URI Generic Syntax January 2005 + + + if defined(R.scheme) then + T.scheme = R.scheme; + T.authority = R.authority; + T.path = remove_dot_segments(R.path); + T.query = R.query; + else + if defined(R.authority) then + T.authority = R.authority; + T.path = remove_dot_segments(R.path); + T.query = R.query; + else + if (R.path == "") then + T.path = Base.path; + if defined(R.query) then + T.query = R.query; + else + T.query = Base.query; + endif; + else + if (R.path starts-with "/") then + T.path = remove_dot_segments(R.path); + else + T.path = merge(Base.path, R.path); + T.path = remove_dot_segments(T.path); + endif; + T.query = R.query; + endif; + T.authority = Base.authority; + endif; + T.scheme = Base.scheme; + endif; + + T.fragment = R.fragment; + +5.2.3. Merge Paths + + The pseudocode above refers to a "merge" routine for merging a + relative-path reference with the path of the base URI. This is + accomplished as follows: + + o If the base URI has a defined authority component and an empty + path, then return a string consisting of "/" concatenated with the + reference's path; otherwise, + + + + + + + + +Berners-Lee, et al. Standards Track [Page 32] + +RFC 3986 URI Generic Syntax January 2005 + + + o return a string consisting of the reference's path component + appended to all but the last segment of the base URI's path (i.e., + excluding any characters after the right-most "/" in the base URI + path, or excluding the entire base URI path if it does not contain + any "/" characters). + +5.2.4. Remove Dot Segments + + The pseudocode also refers to a "remove_dot_segments" routine for + interpreting and removing the special "." and ".." complete path + segments from a referenced path. This is done after the path is + extracted from a reference, whether or not the path was relative, in + order to remove any invalid or extraneous dot-segments prior to + forming the target URI. 
Although there are many ways to accomplish + this removal process, we describe a simple method using two string + buffers. + + 1. The input buffer is initialized with the now-appended path + components and the output buffer is initialized to the empty + string. + + 2. While the input buffer is not empty, loop as follows: + + A. If the input buffer begins with a prefix of "../" or "./", + then remove that prefix from the input buffer; otherwise, + + B. if the input buffer begins with a prefix of "/./" or "/.", + where "." is a complete path segment, then replace that + prefix with "/" in the input buffer; otherwise, + + C. if the input buffer begins with a prefix of "/../" or "/..", + where ".." is a complete path segment, then replace that + prefix with "/" in the input buffer and remove the last + segment and its preceding "/" (if any) from the output + buffer; otherwise, + + D. if the input buffer consists only of "." or "..", then remove + that from the input buffer; otherwise, + + E. move the first path segment in the input buffer to the end of + the output buffer, including the initial "/" character (if + any) and any subsequent characters up to, but not including, + the next "/" character or the end of the input buffer. + + 3. Finally, the output buffer is returned as the result of + remove_dot_segments. + + + + + +Berners-Lee, et al. Standards Track [Page 33] + +RFC 3986 URI Generic Syntax January 2005 + + + Note that dot-segments are intended for use in URI references to + express an identifier relative to the hierarchy of names in the base + URI. The remove_dot_segments algorithm respects that hierarchy by + removing extra dot-segments rather than treat them as an error or + leaving them to be misinterpreted by dereference implementations. + + The following illustrates how the above steps are applied for two + examples of merged paths, showing the state of the two buffers after + each step. 
+ + STEP OUTPUT BUFFER INPUT BUFFER + + 1 : /a/b/c/./../../g + 2E: /a /b/c/./../../g + 2E: /a/b /c/./../../g + 2E: /a/b/c /./../../g + 2B: /a/b/c /../../g + 2C: /a/b /../g + 2C: /a /g + 2E: /a/g + + STEP OUTPUT BUFFER INPUT BUFFER + + 1 : mid/content=5/../6 + 2E: mid /content=5/../6 + 2E: mid/content=5 /../6 + 2C: mid /6 + 2E: mid/6 + + Some applications may find it more efficient to implement the + remove_dot_segments algorithm by using two segment stacks rather than + strings. + + Note: Beware that some older, erroneous implementations will fail + to separate a reference's query component from its path component + prior to merging the base and reference paths, resulting in an + interoperability failure if the query component contains the + strings "/../" or "/./". + + + + + + + + + + + + + +Berners-Lee, et al. Standards Track [Page 34] + +RFC 3986 URI Generic Syntax January 2005 + + +5.3. Component Recomposition + + Parsed URI components can be recomposed to obtain the corresponding + URI reference string. Using pseudocode, this would be: + + result = "" + + if defined(scheme) then + append scheme to result; + append ":" to result; + endif; + + if defined(authority) then + append "//" to result; + append authority to result; + endif; + + append path to result; + + if defined(query) then + append "?" to result; + append query to result; + endif; + + if defined(fragment) then + append "#" to result; + append fragment to result; + endif; + + return result; + + Note that we are careful to preserve the distinction between a + component that is undefined, meaning that its separator was not + present in the reference, and a component that is empty, meaning that + the separator was present and was immediately followed by the next + component separator or the end of the reference. + +5.4. Reference Resolution Examples + + Within a representation with a well defined base URI of + + http://a/b/c/d;p?q + + a relative reference is transformed to its target URI as follows. 
+ + + + + + + +Berners-Lee, et al. Standards Track [Page 35] + +RFC 3986 URI Generic Syntax January 2005 + + +5.4.1. Normal Examples + + "g:h" = "g:h" + "g" = "http://a/b/c/g" + "./g" = "http://a/b/c/g" + "g/" = "http://a/b/c/g/" + "/g" = "http://a/g" + "//g" = "http://g" + "?y" = "http://a/b/c/d;p?y" + "g?y" = "http://a/b/c/g?y" + "#s" = "http://a/b/c/d;p?q#s" + "g#s" = "http://a/b/c/g#s" + "g?y#s" = "http://a/b/c/g?y#s" + ";x" = "http://a/b/c/;x" + "g;x" = "http://a/b/c/g;x" + "g;x?y#s" = "http://a/b/c/g;x?y#s" + "" = "http://a/b/c/d;p?q" + "." = "http://a/b/c/" + "./" = "http://a/b/c/" + ".." = "http://a/b/" + "../" = "http://a/b/" + "../g" = "http://a/b/g" + "../.." = "http://a/" + "../../" = "http://a/" + "../../g" = "http://a/g" + +5.4.2. Abnormal Examples + + Although the following abnormal examples are unlikely to occur in + normal practice, all URI parsers should be capable of resolving them + consistently. Each example uses the same base as that above. + + Parsers must be careful in handling cases where there are more ".." + segments in a relative-path reference than there are hierarchical + levels in the base URI's path. Note that the ".." syntax cannot be + used to change the authority component of a URI. + + "../../../g" = "http://a/g" + "../../../../g" = "http://a/g" + + + + + + + + + + + + +Berners-Lee, et al. Standards Track [Page 36] + +RFC 3986 URI Generic Syntax January 2005 + + + Similarly, parsers must remove the dot-segments "." and ".." when + they are complete components of a path, but not when they are only + part of a segment. + + "/./g" = "http://a/g" + "/../g" = "http://a/g" + "g." = "http://a/b/c/g." + ".g" = "http://a/b/c/.g" + "g.." = "http://a/b/c/g.." + "..g" = "http://a/b/c/..g" + + Less likely are cases where the relative reference uses unnecessary + or nonsensical forms of the "." and ".." complete path segments. + + "./../g" = "http://a/b/g" + "./g/." 
= "http://a/b/c/g/" + "g/./h" = "http://a/b/c/g/h" + "g/../h" = "http://a/b/c/h" + "g;x=1/./y" = "http://a/b/c/g;x=1/y" + "g;x=1/../y" = "http://a/b/c/y" + + Some applications fail to separate the reference's query and/or + fragment components from the path component before merging it with + the base path and removing dot-segments. This error is rarely + noticed, as typical usage of a fragment never includes the hierarchy + ("/") character and the query component is not normally used within + relative references. + + "g?y/./x" = "http://a/b/c/g?y/./x" + "g?y/../x" = "http://a/b/c/g?y/../x" + "g#s/./x" = "http://a/b/c/g#s/./x" + "g#s/../x" = "http://a/b/c/g#s/../x" + + Some parsers allow the scheme name to be present in a relative + reference if it is the same as the base URI scheme. This is + considered to be a loophole in prior specifications of partial URI + [RFC1630]. Its use should be avoided but is allowed for backward + compatibility. + + "http:g" = "http:g" ; for strict parsers + / "http://a/b/c/g" ; for backward compatibility + + + + + + + + + + +Berners-Lee, et al. Standards Track [Page 37] + +RFC 3986 URI Generic Syntax January 2005 + + +6. Normalization and Comparison + + One of the most common operations on URIs is simple comparison: + determining whether two URIs are equivalent without using the URIs to + access their respective resource(s). A comparison is performed every + time a response cache is accessed, a browser checks its history to + color a link, or an XML parser processes tags within a namespace. + Extensive normalization prior to comparison of URIs is often used by + spiders and indexing engines to prune a search space or to reduce + duplication of request actions and response storage. + + URI comparison is performed for some particular purpose. 
Protocols + or implementations that compare URIs for different purposes will + often be subject to differing design trade-offs in regards to how + much effort should be spent in reducing aliased identifiers. This + section describes various methods that may be used to compare URIs, + the trade-offs between them, and the types of applications that might + use them. + +6.1. Equivalence + + Because URIs exist to identify resources, presumably they should be + considered equivalent when they identify the same resource. However, + this definition of equivalence is not of much practical use, as there + is no way for an implementation to compare two resources unless it + has full knowledge or control of them. For this reason, + determination of equivalence or difference of URIs is based on string + comparison, perhaps augmented by reference to additional rules + provided by URI scheme definitions. We use the terms "different" and + "equivalent" to describe the possible outcomes of such comparisons, + but there are many application-dependent versions of equivalence. + + Even though it is possible to determine that two URIs are equivalent, + URI comparison is not sufficient to determine whether two URIs + identify different resources. For example, an owner of two different + domain names could decide to serve the same resource from both, + resulting in two different URIs. Therefore, comparison methods are + designed to minimize false negatives while strictly avoiding false + positives. + + In testing for equivalence, applications should not directly compare + relative references; the references should be converted to their + respective target URIs before comparison. When URIs are compared to + select (or avoid) a network action, such as retrieval of a + representation, fragment components (if any) should be excluded from + the comparison. + + + + + +Berners-Lee, et al. Standards Track [Page 38] + +RFC 3986 URI Generic Syntax January 2005 + + +6.2. 
Comparison Ladder + + A variety of methods are used in practice to test URI equivalence. + These methods fall into a range, distinguished by the amount of + processing required and the degree to which the probability of false + negatives is reduced. As noted above, false negatives cannot be + eliminated. In practice, their probability can be reduced, but this + reduction requires more processing and is not cost-effective for all + applications. + + If this range of comparison practices is considered as a ladder, the + following discussion will climb the ladder, starting with practices + that are cheap but have a relatively higher chance of producing false + negatives, and proceeding to those that have higher computational + cost and lower risk of false negatives. + +6.2.1. Simple String Comparison + + If two URIs, when considered as character strings, are identical, + then it is safe to conclude that they are equivalent. This type of + equivalence test has very low computational cost and is in wide use + in a variety of applications, particularly in the domain of parsing. + + Testing strings for equivalence requires some basic precautions. + This procedure is often referred to as "bit-for-bit" or + "byte-for-byte" comparison, which is potentially misleading. Testing + strings for equality is normally based on pair comparison of the + characters that make up the strings, starting from the first and + proceeding until both strings are exhausted and all characters are + found to be equal, until a pair of characters compares unequal, or + until one of the strings is exhausted before the other. + + This character comparison requires that each pair of characters be + put in comparable form. For example, should one URI be stored in a + byte array in EBCDIC encoding and the second in a Java String object + (UTF-16), bit-for-bit comparisons applied naively will produce + errors. 
It is better to speak of equality on a character-for- + character basis rather than on a byte-for-byte or bit-for-bit basis. + In practical terms, character-by-character comparisons should be done + codepoint-by-codepoint after conversion to a common character + encoding. + + False negatives are caused by the production and use of URI aliases. + Unnecessary aliases can be reduced, regardless of the comparison + method, by consistently providing URI references in an already- + normalized form (i.e., a form identical to what would be produced + after normalization is applied, as described below). + + + + +Berners-Lee, et al. Standards Track [Page 39] + +RFC 3986 URI Generic Syntax January 2005 + + + Protocols and data formats often limit some URI comparisons to simple + string comparison, based on the theory that people and + implementations will, in their own best interest, be consistent in + providing URI references, or at least consistent enough to negate any + efficiency that might be obtained from further normalization. + +6.2.2. Syntax-Based Normalization + + Implementations may use logic based on the definitions provided by + this specification to reduce the probability of false negatives. + This processing is moderately higher in cost than character-for- + character string comparison. For example, an application using this + approach could reasonably consider the following two URIs equivalent: + + example://a/b/c/%7Bfoo%7D + eXAMPLE://a/./b/../b/%63/%7bfoo%7d + + Web user agents, such as browsers, typically apply this type of URI + normalization when determining whether a cached response is + available. Syntax-based normalization includes such techniques as + case normalization, percent-encoding normalization, and removal of + dot-segments. + +6.2.2.1. 
Case Normalization + + For all URIs, the hexadecimal digits within a percent-encoding + triplet (e.g., "%3a" versus "%3A") are case-insensitive and therefore + should be normalized to use uppercase letters for the digits A-F. + + When a URI uses components of the generic syntax, the component + syntax equivalence rules always apply; namely, that the scheme and + host are case-insensitive and therefore should be normalized to + lowercase. For example, the URI <HTTP://www.EXAMPLE.com/> is + equivalent to <http://www.example.com/>. The other generic syntax + components are assumed to be case-sensitive unless specifically + defined otherwise by the scheme (see Section 6.2.3). + +6.2.2.2. Percent-Encoding Normalization + + The percent-encoding mechanism (Section 2.1) is a frequent source of + variance among otherwise identical URIs. In addition to the case + normalization issue noted above, some URI producers percent-encode + octets that do not require percent-encoding, resulting in URIs that + are equivalent to their non-encoded counterparts. These URIs should + be normalized by decoding any percent-encoded octet that corresponds + to an unreserved character, as described in Section 2.3. + + + + + +Berners-Lee, et al. Standards Track [Page 40] + +RFC 3986 URI Generic Syntax January 2005 + + +6.2.2.3. Path Segment Normalization + + The complete path segments "." and ".." are intended only for use + within relative references (Section 4.1) and are removed as part of + the reference resolution process (Section 5.2). However, some + deployed implementations incorrectly assume that reference resolution + is not necessary when the reference is already a URI and thus fail to + remove dot-segments when they occur in non-relative paths. URI + normalizers should remove dot-segments by applying the + remove_dot_segments algorithm to the path, as described in + Section 5.2.4. + +6.2.3.
Scheme-Based Normalization + + The syntax and semantics of URIs vary from scheme to scheme, as + described by the defining specification for each scheme. + Implementations may use scheme-specific rules, at further processing + cost, to reduce the probability of false negatives. For example, + because the "http" scheme makes use of an authority component, has a + default port of "80", and defines an empty path to be equivalent to + "/", the following four URIs are equivalent: + + http://example.com + http://example.com/ + http://example.com:/ + http://example.com:80/ + + In general, a URI that uses the generic syntax for authority with an + empty path should be normalized to a path of "/". Likewise, an + explicit ":port", for which the port is empty or the default for the + scheme, is equivalent to one where the port and its ":" delimiter are + elided and thus should be removed by scheme-based normalization. For + example, the second URI above is the normal form for the "http" + scheme. + + Another case where normalization varies by scheme is in the handling + of an empty authority component or empty host subcomponent. For many + scheme specifications, an empty authority or host is considered an + error; for others, it is considered equivalent to "localhost" or the + end-user's host. When a scheme defines a default for authority and a + URI reference to that default is desired, the reference should be + normalized to an empty authority for the sake of uniformity, brevity, + and internationalization. If, however, either the userinfo or port + subcomponents are non-empty, then the host should be given explicitly + even if it matches the default. + + Normalization should not remove delimiters when their associated + component is empty unless licensed to do so by the scheme + + + +Berners-Lee, et al. Standards Track [Page 41] + +RFC 3986 URI Generic Syntax January 2005 + + + specification. For example, the URI "http://example.com/?" 
cannot be + assumed to be equivalent to any of the examples above. Likewise, the + presence or absence of delimiters within a userinfo subcomponent is + usually significant to its interpretation. The fragment component is + not subject to any scheme-based normalization; thus, two URIs that + differ only by the suffix "#" are considered different regardless of + the scheme. + + Some schemes define additional subcomponents that consist of case- + insensitive data, giving an implicit license to normalizers to + convert this data to a common case (e.g., all lowercase). For + example, URI schemes that define a subcomponent of path to contain an + Internet hostname, such as the "mailto" URI scheme, cause that + subcomponent to be case-insensitive and thus subject to case + normalization (e.g., "mailto:Joe@Example.COM" is equivalent to + "mailto:Joe@example.com", even though the generic syntax considers + the path component to be case-sensitive). + + Other scheme-specific normalizations are possible. + +6.2.4. Protocol-Based Normalization + + Substantial effort to reduce the incidence of false negatives is + often cost-effective for web spiders. Therefore, they implement even + more aggressive techniques in URI comparison. For example, if they + observe that a URI such as + + http://example.com/data + + redirects to a URI differing only in the trailing slash + + http://example.com/data/ + + they will likely regard the two as equivalent in the future. This + kind of technique is only appropriate when equivalence is clearly + indicated by both the result of accessing the resources and the + common conventions of their scheme's dereference algorithm (in this + case, use of redirection by HTTP origin servers to avoid problems + with relative references). + + + + + + + + + + + + +Berners-Lee, et al. Standards Track [Page 42] + +RFC 3986 URI Generic Syntax January 2005 + + +7. Security Considerations + + A URI does not in itself pose a security threat. 
However, as URIs + are often used to provide a compact set of instructions for access to + network resources, care must be taken to properly interpret the data + within a URI, to prevent that data from causing unintended access, + and to avoid including data that should not be revealed in plain + text. + +7.1. Reliability and Consistency + + There is no guarantee that once a URI has been used to retrieve + information, the same information will be retrievable by that URI in + the future. Nor is there any guarantee that the information + retrievable via that URI in the future will be observably similar to + that retrieved in the past. The URI syntax does not constrain how a + given scheme or authority apportions its namespace or maintains it + over time. Such guarantees can only be obtained from the person(s) + controlling that namespace and the resource in question. A specific + URI scheme may define additional semantics, such as name persistence, + if those semantics are required of all naming authorities for that + scheme. + +7.2. Malicious Construction + + It is sometimes possible to construct a URI so that an attempt to + perform a seemingly harmless, idempotent operation, such as the + retrieval of a representation, will in fact cause a possibly damaging + remote operation. The unsafe URI is typically constructed by + specifying a port number other than that reserved for the network + protocol in question. The client unwittingly contacts a site running + a different protocol service, and data within the URI contains + instructions that, when interpreted according to this other protocol, + cause an unexpected operation. A frequent example of such abuse has + been the use of a protocol-based scheme with a port component of + "25", thereby fooling user agent software into sending an unintended + or impersonating message via an SMTP server. 
+ + Applications should prevent dereference of a URI that specifies a TCP + port number within the "well-known port" range (0 - 1023) unless the + protocol being used to dereference that URI is compatible with the + protocol expected on that well-known port. Although IANA maintains a + registry of well-known ports, applications should make such + restrictions user-configurable to avoid preventing the deployment of + new services. + + + + + + +Berners-Lee, et al. Standards Track [Page 43] + +RFC 3986 URI Generic Syntax January 2005 + + + When a URI contains percent-encoded octets that match the delimiters + for a given resolution or dereference protocol (for example, CR and + LF characters for the TELNET protocol), these percent-encodings must + not be decoded before transmission across that protocol. Transfer of + the percent-encoding, which might violate the protocol, is less + harmful than allowing decoded octets to be interpreted as additional + operations or parameters, perhaps triggering an unexpected and + possibly harmful remote operation. + +7.3. Back-End Transcoding + + When a URI is dereferenced, the data within it is often parsed by + both the user agent and one or more servers. In HTTP, for example, a + typical user agent will parse a URI into its five major components, + access the authority's server, and send it the data within the + authority, path, and query components. A typical server will take + that information, parse the path into segments and the query into + key/value pairs, and then invoke implementation-specific handlers to + respond to the request. As a result, a common security concern for + server implementations that handle a URI, either as a whole or split + into separate components, is proper interpretation of the octet data + represented by the characters and percent-encodings within that URI. + + Percent-encoded octets must be decoded at some point during the + dereference process. 
Applications must split the URI into its + components and subcomponents prior to decoding the octets, as + otherwise the decoded octets might be mistaken for delimiters. + Security checks of the data within a URI should be applied after + decoding the octets. Note, however, that the "%00" percent-encoding + (NUL) may require special handling and should be rejected if the + application is not expecting to receive raw data within a component. + + Special care should be taken when the URI path interpretation process + involves the use of a back-end file system or related system + functions. File systems typically assign an operational meaning to + special characters, such as the "/", "\", ":", "[", and "]" + characters, and to special device names like ".", "..", "...", "aux", + "lpt", etc. In some cases, merely testing for the existence of such + a name will cause the operating system to pause or invoke unrelated + system calls, leading to significant security concerns regarding + denial of service and unintended data transfer. It would be + impossible for this specification to list all such significant + characters and device names. Implementers should research the + reserved names and characters for the types of storage device that + may be attached to their applications and restrict the use of data + obtained from URI components accordingly. + + + + + +Berners-Lee, et al. Standards Track [Page 44] + +RFC 3986 URI Generic Syntax January 2005 + + +7.4. Rare IP Address Formats + + Although the URI syntax for IPv4address only allows the common + dotted-decimal form of IPv4 address literal, many implementations + that process URIs make use of platform-dependent system routines, + such as gethostbyname() and inet_aton(), to translate the string + literal to an actual IP address. Unfortunately, such system routines + often allow and process a much larger set of formats than those + described in Section 3.2.2. 
+ + For example, many implementations allow dotted forms of three + numbers, wherein the last part is interpreted as a 16-bit quantity + and placed in the right-most two bytes of the network address (e.g., + a Class B network). Likewise, a dotted form of two numbers means + that the last part is interpreted as a 24-bit quantity and placed in + the right-most three bytes of the network address (Class A), and a + single number (without dots) is interpreted as a 32-bit quantity and + stored directly in the network address. Adding further to the + confusion, some implementations allow each dotted part to be + interpreted as decimal, octal, or hexadecimal, as specified in the C + language (i.e., a leading 0x or 0X implies hexadecimal; a leading 0 + implies octal; otherwise, the number is interpreted as decimal). + + These additional IP address formats are not allowed in the URI syntax + due to differences between platform implementations. However, they + can become a security concern if an application attempts to filter + access to resources based on the IP address in string literal format. + If this filtering is performed, literals should be converted to + numeric form and filtered based on the numeric value, and not on a + prefix or suffix of the string form. + +7.5. Sensitive Information + + URI producers should not provide a URI that contains a username or + password that is intended to be secret. URIs are frequently + displayed by browsers, stored in clear text bookmarks, and logged by + user agent history and intermediary applications (proxies). A + password appearing within the userinfo component is deprecated and + should be considered an error (or simply ignored) except in those + rare cases where the 'password' parameter is intended to be public. + +7.6. 
Semantic Attacks + + Because the userinfo subcomponent is rarely used and appears before + the host in the authority component, it can be used to construct a + URI intended to mislead a human user by appearing to identify one + (trusted) naming authority while actually identifying a different + authority hidden behind the noise. For example + + + +Berners-Lee, et al. Standards Track [Page 45] + +RFC 3986 URI Generic Syntax January 2005 + + + ftp://cnn.example.com&story=breaking_news@10.0.0.1/top_story.htm + + might lead a human user to assume that the host is 'cnn.example.com', + whereas it is actually '10.0.0.1'. Note that a misleading userinfo + subcomponent could be much longer than the example above. + + A misleading URI, such as that above, is an attack on the user's + preconceived notions about the meaning of a URI rather than an attack + on the software itself. User agents may be able to reduce the impact + of such attacks by distinguishing the various components of the URI + when they are rendered, such as by using a different color or tone to + render userinfo if any is present, though there is no panacea. More + information on URI-based semantic attacks can be found in [Siedzik]. + +8. IANA Considerations + + URI scheme names, as defined by <scheme> in Section 3.1, form a + registered namespace that is managed by IANA according to the + procedures defined in [BCP35]. No IANA actions are required by this + document. + +9. Acknowledgements + + This specification is derived from RFC 2396 [RFC2396], RFC 1808 + [RFC1808], and RFC 1738 [RFC1738]; the acknowledgements in those + documents still apply. It also incorporates the update (with + corrections) for IPv6 literals in the host syntax, as defined by + Robert M. Hinden, Brian E. Carpenter, and Larry Masinter in + [RFC2732]. In addition, contributions by Gisle Aas, Reese Anschultz, + Daniel Barclay, Tim Bray, Mike Brown, Rob Cameron, Jeremy Carroll, + Dan Connolly, Adam M.
Costello, John Cowan, Jason Diamond, Martin + Duerst, Stefan Eissing, Clive D.W. Feather, Al Gilman, Tony Hammond, + Elliotte Harold, Pat Hayes, Henry Holtzman, Ian B. Jacobs, Michael + Kay, John C. Klensin, Graham Klyne, Dan Kohn, Bruce Lilly, Andrew + Main, Dave McAlpin, Ira McDonald, Michael Mealling, Ray Merkert, + Stephen Pollei, Julian Reschke, Tomas Rokicki, Miles Sabin, Kai + Schaetzl, Mark Thomson, Ronald Tschalaer, Norm Walsh, Marc Warne, + Stuart Williams, and Henry Zongaro are gratefully acknowledged. + +10. References + +10.1. Normative References + + [ASCII] American National Standards Institute, "Coded Character + Set -- 7-bit American Standard Code for Information + Interchange", ANSI X3.4, 1986. + + + + + +Berners-Lee, et al. Standards Track [Page 46] + +RFC 3986 URI Generic Syntax January 2005 + + + [RFC2234] Crocker, D. and P. Overell, "Augmented BNF for Syntax + Specifications: ABNF", RFC 2234, November 1997. + + [STD63] Yergeau, F., "UTF-8, a transformation format of + ISO 10646", STD 63, RFC 3629, November 2003. + + [UCS] International Organization for Standardization, + "Information Technology - Universal Multiple-Octet Coded + Character Set (UCS)", ISO/IEC 10646:2003, December 2003. + +10.2. Informative References + + [BCP19] Freed, N. and J. Postel, "IANA Charset Registration + Procedures", BCP 19, RFC 2978, October 2000. + + [BCP35] Petke, R. and I. King, "Registration Procedures for URL + Scheme Names", BCP 35, RFC 2717, November 1999. + + [RFC0952] Harrenstien, K., Stahl, M., and E. Feinler, "DoD Internet + host table specification", RFC 952, October 1985. + + [RFC1034] Mockapetris, P., "Domain names - concepts and facilities", + STD 13, RFC 1034, November 1987. + + [RFC1123] Braden, R., "Requirements for Internet Hosts - Application + and Support", STD 3, RFC 1123, October 1989. + + [RFC1535] Gavron, E., "A Security Problem and Proposed Correction + With Widely Deployed DNS Software", RFC 1535, + October 1993. 
+ + [RFC1630] Berners-Lee, T., "Universal Resource Identifiers in WWW: A + Unifying Syntax for the Expression of Names and Addresses + of Objects on the Network as used in the World-Wide Web", + RFC 1630, June 1994. + + [RFC1736] Kunze, J., "Functional Recommendations for Internet + Resource Locators", RFC 1736, February 1995. + + [RFC1737] Sollins, K. and L. Masinter, "Functional Requirements for + Uniform Resource Names", RFC 1737, December 1994. + + [RFC1738] Berners-Lee, T., Masinter, L., and M. McCahill, "Uniform + Resource Locators (URL)", RFC 1738, December 1994. + + [RFC1808] Fielding, R., "Relative Uniform Resource Locators", + RFC 1808, June 1995. + + + + +Berners-Lee, et al. Standards Track [Page 47] + +RFC 3986 URI Generic Syntax January 2005 + + + [RFC2046] Freed, N. and N. Borenstein, "Multipurpose Internet Mail + Extensions (MIME) Part Two: Media Types", RFC 2046, + November 1996. + + [RFC2141] Moats, R., "URN Syntax", RFC 2141, May 1997. + + [RFC2396] Berners-Lee, T., Fielding, R., and L. Masinter, "Uniform + Resource Identifiers (URI): Generic Syntax", RFC 2396, + August 1998. + + [RFC2518] Goland, Y., Whitehead, E., Faizi, A., Carter, S., and D. + Jensen, "HTTP Extensions for Distributed Authoring -- + WEBDAV", RFC 2518, February 1999. + + [RFC2557] Palme, J., Hopmann, A., and N. Shelness, "MIME + Encapsulation of Aggregate Documents, such as HTML + (MHTML)", RFC 2557, March 1999. + + [RFC2718] Masinter, L., Alvestrand, H., Zigmond, D., and R. Petke, + "Guidelines for new URL Schemes", RFC 2718, November 1999. + + [RFC2732] Hinden, R., Carpenter, B., and L. Masinter, "Format for + Literal IPv6 Addresses in URL's", RFC 2732, December 1999. + + [RFC3305] Mealling, M. and R. Denenberg, "Report from the Joint + W3C/IETF URI Planning Interest Group: Uniform Resource + Identifiers (URIs), URLs, and Uniform Resource Names + (URNs): Clarifications and Recommendations", RFC 3305, + August 2002. + + [RFC3490] Faltstrom, P., Hoffman, P., and A. 
Costello, + "Internationalizing Domain Names in Applications (IDNA)", + RFC 3490, March 2003. + + [RFC3513] Hinden, R. and S. Deering, "Internet Protocol Version 6 + (IPv6) Addressing Architecture", RFC 3513, April 2003. + + [Siedzik] Siedzik, R., "Semantic Attacks: What's in a URL?", + April 2001, <http://www.giac.org/practical/gsec/ + Ryan_Siedzik_GSEC.pdf>. + + + + + + + + + + + +Berners-Lee, et al. Standards Track [Page 48] + +RFC 3986 URI Generic Syntax January 2005 + + +Appendix A. Collected ABNF for URI + + URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] + + hier-part = "//" authority path-abempty + / path-absolute + / path-rootless + / path-empty + + URI-reference = URI / relative-ref + + absolute-URI = scheme ":" hier-part [ "?" query ] + + relative-ref = relative-part [ "?" query ] [ "#" fragment ] + + relative-part = "//" authority path-abempty + / path-absolute + / path-noscheme + / path-empty + + scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) + + authority = [ userinfo "@" ] host [ ":" port ] + userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) + host = IP-literal / IPv4address / reg-name + port = *DIGIT + + IP-literal = "[" ( IPv6address / IPvFuture ) "]" + + IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ) + + IPv6address = 6( h16 ":" ) ls32 + / "::" 5( h16 ":" ) ls32 + / [ h16 ] "::" 4( h16 ":" ) ls32 + / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32 + / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32 + / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32 + / [ *4( h16 ":" ) h16 ] "::" ls32 + / [ *5( h16 ":" ) h16 ] "::" h16 + / [ *6( h16 ":" ) h16 ] "::" + + h16 = 1*4HEXDIG + ls32 = ( h16 ":" h16 ) / IPv4address + IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet + + + + + + + +Berners-Lee, et al. 
Standards Track [Page 49] + +RFC 3986 URI Generic Syntax January 2005 + + + dec-octet = DIGIT ; 0-9 + / %x31-39 DIGIT ; 10-99 + / "1" 2DIGIT ; 100-199 + / "2" %x30-34 DIGIT ; 200-249 + / "25" %x30-35 ; 250-255 + + reg-name = *( unreserved / pct-encoded / sub-delims ) + + path = path-abempty ; begins with "/" or is empty + / path-absolute ; begins with "/" but not "//" + / path-noscheme ; begins with a non-colon segment + / path-rootless ; begins with a segment + / path-empty ; zero characters + + path-abempty = *( "/" segment ) + path-absolute = "/" [ segment-nz *( "/" segment ) ] + path-noscheme = segment-nz-nc *( "/" segment ) + path-rootless = segment-nz *( "/" segment ) + path-empty = 0<pchar> + + segment = *pchar + segment-nz = 1*pchar + segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) + ; non-zero-length segment without any colon ":" + + pchar = unreserved / pct-encoded / sub-delims / ":" / "@" + + query = *( pchar / "/" / "?" ) + + fragment = *( pchar / "/" / "?" ) + + pct-encoded = "%" HEXDIG HEXDIG + + unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" + reserved = gen-delims / sub-delims + gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" + sub-delims = "!" / "$" / "&" / "'" / "(" / ")" + / "*" / "+" / "," / ";" / "=" + +Appendix B. Parsing a URI Reference with a Regular Expression + + As the "first-match-wins" algorithm is identical to the "greedy" + disambiguation method used by POSIX regular expressions, it is + natural and commonplace to use a regular expression for parsing the + potential five components of a URI reference. + + The following line is the regular expression for breaking-down a + well-formed URI reference into its components. + + + +Berners-Lee, et al. Standards Track [Page 50] + +RFC 3986 URI Generic Syntax January 2005 + + + ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))? 
+ 12 3 4 5 6 7 8 9 + + The numbers in the second line above are only to assist readability; + they indicate the reference points for each subexpression (i.e., each + paired parenthesis). We refer to the value matched for subexpression <n> + as $<n>. For example, matching the above expression to + + http://www.ics.uci.edu/pub/ietf/uri/#Related + + results in the following subexpression matches: + + $1 = http: + $2 = http + $3 = //www.ics.uci.edu + $4 = www.ics.uci.edu + $5 = /pub/ietf/uri/ + $6 = <undefined> + $7 = <undefined> + $8 = #Related + $9 = Related + + where <undefined> indicates that the component is not present, as is + the case for the query component in the above example. Therefore, we + can determine the value of the five components as + + scheme = $2 + authority = $4 + path = $5 + query = $7 + fragment = $9 + + Going in the opposite direction, we can recreate a URI reference from + its components by using the algorithm of Section 5.3. + +Appendix C. Delimiting a URI in Context + + URIs are often transmitted through formats that do not provide a + clear context for their interpretation. For example, there are many + occasions when a URI is included in plain text; examples include text + sent in email, USENET news, and on printed paper. In such cases, it + is important to be able to delimit the URI from the rest of the text, + and in particular from punctuation marks that might be mistaken for + part of the URI. + + In practice, URIs are delimited in a variety of ways, but usually + within double-quotes "http://example.com/", angle brackets + <http://example.com/>, or just by using whitespace: + + + +Berners-Lee, et al. Standards Track [Page 51] + +RFC 3986 URI Generic Syntax January 2005 + + + http://example.com/ + + These wrappers do not form part of the URI. + + In some cases, extra whitespace (spaces, line-breaks, tabs, etc.) may + have to be added to break a long URI across lines. The whitespace + should be ignored when the URI is extracted. + + No whitespace should be introduced after a hyphen ("-") character. 
+ Because some typesetters and printers may (erroneously) introduce a + hyphen at the end of line when breaking it, the interpreter of a URI + containing a line break immediately after a hyphen should ignore all + whitespace around the line break and should be aware that the hyphen + may or may not actually be part of the URI. + + Using <> angle brackets around each URI is especially recommended as + a delimiting style for a reference that contains embedded whitespace. + + The prefix "URL:" (with or without a trailing space) was formerly + recommended as a way to help distinguish a URI from other bracketed + designators, though it is not commonly used in practice and is no + longer recommended. + + For robustness, software that accepts user-typed URI should attempt + to recognize and strip both delimiters and embedded whitespace. + + For example, the text + + Yes, Jim, I found it under "http://www.w3.org/Addressing/", + but you can probably pick it up from <ftp://foo.example. + com/rfc/>. Note the warning in <http://www.ics.uci.edu/pub/ + ietf/uri/historical.html#WARNING>. + + contains the URI references + + http://www.w3.org/Addressing/ + ftp://foo.example.com/rfc/ + http://www.ics.uci.edu/pub/ietf/uri/historical.html#WARNING + + + + + + + + + + + + + +Berners-Lee, et al. Standards Track [Page 52] + +RFC 3986 URI Generic Syntax January 2005 + + +Appendix D. Changes from RFC 2396 + +D.1. Additions + + An ABNF rule for URI has been introduced to correspond to one common + usage of the term: an absolute URI with optional fragment. + + IPv6 (and later) literals have been added to the list of possible + identifiers for the host portion of an authority component, as + described by [RFC2732], with the addition of "[" and "]" to the + reserved set and a version flag to anticipate future versions of IP + literals. Square brackets are now specified as reserved within the + authority component and are not allowed outside their use as + delimiters for an IP literal within host. 
In order to make this + change without changing the technical definition of the path, query, + and fragment components, those rules were redefined to directly + specify the characters allowed. + + As [RFC2732] defers to [RFC3513] for definition of an IPv6 literal + address, which, unfortunately, lacks an ABNF description of + IPv6address, we created a new ABNF rule for IPv6address that matches + the text representations defined by Section 2.2 of [RFC3513]. + Likewise, the definition of IPv4address has been improved in order to + limit each decimal octet to the range 0-255. + + Section 6, on URI normalization and comparison, has been completely + rewritten and extended by using input from Tim Bray and discussion + within the W3C Technical Architecture Group. + +D.2. Modifications + + The ad-hoc BNF syntax of RFC 2396 has been replaced with the ABNF of + [RFC2234]. This change required all rule names that formerly + included underscore characters to be renamed with a dash instead. In + addition, a number of syntax rules have been eliminated or simplified + to make the overall grammar more comprehensible. Specifications that + refer to the obsolete grammar rules may be understood by replacing + those rules according to the following table: + + + + + + + + + + + + + +Berners-Lee, et al. Standards Track [Page 53] + +RFC 3986 URI Generic Syntax January 2005 + + + +----------------+--------------------------------------------------+ + | obsolete rule | translation | + +----------------+--------------------------------------------------+ + | absoluteURI | absolute-URI | + | relativeURI | relative-part [ "?" query ] | + | hier_part | ( "//" authority path-abempty / | + | | path-absolute ) [ "?" query ] | + | | | + | opaque_part | path-rootless [ "?" 
query ] | + | net_path | "//" authority path-abempty | + | abs_path | path-absolute | + | rel_path | path-rootless | + | rel_segment | segment-nz-nc | + | reg_name | reg-name | + | server | authority | + | hostport | host [ ":" port ] | + | hostname | reg-name | + | path_segments | path-abempty | + | param | *<pchar excluding ";"> | + | | | + | uric | unreserved / pct-encoded / ";" / "?" / ":" | + | | / "@" / "&" / "=" / "+" / "$" / "," / "/" | + | | | + | uric_no_slash | unreserved / pct-encoded / ";" / "?" / ":" | + | | / "@" / "&" / "=" / "+" / "$" / "," | + | | | + | mark | "-" / "_" / "." / "!" / "~" / "*" / "'" | + | | / "(" / ")" | + | | | + | escaped | pct-encoded | + | hex | HEXDIG | + | alphanum | ALPHA / DIGIT | + +----------------+--------------------------------------------------+ + + Use of the above obsolete rules for the definition of scheme-specific + syntax is deprecated. + + Section 2, on characters, has been rewritten to explain what + characters are reserved, when they are reserved, and why they are + reserved, even when they are not used as delimiters by the generic + syntax. The mark characters that are typically unsafe to decode, + including the exclamation mark ("!"), asterisk ("*"), single-quote + ("'"), and open and close parentheses ("(" and ")"), have been moved + to the reserved set in order to clarify the distinction between + reserved and unreserved and, hopefully, to answer the most common + question of scheme designers. Likewise, the section on + percent-encoded characters has been rewritten, and URI normalizers + are now given license to decode any percent-encoded octets + + + +Berners-Lee, et al. Standards Track [Page 54] + +RFC 3986 URI Generic Syntax January 2005 + + + corresponding to unreserved characters. In general, the terms + "escaped" and "unescaped" have been replaced with "percent-encoded" + and "decoded", respectively, to reduce confusion with other forms of + escape mechanisms. 
+ + The ABNF for URI and URI-reference has been redesigned to make them + more friendly to LALR parsers and to reduce complexity. As a result, + the layout form of syntax description has been removed, along with + the uric, uric_no_slash, opaque_part, net_path, abs_path, rel_path, + path_segments, rel_segment, and mark rules. All references to + "opaque" URIs have been replaced with a better description of how the + path component may be opaque to hierarchy. The relativeURI rule has + been replaced with relative-ref to avoid unnecessary confusion over + whether they are a subset of URI. The ambiguity regarding the + parsing of URI-reference as a URI or a relative-ref with a colon in + the first segment has been eliminated through the use of five + separate path matching rules. + + The fragment identifier has been moved back into the section on + generic syntax components and within the URI and relative-ref rules, + though it remains excluded from absolute-URI. The number sign ("#") + character has been moved back to the reserved set as a result of + reintegrating the fragment syntax. + + The ABNF has been corrected to allow the path component to be empty. + This also allows an absolute-URI to consist of nothing after the + "scheme:", as is present in practice with the "dav:" namespace + [RFC2518] and with the "about:" scheme used internally by many WWW + browser implementations. The ambiguity regarding the boundary + between authority and path has been eliminated through the use of + five separate path matching rules. + + Registry-based naming authorities that use the generic syntax are now + defined within the host rule. This change allows current + implementations, where whatever name provided is simply fed to the + local name resolution mechanism, to be consistent with the + specification. It also removes the need to re-specify DNS name + formats here. 
Furthermore, it allows the host component to contain + percent-encoded octets, which is necessary to enable + internationalized domain names to be provided in URIs, processed in + their native character encodings at the application layers above URI + processing, and passed to an IDNA library as a registered name in the + UTF-8 character encoding. The server, hostport, hostname, + domainlabel, toplabel, and alphanum rules have been removed. + + The resolving relative references algorithm of [RFC2396] has been + rewritten with pseudocode for this revision to improve clarity and + fix the following issues: + + + +Berners-Lee, et al. Standards Track [Page 55] + +RFC 3986 URI Generic Syntax January 2005 + + + o [RFC2396] section 5.2, step 6a, failed to account for a base URI + with no path. + + o Restored the behavior of [RFC1808] where, if the reference + contains an empty path and a defined query component, the target + URI inherits the base URI's path component. + + o The determination of whether a URI reference is a same-document + reference has been decoupled from the URI parser, simplifying the + URI processing interface within applications in a way consistent + with the internal architecture of deployed URI processing + implementations. The determination is now based on comparison to + the base URI after transforming a reference to absolute form, + rather than on the format of the reference itself. This change + may result in more references being considered "same-document" + under this specification than there would be under the rules given + in RFC 2396, especially when normalization is used to reduce + aliases. However, it does not change the status of existing + same-document references. + + o Separated the path merge routine into two routines: merge, for + describing combination of the base URI path with a relative-path + reference, and remove_dot_segments, for describing how to remove + the special "." and ".." segments from a composed path. 
The + remove_dot_segments algorithm is now applied to all URI reference + paths in order to match common implementations and to improve the + normalization of URIs in practice. This change only impacts the + parsing of abnormal references and same-scheme references wherein + the base URI has a non-hierarchical path. + +Index + + A + ABNF 11 + absolute 27 + absolute-path 26 + absolute-URI 27 + access 9 + authority 17, 18 + + B + base URI 28 + + C + character encoding 4 + character 4 + characters 8, 11 + coded character set 4 + + + +Berners-Lee, et al. Standards Track [Page 56] + +RFC 3986 URI Generic Syntax January 2005 + + + D + dec-octet 20 + dereference 9 + dot-segments 23 + + F + fragment 16, 24 + + G + gen-delims 13 + generic syntax 6 + + H + h16 20 + hier-part 16 + hierarchical 10 + host 18 + + I + identifier 5 + IP-literal 19 + IPv4 20 + IPv4address 19, 20 + IPv6 19 + IPv6address 19, 20 + IPvFuture 19 + + L + locator 7 + ls32 20 + + M + merge 32 + + N + name 7 + network-path 26 + + P + path 16, 22, 26 + path-abempty 22 + path-absolute 22 + path-empty 22 + path-noscheme 22 + path-rootless 22 + path-abempty 16, 22, 26 + path-absolute 16, 22, 26 + path-empty 16, 22, 26 + + + +Berners-Lee, et al. Standards Track [Page 57] + +RFC 3986 URI Generic Syntax January 2005 + + + path-rootless 16, 22 + pchar 23 + pct-encoded 12 + percent-encoding 12 + port 22 + + Q + query 16, 23 + + R + reg-name 21 + registered name 20 + relative 10, 28 + relative-path 26 + relative-ref 26 + remove_dot_segments 33 + representation 9 + reserved 12 + resolution 9, 28 + resource 5 + retrieval 9 + + S + same-document 27 + sameness 9 + scheme 16, 17 + segment 22, 23 + segment-nz 23 + segment-nz-nc 23 + sub-delims 13 + suffix 27 + + T + transcription 8 + + U + uniform 4 + unreserved 13 + URI grammar + absolute-URI 27 + ALPHA 11 + authority 18 + CR 11 + dec-octet 20 + DIGIT 11 + DQUOTE 11 + fragment 24 + gen-delims 13 + + + +Berners-Lee, et al. 
Standards Track [Page 58] + +RFC 3986 URI Generic Syntax January 2005 + + + h16 20 + HEXDIG 11 + hier-part 16 + host 19 + IP-literal 19 + IPv4address 20 + IPv6address 20 + IPvFuture 19 + LF 11 + ls32 20 + OCTET 11 + path 22 + path-abempty 22 + path-absolute 22 + path-empty 22 + path-noscheme 22 + path-rootless 22 + pchar 23 + pct-encoded 12 + port 22 + query 24 + reg-name 21 + relative-ref 26 + reserved 13 + scheme 17 + segment 23 + segment-nz 23 + segment-nz-nc 23 + SP 11 + sub-delims 13 + unreserved 13 + URI 16 + URI-reference 25 + userinfo 18 + URI 16 + URI-reference 25 + URL 7 + URN 7 + userinfo 18 + + + + + + + + + + + + +Berners-Lee, et al. Standards Track [Page 59] + +RFC 3986 URI Generic Syntax January 2005 + + +Authors' Addresses + + Tim Berners-Lee + World Wide Web Consortium + Massachusetts Institute of Technology + 77 Massachusetts Avenue + Cambridge, MA 02139 + USA + + Phone: +1-617-253-5702 + Fax: +1-617-258-5999 + EMail: timbl@w3.org + URI: http://www.w3.org/People/Berners-Lee/ + + + Roy T. Fielding + Day Software + 5251 California Ave., Suite 110 + Irvine, CA 92617 + USA + + Phone: +1-949-679-2960 + Fax: +1-949-679-2972 + EMail: fielding@gbiv.com + URI: http://roy.gbiv.com/ + + + Larry Masinter + Adobe Systems Incorporated + 345 Park Ave + San Jose, CA 95110 + USA + + Phone: +1-408-536-3024 + EMail: LMM@acm.org + URI: http://larry.masinter.net/ + + + + + + + + + + + + + + + +Berners-Lee, et al. Standards Track [Page 60] + +RFC 3986 URI Generic Syntax January 2005 + + +Full Copyright Statement + + Copyright (C) The Internet Society (2005). + + This document is subject to the rights, licenses and restrictions + contained in BCP 78, and except as set forth therein, the authors + retain all their rights. 
+ + This document and the information contained herein are provided on an + "AS IS" basis and THE CONTRIBUTOR, THE ORGANIZATION HE/SHE REPRESENTS + OR IS SPONSORED BY (IF ANY), THE INTERNET SOCIETY AND THE INTERNET + ENGINEERING TASK FORCE DISCLAIM ALL WARRANTIES, EXPRESS OR IMPLIED, + INCLUDING BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE + INFORMATION HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED + WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + +Intellectual Property + + The IETF takes no position regarding the validity or scope of any + Intellectual Property Rights or other rights that might be claimed to + pertain to the implementation or use of the technology described in + this document or the extent to which any license under such rights + might or might not be available; nor does it represent that it has + made any independent effort to identify any such rights. Information + on the IETF's procedures with respect to rights in IETF Documents can + be found in BCP 78 and BCP 79. + + Copies of IPR disclosures made to the IETF Secretariat and any + assurances of licenses to be made available, or the result of an + attempt made to obtain a general license or permission for the use of + such proprietary rights by implementers or users of this + specification can be obtained from the IETF on-line IPR repository at + http://www.ietf.org/ipr. + + The IETF invites any interested party to bring to its attention any + copyrights, patents or patent applications, or other proprietary + rights that may cover technology that may be required to implement + this standard. Please address the information to the IETF at ietf- + ipr@ietf.org. + + +Acknowledgement + + Funding for the RFC Editor function is currently provided by the + Internet Society. + + + + + + +Berners-Lee, et al. 
Standards Track [Page 61] + diff --git a/libsoup-2.4.pc.in b/libsoup-2.4.pc.in new file mode 100644 index 0000000..1394939 --- /dev/null +++ b/libsoup-2.4.pc.in @@ -0,0 +1,12 @@ +prefix=@prefix@ +exec_prefix=@exec_prefix@ +libdir=@libdir@ +includedir=@includedir@ + +Name: libsoup +Description: a glib-based HTTP library +Version: @VERSION@ +Requires: glib-2.0 gobject-2.0 gio-2.0 +Requires.private: libxml-2.0 +Libs: -L${libdir} -lsoup-2.4 +Cflags: -I${includedir}/libsoup-2.4 diff --git a/libsoup-gnome-2.4.pc.in b/libsoup-gnome-2.4.pc.in new file mode 100644 index 0000000..7238df2 --- /dev/null +++ b/libsoup-gnome-2.4.pc.in @@ -0,0 +1,11 @@ +prefix=@prefix@ +exec_prefix=@exec_prefix@ +libdir=@libdir@ +includedir=@includedir@ + +Name: libsoup +Description: a glib-based HTTP library +Version: @VERSION@ +Requires: libsoup-2.4 +Libs: -L${libdir} -lsoup-gnome-2.4 +Cflags: -I${includedir}/libsoup-gnome-2.4 diff --git a/libsoup-zip.in b/libsoup-zip.in new file mode 100755 index 0000000..fc44fe5 --- /dev/null +++ b/libsoup-zip.in @@ -0,0 +1,24 @@ +#!/bin/sh + +# Build zipfiles for libsoup on Win32. Separate runtime and developer +# ones. After running make install, run this. + +ZIP=/tmp/libsoup-@VERSION@.zip +DEVZIP=/tmp/libsoup-dev-@VERSION@.zip + +cd @prefix@ +rm $ZIP + +current_minus_age=`expr @SOUP_CURRENT@ - @SOUP_AGE@` + +zip $ZIP -@ < + + libsoup + HTTP client/server library for GNOME + libsoup is an HTTP client/server library for GNOME. It uses GObjects and the glib main loop, to integrate well with GNOME applications. 
+ + + + + + + + + Dan Winship + + danw + + + diff --git a/libsoup/Makefile.am b/libsoup/Makefile.am new file mode 100644 index 0000000..59d06ba --- /dev/null +++ b/libsoup/Makefile.am @@ -0,0 +1,298 @@ +## Process this file with automake to produce Makefile.in + +if OS_WIN32 +LIBWS2_32 = -lws2_32 +endif + +INCLUDES = \ + -DG_LOG_DOMAIN=\"libsoup\" \ + -I$(top_srcdir) \ + $(SOUP_DEBUG_FLAGS) \ + $(SOUP_MAINTAINER_FLAGS) \ + $(GLIB_CFLAGS) \ + $(XML_CFLAGS) \ + $(SQLITE_CFLAGS) \ + $(GNOME_KEYRING_CFLAGS) + +MARSHAL_GENERATED = soup-marshal.c soup-marshal.h +MKENUMS_GENERATED = soup-enum-types.c soup-enum-types.h + +soup-marshal.h: soup-marshal.list + $(AM_V_GEN) ( $(GLIB_GENMARSHAL) --prefix=soup_marshal $(srcdir)/soup-marshal.list --header > soup-marshal.tmp \ + && mv soup-marshal.tmp soup-marshal.h ) \ + || ( rm -f soup-marshal.tmp && exit 1 ) + +soup-marshal.c: soup-marshal.h + $(AM_V_GEN) ( (echo '#include "soup-marshal.h"'; $(GLIB_GENMARSHAL) --prefix=soup_marshal $(srcdir)/soup-marshal.list --body) > soup-marshal.tmp \ + && mv soup-marshal.tmp soup-marshal.c ) \ + || ( rm -f soup-marshal.tmp && exit 1 ) + +soup-enum-types.h: $(soup_headers) + $(AM_V_GEN) ( cd $(srcdir) && $(GLIB_MKENUMS) --template soup-enum-types.h.tmpl \ + $(soup_headers) ) > soup-enum-types.h.tmp \ + && mv soup-enum-types.h.tmp soup-enum-types.h \ + || rm -f soup-enum-type.h.tmp + +soup-enum-types.c: $(libsoupinclude_HEADERS) + $(AM_V_GEN) ( cd $(srcdir) && $(GLIB_MKENUMS) --template soup-enum-types.c.tmpl \ + $(soup_headers) ) > soup-enum-types.c.tmp \ + && mv soup-enum-types.c.tmp soup-enum-types.c \ + || rm -f soup-enum-type.c.tmp + +BUILT_SOURCES = $(MARSHAL_GENERATED) $(MKENUMS_GENERATED) + +CLEANFILES = $(MARSHAL_GENERATED) $(MKENUMS_GENERATED) + +libsoupincludedir = $(includedir)/libsoup-2.4/libsoup + +soup_headers = \ + soup.h \ + soup-address.h \ + soup-auth.h \ + soup-auth-domain.h \ + soup-auth-domain-basic.h \ + soup-auth-domain-digest.h \ + soup-cache.h \ + 
soup-content-decoder.h \ + soup-content-sniffer.h \ + soup-cookie.h \ + soup-cookie-jar.h \ + soup-cookie-jar-text.h \ + soup-date.h \ + soup-form.h \ + soup-headers.h \ + soup-logger.h \ + soup-message.h \ + soup-message-body.h \ + soup-message-headers.h \ + soup-method.h \ + soup-misc.h \ + soup-multipart.h \ + soup-password-manager.h \ + soup-portability.h \ + soup-proxy-resolver.h \ + soup-proxy-resolver-default.h \ + soup-proxy-uri-resolver.h \ + soup-request.h \ + soup-request-data.h \ + soup-request-file.h \ + soup-request-http.h \ + soup-requester.h \ + soup-server.h \ + soup-session.h \ + soup-session-async.h \ + soup-session-feature.h \ + soup-session-sync.h \ + soup-socket.h \ + soup-status.h \ + soup-types.h \ + soup-uri.h \ + soup-value-utils.h \ + soup-xmlrpc.h + +if SQLLITE_SUPPORT +soup_headers += \ + soup-cookie-jar-sqlite.h +endif + +libsoupinclude_HEADERS = \ + $(soup_headers) \ + soup-enum-types.h + +lib_LTLIBRARIES = libsoup-2.4.la + +libsoup_2_4_la_LDFLAGS = \ + -version-info $(SOUP_CURRENT):$(SOUP_REVISION):$(SOUP_AGE) -no-undefined + +libsoup_2_4_la_LIBADD = \ + $(GLIB_LIBS) \ + $(XML_LIBS) \ + -lz \ + $(LIBWS2_32) + +if SQLLITE_SUPPORT +libsoup_2_4_la_LIBADD += \ + $(SQLITE_LIBS) +endif + +libsoup_2_4_la_SOURCES = \ + $(BUILT_SOURCES) \ + soup-address.c \ + soup-auth.c \ + soup-auth-basic.h \ + soup-auth-basic.c \ + soup-auth-digest.h \ + soup-auth-digest.c \ + soup-auth-ntlm.h \ + soup-auth-ntlm.c \ + soup-auth-domain.c \ + soup-auth-domain-basic.c \ + soup-auth-domain-digest.c \ + soup-auth-manager.h \ + soup-auth-manager.c \ + soup-auth-manager-ntlm.h \ + soup-auth-manager-ntlm.c \ + soup-cache.c \ + soup-cache-private.h \ + soup-connection.h \ + soup-connection.c \ + soup-content-decoder.c \ + soup-content-sniffer.c \ + soup-cookie.c \ + soup-cookie-jar.c \ + soup-cookie-jar-text.c \ + soup-date.c \ + soup-directory-input-stream.h \ + soup-directory-input-stream.c \ + soup-form.c \ + soup-headers.c \ + soup-http-input-stream.h \ + 
soup-http-input-stream.c \ + soup-logger.c \ + soup-message.c \ + soup-message-body.c \ + soup-message-client-io.c \ + soup-message-headers.c \ + soup-message-io.c \ + soup-message-private.h \ + soup-message-queue.h \ + soup-message-queue.c \ + soup-message-server-io.c \ + soup-method.c \ + soup-misc.c \ + soup-misc-private.h \ + soup-multipart.c \ + soup-password-manager.c \ + soup-path-map.h \ + soup-path-map.c \ + soup-proxy-resolver.c \ + soup-proxy-resolver-default.c \ + soup-proxy-resolver-static.h \ + soup-proxy-resolver-static.c \ + soup-proxy-uri-resolver.c \ + soup-request.c \ + soup-request-data.c \ + soup-request-file.c \ + soup-request-http.c \ + soup-requester.c \ + soup-server.c \ + soup-session.c \ + soup-session-async.c \ + soup-session-feature.c \ + soup-session-private.h \ + soup-session-sync.c \ + soup-socket.c \ + soup-ssl.h \ + soup-ssl.c \ + soup-status.c \ + soup-uri.c \ + soup-value-utils.c \ + soup-xmlrpc.c + +if SQLLITE_SUPPORT +libsoup_2_4_la_SOURCES += \ + soup-cookie-jar-sqlite.c +endif + +if BUILD_LIBSOUP_GNOME + +if OS_WIN32 +soup_password_manager_gnome_files = +else +soup_password_manager_gnome_files = \ + soup-password-manager-gnome.h \ + soup-password-manager-gnome.c +endif + +libsoupgnomeincludedir = $(includedir)/libsoup-gnome-2.4/libsoup + +libsoupgnomeinclude_HEADERS = \ + soup-cookie-jar-sqlite.h\ + soup-gnome.h \ + soup-gnome-features.h + +lib_LTLIBRARIES += libsoup-gnome-2.4.la + +libsoup_gnome_2_4_la_LDFLAGS = $(libsoup_2_4_la_LDFLAGS) + +libsoup_gnome_2_4_la_LIBADD = \ + libsoup-2.4.la \ + $(GLIB_LIBS) \ + $(SQLITE_LIBS) \ + $(GNOME_KEYRING_LIBS) + +libsoup_gnome_2_4_la_SOURCES = \ + soup-cookie-jar-sqlite.c \ + soup-gnome-features.c \ + soup-proxy-resolver-gnome.h \ + soup-proxy-resolver-gnome.c \ + $(soup_password_manager_gnome_files) + +endif + +# +# Introspection support +# +include $(INTROSPECTION_MAKEFILE) +INTROSPECTION_GIRS = +INTROSPECTION_SCANNER_ARGS = --add-include-path=. 
+INTROSPECTION_COMPILER_ARGS = --includedir=. + +if HAVE_INTROSPECTION + +# Core library +gi_soup_files = \ + $(filter-out soup.h soup-enum-types.% soup-marshal.% soup-proxy-resolver.h,\ + $(soup_headers) $(filter-out %.h, $(libsoup_2_4_la_SOURCES))) +gi_built_soup_files = soup-enum-types.h + +Soup-2.4.gir: libsoup-2.4.la +Soup_2_4_gir_INCLUDES = Gio-2.0 +Soup_2_4_gir_CFLAGS = $(INCLUDES) +Soup_2_4_gir_LIBS = libsoup-2.4.la +Soup_2_4_gir_EXPORT_PACKAGES = libsoup-2.4 +Soup_2_4_gir_SCANNERFLAGS = --c-include "libsoup/soup.h" +Soup_2_4_gir_FILES = \ + $(addprefix $(srcdir)/, $(gi_soup_files)) \ + $(foreach f,$(gi_built_soup_files), \ + $(if $(shell test -f $(addprefix $(srcdir)/,$(f)) && echo yes), \ + $(addprefix $(srcdir)/,$(f)), \ + $(f))) + +INTROSPECTION_GIRS += Soup-2.4.gir + +if BUILD_LIBSOUP_GNOME + +# GNOME extensions +gi_soup_gnome_files = $(filter-out soup-gnome.h,\ + $(libsoupgnomeinclude_HEADERS) \ + $(filter-out %.h, $(libsoup_gnome_2_4_la_SOURCES))) +SoupGNOME-2.4.gir: libsoup-gnome-2.4.la Soup-2.4.gir +SoupGNOME_2_4_gir_SCANNERFLAGS = \ + --identifier-prefix=Soup \ + --symbol-prefix=soup \ + --c-include "libsoup/soup-gnome.h" \ + --include-uninstalled=$(builddir)/Soup-2.4.gir +SoupGNOME_2_4_gir_CFLAGS = $(INCLUDES) +SoupGNOME_2_4_gir_LIBS = libsoup-gnome-2.4.la libsoup-2.4.la +SoupGNOME_2_4_gir_FILES = $(addprefix $(srcdir)/,$(gi_soup_gnome_files)) +SoupGNOME_2_4_gir_EXPORT_PACKAGES = libsoup-gnome-2.4 + +INTROSPECTION_GIRS += SoupGNOME-2.4.gir + +endif + +girdir = $(datadir)/gir-1.0 +gir_DATA = $(INTROSPECTION_GIRS) + +typelibdir = $(libdir)/girepository-1.0 +typelib_DATA = $(INTROSPECTION_GIRS:.gir=.typelib) + +CLEANFILES += $(gir_DATA) $(typelib_DATA) + +endif + +EXTRA_DIST= \ + soup-marshal.list \ + soup-enum-types.h.tmpl \ + soup-enum-types.c.tmpl diff --git a/libsoup/TIZEN.h b/libsoup/TIZEN.h new file mode 100644 index 0000000..eb85186 --- /dev/null +++ b/libsoup/TIZEN.h @@ -0,0 +1,16 @@ +/* + * TIZEN.h + * + * Copyright (c) 2000 - 2011 Samsung 
Electronics Co., Ltd. + */ + +#ifndef TIZEN_H +#define TIZEN_H + +#define ENABLE(TIZEN_FEATURE) (defined ENABLE_##TIZEN_FEATURE && ENABLE_##TIZEN_FEATURE) + +#define ENABLE_TIZEN_FIX_PACK_ENTRY 1 +#define ENABLE_TIZEN_FIX_CONTENT_SNIFFER_PATTERN 1 +#define ENABLE_TIZEN_FIX_PAUSE_MESSAGE 1 + +#endif //#ifndef TIZEN_H diff --git a/libsoup/soup-address.c b/libsoup/soup-address.c new file mode 100644 index 0000000..0e5e8cd --- /dev/null +++ b/libsoup/soup-address.c @@ -0,0 +1,1199 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * soup-address.c: Internet address handing + * + * Copyright (C) 2010 Red Hat, Inc. + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include +#include +#include +#include +#include + +#include + +#include "soup-address.h" +#include "soup-enum-types.h" +#include "soup-marshal.h" +#include "soup-misc.h" + +/** + * SECTION:soup-address + * @short_description: DNS support + * + * #SoupAddress represents the address of a TCP connection endpoint: + * both the IP address and the port. (It is somewhat like an + * object-oriented version of struct sockaddr.) 
+ **/ + +enum { + PROP_0, + + PROP_NAME, + PROP_FAMILY, + PROP_PORT, + PROP_PHYSICAL, + PROP_SOCKADDR, + + LAST_PROP +}; + +typedef struct { + struct sockaddr_storage *sockaddr; + int n_addrs, offset; + + char *name, *physical; + guint port; + + GMutex *lock; + GSList *async_lookups; +} SoupAddressPrivate; +#define SOUP_ADDRESS_GET_PRIVATE(o) (G_TYPE_INSTANCE_GET_PRIVATE ((o), SOUP_TYPE_ADDRESS, SoupAddressPrivate)) + +/* sockaddr generic macros */ +#define SOUP_SIN(priv) ((struct sockaddr_in *)priv->sockaddr) +#define SOUP_SIN6(priv) ((struct sockaddr_in6 *)priv->sockaddr) + +/* sockaddr family macros */ +#define SOUP_ADDRESS_GET_FAMILY(priv) (priv->sockaddr->ss_family) +#define SOUP_ADDRESS_SET_FAMILY(priv, family) \ + (priv->sockaddr->ss_family = family) +#define SOUP_ADDRESS_FAMILY_IS_VALID(family) \ + (family == AF_INET || family == AF_INET6) +#define SOUP_ADDRESS_FAMILY_SOCKADDR_SIZE(family) \ + (family == AF_INET ? sizeof (struct sockaddr_in) : \ + sizeof (struct sockaddr_in6)) +#define SOUP_ADDRESS_FAMILY_DATA_SIZE(family) \ + (family == AF_INET ? sizeof (struct in_addr) : \ + sizeof (struct in6_addr)) + +/* sockaddr port macros */ +#define SOUP_ADDRESS_PORT_IS_VALID(port) (port >= 0 && port <= 65535) +#define SOUP_ADDRESS_GET_PORT(priv) \ + (priv->sockaddr->ss_family == AF_INET ? \ + SOUP_SIN(priv)->sin_port : \ + SOUP_SIN6(priv)->sin6_port) +#define SOUP_ADDRESS_SET_PORT(priv, port) \ + G_STMT_START { \ + if (priv->sockaddr->ss_family == AF_INET) \ + SOUP_SIN(priv)->sin_port = port; \ + else \ + SOUP_SIN6(priv)->sin6_port = port; \ + } G_STMT_END + +/* sockaddr data macros */ +#define SOUP_ADDRESS_GET_DATA(priv) \ + (priv->sockaddr->ss_family == AF_INET ? 
\ + (gpointer)&SOUP_SIN(priv)->sin_addr : \ + (gpointer)&SOUP_SIN6(priv)->sin6_addr) +#define SOUP_ADDRESS_SET_DATA(priv, data, length) \ + memcpy (SOUP_ADDRESS_GET_DATA (priv), data, length) + + +static GObject *constructor (GType type, + guint n_construct_properties, + GObjectConstructParam *construct_properties); +static void set_property (GObject *object, guint prop_id, + const GValue *value, GParamSpec *pspec); +static void get_property (GObject *object, guint prop_id, + GValue *value, GParamSpec *pspec); + +static void soup_address_connectable_iface_init (GSocketConnectableIface *connectable_iface); +static GSocketAddressEnumerator *soup_address_connectable_enumerate (GSocketConnectable *connectable); + +G_DEFINE_TYPE_WITH_CODE (SoupAddress, soup_address, G_TYPE_OBJECT, + G_IMPLEMENT_INTERFACE (G_TYPE_SOCKET_CONNECTABLE, + soup_address_connectable_iface_init)) + +static void +soup_address_init (SoupAddress *addr) +{ + SoupAddressPrivate *priv = SOUP_ADDRESS_GET_PRIVATE (addr); + + priv->lock = g_mutex_new (); +} + +static void +finalize (GObject *object) +{ + SoupAddress *addr = SOUP_ADDRESS (object); + SoupAddressPrivate *priv = SOUP_ADDRESS_GET_PRIVATE (addr); + + if (priv->sockaddr) + g_free (priv->sockaddr); + if (priv->name) + g_free (priv->name); + if (priv->physical) + g_free (priv->physical); + + g_mutex_free (priv->lock); + + G_OBJECT_CLASS (soup_address_parent_class)->finalize (object); +} + +static void +soup_address_class_init (SoupAddressClass *address_class) +{ + GObjectClass *object_class = G_OBJECT_CLASS (address_class); + + g_type_class_add_private (address_class, sizeof (SoupAddressPrivate)); + + /* virtual method override */ + object_class->constructor = constructor; + object_class->finalize = finalize; + object_class->set_property = set_property; + object_class->get_property = get_property; + + /* properties */ + /** + * SOUP_ADDRESS_NAME: + * + * Alias for the #SoupAddress:name property. (The hostname for + * this address.) 
+ **/ + g_object_class_install_property ( + object_class, PROP_NAME, + g_param_spec_string (SOUP_ADDRESS_NAME, + "Name", + "Hostname for this address", + NULL, + G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY)); + /** + * SOUP_ADDRESS_FAMILY: + * + * Alias for the #SoupAddress:family property. (The + * #SoupAddressFamily for this address.) + **/ + g_object_class_install_property ( + object_class, PROP_FAMILY, + g_param_spec_enum (SOUP_ADDRESS_FAMILY, + "Family", + "Address family for this address", + SOUP_TYPE_ADDRESS_FAMILY, + SOUP_ADDRESS_FAMILY_INVALID, + G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY)); + /** + * SOUP_ADDRESS_PORT: + * + * An alias for the #SoupAddress:port property. (The port for + * this address.) + **/ + g_object_class_install_property ( + object_class, PROP_PORT, + g_param_spec_int (SOUP_ADDRESS_PORT, + "Port", + "Port for this address", + -1, 65535, -1, + G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY)); + /** + * SOUP_ADDRESS_PHYSICAL: + * + * An alias for the #SoupAddress:physical property. (The + * stringified IP address for this address.) + **/ + g_object_class_install_property ( + object_class, PROP_PHYSICAL, + g_param_spec_string (SOUP_ADDRESS_PHYSICAL, + "Physical address", + "IP address for this address", + NULL, + G_PARAM_READABLE)); + /** + * SOUP_ADDRESS_SOCKADDR: + * + * An alias for the #SoupAddress:sockaddr property. (A pointer + * to the struct sockaddr for this address.) 
+ **/ + g_object_class_install_property ( + object_class, PROP_SOCKADDR, + g_param_spec_pointer (SOUP_ADDRESS_SOCKADDR, + "sockaddr", + "struct sockaddr for this address", + G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY)); +} + +static void +soup_address_connectable_iface_init (GSocketConnectableIface *connectable_iface) +{ + connectable_iface->enumerate = soup_address_connectable_enumerate; +} + +static GObject * +constructor (GType type, + guint n_construct_properties, + GObjectConstructParam *construct_properties) +{ + GObject *addr; + SoupAddressPrivate *priv; + + addr = G_OBJECT_CLASS (soup_address_parent_class)->constructor ( + type, n_construct_properties, construct_properties); + if (!addr) + return NULL; + priv = SOUP_ADDRESS_GET_PRIVATE (addr); + + if (!priv->name && !priv->sockaddr) { + g_object_unref (addr); + return NULL; + } + + return addr; +} + +static void +set_property (GObject *object, guint prop_id, + const GValue *value, GParamSpec *pspec) +{ + SoupAddressPrivate *priv = SOUP_ADDRESS_GET_PRIVATE (object); + SoupAddressFamily family; + struct sockaddr *sa; + int len, port; + + /* This is a mess because the properties are mostly orthogonal, + * but g_object_constructor wants to set a default value for each + * of them. 
+ */ + + switch (prop_id) { + case PROP_NAME: + priv->name = g_value_dup_string (value); + break; + + case PROP_FAMILY: + family = g_value_get_enum (value); + if (family == SOUP_ADDRESS_FAMILY_INVALID) + return; + g_return_if_fail (SOUP_ADDRESS_FAMILY_IS_VALID (family)); + g_return_if_fail (priv->sockaddr == NULL); + + priv->sockaddr = g_malloc0 (SOUP_ADDRESS_FAMILY_SOCKADDR_SIZE (family)); + SOUP_ADDRESS_SET_FAMILY (priv, family); + SOUP_ADDRESS_SET_PORT (priv, htons (priv->port)); + priv->n_addrs = 1; + break; + + case PROP_PORT: + port = g_value_get_int (value); + if (port == -1) + return; + g_return_if_fail (SOUP_ADDRESS_PORT_IS_VALID (port)); + + priv->port = port; + if (priv->sockaddr) + SOUP_ADDRESS_SET_PORT (priv, htons (port)); + break; + + case PROP_SOCKADDR: + sa = g_value_get_pointer (value); + if (!sa) + return; + g_return_if_fail (priv->sockaddr == NULL); + + len = SOUP_ADDRESS_FAMILY_SOCKADDR_SIZE (sa->sa_family); + priv->sockaddr = g_memdup (sa, len); + priv->n_addrs = 1; + priv->port = ntohs (SOUP_ADDRESS_GET_PORT (priv)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); + break; + } +} + +static void +get_property (GObject *object, guint prop_id, + GValue *value, GParamSpec *pspec) +{ + SoupAddressPrivate *priv = SOUP_ADDRESS_GET_PRIVATE (object); + + switch (prop_id) { + case PROP_NAME: + g_value_set_string (value, priv->name); + break; + case PROP_FAMILY: + if (priv->sockaddr) + g_value_set_enum (value, SOUP_ADDRESS_GET_FAMILY (priv)); + else + g_value_set_enum (value, 0); + break; + case PROP_PORT: + g_value_set_int (value, priv->port); + break; + case PROP_PHYSICAL: + g_value_set_string (value, soup_address_get_physical (SOUP_ADDRESS (object))); + break; + case PROP_SOCKADDR: + g_value_set_pointer (value, priv->sockaddr); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); + break; + } +} + +/** + * soup_address_new: + * @name: a hostname or physical address + * @port: a port 
number + * + * Creates a #SoupAddress from @name and @port. The #SoupAddress's IP + * address may not be available right away; the caller can call + * soup_address_resolve_async() or soup_address_resolve_sync() to + * force a DNS resolution. + * + * Return value: a #SoupAddress + **/ +SoupAddress * +soup_address_new (const char *name, guint port) +{ + g_return_val_if_fail (name != NULL, NULL); + g_return_val_if_fail (SOUP_ADDRESS_PORT_IS_VALID (port), NULL); + + return g_object_new (SOUP_TYPE_ADDRESS, + SOUP_ADDRESS_NAME, name, + SOUP_ADDRESS_PORT, port, + NULL); +} + +/** + * soup_address_new_from_sockaddr: + * @sa: a pointer to a sockaddr + * @len: size of @sa + * + * Returns a #SoupAddress equivalent to @sa (or %NULL if @sa's + * address family isn't supported) + * + * Return value: (allow-none): the new #SoupAddress + **/ +SoupAddress * +soup_address_new_from_sockaddr (struct sockaddr *sa, int len) +{ + g_return_val_if_fail (sa != NULL, NULL); + g_return_val_if_fail (SOUP_ADDRESS_FAMILY_IS_VALID (sa->sa_family), NULL); + g_return_val_if_fail (len == SOUP_ADDRESS_FAMILY_SOCKADDR_SIZE (sa->sa_family), NULL); + + return g_object_new (SOUP_TYPE_ADDRESS, + SOUP_ADDRESS_SOCKADDR, sa, + NULL); +} + +/** + * SoupAddressFamily: + * @SOUP_ADDRESS_FAMILY_INVALID: an invalid %SoupAddress + * @SOUP_ADDRESS_FAMILY_IPV4: an IPv4 address + * @SOUP_ADDRESS_FAMILY_IPV6: an IPv6 address + * + * The supported address families. + **/ + +/** + * SOUP_ADDRESS_ANY_PORT: + * + * This can be passed to any #SoupAddress method that expects a port, + * to indicate that you don't care what port is used. + **/ + +/** + * soup_address_new_any: + * @family: the address family + * @port: the port number (usually %SOUP_ADDRESS_ANY_PORT) + * + * Returns a #SoupAddress corresponding to the "any" address + * for @family (or %NULL if @family isn't supported), suitable for + * passing to soup_socket_server_new(). 
+ * + * Return value: (allow-none): the new #SoupAddress + **/ +SoupAddress * +soup_address_new_any (SoupAddressFamily family, guint port) +{ + g_return_val_if_fail (SOUP_ADDRESS_FAMILY_IS_VALID (family), NULL); + g_return_val_if_fail (SOUP_ADDRESS_PORT_IS_VALID (port), NULL); + + return g_object_new (SOUP_TYPE_ADDRESS, + SOUP_ADDRESS_FAMILY, family, + SOUP_ADDRESS_PORT, port, + NULL); +} + +/** + * soup_address_get_name: + * @addr: a #SoupAddress + * + * Returns the hostname associated with @addr. + * + * This method is not thread-safe; if you call it while @addr is being + * resolved in another thread, it may return garbage. You can use + * soup_address_is_resolved() to safely test whether or not an address + * is resolved before fetching its name or address. + * + * Return value: (allow-none): the hostname, or %NULL if it is not known. + **/ +const char * +soup_address_get_name (SoupAddress *addr) +{ + g_return_val_if_fail (SOUP_IS_ADDRESS (addr), NULL); + + return SOUP_ADDRESS_GET_PRIVATE (addr)->name; +} + +/** + * soup_address_get_sockaddr: + * @addr: a #SoupAddress + * @len: return location for sockaddr length + * + * Returns the sockaddr associated with @addr, with its length in + * *@len. If the sockaddr is not yet known, returns %NULL. + * + * This method is not thread-safe; if you call it while @addr is being + * resolved in another thread, it may return garbage. You can use + * soup_address_is_resolved() to safely test whether or not an address + * is resolved before fetching its name or address. 
+ * + * Return value: (allow-none) (transfer none): the sockaddr, or %NULL + **/ +struct sockaddr * +soup_address_get_sockaddr (SoupAddress *addr, int *len) +{ + SoupAddressPrivate *priv; + + g_return_val_if_fail (SOUP_IS_ADDRESS (addr), NULL); + priv = SOUP_ADDRESS_GET_PRIVATE (addr); + + if (priv->sockaddr && len) + *len = SOUP_ADDRESS_FAMILY_SOCKADDR_SIZE (SOUP_ADDRESS_GET_FAMILY (priv)); + return (struct sockaddr *)priv->sockaddr; +} + +/** + * soup_address_get_gsockaddr: + * @addr: a #SoupAddress + * + * Creates a new #GSocketAddress corresponding to @addr (which is assumed + * to only have one socket address associated with it). + * + * Return value: (transfer full): a new #GSocketAddress + * + * Since: 2.32 + */ +GSocketAddress * +soup_address_get_gsockaddr (SoupAddress *addr) +{ + SoupAddressPrivate *priv = SOUP_ADDRESS_GET_PRIVATE (addr); + + return g_socket_address_new_from_native (priv->sockaddr, + SOUP_ADDRESS_FAMILY_SOCKADDR_SIZE (SOUP_ADDRESS_GET_FAMILY (priv))); +} + +static GInetAddress * +soup_address_make_inet_address (SoupAddress *addr) +{ + SoupAddressPrivate *priv = SOUP_ADDRESS_GET_PRIVATE (addr); + GSocketAddress *gsa; + GInetAddress *gia; + + gsa = g_socket_address_new_from_native (priv->sockaddr, + SOUP_ADDRESS_FAMILY_SOCKADDR_SIZE (SOUP_ADDRESS_GET_FAMILY (priv))); + gia = g_inet_socket_address_get_address ((GInetSocketAddress *)gsa); + g_object_ref (gia); + g_object_unref (gsa); + return gia; +} + +/** + * soup_address_get_physical: + * @addr: a #SoupAddress + * + * Returns the physical address associated with @addr as a string. + * (Eg, "127.0.0.1"). If the address is not yet known, returns %NULL. + * + * This method is not thread-safe; if you call it while @addr is being + * resolved in another thread, it may return garbage. You can use + * soup_address_is_resolved() to safely test whether or not an address + * is resolved before fetching its name or address. 
+ * + * Return value: (allow-none): the physical address, or %NULL + **/ +const char * +soup_address_get_physical (SoupAddress *addr) +{ + SoupAddressPrivate *priv; + + g_return_val_if_fail (SOUP_IS_ADDRESS (addr), NULL); + priv = SOUP_ADDRESS_GET_PRIVATE (addr); + + if (!priv->sockaddr) + return NULL; + + if (!priv->physical) { + GInetAddress *gia; + + gia = soup_address_make_inet_address (addr); + priv->physical = g_inet_address_to_string (gia); + g_object_unref (gia); + } + + return priv->physical; +} + +/** + * soup_address_get_port: + * @addr: a #SoupAddress + * + * Returns the port associated with @addr. + * + * Return value: the port + **/ +guint +soup_address_get_port (SoupAddress *addr) +{ + g_return_val_if_fail (SOUP_IS_ADDRESS (addr), 0); + + return SOUP_ADDRESS_GET_PRIVATE (addr)->port; +} + + +static guint +update_addrs (SoupAddress *addr, GList *addrs, GError *error) +{ + SoupAddressPrivate *priv = SOUP_ADDRESS_GET_PRIVATE (addr); + GInetAddress *gia; + GSocketAddress *gsa; + int i; + + if (error) { + if (error->domain == G_IO_ERROR && + error->code == G_IO_ERROR_CANCELLED) + return SOUP_STATUS_CANCELLED; + else + return SOUP_STATUS_CANT_RESOLVE; + } else if (!addrs) + return SOUP_STATUS_CANT_RESOLVE; + else if (priv->sockaddr) + return SOUP_STATUS_OK; + + priv->n_addrs = g_list_length (addrs); + priv->sockaddr = g_new (struct sockaddr_storage, priv->n_addrs); + for (i = 0; addrs; addrs = addrs->next, i++) { + gia = addrs->data; + gsa = g_inet_socket_address_new (gia, priv->port); + + if (!g_socket_address_to_native (gsa, &priv->sockaddr[i], + sizeof (struct sockaddr_storage), + NULL)) { + /* can't happen: We know the address format is supported + * and the buffer is large enough + */ + g_warn_if_reached (); + } + g_object_unref (gsa); + } + + return SOUP_STATUS_OK; +} + +static guint +update_name (SoupAddress *addr, const char *name, GError *error) +{ + SoupAddressPrivate *priv = SOUP_ADDRESS_GET_PRIVATE (addr); + + if (error) { + if (error->domain 
/*
 * Delivers @status to every callback queued on @addr by
 * soup_address_resolve_async() and frees the queue.
 *
 * Callbacks run starting from the head of priv->async_lookups; since
 * soup_address_resolve_async() prepends to that list, the most recently
 * registered callback runs first.
 *
 * Drops one reference on @addr before returning.
 */
static void
complete_resolve_async (SoupAddress *addr, guint status)
{
	SoupAddressPrivate *priv = SOUP_ADDRESS_GET_PRIVATE (addr);
	SoupAddressResolveAsyncData *res_data;
	GSList *lookups, *l;
	GSource *current_source;
	GMainContext *current_context;

	/* Detach the queue before running any callback, so that entries
	 * added to priv->async_lookups while the callbacks run go onto a
	 * fresh list instead of the one being walked and freed here.
	 */
	lookups = priv->async_lookups;
	priv->async_lookups = NULL;

	/* Awful hack; to make soup_socket_connect_async() with a
	 * non-default async_context work correctly, we need to ensure
	 * that the non-default context (which we're now running in)
	 * is the thread-default when the callbacks are run...
	 */
	current_source = g_main_current_source ();
	if (current_source && !g_source_is_destroyed (current_source))
		current_context = g_source_get_context (current_source);
	else
		current_context = NULL;
	g_main_context_push_thread_default (current_context);

	for (l = lookups; l; l = l->next) {
		res_data = l->data;

		/* Entries queued without a callback are just freed. */
		if (res_data->callback) {
			res_data->callback (addr, status,
					    res_data->callback_data);
		}
		g_slice_free (SoupAddressResolveAsyncData, res_data);
	}
	g_slist_free (lookups);

	/* Must be paired with the push above, using the same context. */
	g_main_context_pop_thread_default (current_context);

	g_object_unref (addr);
}
addrs, error); + g_resolver_free_addresses (addrs); + } else if (!priv->name) { + char *name; + + name = g_resolver_lookup_by_address_finish (resolver, result, + &error); + status = update_name (addr, name, error); + g_free (name); + } else + status = SOUP_STATUS_OK; + + /* For the enumerator impl, below */ + g_object_ref (addr); + g_object_set_data (G_OBJECT (addr), "async-resolved-error", error); + + complete_resolve_async (addr, status); + + g_object_set_data (G_OBJECT (addr), "async-resolved-error", NULL); + g_object_unref (addr); + if (error) + g_error_free (error); +} + +static gboolean +idle_complete_resolve (gpointer addr) +{ + complete_resolve_async (addr, SOUP_STATUS_OK); + return FALSE; +} + +/** + * SoupAddressCallback: + * @addr: the #SoupAddress that was resolved + * @status: %SOUP_STATUS_OK, %SOUP_STATUS_CANT_RESOLVE, or + * %SOUP_STATUS_CANCELLED + * @user_data: the user data that was passed to + * soup_address_resolve_async() + * + * The callback function passed to soup_address_resolve_async(). + **/ + +/** + * soup_address_resolve_async: + * @addr: a #SoupAddress + * @async_context: (allow-none): the #GMainContext to call @callback from + * @cancellable: a #GCancellable object, or %NULL + * @callback: (scope async): callback to call with the result + * @user_data: data for @callback + * + * Asynchronously resolves the missing half of @addr (its IP address + * if it was created with soup_address_new(), or its hostname if it + * was created with soup_address_new_from_sockaddr() or + * soup_address_new_any().) + * + * If @cancellable is non-%NULL, it can be used to cancel the + * resolution. @callback will still be invoked in this case, with a + * status of %SOUP_STATUS_CANCELLED. + * + * It is safe to call this more than once on a given address, from the + * same thread, with the same @async_context (and doing so will not + * result in redundant DNS queries being made). 
/*
 * Implementation notes: every call queues a SoupAddressResolveAsyncData
 * on priv->async_lookups; only the call that finds the queue empty
 * starts the actual work. The queue is drained by
 * complete_resolve_async().
 */
void
soup_address_resolve_async (SoupAddress *addr, GMainContext *async_context,
			    GCancellable *cancellable,
			    SoupAddressCallback callback, gpointer user_data)
{
	SoupAddressPrivate *priv;
	SoupAddressResolveAsyncData *res_data;
	GResolver *resolver;
	gboolean already_started;

	g_return_if_fail (SOUP_IS_ADDRESS (addr));
	priv = SOUP_ADDRESS_GET_PRIVATE (addr);
	g_return_if_fail (priv->name || priv->sockaddr);

	/* We don't need to do locking here because the async case is
	 * not intended to be thread-safe.
	 */

	/* Already fully resolved and nobody to notify: nothing to do. */
	if (priv->name && priv->sockaddr && !callback)
		return;

	res_data = g_slice_new0 (SoupAddressResolveAsyncData);
	res_data->callback = callback;
	res_data->callback_data = user_data;

	/* Sample the queue state before adding our own entry. */
	already_started = priv->async_lookups != NULL;
	priv->async_lookups = g_slist_prepend (priv->async_lookups, res_data);

	/* A resolution is already pending; our callback will be run
	 * together with the others when it completes.
	 */
	if (already_started)
		return;

	/* Reference held for the pending completion; released by the
	 * g_object_unref() at the end of complete_resolve_async().
	 */
	g_object_ref (addr);

	if (priv->name && priv->sockaddr) {
		/* Nothing to look up: the callback is not invoked from
		 * here but handed to soup_add_completion() (defined
		 * elsewhere; presumably it schedules
		 * idle_complete_resolve on @async_context - confirm).
		 */
		soup_add_completion (async_context, idle_complete_resolve, addr);
		return;
	}

	resolver = g_resolver_get_default ();
	/* Make @async_context the thread-default only while the lookup
	 * is being started; it is popped again right below.
	 */
	if (async_context)
		g_main_context_push_thread_default (async_context);

	if (priv->name) {
		/* Have a name: look up the IP address(es). */
		g_resolver_lookup_by_name_async (resolver, priv->name,
						 cancellable,
						 lookup_resolved, addr);
	} else {
		GInetAddress *gia;

		/* Have only a sockaddr: look up the hostname. */
		gia = soup_address_make_inet_address (addr);
		g_resolver_lookup_by_address_async (resolver, gia,
						    cancellable,
						    lookup_resolved, addr);
		g_object_unref (gia);
	}

	if (async_context)
		g_main_context_pop_thread_default (async_context);
	g_object_unref (resolver);
}
(); + + /* We could optimize this to avoid multiple lookups the same + * way _resolve_async does, but we don't currently. So, first + * lock the mutex to ensure we have a consistent view of + * priv->sockaddr and priv->name, unlock it around the + * blocking op, and then re-lock it to modify @addr. + */ + g_mutex_lock (priv->lock); + if (!priv->sockaddr) { + GList *addrs; + + g_mutex_unlock (priv->lock); + addrs = g_resolver_lookup_by_name (resolver, priv->name, + cancellable, &my_err); + g_mutex_lock (priv->lock); + + status = update_addrs (addr, addrs, my_err); + g_resolver_free_addresses (addrs); + } else if (!priv->name) { + GInetAddress *gia; + char *name; + + g_mutex_unlock (priv->lock); + gia = soup_address_make_inet_address (addr); + name = g_resolver_lookup_by_address (resolver, gia, + cancellable, &my_err); + g_object_unref (gia); + g_mutex_lock (priv->lock); + + status = update_name (addr, name, my_err); + g_free (name); + } else + status = SOUP_STATUS_OK; + g_mutex_unlock (priv->lock); + + if (my_err) + g_propagate_error (error, my_err); + g_object_unref (resolver); + + return status; +} + +/** + * soup_address_resolve_sync: + * @addr: a #SoupAddress + * @cancellable: a #GCancellable object, or %NULL + * + * Synchronously resolves the missing half of @addr, as with + * soup_address_resolve_async(). + * + * If @cancellable is non-%NULL, it can be used to cancel the + * resolution. soup_address_resolve_sync() will then return a status + * of %SOUP_STATUS_CANCELLED. + * + * It is safe to call this more than once, even from different + * threads, but it is not safe to mix calls to + * soup_address_resolve_sync() with calls to + * soup_address_resolve_async() on the same address. + * + * Return value: %SOUP_STATUS_OK, %SOUP_STATUS_CANT_RESOLVE, or + * %SOUP_STATUS_CANCELLED. 
+ **/ +guint +soup_address_resolve_sync (SoupAddress *addr, GCancellable *cancellable) +{ + SoupAddressPrivate *priv; + + g_return_val_if_fail (SOUP_IS_ADDRESS (addr), SOUP_STATUS_MALFORMED); + priv = SOUP_ADDRESS_GET_PRIVATE (addr); + g_return_val_if_fail (priv->name || priv->sockaddr, SOUP_STATUS_MALFORMED); + + return resolve_sync_internal (addr, cancellable, NULL); +} + +/** + * soup_address_is_resolved: + * @addr: a #SoupAddress + * + * Tests if @addr has already been resolved. Unlike the other + * #SoupAddress "get" methods, this is safe to call when @addr might + * be being resolved in another thread. + * + * Return value: %TRUE if @addr has been resolved. + **/ +gboolean +soup_address_is_resolved (SoupAddress *addr) +{ + SoupAddressPrivate *priv; + gboolean resolved; + + g_return_val_if_fail (SOUP_IS_ADDRESS (addr), FALSE); + priv = SOUP_ADDRESS_GET_PRIVATE (addr); + + g_mutex_lock (priv->lock); + resolved = priv->sockaddr && priv->name; + g_mutex_unlock (priv->lock); + + return resolved; +} + +/** + * soup_address_hash_by_name: + * @addr: (type Soup.Address): a #SoupAddress + * + * A hash function (for #GHashTable) that corresponds to + * soup_address_equal_by_name(), qv + * + * Return value: the named-based hash value for @addr. + * + * Since: 2.26 + **/ +guint +soup_address_hash_by_name (gconstpointer addr) +{ + SoupAddressPrivate *priv = SOUP_ADDRESS_GET_PRIVATE (addr); + + g_return_val_if_fail (priv->name != NULL, 0); + return g_str_hash (priv->name); +} + +/** + * soup_address_equal_by_name: + * @addr1: (type Soup.Address): a #SoupAddress with a resolved name + * @addr2: (type Soup.Address): another #SoupAddress with a resolved + * name + * + * Tests if @addr1 and @addr2 have the same "name". This method can be + * used with soup_address_hash_by_name() to create a #GHashTable that + * hashes on address "names". + * + * Comparing by name normally means comparing the addresses by their + * hostnames. 
But if the address was originally created using an IP + * address literal, then it will be compared by that instead. + * + * In particular, if "www.example.com" has the IP address 10.0.0.1, + * and @addr1 was created with the name "www.example.com" and @addr2 + * was created with the name "10.0.0.1", then they will compare as + * unequal for purposes of soup_address_equal_by_name(). + * + * This would be used to distinguish hosts in situations where + * different virtual hosts on the same IP address should be considered + * different. Eg, for purposes of HTTP authentication or cookies, two + * hosts with the same IP address but different names are considered + * to be different hosts. + * + * See also soup_address_equal_by_ip(), which compares by IP address + * rather than by name. + * + * Return value: whether or not @addr1 and @addr2 have the same name + * + * Since: 2.26 + **/ +gboolean +soup_address_equal_by_name (gconstpointer addr1, gconstpointer addr2) +{ + SoupAddressPrivate *priv1 = SOUP_ADDRESS_GET_PRIVATE (addr1); + SoupAddressPrivate *priv2 = SOUP_ADDRESS_GET_PRIVATE (addr2); + + g_return_val_if_fail (priv1->name != NULL, FALSE); + g_return_val_if_fail (priv2->name != NULL, FALSE); + return !g_ascii_strcasecmp (priv1->name, priv2->name); +} + +/** + * soup_address_hash_by_ip: + * @addr: (type Soup.Address): a #SoupAddress + * + * A hash function (for #GHashTable) that corresponds to + * soup_address_equal_by_ip(), qv + * + * Return value: the IP-based hash value for @addr. 
+ * + * Since: 2.26 + **/ +guint +soup_address_hash_by_ip (gconstpointer addr) +{ + SoupAddressPrivate *priv = SOUP_ADDRESS_GET_PRIVATE (addr); + guint hash; + + g_return_val_if_fail (priv->sockaddr != NULL, 0); + + memcpy (&hash, SOUP_ADDRESS_GET_DATA (priv), + MIN (sizeof (hash), SOUP_ADDRESS_FAMILY_DATA_SIZE (priv->sockaddr->ss_family))); + return hash; +} + +/** + * soup_address_equal_by_ip: + * @addr1: (type Soup.Address): a #SoupAddress with a resolved IP + * address + * @addr2: (type Soup.Address): another #SoupAddress with a resolved + * IP address + * + * Tests if @addr1 and @addr2 have the same IP address. This method + * can be used with soup_address_hash_by_ip() to create a + * #GHashTable that hashes on IP address. + * + * This would be used to distinguish hosts in situations where + * different virtual hosts on the same IP address should be considered + * the same. Eg, if "www.example.com" and "www.example.net" have the + * same IP address, then a single #SoupConnection can be used to talk + * to either of them. + * + * See also soup_address_equal_by_name(), which compares by name + * rather than by IP address. + * + * Return value: whether or not @addr1 and @addr2 have the same IP + * address. 
+ * + * Since: 2.26 + **/ +gboolean +soup_address_equal_by_ip (gconstpointer addr1, gconstpointer addr2) +{ + SoupAddressPrivate *priv1 = SOUP_ADDRESS_GET_PRIVATE (addr1); + SoupAddressPrivate *priv2 = SOUP_ADDRESS_GET_PRIVATE (addr2); + int size; + + g_return_val_if_fail (priv1->sockaddr != NULL, FALSE); + g_return_val_if_fail (priv2->sockaddr != NULL, FALSE); + + size = SOUP_ADDRESS_FAMILY_SOCKADDR_SIZE (priv1->sockaddr->ss_family); + return (priv1->sockaddr->ss_family == + priv2->sockaddr->ss_family && + !memcmp (priv1->sockaddr, priv2->sockaddr, size)); +} + + +#define SOUP_TYPE_ADDRESS_ADDRESS_ENUMERATOR (_soup_address_address_enumerator_get_type ()) +#define SOUP_ADDRESS_ADDRESS_ENUMERATOR(obj) (G_TYPE_CHECK_INSTANCE_CAST ((obj), SOUP_TYPE_ADDRESS_ADDRESS_ENUMERATOR, SoupAddressAddressEnumerator)) + +typedef struct { + GSocketAddressEnumerator parent_instance; + + SoupAddress *addr; + int orig_offset; + int n; +} SoupAddressAddressEnumerator; + +typedef struct { + GSocketAddressEnumeratorClass parent_class; + +} SoupAddressAddressEnumeratorClass; + +GType _soup_address_address_enumerator_get_type (void); +G_DEFINE_TYPE (SoupAddressAddressEnumerator, _soup_address_address_enumerator, G_TYPE_SOCKET_ADDRESS_ENUMERATOR) + +static void +soup_address_address_enumerator_finalize (GObject *object) +{ + SoupAddressAddressEnumerator *addr_enum = + SOUP_ADDRESS_ADDRESS_ENUMERATOR (object); + + g_object_unref (addr_enum->addr); + + G_OBJECT_CLASS (_soup_address_address_enumerator_parent_class)->finalize (object); +} + +static GSocketAddress * +next_address (SoupAddressAddressEnumerator *addr_enum) +{ + SoupAddressPrivate *priv = SOUP_ADDRESS_GET_PRIVATE (addr_enum->addr); + struct sockaddr_storage *ss; + int next_addr; + + /* If there are two addresses but the first one is unusable + * (eg, it's IPv6 and we can only do IPv4), then we don't want to + * try the bad one every time. 
So we use priv->offset to remember + * the offset of the first usable address (ie, the first address + * that we weren't called again after returning). + */ + next_addr = (addr_enum->orig_offset + addr_enum->n) % priv->n_addrs; + priv->offset = next_addr; + + if (addr_enum->n >= priv->n_addrs) + return NULL; + addr_enum->n++; + + ss = &priv->sockaddr[next_addr]; + return g_socket_address_new_from_native (ss, SOUP_ADDRESS_FAMILY_SOCKADDR_SIZE (ss->ss_family)); +} + +static GSocketAddress * +soup_address_address_enumerator_next (GSocketAddressEnumerator *enumerator, + GCancellable *cancellable, + GError **error) +{ + SoupAddressAddressEnumerator *addr_enum = + SOUP_ADDRESS_ADDRESS_ENUMERATOR (enumerator); + SoupAddressPrivate *priv = SOUP_ADDRESS_GET_PRIVATE (addr_enum->addr); + + if (!priv->sockaddr) { + if (resolve_sync_internal (addr_enum->addr, cancellable, error) != SOUP_STATUS_OK) + return NULL; + } + + return next_address (addr_enum); +} + +static void +got_addresses (SoupAddress *addr, guint status, gpointer user_data) +{ + GSimpleAsyncResult *simple = user_data; + GError *error; + + error = g_object_get_data (G_OBJECT (addr), "async-resolved-error"); + if (error) + g_simple_async_result_set_from_error (simple, error); + + g_simple_async_result_complete (simple); + g_object_unref (simple); +} + +static void +soup_address_address_enumerator_next_async (GSocketAddressEnumerator *enumerator, + GCancellable *cancellable, + GAsyncReadyCallback callback, + gpointer user_data) +{ + SoupAddressAddressEnumerator *addr_enum = + SOUP_ADDRESS_ADDRESS_ENUMERATOR (enumerator); + SoupAddressPrivate *priv = SOUP_ADDRESS_GET_PRIVATE (addr_enum->addr); + GSimpleAsyncResult *simple; + + simple = g_simple_async_result_new (G_OBJECT (enumerator), + callback, user_data, + soup_address_address_enumerator_next_async); + + if (!priv->sockaddr) { + soup_address_resolve_async (addr_enum->addr, NULL, cancellable, + got_addresses, simple); + } else { + 
g_simple_async_result_complete_in_idle (simple); + g_object_unref (simple); + } +} + +static GSocketAddress * +soup_address_address_enumerator_next_finish (GSocketAddressEnumerator *enumerator, + GAsyncResult *result, + GError **error) +{ + SoupAddressAddressEnumerator *addr_enum = + SOUP_ADDRESS_ADDRESS_ENUMERATOR (enumerator); + GSimpleAsyncResult *simple = G_SIMPLE_ASYNC_RESULT (result); + + if (g_simple_async_result_propagate_error (simple, error)) + return NULL; + else + return next_address (addr_enum); +} + +static void +_soup_address_address_enumerator_init (SoupAddressAddressEnumerator *enumerator) +{ +} + +static void +_soup_address_address_enumerator_class_init (SoupAddressAddressEnumeratorClass *addrenum_class) +{ + GObjectClass *object_class = G_OBJECT_CLASS (addrenum_class); + GSocketAddressEnumeratorClass *enumerator_class = + G_SOCKET_ADDRESS_ENUMERATOR_CLASS (addrenum_class); + + enumerator_class->next = soup_address_address_enumerator_next; + enumerator_class->next_async = soup_address_address_enumerator_next_async; + enumerator_class->next_finish = soup_address_address_enumerator_next_finish; + object_class->finalize = soup_address_address_enumerator_finalize; +} + +static GSocketAddressEnumerator * +soup_address_connectable_enumerate (GSocketConnectable *connectable) +{ + SoupAddressAddressEnumerator *addr_enum; + SoupAddressPrivate *priv; + + addr_enum = g_object_new (SOUP_TYPE_ADDRESS_ADDRESS_ENUMERATOR, NULL); + addr_enum->addr = g_object_ref (connectable); + + priv = SOUP_ADDRESS_GET_PRIVATE (addr_enum->addr); + addr_enum->orig_offset = priv->offset; + + return (GSocketAddressEnumerator *)addr_enum; +} diff --git a/libsoup/soup-address.h b/libsoup/soup-address.h new file mode 100644 index 0000000..3cea001 --- /dev/null +++ b/libsoup/soup-address.h @@ -0,0 +1,92 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2000-2003, Ximian, Inc. 
/* GObject type, cast and type-check macros for SoupAddress. */
#define SOUP_TYPE_ADDRESS            (soup_address_get_type ())
#define SOUP_ADDRESS(obj)            (G_TYPE_CHECK_INSTANCE_CAST ((obj), SOUP_TYPE_ADDRESS, SoupAddress))
#define SOUP_ADDRESS_CLASS(klass)    (G_TYPE_CHECK_CLASS_CAST ((klass), SOUP_TYPE_ADDRESS, SoupAddressClass))
#define SOUP_IS_ADDRESS(obj)         (G_TYPE_CHECK_INSTANCE_TYPE ((obj), SOUP_TYPE_ADDRESS))
/* The check must be applied to the macro's own argument; the previous
 * expansion referenced an unrelated identifier named "obj", so it only
 * compiled where such a variable happened to be in scope.
 */
#define SOUP_IS_ADDRESS_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), SOUP_TYPE_ADDRESS))
#define SOUP_ADDRESS_GET_CLASS(obj)  (G_TYPE_INSTANCE_GET_CLASS ((obj), SOUP_TYPE_ADDRESS, SoupAddressClass))
*soup_address_get_name (SoupAddress *addr); +const char *soup_address_get_physical (SoupAddress *addr); +guint soup_address_get_port (SoupAddress *addr); +struct sockaddr *soup_address_get_sockaddr (SoupAddress *addr, + int *len); +GSocketAddress *soup_address_get_gsockaddr (SoupAddress *addr); +gboolean soup_address_is_resolved (SoupAddress *addr); + +guint soup_address_hash_by_name (gconstpointer addr); +gboolean soup_address_equal_by_name (gconstpointer addr1, + gconstpointer addr2); +guint soup_address_hash_by_ip (gconstpointer addr); +gboolean soup_address_equal_by_ip (gconstpointer addr1, + gconstpointer addr2); + + +G_END_DECLS + +#endif /* SOUP_ADDRESS_H */ diff --git a/libsoup/soup-auth-basic.c b/libsoup/soup-auth-basic.c new file mode 100644 index 0000000..4218f7e --- /dev/null +++ b/libsoup/soup-auth-basic.c @@ -0,0 +1,134 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * soup-auth-basic.c: HTTP Basic Authentication + * + * Copyright (C) 2001-2003, Ximian, Inc. 
+ */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include + +#include "soup-auth-basic.h" +#include "soup-headers.h" +#include "soup-message.h" +#include "soup-misc.h" +#include "soup-uri.h" + +static gboolean update (SoupAuth *auth, SoupMessage *msg, GHashTable *auth_params); +static GSList *get_protection_space (SoupAuth *auth, SoupURI *source_uri); +static void authenticate (SoupAuth *auth, const char *username, const char *password); +static gboolean is_authenticated (SoupAuth *auth); +static char *get_authorization (SoupAuth *auth, SoupMessage *msg); + +typedef struct { + char *token; +} SoupAuthBasicPrivate; +#define SOUP_AUTH_BASIC_GET_PRIVATE(o) (G_TYPE_INSTANCE_GET_PRIVATE ((o), SOUP_TYPE_AUTH_BASIC, SoupAuthBasicPrivate)) + +G_DEFINE_TYPE (SoupAuthBasic, soup_auth_basic, SOUP_TYPE_AUTH) + +static void +soup_auth_basic_init (SoupAuthBasic *basic) +{ +} + +static void +finalize (GObject *object) +{ + SoupAuthBasicPrivate *priv = SOUP_AUTH_BASIC_GET_PRIVATE (object); + + g_free (priv->token); + + G_OBJECT_CLASS (soup_auth_basic_parent_class)->finalize (object); +} + +static void +soup_auth_basic_class_init (SoupAuthBasicClass *auth_basic_class) +{ + SoupAuthClass *auth_class = SOUP_AUTH_CLASS (auth_basic_class); + GObjectClass *object_class = G_OBJECT_CLASS (auth_basic_class); + + g_type_class_add_private (auth_basic_class, sizeof (SoupAuthBasicPrivate)); + + auth_class->scheme_name = "Basic"; + auth_class->strength = 1; + + auth_class->update = update; + auth_class->get_protection_space = get_protection_space; + auth_class->authenticate = authenticate; + auth_class->is_authenticated = is_authenticated; + auth_class->get_authorization = get_authorization; + + object_class->finalize = finalize; +} + + +static gboolean +update (SoupAuth *auth, SoupMessage *msg, GHashTable *auth_params) +{ + SoupAuthBasicPrivate *priv = SOUP_AUTH_BASIC_GET_PRIVATE (auth); + + /* If we're updating a pre-existing auth, the + * username/password must be bad now, so forget it. 
+ * Other than that, there's nothing to do here. + */ + if (priv->token) { + memset (priv->token, 0, strlen (priv->token)); + g_free (priv->token); + priv->token = NULL; + } + + return TRUE; +} + +static GSList * +get_protection_space (SoupAuth *auth, SoupURI *source_uri) +{ + char *space, *p; + + space = g_strdup (source_uri->path); + + /* Strip query and filename component */ + p = strrchr (space, '/'); + if (p && p != space && p[1]) + *p = '\0'; + + return g_slist_prepend (NULL, space); +} + +static void +authenticate (SoupAuth *auth, const char *username, const char *password) +{ + SoupAuthBasicPrivate *priv = SOUP_AUTH_BASIC_GET_PRIVATE (auth); + char *user_pass; + int len; + + user_pass = g_strdup_printf ("%s:%s", username, password); + len = strlen (user_pass); + + if (priv->token) { + memset (priv->token, 0, strlen (priv->token)); + g_free (priv->token); + } + priv->token = g_base64_encode ((guchar *)user_pass, len); + + memset (user_pass, 0, len); + g_free (user_pass); +} + +static gboolean +is_authenticated (SoupAuth *auth) +{ + return SOUP_AUTH_BASIC_GET_PRIVATE (auth)->token != NULL; +} + +static char * +get_authorization (SoupAuth *auth, SoupMessage *msg) +{ + SoupAuthBasicPrivate *priv = SOUP_AUTH_BASIC_GET_PRIVATE (auth); + + return g_strdup_printf ("Basic %s", priv->token); +} diff --git a/libsoup/soup-auth-basic.h b/libsoup/soup-auth-basic.h new file mode 100644 index 0000000..639bf03 --- /dev/null +++ b/libsoup/soup-auth-basic.h @@ -0,0 +1,27 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2000-2003, Ximian, Inc. 
+ */ + +#ifndef SOUP_AUTH_BASIC_H +#define SOUP_AUTH_BASIC_H 1 + +#include "soup-auth.h" + +#define SOUP_AUTH_BASIC(object) (G_TYPE_CHECK_INSTANCE_CAST ((object), SOUP_TYPE_AUTH_BASIC, SoupAuthBasic)) +#define SOUP_AUTH_BASIC_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST ((klass), SOUP_TYPE_AUTH_BASIC, SoupAuthBasicClass)) +#define SOUP_IS_AUTH_BASIC(object) (G_TYPE_CHECK_INSTANCE_TYPE ((object), SOUP_TYPE_AUTH_BASIC)) +#define SOUP_IS_AUTH_BASIC_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), SOUP_TYPE_AUTH_BASIC)) +#define SOUP_AUTH_BASIC_GET_CLASS(obj) (G_TYPE_INSTANCE_GET_CLASS ((obj), SOUP_TYPE_AUTH_BASIC, SoupAuthBasicClass)) + +typedef struct { + SoupAuth parent; + +} SoupAuthBasic; + +typedef struct { + SoupAuthClass parent_class; + +} SoupAuthBasicClass; + +#endif /*SOUP_AUTH_BASIC_H*/ diff --git a/libsoup/soup-auth-digest.c b/libsoup/soup-auth-digest.c new file mode 100644 index 0000000..90718c6 --- /dev/null +++ b/libsoup/soup-auth-digest.c @@ -0,0 +1,482 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * soup-auth-digest.c: HTTP Digest Authentication + * + * Copyright (C) 2001-2003, Ximian, Inc. 
+ */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include +#include +#include + +#include "soup-auth-digest.h" +#include "soup-headers.h" +#include "soup-message.h" +#include "soup-message-private.h" +#include "soup-misc.h" +#include "soup-uri.h" + +static gboolean update (SoupAuth *auth, SoupMessage *msg, GHashTable *auth_params); +static GSList *get_protection_space (SoupAuth *auth, SoupURI *source_uri); +static void authenticate (SoupAuth *auth, const char *username, const char *password); +static gboolean is_authenticated (SoupAuth *auth); +static char *get_authorization (SoupAuth *auth, SoupMessage *msg); + +typedef struct { + char *user; + char hex_urp[33]; + char hex_a1[33]; + + /* These are provided by the server */ + char *nonce; + char *opaque; + SoupAuthDigestQop qop_options; + SoupAuthDigestAlgorithm algorithm; + char *domain; + + /* These are generated by the client */ + char *cnonce; + int nc; + SoupAuthDigestQop qop; +} SoupAuthDigestPrivate; +#define SOUP_AUTH_DIGEST_GET_PRIVATE(o) (G_TYPE_INSTANCE_GET_PRIVATE ((o), SOUP_TYPE_AUTH_DIGEST, SoupAuthDigestPrivate)) + +static void recompute_hex_a1 (SoupAuthDigestPrivate *priv); + +G_DEFINE_TYPE (SoupAuthDigest, soup_auth_digest, SOUP_TYPE_AUTH) + +static void +soup_auth_digest_init (SoupAuthDigest *digest) +{ +} + +static void +finalize (GObject *object) +{ + SoupAuthDigestPrivate *priv = SOUP_AUTH_DIGEST_GET_PRIVATE (object); + + if (priv->user) + g_free (priv->user); + if (priv->nonce) + g_free (priv->nonce); + if (priv->domain) + g_free (priv->domain); + if (priv->cnonce) + g_free (priv->cnonce); + + memset (priv->hex_urp, 0, sizeof (priv->hex_urp)); + memset (priv->hex_a1, 0, sizeof (priv->hex_a1)); + + G_OBJECT_CLASS (soup_auth_digest_parent_class)->finalize (object); +} + +static void +soup_auth_digest_class_init (SoupAuthDigestClass *auth_digest_class) +{ + SoupAuthClass *auth_class = SOUP_AUTH_CLASS (auth_digest_class); + GObjectClass *object_class = G_OBJECT_CLASS 
(auth_digest_class); + + g_type_class_add_private (auth_digest_class, sizeof (SoupAuthDigestPrivate)); + + auth_class->scheme_name = "Digest"; + auth_class->strength = 5; + + auth_class->get_protection_space = get_protection_space; + auth_class->update = update; + auth_class->authenticate = authenticate; + auth_class->is_authenticated = is_authenticated; + auth_class->get_authorization = get_authorization; + + object_class->finalize = finalize; +} + +SoupAuthDigestAlgorithm +soup_auth_digest_parse_algorithm (const char *algorithm) +{ + if (!algorithm || !g_ascii_strcasecmp (algorithm, "MD5")) + return SOUP_AUTH_DIGEST_ALGORITHM_MD5; + else if (!g_ascii_strcasecmp (algorithm, "MD5-sess")) + return SOUP_AUTH_DIGEST_ALGORITHM_MD5_SESS; + else + return -1; +} + +char * +soup_auth_digest_get_algorithm (SoupAuthDigestAlgorithm algorithm) +{ + if (algorithm == SOUP_AUTH_DIGEST_ALGORITHM_MD5) + return g_strdup ("MD5"); + else if (algorithm == SOUP_AUTH_DIGEST_ALGORITHM_MD5_SESS) + return g_strdup ("MD5-sess"); + else + return NULL; +} + +SoupAuthDigestQop +soup_auth_digest_parse_qop (const char *qop) +{ + GSList *qop_values, *iter; + SoupAuthDigestQop out = 0; + + g_return_val_if_fail (qop != NULL, 0); + + qop_values = soup_header_parse_list (qop); + for (iter = qop_values; iter; iter = iter->next) { + if (!g_ascii_strcasecmp (iter->data, "auth")) + out |= SOUP_AUTH_DIGEST_QOP_AUTH; + else if (!g_ascii_strcasecmp (iter->data, "auth-int")) + out |= SOUP_AUTH_DIGEST_QOP_AUTH_INT; + } + soup_header_free_list (qop_values); + + return out; +} + +char * +soup_auth_digest_get_qop (SoupAuthDigestQop qop) +{ + GString *out; + + out = g_string_new (NULL); + if (qop & SOUP_AUTH_DIGEST_QOP_AUTH) + g_string_append (out, "auth"); + if (qop & SOUP_AUTH_DIGEST_QOP_AUTH_INT) { + if (qop & SOUP_AUTH_DIGEST_QOP_AUTH) + g_string_append (out, ","); + g_string_append (out, "auth-int"); + } + + return g_string_free (out, FALSE); +} + +static gboolean +update (SoupAuth *auth, SoupMessage *msg, 
GHashTable *auth_params) +{ + SoupAuthDigestPrivate *priv = SOUP_AUTH_DIGEST_GET_PRIVATE (auth); + const char *stale, *qop; + guint qop_options; + gboolean ok = TRUE; + + g_free (priv->domain); + g_free (priv->nonce); + g_free (priv->opaque); + + priv->nc = 1; + + priv->domain = g_strdup (g_hash_table_lookup (auth_params, "domain")); + priv->nonce = g_strdup (g_hash_table_lookup (auth_params, "nonce")); + priv->opaque = g_strdup (g_hash_table_lookup (auth_params, "opaque")); + + qop = g_hash_table_lookup (auth_params, "qop"); + if (qop) { + qop_options = soup_auth_digest_parse_qop (qop); + /* We only support auth */ + if (!(qop_options & SOUP_AUTH_DIGEST_QOP_AUTH)) + ok = FALSE; + priv->qop = SOUP_AUTH_DIGEST_QOP_AUTH; + } else + priv->qop = 0; + + priv->algorithm = soup_auth_digest_parse_algorithm (g_hash_table_lookup (auth_params, "algorithm")); + if (priv->algorithm == -1) + ok = FALSE; + + stale = g_hash_table_lookup (auth_params, "stale"); + if (stale && !g_ascii_strcasecmp (stale, "TRUE") && *priv->hex_urp) + recompute_hex_a1 (priv); + else { + g_free (priv->user); + priv->user = NULL; + g_free (priv->cnonce); + priv->cnonce = NULL; + memset (priv->hex_urp, 0, sizeof (priv->hex_urp)); + memset (priv->hex_a1, 0, sizeof (priv->hex_a1)); + } + + return ok; +} + +static GSList * +get_protection_space (SoupAuth *auth, SoupURI *source_uri) +{ + SoupAuthDigestPrivate *priv = SOUP_AUTH_DIGEST_GET_PRIVATE (auth); + GSList *space = NULL; + SoupURI *uri; + char **dvec, *d, *dir, *slash; + int dix; + + if (!priv->domain || !*priv->domain) { + /* If no domain directive, the protection space is the + * whole server. 
+ */ + return g_slist_prepend (NULL, g_strdup ("")); + } + + dvec = g_strsplit (priv->domain, " ", 0); + for (dix = 0; dvec[dix] != NULL; dix++) { + d = dvec[dix]; + if (*d == '/') + dir = g_strdup (d); + else { + uri = soup_uri_new (d); + if (uri && uri->scheme == source_uri->scheme && + uri->port == source_uri->port && + !strcmp (uri->host, source_uri->host)) + dir = g_strdup (uri->path); + else + dir = NULL; + if (uri) + soup_uri_free (uri); + } + + if (dir) { + slash = strrchr (dir, '/'); + if (slash && !slash[1]) + *slash = '\0'; + + space = g_slist_prepend (space, dir); + } + } + g_strfreev (dvec); + + return space; +} + +void +soup_auth_digest_compute_hex_urp (const char *username, + const char *realm, + const char *password, + char hex_urp[33]) +{ + GChecksum *checksum; + + checksum = g_checksum_new (G_CHECKSUM_MD5); + g_checksum_update (checksum, (guchar *)username, strlen (username)); + g_checksum_update (checksum, (guchar *)":", 1); + g_checksum_update (checksum, (guchar *)realm, strlen (realm)); + g_checksum_update (checksum, (guchar *)":", 1); + g_checksum_update (checksum, (guchar *)password, strlen (password)); + strncpy (hex_urp, g_checksum_get_string (checksum), 33); + g_checksum_free (checksum); +} + +void +soup_auth_digest_compute_hex_a1 (const char *hex_urp, + SoupAuthDigestAlgorithm algorithm, + const char *nonce, + const char *cnonce, + char hex_a1[33]) +{ + if (algorithm == SOUP_AUTH_DIGEST_ALGORITHM_MD5) { + /* In MD5, A1 is just user:realm:password, so hex_A1 + * is just hex_urp. + */ + /* You'd think you could say "sizeof (hex_a1)" here, + * but you'd be wrong. 
+ */ + memcpy (hex_a1, hex_urp, 33); + } else { + GChecksum *checksum; + + /* In MD5-sess, A1 is hex_urp:nonce:cnonce */ + + checksum = g_checksum_new (G_CHECKSUM_MD5); + g_checksum_update (checksum, (guchar *)hex_urp, strlen (hex_urp)); + g_checksum_update (checksum, (guchar *)":", 1); + g_checksum_update (checksum, (guchar *)nonce, strlen (nonce)); + g_checksum_update (checksum, (guchar *)":", 1); + g_checksum_update (checksum, (guchar *)cnonce, strlen (cnonce)); + strncpy (hex_a1, g_checksum_get_string (checksum), 33); + g_checksum_free (checksum); + } +} + +static void +recompute_hex_a1 (SoupAuthDigestPrivate *priv) +{ + soup_auth_digest_compute_hex_a1 (priv->hex_urp, + priv->algorithm, + priv->nonce, + priv->cnonce, + priv->hex_a1); +} + +static void +authenticate (SoupAuth *auth, const char *username, const char *password) +{ + SoupAuthDigestPrivate *priv = SOUP_AUTH_DIGEST_GET_PRIVATE (auth); + char *bgen; + + /* Create client nonce */ + bgen = g_strdup_printf ("%p:%lu:%lu", + auth, + (unsigned long) getpid (), + (unsigned long) time (0)); + priv->cnonce = g_base64_encode ((guchar *)bgen, strlen (bgen)); + g_free (bgen); + + priv->user = g_strdup (username); + + /* compute "URP" (user:realm:password) */ + soup_auth_digest_compute_hex_urp (username, auth->realm, + password ? 
password : "", + priv->hex_urp); + + /* And compute A1 from that */ + recompute_hex_a1 (priv); +} + +static gboolean +is_authenticated (SoupAuth *auth) +{ + return SOUP_AUTH_DIGEST_GET_PRIVATE (auth)->cnonce != NULL; +} + +void +soup_auth_digest_compute_response (const char *method, + const char *uri, + const char *hex_a1, + SoupAuthDigestQop qop, + const char *nonce, + const char *cnonce, + int nc, + char response[33]) +{ + char hex_a2[33]; + GChecksum *checksum; + + /* compute A2 */ + checksum = g_checksum_new (G_CHECKSUM_MD5); + g_checksum_update (checksum, (guchar *)method, strlen (method)); + g_checksum_update (checksum, (guchar *)":", 1); + g_checksum_update (checksum, (guchar *)uri, strlen (uri)); + strncpy (hex_a2, g_checksum_get_string (checksum), 33); + g_checksum_free (checksum); + + /* compute KD */ + checksum = g_checksum_new (G_CHECKSUM_MD5); + g_checksum_update (checksum, (guchar *)hex_a1, strlen (hex_a1)); + g_checksum_update (checksum, (guchar *)":", 1); + g_checksum_update (checksum, (guchar *)nonce, strlen (nonce)); + g_checksum_update (checksum, (guchar *)":", 1); + + if (qop) { + char tmp[9]; + + snprintf (tmp, 9, "%.8x", nc); + g_checksum_update (checksum, (guchar *)tmp, strlen (tmp)); + g_checksum_update (checksum, (guchar *)":", 1); + g_checksum_update (checksum, (guchar *)cnonce, strlen (cnonce)); + g_checksum_update (checksum, (guchar *)":", 1); + + if (!(qop & SOUP_AUTH_DIGEST_QOP_AUTH)) + g_warn_if_reached (); + g_checksum_update (checksum, (guchar *)"auth", strlen ("auth")); + g_checksum_update (checksum, (guchar *)":", 1); + } + + g_checksum_update (checksum, (guchar *)hex_a2, 32); + strncpy (response, g_checksum_get_string (checksum), 33); + g_checksum_free (checksum); +} + +static void +authentication_info_cb (SoupMessage *msg, gpointer data) +{ + SoupAuth *auth = data; + SoupAuthDigestPrivate *priv = SOUP_AUTH_DIGEST_GET_PRIVATE (auth); + const char *header; + GHashTable *auth_params; + char *nextnonce; + + if (auth != 
soup_message_get_auth (msg)) + return; + + header = soup_message_headers_get_one (msg->response_headers, + soup_auth_is_for_proxy (auth) ? + "Proxy-Authentication-Info" : + "Authentication-Info"); + g_return_if_fail (header != NULL); + + auth_params = soup_header_parse_param_list (header); + if (!auth_params) + return; + + nextnonce = g_strdup (g_hash_table_lookup (auth_params, "nextnonce")); + if (nextnonce) { + g_free (priv->nonce); + priv->nonce = nextnonce; + } + + soup_header_free_param_list (auth_params); +} + +static char * +get_authorization (SoupAuth *auth, SoupMessage *msg) +{ + SoupAuthDigestPrivate *priv = SOUP_AUTH_DIGEST_GET_PRIVATE (auth); + char response[33], *token; + char *url, *algorithm; + GString *out; + SoupURI *uri; + + uri = soup_message_get_uri (msg); + g_return_val_if_fail (uri != NULL, NULL); + url = soup_uri_to_string (uri, TRUE); + + soup_auth_digest_compute_response (msg->method, url, priv->hex_a1, + priv->qop, priv->nonce, + priv->cnonce, priv->nc, + response); + + out = g_string_new ("Digest "); + + soup_header_g_string_append_param_quoted (out, "username", priv->user); + g_string_append (out, ", "); + soup_header_g_string_append_param_quoted (out, "realm", auth->realm); + g_string_append (out, ", "); + soup_header_g_string_append_param_quoted (out, "nonce", priv->nonce); + g_string_append (out, ", "); + soup_header_g_string_append_param_quoted (out, "uri", url); + g_string_append (out, ", "); + algorithm = soup_auth_digest_get_algorithm (priv->algorithm); + g_string_append_printf (out, "algorithm=%s", algorithm); + g_free (algorithm); + g_string_append (out, ", "); + soup_header_g_string_append_param_quoted (out, "response", response); + + if (priv->opaque) { + g_string_append (out, ", "); + soup_header_g_string_append_param_quoted (out, "opaque", priv->opaque); + } + + if (priv->qop) { + char *qop = soup_auth_digest_get_qop (priv->qop); + + g_string_append (out, ", "); + soup_header_g_string_append_param_quoted (out, "cnonce", 
priv->cnonce); + g_string_append_printf (out, ", nc=%.8x, qop=%s", + priv->nc, qop); + g_free (qop); + } + + g_free (url); + + priv->nc++; + + token = g_string_free (out, FALSE); + + soup_message_add_header_handler (msg, + "got_headers", + soup_auth_is_for_proxy (auth) ? + "Proxy-Authentication-Info" : + "Authentication-Info", + G_CALLBACK (authentication_info_cb), + auth); + return token; +} diff --git a/libsoup/soup-auth-digest.h b/libsoup/soup-auth-digest.h new file mode 100644 index 0000000..0165f74 --- /dev/null +++ b/libsoup/soup-auth-digest.h @@ -0,0 +1,64 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2000-2003, Ximian, Inc. + */ + +#ifndef SOUP_AUTH_DIGEST_H +#define SOUP_AUTH_DIGEST_H 1 + +#include "soup-auth.h" + +#define SOUP_AUTH_DIGEST(object) (G_TYPE_CHECK_INSTANCE_CAST ((object), SOUP_TYPE_AUTH_DIGEST, SoupAuthDigest)) +#define SOUP_AUTH_DIGEST_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST ((klass), SOUP_TYPE_AUTH_DIGEST, SoupAuthDigestClass)) +#define SOUP_IS_AUTH_DIGEST(object) (G_TYPE_CHECK_INSTANCE_TYPE ((object), SOUP_TYPE_AUTH_DIGEST)) +#define SOUP_IS_AUTH_DIGEST_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), SOUP_TYPE_AUTH_DIGEST)) +#define SOUP_AUTH_DIGEST_GET_CLASS(obj) (G_TYPE_INSTANCE_GET_CLASS ((obj), SOUP_TYPE_AUTH_DIGEST, SoupAuthDigestClass)) + +typedef struct { + SoupAuth parent; + +} SoupAuthDigest; + +typedef struct { + SoupAuthClass parent_class; + +} SoupAuthDigestClass; + +/* Utility routines (also used by SoupAuthDomainDigest) */ + +typedef enum { + SOUP_AUTH_DIGEST_ALGORITHM_NONE, + SOUP_AUTH_DIGEST_ALGORITHM_MD5, + SOUP_AUTH_DIGEST_ALGORITHM_MD5_SESS +} SoupAuthDigestAlgorithm; + +typedef enum { + SOUP_AUTH_DIGEST_QOP_AUTH = 1 << 0, + SOUP_AUTH_DIGEST_QOP_AUTH_INT = 1 << 1 +} SoupAuthDigestQop; + +SoupAuthDigestAlgorithm soup_auth_digest_parse_algorithm (const char *algorithm); +char *soup_auth_digest_get_algorithm (SoupAuthDigestAlgorithm algorithm); + +SoupAuthDigestQop 
soup_auth_digest_parse_qop (const char *qop); +char *soup_auth_digest_get_qop (SoupAuthDigestQop qop); + +void soup_auth_digest_compute_hex_urp (const char *username, + const char *realm, + const char *password, + char hex_urp[33]); +void soup_auth_digest_compute_hex_a1 (const char *hex_urp, + SoupAuthDigestAlgorithm algorithm, + const char *nonce, + const char *cnonce, + char hex_a1[33]); +void soup_auth_digest_compute_response (const char *method, + const char *uri, + const char *hex_a1, + SoupAuthDigestQop qop, + const char *nonce, + const char *cnonce, + int nc, + char response[33]); + +#endif /*SOUP_AUTH_DIGEST_H*/ diff --git a/libsoup/soup-auth-domain-basic.c b/libsoup/soup-auth-domain-basic.c new file mode 100644 index 0000000..49f8244 --- /dev/null +++ b/libsoup/soup-auth-domain-basic.c @@ -0,0 +1,352 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * soup-auth-domain-basic.c: HTTP Basic Authentication (server-side) + * + * Copyright (C) 2007 Novell, Inc. + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include + +#include "soup-auth-domain-basic.h" +#include "soup-headers.h" +#include "soup-marshal.h" +#include "soup-message.h" + +/** + * SECTION:soup-auth-domain-basic + * @short_description: Server-side "Basic" authentication + * + * #SoupAuthDomainBasic handles the server side of HTTP "Basic" (ie, + * cleartext password) authentication. 
+ **/ + +enum { + PROP_0, + + PROP_AUTH_CALLBACK, + PROP_AUTH_DATA, + + LAST_PROP +}; + +typedef struct { + SoupAuthDomainBasicAuthCallback auth_callback; + gpointer auth_data; + GDestroyNotify auth_dnotify; +} SoupAuthDomainBasicPrivate; + +#define SOUP_AUTH_DOMAIN_BASIC_GET_PRIVATE(o) (G_TYPE_INSTANCE_GET_PRIVATE ((o), SOUP_TYPE_AUTH_DOMAIN_BASIC, SoupAuthDomainBasicPrivate)) + +G_DEFINE_TYPE (SoupAuthDomainBasic, soup_auth_domain_basic, SOUP_TYPE_AUTH_DOMAIN) + +static char *accepts (SoupAuthDomain *domain, + SoupMessage *msg, + const char *header); +static char *challenge (SoupAuthDomain *domain, + SoupMessage *msg); +static gboolean check_password (SoupAuthDomain *domain, + SoupMessage *msg, + const char *username, + const char *password); + +static void set_property (GObject *object, guint prop_id, + const GValue *value, GParamSpec *pspec); +static void get_property (GObject *object, guint prop_id, + GValue *value, GParamSpec *pspec); + +static void +soup_auth_domain_basic_init (SoupAuthDomainBasic *basic) +{ +} + +static void +finalize (GObject *object) +{ + SoupAuthDomainBasicPrivate *priv = + SOUP_AUTH_DOMAIN_BASIC_GET_PRIVATE (object); + + if (priv->auth_dnotify) + priv->auth_dnotify (priv->auth_data); + + G_OBJECT_CLASS (soup_auth_domain_basic_parent_class)->finalize (object); +} + +static void +soup_auth_domain_basic_class_init (SoupAuthDomainBasicClass *basic_class) +{ + SoupAuthDomainClass *auth_domain_class = + SOUP_AUTH_DOMAIN_CLASS (basic_class); + GObjectClass *object_class = G_OBJECT_CLASS (basic_class); + + g_type_class_add_private (basic_class, sizeof (SoupAuthDomainBasicPrivate)); + + auth_domain_class->accepts = accepts; + auth_domain_class->challenge = challenge; + auth_domain_class->check_password = check_password; + + object_class->finalize = finalize; + object_class->set_property = set_property; + object_class->get_property = get_property; + + /** + * SOUP_AUTH_DOMAIN_BASIC_AUTH_CALLBACK: + * + * Alias for the 
#SoupAuthDomainBasic:auth-callback property. + * (The #SoupAuthDomainBasicAuthCallback.) + **/ + g_object_class_install_property ( + object_class, PROP_AUTH_CALLBACK, + g_param_spec_pointer (SOUP_AUTH_DOMAIN_BASIC_AUTH_CALLBACK, + "Authentication callback", + "Password-checking callback", + G_PARAM_READWRITE)); + /** + * SOUP_AUTH_DOMAIN_BASIC_AUTH_DATA: + * + * Alias for the #SoupAuthDomainBasic:auth-data property. + * (The data to pass to the #SoupAuthDomainBasicAuthCallback.) + **/ + g_object_class_install_property ( + object_class, PROP_AUTH_DATA, + g_param_spec_pointer (SOUP_AUTH_DOMAIN_BASIC_AUTH_DATA, + "Authentication callback data", + "Data to pass to authentication callback", + G_PARAM_READWRITE)); +} + +static void +set_property (GObject *object, guint prop_id, + const GValue *value, GParamSpec *pspec) +{ + SoupAuthDomainBasicPrivate *priv = + SOUP_AUTH_DOMAIN_BASIC_GET_PRIVATE (object); + + switch (prop_id) { + case PROP_AUTH_CALLBACK: + priv->auth_callback = g_value_get_pointer (value); + break; + case PROP_AUTH_DATA: + if (priv->auth_dnotify) { + priv->auth_dnotify (priv->auth_data); + priv->auth_dnotify = NULL; + } + priv->auth_data = g_value_get_pointer (value); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); + break; + } +} + +static void +get_property (GObject *object, guint prop_id, + GValue *value, GParamSpec *pspec) +{ + SoupAuthDomainBasicPrivate *priv = + SOUP_AUTH_DOMAIN_BASIC_GET_PRIVATE (object); + + switch (prop_id) { + case PROP_AUTH_CALLBACK: + g_value_set_pointer (value, priv->auth_callback); + break; + case PROP_AUTH_DATA: + g_value_set_pointer (value, priv->auth_data); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); + break; + } +} + +/** + * soup_auth_domain_basic_new: + * @optname1: name of first option, or %NULL + * @...: option name/value pairs + * + * Creates a #SoupAuthDomainBasic. 
You must set the + * %SOUP_AUTH_DOMAIN_REALM parameter, to indicate the realm name to be + * returned with the authentication challenge to the client. Other + * parameters are optional. + * + * Return value: the new #SoupAuthDomain + **/ +SoupAuthDomain * +soup_auth_domain_basic_new (const char *optname1, ...) +{ + SoupAuthDomain *domain; + va_list ap; + + va_start (ap, optname1); + domain = (SoupAuthDomain *)g_object_new_valist (SOUP_TYPE_AUTH_DOMAIN_BASIC, + optname1, ap); + va_end (ap); + + g_return_val_if_fail (soup_auth_domain_get_realm (domain) != NULL, NULL); + + return domain; +} + +/** + * SoupAuthDomainBasicAuthCallback: + * @domain: the domain + * @msg: the message being authenticated + * @username: the username provided by the client + * @password: the password provided by the client + * @user_data: the data passed to soup_auth_domain_basic_set_auth_callback() + * + * Callback used by #SoupAuthDomainBasic for authentication purposes. + * The application should verify that @username and @password and valid + * and return %TRUE or %FALSE. + * + * If you are maintaining your own password database (rather than + * using the password to authenticate against some other system like + * PAM or a remote server), you should make sure you know what you are + * doing. In particular, don't store cleartext passwords, or + * easily-computed hashes of cleartext passwords, even if you don't + * care that much about the security of your server, because users + * will frequently use the same password for multiple sites, and so + * compromising any site with a cleartext (or easily-cracked) password + * database may give attackers access to other more-interesting sites + * as well. 
+ * + * Return value: %TRUE if @username and @password are valid + **/ + +/** + * soup_auth_domain_basic_set_auth_callback: + * @domain: the domain + * @callback: the callback + * @user_data: data to pass to @auth_callback + * @dnotify: destroy notifier to free @user_data when @domain + * is destroyed + * + * Sets the callback that @domain will use to authenticate incoming + * requests. For each request containing authorization, @domain will + * invoke the callback, and then either accept or reject the request + * based on @callback's return value. + * + * You can also set the auth callback by setting the + * %SOUP_AUTH_DOMAIN_BASIC_AUTH_CALLBACK and + * %SOUP_AUTH_DOMAIN_BASIC_AUTH_DATA properties, which can also be + * used to set the callback at construct time. + **/ +void +soup_auth_domain_basic_set_auth_callback (SoupAuthDomain *domain, + SoupAuthDomainBasicAuthCallback callback, + gpointer user_data, + GDestroyNotify dnotify) +{ + SoupAuthDomainBasicPrivate *priv = + SOUP_AUTH_DOMAIN_BASIC_GET_PRIVATE (domain); + + if (priv->auth_dnotify) + priv->auth_dnotify (priv->auth_data); + + priv->auth_callback = callback; + priv->auth_data = user_data; + priv->auth_dnotify = dnotify; + + g_object_notify (G_OBJECT (domain), SOUP_AUTH_DOMAIN_BASIC_AUTH_CALLBACK); + g_object_notify (G_OBJECT (domain), SOUP_AUTH_DOMAIN_BASIC_AUTH_DATA); +} + +static void +pw_free (char *pw) +{ + memset (pw, 0, strlen (pw)); + g_free (pw); +} + +static gboolean +parse_basic (SoupMessage *msg, const char *header, + char **username, char **password) +{ + char *decoded, *colon; + gsize len, plen; + + if (strncmp (header, "Basic ", 6) != 0) + return FALSE; + + decoded = (char *)g_base64_decode (header + 6, &len); + if (!decoded) + return FALSE; + + colon = memchr (decoded, ':', len); + if (!colon) { + pw_free (decoded); + return FALSE; + } + *colon = '\0'; + plen = len - (colon - decoded) - 1; + + *password = g_strndup (colon + 1, plen); + memset (colon + 1, 0, plen); + *username = decoded; + 
return TRUE; +} + +static char * +accepts (SoupAuthDomain *domain, SoupMessage *msg, const char *header) +{ + SoupAuthDomainBasicPrivate *priv = + SOUP_AUTH_DOMAIN_BASIC_GET_PRIVATE (domain); + char *username, *password; + gboolean ok = FALSE; + + if (!parse_basic (msg, header, &username, &password)) + return NULL; + + if (priv->auth_callback) { + ok = priv->auth_callback (domain, msg, username, password, + priv->auth_data); + } else { + ok = soup_auth_domain_try_generic_auth_callback ( + domain, msg, username); + } + + pw_free (password); + + if (ok) + return username; + else { + g_free (username); + return NULL; + } +} + +static char * +challenge (SoupAuthDomain *domain, SoupMessage *msg) +{ + GString *challenge; + + challenge = g_string_new ("Basic "); + soup_header_g_string_append_param (challenge, "realm", soup_auth_domain_get_realm (domain)); + return g_string_free (challenge, FALSE); +} + +static gboolean +check_password (SoupAuthDomain *domain, + SoupMessage *msg, + const char *username, + const char *password) +{ + const char *header; + char *msg_username, *msg_password; + gboolean ok; + + header = soup_message_headers_get_one (msg->request_headers, + "Authorization"); + if (!parse_basic (msg, header, &msg_username, &msg_password)) + return FALSE; + + ok = (!strcmp (username, msg_username) && + !strcmp (password, msg_password)); + g_free (msg_username); + pw_free (msg_password); + + return ok; +} diff --git a/libsoup/soup-auth-domain-basic.h b/libsoup/soup-auth-domain-basic.h new file mode 100644 index 0000000..af80d17 --- /dev/null +++ b/libsoup/soup-auth-domain-basic.h @@ -0,0 +1,56 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2007 Novell, Inc. 
+ */ + +#ifndef SOUP_AUTH_DOMAIN_BASIC_H +#define SOUP_AUTH_DOMAIN_BASIC_H 1 + +#include + +G_BEGIN_DECLS + +#define SOUP_TYPE_AUTH_DOMAIN_BASIC (soup_auth_domain_basic_get_type ()) +#define SOUP_AUTH_DOMAIN_BASIC(obj) (G_TYPE_CHECK_INSTANCE_CAST ((obj), SOUP_TYPE_AUTH_DOMAIN_BASIC, SoupAuthDomainBasic)) +#define SOUP_AUTH_DOMAIN_BASIC_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST ((klass), SOUP_TYPE_AUTH_DOMAIN_BASIC, SoupAuthDomainBasicClass)) +#define SOUP_IS_AUTH_DOMAIN_BASIC(obj) (G_TYPE_CHECK_INSTANCE_TYPE ((obj), SOUP_TYPE_AUTH_DOMAIN_BASIC)) +#define SOUP_IS_AUTH_DOMAIN_BASIC_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), SOUP_TYPE_AUTH_DOMAIN_BASIC)) +#define SOUP_AUTH_DOMAIN_BASIC_GET_CLASS(obj) (G_TYPE_INSTANCE_GET_CLASS ((obj), SOUP_TYPE_AUTH_DOMAIN_BASIC, SoupAuthDomainBasicClass)) + +typedef struct { + SoupAuthDomain parent; + +} SoupAuthDomainBasic; + +typedef struct { + SoupAuthDomainClass parent_class; + + /* Padding for future expansion */ + void (*_libsoup_reserved1) (void); + void (*_libsoup_reserved2) (void); + void (*_libsoup_reserved3) (void); + void (*_libsoup_reserved4) (void); +} SoupAuthDomainBasicClass; + +#define SOUP_AUTH_DOMAIN_BASIC_AUTH_CALLBACK "auth-callback" +#define SOUP_AUTH_DOMAIN_BASIC_AUTH_DATA "auth-data" + +GType soup_auth_domain_basic_get_type (void); + +SoupAuthDomain *soup_auth_domain_basic_new (const char *optname1, + ...) 
G_GNUC_NULL_TERMINATED; + +typedef gboolean (*SoupAuthDomainBasicAuthCallback) (SoupAuthDomain *domain, + SoupMessage *msg, + const char *username, + const char *password, + gpointer user_data); + +void soup_auth_domain_basic_set_auth_callback (SoupAuthDomain *domain, + SoupAuthDomainBasicAuthCallback callback, + gpointer user_data, + GDestroyNotify dnotify); + +G_END_DECLS + +#endif /* SOUP_AUTH_DOMAIN_BASIC_H */ diff --git a/libsoup/soup-auth-domain-digest.c b/libsoup/soup-auth-domain-digest.c new file mode 100644 index 0000000..cee7745 --- /dev/null +++ b/libsoup/soup-auth-domain-digest.c @@ -0,0 +1,453 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * soup-auth-domain-digest.c: HTTP Digest Authentication (server-side) + * + * Copyright (C) 2007 Novell, Inc. + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include + +#include "soup-auth-domain-digest.h" +#include "soup-auth-digest.h" +#include "soup-headers.h" +#include "soup-marshal.h" +#include "soup-message.h" +#include "soup-uri.h" + +/** + * SECTION:soup-auth-domain-digest + * @short_description: Server-side "Digest" authentication + * + * #SoupAuthDomainDigest handles the server side of HTTP "Digest" + * authentication. 
+ **/ + +enum { + PROP_0, + + PROP_AUTH_CALLBACK, + PROP_AUTH_DATA, + + LAST_PROP +}; + +typedef struct { + SoupAuthDomainDigestAuthCallback auth_callback; + gpointer auth_data; + GDestroyNotify auth_dnotify; + +} SoupAuthDomainDigestPrivate; + +#define SOUP_AUTH_DOMAIN_DIGEST_GET_PRIVATE(o) (G_TYPE_INSTANCE_GET_PRIVATE ((o), SOUP_TYPE_AUTH_DOMAIN_DIGEST, SoupAuthDomainDigestPrivate)) + +G_DEFINE_TYPE (SoupAuthDomainDigest, soup_auth_domain_digest, SOUP_TYPE_AUTH_DOMAIN) + +static char *accepts (SoupAuthDomain *domain, + SoupMessage *msg, + const char *header); +static char *challenge (SoupAuthDomain *domain, + SoupMessage *msg); +static gboolean check_password (SoupAuthDomain *domain, + SoupMessage *msg, + const char *username, + const char *password); + +static void set_property (GObject *object, guint prop_id, + const GValue *value, GParamSpec *pspec); +static void get_property (GObject *object, guint prop_id, + GValue *value, GParamSpec *pspec); + +static void +soup_auth_domain_digest_init (SoupAuthDomainDigest *digest) +{ +} + +static void +finalize (GObject *object) +{ + SoupAuthDomainDigestPrivate *priv = + SOUP_AUTH_DOMAIN_DIGEST_GET_PRIVATE (object); + + if (priv->auth_dnotify) + priv->auth_dnotify (priv->auth_data); + + G_OBJECT_CLASS (soup_auth_domain_digest_parent_class)->finalize (object); +} + +static void +soup_auth_domain_digest_class_init (SoupAuthDomainDigestClass *digest_class) +{ + SoupAuthDomainClass *auth_domain_class = + SOUP_AUTH_DOMAIN_CLASS (digest_class); + GObjectClass *object_class = G_OBJECT_CLASS (digest_class); + + g_type_class_add_private (digest_class, sizeof (SoupAuthDomainDigestPrivate)); + + auth_domain_class->accepts = accepts; + auth_domain_class->challenge = challenge; + auth_domain_class->check_password = check_password; + + object_class->finalize = finalize; + object_class->set_property = set_property; + object_class->get_property = get_property; + + /** + * SOUP_AUTH_DOMAIN_DIGEST_AUTH_CALLBACK: + * + * Alias for the 
#SoupAuthDomainDigest:auth-callback property. + * (The #SoupAuthDomainDigestAuthCallback.) + **/ + g_object_class_install_property ( + object_class, PROP_AUTH_CALLBACK, + g_param_spec_pointer (SOUP_AUTH_DOMAIN_DIGEST_AUTH_CALLBACK, + "Authentication callback", + "Password-finding callback", + G_PARAM_READWRITE)); + /** + * SOUP_AUTH_DOMAIN_DIGEST_AUTH_DATA: + * + * Alias for the #SoupAuthDomainDigest:auth-data property. + * (The data to pass to the #SoupAuthDomainDigestAuthCallback.) + **/ + g_object_class_install_property ( + object_class, PROP_AUTH_DATA, + g_param_spec_pointer (SOUP_AUTH_DOMAIN_DIGEST_AUTH_DATA, + "Authentication callback data", + "Data to pass to authentication callback", + G_PARAM_READWRITE)); +} + +static void +set_property (GObject *object, guint prop_id, + const GValue *value, GParamSpec *pspec) +{ + SoupAuthDomainDigestPrivate *priv = + SOUP_AUTH_DOMAIN_DIGEST_GET_PRIVATE (object); + + switch (prop_id) { + case PROP_AUTH_CALLBACK: + priv->auth_callback = g_value_get_pointer (value); + break; + case PROP_AUTH_DATA: + if (priv->auth_dnotify) { + priv->auth_dnotify (priv->auth_data); + priv->auth_dnotify = NULL; + } + priv->auth_data = g_value_get_pointer (value); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); + break; + } +} + +static void +get_property (GObject *object, guint prop_id, + GValue *value, GParamSpec *pspec) +{ + SoupAuthDomainDigestPrivate *priv = + SOUP_AUTH_DOMAIN_DIGEST_GET_PRIVATE (object); + + switch (prop_id) { + case PROP_AUTH_CALLBACK: + g_value_set_pointer (value, priv->auth_callback); + break; + case PROP_AUTH_DATA: + g_value_set_pointer (value, priv->auth_data); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); + break; + } +} + +/** + * soup_auth_domain_digest_new: + * @optname1: name of first option, or %NULL + * @...: option name/value pairs + * + * Creates a #SoupAuthDomainDigest. 
You must set the + * %SOUP_AUTH_DOMAIN_REALM parameter, to indicate the realm name to be + * returned with the authentication challenge to the client. Other + * parameters are optional. + * + * Return value: the new #SoupAuthDomain + **/ +SoupAuthDomain * +soup_auth_domain_digest_new (const char *optname1, ...) +{ + SoupAuthDomain *domain; + va_list ap; + + va_start (ap, optname1); + domain = (SoupAuthDomain *)g_object_new_valist (SOUP_TYPE_AUTH_DOMAIN_DIGEST, + optname1, ap); + va_end (ap); + + g_return_val_if_fail (soup_auth_domain_get_realm (domain) != NULL, NULL); + + return domain; +} + +/** + * SoupAuthDomainDigestAuthCallback: + * @domain: the domain + * @msg: the message being authenticated + * @username: the username provided by the client + * @user_data: the data passed to soup_auth_domain_digest_set_auth_callback() + * + * Callback used by #SoupAuthDomainDigest for authentication purposes. + * The application should look up @username in its password database, + * and return the corresponding encoded password (see + * soup_auth_domain_digest_encode_password()). + * + * Return value: the encoded password, or %NULL if @username is not a + * valid user. @domain will free the password when it is done with it. + **/ + +/** + * soup_auth_domain_digest_set_auth_callback: + * @domain: the domain + * @callback: the callback + * @user_data: data to pass to @auth_callback + * @dnotify: destroy notifier to free @user_data when @domain + * is destroyed + * + * Sets the callback that @domain will use to authenticate incoming + * requests. For each request containing authorization, @domain will + * invoke the callback, and then either accept or reject the request + * based on @callback's return value. + * + * You can also set the auth callback by setting the + * %SOUP_AUTH_DOMAIN_DIGEST_AUTH_CALLBACK and + * %SOUP_AUTH_DOMAIN_DIGEST_AUTH_DATA properties, which can also be + * used to set the callback at construct time. 
+ **/ +void +soup_auth_domain_digest_set_auth_callback (SoupAuthDomain *domain, + SoupAuthDomainDigestAuthCallback callback, + gpointer user_data, + GDestroyNotify dnotify) +{ + SoupAuthDomainDigestPrivate *priv = + SOUP_AUTH_DOMAIN_DIGEST_GET_PRIVATE (domain); + + if (priv->auth_dnotify) + priv->auth_dnotify (priv->auth_data); + + priv->auth_callback = callback; + priv->auth_data = user_data; + priv->auth_dnotify = dnotify; + + g_object_notify (G_OBJECT (domain), SOUP_AUTH_DOMAIN_DIGEST_AUTH_CALLBACK); + g_object_notify (G_OBJECT (domain), SOUP_AUTH_DOMAIN_DIGEST_AUTH_DATA); +} + +static gboolean +check_hex_urp (SoupAuthDomain *domain, SoupMessage *msg, + GHashTable *params, const char *username, + const char *hex_urp) +{ + const char *uri, *qop, *realm, *msg_username; + const char *nonce, *nc, *cnonce, *response; + char hex_a1[33], computed_response[33]; + int nonce_count; + SoupURI *dig_uri, *req_uri; + + msg_username = g_hash_table_lookup (params, "username"); + if (!msg_username || strcmp (msg_username, username) != 0) + return FALSE; + + /* Check uri */ + uri = g_hash_table_lookup (params, "uri"); + if (!uri) + return FALSE; + + req_uri = soup_message_get_uri (msg); + dig_uri = soup_uri_new (uri); + if (dig_uri) { + if (!soup_uri_equal (dig_uri, req_uri)) { + soup_uri_free (dig_uri); + return FALSE; + } + soup_uri_free (dig_uri); + } else { + char *req_path; + + req_path = soup_uri_to_string (req_uri, TRUE); + if (strcmp (uri, req_path) != 0) { + g_free (req_path); + return FALSE; + } + g_free (req_path); + } + + /* Check qop; we only support "auth" for now */ + qop = g_hash_table_lookup (params, "qop"); + if (!qop || strcmp (qop, "auth") != 0) + return FALSE; + + /* Check realm */ + realm = g_hash_table_lookup (params, "realm"); + if (!realm || strcmp (realm, soup_auth_domain_get_realm (domain)) != 0) + return FALSE; + + nonce = g_hash_table_lookup (params, "nonce"); + if (!nonce) + return FALSE; + nc = g_hash_table_lookup (params, "nc"); + if (!nc) + return 
FALSE; + nonce_count = strtoul (nc, NULL, 16); + if (nonce_count <= 0) + return FALSE; + cnonce = g_hash_table_lookup (params, "cnonce"); + if (!cnonce) + return FALSE; + response = g_hash_table_lookup (params, "response"); + if (!response) + return FALSE; + + soup_auth_digest_compute_hex_a1 (hex_urp, + SOUP_AUTH_DIGEST_ALGORITHM_MD5, + nonce, cnonce, hex_a1); + soup_auth_digest_compute_response (msg->method, uri, + hex_a1, + SOUP_AUTH_DIGEST_QOP_AUTH, + nonce, cnonce, nonce_count, + computed_response); + return strcmp (response, computed_response) == 0; +} + +static char * +accepts (SoupAuthDomain *domain, SoupMessage *msg, const char *header) +{ + SoupAuthDomainDigestPrivate *priv = + SOUP_AUTH_DOMAIN_DIGEST_GET_PRIVATE (domain); + GHashTable *params; + const char *username; + gboolean accept = FALSE; + char *ret_user; + + if (strncmp (header, "Digest ", 7) != 0) + return NULL; + + params = soup_header_parse_param_list (header + 7); + if (!params) + return NULL; + + username = g_hash_table_lookup (params, "username"); + if (!username) { + soup_header_free_param_list (params); + return NULL; + } + + if (priv->auth_callback) { + char *hex_urp; + + hex_urp = priv->auth_callback (domain, msg, username, + priv->auth_data); + if (hex_urp) { + accept = check_hex_urp (domain, msg, params, + username, hex_urp); + g_free (hex_urp); + } else + accept = FALSE; + } else { + accept = soup_auth_domain_try_generic_auth_callback ( + domain, msg, username); + } + + ret_user = accept ? 
g_strdup (username) : NULL; + soup_header_free_param_list (params); + return ret_user; +} + +static char * +challenge (SoupAuthDomain *domain, SoupMessage *msg) +{ + GString *str; + + str = g_string_new ("Digest "); + soup_header_g_string_append_param_quoted (str, "realm", soup_auth_domain_get_realm (domain)); + g_string_append_printf (str, ", nonce=\"%lu%lu\"", + (unsigned long) msg, + (unsigned long) time (0)); + g_string_append_printf (str, ", qop=\"auth\""); + g_string_append_printf (str, ", algorithm=MD5"); + + return g_string_free (str, FALSE); +} + +/** + * soup_auth_domain_digest_encode_password: + * @username: a username + * @realm: an auth realm name + * @password: the password for @username in @realm + * + * Encodes the username/realm/password triplet for Digest + * authentication. (That is, it returns a stringified MD5 hash of + * @username, @realm, and @password concatenated together). This is + * the form that is needed as the return value of + * #SoupAuthDomainDigest's auth handler. + * + * For security reasons, you should store the encoded hash, rather + * than storing the cleartext password itself and calling this method + * only when you need to verify it. This way, if your server is + * compromised, the attackers will not gain access to cleartext + * passwords which might also be usable at other sites. (Note also + * that the encoded password returned by this method is identical to + * the encoded password stored in an Apache .htdigest file.) 
+ * + * Return value: the encoded password + **/ +char * +soup_auth_domain_digest_encode_password (const char *username, + const char *realm, + const char *password) +{ + char hex_urp[33]; + + soup_auth_digest_compute_hex_urp (username, realm, password, hex_urp); + return g_strdup (hex_urp); +} + +static gboolean +check_password (SoupAuthDomain *domain, + SoupMessage *msg, + const char *username, + const char *password) +{ + const char *header; + GHashTable *params; + const char *msg_username; + char hex_urp[33]; + gboolean accept; + + header = soup_message_headers_get_one (msg->request_headers, + "Authorization"); /* NULL when the request carries no such header */ + if (!header || strncmp (header, "Digest ", 7) != 0) + return FALSE; + + params = soup_header_parse_param_list (header + 7); + if (!params) + return FALSE; + + msg_username = g_hash_table_lookup (params, "username"); + if (!msg_username || strcmp (msg_username, username) != 0) { + soup_header_free_param_list (params); + return FALSE; + } + + soup_auth_digest_compute_hex_urp (username, + soup_auth_domain_get_realm (domain), + password, hex_urp); + accept = check_hex_urp (domain, msg, params, username, hex_urp); + soup_header_free_param_list (params); + return accept; +} diff --git a/libsoup/soup-auth-domain-digest.h b/libsoup/soup-auth-domain-digest.h new file mode 100644 index 0000000..98e1a3a --- /dev/null +++ b/libsoup/soup-auth-domain-digest.h @@ -0,0 +1,59 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2007 Novell, Inc. 
+ */ + +#ifndef SOUP_AUTH_DOMAIN_DIGEST_H +#define SOUP_AUTH_DOMAIN_DIGEST_H 1 + +#include + +G_BEGIN_DECLS + +#define SOUP_TYPE_AUTH_DOMAIN_DIGEST (soup_auth_domain_digest_get_type ()) +#define SOUP_AUTH_DOMAIN_DIGEST(obj) (G_TYPE_CHECK_INSTANCE_CAST ((obj), SOUP_TYPE_AUTH_DOMAIN_DIGEST, SoupAuthDomainDigest)) +#define SOUP_AUTH_DOMAIN_DIGEST_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST ((klass), SOUP_TYPE_AUTH_DOMAIN_DIGEST, SoupAuthDomainDigestClass)) +#define SOUP_IS_AUTH_DOMAIN_DIGEST(obj) (G_TYPE_CHECK_INSTANCE_TYPE ((obj), SOUP_TYPE_AUTH_DOMAIN_DIGEST)) +#define SOUP_IS_AUTH_DOMAIN_DIGEST_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), SOUP_TYPE_AUTH_DOMAIN_DIGEST)) +#define SOUP_AUTH_DOMAIN_DIGEST_GET_CLASS(obj) (G_TYPE_INSTANCE_GET_CLASS ((obj), SOUP_TYPE_AUTH_DOMAIN_DIGEST, SoupAuthDomainDigestClass)) + +typedef struct { + SoupAuthDomain parent; + +} SoupAuthDomainDigest; + +typedef struct { + SoupAuthDomainClass parent_class; + + /* Padding for future expansion */ + void (*_libsoup_reserved1) (void); + void (*_libsoup_reserved2) (void); + void (*_libsoup_reserved3) (void); + void (*_libsoup_reserved4) (void); +} SoupAuthDomainDigestClass; + +#define SOUP_AUTH_DOMAIN_DIGEST_AUTH_CALLBACK "auth-callback" +#define SOUP_AUTH_DOMAIN_DIGEST_AUTH_DATA "auth-data" + +GType soup_auth_domain_digest_get_type (void); + +SoupAuthDomain *soup_auth_domain_digest_new (const char *optname1, + ...) 
G_GNUC_NULL_TERMINATED; + +typedef char * (*SoupAuthDomainDigestAuthCallback) (SoupAuthDomain *domain, + SoupMessage *msg, + const char *username, + gpointer user_data); + +void soup_auth_domain_digest_set_auth_callback (SoupAuthDomain *domain, + SoupAuthDomainDigestAuthCallback callback, + gpointer user_data, + GDestroyNotify dnotify); + +char *soup_auth_domain_digest_encode_password (const char *username, + const char *realm, + const char *password); + +G_END_DECLS + +#endif /* SOUP_AUTH_DOMAIN_DIGEST_H */ diff --git a/libsoup/soup-auth-domain.c b/libsoup/soup-auth-domain.c new file mode 100644 index 0000000..c8ce96b --- /dev/null +++ b/libsoup/soup-auth-domain.c @@ -0,0 +1,627 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * soup-auth-domain.c: HTTP Authentication Domain (server-side) + * + * Copyright (C) 2007 Novell, Inc. + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include + +#include "soup-auth-domain.h" +#include "soup-message.h" +#include "soup-path-map.h" +#include "soup-uri.h" + +/** + * SECTION:soup-auth-domain + * @short_description: Server-side authentication + * @see_also: #SoupServer + * + * A #SoupAuthDomain manages authentication for all or part of a + * #SoupServer. To make a server require authentication, first create + * an appropriate subclass of #SoupAuthDomain, and then add it to the + * server with soup_server_add_auth_domain(). + * + * In order for an auth domain to have any effect, you must add one or + * more paths to it (via soup_auth_domain_add_path() or the + * %SOUP_AUTH_DOMAIN_ADD_PATH property). To require authentication for + * all ordinary requests, add the path "/". (Note that this does not + * include the special "*" URI (eg, "OPTIONS *"), which must be added + * as a separate path if you want to cover it.) 
+ * + * If you need greater control over which requests should and + * shouldn't be authenticated, add paths covering everything you + * might want authenticated, and then use a + * filter (soup_auth_domain_set_filter()) to bypass authentication for + * those requests that don't need it. + **/ + +enum { + PROP_0, + + PROP_REALM, + PROP_PROXY, + PROP_ADD_PATH, + PROP_REMOVE_PATH, + PROP_FILTER, + PROP_FILTER_DATA, + PROP_GENERIC_AUTH_CALLBACK, + PROP_GENERIC_AUTH_DATA, + + LAST_PROP +}; + +typedef struct { + char *realm; + gboolean proxy; + SoupPathMap *paths; + + SoupAuthDomainFilter filter; + gpointer filter_data; + GDestroyNotify filter_dnotify; + + SoupAuthDomainGenericAuthCallback auth_callback; + gpointer auth_data; + GDestroyNotify auth_dnotify; + +} SoupAuthDomainPrivate; + +#define SOUP_AUTH_DOMAIN_GET_PRIVATE(o) (G_TYPE_INSTANCE_GET_PRIVATE ((o), SOUP_TYPE_AUTH_DOMAIN, SoupAuthDomainPrivate)) + +G_DEFINE_ABSTRACT_TYPE (SoupAuthDomain, soup_auth_domain, G_TYPE_OBJECT) + +static void set_property (GObject *object, guint prop_id, + const GValue *value, GParamSpec *pspec); +static void get_property (GObject *object, guint prop_id, + GValue *value, GParamSpec *pspec); + +static void +soup_auth_domain_init (SoupAuthDomain *domain) +{ + SoupAuthDomainPrivate *priv = SOUP_AUTH_DOMAIN_GET_PRIVATE (domain); + + priv->paths = soup_path_map_new (NULL); +} + +static void +finalize (GObject *object) +{ + SoupAuthDomainPrivate *priv = SOUP_AUTH_DOMAIN_GET_PRIVATE (object); + + g_free (priv->realm); + soup_path_map_free (priv->paths); + + if (priv->filter_dnotify) + priv->filter_dnotify (priv->filter_data); + if (priv->auth_dnotify) + priv->auth_dnotify (priv->auth_data); + + G_OBJECT_CLASS (soup_auth_domain_parent_class)->finalize (object); +} + +static void +soup_auth_domain_class_init (SoupAuthDomainClass *auth_domain_class) +{ + GObjectClass *object_class = G_OBJECT_CLASS (auth_domain_class); + + g_type_class_add_private (auth_domain_class, sizeof 
(SoupAuthDomainPrivate)); + + object_class->finalize = finalize; + object_class->set_property = set_property; + object_class->get_property = get_property; + + /** + * SOUP_AUTH_DOMAIN_REALM: + * + * Alias for the #SoupAuthDomain:realm property. (The realm of + * this auth domain.) + **/ + g_object_class_install_property ( + object_class, PROP_REALM, + g_param_spec_string (SOUP_AUTH_DOMAIN_REALM, + "Realm", + "The realm of this auth domain", + NULL, + G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY)); + /** + * SOUP_AUTH_DOMAIN_PROXY: + * + * Alias for the #SoupAuthDomain:proxy property. (Whether or + * not this is a proxy auth domain.) + **/ + g_object_class_install_property ( + object_class, PROP_PROXY, + g_param_spec_boolean (SOUP_AUTH_DOMAIN_PROXY, + "Proxy", + "Whether or not this is a proxy auth domain", + FALSE, + G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY)); + /** + * SOUP_AUTH_DOMAIN_ADD_PATH: + * + * Alias for the #SoupAuthDomain:add-path property. (Shortcut + * for calling soup_auth_domain_add_path().) + **/ + g_object_class_install_property ( + object_class, PROP_ADD_PATH, + g_param_spec_string (SOUP_AUTH_DOMAIN_ADD_PATH, + "Add a path", + "Add a path covered by this auth domain", + NULL, + G_PARAM_WRITABLE)); + /** + * SOUP_AUTH_DOMAIN_REMOVE_PATH: + * + * Alias for the #SoupAuthDomain:remove-path property. + * (Shortcut for calling soup_auth_domain_remove_path().) + **/ + g_object_class_install_property ( + object_class, PROP_REMOVE_PATH, + g_param_spec_string (SOUP_AUTH_DOMAIN_REMOVE_PATH, + "Remove a path", + "Remove a path covered by this auth domain", + NULL, + G_PARAM_WRITABLE)); + /** + * SOUP_AUTH_DOMAIN_FILTER: + * + * Alias for the #SoupAuthDomain:filter property. (The + * #SoupAuthDomainFilter for the domain.) 
+ **/ + g_object_class_install_property ( + object_class, PROP_FILTER, + g_param_spec_pointer (SOUP_AUTH_DOMAIN_FILTER, + "Filter", + "A filter for deciding whether or not to require authentication", + G_PARAM_READWRITE)); + /** + * SOUP_AUTH_DOMAIN_FILTER_DATA: + * + * Alias for the #SoupAuthDomain:filter-data property. (Data + * to pass to the #SoupAuthDomainFilter.) + **/ + g_object_class_install_property ( + object_class, PROP_FILTER_DATA, + g_param_spec_pointer (SOUP_AUTH_DOMAIN_FILTER_DATA, + "Filter data", + "Data to pass to filter", + G_PARAM_READWRITE)); + /** + * SOUP_AUTH_DOMAIN_GENERIC_AUTH_CALLBACK: + * + * Alias for the #SoupAuthDomain:auth-callback property. + * (The #SoupAuthDomainGenericAuthCallback.) + **/ + g_object_class_install_property ( + object_class, PROP_GENERIC_AUTH_CALLBACK, + g_param_spec_pointer (SOUP_AUTH_DOMAIN_GENERIC_AUTH_CALLBACK, + "Generic authentication callback", + "An authentication callback that can be used with any SoupAuthDomain subclass", + G_PARAM_READWRITE)); + /** + * SOUP_AUTH_DOMAIN_GENERIC_AUTH_DATA: + * + * Alias for the #SoupAuthDomain:auth-data property. + * (The data to pass to the #SoupAuthDomainGenericAuthCallback.) 
+ **/ + g_object_class_install_property ( + object_class, PROP_GENERIC_AUTH_DATA, + g_param_spec_pointer (SOUP_AUTH_DOMAIN_GENERIC_AUTH_DATA, + "Authentication callback data", + "Data to pass to auth callback", + G_PARAM_READWRITE)); +} + +static void +set_property (GObject *object, guint prop_id, + const GValue *value, GParamSpec *pspec) +{ + SoupAuthDomain *auth_domain = SOUP_AUTH_DOMAIN (object); + SoupAuthDomainPrivate *priv = SOUP_AUTH_DOMAIN_GET_PRIVATE (object); + + switch (prop_id) { + case PROP_REALM: + g_free (priv->realm); + priv->realm = g_value_dup_string (value); + break; + case PROP_PROXY: + priv->proxy = g_value_get_boolean (value); + break; + case PROP_ADD_PATH: + soup_auth_domain_add_path (auth_domain, + g_value_get_string (value)); + break; + case PROP_REMOVE_PATH: + soup_auth_domain_remove_path (auth_domain, + g_value_get_string (value)); + break; + case PROP_FILTER: + priv->filter = g_value_get_pointer (value); + break; + case PROP_FILTER_DATA: + if (priv->filter_dnotify) { + priv->filter_dnotify (priv->filter_data); + priv->filter_dnotify = NULL; + } + priv->filter_data = g_value_get_pointer (value); + break; + case PROP_GENERIC_AUTH_CALLBACK: + priv->auth_callback = g_value_get_pointer (value); + break; + case PROP_GENERIC_AUTH_DATA: + if (priv->auth_dnotify) { + priv->auth_dnotify (priv->auth_data); + priv->auth_dnotify = NULL; + } + priv->auth_data = g_value_get_pointer (value); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); + break; + } +} + +static void +get_property (GObject *object, guint prop_id, + GValue *value, GParamSpec *pspec) +{ + SoupAuthDomainPrivate *priv = SOUP_AUTH_DOMAIN_GET_PRIVATE (object); + + switch (prop_id) { + case PROP_REALM: + g_value_set_string (value, priv->realm); + break; + case PROP_PROXY: + g_value_set_boolean (value, priv->proxy); + break; + case PROP_FILTER: + g_value_set_pointer (value, priv->filter); + break; + case PROP_FILTER_DATA: + g_value_set_pointer (value, 
priv->filter_data); + break; + case PROP_GENERIC_AUTH_CALLBACK: + g_value_set_pointer (value, priv->auth_callback); + break; + case PROP_GENERIC_AUTH_DATA: + g_value_set_pointer (value, priv->auth_data); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); + break; + } +} + +/** + * soup_auth_domain_add_path: + * @domain: a #SoupAuthDomain + * @path: the path to add to @domain + * + * Adds @path to @domain, such that requests under @path on @domain's + * server will require authentication (unless overridden by + * soup_auth_domain_remove_path() or soup_auth_domain_set_filter()). + * + * You can also add paths by setting the %SOUP_AUTH_DOMAIN_ADD_PATH + * property, which can also be used to add one or more paths at + * construct time. + **/ +void +soup_auth_domain_add_path (SoupAuthDomain *domain, const char *path) +{ + SoupAuthDomainPrivate *priv = SOUP_AUTH_DOMAIN_GET_PRIVATE (domain); + + /* "" should not match "*" */ + if (!*path) + path = "/"; + + soup_path_map_add (priv->paths, path, GINT_TO_POINTER (TRUE)); +} + +/** + * soup_auth_domain_remove_path: + * @domain: a #SoupAuthDomain + * @path: the path to remove from @domain + * + * Removes @path from @domain, such that requests under @path on + * @domain's server will NOT require authentication. + * + * This is not simply an undo-er for soup_auth_domain_add_path(); it + * can be used to "carve out" a subtree that does not require + * authentication inside a hierarchy that does. Note also that unlike + * with soup_auth_domain_add_path(), this cannot be overridden by + * adding a filter, as filters can only bypass authentication that + * would otherwise be required, not require it where it would + * otherwise be unnecessary. + * + * You can also remove paths by setting the + * %SOUP_AUTH_DOMAIN_REMOVE_PATH property, which can also be used to + * remove one or more paths at construct time. 
+ **/ +void +soup_auth_domain_remove_path (SoupAuthDomain *domain, const char *path) +{ + SoupAuthDomainPrivate *priv = SOUP_AUTH_DOMAIN_GET_PRIVATE (domain); + + /* "" should not match "*" */ + if (!*path) + path = "/"; + + soup_path_map_add (priv->paths, path, GINT_TO_POINTER (FALSE)); +} + +/** + * SoupAuthDomainFilter: + * @domain: a #SoupAuthDomain + * @msg: a #SoupMessage + * @user_data: the data passed to soup_auth_domain_set_filter() + * + * The prototype for a #SoupAuthDomain filter; see + * soup_auth_domain_set_filter() for details. + * + * Return value: %TRUE if @msg requires authentication, %FALSE if not. + **/ + +/** + * soup_auth_domain_set_filter: + * @domain: a #SoupAuthDomain + * @filter: the auth filter for @domain + * @filter_data: data to pass to @filter + * @dnotify: destroy notifier to free @filter_data when @domain + * is destroyed + * + * Adds @filter as an authentication filter to @domain. The filter + * gets a chance to bypass authentication for certain requests that + * would otherwise require it. Eg, it might check the message's path + * in some way that is too complicated to do via the other methods, or + * it might check the message's method, and allow GETs but not PUTs. + * + * The filter function returns %TRUE if the request should still + * require authentication, or %FALSE if authentication is unnecessary + * for this request. + * + * To help prevent security holes, your filter should return %TRUE by + * default, and only return %FALSE under specifically-tested + * circumstances, rather than the other way around. Eg, in the example + * above, where you want to authenticate PUTs but not GETs, you should + * check if the method is GET and return %FALSE in that case, and then + * return %TRUE for all other methods (rather than returning %TRUE for + * PUT and %FALSE for all other methods). 
This way if it turned out + * (now or later) that some paths supported additional methods besides + * GET and PUT, those methods would default to being NOT allowed for + * unauthenticated users. + * + * You can also set the filter by setting the %SOUP_AUTH_DOMAIN_FILTER + * and %SOUP_AUTH_DOMAIN_FILTER_DATA properties, which can also be + * used to set the filter at construct time. + **/ +void +soup_auth_domain_set_filter (SoupAuthDomain *domain, + SoupAuthDomainFilter filter, + gpointer filter_data, + GDestroyNotify dnotify) +{ + SoupAuthDomainPrivate *priv = SOUP_AUTH_DOMAIN_GET_PRIVATE (domain); + + if (priv->filter_dnotify) + priv->filter_dnotify (priv->filter_data); + + priv->filter = filter; + priv->filter_data = filter_data; + priv->filter_dnotify = dnotify; + + g_object_notify (G_OBJECT (domain), SOUP_AUTH_DOMAIN_FILTER); + g_object_notify (G_OBJECT (domain), SOUP_AUTH_DOMAIN_FILTER_DATA); +} + +/** + * soup_auth_domain_get_realm: + * @domain: a #SoupAuthDomain + * + * Gets the realm name associated with @domain + * + * Return value: @domain's realm + **/ +const char * +soup_auth_domain_get_realm (SoupAuthDomain *domain) +{ + SoupAuthDomainPrivate *priv = SOUP_AUTH_DOMAIN_GET_PRIVATE (domain); + + return priv->realm; +} + +/** + * SoupAuthDomainGenericAuthCallback: + * @domain: a #SoupAuthDomain + * @msg: the #SoupMessage being authenticated + * @username: the username from @msg + * @user_data: the data passed to + * soup_auth_domain_set_generic_auth_callback() + * + * The prototype for a #SoupAuthDomain generic authentication callback. + * + * The callback should look up the user's password, call + * soup_auth_domain_check_password(), and use the return value from + * that method as its own return value. 
+ * + * In general, for security reasons, it is preferable to use the + * auth-domain-specific auth callbacks (eg, + * #SoupAuthDomainBasicAuthCallback and + * #SoupAuthDomainDigestAuthCallback), because they don't require + * keeping a cleartext password database. Most users will use the same + * password for many different sites, meaning if any site with a + * cleartext password database is compromised, accounts on other + * servers might be compromised as well. For many of the cases where + * #SoupServer is used, this is not really relevant, but it may still + * be worth considering. + * + * Return value: %TRUE if @msg is authenticated, %FALSE if not. + **/ + +/** + * soup_auth_domain_set_generic_auth_callback: + * @domain: a #SoupAuthDomain + * @auth_callback: the auth callback + * @auth_data: data to pass to @auth_callback + * @dnotify: destroy notifier to free @auth_data when @domain + * is destroyed + * + * Sets @auth_callback as an authentication-handling callback for + * @domain. Whenever a request comes in to @domain which cannot be + * authenticated via a domain-specific auth callback (eg, + * #SoupAuthDomainDigestAuthCallback), the generic auth callback + * will be invoked. See #SoupAuthDomainGenericAuthCallback for information + * on what the callback should do. 
+ **/ +void +soup_auth_domain_set_generic_auth_callback (SoupAuthDomain *domain, + SoupAuthDomainGenericAuthCallback auth_callback, + gpointer auth_data, + GDestroyNotify dnotify) +{ + SoupAuthDomainPrivate *priv = SOUP_AUTH_DOMAIN_GET_PRIVATE (domain); + + if (priv->auth_dnotify) + priv->auth_dnotify (priv->auth_data); + + priv->auth_callback = auth_callback; + priv->auth_data = auth_data; + priv->auth_dnotify = dnotify; + + g_object_notify (G_OBJECT (domain), SOUP_AUTH_DOMAIN_GENERIC_AUTH_CALLBACK); + g_object_notify (G_OBJECT (domain), SOUP_AUTH_DOMAIN_GENERIC_AUTH_DATA); +} + +gboolean +soup_auth_domain_try_generic_auth_callback (SoupAuthDomain *domain, + SoupMessage *msg, + const char *username) +{ + SoupAuthDomainPrivate *priv = SOUP_AUTH_DOMAIN_GET_PRIVATE (domain); + + if (priv->auth_callback) + return priv->auth_callback (domain, msg, username, priv->auth_data); + else + return FALSE; +} + +/** + * soup_auth_domain_check_password: + * @domain: a #SoupAuthDomain + * @msg: a #SoupMessage + * @username: a username + * @password: a password + * + * Checks if @msg authenticates to @domain via @username and + * @password. This would normally be called from a + * #SoupAuthDomainGenericAuthCallback. + * + * Return value: whether or not the message is authenticated + **/ +gboolean +soup_auth_domain_check_password (SoupAuthDomain *domain, + SoupMessage *msg, + const char *username, + const char *password) +{ + return SOUP_AUTH_DOMAIN_GET_CLASS (domain)->check_password (domain, msg, + username, + password); +} + +/** + * soup_auth_domain_covers: + * @domain: a #SoupAuthDomain + * @msg: a #SoupMessage + * + * Checks if @domain requires @msg to be authenticated (according to + * its paths and filter function). This does not actually look at + * whether @msg is authenticated, merely whether + * or not it needs to be. + * + * This is used by #SoupServer internally and is probably of no use to + * anyone else. 
+ * + * Return value: %TRUE if @domain requires @msg to be authenticated + **/ +gboolean +soup_auth_domain_covers (SoupAuthDomain *domain, SoupMessage *msg) +{ + SoupAuthDomainPrivate *priv = SOUP_AUTH_DOMAIN_GET_PRIVATE (domain); + const char *path; + + if (!priv->proxy) { + path = soup_message_get_uri (msg)->path; + if (!soup_path_map_lookup (priv->paths, path)) + return FALSE; + } + + if (priv->filter && !priv->filter (domain, msg, priv->filter_data)) + return FALSE; + else + return TRUE; +} + +/** + * soup_auth_domain_accepts: + * @domain: a #SoupAuthDomain + * @msg: a #SoupMessage + * + * Checks if @msg contains appropriate authorization for @domain to + * accept it. Mirroring soup_auth_domain_covers(), this does not check + * whether or not @domain cares if @msg is + * authorized. + * + * This is used by #SoupServer internally and is probably of no use to + * anyone else. + * + * Return value: the username that @msg has authenticated as, if in + * fact it has authenticated. %NULL otherwise. + **/ +char * +soup_auth_domain_accepts (SoupAuthDomain *domain, SoupMessage *msg) +{ + SoupAuthDomainPrivate *priv = SOUP_AUTH_DOMAIN_GET_PRIVATE (domain); + const char *header; + + header = soup_message_headers_get_one (msg->request_headers, + priv->proxy ? + "Proxy-Authorization" : + "Authorization"); + if (!header) + return NULL; + return SOUP_AUTH_DOMAIN_GET_CLASS (domain)->accepts (domain, msg, header); +} + +/** + * soup_auth_domain_challenge: + * @domain: a #SoupAuthDomain + * @msg: a #SoupMessage + * + * Adds a "WWW-Authenticate" or "Proxy-Authenticate" header to @msg, + * requesting that the client authenticate, and sets @msg's status + * accordingly. + * + * This is used by #SoupServer internally and is probably of no use to + * anyone else. 
+ **/ +void +soup_auth_domain_challenge (SoupAuthDomain *domain, SoupMessage *msg) +{ + SoupAuthDomainPrivate *priv = SOUP_AUTH_DOMAIN_GET_PRIVATE (domain); + char *challenge; + + challenge = SOUP_AUTH_DOMAIN_GET_CLASS (domain)->challenge (domain, msg); + soup_message_set_status (msg, priv->proxy ? + SOUP_STATUS_PROXY_UNAUTHORIZED : + SOUP_STATUS_UNAUTHORIZED); + soup_message_headers_append (msg->response_headers, + priv->proxy ? + "Proxy-Authenticate" : + "WWW-Authenticate", + challenge); + g_free (challenge); +} diff --git a/libsoup/soup-auth-domain.h b/libsoup/soup-auth-domain.h new file mode 100644 index 0000000..e3fd3bc --- /dev/null +++ b/libsoup/soup-auth-domain.h @@ -0,0 +1,99 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2007 Novell, Inc. + */ + +#ifndef SOUP_AUTH_DOMAIN_H +#define SOUP_AUTH_DOMAIN_H 1 + +#include + +G_BEGIN_DECLS + +#define SOUP_TYPE_AUTH_DOMAIN (soup_auth_domain_get_type ()) +#define SOUP_AUTH_DOMAIN(obj) (G_TYPE_CHECK_INSTANCE_CAST ((obj), SOUP_TYPE_AUTH_DOMAIN, SoupAuthDomain)) +#define SOUP_AUTH_DOMAIN_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST ((klass), SOUP_TYPE_AUTH_DOMAIN, SoupAuthDomainClass)) +#define SOUP_IS_AUTH_DOMAIN(obj) (G_TYPE_CHECK_INSTANCE_TYPE ((obj), SOUP_TYPE_AUTH_DOMAIN)) +#define SOUP_IS_AUTH_DOMAIN_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((obj), SOUP_TYPE_AUTH_DOMAIN)) +#define SOUP_AUTH_DOMAIN_GET_CLASS(obj) (G_TYPE_INSTANCE_GET_CLASS ((obj), SOUP_TYPE_AUTH_DOMAIN, SoupAuthDomainClass)) + +struct _SoupAuthDomain { + GObject parent; + +}; + +typedef struct { + GObjectClass parent_class; + + char * (*accepts) (SoupAuthDomain *domain, + SoupMessage *msg, + const char *header); + char * (*challenge) (SoupAuthDomain *domain, + SoupMessage *msg); + gboolean (*check_password) (SoupAuthDomain *domain, + SoupMessage *msg, + const char *username, + const char *password); + + /* Padding for future expansion */ + void (*_libsoup_reserved2) (void); + void (*_libsoup_reserved3) 
(void); + void (*_libsoup_reserved4) (void); +} SoupAuthDomainClass; + +#define SOUP_AUTH_DOMAIN_REALM "realm" +#define SOUP_AUTH_DOMAIN_PROXY "proxy" +#define SOUP_AUTH_DOMAIN_ADD_PATH "add-path" +#define SOUP_AUTH_DOMAIN_REMOVE_PATH "remove-path" +#define SOUP_AUTH_DOMAIN_FILTER "filter" +#define SOUP_AUTH_DOMAIN_FILTER_DATA "filter-data" +#define SOUP_AUTH_DOMAIN_GENERIC_AUTH_CALLBACK "generic-auth-callback" +#define SOUP_AUTH_DOMAIN_GENERIC_AUTH_DATA "generic-auth-data" + +typedef gboolean (*SoupAuthDomainFilter) (SoupAuthDomain *domain, + SoupMessage *msg, + gpointer user_data); + +typedef gboolean (*SoupAuthDomainGenericAuthCallback) (SoupAuthDomain *domain, + SoupMessage *msg, + const char *username, + gpointer user_data); + +GType soup_auth_domain_get_type (void); + +void soup_auth_domain_add_path (SoupAuthDomain *domain, + const char *path); +void soup_auth_domain_remove_path (SoupAuthDomain *domain, + const char *path); + +void soup_auth_domain_set_filter (SoupAuthDomain *domain, + SoupAuthDomainFilter filter, + gpointer filter_data, + GDestroyNotify dnotify); + +const char *soup_auth_domain_get_realm (SoupAuthDomain *domain); + +void soup_auth_domain_set_generic_auth_callback (SoupAuthDomain *domain, + SoupAuthDomainGenericAuthCallback auth_callback, + gpointer auth_data, + GDestroyNotify dnotify); +gboolean soup_auth_domain_check_password (SoupAuthDomain *domain, + SoupMessage *msg, + const char *username, + const char *password); + +gboolean soup_auth_domain_covers (SoupAuthDomain *domain, + SoupMessage *msg); +char *soup_auth_domain_accepts (SoupAuthDomain *domain, + SoupMessage *msg); +void soup_auth_domain_challenge (SoupAuthDomain *domain, + SoupMessage *msg); + +/* protected */ +gboolean soup_auth_domain_try_generic_auth_callback (SoupAuthDomain *domain, + SoupMessage *msg, + const char *username); + +G_END_DECLS + +#endif /* SOUP_AUTH_DOMAIN_H */ diff --git a/libsoup/soup-auth-manager-ntlm.c b/libsoup/soup-auth-manager-ntlm.c new file mode 100644 
index 0000000..7c3f239 --- /dev/null +++ b/libsoup/soup-auth-manager-ntlm.c @@ -0,0 +1,1349 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * soup-auth-manager-ntlm.c: NTLM auth manager + * + * Copyright (C) 2001-2007 Novell, Inc. + * Copyright (C) 2008 Red Hat, Inc. + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include + +#ifdef USE_NTLM_AUTH +#include +#include +#endif +#include "soup-auth-manager-ntlm.h" +#include "soup-auth-ntlm.h" +#include "soup-message.h" +#include "soup-message-private.h" +#include "soup-misc.h" +#include "soup-session.h" +#include "soup-session-feature.h" +#include "soup-uri.h" + +static void soup_auth_manager_ntlm_session_feature_init (SoupSessionFeatureInterface *feature_interface, gpointer interface_data); +static SoupSessionFeatureInterface *soup_auth_manager_parent_feature_interface; + +static void attach (SoupSessionFeature *feature, SoupSession *session); +static void request_queued (SoupSessionFeature *feature, SoupSession *session, + SoupMessage *msg); +static void request_started (SoupSessionFeature *feature, SoupSession *session, + SoupMessage *msg, SoupSocket *socket); +static void request_unqueued (SoupSessionFeature *feature, + SoupSession *session, SoupMessage *msg); +static gboolean add_feature (SoupSessionFeature *feature, GType type); +static gboolean remove_feature (SoupSessionFeature *feature, GType type); +static gboolean has_feature (SoupSessionFeature *feature, GType type); + +G_DEFINE_TYPE_WITH_CODE (SoupAuthManagerNTLM, soup_auth_manager_ntlm, SOUP_TYPE_AUTH_MANAGER, + G_IMPLEMENT_INTERFACE (SOUP_TYPE_SESSION_FEATURE, + soup_auth_manager_ntlm_session_feature_init)) + +typedef enum { + SOUP_NTLM_NEW, +#ifdef USE_NTLM_AUTH + SOUP_NTLM_SENT_SSO_REQUEST, + SOUP_NTLM_RECEIVED_SSO_CHALLENGE, + SOUP_NTLM_SENT_SSO_RESPONSE, + SOUP_NTLM_SSO_FAILED, +#endif + SOUP_NTLM_SENT_REQUEST, + SOUP_NTLM_RECEIVED_CHALLENGE, + SOUP_NTLM_SENT_RESPONSE, + SOUP_NTLM_FAILED +} 
SoupNTLMState; + +typedef struct { + SoupSocket *socket; + SoupNTLMState state; + char *response_header; + + char *nonce, *domain; + SoupAuth *auth; +#ifdef USE_NTLM_AUTH + char *challenge_header; + int fd_in; + int fd_out; +#endif +} SoupNTLMConnection; + +typedef struct { + gboolean use_ntlm; + + SoupSession *session; + GHashTable *connections_by_msg; + GHashTable *connections_by_id; +#ifdef USE_NTLM_AUTH + gboolean ntlm_auth_accessible; +#endif +} SoupAuthManagerNTLMPrivate; +#define SOUP_AUTH_MANAGER_NTLM_GET_PRIVATE(o) (G_TYPE_INSTANCE_GET_PRIVATE ((o), SOUP_TYPE_AUTH_MANAGER_NTLM, SoupAuthManagerNTLMPrivate)) + +static char *soup_ntlm_request (void); +static gboolean soup_ntlm_parse_challenge (const char *challenge, + char **nonce, + char **default_domain); +static char *soup_ntlm_response (const char *nonce, + const char *user, + const char *password, + const char *host, + const char *domain); +#ifdef USE_NTLM_AUTH +static void sso_ntlm_close (SoupNTLMConnection *conn); +#endif + +static void +soup_auth_manager_ntlm_init (SoupAuthManagerNTLM *ntlm) +{ + SoupAuthManagerNTLMPrivate *priv = + SOUP_AUTH_MANAGER_NTLM_GET_PRIVATE (ntlm); + + priv->connections_by_id = g_hash_table_new (NULL, NULL); + priv->connections_by_msg = g_hash_table_new (NULL, NULL); +#ifdef USE_NTLM_AUTH + priv->ntlm_auth_accessible = (access (NTLM_AUTH, X_OK) == 0); +#endif +} + +static void +free_ntlm_connection (SoupNTLMConnection *conn) +{ + g_free (conn->response_header); + g_free (conn->nonce); + g_free (conn->domain); + if (conn->auth) + g_object_unref (conn->auth); +#ifdef USE_NTLM_AUTH + g_free (conn->challenge_header); + sso_ntlm_close (conn); +#endif + g_slice_free (SoupNTLMConnection, conn); +} + +static void +free_ntlm_connection_foreach (gpointer key, gpointer value, gpointer user_data) +{ + free_ntlm_connection (value); +} + +static void +finalize (GObject *object) +{ + SoupAuthManagerNTLMPrivate *priv = + SOUP_AUTH_MANAGER_NTLM_GET_PRIVATE (object); + + g_hash_table_foreach 
(priv->connections_by_id, + free_ntlm_connection_foreach, NULL); + g_hash_table_destroy (priv->connections_by_id); + g_hash_table_destroy (priv->connections_by_msg); + + G_OBJECT_CLASS (soup_auth_manager_ntlm_parent_class)->finalize (object); +} + +static void +soup_auth_manager_ntlm_class_init (SoupAuthManagerNTLMClass *auth_manager_ntlm_class) +{ + GObjectClass *object_class = G_OBJECT_CLASS (auth_manager_ntlm_class); + + g_type_class_add_private (auth_manager_ntlm_class, sizeof (SoupAuthManagerNTLMPrivate)); + + object_class->finalize = finalize; +} + +static void +soup_auth_manager_ntlm_session_feature_init (SoupSessionFeatureInterface *feature_interface, + gpointer interface_data) +{ + soup_auth_manager_parent_feature_interface = + g_type_interface_peek_parent (feature_interface); + + feature_interface->attach = attach; + feature_interface->request_queued = request_queued; + feature_interface->request_started = request_started; + feature_interface->request_unqueued = request_unqueued; + feature_interface->add_feature = add_feature; + feature_interface->remove_feature = remove_feature; + feature_interface->has_feature = has_feature; +} + +static void +attach (SoupSessionFeature *manager, SoupSession *session) +{ + SoupAuthManagerNTLMPrivate *priv = + SOUP_AUTH_MANAGER_NTLM_GET_PRIVATE (manager); + + /* FIXME: should support multiple sessions */ + priv->session = session; + + soup_auth_manager_parent_feature_interface->attach (manager, session); +} + +static void +delete_conn (SoupSocket *socket, gpointer user_data) +{ + SoupAuthManagerNTLMPrivate *priv = user_data; + SoupNTLMConnection *conn; + + conn = g_hash_table_lookup (priv->connections_by_id, socket); + if (conn) + free_ntlm_connection (conn); + g_hash_table_remove (priv->connections_by_id, socket); + g_signal_handlers_disconnect_by_func (socket, delete_conn, priv); +} + +static SoupNTLMConnection * +get_connection (SoupAuthManagerNTLMPrivate *priv, SoupSocket *socket) +{ + SoupNTLMConnection *conn; + + 
conn = g_hash_table_lookup (priv->connections_by_id, socket); + if (conn) + return conn; + + conn = g_slice_new0 (SoupNTLMConnection); + conn->socket = socket; + conn->state = SOUP_NTLM_NEW; +#ifdef USE_NTLM_AUTH + conn->fd_in = -1; + conn->fd_out = -1; +#endif + g_hash_table_insert (priv->connections_by_id, socket, conn); + + g_signal_connect (socket, "disconnected", + G_CALLBACK (delete_conn), priv); + return conn; +} + +static void +unset_conn (SoupMessage *msg, gpointer user_data) +{ + SoupAuthManagerNTLMPrivate *priv = user_data; + + g_hash_table_remove (priv->connections_by_msg, msg); + g_signal_handlers_disconnect_by_func (msg, unset_conn, priv); +} + +static SoupNTLMConnection * +set_connection_for_msg (SoupAuthManagerNTLMPrivate *priv, SoupMessage *msg, + SoupNTLMConnection *conn) +{ + if (!g_hash_table_lookup (priv->connections_by_msg, msg)) { + g_signal_connect (msg, "finished", + G_CALLBACK (unset_conn), priv); + g_signal_connect (msg, "restarted", + G_CALLBACK (unset_conn), priv); + } + g_hash_table_insert (priv->connections_by_msg, msg, conn); + + return conn; +} + +static SoupNTLMConnection * +get_connection_for_msg (SoupAuthManagerNTLMPrivate *priv, SoupMessage *msg) +{ + return g_hash_table_lookup (priv->connections_by_msg, msg); +} + +#ifdef USE_NTLM_AUTH +static void +sso_ntlm_close (SoupNTLMConnection *conn) +{ + if (conn->fd_in != -1) { + close (conn->fd_in); + conn->fd_in = -1; + } + + if (conn->fd_out != -1) { + close (conn->fd_out); + conn->fd_out = -1; + } +} + +static gboolean +sso_ntlm_initiate (SoupNTLMConnection *conn, SoupAuthManagerNTLMPrivate *priv) +{ + char *username = NULL, *slash, *domain = NULL; + char *argv[9]; + gboolean ret; + + /* Return if ntlm_auth execution process exist already */ + if (conn->fd_in != -1 && conn->fd_out != -1) + return TRUE; + else + /* Clean all sso data before re-initiate */ + sso_ntlm_close (conn); + + if (!priv->ntlm_auth_accessible) + goto done; + + username = getenv ("NTLMUSER"); + if (!username) + 
username = getenv ("USER"); + if (!username) + goto done; + + slash = strpbrk (username, "\\/"); + if (slash) { + domain = g_strdup (username); + slash = domain + (slash - username); + *slash = '\0'; + username = slash + 1; + } + + argv[0] = NTLM_AUTH; + argv[1] = "--helper-protocol"; + argv[2] = "ntlmssp-client-1"; + argv[3] = "--use-cached-creds"; + argv[4] = "--username"; + argv[5] = username; + argv[6] = domain ? "--domain" : NULL; + argv[7] = domain; + argv[8] = NULL; + /* Spawn child process */ + ret = g_spawn_async_with_pipes (NULL, argv, NULL, + G_SPAWN_FILE_AND_ARGV_ZERO, + NULL, NULL, + NULL, &conn->fd_in, &conn->fd_out, + NULL, NULL); + if (!ret) + goto done; + g_free (domain); + return TRUE; +done: + g_free (domain); + return FALSE; +} + +static char * +sso_ntlm_response (SoupNTLMConnection *conn, const char *input, SoupNTLMState conn_state) +{ + ssize_t size; + char buf[1024], *response = NULL; + char *tmpbuf = buf; + size_t len_in = strlen (input), len_out = sizeof (buf); + + while (len_in > 0) { + int written = write (conn->fd_in, input, len_in); + if (written == -1) { + /* Interrupted by a signal, retry it */ + if (errno == EINTR) + continue; + /* write failed if other errors happen */ + goto done; + } + input += written; + len_in -= written; + } + /* Read one line */ + while (len_out > 0) { + size = read (conn->fd_out, tmpbuf, len_out); + if (size == -1) { + if (errno == EINTR) + continue; + goto done; + } else if (size == 0) + goto done; + else if (tmpbuf[size - 1] == '\n') { + tmpbuf[size - 1] = '\0'; + goto wrfinish; + } + tmpbuf += size; + len_out -= size; + } + goto done; +wrfinish: + if (conn_state == SOUP_NTLM_NEW && + g_ascii_strcasecmp (buf, "PW") == 0) { + /* Samba/winbind installed but not configured */ + response = g_strdup ("PW"); + goto done; + } + if (conn_state == SOUP_NTLM_NEW && + g_ascii_strncasecmp (buf, "YR ", 3) != 0) + /* invalid response for type 1 message */ + goto done; + if (conn_state == SOUP_NTLM_RECEIVED_SSO_CHALLENGE 
&& + g_ascii_strncasecmp (buf, "KK ", 3) != 0 && + g_ascii_strncasecmp (buf, "AF ", 3) != 0) + /* invalid response for type 3 message */ + goto done; + + response = g_strdup_printf ("NTLM %.*s", (int)(size - 4), buf + 3); + goto done; +done: + return response; +} +#endif /* USE_NTLM_AUTH */ + +static void +ntlm_authorize_pre (SoupMessage *msg, gpointer ntlm) +{ + SoupAuthManagerNTLMPrivate *priv = + SOUP_AUTH_MANAGER_NTLM_GET_PRIVATE (ntlm); + SoupNTLMConnection *conn; + const char *val; + + conn = get_connection_for_msg (priv, msg); + if (!conn) + return; + + val = soup_message_headers_get_list (msg->response_headers, + "WWW-Authenticate"); + if (val) + val = strstr (val, "NTLM "); + if (!val) + return; + + if (conn->state > SOUP_NTLM_SENT_REQUEST) { + /* We already authenticated, but then got another 401. + * That means "permission denied", so don't try to + * authenticate again. + */ + conn->state = SOUP_NTLM_FAILED; + goto done; + } + + if (!soup_ntlm_parse_challenge (val, &conn->nonce, &conn->domain)) { + conn->state = SOUP_NTLM_FAILED; + goto done; + } + + conn->auth = soup_auth_ntlm_new (conn->domain, + soup_message_get_uri (msg)->host); +#ifdef USE_NTLM_AUTH + conn->challenge_header = g_strdup (val + 5); + if (conn->state == SOUP_NTLM_SENT_SSO_REQUEST) { + conn->state = SOUP_NTLM_RECEIVED_SSO_CHALLENGE; + goto done; + } +#endif + conn->state = SOUP_NTLM_RECEIVED_CHALLENGE; + soup_auth_manager_emit_authenticate (SOUP_AUTH_MANAGER (ntlm), msg, + conn->auth, FALSE); + + done: + /* Remove the WWW-Authenticate headers so the session won't try + * to do Basic auth too. 
+ */ + soup_message_headers_remove (msg->response_headers, "WWW-Authenticate"); +} + +static void +ntlm_authorize_post (SoupMessage *msg, gpointer ntlm) +{ + SoupAuthManagerNTLMPrivate *priv = + SOUP_AUTH_MANAGER_NTLM_GET_PRIVATE (ntlm); + SoupNTLMConnection *conn; + const char *username = NULL, *password = NULL; + char *slash, *domain = NULL; + + conn = get_connection_for_msg (priv, msg); + if (!conn || !conn->auth) + return; + +#ifdef USE_NTLM_AUTH + if (conn->state == SOUP_NTLM_RECEIVED_SSO_CHALLENGE) { + char *input; + input = g_strdup_printf ("TT %s\n", conn->challenge_header); + /* Re-Initiate ntlm_auth process in case it was closed/killed abnormally */ + if (sso_ntlm_initiate (conn, priv)) { + conn->response_header = sso_ntlm_response (conn, + input, + conn->state); + /* Close ntlm_auth as it is no longer needed for current connection */ + sso_ntlm_close (conn); + if (!conn->response_header) { + g_free (input); + goto ssofailure; + } + soup_session_requeue_message (priv->session, msg); + g_free (input); + goto done; + } +ssofailure: + conn->state = SOUP_NTLM_SSO_FAILED; + soup_session_requeue_message (priv->session, msg); + goto done; + } +#endif + username = soup_auth_ntlm_get_username (conn->auth); + password = soup_auth_ntlm_get_password (conn->auth); + if (!username || !password) + goto done; + + slash = strpbrk (username, "\\/"); + if (slash) { + domain = g_strdup (username); + slash = domain + (slash - username); + *slash = '\0'; + username = slash + 1; + } else + domain = conn->domain; + + conn->response_header = soup_ntlm_response (conn->nonce, + username, password, + NULL, domain); + soup_session_requeue_message (priv->session, msg); + +done: + if (domain != conn->domain) + g_free (domain); + g_free (conn->domain); + conn->domain = NULL; + g_free (conn->nonce); + conn->nonce = NULL; + g_object_unref (conn->auth); + conn->auth = NULL; +} + +static void +request_queued (SoupSessionFeature *ntlm, SoupSession *session, SoupMessage *msg) +{ + 
SoupAuthManagerNTLMPrivate *priv = + SOUP_AUTH_MANAGER_NTLM_GET_PRIVATE (ntlm); + + if (priv->use_ntlm) { + soup_message_add_status_code_handler ( + msg, "got_headers", SOUP_STATUS_UNAUTHORIZED, + G_CALLBACK (ntlm_authorize_pre), ntlm); + soup_message_add_status_code_handler ( + msg, "got_body", SOUP_STATUS_UNAUTHORIZED, + G_CALLBACK (ntlm_authorize_post), ntlm); + } + + soup_auth_manager_parent_feature_interface->request_queued (ntlm, session, msg); +} + +static void +request_started (SoupSessionFeature *ntlm, SoupSession *session, + SoupMessage *msg, SoupSocket *socket) +{ + SoupAuthManagerNTLMPrivate *priv = + SOUP_AUTH_MANAGER_NTLM_GET_PRIVATE (ntlm); + SoupNTLMConnection *conn; + char *header = NULL; + + if (!priv->use_ntlm) + goto super; + + conn = get_connection (priv, socket); + set_connection_for_msg (priv, msg, conn); + + switch (conn->state) { + case SOUP_NTLM_NEW: +#ifdef USE_NTLM_AUTH + /* Use Samba's 'winbind' daemon to support NTLM single-sign-on, + * by delegating the NTLM challenge/response protocal to a helper + * in ntlm_auth. + * http://devel.squid-cache.org/ntlm/squid_helper_protocol.html + * http://www.samba.org/samba/docs/man/manpages-3/winbindd.8.html + * http://www.samba.org/samba/docs/man/manpages-3/ntlm_auth.1.html + * The preprocessor variable 'USE_NTLM_AUTH' indicates whether + * this feature is enabled. Another one 'NTLM_AUTH' contains absolute + * path of it. + * If NTLM single-sign-on fails, go back to original request handling process. 
+ */ + if (sso_ntlm_initiate (conn, priv)) { + header = sso_ntlm_response (conn, "YR\n", conn->state); + if (header) { + if (g_ascii_strcasecmp (header, "PW") != 0) { + conn->state = SOUP_NTLM_SENT_SSO_REQUEST; + break; + } else { + g_free (header); + header = NULL; + goto ssofailure; + } + } else { + g_warning ("NTLM single-sign-on by using %s failed", NTLM_AUTH); + goto ssofailure; + } + } +ssofailure: +#endif + header = soup_ntlm_request (); + conn->state = SOUP_NTLM_SENT_REQUEST; + break; +#ifdef USE_NTLM_AUTH + case SOUP_NTLM_RECEIVED_SSO_CHALLENGE: + header = conn->response_header; + conn->response_header = NULL; + conn->state = SOUP_NTLM_SENT_SSO_RESPONSE; + break; + case SOUP_NTLM_SSO_FAILED: + /* Restart request without SSO */ + g_warning ("NTLM single-sign-on by using %s failed", NTLM_AUTH); + header = soup_ntlm_request (); + conn->state = SOUP_NTLM_SENT_REQUEST; + break; +#endif + case SOUP_NTLM_RECEIVED_CHALLENGE: + header = conn->response_header; + conn->response_header = NULL; + conn->state = SOUP_NTLM_SENT_RESPONSE; + break; + default: + break; + } + + if (header && !soup_message_get_auth (msg)) { + soup_message_headers_replace (msg->request_headers, + "Authorization", header); + g_free (header); + } + +super: + soup_auth_manager_parent_feature_interface->request_started (ntlm, session, msg, socket); +} + +static void +request_unqueued (SoupSessionFeature *ntlm, SoupSession *session, + SoupMessage *msg) +{ + g_signal_handlers_disconnect_by_func (msg, ntlm_authorize_pre, ntlm); + g_signal_handlers_disconnect_by_func (msg, ntlm_authorize_post, ntlm); + + soup_auth_manager_parent_feature_interface->request_unqueued (ntlm, session, msg); +} + +static gboolean +add_feature (SoupSessionFeature *feature, GType type) +{ + SoupAuthManagerNTLMPrivate *priv = + SOUP_AUTH_MANAGER_NTLM_GET_PRIVATE (feature); + + if (type == SOUP_TYPE_AUTH_NTLM) { + priv->use_ntlm = TRUE; + return TRUE; + } + + return soup_auth_manager_parent_feature_interface->add_feature 
(feature, type); +} + +static gboolean +remove_feature (SoupSessionFeature *feature, GType type) +{ + SoupAuthManagerNTLMPrivate *priv = + SOUP_AUTH_MANAGER_NTLM_GET_PRIVATE (feature); + + if (type == SOUP_TYPE_AUTH_NTLM) { + priv->use_ntlm = FALSE; + return TRUE; + } + + return soup_auth_manager_parent_feature_interface->remove_feature (feature, type); +} + +static gboolean +has_feature (SoupSessionFeature *feature, GType type) +{ + SoupAuthManagerNTLMPrivate *priv = + SOUP_AUTH_MANAGER_NTLM_GET_PRIVATE (feature); + + if (type == SOUP_TYPE_AUTH_NTLM) + return priv->use_ntlm; + + return soup_auth_manager_parent_feature_interface->has_feature (feature, type); +} + +/* NTLM code */ + +static void md4sum (const unsigned char *in, + int nbytes, + unsigned char digest[16]); + +typedef guint32 DES_KS[16][2]; /* Single-key DES key schedule */ + +static void deskey (DES_KS, unsigned char *, int); + +static void des (DES_KS, unsigned char *); + +static void setup_schedule (const guchar *key_56, DES_KS ks); + +static void calc_response (const guchar *key, + const guchar *plaintext, + guchar *results); + +#define LM_PASSWORD_MAGIC "\x4B\x47\x53\x21\x40\x23\x24\x25" \ + "\x4B\x47\x53\x21\x40\x23\x24\x25" \ + "\x00\x00\x00\x00\x00" + +static void +lanmanager_hash (const char *password, guchar hash[21]) +{ + guchar lm_password [15]; + DES_KS ks; + int i; + + for (i = 0; i < 14 && password [i]; i++) + lm_password [i] = toupper ((unsigned char) password [i]); + + for (; i < 15; i++) + lm_password [i] = '\0'; + + memcpy (hash, LM_PASSWORD_MAGIC, 21); + + setup_schedule (lm_password, ks); + des (ks, hash); + + setup_schedule (lm_password + 7, ks); + des (ks, hash + 8); +} + +static void +nt_hash (const char *password, guchar hash[21]) +{ + unsigned char *buf, *p; + + p = buf = g_malloc (strlen (password) * 2); + + while (*password) { + *p++ = *password++; + *p++ = '\0'; + } + + md4sum (buf, p - buf, hash); + memset (hash + 16, 0, 5); + + g_free (buf); +} + +typedef struct { + 
guint16 length; + guint16 length2; + guint16 offset; + guchar zero_pad[2]; +} NTLMString; + +#define NTLM_CHALLENGE_NONCE_OFFSET 24 +#define NTLM_CHALLENGE_NONCE_LENGTH 8 +#define NTLM_CHALLENGE_DOMAIN_STRING_OFFSET 12 + +#define NTLM_RESPONSE_HEADER "NTLMSSP\x00\x03\x00\x00\x00" +#define NTLM_RESPONSE_FLAGS 0x8202 + +typedef struct { + guchar header[12]; + + NTLMString lm_resp; + NTLMString nt_resp; + NTLMString domain; + NTLMString user; + NTLMString host; + NTLMString session_key; + + guint32 flags; +} NTLMResponse; + +static void +ntlm_set_string (NTLMString *string, int *offset, int len) +{ + string->offset = GUINT16_TO_LE (*offset); + string->length = string->length2 = GUINT16_TO_LE (len); + *offset += len; +} + +static char * +soup_ntlm_request (void) +{ + return g_strdup ("NTLM TlRMTVNTUAABAAAABoIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAMAAAAAAAAAAwAAAA"); +} + +static gboolean +soup_ntlm_parse_challenge (const char *challenge, + char **nonce, + char **default_domain) +{ + gsize clen; + NTLMString domain; + guchar *chall; + + if (strncmp (challenge, "NTLM ", 5) != 0) + return FALSE; + + chall = g_base64_decode (challenge + 5, &clen); + if (clen < NTLM_CHALLENGE_DOMAIN_STRING_OFFSET || + clen < NTLM_CHALLENGE_NONCE_OFFSET + NTLM_CHALLENGE_NONCE_LENGTH) { + g_free (chall); + return FALSE; + } + + if (default_domain) { + memcpy (&domain, chall + NTLM_CHALLENGE_DOMAIN_STRING_OFFSET, sizeof (domain)); + domain.length = GUINT16_FROM_LE (domain.length); + domain.offset = GUINT16_FROM_LE (domain.offset); + + if (clen < domain.length + domain.offset) { + g_free (chall); + return FALSE; + } + + *default_domain = g_strndup ((char *)chall + domain.offset, domain.length); + } + + if (nonce) { + *nonce = g_memdup (chall + NTLM_CHALLENGE_NONCE_OFFSET, + NTLM_CHALLENGE_NONCE_LENGTH); + } + + g_free (chall); + return TRUE; +} + +static char * +soup_ntlm_response (const char *nonce, + const char *user, + const char *password, + const char *host, + const char *domain) +{ + int hlen, 
dlen, ulen, offset; + guchar hash[21], lm_resp[24], nt_resp[24]; + NTLMResponse resp; + char *out, *p; + int state, save; + + nt_hash (password, hash); + calc_response (hash, (guchar *)nonce, nt_resp); + lanmanager_hash (password, hash); + calc_response (hash, (guchar *)nonce, lm_resp); + + memset (&resp, 0, sizeof (resp)); + memcpy (resp.header, NTLM_RESPONSE_HEADER, sizeof (resp.header)); + resp.flags = GUINT32_TO_LE (NTLM_RESPONSE_FLAGS); + + offset = sizeof (resp); + + dlen = strlen (domain); + ntlm_set_string (&resp.domain, &offset, dlen); + ulen = strlen (user); + ntlm_set_string (&resp.user, &offset, ulen); + if (!host) + host = "UNKNOWN"; + hlen = strlen (host); + ntlm_set_string (&resp.host, &offset, hlen); + ntlm_set_string (&resp.lm_resp, &offset, sizeof (lm_resp)); + ntlm_set_string (&resp.nt_resp, &offset, sizeof (nt_resp)); + + out = g_malloc (((offset + 3) * 4) / 3 + 6); + strncpy (out, "NTLM ", 5); + p = out + 5; + + state = save = 0; + + p += g_base64_encode_step ((const guchar *) &resp, sizeof (resp), + FALSE, p, &state, &save); + p += g_base64_encode_step ((const guchar *) domain, dlen, + FALSE, p, &state, &save); + p += g_base64_encode_step ((const guchar *) user, ulen, + FALSE, p, &state, &save); + p += g_base64_encode_step ((const guchar *) host, hlen, + FALSE, p, &state, &save); + p += g_base64_encode_step (lm_resp, sizeof (lm_resp), + FALSE, p, &state, &save); + p += g_base64_encode_step (nt_resp, sizeof (nt_resp), + FALSE, p, &state, &save); + p += g_base64_encode_close (FALSE, p, &state, &save); + *p = '\0'; + + return out; +} + +/* DES utils */ +/* Set up a key schedule based on a 56bit key */ +static void +setup_schedule (const guchar *key_56, DES_KS ks) +{ + guchar key[8]; + int i, c, bit; + + key[0] = (key_56[0]) ; + key[1] = (key_56[1] >> 1) | ((key_56[0] << 7) & 0xFF); + key[2] = (key_56[2] >> 2) | ((key_56[1] << 6) & 0xFF); + key[3] = (key_56[3] >> 3) | ((key_56[2] << 5) & 0xFF); + key[4] = (key_56[4] >> 4) | ((key_56[3] << 4) & 
0xFF); + key[5] = (key_56[5] >> 5) | ((key_56[4] << 3) & 0xFF); + key[6] = (key_56[6] >> 6) | ((key_56[5] << 2) & 0xFF); + key[7] = ((key_56[6] << 1) & 0xFF); + + /* Fix parity */ + for (i = 0; i < 8; i++) { + for (c = bit = 0; bit < 8; bit++) + if (key[i] & (1 << bit)) + c++; + if (!(c & 1)) + key[i] ^= 0x01; + } + + deskey (ks, key, 0); +} + +static void +calc_response (const guchar *key, const guchar *plaintext, guchar *results) +{ + DES_KS ks; + + memcpy (results, plaintext, 8); + memcpy (results + 8, plaintext, 8); + memcpy (results + 16, plaintext, 8); + + setup_schedule (key, ks); + des (ks, results); + + setup_schedule (key + 7, ks); + des (ks, results + 8); + + setup_schedule (key + 14, ks); + des (ks, results + 16); +} + + +/* + * MD4 encoder. (The one everyone else uses is not GPL-compatible; + * this is a reimplementation from spec.) This doesn't need to be + * efficient for our purposes, although it would be nice to fix + * it to not malloc()... + */ + +#define F(X,Y,Z) ( ((X)&(Y)) | ((~(X))&(Z)) ) +#define G(X,Y,Z) ( ((X)&(Y)) | ((X)&(Z)) | ((Y)&(Z)) ) +#define H(X,Y,Z) ( (X)^(Y)^(Z) ) +#define ROT(val, n) ( ((val) << (n)) | ((val) >> (32 - (n))) ) + +static void +md4sum (const unsigned char *in, int nbytes, unsigned char digest[16]) +{ + unsigned char *M; + guint32 A, B, C, D, AA, BB, CC, DD, X[16]; + int pbytes, nbits = nbytes * 8, i, j; + + pbytes = (120 - (nbytes % 64)) % 64; + M = alloca (nbytes + pbytes + 8); + memcpy (M, in, nbytes); + memset (M + nbytes, 0, pbytes + 8); + M[nbytes] = 0x80; + M[nbytes + pbytes] = nbits & 0xFF; + M[nbytes + pbytes + 1] = (nbits >> 8) & 0xFF; + M[nbytes + pbytes + 2] = (nbits >> 16) & 0xFF; + M[nbytes + pbytes + 3] = (nbits >> 24) & 0xFF; + + A = 0x67452301; + B = 0xEFCDAB89; + C = 0x98BADCFE; + D = 0x10325476; + + for (i = 0; i < nbytes + pbytes + 8; i += 64) { + for (j = 0; j < 16; j++) { + X[j] = (M[i + j*4]) | + (M[i + j*4 + 1] << 8) | + (M[i + j*4 + 2] << 16) | + (M[i + j*4 + 3] << 24); + } + + AA = A; + BB 
= B; + CC = C; + DD = D; + + A = ROT (A + F(B, C, D) + X[0], 3); + D = ROT (D + F(A, B, C) + X[1], 7); + C = ROT (C + F(D, A, B) + X[2], 11); + B = ROT (B + F(C, D, A) + X[3], 19); + A = ROT (A + F(B, C, D) + X[4], 3); + D = ROT (D + F(A, B, C) + X[5], 7); + C = ROT (C + F(D, A, B) + X[6], 11); + B = ROT (B + F(C, D, A) + X[7], 19); + A = ROT (A + F(B, C, D) + X[8], 3); + D = ROT (D + F(A, B, C) + X[9], 7); + C = ROT (C + F(D, A, B) + X[10], 11); + B = ROT (B + F(C, D, A) + X[11], 19); + A = ROT (A + F(B, C, D) + X[12], 3); + D = ROT (D + F(A, B, C) + X[13], 7); + C = ROT (C + F(D, A, B) + X[14], 11); + B = ROT (B + F(C, D, A) + X[15], 19); + + A = ROT (A + G(B, C, D) + X[0] + 0x5A827999, 3); + D = ROT (D + G(A, B, C) + X[4] + 0x5A827999, 5); + C = ROT (C + G(D, A, B) + X[8] + 0x5A827999, 9); + B = ROT (B + G(C, D, A) + X[12] + 0x5A827999, 13); + A = ROT (A + G(B, C, D) + X[1] + 0x5A827999, 3); + D = ROT (D + G(A, B, C) + X[5] + 0x5A827999, 5); + C = ROT (C + G(D, A, B) + X[9] + 0x5A827999, 9); + B = ROT (B + G(C, D, A) + X[13] + 0x5A827999, 13); + A = ROT (A + G(B, C, D) + X[2] + 0x5A827999, 3); + D = ROT (D + G(A, B, C) + X[6] + 0x5A827999, 5); + C = ROT (C + G(D, A, B) + X[10] + 0x5A827999, 9); + B = ROT (B + G(C, D, A) + X[14] + 0x5A827999, 13); + A = ROT (A + G(B, C, D) + X[3] + 0x5A827999, 3); + D = ROT (D + G(A, B, C) + X[7] + 0x5A827999, 5); + C = ROT (C + G(D, A, B) + X[11] + 0x5A827999, 9); + B = ROT (B + G(C, D, A) + X[15] + 0x5A827999, 13); + + A = ROT (A + H(B, C, D) + X[0] + 0x6ED9EBA1, 3); + D = ROT (D + H(A, B, C) + X[8] + 0x6ED9EBA1, 9); + C = ROT (C + H(D, A, B) + X[4] + 0x6ED9EBA1, 11); + B = ROT (B + H(C, D, A) + X[12] + 0x6ED9EBA1, 15); + A = ROT (A + H(B, C, D) + X[2] + 0x6ED9EBA1, 3); + D = ROT (D + H(A, B, C) + X[10] + 0x6ED9EBA1, 9); + C = ROT (C + H(D, A, B) + X[6] + 0x6ED9EBA1, 11); + B = ROT (B + H(C, D, A) + X[14] + 0x6ED9EBA1, 15); + A = ROT (A + H(B, C, D) + X[1] + 0x6ED9EBA1, 3); + D = ROT (D + H(A, B, C) + X[9] + 0x6ED9EBA1, 9); + C 
= ROT (C + H(D, A, B) + X[5] + 0x6ED9EBA1, 11); + B = ROT (B + H(C, D, A) + X[13] + 0x6ED9EBA1, 15); + A = ROT (A + H(B, C, D) + X[3] + 0x6ED9EBA1, 3); + D = ROT (D + H(A, B, C) + X[11] + 0x6ED9EBA1, 9); + C = ROT (C + H(D, A, B) + X[7] + 0x6ED9EBA1, 11); + B = ROT (B + H(C, D, A) + X[15] + 0x6ED9EBA1, 15); + + A += AA; + B += BB; + C += CC; + D += DD; + } + + digest[0] = A & 0xFF; + digest[1] = (A >> 8) & 0xFF; + digest[2] = (A >> 16) & 0xFF; + digest[3] = (A >> 24) & 0xFF; + digest[4] = B & 0xFF; + digest[5] = (B >> 8) & 0xFF; + digest[6] = (B >> 16) & 0xFF; + digest[7] = (B >> 24) & 0xFF; + digest[8] = C & 0xFF; + digest[9] = (C >> 8) & 0xFF; + digest[10] = (C >> 16) & 0xFF; + digest[11] = (C >> 24) & 0xFF; + digest[12] = D & 0xFF; + digest[13] = (D >> 8) & 0xFF; + digest[14] = (D >> 16) & 0xFF; + digest[15] = (D >> 24) & 0xFF; +} + + +/* Public domain DES implementation from Phil Karn */ +static const guint32 Spbox[8][64] = { + { 0x01010400,0x00000000,0x00010000,0x01010404, + 0x01010004,0x00010404,0x00000004,0x00010000, + 0x00000400,0x01010400,0x01010404,0x00000400, + 0x01000404,0x01010004,0x01000000,0x00000004, + 0x00000404,0x01000400,0x01000400,0x00010400, + 0x00010400,0x01010000,0x01010000,0x01000404, + 0x00010004,0x01000004,0x01000004,0x00010004, + 0x00000000,0x00000404,0x00010404,0x01000000, + 0x00010000,0x01010404,0x00000004,0x01010000, + 0x01010400,0x01000000,0x01000000,0x00000400, + 0x01010004,0x00010000,0x00010400,0x01000004, + 0x00000400,0x00000004,0x01000404,0x00010404, + 0x01010404,0x00010004,0x01010000,0x01000404, + 0x01000004,0x00000404,0x00010404,0x01010400, + 0x00000404,0x01000400,0x01000400,0x00000000, + 0x00010004,0x00010400,0x00000000,0x01010004 }, + { 0x80108020,0x80008000,0x00008000,0x00108020, + 0x00100000,0x00000020,0x80100020,0x80008020, + 0x80000020,0x80108020,0x80108000,0x80000000, + 0x80008000,0x00100000,0x00000020,0x80100020, + 0x00108000,0x00100020,0x80008020,0x00000000, + 0x80000000,0x00008000,0x00108020,0x80100000, + 
0x00100020,0x80000020,0x00000000,0x00108000, + 0x00008020,0x80108000,0x80100000,0x00008020, + 0x00000000,0x00108020,0x80100020,0x00100000, + 0x80008020,0x80100000,0x80108000,0x00008000, + 0x80100000,0x80008000,0x00000020,0x80108020, + 0x00108020,0x00000020,0x00008000,0x80000000, + 0x00008020,0x80108000,0x00100000,0x80000020, + 0x00100020,0x80008020,0x80000020,0x00100020, + 0x00108000,0x00000000,0x80008000,0x00008020, + 0x80000000,0x80100020,0x80108020,0x00108000 }, + { 0x00000208,0x08020200,0x00000000,0x08020008, + 0x08000200,0x00000000,0x00020208,0x08000200, + 0x00020008,0x08000008,0x08000008,0x00020000, + 0x08020208,0x00020008,0x08020000,0x00000208, + 0x08000000,0x00000008,0x08020200,0x00000200, + 0x00020200,0x08020000,0x08020008,0x00020208, + 0x08000208,0x00020200,0x00020000,0x08000208, + 0x00000008,0x08020208,0x00000200,0x08000000, + 0x08020200,0x08000000,0x00020008,0x00000208, + 0x00020000,0x08020200,0x08000200,0x00000000, + 0x00000200,0x00020008,0x08020208,0x08000200, + 0x08000008,0x00000200,0x00000000,0x08020008, + 0x08000208,0x00020000,0x08000000,0x08020208, + 0x00000008,0x00020208,0x00020200,0x08000008, + 0x08020000,0x08000208,0x00000208,0x08020000, + 0x00020208,0x00000008,0x08020008,0x00020200 }, + { 0x00802001,0x00002081,0x00002081,0x00000080, + 0x00802080,0x00800081,0x00800001,0x00002001, + 0x00000000,0x00802000,0x00802000,0x00802081, + 0x00000081,0x00000000,0x00800080,0x00800001, + 0x00000001,0x00002000,0x00800000,0x00802001, + 0x00000080,0x00800000,0x00002001,0x00002080, + 0x00800081,0x00000001,0x00002080,0x00800080, + 0x00002000,0x00802080,0x00802081,0x00000081, + 0x00800080,0x00800001,0x00802000,0x00802081, + 0x00000081,0x00000000,0x00000000,0x00802000, + 0x00002080,0x00800080,0x00800081,0x00000001, + 0x00802001,0x00002081,0x00002081,0x00000080, + 0x00802081,0x00000081,0x00000001,0x00002000, + 0x00800001,0x00002001,0x00802080,0x00800081, + 0x00002001,0x00002080,0x00800000,0x00802001, + 0x00000080,0x00800000,0x00002000,0x00802080 }, + { 
0x00000100,0x02080100,0x02080000,0x42000100, + 0x00080000,0x00000100,0x40000000,0x02080000, + 0x40080100,0x00080000,0x02000100,0x40080100, + 0x42000100,0x42080000,0x00080100,0x40000000, + 0x02000000,0x40080000,0x40080000,0x00000000, + 0x40000100,0x42080100,0x42080100,0x02000100, + 0x42080000,0x40000100,0x00000000,0x42000000, + 0x02080100,0x02000000,0x42000000,0x00080100, + 0x00080000,0x42000100,0x00000100,0x02000000, + 0x40000000,0x02080000,0x42000100,0x40080100, + 0x02000100,0x40000000,0x42080000,0x02080100, + 0x40080100,0x00000100,0x02000000,0x42080000, + 0x42080100,0x00080100,0x42000000,0x42080100, + 0x02080000,0x00000000,0x40080000,0x42000000, + 0x00080100,0x02000100,0x40000100,0x00080000, + 0x00000000,0x40080000,0x02080100,0x40000100 }, + { 0x20000010,0x20400000,0x00004000,0x20404010, + 0x20400000,0x00000010,0x20404010,0x00400000, + 0x20004000,0x00404010,0x00400000,0x20000010, + 0x00400010,0x20004000,0x20000000,0x00004010, + 0x00000000,0x00400010,0x20004010,0x00004000, + 0x00404000,0x20004010,0x00000010,0x20400010, + 0x20400010,0x00000000,0x00404010,0x20404000, + 0x00004010,0x00404000,0x20404000,0x20000000, + 0x20004000,0x00000010,0x20400010,0x00404000, + 0x20404010,0x00400000,0x00004010,0x20000010, + 0x00400000,0x20004000,0x20000000,0x00004010, + 0x20000010,0x20404010,0x00404000,0x20400000, + 0x00404010,0x20404000,0x00000000,0x20400010, + 0x00000010,0x00004000,0x20400000,0x00404010, + 0x00004000,0x00400010,0x20004010,0x00000000, + 0x20404000,0x20000000,0x00400010,0x20004010 }, + { 0x00200000,0x04200002,0x04000802,0x00000000, + 0x00000800,0x04000802,0x00200802,0x04200800, + 0x04200802,0x00200000,0x00000000,0x04000002, + 0x00000002,0x04000000,0x04200002,0x00000802, + 0x04000800,0x00200802,0x00200002,0x04000800, + 0x04000002,0x04200000,0x04200800,0x00200002, + 0x04200000,0x00000800,0x00000802,0x04200802, + 0x00200800,0x00000002,0x04000000,0x00200800, + 0x04000000,0x00200800,0x00200000,0x04000802, + 0x04000802,0x04200002,0x04200002,0x00000002, + 
0x00200002,0x04000000,0x04000800,0x00200000, + 0x04200800,0x00000802,0x00200802,0x04200800, + 0x00000802,0x04000002,0x04200802,0x04200000, + 0x00200800,0x00000000,0x00000002,0x04200802, + 0x00000000,0x00200802,0x04200000,0x00000800, + 0x04000002,0x04000800,0x00000800,0x00200002 }, + { 0x10001040,0x00001000,0x00040000,0x10041040, + 0x10000000,0x10001040,0x00000040,0x10000000, + 0x00040040,0x10040000,0x10041040,0x00041000, + 0x10041000,0x00041040,0x00001000,0x00000040, + 0x10040000,0x10000040,0x10001000,0x00001040, + 0x00041000,0x00040040,0x10040040,0x10041000, + 0x00001040,0x00000000,0x00000000,0x10040040, + 0x10000040,0x10001000,0x00041040,0x00040000, + 0x00041040,0x00040000,0x10041000,0x00001000, + 0x00000040,0x10040040,0x00001000,0x00041040, + 0x10001000,0x00000040,0x10000040,0x10040000, + 0x10040040,0x10000000,0x00040000,0x10001040, + 0x00000000,0x10041040,0x00040040,0x10000040, + 0x10040000,0x10001000,0x10001040,0x00000000, + 0x10041040,0x00041000,0x00041000,0x00001040, + 0x00001040,0x00040040,0x10000000,0x10041000 } +}; + +#undef F +#define F(l,r,key){\ + work = ((r >> 4) | (r << 28)) ^ key[0];\ + l ^= Spbox[6][work & 0x3f];\ + l ^= Spbox[4][(work >> 8) & 0x3f];\ + l ^= Spbox[2][(work >> 16) & 0x3f];\ + l ^= Spbox[0][(work >> 24) & 0x3f];\ + work = r ^ key[1];\ + l ^= Spbox[7][work & 0x3f];\ + l ^= Spbox[5][(work >> 8) & 0x3f];\ + l ^= Spbox[3][(work >> 16) & 0x3f];\ + l ^= Spbox[1][(work >> 24) & 0x3f];\ +} +/* Encrypt or decrypt a block of data in ECB mode */ +static void +des (guint32 ks[16][2], unsigned char block[8]) +{ + guint32 left,right,work; + + /* Read input block and place in left/right in big-endian order */ + left = ((guint32)block[0] << 24) + | ((guint32)block[1] << 16) + | ((guint32)block[2] << 8) + | (guint32)block[3]; + right = ((guint32)block[4] << 24) + | ((guint32)block[5] << 16) + | ((guint32)block[6] << 8) + | (guint32)block[7]; + + /* Hoey's clever initial permutation algorithm, from Outerbridge + * (see Schneier p 478) + * + * The 
convention here is the same as Outerbridge: rotate each + * register left by 1 bit, i.e., so that "left" contains permuted + * input bits 2, 3, 4, ... 1 and "right" contains 33, 34, 35, ... 32 + * (using origin-1 numbering as in the FIPS). This allows us to avoid + * one of the two rotates that would otherwise be required in each of + * the 16 rounds. + */ + work = ((left >> 4) ^ right) & 0x0f0f0f0f; + right ^= work; + left ^= work << 4; + work = ((left >> 16) ^ right) & 0xffff; + right ^= work; + left ^= work << 16; + work = ((right >> 2) ^ left) & 0x33333333; + left ^= work; + right ^= (work << 2); + work = ((right >> 8) ^ left) & 0xff00ff; + left ^= work; + right ^= (work << 8); + right = (right << 1) | (right >> 31); + work = (left ^ right) & 0xaaaaaaaa; + left ^= work; + right ^= work; + left = (left << 1) | (left >> 31); + + /* Now do the 16 rounds */ + F(left,right,ks[0]); + F(right,left,ks[1]); + F(left,right,ks[2]); + F(right,left,ks[3]); + F(left,right,ks[4]); + F(right,left,ks[5]); + F(left,right,ks[6]); + F(right,left,ks[7]); + F(left,right,ks[8]); + F(right,left,ks[9]); + F(left,right,ks[10]); + F(right,left,ks[11]); + F(left,right,ks[12]); + F(right,left,ks[13]); + F(left,right,ks[14]); + F(right,left,ks[15]); + + /* Inverse permutation, also from Hoey via Outerbridge and Schneier */ + right = (right << 31) | (right >> 1); + work = (left ^ right) & 0xaaaaaaaa; + left ^= work; + right ^= work; + left = (left >> 1) | (left << 31); + work = ((left >> 8) ^ right) & 0xff00ff; + right ^= work; + left ^= work << 8; + work = ((left >> 2) ^ right) & 0x33333333; + right ^= work; + left ^= work << 2; + work = ((right >> 16) ^ left) & 0xffff; + left ^= work; + right ^= work << 16; + work = ((right >> 4) ^ left) & 0x0f0f0f0f; + left ^= work; + right ^= work << 4; + + /* Put the block back into the user's buffer with final swap */ + block[0] = right >> 24; + block[1] = right >> 16; + block[2] = right >> 8; + block[3] = right; + block[4] = left >> 24; + block[5] = 
left >> 16; + block[6] = left >> 8; + block[7] = left; +} + +/* Key schedule-related tables from FIPS-46 */ + +/* permuted choice table (key) */ +static const unsigned char pc1[] = { + 57, 49, 41, 33, 25, 17, 9, + 1, 58, 50, 42, 34, 26, 18, + 10, 2, 59, 51, 43, 35, 27, + 19, 11, 3, 60, 52, 44, 36, + + 63, 55, 47, 39, 31, 23, 15, + 7, 62, 54, 46, 38, 30, 22, + 14, 6, 61, 53, 45, 37, 29, + 21, 13, 5, 28, 20, 12, 4 +}; + +/* number left rotations of pc1 */ +static const unsigned char totrot[] = { + 1,2,4,6,8,10,12,14,15,17,19,21,23,25,27,28 +}; + +/* permuted choice key (table) */ +static const unsigned char pc2[] = { + 14, 17, 11, 24, 1, 5, + 3, 28, 15, 6, 21, 10, + 23, 19, 12, 4, 26, 8, + 16, 7, 27, 20, 13, 2, + 41, 52, 31, 37, 47, 55, + 30, 40, 51, 45, 33, 48, + 44, 49, 39, 56, 34, 53, + 46, 42, 50, 36, 29, 32 +}; + +/* End of DES-defined tables */ + + +/* bit 0 is left-most in byte */ +static const int bytebit[] = { + 0200,0100,040,020,010,04,02,01 +}; + + +/* Generate key schedule for encryption or decryption + * depending on the value of "decrypt" + */ +static void +deskey (DES_KS k, unsigned char *key, int decrypt) +{ + unsigned char pc1m[56]; /* place to modify pc1 into */ + unsigned char pcr[56]; /* place to rotate pc1 into */ + register int i,j,l; + int m; + unsigned char ks[8]; + + for (j=0; j<56; j++) { /* convert pc1 to bits of key */ + l=pc1[j]-1; /* integer bit location */ + m = l & 07; /* find bit */ + pc1m[j]=(key[l>>3] & /* find which key byte l is in */ + bytebit[m]) /* and which bit of that byte */ + ? 1 : 0; /* and store 1-bit result */ + } + for (i=0; i<16; i++) { /* key chunk for each iteration */ + memset(ks,0,sizeof(ks)); /* Clear key schedule */ + for (j=0; j<56; j++) /* rotate pc1 the right amount */ + pcr[j] = pc1m[(l=j+totrot[decrypt? 15-i : i])<(j<28? 28 : 56) ? 
l: l-28]; + /* rotate left and right halves independently */ + for (j=0; j<48; j++){ /* select bits individually */ + /* check bit that goes to ks[j] */ + if (pcr[pc2[j]-1]){ + /* mask it in if it's there */ + l= j % 6; + ks[j/6] |= bytebit[l] >> 2; + } + } + /* Now convert to packed odd/even interleaved form */ + k[i][0] = ((guint32)ks[0] << 24) + | ((guint32)ks[2] << 16) + | ((guint32)ks[4] << 8) + | ((guint32)ks[6]); + k[i][1] = ((guint32)ks[1] << 24) + | ((guint32)ks[3] << 16) + | ((guint32)ks[5] << 8) + | ((guint32)ks[7]); + } +} diff --git a/libsoup/soup-auth-manager-ntlm.h b/libsoup/soup-auth-manager-ntlm.h new file mode 100644 index 0000000..f0b4f57 --- /dev/null +++ b/libsoup/soup-auth-manager-ntlm.h @@ -0,0 +1,36 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2008 Red Hat, Inc. + */ + +#ifndef SOUP_AUTH_MANAGER_NTLM_H +#define SOUP_AUTH_MANAGER_NTLM_H 1 + +#include "soup-auth-manager.h" + +G_BEGIN_DECLS + +#define SOUP_TYPE_AUTH_MANAGER_NTLM (soup_auth_manager_ntlm_get_type ()) +#define SOUP_AUTH_MANAGER_NTLM(object) (G_TYPE_CHECK_INSTANCE_CAST ((object), SOUP_TYPE_AUTH_MANAGER_NTLM, SoupAuthManagerNTLM)) +#define SOUP_AUTH_MANAGER_NTLM_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST ((klass), SOUP_TYPE_AUTH_MANAGER_NTLM, SoupAuthManagerNTLMClass)) +#define SOUP_IS_AUTH_MANAGER_NTLM(object) (G_TYPE_CHECK_INSTANCE_TYPE ((object), SOUP_TYPE_AUTH_MANAGER_NTLM)) +#define SOUP_IS_AUTH_MANAGER_NTLM_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), SOUP_TYPE_AUTH_MANAGER_NTLM)) +#define SOUP_AUTH_MANAGER_NTLM_GET_CLASS(obj) (G_TYPE_INSTANCE_GET_CLASS ((obj), SOUP_TYPE_AUTH_MANAGER_NTLM, SoupAuthManagerNTLMClass)) + +typedef struct { + SoupAuthManager parent; + +} SoupAuthManagerNTLM; + +typedef struct { + SoupAuthManagerClass parent_class; + +} SoupAuthManagerNTLMClass; + +#define SOUP_AUTH_MANAGER_NTLM_USE_NTLM "use-ntlm" + +GType soup_auth_manager_ntlm_get_type (void); + +G_END_DECLS + +#endif /* 
SOUP_AUTH_MANAGER_NTLM_NTLM_H */ diff --git a/libsoup/soup-auth-manager.c b/libsoup/soup-auth-manager.c new file mode 100644 index 0000000..cc0f3c9 --- /dev/null +++ b/libsoup/soup-auth-manager.c @@ -0,0 +1,581 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * soup-auth-manager.c: SoupAuth manager for SoupSession + * + * Copyright (C) 2007 Red Hat, Inc. + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include + +#include "soup-auth-manager.h" +#include "soup-address.h" +#include "soup-headers.h" +#include "soup-marshal.h" +#include "soup-message-private.h" +#include "soup-message-queue.h" +#include "soup-path-map.h" +#include "soup-session.h" +#include "soup-session-feature.h" +#include "soup-session-private.h" +#include "soup-uri.h" + +static void soup_auth_manager_session_feature_init (SoupSessionFeatureInterface *feature_interface, gpointer interface_data); +static SoupSessionFeatureInterface *soup_session_feature_default_interface; + +static void attach (SoupSessionFeature *feature, SoupSession *session); +static void request_queued (SoupSessionFeature *feature, SoupSession *session, + SoupMessage *msg); +static void request_started (SoupSessionFeature *feature, SoupSession *session, + SoupMessage *msg, SoupSocket *socket); +static void request_unqueued (SoupSessionFeature *feature, + SoupSession *session, SoupMessage *msg); +static gboolean add_feature (SoupSessionFeature *feature, GType type); +static gboolean remove_feature (SoupSessionFeature *feature, GType type); +static gboolean has_feature (SoupSessionFeature *feature, GType type); + +enum { + AUTHENTICATE, + LAST_SIGNAL +}; + +static guint signals[LAST_SIGNAL] = { 0 }; + +G_DEFINE_TYPE_WITH_CODE (SoupAuthManager, soup_auth_manager, G_TYPE_OBJECT, + G_IMPLEMENT_INTERFACE (SOUP_TYPE_SESSION_FEATURE, + soup_auth_manager_session_feature_init)) + +typedef struct { + SoupSession *session; + GPtrArray *auth_types; + + SoupAuth *proxy_auth; + GHashTable *auth_hosts; +} 
SoupAuthManagerPrivate; +#define SOUP_AUTH_MANAGER_GET_PRIVATE(o) (G_TYPE_INSTANCE_GET_PRIVATE ((o), SOUP_TYPE_AUTH_MANAGER, SoupAuthManagerPrivate)) + +typedef struct { + SoupURI *uri; + SoupPathMap *auth_realms; /* path -> scheme:realm */ + GHashTable *auths; /* scheme:realm -> SoupAuth */ +} SoupAuthHost; + +static void +soup_auth_manager_init (SoupAuthManager *manager) +{ + SoupAuthManagerPrivate *priv = SOUP_AUTH_MANAGER_GET_PRIVATE (manager); + + priv->auth_types = g_ptr_array_new (); + priv->auth_hosts = g_hash_table_new (soup_uri_host_hash, + soup_uri_host_equal); +} + +static gboolean +foreach_free_host (gpointer key, gpointer value, gpointer data) +{ + SoupAuthHost *host = value; + + if (host->auth_realms) + soup_path_map_free (host->auth_realms); + if (host->auths) + g_hash_table_destroy (host->auths); + + soup_uri_free (host->uri); + g_slice_free (SoupAuthHost, host); + + return TRUE; +} + +static void +finalize (GObject *object) +{ + SoupAuthManagerPrivate *priv = SOUP_AUTH_MANAGER_GET_PRIVATE (object); + int i; + + for (i = 0; i < priv->auth_types->len; i++) + g_type_class_unref (priv->auth_types->pdata[i]); + g_ptr_array_free (priv->auth_types, TRUE); + + g_hash_table_foreach_remove (priv->auth_hosts, foreach_free_host, NULL); + g_hash_table_destroy (priv->auth_hosts); + + if (priv->proxy_auth) + g_object_unref (priv->proxy_auth); + + G_OBJECT_CLASS (soup_auth_manager_parent_class)->finalize (object); +} + +static void +soup_auth_manager_class_init (SoupAuthManagerClass *auth_manager_class) +{ + GObjectClass *object_class = G_OBJECT_CLASS (auth_manager_class); + + g_type_class_add_private (auth_manager_class, sizeof (SoupAuthManagerPrivate)); + + object_class->finalize = finalize; + + signals[AUTHENTICATE] = + g_signal_new ("authenticate", + G_OBJECT_CLASS_TYPE (object_class), + G_SIGNAL_RUN_FIRST, + G_STRUCT_OFFSET (SoupAuthManagerClass, authenticate), + NULL, NULL, + soup_marshal_NONE__OBJECT_OBJECT_BOOLEAN, + G_TYPE_NONE, 3, + SOUP_TYPE_MESSAGE, + 
SOUP_TYPE_AUTH, + G_TYPE_BOOLEAN); + +} + +static void +soup_auth_manager_session_feature_init (SoupSessionFeatureInterface *feature_interface, + gpointer interface_data) +{ + soup_session_feature_default_interface = + g_type_default_interface_peek (SOUP_TYPE_SESSION_FEATURE); + + feature_interface->attach = attach; + feature_interface->request_queued = request_queued; + feature_interface->request_started = request_started; + feature_interface->request_unqueued = request_unqueued; + feature_interface->add_feature = add_feature; + feature_interface->remove_feature = remove_feature; + feature_interface->has_feature = has_feature; +} + +static int +auth_type_compare_func (gconstpointer a, gconstpointer b) +{ + SoupAuthClass **auth1 = (SoupAuthClass **)a; + SoupAuthClass **auth2 = (SoupAuthClass **)b; + + return (*auth1)->strength - (*auth2)->strength; +} + +static gboolean +add_feature (SoupSessionFeature *feature, GType type) +{ + SoupAuthManagerPrivate *priv = SOUP_AUTH_MANAGER_GET_PRIVATE (feature); + SoupAuthClass *auth_class; + + if (!g_type_is_a (type, SOUP_TYPE_AUTH)) + return FALSE; + + auth_class = g_type_class_ref (type); + g_ptr_array_add (priv->auth_types, auth_class); + g_ptr_array_sort (priv->auth_types, auth_type_compare_func); + return TRUE; +} + +static gboolean +remove_feature (SoupSessionFeature *feature, GType type) +{ + SoupAuthManagerPrivate *priv = SOUP_AUTH_MANAGER_GET_PRIVATE (feature); + SoupAuthClass *auth_class; + int i; + + if (!g_type_is_a (type, SOUP_TYPE_AUTH)) + return FALSE; + + auth_class = g_type_class_peek (type); + for (i = 0; i < priv->auth_types->len; i++) { + if (priv->auth_types->pdata[i] == (gpointer)auth_class) { + g_ptr_array_remove_index (priv->auth_types, i); + g_type_class_unref (auth_class); + return TRUE; + } + } + + return FALSE; +} + +static gboolean +has_feature (SoupSessionFeature *feature, GType type) +{ + SoupAuthManagerPrivate *priv = SOUP_AUTH_MANAGER_GET_PRIVATE (feature); + SoupAuthClass *auth_class; + int i; 
+ + if (!g_type_is_a (type, SOUP_TYPE_AUTH)) + return FALSE; + + auth_class = g_type_class_peek (type); + for (i = 0; i < priv->auth_types->len; i++) { + if (priv->auth_types->pdata[i] == (gpointer)auth_class) + return TRUE; + } + return FALSE; +} + +void +soup_auth_manager_emit_authenticate (SoupAuthManager *manager, SoupMessage *msg, + SoupAuth *auth, gboolean retrying) +{ + g_signal_emit (manager, signals[AUTHENTICATE], 0, msg, auth, retrying); +} + +static void +attach (SoupSessionFeature *manager, SoupSession *session) +{ + SoupAuthManagerPrivate *priv = SOUP_AUTH_MANAGER_GET_PRIVATE (manager); + + /* FIXME: should support multiple sessions */ + priv->session = session; + + soup_session_feature_default_interface->attach (manager, session); +} + +static inline const char * +auth_header_for_message (SoupMessage *msg) +{ + if (msg->status_code == SOUP_STATUS_PROXY_UNAUTHORIZED) { + return soup_message_headers_get_list (msg->response_headers, + "Proxy-Authenticate"); + } else { + return soup_message_headers_get_list (msg->response_headers, + "WWW-Authenticate"); + } +} + +static char * +extract_challenge (const char *challenges, const char *scheme) +{ + GSList *items, *i; + int schemelen = strlen (scheme); + char *item, *space, *equals; + GString *challenge; + + /* The relevant grammar: + * + * WWW-Authenticate = 1#challenge + * Proxy-Authenticate = 1#challenge + * challenge = auth-scheme 1#auth-param + * auth-scheme = token + * auth-param = token "=" ( token | quoted-string ) + * + * The fact that quoted-strings can contain commas, equals + * signs, and auth scheme names makes it tricky to "cheat" on + * the parsing. We just use soup_header_parse_list(), and then + * reassemble the pieces after we find the one we want. + */ + + items = soup_header_parse_list (challenges); + + /* First item will start with the scheme name, followed by a + * space and then the first auth-param. 
+ */ + for (i = items; i; i = i->next) { + item = i->data; + if (!g_ascii_strncasecmp (item, scheme, schemelen) && + g_ascii_isspace (item[schemelen])) + break; + } + if (!i) { + soup_header_free_list (items); + return NULL; + } + + /* The challenge extends from this item until the end, or until + * the next item that has a space before an equals sign. + */ + challenge = g_string_new (item); + for (i = i->next; i; i = i->next) { + item = i->data; + space = strpbrk (item, " \t"); + equals = strchr (item, '='); + if (!equals || (space && equals > space)) + break; + + g_string_append (challenge, ", "); + g_string_append (challenge, item); + } + + soup_header_free_list (items); + return g_string_free (challenge, FALSE); +} + +static SoupAuth * +create_auth (SoupAuthManagerPrivate *priv, SoupMessage *msg) +{ + const char *header; + SoupAuthClass *auth_class; + char *challenge = NULL; + SoupAuth *auth; + int i; + + header = auth_header_for_message (msg); + if (!header) + return NULL; + + for (i = priv->auth_types->len - 1; i >= 0; i--) { + auth_class = priv->auth_types->pdata[i]; + challenge = extract_challenge (header, auth_class->scheme_name); + if (challenge) + break; + } + if (!challenge) + return NULL; + + auth = soup_auth_new (G_TYPE_FROM_CLASS (auth_class), msg, challenge); + g_free (challenge); + return auth; +} + +static gboolean +check_auth (SoupMessage *msg, SoupAuth *auth) +{ + const char *header; + char *challenge; + gboolean ok; + + header = auth_header_for_message (msg); + if (!header) + return FALSE; + + challenge = extract_challenge (header, soup_auth_get_scheme_name (auth)); + if (!challenge) + return FALSE; + + ok = soup_auth_update (auth, msg, challenge); + g_free (challenge); + return ok; +} + +static SoupAuthHost * +get_auth_host_for_message (SoupAuthManagerPrivate *priv, SoupMessage *msg) +{ + SoupAuthHost *host; + SoupURI *uri = soup_message_get_uri (msg); + + host = g_hash_table_lookup (priv->auth_hosts, uri); + if (host) + return host; + + host 
= g_slice_new0 (SoupAuthHost); + host->uri = soup_uri_copy_host (uri); + g_hash_table_insert (priv->auth_hosts, host->uri, host); + + return host; +} + +static SoupAuth * +lookup_auth (SoupAuthManagerPrivate *priv, SoupMessage *msg) +{ + SoupAuthHost *host; + const char *path, *realm; + + host = get_auth_host_for_message (priv, msg); + if (!host->auth_realms) + return NULL; + + path = soup_message_get_uri (msg)->path; + if (!path) + path = "/"; + realm = soup_path_map_lookup (host->auth_realms, path); + if (realm) + return g_hash_table_lookup (host->auths, realm); + else + return NULL; +} + +static gboolean +authenticate_auth (SoupAuthManager *manager, SoupAuth *auth, + SoupMessage *msg, gboolean prior_auth_failed, + gboolean proxy, gboolean can_interact) +{ + SoupAuthManagerPrivate *priv = SOUP_AUTH_MANAGER_GET_PRIVATE (manager); + SoupURI *uri; + + if (proxy) { + SoupMessageQueue *queue; + SoupMessageQueueItem *item; + + queue = soup_session_get_queue (priv->session); + item = soup_message_queue_lookup (queue, msg); + if (item) { + uri = soup_connection_get_proxy_uri (item->conn); + soup_message_queue_item_unref (item); + } else + uri = NULL; + + if (!uri) + return FALSE; + } else + uri = soup_message_get_uri (msg); + + /* If a password is specified explicitly in the URI, use it + * even if the auth had previously already been authenticated. 
+ */ + if (uri->password) { + if (!prior_auth_failed) + soup_auth_authenticate (auth, uri->user, uri->password); + } else if (!soup_auth_is_authenticated (auth) && can_interact) { + soup_auth_manager_emit_authenticate (manager, msg, auth, + prior_auth_failed); + } + + return soup_auth_is_authenticated (auth); +} + +static void +update_auth (SoupMessage *msg, gpointer manager) +{ + SoupAuthManagerPrivate *priv = SOUP_AUTH_MANAGER_GET_PRIVATE (manager); + SoupAuthHost *host; + SoupAuth *auth, *prior_auth, *old_auth; + const char *path; + char *auth_info, *old_auth_info; + GSList *pspace, *p; + gboolean prior_auth_failed = FALSE; + + host = get_auth_host_for_message (priv, msg); + + /* See if we used auth last time */ + prior_auth = soup_message_get_auth (msg); + if (prior_auth && check_auth (msg, prior_auth)) { + auth = prior_auth; + if (!soup_auth_is_authenticated (auth)) + prior_auth_failed = TRUE; + } else { + auth = create_auth (priv, msg); + if (!auth) + return; + } + auth_info = soup_auth_get_info (auth); + + if (!host->auth_realms) { + host->auth_realms = soup_path_map_new (g_free); + host->auths = g_hash_table_new_full (g_str_hash, g_str_equal, + g_free, g_object_unref); + } + + /* Record where this auth realm is used. */ + pspace = soup_auth_get_protection_space (auth, soup_message_get_uri (msg)); + for (p = pspace; p; p = p->next) { + path = p->data; + old_auth_info = soup_path_map_lookup (host->auth_realms, path); + if (old_auth_info) { + if (!strcmp (old_auth_info, auth_info)) + continue; + soup_path_map_remove (host->auth_realms, path); + } + + soup_path_map_add (host->auth_realms, path, + g_strdup (auth_info)); + } + soup_auth_free_protection_space (auth, pspace); + + /* Now, make sure the auth is recorded. (If there's a + * pre-existing auth, we keep that rather than the new one, + * since the old one might already be authenticated.) 
+ */ + old_auth = g_hash_table_lookup (host->auths, auth_info); + if (old_auth) { + g_free (auth_info); + if (auth != old_auth && auth != prior_auth) { + g_object_unref (auth); + auth = old_auth; + } + } else { + g_hash_table_insert (host->auths, auth_info, auth); + } + + /* If we need to authenticate, try to do it. */ + authenticate_auth (manager, auth, msg, + prior_auth_failed, FALSE, TRUE); +} + +static void +requeue_if_authenticated (SoupMessage *msg, gpointer manager) +{ + SoupAuthManagerPrivate *priv = SOUP_AUTH_MANAGER_GET_PRIVATE (manager); + SoupAuth *auth = lookup_auth (priv, msg); + + if (auth && soup_auth_is_authenticated (auth)) + soup_session_requeue_message (priv->session, msg); +} + +static void +update_proxy_auth (SoupMessage *msg, gpointer manager) +{ + SoupAuthManagerPrivate *priv = SOUP_AUTH_MANAGER_GET_PRIVATE (manager); + SoupAuth *prior_auth; + gboolean prior_auth_failed = FALSE; + + /* See if we used auth last time */ + prior_auth = soup_message_get_proxy_auth (msg); + if (prior_auth && check_auth (msg, prior_auth)) { + if (!soup_auth_is_authenticated (prior_auth)) + prior_auth_failed = TRUE; + } + + if (!priv->proxy_auth) { + priv->proxy_auth = create_auth (priv, msg); + if (!priv->proxy_auth) + return; + } + + /* If we need to authenticate, try to do it. 
*/ + authenticate_auth (manager, priv->proxy_auth, msg, + prior_auth_failed, TRUE, TRUE); +} + +static void +requeue_if_proxy_authenticated (SoupMessage *msg, gpointer manager) +{ + SoupAuthManagerPrivate *priv = SOUP_AUTH_MANAGER_GET_PRIVATE (manager); + SoupAuth *auth = priv->proxy_auth; + + if (auth && soup_auth_is_authenticated (auth)) + soup_session_requeue_message (priv->session, msg); +} + +static void +request_queued (SoupSessionFeature *manager, SoupSession *session, + SoupMessage *msg) +{ + soup_message_add_status_code_handler ( + msg, "got_headers", SOUP_STATUS_UNAUTHORIZED, + G_CALLBACK (update_auth), manager); + soup_message_add_status_code_handler ( + msg, "got_body", SOUP_STATUS_UNAUTHORIZED, + G_CALLBACK (requeue_if_authenticated), manager); + + soup_message_add_status_code_handler ( + msg, "got_headers", SOUP_STATUS_PROXY_UNAUTHORIZED, + G_CALLBACK (update_proxy_auth), manager); + soup_message_add_status_code_handler ( + msg, "got_body", SOUP_STATUS_PROXY_UNAUTHORIZED, + G_CALLBACK (requeue_if_proxy_authenticated), manager); +} + +static void +request_started (SoupSessionFeature *feature, SoupSession *session, + SoupMessage *msg, SoupSocket *socket) +{ + SoupAuthManager *manager = SOUP_AUTH_MANAGER (feature); + SoupAuthManagerPrivate *priv = SOUP_AUTH_MANAGER_GET_PRIVATE (manager); + SoupAuth *auth; + + auth = lookup_auth (priv, msg); + if (!auth || !authenticate_auth (manager, auth, msg, FALSE, FALSE, FALSE)) + auth = NULL; + soup_message_set_auth (msg, auth); + + auth = priv->proxy_auth; + if (!auth || !authenticate_auth (manager, auth, msg, FALSE, TRUE, FALSE)) + auth = NULL; + soup_message_set_proxy_auth (msg, auth); +} + +static void +request_unqueued (SoupSessionFeature *manager, SoupSession *session, + SoupMessage *msg) +{ + g_signal_handlers_disconnect_matched (msg, G_SIGNAL_MATCH_DATA, + 0, 0, NULL, NULL, manager); +} diff --git a/libsoup/soup-auth-manager.h b/libsoup/soup-auth-manager.h new file mode 100644 index 0000000..493960a --- 
/dev/null +++ b/libsoup/soup-auth-manager.h @@ -0,0 +1,42 @@
/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
/*
 * Copyright (C) 2007 Red Hat, Inc.
 */

#ifndef SOUP_AUTH_MANAGER_H
#define SOUP_AUTH_MANAGER_H 1

#include "soup-types.h"
#include "soup-auth.h"

G_BEGIN_DECLS

/* Standard GObject type, cast and type-check macros for SoupAuthManager. */
#define SOUP_TYPE_AUTH_MANAGER            (soup_auth_manager_get_type ())
#define SOUP_AUTH_MANAGER(object)         (G_TYPE_CHECK_INSTANCE_CAST ((object), SOUP_TYPE_AUTH_MANAGER, SoupAuthManager))
#define SOUP_AUTH_MANAGER_CLASS(klass)    (G_TYPE_CHECK_CLASS_CAST ((klass), SOUP_TYPE_AUTH_MANAGER, SoupAuthManagerClass))
#define SOUP_IS_AUTH_MANAGER(object)      (G_TYPE_CHECK_INSTANCE_TYPE ((object), SOUP_TYPE_AUTH_MANAGER))
#define SOUP_IS_AUTH_MANAGER_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), SOUP_TYPE_AUTH_MANAGER))
#define SOUP_AUTH_MANAGER_GET_CLASS(obj)  (G_TYPE_INSTANCE_GET_CLASS ((obj), SOUP_TYPE_AUTH_MANAGER, SoupAuthManagerClass))

/* Instance structure; it has no public fields beyond the GObject parent. */
typedef struct {
	GObject parent;

} SoupAuthManager;

/* Class structure. */
typedef struct {
	GObjectClass parent_class;

	/* Class handler slot; takes the same arguments as
	 * soup_auth_manager_emit_authenticate().
	 */
	void (*authenticate) (SoupAuthManager *manager, SoupMessage *msg,
			      SoupAuth *auth, gboolean retrying);
} SoupAuthManagerClass;

GType soup_auth_manager_get_type (void);

/* Ask listeners on @manager to supply credentials for @auth while
 * handling @msg; @retrying is passed through to them unchanged.
 */
void soup_auth_manager_emit_authenticate (SoupAuthManager *manager,
					  SoupMessage *msg,
					  SoupAuth *auth,
					  gboolean retrying);

G_END_DECLS

#endif /* SOUP_AUTH_MANAGER_H */
diff --git a/libsoup/soup-auth-ntlm.c b/libsoup/soup-auth-ntlm.c new file mode 100644 index 0000000..6383311 --- /dev/null +++ b/libsoup/soup-auth-ntlm.c @@ -0,0 +1,134 @@
/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
/*
 * soup-auth-ntlm.c: HTTP NTLM Authentication helper
 *
 * Copyright (C) 2007 Red Hat, Inc.
+ */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include + +#include "soup-auth-ntlm.h" +#include "soup-headers.h" +#include "soup-message.h" +#include "soup-misc.h" +#include "soup-uri.h" + +static gboolean update (SoupAuth *auth, SoupMessage *msg, GHashTable *auth_params); +static GSList *get_protection_space (SoupAuth *auth, SoupURI *source_uri); +static void authenticate (SoupAuth *auth, const char *username, const char *password); +static gboolean is_authenticated (SoupAuth *auth); +static char *get_authorization (SoupAuth *auth, SoupMessage *msg); + +typedef struct { + char *username, *password; +} SoupAuthNTLMPrivate; +#define SOUP_AUTH_NTLM_GET_PRIVATE(o) (G_TYPE_INSTANCE_GET_PRIVATE ((o), SOUP_TYPE_AUTH_NTLM, SoupAuthNTLMPrivate)) + +G_DEFINE_TYPE (SoupAuthNTLM, soup_auth_ntlm, SOUP_TYPE_AUTH) + +static void +soup_auth_ntlm_init (SoupAuthNTLM *ntlm) +{ +} + +static void +finalize (GObject *object) +{ + SoupAuthNTLMPrivate *priv = SOUP_AUTH_NTLM_GET_PRIVATE (object); + + g_free (priv->username); + if (priv->password) { + memset (priv->password, 0, strlen (priv->password)); + g_free (priv->password); + } + + G_OBJECT_CLASS (soup_auth_ntlm_parent_class)->finalize (object); +} + +static void +soup_auth_ntlm_class_init (SoupAuthNTLMClass *auth_ntlm_class) +{ + SoupAuthClass *auth_class = SOUP_AUTH_CLASS (auth_ntlm_class); + GObjectClass *object_class = G_OBJECT_CLASS (auth_ntlm_class); + + g_type_class_add_private (auth_ntlm_class, sizeof (SoupAuthNTLMPrivate)); + + auth_class->scheme_name = "NTLM"; + auth_class->strength = 3; + + auth_class->update = update; + auth_class->get_protection_space = get_protection_space; + auth_class->authenticate = authenticate; + auth_class->is_authenticated = is_authenticated; + auth_class->get_authorization = get_authorization; + + object_class->finalize = finalize; +} + +SoupAuth * +soup_auth_ntlm_new (const char *realm, const char *host) +{ + SoupAuth *auth; + + auth = g_object_new (SOUP_TYPE_AUTH_NTLM, + SOUP_AUTH_REALM, 
realm, + SOUP_AUTH_HOST, host, + NULL); + return auth; +} + +static gboolean +update (SoupAuth *auth, SoupMessage *msg, GHashTable *auth_params) +{ + g_return_val_if_reached (FALSE); +} + +static GSList * +get_protection_space (SoupAuth *auth, SoupURI *source_uri) +{ + g_return_val_if_reached (NULL); +} + +static void +authenticate (SoupAuth *auth, const char *username, const char *password) +{ + SoupAuthNTLMPrivate *priv = SOUP_AUTH_NTLM_GET_PRIVATE (auth); + + g_return_if_fail (username != NULL); + g_return_if_fail (password != NULL); + + priv->username = g_strdup (username); + priv->password = g_strdup (password); +} + +static gboolean +is_authenticated (SoupAuth *auth) +{ + return SOUP_AUTH_NTLM_GET_PRIVATE (auth)->password != NULL; +} + +static char * +get_authorization (SoupAuth *auth, SoupMessage *msg) +{ + g_return_val_if_reached (NULL); +} + +const char * +soup_auth_ntlm_get_username (SoupAuth *auth) +{ + SoupAuthNTLMPrivate *priv = SOUP_AUTH_NTLM_GET_PRIVATE (auth); + + return priv->username; +} + +const char * +soup_auth_ntlm_get_password (SoupAuth *auth) +{ + SoupAuthNTLMPrivate *priv = SOUP_AUTH_NTLM_GET_PRIVATE (auth); + + return priv->password; +} diff --git a/libsoup/soup-auth-ntlm.h b/libsoup/soup-auth-ntlm.h new file mode 100644 index 0000000..1f56976 --- /dev/null +++ b/libsoup/soup-auth-ntlm.h @@ -0,0 +1,32 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2007 Red Hat, Inc. 
+ */ + +#ifndef SOUP_AUTH_NTLM_H +#define SOUP_AUTH_NTLM_H 1 + +#include "soup-auth.h" + +#define SOUP_AUTH_NTLM(object) (G_TYPE_CHECK_INSTANCE_CAST ((object), SOUP_TYPE_AUTH_NTLM, SoupAuthNTLM)) +#define SOUP_AUTH_NTLM_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST ((klass), SOUP_TYPE_AUTH_NTLM, SoupAuthNTLMClass)) +#define SOUP_IS_AUTH_NTLM(object) (G_TYPE_CHECK_INSTANCE_TYPE ((object), SOUP_TYPE_AUTH_NTLM)) +#define SOUP_IS_AUTH_NTLM_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), SOUP_TYPE_AUTH_NTLM)) +#define SOUP_AUTH_NTLM_GET_CLASS(obj) (G_TYPE_INSTANCE_GET_CLASS ((obj), SOUP_TYPE_AUTH_NTLM, SoupAuthNTLMClass)) + +typedef struct { + SoupAuth parent; + +} SoupAuthNTLM; + +typedef struct { + SoupAuthClass parent_class; + +} SoupAuthNTLMClass; + +SoupAuth *soup_auth_ntlm_new (const char *realm, + const char *host); +const char *soup_auth_ntlm_get_username (SoupAuth *auth); +const char *soup_auth_ntlm_get_password (SoupAuth *auth); + +#endif /* SOUP_AUTH_NTLM_H */ diff --git a/libsoup/soup-auth.c b/libsoup/soup-auth.c new file mode 100644 index 0000000..0b045a5 --- /dev/null +++ b/libsoup/soup-auth.c @@ -0,0 +1,682 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * soup-auth.c: HTTP Authentication framework + * + * Copyright (C) 2001-2003, Ximian, Inc. + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#define LIBSOUP_I_HAVE_READ_BUG_594377_AND_KNOW_SOUP_PASSWORD_MANAGER_MIGHT_GO_AWAY + +#include + +#include "soup-auth.h" +#include "soup-auth-basic.h" +#include "soup-auth-digest.h" +#include "soup-headers.h" +#include "soup-marshal.h" +#include "soup-uri.h" + +/** + * SECTION:soup-auth + * @short_description: HTTP client-side authentication support + * @see_also: #SoupSession + * + * #SoupAuth objects store the authentication data associated with a + * given bit of web space. They are created automatically by + * #SoupSession. + **/ + +/** + * SoupAuth: + * + * The abstract base class for handling authentication. 
Specific HTTP + * Authentication mechanisms are implemented by its subclasses, but + * applications never need to be aware of the specific subclasses + * being used. + **/ + +typedef struct { + gboolean proxy; + char *host; + + GHashTable *saved_passwords; +} SoupAuthPrivate; +#define SOUP_AUTH_GET_PRIVATE(o) (G_TYPE_INSTANCE_GET_PRIVATE ((o), SOUP_TYPE_AUTH, SoupAuthPrivate)) + +G_DEFINE_ABSTRACT_TYPE (SoupAuth, soup_auth, G_TYPE_OBJECT) + +enum { + SAVE_PASSWORD, + LAST_SIGNAL +}; + +static guint signals[LAST_SIGNAL] = { 0 }; + +enum { + PROP_0, + + PROP_SCHEME_NAME, + PROP_REALM, + PROP_HOST, + PROP_IS_FOR_PROXY, + PROP_IS_AUTHENTICATED, + + LAST_PROP +}; + +static void set_property (GObject *object, guint prop_id, + const GValue *value, GParamSpec *pspec); +static void get_property (GObject *object, guint prop_id, + GValue *value, GParamSpec *pspec); + +static void +finalize (GObject *object) +{ + SoupAuth *auth = SOUP_AUTH (object); + SoupAuthPrivate *priv = SOUP_AUTH_GET_PRIVATE (auth); + + g_free (auth->realm); + g_free (priv->host); + if (priv->saved_passwords) + g_hash_table_destroy (priv->saved_passwords); + + G_OBJECT_CLASS (soup_auth_parent_class)->finalize (object); +} + +static void +soup_auth_class_init (SoupAuthClass *auth_class) +{ + GObjectClass *object_class = G_OBJECT_CLASS (auth_class); + + g_type_class_add_private (auth_class, sizeof (SoupAuthPrivate)); + + object_class->finalize = finalize; + object_class->set_property = set_property; + object_class->get_property = get_property; + + /** + * SoupAuth::save-password: + * @auth: the auth + * @username: the username to save + * @password: the password to save + * + * Emitted to request that the @username/@password pair be + * saved. If the session supports password-saving, it will + * connect to this signal before emitting + * #SoupSession::authenticate, so that it record the password + * if requested by the caller. 
+ * + * Since: 2.28 + **/ + signals[SAVE_PASSWORD] = + g_signal_new ("save-password", + G_OBJECT_CLASS_TYPE (object_class), + G_SIGNAL_RUN_FIRST, + 0, NULL, NULL, + soup_marshal_NONE__STRING_STRING, + G_TYPE_NONE, 2, + G_TYPE_STRING, + G_TYPE_STRING); + + /* properties */ + /** + * SOUP_AUTH_SCHEME_NAME: + * + * An alias for the #SoupAuth:scheme property. (The + * authentication scheme name.) + **/ + g_object_class_install_property ( + object_class, PROP_SCHEME_NAME, + g_param_spec_string (SOUP_AUTH_SCHEME_NAME, + "Scheme name", + "Authentication scheme name", + NULL, + G_PARAM_READABLE)); + /** + * SOUP_AUTH_REALM: + * + * An alias for the #SoupAuth:realm property. (The + * authentication realm.) + **/ + g_object_class_install_property ( + object_class, PROP_REALM, + g_param_spec_string (SOUP_AUTH_REALM, + "Realm", + "Authentication realm", + NULL, + G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY)); + /** + * SOUP_AUTH_HOST: + * + * An alias for the #SoupAuth:host property. (The + * host being authenticated to.) + **/ + g_object_class_install_property ( + object_class, PROP_HOST, + g_param_spec_string (SOUP_AUTH_HOST, + "Host", + "Authentication host", + NULL, + G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY)); + /** + * SOUP_AUTH_IS_FOR_PROXY: + * + * An alias for the #SoupAuth:is-for-proxy property. (Whether + * or not the auth is for a proxy server.) + **/ + g_object_class_install_property ( + object_class, PROP_IS_FOR_PROXY, + g_param_spec_boolean (SOUP_AUTH_IS_FOR_PROXY, + "For Proxy", + "Whether or not the auth is for a proxy server", + FALSE, + G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY)); + /** + * SOUP_AUTH_IS_AUTHENTICATED: + * + * An alias for the #SoupAuth:is-authenticated property. + * (Whether or not the auth has been authenticated.) 
+ **/ + g_object_class_install_property ( + object_class, PROP_IS_AUTHENTICATED, + g_param_spec_boolean (SOUP_AUTH_IS_AUTHENTICATED, + "Authenticated", + "Whether or not the auth is authenticated", + FALSE, + G_PARAM_READABLE)); +} + +static void +soup_auth_init (SoupAuth *auth) +{ +} + +static void +set_property (GObject *object, guint prop_id, + const GValue *value, GParamSpec *pspec) +{ + SoupAuth *auth = SOUP_AUTH (object); + SoupAuthPrivate *priv = SOUP_AUTH_GET_PRIVATE (object); + + switch (prop_id) { + case PROP_REALM: + auth->realm = g_value_dup_string (value); + break; + case PROP_HOST: + priv->host = g_value_dup_string (value); + break; + case PROP_IS_FOR_PROXY: + priv->proxy = g_value_get_boolean (value); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); + break; + } +} + +static void +get_property (GObject *object, guint prop_id, + GValue *value, GParamSpec *pspec) +{ + SoupAuth *auth = SOUP_AUTH (object); + SoupAuthPrivate *priv = SOUP_AUTH_GET_PRIVATE (object); + + switch (prop_id) { + case PROP_SCHEME_NAME: + g_value_set_string (value, soup_auth_get_scheme_name (auth)); + break; + case PROP_REALM: + g_value_set_string (value, soup_auth_get_realm (auth)); + break; + case PROP_HOST: + g_value_set_string (value, soup_auth_get_host (auth)); + break; + case PROP_IS_FOR_PROXY: + g_value_set_boolean (value, priv->proxy); + break; + case PROP_IS_AUTHENTICATED: + g_value_set_boolean (value, soup_auth_is_authenticated (auth)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); + break; + } +} + +/** + * soup_auth_new: + * @type: the type of auth to create (a subtype of #SoupAuth) + * @msg: the #SoupMessage the auth is being created for + * @auth_header: the WWW-Authenticate/Proxy-Authenticate header + * + * Creates a new #SoupAuth of type @type with the information from + * @msg and @auth_header. + * + * This is called by #SoupSession; you will normally not create auths + * yourself. 
+ *
+ * Return value: the new #SoupAuth, or %NULL if it could not be
+ * created
+ **/
+SoupAuth *
+soup_auth_new (GType type, SoupMessage *msg, const char *auth_header)
+{
+	SoupAuth *new_auth;
+	GHashTable *header_params = NULL;
+	const char *scheme_name, *realm_value;
+	gsize scheme_len;
+
+	g_return_val_if_fail (g_type_is_a (type, SOUP_TYPE_AUTH), NULL);
+	g_return_val_if_fail (SOUP_IS_MESSAGE (msg), NULL);
+	g_return_val_if_fail (auth_header != NULL, NULL);
+
+	new_auth = g_object_new (type,
+				 SOUP_AUTH_IS_FOR_PROXY, (msg->status_code == SOUP_STATUS_PROXY_UNAUTHORIZED),
+				 SOUP_AUTH_HOST, soup_message_get_uri (msg)->host,
+				 NULL);
+
+	/* The header must start with this type's scheme name and carry a
+	 * parseable parameter list; otherwise the object is discarded.
+	 */
+	scheme_name = soup_auth_get_scheme_name (new_auth);
+	scheme_len = strlen (scheme_name);
+	if (g_ascii_strncasecmp (auth_header, scheme_name, scheme_len) == 0)
+		header_params = soup_header_parse_param_list (auth_header + scheme_len);
+	if (header_params == NULL) {
+		g_object_unref (new_auth);
+		return NULL;
+	}
+
+	/* A "realm" parameter is mandatory. */
+	realm_value = g_hash_table_lookup (header_params, "realm");
+	if (realm_value == NULL) {
+		soup_header_free_param_list (header_params);
+		g_object_unref (new_auth);
+		return NULL;
+	}
+	new_auth->realm = g_strdup (realm_value);
+
+	/* Let the subclass digest the remaining parameters. */
+	if (!SOUP_AUTH_GET_CLASS (new_auth)->update (new_auth, msg, header_params)) {
+		g_object_unref (new_auth);
+		new_auth = NULL;
+	}
+
+	soup_header_free_param_list (header_params);
+	return new_auth;
+}
+
+/**
+ * soup_auth_update:
+ * @auth: a #SoupAuth
+ * @msg: the #SoupMessage @auth is being updated for
+ * @auth_header: the WWW-Authenticate/Proxy-Authenticate header
+ *
+ * Updates @auth with the information from @msg and @auth_header,
+ * possibly un-authenticating it. As with soup_auth_new(), this is
+ * normally only used by #SoupSession.
+ *
+ * Return value: %TRUE if @auth is still a valid (but potentially
+ * unauthenticated) #SoupAuth. %FALSE if something about @auth_header
+ * could not be parsed or incorporated into @auth at all.
+ **/
+gboolean
+soup_auth_update (SoupAuth *auth, SoupMessage *msg, const char *auth_header)
+{
+	GHashTable *header_params;
+	const char *scheme_name, *realm_value;
+	gsize scheme_len;
+	gboolean authed_before, authed_after, realm_matches, ok;
+
+	g_return_val_if_fail (SOUP_IS_AUTH (auth), FALSE);
+	g_return_val_if_fail (SOUP_IS_MESSAGE (msg), FALSE);
+	g_return_val_if_fail (auth_header != NULL, FALSE);
+
+	/* The header has to be for this auth's own scheme. */
+	scheme_name = soup_auth_get_scheme_name (auth);
+	scheme_len = strlen (scheme_name);
+	if (g_ascii_strncasecmp (auth_header, scheme_name, scheme_len) != 0)
+		return FALSE;
+
+	header_params = soup_header_parse_param_list (auth_header + scheme_len);
+	if (header_params == NULL)
+		return FALSE;
+
+	/* ... and for the realm this auth was created with. */
+	realm_value = g_hash_table_lookup (header_params, "realm");
+	realm_matches = realm_value != NULL && strcmp (realm_value, auth->realm) == 0;
+	if (!realm_matches) {
+		soup_header_free_param_list (header_params);
+		return FALSE;
+	}
+
+	/* Emit a property notification only if the subclass update
+	 * actually flipped the authenticated state.
+	 */
+	authed_before = soup_auth_is_authenticated (auth);
+	ok = SOUP_AUTH_GET_CLASS (auth)->update (auth, msg, header_params);
+	authed_after = soup_auth_is_authenticated (auth);
+	if (authed_before != authed_after)
+		g_object_notify (G_OBJECT (auth), SOUP_AUTH_IS_AUTHENTICATED);
+
+	soup_header_free_param_list (header_params);
+	return ok;
+}
+
+/**
+ * soup_auth_authenticate:
+ * @auth: a #SoupAuth
+ * @username: the username provided by the user or client
+ * @password: the password provided by the user or client
+ *
+ * Call this on an auth to authenticate it; normally this will cause
+ * the auth's message to be requeued with the new authentication info.
+ *
+ * This does not cause the password to be saved to persistent storage;
+ * see soup_auth_save_password() for that.
+ **/ +void +soup_auth_authenticate (SoupAuth *auth, const char *username, const char *password) +{ + gboolean was_authenticated; + + g_return_if_fail (SOUP_IS_AUTH (auth)); + g_return_if_fail (username != NULL); + g_return_if_fail (password != NULL); + + was_authenticated = soup_auth_is_authenticated (auth); + SOUP_AUTH_GET_CLASS (auth)->authenticate (auth, username, password); + if (was_authenticated != soup_auth_is_authenticated (auth)) + g_object_notify (G_OBJECT (auth), SOUP_AUTH_IS_AUTHENTICATED); +} + +/** + * soup_auth_is_for_proxy: + * @auth: a #SoupAuth + * + * Tests whether or not @auth is associated with a proxy server rather + * than an "origin" server. + * + * Return value: %TRUE or %FALSE + **/ +gboolean +soup_auth_is_for_proxy (SoupAuth *auth) +{ + g_return_val_if_fail (SOUP_IS_AUTH (auth), FALSE); + + return SOUP_AUTH_GET_PRIVATE (auth)->proxy; +} + +/** + * soup_auth_get_scheme_name: + * @auth: a #SoupAuth + * + * Returns @auth's scheme name. (Eg, "Basic", "Digest", or "NTLM") + * + * Return value: the scheme name + **/ +const char * +soup_auth_get_scheme_name (SoupAuth *auth) +{ + g_return_val_if_fail (SOUP_IS_AUTH (auth), NULL); + + return SOUP_AUTH_GET_CLASS (auth)->scheme_name; +} + +/** + * soup_auth_get_host: + * @auth: a #SoupAuth + * + * Returns the host that @auth is associated with. + * + * Return value: the hostname + **/ +const char * +soup_auth_get_host (SoupAuth *auth) +{ + g_return_val_if_fail (SOUP_IS_AUTH (auth), NULL); + + return SOUP_AUTH_GET_PRIVATE (auth)->host; +} + + +/** + * soup_auth_get_realm: + * @auth: a #SoupAuth + * + * Returns @auth's realm. This is an identifier that distinguishes + * separate authentication spaces on a given server, and may be some + * string that is meaningful to the user. (Although it is probably not + * localized.) 
+ * + * Return value: the realm name + **/ +const char * +soup_auth_get_realm (SoupAuth *auth) +{ + g_return_val_if_fail (SOUP_IS_AUTH (auth), NULL); + + return auth->realm; +} + +/** + * soup_auth_get_info: + * @auth: a #SoupAuth + * + * Gets an opaque identifier for @auth, for use as a hash key or the + * like. #SoupAuth objects from the same server with the same + * identifier refer to the same authentication domain (eg, the URLs + * associated with them take the same usernames and passwords). + * + * Return value: the identifier + **/ +char * +soup_auth_get_info (SoupAuth *auth) +{ + g_return_val_if_fail (SOUP_IS_AUTH (auth), NULL); + + return g_strdup_printf ("%s:%s", + SOUP_AUTH_GET_CLASS (auth)->scheme_name, + auth->realm); +} + +/** + * soup_auth_is_authenticated: + * @auth: a #SoupAuth + * + * Tests if @auth has been given a username and password + * + * Return value: %TRUE if @auth has been given a username and password + **/ +gboolean +soup_auth_is_authenticated (SoupAuth *auth) +{ + g_return_val_if_fail (SOUP_IS_AUTH (auth), TRUE); + + return SOUP_AUTH_GET_CLASS (auth)->is_authenticated (auth); +} + +/** + * soup_auth_get_authorization: + * @auth: a #SoupAuth + * @msg: the #SoupMessage to be authorized + * + * Generates an appropriate "Authorization" header for @msg. (The + * session will only call this if soup_auth_is_authenticated() + * returned %TRUE.) + * + * Return value: the "Authorization" header, which must be freed. + **/ +char * +soup_auth_get_authorization (SoupAuth *auth, SoupMessage *msg) +{ + g_return_val_if_fail (SOUP_IS_AUTH (auth), NULL); + g_return_val_if_fail (msg != NULL, NULL); + + return SOUP_AUTH_GET_CLASS (auth)->get_authorization (auth, msg); +} + +/** + * soup_auth_get_protection_space: + * @auth: a #SoupAuth + * @source_uri: the URI of the request that @auth was generated in + * response to. + * + * Returns a list of paths on the server which @auth extends over. 
+ * (All subdirectories of these paths are also assumed to be part + * of @auth's protection space, unless otherwise discovered not to + * be.) + * + * Return value: (element-type utf8) (transfer full): the list of + * paths, which can be freed with soup_auth_free_protection_space(). + **/ +GSList * +soup_auth_get_protection_space (SoupAuth *auth, SoupURI *source_uri) +{ + g_return_val_if_fail (SOUP_IS_AUTH (auth), NULL); + g_return_val_if_fail (source_uri != NULL, NULL); + + return SOUP_AUTH_GET_CLASS (auth)->get_protection_space (auth, source_uri); +} + +/** + * soup_auth_free_protection_space: (skip) + * @auth: a #SoupAuth + * @space: the return value from soup_auth_get_protection_space() + * + * Frees @space. + **/ +void +soup_auth_free_protection_space (SoupAuth *auth, GSList *space) +{ + GSList *s; + + for (s = space; s; s = s->next) + g_free (s->data); + g_slist_free (space); +} + +/** + * soup_auth_get_saved_users: + * @auth: a #SoupAuth + * + * Gets a list of usernames for which a saved password is available. + * (If the session is not configured to save passwords, this will + * always be %NULL.) + * + * Return value: (transfer container): the list of usernames. You must + * free the list with g_slist_free(), but do not free or modify the + * contents. + * + * Since: 2.28 + **/ +GSList * +soup_auth_get_saved_users (SoupAuth *auth) +{ + SoupAuthPrivate *priv; + GSList *users; + + g_return_val_if_fail (SOUP_IS_AUTH (auth), NULL); + + priv = SOUP_AUTH_GET_PRIVATE (auth); + users = NULL; + + if (priv->saved_passwords) { + GHashTableIter iter; + gpointer key, value; + + g_hash_table_iter_init (&iter, priv->saved_passwords); + while (g_hash_table_iter_next (&iter, &key, &value)) + users = g_slist_prepend (users, key); + } + return users; +} + +/** + * soup_auth_get_saved_password: + * @auth: a #SoupAuth + * @user: a username from the list returned from + * soup_auth_get_saved_users(). 
+ * + * Given a username for which @auth has a saved password, this returns + * that password. If @auth doesn't have a passwords saved for @user, it + * returns %NULL. + * + * Return value: the saved password, or %NULL. + * + * Since: 2.28 + **/ +const char * +soup_auth_get_saved_password (SoupAuth *auth, const char *user) +{ + SoupAuthPrivate *priv; + + g_return_val_if_fail (SOUP_IS_AUTH (auth), NULL); + g_return_val_if_fail (user != NULL, NULL); + + priv = SOUP_AUTH_GET_PRIVATE (auth); + if (!priv->saved_passwords) + return NULL; + return g_hash_table_lookup (priv->saved_passwords, user); +} + +static void +free_password (gpointer password) +{ + memset (password, 0, strlen (password)); + g_free (password); +} + +static inline void +init_saved_passwords (SoupAuthPrivate *priv) +{ + priv->saved_passwords = g_hash_table_new_full ( + g_str_hash, g_str_equal, g_free, free_password); +} + +/** + * soup_auth_has_saved_password: + * @auth: a #SoupAuth + * @username: a username + * @password: a password + * + * Updates @auth to be aware of an already-saved username/password + * combination. This method does not cause the + * given @username and @password to be saved; use + * soup_auth_save_password() for that. (soup_auth_has_saved_password() + * is an internal method, which is used by the code that actually + * saves and restores the passwords.) 
+ * + * Since: 2.28 + **/ +void +soup_auth_has_saved_password (SoupAuth *auth, const char *username, + const char *password) +{ + SoupAuthPrivate *priv; + + g_return_if_fail (SOUP_IS_AUTH (auth)); + g_return_if_fail (username != NULL); + g_return_if_fail (password != NULL); + + priv = SOUP_AUTH_GET_PRIVATE (auth); + + if (!priv->saved_passwords) + init_saved_passwords (priv); + g_hash_table_insert (priv->saved_passwords, + g_strdup (username), g_strdup (password)); +} + +/** + * soup_auth_save_password: + * @auth: a #SoupAuth + * @username: the username provided by the user or client + * @password: the password provided by the user or client + * + * Requests that the username/password pair be saved to whatever form + * of persistent password storage the session supports. + * + * Since: 2.28 + **/ +void +soup_auth_save_password (SoupAuth *auth, const char *username, + const char *password) +{ + g_return_if_fail (SOUP_IS_AUTH (auth)); + g_return_if_fail (username != NULL); + g_return_if_fail (password != NULL); + + g_signal_emit (auth, signals[SAVE_PASSWORD], 0, + username, password); +} diff --git a/libsoup/soup-auth.h b/libsoup/soup-auth.h new file mode 100644 index 0000000..0a26e8c --- /dev/null +++ b/libsoup/soup-auth.h @@ -0,0 +1,114 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2001-2003, Ximian, Inc. 
+ */ + +#ifndef SOUP_AUTH_H +#define SOUP_AUTH_H 1 + +#include +#include + +G_BEGIN_DECLS + +#define SOUP_TYPE_AUTH (soup_auth_get_type ()) +#define SOUP_AUTH(obj) (G_TYPE_CHECK_INSTANCE_CAST ((obj), SOUP_TYPE_AUTH, SoupAuth)) +#define SOUP_AUTH_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST ((klass), SOUP_TYPE_AUTH, SoupAuthClass)) +#define SOUP_IS_AUTH(obj) (G_TYPE_CHECK_INSTANCE_TYPE ((obj), SOUP_TYPE_AUTH)) +#define SOUP_IS_AUTH_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((obj), SOUP_TYPE_AUTH)) +#define SOUP_AUTH_GET_CLASS(obj) (G_TYPE_INSTANCE_GET_CLASS ((obj), SOUP_TYPE_AUTH, SoupAuthClass)) + +struct _SoupAuth { + GObject parent; + + char *realm; +}; + +typedef struct { + GObjectClass parent_class; + + const char *scheme_name; + guint strength; + + gboolean (*update) (SoupAuth *auth, + SoupMessage *msg, + GHashTable *auth_params); + + GSList * (*get_protection_space) (SoupAuth *auth, + SoupURI *source_uri); + + void (*authenticate) (SoupAuth *auth, + const char *username, + const char *password); + gboolean (*is_authenticated) (SoupAuth *auth); + + char * (*get_authorization) (SoupAuth *auth, + SoupMessage *msg); + /* Padding for future expansion */ + void (*_libsoup_reserved1) (void); + void (*_libsoup_reserved2) (void); + void (*_libsoup_reserved3) (void); + void (*_libsoup_reserved4) (void); +} SoupAuthClass; + +#define SOUP_AUTH_SCHEME_NAME "scheme-name" +#define SOUP_AUTH_REALM "realm" +#define SOUP_AUTH_HOST "host" +#define SOUP_AUTH_IS_FOR_PROXY "is-for-proxy" +#define SOUP_AUTH_IS_AUTHENTICATED "is-authenticated" + +GType soup_auth_get_type (void); + +SoupAuth *soup_auth_new (GType type, + SoupMessage *msg, + const char *auth_header); +gboolean soup_auth_update (SoupAuth *auth, + SoupMessage *msg, + const char *auth_header); + +gboolean soup_auth_is_for_proxy (SoupAuth *auth); +const char *soup_auth_get_scheme_name (SoupAuth *auth); +const char *soup_auth_get_host (SoupAuth *auth); +const char *soup_auth_get_realm (SoupAuth *auth); +char *soup_auth_get_info 
(SoupAuth *auth); + +#ifdef LIBSOUP_I_HAVE_READ_BUG_594377_AND_KNOW_SOUP_PASSWORD_MANAGER_MIGHT_GO_AWAY +GSList *soup_auth_get_saved_users (SoupAuth *auth); +const char *soup_auth_get_saved_password (SoupAuth *auth, + const char *user); +void soup_auth_save_password (SoupAuth *auth, + const char *username, + const char *password); +#endif + +void soup_auth_authenticate (SoupAuth *auth, + const char *username, + const char *password); +gboolean soup_auth_is_authenticated (SoupAuth *auth); + +char *soup_auth_get_authorization (SoupAuth *auth, + SoupMessage *msg); + +GSList *soup_auth_get_protection_space (SoupAuth *auth, + SoupURI *source_uri); +void soup_auth_free_protection_space (SoupAuth *auth, + GSList *space); + +#ifdef LIBSOUP_I_HAVE_READ_BUG_594377_AND_KNOW_SOUP_PASSWORD_MANAGER_MIGHT_GO_AWAY +void soup_auth_has_saved_password (SoupAuth *auth, + const char *username, + const char *password); +#endif + +/* The actual auth types, which can be added/removed as features */ + +#define SOUP_TYPE_AUTH_BASIC (soup_auth_basic_get_type ()) +GType soup_auth_basic_get_type (void); +#define SOUP_TYPE_AUTH_DIGEST (soup_auth_digest_get_type ()) +GType soup_auth_digest_get_type (void); +#define SOUP_TYPE_AUTH_NTLM (soup_auth_ntlm_get_type ()) +GType soup_auth_ntlm_get_type (void); + +G_END_DECLS + +#endif /* SOUP_AUTH_H */ diff --git a/libsoup/soup-cache-private.h b/libsoup/soup-cache-private.h new file mode 100644 index 0000000..3843e8e --- /dev/null +++ b/libsoup/soup-cache-private.h @@ -0,0 +1,42 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * soup-cache-private.h: + * + * Copyright (C) 2010 Igalia, S.L. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ + +#ifndef SOUP_CACHE_PRIVATE_H +#define SOUP_CACHE_PRIVATE_H 1 + +#include "soup-cache.h" +#include + +G_BEGIN_DECLS + +SoupCacheResponse soup_cache_has_response (SoupCache *cache, + SoupMessage *msg); +GInputStream *soup_cache_send_response (SoupCache *cache, + SoupMessage *msg); +SoupCacheability soup_cache_get_cacheability (SoupCache *cache, + SoupMessage *msg); +SoupMessage *soup_cache_generate_conditional_request (SoupCache *cache, + SoupMessage *original); + +G_END_DECLS + +#endif /* SOUP_CACHE_PRIVATE_H */ diff --git a/libsoup/soup-cache.c b/libsoup/soup-cache.c new file mode 100644 index 0000000..63cdf8a --- /dev/null +++ b/libsoup/soup-cache.c @@ -0,0 +1,1761 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * soup-cache.c + * + * Copyright (C) 2009, 2010 Igalia S.L. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. 
+ * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ + +/* TODO: + * - Need to hook the feature in the sync SoupSession. + * - Need more tests. + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include +#include + +#define LIBSOUP_USE_UNSTABLE_REQUEST_API + +#include "soup-cache.h" +#include "soup-cache-private.h" +#include "soup-date.h" +#include "soup-enum-types.h" +#include "soup-headers.h" +#include "soup-session.h" +#include "soup-session-feature.h" +#include "soup-uri.h" +/*TIZEN patch*/ +#include "TIZEN.h" + +static SoupSessionFeatureInterface *soup_cache_default_feature_interface; +static void soup_cache_session_feature_init (SoupSessionFeatureInterface *feature_interface, gpointer interface_data); + +#define DEFAULT_MAX_SIZE 50 * 1024 * 1024 +#define MAX_ENTRY_DATA_PERCENTAGE 10 /* Percentage of the total size + of the cache that can be + filled by a single entry */ + +/* + * Version 2: cache is now saved in soup.cache2. Added the version + * number to the beginning of the file. + * + * Version 3: added HTTP status code to the cache entries. + * + * Version 4: replaced several types. + * - freshness_lifetime,corrected_initial_age,response_time: time_t -> guint32 + * - status_code: guint -> guint16 + * - hits: guint -> guint32 + * + * Version 5: key is no longer stored on disk as it can be easily + * built from the URI. Apart from that some fields in the + * SoupCacheEntry have changed: + * - entry key is now a uint32 instead of a (char *). + * - added uri, used to check for collisions + * - removed filename, it's built from the entry key. 
+ */ +#define SOUP_CACHE_CURRENT_VERSION 5 + +typedef struct _SoupCacheEntry { + guint32 key; + char *uri; + guint32 freshness_lifetime; + gboolean must_revalidate; + gsize length; + guint32 corrected_initial_age; + guint32 response_time; + SoupBuffer *current_writing_buffer; + gboolean dirty; + gboolean got_body; + gboolean being_validated; + SoupMessageHeaders *headers; + GOutputStream *stream; + GError *error; + guint32 hits; + GCancellable *cancellable; + guint16 status_code; +} SoupCacheEntry; + +struct _SoupCachePrivate { + char *cache_dir; + GHashTable *cache; + guint n_pending; + SoupSession *session; + SoupCacheType cache_type; + guint size; + guint max_size; + guint max_entry_data_size; /* Computed value. Here for performance reasons */ + GList *lru_start; +}; + +typedef struct { + SoupCache *cache; + SoupCacheEntry *entry; + SoupMessage *msg; + gulong got_chunk_handler; + gulong got_body_handler; + gulong restarted_handler; + GQueue *buffer_queue; +} SoupCacheWritingFixture; + +enum { + PROP_0, + PROP_CACHE_DIR, + PROP_CACHE_TYPE +}; + +#define SOUP_CACHE_GET_PRIVATE(o) (G_TYPE_INSTANCE_GET_PRIVATE ((o), SOUP_TYPE_CACHE, SoupCachePrivate)) + +G_DEFINE_TYPE_WITH_CODE (SoupCache, soup_cache, G_TYPE_OBJECT, + G_IMPLEMENT_INTERFACE (SOUP_TYPE_SESSION_FEATURE, + soup_cache_session_feature_init)) + +static gboolean soup_cache_entry_remove (SoupCache *cache, SoupCacheEntry *entry); +static void make_room_for_new_entry (SoupCache *cache, guint length_to_add); +static gboolean cache_accepts_entries_of_size (SoupCache *cache, guint length_to_add); +static gboolean write_next_buffer (SoupCacheEntry *entry, SoupCacheWritingFixture *fixture); + +static GFile * +get_file_from_entry (SoupCache *cache, SoupCacheEntry *entry) +{ + char *filename = g_strdup_printf ("%s%s%u", cache->priv->cache_dir, + G_DIR_SEPARATOR_S, (guint) entry->key); + GFile *file = g_file_new_for_path (filename); + g_free (filename); + + return file; +} + +static SoupCacheability +get_cacheability 
(SoupCache *cache, SoupMessage *msg) +{ + SoupCacheability cacheability; + const char *cache_control, *content_type; + + /* 1. The request method must be cacheable */ + if (msg->method == SOUP_METHOD_GET) + cacheability = SOUP_CACHE_CACHEABLE; + else if (msg->method == SOUP_METHOD_HEAD || + msg->method == SOUP_METHOD_TRACE || + msg->method == SOUP_METHOD_CONNECT) + return SOUP_CACHE_UNCACHEABLE; + else + return (SOUP_CACHE_UNCACHEABLE | SOUP_CACHE_INVALIDATES); + + content_type = soup_message_headers_get_content_type (msg->response_headers, NULL); + if (content_type && !g_ascii_strcasecmp (content_type, "multipart/x-mixed-replace")) + return SOUP_CACHE_UNCACHEABLE; + + cache_control = soup_message_headers_get (msg->response_headers, "Cache-Control"); + if (cache_control) { + GHashTable *hash; + SoupCachePrivate *priv = SOUP_CACHE_GET_PRIVATE (cache); + + hash = soup_header_parse_param_list (cache_control); + + /* Shared caches MUST NOT store private resources */ + if (priv->cache_type == SOUP_CACHE_SHARED) { + if (g_hash_table_lookup_extended (hash, "private", NULL, NULL)) { + soup_header_free_param_list (hash); + return SOUP_CACHE_UNCACHEABLE; + } + } + + /* 2. The 'no-store' cache directive does not appear in the + * headers + */ + if (g_hash_table_lookup_extended (hash, "no-store", NULL, NULL)) { + soup_header_free_param_list (hash); + return SOUP_CACHE_UNCACHEABLE; + } + + /* This does not appear in section 2.1, but I think it makes + * sense to check it too? + */ + if (g_hash_table_lookup_extended (hash, "no-cache", NULL, NULL)) { + soup_header_free_param_list (hash); + return SOUP_CACHE_UNCACHEABLE; + } + + soup_header_free_param_list (hash); + } + + switch (msg->status_code) { + case SOUP_STATUS_PARTIAL_CONTENT: + /* We don't cache partial responses, but they only + * invalidate cached full responses if the headers + * don't match. 
+ */ + cacheability = SOUP_CACHE_UNCACHEABLE; + break; + + case SOUP_STATUS_NOT_MODIFIED: + /* A 304 response validates an existing cache entry */ + cacheability = SOUP_CACHE_VALIDATES; + break; + + case SOUP_STATUS_MULTIPLE_CHOICES: + case SOUP_STATUS_MOVED_PERMANENTLY: + case SOUP_STATUS_GONE: + /* FIXME: cacheable unless indicated otherwise */ + cacheability = SOUP_CACHE_UNCACHEABLE; + break; + + case SOUP_STATUS_FOUND: + case SOUP_STATUS_TEMPORARY_REDIRECT: + /* FIXME: cacheable if explicitly indicated */ + cacheability = SOUP_CACHE_UNCACHEABLE; + break; + + case SOUP_STATUS_SEE_OTHER: + case SOUP_STATUS_FORBIDDEN: + case SOUP_STATUS_NOT_FOUND: + case SOUP_STATUS_METHOD_NOT_ALLOWED: + return (SOUP_CACHE_UNCACHEABLE | SOUP_CACHE_INVALIDATES); + + default: + /* Any 5xx status or any 4xx status not handled above + * is uncacheable but doesn't break the cache. + */ + if ((msg->status_code >= SOUP_STATUS_BAD_REQUEST && + msg->status_code <= SOUP_STATUS_FAILED_DEPENDENCY) || + msg->status_code >= SOUP_STATUS_INTERNAL_SERVER_ERROR) + return SOUP_CACHE_UNCACHEABLE; + + /* An unrecognized 2xx, 3xx, or 4xx response breaks + * the cache. + */ + if ((msg->status_code > SOUP_STATUS_PARTIAL_CONTENT && + msg->status_code < SOUP_STATUS_MULTIPLE_CHOICES) || + (msg->status_code > SOUP_STATUS_TEMPORARY_REDIRECT && + msg->status_code < SOUP_STATUS_INTERNAL_SERVER_ERROR)) + return (SOUP_CACHE_UNCACHEABLE | SOUP_CACHE_INVALIDATES); + break; + } + + return cacheability; +} + +/* NOTE: this function deletes the file pointed by the file argument + * and also unref's the GFile object representing it. 
+ */ +static void +soup_cache_entry_free (SoupCacheEntry *entry, GFile *file) +{ + if (file) { + g_file_delete (file, NULL, NULL); + g_object_unref (file); + } + + g_free (entry->uri); + entry->uri = NULL; + + if (entry->current_writing_buffer) { + soup_buffer_free (entry->current_writing_buffer); + entry->current_writing_buffer = NULL; + } + + if (entry->headers) { + soup_message_headers_free (entry->headers); + entry->headers = NULL; + } + if (entry->error) { + g_error_free (entry->error); + entry->error = NULL; + } + if (entry->cancellable) { + g_object_unref (entry->cancellable); + entry->cancellable = NULL; + } + + g_slice_free (SoupCacheEntry, entry); +} + +static void +copy_headers (const char *name, const char *value, SoupMessageHeaders *headers) +{ + soup_message_headers_append (headers, name, value); +} + +static char *hop_by_hop_headers[] = {"Connection", "Keep-Alive", "Proxy-Authenticate", "Proxy-Authorization", "TE", "Trailer", "Transfer-Encoding", "Upgrade"}; + +static void +copy_end_to_end_headers (SoupMessageHeaders *source, SoupMessageHeaders *destination) +{ + int i; + + soup_message_headers_foreach (source, (SoupMessageHeadersForeachFunc) copy_headers, destination); + for (i = 0; i < G_N_ELEMENTS (hop_by_hop_headers); i++) + soup_message_headers_remove (destination, hop_by_hop_headers[i]); + soup_message_headers_clean_connection_headers (destination); +} + +static guint +soup_cache_entry_get_current_age (SoupCacheEntry *entry) +{ + time_t now = time (NULL); + time_t resident_time; + + resident_time = now - entry->response_time; + return entry->corrected_initial_age + resident_time; +} + +static gboolean +soup_cache_entry_is_fresh_enough (SoupCacheEntry *entry, gint min_fresh) +{ + guint limit = (min_fresh == -1) ? 
soup_cache_entry_get_current_age (entry) : (guint) min_fresh; + return entry->freshness_lifetime > limit; +} + +static inline guint32 +get_cache_key_from_uri (const char *uri) +{ + return (guint32) g_str_hash (uri); +} + +static void +soup_cache_entry_set_freshness (SoupCacheEntry *entry, SoupMessage *msg, SoupCache *cache) +{ + const char *cache_control; + const char *expires, *date, *last_modified; + + cache_control = soup_message_headers_get (entry->headers, "Cache-Control"); + if (cache_control) { + const char *max_age, *s_maxage; + gint64 freshness_lifetime = 0; + GHashTable *hash; + SoupCachePrivate *priv = SOUP_CACHE_GET_PRIVATE (cache); + + hash = soup_header_parse_param_list (cache_control); + + /* Should we re-validate the entry when it goes stale */ + entry->must_revalidate = g_hash_table_lookup_extended (hash, "must-revalidate", NULL, NULL); + + /* Section 2.3.1 */ + if (priv->cache_type == SOUP_CACHE_SHARED) { + s_maxage = g_hash_table_lookup (hash, "s-maxage"); + if (s_maxage) { + freshness_lifetime = g_ascii_strtoll (s_maxage, NULL, 10); + if (freshness_lifetime) { + /* Implies proxy-revalidate. TODO: is it true? 
*/ + entry->must_revalidate = TRUE; + soup_header_free_param_list (hash); + return; + } + } + } + + /* If 'max-age' cache directive is present, use that */ + max_age = g_hash_table_lookup (hash, "max-age"); + if (max_age) + freshness_lifetime = g_ascii_strtoll (max_age, NULL, 10); + + if (freshness_lifetime) { + entry->freshness_lifetime = (guint32) MIN (freshness_lifetime, G_MAXUINT32); + soup_header_free_param_list (hash); + return; + } + + soup_header_free_param_list (hash); + } + + /* If the 'Expires' response header is present, use its value + * minus the value of the 'Date' response header + */ + expires = soup_message_headers_get (entry->headers, "Expires"); + date = soup_message_headers_get (entry->headers, "Date"); + if (expires && date) { + SoupDate *expires_d, *date_d; + time_t expires_t, date_t; + + expires_d = soup_date_new_from_string (expires); + if (expires_d) { + date_d = soup_date_new_from_string (date); + + expires_t = soup_date_to_time_t (expires_d); + date_t = soup_date_to_time_t (date_d); + + soup_date_free (expires_d); + soup_date_free (date_d); + + if (expires_t && date_t) { + entry->freshness_lifetime = (guint32) MAX (expires_t - date_t, 0); + return; + } + } else { + /* If Expires is not a valid date we should + treat it as already expired, see section + 3.3 */ + entry->freshness_lifetime = 0; + return; + } + } + + /* Otherwise an heuristic may be used */ + + /* Heuristics MUST NOT be used with these status codes + (section 2.3.1.1) */ + if (msg->status_code != SOUP_STATUS_OK && + msg->status_code != SOUP_STATUS_NON_AUTHORITATIVE && + msg->status_code != SOUP_STATUS_PARTIAL_CONTENT && + msg->status_code != SOUP_STATUS_MULTIPLE_CHOICES && + msg->status_code != SOUP_STATUS_MOVED_PERMANENTLY && + msg->status_code != SOUP_STATUS_GONE) + goto expire; + + /* TODO: attach warning 113 if response's current_age is more + than 24h (section 2.3.1.1) when using heuristics */ + + /* Last-Modified based heuristic */ + last_modified = 
soup_message_headers_get (entry->headers, "Last-Modified"); + if (last_modified) { + SoupDate *soup_date; + time_t now, last_modified_t; + + soup_date = soup_date_new_from_string (last_modified); + last_modified_t = soup_date_to_time_t (soup_date); + now = time (NULL); + +#define HEURISTIC_FACTOR 0.1 /* From Section 2.3.1.1 */ + + entry->freshness_lifetime = MAX (0, (now - last_modified_t) * HEURISTIC_FACTOR); + soup_date_free (soup_date); + } + + return; + + expire: + /* If all else fails, make the entry expire immediately */ + entry->freshness_lifetime = 0; +} + +static SoupCacheEntry * +soup_cache_entry_new (SoupCache *cache, SoupMessage *msg, time_t request_time, time_t response_time) +{ + SoupCacheEntry *entry; + const char *date; + + entry = g_slice_new0 (SoupCacheEntry); + entry->dirty = FALSE; + entry->current_writing_buffer = NULL; + entry->got_body = FALSE; + entry->being_validated = FALSE; + entry->error = NULL; + entry->status_code = msg->status_code; + entry->response_time = response_time; + entry->uri = soup_uri_to_string (soup_message_get_uri (msg), FALSE); + + /* Headers */ + entry->headers = soup_message_headers_new (SOUP_MESSAGE_HEADERS_RESPONSE); + copy_end_to_end_headers (msg->response_headers, entry->headers); + + /* LRU list */ + entry->hits = 0; + + /* Section 2.3.1, Freshness Lifetime */ + soup_cache_entry_set_freshness (entry, msg, cache); + + /* Section 2.3.2, Calculating Age */ + date = soup_message_headers_get (entry->headers, "Date"); + + if (date) { + SoupDate *soup_date; + const char *age; + time_t date_value, apparent_age, corrected_received_age, response_delay, age_value = 0; + + soup_date = soup_date_new_from_string (date); + date_value = soup_date_to_time_t (soup_date); + soup_date_free (soup_date); + + age = soup_message_headers_get (entry->headers, "Age"); + if (age) + age_value = g_ascii_strtoll (age, NULL, 10); + + apparent_age = MAX (0, entry->response_time - date_value); + corrected_received_age = MAX (apparent_age, 
age_value); + response_delay = entry->response_time - request_time; + entry->corrected_initial_age = corrected_received_age + response_delay; + } else { + /* Is this correct ? */ + entry->corrected_initial_age = time (NULL); + } + + return entry; +} + +static void +soup_cache_writing_fixture_free (SoupCacheWritingFixture *fixture) +{ + /* Free fixture. And disconnect signals, we don't want to + listen to more SoupMessage events as we're finished with + this resource */ + if (g_signal_handler_is_connected (fixture->msg, fixture->got_chunk_handler)) + g_signal_handler_disconnect (fixture->msg, fixture->got_chunk_handler); + if (g_signal_handler_is_connected (fixture->msg, fixture->got_body_handler)) + g_signal_handler_disconnect (fixture->msg, fixture->got_body_handler); + if (g_signal_handler_is_connected (fixture->msg, fixture->restarted_handler)) + g_signal_handler_disconnect (fixture->msg, fixture->restarted_handler); + g_queue_foreach (fixture->buffer_queue, (GFunc) soup_buffer_free, NULL); + g_queue_free (fixture->buffer_queue); + g_object_unref (fixture->msg); + g_object_unref (fixture->cache); + g_slice_free (SoupCacheWritingFixture, fixture); +} + +static void +close_ready_cb (GObject *source, GAsyncResult *result, SoupCacheWritingFixture *fixture) +{ + SoupCacheEntry *entry = fixture->entry; + SoupCache *cache = fixture->cache; + GOutputStream *stream = G_OUTPUT_STREAM (source); + goffset content_length; + + g_warn_if_fail (entry->error == NULL); + + /* FIXME: what do we do on error ? */ + + if (stream) { + g_output_stream_close_finish (stream, result, NULL); + g_object_unref (stream); + } + entry->stream = NULL; + + content_length = soup_message_headers_get_content_length (entry->headers); + + /* If the process was cancelled, then delete the entry from + the cache. 
Do it also if the size of a chunked resource is + too much for the cache */ + if (g_cancellable_is_cancelled (entry->cancellable)) { + entry->dirty = FALSE; + soup_cache_entry_remove (cache, entry); + soup_cache_entry_free (entry, get_file_from_entry (cache, entry)); + entry = NULL; + } else if ((soup_message_headers_get_encoding (entry->headers) == SOUP_ENCODING_CHUNKED) || + entry->length != (gsize) content_length) { + /* Two options here: + * + * 1. "chunked" data, entry was temporarily added to + * cache (as content-length is 0) and now that we have + * the actual size we have to evaluate if we want it + * in the cache or not + * + * 2. Content-Length has a different value than actual + * length, means that the content was encoded for + * transmission (typically compressed) and thus we + * have to substract the content-length value that was + * added to the cache and add the unencoded length + */ + gint length_to_add = entry->length - content_length; + + /* Make room in cache if needed */ + if (cache_accepts_entries_of_size (cache, length_to_add)) { + make_room_for_new_entry (cache, length_to_add); + + cache->priv->size += length_to_add; + } else { + entry->dirty = FALSE; + soup_cache_entry_remove (cache, entry); + soup_cache_entry_free (entry, get_file_from_entry (cache, entry)); + entry = NULL; + } + } + + if (entry) { + entry->dirty = FALSE; + entry->got_body = FALSE; + + if (entry->current_writing_buffer) { + soup_buffer_free (entry->current_writing_buffer); + entry->current_writing_buffer = NULL; + } + + g_object_unref (entry->cancellable); + entry->cancellable = NULL; + } + + cache->priv->n_pending--; + + /* Frees */ + soup_cache_writing_fixture_free (fixture); +} + +static void +write_ready_cb (GObject *source, GAsyncResult *result, SoupCacheWritingFixture *fixture) +{ + GOutputStream *stream = G_OUTPUT_STREAM (source); + GError *error = NULL; + gssize write_size; + SoupCacheEntry *entry = fixture->entry; + + if (g_cancellable_is_cancelled 
(entry->cancellable)) { + g_output_stream_close_async (stream, + G_PRIORITY_LOW, + entry->cancellable, + (GAsyncReadyCallback)close_ready_cb, + fixture); + return; + } + + write_size = g_output_stream_write_finish (stream, result, &error); + if (write_size <= 0 || error) { + if (error) + entry->error = error; + g_output_stream_close_async (stream, + G_PRIORITY_LOW, + entry->cancellable, + (GAsyncReadyCallback)close_ready_cb, + fixture); + /* FIXME: We should completely stop caching the + resource at this point */ + } else { + /* Are we still writing and is there new data to write + already ? */ + if (fixture->buffer_queue->length > 0) + write_next_buffer (entry, fixture); + else { + soup_buffer_free (entry->current_writing_buffer); + entry->current_writing_buffer = NULL; + + if (entry->got_body) { + /* If we already received 'got-body' + and we have written all the data, + we can close the stream */ + g_output_stream_close_async (entry->stream, + G_PRIORITY_LOW, + entry->cancellable, + (GAsyncReadyCallback)close_ready_cb, + fixture); + } + } + } +} + +static gboolean +write_next_buffer (SoupCacheEntry *entry, SoupCacheWritingFixture *fixture) +{ + SoupBuffer *buffer = g_queue_pop_head (fixture->buffer_queue); + + if (buffer == NULL) + return FALSE; + + /* Free the old buffer */ + if (entry->current_writing_buffer) { + soup_buffer_free (entry->current_writing_buffer); + entry->current_writing_buffer = NULL; + } + entry->current_writing_buffer = buffer; + + g_output_stream_write_async (entry->stream, buffer->data, buffer->length, + G_PRIORITY_LOW, entry->cancellable, + (GAsyncReadyCallback) write_ready_cb, + fixture); + return TRUE; +} + +static void +msg_got_chunk_cb (SoupMessage *msg, SoupBuffer *chunk, SoupCacheWritingFixture *fixture) +{ + SoupCacheEntry *entry = fixture->entry; + + /* Ignore this if the writing or appending was cancelled */ + if (!g_cancellable_is_cancelled (entry->cancellable)) { + g_queue_push_tail (fixture->buffer_queue, soup_buffer_copy 
(chunk)); + entry->length += chunk->length; + + if (!cache_accepts_entries_of_size (fixture->cache, entry->length)) { + /* Quickly cancel the caching of the resource */ + g_cancellable_cancel (entry->cancellable); + } + } + + /* FIXME: remove the error check when we cancel the caching at + the first write error */ + /* Only write if the entry stream is ready */ + if (entry->current_writing_buffer == NULL && entry->error == NULL && entry->stream) + write_next_buffer (entry, fixture); +} + +static void +msg_got_body_cb (SoupMessage *msg, SoupCacheWritingFixture *fixture) +{ + SoupCacheEntry *entry = fixture->entry; + g_return_if_fail (entry); + + entry->got_body = TRUE; + + if (!entry->stream && fixture->buffer_queue->length > 0) + /* The stream is not ready to be written but we still + have data to write, we'll write it when the stream + is opened for writing */ + return; + + + if (fixture->buffer_queue->length > 0) { + /* If we still have data to write, write it, + write_ready_cb will close the stream */ + if (entry->current_writing_buffer == NULL && entry->error == NULL && entry->stream) + write_next_buffer (entry, fixture); + return; + } + + if (entry->stream && entry->current_writing_buffer == NULL) + g_output_stream_close_async (entry->stream, + G_PRIORITY_LOW, + entry->cancellable, + (GAsyncReadyCallback)close_ready_cb, + fixture); +} + +static gboolean +soup_cache_entry_remove (SoupCache *cache, SoupCacheEntry *entry) +{ + GList *lru_item; + + /* if (entry->dirty && !g_cancellable_is_cancelled (entry->cancellable)) { */ + if (entry->dirty) { + g_cancellable_cancel (entry->cancellable); + return FALSE; + } + + g_assert (!entry->dirty); + g_assert (g_list_length (cache->priv->lru_start) == g_hash_table_size (cache->priv->cache)); + + if (!g_hash_table_remove (cache->priv->cache, GUINT_TO_POINTER (entry->key))) + return FALSE; + + /* Remove from LRU */ + lru_item = g_list_find (cache->priv->lru_start, entry); + cache->priv->lru_start = g_list_delete_link 
(cache->priv->lru_start, lru_item); + + /* Adjust cache size */ + cache->priv->size -= entry->length; + + g_assert (g_list_length (cache->priv->lru_start) == g_hash_table_size (cache->priv->cache)); + + return TRUE; +} + +static gint +lru_compare_func (gconstpointer a, gconstpointer b) +{ + SoupCacheEntry *entry_a = (SoupCacheEntry *)a; + SoupCacheEntry *entry_b = (SoupCacheEntry *)b; + + /* The rationale of this sorting func is + * + * 1. sort by hits -> LRU algorithm, then + * + * 2. sort by freshness lifetime, we better discard first + * entries that are close to expire + * + * 3. sort by size, replace first small size resources as they + * are cheaper to download + */ + + /* Sort by hits */ + if (entry_a->hits != entry_b->hits) + return entry_a->hits - entry_b->hits; + + /* Sort by freshness_lifetime */ + if (entry_a->freshness_lifetime != entry_b->freshness_lifetime) + return entry_a->freshness_lifetime - entry_b->freshness_lifetime; + + /* Sort by size */ + return entry_a->length - entry_b->length; +} + +static gboolean +cache_accepts_entries_of_size (SoupCache *cache, guint length_to_add) +{ + /* We could add here some more heuristics. TODO: review how + this is done by other HTTP caches */ + + return length_to_add <= cache->priv->max_entry_data_size; +} + +static void +make_room_for_new_entry (SoupCache *cache, guint length_to_add) +{ + GList *lru_entry = cache->priv->lru_start; + + /* Check that there is enough room for the new entry. This is + an approximation as we're not working out the size of the + cache file or the size of the headers for performance + reasons. TODO: check if that would be really that expensive */ + + while (lru_entry && + (length_to_add + cache->priv->size > cache->priv->max_size)) { + SoupCacheEntry *old_entry = (SoupCacheEntry *)lru_entry->data; + + /* Discard entries. 
Once cancelled resources will be + * freed in close_ready_cb + */ + if (soup_cache_entry_remove (cache, old_entry)) { + soup_cache_entry_free (old_entry, get_file_from_entry (cache, old_entry)); + lru_entry = cache->priv->lru_start; + } else + lru_entry = g_list_next (lru_entry); + } +} + +static gboolean +soup_cache_entry_insert (SoupCache *cache, + SoupCacheEntry *entry, + gboolean sort) +{ + guint length_to_add = 0; + SoupCacheEntry *old_entry; + + /* Fill the key */ + entry->key = get_cache_key_from_uri ((const char *) entry->uri); + + if (soup_message_headers_get_encoding (entry->headers) != SOUP_ENCODING_CHUNKED) + length_to_add = soup_message_headers_get_content_length (entry->headers); + + /* Check if we are going to store the resource depending on its size */ + if (length_to_add) { + if (!cache_accepts_entries_of_size (cache, length_to_add)) + return FALSE; + + /* Make room for new entry if needed */ + make_room_for_new_entry (cache, length_to_add); + } + + /* Remove any previous entry */ + if ((old_entry = g_hash_table_lookup (cache->priv->cache, GUINT_TO_POINTER (entry->key))) != NULL) { + if (soup_cache_entry_remove (cache, old_entry)) + soup_cache_entry_free (old_entry, get_file_from_entry (cache, old_entry)); + else + return FALSE; + } + + /* Add to hash table */ + g_hash_table_insert (cache->priv->cache, GUINT_TO_POINTER (entry->key), entry); + + /* Compute new cache size */ + cache->priv->size += length_to_add; + + /* Update LRU */ + if (sort) + cache->priv->lru_start = g_list_insert_sorted (cache->priv->lru_start, entry, lru_compare_func); + else + cache->priv->lru_start = g_list_prepend (cache->priv->lru_start, entry); + + g_assert (g_list_length (cache->priv->lru_start) == g_hash_table_size (cache->priv->cache)); + + return TRUE; +} + +static SoupCacheEntry* +soup_cache_entry_lookup (SoupCache *cache, + SoupMessage *msg) +{ + SoupCacheEntry *entry; + guint32 key; + char *uri = NULL; + + uri = soup_uri_to_string (soup_message_get_uri (msg), 
FALSE); + key = get_cache_key_from_uri ((const char *) uri); + + entry = g_hash_table_lookup (cache->priv->cache, GUINT_TO_POINTER (key)); + + if (entry != NULL && (strcmp (entry->uri, uri) != 0)) + entry = NULL; + + g_free (uri); + return entry; +} + +static void +msg_restarted_cb (SoupMessage *msg, SoupCacheEntry *entry) +{ + /* FIXME: What should we do here exactly? */ +} + +static void +replace_cb (GObject *source, GAsyncResult *result, SoupCacheWritingFixture *fixture) +{ + SoupCacheEntry *entry = fixture->entry; + GOutputStream *stream = (GOutputStream *) g_file_replace_finish (G_FILE (source), + result, &entry->error); + + if (g_cancellable_is_cancelled (entry->cancellable) || entry->error) { + if (stream) + g_object_unref (stream); + fixture->cache->priv->n_pending--; + entry->dirty = FALSE; + soup_cache_entry_remove (fixture->cache, entry); + soup_cache_entry_free (entry, get_file_from_entry (fixture->cache, entry)); + soup_cache_writing_fixture_free (fixture); + return; + } + + entry->stream = stream; + + /* If we already got all the data we have to initiate the + * writing here, since we won't get more 'got-chunk' + * signals + */ + if (!entry->got_body) + return; + + /* It could happen that reading the data from server + * was completed before this happens. 
In that case + * there is no data + */ + if (!write_next_buffer (entry, fixture)) + /* Could happen if the resource is empty */ + g_output_stream_close_async (stream, G_PRIORITY_LOW, entry->cancellable, + (GAsyncReadyCallback) close_ready_cb, + fixture); +} + +typedef struct { + time_t request_time; + SoupSessionFeature *feature; + gulong got_headers_handler; +} RequestHelper; + +static void +msg_got_headers_cb (SoupMessage *msg, gpointer user_data) +{ + SoupCache *cache; + SoupCacheability cacheable; + RequestHelper *helper; + time_t request_time, response_time; + SoupCacheEntry *entry; + + response_time = time (NULL); + + helper = (RequestHelper *)user_data; + cache = SOUP_CACHE (helper->feature); + request_time = helper->request_time; + g_signal_handlers_disconnect_by_func (msg, msg_got_headers_cb, user_data); + g_slice_free (RequestHelper, helper); + + cacheable = soup_cache_get_cacheability (cache, msg); + + if (cacheable & SOUP_CACHE_CACHEABLE) { + GFile *file; + SoupCacheWritingFixture *fixture; + + /* Check if we are already caching this resource */ + entry = soup_cache_entry_lookup (cache, msg); + + if (entry && entry->dirty) + return; + + /* Create a new entry, deleting any old one if present */ + if (entry) { + soup_cache_entry_remove (cache, entry); + soup_cache_entry_free (entry, get_file_from_entry (cache, entry)); + } + + entry = soup_cache_entry_new (cache, msg, request_time, response_time); + entry->hits = 1; + + /* Do not continue if it can not be stored */ + if (!soup_cache_entry_insert (cache, entry, TRUE)) { + soup_cache_entry_free (entry, get_file_from_entry (cache, entry)); + return; + } + + fixture = g_slice_new0 (SoupCacheWritingFixture); + fixture->cache = g_object_ref (cache); + fixture->entry = entry; + fixture->msg = g_object_ref (msg); + fixture->buffer_queue = g_queue_new (); + + /* We connect now to these signals and buffer the data + if it comes before the file is ready for writing */ + fixture->got_chunk_handler = + 
g_signal_connect (msg, "got-chunk", G_CALLBACK (msg_got_chunk_cb), fixture); + fixture->got_body_handler = + g_signal_connect (msg, "got-body", G_CALLBACK (msg_got_body_cb), fixture); + fixture->restarted_handler = + g_signal_connect (msg, "restarted", G_CALLBACK (msg_restarted_cb), entry); + + /* Prepare entry */ + cache->priv->n_pending++; + + entry->dirty = TRUE; + entry->cancellable = g_cancellable_new (); + file = get_file_from_entry (cache, entry); + g_file_replace_async (file, NULL, FALSE, + G_FILE_CREATE_PRIVATE | G_FILE_CREATE_REPLACE_DESTINATION, + G_PRIORITY_LOW, entry->cancellable, + (GAsyncReadyCallback) replace_cb, fixture); + g_object_unref (file); + } else if (cacheable & SOUP_CACHE_INVALIDATES) { + entry = soup_cache_entry_lookup (cache, msg); + + if (entry) { + if (soup_cache_entry_remove (cache, entry)) + soup_cache_entry_free (entry, get_file_from_entry (cache, entry)); + } + } else if (cacheable & SOUP_CACHE_VALIDATES) { + entry = soup_cache_entry_lookup (cache, msg); + + /* It's possible to get a CACHE_VALIDATES with no + * entry in the hash table. This could happen if for + * example the soup client is the one creating the + * conditional request. + */ + if (entry) { + entry->being_validated = FALSE; + copy_end_to_end_headers (msg->response_headers, entry->headers); + soup_cache_entry_set_freshness (entry, msg, cache); + } + } +} + +GInputStream * +soup_cache_send_response (SoupCache *cache, SoupMessage *msg) +{ + SoupCacheEntry *entry; + char *current_age; + GInputStream *stream = NULL; + GFile *file; + + g_return_val_if_fail (SOUP_IS_CACHE (cache), NULL); + g_return_val_if_fail (SOUP_IS_MESSAGE (msg), NULL); + + entry = soup_cache_entry_lookup (cache, msg); + g_return_val_if_fail (entry, NULL); + + /* TODO: the original idea was to save reads, but current code + assumes that a stream is always returned. Need to reach + some agreement here. 
Also we have to handle the situation + were the file was no longer there (for example files + removed without notifying the cache */ + file = get_file_from_entry (cache, entry); + stream = G_INPUT_STREAM (g_file_read (file, NULL, NULL)); + g_object_unref (file); + + /* Do not change the original message if there is no resource */ + if (stream == NULL) + return stream; + + /* If we are told to send a response from cache any validation + in course is over by now */ + entry->being_validated = FALSE; + + /* Status */ + soup_message_set_status (msg, entry->status_code); + + /* Headers */ + copy_end_to_end_headers (entry->headers, msg->response_headers); + + /* Add 'Age' header with the current age */ + current_age = g_strdup_printf ("%d", soup_cache_entry_get_current_age (entry)); + soup_message_headers_replace (msg->response_headers, + "Age", + current_age); + g_free (current_age); + + return stream; +} + +static void +request_started (SoupSessionFeature *feature, SoupSession *session, + SoupMessage *msg, SoupSocket *socket) +{ + RequestHelper *helper = g_slice_new0 (RequestHelper); + helper->request_time = time (NULL); + helper->feature = feature; + helper->got_headers_handler = g_signal_connect (msg, "got-headers", + G_CALLBACK (msg_got_headers_cb), + helper); +} + +static void +attach (SoupSessionFeature *feature, SoupSession *session) +{ + SoupCache *cache = SOUP_CACHE (feature); + cache->priv->session = session; + + soup_cache_default_feature_interface->attach (feature, session); +} + +static void +soup_cache_session_feature_init (SoupSessionFeatureInterface *feature_interface, + gpointer interface_data) +{ + soup_cache_default_feature_interface = + g_type_default_interface_peek (SOUP_TYPE_SESSION_FEATURE); + + feature_interface->attach = attach; + feature_interface->request_started = request_started; +} + +static void +soup_cache_init (SoupCache *cache) +{ + SoupCachePrivate *priv; + + priv = cache->priv = SOUP_CACHE_GET_PRIVATE (cache); + + priv->cache = 
g_hash_table_new (g_direct_hash, g_direct_equal); + /* LRU */ + priv->lru_start = NULL; + + /* */ + priv->n_pending = 0; + + /* Cache size */ + priv->max_size = DEFAULT_MAX_SIZE; + priv->max_entry_data_size = priv->max_size / MAX_ENTRY_DATA_PERCENTAGE; + priv->size = 0; +} + +static void +remove_cache_item (gpointer data, + gpointer user_data) +{ + SoupCache *cache = (SoupCache *) user_data; + SoupCacheEntry *entry = (SoupCacheEntry *) data; + + if (soup_cache_entry_remove (cache, entry)) + soup_cache_entry_free (entry, NULL); +} + +static void +soup_cache_finalize (GObject *object) +{ + SoupCachePrivate *priv; + GList *entries; + + priv = SOUP_CACHE (object)->priv; + + // Cannot use g_hash_table_foreach as callbacks must not modify the hash table + entries = g_hash_table_get_values (priv->cache); + g_list_foreach (entries, remove_cache_item, object); + g_list_free (entries); + + g_hash_table_destroy (priv->cache); + g_free (priv->cache_dir); + + g_list_free (priv->lru_start); + priv->lru_start = NULL; + + G_OBJECT_CLASS (soup_cache_parent_class)->finalize (object); +} + +static void +soup_cache_set_property (GObject *object, guint prop_id, + const GValue *value, GParamSpec *pspec) +{ + SoupCachePrivate *priv = SOUP_CACHE (object)->priv; + + switch (prop_id) { + case PROP_CACHE_DIR: + priv->cache_dir = g_value_dup_string (value); + /* Create directory if it does not exist (FIXME: should we?) */ + if (!g_file_test (priv->cache_dir, G_FILE_TEST_EXISTS | G_FILE_TEST_IS_DIR)) + g_mkdir_with_parents (priv->cache_dir, 0700); + break; + case PROP_CACHE_TYPE: + priv->cache_type = g_value_get_enum (value); + /* TODO: clear private entries and issue a warning if moving to shared? 
*/ + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); + break; + } +} + +static void +soup_cache_get_property (GObject *object, guint prop_id, + GValue *value, GParamSpec *pspec) +{ + SoupCachePrivate *priv = SOUP_CACHE (object)->priv; + + switch (prop_id) { + case PROP_CACHE_DIR: + g_value_set_string (value, priv->cache_dir); + break; + case PROP_CACHE_TYPE: + g_value_set_enum (value, priv->cache_type); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); + break; + } +} + +static void +soup_cache_constructed (GObject *object) +{ + SoupCachePrivate *priv; + + priv = SOUP_CACHE (object)->priv; + + if (!priv->cache_dir) { + /* Set a default cache dir, different for each user */ + priv->cache_dir = g_build_filename (g_get_user_cache_dir (), + "httpcache", + NULL); + if (!g_file_test (priv->cache_dir, G_FILE_TEST_EXISTS | G_FILE_TEST_IS_DIR)) + g_mkdir_with_parents (priv->cache_dir, 0700); + } + + if (G_OBJECT_CLASS (soup_cache_parent_class)->constructed) + G_OBJECT_CLASS (soup_cache_parent_class)->constructed (object); +} + +static void +soup_cache_class_init (SoupCacheClass *cache_class) +{ + GObjectClass *gobject_class = (GObjectClass *)cache_class; + + gobject_class->finalize = soup_cache_finalize; + gobject_class->constructed = soup_cache_constructed; + gobject_class->set_property = soup_cache_set_property; + gobject_class->get_property = soup_cache_get_property; + + cache_class->get_cacheability = get_cacheability; + + g_object_class_install_property (gobject_class, PROP_CACHE_DIR, + g_param_spec_string ("cache-dir", + "Cache directory", + "The directory to store the cache files", + NULL, + G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY)); + + g_object_class_install_property (gobject_class, PROP_CACHE_TYPE, + g_param_spec_enum ("cache-type", + "Cache type", + "Whether the cache is private or shared", + SOUP_TYPE_CACHE_TYPE, + SOUP_CACHE_SINGLE_USER, + G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY)); + + 
g_type_class_add_private (cache_class, sizeof (SoupCachePrivate)); +} + +/** + * soup_cache_new: + * @cache_dir: the directory to store the cached data, or %NULL to use the default one + * @cache_type: the #SoupCacheType of the cache + * + * Creates a new #SoupCache. + * + * Returns: a new #SoupCache + * + * Since: 2.34 + */ +SoupCache * +soup_cache_new (const char *cache_dir, SoupCacheType cache_type) +{ + return g_object_new (SOUP_TYPE_CACHE, + "cache-dir", cache_dir, + "cache-type", cache_type, + NULL); +} + +/** + * soup_cache_has_response: + * @cache: a #SoupCache + * @msg: a #SoupMessage + * + * This function calculates whether the @cache object has a proper + * response for the request @msg given the flags both in the request + * and the cached reply and the time ellapsed since it was cached. + * + * Returns: whether or not the @cache has a valid response for @msg + * + * Since: 2.34 + */ +SoupCacheResponse +soup_cache_has_response (SoupCache *cache, SoupMessage *msg) +{ + SoupCacheEntry *entry; + const char *cache_control, *pragma; + gpointer value; + int max_age, max_stale, min_fresh; + GList *lru_item, *item; + + entry = soup_cache_entry_lookup (cache, msg); + + /* 1. The presented Request-URI and that of stored response + * match + */ + if (!entry) + return SOUP_CACHE_RESPONSE_STALE; + + /* Increase hit count. Take sorting into account */ + entry->hits++; + lru_item = g_list_find (cache->priv->lru_start, entry); + item = lru_item; + while (item->next && lru_compare_func (item->data, item->next->data) > 0) + item = g_list_next (item); + + if (item != lru_item) { + cache->priv->lru_start = g_list_remove_link (cache->priv->lru_start, lru_item); + item = g_list_insert_sorted (item, lru_item->data, lru_compare_func); + g_list_free (lru_item); + } + + if (entry->dirty || entry->being_validated) + return SOUP_CACHE_RESPONSE_STALE; + + /* 2. 
The request method associated with the stored response + * allows it to be used for the presented request + */ + + /* In practice this means we only return our resource for GET, + * cacheability for other methods is a TODO in the RFC + * (TODO: although we could return the headers for HEAD + * probably). + */ + if (msg->method != SOUP_METHOD_GET) + return SOUP_CACHE_RESPONSE_STALE; + + /* 3. Selecting request-headers nominated by the stored + * response (if any) match those presented. + */ + + /* TODO */ + + /* 4. The request is a conditional request issued by the client. + */ + if (soup_message_headers_get (msg->request_headers, "If-Modified-Since") || + soup_message_headers_get (msg->request_headers, "If-None-Match")) + return SOUP_CACHE_RESPONSE_STALE; + + /* 5. The presented request and stored response are free from + * directives that would prevent its use. + */ + + max_age = max_stale = min_fresh = -1; + + /* For HTTP 1.0 compatibility. RFC2616 section 14.9.4 + */ + pragma = soup_message_headers_get (msg->request_headers, "Pragma"); + if (pragma && soup_header_contains (pragma, "no-cache")) + return SOUP_CACHE_RESPONSE_STALE; + + cache_control = soup_message_headers_get (msg->request_headers, "Cache-Control"); + if (cache_control) { + GHashTable *hash = soup_header_parse_param_list (cache_control); + + if (g_hash_table_lookup_extended (hash, "no-store", NULL, NULL)) { + soup_header_free_param_list (hash); + return SOUP_CACHE_RESPONSE_STALE; + } + + if (g_hash_table_lookup_extended (hash, "no-cache", NULL, NULL)) { + soup_header_free_param_list (hash); + return SOUP_CACHE_RESPONSE_STALE; + } + + if (g_hash_table_lookup_extended (hash, "max-age", NULL, &value)) { + max_age = (int)MIN (g_ascii_strtoll (value, NULL, 10), G_MAXINT32); + /* Forcing cache revalidaton + */ + if (!max_age) { + soup_header_free_param_list (hash); + return SOUP_CACHE_RESPONSE_NEEDS_VALIDATION; + } + } + + /* max-stale can have no value set, we need to use _extended */ + if 
(g_hash_table_lookup_extended (hash, "max-stale", NULL, &value)) { + if (value) + max_stale = (int)MIN (g_ascii_strtoll (value, NULL, 10), G_MAXINT32); + else + max_stale = G_MAXINT32; + } + + value = g_hash_table_lookup (hash, "min-fresh"); + if (value) + min_fresh = (int)MIN (g_ascii_strtoll (value, NULL, 10), G_MAXINT32); + + soup_header_free_param_list (hash); + + if (max_age > 0) { + guint current_age = soup_cache_entry_get_current_age (entry); + + /* If we are over max-age and max-stale is not + set, do not use the value from the cache + without validation */ + if ((guint) max_age <= current_age && max_stale == -1) + return SOUP_CACHE_RESPONSE_NEEDS_VALIDATION; + } + } + + /* 6. The stored response is either: fresh, allowed to be + * served stale or succesfully validated + */ + /* TODO consider also proxy-revalidate & s-maxage */ + if (entry->must_revalidate) + return SOUP_CACHE_RESPONSE_NEEDS_VALIDATION; + + if (!soup_cache_entry_is_fresh_enough (entry, min_fresh)) { + /* Not fresh, can it be served stale? */ + if (max_stale != -1) { + /* G_MAXINT32 means we accept any staleness */ + if (max_stale == G_MAXINT32) + return SOUP_CACHE_RESPONSE_FRESH; + + if ((soup_cache_entry_get_current_age (entry) - entry->freshness_lifetime) <= (guint) max_stale) + return SOUP_CACHE_RESPONSE_FRESH; + } + + return SOUP_CACHE_RESPONSE_NEEDS_VALIDATION; + } + + return SOUP_CACHE_RESPONSE_FRESH; +} + +/** + * soup_cache_get_cacheability: + * @cache: a #SoupCache + * @msg: a #SoupMessage + * + * Calculates whether the @msg can be cached or not. + * + * Returns: a #SoupCacheability value indicating whether the @msg can be cached or not. 
+ * + * Since: 2.34 + */ +SoupCacheability +soup_cache_get_cacheability (SoupCache *cache, SoupMessage *msg) +{ + g_return_val_if_fail (SOUP_IS_CACHE (cache), SOUP_CACHE_UNCACHEABLE); + g_return_val_if_fail (SOUP_IS_MESSAGE (msg), SOUP_CACHE_UNCACHEABLE); + + return SOUP_CACHE_GET_CLASS (cache)->get_cacheability (cache, msg); +} + +static gboolean +force_flush_timeout (gpointer data) +{ + gboolean *forced = (gboolean *)data; + *forced = TRUE; + + return FALSE; +} + +/** + * soup_cache_flush: + * @cache: a #SoupCache + * + * This function will force all pending writes in the @cache to be + * committed to disk. For doing so it will iterate the #GMainContext + * associated with @cache's session as long as needed. + * + * Since: 2.34 + */ +void +soup_cache_flush (SoupCache *cache) +{ + GMainContext *async_context; + SoupSession *session; + GSource *timeout; + gboolean forced = FALSE; + + g_return_if_fail (SOUP_IS_CACHE (cache)); + + session = cache->priv->session; + g_return_if_fail (SOUP_IS_SESSION (session)); + async_context = soup_session_get_async_context (session); + + /* We give cache 10 secs to finish */ + timeout = soup_add_timeout (async_context, 10000, force_flush_timeout, &forced); + + while (!forced && cache->priv->n_pending > 0) + g_main_context_iteration (async_context, FALSE); + + if (!forced) + g_source_destroy (timeout); + else + g_warning ("Cache flush finished despite %d pending requests", cache->priv->n_pending); +} + +static void +clear_cache_item (gpointer data, + gpointer user_data) +{ + SoupCache *cache = (SoupCache *) user_data; + SoupCacheEntry *entry = (SoupCacheEntry *) data; + + if (soup_cache_entry_remove (cache, entry)) + soup_cache_entry_free (entry, get_file_from_entry (cache, entry)); +} + +/** + * soup_cache_clear: + * @cache: a #SoupCache + * + * Will remove all entries in the @cache plus all the cache files + * associated with them. 
+ * + * Since: 2.34 + */ +void +soup_cache_clear (SoupCache *cache) +{ + GList *entries; + + g_return_if_fail (SOUP_IS_CACHE (cache)); + g_return_if_fail (cache->priv->cache); + + // Cannot use g_hash_table_foreach as callbacks must not modify the hash table + entries = g_hash_table_get_values (cache->priv->cache); + g_list_foreach (entries, clear_cache_item, cache); + g_list_free (entries); +} + +SoupMessage * +soup_cache_generate_conditional_request (SoupCache *cache, SoupMessage *original) +{ + SoupMessage *msg; + SoupURI *uri; + SoupCacheEntry *entry; + const char *value; + + g_return_val_if_fail (SOUP_IS_CACHE (cache), NULL); + g_return_val_if_fail (SOUP_IS_MESSAGE (original), NULL); + + /* First copy the data we need from the original message */ + uri = soup_message_get_uri (original); + msg = soup_message_new_from_uri (original->method, uri); + + soup_message_headers_foreach (original->request_headers, + (SoupMessageHeadersForeachFunc)copy_headers, + msg->request_headers); + + /* Now add the validator entries in the header from the cached + data */ + entry = soup_cache_entry_lookup (cache, original); + g_return_val_if_fail (entry, NULL); + + entry->being_validated = TRUE; + + value = soup_message_headers_get (entry->headers, "Last-Modified"); + if (value) + soup_message_headers_append (msg->request_headers, + "If-Modified-Since", + value); + value = soup_message_headers_get (entry->headers, "ETag"); + if (value) + soup_message_headers_append (msg->request_headers, + "If-None-Match", + value); + return msg; +} + +#define OLD_SOUP_CACHE_FILE "soup.cache" +#define SOUP_CACHE_FILE "soup.cache2" + +#define SOUP_CACHE_HEADERS_FORMAT "{ss}" +#define SOUP_CACHE_PHEADERS_FORMAT "(sbuuuuuqa" SOUP_CACHE_HEADERS_FORMAT ")" +#define SOUP_CACHE_ENTRIES_FORMAT "(qa" SOUP_CACHE_PHEADERS_FORMAT ")" + +/* Basically the same format than above except that some strings are + prepended with &. 
This way the GVariant returns a pointer to the + data instead of duplicating the string */ +#define SOUP_CACHE_DECODE_HEADERS_FORMAT "{&s&s}" + +static void +pack_entry (gpointer data, + gpointer user_data) +{ + SoupCacheEntry *entry = (SoupCacheEntry *) data; + SoupMessageHeadersIter iter; + const char *header_key, *header_value; + GVariantBuilder *entries_builder = (GVariantBuilder *)user_data; + + /* Do not store non-consolidated entries */ + if (entry->dirty || entry->current_writing_buffer != NULL || !entry->key) + return; + + g_variant_builder_open (entries_builder, G_VARIANT_TYPE (SOUP_CACHE_PHEADERS_FORMAT)); +#if ENABLE(TIZEN_FIX_PACK_ENTRY) + if (!g_utf8_validate (entry->uri, -1, NULL)) { + g_variant_builder_close (entries_builder); + return; + } +#endif + g_variant_builder_add (entries_builder, "s", entry->uri); + g_variant_builder_add (entries_builder, "b", entry->must_revalidate); + g_variant_builder_add (entries_builder, "u", entry->freshness_lifetime); + g_variant_builder_add (entries_builder, "u", entry->corrected_initial_age); + g_variant_builder_add (entries_builder, "u", entry->response_time); + g_variant_builder_add (entries_builder, "u", entry->hits); + g_variant_builder_add (entries_builder, "u", entry->length); + g_variant_builder_add (entries_builder, "q", entry->status_code); + + /* Pack headers */ + g_variant_builder_open (entries_builder, G_VARIANT_TYPE ("a" SOUP_CACHE_HEADERS_FORMAT)); + soup_message_headers_iter_init (&iter, entry->headers); + while (soup_message_headers_iter_next (&iter, &header_key, &header_value)) { + if (g_utf8_validate (header_value, -1, NULL)) + g_variant_builder_add (entries_builder, SOUP_CACHE_HEADERS_FORMAT, + header_key, header_value); + } + g_variant_builder_close (entries_builder); /* "a" SOUP_CACHE_HEADERS_FORMAT */ + g_variant_builder_close (entries_builder); /* SOUP_CACHE_PHEADERS_FORMAT */ +} + +void +soup_cache_dump (SoupCache *cache) +{ + SoupCachePrivate *priv = SOUP_CACHE_GET_PRIVATE (cache); + char 
*filename; + GVariantBuilder entries_builder; + GVariant *cache_variant; + + if (!g_list_length (cache->priv->lru_start)) + return; + + /* Create the builder and iterate over all entries */ + g_variant_builder_init (&entries_builder, G_VARIANT_TYPE (SOUP_CACHE_ENTRIES_FORMAT)); + g_variant_builder_add (&entries_builder, "q", SOUP_CACHE_CURRENT_VERSION); + g_variant_builder_open (&entries_builder, G_VARIANT_TYPE ("a" SOUP_CACHE_PHEADERS_FORMAT)); + g_list_foreach (cache->priv->lru_start, pack_entry, &entries_builder); + g_variant_builder_close (&entries_builder); + + /* Serialize and dump */ + cache_variant = g_variant_builder_end (&entries_builder); + g_variant_ref_sink (cache_variant); + filename = g_build_filename (priv->cache_dir, SOUP_CACHE_FILE, NULL); + g_file_set_contents (filename, (const char *) g_variant_get_data (cache_variant), + g_variant_get_size (cache_variant), NULL); + g_free (filename); + g_variant_unref (cache_variant); +} + +static void +clear_cache_files (SoupCache *cache) +{ + GFileInfo *file_info; + GFileEnumerator *file_enumerator; + GFile *cache_dir_file = g_file_new_for_path (cache->priv->cache_dir); + + file_enumerator = g_file_enumerate_children (cache_dir_file, G_FILE_ATTRIBUTE_STANDARD_NAME, + G_FILE_QUERY_INFO_NONE, NULL, NULL); + if (file_enumerator) { + while ((file_info = g_file_enumerator_next_file (file_enumerator, NULL, NULL)) != NULL) { + const char *filename = g_file_info_get_name (file_info); + + if (strcmp (filename, SOUP_CACHE_FILE) != 0) { + GFile *cache_file = g_file_get_child (cache_dir_file, filename); + g_file_delete (cache_file, NULL, NULL); + g_object_unref (cache_file); + } + } + g_object_unref (file_enumerator); + } + g_object_unref (cache_dir_file); +} + +void +soup_cache_load (SoupCache *cache) +{ + gboolean must_revalidate; + guint32 freshness_lifetime, hits; + guint32 corrected_initial_age, response_time; + char *url, *filename = NULL, *contents = NULL; + GVariant *cache_variant; + GVariantIter *entries_iter = 
NULL, *headers_iter = NULL; + gsize length; + SoupCacheEntry *entry; + SoupCachePrivate *priv = cache->priv; + guint16 version, status_code; + + filename = g_build_filename (priv->cache_dir, SOUP_CACHE_FILE, NULL); + if (!g_file_get_contents (filename, &contents, &length, NULL)) { + g_free (filename); + g_free (contents); + clear_cache_files (cache); + return; + } + g_free (filename); + + cache_variant = g_variant_new_from_data (G_VARIANT_TYPE (SOUP_CACHE_ENTRIES_FORMAT), + (const char *) contents, length, FALSE, g_free, contents); + g_variant_get (cache_variant, SOUP_CACHE_ENTRIES_FORMAT, &version, &entries_iter); + if (version != SOUP_CACHE_CURRENT_VERSION) { + g_variant_iter_free (entries_iter); + g_variant_unref (cache_variant); + clear_cache_files (cache); + return; + } + + while (g_variant_iter_loop (entries_iter, SOUP_CACHE_PHEADERS_FORMAT, + &url, &must_revalidate, &freshness_lifetime, &corrected_initial_age, + &response_time, &hits, &length, &status_code, + &headers_iter)) { + const char *header_key, *header_value; + SoupMessageHeaders *headers; + SoupMessageHeadersIter soup_headers_iter; + + /* SoupMessage Headers */ + headers = soup_message_headers_new (SOUP_MESSAGE_HEADERS_RESPONSE); + while (g_variant_iter_loop (headers_iter, SOUP_CACHE_HEADERS_FORMAT, &header_key, &header_value)) + if (*header_key && *header_value) + soup_message_headers_append (headers, header_key, header_value); + + /* Check that we have headers */ + soup_message_headers_iter_init (&soup_headers_iter, headers); + if (!soup_message_headers_iter_next (&soup_headers_iter, &header_key, &header_value)) { + soup_message_headers_free (headers); + continue; + } + + /* Insert in cache */ + entry = g_slice_new0 (SoupCacheEntry); + entry->uri = g_strdup (url); + entry->must_revalidate = must_revalidate; + entry->freshness_lifetime = freshness_lifetime; + entry->corrected_initial_age = corrected_initial_age; + entry->response_time = response_time; + entry->hits = hits; + entry->length = length; 
+ entry->headers = headers; + entry->status_code = status_code; + + if (!soup_cache_entry_insert (cache, entry, FALSE)) + soup_cache_entry_free (entry, get_file_from_entry (cache, entry)); + } + + cache->priv->lru_start = g_list_reverse (cache->priv->lru_start); + + /* frees */ + g_variant_iter_free (entries_iter); + g_variant_unref (cache_variant); +} + +void +soup_cache_set_max_size (SoupCache *cache, + guint max_size) +{ + cache->priv->max_size = max_size; + cache->priv->max_entry_data_size = cache->priv->max_size / MAX_ENTRY_DATA_PERCENTAGE; +} + +guint +soup_cache_get_max_size (SoupCache *cache) +{ + return cache->priv->max_size; +} diff --git a/libsoup/soup-cache.h b/libsoup/soup-cache.h new file mode 100644 index 0000000..8585d51 --- /dev/null +++ b/libsoup/soup-cache.h @@ -0,0 +1,98 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * soup-cache.h: + * + * Copyright (C) 2009, 2010 Igalia, S.L. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ */ + +#ifndef SOUP_CACHE_H +#define SOUP_CACHE_H 1 + +#ifdef LIBSOUP_USE_UNSTABLE_REQUEST_API + +#include +#include + +G_BEGIN_DECLS + +#define SOUP_TYPE_CACHE (soup_cache_get_type ()) +#define SOUP_CACHE(obj) (G_TYPE_CHECK_INSTANCE_CAST ((obj), SOUP_TYPE_CACHE, SoupCache)) +#define SOUP_CACHE_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST ((klass), SOUP_TYPE_CACHE, SoupCacheClass)) +#define SOUP_IS_CACHE(obj) (G_TYPE_CHECK_INSTANCE_TYPE ((obj), SOUP_TYPE_CACHE)) +#define SOUP_IS_CACHE_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((obj), SOUP_TYPE_CACHE)) +#define SOUP_CACHE_GET_CLASS(obj) (G_TYPE_INSTANCE_GET_CLASS ((obj), SOUP_TYPE_CACHE, SoupCacheClass)) + +typedef struct _SoupCache SoupCache; +typedef struct _SoupCachePrivate SoupCachePrivate; + +typedef enum { + SOUP_CACHE_CACHEABLE = (1 << 0), + SOUP_CACHE_UNCACHEABLE = (1 << 1), + SOUP_CACHE_INVALIDATES = (1 << 2), + SOUP_CACHE_VALIDATES = (1 << 3) +} SoupCacheability; + +typedef enum { + SOUP_CACHE_RESPONSE_FRESH, + SOUP_CACHE_RESPONSE_NEEDS_VALIDATION, + SOUP_CACHE_RESPONSE_STALE +} SoupCacheResponse; + +typedef enum { + SOUP_CACHE_SINGLE_USER, + SOUP_CACHE_SHARED +} SoupCacheType; + +struct _SoupCache { + GObject parent_instance; + + SoupCachePrivate *priv; +}; + +typedef struct { + GObjectClass parent_class; + + /* methods */ + SoupCacheability (*get_cacheability) (SoupCache *cache, + SoupMessage *msg); + + /* Padding for future expansion */ + void (*_libsoup_reserved1)(void); + void (*_libsoup_reserved2)(void); + void (*_libsoup_reserved3)(void); +} SoupCacheClass; + +GType soup_cache_get_type (void); +SoupCache *soup_cache_new (const char *cache_dir, + SoupCacheType cache_type); +void soup_cache_flush (SoupCache *cache); +void soup_cache_clear (SoupCache *cache); + +void soup_cache_dump (SoupCache *cache); +void soup_cache_load (SoupCache *cache); + +void soup_cache_set_max_size (SoupCache *cache, + guint max_size); +guint soup_cache_get_max_size (SoupCache *cache); + +G_END_DECLS + +#endif /* 
LIBSOUP_USE_UNSTABLE_REQUEST_API */ + +#endif /* SOUP_CACHE_H */ + diff --git a/libsoup/soup-connection.c b/libsoup/soup-connection.c new file mode 100644 index 0000000..514640f --- /dev/null +++ b/libsoup/soup-connection.c @@ -0,0 +1,820 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * soup-connection.c: A single HTTP/HTTPS connection + * + * Copyright (C) 2000-2003, Ximian, Inc. + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include +#include +#include + +#include +#include + +#include "soup-address.h" +#include "soup-connection.h" +#include "soup-marshal.h" +#include "soup-message.h" +#include "soup-message-private.h" +#include "soup-message-queue.h" +#include "soup-misc.h" +#include "soup-misc-private.h" +#include "soup-socket.h" +#include "soup-ssl.h" +#include "soup-uri.h" +#include "soup-enum-types.h" + +typedef struct { + SoupSocket *socket; + + SoupAddress *remote_addr, *tunnel_addr; + SoupURI *proxy_uri; + gpointer ssl_creds; + gboolean ssl_strict; + gboolean ssl_fallback; + + GMainContext *async_context; + + SoupMessageQueueItem *cur_item; + SoupConnectionState state; + time_t unused_timeout; + guint io_timeout, idle_timeout; + GSource *idle_timeout_src; +} SoupConnectionPrivate; +#define SOUP_CONNECTION_GET_PRIVATE(o) (G_TYPE_INSTANCE_GET_PRIVATE ((o), SOUP_TYPE_CONNECTION, SoupConnectionPrivate)) + +G_DEFINE_TYPE (SoupConnection, soup_connection, G_TYPE_OBJECT) + +enum { + DISCONNECTED, + LAST_SIGNAL +}; + +static guint signals[LAST_SIGNAL] = { 0 }; + +enum { + PROP_0, + + PROP_REMOTE_ADDRESS, + PROP_TUNNEL_ADDRESS, + PROP_PROXY_URI, + PROP_SSL_CREDS, + PROP_SSL_STRICT, + PROP_SSL_FALLBACK, + PROP_ASYNC_CONTEXT, + PROP_TIMEOUT, + PROP_IDLE_TIMEOUT, + PROP_STATE, + PROP_MESSAGE, + + LAST_PROP +}; + +static void set_property (GObject *object, guint prop_id, + const GValue *value, GParamSpec *pspec); +static void get_property (GObject *object, guint prop_id, + GValue *value, GParamSpec *pspec); + 
+static void stop_idle_timer (SoupConnectionPrivate *priv); +static void clear_current_item (SoupConnection *conn); + +/* Number of seconds after which we close a connection that hasn't yet + * been used. + */ +#define SOUP_CONNECTION_UNUSED_TIMEOUT 3 + +static void +soup_connection_init (SoupConnection *conn) +{ + ; +} + +static void +finalize (GObject *object) +{ + SoupConnectionPrivate *priv = SOUP_CONNECTION_GET_PRIVATE (object); + + if (priv->remote_addr) + g_object_unref (priv->remote_addr); + if (priv->tunnel_addr) + g_object_unref (priv->tunnel_addr); + if (priv->proxy_uri) + soup_uri_free (priv->proxy_uri); + + if (priv->async_context) + g_main_context_unref (priv->async_context); + + G_OBJECT_CLASS (soup_connection_parent_class)->finalize (object); +} + +static void +dispose (GObject *object) +{ + SoupConnection *conn = SOUP_CONNECTION (object); + SoupConnectionPrivate *priv = SOUP_CONNECTION_GET_PRIVATE (conn); + + stop_idle_timer (priv); + /* Make sure clear_current_item doesn't re-establish the timeout */ + priv->idle_timeout = 0; + + if (priv->cur_item) { + g_warning ("Disposing connection with cur_item set"); + clear_current_item (conn); + } + if (priv->socket) { + g_warning ("Disposing connection while connected"); + soup_connection_disconnect (conn); + } + + G_OBJECT_CLASS (soup_connection_parent_class)->dispose (object); +} + +static void +soup_connection_class_init (SoupConnectionClass *connection_class) +{ + GObjectClass *object_class = G_OBJECT_CLASS (connection_class); + + g_type_class_add_private (connection_class, sizeof (SoupConnectionPrivate)); + + /* virtual method override */ + object_class->dispose = dispose; + object_class->finalize = finalize; + object_class->set_property = set_property; + object_class->get_property = get_property; + + /* signals */ + signals[DISCONNECTED] = + g_signal_new ("disconnected", + G_OBJECT_CLASS_TYPE (object_class), + G_SIGNAL_RUN_FIRST, + G_STRUCT_OFFSET (SoupConnectionClass, disconnected), + NULL, NULL, + 
soup_marshal_NONE__NONE, + G_TYPE_NONE, 0); + + /* properties */ + g_object_class_install_property ( + object_class, PROP_REMOTE_ADDRESS, + g_param_spec_object (SOUP_CONNECTION_REMOTE_ADDRESS, + "Remote address", + "The address of the HTTP or proxy server", + SOUP_TYPE_ADDRESS, + G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property ( + object_class, PROP_TUNNEL_ADDRESS, + g_param_spec_object (SOUP_CONNECTION_TUNNEL_ADDRESS, + "Tunnel address", + "The address of the HTTPS server this tunnel connects to", + SOUP_TYPE_ADDRESS, + G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property ( + object_class, PROP_PROXY_URI, + g_param_spec_boxed (SOUP_CONNECTION_PROXY_URI, + "Proxy URI", + "URI of the HTTP proxy this connection connects to", + SOUP_TYPE_URI, + G_PARAM_READWRITE)); + g_object_class_install_property ( + object_class, PROP_SSL_CREDS, + g_param_spec_pointer (SOUP_CONNECTION_SSL_CREDENTIALS, + "SSL credentials", + "Opaque SSL credentials for this connection", + G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property ( + object_class, PROP_SSL_STRICT, + g_param_spec_boolean (SOUP_CONNECTION_SSL_STRICT, + "Strictly validate SSL certificates", + "Whether certificate errors should be considered a connection error", + TRUE, + G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property ( + object_class, PROP_SSL_FALLBACK, + g_param_spec_boolean (SOUP_CONNECTION_SSL_FALLBACK, + "SSLv3 fallback", + "Use SSLv3 instead of TLS", + FALSE, + G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property ( + object_class, PROP_ASYNC_CONTEXT, + g_param_spec_pointer (SOUP_CONNECTION_ASYNC_CONTEXT, + "Async GMainContext", + "GMainContext to dispatch this connection's async I/O in", + G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property ( + object_class, PROP_TIMEOUT, + g_param_spec_uint (SOUP_CONNECTION_TIMEOUT, + "Timeout value", + "Value in 
seconds to timeout a blocking I/O", + 0, G_MAXUINT, 0, + G_PARAM_READWRITE)); + g_object_class_install_property ( + object_class, PROP_IDLE_TIMEOUT, + g_param_spec_uint (SOUP_CONNECTION_IDLE_TIMEOUT, + "Idle Timeout", + "Connection lifetime when idle", + 0, G_MAXUINT, 0, + G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property ( + object_class, PROP_STATE, + g_param_spec_enum (SOUP_CONNECTION_STATE, + "Connection state", + "Current state of connection", + SOUP_TYPE_CONNECTION_STATE, SOUP_CONNECTION_NEW, + G_PARAM_READWRITE)); + g_object_class_install_property ( + object_class, PROP_MESSAGE, + g_param_spec_object (SOUP_CONNECTION_MESSAGE, + "Message", + "Message being processed", + SOUP_TYPE_MESSAGE, + G_PARAM_READABLE)); +} + + +SoupConnection * +soup_connection_new (const char *propname1, ...) +{ + SoupConnection *conn; + va_list ap; + + va_start (ap, propname1); + conn = (SoupConnection *)g_object_new_valist (SOUP_TYPE_CONNECTION, + propname1, ap); + va_end (ap); + + return conn; +} + +static void +set_property (GObject *object, guint prop_id, + const GValue *value, GParamSpec *pspec) +{ + SoupConnectionPrivate *priv = SOUP_CONNECTION_GET_PRIVATE (object); + + switch (prop_id) { + case PROP_REMOTE_ADDRESS: + priv->remote_addr = g_value_dup_object (value); + break; + case PROP_TUNNEL_ADDRESS: + priv->tunnel_addr = g_value_dup_object (value); + break; + case PROP_PROXY_URI: + if (priv->proxy_uri) + soup_uri_free (priv->proxy_uri); + priv->proxy_uri = g_value_dup_boxed (value); + break; + case PROP_SSL_CREDS: + priv->ssl_creds = g_value_get_pointer (value); + break; + case PROP_SSL_STRICT: + priv->ssl_strict = g_value_get_boolean (value); + break; + case PROP_SSL_FALLBACK: + priv->ssl_fallback = g_value_get_boolean (value); + break; + case PROP_ASYNC_CONTEXT: + priv->async_context = g_value_get_pointer (value); + if (priv->async_context) + g_main_context_ref (priv->async_context); + break; + case PROP_TIMEOUT: + priv->io_timeout = 
g_value_get_uint (value); + break; + case PROP_IDLE_TIMEOUT: + priv->idle_timeout = g_value_get_uint (value); + break; + case PROP_STATE: + soup_connection_set_state (SOUP_CONNECTION (object), g_value_get_uint (value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); + break; + } +} + +static void +get_property (GObject *object, guint prop_id, + GValue *value, GParamSpec *pspec) +{ + SoupConnectionPrivate *priv = SOUP_CONNECTION_GET_PRIVATE (object); + + switch (prop_id) { + case PROP_REMOTE_ADDRESS: + g_value_set_object (value, priv->remote_addr); + break; + case PROP_TUNNEL_ADDRESS: + g_value_set_object (value, priv->tunnel_addr); + break; + case PROP_PROXY_URI: + g_value_set_boxed (value, priv->proxy_uri); + break; + case PROP_SSL_CREDS: + g_value_set_pointer (value, priv->ssl_creds); + break; + case PROP_SSL_STRICT: + g_value_set_boolean (value, priv->ssl_strict); + break; + case PROP_SSL_FALLBACK: + g_value_set_boolean (value, priv->ssl_fallback); + break; + case PROP_ASYNC_CONTEXT: + g_value_set_pointer (value, priv->async_context ? 
g_main_context_ref (priv->async_context) : NULL); + break; + case PROP_TIMEOUT: + g_value_set_uint (value, priv->io_timeout); + break; + case PROP_IDLE_TIMEOUT: + g_value_set_uint (value, priv->idle_timeout); + break; + case PROP_STATE: + g_value_set_enum (value, priv->state); + break; + case PROP_MESSAGE: + if (priv->cur_item) + g_value_set_object (value, priv->cur_item->msg); + else + g_value_set_object (value, NULL); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); + break; + } +} + +static gboolean +idle_timeout (gpointer conn) +{ + soup_connection_disconnect (conn); + return FALSE; +} + +static void +start_idle_timer (SoupConnection *conn) +{ + SoupConnectionPrivate *priv = SOUP_CONNECTION_GET_PRIVATE (conn); + + if (priv->idle_timeout > 0 && !priv->idle_timeout_src) { + priv->idle_timeout_src = + soup_add_timeout (priv->async_context, + priv->idle_timeout * 1000, + idle_timeout, conn); + } +} + +static void +stop_idle_timer (SoupConnectionPrivate *priv) +{ + if (priv->idle_timeout_src) { + g_source_destroy (priv->idle_timeout_src); + priv->idle_timeout_src = NULL; + } +} + +static void +set_current_item (SoupConnection *conn, SoupMessageQueueItem *item) +{ + SoupConnectionPrivate *priv = SOUP_CONNECTION_GET_PRIVATE (conn); + + g_return_if_fail (priv->cur_item == NULL); + + g_object_freeze_notify (G_OBJECT (conn)); + + stop_idle_timer (priv); + + item->state = SOUP_MESSAGE_RUNNING; + priv->cur_item = item; + g_object_notify (G_OBJECT (conn), "message"); + + if (priv->state == SOUP_CONNECTION_IDLE || + item->msg->method != SOUP_METHOD_CONNECT) + soup_connection_set_state (conn, SOUP_CONNECTION_IN_USE); + + g_object_thaw_notify (G_OBJECT (conn)); +} + +static void +clear_current_item (SoupConnection *conn) +{ + SoupConnectionPrivate *priv = SOUP_CONNECTION_GET_PRIVATE (conn); + + g_object_freeze_notify (G_OBJECT (conn)); + + priv->unused_timeout = 0; + start_idle_timer (conn); + + if (priv->cur_item) { + SoupMessageQueueItem 
*item; + + item = priv->cur_item; + priv->cur_item = NULL; + g_object_notify (G_OBJECT (conn), "message"); + + if (item->msg->method == SOUP_METHOD_CONNECT && + SOUP_STATUS_IS_SUCCESSFUL (item->msg->status_code)) { + /* We're now effectively no longer proxying */ + soup_uri_free (priv->proxy_uri); + priv->proxy_uri = NULL; + } + + if (!soup_message_is_keepalive (item->msg)) + soup_connection_disconnect (conn); + } + + g_object_thaw_notify (G_OBJECT (conn)); +} + +static void +socket_disconnected (SoupSocket *sock, gpointer conn) +{ + soup_connection_disconnect (conn); +} + +typedef struct { + SoupConnection *conn; + SoupConnectionCallback callback; + gpointer callback_data; + GCancellable *cancellable; +} SoupConnectionAsyncConnectData; + +static void +socket_connect_finished (SoupSocket *socket, guint status, gpointer user_data) +{ + SoupConnectionAsyncConnectData *data = user_data; + SoupConnectionPrivate *priv = SOUP_CONNECTION_GET_PRIVATE (data->conn); + + if (SOUP_STATUS_IS_SUCCESSFUL (status)) { + g_signal_connect (priv->socket, "disconnected", + G_CALLBACK (socket_disconnected), data->conn); + + soup_connection_set_state (data->conn, SOUP_CONNECTION_IN_USE); + priv->unused_timeout = time (NULL) + SOUP_CONNECTION_UNUSED_TIMEOUT; + start_idle_timer (data->conn); + } else if (status == SOUP_STATUS_TLS_FAILED) { + priv->ssl_fallback = TRUE; + status = SOUP_STATUS_TRY_AGAIN; + } + + if (data->callback) { + if (priv->proxy_uri != NULL) + status = soup_status_proxify (status); + data->callback (data->conn, status, data->callback_data); + } + g_object_unref (data->conn); + if (data->cancellable) + g_object_unref (data->cancellable); + g_slice_free (SoupConnectionAsyncConnectData, data); +} + +static void +socket_connect_result (SoupSocket *sock, guint status, gpointer user_data) +{ + SoupConnectionAsyncConnectData *data = user_data; + SoupConnectionPrivate *priv = SOUP_CONNECTION_GET_PRIVATE (data->conn); + + if (SOUP_STATUS_IS_SUCCESSFUL (status) && + 
priv->ssl_creds && !priv->tunnel_addr) { + if (soup_socket_start_ssl (sock, data->cancellable)) { + soup_socket_handshake_async (sock, data->cancellable, + socket_connect_finished, data); + return; + } + + status = SOUP_STATUS_SSL_FAILED; + } + + socket_connect_finished (sock, status, data); +} + +void +soup_connection_connect_async (SoupConnection *conn, + GCancellable *cancellable, + SoupConnectionCallback callback, + gpointer user_data) +{ + SoupConnectionAsyncConnectData *data; + SoupConnectionPrivate *priv; + + g_return_if_fail (SOUP_IS_CONNECTION (conn)); + priv = SOUP_CONNECTION_GET_PRIVATE (conn); + g_return_if_fail (priv->socket == NULL); + + soup_connection_set_state (conn, SOUP_CONNECTION_CONNECTING); + + data = g_slice_new (SoupConnectionAsyncConnectData); + data->conn = g_object_ref (conn); + data->callback = callback; + data->callback_data = user_data; + data->cancellable = cancellable ? g_object_ref (cancellable) : NULL; + + priv->socket = + soup_socket_new (SOUP_SOCKET_REMOTE_ADDRESS, priv->remote_addr, + SOUP_SOCKET_SSL_CREDENTIALS, priv->ssl_creds, + SOUP_SOCKET_SSL_STRICT, priv->ssl_strict, + SOUP_SOCKET_SSL_FALLBACK, priv->ssl_fallback, + SOUP_SOCKET_ASYNC_CONTEXT, priv->async_context, + SOUP_SOCKET_TIMEOUT, priv->io_timeout, + "clean-dispose", TRUE, + NULL); + soup_socket_connect_async (priv->socket, cancellable, + socket_connect_result, data); +} + +guint +soup_connection_connect_sync (SoupConnection *conn, GCancellable *cancellable) +{ + SoupConnectionPrivate *priv; + guint status; + + g_return_val_if_fail (SOUP_IS_CONNECTION (conn), SOUP_STATUS_MALFORMED); + priv = SOUP_CONNECTION_GET_PRIVATE (conn); + g_return_val_if_fail (priv->socket == NULL, SOUP_STATUS_MALFORMED); + + soup_connection_set_state (conn, SOUP_CONNECTION_CONNECTING); + + priv->socket = + soup_socket_new (SOUP_SOCKET_REMOTE_ADDRESS, priv->remote_addr, + SOUP_SOCKET_SSL_CREDENTIALS, priv->ssl_creds, + SOUP_SOCKET_SSL_STRICT, priv->ssl_strict, + SOUP_SOCKET_SSL_FALLBACK, 
priv->ssl_fallback, + SOUP_SOCKET_FLAG_NONBLOCKING, FALSE, + SOUP_SOCKET_TIMEOUT, priv->io_timeout, + "clean-dispose", TRUE, + NULL); + + status = soup_socket_connect_sync (priv->socket, cancellable); + + if (!SOUP_STATUS_IS_SUCCESSFUL (status)) + goto fail; + + if (priv->ssl_creds && !priv->tunnel_addr) { + if (!soup_socket_start_ssl (priv->socket, cancellable)) + status = SOUP_STATUS_SSL_FAILED; + else { + status = soup_socket_handshake_sync (priv->socket, cancellable); + if (status == SOUP_STATUS_TLS_FAILED) { + priv->ssl_fallback = TRUE; + status = SOUP_STATUS_TRY_AGAIN; + } + } + } + + if (SOUP_STATUS_IS_SUCCESSFUL (status)) { + g_signal_connect (priv->socket, "disconnected", + G_CALLBACK (socket_disconnected), conn); + + soup_connection_set_state (conn, SOUP_CONNECTION_IN_USE); + priv->unused_timeout = time (NULL) + SOUP_CONNECTION_UNUSED_TIMEOUT; + start_idle_timer (conn); + } else { + fail: + if (priv->socket) { + soup_socket_disconnect (priv->socket); + g_object_unref (priv->socket); + priv->socket = NULL; + } + } + + if (priv->proxy_uri != NULL) + status = soup_status_proxify (status); + return status; +} + +SoupAddress * +soup_connection_get_tunnel_addr (SoupConnection *conn) +{ + SoupConnectionPrivate *priv; + + g_return_val_if_fail (SOUP_IS_CONNECTION (conn), NULL); + priv = SOUP_CONNECTION_GET_PRIVATE (conn); + + return priv->tunnel_addr; +} + +guint +soup_connection_start_ssl_sync (SoupConnection *conn, + GCancellable *cancellable) +{ + SoupConnectionPrivate *priv; + const char *server_name; + guint status; + + /* Returns a status code, not a boolean: fail the same way soup_connection_connect_sync() does. */ + g_return_val_if_fail (SOUP_IS_CONNECTION (conn), SOUP_STATUS_MALFORMED); + priv = SOUP_CONNECTION_GET_PRIVATE (conn); + + server_name = soup_address_get_name (priv->tunnel_addr ? 
+ priv->tunnel_addr : + priv->remote_addr); + if (!soup_socket_start_proxy_ssl (priv->socket, server_name, + cancellable)) + return SOUP_STATUS_SSL_FAILED; + + status = soup_socket_handshake_sync (priv->socket, cancellable); + if (status == SOUP_STATUS_TLS_FAILED) { + priv->ssl_fallback = TRUE; + status = SOUP_STATUS_TRY_AGAIN; + } + + return status; +} + +static void +start_ssl_completed (SoupSocket *socket, guint status, gpointer user_data) +{ + SoupConnectionAsyncConnectData *data = user_data; + SoupConnectionPrivate *priv = SOUP_CONNECTION_GET_PRIVATE (data->conn); + + if (status == SOUP_STATUS_TLS_FAILED) { + priv->ssl_fallback = TRUE; + status = SOUP_STATUS_TRY_AGAIN; + } + + data->callback (data->conn, status, data->callback_data); + g_object_unref (data->conn); + g_slice_free (SoupConnectionAsyncConnectData, data); +} + +static gboolean +idle_start_ssl_completed (gpointer user_data) +{ + SoupConnectionAsyncConnectData *data = user_data; + + start_ssl_completed (NULL, SOUP_STATUS_SSL_FAILED, data); + return FALSE; +} + +void +soup_connection_start_ssl_async (SoupConnection *conn, + GCancellable *cancellable, + SoupConnectionCallback callback, + gpointer user_data) +{ + SoupConnectionPrivate *priv; + const char *server_name; + SoupConnectionAsyncConnectData *data; + + g_return_if_fail (SOUP_IS_CONNECTION (conn)); + priv = SOUP_CONNECTION_GET_PRIVATE (conn); + + data = g_slice_new (SoupConnectionAsyncConnectData); + data->conn = g_object_ref (conn); + data->callback = callback; + data->callback_data = user_data; + + server_name = soup_address_get_name (priv->tunnel_addr ? 
+ priv->tunnel_addr : + priv->remote_addr); + if (!soup_socket_start_proxy_ssl (priv->socket, server_name, + cancellable)) { + soup_add_completion (priv->async_context, + idle_start_ssl_completed, data); + return; + } + + soup_socket_handshake_async (priv->socket, cancellable, + start_ssl_completed, data); +} + +/** + * soup_connection_disconnect: + * @conn: a connection + * + * Disconnects @conn's socket and emits a %disconnected signal. + * After calling this, @conn will be essentially useless. + **/ +void +soup_connection_disconnect (SoupConnection *conn) +{ + SoupConnectionPrivate *priv; + SoupConnectionState old_state; + + g_return_if_fail (SOUP_IS_CONNECTION (conn)); + priv = SOUP_CONNECTION_GET_PRIVATE (conn); + + old_state = priv->state; + if (old_state != SOUP_CONNECTION_DISCONNECTED) + soup_connection_set_state (conn, SOUP_CONNECTION_DISCONNECTED); + + if (priv->socket) { + g_signal_handlers_disconnect_by_func (priv->socket, + socket_disconnected, conn); + soup_socket_disconnect (priv->socket); + g_object_unref (priv->socket); + priv->socket = NULL; + } + + if (old_state != SOUP_CONNECTION_DISCONNECTED) + g_signal_emit (conn, signals[DISCONNECTED], 0); +} + +SoupSocket * +soup_connection_get_socket (SoupConnection *conn) +{ + g_return_val_if_fail (SOUP_IS_CONNECTION (conn), NULL); + + return SOUP_CONNECTION_GET_PRIVATE (conn)->socket; +} + +SoupURI * +soup_connection_get_proxy_uri (SoupConnection *conn) +{ + g_return_val_if_fail (SOUP_IS_CONNECTION (conn), NULL); + + return SOUP_CONNECTION_GET_PRIVATE (conn)->proxy_uri; +} + +gboolean +soup_connection_is_via_proxy (SoupConnection *conn) +{ + g_return_val_if_fail (SOUP_IS_CONNECTION (conn), FALSE); + + return SOUP_CONNECTION_GET_PRIVATE (conn)->proxy_uri != NULL; +} + +SoupConnectionState +soup_connection_get_state (SoupConnection *conn) +{ + SoupConnectionPrivate *priv; + + g_return_val_if_fail (SOUP_IS_CONNECTION (conn), + SOUP_CONNECTION_DISCONNECTED); + priv = SOUP_CONNECTION_GET_PRIVATE (conn); + 
+#ifdef G_OS_UNIX + if (priv->state == SOUP_CONNECTION_IDLE) { + GPollFD pfd; + + pfd.fd = soup_socket_get_fd (priv->socket); + pfd.events = G_IO_IN; + pfd.revents = 0; + if (g_poll (&pfd, 1, 0) == 1) + soup_connection_set_state (conn, SOUP_CONNECTION_REMOTE_DISCONNECTED); + } +#endif + if (priv->state == SOUP_CONNECTION_IDLE && + priv->unused_timeout && priv->unused_timeout < time (NULL)) + soup_connection_set_state (conn, SOUP_CONNECTION_REMOTE_DISCONNECTED); + + return priv->state; +} + +void +soup_connection_set_state (SoupConnection *conn, SoupConnectionState state) +{ + SoupConnectionPrivate *priv; + SoupConnectionState old_state; + + g_return_if_fail (SOUP_IS_CONNECTION (conn)); + g_return_if_fail (state >= SOUP_CONNECTION_NEW && + state <= SOUP_CONNECTION_DISCONNECTED); + + g_object_freeze_notify (G_OBJECT (conn)); + + priv = SOUP_CONNECTION_GET_PRIVATE (conn); + old_state = priv->state; + priv->state = state; + if ((state == SOUP_CONNECTION_IDLE || + state == SOUP_CONNECTION_DISCONNECTED) && + old_state == SOUP_CONNECTION_IN_USE) + clear_current_item (conn); + + g_object_notify (G_OBJECT (conn), "state"); + g_object_thaw_notify (G_OBJECT (conn)); +} + +gboolean +soup_connection_get_ever_used (SoupConnection *conn) +{ + g_return_val_if_fail (SOUP_IS_CONNECTION (conn), FALSE); + + return SOUP_CONNECTION_GET_PRIVATE (conn)->unused_timeout == 0; +} + +gboolean +soup_connection_get_ssl_fallback (SoupConnection *conn) +{ + return SOUP_CONNECTION_GET_PRIVATE (conn)->ssl_fallback; +} + +void +soup_connection_send_request (SoupConnection *conn, + SoupMessageQueueItem *item, + SoupMessageCompletionFn completion_cb, + gpointer user_data) +{ + SoupConnectionPrivate *priv; + + g_return_if_fail (SOUP_IS_CONNECTION (conn)); + g_return_if_fail (item != NULL); + priv = SOUP_CONNECTION_GET_PRIVATE (conn); + g_return_if_fail (priv->state != SOUP_CONNECTION_NEW && priv->state != SOUP_CONNECTION_DISCONNECTED); + + if (item != priv->cur_item) + set_current_item (conn, item); + 
soup_message_send_request (item, completion_cb, user_data); +} diff --git a/libsoup/soup-connection.h b/libsoup/soup-connection.h new file mode 100644 index 0000000..ef304e7 --- /dev/null +++ b/libsoup/soup-connection.h @@ -0,0 +1,92 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2000-2003, Ximian, Inc. + */ + +#ifndef SOUP_CONNECTION_H +#define SOUP_CONNECTION_H 1 + +#include "soup-types.h" +#include "soup-message-private.h" +#include "soup-misc.h" + +G_BEGIN_DECLS + +#define SOUP_TYPE_CONNECTION (soup_connection_get_type ()) +#define SOUP_CONNECTION(obj) (G_TYPE_CHECK_INSTANCE_CAST ((obj), SOUP_TYPE_CONNECTION, SoupConnection)) +#define SOUP_CONNECTION_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST ((klass), SOUP_TYPE_CONNECTION, SoupConnectionClass)) +#define SOUP_IS_CONNECTION(obj) (G_TYPE_CHECK_INSTANCE_TYPE ((obj), SOUP_TYPE_CONNECTION)) +#define SOUP_IS_CONNECTION_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), SOUP_TYPE_CONNECTION)) +#define SOUP_CONNECTION_GET_CLASS(obj) (G_TYPE_INSTANCE_GET_CLASS ((obj), SOUP_TYPE_CONNECTION, SoupConnectionClass)) + +struct _SoupConnection { + GObject parent; + +}; + +typedef struct { + GObjectClass parent_class; + + /* signals */ + void (*disconnected) (SoupConnection *); + +} SoupConnectionClass; + +GType soup_connection_get_type (void); + + +typedef void (*SoupConnectionCallback) (SoupConnection *conn, + guint status, + gpointer data); + +#define SOUP_CONNECTION_REMOTE_ADDRESS "remote-address" +#define SOUP_CONNECTION_TUNNEL_ADDRESS "tunnel-address" +#define SOUP_CONNECTION_PROXY_URI "proxy-uri" +#define SOUP_CONNECTION_SSL_CREDENTIALS "ssl-creds" +#define SOUP_CONNECTION_SSL_STRICT "ssl-strict" +#define SOUP_CONNECTION_SSL_FALLBACK "ssl-fallback" +#define SOUP_CONNECTION_ASYNC_CONTEXT "async-context" +#define SOUP_CONNECTION_TIMEOUT "timeout" +#define SOUP_CONNECTION_IDLE_TIMEOUT "idle-timeout" +#define SOUP_CONNECTION_STATE "state" +#define SOUP_CONNECTION_MESSAGE "message" 
+ +SoupConnection *soup_connection_new (const char *propname1, + ...) G_GNUC_NULL_TERMINATED; + +void soup_connection_connect_async (SoupConnection *conn, + GCancellable *cancellable, + SoupConnectionCallback callback, + gpointer user_data); +guint soup_connection_connect_sync (SoupConnection *conn, + GCancellable *cancellable); +SoupAddress *soup_connection_get_tunnel_addr(SoupConnection *conn); +guint soup_connection_start_ssl_sync (SoupConnection *conn, + GCancellable *cancellable); +void soup_connection_start_ssl_async (SoupConnection *conn, + GCancellable *cancellable, + SoupConnectionCallback callback, + gpointer user_data); + +void soup_connection_disconnect (SoupConnection *conn); + +SoupSocket *soup_connection_get_socket (SoupConnection *conn); +SoupURI *soup_connection_get_proxy_uri (SoupConnection *conn); +gboolean soup_connection_is_via_proxy (SoupConnection *conn); + +SoupConnectionState soup_connection_get_state (SoupConnection *conn); +void soup_connection_set_state (SoupConnection *conn, + SoupConnectionState state); + +gboolean soup_connection_get_ever_used (SoupConnection *conn); + +void soup_connection_send_request (SoupConnection *conn, + SoupMessageQueueItem *item, + SoupMessageCompletionFn completion_cb, + gpointer user_data); + +gboolean soup_connection_get_ssl_fallback (SoupConnection *conn); + +G_END_DECLS + +#endif /* SOUP_CONNECTION_H */ diff --git a/libsoup/soup-content-decoder.c b/libsoup/soup-content-decoder.c new file mode 100644 index 0000000..3ab240c --- /dev/null +++ b/libsoup/soup-content-decoder.c @@ -0,0 +1,205 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * soup-content-decoder.c + * + * Copyright (C) 2009 Red Hat, Inc. 
+ */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include + +#include "soup-content-decoder.h" +#include "soup-enum-types.h" +#include "soup-message.h" +#include "soup-message-private.h" +#include "soup-session-feature.h" +#include "soup-uri.h" + +/** + * SECTION:soup-content-decoder + * @short_description: Content-Encoding handler + * + * #SoupContentDecoder handles the "Accept-Encoding" header on + * outgoing messages, and the "Content-Encoding" header on incoming + * ones. If you add it to a session with soup_session_add_feature() or + * soup_session_add_feature_by_type(), the session will automatically + * use Content-Encoding as appropriate. + * + * (Note that currently there is no way to (automatically) use + * Content-Encoding when sending a request body, or to pick specific + * encoding types to support.) + * + * If #SoupContentDecoder successfully decodes the Content-Encoding, + * it will set the %SOUP_MESSAGE_CONTENT_DECODED flag on the message, + * and the message body and the chunks in the #SoupMessage::got_chunk + * signals will contain the decoded data; however, the message headers + * will be unchanged (and so "Content-Encoding" will still be present, + * "Content-Length" will describe the original encoded length, etc). + * + * If "Content-Encoding" contains any encoding types that + * #SoupContentDecoder doesn't recognize, then none of the encodings + * will be decoded (and the %SOUP_MESSAGE_CONTENT_DECODED flag will + * not be set). 
+ * + * Since: 2.28.2 + **/ + +struct _SoupContentDecoderPrivate { + GHashTable *decoders; +}; + +typedef GConverter * (*SoupContentDecoderCreator) (void); + +static void soup_content_decoder_session_feature_init (SoupSessionFeatureInterface *feature_interface, gpointer interface_data); + +static void request_queued (SoupSessionFeature *feature, SoupSession *session, SoupMessage *msg); +static void request_unqueued (SoupSessionFeature *feature, SoupSession *session, SoupMessage *msg); + +static void finalize (GObject *object); + +G_DEFINE_TYPE_WITH_CODE (SoupContentDecoder, soup_content_decoder, G_TYPE_OBJECT, + G_IMPLEMENT_INTERFACE (SOUP_TYPE_SESSION_FEATURE, + soup_content_decoder_session_feature_init)) + +/* This is constant for now */ +#define ACCEPT_ENCODING_HEADER "gzip" + +static GConverter * +gzip_decoder_creator (void) +{ + return (GConverter *)g_zlib_decompressor_new (G_ZLIB_COMPRESSOR_FORMAT_GZIP); +} + +static void +soup_content_decoder_init (SoupContentDecoder *decoder) +{ + decoder->priv = G_TYPE_INSTANCE_GET_PRIVATE (decoder, + SOUP_TYPE_CONTENT_DECODER, + SoupContentDecoderPrivate); + + decoder->priv->decoders = g_hash_table_new (g_str_hash, g_str_equal); + /* Hardcoded for now */ + g_hash_table_insert (decoder->priv->decoders, "gzip", + gzip_decoder_creator); + g_hash_table_insert (decoder->priv->decoders, "x-gzip", + gzip_decoder_creator); +} + +static void +soup_content_decoder_class_init (SoupContentDecoderClass *decoder_class) +{ + GObjectClass *object_class = G_OBJECT_CLASS (decoder_class); + + g_type_class_add_private (decoder_class, sizeof (SoupContentDecoderPrivate)); + + object_class->finalize = finalize; +} + +static void +soup_content_decoder_session_feature_init (SoupSessionFeatureInterface *feature_interface, + gpointer interface_data) +{ + feature_interface->request_queued = request_queued; + feature_interface->request_unqueued = request_unqueued; +} + +static void +finalize (GObject *object) +{ + SoupContentDecoder *decoder = 
SOUP_CONTENT_DECODER (object); + + g_hash_table_destroy (decoder->priv->decoders); + + G_OBJECT_CLASS (soup_content_decoder_parent_class)->finalize (object); +} + +static void +soup_content_decoder_got_headers_cb (SoupMessage *msg, SoupContentDecoder *decoder) +{ + SoupMessagePrivate *msgpriv = SOUP_MESSAGE_GET_PRIVATE (msg); + const char *header; + GSList *encodings, *e; + SoupContentDecoderCreator converter_creator; + GConverter *converter; + + header = soup_message_headers_get_list (msg->response_headers, + "Content-Encoding"); + if (!header) + return; + + /* Workaround for an apache bug (bgo 613361) */ + if (!g_ascii_strcasecmp (header, "gzip")) { + const char *content_type = soup_message_headers_get_content_type (msg->response_headers, NULL); + + if (content_type && + (!g_ascii_strcasecmp (content_type, "application/gzip") || + !g_ascii_strcasecmp (content_type, "application/x-gzip"))) + return; + } + + /* OK, really, no one is ever going to use more than one + * encoding, but we'll be robust. + */ + encodings = soup_header_parse_list (header); + if (!encodings) + return; + + for (e = encodings; e; e = e->next) { + if (!g_hash_table_lookup (decoder->priv->decoders, e->data)) { + soup_header_free_list (encodings); + return; + } + } + + /* msgpriv->decoders should be empty at this point anyway, but + * clean it up if it's not. + */ + while (msgpriv->decoders) { + g_object_unref (msgpriv->decoders->data); + msgpriv->decoders = g_slist_delete_link (msgpriv->decoders, msgpriv->decoders); + } + + for (e = encodings; e; e = e->next) { + converter_creator = g_hash_table_lookup (decoder->priv->decoders, e->data); + converter = converter_creator (); + + /* Content-Encoding lists the codings in the order + * they were applied in, so we put decoders in reverse + * order so the last-applied will be the first + * decoded. 
+ */ + msgpriv->decoders = g_slist_prepend (msgpriv->decoders, converter); + } + soup_header_free_list (encodings); + + soup_message_set_flags (msg, msgpriv->msg_flags | SOUP_MESSAGE_CONTENT_DECODED); +} + +static void +request_queued (SoupSessionFeature *feature, SoupSession *session, + SoupMessage *msg) +{ + SoupContentDecoder *decoder = SOUP_CONTENT_DECODER (feature); + + if (!soup_message_headers_get_one (msg->request_headers, + "Accept-Encoding")) { + soup_message_headers_append (msg->request_headers, + "Accept-Encoding", + ACCEPT_ENCODING_HEADER); + } + + g_signal_connect (msg, "got-headers", + G_CALLBACK (soup_content_decoder_got_headers_cb), + decoder); +} + +static void +request_unqueued (SoupSessionFeature *feature, SoupSession *session, + SoupMessage *msg) +{ + g_signal_handlers_disconnect_by_func (msg, soup_content_decoder_got_headers_cb, feature); +} diff --git a/libsoup/soup-content-decoder.h b/libsoup/soup-content-decoder.h new file mode 100644 index 0000000..e0b2238 --- /dev/null +++ b/libsoup/soup-content-decoder.h @@ -0,0 +1,44 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2009 Red Hat, Inc. 
+ */ + +#ifndef SOUP_CONTENT_DECODER_H +#define SOUP_CONTENT_DECODER_H 1 + +#include +#include + +G_BEGIN_DECLS + +#define SOUP_TYPE_CONTENT_DECODER (soup_content_decoder_get_type ()) +#define SOUP_CONTENT_DECODER(obj) (G_TYPE_CHECK_INSTANCE_CAST ((obj), SOUP_TYPE_CONTENT_DECODER, SoupContentDecoder)) +#define SOUP_CONTENT_DECODER_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST ((klass), SOUP_TYPE_CONTENT_DECODER, SoupContentDecoderClass)) +#define SOUP_IS_CONTENT_DECODER(obj) (G_TYPE_CHECK_INSTANCE_TYPE ((obj), SOUP_TYPE_CONTENT_DECODER)) +#define SOUP_IS_CONTENT_DECODER_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), SOUP_TYPE_CONTENT_DECODER)) +#define SOUP_CONTENT_DECODER_GET_CLASS(obj) (G_TYPE_INSTANCE_GET_CLASS ((obj), SOUP_TYPE_CONTENT_DECODER, SoupContentDecoderClass)) + +typedef struct _SoupContentDecoderPrivate SoupContentDecoderPrivate; + +typedef struct { + GObject parent; + + SoupContentDecoderPrivate *priv; +} SoupContentDecoder; + +typedef struct { + GObjectClass parent_class; + + /* Padding for future expansion */ + void (*_libsoup_reserved1) (void); + void (*_libsoup_reserved2) (void); + void (*_libsoup_reserved3) (void); + void (*_libsoup_reserved4) (void); + void (*_libsoup_reserved5) (void); +} SoupContentDecoderClass; + +GType soup_content_decoder_get_type (void); + +G_END_DECLS + +#endif /* SOUP_CONTENT_DECODER_H */ diff --git a/libsoup/soup-content-sniffer.c b/libsoup/soup-content-sniffer.c new file mode 100644 index 0000000..f551e94 --- /dev/null +++ b/libsoup/soup-content-sniffer.c @@ -0,0 +1,841 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * soup-content-sniffer.c + * + * Copyright (C) 2009 Gustavo Noronha Silva. 
+ */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include + +#include "soup-content-sniffer.h" +#include "soup-enum-types.h" +#include "soup-message.h" +#include "soup-message-private.h" +#include "soup-session-feature.h" +#include "soup-uri.h" +/*TIZEN patch*/ +#include "TIZEN.h" + +/** + * SECTION:soup-content-sniffer + * @short_description: Content sniffing for #SoupSession + * + * A #SoupContentSniffer tries to detect the actual content type of + * the files that are being downloaded by looking at some of the data + * before the #SoupMessage emits its #SoupMessage::got-headers signal. + * #SoupContentSniffer implements #SoupSessionFeature, so you can add + * content sniffing to a session with soup_session_add_feature() or + * soup_session_add_feature_by_type(). + * + * Since: 2.27.3 + **/ + +static char *sniff (SoupContentSniffer *sniffer, SoupMessage *msg, SoupBuffer *buffer, GHashTable **params); +static gsize get_buffer_size (SoupContentSniffer *sniffer); + +static void soup_content_sniffer_session_feature_init (SoupSessionFeatureInterface *feature_interface, gpointer interface_data); + +static void request_queued (SoupSessionFeature *feature, SoupSession *session, SoupMessage *msg); +static void request_unqueued (SoupSessionFeature *feature, SoupSession *session, SoupMessage *msg); + +G_DEFINE_TYPE_WITH_CODE (SoupContentSniffer, soup_content_sniffer, G_TYPE_OBJECT, + G_IMPLEMENT_INTERFACE (SOUP_TYPE_SESSION_FEATURE, + soup_content_sniffer_session_feature_init)) + +static void +soup_content_sniffer_init (SoupContentSniffer *content_sniffer) +{ +} + +static void +soup_content_sniffer_class_init (SoupContentSnifferClass *content_sniffer_class) +{ + content_sniffer_class->sniff = sniff; + content_sniffer_class->get_buffer_size = get_buffer_size; +} + +static void +soup_content_sniffer_session_feature_init (SoupSessionFeatureInterface *feature_interface, + gpointer interface_data) +{ + feature_interface->request_queued = request_queued; + 
feature_interface->request_unqueued = request_unqueued; +} + +/** + * soup_content_sniffer_new: + * + * Creates a new #SoupContentSniffer. + * + * Returns: a new #SoupContentSniffer + * + * Since: 2.27.3 + **/ +SoupContentSniffer * +soup_content_sniffer_new () +{ + return g_object_new (SOUP_TYPE_CONTENT_SNIFFER, NULL); +} + +/** + * soup_content_sniffer_sniff: + * @sniffer: a #SoupContentSniffer + * @msg: the message to sniff + * @buffer: a buffer containing the start of @msg's response body + * @params: (element-type utf8 utf8) (out) (transfer full) (allow-none): return + * location for Content-Type parameters (eg, "charset"), or %NULL + * + * Sniffs @buffer to determine its Content-Type. The result may also + * be influenced by the Content-Type declared in @msg's response + * headers. + * + * Return value: the sniffed Content-Type of @buffer; this will never be %NULL, + * but may be "application/octet-stream". + */ +char * +soup_content_sniffer_sniff (SoupContentSniffer *sniffer, + SoupMessage *msg, SoupBuffer *buffer, + GHashTable **params) +{ + g_return_val_if_fail (SOUP_IS_CONTENT_SNIFFER (sniffer), NULL); + g_return_val_if_fail (SOUP_IS_MESSAGE (msg), NULL); + g_return_val_if_fail (buffer != NULL, NULL); + + return SOUP_CONTENT_SNIFFER_GET_CLASS (sniffer)->sniff (sniffer, msg, buffer, params); +} + +/* This table is based on the HTML5 spec; + * See 2.7.4 Content-Type sniffing: unknown type + */ +typedef struct { + /* @has_ws is TRUE if @pattern contains "generic" whitespace */ + gboolean has_ws; + const guchar *mask; + const guchar *pattern; + guint pattern_length; + const char *sniffed_type; + gboolean scriptable; +} SoupContentSnifferPattern; + +#if ENABLE(TIZEN_FIX_CONTENT_SNIFFER_PATTERN) +/* This table is updated by TIZEN team(steve.jun@samsung.com), based on draft-abarth-mime-sniff-06 + * (http://tools.ietf.org/html/draft-abarth-mime-sniff-06); + * See 5. 
Unknown Type + */ +static SoupContentSnifferPattern types_table[] = { + // ) + { TRUE, + (const guchar *)"\xFF\xFF\xFF\xFF\xFF\xFF", + (const guchar *)" \x3C\x3f\x78\x6d\x6c", + 5, + "text/xml", + TRUE }, + // The string "%PDF-", the PDF signature. + { FALSE, + (const guchar *)"\xFF\xFF\xFF\xFF\xFF", + (const guchar *)"\x25\x50\x44\x46\x2D", + 5, + "application/pdf", + TRUE }, + // The string "%!PS-Adobe-", the PostScript signature. + { FALSE, + (const guchar *)"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF", + (const guchar *)"\x25\x21\x50\x53\x2D\x41\x64\x6F\x62\x65\x2D", + 11, + "application/postscript", + FALSE }, + // UTF-16BE BOM + { FALSE, + (const guchar *)"\xFF\xFF\x00\x00", + (const guchar *)"\xFE\xFF\x00\x00", + 4, + "text/plain", + FALSE }, + // UTF-16LE BOM + { FALSE, + (const guchar *)"\xFF\xFF\x00\x00", + (const guchar *)"\xFF\xFE\x00\x00", + 4, + "text/plain", + FALSE }, + // UTF-8 BOM + { FALSE, + (const guchar *)"\xFF\xFF\xFF\x00", + (const guchar *)"\xEF\xBB\xBF\x00", + 4, + "text/plain", + FALSE }, + // The string "GIF87a", a GIF signature. + { FALSE, + (const guchar *)"\xFF\xFF\xFF\xFF\xFF\xFF", + (const guchar *)"\x47\x49\x46\x38\x37\x61", + 6, + "image/gif", + FALSE }, + // The string "GIF89a", a GIF signature. + { FALSE, + (const guchar *)"\xFF\xFF\xFF\xFF\xFF\xFF", + (const guchar *)"\x47\x49\x46\x38\x39\x61", + 6, + "image/gif", + FALSE }, + // The PNG signature. + { FALSE, + (const guchar *)"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF", + (const guchar *)"\x89\x50\x4E\x47\x0D\x0A\x1A\x0A", + 8, + "image/png", + FALSE }, + // A JPEG SOI marker followed by a octet of another marker. + { FALSE, + (const guchar *)"\xFF\xFF\xFF", + (const guchar *)"\xFF\xD8\xFF", + 3, + "image/jpeg", + FALSE }, + // The string "BM", a BMP signature. + { FALSE, + (const guchar *)"\xFF\xFF", + (const guchar *)"\x42\x4D", + 2, + "image/bmp", + FALSE }, + // "RIFF" followed by four bytes, followed by "WEBPVP". 
+ { FALSE, + (const guchar *)"\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF\xFF\xFF", + (const guchar *)"\x52\x49\x46\x46\x00\x00\x00\x00\x57\x45\x42\x50\x56\x50", + 14, + "image/webp", + FALSE }, + // A Windows Icon signature. + { FALSE, + (const guchar *)"\xFF\xFF\xFF\xFF", + (const guchar *)"\x00\x00\x01\x00", + 4, + "image/vnd.microsoft.icon", + FALSE }, + // An Ogg Vorbis audio or video signature. + { FALSE, + (const guchar *)"\xFF\xFF\xFF\xFF\xFF", + (const guchar *)"\x4F\x67\x67\x53\x00", + 5, + "application/ogg", + FALSE }, + // "RIFF" followed by four bytes, followed by "WAVE". + { FALSE, + (const guchar *)"\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF\xFF\xFF", + (const guchar *)"\x52\x49\x46\x46\x00\x00\x00\x00\x57\x41\x56\x45", + 12, + "audio/x-wave", + FALSE }, + // The WebM signature [TODO: Use more octets?] + { FALSE, + (const guchar *)"\xFF\xFF\xFF\xFF", + (const guchar *)"\x1A\x45\xDF\xA3", + 4, + "video/webm", + FALSE }, + // A RAR archive. + { FALSE, + (const guchar *)"\xFF\xFF\xFF\xFF\xFF\xFF\xFF", + (const guchar *)"\x52\x61\x72\x20\x1A\x07\x00", + 7, + "application/x-rar-compressed", + FALSE }, + // A ZIP archive. + { FALSE, + (const guchar *)"\xFF\xFF\xFF\xFF", + (const guchar *)"\x50\x4B\x03\x04", + 4, + "application/zip", + FALSE }, + // A GZIP archive. 
+ { FALSE, + (const guchar *)"\xFF\xFF\xFF", + (const guchar *)"\x1F\x8B\x08", + 3, + "application/x-gzip", + FALSE } +}; +#else +static SoupContentSnifferPattern types_table[] = { + { FALSE, + (const guchar *)"\xFF\xFF\xDF\xDF\xDF\xDF\xDF\xDF\xDF\xFF\xDF\xDF\xDF\xDF", + (const guchar *)"\x3C\x21\x44\x4F\x43\x54\x59\x50\x45\x20\x48\x54\x4D\x4C", + 14, + "text/html", + TRUE }, + + { TRUE, + (const guchar *)"\xFF\xFF\xDF\xDF\xDF\xDF", + (const guchar *)" \x3C\x48\x54\x4D\x4C", + 5, + "text/html", + TRUE }, + + { TRUE, + (const guchar *)"\xFF\xFF\xDF\xDF\xDF\xDF", + (const guchar *)" \x3C\x48\x45\x41\x44", + 5, + "text/html", + TRUE }, + + { TRUE, + (const guchar *)"\xFF\xFF\xDF\xDF\xDF\xDF\xDF\xDF", + (const guchar *)" \x3C\x53\x43\x52\x49\x50\x54", + 7, + "text/html", + TRUE }, + + { FALSE, + (const guchar *)"\xFF\xFF\xFF\xFF\xFF", + (const guchar *)"\x25\x50\x44\x46\x2D", + 5, + "application/pdf", + TRUE }, + + { FALSE, + (const guchar *)"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF", + (const guchar *)"\x25\x21\x50\x53\x2D\x41\x64\x6F\x62\x65\x2D", + 11, + "application/postscript", + FALSE }, + + { FALSE, + (const guchar *)"\xFF\xFF\x00\x00", + (const guchar *)"\xFE\xFF\x00\x00", /* UTF-16BE BOM */ + 4, + "text/plain", + FALSE }, + + { FALSE, + (const guchar *)"\xFF\xFF\x00\x00", + (const guchar *)"\xFF\xFE\x00\x00", /* UTF-16LE BOM */ + 4, + "text/plain", + FALSE }, + + { FALSE, + (const guchar *)"\xFF\xFF\xFF\x00", + (const guchar *)"\xEF\xBB\xBF\x00", /* UTF-8 BOM */ + 4, + "text/plain", + FALSE }, + + { FALSE, + (const guchar *)"\xFF\xFF\xFF\xFF\xFF\xFF", + (const guchar *)"\x47\x49\x46\x38\x37\x61", + 6, + "image/gif", + FALSE }, + + { FALSE, + (const guchar *)"\xFF\xFF\xFF\xFF\xFF\xFF", + (const guchar *)"\x47\x49\x46\x38\x39\x61", + 6, + "image/gif", + FALSE }, + + { FALSE, + (const guchar *)"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF", + (const guchar *)"\x89\x50\x4E\x47\x0D\x0A\x1A\x0A", + 8, + "image/png", + FALSE }, + + { FALSE, + (const guchar *)"\xFF\xFF\xFF", + (const guchar *)"\xFF\xD8\xFF", + 3, + "image/jpeg", 
+ FALSE }, + + { FALSE, + (const guchar *)"\xFF\xFF", + (const guchar *)"\x42\x4D", + 2, + "image/bmp", + FALSE }, + + { FALSE, + (const guchar *)"\xFF\xFF\xFF\xFF", + (const guchar *)"\x00\x00\x01\x00", + 4, + "image/vnd.microsoft.icon", + FALSE } +}; +#endif + +/* Whether a given byte looks like it might be part of binary content. + * Source: HTML5 spec; borrowed from the Chromium mime sniffer code, + * which is BSD-licensed + */ +static char byte_looks_binary[] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, /* 0x00 - 0x0F */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, /* 0x10 - 0x1F */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20 - 0x2F */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x30 - 0x3F */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40 - 0x4F */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x50 - 0x5F */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60 - 0x6F */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x70 - 0x7F */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80 - 0x8F */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x90 - 0x9F */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xA0 - 0xAF */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xB0 - 0xBF */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xC0 - 0xCF */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xD0 - 0xDF */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xE0 - 0xEF */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xF0 - 0xFF */ +}; + +/* HTML5: 2.7.4 Content-Type sniffing: unknown type */ +static char* +sniff_unknown (SoupContentSniffer *sniffer, SoupMessage *msg, + SoupBuffer *buffer, gboolean for_text_or_binary) +{ + const guchar *resource = (const guchar *)buffer->data; + int resource_length = MIN (512, buffer->length); + int i; + + for (i = 0; i < G_N_ELEMENTS (types_table); i++) { + SoupContentSnifferPattern *type_row = &(types_table[i]); + + /* The 
scriptable types should be skiped for the text + * or binary path, but considered for other paths */ + if (for_text_or_binary && type_row->scriptable) + continue; + + if (type_row->has_ws) { + int index_stream = 0; + int index_pattern = 0; + gboolean skip_row = FALSE; + + while ((index_stream < resource_length) && + (index_pattern <= type_row->pattern_length)) { + /* Skip insignificant white space ("WS" in the spec) */ + if (type_row->pattern[index_pattern] == ' ') { + if (resource[index_stream] == '\x09' || + resource[index_stream] == '\x0a' || + resource[index_stream] == '\x0c' || + resource[index_stream] == '\x0d' || + resource[index_stream] == '\x20') + index_stream++; + else + index_pattern++; + } else { + if ((type_row->mask[index_pattern] & resource[index_stream]) != type_row->pattern[index_pattern]) { + skip_row = TRUE; + break; + } + index_pattern++; + index_stream++; + } + } + + if (skip_row) + continue; + + if (index_pattern > type_row->pattern_length) + return g_strdup (type_row->sniffed_type); + } else { + int j; + + if (resource_length < type_row->pattern_length) + continue; + + for (j = 0; j < type_row->pattern_length; j++) { + if ((type_row->mask[j] & resource[j]) != type_row->pattern[j]) + break; + } + + /* This means our comparison above matched completely */ + if (j == type_row->pattern_length) + return g_strdup (type_row->sniffed_type); + } + } + + if (for_text_or_binary) + return g_strdup ("application/octet-stream"); + + for (i = 0; i < resource_length; i++) { + if (byte_looks_binary[resource[i]]) + return g_strdup ("application/octet-stream"); + } + + return g_strdup ("text/plain"); +} + +/* HTML5: 2.7.3 Content-Type sniffing: text or binary */ +static char* +sniff_text_or_binary (SoupContentSniffer *sniffer, SoupMessage *msg, + SoupBuffer *buffer) +{ + const guchar *resource = (const guchar *)buffer->data; + int resource_length = MIN (512, buffer->length); + gboolean looks_binary = FALSE; + int i; + + /* Detecting UTF-16BE, UTF-16LE, or 
UTF-8 BOMs means it's text/plain */ + if (resource_length >= 4) { + if ((resource[0] == 0xFE && resource[1] == 0xFF) || + (resource[0] == 0xFF && resource[1] == 0xFE) || + (resource[0] == 0xEF && resource[1] == 0xBB && resource[2] == 0xBF)) + return g_strdup ("text/plain"); + } + + /* Look to see if any of the first n bytes looks binary */ + for (i = 0; i < resource_length; i++) { + if (byte_looks_binary[resource[i]]) { + looks_binary = TRUE; + break; + } + } + + if (!looks_binary) + return g_strdup ("text/plain"); + + return sniff_unknown (sniffer, msg, buffer, TRUE); +} + +static char* +sniff_images (SoupContentSniffer *sniffer, SoupMessage *msg, + SoupBuffer *buffer, const char *content_type) +{ + const guchar *resource = (const guchar *)buffer->data; + int resource_length = MIN (512, buffer->length); + int i; + + for (i = 0; i < G_N_ELEMENTS (types_table); i++) { + SoupContentSnifferPattern *type_row = &(types_table[i]); + + if (resource_length < type_row->pattern_length) + continue; + + if (!g_str_has_prefix (type_row->sniffed_type, "image/")) + continue; + + /* All of the image types use all-\xFF for the mask, + * so we can just memcmp. 
+ */ + if (memcmp (type_row->pattern, resource, type_row->pattern_length) == 0) + return g_strdup (type_row->sniffed_type); + } + + return g_strdup (content_type); +} + +static char* +sniff_feed_or_html (SoupContentSniffer *sniffer, SoupMessage *msg, SoupBuffer *buffer) +{ + const guchar *resource = (const guchar *)buffer->data; + int resource_length = MIN (512, buffer->length); + int pos = 0; + + if (resource_length < 3) + goto text_html; + + /* Skip a leading UTF-8 BOM */ + if (resource[0] == 0xEF && resource[1] == 0xBB && resource[2] == 0xBF) + pos = 3; + + look_for_tag: + if (pos > resource_length) + goto text_html; + + /* Skip insignificant white space */ + while ((resource[pos] == '\x09') || + (resource[pos] == '\x20') || + (resource[pos] == '\x0A') || + (resource[pos] == '\x0D')) { + pos++; + + if (pos > resource_length) + goto text_html; + } + + /* != < */ + if (resource[pos] != '\x3C') + return g_strdup ("text/html"); + + pos++; + + if ((pos + 2) > resource_length) + goto text_html; + + /* Skipping comments */ + if ((resource[pos] == '\x2D') || + (resource[pos+1] == '\x2D') || + (resource[pos+2] == '\x3E')) { + pos = pos + 3; + + if ((pos + 2) > resource_length) + goto text_html; + + while ((resource[pos] != '\x2D') && + (resource[pos+1] != '\x2D') && + (resource[pos+2] != '\x3E')) { + pos++; + + if ((pos + 2) > resource_length) + goto text_html; + } + + goto look_for_tag; + } + + if (pos > resource_length) + goto text_html; + + /* == ! */ + if (resource[pos] == '\x21') { + do { + pos++; + + if (pos > resource_length) + goto text_html; + } while (resource[pos] != '\x3E'); + + pos++; + + goto look_for_tag; + } else if (resource[pos] == '\x3F') { /* ? 
*/ + do { + pos++; + + if ((pos + 1) > resource_length) + goto text_html; + } while ((resource[pos] != '\x3F') && + (resource[pos+1] != '\x3E')); + + pos = pos + 2; + + goto look_for_tag; + } + + if ((pos + 2) > resource_length) + goto text_html; + + if ((resource[pos] == '\x72') && + (resource[pos+1] == '\x73') && + (resource[pos+2] == '\x73')) + return g_strdup ("application/rss+xml"); + + if ((pos + 3) > resource_length) + goto text_html; + + if ((resource[pos] == '\x66') && + (resource[pos+1] == '\x65') && + (resource[pos+2] == '\x65') && + (resource[pos+3] == '\x64')) + return g_strdup ("application/atom+xml"); + + text_html: + return g_strdup ("text/html"); +} + +static char* +sniff (SoupContentSniffer *sniffer, SoupMessage *msg, SoupBuffer *buffer, GHashTable **params) +{ + const char *content_type; + + content_type = soup_message_headers_get_content_type (msg->response_headers, params); + + /* These comparisons are done in an ASCII-case-insensitive + * manner because the spec requires it */ + if ((content_type == NULL) || + !g_ascii_strcasecmp (content_type, "unknown/unknown") || + !g_ascii_strcasecmp (content_type, "application/unknown") || + !g_ascii_strcasecmp (content_type, "*/*")) + return sniff_unknown (sniffer, msg, buffer, FALSE); + + if (g_str_has_suffix (content_type, "+xml") || + !g_ascii_strcasecmp (content_type, "text/xml") || + !g_ascii_strcasecmp (content_type, "application/xml")) + return g_strdup (content_type); + + /* 2.7.5 Content-Type sniffing: image + * The spec says: + * + * If the resource's official type is "image/svg+xml", then + * the sniffed type of the resource is its official type (an + * XML type) + * + * The XML case is handled by the if above; if you refactor + * this code, keep this in mind. 
+ */ + if (!g_ascii_strncasecmp (content_type, "image/", 6)) + return sniff_images (sniffer, msg, buffer, content_type); + + /* If we got text/plain, use text_or_binary */ + if (g_str_equal (content_type, "text/plain")) { + return sniff_text_or_binary (sniffer, msg, buffer); + } + + if (!g_ascii_strcasecmp (content_type, "text/html")) + return sniff_feed_or_html (sniffer, msg, buffer); + + return g_strdup (content_type); +} + +static gsize +get_buffer_size (SoupContentSniffer *sniffer) +{ + return 512; +} + +static void +soup_content_sniffer_got_headers_cb (SoupMessage *msg, SoupContentSniffer *sniffer) +{ + SoupMessagePrivate *priv = SOUP_MESSAGE_GET_PRIVATE (msg); + SoupContentSnifferClass *content_sniffer_class = SOUP_CONTENT_SNIFFER_GET_CLASS (sniffer); + + priv->bytes_for_sniffing = content_sniffer_class->get_buffer_size (sniffer); +} + +static void +request_queued (SoupSessionFeature *feature, SoupSession *session, + SoupMessage *msg) +{ + SoupMessagePrivate *priv = SOUP_MESSAGE_GET_PRIVATE (msg); + + priv->sniffer = g_object_ref (feature); + g_signal_connect (msg, "got-headers", + G_CALLBACK (soup_content_sniffer_got_headers_cb), + feature); +} + +static void +request_unqueued (SoupSessionFeature *feature, SoupSession *session, + SoupMessage *msg) +{ + SoupMessagePrivate *priv = SOUP_MESSAGE_GET_PRIVATE (msg); + + g_object_unref (priv->sniffer); + priv->sniffer = NULL; + + g_signal_handlers_disconnect_by_func (msg, soup_content_sniffer_got_headers_cb, feature); +} diff --git a/libsoup/soup-content-sniffer.h b/libsoup/soup-content-sniffer.h new file mode 100644 index 0000000..a8aa915 --- /dev/null +++ b/libsoup/soup-content-sniffer.h @@ -0,0 +1,57 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2009 Gustavo Noronha Silva. 
/* GObject type macros for SoupContentSniffer.
 *
 * Each macro expands only its own parameter. The *_CLASS variants take
 * a class pointer (klass); the others take an instance pointer (obj).
 */
#define SOUP_TYPE_CONTENT_SNIFFER            (soup_content_sniffer_get_type ())
#define SOUP_CONTENT_SNIFFER(obj)            (G_TYPE_CHECK_INSTANCE_CAST ((obj), SOUP_TYPE_CONTENT_SNIFFER, SoupContentSniffer))
#define SOUP_CONTENT_SNIFFER_CLASS(klass)    (G_TYPE_CHECK_CLASS_CAST ((klass), SOUP_TYPE_CONTENT_SNIFFER, SoupContentSnifferClass))
#define SOUP_IS_CONTENT_SNIFFER(obj)         (G_TYPE_CHECK_INSTANCE_TYPE ((obj), SOUP_TYPE_CONTENT_SNIFFER))
#define SOUP_IS_CONTENT_SNIFFER_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), SOUP_TYPE_CONTENT_SNIFFER))
#define SOUP_CONTENT_SNIFFER_GET_CLASS(obj)  (G_TYPE_INSTANCE_GET_CLASS ((obj), SOUP_TYPE_CONTENT_SNIFFER, SoupContentSnifferClass))
soup-cookie-jar-sqlite.c: ff sqlite-based cookie storage + * + * Using danw's soup-cookie-jar-text as template + * Copyright (C) 2008 Diego Escalante Urrelo + * Copyright (C) 2009 Collabora Ltd. + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include +#include +#include + +#include "soup-cookie-jar-sqlite.h" +#include "soup-cookie.h" +#include "soup-date.h" + +/** + * SECTION:soup-cookie-jar-sqlite + * @short_description: SQLite-based Cookie Jar + * + * #SoupCookieJarSqlite is a #SoupCookieJar that reads cookies from and + * writes them to an SQLite file in the new Mozilla format. + **/ + +enum { + PROP_0, + + PROP_FILENAME, + + LAST_PROP +}; + +typedef struct { + char *filename; + sqlite3 *db; +} SoupCookieJarSqlitePrivate; + +#define SOUP_COOKIE_JAR_SQLITE_GET_PRIVATE(o) (G_TYPE_INSTANCE_GET_PRIVATE ((o), SOUP_TYPE_COOKIE_JAR_SQLITE, SoupCookieJarSqlitePrivate)) + +G_DEFINE_TYPE (SoupCookieJarSqlite, soup_cookie_jar_sqlite, SOUP_TYPE_COOKIE_JAR) + +static void load (SoupCookieJar *jar); +static void changed (SoupCookieJar *jar, + SoupCookie *old_cookie, + SoupCookie *new_cookie); + +static void set_property (GObject *object, guint prop_id, + const GValue *value, GParamSpec *pspec); +static void get_property (GObject *object, guint prop_id, + GValue *value, GParamSpec *pspec); + +static void +soup_cookie_jar_sqlite_init (SoupCookieJarSqlite *sqlite) +{ +} + +static void +finalize (GObject *object) +{ + SoupCookieJarSqlitePrivate *priv = + SOUP_COOKIE_JAR_SQLITE_GET_PRIVATE (object); + + g_free (priv->filename); + + if (priv->db) + sqlite3_close (priv->db); + + G_OBJECT_CLASS (soup_cookie_jar_sqlite_parent_class)->finalize (object); +} + +static void +soup_cookie_jar_sqlite_class_init (SoupCookieJarSqliteClass *sqlite_class) +{ + SoupCookieJarClass *cookie_jar_class = + SOUP_COOKIE_JAR_CLASS (sqlite_class); + GObjectClass *object_class = G_OBJECT_CLASS (sqlite_class); + + g_type_class_add_private (sqlite_class, sizeof (SoupCookieJarSqlitePrivate)); + + 
cookie_jar_class->changed = changed; + + object_class->finalize = finalize; + object_class->set_property = set_property; + object_class->get_property = get_property; + + /** + * SOUP_COOKIE_JAR_SQLITE_FILENAME: + * + * Alias for the #SoupCookieJarSqlite:filename property. (The + * cookie-storage filename.) + **/ + g_object_class_install_property ( + object_class, PROP_FILENAME, + g_param_spec_string (SOUP_COOKIE_JAR_SQLITE_FILENAME, + "Filename", + "Cookie-storage filename", + NULL, + G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY)); +} + +static void +set_property (GObject *object, guint prop_id, + const GValue *value, GParamSpec *pspec) +{ + SoupCookieJarSqlitePrivate *priv = + SOUP_COOKIE_JAR_SQLITE_GET_PRIVATE (object); + + switch (prop_id) { + case PROP_FILENAME: + priv->filename = g_value_dup_string (value); + load (SOUP_COOKIE_JAR (object)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); + break; + } +} + +static void +get_property (GObject *object, guint prop_id, + GValue *value, GParamSpec *pspec) +{ + SoupCookieJarSqlitePrivate *priv = + SOUP_COOKIE_JAR_SQLITE_GET_PRIVATE (object); + + switch (prop_id) { + case PROP_FILENAME: + g_value_set_string (value, priv->filename); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); + break; + } +} + +/** + * soup_cookie_jar_sqlite_new: + * @filename: the filename to read to/write from, or %NULL + * @read_only: %TRUE if @filename is read-only + * + * Creates a #SoupCookieJarSqlite. + * + * @filename will be read in at startup to create an initial set of + * cookies. If @read_only is %FALSE, then the non-session cookies will + * be written to @filename when the 'changed' signal is emitted from + * the jar. (If @read_only is %TRUE, then the cookie jar will only be + * used for this session, and changes made to it will be lost when the + * jar is destroyed.) 
/* SQL used against the moz_cookies table.
 *
 * QUERY_INSERT and QUERY_DELETE are sqlite3_mprintf() format strings
 * (%Q quotes and escapes a string argument). changed() passes the expiry
 * time as a gulong, so it is formatted with %lu; the two trailing %d
 * take the secure and http_only flags.
 */
#define QUERY_ALL "SELECT id, name, value, host, path, expiry, lastAccessed, isSecure, isHttpOnly FROM moz_cookies;"
#define CREATE_TABLE "CREATE TABLE moz_cookies (id INTEGER PRIMARY KEY, name TEXT, value TEXT, host TEXT, path TEXT,expiry INTEGER, lastAccessed INTEGER, isSecure INTEGER, isHttpOnly INTEGER)"
#define QUERY_INSERT "INSERT INTO moz_cookies VALUES(NULL, %Q, %Q, %Q, %Q, %lu, NULL, %d, %d);"
#define QUERY_DELETE "DELETE FROM moz_cookies WHERE name=%Q AND host=%Q;"
expire_time - now : G_MAXINT); + + http_only = (g_strcmp0 (argv[COL_HTTP_ONLY], "1") == 0); + secure = (g_strcmp0 (argv[COL_SECURE], "1") == 0); + + cookie = soup_cookie_new (name, value, host, path, max_age); + + if (secure) + soup_cookie_set_secure (cookie, TRUE); + if (http_only) + soup_cookie_set_http_only (cookie, TRUE); + + soup_cookie_jar_add_cookie (jar, cookie); + + return 0; +} + +static void +try_create_table (sqlite3 *db) +{ + char *error = NULL; + + if (sqlite3_exec (db, CREATE_TABLE, NULL, NULL, &error)) { + g_warning ("Failed to execute query: %s", error); + sqlite3_free (error); + } +} + +static void +exec_query_with_try_create_table (sqlite3 *db, + const char *sql, + int (*callback)(void*,int,char**,char**), + void *argument) +{ + char *error = NULL; + gboolean try_create = TRUE; + +try_exec: + if (sqlite3_exec (db, sql, callback, argument, &error)) { + if (try_create) { + try_create = FALSE; + try_create_table (db); + sqlite3_free (error); + error = NULL; + goto try_exec; + } else { + g_warning ("Failed to execute query: %s", error); + sqlite3_free (error); + } + } +} + +/* Follows sqlite3 convention; returns TRUE on error */ +static gboolean +open_db (SoupCookieJar *jar) +{ + SoupCookieJarSqlitePrivate *priv = + SOUP_COOKIE_JAR_SQLITE_GET_PRIVATE (jar); + + char *error = NULL; + + if (sqlite3_open (priv->filename, &priv->db)) { + sqlite3_close (priv->db); + priv->db = NULL; + g_warning ("Can't open %s", priv->filename); + return TRUE; + } + + if (sqlite3_exec (priv->db, "PRAGMA synchronous = OFF; PRAGMA secure_delete = 1;", NULL, NULL, &error)) { + g_warning ("Failed to execute query: %s", error); + sqlite3_free (error); + } + + return FALSE; +} + +static void +load (SoupCookieJar *jar) +{ + SoupCookieJarSqlitePrivate *priv = + SOUP_COOKIE_JAR_SQLITE_GET_PRIVATE (jar); + + if (priv->db == NULL) { + if (open_db (jar)) + return; + } + + exec_query_with_try_create_table (priv->db, QUERY_ALL, callback, jar); +} + +static void +changed (SoupCookieJar 
/* GObject type macros for SoupCookieJarSqlite.
 *
 * Each macro expands only its own parameter. The *_CLASS variants take
 * a class pointer (klass); the others take an instance pointer (obj).
 */
#define SOUP_TYPE_COOKIE_JAR_SQLITE            (soup_cookie_jar_sqlite_get_type ())
#define SOUP_COOKIE_JAR_SQLITE(obj)            (G_TYPE_CHECK_INSTANCE_CAST ((obj), SOUP_TYPE_COOKIE_JAR_SQLITE, SoupCookieJarSqlite))
#define SOUP_COOKIE_JAR_SQLITE_CLASS(klass)    (G_TYPE_CHECK_CLASS_CAST ((klass), SOUP_TYPE_COOKIE_JAR_SQLITE, SoupCookieJarSqliteClass))
#define SOUP_IS_COOKIE_JAR_SQLITE(obj)         (G_TYPE_CHECK_INSTANCE_TYPE ((obj), SOUP_TYPE_COOKIE_JAR_SQLITE))
#define SOUP_IS_COOKIE_JAR_SQLITE_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), SOUP_TYPE_COOKIE_JAR_SQLITE))
#define SOUP_COOKIE_JAR_SQLITE_GET_CLASS(obj)  (G_TYPE_INSTANCE_GET_CLASS ((obj), SOUP_TYPE_COOKIE_JAR_SQLITE, SoupCookieJarSqliteClass))
SoupCookieJarSqlite; + +typedef struct { + SoupCookieJarClass parent_class; + + /* Padding for future expansion */ + void (*_libsoup_reserved1) (void); + void (*_libsoup_reserved2) (void); + void (*_libsoup_reserved3) (void); + void (*_libsoup_reserved4) (void); +} SoupCookieJarSqliteClass; + +#define SOUP_COOKIE_JAR_SQLITE_FILENAME "filename" + +GType soup_cookie_jar_sqlite_get_type (void); + +SoupCookieJar *soup_cookie_jar_sqlite_new (const char *filename, + gboolean read_only); + +G_END_DECLS + +#endif /* SOUP_COOKIE_JAR_SQLITE_H */ diff --git a/libsoup/soup-cookie-jar-text.c b/libsoup/soup-cookie-jar-text.c new file mode 100644 index 0000000..527d442 --- /dev/null +++ b/libsoup/soup-cookie-jar-text.c @@ -0,0 +1,365 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * soup-cookie-jar-text.c: cookies.txt-based cookie storage + * + * Copyright (C) 2007, 2008 Red Hat, Inc. + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include +#include + +#include "soup-cookie-jar-text.h" +#include "soup-cookie.h" +#include "soup-date.h" + +/** + * SECTION:soup-cookie-jar-text + * @short_description: Text-file-based ("cookies.txt") Cookie Jar + * + * #SoupCookieJarText is a #SoupCookieJar that reads cookies from and + * writes them to a text file in the Mozilla "cookies.txt" format. 
+ **/ + +enum { + PROP_0, + + PROP_FILENAME, + + LAST_PROP +}; + +typedef struct { + char *filename; + +} SoupCookieJarTextPrivate; +#define SOUP_COOKIE_JAR_TEXT_GET_PRIVATE(o) (G_TYPE_INSTANCE_GET_PRIVATE ((o), SOUP_TYPE_COOKIE_JAR_TEXT, SoupCookieJarTextPrivate)) + +G_DEFINE_TYPE (SoupCookieJarText, soup_cookie_jar_text, SOUP_TYPE_COOKIE_JAR) + +static void load (SoupCookieJar *jar); +static void changed (SoupCookieJar *jar, + SoupCookie *old_cookie, + SoupCookie *new_cookie); + +static void set_property (GObject *object, guint prop_id, + const GValue *value, GParamSpec *pspec); +static void get_property (GObject *object, guint prop_id, + GValue *value, GParamSpec *pspec); + +static void +soup_cookie_jar_text_init (SoupCookieJarText *text) +{ +} + +static void +finalize (GObject *object) +{ + SoupCookieJarTextPrivate *priv = + SOUP_COOKIE_JAR_TEXT_GET_PRIVATE (object); + + g_free (priv->filename); + + G_OBJECT_CLASS (soup_cookie_jar_text_parent_class)->finalize (object); +} + +static void +soup_cookie_jar_text_class_init (SoupCookieJarTextClass *text_class) +{ + SoupCookieJarClass *cookie_jar_class = + SOUP_COOKIE_JAR_CLASS (text_class); + GObjectClass *object_class = G_OBJECT_CLASS (text_class); + + g_type_class_add_private (text_class, sizeof (SoupCookieJarTextPrivate)); + + cookie_jar_class->changed = changed; + + object_class->finalize = finalize; + object_class->set_property = set_property; + object_class->get_property = get_property; + + /** + * SOUP_COOKIE_JAR_TEXT_FILENAME: + * + * Alias for the #SoupCookieJarText:filename property. (The + * cookie-storage filename.) 
+ **/ + g_object_class_install_property ( + object_class, PROP_FILENAME, + g_param_spec_string (SOUP_COOKIE_JAR_TEXT_FILENAME, + "Filename", + "Cookie-storage filename", + NULL, + G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY)); +} + +static void +set_property (GObject *object, guint prop_id, + const GValue *value, GParamSpec *pspec) +{ + SoupCookieJarTextPrivate *priv = + SOUP_COOKIE_JAR_TEXT_GET_PRIVATE (object); + + switch (prop_id) { + case PROP_FILENAME: + priv->filename = g_value_dup_string (value); + load (SOUP_COOKIE_JAR (object)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); + break; + } +} + +static void +get_property (GObject *object, guint prop_id, + GValue *value, GParamSpec *pspec) +{ + SoupCookieJarTextPrivate *priv = + SOUP_COOKIE_JAR_TEXT_GET_PRIVATE (object); + + switch (prop_id) { + case PROP_FILENAME: + g_value_set_string (value, priv->filename); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); + break; + } +} + +/** + * soup_cookie_jar_text_new: + * @filename: the filename to read to/write from + * @read_only: %TRUE if @filename is read-only + * + * Creates a #SoupCookieJarText. + * + * @filename will be read in at startup to create an initial set of + * cookies. If @read_only is %FALSE, then the non-session cookies will + * be written to @filename when the 'changed' signal is emitted from + * the jar. (If @read_only is %TRUE, then the cookie jar will only be + * used for this session, and changes made to it will be lost when the + * jar is destroyed.) 
+ * + * Return value: the new #SoupCookieJar + * + * Since: 2.26 + **/ +SoupCookieJar * +soup_cookie_jar_text_new (const char *filename, gboolean read_only) +{ + g_return_val_if_fail (filename != NULL, NULL); + + return g_object_new (SOUP_TYPE_COOKIE_JAR_TEXT, + SOUP_COOKIE_JAR_TEXT_FILENAME, filename, + SOUP_COOKIE_JAR_READ_ONLY, read_only, + NULL); +} + +static SoupCookie* +parse_cookie (char *line, time_t now) +{ + char **result; + SoupCookie *cookie = NULL; + gboolean http_only; + gulong expire_time; + int max_age; + char *host, *path, *secure, *expires, *name, *value; + + if (g_str_has_prefix (line, "#HttpOnly_")) { + http_only = TRUE; + line += strlen ("#HttpOnly_"); + } else if (*line == '#' || g_ascii_isspace (*line)) + return cookie; + else + http_only = FALSE; + + result = g_strsplit (line, "\t", -1); + if (g_strv_length (result) != 7) + goto out; + + /* Check this first */ + expires = result[4]; + expire_time = strtoul (expires, NULL, 10); + if (now >= expire_time) + goto out; + max_age = (expire_time - now <= G_MAXINT ? expire_time - now : G_MAXINT); + + host = result[0]; + + /* result[1] is not used because it's redundat; it's a boolean + * value regarding whether the cookie should be used for + * sub-domains of the domain that is set for the cookie. It is + * TRUE if host starts with '.', and FALSE otherwise. 
+ */ + + path = result[2]; + secure = result[3]; + + name = result[5]; + value = result[6]; + + cookie = soup_cookie_new (name, value, host, path, max_age); + + if (strcmp (secure, "FALSE") != 0) + soup_cookie_set_secure (cookie, TRUE); + if (http_only) + soup_cookie_set_http_only (cookie, TRUE); + + out: + g_strfreev (result); + + return cookie; +} + +static void +parse_line (SoupCookieJar *jar, char *line, time_t now) +{ + SoupCookie *cookie; + + cookie = parse_cookie (line, now); + if (cookie) + soup_cookie_jar_add_cookie (jar, cookie); +} + +static void +load (SoupCookieJar *jar) +{ + SoupCookieJarTextPrivate *priv = + SOUP_COOKIE_JAR_TEXT_GET_PRIVATE (jar); + char *contents = NULL, *line, *p; + gsize length = 0; + time_t now = time (NULL); + + /* FIXME: error? */ + if (!g_file_get_contents (priv->filename, &contents, &length, NULL)) + return; + + line = contents; + for (p = contents; *p; p++) { + /* \r\n comes out as an extra empty line and gets ignored */ + if (*p == '\r' || *p == '\n') { + *p = '\0'; + parse_line (jar, line, now); + line = p + 1; + } + } + parse_line (jar, line, now); + + g_free (contents); +} + +static void +write_cookie (FILE *out, SoupCookie *cookie) +{ + fseek (out, 0, SEEK_END); + + fprintf (out, "%s%s\t%s\t%s\t%s\t%lu\t%s\t%s\n", + cookie->http_only ? "#HttpOnly_" : "", + cookie->domain, + *cookie->domain == '.' ? "TRUE" : "FALSE", + cookie->path, + cookie->secure ? 
"TRUE" : "FALSE", + (gulong)soup_date_to_time_t (cookie->expires), + cookie->name, + cookie->value); +} + +static void +delete_cookie (const char *filename, SoupCookie *cookie) +{ + char *contents = NULL, *line, *p; + gsize length = 0; + FILE *f; + SoupCookie *c; + time_t now = time (NULL); + + if (!g_file_get_contents (filename, &contents, &length, NULL)) + return; + + f = fopen (filename, "w"); + if (!f) { + g_free (contents); + return; + } + + line = contents; + for (p = contents; *p; p++) { + /* \r\n comes out as an extra empty line and gets ignored */ + if (*p == '\r' || *p == '\n') { + *p = '\0'; + c = parse_cookie (line, now); + line = p + 1; + if (!c) + continue; + if (!soup_cookie_equal (cookie, c)) + write_cookie (f, c); + soup_cookie_free (c); + } + } + c = parse_cookie (line, now); + if (c) { + if (!soup_cookie_equal (cookie, c)) + write_cookie (f, c); + soup_cookie_free (c); + } + + g_free (contents); + fclose (f); +} + +static void +changed (SoupCookieJar *jar, + SoupCookie *old_cookie, + SoupCookie *new_cookie) +{ + FILE *out; + SoupCookieJarTextPrivate *priv = + SOUP_COOKIE_JAR_TEXT_GET_PRIVATE (jar); + + /* We can sort of ignore the semantics of the 'changed' + * signal here and simply delete the old cookie if present + * and write the new cookie if present. That will do the + * right thing for all 'added', 'deleted' and 'modified' + * meanings. + */ + /* Also, delete_cookie takes the filename and write_cookie + * a FILE pointer. Seems more convenient that way considering + * the implementations of the functions + */ + if (old_cookie) + delete_cookie (priv->filename, old_cookie); + + if (new_cookie) { + gboolean write_header = FALSE; + + if (!g_file_test (priv->filename, G_FILE_TEST_EXISTS)) + write_header = TRUE; + + out = fopen (priv->filename, "a"); + if (!out) { + /* FIXME: error? 
/* GObject type macros for SoupCookieJarText.
 *
 * Each macro expands only its own parameter. The *_CLASS variants take
 * a class pointer (klass); the others take an instance pointer (obj).
 */
#define SOUP_TYPE_COOKIE_JAR_TEXT            (soup_cookie_jar_text_get_type ())
#define SOUP_COOKIE_JAR_TEXT(obj)            (G_TYPE_CHECK_INSTANCE_CAST ((obj), SOUP_TYPE_COOKIE_JAR_TEXT, SoupCookieJarText))
#define SOUP_COOKIE_JAR_TEXT_CLASS(klass)    (G_TYPE_CHECK_CLASS_CAST ((klass), SOUP_TYPE_COOKIE_JAR_TEXT, SoupCookieJarTextClass))
#define SOUP_IS_COOKIE_JAR_TEXT(obj)         (G_TYPE_CHECK_INSTANCE_TYPE ((obj), SOUP_TYPE_COOKIE_JAR_TEXT))
#define SOUP_IS_COOKIE_JAR_TEXT_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), SOUP_TYPE_COOKIE_JAR_TEXT))
#define SOUP_COOKIE_JAR_TEXT_GET_CLASS(obj)  (G_TYPE_INSTANCE_GET_CLASS ((obj), SOUP_TYPE_COOKIE_JAR_TEXT, SoupCookieJarTextClass))
*filename, + gboolean read_only); + +G_END_DECLS + +#endif /* SOUP_COOKIE_JAR_TEXT_H */ diff --git a/libsoup/soup-cookie-jar.c b/libsoup/soup-cookie-jar.c new file mode 100644 index 0000000..13bf238 --- /dev/null +++ b/libsoup/soup-cookie-jar.c @@ -0,0 +1,758 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * soup-cookie-jar.c + * + * Copyright (C) 2008 Red Hat, Inc. + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include + +#include "soup-cookie.h" +#include "soup-cookie-jar.h" +#include "soup-date.h" +#include "soup-enum-types.h" +#include "soup-marshal.h" +#include "soup-message.h" +#include "soup-session-feature.h" +#include "soup-uri.h" + +/** + * SECTION:soup-cookie-jar + * @short_description: Automatic cookie handling for #SoupSession + * + * A #SoupCookieJar stores #SoupCookies and arrange for them + * to be sent with the appropriate #SoupMessages. + * #SoupCookieJar implements #SoupSessionFeature, so you can add a + * cookie jar to a session with soup_session_add_feature() or + * soup_session_add_feature_by_type(). + * + * Note that the base #SoupCookieJar class does not support any form + * of long-term cookie persistence. 
+ **/ + +static void soup_cookie_jar_session_feature_init (SoupSessionFeatureInterface *feature_interface, gpointer interface_data); +static void request_queued (SoupSessionFeature *feature, SoupSession *session, + SoupMessage *msg); +static void request_started (SoupSessionFeature *feature, SoupSession *session, + SoupMessage *msg, SoupSocket *socket); +static void request_unqueued (SoupSessionFeature *feature, SoupSession *session, + SoupMessage *msg); + +G_DEFINE_TYPE_WITH_CODE (SoupCookieJar, soup_cookie_jar, G_TYPE_OBJECT, + G_IMPLEMENT_INTERFACE (SOUP_TYPE_SESSION_FEATURE, + soup_cookie_jar_session_feature_init)) + +enum { + CHANGED, + LAST_SIGNAL +}; + +static guint signals[LAST_SIGNAL] = { 0 }; + +enum { + PROP_0, + + PROP_READ_ONLY, + PROP_ACCEPT_POLICY, + + LAST_PROP +}; + +typedef struct { + gboolean constructed, read_only; + GHashTable *domains, *serials; + guint serial; + SoupCookieJarAcceptPolicy accept_policy; +} SoupCookieJarPrivate; +#define SOUP_COOKIE_JAR_GET_PRIVATE(o) (G_TYPE_INSTANCE_GET_PRIVATE ((o), SOUP_TYPE_COOKIE_JAR, SoupCookieJarPrivate)) + +static void set_property (GObject *object, guint prop_id, + const GValue *value, GParamSpec *pspec); +static void get_property (GObject *object, guint prop_id, + GValue *value, GParamSpec *pspec); + +static void +soup_cookie_jar_init (SoupCookieJar *jar) +{ + SoupCookieJarPrivate *priv = SOUP_COOKIE_JAR_GET_PRIVATE (jar); + + priv->domains = g_hash_table_new_full (soup_str_case_hash, + soup_str_case_equal, + g_free, NULL); + priv->serials = g_hash_table_new (NULL, NULL); + priv->accept_policy = SOUP_COOKIE_JAR_ACCEPT_ALWAYS; +} + +static void +constructed (GObject *object) +{ + SoupCookieJarPrivate *priv = SOUP_COOKIE_JAR_GET_PRIVATE (object); + + priv->constructed = TRUE; +} + +static void +finalize (GObject *object) +{ + SoupCookieJarPrivate *priv = SOUP_COOKIE_JAR_GET_PRIVATE (object); + GHashTableIter iter; + gpointer key, value; + + g_hash_table_iter_init (&iter, priv->domains); + while 
(g_hash_table_iter_next (&iter, &key, &value)) + soup_cookies_free (value); + g_hash_table_destroy (priv->domains); + g_hash_table_destroy (priv->serials); + + G_OBJECT_CLASS (soup_cookie_jar_parent_class)->finalize (object); +} + +static void +soup_cookie_jar_class_init (SoupCookieJarClass *jar_class) +{ + GObjectClass *object_class = G_OBJECT_CLASS (jar_class); + + g_type_class_add_private (jar_class, sizeof (SoupCookieJarPrivate)); + + object_class->constructed = constructed; + object_class->finalize = finalize; + object_class->set_property = set_property; + object_class->get_property = get_property; + + /** + * SoupCookieJar::changed + * @jar: the #SoupCookieJar + * @old_cookie: the old #SoupCookie value + * @new_cookie: the new #SoupCookie value + * + * Emitted when @jar changes. If a cookie has been added, + * @new_cookie will contain the newly-added cookie and + * @old_cookie will be %NULL. If a cookie has been deleted, + * @old_cookie will contain the to-be-deleted cookie and + * @new_cookie will be %NULL. If a cookie has been changed, + * @old_cookie will contain its old value, and @new_cookie its + * new value. + **/ + signals[CHANGED] = + g_signal_new ("changed", + G_OBJECT_CLASS_TYPE (object_class), + G_SIGNAL_RUN_FIRST, + G_STRUCT_OFFSET (SoupCookieJarClass, changed), + NULL, NULL, + soup_marshal_NONE__BOXED_BOXED, + G_TYPE_NONE, 2, + SOUP_TYPE_COOKIE | G_SIGNAL_TYPE_STATIC_SCOPE, + SOUP_TYPE_COOKIE | G_SIGNAL_TYPE_STATIC_SCOPE); + + /** + * SOUP_COOKIE_JAR_READ_ONLY: + * + * Alias for the #SoupCookieJar:read-only property. (Whether + * or not the cookie jar is read-only.) + **/ + g_object_class_install_property ( + object_class, PROP_READ_ONLY, + g_param_spec_boolean (SOUP_COOKIE_JAR_READ_ONLY, + "Read-only", + "Whether or not the cookie jar is read-only", + FALSE, + G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY)); + + /** + * SOUP_COOKIE_JAR_ACCEPT_POLICY: + * + * Alias for the #SoupCookieJar:accept-policy property. 
+ * + * Since: 2.30 + */ + /** + * SoupCookieJar:accept-policy: + * + * The policy the jar should follow to accept or reject cookies + * + * Since: 2.30 + */ + g_object_class_install_property ( + object_class, PROP_ACCEPT_POLICY, + g_param_spec_enum (SOUP_COOKIE_JAR_ACCEPT_POLICY, + "Accept-policy", + "The policy the jar should follow to accept or reject cookies", + SOUP_TYPE_COOKIE_JAR_ACCEPT_POLICY, + SOUP_COOKIE_JAR_ACCEPT_ALWAYS, + G_PARAM_READWRITE)); +} + +static void +soup_cookie_jar_session_feature_init (SoupSessionFeatureInterface *feature_interface, + gpointer interface_data) +{ + feature_interface->request_queued = request_queued; + feature_interface->request_started = request_started; + feature_interface->request_unqueued = request_unqueued; +} + +static void +set_property (GObject *object, guint prop_id, + const GValue *value, GParamSpec *pspec) +{ + SoupCookieJarPrivate *priv = + SOUP_COOKIE_JAR_GET_PRIVATE (object); + + switch (prop_id) { + case PROP_READ_ONLY: + priv->read_only = g_value_get_boolean (value); + break; + case PROP_ACCEPT_POLICY: + priv->accept_policy = g_value_get_enum (value); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); + break; + } +} + +static void +get_property (GObject *object, guint prop_id, + GValue *value, GParamSpec *pspec) +{ + SoupCookieJarPrivate *priv = + SOUP_COOKIE_JAR_GET_PRIVATE (object); + + switch (prop_id) { + case PROP_READ_ONLY: + g_value_set_boolean (value, priv->read_only); + break; + case PROP_ACCEPT_POLICY: + g_value_set_enum (value, priv->accept_policy); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); + break; + } +} + +/** + * soup_cookie_jar_new: + * + * Creates a new #SoupCookieJar. The base #SoupCookieJar class does + * not support persistent storage of cookies; use a subclass for that. 
+ * + * Returns: a new #SoupCookieJar + * + * Since: 2.24 + **/ +SoupCookieJar * +soup_cookie_jar_new (void) +{ + return g_object_new (SOUP_TYPE_COOKIE_JAR, NULL); +} + +void +soup_cookie_jar_save (SoupCookieJar *jar) +{ + /* Does nothing, obsolete */ +} + +static void +soup_cookie_jar_changed (SoupCookieJar *jar, + SoupCookie *old, SoupCookie *new) +{ + SoupCookieJarPrivate *priv = SOUP_COOKIE_JAR_GET_PRIVATE (jar); + + if (old && old != new) + g_hash_table_remove (priv->serials, old); + if (new) { + priv->serial++; + g_hash_table_insert (priv->serials, new, GUINT_TO_POINTER (priv->serial)); + } + + if (priv->read_only || !priv->constructed) + return; + + g_signal_emit (jar, signals[CHANGED], 0, old, new); +} + +static int +compare_cookies (gconstpointer a, gconstpointer b, gpointer jar) +{ + SoupCookie *ca = (SoupCookie *)a; + SoupCookie *cb = (SoupCookie *)b; + SoupCookieJarPrivate *priv = SOUP_COOKIE_JAR_GET_PRIVATE (jar); + int alen, blen; + guint aserial, bserial; + + /* "Cookies with longer path fields are listed before cookies + * with shorter path field." + */ + alen = ca->path ? strlen (ca->path) : 0; + blen = cb->path ? strlen (cb->path) : 0; + if (alen != blen) + return blen - alen; + + /* "Among cookies that have equal length path fields, cookies + * with earlier creation dates are listed before cookies with + * later creation dates." + */ + aserial = GPOINTER_TO_UINT (g_hash_table_lookup (priv->serials, ca)); + bserial = GPOINTER_TO_UINT (g_hash_table_lookup (priv->serials, cb)); + return aserial - bserial; +} + +/** + * soup_cookie_jar_get_cookies: + * @jar: a #SoupCookieJar + * @uri: a #SoupURI + * @for_http: whether or not the return value is being passed directly + * to an HTTP operation + * + * Retrieves (in Cookie-header form) the list of cookies that would + * be sent with a request to @uri. 
+ * + * If @for_http is %TRUE, the return value will include cookies marked + * "HttpOnly" (that is, cookies that the server wishes to keep hidden + * from client-side scripting operations such as the JavaScript + * document.cookies property). Since #SoupCookieJar sets the Cookie + * header itself when making the actual HTTP request, you should + * almost certainly be setting @for_http to %FALSE if you are calling + * this. + * + * Return value: the cookies, in string form, or %NULL if there are no + * cookies for @uri. + * + * Since: 2.24 + **/ +char * +soup_cookie_jar_get_cookies (SoupCookieJar *jar, SoupURI *uri, + gboolean for_http) +{ + SoupCookieJarPrivate *priv; + GSList *cookies, *domain_cookies; + char *domain, *cur, *next_domain, *result; + GSList *new_head, *cookies_to_remove = NULL, *p; + + g_return_val_if_fail (SOUP_IS_COOKIE_JAR (jar), NULL); + priv = SOUP_COOKIE_JAR_GET_PRIVATE (jar); + g_return_val_if_fail (uri != NULL, NULL); + + if (!uri->host) + return NULL; + + /* The logic here is a little weird, but the plan is that if + * uri->host is "www.foo.com", we will end up looking up + * cookies for ".www.foo.com", "www.foo.com", ".foo.com", and + * ".com", in that order. (Logic stolen from Mozilla.) 
+ */ + cookies = NULL; + domain = cur = g_strdup_printf (".%s", uri->host); + next_domain = domain + 1; + do { + new_head = domain_cookies = g_hash_table_lookup (priv->domains, cur); + while (domain_cookies) { + GSList *next = domain_cookies->next; + SoupCookie *cookie = domain_cookies->data; + + if (cookie->expires && soup_date_is_past (cookie->expires)) { + cookies_to_remove = g_slist_append (cookies_to_remove, + cookie); + new_head = g_slist_delete_link (new_head, domain_cookies); + g_hash_table_insert (priv->domains, + g_strdup (cur), + new_head); + } else if (soup_cookie_applies_to_uri (cookie, uri) && + (for_http || !cookie->http_only)) + cookies = g_slist_append (cookies, cookie); + + domain_cookies = next; + } + cur = next_domain; + if (cur) + next_domain = strchr (cur + 1, '.'); + } while (cur); + g_free (domain); + + for (p = cookies_to_remove; p; p = p->next) { + SoupCookie *cookie = p->data; + + soup_cookie_jar_changed (jar, cookie, NULL); + soup_cookie_free (cookie); + } + g_slist_free (cookies_to_remove); + + if (cookies) { + cookies = g_slist_sort_with_data (cookies, compare_cookies, jar); + result = soup_cookies_to_cookie_header (cookies); + g_slist_free (cookies); + + if (!*result) { + g_free (result); + result = NULL; + } + return result; + } else + return NULL; +} + +/** + * soup_cookie_jar_add_cookie: + * @jar: a #SoupCookieJar + * @cookie: a #SoupCookie + * + * Adds @cookie to @jar, emitting the 'changed' signal if we are modifying + * an existing cookie or adding a valid new cookie ('valid' means + * that the cookie's expire date is not in the past). + * + * @cookie will be 'stolen' by the jar, so don't free it afterwards. 
+ * + * Since: 2.26 + **/ +void +soup_cookie_jar_add_cookie (SoupCookieJar *jar, SoupCookie *cookie) +{ + SoupCookieJarPrivate *priv; + GSList *old_cookies, *oc, *last = NULL; + SoupCookie *old_cookie; + + g_return_if_fail (SOUP_IS_COOKIE_JAR (jar)); + g_return_if_fail (cookie != NULL); + + priv = SOUP_COOKIE_JAR_GET_PRIVATE (jar); + old_cookies = g_hash_table_lookup (priv->domains, cookie->domain); + for (oc = old_cookies; oc; oc = oc->next) { + old_cookie = oc->data; + if (!strcmp (cookie->name, old_cookie->name) && + !g_strcmp0 (cookie->path, old_cookie->path)) { + if (cookie->expires && soup_date_is_past (cookie->expires)) { + /* The new cookie has an expired date, + * this is the way the the server has + * of telling us that we have to + * remove the cookie. + */ + old_cookies = g_slist_delete_link (old_cookies, oc); + g_hash_table_insert (priv->domains, + g_strdup (cookie->domain), + old_cookies); + soup_cookie_jar_changed (jar, old_cookie, NULL); + soup_cookie_free (old_cookie); + soup_cookie_free (cookie); + } else { + oc->data = cookie; + soup_cookie_jar_changed (jar, old_cookie, cookie); + soup_cookie_free (old_cookie); + } + + return; + } + last = oc; + } + + /* The new cookie is... a new cookie */ + if (cookie->expires && soup_date_is_past (cookie->expires)) { + soup_cookie_free (cookie); + return; + } + + if (last) + last->next = g_slist_append (NULL, cookie); + else { + old_cookies = g_slist_append (NULL, cookie); + g_hash_table_insert (priv->domains, g_strdup (cookie->domain), + old_cookies); + } + + soup_cookie_jar_changed (jar, NULL, cookie); +} + +/** + * soup_cookie_jar_set_cookie: + * @jar: a #SoupCookieJar + * @uri: the URI setting the cookie + * @cookie: the stringified cookie to set + * + * Adds @cookie to @jar, exactly as though it had appeared in a + * Set-Cookie header returned from a request to @uri. 
+ * + * Keep in mind that if the #SoupCookieJarAcceptPolicy + * %SOUP_COOKIE_JAR_ACCEPT_NO_THIRD_PARTY is set you'll need to use + * soup_cookie_jar_set_cookie_with_first_party(), otherwise the jar + * will have no way of knowing if the cookie is being set by a third + * party or not. + * + * Since: 2.24 + **/ +void +soup_cookie_jar_set_cookie (SoupCookieJar *jar, SoupURI *uri, + const char *cookie) +{ + SoupCookie *soup_cookie; + SoupCookieJarPrivate *priv; + + g_return_if_fail (SOUP_IS_COOKIE_JAR (jar)); + g_return_if_fail (uri != NULL); + g_return_if_fail (cookie != NULL); + + if (!uri->host) + return; + + priv = SOUP_COOKIE_JAR_GET_PRIVATE (jar); + if (priv->accept_policy == SOUP_COOKIE_JAR_ACCEPT_NEVER) + return; + + g_return_if_fail (priv->accept_policy != SOUP_COOKIE_JAR_ACCEPT_NO_THIRD_PARTY); + + soup_cookie = soup_cookie_parse (cookie, uri); + if (soup_cookie) { + /* will steal or free soup_cookie */ + soup_cookie_jar_add_cookie (jar, soup_cookie); + } +} + +/** + * soup_cookie_jar_set_cookie_with_first_party: + * @jar: a #SoupCookieJar + * @uri: the URI setting the cookie + * @first_party: the URI for the main document + * @cookie: the stringified cookie to set + * + * Adds @cookie to @jar, exactly as though it had appeared in a + * Set-Cookie header returned from a request to @uri. @first_party + * will be used to reject cookies coming from third party resources in + * case such a security policy is set in the @jar. 
+ * + * Since: 2.30 + **/ +void +soup_cookie_jar_set_cookie_with_first_party (SoupCookieJar *jar, + SoupURI *uri, + SoupURI *first_party, + const char *cookie) +{ + SoupCookie *soup_cookie; + SoupCookieJarPrivate *priv; + + g_return_if_fail (SOUP_IS_COOKIE_JAR (jar)); + g_return_if_fail (uri != NULL); + g_return_if_fail (first_party != NULL); + g_return_if_fail (cookie != NULL); + + if (!uri->host) + return; + + priv = SOUP_COOKIE_JAR_GET_PRIVATE (jar); + if (priv->accept_policy == SOUP_COOKIE_JAR_ACCEPT_NEVER) + return; + + soup_cookie = soup_cookie_parse (cookie, uri); + if (soup_cookie) { + if (priv->accept_policy == SOUP_COOKIE_JAR_ACCEPT_ALWAYS || + soup_cookie_domain_matches (soup_cookie, first_party->host)) { + /* will steal or free soup_cookie */ + soup_cookie_jar_add_cookie (jar, soup_cookie); + } else { + soup_cookie_free (soup_cookie); + } + } +} + +static void +process_set_cookie_header (SoupMessage *msg, gpointer user_data) +{ + SoupCookieJar *jar = user_data; + SoupCookieJarPrivate *priv = SOUP_COOKIE_JAR_GET_PRIVATE (jar); + GSList *new_cookies, *nc; + + if (priv->accept_policy == SOUP_COOKIE_JAR_ACCEPT_NEVER) + return; + + new_cookies = soup_cookies_from_response (msg); + for (nc = new_cookies; nc; nc = nc->next) { + SoupURI *first_party = soup_message_get_first_party (msg); + + if ((priv->accept_policy == SOUP_COOKIE_JAR_ACCEPT_NO_THIRD_PARTY && + first_party != NULL && first_party->host && + soup_cookie_domain_matches (nc->data, first_party->host)) || + priv->accept_policy == SOUP_COOKIE_JAR_ACCEPT_ALWAYS) + soup_cookie_jar_add_cookie (jar, nc->data); + else + soup_cookie_free (nc->data); + } + g_slist_free (new_cookies); +} + +static void +request_queued (SoupSessionFeature *feature, SoupSession *session, + SoupMessage *msg) +{ + soup_message_add_header_handler (msg, "got-headers", + "Set-Cookie", + G_CALLBACK (process_set_cookie_header), + feature); +} + +static void +request_started (SoupSessionFeature *feature, SoupSession *session, + 
SoupMessage *msg, SoupSocket *socket) +{ + SoupCookieJar *jar = SOUP_COOKIE_JAR (feature); + char *cookies; + + cookies = soup_cookie_jar_get_cookies (jar, soup_message_get_uri (msg), TRUE); + if (cookies) { + soup_message_headers_replace (msg->request_headers, + "Cookie", cookies); + g_free (cookies); + } else + soup_message_headers_remove (msg->request_headers, "Cookie"); +} + +static void +request_unqueued (SoupSessionFeature *feature, SoupSession *session, + SoupMessage *msg) +{ + g_signal_handlers_disconnect_by_func (msg, process_set_cookie_header, feature); +} + +/** + * soup_cookie_jar_all_cookies: + * @jar: a #SoupCookieJar + * + * Constructs a #GSList with every cookie inside the @jar. + * The cookies in the list are a copy of the original, so + * you have to free them when you are done with them. + * + * Return value: (transfer full) (element-type Soup.Cookie): a #GSList + * with all the cookies in the @jar. + * + * Since: 2.26 + **/ +GSList * +soup_cookie_jar_all_cookies (SoupCookieJar *jar) +{ + SoupCookieJarPrivate *priv; + GHashTableIter iter; + GSList *l = NULL; + gpointer key, value; + + g_return_val_if_fail (SOUP_IS_COOKIE_JAR (jar), NULL); + + priv = SOUP_COOKIE_JAR_GET_PRIVATE (jar); + + g_hash_table_iter_init (&iter, priv->domains); + + while (g_hash_table_iter_next (&iter, &key, &value)) { + GSList *p, *cookies = value; + for (p = cookies; p; p = p->next) + l = g_slist_prepend (l, soup_cookie_copy (p->data)); + } + + return l; +} + +/** + * soup_cookie_jar_delete_cookie: + * @jar: a #SoupCookieJar + * @cookie: a #SoupCookie + * + * Deletes @cookie from @jar, emitting the 'changed' signal. 
+ * + * Since: 2.26 + **/ +void +soup_cookie_jar_delete_cookie (SoupCookieJar *jar, + SoupCookie *cookie) +{ + SoupCookieJarPrivate *priv; + GSList *cookies, *p; + char *domain; + + g_return_if_fail (SOUP_IS_COOKIE_JAR (jar)); + g_return_if_fail (cookie != NULL); + + priv = SOUP_COOKIE_JAR_GET_PRIVATE (jar); + + domain = g_strdup (cookie->domain); + + cookies = g_hash_table_lookup (priv->domains, domain); + if (cookies == NULL) + return; + + for (p = cookies; p; p = p->next ) { + SoupCookie *c = (SoupCookie*)p->data; + if (soup_cookie_equal (cookie, c)) { + cookies = g_slist_delete_link (cookies, p); + g_hash_table_insert (priv->domains, + domain, + cookies); + soup_cookie_jar_changed (jar, c, NULL); + soup_cookie_free (c); + return; + } + } +} + +/** + * SoupCookieJarAcceptPolicy: + * @SOUP_COOKIE_JAR_ACCEPT_ALWAYS: accept all cookies unconditionally. + * @SOUP_COOKIE_JAR_ACCEPT_NEVER: reject all cookies unconditionally. + * @SOUP_COOKIE_JAR_ACCEPT_NO_THIRD_PARTY: accept all cookies set by + * the main document loaded in the application using libsoup. An + * example of the most common case, web browsers, would be: If + * http://www.example.com is the page loaded, accept all cookies set + * by example.com, but if a resource from http://www.third-party.com + * is loaded from that page reject any cookie that it could try to + * set. For libsoup to be able to tell apart first party cookies from + * the rest, the application must call soup_message_set_first_party() + * on each outgoing #SoupMessage, setting the #SoupURI of the main + * document. If no first party is set in a message when this policy is + * in effect, cookies will be assumed to be third party by default. 
+ * + * Since: 2.30 + */ + +/** + * soup_cookie_jar_get_accept_policy: + * @jar: a #SoupCookieJar + * + * Returns: the #SoupCookieJarAcceptPolicy set in the @jar + * + * Since: 2.30 + **/ +SoupCookieJarAcceptPolicy +soup_cookie_jar_get_accept_policy (SoupCookieJar *jar) +{ + SoupCookieJarPrivate *priv; + + g_return_val_if_fail (SOUP_IS_COOKIE_JAR (jar), SOUP_COOKIE_JAR_ACCEPT_ALWAYS); + + priv = SOUP_COOKIE_JAR_GET_PRIVATE (jar); + return priv->accept_policy; +} + +/** + * soup_cookie_jar_set_accept_policy: + * @jar: a #SoupCookieJar + * @policy: a #SoupCookieJarAcceptPolicy + * + * Sets @policy as the cookie acceptance policy for @jar. + * + * Since: 2.30 + **/ +void +soup_cookie_jar_set_accept_policy (SoupCookieJar *jar, + SoupCookieJarAcceptPolicy policy) +{ + SoupCookieJarPrivate *priv; + + g_return_if_fail (SOUP_IS_COOKIE_JAR (jar)); + + priv = SOUP_COOKIE_JAR_GET_PRIVATE (jar); + + if (priv->accept_policy != policy) { + priv->accept_policy = policy; + g_object_notify (G_OBJECT (jar), SOUP_COOKIE_JAR_ACCEPT_POLICY); + } +} diff --git a/libsoup/soup-cookie-jar.h b/libsoup/soup-cookie-jar.h new file mode 100644 index 0000000..eab64bf --- /dev/null +++ b/libsoup/soup-cookie-jar.h @@ -0,0 +1,76 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2008 Red Hat, Inc. 
+ */ + +#ifndef SOUP_COOKIE_JAR_H +#define SOUP_COOKIE_JAR_H 1 + +#include + +G_BEGIN_DECLS + +#define SOUP_TYPE_COOKIE_JAR (soup_cookie_jar_get_type ()) +#define SOUP_COOKIE_JAR(obj) (G_TYPE_CHECK_INSTANCE_CAST ((obj), SOUP_TYPE_COOKIE_JAR, SoupCookieJar)) +#define SOUP_COOKIE_JAR_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST ((klass), SOUP_TYPE_COOKIE_JAR, SoupCookieJarClass)) +#define SOUP_IS_COOKIE_JAR(obj) (G_TYPE_CHECK_INSTANCE_TYPE ((obj), SOUP_TYPE_COOKIE_JAR)) +#define SOUP_IS_COOKIE_JAR_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((obj), SOUP_TYPE_COOKIE_JAR)) +#define SOUP_COOKIE_JAR_GET_CLASS(obj) (G_TYPE_INSTANCE_GET_CLASS ((obj), SOUP_TYPE_COOKIE_JAR, SoupCookieJarClass)) + +struct _SoupCookieJar { + GObject parent; + +}; + +typedef struct { + GObjectClass parent_class; + + void (*save) (SoupCookieJar *jar); + + /* signals */ + void (*changed) (SoupCookieJar *jar, + SoupCookie *old_cookie, + SoupCookie *new_cookie); + + /* Padding for future expansion */ + void (*_libsoup_reserved1) (void); + void (*_libsoup_reserved2) (void); + void (*_libsoup_reserved3) (void); +} SoupCookieJarClass; + +#define SOUP_COOKIE_JAR_READ_ONLY "read-only" +#define SOUP_COOKIE_JAR_ACCEPT_POLICY "accept-policy" + +typedef enum { + SOUP_COOKIE_JAR_ACCEPT_ALWAYS, + SOUP_COOKIE_JAR_ACCEPT_NEVER, + SOUP_COOKIE_JAR_ACCEPT_NO_THIRD_PARTY +} SoupCookieJarAcceptPolicy; + +GType soup_cookie_jar_get_type (void); +SoupCookieJar * soup_cookie_jar_new (void); +#ifndef LIBSOUP_DISABLE_DEPRECATED +void soup_cookie_jar_save (SoupCookieJar *jar); +#endif +char * soup_cookie_jar_get_cookies (SoupCookieJar *jar, + SoupURI *uri, + gboolean for_http); +void soup_cookie_jar_set_cookie (SoupCookieJar *jar, + SoupURI *uri, + const char *cookie); +void soup_cookie_jar_set_cookie_with_first_party (SoupCookieJar *jar, + SoupURI *uri, + SoupURI *first_party, + const char *cookie); +void soup_cookie_jar_add_cookie (SoupCookieJar *jar, + SoupCookie *cookie); +void soup_cookie_jar_delete_cookie (SoupCookieJar *jar, + 
SoupCookie *cookie); +GSList * soup_cookie_jar_all_cookies (SoupCookieJar *jar); +void soup_cookie_jar_set_accept_policy (SoupCookieJar *jar, + SoupCookieJarAcceptPolicy policy); +SoupCookieJarAcceptPolicy soup_cookie_jar_get_accept_policy (SoupCookieJar *jar); + +G_END_DECLS + +#endif /* SOUP_COOKIE_JAR_H */ diff --git a/libsoup/soup-cookie.c b/libsoup/soup-cookie.c new file mode 100644 index 0000000..5e50043 --- /dev/null +++ b/libsoup/soup-cookie.c @@ -0,0 +1,1081 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * soup-cookie.c + * + * Copyright (C) 2007 Red Hat, Inc. + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include + +#include "soup-cookie.h" +#include "soup-date.h" +#include "soup-headers.h" +#include "soup-message.h" +#include "soup-message-headers.h" +#include "soup-uri.h" + +/** + * SECTION:soup-cookie + * @short_description: HTTP Cookies + * @see_also: #SoupMessage + * + * #SoupCookie implements HTTP cookies, primarily as described by + * the + * original Netscape cookie specification, but with slight + * modifications based on RFC 2109, Microsoft's + * HttpOnly extension attribute, and observed real-world usage + * (and, in particular, based on what Firefox does). + * + * To have a #SoupSession handle cookies for your appliction + * automatically, use a #SoupCookieJar. + **/ + +/** + * SoupCookie: + * @name: the cookie name + * @value: the cookie value + * @domain: the "domain" attribute, or else the hostname that the + * cookie came from. + * @path: the "path" attribute, or %NULL + * @expires: the cookie expiration time, or %NULL for a session cookie + * @secure: %TRUE if the cookie should only be tranferred over SSL + * @http_only: %TRUE if the cookie should not be exposed to scripts + * + * An HTTP cookie. + * + * @name and @value will be set for all cookies. If the cookie is + * generated from a string that appears to have no name, then @name + * will be the empty string. 
+ * + * @domain and @path give the host or domain, and path within that + * host/domain, to restrict this cookie to. If @domain starts with + * ".", that indicates a domain (which matches the string after the + * ".", or any hostname that has @domain as a suffix). Otherwise, it + * is a hostname and must match exactly. + * + * @expires will be non-%NULL if the cookie uses either the original + * "expires" attribute, or the "max-age" attribute specified in RFC + * 2109. If @expires is %NULL, it indicates that neither "expires" nor + * "max-age" was specified, and the cookie expires at the end of the + * session. + * + * If @http_only is set, the cookie should not be exposed to untrusted + * code (eg, javascript), so as to minimize the danger posed by + * cross-site scripting attacks. + * + * Since: 2.24 + **/ + +GType +soup_cookie_get_type (void) +{ + static volatile gsize type_volatile = 0; + + if (g_once_init_enter (&type_volatile)) { + GType type = g_boxed_type_register_static ( + g_intern_static_string ("SoupCookie"), + (GBoxedCopyFunc) soup_cookie_copy, + (GBoxedFreeFunc) soup_cookie_free); + g_once_init_leave (&type_volatile, type); + } + return type_volatile; +} + +/** + * soup_cookie_copy: + * @cookie: a #SoupCookie + * + * Copies @cookie. 
+ * + * Return value: a copy of @cookie + * + * Since: 2.24 + **/ +SoupCookie * +soup_cookie_copy (SoupCookie *cookie) +{ + SoupCookie *copy = g_slice_new0 (SoupCookie); + + copy->name = g_strdup (cookie->name); + copy->value = g_strdup (cookie->value); + copy->domain = g_strdup (cookie->domain); + copy->path = g_strdup (cookie->path); + if (cookie->expires) + copy->expires = soup_date_copy(cookie->expires); + copy->secure = cookie->secure; + copy->http_only = cookie->http_only; + + return copy; +} + +/** + * soup_cookie_domain_matches: + * @cookie: a #SoupCookie + * @host: a URI + * + * Checks if the @cookie's domain and @host match in the sense that + * @cookie should be sent when making a request to @host, or that + * @cookie should be accepted when receiving a response from @host. + * + * Return value: %TRUE if the domains match, %FALSE otherwise + * + * Since: 2.30 + **/ +gboolean +soup_cookie_domain_matches (SoupCookie *cookie, const char *host) +{ + char *match; + int dlen; + const char *domain; + + g_return_val_if_fail (cookie != NULL, FALSE); + g_return_val_if_fail (host != NULL, FALSE); + + domain = cookie->domain; + + if (!g_ascii_strcasecmp (domain, host)) + return TRUE; + if (*domain != '.') + return FALSE; + if (!g_ascii_strcasecmp (domain + 1, host)) + return TRUE; + dlen = strlen (domain); + while ((match = strstr (host, domain))) { + if (!match[dlen]) + return TRUE; + host = match + 1; + } + return FALSE; +} + +static inline const char * +skip_lws (const char *s) +{ + while (g_ascii_isspace (*s)) + s++; + return s; +} + +static inline const char * +unskip_lws (const char *s, const char *start) +{ + while (s > start && g_ascii_isspace (*(s - 1))) + s--; + return s; +} + +#define is_attr_ender(ch) ((ch) < ' ' || (ch) == ';' || (ch) == ',' || (ch) == '=') +#define is_value_ender(ch) ((ch) < ' ' || (ch) == ';') + +static char * +parse_value (const char **val_p) +{ + const char *start, *end, *p; + char *value; + + p = *val_p; + if (*p == '=') + p++; + 
start = skip_lws (p); + for (p = start; !is_value_ender (*p); p++) + ; + end = unskip_lws (p, start); + value = g_strndup (start, end - start); + + *val_p = p; + return value; +} + +static SoupDate * +parse_date (const char **val_p) +{ + char *value; + SoupDate *date; + + value = parse_value (val_p); + date = soup_date_new_from_string (value); + g_free (value); + return date; +} + +static SoupCookie * +parse_one_cookie (const char *header, SoupURI *origin) +{ + const char *start, *end, *p; + gboolean has_value; + SoupCookie *cookie; + + g_return_val_if_fail (origin == NULL || origin->host, NULL); + + cookie = g_slice_new0 (SoupCookie); + + /* Parse the NAME */ + start = skip_lws (header); + for (p = start; !is_attr_ender (*p); p++) + ; + if (*p == '=') { + end = unskip_lws (p, start); + cookie->name = g_strndup (start, end - start); + } else { + /* No NAME; Set cookie->name to "" and then rewind to + * re-parse the string as a VALUE. + */ + cookie->name = g_strdup (""); + p = start; + } + + /* Parse the VALUE */ + cookie->value = parse_value (&p); + + /* Parse attributes */ + while (*p == ';') { + start = skip_lws (p + 1); + for (p = start; !is_attr_ender (*p); p++) + ; + end = unskip_lws (p, start); + + has_value = (*p == '='); +#define MATCH_NAME(name) ((end - start == strlen (name)) && !g_ascii_strncasecmp (start, name, end - start)) + + if (MATCH_NAME ("domain") && has_value) { + cookie->domain = parse_value (&p); + if (!*cookie->domain) { + g_free (cookie->domain); + cookie->domain = NULL; + } + } else if (MATCH_NAME ("expires") && has_value) { + cookie->expires = parse_date (&p); + } else if (MATCH_NAME ("httponly")) { + cookie->http_only = TRUE; + } else if (MATCH_NAME ("max-age") && has_value) { + char *max_age_str = parse_value (&p), *mae; + long max_age = strtol (max_age_str, &mae, 10); + if (!*mae) { + if (max_age < 0) + max_age = 0; + soup_cookie_set_max_age (cookie, max_age); + } + g_free (max_age_str); + } else if (MATCH_NAME ("path") && has_value) { 
+ cookie->path = parse_value (&p); + if (*cookie->path != '/') { + g_free (cookie->path); + cookie->path = NULL; + } + } else if (MATCH_NAME ("secure")) { + cookie->secure = TRUE; + } else { + /* Ignore unknown attributes, but we still have + * to skip over the value. + */ + if (has_value) + g_free (parse_value (&p)); + } + } + + if (cookie->domain) { + /* Domain must have at least one '.' (not counting an + * initial one. (We check this now, rather than + * bailing out sooner, because we don't want to force + * any cookies after this one in the Set-Cookie header + * to be discarded.) + */ + if (!strchr (cookie->domain + 1, '.')) { + soup_cookie_free (cookie); + return NULL; + } + + /* If the domain string isn't an IP addr, and doesn't + * start with a '.', prepend one. + */ + if (!g_hostname_is_ip_address (cookie->domain) && + cookie->domain[0] != '.') { + char *tmp = g_strdup_printf (".%s", cookie->domain); + g_free (cookie->domain); + cookie->domain = tmp; + } + } + + if (origin) { + /* Sanity-check domain */ + if (cookie->domain) { + if (!soup_cookie_domain_matches (cookie, origin->host)) { + soup_cookie_free (cookie); + return NULL; + } + } else + cookie->domain = g_strdup (origin->host); + + /* The original cookie spec didn't say that pages + * could only set cookies for paths they were under. + * RFC 2109 adds that requirement, but some sites + * depend on the old behavior + * (https://bugzilla.mozilla.org/show_bug.cgi?id=156725#c20). + * So we don't check the path. 
+ */ + + if (!cookie->path) { + char *slash; + + slash = strrchr (origin->path, '/'); + if (!slash || slash == origin->path) + cookie->path = g_strdup ("/"); + else { + cookie->path = g_strndup (origin->path, + slash - origin->path); + } + } + } + + return cookie; +} + +static SoupCookie * +cookie_new_internal (const char *name, const char *value, + const char *domain, const char *path, + int max_age) +{ + SoupCookie *cookie; + + cookie = g_slice_new0 (SoupCookie); + cookie->name = g_strdup (name); + cookie->value = g_strdup (value); + cookie->domain = g_strdup (domain); + cookie->path = g_strdup (path); + soup_cookie_set_max_age (cookie, max_age); + + return cookie; +} + +/** + * soup_cookie_new: + * @name: cookie name + * @value: cookie value + * @domain: cookie domain or hostname + * @path: cookie path, or %NULL + * @max_age: max age of the cookie, or -1 for a session cookie + * + * Creates a new #SoupCookie with the given attributes. (Use + * soup_cookie_set_secure() and soup_cookie_set_http_only() if you + * need to set those attributes on the returned cookie.) + * + * @max_age is used to set the "expires" attribute on the cookie; pass + * -1 to not include the attribute (indicating that the cookie expires + * with the current session), 0 for an already-expired cookie, or a + * lifetime in seconds. You can use the constants + * %SOUP_COOKIE_MAX_AGE_ONE_HOUR, %SOUP_COOKIE_MAX_AGE_ONE_DAY, + * %SOUP_COOKIE_MAX_AGE_ONE_WEEK and %SOUP_COOKIE_MAX_AGE_ONE_YEAR (or + * multiples thereof) to calculate this value. (If you really care + * about setting the exact time that the cookie will expire, use + * soup_cookie_set_expires().) + * + * Return value: a new #SoupCookie. 
+ * + * Since: 2.24 + **/ +SoupCookie * +soup_cookie_new (const char *name, const char *value, + const char *domain, const char *path, + int max_age) +{ + g_return_val_if_fail (name != NULL, NULL); + g_return_val_if_fail (value != NULL, NULL); + + /* We ought to return if domain is NULL too, but this used to + * do be incorrectly documented as legal, and it wouldn't + * break anything as long as you called + * soup_cookie_set_domain() immediately after. So we warn but + * don't return, to discourage that behavior but not actually + * break anyone doing it. + */ + g_warn_if_fail (domain != NULL); + + return cookie_new_internal (name, value, domain, path, max_age); +} + +/** + * soup_cookie_parse: + * @header: a cookie string (eg, the value of a Set-Cookie header) + * @origin: origin of the cookie, or %NULL + * + * Parses @header and returns a #SoupCookie. (If @header contains + * multiple cookies, only the first one will be parsed.) + * + * If @header does not have "path" or "domain" attributes, they will + * be defaulted from @origin. If @origin is %NULL, path will default + * to "/", but domain will be left as %NULL. Note that this is not a + * valid state for a #SoupCookie, and you will need to fill in some + * appropriate string for the domain if you want to actually make use + * of the cookie. + * + * Return value: a new #SoupCookie, or %NULL if it could not be + * parsed, or contained an illegal "domain" attribute for a cookie + * originating from @origin. 
+ * + * Since: 2.24 + **/ +SoupCookie * +soup_cookie_parse (const char *cookie, SoupURI *origin) +{ + return parse_one_cookie (cookie, origin); +} + +/** + * soup_cookie_get_name: + * @cookie: a #SoupCookie + * + * Gets @cookie's name + * + * Return value: @cookie's name + * + * Since: 2.32 + **/ +const char * +soup_cookie_get_name (SoupCookie *cookie) +{ + return cookie->name; +} + +/** + * soup_cookie_set_name: + * @cookie: a #SoupCookie + * @name: the new name + * + * Sets @cookie's name to @name + * + * Since: 2.24 + **/ +void +soup_cookie_set_name (SoupCookie *cookie, const char *name) +{ + g_free (cookie->name); + cookie->name = g_strdup (name); +} + +/** + * soup_cookie_get_value: + * @cookie: a #SoupCookie + * + * Gets @cookie's value + * + * Return value: @cookie's value + * + * Since: 2.32 + **/ +const char * +soup_cookie_get_value (SoupCookie *cookie) +{ + return cookie->value; +} + +/** + * soup_cookie_set_value: + * @cookie: a #SoupCookie + * @value: the new value + * + * Sets @cookie's value to @value + * + * Since: 2.24 + **/ +void +soup_cookie_set_value (SoupCookie *cookie, const char *value) +{ + g_free (cookie->value); + cookie->value = g_strdup (value); +} + +/** + * soup_cookie_get_domain: + * @cookie: a #SoupCookie + * + * Gets @cookie's domain + * + * Return value: @cookie's domain + * + * Since: 2.32 + **/ +const char * +soup_cookie_get_domain (SoupCookie *cookie) +{ + return cookie->domain; +} + +/** + * soup_cookie_set_domain: + * @cookie: a #SoupCookie + * @domain: the new domain + * + * Sets @cookie's domain to @domain + * + * Since: 2.24 + **/ +void +soup_cookie_set_domain (SoupCookie *cookie, const char *domain) +{ + g_free (cookie->domain); + cookie->domain = g_strdup (domain); +} + +/** + * soup_cookie_get_path: + * @cookie: a #SoupCookie + * + * Gets @cookie's path + * + * Return value: @cookie's path + * + * Since: 2.32 + **/ +const char * +soup_cookie_get_path (SoupCookie *cookie) +{ + return cookie->path; +} + +/** + * 
soup_cookie_set_path: + * @cookie: a #SoupCookie + * @path: the new path + * + * Sets @cookie's path to @path + * + * Since: 2.24 + **/ +void +soup_cookie_set_path (SoupCookie *cookie, const char *path) +{ + g_free (cookie->path); + cookie->path = g_strdup (path); +} + +/** + * soup_cookie_set_max_age: + * @cookie: a #SoupCookie + * @max_age: the new max age + * + * Sets @cookie's max age to @max_age. If @max_age is -1, the cookie + * is a session cookie, and will expire at the end of the client's + * session. Otherwise, it is the number of seconds until the cookie + * expires. You can use the constants %SOUP_COOKIE_MAX_AGE_ONE_HOUR, + * %SOUP_COOKIE_MAX_AGE_ONE_DAY, %SOUP_COOKIE_MAX_AGE_ONE_WEEK and + * %SOUP_COOKIE_MAX_AGE_ONE_YEAR (or multiples thereof) to calculate + * this value. (A value of 0 indicates that the cookie should be + * considered already-expired.) + * + * (This sets the same property as soup_cookie_set_expires().) + * + * Since: 2.24 + **/ +void +soup_cookie_set_max_age (SoupCookie *cookie, int max_age) +{ + if (cookie->expires) + soup_date_free (cookie->expires); + + if (max_age == -1) + cookie->expires = NULL; + else if (max_age == 0) { + /* Use a date way in the past, to protect against + * clock skew. + */ + cookie->expires = soup_date_new (1970, 1, 1, 0, 0, 0); + } else + cookie->expires = soup_date_new_from_now (max_age); +} + +/** + * SOUP_COOKIE_MAX_AGE_ONE_HOUR: + * + * A constant corresponding to 1 hour, for use with soup_cookie_new() + * and soup_cookie_set_max_age(). + * + * Since: 2.24 + **/ +/** + * SOUP_COOKIE_MAX_AGE_ONE_DAY: + * + * A constant corresponding to 1 day, for use with soup_cookie_new() + * and soup_cookie_set_max_age(). + * + * Since: 2.24 + **/ +/** + * SOUP_COOKIE_MAX_AGE_ONE_WEEK: + * + * A constant corresponding to 1 week, for use with soup_cookie_new() + * and soup_cookie_set_max_age(). 
+ * + * Since: 2.24 + **/ +/** + * SOUP_COOKIE_MAX_AGE_ONE_YEAR: + * + * A constant corresponding to 1 year, for use with soup_cookie_new() + * and soup_cookie_set_max_age(). + * + * Since: 2.24 + **/ + +/** + * soup_cookie_get_expires: + * @cookie: a #SoupCookie + * + * Gets @cookie's expiration time + * + * Return value: (transfer none): @cookie's expiration time, which is + * owned by @cookie and should not be modified or freed. + * + * Since: 2.32 + **/ +SoupDate * +soup_cookie_get_expires (SoupCookie *cookie) +{ + return cookie->expires; +} + +/** + * soup_cookie_set_expires: + * @cookie: a #SoupCookie + * @expires: the new expiration time, or %NULL + * + * Sets @cookie's expiration time to @expires. If @expires is %NULL, + * @cookie will be a session cookie and will expire at the end of the + * client's session. + * + * (This sets the same property as soup_cookie_set_max_age().) + * + * Since: 2.24 + **/ +void +soup_cookie_set_expires (SoupCookie *cookie, SoupDate *expires) +{ + if (cookie->expires) + soup_date_free (cookie->expires); + + if (expires) + cookie->expires = soup_date_copy (expires); + else + cookie->expires = NULL; +} + +/** + * soup_cookie_get_secure: + * @cookie: a #SoupCookie + * + * Gets @cookie's secure attribute + * + * Return value: @cookie's secure attribute + * + * Since: 2.32 + **/ +gboolean +soup_cookie_get_secure (SoupCookie *cookie) +{ + return cookie->secure; +} + +/** + * soup_cookie_set_secure: + * @cookie: a #SoupCookie + * @secure: the new value for the secure attribute + * + * Sets @cookie's secure attribute to @secure. If %TRUE, @cookie will + * only be transmitted from the client to the server over secure + * (https) connections. 
+ * + * Since: 2.24 + **/ +void +soup_cookie_set_secure (SoupCookie *cookie, gboolean secure) +{ + cookie->secure = secure; +} + +/** + * soup_cookie_get_http_only: + * @cookie: a #SoupCookie + * + * Gets @cookie's HttpOnly attribute + * + * Return value: @cookie's HttpOnly attribute + * + * Since: 2.32 + **/ +gboolean +soup_cookie_get_http_only (SoupCookie *cookie) +{ + return cookie->http_only; +} + +/** + * soup_cookie_set_http_only: + * @cookie: a #SoupCookie + * @http_only: the new value for the HttpOnly attribute + * + * Sets @cookie's HttpOnly attribute to @http_only. If %TRUE, @cookie + * will be marked as "http only", meaning it should not be exposed to + * web page scripts or other untrusted code. + * + * Since: 2.24 + **/ +void +soup_cookie_set_http_only (SoupCookie *cookie, gboolean http_only) +{ + cookie->http_only = http_only; +} + +static void +serialize_cookie (SoupCookie *cookie, GString *header, gboolean set_cookie) +{ + if (!*cookie->name && !*cookie->value) + return; + + if (header->len) { + if (set_cookie) + g_string_append (header, ", "); + else + g_string_append (header, "; "); + } + + if (set_cookie || *cookie->name) { + g_string_append (header, cookie->name); + g_string_append (header, "="); + } + g_string_append (header, cookie->value); + if (!set_cookie) + return; + + if (cookie->expires) { + char *timestamp; + + g_string_append (header, "; expires="); + timestamp = soup_date_to_string (cookie->expires, + SOUP_DATE_COOKIE); + g_string_append (header, timestamp); + g_free (timestamp); + } + if (cookie->path) { + g_string_append (header, "; path="); + g_string_append (header, cookie->path); + } + if (cookie->domain) { + g_string_append (header, "; domain="); + g_string_append (header, cookie->domain); + } + if (cookie->secure) + g_string_append (header, "; secure"); + if (cookie->secure) + g_string_append (header, "; HttpOnly"); +} + +/** + * soup_cookie_to_set_cookie_header: + * @cookie: a #SoupCookie + * + * Serializes @cookie in the 
format used by the Set-Cookie header + * (ie, for sending a cookie from a #SoupServer to a client). + * + * Return value: the header + * + * Since: 2.24 + **/ +char * +soup_cookie_to_set_cookie_header (SoupCookie *cookie) +{ + GString *header = g_string_new (NULL); + + serialize_cookie (cookie, header, TRUE); + return g_string_free (header, FALSE); +} + +/** + * soup_cookie_to_cookie_header: + * @cookie: a #SoupCookie + * + * Serializes @cookie in the format used by the Cookie header (ie, for + * returning a cookie from a #SoupSession to a server). + * + * Return value: the header + * + * Since: 2.24 + **/ +char * +soup_cookie_to_cookie_header (SoupCookie *cookie) +{ + GString *header = g_string_new (NULL); + + serialize_cookie (cookie, header, FALSE); + return g_string_free (header, FALSE); +} + +/** + * soup_cookie_free: + * @cookie: a #SoupCookie + * + * Frees @cookie + * + * Since: 2.24 + **/ +void +soup_cookie_free (SoupCookie *cookie) +{ + g_return_if_fail (cookie != NULL); + + g_free (cookie->name); + g_free (cookie->value); + g_free (cookie->domain); + g_free (cookie->path); + + if (cookie->expires) + soup_date_free (cookie->expires); + + g_slice_free (SoupCookie, cookie); +} + +/** + * soup_cookies_from_response: + * @msg: a #SoupMessage containing a "Set-Cookie" response header + * + * Parses @msg's Set-Cookie response headers and returns a #GSList of + * #SoupCookies. Cookies that do not specify "path" or + * "domain" attributes will have their values defaulted from @msg. + * + * Return value: (element-type SoupCookie) (transfer full): a #GSList + * of #SoupCookies, which can be freed with + * soup_cookies_free(). 
+ * + * Since: 2.24 + **/ +GSList * +soup_cookies_from_response (SoupMessage *msg) +{ + SoupURI *origin; + const char *name, *value; + SoupCookie *cookie; + GSList *cookies = NULL; + SoupMessageHeadersIter iter; + + origin = soup_message_get_uri (msg); + + /* We have to use soup_message_headers_iter rather than + * soup_message_headers_get_list() since Set-Cookie isn't + * properly mergeable/unmergeable. + */ + soup_message_headers_iter_init (&iter, msg->response_headers); + while (soup_message_headers_iter_next (&iter, &name, &value)) { + if (g_ascii_strcasecmp (name, "Set-Cookie") != 0) + continue; + + cookie = parse_one_cookie (value, origin); + if (cookie) + cookies = g_slist_prepend (cookies, cookie); + } + return g_slist_reverse (cookies); +} + +/** + * soup_cookies_from_request: + * @msg: a #SoupMessage containing a "Cookie" request header + * + * Parses @msg's Cookie request header and returns a #GSList of + * #SoupCookies. As the "Cookie" header, unlike "Set-Cookie", + * only contains cookie names and values, none of the other + * #SoupCookie fields will be filled in. (Thus, you can't generally + * pass a cookie returned from this method directly to + * soup_cookies_to_response().) + * + * Return value: (element-type SoupCookie) (transfer full): a #GSList + * of #SoupCookies, which can be freed with + * soup_cookies_free(). 
+ * + * Since: 2.24 + **/ +GSList * +soup_cookies_from_request (SoupMessage *msg) +{ + SoupCookie *cookie; + GSList *cookies = NULL; + GHashTable *params; + GHashTableIter iter; + gpointer name, value; + const char *header; + + header = soup_message_headers_get_one (msg->request_headers, "Cookie"); + if (!header) + return NULL; + + params = soup_header_parse_semi_param_list (header); + g_hash_table_iter_init (&iter, params); + while (g_hash_table_iter_next (&iter, &name, &value)) { + if (name && value) { + cookie = cookie_new_internal (name, value, + NULL, NULL, 0); + cookies = g_slist_prepend (cookies, cookie); + } + } + soup_header_free_param_list (params); + + return g_slist_reverse (cookies); +} + +/** + * soup_cookies_to_response: + * @cookies: (element-type SoupCookie): a #GSList of #SoupCookie + * @msg: a #SoupMessage + * + * Appends a "Set-Cookie" response header to @msg for each cookie in + * @cookies. (This is in addition to any other "Set-Cookie" headers + * @msg may already have.) + * + * Since: 2.24 + **/ +void +soup_cookies_to_response (GSList *cookies, SoupMessage *msg) +{ + GString *header; + + header = g_string_new (NULL); + while (cookies) { + serialize_cookie (cookies->data, header, TRUE); + soup_message_headers_append (msg->response_headers, + "Set-Cookie", header->str); + g_string_truncate (header, 0); + cookies = cookies->next; + } + g_string_free (header, TRUE); +} + +/** + * soup_cookies_to_request: + * @cookies: (element-type SoupCookie): a #GSList of #SoupCookie + * @msg: a #SoupMessage + * + * Adds the name and value of each cookie in @cookies to @msg's + * "Cookie" request. (If @msg already has a "Cookie" request header, + * these cookies will be appended to the cookies already present. Be + * careful that you do not append the same cookies twice, eg, when + * requeuing a message.) 
+ * + * Since: 2.24 + **/ +void +soup_cookies_to_request (GSList *cookies, SoupMessage *msg) +{ + GString *header; + + header = g_string_new (soup_message_headers_get_one (msg->request_headers, + "Cookie")); + while (cookies) { + serialize_cookie (cookies->data, header, FALSE); + cookies = cookies->next; + } + soup_message_headers_replace (msg->request_headers, + "Cookie", header->str); + g_string_free (header, TRUE); +} + +/** + * soup_cookies_free: (skip) + * @cookies: (element-type SoupCookie): a #GSList of #SoupCookie + * + * Frees @cookies. + * + * Since: 2.24 + **/ +void +soup_cookies_free (GSList *cookies) +{ + GSList *c; + + for (c = cookies; c; c = c->next) + soup_cookie_free (c->data); + g_slist_free (cookies); +} + +/** + * soup_cookies_to_cookie_header: + * @cookies: (element-type SoupCookie): a #GSList of #SoupCookie + * + * Serializes a #GSList of #SoupCookie into a string suitable for + * setting as the value of the "Cookie" header. + * + * Return value: the serialization of @cookies + * + * Since: 2.24 + **/ +char * +soup_cookies_to_cookie_header (GSList *cookies) +{ + GString *str; + + g_return_val_if_fail (cookies != NULL, NULL); + + str = g_string_new (NULL); + while (cookies) { + serialize_cookie (cookies->data, str, FALSE); + cookies = cookies->next; + } + + return g_string_free (str, FALSE); +} + +/** + * soup_cookie_applies_to_uri: + * @cookie: a #SoupCookie + * @uri: a #SoupURI + * + * Tests if @cookie should be sent to @uri. + * + * (At the moment, this does not check that @cookie's domain matches + * @uri, because it assumes that the caller has already done that. + * But don't rely on that; it may change in the future.) 
+ * + * Return value: %TRUE if @cookie should be sent to @uri, %FALSE if + * not + * + * Since: 2.24 + **/ +gboolean +soup_cookie_applies_to_uri (SoupCookie *cookie, SoupURI *uri) +{ + int plen; + + if (cookie->secure && uri->scheme != SOUP_URI_SCHEME_HTTPS) + return FALSE; + + if (cookie->expires && soup_date_is_past (cookie->expires)) + return FALSE; + + /* uri->path is required to be non-NULL */ + g_return_val_if_fail (uri->path != NULL, FALSE); + + plen = strlen (cookie->path); + if (plen == 0) + return TRUE; + if (strncmp (cookie->path, uri->path, plen) != 0) + return FALSE; + if (cookie->path[plen - 1] != '/' && + uri->path[plen] && uri->path[plen] != '/') + return FALSE; + + return TRUE; +} + +gboolean +soup_cookie_equal (SoupCookie *cookie1, SoupCookie *cookie2) +{ + g_return_val_if_fail (cookie1, FALSE); + g_return_val_if_fail (cookie2, FALSE); + + return (!strcmp (cookie1->name, cookie2->name) && + !strcmp (cookie1->value, cookie2->value) && + !strcmp (cookie1->path, cookie2->path)); +} diff --git a/libsoup/soup-cookie.h b/libsoup/soup-cookie.h new file mode 100644 index 0000000..3a3c388 --- /dev/null +++ b/libsoup/soup-cookie.h @@ -0,0 +1,91 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright 2007, 2008 Red Hat, Inc. 
+ */ + +#ifndef SOUP_COOKIE_H +#define SOUP_COOKIE_H 1 + +#include + +G_BEGIN_DECLS + +struct _SoupCookie { + char *name; + char *value; + char *domain; + char *path; + SoupDate *expires; + gboolean secure; + gboolean http_only; +}; + +GType soup_cookie_get_type (void); +#define SOUP_TYPE_COOKIE (soup_cookie_get_type()) + +#define SOUP_COOKIE_MAX_AGE_ONE_HOUR (60 * 60) +#define SOUP_COOKIE_MAX_AGE_ONE_DAY (SOUP_COOKIE_MAX_AGE_ONE_HOUR * 24) +#define SOUP_COOKIE_MAX_AGE_ONE_WEEK (SOUP_COOKIE_MAX_AGE_ONE_DAY * 7) +#define SOUP_COOKIE_MAX_AGE_ONE_YEAR (SOUP_COOKIE_MAX_AGE_ONE_DAY * 365.2422) + +SoupCookie *soup_cookie_new (const char *name, + const char *value, + const char *domain, + const char *path, + int max_age); +SoupCookie *soup_cookie_parse (const char *header, + SoupURI *origin); +SoupCookie *soup_cookie_copy (SoupCookie *cookie); + +const char *soup_cookie_get_name (SoupCookie *cookie); +void soup_cookie_set_name (SoupCookie *cookie, + const char *name); +const char *soup_cookie_get_value (SoupCookie *cookie); +void soup_cookie_set_value (SoupCookie *cookie, + const char *value); +const char *soup_cookie_get_domain (SoupCookie *cookie); +void soup_cookie_set_domain (SoupCookie *cookie, + const char *domain); +const char *soup_cookie_get_path (SoupCookie *cookie); +void soup_cookie_set_path (SoupCookie *cookie, + const char *path); +void soup_cookie_set_max_age (SoupCookie *cookie, + int max_age); +SoupDate *soup_cookie_get_expires (SoupCookie *cookie); +void soup_cookie_set_expires (SoupCookie *cookie, + SoupDate *expires); +gboolean soup_cookie_get_secure (SoupCookie *cookie); +void soup_cookie_set_secure (SoupCookie *cookie, + gboolean secure); +gboolean soup_cookie_get_http_only (SoupCookie *cookie); +void soup_cookie_set_http_only (SoupCookie *cookie, + gboolean http_only); + +char *soup_cookie_to_set_cookie_header (SoupCookie *cookie); +char *soup_cookie_to_cookie_header (SoupCookie *cookie); + +gboolean soup_cookie_applies_to_uri (SoupCookie *cookie, + 
SoupURI *uri); +gboolean soup_cookie_equal (SoupCookie *cookie1, + SoupCookie *cookie2); + +void soup_cookie_free (SoupCookie *cookie); + +GSList *soup_cookies_from_response (SoupMessage *msg); +GSList *soup_cookies_from_request (SoupMessage *msg); + +void soup_cookies_to_response (GSList *cookies, + SoupMessage *msg); +void soup_cookies_to_request (GSList *cookies, + SoupMessage *msg); + +void soup_cookies_free (GSList *cookies); + +char *soup_cookies_to_cookie_header (GSList *cookies); + +gboolean soup_cookie_domain_matches (SoupCookie *cookie, + const char *host); + +G_END_DECLS + +#endif /* SOUP_COOKIE_H */ diff --git a/libsoup/soup-date.c b/libsoup/soup-date.c new file mode 100644 index 0000000..a8a32e9 --- /dev/null +++ b/libsoup/soup-date.c @@ -0,0 +1,938 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * soup-date.c: Date/time handling + * + * Copyright (C) 2005, Novell, Inc. + * Copyright (C) 2007, Red Hat, Inc. + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include +#include + +#include "soup-date.h" + +/** + * SoupDate: + * @year: the year, 1 to 9999 + * @month: the month, 1 to 12 + * @day: day of the month, 1 to 31 + * @hour: hour of the day, 0 to 23 + * @minute: minute, 0 to 59 + * @second: second, 0 to 59 (or up to 61 in the case of leap seconds) + * @utc: %TRUE if the date is in UTC + * @offset: offset from UTC + + * A date and time. The date is assumed to be in the (proleptic) + * Gregorian calendar. The time is in UTC if @utc is %TRUE. Otherwise, + * the time is a local time, and @offset gives the offset from UTC in + * minutes (such that adding @offset to the time would give the + * correct UTC time). If @utc is %FALSE and @offset is 0, then the + * %SoupDate represents a "floating" time with no associated timezone + * information. 
+ **/ + +/* Do not internationalize */ +static const char *const months[] = { + "Jan", "Feb", "Mar", "Apr", "May", "Jun", + "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" +}; + +/* Do not internationalize */ +static const char *const days[] = { + "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" +}; + +static const int nonleap_days_in_month[] = { + 0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 +}; + +static const int nonleap_days_before[] = { + 0, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 +}; + +static inline gboolean +is_leap_year (int year) +{ + return (year % 4 == 0 && (year % 100 != 0 || year % 400 == 0)); +} + +/* Computes the number of days since proleptic Gregorian 0000-12-31. + * (That is, 0001-01-01 is "1", and 1970-01-01 is 719163. + */ +static int +rata_die_day (SoupDate *date) +{ + int day; + + day = (date->year - 1) * 365 + ((date->year - 1) / 4) - + ((date->year - 1) / 100) + ((date->year - 1) / 400); + day += nonleap_days_before[date->month] + date->day; + if (is_leap_year (date->year) && date->month > 2) + day++; + return day; +} + +#define TIME_T_EPOCH_RATA_DIE_DAY 719163 + +static inline int +days_in_month (int month, int year) +{ + if (month == 2 && is_leap_year (year)) + return 29; + else + return nonleap_days_in_month[month]; +} + +GType +soup_date_get_type (void) +{ + static volatile gsize type_volatile = 0; + + if (g_once_init_enter (&type_volatile)) { + GType type = g_boxed_type_register_static ( + g_intern_static_string ("SoupDate"), + (GBoxedCopyFunc) soup_date_copy, + (GBoxedFreeFunc) soup_date_free); + g_once_init_leave (&type_volatile, type); + } + return type_volatile; +} + +static void +soup_date_fixup (SoupDate *date) +{ + /* We only correct date->second if it's negative or too high + * to be a leap second. 
+ */ + if (date->second < 0 || date->second > 61) { + date->minute += date->second / 60; + date->second %= 60; + if (date->second < 0) + date->second += 60; + } + + if (date->minute < 0 || date->minute > 59) { + date->hour += date->minute / 60; + date->minute %= 60; + if (date->minute < 0) + date->minute += 60; + } + + if (date->hour < 0 || date->hour > 23) { + date->day += date->hour / 24; + date->hour %= 24; + if (date->hour < 0) + date->hour += 24; + } + + /* Have to make sure month is valid before we can look at the + * day. + */ + if (date->month < 1 || date->month > 12) { + date->year += ((date->month - 1) / 12) + 1; + date->month = ((date->month - 1) % 12) + 1; + if (date->month < 1) + date->month += 12; + } + + if (date->day < 0) { + while (date->day < 0) { + if (date->month == 1) { + date->month = 12; + date->year--; + } else + date->month--; + date->day += days_in_month (date->month, date->year); + } + } else { + while (date->day > days_in_month (date->month, date->year)) { + date->day -= days_in_month (date->month, date->year); + if (date->month == 12) { + date->month = 1; + date->year++; + } else + date->month++; + } + } +} + +/** + * soup_date_new: + * @year: the year (1-9999) + * @month: the month (1-12) + * @day: the day of the month (1-31, as appropriate for @month) + * @hour: the hour (0-23) + * @minute: the minute (0-59) + * @second: the second (0-59, or up to 61 for leap seconds) + * + * Creates a #SoupDate representing the indicated time, UTC. 
+ * + * Return value: a new #SoupDate + **/ +SoupDate * +soup_date_new (int year, int month, int day, + int hour, int minute, int second) +{ + SoupDate *date = g_slice_new (SoupDate); + + date->year = year; + date->month = month; + date->day = day; + date->hour = hour; + date->minute = minute; + date->second = second; + date->utc = TRUE; + date->offset = 0; + + return date; +} + +/** + * soup_date_new_from_now: + * @offset_seconds: offset from current time + * + * Creates a #SoupDate representing a time @offset_seconds after the + * current time (or before it, if @offset_seconds is negative). If + * offset_seconds is 0, returns the current time. + * + * If @offset_seconds would indicate a time not expressible as a + * #time_t, the return value will be clamped into range. + * + * Return value: a new #SoupDate + **/ +SoupDate * +soup_date_new_from_now (int offset_seconds) +{ + time_t now = time (NULL); + time_t then = now + offset_seconds; + + if (sizeof (time_t) == 4) { + if (offset_seconds < 0 && then > now) + return soup_date_new_from_time_t (-G_MAXINT); + else if (offset_seconds > 0 && then < now) + return soup_date_new_from_time_t (G_MAXINT); + } + return soup_date_new_from_time_t (then); +} + +static gboolean +parse_iso8601_date (SoupDate *date, const char *date_string) +{ + gulong val; + + if (strlen (date_string) < 15) + return FALSE; + if (date_string[4] == '-' && + date_string[7] == '-' && + date_string[10] == 'T') { + /* YYYY-MM-DD */ + date->year = atoi (date_string); + date->month = atoi (date_string + 5); + date->day = atoi (date_string + 8); + date_string += 11; + } else if (date_string[8] == 'T') { + /* YYYYMMDD */ + val = atoi (date_string); + date->year = val / 10000; + date->month = (val % 10000) / 100; + date->day = val % 100; + date_string += 9; + } else + return FALSE; + + if (strlen (date_string) >= 8 && + date_string[2] == ':' && date_string[5] == ':') { + /* HH:MM:SS */ + date->hour = atoi (date_string); + date->minute = atoi (date_string + 
3); + date->second = atoi (date_string + 6); + date_string += 8; + } else if (strlen (date_string) >= 6) { + /* HHMMSS */ + val = strtoul (date_string, (char **)&date_string, 10); + date->hour = val / 10000; + date->minute = (val % 10000) / 100; + date->second = val % 100; + } else + return FALSE; + + if (*date_string == '.' || *date_string == ',') + (void) strtoul (date_string + 1, (char **)&date_string, 10); + + if (*date_string == 'Z') { + date_string++; + date->utc = TRUE; + date->offset = 0; + } else if (*date_string == '+' || *date_string == '-') { + int sign = (*date_string == '+') ? -1 : 1; + val = strtoul (date_string + 1, (char **)&date_string, 10); + if (*date_string == ':') + val = 60 * val + strtoul (date_string + 1, (char **)&date_string, 10); + else + val = 60 * (val / 100) + (val % 100); + date->offset = sign * val; + date->utc = !val; + } else { + date->offset = 0; + date->utc = FALSE; + } + + return !*date_string; +} + +static inline gboolean +parse_day (SoupDate *date, const char **date_string) +{ + char *end; + + date->day = strtoul (*date_string, &end, 10); + if (end == (char *)*date_string) + return FALSE; + + while (*end == ' ' || *end == '-') + end++; + *date_string = end; + return TRUE; +} + +static inline gboolean +parse_month (SoupDate *date, const char **date_string) +{ + int i; + + for (i = 0; i < G_N_ELEMENTS (months); i++) { + if (!g_ascii_strncasecmp (*date_string, months[i], 3)) { + date->month = i + 1; + *date_string += 3; + while (**date_string == ' ' || **date_string == '-') + (*date_string)++; + return TRUE; + } + } + return FALSE; +} + +static inline gboolean +parse_year (SoupDate *date, const char **date_string) +{ + char *end; + + date->year = strtoul (*date_string, &end, 10); + if (end == (char *)*date_string) + return FALSE; + + if (end == (char *)*date_string + 2) { + if (date->year < 70) + date->year += 2000; + else + date->year += 1900; + } else if (end == (char *)*date_string + 3) + date->year += 1900; + + while (*end 
== ' ' || *end == '-') + end++; + *date_string = end; + return TRUE; +} + +static inline gboolean +parse_time (SoupDate *date, const char **date_string) +{ + char *p, *end; + + date->hour = strtoul (*date_string, &end, 10); + if (end == (char *)*date_string || *end++ != ':') + return FALSE; + p = end; + date->minute = strtoul (p, &end, 10); + if (end == p || *end++ != ':') + return FALSE; + p = end; + date->second = strtoul (p, &end, 10); + if (end == p) + return FALSE; + p = end; + + while (*p == ' ') + p++; + *date_string = p; + return TRUE; +} + +static inline gboolean +parse_timezone (SoupDate *date, const char **date_string) +{ + if (!**date_string) { + date->utc = FALSE; + date->offset = 0; + } else if (**date_string == '+' || **date_string == '-') { + gulong val; + int sign = (**date_string == '+') ? -1 : 1; + val = strtoul (*date_string + 1, (char **)date_string, 10); + if (**date_string == ':') + val = 60 * val + strtoul (*date_string + 1, (char **)date_string, 10); + else + val = 60 * (val / 100) + (val % 100); + date->offset = sign * val; + date->utc = (sign == -1) && !val; + } else if (**date_string == 'Z') { + date->offset = 0; + date->utc = TRUE; + (*date_string)++; + } else if (!strcmp (*date_string, "GMT") || + !strcmp (*date_string, "UTC")) { + date->offset = 0; + date->utc = TRUE; + (*date_string) += 3; + } else if (strchr ("ECMP", **date_string) && + ((*date_string)[1] == 'D' || (*date_string)[1] == 'S') && + (*date_string)[2] == 'T') { + date->offset = -60 * (5 * strcspn ("ECMP", *date_string)); + if ((*date_string)[1] == 'D') + date->offset += 60; + date->utc = FALSE; + } else + return FALSE; + return TRUE; +} + +static gboolean +parse_textual_date (SoupDate *date, const char *date_string) +{ + /* If it starts with a word, it must be a weekday, which we skip */ + if (g_ascii_isalpha (*date_string)) { + while (g_ascii_isalpha (*date_string)) + date_string++; + if (*date_string == ',') + date_string++; + while (g_ascii_isspace (*date_string)) + 
date_string++; + } + + /* If there's now another word, this must be an asctime-date */ + if (g_ascii_isalpha (*date_string)) { + /* (Sun) Nov 6 08:49:37 1994 */ + if (!parse_month (date, &date_string) || + !parse_day (date, &date_string) || + !parse_time (date, &date_string) || + !parse_year (date, &date_string)) + return FALSE; + + /* There shouldn't be a timezone, but check anyway */ + parse_timezone (date, &date_string); + } else { + /* Non-asctime date, so some variation of + * (Sun,) 06 Nov 1994 08:49:37 GMT + */ + if (!parse_day (date, &date_string) || + !parse_month (date, &date_string) || + !parse_year (date, &date_string) || + !parse_time (date, &date_string)) + return FALSE; + + /* This time there *should* be a timezone, but we + * survive if there isn't. + */ + parse_timezone (date, &date_string); + } + return TRUE; +} + +/** + * SoupDateFormat: + * @SOUP_DATE_HTTP: RFC 1123 format, used by the HTTP "Date" header. Eg + * "Sun, 06 Nov 1994 08:49:37 GMT" + * @SOUP_DATE_COOKIE: The format for the "Expires" timestamp in the + * Netscape cookie specification. Eg, "Sun, 06-Nov-1994 08:49:37 GMT". + * @SOUP_DATE_RFC2822: RFC 2822 format, eg "Sun, 6 Nov 1994 09:49:37 -0100" + * @SOUP_DATE_ISO8601_COMPACT: ISO 8601 date/time with no optional + * punctuation. Eg, "19941106T094937-0100". + * @SOUP_DATE_ISO8601_FULL: ISO 8601 date/time with all optional + * punctuation. Eg, "1994-11-06T09:49:37-01:00". + * @SOUP_DATE_ISO8601_XMLRPC: ISO 8601 date/time as used by XML-RPC. + * Eg, "19941106T09:49:37". + * @SOUP_DATE_ISO8601: An alias for @SOUP_DATE_ISO8601_FULL. + * + * Date formats that soup_date_to_string() can use. + * + * @SOUP_DATE_HTTP and @SOUP_DATE_COOKIE always coerce the time to + * UTC. @SOUP_DATE_ISO8601_XMLRPC uses the time as given, ignoring the + * offset completely. @SOUP_DATE_RFC2822 and the other ISO 8601 + * variants use the local time, appending the offset information if + * available. 
+ * + * This enum may be extended with more values in future releases. + **/ + +/** + * soup_date_new_from_string: + * @date_string: the date in some plausible format + * + * Parses @date_string and tries to extract a date from it. This + * recognizes all of the "HTTP-date" formats from RFC 2616, all ISO + * 8601 formats containing both a time and a date, RFC 2822 dates, + * and reasonable approximations thereof. (Eg, it is lenient about + * whitespace, leading "0"s, etc.) + * + * Return value: a new #SoupDate, or %NULL if @date_string could not + * be parsed. + **/ +SoupDate * +soup_date_new_from_string (const char *date_string) +{ + SoupDate *date; + gboolean success; + + g_return_val_if_fail (date_string != NULL, NULL); + + date = g_slice_new (SoupDate); + + while (g_ascii_isspace (*date_string)) + date_string++; + + /* If it starts with a digit, it's either an ISO 8601 date, or + * an RFC2822 date without the optional weekday; in the later + * case, there will be a month name later on, so look for one + * of the month-start letters. + */ + if (g_ascii_isdigit (*date_string) && + !strpbrk (date_string, "JFMASOND")) + success = parse_iso8601_date (date, date_string); + else + success = parse_textual_date (date, date_string); + + if (!success) { + g_slice_free (SoupDate, date); + return NULL; + } + + if (date->year < 1 || date->year > 9999 || + date->month < 1 || date->month > 12 || + date->day < 1 || + date->day > days_in_month (date->month, date->year) || + date->hour < 0 || date->hour > 24 || + date->minute < 0 || date->minute > 59 || + date->second < 0 || date->second > 61) { + soup_date_free (date); + return NULL; + } + if (date->hour == 24) { + /* ISO8601 allows this explicitly. We allow it for + * other types as well just for simplicity. 
+ */ + if (date->minute == 0 && date->second == 0) + soup_date_fixup (date); + else { + soup_date_free (date); + return NULL; + } + } + + return date; +} + +/** + * soup_date_new_from_time_t: + * @when: a #time_t + * + * Creates a #SoupDate corresponding to @when + * + * Return value: a new #SoupDate + **/ +SoupDate * +soup_date_new_from_time_t (time_t when) +{ + struct tm tm; + +#ifdef HAVE_GMTIME_R + gmtime_r (&when, &tm); +#else + tm = *gmtime (&when); +#endif + + return soup_date_new (tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, + tm.tm_hour, tm.tm_min, tm.tm_sec); +} + +static const char * +soup_date_weekday (SoupDate *date) +{ + /* Proleptic Gregorian 0001-01-01 was a Monday, which + * corresponds to 1 in the days[] array. + */ + return days[rata_die_day (date) % 7]; +} + +/** + * soup_date_to_string: + * @date: a #SoupDate + * @format: the format to generate the date in + * + * Converts @date to a string in the format described by @format. + * + * Return value: @date as a string + **/ +char * +soup_date_to_string (SoupDate *date, SoupDateFormat format) +{ + g_return_val_if_fail (date != NULL, NULL); + + if (format == SOUP_DATE_HTTP || format == SOUP_DATE_COOKIE) { + /* HTTP and COOKIE formats require UTC timestamp, so coerce + * @date if it's non-UTC. 
+ */ + SoupDate utcdate; + + if (date->offset != 0) { + memcpy (&utcdate, date, sizeof (SoupDate)); + utcdate.minute += utcdate.offset; + utcdate.offset = 0; + utcdate.utc = TRUE; + soup_date_fixup (&utcdate); + date = &utcdate; + } + + switch (format) { + case SOUP_DATE_HTTP: + /* "Sun, 06 Nov 1994 08:49:37 GMT" */ + return g_strdup_printf ( + "%s, %02d %s %04d %02d:%02d:%02d GMT", + soup_date_weekday (date), date->day, + months[date->month - 1], date->year, + date->hour, date->minute, date->second); + + case SOUP_DATE_COOKIE: + /* "Sun, 06-Nov-1994 08:49:37 GMT" */ + return g_strdup_printf ( + "%s, %02d-%s-%04d %02d:%02d:%02d GMT", + soup_date_weekday (date), date->day, + months[date->month - 1], date->year, + date->hour, date->minute, date->second); + + default: + g_return_val_if_reached (NULL); + } + } else if (format == SOUP_DATE_ISO8601_XMLRPC) { + /* Always "floating", ignore offset */ + return g_strdup_printf ("%04d%02d%02dT%02d:%02d:%02d", + date->year, date->month, date->day, + date->hour, date->minute, date->second); + } else { + int hour_offset, minute_offset; + char zone[8], sign; + + /* For other ISO8601 formats or RFC2822, use the + * offset given in @date. For ISO8601 formats, use "Z" + * for UTC, +-offset for non-UTC, and nothing for + * floating. For RFC2822, use +-offset for UTC or + * non-UTC, and -0000 for floating. + */ + hour_offset = abs (date->offset) / 60; + minute_offset = abs (date->offset) - hour_offset * 60; + + switch (format) { + case SOUP_DATE_ISO8601_COMPACT: + /* "19941106T084937[zone]" */ + if (date->utc) + strcpy (zone, "Z"); + else if (date->offset) { + g_snprintf (zone, sizeof (zone), "%c%02d%02d", + date->offset > 0 ? 
'-' : '+', + hour_offset, minute_offset); + } else + *zone = '\0'; + + return g_strdup_printf ( + "%04d%02d%02dT%02d%02d%02d%s", + date->year, date->month, date->day, + date->hour, date->minute, date->second, + zone); + + case SOUP_DATE_ISO8601_FULL: + /* "1994-11-06T08:49:37[zone]" */ + if (date->utc) + strcpy (zone, "Z"); + else if (date->offset) { + g_snprintf (zone, sizeof (zone), "%c%02d:%02d", + date->offset > 0 ? '-' : '+', + hour_offset, minute_offset); + } else + *zone = '\0'; + + return g_strdup_printf ( + "%04d-%02d-%02dT%02d:%02d:%02d%s", + date->year, date->month, date->day, + date->hour, date->minute, date->second, + zone); + + case SOUP_DATE_RFC2822: + /* "Sun, 6 Nov 1994 09:49:37 -0100" */ + if (date->offset) + sign = (date->offset > 0) ? '-' : '+'; + else + sign = date->utc ? '+' : '-'; + return g_strdup_printf ( + "%s, %d %s %04d %02d:%02d:%02d %c%02d%02d", + soup_date_weekday (date), date->day, + months[date->month - 1], date->year, + date->hour, date->minute, date->second, + sign, hour_offset, minute_offset); + + default: + return NULL; + } + } +} + +/** + * soup_date_to_time_t: + * @date: a #SoupDate + * + * Converts @date to a %time_t. + * + * If @date is not representable as a %time_t, it will be clamped into + * range. (In particular, some HTTP cookies have expiration dates + * after "Y2.038k" (2038-01-19T03:14:07Z).) + * + * Return value: @date as a %time_t + **/ +time_t +soup_date_to_time_t (SoupDate *date) +{ + time_t tt; + GTimeVal val; + + g_return_val_if_fail (date != NULL, 0); + + /* FIXME: offset, etc */ + + if (date->year < 1970) + return 0; + + /* If the year is later than 2038, we're guaranteed to + * overflow a 32-bit time_t. (If it's exactly 2038, we'll + * *probably* overflow, but only by a little, and it's easiest + * to test that at the end by seeing if the result has turned + * negative.) 
+ */ + if (sizeof (time_t) == 4 && date->year > 2038) + return (time_t)0x7fffffff; + + soup_date_to_timeval (date, &val); + tt = val.tv_sec; + + if (sizeof (time_t) == 4 && tt < 0) + return (time_t)0x7fffffff; + return tt; +} + +/** + * soup_date_to_timeval: + * @date: a #SoupDate + * @time: (out): a #GTimeVal structure in which to store the converted time. + * + * Converts @date to a #GTimeVal. + * + * Since: 2.24 + */ +void +soup_date_to_timeval (SoupDate *date, GTimeVal *time) +{ + g_return_if_fail (date != NULL); + g_return_if_fail (time != NULL); + + /* FIXME: offset, etc */ + + time->tv_sec = rata_die_day (date) - TIME_T_EPOCH_RATA_DIE_DAY; + time->tv_sec = ((((time->tv_sec * 24) + date->hour) * 60) + date->minute) * 60 + date->second; + time->tv_usec = 0; +} + +/** + * soup_date_is_past: + * @date: a #SoupDate + * + * Determines if @date is in the past. + * + * Return value: %TRUE if @date is in the past + * + * Since: 2.24 + **/ +gboolean +soup_date_is_past (SoupDate *date) +{ + g_return_val_if_fail (date != NULL, TRUE); + + /* optimization */ + if (date->year < 2010) + return TRUE; + + return soup_date_to_time_t (date) < time (NULL); +} + +/** + * soup_date_get_year: + * @date: a #SoupDate + * + * Gets @date's year. + * + * Return value: @date's year + * + * Since: 2.32 + **/ +int +soup_date_get_year (SoupDate *date) +{ + return date->year; +} + +/** + * soup_date_get_month: + * @date: a #SoupDate + * + * Gets @date's month. + * + * Return value: @date's month + * + * Since: 2.32 + **/ +int +soup_date_get_month (SoupDate *date) +{ + return date->month; +} + +/** + * soup_date_get_day: + * @date: a #SoupDate + * + * Gets @date's day. + * + * Return value: @date's day + * + * Since: 2.32 + **/ +int +soup_date_get_day (SoupDate *date) +{ + return date->day; +} + +/** + * soup_date_get_hour: + * @date: a #SoupDate + * + * Gets @date's hour. 
+ * + * Return value: @date's hour + * + * Since: 2.32 + **/ +int +soup_date_get_hour (SoupDate *date) +{ + return date->hour; +} + +/** + * soup_date_get_minute: + * @date: a #SoupDate + * + * Gets @date's minute. + * + * Return value: @date's minute + * + * Since: 2.32 + **/ +int +soup_date_get_minute (SoupDate *date) +{ + return date->minute; +} + +/** + * soup_date_get_second: + * @date: a #SoupDate + * + * Gets @date's second. + * + * Return value: @date's second + * + * Since: 2.32 + **/ +int +soup_date_get_second (SoupDate *date) +{ + return date->second; +} + +/** + * soup_date_get_utc: + * @date: a #SoupDate + * + * Gets @date's UTC flag + * + * Return value: %TRUE if @date is UTC. + * + * Since: 2.32 + **/ +gboolean +soup_date_get_utc (SoupDate *date) +{ + return date->utc; +} + +/** + * soup_date_get_offset: + * @date: a #SoupDate + * + * Gets @date's offset from UTC. + * + * Return value: @date's offset from UTC. If soup_date_get_utc() + * returns %FALSE but soup_date_get_offset() returns 0, that means the + * date is a "floating" time with no associated offset information. + * + * Since: 2.32 + **/ +int +soup_date_get_offset (SoupDate *date) +{ + return date->offset; +} + +/** + * soup_date_copy: + * @date: a #SoupDate + * + * Copies @date. + **/ +SoupDate * +soup_date_copy (SoupDate *date) +{ + SoupDate *copy; + + g_return_val_if_fail (date != NULL, NULL); + + copy = g_slice_new (SoupDate); + memcpy (copy, date, sizeof (SoupDate)); + return copy; +} + +/** + * soup_date_free: + * @date: a #SoupDate + * + * Frees @date. + **/ +void +soup_date_free (SoupDate *date) +{ + g_return_if_fail (date != NULL); + + g_slice_free (SoupDate, date); +} diff --git a/libsoup/soup-date.h b/libsoup/soup-date.h new file mode 100644 index 0000000..0924607 --- /dev/null +++ b/libsoup/soup-date.h @@ -0,0 +1,73 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2005 Novell, Inc. + * Copyright (C) 2007 Red Hat, Inc. 
+ */ + +#ifndef SOUP_DATE_H +#define SOUP_DATE_H 1 + +#include +#include + +G_BEGIN_DECLS + +struct _SoupDate { + int year; + int month; + int day; + + int hour; + int minute; + int second; + + gboolean utc; + int offset; +}; + +typedef enum { + SOUP_DATE_HTTP = 1, + SOUP_DATE_COOKIE, + SOUP_DATE_RFC2822, + SOUP_DATE_ISO8601_COMPACT, + SOUP_DATE_ISO8601_FULL, + SOUP_DATE_ISO8601 = SOUP_DATE_ISO8601_FULL, + SOUP_DATE_ISO8601_XMLRPC +} SoupDateFormat; + +GType soup_date_get_type (void); +#define SOUP_TYPE_DATE (soup_date_get_type ()) + +SoupDate *soup_date_new (int year, + int month, + int day, + int hour, + int minute, + int second); +SoupDate *soup_date_new_from_string (const char *date_string); +SoupDate *soup_date_new_from_time_t (time_t when); +SoupDate *soup_date_new_from_now (int offset_seconds); + +char *soup_date_to_string (SoupDate *date, + SoupDateFormat format); +time_t soup_date_to_time_t (SoupDate *date); +void soup_date_to_timeval (SoupDate *date, + GTimeVal *time); + +gboolean soup_date_is_past (SoupDate *date); + +int soup_date_get_year (SoupDate *date); +int soup_date_get_month (SoupDate *date); +int soup_date_get_day (SoupDate *date); +int soup_date_get_hour (SoupDate *date); +int soup_date_get_minute (SoupDate *date); +int soup_date_get_second (SoupDate *date); +int soup_date_get_utc (SoupDate *date); +int soup_date_get_offset (SoupDate *date); + +SoupDate *soup_date_copy (SoupDate *date); +void soup_date_free (SoupDate *date); + +G_END_DECLS + +#endif /* SOUP_DATE_H */ diff --git a/libsoup/soup-directory-input-stream.c b/libsoup/soup-directory-input-stream.c new file mode 100644 index 0000000..3ba74b7 --- /dev/null +++ b/libsoup/soup-directory-input-stream.c @@ -0,0 +1,201 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2008, 2010 Red Hat, Inc. + * Copyright (C) 2010 Igalia, S.L. 
+ * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include "soup-directory-input-stream.h" + +#include +#include +#include + +#define INIT_STRING "OMG!" +#define EXIT_STRING "
" + +G_DEFINE_TYPE (SoupDirectoryInputStream, soup_directory_input_stream, G_TYPE_INPUT_STREAM) + +static SoupBuffer * +soup_directory_input_stream_parse_info (SoupDirectoryInputStream *stream, + GFileInfo *info) +{ + SoupBuffer *buffer; + GString *string; + const char *s; + char *escaped, *path, *xml_string; + + if (!g_file_info_get_name (info)) + return NULL; + + s = g_file_info_get_display_name (info); + if (!s) { + s = g_file_info_get_name (info); + /* FIXME: convert somehow? */ + if (!g_utf8_validate (s, -1, NULL)) + return NULL; + } + string = g_string_new (""); + + xml_string = g_markup_escape_text (s, -1); + escaped = g_uri_escape_string (g_file_info_get_name (info), NULL, FALSE); + path = g_strconcat (stream->uri, "/", escaped, NULL); + g_free (escaped); + g_string_append_printf (string, "
%s", path, xml_string); + g_free (path); + g_free (xml_string); + g_string_append (string, ""); + + buffer = soup_buffer_new (SOUP_MEMORY_TAKE, string->str, string->len); + g_string_free (string, FALSE); + + return buffer; +} + +static SoupBuffer * +soup_directory_input_stream_read_next_file (SoupDirectoryInputStream *stream, + GCancellable *cancellable, + GError **error) +{ + GFileInfo *info; + SoupBuffer *buffer; + GError *err = NULL; + + do { + info = g_file_enumerator_next_file (stream->enumerator, cancellable, &err); + if (info == NULL) { + if (err) { + g_propagate_error (error, err); + return NULL; + } else if (!stream->done) { + stream->done = TRUE; + return soup_buffer_new (SOUP_MEMORY_STATIC, + EXIT_STRING, + sizeof (EXIT_STRING)); + } else { + return NULL; + } + } + + buffer = soup_directory_input_stream_parse_info (stream, info); + g_object_unref (info); + } while (buffer == NULL); + + return buffer; +} + +static gssize +soup_directory_input_stream_read (GInputStream *input, + void *buffer, + gsize count, + GCancellable *cancellable, + GError **error) +{ + SoupDirectoryInputStream *stream = SOUP_DIRECTORY_INPUT_STREAM (input); + gsize total, size; + + for (total = 0; total < count; total += size) { + if (stream->buffer == NULL) { + stream->buffer = soup_directory_input_stream_read_next_file (stream, cancellable, error); + if (stream->buffer == NULL) { + /* FIXME: Is this correct or should we forward the error? 
*/ + if (total) + g_clear_error (error); + return total; + } + } + + size = MIN (stream->buffer->length, count - total); + memcpy ((char *)buffer + total, stream->buffer->data, size); + if (size == stream->buffer->length) { + soup_buffer_free (stream->buffer); + stream->buffer = NULL; + } else { + SoupBuffer *sub = soup_buffer_new_subbuffer (stream->buffer, + size, + stream->buffer->length - size); + soup_buffer_free (stream->buffer); + stream->buffer = sub; + } + } + + return total; +} + +static gboolean +soup_directory_input_stream_close (GInputStream *input, + GCancellable *cancellable, + GError **error) +{ + SoupDirectoryInputStream *stream = SOUP_DIRECTORY_INPUT_STREAM (input); + gboolean result; + + if (stream->buffer) { + soup_buffer_free (stream->buffer); + stream->buffer = NULL; + } + + result = g_file_enumerator_close (stream->enumerator, + cancellable, + error); + g_object_unref (stream->enumerator); + stream->enumerator = NULL; + + g_free (stream->uri); + stream->uri = NULL; + + return result; +} + +static void +soup_directory_input_stream_class_init (SoupDirectoryInputStreamClass *stream_class) +{ + GInputStreamClass *inputstream_class = G_INPUT_STREAM_CLASS (stream_class); + + inputstream_class->read_fn = soup_directory_input_stream_read; + inputstream_class->close_fn = soup_directory_input_stream_close; +} + +static void +soup_directory_input_stream_init (SoupDirectoryInputStream *stream) +{ + stream->buffer = soup_buffer_new (SOUP_MEMORY_STATIC, + INIT_STRING, + sizeof (INIT_STRING)); +} + +GInputStream * +soup_directory_input_stream_new (GFileEnumerator *enumerator, + SoupURI *uri) +{ + GInputStream *stream; + + g_return_val_if_fail (G_IS_FILE_ENUMERATOR (enumerator), NULL); + g_return_val_if_fail (uri != NULL, NULL); + + stream = g_object_new (SOUP_TYPE_DIRECTORY_INPUT_STREAM, NULL); + + SOUP_DIRECTORY_INPUT_STREAM (stream)->enumerator = g_object_ref (enumerator); + SOUP_DIRECTORY_INPUT_STREAM (stream)->uri = soup_uri_to_string (uri, FALSE); + + 
return stream; +} + diff --git a/libsoup/soup-directory-input-stream.h b/libsoup/soup-directory-input-stream.h new file mode 100644 index 0000000..ae2b4ba --- /dev/null +++ b/libsoup/soup-directory-input-stream.h @@ -0,0 +1,62 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2010 Red Hat, Inc. + * Copyright (C) 2010 Igalia, S.L. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ */ + +#ifndef SOUP_DIRECTORY_INPUT_STREAM_H +#define SOUP_DIRECTORY_INPUT_STREAM_H 1 + +#include +#include +#include + +G_BEGIN_DECLS + +#define SOUP_TYPE_DIRECTORY_INPUT_STREAM (soup_directory_input_stream_get_type ()) +#define SOUP_DIRECTORY_INPUT_STREAM(obj) (G_TYPE_CHECK_INSTANCE_CAST ((obj), SOUP_TYPE_DIRECTORY_INPUT_STREAM, SoupDirectoryInputStream)) +#define SOUP_DIRECTORY_INPUT_STREAM_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST ((klass), SOUP_TYPE_DIRECTORY_INPUT_STREAM, SoupDirectoryInputStreamClass)) +#define SOUP_IS_DIRECTORY_INPUT_STREAM(obj) (G_TYPE_CHECK_INSTANCE_TYPE ((obj), SOUP_TYPE_DIRECTORY_INPUT_STREAM)) +#define SOUP_IS_DIRECTORY_INPUT_STREAM_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((obj), SOUP_TYPE_DIRECTORY_INPUT_STREAM)) +#define SOUP_DIRECTORY_INPUT_STREAM_GET_CLASS(obj) (G_TYPE_INSTANCE_GET_CLASS ((obj), SOUP_TYPE_DIRECTORY_INPUT_STREAM, SoupDirectoryInputStreamClass)) + +typedef struct _SoupDirectoryInputStream SoupDirectoryInputStream; +typedef struct _SoupDirectoryInputStreamClass SoupDirectoryInputStreamClass; + +struct _SoupDirectoryInputStream { + GInputStream parent; + + GFileEnumerator *enumerator; + char *uri; + SoupBuffer *buffer; + gboolean done; +}; + +struct _SoupDirectoryInputStreamClass { + GInputStreamClass parent_class; +}; + +GType soup_directory_input_stream_get_type (void); + +GInputStream *soup_directory_input_stream_new (GFileEnumerator *enumerator, + SoupURI *uri); + + +G_END_DECLS + +#endif /* SOUP_DIRECTORY_INPUT_STREAM_H */ diff --git a/libsoup/soup-enum-types.c.tmpl b/libsoup/soup-enum-types.c.tmpl new file mode 100644 index 0000000..289cef0 --- /dev/null +++ b/libsoup/soup-enum-types.c.tmpl @@ -0,0 +1,36 @@ +/*** BEGIN file-header ***/ +#include "soup.h" +#define LIBSOUP_USE_UNSTABLE_REQUEST_API +#include "soup-cache.h" +#include "soup-requester.h" +/*** END file-header ***/ + +/*** BEGIN file-production ***/ +/* enumerations from "@filename@" */ +/*** END file-production ***/ + +/*** BEGIN value-header ***/ +GType 
+@enum_name@_get_type (void) +{ + static GType etype = 0; + if (G_UNLIKELY (etype == 0)) { + static const G@Type@Value values[] = { +/*** END value-header ***/ + +/*** BEGIN value-production ***/ + { @VALUENAME@, "@VALUENAME@", "@valuenick@" }, +/*** END value-production ***/ + +/*** BEGIN value-tail ***/ + { 0, NULL, NULL } + }; + etype = g_@type@_register_static (g_intern_static_string ("@EnumName@"), values); + } + return etype; +} + +/*** END value-tail ***/ + +/*** BEGIN file-tail ***/ +/*** END file-tail ***/ diff --git a/libsoup/soup-enum-types.h.tmpl b/libsoup/soup-enum-types.h.tmpl new file mode 100644 index 0000000..e18d7e0 --- /dev/null +++ b/libsoup/soup-enum-types.h.tmpl @@ -0,0 +1,24 @@ +/*** BEGIN file-header ***/ +#ifndef __SOUP_ENUM_TYPES_H__ +#define __SOUP_ENUM_TYPES_H__ + +#include + +G_BEGIN_DECLS +/*** END file-header ***/ + +/*** BEGIN file-production ***/ + +/* enumerations from "@filename@" */ +/*** END file-production ***/ + +/*** BEGIN value-header ***/ +GType @enum_name@_get_type (void) G_GNUC_CONST; +#define SOUP_TYPE_@ENUMSHORT@ (@enum_name@_get_type ()) +/*** END value-header ***/ + +/*** BEGIN file-tail ***/ +G_END_DECLS + +#endif /* __SOUP_ENUM_TYPES_H__ */ +/*** END file-tail ***/ diff --git a/libsoup/soup-form.c b/libsoup/soup-form.c new file mode 100644 index 0000000..9e52793 --- /dev/null +++ b/libsoup/soup-form.c @@ -0,0 +1,490 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* soup-form.c : utility functions for HTML forms */ + +/* + * Copyright 2008 Red Hat, Inc. + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include + +#include "soup-form.h" +#include "soup-message.h" +#include "soup-uri.h" + +/** + * SECTION:soup-form + * @short_description: HTML form handling + * @see_also: #SoupMultipart + * + * libsoup contains several help methods for processing HTML forms as + * defined by the + * HTML 4.01 specification. 
+ **/ + +/** + * SOUP_FORM_MIME_TYPE_URLENCODED: + * + * A macro containing the value + * "application/x-www-form-urlencoded"; the default + * MIME type for POSTing HTML form data. + * + * Since: 2.26 + **/ + +/** + * SOUP_FORM_MIME_TYPE_MULTIPART: + * + * A macro containing the value + * "multipart/form-data"; the MIME type used for + * posting form data that contains files to be uploaded. + * + * Since: 2.26 + **/ + +#define XDIGIT(c) ((c) <= '9' ? (c) - '0' : ((c) & 0x4F) - 'A' + 10) +#define HEXCHAR(s) ((XDIGIT (s[1]) << 4) + XDIGIT (s[2])) + +static gboolean +form_decode (char *part) +{ + unsigned char *s, *d; + + s = d = (unsigned char *)part; + do { + if (*s == '%') { + if (!g_ascii_isxdigit (s[1]) || + !g_ascii_isxdigit (s[2])) + return FALSE; + *d++ = HEXCHAR (s); + s += 2; + } else if (*s == '+') + *d++ = ' '; + else + *d++ = *s; + } while (*s++); + + return TRUE; +} + +/** + * soup_form_decode: + * @encoded_form: data of type "application/x-www-form-urlencoded" + * + * Decodes @encoded_form, which is an urlencoded dataset as defined in the + * HTML 4.01 spec. + * + * Return value: (element-type utf8 utf8) (transfer full): a hash + * table containing the name/value pairs from @encoded_form, which you + * can free with g_hash_table_destroy().
+ **/ +GHashTable * +soup_form_decode (const char *encoded_form) +{ + GHashTable *form_data_set; + char **pairs, *eq, *name, *value; + int i; + + form_data_set = g_hash_table_new_full (g_str_hash, g_str_equal, + g_free, NULL); + pairs = g_strsplit (encoded_form, "&", -1); + for (i = 0; pairs[i]; i++) { + name = pairs[i]; + eq = strchr (name, '='); + if (eq) { + *eq = '\0'; + value = eq + 1; + } else + value = NULL; + if (!value || !form_decode (name) || !form_decode (value)) { + g_free (name); + continue; + } + + g_hash_table_replace (form_data_set, name, value); + } + g_free (pairs); + + return form_data_set; +} + +/** + * soup_form_decode_multipart: + * @msg: a #SoupMessage containing a "multipart/form-data" request body + * @file_control_name: the name of the HTML file upload control, or %NULL + * @filename: (out): return location for the name of the uploaded file + * @content_type: (out): return location for the MIME type of the uploaded file + * @file: (out): return location for the uploaded file data + * + * Decodes the "multipart/form-data" request in @msg; this is a + * convenience method for the case when you have a single file upload + * control in a form. (Or when you don't have any file upload + * controls, but are still using "multipart/form-data" anyway.) Pass + * the name of the file upload control in @file_control_name, and + * soup_form_decode_multipart() will extract the uploaded file data + * into @filename, @content_type, and @file. All of the other form + * control data will be returned (as strings, as with + * soup_form_decode()) in the returned #GHashTable. + * + * You may pass %NULL for @filename and/or @content_type if you do not + * care about those fields. soup_form_decode_multipart() may also + * return %NULL in those fields if the client did not provide that + * information. You must free the returned filename and content-type + * with g_free(), and the returned file data with soup_buffer_free(). 
+ * + * If you have a form with more than one file upload control, you will + * need to decode it manually, using soup_multipart_new_from_message() + * and soup_multipart_get_part(). + * + * Return value: (element-type utf8 utf8) (transfer full): a hash + * table containing the name/value pairs (other than + * @file_control_name) from @msg, which you can free with + * g_hash_table_destroy(). On error, it will return %NULL. + * + * Since: 2.26 + **/ +GHashTable * +soup_form_decode_multipart (SoupMessage *msg, const char *file_control_name, + char **filename, char **content_type, + SoupBuffer **file) +{ + SoupMultipart *multipart; + GHashTable *form_data_set, *params; + SoupMessageHeaders *part_headers; + SoupBuffer *part_body; + char *disposition, *name; + int i; + + multipart = soup_multipart_new_from_message (msg->request_headers, + msg->request_body); + if (!multipart) + return NULL; + + if (filename) + *filename = NULL; + if (content_type) + *content_type = NULL; + *file = NULL; + + form_data_set = g_hash_table_new_full (g_str_hash, g_str_equal, + g_free, g_free); + for (i = 0; i < soup_multipart_get_length (multipart); i++) { + soup_multipart_get_part (multipart, i, &part_headers, &part_body); + if (!soup_message_headers_get_content_disposition ( + part_headers, &disposition, &params)) + continue; + name = g_hash_table_lookup (params, "name"); + if (g_ascii_strcasecmp (disposition, "form-data") != 0 || + !name) { + g_free (disposition); + g_hash_table_destroy (params); + continue; + } + + if (!strcmp (name, file_control_name)) { + if (filename) + *filename = g_strdup (g_hash_table_lookup (params, "filename")); + if (content_type) + *content_type = g_strdup (soup_message_headers_get_content_type (part_headers, NULL)); + if (file) + *file = soup_buffer_copy (part_body); + } else { + g_hash_table_insert (form_data_set, + g_strdup (name), + g_strndup (part_body->data, + part_body->length)); + } + + g_free (disposition); + g_hash_table_destroy (params); + } + +
soup_multipart_free (multipart); + return form_data_set; +} + +static void +append_form_encoded (GString *str, const char *in) +{ + const unsigned char *s = (const unsigned char *)in; + + while (*s) { + if (*s == ' ') { + g_string_append_c (str, '+'); + s++; + } else if (!g_ascii_isalnum (*s)) + g_string_append_printf (str, "%%%02X", (int)*s++); + else + g_string_append_c (str, *s++); + } +} + +static void +encode_pair (GString *str, const char *name, const char *value) +{ + g_return_if_fail (name != NULL); + g_return_if_fail (value != NULL); + + if (str->len) + g_string_append_c (str, '&'); + append_form_encoded (str, name); + g_string_append_c (str, '='); + append_form_encoded (str, value); +} + +static void +hash_encode_foreach (gpointer name, gpointer value, gpointer str) +{ + encode_pair (str, name, value); +} + +/** + * soup_form_encode: + * @first_field: name of the first form field + * @...: value of @first_field, followed by additional field names + * and values, terminated by %NULL. + * + * Encodes the given field names and values into a value of type + * "application/x-www-form-urlencoded", as defined in the HTML 4.01 + * spec. + * + * This method requires you to know the names of the form fields (or + * at the very least, the total number of fields) at compile time; for + * working with dynamic forms, use soup_form_encode_hash() or + * soup_form_encode_datalist(). + * + * Return value: the encoded form + **/ +char * +soup_form_encode (const char *first_field, ...) +{ + va_list args; + char *encoded; + + va_start (args, first_field); + encoded = soup_form_encode_valist (first_field, args); + va_end (args); + + return encoded; +} + +/** + * soup_form_encode_hash: + * @form_data_set: (element-type utf8 utf8): a hash table containing + * name/value pairs (as strings) + * + * Encodes @form_data_set into a value of type + * "application/x-www-form-urlencoded", as defined in the HTML 4.01 + * spec. 
+ * + * Note that the HTML spec states that "The control names/values are + * listed in the order they appear in the document." Since this method + * takes a hash table, it cannot enforce that; if you care about the + * ordering of the form fields, use soup_form_encode_datalist(). + * + * Return value: the encoded form + **/ +char * +soup_form_encode_hash (GHashTable *form_data_set) +{ + GString *str = g_string_new (NULL); + + g_hash_table_foreach (form_data_set, hash_encode_foreach, str); + return g_string_free (str, FALSE); +} + +static void +datalist_encode_foreach (GQuark key_id, gpointer value, gpointer str) +{ + encode_pair (str, g_quark_to_string (key_id), value); +} + +/** + * soup_form_encode_datalist: + * @form_data_set: a datalist containing name/value pairs + * + * Encodes @form_data_set into a value of type + * "application/x-www-form-urlencoded", as defined in the HTML 4.01 + * spec. Unlike soup_form_encode_hash(), this preserves the ordering + * of the form elements, which may be required in some situations. + * + * Return value: the encoded form + **/ +char * +soup_form_encode_datalist (GData **form_data_set) +{ + GString *str = g_string_new (NULL); + + g_datalist_foreach (form_data_set, datalist_encode_foreach, str); + return g_string_free (str, FALSE); +} + +/** + * soup_form_encode_valist: + * @first_field: name of the first form field + * @args: pointer to additional values, as in soup_form_encode() + * + * See soup_form_encode(). This is mostly an internal method, used by + * various other methods such as soup_uri_set_query_from_fields() and + * soup_form_request_new(). 
+ * + * Return value: the encoded form + **/ +char * +soup_form_encode_valist (const char *first_field, va_list args) +{ + GString *str = g_string_new (NULL); + const char *name, *value; + + name = first_field; + value = va_arg (args, const char *); + while (name && value) { + encode_pair (str, name, value); + + name = va_arg (args, const char *); + if (name) + value = va_arg (args, const char *); + } + + return g_string_free (str, FALSE); +} + +static SoupMessage * +soup_form_request_for_data (const char *method, const char *uri_string, + char *form_data) +{ + SoupMessage *msg; + SoupURI *uri; + + uri = soup_uri_new (uri_string); + if (!uri) + return NULL; + + if (!strcmp (method, "GET")) { + g_free (uri->query); + uri->query = form_data; + + msg = soup_message_new_from_uri (method, uri); + } else if (!strcmp (method, "POST") || !strcmp (method, "PUT")) { + msg = soup_message_new_from_uri (method, uri); + + soup_message_set_request ( + msg, SOUP_FORM_MIME_TYPE_URLENCODED, + SOUP_MEMORY_TAKE, + form_data, strlen (form_data)); + } else { + g_warning ("invalid method passed to soup_form_request_new"); + g_free (form_data); + + /* Don't crash */ + msg = soup_message_new_from_uri (method, uri); + } + soup_uri_free (uri); + + return msg; +} + +/** + * soup_form_request_new: + * @method: the HTTP method, either "GET" or "POST" + * @uri: the URI to send the form data to + * @first_field: name of the first form field + * @...: value of @first_field, followed by additional field names + * and values, terminated by %NULL. + * + * Creates a new %SoupMessage and sets it up to send the given data + * to @uri via @method. (That is, if @method is "GET", it will encode + * the form data into @uri's query field, and if @method is "POST", it + * will encode it into the %SoupMessage's request_body.) + * + * Return value: (transfer full): the new %SoupMessage + **/ +SoupMessage * +soup_form_request_new (const char *method, const char *uri, + const char *first_field, ...) 
+{ + va_list args; + char *form_data; + + va_start (args, first_field); + form_data = soup_form_encode_valist (first_field, args); + va_end (args); + + return soup_form_request_for_data (method, uri, form_data); +} + +/** + * soup_form_request_new_from_hash: + * @method: the HTTP method, either "GET" or "POST" + * @uri: the URI to send the form data to + * @form_data_set: (element-type utf8 utf8): the data to send to @uri + * + * Creates a new %SoupMessage and sets it up to send @form_data_set to + * @uri via @method, as with soup_form_request_new(). + * + * Return value: (transfer full): the new %SoupMessage + **/ +SoupMessage * +soup_form_request_new_from_hash (const char *method, const char *uri, + GHashTable *form_data_set) +{ + return soup_form_request_for_data ( + method, uri, soup_form_encode_hash (form_data_set)); +} + +/** + * soup_form_request_new_from_datalist: + * @method: the HTTP method, either "GET" or "POST" + * @uri: the URI to send the form data to + * @form_data_set: the data to send to @uri + * + * Creates a new %SoupMessage and sets it up to send @form_data_set to + * @uri via @method, as with soup_form_request_new(). + * + * Return value: (transfer full): the new %SoupMessage + **/ +SoupMessage * +soup_form_request_new_from_datalist (const char *method, const char *uri, + GData **form_data_set) +{ + return soup_form_request_for_data ( + method, uri, soup_form_encode_datalist (form_data_set)); +} + +/** + * soup_form_request_new_from_multipart: + * @uri: the URI to send the form data to + * @multipart: a "multipart/form-data" #SoupMultipart + * + * Creates a new %SoupMessage and sets it up to send @multipart to + * @uri via POST. + * + * To send a "multipart/form-data" POST, first + * create a #SoupMultipart, using %SOUP_FORM_MIME_TYPE_MULTIPART as + * the MIME type. Then use soup_multipart_append_form_string() and + * soup_multipart_append_form_file() to add the value of each form + * control to the multipart. 
(These are just convenience methods, and + * you can use soup_multipart_append_part() if you need greater + * control over the part headers.) Finally, call + * soup_form_request_new_from_multipart() to serialize the multipart + * structure and create a #SoupMessage. + * + * Return value: (transfer full): the new %SoupMessage + * + * Since: 2.26 + **/ +SoupMessage * +soup_form_request_new_from_multipart (const char *uri, + SoupMultipart *multipart) +{ + SoupMessage *msg; + + msg = soup_message_new ("POST", uri); + soup_multipart_to_message (multipart, msg->request_headers, + msg->request_body); + return msg; +} diff --git a/libsoup/soup-form.h b/libsoup/soup-form.h new file mode 100644 index 0000000..f2be1bc --- /dev/null +++ b/libsoup/soup-form.h @@ -0,0 +1,53 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright 2008 Red Hat, Inc. + */ + +#ifndef SOUP_FORM_H +#define SOUP_FORM_H 1 + +#include +#include + +G_BEGIN_DECLS + +#define SOUP_FORM_MIME_TYPE_URLENCODED "application/x-www-form-urlencoded" +#define SOUP_FORM_MIME_TYPE_MULTIPART "multipart/form-data" + +GHashTable *soup_form_decode (const char *encoded_form); +GHashTable *soup_form_decode_multipart (SoupMessage *msg, + const char *file_control_name, + char **filename, + char **content_type, + SoupBuffer **file); + +char *soup_form_encode (const char *first_field, + ...) G_GNUC_NULL_TERMINATED; +char *soup_form_encode_hash (GHashTable *form_data_set); +char *soup_form_encode_datalist (GData **form_data_set); +char *soup_form_encode_valist (const char *first_field, + va_list args); + +#ifndef LIBSOUP_DISABLE_DEPRECATED +/* Compatibility with libsoup 2.3.0 */ +#define soup_form_decode_urlencoded soup_form_decode +#define soup_form_encode_urlencoded soup_form_encode_hash +#define soup_form_encode_urlencoded_list soup_form_encode_datalist +#endif + +SoupMessage *soup_form_request_new (const char *method, + const char *uri, + const char *first_field, + ...) 
G_GNUC_NULL_TERMINATED; +SoupMessage *soup_form_request_new_from_hash (const char *method, + const char *uri, + GHashTable *form_data_set); +SoupMessage *soup_form_request_new_from_datalist (const char *method, + const char *uri, + GData **form_data_set); +SoupMessage *soup_form_request_new_from_multipart (const char *uri, + SoupMultipart *multipart); + +G_END_DECLS + +#endif /* SOUP_FORM_H */ diff --git a/libsoup/soup-gnome-features.c b/libsoup/soup-gnome-features.c new file mode 100644 index 0000000..22a426b --- /dev/null +++ b/libsoup/soup-gnome-features.c @@ -0,0 +1,52 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * soup-gnome-features.c: GNOME-specific features + * + * Copyright (C) 2008 Red Hat, Inc. + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include "soup-gnome-features.h" + +/** + * SOUP_TYPE_PROXY_RESOLVER_GNOME: + * + * This returns the #GType of a #SoupProxyResolver that can be used to + * resolve HTTP proxies for GNOME applications. You can add this to + * a session using soup_session_add_feature_by_type() or by using the + * %SOUP_SESSION_ADD_FEATURE_BY_TYPE construct-time property. + * + * This feature is included in %SOUP_TYPE_GNOME_FEATURES_2_26, so if + * you are using that feature, you do not need to include this feature + * separately. + * + * Since: 2.26 + **/ +/* This is actually declared in soup-proxy-resolver-gnome now */ + +/** + * SOUP_TYPE_GNOME_FEATURES_2_26: + * + * This returns the #GType of a #SoupSessionFeature that automatically + * adds all of the GNOME features defined for libsoup 2.26 (which is + * just %SOUP_TYPE_PROXY_RESOLVER_GNOME). + * + * You can add this to a session using + * soup_session_add_feature_by_type() or by using the + * %SOUP_SESSION_ADD_FEATURE_BY_TYPE construct-time property. + * + * Since: 2.26 + **/ +GType +soup_gnome_features_2_26_get_type (void) +{ + /* Eventually this needs to be a special SoupSessionFeature + * class that registers other features. 
But for now we can + * just do this: + */ + return SOUP_TYPE_PROXY_RESOLVER_GNOME; +} + diff --git a/libsoup/soup-gnome-features.h b/libsoup/soup-gnome-features.h new file mode 100644 index 0000000..84d6459 --- /dev/null +++ b/libsoup/soup-gnome-features.h @@ -0,0 +1,28 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2008 Red Hat, Inc. + */ + +#ifndef SOUP_GNOME_FEATURES_H +#define SOUP_GNOME_FEATURES_H 1 + +#include + +G_BEGIN_DECLS + +GType soup_proxy_resolver_gnome_get_type (void); +#define SOUP_TYPE_PROXY_RESOLVER_GNOME (soup_proxy_resolver_gnome_get_type ()) + +GType soup_gnome_features_2_26_get_type (void); +#define SOUP_TYPE_GNOME_FEATURES_2_26 (soup_gnome_features_2_26_get_type ()) + +#ifndef G_OS_WIN32 +#ifdef LIBSOUP_I_HAVE_READ_BUG_594377_AND_KNOW_SOUP_PASSWORD_MANAGER_MIGHT_GO_AWAY +GType soup_password_manager_gnome_get_type (void); +#define SOUP_TYPE_PASSWORD_MANAGER_GNOME (soup_password_manager_gnome_get_type ()) +#endif /* LIBSOUP_I_HAVE_READ_BUG_594377_AND_KNOW_SOUP_PASSWORD_MANAGER_MIGHT_GO_AWAY */ +#endif + +G_END_DECLS + +#endif /* SOUP_GNOME_FEATURES_H */ diff --git a/libsoup/soup-gnome.h b/libsoup/soup-gnome.h new file mode 100644 index 0000000..a2134a4 --- /dev/null +++ b/libsoup/soup-gnome.h @@ -0,0 +1,14 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2008 Red Hat, Inc. + */ + +#ifndef SOUP_GNOME_H +#define SOUP_GNOME_H 1 + +#include + +#include +#include + +#endif /* SOUP_GNOME_H */ diff --git a/libsoup/soup-headers.c b/libsoup/soup-headers.c new file mode 100644 index 0000000..6baf70b --- /dev/null +++ b/libsoup/soup-headers.c @@ -0,0 +1,945 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * soup-headers.c: HTTP message header parsing + * + * Copyright (C) 2001-2003, Ximian, Inc. 
+ */ + +#include +#include +#include +#include + +#include "soup-headers.h" +#include "soup-misc.h" +#include "soup-uri.h" + +/** + * soup_headers_parse: + * @str: the header string (including the Request-Line or Status-Line, + * and the trailing blank line) + * @len: length of @str up to (but not including) the terminating blank line. + * @dest: #SoupMessageHeaders to store the header values in + * + * Parses the headers of an HTTP request or response in @str and + * stores the results in @dest. Beware that @dest may be modified even + * on failure. + * + * This is a low-level method; normally you would use + * soup_headers_parse_request() or soup_headers_parse_response(). + * + * Return value: success or failure + * + * Since: 2.26 + **/ +gboolean +soup_headers_parse (const char *str, int len, SoupMessageHeaders *dest) +{ + const char *headers_start; + char *headers_copy, *name, *name_end, *value, *value_end; + char *eol, *sol; + gboolean success = FALSE; + + g_return_val_if_fail (str != NULL, FALSE); + g_return_val_if_fail (dest != NULL, FALSE); + + /* Technically, the grammar does allow NUL bytes in the + * headers, but this is probably a bug, and if it's not, we + * can't deal with them anyway. + */ + if (memchr (str, '\0', len)) + return FALSE; + + /* As per RFC 2616 section 19.3, we treat '\n' as the + * line terminator, and '\r', if it appears, merely as + * ignorable trailing whitespace. + */ + + /* Skip over the Request-Line / Status-Line */ + headers_start = memchr (str, '\n', len); + if (!headers_start) + return FALSE; + + /* We work on a copy of the headers, which we can write '\0's + * into, so that we don't have to individually g_strndup and + * then g_free each header name and value. 
+ */ + headers_copy = g_strndup (headers_start, len - (headers_start - str)); + value_end = headers_copy; + + while (*(value_end + 1)) { + name = value_end + 1; + name_end = strchr (name, ':'); + if (!name_end || name + strcspn (name, " \t\r\n") < name_end) { + /* Bad header; just ignore this line. Note + * that if it has continuation lines, we'll + * end up ignoring them too since they'll + * start with spaces. + */ + value_end = strchr (name, '\n'); + if (!value_end) + goto done; + continue; + } + + /* Find the end of the value; ie, an end-of-line that + * isn't followed by a continuation line. + */ + value = name_end + 1; + value_end = strchr (name, '\n'); + if (!value_end) + goto done; + while (*(value_end + 1) == ' ' || *(value_end + 1) == '\t') { + value_end = strchr (value_end + 1, '\n'); + if (!value_end) + goto done; + } + + *name_end = '\0'; + *value_end = '\0'; + + /* Skip leading whitespace */ + while (value < value_end && + (*value == ' ' || *value == '\t' || + *value == '\r' || *value == '\n')) + value++; + + /* Collapse continuation lines */ + while ((eol = strchr (value, '\n'))) { + /* find start of next line */ + sol = eol + 1; + while (*sol == ' ' || *sol == '\t') + sol++; + + /* back up over trailing whitespace on current line */ + while (eol[-1] == ' ' || eol[-1] == '\t' || eol[-1] == '\r') + eol--; + + /* Delete all but one SP */ + *eol = ' '; + g_memmove (eol + 1, sol, strlen (sol) + 1); + } + + /* clip trailing whitespace */ + eol = strchr (value, '\0'); + while (eol > value && + (eol[-1] == ' ' || eol[-1] == '\t' || eol[-1] == '\r')) + eol--; + *eol = '\0'; + + soup_message_headers_append (dest, name, value); + } + success = TRUE; + +done: + g_free (headers_copy); + return success; +} + +/** + * soup_headers_parse_request: + * @str: the header string (including the trailing blank line) + * @len: length of @str up to (but not including) the terminating blank line. 
+ * @req_headers: #SoupMessageHeaders to store the header values in + * @req_method: (out) (allow-none): if non-%NULL, will be filled in with the + * request method + * @req_path: (out) (allow-none): if non-%NULL, will be filled in with the + * request path + * @ver: (out) (allow-none): if non-%NULL, will be filled in with the HTTP + * version + * + * Parses the headers of an HTTP request in @str and stores the + * results in @req_method, @req_path, @ver, and @req_headers. + * + * Beware that @req_headers may be modified even on failure. + * + * Return value: %SOUP_STATUS_OK if the headers could be parsed, or an + * HTTP error to be returned to the client if they could not be. + **/ +guint +soup_headers_parse_request (const char *str, + int len, + SoupMessageHeaders *req_headers, + char **req_method, + char **req_path, + SoupHTTPVersion *ver) +{ + const char *method, *method_end, *path, *path_end; + const char *version, *version_end, *headers; + unsigned long major_version, minor_version; + char *p; + + g_return_val_if_fail (str && *str, SOUP_STATUS_MALFORMED); + + /* RFC 2616 4.1 "servers SHOULD ignore any empty line(s) + * received where a Request-Line is expected." 
+ */ + while ((*str == '\r' || *str == '\n') && len > 0) { + str++; + len--; + } + if (!len) + return SOUP_STATUS_BAD_REQUEST; + + /* RFC 2616 19.3 "[servers] SHOULD accept any amount of SP or + * HT characters between [Request-Line] fields" + */ + + method = method_end = str; + while (method_end < str + len && *method_end != ' ' && *method_end != '\t') + method_end++; + if (method_end >= str + len) + return SOUP_STATUS_BAD_REQUEST; + + path = method_end; + while (path < str + len && (*path == ' ' || *path == '\t')) + path++; + if (path >= str + len) + return SOUP_STATUS_BAD_REQUEST; + + path_end = path; + while (path_end < str + len && *path_end != ' ' && *path_end != '\t') + path_end++; + if (path_end >= str + len) + return SOUP_STATUS_BAD_REQUEST; + + version = path_end; + while (version < str + len && (*version == ' ' || *version == '\t')) + version++; + if (version + 8 >= str + len) + return SOUP_STATUS_BAD_REQUEST; + + if (strncmp (version, "HTTP/", 5) != 0 || + !g_ascii_isdigit (version[5])) + return SOUP_STATUS_BAD_REQUEST; + major_version = strtoul (version + 5, &p, 10); + if (*p != '.' 
|| !g_ascii_isdigit (p[1])) + return SOUP_STATUS_BAD_REQUEST; + minor_version = strtoul (p + 1, &p, 10); + version_end = p; + if (major_version != 1) + return SOUP_STATUS_HTTP_VERSION_NOT_SUPPORTED; + if (minor_version > 1) + return SOUP_STATUS_HTTP_VERSION_NOT_SUPPORTED; + + headers = version_end; + while (headers < str + len && (*headers == '\r' || *headers == ' ')) + headers++; + if (headers >= str + len || *headers != '\n') + return SOUP_STATUS_BAD_REQUEST; + + if (!soup_headers_parse (str, len, req_headers)) + return SOUP_STATUS_BAD_REQUEST; + + if (soup_message_headers_get_expectations (req_headers) & + SOUP_EXPECTATION_UNRECOGNIZED) + return SOUP_STATUS_EXPECTATION_FAILED; + /* RFC 2616 14.10 */ + if (minor_version == 0) + soup_message_headers_clean_connection_headers (req_headers); + + if (req_method) + *req_method = g_strndup (method, method_end - method); + if (req_path) + *req_path = g_strndup (path, path_end - path); + if (ver) + *ver = (minor_version == 0) ? SOUP_HTTP_1_0 : SOUP_HTTP_1_1; + + return SOUP_STATUS_OK; +} + +/** + * soup_headers_parse_status_line: + * @status_line: an HTTP Status-Line + * @ver: (out) (allow-none): if non-%NULL, will be filled in with the HTTP + * version + * @status_code: (out) (allow-none): if non-%NULL, will be filled in with + * the status code + * @reason_phrase: (out) (allow-none): if non-%NULL, will be filled in with + * the reason phrase + * + * Parses the HTTP Status-Line string in @status_line into @ver, + * @status_code, and @reason_phrase. @status_line must be terminated by + * either "\0" or "\r\n". + * + * Return value: %TRUE if @status_line was parsed successfully. 
+ **/ +gboolean +soup_headers_parse_status_line (const char *status_line, + SoupHTTPVersion *ver, + guint *status_code, + char **reason_phrase) +{ + unsigned long major_version, minor_version, code; + const char *code_start, *code_end, *phrase_start, *phrase_end; + char *p; + + g_return_val_if_fail (status_line != NULL, FALSE); + + if (strncmp (status_line, "HTTP/", 5) == 0 && + g_ascii_isdigit (status_line[5])) { + major_version = strtoul (status_line + 5, &p, 10); + if (*p != '.' || !g_ascii_isdigit (p[1])) + return FALSE; + minor_version = strtoul (p + 1, &p, 10); + if (major_version != 1) + return FALSE; + if (minor_version > 1) + return FALSE; + if (ver) + *ver = (minor_version == 0) ? SOUP_HTTP_1_0 : SOUP_HTTP_1_1; + } else if (!strncmp (status_line, "ICY", 3)) { + /* Shoutcast not-quite-HTTP format */ + if (ver) + *ver = SOUP_HTTP_1_0; + p = (char *)status_line + 3; + } else + return FALSE; + + code_start = p; + while (*code_start == ' ' || *code_start == '\t') + code_start++; + code_end = code_start; + while (*code_end >= '0' && *code_end <= '9') + code_end++; + if (code_end != code_start + 3) + return FALSE; + code = atoi (code_start); + if (code < 100 || code > 599) + return FALSE; + if (status_code) + *status_code = code; + + phrase_start = code_end; + while (*phrase_start == ' ' || *phrase_start == '\t') + phrase_start++; + phrase_end = phrase_start + strcspn (phrase_start, "\n"); + while (phrase_end > phrase_start && + (phrase_end[-1] == '\r' || phrase_end[-1] == ' ' || phrase_end[-1] == '\t')) + phrase_end--; + if (reason_phrase) + *reason_phrase = g_strndup (phrase_start, phrase_end - phrase_start); + + return TRUE; +} + +/** + * soup_headers_parse_response: + * @str: the header string (including the trailing blank line) + * @len: length of @str up to (but not including) the terminating blank line. 
+ * @headers: #SoupMessageheaders to store the header values in + * @ver: (out) (allow-none): if non-%NULL, will be filled in with the HTTP + * version + * @status_code: (out) (allow-none): if non-%NULL, will be filled in with + * the status code + * @reason_phrase: (out) (allow-none): if non-%NULL, will be filled in with + * the reason phrase + * + * Parses the headers of an HTTP response in @str and stores the + * results in @ver, @status_code, @reason_phrase, and @headers. + * + * Beware that @headers may be modified even on failure. + * + * Return value: success or failure. + **/ +gboolean +soup_headers_parse_response (const char *str, + int len, + SoupMessageHeaders *headers, + SoupHTTPVersion *ver, + guint *status_code, + char **reason_phrase) +{ + SoupHTTPVersion version; + + g_return_val_if_fail (str && *str, FALSE); + + /* Workaround for broken servers that send extra line breaks + * after a response, which we then see prepended to the next + * response on that connection. + */ + while ((*str == '\r' || *str == '\n') && len > 0) { + str++; + len--; + } + if (!len) + return FALSE; + + if (!soup_headers_parse (str, len, headers)) + return FALSE; + + if (!soup_headers_parse_status_line (str, + &version, + status_code, + reason_phrase)) + return FALSE; + if (ver) + *ver = version; + + /* RFC 2616 14.10 */ + if (version == SOUP_HTTP_1_0) + soup_message_headers_clean_connection_headers (headers); + + return TRUE; +} + + +/* + * Parsing of specific HTTP header types + */ + +static const char * +skip_lws (const char *s) +{ + while (g_ascii_isspace (*s)) + s++; + return s; +} + +static const char * +unskip_lws (const char *s, const char *start) +{ + while (s > start && g_ascii_isspace (*(s - 1))) + s--; + return s; +} + +static const char * +skip_delims (const char *s, char delim) +{ + /* The grammar allows for multiple delimiters */ + while (g_ascii_isspace (*s) || *s == delim) + s++; + return s; +} + +static const char * +skip_item (const char *s, char delim) +{ 
+ gboolean quoted = FALSE; + const char *start = s; + + /* A list item ends at the last non-whitespace character + * before a delimiter which is not inside a quoted-string. Or + * at the end of the string. + */ + + while (*s) { + if (*s == '"') + quoted = !quoted; + else if (quoted) { + if (*s == '\\' && *(s + 1)) + s++; + } else { + if (*s == delim) + break; + } + s++; + } + + return unskip_lws (s, start); +} + +static GSList * +parse_list (const char *header, char delim) +{ + GSList *list = NULL; + const char *end; + + header = skip_delims (header, delim); + while (*header) { + end = skip_item (header, delim); + list = g_slist_prepend (list, g_strndup (header, end - header)); + header = skip_delims (end, delim); + } + + return g_slist_reverse (list); +} + +/** + * soup_header_parse_list: + * @header: a header value + * + * Parses a header whose content is described by RFC2616 as + * "#something", where "something" does not itself contain commas, + * except as part of quoted-strings. + * + * Return value: (transfer full) (element-type utf8): a #GSList of + * list elements, as allocated strings + **/ +GSList * +soup_header_parse_list (const char *header) +{ + g_return_val_if_fail (header != NULL, NULL); + + return parse_list (header, ','); +} + +typedef struct { + char *item; + double qval; +} QualityItem; + +static int +sort_by_qval (const void *a, const void *b) +{ + QualityItem *qia = (QualityItem *)a; + QualityItem *qib = (QualityItem *)b; + + if (qia->qval == qib->qval) + return 0; + else if (qia->qval < qib->qval) + return 1; + else + return -1; +} + +/** + * soup_header_parse_quality_list: + * @header: a header value + * @unacceptable: (out) (allow-none) (transfer full) (element-type utf8): on + * return, will contain a list of unacceptable values + * + * Parses a header whose content is a list of items with optional + * "qvalue"s (eg, Accept, Accept-Charset, Accept-Encoding, + * Accept-Language, TE). 
+ * + * If @unacceptable is not %NULL, then on return, it will contain the + * items with qvalue 0. Either way, those items will be removed from + * the main list. + * + * Return value: (transfer full) (element-type utf8): a #GSList of + * acceptable values (as allocated strings), highest-qvalue first. + **/ +GSList * +soup_header_parse_quality_list (const char *header, GSList **unacceptable) +{ + GSList *unsorted; + QualityItem *array; + GSList *sorted, *iter; + char *item, *semi; + const char *param, *equal, *value; + double qval; + int n; + + g_return_val_if_fail (header != NULL, NULL); + + if (unacceptable) + *unacceptable = NULL; + + unsorted = soup_header_parse_list (header); + array = g_new0 (QualityItem, g_slist_length (unsorted)); + for (iter = unsorted, n = 0; iter; iter = iter->next) { + item = iter->data; + qval = 1.0; + for (semi = strchr (item, ';'); semi; semi = strchr (semi + 1, ';')) { + param = skip_lws (semi + 1); + if (*param != 'q') + continue; + equal = skip_lws (param + 1); + if (!equal || *equal != '=') + continue; + value = skip_lws (equal + 1); + if (!value) + continue; + + if (value[0] != '0' && value[0] != '1') + continue; + qval = (double)(value[0] - '0'); + if (value[0] == '0' && value[1] == '.') { + if (g_ascii_isdigit (value[2])) { + qval += (double)(value[2] - '0') / 10; + if (g_ascii_isdigit (value[3])) { + qval += (double)(value[3] - '0') / 100; + if (g_ascii_isdigit (value[4])) + qval += (double)(value[4] - '0') / 1000; + } + } + } + + *semi = '\0'; + break; + } + + if (qval == 0.0) { + if (unacceptable) { + *unacceptable = g_slist_prepend (*unacceptable, + item); + } + } else { + array[n].item = item; + array[n].qval = qval; + n++; + } + } + g_slist_free (unsorted); + + qsort (array, n, sizeof (QualityItem), sort_by_qval); + sorted = NULL; + while (n--) + sorted = g_slist_prepend (sorted, array[n].item); + g_free (array); + + return sorted; +} + +/** + * soup_header_free_list: (skip) + * @list: a #GSList returned from 
soup_header_parse_list() or + * soup_header_parse_quality_list() + * + * Frees @list. + **/ +void +soup_header_free_list (GSList *list) +{ + GSList *l; + + for (l = list; l; l = l->next) + g_free (l->data); + g_slist_free (list); +} + +/** + * soup_header_contains: + * @header: An HTTP header suitable for parsing with + * soup_header_parse_list() + * @token: a token + * + * Parses @header to see if it contains the token @token (matched + * case-insensitively). Note that this can't be used with lists + * that have qvalues. + * + * Return value: whether or not @header contains @token + **/ +gboolean +soup_header_contains (const char *header, const char *token) +{ + const char *end; + guint len = strlen (token); + + g_return_val_if_fail (header != NULL, FALSE); + g_return_val_if_fail (token != NULL, FALSE); + + header = skip_delims (header, ','); + while (*header) { + end = skip_item (header, ','); + if (end - header == len && + !g_ascii_strncasecmp (header, token, len)) + return TRUE; + header = skip_delims (end, ','); + } + + return FALSE; +} + +static void +decode_quoted_string (char *quoted_string) +{ + char *src, *dst; + + src = quoted_string + 1; + dst = quoted_string; + while (*src && *src != '"') { + if (*src == '\\' && *(src + 1)) + src++; + *dst++ = *src++; + } + *dst = '\0'; +} + +static gboolean +decode_rfc5987 (char *encoded_string) +{ + char *q, *decoded; + gboolean iso_8859_1 = FALSE; + + q = strchr (encoded_string, '\''); + if (!q) + return FALSE; + if (g_ascii_strncasecmp (encoded_string, "UTF-8", + q - encoded_string) == 0) + ; + else if (g_ascii_strncasecmp (encoded_string, "iso-8859-1", + q - encoded_string) == 0) + iso_8859_1 = TRUE; + else + return FALSE; + + q = strchr (q + 1, '\''); + if (!q) + return FALSE; + + decoded = soup_uri_decode (q + 1); + if (iso_8859_1) { + char *utf8 = g_convert_with_fallback (decoded, -1, "UTF-8", + "iso-8859-1", "_", + NULL, NULL, NULL); + g_free (decoded); + if (!utf8) + return FALSE; + decoded = utf8; + } + + /* 
If encoded_string was UTF-8, then each 3-character %-escape + * will be converted to a single byte, and so decoded is + * shorter than encoded_string. If encoded_string was + * iso-8859-1, then each 3-character %-escape will be + * converted into at most 2 bytes in UTF-8, and so it's still + * shorter. + */ + strcpy (encoded_string, decoded); + g_free (decoded); + return TRUE; +} + +static GHashTable * +parse_param_list (const char *header, char delim) +{ + GHashTable *params; + GSList *list, *iter; + char *item, *eq, *name_end, *value; + gboolean override; + + list = parse_list (header, delim); + if (!list) + return NULL; + + params = g_hash_table_new_full (soup_str_case_hash, + soup_str_case_equal, + g_free, NULL); + + for (iter = list; iter; iter = iter->next) { + item = iter->data; + override = FALSE; + + eq = strchr (item, '='); + if (eq) { + name_end = (char *)unskip_lws (eq, item); + if (name_end == item) { + /* That's no good... */ + g_free (item); + continue; + } + + *name_end = '\0'; + + value = (char *)skip_lws (eq + 1); + + if (name_end[-1] == '*' && name_end > item + 1) { + name_end[-1] = '\0'; + if (!decode_rfc5987 (value)) { + g_free (item); + continue; + } + override = TRUE; + } else if (*value == '"') + decode_quoted_string (value); + } else + value = NULL; + + if (override || !g_hash_table_lookup (params, item)) + g_hash_table_replace (params, item, value); + else + g_free (item); + } + + g_slist_free (list); + return params; +} + +/** + * soup_header_parse_param_list: + * @header: a header value + * + * Parses a header which is a comma-delimited list of something like: + * token [ "=" ( token | quoted-string ) ]. + * + * Tokens that don't have an associated value will still be added to + * the resulting hash table, but with a %NULL value. + * + * This also handles RFC5987 encoding (which in HTTP is mostly used + * for giving UTF8-encoded filenames in the Content-Disposition + * header). 
+ * + * Return value: (element-type utf8 utf8) (transfer full): a + * #GHashTable of list elements, which can be freed with + * soup_header_free_param_list(). + **/ +GHashTable * +soup_header_parse_param_list (const char *header) +{ + g_return_val_if_fail (header != NULL, NULL); + + return parse_param_list (header, ','); +} + +/** + * soup_header_parse_semi_param_list: + * @header: a header value + * + * Parses a header which is a semicolon-delimited list of something + * like: token [ "=" ( token | quoted-string ) ]. + * + * Tokens that don't have an associated value will still be added to + * the resulting hash table, but with a %NULL value. + * + * This also handles RFC5987 encoding (which in HTTP is mostly used + * for giving UTF8-encoded filenames in the Content-Disposition + * header). + * + * Return value: (element-type utf8 utf8) (transfer full): a + * #GHashTable of list elements, which can be freed with + * soup_header_free_param_list(). + * + * Since: 2.24 + **/ +GHashTable * +soup_header_parse_semi_param_list (const char *header) +{ + g_return_val_if_fail (header != NULL, NULL); + + return parse_param_list (header, ';'); +} + +/** + * soup_header_free_param_list: + * @param_list: (element-type utf8 utf8): a #GHashTable returned from soup_header_parse_param_list() + * or soup_header_parse_semi_param_list() + * + * Frees @param_list. 
+ **/ +void +soup_header_free_param_list (GHashTable *param_list) +{ + g_return_if_fail (param_list != NULL); + + g_hash_table_destroy (param_list); +} + +static void +append_param_rfc5987 (GString *string, + const char *name, + const char *value) +{ + char *encoded; + + g_string_append (string, name); + g_string_append (string, "*=UTF-8''"); + encoded = soup_uri_encode (value, " *'%()<>@,;:\\\"/[]?="); + g_string_append (string, encoded); + g_free (encoded); +} + +static void +append_param_quoted (GString *string, + const char *name, + const char *value) +{ + int len; + + g_string_append (string, name); + g_string_append (string, "=\""); + while (*value) { + while (*value == '\\' || *value == '"') { + g_string_append_c (string, '\\'); + g_string_append_c (string, *value++); + } + len = strcspn (value, "\\\""); + g_string_append_len (string, value, len); + value += len; + } + g_string_append_c (string, '"'); +} + +static void +append_param_internal (GString *string, + const char *name, + const char *value, + gboolean allow_token) +{ + const char *v; + gboolean use_token = allow_token; + + for (v = value; *v; v++) { + if (*v & 0x80) { + if (g_utf8_validate (value, -1, NULL)) { + append_param_rfc5987 (string, name, value); + return; + } else { + use_token = FALSE; + break; + } + } else if (!soup_char_is_token (*v)) + use_token = FALSE; + } + + if (use_token) { + g_string_append (string, name); + g_string_append_c (string, '='); + g_string_append (string, value); + } else + append_param_quoted (string, name, value); +} + +/** + * soup_header_g_string_append_param_quoted: + * @string: a #GString being used to construct an HTTP header value + * @name: a parameter name + * @value: a parameter value + * + * Appends something like @name="@value" to + * @string, taking care to escape any quotes or backslashes in @value. + * + * If @value is (non-ASCII) UTF-8, this will instead use RFC 5987 + * encoding, just like soup_header_g_string_append_param(). 
+ * + * Since: 2.30 + **/ +void +soup_header_g_string_append_param_quoted (GString *string, + const char *name, + const char *value) +{ + g_return_if_fail (string != NULL); + g_return_if_fail (name != NULL); + g_return_if_fail (value != NULL); + + append_param_internal (string, name, value, FALSE); +} + +/** + * soup_header_g_string_append_param: + * @string: a #GString being used to construct an HTTP header value + * @name: a parameter name + * @value: a parameter value, or %NULL + * + * Appends something like @name=@value to @string, + * taking care to quote @value if needed, and if so, to escape any + * quotes or backslashes in @value. + * + * Alternatively, if @value is a non-ASCII UTF-8 string, it will be + * appended using RFC5987 syntax. Although in theory this is supposed + * to work anywhere in HTTP that uses this style of parameter, in + * reality, it can only be used portably with the Content-Disposition + * "filename" parameter. + * + * If @value is %NULL, this will just append @name to @string. + * + * Since: 2.26 + **/ +void +soup_header_g_string_append_param (GString *string, + const char *name, + const char *value) +{ + g_return_if_fail (string != NULL); + g_return_if_fail (name != NULL); + + if (!value) { + g_string_append (string, name); + return; + } + + append_param_internal (string, name, value, TRUE); +} diff --git a/libsoup/soup-headers.h b/libsoup/soup-headers.h new file mode 100644 index 0000000..cc542c3 --- /dev/null +++ b/libsoup/soup-headers.h @@ -0,0 +1,62 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2001-2003, Ximian, Inc. 
+ */ + +#ifndef SOUP_HEADERS_H +#define SOUP_HEADERS_H 1 + +#include +#include + +G_BEGIN_DECLS + +/* HTTP Header Parsing */ + +gboolean soup_headers_parse (const char *str, + int len, + SoupMessageHeaders *dest); + +guint soup_headers_parse_request (const char *str, + int len, + SoupMessageHeaders *req_headers, + char **req_method, + char **req_path, + SoupHTTPVersion *ver); + +gboolean soup_headers_parse_status_line (const char *status_line, + SoupHTTPVersion *ver, + guint *status_code, + char **reason_phrase); + +gboolean soup_headers_parse_response (const char *str, + int len, + SoupMessageHeaders *headers, + SoupHTTPVersion *ver, + guint *status_code, + char **reason_phrase); + +/* Individual header parsing */ + +GSList *soup_header_parse_list (const char *header); +GSList *soup_header_parse_quality_list (const char *header, + GSList **unacceptable); +void soup_header_free_list (GSList *list); + +gboolean soup_header_contains (const char *header, + const char *token); + +GHashTable *soup_header_parse_param_list (const char *header); +GHashTable *soup_header_parse_semi_param_list (const char *header); +void soup_header_free_param_list (GHashTable *param_list); + +void soup_header_g_string_append_param (GString *string, + const char *name, + const char *value); +void soup_header_g_string_append_param_quoted (GString *string, + const char *name, + const char *value); + +G_END_DECLS + +#endif /*SOUP_HEADERS_H*/ diff --git a/libsoup/soup-http-input-stream.c b/libsoup/soup-http-input-stream.c new file mode 100644 index 0000000..6aa153d --- /dev/null +++ b/libsoup/soup-http-input-stream.c @@ -0,0 +1,871 @@ +/* soup-input-stream.c, based on gsocketinputstream.c + * + * Copyright (C) 2006-2007, 2010 Red Hat, Inc. + * Copyright (C) 2010 Igalia, S.L. 
+ * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General + * Public License along with this library; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place, Suite 330, + * Boston, MA 02111-1307, USA. + */ + +#include + +#include + +#include +#include + +#include "soup-http-input-stream.h" +#include "soup-session.h" + +static void soup_http_input_stream_seekable_iface_init (GSeekableIface *seekable_iface); + +G_DEFINE_TYPE_WITH_CODE (SoupHTTPInputStream, soup_http_input_stream, G_TYPE_INPUT_STREAM, + G_IMPLEMENT_INTERFACE (G_TYPE_SEEKABLE, + soup_http_input_stream_seekable_iface_init)) + +typedef void (*SoupHTTPInputStreamCallback)(GInputStream *); + +typedef struct { + SoupSession *session; + GMainContext *async_context; + SoupMessage *msg; + gboolean got_headers, finished; + goffset offset; + + GCancellable *cancellable; + GSource *cancel_watch; + SoupHTTPInputStreamCallback got_headers_cb; + SoupHTTPInputStreamCallback got_chunk_cb; + SoupHTTPInputStreamCallback finished_cb; + SoupHTTPInputStreamCallback cancelled_cb; + + guchar *leftover_buffer; + gsize leftover_bufsize, leftover_offset; + + guchar *caller_buffer; + gsize caller_bufsize, caller_nread; + GAsyncReadyCallback outstanding_callback; + GSimpleAsyncResult *result; +} SoupHTTPInputStreamPrivate; +#define SOUP_HTTP_INPUT_STREAM_GET_PRIVATE(o) (G_TYPE_INSTANCE_GET_PRIVATE ((o), SOUP_TYPE_HTTP_INPUT_STREAM, SoupHTTPInputStreamPrivate)) + + +static gssize 
soup_http_input_stream_read (GInputStream *stream, + void *buffer, + gsize count, + GCancellable *cancellable, + GError **error); +static gboolean soup_http_input_stream_close (GInputStream *stream, + GCancellable *cancellable, + GError **error); +static void soup_http_input_stream_read_async (GInputStream *stream, + void *buffer, + gsize count, + int io_priority, + GCancellable *cancellable, + GAsyncReadyCallback callback, + gpointer data); +static gssize soup_http_input_stream_read_finish (GInputStream *stream, + GAsyncResult *result, + GError **error); +static void soup_http_input_stream_close_async (GInputStream *stream, + int io_priority, + GCancellable *cancellable, + GAsyncReadyCallback callback, + gpointer data); +static gboolean soup_http_input_stream_close_finish (GInputStream *stream, + GAsyncResult *result, + GError **error); + +static goffset soup_http_input_stream_tell (GSeekable *seekable); + +static gboolean soup_http_input_stream_can_seek (GSeekable *seekable); +static gboolean soup_http_input_stream_seek (GSeekable *seekable, + goffset offset, + GSeekType type, + GCancellable *cancellable, + GError **error); + +static gboolean soup_http_input_stream_can_truncate (GSeekable *seekable); +static gboolean soup_http_input_stream_truncate (GSeekable *seekable, + goffset offset, + GCancellable *cancellable, + GError **error); + +static void soup_http_input_stream_got_headers (SoupMessage *msg, gpointer stream); +static void soup_http_input_stream_got_chunk (SoupMessage *msg, SoupBuffer *chunk, gpointer stream); +static void soup_http_input_stream_finished (SoupMessage *msg, gpointer stream); + +static void +soup_http_input_stream_finalize (GObject *object) +{ + SoupHTTPInputStream *stream = SOUP_HTTP_INPUT_STREAM (object); + SoupHTTPInputStreamPrivate *priv = SOUP_HTTP_INPUT_STREAM_GET_PRIVATE (stream); + + g_object_unref (priv->session); + + g_signal_handlers_disconnect_by_func (priv->msg, G_CALLBACK (soup_http_input_stream_got_headers), stream); + 
g_signal_handlers_disconnect_by_func (priv->msg, G_CALLBACK (soup_http_input_stream_got_chunk), stream); + g_signal_handlers_disconnect_by_func (priv->msg, G_CALLBACK (soup_http_input_stream_finished), stream); + g_object_unref (priv->msg); + g_free (priv->leftover_buffer); + + if (G_OBJECT_CLASS (soup_http_input_stream_parent_class)->finalize) + (*G_OBJECT_CLASS (soup_http_input_stream_parent_class)->finalize)(object); +} + +static void +soup_http_input_stream_class_init (SoupHTTPInputStreamClass *klass) +{ + GObjectClass *gobject_class = G_OBJECT_CLASS (klass); + GInputStreamClass *stream_class = G_INPUT_STREAM_CLASS (klass); + + g_type_class_add_private (klass, sizeof (SoupHTTPInputStreamPrivate)); + + gobject_class->finalize = soup_http_input_stream_finalize; + + stream_class->read_fn = soup_http_input_stream_read; + stream_class->close_fn = soup_http_input_stream_close; + stream_class->read_async = soup_http_input_stream_read_async; + stream_class->read_finish = soup_http_input_stream_read_finish; + stream_class->close_async = soup_http_input_stream_close_async; + stream_class->close_finish = soup_http_input_stream_close_finish; +} + +static void +soup_http_input_stream_seekable_iface_init (GSeekableIface *seekable_iface) +{ + seekable_iface->tell = soup_http_input_stream_tell; + seekable_iface->can_seek = soup_http_input_stream_can_seek; + seekable_iface->seek = soup_http_input_stream_seek; + seekable_iface->can_truncate = soup_http_input_stream_can_truncate; + seekable_iface->truncate_fn = soup_http_input_stream_truncate; +} + +static void +soup_http_input_stream_init (SoupHTTPInputStream *stream) +{ + ; +} + +static void +soup_http_input_stream_queue_message (SoupHTTPInputStream *stream) +{ + SoupHTTPInputStreamPrivate *priv = SOUP_HTTP_INPUT_STREAM_GET_PRIVATE (stream); + + priv->got_headers = priv->finished = FALSE; + + /* Add an extra ref since soup_session_queue_message steals one */ + g_object_ref (priv->msg); + soup_session_queue_message 
(priv->session, priv->msg, NULL, NULL); +} + +/** + * soup_http_input_stream_new: + * @session: the #SoupSession to use + * @msg: the #SoupMessage whose response will be streamed + * + * Prepares to send @msg over @session, and returns a #GInputStream + * that can be used to read the response. + * + * @msg may not be sent until the first read call; if you need to look + * at the status code or response headers before reading the body, you + * can use soup_http_input_stream_send() or soup_http_input_stream_send_async() + * to force the message to be sent and the response headers read. + * + * If @msg gets a non-2xx result, the first read (or send) will return + * an error with type %SOUP_HTTP_INPUT_STREAM_HTTP_ERROR. + * + * Internally, #SoupHTTPInputStream is implemented using asynchronous I/O, + * so if you are using the synchronous API (eg, + * g_input_stream_read()), you should create a new #GMainContext and + * set it as the %SOUP_SESSION_ASYNC_CONTEXT property on @session. (If + * you don't, then synchronous #GInputStream calls will cause the main + * loop to be run recursively.) The async #GInputStream API works fine + * with %SOUP_SESSION_ASYNC_CONTEXT either set or unset. + * + * Returns: a new #GInputStream. 
+ **/ +SoupHTTPInputStream * +soup_http_input_stream_new (SoupSession *session, SoupMessage *msg) +{ + SoupHTTPInputStream *stream; + SoupHTTPInputStreamPrivate *priv; + + g_return_val_if_fail (SOUP_IS_MESSAGE (msg), NULL); + + stream = g_object_new (SOUP_TYPE_HTTP_INPUT_STREAM, NULL); + priv = SOUP_HTTP_INPUT_STREAM_GET_PRIVATE (stream); + + priv->session = g_object_ref (session); + priv->async_context = soup_session_get_async_context (session); + priv->msg = g_object_ref (msg); + + g_signal_connect (msg, "got_headers", + G_CALLBACK (soup_http_input_stream_got_headers), stream); + g_signal_connect (msg, "got_chunk", + G_CALLBACK (soup_http_input_stream_got_chunk), stream); + g_signal_connect (msg, "finished", + G_CALLBACK (soup_http_input_stream_finished), stream); + + soup_http_input_stream_queue_message (stream); + return stream; +} + +static void +soup_http_input_stream_got_headers (SoupMessage *msg, gpointer stream) +{ + SoupHTTPInputStreamPrivate *priv = SOUP_HTTP_INPUT_STREAM_GET_PRIVATE (stream); + + /* If the status is unsuccessful, we just ignore the signal and let + * libsoup keep going (eventually either it will requeue the request + * (after handling authentication/redirection), or else the + * "finished" handler will run). + */ + if (!SOUP_STATUS_IS_SUCCESSFUL (msg->status_code)) + return; + + priv->got_headers = TRUE; + if (!priv->caller_buffer) { + /* Not ready to read the body yet */ + soup_session_pause_message (priv->session, msg); + } + + if (priv->got_headers_cb) + priv->got_headers_cb (stream); +} + +static void +soup_http_input_stream_got_chunk (SoupMessage *msg, SoupBuffer *chunk_buffer, + gpointer stream) +{ + SoupHTTPInputStreamPrivate *priv = SOUP_HTTP_INPUT_STREAM_GET_PRIVATE (stream); + const gchar *chunk = chunk_buffer->data; + gsize chunk_size = chunk_buffer->length; + + /* We only pay attention to the chunk if it's part of a successful + * response. 
+ */ + if (!SOUP_STATUS_IS_SUCCESSFUL (msg->status_code)) + return; + + /* Sanity check */ + if (priv->caller_bufsize == 0 || priv->leftover_bufsize != 0) + g_warning ("soup_http_input_stream_got_chunk called again before previous chunk was processed"); + + /* Copy what we can into priv->caller_buffer */ + if (priv->caller_bufsize > priv->caller_nread) { + gsize nread = MIN (chunk_size, priv->caller_bufsize - priv->caller_nread); + + memcpy (priv->caller_buffer + priv->caller_nread, chunk, nread); + priv->caller_nread += nread; + priv->offset += nread; + chunk += nread; + chunk_size -= nread; + } + + if (chunk_size > 0) { + /* Copy the rest into priv->leftover_buffer. If + * there's already some data there, realloc and + * append. Otherwise just copy. + */ + if (priv->leftover_bufsize) { + priv->leftover_buffer = g_realloc (priv->leftover_buffer, + priv->leftover_bufsize + chunk_size); + memcpy (priv->leftover_buffer + priv->leftover_bufsize, + chunk, chunk_size); + priv->leftover_bufsize += chunk_size; + } else { + priv->leftover_bufsize = chunk_size; + priv->leftover_buffer = g_memdup (chunk, chunk_size); + priv->leftover_offset = 0; + } + } + + soup_session_pause_message (priv->session, msg); + if (priv->got_chunk_cb) + priv->got_chunk_cb (stream); +} + +static void +soup_http_input_stream_finished (SoupMessage *msg, gpointer stream) +{ + SoupHTTPInputStreamPrivate *priv = SOUP_HTTP_INPUT_STREAM_GET_PRIVATE (stream); + + priv->finished = TRUE; + + if (priv->finished_cb) + priv->finished_cb (stream); +} + +static gboolean +soup_http_input_stream_cancelled (GIOChannel *chan, GIOCondition condition, + gpointer stream) +{ + SoupHTTPInputStreamPrivate *priv = SOUP_HTTP_INPUT_STREAM_GET_PRIVATE (stream); + + priv->cancel_watch = NULL; + + soup_session_pause_message (priv->session, priv->msg); + if (priv->cancelled_cb) + priv->cancelled_cb (stream); + + return FALSE; +} + +static void +soup_http_input_stream_prepare_for_io (GInputStream *stream, + GCancellable 
*cancellable, + guchar *buffer, + gsize count) +{ + SoupHTTPInputStreamPrivate *priv = SOUP_HTTP_INPUT_STREAM_GET_PRIVATE (stream); + int cancel_fd; + + priv->cancellable = cancellable; + cancel_fd = g_cancellable_get_fd (cancellable); + if (cancel_fd != -1) { + GIOChannel *chan = g_io_channel_unix_new (cancel_fd); + priv->cancel_watch = soup_add_io_watch (priv->async_context, chan, + G_IO_IN | G_IO_ERR | G_IO_HUP, + soup_http_input_stream_cancelled, + stream); + g_io_channel_unref (chan); + } + + priv->caller_buffer = buffer; + priv->caller_bufsize = count; + priv->caller_nread = 0; + + if (priv->got_headers) + soup_session_unpause_message (priv->session, priv->msg); +} + +static void +soup_http_input_stream_done_io (GInputStream *stream) +{ + SoupHTTPInputStreamPrivate *priv = SOUP_HTTP_INPUT_STREAM_GET_PRIVATE (stream); + + if (priv->cancel_watch) { + g_source_destroy (priv->cancel_watch); + priv->cancel_watch = NULL; + g_cancellable_release_fd (priv->cancellable); + } + priv->cancellable = NULL; + + priv->caller_buffer = NULL; + priv->caller_bufsize = 0; +} + +static gboolean +set_error_if_http_failed (SoupMessage *msg, GError **error) +{ + if (!SOUP_STATUS_IS_SUCCESSFUL (msg->status_code)) { + g_set_error_literal (error, SOUP_HTTP_ERROR, + msg->status_code, msg->reason_phrase); + return TRUE; + } + return FALSE; +} + +static gsize +read_from_leftover (SoupHTTPInputStreamPrivate *priv, + gpointer buffer, gsize bufsize) +{ + gsize nread; + + if (priv->leftover_bufsize - priv->leftover_offset <= bufsize) { + nread = priv->leftover_bufsize - priv->leftover_offset; + memcpy (buffer, priv->leftover_buffer + priv->leftover_offset, nread); + + g_free (priv->leftover_buffer); + priv->leftover_buffer = NULL; + priv->leftover_bufsize = priv->leftover_offset = 0; + } else { + nread = bufsize; + memcpy (buffer, priv->leftover_buffer + priv->leftover_offset, nread); + priv->leftover_offset += nread; + } + + priv->offset += nread; + return nread; +} + +/* This does the work 
of soup_http_input_stream_send(), assuming that the + * GInputStream pending flag has already been set. It is also used by + * soup_http_input_stream_send_async() in some circumstances. + */ +static gboolean +soup_http_input_stream_send_internal (GInputStream *stream, + GCancellable *cancellable, + GError **error) +{ + SoupHTTPInputStreamPrivate *priv = SOUP_HTTP_INPUT_STREAM_GET_PRIVATE (stream); + + soup_http_input_stream_prepare_for_io (stream, cancellable, NULL, 0); + while (!priv->finished && !priv->got_headers && + !g_cancellable_is_cancelled (cancellable)) + g_main_context_iteration (priv->async_context, TRUE); + soup_http_input_stream_done_io (stream); + + if (g_cancellable_set_error_if_cancelled (cancellable, error)) + return FALSE; + else if (set_error_if_http_failed (priv->msg, error)) + return FALSE; + return TRUE; +} + +static void +send_sync_finished (GInputStream *stream) +{ + SoupHTTPInputStreamPrivate *priv = SOUP_HTTP_INPUT_STREAM_GET_PRIVATE (stream); + GError *error = NULL; + + if (!g_cancellable_set_error_if_cancelled (priv->cancellable, &error)) + set_error_if_http_failed (priv->msg, &error); + + priv->got_headers_cb = NULL; + priv->finished_cb = NULL; + + /* Wake up the main context iteration */ + soup_add_completion (priv->async_context, NULL, NULL); +} + +/** + * soup_http_input_stream_send: + * @httpstream: a #SoupHTTPInputStream + * @cancellable: optional #GCancellable object, %NULL to ignore. + * @error: location to store the error occuring, or %NULL to ignore + * + * Synchronously sends the HTTP request associated with @stream, and + * reads the response headers. Call this after soup_http_input_stream_new() + * and before the first g_input_stream_read() if you want to check the + * HTTP status code before you start reading. + * + * Return value: %TRUE if msg has a successful (2xx) status, %FALSE if + * not. 
+ **/ +gboolean +soup_http_input_stream_send (SoupHTTPInputStream *httpstream, + GCancellable *cancellable, + GError **error) +{ + SoupHTTPInputStreamPrivate *priv = SOUP_HTTP_INPUT_STREAM_GET_PRIVATE (httpstream); + GInputStream *istream = (GInputStream *)httpstream; + gboolean result; + + g_return_val_if_fail (SOUP_IS_HTTP_INPUT_STREAM (httpstream), FALSE); + + if (!g_input_stream_set_pending (istream, error)) + return FALSE; + + priv->got_headers_cb = send_sync_finished; + priv->finished_cb = send_sync_finished; + + result = soup_http_input_stream_send_internal (istream, cancellable, error); + g_input_stream_clear_pending (istream); + + return result; +} + +static gssize +soup_http_input_stream_read (GInputStream *stream, + void *buffer, + gsize count, + GCancellable *cancellable, + GError **error) +{ + SoupHTTPInputStreamPrivate *priv = SOUP_HTTP_INPUT_STREAM_GET_PRIVATE (stream); + + /* If there is data leftover from a previous read, return it. */ + if (priv->leftover_bufsize) + return read_from_leftover (priv, buffer, count); + + if (priv->finished) + return 0; + + /* No leftover data, accept one chunk from the network */ + soup_http_input_stream_prepare_for_io (stream, cancellable, buffer, count); + while (!priv->finished && priv->caller_nread == 0 && + !g_cancellable_is_cancelled (cancellable)) + g_main_context_iteration (priv->async_context, TRUE); + soup_http_input_stream_done_io (stream); + + if (priv->caller_nread > 0) + return priv->caller_nread; + + if (g_cancellable_set_error_if_cancelled (cancellable, error)) + return -1; + else if (set_error_if_http_failed (priv->msg, error)) + return -1; + else + return 0; +} + +static gboolean +soup_http_input_stream_close (GInputStream *stream, + GCancellable *cancellable, + GError **error) +{ + SoupHTTPInputStreamPrivate *priv = SOUP_HTTP_INPUT_STREAM_GET_PRIVATE (stream); + + if (!priv->finished) + soup_session_cancel_message (priv->session, priv->msg, SOUP_STATUS_CANCELLED); + + return TRUE; +} + +static void 
+wrapper_callback (GObject *source_object, GAsyncResult *res, + gpointer user_data) +{ + GInputStream *stream = G_INPUT_STREAM (source_object); + SoupHTTPInputStreamPrivate *priv = SOUP_HTTP_INPUT_STREAM_GET_PRIVATE (stream); + + g_input_stream_clear_pending (stream); + if (priv->outstanding_callback) + (*priv->outstanding_callback)(source_object, res, user_data); + priv->outstanding_callback = NULL; + g_object_unref (stream); +} + +static void +send_async_finished (GInputStream *stream) +{ + SoupHTTPInputStreamPrivate *priv = SOUP_HTTP_INPUT_STREAM_GET_PRIVATE (stream); + GSimpleAsyncResult *result; + GError *error = NULL; + + if (!g_cancellable_set_error_if_cancelled (priv->cancellable, &error)) + set_error_if_http_failed (priv->msg, &error); + + priv->got_headers_cb = NULL; + priv->finished_cb = NULL; + soup_http_input_stream_done_io (stream); + + result = priv->result; + priv->result = NULL; + + g_simple_async_result_set_op_res_gboolean (result, error == NULL); + if (error) { + g_simple_async_result_set_from_error (result, error); + g_error_free (error); + } + g_simple_async_result_complete (result); + g_object_unref (result); +} + +static void +soup_http_input_stream_send_async_internal (GInputStream *stream, + int io_priority, + GCancellable *cancellable, + GAsyncReadyCallback callback, + gpointer user_data) +{ + SoupHTTPInputStreamPrivate *priv = SOUP_HTTP_INPUT_STREAM_GET_PRIVATE (stream); + + g_return_if_fail (priv->async_context == g_main_context_get_thread_default ()); + + g_object_ref (stream); + priv->outstanding_callback = callback; + + priv->got_headers_cb = send_async_finished; + priv->finished_cb = send_async_finished; + + soup_http_input_stream_prepare_for_io (stream, cancellable, NULL, 0); + priv->result = g_simple_async_result_new (G_OBJECT (stream), + wrapper_callback, user_data, + soup_http_input_stream_send_async); +} + +/** + * soup_http_input_stream_send_async: + * @httpstream: a #SoupHTTPInputStream + * @io_priority: the io priority of the 
request. + * @cancellable: optional #GCancellable object, %NULL to ignore. + * @callback: callback to call when the request is satisfied + * @user_data: the data to pass to callback function + * + * Asynchronously sends the HTTP request associated with @stream, and + * reads the response headers. Call this after soup_http_input_stream_new() + * and before the first g_input_stream_read_async() if you want to + * check the HTTP status code before you start reading. + **/ +void +soup_http_input_stream_send_async (SoupHTTPInputStream *httpstream, + int io_priority, + GCancellable *cancellable, + GAsyncReadyCallback callback, + gpointer user_data) +{ + GInputStream *istream = (GInputStream *)httpstream; + GError *error = NULL; + + g_return_if_fail (SOUP_IS_HTTP_INPUT_STREAM (httpstream)); + + if (!g_input_stream_set_pending (istream, &error)) { + g_simple_async_report_gerror_in_idle (G_OBJECT (httpstream), + callback, + user_data, + error); + g_error_free (error); + return; + } + soup_http_input_stream_send_async_internal (istream, io_priority, cancellable, + callback, user_data); +} + +/** + * soup_http_input_stream_send_finish: + * @httpstream: a #SoupHTTPInputStream + * @result: a #GAsyncResult. + * @error: a #GError location to store the error occuring, or %NULL to + * ignore. + * + * Finishes a soup_http_input_stream_send_async() operation. + * + * Return value: %TRUE if the message was sent successfully and + * received a successful status code, %FALSE if not. 
+ **/ +gboolean +soup_http_input_stream_send_finish (SoupHTTPInputStream *httpstream, + GAsyncResult *result, + GError **error) +{ + GSimpleAsyncResult *simple; + + g_return_val_if_fail (G_IS_SIMPLE_ASYNC_RESULT (result), FALSE); + simple = G_SIMPLE_ASYNC_RESULT (result); + + g_return_val_if_fail (g_simple_async_result_get_source_tag (simple) == soup_http_input_stream_send_async, FALSE); + + if (g_simple_async_result_propagate_error (simple, error)) + return FALSE; + + return g_simple_async_result_get_op_res_gboolean (simple); +} + +static void +read_async_done (GInputStream *stream) +{ + SoupHTTPInputStreamPrivate *priv = SOUP_HTTP_INPUT_STREAM_GET_PRIVATE (stream); + GSimpleAsyncResult *result; + GError *error = NULL; + + result = priv->result; + priv->result = NULL; + + if (g_cancellable_set_error_if_cancelled (priv->cancellable, &error) || + set_error_if_http_failed (priv->msg, &error)) { + g_simple_async_result_set_from_error (result, error); + g_error_free (error); + } else + g_simple_async_result_set_op_res_gssize (result, priv->caller_nread); + + priv->got_chunk_cb = NULL; + priv->finished_cb = NULL; + priv->cancelled_cb = NULL; + soup_http_input_stream_done_io (stream); + + g_simple_async_result_complete (result); + g_object_unref (result); +} + +static void +soup_http_input_stream_read_async (GInputStream *stream, + void *buffer, + gsize count, + int io_priority, + GCancellable *cancellable, + GAsyncReadyCallback callback, + gpointer user_data) +{ + SoupHTTPInputStreamPrivate *priv = SOUP_HTTP_INPUT_STREAM_GET_PRIVATE (stream); + GSimpleAsyncResult *result; + + g_return_if_fail (priv->async_context == g_main_context_get_thread_default ()); + + result = g_simple_async_result_new (G_OBJECT (stream), + callback, user_data, + soup_http_input_stream_read_async); + + if (priv->leftover_bufsize) { + gsize nread = read_from_leftover (priv, buffer, count); + g_simple_async_result_set_op_res_gssize (result, nread); + g_simple_async_result_complete_in_idle (result); 
+ g_object_unref (result); + return; + } + + if (priv->finished) { + g_simple_async_result_set_op_res_gssize (result, 0); + g_simple_async_result_complete_in_idle (result); + g_object_unref (result); + return; + } + + priv->result = result; + + priv->got_chunk_cb = read_async_done; + priv->finished_cb = read_async_done; + priv->cancelled_cb = read_async_done; + soup_http_input_stream_prepare_for_io (stream, cancellable, buffer, count); +} + +static gssize +soup_http_input_stream_read_finish (GInputStream *stream, + GAsyncResult *result, + GError **error) +{ + GSimpleAsyncResult *simple; + + g_return_val_if_fail (G_IS_SIMPLE_ASYNC_RESULT (result), -1); + simple = G_SIMPLE_ASYNC_RESULT (result); + g_return_val_if_fail (g_simple_async_result_get_source_tag (simple) == soup_http_input_stream_read_async, -1); + + return g_simple_async_result_get_op_res_gssize (simple); +} + +static void +soup_http_input_stream_close_async (GInputStream *stream, + int io_priority, + GCancellable *cancellable, + GAsyncReadyCallback callback, + gpointer user_data) +{ + GSimpleAsyncResult *result; + gboolean success; + GError *error = NULL; + + result = g_simple_async_result_new (G_OBJECT (stream), + callback, user_data, + soup_http_input_stream_close_async); + success = soup_http_input_stream_close (stream, cancellable, &error); + g_simple_async_result_set_op_res_gboolean (result, success); + if (error) { + g_simple_async_result_set_from_error (result, error); + g_error_free (error); + } + + g_simple_async_result_complete_in_idle (result); + g_object_unref (result); +} + +static gboolean +soup_http_input_stream_close_finish (GInputStream *stream, + GAsyncResult *result, + GError **error) +{ + /* Failures handled in generic close_finish code */ + return TRUE; +} + +static goffset +soup_http_input_stream_tell (GSeekable *seekable) +{ + SoupHTTPInputStreamPrivate *priv = SOUP_HTTP_INPUT_STREAM_GET_PRIVATE (seekable); + + return priv->offset; +} + +static gboolean 
+soup_http_input_stream_can_seek (GSeekable *seekable) +{ + return TRUE; +} + +extern void soup_message_io_cleanup (SoupMessage *msg); + +static gboolean +soup_http_input_stream_seek (GSeekable *seekable, + goffset offset, + GSeekType type, + GCancellable *cancellable, + GError **error) +{ + GInputStream *stream = G_INPUT_STREAM (seekable); + SoupHTTPInputStreamPrivate *priv = SOUP_HTTP_INPUT_STREAM_GET_PRIVATE (seekable); + char *range; + + if (type == G_SEEK_END) { + /* FIXME: we could send "bytes=-offset", but unless we + * know the Content-Length, we wouldn't be able to + * answer a tell() properly. We could find the + * Content-Length by doing a HEAD... + */ + + g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_NOT_SUPPORTED, + "G_SEEK_END not currently supported"); + return FALSE; + } + + if (!g_input_stream_set_pending (stream, error)) + return FALSE; + + soup_session_cancel_message (priv->session, priv->msg, SOUP_STATUS_CANCELLED); + soup_message_io_cleanup (priv->msg); + + switch (type) { + case G_SEEK_CUR: + offset += priv->offset; + /* fall through */ + + case G_SEEK_SET: + range = g_strdup_printf ("bytes=%" G_GUINT64_FORMAT "-", (guint64)offset); + priv->offset = offset; + break; + + case G_SEEK_END: + range = NULL; /* keep compilers happy */ + g_return_val_if_reached (FALSE); + break; + + default: + g_return_val_if_reached (FALSE); + } + + soup_message_headers_remove (priv->msg->request_headers, "Range"); + soup_message_headers_append (priv->msg->request_headers, "Range", range); + g_free (range); + + soup_http_input_stream_queue_message (SOUP_HTTP_INPUT_STREAM (stream)); + + g_input_stream_clear_pending (stream); + return TRUE; +} + +static gboolean +soup_http_input_stream_can_truncate (GSeekable *seekable) +{ + return FALSE; +} + +static gboolean +soup_http_input_stream_truncate (GSeekable *seekable, + goffset offset, + GCancellable *cancellable, + GError **error) +{ + g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_NOT_SUPPORTED, + "Truncate not 
allowed on input stream"); + return FALSE; +} + +SoupMessage * +soup_http_input_stream_get_message (SoupHTTPInputStream *httpstream) +{ + SoupHTTPInputStreamPrivate *priv = SOUP_HTTP_INPUT_STREAM_GET_PRIVATE (httpstream); + return priv->msg ? g_object_ref (priv->msg) : NULL; +} diff --git a/libsoup/soup-http-input-stream.h b/libsoup/soup-http-input-stream.h new file mode 100644 index 0000000..4f23e93 --- /dev/null +++ b/libsoup/soup-http-input-stream.h @@ -0,0 +1,77 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2006, 2007, 2009, 2010 Red Hat, Inc. + * Copyright (C) 2010 Igalia, S.L. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General + * Public License along with this library; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place, Suite 330, + * Boston, MA 02111-1307, USA. 
+ */ + +#ifndef __SOUP_HTTP_INPUT_STREAM_H__ +#define __SOUP_HTTP_INPUT_STREAM_H__ + +#include +#include + +G_BEGIN_DECLS + +#define SOUP_TYPE_HTTP_INPUT_STREAM (soup_http_input_stream_get_type ()) +#define SOUP_HTTP_INPUT_STREAM(o) (G_TYPE_CHECK_INSTANCE_CAST ((o), SOUP_TYPE_HTTP_INPUT_STREAM, SoupHTTPInputStream)) +#define SOUP_HTTP_INPUT_STREAM_CLASS(k) (G_TYPE_CHECK_CLASS_CAST ((k), SOUP_TYPE_HTTP_INPUT_STREAM, SoupHTTPInputStreamClass)) +#define SOUP_IS_HTTP_INPUT_STREAM(o) (G_TYPE_CHECK_INSTANCE_TYPE ((o), SOUP_TYPE_HTTP_INPUT_STREAM)) +#define SOUP_IS_HTTP_INPUT_STREAM_CLASS(k) (G_TYPE_CHECK_CLASS_TYPE ((k), SOUP_TYPE_HTTP_INPUT_STREAM)) +#define SOUP_HTTP_INPUT_STREAM_GET_CLASS(o) (G_TYPE_INSTANCE_GET_CLASS ((o), SOUP_TYPE_HTTP_INPUT_STREAM, SoupHTTPInputStreamClass)) + +typedef struct SoupHTTPInputStream SoupHTTPInputStream; +typedef struct SoupHTTPInputStreamClass SoupHTTPInputStreamClass; + +struct SoupHTTPInputStream { + GInputStream parent; +}; + +struct SoupHTTPInputStreamClass { + GInputStreamClass parent_class; + + /* Padding for future expansion */ + void (*_g_reserved1)(void); + void (*_g_reserved2)(void); + void (*_g_reserved3)(void); + void (*_g_reserved4)(void); + void (*_g_reserved5)(void); +}; + +GType soup_http_input_stream_get_type (void) G_GNUC_CONST; + +SoupHTTPInputStream *soup_http_input_stream_new (SoupSession *session, + SoupMessage *msg); + +gboolean soup_http_input_stream_send (SoupHTTPInputStream *httpstream, + GCancellable *cancellable, + GError **error); + +void soup_http_input_stream_send_async (SoupHTTPInputStream *httpstream, + int io_priority, + GCancellable *cancellable, + GAsyncReadyCallback callback, + gpointer user_data); +gboolean soup_http_input_stream_send_finish (SoupHTTPInputStream *httpstream, + GAsyncResult *result, + GError **error); + +SoupMessage *soup_http_input_stream_get_message (SoupHTTPInputStream *httpstream); + +G_END_DECLS + +#endif /* __SOUP_HTTP_INPUT_STREAM_H__ */ diff --git a/libsoup/soup-logger.c 
b/libsoup/soup-logger.c new file mode 100644 index 0000000..7cdf2ed --- /dev/null +++ b/libsoup/soup-logger.c @@ -0,0 +1,673 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * soup-logger.c + * + * Copyright (C) 2001-2004 Novell, Inc. + * Copyright (C) 2008 Red Hat, Inc. + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include + +#include "soup-logger.h" +#include "soup-message.h" +#include "soup-session.h" +#include "soup-session-feature.h" +#include "soup-socket.h" +#include "soup-uri.h" + +/** + * SECTION:soup-logger + * @short_description: Debug logging support + * + * #SoupLogger watches a #SoupSession and logs the HTTP traffic that + * it generates, for debugging purposes. Many applications use an + * environment variable to determine whether or not to use + * #SoupLogger, and to determine the amount of debugging output. + * + * To use #SoupLogger, first create a logger with soup_logger_new(), + * optionally configure it with soup_logger_set_request_filter(), + * soup_logger_set_response_filter(), and soup_logger_set_printer(), + * and then attach it to a session (or multiple sessions) with + * soup_session_add_feature(). + * + * By default, the debugging output is sent to %stdout, and looks + * something like: + * + * + * > POST /unauth HTTP/1.1 + * > Soup-Debug-Timestamp: 1200171744 + * > Soup-Debug: SoupSessionAsync 1 (0x612190), SoupMessage 1 (0x617000), SoupSocket 1 (0x612220) + * > Host: localhost + * > Content-Type: text/plain + * > Connection: close + * > + * > This is a test. + * + * < HTTP/1.1 201 Created + * < Soup-Debug-Timestamp: 1200171744 + * < Soup-Debug: SoupMessage 1 (0x617000) + * < Date: Sun, 12 Jan 2008 21:02:24 GMT + * < Content-Length: 0 + * + * + * The Soup-Debug-Timestamp line gives the time (as + * a #time_t) when the request was sent, or the response fully + * received. 
+ * + * The Soup-Debug line gives further debugging + * information about the #SoupSession, #SoupMessage, and #SoupSocket + * involved; the hex numbers are the addresses of the objects in + * question (which may be useful if you are running in a debugger). + * The decimal IDs are simply counters that uniquely identify objects + * across the lifetime of the #SoupLogger. In particular, this can be + * used to identify when multiple messages are sent across the same + * connection. + * + * Currently, the request half of the message is logged just before + * the first byte of the request gets written to the network (from the + * #SoupSession::request_started signal), which means that if you have + * not made the complete request body available at that point, it will + * not be logged. The response is logged just after the last byte of + * the response body is read from the network (from the + * #SoupMessage::got_body or #SoupMessage::got_informational signal), + * which means that the #SoupMessage::got_headers signal, and anything + * triggered off it (such as #SoupSession::authenticate) will be + * emitted before the response headers are + * actually logged. + **/ + +static void soup_logger_session_feature_init (SoupSessionFeatureInterface *feature_interface, gpointer interface_data); + +static void request_queued (SoupSessionFeature *feature, SoupSession *session, + SoupMessage *msg); +static void request_started (SoupSessionFeature *feature, SoupSession *session, + SoupMessage *msg, SoupSocket *socket); +static void request_unqueued (SoupSessionFeature *feature, + SoupSession *session, SoupMessage *msg); + +G_DEFINE_TYPE_WITH_CODE (SoupLogger, soup_logger, G_TYPE_OBJECT, + G_IMPLEMENT_INTERFACE (SOUP_TYPE_SESSION_FEATURE, + soup_logger_session_feature_init)) + +typedef struct { + /* We use a mutex so that if requests are being run in + * multiple threads, we don't mix up the output. 
+ */ + GMutex *lock; + + GQuark tag; + GHashTable *ids; + + SoupLoggerLogLevel level; + int max_body_size; + + SoupLoggerFilter request_filter; + gpointer request_filter_data; + GDestroyNotify request_filter_dnotify; + + SoupLoggerFilter response_filter; + gpointer response_filter_data; + GDestroyNotify response_filter_dnotify; + + SoupLoggerPrinter printer; + gpointer printer_data; + GDestroyNotify printer_dnotify; +} SoupLoggerPrivate; +#define SOUP_LOGGER_GET_PRIVATE(o) (G_TYPE_INSTANCE_GET_PRIVATE ((o), SOUP_TYPE_LOGGER, SoupLoggerPrivate)) + +static void +soup_logger_init (SoupLogger *logger) +{ + SoupLoggerPrivate *priv = SOUP_LOGGER_GET_PRIVATE (logger); + + priv->lock = g_mutex_new (); + priv->tag = g_quark_from_static_string (g_strdup_printf ("SoupLogger-%p", logger)); + priv->ids = g_hash_table_new (NULL, NULL); +} + +static void +finalize (GObject *object) +{ + SoupLoggerPrivate *priv = SOUP_LOGGER_GET_PRIVATE (object); + + g_hash_table_destroy (priv->ids); + + if (priv->request_filter_dnotify) + priv->request_filter_dnotify (priv->request_filter_data); + if (priv->response_filter_dnotify) + priv->response_filter_dnotify (priv->response_filter_data); + if (priv->printer_dnotify) + priv->printer_dnotify (priv->printer_data); + + g_mutex_free (priv->lock); + + G_OBJECT_CLASS (soup_logger_parent_class)->finalize (object); +} + +static void +soup_logger_class_init (SoupLoggerClass *logger_class) +{ + GObjectClass *object_class = G_OBJECT_CLASS (logger_class); + + g_type_class_add_private (logger_class, sizeof (SoupLoggerPrivate)); + + object_class->finalize = finalize; +} + +static void +soup_logger_session_feature_init (SoupSessionFeatureInterface *feature_interface, + gpointer interface_data) +{ + feature_interface->request_queued = request_queued; + feature_interface->request_started = request_started; + feature_interface->request_unqueued = request_unqueued; +} + +/** + * SoupLoggerLogLevel: + * @SOUP_LOGGER_LOG_NONE: No logging + * 
@SOUP_LOGGER_LOG_MINIMAL: Log the Request-Line or Status-Line and + * the Soup-Debug pseudo-headers + * @SOUP_LOGGER_LOG_HEADERS: Log the full request/response headers + * @SOUP_LOGGER_LOG_BODY: Log the full headers and request/response + * bodies. + * + * Describes the level of logging output to provide. + **/ + +/** + * soup_logger_new: + * @level: the debug level + * @max_body_size: the maximum body size to output, or -1 + * + * Creates a new #SoupLogger with the given debug level. If @level is + * %SOUP_LOGGER_LOG_BODY, @max_body_size gives the maximum number of + * bytes of the body that will be logged. (-1 means "no limit".) + * + * If you need finer control over what message parts are and aren't + * logged, use soup_logger_set_request_filter() and + * soup_logger_set_response_filter(). + * + * Returns: a new #SoupLogger + **/ +SoupLogger * +soup_logger_new (SoupLoggerLogLevel level, int max_body_size) +{ + SoupLogger *logger; + SoupLoggerPrivate *priv; + + logger = g_object_new (SOUP_TYPE_LOGGER, NULL); + + priv = SOUP_LOGGER_GET_PRIVATE (logger); + priv->level = level; + priv->max_body_size = max_body_size; + + return logger; +} + +/** + * SoupLoggerFilter: + * @logger: the #SoupLogger + * @msg: the message being logged + * @user_data: the data passed to soup_logger_set_request_filter() + * or soup_logger_set_response_filter() + * + * The prototype for a logging filter. The filter callback will be + * invoked for each request or response, and should analyze it and + * return a #SoupLoggerLogLevel value indicating how much of the + * message to log. Eg, it might choose between %SOUP_LOGGER_LOG_BODY + * and %SOUP_LOGGER_LOG_HEADERS depending on the Content-Type. 
+ * + * Return value: a #SoupLoggerLogLevel value indicating how much of + * the message to log + **/ + +/** + * soup_logger_set_request_filter: + * @logger: a #SoupLogger + * @request_filter: the callback for request debugging + * @filter_data: data to pass to the callback + * @destroy: a #GDestroyNotify to free @filter_data + * + * Sets up a filter to determine the log level for a given request. + * For each HTTP request @logger will invoke @request_filter to + * determine how much (if any) of that request to log. (If you do not + * set a request filter, @logger will just always log requests at the + * level passed to soup_logger_new().) + **/ +void +soup_logger_set_request_filter (SoupLogger *logger, + SoupLoggerFilter request_filter, + gpointer filter_data, + GDestroyNotify destroy) +{ + SoupLoggerPrivate *priv = SOUP_LOGGER_GET_PRIVATE (logger); + + priv->request_filter = request_filter; + priv->request_filter_data = filter_data; + priv->request_filter_dnotify = destroy; +} + +/** + * soup_logger_set_response_filter: + * @logger: a #SoupLogger + * @response_filter: the callback for response debugging + * @filter_data: data to pass to the callback + * @destroy: a #GDestroyNotify to free @filter_data + * + * Sets up a filter to determine the log level for a given response. + * For each HTTP response @logger will invoke @response_filter to + * determine how much (if any) of that response to log. (If you do not + * set a response filter, @logger will just always log responses at + * the level passed to soup_logger_new().) 
+ **/ +void +soup_logger_set_response_filter (SoupLogger *logger, + SoupLoggerFilter response_filter, + gpointer filter_data, + GDestroyNotify destroy) +{ + SoupLoggerPrivate *priv = SOUP_LOGGER_GET_PRIVATE (logger); + + priv->response_filter = response_filter; + priv->response_filter_data = filter_data; + priv->response_filter_dnotify = destroy; +} + +/** + * SoupLoggerPrinter: + * @logger: the #SoupLogger + * @level: the level of the information being printed. + * @direction: a single-character prefix to @data + * @data: data to print + * @user_data: the data passed to soup_logger_set_printer() + * + * The prototype for a custom printing callback. + * + * @level indicates what kind of information is being printed. Eg, it + * will be %SOUP_LOGGER_LOG_HEADERS if @data is header data. + * + * @direction is either '<', '>', or ' ', and @data is the single line + * to print; the printer is expected to add a terminating newline. + * + * To get the effect of the default printer, you would do: + * + * + * printf ("%c %s\n", direction, data); + * + **/ + +/** + * soup_logger_set_printer: + * @logger: a #SoupLogger + * @printer: the callback for printing logging output + * @printer_data: data to pass to the callback + * @destroy: a #GDestroyNotify to free @printer_data + * + * Sets up an alternate log printing routine, if you don't want + * the log to go to %stdout. 
+ **/ +void +soup_logger_set_printer (SoupLogger *logger, + SoupLoggerPrinter printer, + gpointer printer_data, + GDestroyNotify destroy) +{ + SoupLoggerPrivate *priv = SOUP_LOGGER_GET_PRIVATE (logger); + + priv->printer = printer; + priv->printer_data = printer_data; + priv->printer_dnotify = destroy; +} + +static guint +soup_logger_get_id (SoupLogger *logger, gpointer object) +{ + SoupLoggerPrivate *priv = SOUP_LOGGER_GET_PRIVATE (logger); + + return GPOINTER_TO_UINT (g_object_get_qdata (object, priv->tag)); +} + +static guint +soup_logger_set_id (SoupLogger *logger, gpointer object) +{ + SoupLoggerPrivate *priv = SOUP_LOGGER_GET_PRIVATE (logger); + gpointer klass = G_OBJECT_GET_CLASS (object); + gpointer id; + + id = g_hash_table_lookup (priv->ids, klass); + id = (char *)id + 1; + g_hash_table_insert (priv->ids, klass, id); + + g_object_set_qdata (object, priv->tag, id); + return GPOINTER_TO_UINT (id); +} + +/** + * soup_logger_attach: + * @logger: a #SoupLogger + * @session: a #SoupSession + * + * Sets @logger to watch @session and print debug information for + * its messages. + * + * (The session will take a reference on @logger, which will be + * removed when you call soup_logger_detach(), or when the session is + * destroyed.) + * + * Deprecated: Use soup_session_add_feature() instead. + **/ +void +soup_logger_attach (SoupLogger *logger, + SoupSession *session) +{ + soup_session_add_feature (session, SOUP_SESSION_FEATURE (logger)); +} + +/** + * soup_logger_detach: + * @logger: a #SoupLogger + * @session: a #SoupSession + * + * Stops @logger from watching @session. + * + * Deprecated: Use soup_session_remove_feature() instead. + **/ +void +soup_logger_detach (SoupLogger *logger, + SoupSession *session) +{ + soup_session_remove_feature (session, SOUP_SESSION_FEATURE (logger)); +} + +static void +soup_logger_print (SoupLogger *logger, SoupLoggerLogLevel level, + char direction, const char *format, ...) 
+{ + SoupLoggerPrivate *priv = SOUP_LOGGER_GET_PRIVATE (logger); + va_list args; + char *data, *line, *end; + + va_start (args, format); + data = g_strdup_vprintf (format, args); + va_end (args); + + if (level == SOUP_LOGGER_LOG_BODY && priv->max_body_size > 0) { + if (strlen (data) > priv->max_body_size + 6) + strcpy (data + priv->max_body_size, "\n[...]"); + } + + line = data; + do { + end = strchr (line, '\n'); + if (end) + *end = '\0'; + if (priv->printer) { + priv->printer (logger, level, direction, + line, priv->printer_data); + } else + printf ("%c %s\n", direction, line); + + line = end + 1; + } while (end && *line); + + g_free (data); +} + +static void +soup_logger_print_basic_auth (SoupLogger *logger, const char *value) +{ + char *decoded, *p; + gsize len; + + decoded = (char *)g_base64_decode (value + 6, &len); + if (!decoded) + decoded = g_strdup (value); + p = strchr (decoded, ':'); + if (p) { + while (++p < decoded + len) + *p = '*'; + } + soup_logger_print (logger, SOUP_LOGGER_LOG_HEADERS, '>', + "Authorization: Basic [%.*s]", len, decoded); + g_free (decoded); +} + +static void +print_request (SoupLogger *logger, SoupMessage *msg, + SoupSession *session, SoupSocket *socket, + gboolean restarted) +{ + SoupLoggerPrivate *priv = SOUP_LOGGER_GET_PRIVATE (logger); + SoupLoggerLogLevel log_level; + SoupMessageHeadersIter iter; + const char *name, *value; + SoupURI *uri; + + if (priv->request_filter) { + log_level = priv->request_filter (logger, msg, + priv->request_filter_data); + } else + log_level = priv->level; + + if (log_level == SOUP_LOGGER_LOG_NONE) + return; + + uri = soup_message_get_uri (msg); + if (msg->method == SOUP_METHOD_CONNECT) { + soup_logger_print (logger, SOUP_LOGGER_LOG_MINIMAL, '>', + "CONNECT %s:%u HTTP/1.%d", + uri->host, uri->port, + soup_message_get_http_version (msg)); + } else { + soup_logger_print (logger, SOUP_LOGGER_LOG_MINIMAL, '>', + "%s %s%s%s HTTP/1.%d", + msg->method, uri->path, + uri->query ? "?" : "", + uri->query ? 
uri->query : "", + soup_message_get_http_version (msg)); + } + + soup_logger_print (logger, SOUP_LOGGER_LOG_MINIMAL, '>', + "Soup-Debug-Timestamp: %lu", + (unsigned long)time (0)); + soup_logger_print (logger, SOUP_LOGGER_LOG_MINIMAL, '>', + "Soup-Debug: %s %u (%p), %s %u (%p), %s %u (%p)%s", + g_type_name_from_instance ((GTypeInstance *)session), + soup_logger_get_id (logger, session), session, + g_type_name_from_instance ((GTypeInstance *)msg), + soup_logger_get_id (logger, msg), msg, + g_type_name_from_instance ((GTypeInstance *)socket), + soup_logger_get_id (logger, socket), socket, + restarted ? ", restarted" : ""); + + if (log_level == SOUP_LOGGER_LOG_MINIMAL) + return; + + if (!soup_message_headers_get_one (msg->request_headers, "Host")) { + soup_logger_print (logger, SOUP_LOGGER_LOG_HEADERS, '>', + "Host: %s%c%u", uri->host, + soup_uri_uses_default_port (uri) ? '\0' : ':', + uri->port); + } + soup_message_headers_iter_init (&iter, msg->request_headers); + while (soup_message_headers_iter_next (&iter, &name, &value)) { + if (!g_ascii_strcasecmp (name, "Authorization") && + !g_ascii_strncasecmp (value, "Basic ", 6)) + soup_logger_print_basic_auth (logger, value); + else { + soup_logger_print (logger, SOUP_LOGGER_LOG_HEADERS, '>', + "%s: %s", name, value); + } + } + if (log_level == SOUP_LOGGER_LOG_HEADERS) + return; + + if (msg->request_body->length && + soup_message_body_get_accumulate (msg->request_body)) { + SoupBuffer *request; + + request = soup_message_body_flatten (msg->request_body); + g_return_if_fail (request != NULL); + soup_buffer_free (request); + + if (soup_message_headers_get_expectations (msg->request_headers) != SOUP_EXPECTATION_CONTINUE) { + soup_logger_print (logger, SOUP_LOGGER_LOG_BODY, '>', + "\n%s", msg->request_body->data); + } + } +} + +static void +print_response (SoupLogger *logger, SoupMessage *msg) +{ + SoupLoggerPrivate *priv = SOUP_LOGGER_GET_PRIVATE (logger); + SoupLoggerLogLevel log_level; + SoupMessageHeadersIter iter; + 
const char *name, *value; + + if (priv->response_filter) { + log_level = priv->response_filter (logger, msg, + priv->response_filter_data); + } else + log_level = priv->level; + + if (log_level == SOUP_LOGGER_LOG_NONE) + return; + + soup_logger_print (logger, SOUP_LOGGER_LOG_MINIMAL, '<', + "HTTP/1.%d %u %s\n", + soup_message_get_http_version (msg), + msg->status_code, msg->reason_phrase); + + soup_logger_print (logger, SOUP_LOGGER_LOG_MINIMAL, '<', + "Soup-Debug-Timestamp: %lu", + (unsigned long)time (0)); + soup_logger_print (logger, SOUP_LOGGER_LOG_MINIMAL, '<', + "Soup-Debug: %s %u (%p)", + g_type_name_from_instance ((GTypeInstance *)msg), + soup_logger_get_id (logger, msg), msg); + + if (log_level == SOUP_LOGGER_LOG_MINIMAL) + return; + + soup_message_headers_iter_init (&iter, msg->response_headers); + while (soup_message_headers_iter_next (&iter, &name, &value)) { + soup_logger_print (logger, SOUP_LOGGER_LOG_HEADERS, '<', + "%s: %s", name, value); + } + if (log_level == SOUP_LOGGER_LOG_HEADERS) + return; + + if (msg->response_body->data) { + soup_logger_print (logger, SOUP_LOGGER_LOG_BODY, '<', + "\n%s", msg->response_body->data); + } +} + +static void +got_informational (SoupMessage *msg, gpointer user_data) +{ + SoupLogger *logger = user_data; + SoupLoggerPrivate *priv = SOUP_LOGGER_GET_PRIVATE (logger); + + g_mutex_lock (priv->lock); + + print_response (logger, msg); + soup_logger_print (logger, SOUP_LOGGER_LOG_MINIMAL, ' ', ""); + + if (msg->status_code == SOUP_STATUS_CONTINUE && msg->request_body->data) { + SoupLoggerLogLevel log_level; + + soup_logger_print (logger, SOUP_LOGGER_LOG_MINIMAL, '>', + "[Now sending request body...]"); + + if (priv->request_filter) { + log_level = priv->request_filter (logger, msg, + priv->request_filter_data); + } else + log_level = priv->level; + + if (log_level == SOUP_LOGGER_LOG_BODY) { + soup_logger_print (logger, SOUP_LOGGER_LOG_BODY, '>', + "%s", msg->request_body->data); + } + + soup_logger_print (logger, 
SOUP_LOGGER_LOG_MINIMAL, ' ', ""); + } + + g_mutex_unlock (priv->lock); +} + +static void +got_body (SoupMessage *msg, gpointer user_data) +{ + SoupLogger *logger = user_data; + SoupLoggerPrivate *priv = SOUP_LOGGER_GET_PRIVATE (logger); + + g_mutex_lock (priv->lock); + + print_response (logger, msg); + soup_logger_print (logger, SOUP_LOGGER_LOG_MINIMAL, ' ', ""); + + g_mutex_unlock (priv->lock); +} + +static void +request_queued (SoupSessionFeature *logger, SoupSession *session, + SoupMessage *msg) +{ + g_return_if_fail (SOUP_IS_MESSAGE (msg)); + + g_signal_connect (msg, "got-informational", + G_CALLBACK (got_informational), + logger); + g_signal_connect (msg, "got-body", + G_CALLBACK (got_body), + logger); +} + +static void +request_started (SoupSessionFeature *feature, SoupSession *session, + SoupMessage *msg, SoupSocket *socket) +{ + SoupLogger *logger = SOUP_LOGGER (feature); + gboolean restarted; + guint msg_id; + + g_return_if_fail (SOUP_IS_SESSION (session)); + g_return_if_fail (SOUP_IS_MESSAGE (msg)); + g_return_if_fail (SOUP_IS_SOCKET (socket)); + + msg_id = soup_logger_get_id (logger, msg); + if (msg_id) + restarted = TRUE; + else { + soup_logger_set_id (logger, msg); + restarted = FALSE; + } + + if (!soup_logger_get_id (logger, session)) + soup_logger_set_id (logger, session); + + if (!soup_logger_get_id (logger, socket)) + soup_logger_set_id (logger, socket); + + print_request (logger, msg, session, socket, restarted); + soup_logger_print (logger, SOUP_LOGGER_LOG_MINIMAL, ' ', ""); +} + +static void +request_unqueued (SoupSessionFeature *logger, SoupSession *session, + SoupMessage *msg) +{ + g_return_if_fail (SOUP_IS_MESSAGE (msg)); + + g_signal_handlers_disconnect_by_func (msg, got_informational, logger); + g_signal_handlers_disconnect_by_func (msg, got_body, logger); +} diff --git a/libsoup/soup-logger.h b/libsoup/soup-logger.h new file mode 100644 index 0000000..c6d3a39 --- /dev/null +++ b/libsoup/soup-logger.h @@ -0,0 +1,80 @@ +/* -*- Mode: C; 
/* Class-type check; it has to test its own @klass argument rather than
 * an unrelated identifier from the expansion site.
 */
#define SOUP_IS_LOGGER_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), SOUP_TYPE_LOGGER))
SoupLoggerFilter response_filter, + gpointer filter_data, + GDestroyNotify destroy); + +void soup_logger_set_printer (SoupLogger *logger, + SoupLoggerPrinter printer, + gpointer printer_data, + GDestroyNotify destroy); + +G_END_DECLS + +#endif /* SOUP_LOGGER_H */ diff --git a/libsoup/soup-marshal.list b/libsoup/soup-marshal.list new file mode 100644 index 0000000..7714813 --- /dev/null +++ b/libsoup/soup-marshal.list @@ -0,0 +1,10 @@ +NONE:BOXED +NONE:INT +NONE:NONE +NONE:OBJECT +NONE:OBJECT,OBJECT +NONE:OBJECT,POINTER +NONE:BOXED,BOXED +NONE:OBJECT,OBJECT,BOOLEAN +NONE:STRING,BOXED +NONE:STRING,STRING diff --git a/libsoup/soup-message-body.c b/libsoup/soup-message-body.c new file mode 100644 index 0000000..a1d78f8 --- /dev/null +++ b/libsoup/soup-message-body.c @@ -0,0 +1,752 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * soup-message-body.c: SoupMessage request/response bodies + * + * Copyright (C) 2000-2003, Ximian, Inc. + */ + +#include + +#include "soup-message-body.h" + +/** + * SECTION:soup-message-body + * @short_description: HTTP message body + * @see_also: #SoupMessage + * + * #SoupMessageBody represents the request or response body of a + * #SoupMessage. + * + * In addition to #SoupMessageBody, libsoup also defines a "smaller" + * data buffer type, #SoupBuffer, which is primarily used as a + * component of #SoupMessageBody. In particular, when using chunked + * encoding to transmit or receive a message, each chunk is + * represented as a #SoupBuffer. + **/ + +/** + * SoupMemoryUse: + * @SOUP_MEMORY_STATIC: The memory is statically allocated and + * constant; libsoup can use the passed-in buffer directly and not + * need to worry about it being modified or freed. + * @SOUP_MEMORY_TAKE: The caller has allocated the memory for the + * #SoupBuffer's use; libsoup will assume ownership of it and free it + * (with g_free()) when it is done with it. 
/* Private representation behind every public SoupBuffer pointer. */
typedef struct {
	/* Public part. It is the first member, and the code in this file
	 * casts between SoupBuffer * and SoupBufferPrivate *.
	 */
	SoupBuffer     buffer;
	/* How the data is held: a public SoupMemoryUse value, or one of
	 * the internal SOUP_MEMORY_SUBBUFFER / SOUP_MEMORY_OWNED values.
	 */
	SoupMemoryUse  use;
	/* Set to 1 on creation; soup_buffer_copy() increments it and
	 * soup_buffer_free() decrements it.
	 */
	guint          refcount;

	/* Passed to owner_dnotify (when that is set) by soup_buffer_free()
	 * once refcount reaches zero. For SOUP_MEMORY_TEMPORARY buffers
	 * soup_buffer_copy() stores its lazily created real copy here.
	 */
	gpointer       owner;
	GDestroyNotify owner_dnotify;
} SoupBufferPrivate;
+ **/ +SoupBuffer * +soup_buffer_new (SoupMemoryUse use, gconstpointer data, gsize length) +{ + SoupBufferPrivate *priv = g_slice_new0 (SoupBufferPrivate); + + if (use == SOUP_MEMORY_COPY) { + data = g_memdup (data, length); + use = SOUP_MEMORY_TAKE; + } + + priv->buffer.data = data; + priv->buffer.length = length; + priv->use = use; + priv->refcount = 1; + + if (use == SOUP_MEMORY_TAKE) { + priv->owner = (gpointer)data; + priv->owner_dnotify = g_free; + } + + return (SoupBuffer *)priv; +} + +/** + * soup_buffer_new_take: + * @data: (array length=length) (transfer full): data + * @length: length of @data + * + * Creates a new #SoupBuffer containing @length bytes from @data. + * + * This function is exactly equivalent to soup_buffer_new() with + * %SOUP_MEMORY_TAKE as first argument; it exists mainly for + * convenience and simplifying language bindings. + * + * Return value: the new #SoupBuffer. + * + * Since: 2.32 + * Rename to: soup_buffer_new + **/ +SoupBuffer * +soup_buffer_new_take (guchar *data, gsize length) +{ + return soup_buffer_new (SOUP_MEMORY_TAKE, data, length); +} + +/** + * soup_buffer_new_subbuffer: + * @parent: the parent #SoupBuffer + * @offset: offset within @parent to start at + * @length: number of bytes to copy from @parent + * + * Creates a new #SoupBuffer containing @length bytes "copied" from + * @parent starting at @offset. (Normally this will not actually copy + * any data, but will instead simply reference the same data as + * @parent does.) + * + * Return value: the new #SoupBuffer. + **/ +SoupBuffer * +soup_buffer_new_subbuffer (SoupBuffer *parent, gsize offset, gsize length) +{ + SoupBufferPrivate *priv; + + /* Normally this is just a ref, but if @parent is TEMPORARY, + * it will do an actual copy. 
+ */ + parent = soup_buffer_copy (parent); + + priv = g_slice_new0 (SoupBufferPrivate); + priv->buffer.data = parent->data + offset; + priv->buffer.length = length; + priv->use = SOUP_MEMORY_SUBBUFFER; + priv->owner = parent; + priv->owner_dnotify = (GDestroyNotify)soup_buffer_free; + priv->refcount = 1; + + return (SoupBuffer *)priv; +} + +/** + * soup_buffer_new_with_owner: + * @data: data + * @length: length of @data + * @owner: pointer to an object that owns @data + * @owner_dnotify: (allow-none): a function to free/unref @owner when + * the buffer is freed + * + * Creates a new #SoupBuffer containing @length bytes from @data. When + * the #SoupBuffer is freed, it will call @owner_dnotify, passing + * @owner to it. You must ensure that @data will remain valid until + * @owner_dnotify is called. + * + * For example, you could use this to create a buffer containing data + * returned from libxml without needing to do an extra copy: + * + * + * xmlDocDumpMemory (doc, &xmlbody, &len); + * return soup_buffer_new_with_owner (xmlbody, len, xmlbody, + * (GDestroyNotify)xmlFree); + * + * + * In this example, @data and @owner are the same, but in other cases + * they would be different (eg, @owner would be a object, and @data + * would be a pointer to one of the object's fields). + * + * Return value: the new #SoupBuffer. + **/ +SoupBuffer * +soup_buffer_new_with_owner (gconstpointer data, gsize length, + gpointer owner, GDestroyNotify owner_dnotify) +{ + SoupBufferPrivate *priv = g_slice_new0 (SoupBufferPrivate); + + priv->buffer.data = data; + priv->buffer.length = length; + priv->use = SOUP_MEMORY_OWNED; + priv->owner = owner; + priv->owner_dnotify = owner_dnotify; + priv->refcount = 1; + + return (SoupBuffer *)priv; +} + +/** + * soup_buffer_get_owner: + * @buffer: a #SoupBuffer created with soup_buffer_new_with_owner() + * + * Gets the "owner" object for a buffer created with + * soup_buffer_new_with_owner(). 
+ * + * Return value: (transfer none): the owner pointer + **/ +gpointer +soup_buffer_get_owner (SoupBuffer *buffer) +{ + SoupBufferPrivate *priv = (SoupBufferPrivate *)buffer; + + g_return_val_if_fail ((int)priv->use == (int)SOUP_MEMORY_OWNED, NULL); + return priv->owner; +} + +/** + * soup_buffer_get_data: + * @buffer: a #SoupBuffer + * @data: (out) (array length=length) (transfer none): the pointer + * to the buffer data is stored here + * @length: (out): the length of the buffer data is stored here + * + * This function exists for use by language bindings, because it's not + * currently possible to get the right effect by annotating the fields + * of #SoupBuffer. + * + * Since: 2.32 + */ +void +soup_buffer_get_data (SoupBuffer *buffer, + const guint8 **data, + gsize *length) +{ + *data = (const guint8 *)buffer->data; + *length = buffer->length; +} + +/** + * soup_buffer_copy: + * @buffer: a #SoupBuffer + * + * Makes a copy of @buffer. In reality, #SoupBuffer is a refcounted + * type, and calling soup_buffer_copy() will normally just increment + * the refcount on @buffer and return it. However, if @buffer was + * created with #SOUP_MEMORY_TEMPORARY memory, then soup_buffer_copy() + * will actually return a copy of it, so that the data in the copy + * will remain valid after the temporary buffer is freed. + * + * Return value: the new (or newly-reffed) buffer + **/ +SoupBuffer * +soup_buffer_copy (SoupBuffer *buffer) +{ + SoupBufferPrivate *priv = (SoupBufferPrivate *)buffer; + + /* For non-TEMPORARY buffers, this is just a ref */ + if (priv->use != SOUP_MEMORY_TEMPORARY) { + priv->refcount++; + return buffer; + } + + /* For TEMPORARY buffers, we need to do a real copy the first + * time, and then after that, we just keep returning the copy. + * We store the copy in priv->owner, which is technically + * backwards, but it saves us from having to keep an extra + * pointer in SoupBufferPrivate. 
+ */ + + if (!priv->owner) { + priv->owner = soup_buffer_new (SOUP_MEMORY_COPY, + buffer->data, + buffer->length); + priv->owner_dnotify = (GDestroyNotify)soup_buffer_free; + } + return soup_buffer_copy (priv->owner); +} + +/** + * soup_buffer_free: + * @buffer: a #SoupBuffer + * + * Frees @buffer. (In reality, as described in the documentation for + * soup_buffer_copy(), this is actually an "unref" operation, and may + * or may not actually free @buffer.) + **/ +void +soup_buffer_free (SoupBuffer *buffer) +{ + SoupBufferPrivate *priv = (SoupBufferPrivate *)buffer; + + if (!--priv->refcount) { + if (priv->owner_dnotify) + priv->owner_dnotify (priv->owner); + g_slice_free (SoupBufferPrivate, priv); + } +} + +GType +soup_buffer_get_type (void) +{ + static volatile gsize type_volatile = 0; + + if (g_once_init_enter (&type_volatile)) { + GType type = g_boxed_type_register_static ( + g_intern_static_string ("SoupBuffer"), + (GBoxedCopyFunc) soup_buffer_copy, + (GBoxedFreeFunc) soup_buffer_free); + g_once_init_leave (&type_volatile, type); + } + return type_volatile; +} + + +/** + * SoupMessageBody: + * @data: the data + * @length: length of @data + * + * A #SoupMessage request or response body. + * + * Note that while @length always reflects the full length of the + * message body, @data is normally %NULL, and will only be filled in + * after soup_message_body_flatten() is called. For client-side + * messages, this automatically happens for the response body after it + * has been fully read, unless you set the + * %SOUP_MESSAGE_OVERWRITE_CHUNKS flags. Likewise, for server-side + * messages, the request body is automatically filled in after being + * read. + * + * As an added bonus, when @data is filled in, it is always terminated + * with a '\0' byte (which is not reflected in @length). 
+ **/ + +typedef struct { + SoupMessageBody body; + GSList *chunks, *last; + SoupBuffer *flattened; + gboolean accumulate; + goffset base_offset; + int ref_count; +} SoupMessageBodyPrivate; + +/** + * soup_message_body_new: + * + * Creates a new #SoupMessageBody. #SoupMessage uses this internally; you + * will not normally need to call it yourself. + * + * Return value: a new #SoupMessageBody. + **/ +SoupMessageBody * +soup_message_body_new (void) +{ + SoupMessageBodyPrivate *priv; + + priv = g_slice_new0 (SoupMessageBodyPrivate); + priv->accumulate = TRUE; + priv->ref_count = 1; + + return (SoupMessageBody *)priv; +} + +/** + * soup_message_body_set_accumulate: + * @body: a #SoupMessageBody + * @accumulate: whether or not to accumulate body chunks in @body + * + * Sets or clears the accumulate flag on @body. (The default value is + * %TRUE.) If set to %FALSE, @body's %data field will not be filled in + * after the body is fully sent/received, and the chunks that make up + * @body may be discarded when they are no longer needed. + * + * In particular, if you set this flag to %FALSE on an "incoming" + * message body (that is, the %response_body of a client-side message, + * or %request_body of a server-side message), this will cause each + * chunk of the body to be discarded after its corresponding + * #SoupMessage::got_chunk signal is emitted. (This is equivalent to + * setting the deprecated %SOUP_MESSAGE_OVERWRITE_CHUNKS flag on the + * message.) + * + * If you set this flag to %FALSE on the %response_body of a + * server-side message, it will cause each chunk of the body to be + * discarded after its corresponding #SoupMessage::wrote_chunk signal + * is emitted. 
+ * + * If you set the flag to %FALSE on the %request_body of a client-side + * message, it will block the accumulation of chunks into @body's + * %data field, but it will not normally cause the chunks to be + * discarded after being written like in the server-side + * %response_body case, because the request body needs to be kept + * around in case the request needs to be sent a second time due to + * redirection or authentication. However, if you set the + * %SOUP_MESSAGE_CAN_REBUILD flag on the message, then the chunks will + * be discarded, and you will be responsible for recreating the + * request body after the #SoupMessage::restarted signal is emitted. + * + * Since: 2.4.1 + **/ +void +soup_message_body_set_accumulate (SoupMessageBody *body, + gboolean accumulate) +{ + SoupMessageBodyPrivate *priv = (SoupMessageBodyPrivate *)body; + + priv->accumulate = accumulate; +} + +/** + * soup_message_body_get_accumulate: + * @body: a #SoupMessageBody + * + * Gets the accumulate flag on @body; see + * soup_message_body_set_accumulate() for details. + * + * Return value: the accumulate flag for @body. 
+ * + * Since: 2.4.1 + **/ +gboolean +soup_message_body_get_accumulate (SoupMessageBody *body) +{ + SoupMessageBodyPrivate *priv = (SoupMessageBodyPrivate *)body; + + return priv->accumulate; +} + +static void +append_buffer (SoupMessageBody *body, SoupBuffer *buffer) +{ + SoupMessageBodyPrivate *priv = (SoupMessageBodyPrivate *)body; + + if (priv->last) { + priv->last = g_slist_append (priv->last, buffer); + priv->last = priv->last->next; + } else + priv->chunks = priv->last = g_slist_append (NULL, buffer); + + if (priv->flattened) { + soup_buffer_free (priv->flattened); + priv->flattened = NULL; + body->data = NULL; + } + body->length += buffer->length; +} + +/** + * soup_message_body_append: + * @body: a #SoupMessageBody + * @use: how to use @data + * @data: (array length=length) (element-type guint8): data to append + * @length: length of @data + * + * Appends @length bytes from @data to @body according to @use. + **/ +void +soup_message_body_append (SoupMessageBody *body, SoupMemoryUse use, + gconstpointer data, gsize length) +{ + if (length > 0) + append_buffer (body, soup_buffer_new (use, data, length)); + else if (use == SOUP_MEMORY_TAKE) + g_free ((gpointer)data); +} + +/** + * soup_message_body_append_take: + * @body: a #SoupMessageBody + * @data: (array length=length) (transfer full): data to append + * @length: length of @data + * + * Appends @length bytes from @data to @body. + * + * This function is exactly equivalent to soup_message_body_apppend() + * with %SOUP_MEMORY_TAKE as second argument; it exists mainly for + * convenience and simplifying language bindings. + * + * Since: 2.32 + * Rename to: soup_message_body_append + **/ +void +soup_message_body_append_take (SoupMessageBody *body, + guchar *data, gsize length) +{ + soup_message_body_append(body, SOUP_MEMORY_TAKE, data, length); +} + +/** + * soup_message_body_append_buffer: + * @body: a #SoupMessageBody + * @buffer: a #SoupBuffer + * + * Appends the data from @buffer to @body. 
(#SoupMessageBody uses + * #SoupBuffers internally, so this is normally a constant-time + * operation that doesn't actually require copying the data in + * @buffer.) + **/ +void +soup_message_body_append_buffer (SoupMessageBody *body, SoupBuffer *buffer) +{ + g_return_if_fail (buffer->length > 0); + append_buffer (body, soup_buffer_copy (buffer)); +} + +/** + * soup_message_body_truncate: + * @body: a #SoupMessageBody + * + * Deletes all of the data in @body. + **/ +void +soup_message_body_truncate (SoupMessageBody *body) +{ + SoupMessageBodyPrivate *priv = (SoupMessageBodyPrivate *)body; + GSList *iter; + + for (iter = priv->chunks; iter; iter = iter->next) + soup_buffer_free (iter->data); + g_slist_free (priv->chunks); + priv->chunks = priv->last = NULL; + priv->base_offset = 0; + + if (priv->flattened) { + soup_buffer_free (priv->flattened); + priv->flattened = NULL; + body->data = NULL; + } + body->length = 0; +} + +/** + * soup_message_body_complete: + * @body: a #SoupMessageBody + * + * Tags @body as being complete; Call this when using chunked encoding + * after you have appended the last chunk. + **/ +void +soup_message_body_complete (SoupMessageBody *body) +{ + append_buffer (body, soup_buffer_new (SOUP_MEMORY_STATIC, NULL, 0)); +} + +/** + * soup_message_body_flatten: + * @body: a #SoupMessageBody + * + * Fills in @body's data field with a buffer containing all of the + * data in @body (plus an additional '\0' byte not counted by @body's + * length field). + * + * Return value: a #SoupBuffer containing the same data as @body. + * (You must free this buffer if you do not want it.) 
+ **/ +SoupBuffer * +soup_message_body_flatten (SoupMessageBody *body) +{ + SoupMessageBodyPrivate *priv = (SoupMessageBodyPrivate *)body; + char *buf, *ptr; + GSList *iter; + SoupBuffer *chunk; + + g_return_val_if_fail (priv->accumulate == TRUE, NULL); + + if (!priv->flattened) { +#if GLIB_SIZEOF_SIZE_T < 8 + g_return_val_if_fail (body->length < G_MAXSIZE, NULL); +#endif + + buf = ptr = g_malloc (body->length + 1); + for (iter = priv->chunks; iter; iter = iter->next) { + chunk = iter->data; + memcpy (ptr, chunk->data, chunk->length); + ptr += chunk->length; + } + *ptr = '\0'; + + priv->flattened = soup_buffer_new (SOUP_MEMORY_TAKE, + buf, body->length); + body->data = priv->flattened->data; + } + + return soup_buffer_copy (priv->flattened); +} + +/** + * soup_message_body_get_chunk: + * @body: a #SoupMessageBody + * @offset: an offset + * + * Gets a #SoupBuffer containing data from @body starting at @offset. + * The size of the returned chunk is unspecified. You can iterate + * through the entire body by first calling + * soup_message_body_get_chunk() with an offset of 0, and then on each + * successive call, increment the offset by the length of the + * previously-returned chunk. + * + * If @offset is greater than or equal to the total length of @body, + * then the return value depends on whether or not + * soup_message_body_complete() has been called or not; if it has, + * then soup_message_body_get_chunk() will return a 0-length chunk + * (indicating the end of @body). If it has not, then + * soup_message_body_get_chunk() will return %NULL (indicating that + * @body may still potentially have more data, but that data is not + * currently available). + * + * Return value: a #SoupBuffer, or %NULL. 
+ **/ +SoupBuffer * +soup_message_body_get_chunk (SoupMessageBody *body, goffset offset) +{ + SoupMessageBodyPrivate *priv = (SoupMessageBodyPrivate *)body; + GSList *iter; + SoupBuffer *chunk = NULL; + + offset -= priv->base_offset; + for (iter = priv->chunks; iter; iter = iter->next) { + chunk = iter->data; + + if (offset < chunk->length || offset == 0) + break; + + offset -= chunk->length; + } + + if (!iter) + return NULL; + + if (offset == 0) + return soup_buffer_copy (chunk); + else { + return soup_buffer_new_subbuffer (chunk, offset, + chunk->length - offset); + } +} + +/** + * soup_message_body_got_chunk: + * @body: a #SoupMessageBody + * @chunk: a #SoupBuffer received from the network + * + * Handles the #SoupMessageBody part of receiving a chunk of data from + * the network. Normally this means appending @chunk to @body, exactly + * as with soup_message_body_append_buffer(), but if you have set + * @body's accumulate flag to %FALSE, then that will not happen. + * + * This is a low-level method which you should not normally need to + * use. + * + * Since: 2.4.1 + **/ +void +soup_message_body_got_chunk (SoupMessageBody *body, SoupBuffer *chunk) +{ + SoupMessageBodyPrivate *priv = (SoupMessageBodyPrivate *)body; + + if (!priv->accumulate) + return; + + soup_message_body_append_buffer (body, chunk); +} + +/** + * soup_message_body_wrote_chunk: + * @body: a #SoupMessageBody + * @chunk: a #SoupBuffer returned from soup_message_body_get_chunk() + * + * Handles the #SoupMessageBody part of writing a chunk of data to the + * network. Normally this is a no-op, but if you have set @body's + * accumulate flag to %FALSE, then this will cause @chunk to be + * discarded to free up memory. + * + * This is a low-level method which you should not need to use, and + * there are further restrictions on its proper use which are not + * documented here. 
+ * + * Since: 2.4.1 + **/ +void +soup_message_body_wrote_chunk (SoupMessageBody *body, SoupBuffer *chunk) +{ + SoupMessageBodyPrivate *priv = (SoupMessageBodyPrivate *)body; + SoupBuffer *chunk2; + + if (priv->accumulate) + return; + + chunk2 = priv->chunks->data; + g_return_if_fail (chunk->length == chunk2->length); + g_return_if_fail (chunk == chunk2 || ((SoupBufferPrivate *)chunk2)->use == SOUP_MEMORY_TEMPORARY); + + priv->chunks = g_slist_remove (priv->chunks, chunk2); + if (!priv->chunks) + priv->last = NULL; + + priv->base_offset += chunk2->length; + soup_buffer_free (chunk2); +} + +static SoupMessageBody * +soup_message_body_copy (SoupMessageBody *body) +{ + SoupMessageBodyPrivate *priv = (SoupMessageBodyPrivate *)body; + + priv->ref_count++; + return body; +} + +/** + * soup_message_body_free: + * @body: a #SoupMessageBody + * + * Frees @body. You will not normally need to use this, as + * #SoupMessage frees its associated message bodies automatically. + **/ +void +soup_message_body_free (SoupMessageBody *body) +{ + SoupMessageBodyPrivate *priv = (SoupMessageBodyPrivate *)body; + + if (--priv->ref_count == 0) { + soup_message_body_truncate (body); + g_slice_free (SoupMessageBodyPrivate, priv); + } +} + +GType +soup_message_body_get_type (void) +{ + static volatile gsize type_volatile = 0; + + if (g_once_init_enter (&type_volatile)) { + GType type = g_boxed_type_register_static ( + g_intern_static_string ("SoupMessageBody"), + (GBoxedCopyFunc) soup_message_body_copy, + (GBoxedFreeFunc) soup_message_body_free); + g_once_init_leave (&type_volatile, type); + } + return type_volatile; +} diff --git a/libsoup/soup-message-body.h b/libsoup/soup-message-body.h new file mode 100644 index 0000000..4edaaf0 --- /dev/null +++ b/libsoup/soup-message-body.h @@ -0,0 +1,88 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2000-2003, Ximian, Inc. 
+ */ + +#ifndef SOUP_MESSAGE_BODY_H +#define SOUP_MESSAGE_BODY_H 1 + +#include + +G_BEGIN_DECLS + +typedef enum { + SOUP_MEMORY_STATIC, + SOUP_MEMORY_TAKE, + SOUP_MEMORY_COPY, + SOUP_MEMORY_TEMPORARY +} SoupMemoryUse; + +typedef struct { + const char *data; + gsize length; +} SoupBuffer; + +GType soup_buffer_get_type (void); +#define SOUP_TYPE_BUFFER (soup_buffer_get_type ()) + +SoupBuffer *soup_buffer_new (SoupMemoryUse use, + gconstpointer data, + gsize length); +SoupBuffer *soup_buffer_new_take (guchar *data, + gsize length); +SoupBuffer *soup_buffer_new_subbuffer (SoupBuffer *parent, + gsize offset, + gsize length); + +SoupBuffer *soup_buffer_new_with_owner (gconstpointer data, + gsize length, + gpointer owner, + GDestroyNotify owner_dnotify); +gpointer soup_buffer_get_owner (SoupBuffer *buffer); +void soup_buffer_get_data (SoupBuffer *buffer, + const guint8 **data, + gsize *length); +SoupBuffer *soup_buffer_copy (SoupBuffer *buffer); +void soup_buffer_free (SoupBuffer *buffer); + +typedef struct { + const char *data; + goffset length; +} SoupMessageBody; + +GType soup_message_body_get_type (void); +#define SOUP_TYPE_MESSAGE_BODY (soup_message_body_get_type ()) + +SoupMessageBody *soup_message_body_new (void); + +void soup_message_body_set_accumulate(SoupMessageBody *body, + gboolean accumulate); +gboolean soup_message_body_get_accumulate(SoupMessageBody *body); + +void soup_message_body_append (SoupMessageBody *body, + SoupMemoryUse use, + gconstpointer data, + gsize length); +void soup_message_body_append_take (SoupMessageBody *body, + guchar *data, + gsize length); +void soup_message_body_append_buffer (SoupMessageBody *body, + SoupBuffer *buffer); +void soup_message_body_truncate (SoupMessageBody *body); +void soup_message_body_complete (SoupMessageBody *body); + +SoupBuffer *soup_message_body_flatten (SoupMessageBody *body); + +SoupBuffer *soup_message_body_get_chunk (SoupMessageBody *body, + goffset offset); + +void soup_message_body_got_chunk 
(SoupMessageBody *body, + SoupBuffer *chunk); +void soup_message_body_wrote_chunk (SoupMessageBody *body, + SoupBuffer *chunk); + +void soup_message_body_free (SoupMessageBody *body); + +G_END_DECLS + +#endif /* SOUP_MESSAGE_BODY_H */ diff --git a/libsoup/soup-message-client-io.c b/libsoup/soup-message-client-io.c new file mode 100644 index 0000000..8796e90 --- /dev/null +++ b/libsoup/soup-message-client-io.c @@ -0,0 +1,151 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * soup-message-client-io.c: client-side request/response + * + * Copyright (C) 2000-2003, Ximian, Inc. + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include + +#include "soup-connection.h" +#include "soup-message-private.h" +#include "soup-auth.h" +#include "soup-connection.h" +#include "soup-headers.h" +#include "soup-message-queue.h" +#include "soup-uri.h" + +static guint +parse_response_headers (SoupMessage *req, + char *headers, guint headers_len, + SoupEncoding *encoding, + gpointer user_data) +{ + SoupMessagePrivate *priv = SOUP_MESSAGE_GET_PRIVATE (req); + SoupHTTPVersion version; + + g_free(req->reason_phrase); + req->reason_phrase = NULL; + if (!soup_headers_parse_response (headers, headers_len, + req->response_headers, + &version, + &req->status_code, + &req->reason_phrase)) + return SOUP_STATUS_MALFORMED; + + g_object_notify (G_OBJECT (req), SOUP_MESSAGE_STATUS_CODE); + g_object_notify (G_OBJECT (req), SOUP_MESSAGE_REASON_PHRASE); + + if (version < priv->http_version) { + priv->http_version = version; + g_object_notify (G_OBJECT (req), SOUP_MESSAGE_HTTP_VERSION); + } + + if ((req->method == SOUP_METHOD_HEAD || + req->status_code == SOUP_STATUS_NO_CONTENT || + req->status_code == SOUP_STATUS_NOT_MODIFIED || + SOUP_STATUS_IS_INFORMATIONAL (req->status_code)) || + (req->method == SOUP_METHOD_CONNECT && + SOUP_STATUS_IS_SUCCESSFUL (req->status_code))) + *encoding = SOUP_ENCODING_NONE; + else + *encoding = 
soup_message_headers_get_encoding (req->response_headers); + + if (*encoding == SOUP_ENCODING_UNRECOGNIZED) + return SOUP_STATUS_MALFORMED; + + return SOUP_STATUS_OK; +} + +static void +get_request_headers (SoupMessage *req, GString *header, + SoupEncoding *encoding, gpointer user_data) +{ + SoupMessagePrivate *priv = SOUP_MESSAGE_GET_PRIVATE (req); + SoupMessageQueueItem *item = user_data; + SoupURI *uri = soup_message_get_uri (req); + char *uri_host; + char *uri_string; + SoupMessageHeadersIter iter; + const char *name, *value; + + if (strchr (uri->host, ':')) + uri_host = g_strdup_printf ("[%s]", uri->host); + else if (g_hostname_is_non_ascii (uri->host)) + uri_host = g_hostname_to_ascii (uri->host); + else + uri_host = uri->host; + + if (req->method == SOUP_METHOD_CONNECT) { + /* CONNECT URI is hostname:port for tunnel destination */ + uri_string = g_strdup_printf ("%s:%d", uri_host, uri->port); + } else { + gboolean proxy = soup_connection_is_via_proxy (item->conn); + + /* Proxy expects full URI to destination. Otherwise + * just the path. 
+ */ + uri_string = soup_uri_to_string (uri, !proxy); + + if (proxy && uri->fragment) { + /* Strip fragment */ + char *fragment = strchr (uri_string, '#'); + if (fragment) + *fragment = '\0'; + } + } + + if (priv->http_version == SOUP_HTTP_1_0) { + g_string_append_printf (header, "%s %s HTTP/1.0\r\n", + req->method, uri_string); + } else { + g_string_append_printf (header, "%s %s HTTP/1.1\r\n", + req->method, uri_string); + if (!soup_message_headers_get_one (req->request_headers, "Host")) { + if (soup_uri_uses_default_port (uri)) { + g_string_append_printf (header, "Host: %s\r\n", + uri_host); + } else { + g_string_append_printf (header, "Host: %s:%d\r\n", + uri_host, uri->port); + } + } + } + g_free (uri_string); + if (uri_host != uri->host) + g_free (uri_host); + + *encoding = soup_message_headers_get_encoding (req->request_headers); + if ((*encoding == SOUP_ENCODING_CONTENT_LENGTH || + *encoding == SOUP_ENCODING_NONE) && + (req->request_body->length > 0 || + soup_message_headers_get_one (req->request_headers, "Content-Type")) && + !soup_message_headers_get_content_length (req->request_headers)) { + *encoding = SOUP_ENCODING_CONTENT_LENGTH; + soup_message_headers_set_content_length (req->request_headers, + req->request_body->length); + } + + soup_message_headers_iter_init (&iter, req->request_headers); + while (soup_message_headers_iter_next (&iter, &name, &value)) + g_string_append_printf (header, "%s: %s\r\n", name, value); + g_string_append (header, "\r\n"); +} + +void +soup_message_send_request (SoupMessageQueueItem *item, + SoupMessageCompletionFn completion_cb, + gpointer user_data) +{ + soup_message_cleanup_response (item->msg); + soup_message_io_client (item, + get_request_headers, + parse_response_headers, + item, + completion_cb, user_data); +} diff --git a/libsoup/soup-message-headers.c b/libsoup/soup-message-headers.c new file mode 100644 index 0000000..195a3b0 --- /dev/null +++ b/libsoup/soup-message-headers.c @@ -0,0 +1,1337 @@ +/* -*- Mode: C; 
tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * soup-message-headers.c: HTTP message header arrays + * + * Copyright (C) 2007, 2008 Red Hat, Inc. + */ + +#include +#include + +#include "soup-message-headers.h" +#include "soup-headers.h" +#include "soup-misc.h" + +/** + * SECTION:soup-message-headers + * @short_description: HTTP message headers + * @see_also: #SoupMessage + * + * #SoupMessageHeaders represents the HTTP message headers associated + * with a request or response. + **/ + +/** + * SoupMessageHeadersType: + * @SOUP_MESSAGE_HEADERS_REQUEST: request headers + * @SOUP_MESSAGE_HEADERS_RESPONSE: response headers + * @SOUP_MESSAGE_HEADERS_MULTIPART: multipart body part headers + * + * Value passed to soup_message_headers_new() to set certain default + * behaviors. + **/ + +typedef void (*SoupHeaderSetter) (SoupMessageHeaders *, const char *); +static const char *intern_header_name (const char *name, SoupHeaderSetter *setter); +static void clear_special_headers (SoupMessageHeaders *hdrs); + +typedef struct { + const char *name; + char *value; +} SoupHeader; + +struct SoupMessageHeaders { + GArray *array; + GHashTable *concat; + SoupMessageHeadersType type; + + SoupEncoding encoding; + goffset content_length; + SoupExpectation expectations; + char *content_type; + + int ref_count; +}; + +/** + * soup_message_headers_new: + * @type: the type of headers + * + * Creates a #SoupMessageHeaders. (#SoupMessage does this + * automatically for its own headers. You would only need to use this + * method if you are manually parsing or generating message headers.) + * + * Return value: a new #SoupMessageHeaders + **/ +SoupMessageHeaders * +soup_message_headers_new (SoupMessageHeadersType type) +{ + SoupMessageHeaders *hdrs; + + hdrs = g_slice_new0 (SoupMessageHeaders); + /* FIXME: is "5" a good default? 
*/ + hdrs->array = g_array_sized_new (TRUE, FALSE, sizeof (SoupHeader), 5); + hdrs->type = type; + hdrs->encoding = -1; + hdrs->ref_count = 1; + + return hdrs; +} + +static SoupMessageHeaders * +soup_message_headers_copy (SoupMessageHeaders *hdrs) +{ + hdrs->ref_count++; + return hdrs; +} + +/** + * soup_message_headers_free: + * @hdrs: a #SoupMessageHeaders + * + * Frees @hdrs. + **/ +void +soup_message_headers_free (SoupMessageHeaders *hdrs) +{ + if (--hdrs->ref_count == 0) { + soup_message_headers_clear (hdrs); + g_array_free (hdrs->array, TRUE); + if (hdrs->concat) + g_hash_table_destroy (hdrs->concat); + g_slice_free (SoupMessageHeaders, hdrs); + } +} + +GType +soup_message_headers_get_type (void) +{ + static volatile gsize type_volatile = 0; + + if (g_once_init_enter (&type_volatile)) { + GType type = g_boxed_type_register_static ( + g_intern_static_string ("SoupMessageHeaders"), + (GBoxedCopyFunc) soup_message_headers_copy, + (GBoxedFreeFunc) soup_message_headers_free); + g_once_init_leave (&type_volatile, type); + } + return type_volatile; +} + +/** + * soup_message_headers_clear: + * @hdrs: a #SoupMessageHeaders + * + * Clears @hdrs. + **/ +void +soup_message_headers_clear (SoupMessageHeaders *hdrs) +{ + SoupHeader *hdr_array = (SoupHeader *)hdrs->array->data; + int i; + + for (i = 0; i < hdrs->array->len; i++) + g_free (hdr_array[i].value); + g_array_set_size (hdrs->array, 0); + + if (hdrs->concat) + g_hash_table_remove_all (hdrs->concat); + + clear_special_headers (hdrs); +} + +/** + * soup_message_headers_clean_connection_headers: + * @hdrs: a #SoupMessageHeaders + * + * Removes all the headers listed in the Connection header. 
+ * + * Since: 2.36 + */ +void +soup_message_headers_clean_connection_headers (SoupMessageHeaders *hdrs) +{ + /* RFC 2616 14.10 */ + const char *connection; + GSList *tokens, *t; + + connection = soup_message_headers_get_list (hdrs, "Connection"); + if (!connection) + return; + + tokens = soup_header_parse_list (connection); + for (t = tokens; t; t = t->next) + soup_message_headers_remove (hdrs, t->data); + soup_header_free_list (tokens); +} + +/** + * soup_message_headers_append: + * @hdrs: a #SoupMessageHeaders + * @name: the header name to add + * @value: the new value of @name + * + * Appends a new header with name @name and value @value to @hdrs. (If + * there is an existing header with name @name, then this creates a + * second one, which is only allowed for list-valued headers; see also + * soup_message_headers_replace().) + * + * The caller is expected to make sure that @name and @value are + * syntactically correct. + **/ +void +soup_message_headers_append (SoupMessageHeaders *hdrs, + const char *name, const char *value) +{ + SoupHeader header; + SoupHeaderSetter setter; + + g_return_if_fail (name != NULL); + g_return_if_fail (value != NULL); + + /* Setting a syntactically invalid header name or value is + * considered to be a programming error. However, it can also + * be a security hole, so we want to fail here even if + * compiled with G_DISABLE_CHECKS. 
+ */ +#ifndef G_DISABLE_CHECKS + g_return_if_fail (*name && strpbrk (name, " \t\r\n:") == NULL); + g_return_if_fail (strpbrk (value, "\r\n") == NULL); +#else + if (*name && strpbrk (name, " \t\r\n:")) { + g_warning ("soup_message_headers_append: Ignoring bad name '%s'", name); + return; + } + if (strpbrk (value, "\r\n")) { + g_warning ("soup_message_headers_append: Ignoring bad value '%s'", value); + return; + } +#endif + + header.name = intern_header_name (name, &setter); + header.value = g_strdup (value); + g_array_append_val (hdrs->array, header); + if (hdrs->concat) + g_hash_table_remove (hdrs->concat, header.name); + if (setter) + setter (hdrs, header.value); +} + +/** + * soup_message_headers_replace: + * @hdrs: a #SoupMessageHeaders + * @name: the header name to replace + * @value: the new value of @name + * + * Replaces the value of the header @name in @hdrs with @value. (See + * also soup_message_headers_append().) + * + * The caller is expected to make sure that @name and @value are + * syntactically correct. + **/ +void +soup_message_headers_replace (SoupMessageHeaders *hdrs, + const char *name, const char *value) +{ + soup_message_headers_remove (hdrs, name); + soup_message_headers_append (hdrs, name, value); +} + +static int +find_header (SoupHeader *hdr_array, const char *interned_name, int nth) +{ + int i; + + for (i = 0; hdr_array[i].name; i++) { + if (hdr_array[i].name == interned_name) { + if (nth-- == 0) + return i; + } + } + return -1; +} + +static int +find_last_header (SoupHeader *hdr_array, guint length, const char *interned_name, int nth) +{ + int i; + + for (i = length; i >= 0; i--) { + if (hdr_array[i].name == interned_name) { + if (nth-- == 0) + return i; + } + } + return -1; +} + +/** + * soup_message_headers_remove: + * @hdrs: a #SoupMessageHeaders + * @name: the header name to remove + * + * Removes @name from @hdrs. If there are multiple values for @name, + * they are all removed. 
+ **/ +void +soup_message_headers_remove (SoupMessageHeaders *hdrs, const char *name) +{ + SoupHeader *hdr_array = (SoupHeader *)(hdrs->array->data); + SoupHeaderSetter setter; + int index; + + g_return_if_fail (name != NULL); + + name = intern_header_name (name, &setter); + while ((index = find_header (hdr_array, name, 0)) != -1) { + g_free (hdr_array[index].value); + g_array_remove_index (hdrs->array, index); + } + if (hdrs->concat) + g_hash_table_remove (hdrs->concat, name); + if (setter) + setter (hdrs, NULL); +} + +/** + * soup_message_headers_get_one: + * @hdrs: a #SoupMessageHeaders + * @name: header name + * + * Gets the value of header @name in @hdrs. Use this for headers whose + * values are not comma-delimited lists, and + * which therefore can only appear at most once in the headers. For + * list-valued headers, use soup_message_headers_get_list(). + * + * If @hdrs does erroneously contain multiple copies of the header, it + * is not defined which one will be returned. (Ideally, it will return + * whichever one makes libsoup most compatible with other HTTP + * implementations.) + * + * Return value: the header's value or %NULL if not found. + * + * Since: 2.26.1 + **/ +const char * +soup_message_headers_get_one (SoupMessageHeaders *hdrs, const char *name) +{ + SoupHeader *hdr_array = (SoupHeader *)(hdrs->array->data); + guint hdr_length = hdrs->array->len; + int index; + + g_return_val_if_fail (name != NULL, NULL); + + name = intern_header_name (name, NULL); + + index = find_last_header (hdr_array, hdr_length, name, 0); + + return (index == -1) ? NULL : hdr_array[index].value; +} + +/** + * soup_message_headers_get_list: + * @hdrs: a #SoupMessageHeaders + * @name: header name + * + * Gets the value of header @name in @hdrs. Use this for headers whose + * values are comma-delimited lists, and which are therefore allowed + * to appear multiple times in the headers. For non-list-valued + * headers, use soup_message_headers_get_one(). 
+ * + * If @name appears multiple times in @hdrs, + * soup_message_headers_get_list() will concatenate all of the values + * together, separated by commas. This is sometimes awkward to parse + * (eg, WWW-Authenticate, Set-Cookie), but you have to be able to deal + * with it anyway, because the HTTP spec explicitly states that this + * transformation is allowed, and so an upstream proxy could do the + * same thing. + * + * Return value: the header's value or %NULL if not found. + * + * Since: 2.26.1 + **/ +const char * +soup_message_headers_get_list (SoupMessageHeaders *hdrs, const char *name) +{ + SoupHeader *hdr_array = (SoupHeader *)(hdrs->array->data); + GString *concat; + char *value; + int index, i; + + g_return_val_if_fail (name != NULL, NULL); + + name = intern_header_name (name, NULL); + if (hdrs->concat) { + value = g_hash_table_lookup (hdrs->concat, name); + if (value) + return value; + } + + index = find_header (hdr_array, name, 0); + if (index == -1) + return NULL; + else if (find_header (hdr_array, name, 1) == -1) + return hdr_array[index].value; + + concat = g_string_new (NULL); + for (i = 0; (index = find_header (hdr_array, name, i)) != -1; i++) { + if (i != 0) + g_string_append (concat, ", "); + g_string_append (concat, hdr_array[index].value); + } + value = g_string_free (concat, FALSE); + + if (!hdrs->concat) + hdrs->concat = g_hash_table_new_full (NULL, NULL, NULL, g_free); + g_hash_table_insert (hdrs->concat, (gpointer)name, value); + return value; +} + +/** + * soup_message_headers_get: + * @hdrs: a #SoupMessageHeaders + * @name: header name + * + * Gets the value of header @name in @hdrs. + * + * This method was supposed to work correctly for both single-valued + * and list-valued headers, but because some HTTP clients/servers + * mistakenly send multiple copies of headers that are supposed to be + * single-valued, it sometimes returns incorrect results. 
To fix this, + * the methods soup_message_headers_get_one() and + * soup_message_headers_get_list() were introduced, so callers can + * explicitly state which behavior they are expecting. + * + * Return value: as with soup_message_headers_get_list(). + * + * Deprecated: Use soup_message_headers_get_one() or + * soup_message_headers_get_list() instead. + **/ +const char * +soup_message_headers_get (SoupMessageHeaders *hdrs, const char *name) +{ + return soup_message_headers_get_list (hdrs, name); +} + +/** + * SoupMessageHeadersIter: + * + * An opaque type used to iterate over a %SoupMessageHeaders + * structure. + * + * After initializing the iterator with + * soup_message_headers_iter_init(), call + * soup_message_headers_iter_next() to fetch data from it. + * + * You may not modify the headers while iterating over them. + **/ +/* Private layout that the iterator functions cast a SoupMessageHeadersIter + * to: the headers being walked and an index into their array, which + * soup_message_headers_iter_init() sets to 0. + */ +typedef struct { + SoupMessageHeaders *hdrs; + int index; +} SoupMessageHeadersIterReal; + +/** + * soup_message_headers_iter_init: + * @iter: (out) (transfer none): a pointer to a %SoupMessageHeadersIter + * structure + * @hdrs: a %SoupMessageHeaders + * + * Initializes @iter for iterating @hdrs. + **/ +void +soup_message_headers_iter_init (SoupMessageHeadersIter *iter, + SoupMessageHeaders *hdrs) +{ + SoupMessageHeadersIterReal *real = (SoupMessageHeadersIterReal *)iter; + + real->hdrs = hdrs; + real->index = 0; +} + +/** + * soup_message_headers_iter_next: + * @iter: (inout) (transfer none): a %SoupMessageHeadersIter + * @name: (out) (transfer none): pointer to a variable to return + * the header name in + * @value: (out) (transfer none): pointer to a variable to return + * the header value in + * + * Yields the next name/value pair in the %SoupMessageHeaders being + * iterated by @iter. If @iter has already yielded the last header, + * then soup_message_headers_iter_next() will return %FALSE and @name + * and @value will be unchanged.
+ * + * Return value: %TRUE if another name and value were returned, %FALSE + * if the end of the headers has been reached. + **/ +gboolean +soup_message_headers_iter_next (SoupMessageHeadersIter *iter, + const char **name, const char **value) +{ + SoupMessageHeadersIterReal *real = (SoupMessageHeadersIterReal *)iter; + SoupHeader *hdr_array = (SoupHeader *)real->hdrs->array->data; + + if (real->index >= real->hdrs->array->len) + return FALSE; + + *name = hdr_array[real->index].name; + *value = hdr_array[real->index].value; + real->index++; + return TRUE; +} + +/** + * SoupMessageHeadersForeachFunc: + * @name: the header name + * @value: the header value + * @user_data: the data passed to soup_message_headers_foreach() + * + * The callback passed to soup_message_headers_foreach(). + **/ + +/** + * soup_message_headers_foreach: + * @hdrs: a #SoupMessageHeaders + * @func: (scope call): callback function to run for each header + * @user_data: data to pass to @func + * + * Calls @func once for each header value in @hdrs. + * + * Beware that unlike soup_message_headers_get(), this processes the + * headers in exactly the way they were added, rather than + * concatenating multiple same-named headers into a single value. + * (This is intentional; it ensures that if you call + * soup_message_headers_append() multiple times with the same name, + * then the I/O code will output multiple copies of the header when + * sending the message to the remote implementation, which may be + * required for interoperability in some cases.) + * + * You may not modify the headers from @func. 
+ **/ +void +soup_message_headers_foreach (SoupMessageHeaders *hdrs, + SoupMessageHeadersForeachFunc func, + gpointer user_data) +{ + SoupHeader *hdr_array = (SoupHeader *)hdrs->array->data; + int i; + + for (i = 0; i < hdrs->array->len; i++) + func (hdr_array[i].name, hdr_array[i].value, user_data); +} + + +static GStaticMutex header_pool_mutex = G_STATIC_MUTEX_INIT; +static GHashTable *header_pool, *header_setters; + +static void transfer_encoding_setter (SoupMessageHeaders *, const char *); +static void content_length_setter (SoupMessageHeaders *, const char *); +static void expectation_setter (SoupMessageHeaders *, const char *); +static void content_type_setter (SoupMessageHeaders *, const char *); + +static char * +intern_header_locked (const char *name) +{ + char *interned; + + interned = g_hash_table_lookup (header_pool, name); + if (!interned) { + char *dup = g_strdup (name); + g_hash_table_insert (header_pool, dup, dup); + interned = dup; + } + return interned; +} + +static const char * +intern_header_name (const char *name, SoupHeaderSetter *setter) +{ + const char *interned; + + g_static_mutex_lock (&header_pool_mutex); + + if (!header_pool) { + header_pool = g_hash_table_new (soup_str_case_hash, soup_str_case_equal); + header_setters = g_hash_table_new (NULL, NULL); + g_hash_table_insert (header_setters, + intern_header_locked ("Transfer-Encoding"), + transfer_encoding_setter); + g_hash_table_insert (header_setters, + intern_header_locked ("Content-Length"), + content_length_setter); + g_hash_table_insert (header_setters, + intern_header_locked ("Expect"), + expectation_setter); + g_hash_table_insert (header_setters, + intern_header_locked ("Content-Type"), + content_type_setter); + } + + interned = intern_header_locked (name); + if (setter) + *setter = g_hash_table_lookup (header_setters, interned); + + g_static_mutex_unlock (&header_pool_mutex); + return interned; +} + +static void +clear_special_headers (SoupMessageHeaders *hdrs) +{ + 
SoupHeaderSetter setter; + GHashTableIter iter; + gpointer key, value; + + /* Make sure header_setters has been initialized */ + intern_header_name ("", NULL); + + g_hash_table_iter_init (&iter, header_setters); + while (g_hash_table_iter_next (&iter, &key, &value)) { + setter = value; + setter (hdrs, NULL); + } +} + +/* Specific headers */ + +static void +transfer_encoding_setter (SoupMessageHeaders *hdrs, const char *value) +{ + if (value) { + if (g_ascii_strcasecmp (value, "chunked") == 0) + hdrs->encoding = SOUP_ENCODING_CHUNKED; + else + hdrs->encoding = SOUP_ENCODING_UNRECOGNIZED; + } else + hdrs->encoding = -1; +} + +static void +content_length_setter (SoupMessageHeaders *hdrs, const char *value) +{ + /* Transfer-Encoding trumps Content-Length */ + if (hdrs->encoding == SOUP_ENCODING_CHUNKED) + return; + + if (value) { + char *end; + + hdrs->content_length = g_ascii_strtoull (value, &end, 10); + if (*end) + hdrs->encoding = SOUP_ENCODING_UNRECOGNIZED; + else + hdrs->encoding = SOUP_ENCODING_CONTENT_LENGTH; + } else + hdrs->encoding = -1; +} + +/** + * SoupEncoding: + * @SOUP_ENCODING_UNRECOGNIZED: unknown / error + * @SOUP_ENCODING_NONE: no body is present (which is not the same as a + * 0-length body, and only occurs in certain places) + * @SOUP_ENCODING_CONTENT_LENGTH: Content-Length encoding + * @SOUP_ENCODING_EOF: Response body ends when the connection is closed + * @SOUP_ENCODING_CHUNKED: chunked encoding (currently only supported + * for response) + * @SOUP_ENCODING_BYTERANGES: multipart/byteranges (Reserved for future + * use: NOT CURRENTLY IMPLEMENTED) + * + * How a message body is encoded for transport + **/ + +/** + * soup_message_headers_get_encoding: + * @hdrs: a #SoupMessageHeaders + * + * Gets the message body encoding that @hdrs declare. This may not + * always correspond to the encoding used on the wire; eg, a HEAD + * response may declare a Content-Length or Transfer-Encoding, but + * it will never actually include a body. 
+ * + * Return value: the encoding declared by @hdrs. + **/ +SoupEncoding +soup_message_headers_get_encoding (SoupMessageHeaders *hdrs) +{ + const char *header; + + if (hdrs->encoding != -1) + return hdrs->encoding; + + /* If Transfer-Encoding was set, hdrs->encoding would already + * be set. So we don't need to check that possibility. + */ + header = soup_message_headers_get_one (hdrs, "Content-Length"); + if (header) { + content_length_setter (hdrs, header); + if (hdrs->encoding != -1) + return hdrs->encoding; + } + + /* Per RFC 2616 4.4, a response body that doesn't indicate its + * encoding otherwise is terminated by connection close, and a + * request that doesn't indicate otherwise has no body. Note + * that SoupMessage calls soup_message_headers_set_encoding() + * to override the response body default for our own + * server-side messages. + */ + hdrs->encoding = (hdrs->type == SOUP_MESSAGE_HEADERS_RESPONSE) ? + SOUP_ENCODING_EOF : SOUP_ENCODING_NONE; + return hdrs->encoding; +} + +/** + * soup_message_headers_set_encoding: + * @hdrs: a #SoupMessageHeaders + * @encoding: a #SoupEncoding + * + * Sets the message body encoding that @hdrs will declare. In particular, + * you should use this if you are going to send a request or response in + * chunked encoding. 
+ **/ +void +soup_message_headers_set_encoding (SoupMessageHeaders *hdrs, + SoupEncoding encoding) +{ + if (encoding == hdrs->encoding) + return; + + switch (encoding) { + case SOUP_ENCODING_NONE: + case SOUP_ENCODING_EOF: + soup_message_headers_remove (hdrs, "Transfer-Encoding"); + soup_message_headers_remove (hdrs, "Content-Length"); + break; + + case SOUP_ENCODING_CONTENT_LENGTH: + soup_message_headers_remove (hdrs, "Transfer-Encoding"); + break; + + case SOUP_ENCODING_CHUNKED: + soup_message_headers_remove (hdrs, "Content-Length"); + soup_message_headers_replace (hdrs, "Transfer-Encoding", "chunked"); + break; + + default: + g_return_if_reached (); + } + + hdrs->encoding = encoding; +} + +/** + * soup_message_headers_get_content_length: + * @hdrs: a #SoupMessageHeaders + * + * Gets the message body length that @hdrs declare. This will only + * be non-0 if soup_message_headers_get_encoding() returns + * %SOUP_ENCODING_CONTENT_LENGTH. + * + * Return value: the message body length declared by @hdrs. + **/ +goffset +soup_message_headers_get_content_length (SoupMessageHeaders *hdrs) +{ + return (hdrs->encoding == SOUP_ENCODING_CONTENT_LENGTH) ? + hdrs->content_length : 0; +} + +/** + * soup_message_headers_set_content_length: + * @hdrs: a #SoupMessageHeaders + * @content_length: the message body length + * + * Sets the message body length that @hdrs will declare, and sets + * @hdrs's encoding to %SOUP_ENCODING_CONTENT_LENGTH. + * + * You do not normally need to call this; if @hdrs is set to use + * Content-Length encoding, libsoup will automatically set its + * Content-Length header for you immediately before sending the + * headers. One situation in which this method is useful is when + * generating the response to a HEAD request; Calling + * soup_message_headers_set_content_length() allows you to put the + * correct content length into the response without needing to waste + * memory by filling in a response body which won't actually be sent. 
+ **/ +void +soup_message_headers_set_content_length (SoupMessageHeaders *hdrs, + goffset content_length) +{ + char length[128]; + + snprintf (length, sizeof (length), "%" G_GUINT64_FORMAT, + content_length); + soup_message_headers_remove (hdrs, "Transfer-Encoding"); + soup_message_headers_replace (hdrs, "Content-Length", length); +} + +static void +expectation_setter (SoupMessageHeaders *hdrs, const char *value) +{ + if (value) { + if (!g_ascii_strcasecmp (value, "100-continue")) + hdrs->expectations = SOUP_EXPECTATION_CONTINUE; + else + hdrs->expectations = SOUP_EXPECTATION_UNRECOGNIZED; + } else + hdrs->expectations = 0; +} + +/** + * SoupExpectation: + * @SOUP_EXPECTATION_CONTINUE: "100-continue" + * @SOUP_EXPECTATION_UNRECOGNIZED: any unrecognized expectation + * + * Represents the parsed value of the "Expect" header. + **/ + +/** + * soup_message_headers_get_expectations: + * @hdrs: a #SoupMessageHeaders + * + * Gets the expectations declared by @hdrs's "Expect" header. + * Currently this will either be %SOUP_EXPECTATION_CONTINUE or + * %SOUP_EXPECTATION_UNRECOGNIZED. + * + * Return value: the contents of @hdrs's "Expect" header + **/ +SoupExpectation +soup_message_headers_get_expectations (SoupMessageHeaders *hdrs) +{ + return hdrs->expectations; +} + +/** + * soup_message_headers_set_expectations: + * @hdrs: a #SoupMessageHeaders + * @expectations: the expectations to set + * + * Sets @hdrs's "Expect" header according to @expectations. + * + * Currently %SOUP_EXPECTATION_CONTINUE is the only known expectation + * value. You should set this value on a request if you are sending a + * large message body (eg, via POST or PUT), and want to give the + * server a chance to reject the request after seeing just the headers + * (eg, because it will require authentication before allowing you to + * post, or because you're POSTing to a URL that doesn't exist). 
This + * saves you from having to transmit the large request body when the + * server is just going to ignore it anyway. + **/ +void +soup_message_headers_set_expectations (SoupMessageHeaders *hdrs, + SoupExpectation expectations) +{ + g_return_if_fail ((expectations & ~SOUP_EXPECTATION_CONTINUE) == 0); + + if (expectations & SOUP_EXPECTATION_CONTINUE) + soup_message_headers_replace (hdrs, "Expect", "100-continue"); + else + soup_message_headers_remove (hdrs, "Expect"); +} + +/** + * SoupRange: + * @start: the start of the range + * @end: the end of the range + * + * Represents a byte range as used in the Range header. + * + * If @end is non-negative, then @start and @end represent the bounds + * of of the range, counting from %0. (Eg, the first 500 bytes would be + * represented as @start = %0 and @end = %499.) + * + * If @end is %-1 and @start is non-negative, then this represents a + * range starting at @start and ending with the last byte of the + * requested resource body. (Eg, all but the first 500 bytes would be + * @start = %500, and @end = %-1.) + * + * If @end is %-1 and @start is negative, then it represents a "suffix + * range", referring to the last -@start bytes of the resource body. + * (Eg, the last 500 bytes would be @start = %-500 and @end = %-1.) + * + * Since: 2.26 + **/ + +static int +sort_ranges (gconstpointer a, gconstpointer b) +{ + SoupRange *ra = (SoupRange *)a; + SoupRange *rb = (SoupRange *)b; + + return ra->start - rb->start; +} + +/** + * soup_message_headers_get_ranges: + * @hdrs: a #SoupMessageHeaders + * @total_length: the total_length of the response body + * @ranges: (out): return location for an array of #SoupRange + * @length: the length of the returned array + * + * Parses @hdrs's Range header and returns an array of the requested + * byte ranges. The returned array must be freed with + * soup_message_headers_free_ranges(). 
+ * + * If @total_length is non-0, its value will be used to adjust the + * returned ranges to have explicit start and end values, and the + * returned ranges will be sorted and non-overlapping. If + * @total_length is 0, then some ranges may have an end value of -1, + * as described under #SoupRange, and some of the ranges may be + * redundant. + * + * Return value: %TRUE if @hdrs contained a "Range" header containing + * byte ranges which could be parsed, %FALSE otherwise (in which case + * @range and @length will not be set). + * + * Since: 2.26 + **/ +gboolean +soup_message_headers_get_ranges (SoupMessageHeaders *hdrs, + goffset total_length, + SoupRange **ranges, + int *length) +{ + const char *range = soup_message_headers_get_one (hdrs, "Range"); + GSList *range_list, *r; + GArray *array; + char *spec, *end; + int i; + + if (!range || strncmp (range, "bytes", 5) != 0) + return FALSE; + + range += 5; + while (g_ascii_isspace (*range)) + range++; + if (*range++ != '=') + return FALSE; + while (g_ascii_isspace (*range)) + range++; + + range_list = soup_header_parse_list (range); + if (!range_list) + return FALSE; + + array = g_array_new (FALSE, FALSE, sizeof (SoupRange)); + for (r = range_list; r; r = r->next) { + SoupRange cur; + + spec = r->data; + if (*spec == '-') { + cur.start = g_ascii_strtoll (spec, &end, 10) + total_length; + cur.end = total_length - 1; + } else { + cur.start = g_ascii_strtoull (spec, &end, 10); + if (*end == '-') + end++; + if (*end) + cur.end = g_ascii_strtoull (end, &end, 10); + else + cur.end = total_length - 1; + } + if (*end) { + g_array_free (array, TRUE); + soup_header_free_list (range_list); + return FALSE; + } + + g_array_append_val (array, cur); + } + + soup_header_free_list (range_list); + + if (total_length) { + g_array_sort (array, sort_ranges); + for (i = 1; i < array->len; i++) { + SoupRange *cur = &((SoupRange *)array->data)[i]; + SoupRange *prev = &((SoupRange *)array->data)[i - 1]; + + if (cur->start <= prev->end) { + 
prev->end = MAX (prev->end, cur->end); + g_array_remove_index (array, i); + } + } + } + + *ranges = (SoupRange *)array->data; + *length = array->len; + + g_array_free (array, FALSE); + return TRUE; +} + +/** + * soup_message_headers_free_ranges: + * @hdrs: a #SoupMessageHeaders + * @ranges: an array of #SoupRange + * + * Frees the array of ranges returned from soup_message_headers_get_ranges(). + * + * Since: 2.26 + **/ +void +soup_message_headers_free_ranges (SoupMessageHeaders *hdrs, + SoupRange *ranges) +{ + g_free (ranges); +} + +/** + * soup_message_headers_set_ranges: + * @hdrs: a #SoupMessageHeaders + * @ranges: an array of #SoupRange + * @length: the length of @range + * + * Sets @hdrs's Range header to request the indicated ranges. (If you + * only want to request a single range, you can use + * soup_message_headers_set_range().) + * + * Since: 2.26 + **/ +void +soup_message_headers_set_ranges (SoupMessageHeaders *hdrs, + SoupRange *ranges, + int length) +{ + GString *header; + int i; + + header = g_string_new ("bytes="); + for (i = 0; i < length; i++) { + if (i > 0) + g_string_append_c (header, ','); + if (ranges[i].end >= 0) { + g_string_append_printf (header, "%" G_GINT64_FORMAT "-%" G_GINT64_FORMAT, + ranges[i].start, ranges[i].end); + } else if (ranges[i].start >= 0) { + g_string_append_printf (header,"%" G_GINT64_FORMAT "-", + ranges[i].start); + } else { + g_string_append_printf (header, "%" G_GINT64_FORMAT, + ranges[i].start); + } + } + + soup_message_headers_replace (hdrs, "Range", header->str); + g_string_free (header, TRUE); +} + +/** + * soup_message_headers_set_range: + * @hdrs: a #SoupMessageHeaders + * @start: the start of the range to request + * @end: the end of the range to request + * + * Sets @hdrs's Range header to request the indicated range. + * @start and @end are interpreted as in a #SoupRange. + * + * If you need to request multiple ranges, use + * soup_message_headers_set_ranges(). 
+ * + * Since: 2.26 + **/ +void +soup_message_headers_set_range (SoupMessageHeaders *hdrs, + goffset start, + goffset end) +{ + SoupRange range; + + range.start = start; + range.end = end; + soup_message_headers_set_ranges (hdrs, &range, 1); +} + +/** + * soup_message_headers_get_content_range: + * @hdrs: a #SoupMessageHeaders + * @start: return value for the start of the range + * @end: return value for the end of the range + * @total_length: return value for the total length of the resource, + * or %NULL if you don't care. + * + * Parses @hdrs's Content-Range header and returns it in @start, + * @end, and @total_length. If the total length field in the header + * was specified as "*", then @total_length will be set to -1. + * + * Return value: %TRUE if @hdrs contained a "Content-Range" header + * containing a byte range which could be parsed, %FALSE otherwise. + * + * Since: 2.26 + **/ +gboolean +soup_message_headers_get_content_range (SoupMessageHeaders *hdrs, + goffset *start, + goffset *end, + goffset *total_length) +{ + const char *header = soup_message_headers_get_one (hdrs, "Content-Range"); + goffset length; + char *p; + + if (!header || strncmp (header, "bytes ", 6) != 0) + return FALSE; + + header += 6; + while (g_ascii_isspace (*header)) + header++; + if (!g_ascii_isdigit (*header)) + return FALSE; + + *start = g_ascii_strtoull (header, &p, 10); + if (*p != '-') + return FALSE; + *end = g_ascii_strtoull (p + 1, &p, 10); + if (*p != '/') + return FALSE; + p++; + if (*p == '*') { + length = -1; + p++; + } else + length = g_ascii_strtoull (p, &p, 10); + + if (total_length) + *total_length = length; + return *p == '\0'; +} + +/** + * soup_message_headers_set_content_range: + * @hdrs: a #SoupMessageHeaders + * @start: the start of the range + * @end: the end of the range + * @total_length: the total length of the resource, or -1 if unknown + * + * Sets @hdrs's Content-Range header according to the given values. 
+ * (Note that @total_length is the total length of the entire resource + * that this is a range of, not simply @end - @start + 1.) + * + * Since: 2.26 + **/ +void +soup_message_headers_set_content_range (SoupMessageHeaders *hdrs, + goffset start, + goffset end, + goffset total_length) +{ + char *header; + + if (total_length >= 0) { + header = g_strdup_printf ("bytes %" G_GINT64_FORMAT "-%" + G_GINT64_FORMAT "/%" G_GINT64_FORMAT, + start, end, total_length); + } else { + header = g_strdup_printf ("bytes %" G_GINT64_FORMAT "-%" + G_GINT64_FORMAT "/*", start, end); + } + soup_message_headers_replace (hdrs, "Content-Range", header); + g_free (header); +} + +static gboolean +parse_content_foo (SoupMessageHeaders *hdrs, const char *header_name, + char **foo, GHashTable **params) +{ + const char *header; + char *semi; + + header = soup_message_headers_get_one (hdrs, header_name); + if (!header) + return FALSE; + + if (foo) { + *foo = g_strdup (header); + semi = strchr (*foo, ';'); + if (semi) { + char *p = semi; + + *semi++ = '\0'; + while (p - 1 > *foo && g_ascii_isspace(p[-1])) + *(--p) = '\0'; + } + } else { + semi = strchr (header, ';'); + if (semi) + semi++; + } + + if (!params) + return TRUE; + + if (!semi) { + *params = soup_header_parse_semi_param_list (""); + return TRUE; + } + + *params = soup_header_parse_semi_param_list (semi); + return TRUE; +} + +static void +set_content_foo (SoupMessageHeaders *hdrs, const char *header_name, + const char *foo, GHashTable *params) +{ + GString *str; + GHashTableIter iter; + gpointer key, value; + + str = g_string_new (foo); + if (params) { + g_hash_table_iter_init (&iter, params); + while (g_hash_table_iter_next (&iter, &key, &value)) { + g_string_append (str, "; "); + soup_header_g_string_append_param (str, key, value); + } + } + + soup_message_headers_replace (hdrs, header_name, str->str); + g_string_free (str, TRUE); +} + +static void +content_type_setter (SoupMessageHeaders *hdrs, const char *value) +{ + g_free 
(hdrs->content_type); + if (value) { + char *content_type, *p; + + parse_content_foo (hdrs, "Content-Type", &content_type, NULL); + p = strpbrk (content_type, " /"); + if (!p || *p != '/' || strpbrk (p + 1, " /")) { + g_free (content_type); + hdrs->content_type = NULL; + } else + hdrs->content_type = content_type; + } else + hdrs->content_type = NULL; +} + +/** + * soup_message_headers_get_content_type: + * @hdrs: a #SoupMessageHeaders + * @params: (out) (allow-none) (transfer full): return location for + * the Content-Type parameters (eg, "charset"), or %NULL + * + * Looks up the "Content-Type" header in @hdrs, parses it, and returns + * its value in *@content_type and *@params. @params can be %NULL if you + * are only interested in the content type itself. + * + * Return value: a string with the value of the "Content-Type" header + * or NULL if @hdrs does not contain that header or it cannot be + * parsed (in which case *@params will be unchanged). + * + * Since: 2.26 + **/ +const char * +soup_message_headers_get_content_type (SoupMessageHeaders *hdrs, + GHashTable **params) +{ + if (!hdrs->content_type) + return NULL; + + if (params) + parse_content_foo (hdrs, "Content-Type", NULL, params); + return hdrs->content_type; +} + +/** + * soup_message_headers_set_content_type: + * @hdrs: a #SoupMessageHeaders + * @content_type: the MIME type + * @params: (allow-none) (element-type utf8 utf8): additional + * parameters, or %NULL + * + * Sets the "Content-Type" header in @hdrs to @content_type, + * optionally with additional parameters specified in @params. 
+ * + * Since: 2.26 + **/ +void +soup_message_headers_set_content_type (SoupMessageHeaders *hdrs, + const char *content_type, + GHashTable *params) +{ + set_content_foo (hdrs, "Content-Type", content_type, params); +} + +/** + * soup_message_headers_get_content_disposition: + * @hdrs: a #SoupMessageHeaders + * @disposition: (out) (transfer full): return location for the + * disposition-type, or %NULL + * @params: (out) (transfer full) (element-type utf8 utf8): return + * location for the Content-Disposition parameters, or %NULL + * + * Looks up the "Content-Disposition" header in @hdrs, parses it, and + * returns its value in *@disposition and *@params. @params can be + * %NULL if you are only interested in the disposition-type. + * + * In HTTP, the most common use of this header is to set a + * disposition-type of "attachment", to suggest to the browser that a + * response should be saved to disk rather than displayed in the + * browser. If @params contains a "filename" parameter, this is a + * suggestion of a filename to use. (If the parameter value in the + * header contains an absolute or relative path, libsoup will truncate + * it down to just the final path component, so you do not need to + * test this yourself.) + * + * Content-Disposition is also used in "multipart/form-data", however + * this is handled automatically by #SoupMultipart and the associated + * form methods. + * + * Return value: %TRUE if @hdrs contains a "Content-Disposition" + * header, %FALSE if not (in which case *@disposition and *@params + * will be unchanged). 
+ * + * Since: 2.26 + **/ +gboolean +soup_message_headers_get_content_disposition (SoupMessageHeaders *hdrs, + char **disposition, + GHashTable **params) +{ + gpointer orig_key, orig_value; + + if (!parse_content_foo (hdrs, "Content-Disposition", + disposition, params)) + return FALSE; + + /* If there is a filename parameter, make sure it contains + * only a single path component + */ + if (params && g_hash_table_lookup_extended (*params, "filename", + &orig_key, &orig_value)) { + char *filename = strrchr (orig_value, '/'); + + if (filename) + g_hash_table_insert (*params, orig_key, filename + 1); + } + return TRUE; +} + +/** + * soup_message_headers_set_content_disposition: + * @hdrs: a #SoupMessageHeaders + * @disposition: the disposition-type + * @params: (allow-none) (element-type utf8 utf8): additional + * parameters, or %NULL + * + * Sets the "Content-Disposition" header in @hdrs to @disposition, + * optionally with additional parameters specified in @params. + * + * See soup_message_headers_get_content_disposition() for a discussion + * of how Content-Disposition is used in HTTP. + * + * Since: 2.26 + **/ +void +soup_message_headers_set_content_disposition (SoupMessageHeaders *hdrs, + const char *disposition, + GHashTable *params) +{ + set_content_foo (hdrs, "Content-Disposition", disposition, params); +} + diff --git a/libsoup/soup-message-headers.h b/libsoup/soup-message-headers.h new file mode 100644 index 0000000..e1dec1d --- /dev/null +++ b/libsoup/soup-message-headers.h @@ -0,0 +1,135 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2005 Novell, Inc. 
+ */ + +#ifndef SOUP_MESSAGE_HEADERS_H +#define SOUP_MESSAGE_HEADERS_H 1 + +#include + +typedef struct SoupMessageHeaders SoupMessageHeaders; +GType soup_message_headers_get_type (void); +#define SOUP_TYPE_MESSAGE_HEADERS (soup_message_headers_get_type ()) + +typedef enum { + SOUP_MESSAGE_HEADERS_REQUEST, + SOUP_MESSAGE_HEADERS_RESPONSE, + SOUP_MESSAGE_HEADERS_MULTIPART +} SoupMessageHeadersType; + +SoupMessageHeaders *soup_message_headers_new (SoupMessageHeadersType type); + +void soup_message_headers_free (SoupMessageHeaders *hdrs); + +void soup_message_headers_append (SoupMessageHeaders *hdrs, + const char *name, + const char *value); +void soup_message_headers_replace (SoupMessageHeaders *hdrs, + const char *name, + const char *value); + +void soup_message_headers_remove (SoupMessageHeaders *hdrs, + const char *name); +void soup_message_headers_clear (SoupMessageHeaders *hdrs); + +void soup_message_headers_clean_connection_headers (SoupMessageHeaders *hdrs); + +#ifndef LIBSOUP_DISABLE_DEPRECATED +const char *soup_message_headers_get (SoupMessageHeaders *hdrs, + const char *name); +#endif +const char *soup_message_headers_get_one (SoupMessageHeaders *hdrs, + const char *name); +const char *soup_message_headers_get_list (SoupMessageHeaders *hdrs, + const char *name); + +typedef void (*SoupMessageHeadersForeachFunc)(const char *name, + const char *value, + gpointer user_data); + +void soup_message_headers_foreach (SoupMessageHeaders *hdrs, + SoupMessageHeadersForeachFunc func, + gpointer user_data); + +typedef struct { + /*< private >*/ + gpointer dummy[3]; +} SoupMessageHeadersIter; + +void soup_message_headers_iter_init (SoupMessageHeadersIter *iter, + SoupMessageHeaders *hdrs); +gboolean soup_message_headers_iter_next (SoupMessageHeadersIter *iter, + const char **name, + const char **value); + +/* Specific headers */ + +typedef enum { + SOUP_ENCODING_UNRECOGNIZED, + SOUP_ENCODING_NONE, + SOUP_ENCODING_CONTENT_LENGTH, + SOUP_ENCODING_EOF, + 
SOUP_ENCODING_CHUNKED, + SOUP_ENCODING_BYTERANGES +} SoupEncoding; + +SoupEncoding soup_message_headers_get_encoding (SoupMessageHeaders *hdrs); +void soup_message_headers_set_encoding (SoupMessageHeaders *hdrs, + SoupEncoding encoding); + +goffset soup_message_headers_get_content_length (SoupMessageHeaders *hdrs); +void soup_message_headers_set_content_length (SoupMessageHeaders *hdrs, + goffset content_length); + +typedef enum { + SOUP_EXPECTATION_UNRECOGNIZED = (1 << 0), + SOUP_EXPECTATION_CONTINUE = (1 << 1) +} SoupExpectation; + +SoupExpectation soup_message_headers_get_expectations (SoupMessageHeaders *hdrs); +void soup_message_headers_set_expectations (SoupMessageHeaders *hdrs, + SoupExpectation expectations); + +typedef struct { + goffset start; + goffset end; +} SoupRange; + +gboolean soup_message_headers_get_ranges (SoupMessageHeaders *hdrs, + goffset total_length, + SoupRange **ranges, + int *length); +void soup_message_headers_free_ranges (SoupMessageHeaders *hdrs, + SoupRange *ranges); +void soup_message_headers_set_ranges (SoupMessageHeaders *hdrs, + SoupRange *ranges, + int length); +void soup_message_headers_set_range (SoupMessageHeaders *hdrs, + goffset start, + goffset end); + +gboolean soup_message_headers_get_content_range (SoupMessageHeaders *hdrs, + goffset *start, + goffset *end, + goffset *total_length); +void soup_message_headers_set_content_range (SoupMessageHeaders *hdrs, + goffset start, + goffset end, + goffset total_length); + + +const char *soup_message_headers_get_content_type (SoupMessageHeaders *hdrs, + GHashTable **params); +void soup_message_headers_set_content_type (SoupMessageHeaders *hdrs, + const char *content_type, + GHashTable *params); + +gboolean soup_message_headers_get_content_disposition (SoupMessageHeaders *hdrs, + char **disposition, + GHashTable **params); +void soup_message_headers_set_content_disposition (SoupMessageHeaders *hdrs, + const char *disposition, + GHashTable *params); + +#endif /* 
SOUP_MESSAGE_HEADERS_H */ diff --git a/libsoup/soup-message-io.c b/libsoup/soup-message-io.c new file mode 100644 index 0000000..213a46b --- /dev/null +++ b/libsoup/soup-message-io.c @@ -0,0 +1,1253 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * soup-message-io.c: HTTP message I/O + * + * Copyright (C) 2000-2003, Ximian, Inc. + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include + +#include "soup-connection.h" +#include "soup-message.h" +#include "soup-message-private.h" +#include "soup-message-queue.h" +#include "soup-misc.h" +#include "soup-socket.h" +#include "soup-ssl.h" + +typedef enum { + SOUP_MESSAGE_IO_CLIENT, + SOUP_MESSAGE_IO_SERVER +} SoupMessageIOMode; + +typedef enum { + SOUP_MESSAGE_IO_STATE_NOT_STARTED, + SOUP_MESSAGE_IO_STATE_HEADERS, + SOUP_MESSAGE_IO_STATE_BLOCKING, + SOUP_MESSAGE_IO_STATE_BODY, + SOUP_MESSAGE_IO_STATE_CHUNK_SIZE, + SOUP_MESSAGE_IO_STATE_CHUNK, + SOUP_MESSAGE_IO_STATE_CHUNK_END, + SOUP_MESSAGE_IO_STATE_TRAILERS, + SOUP_MESSAGE_IO_STATE_FINISHING, + SOUP_MESSAGE_IO_STATE_DONE +} SoupMessageIOState; + +#define SOUP_MESSAGE_IO_STATE_ACTIVE(state) \ + (state != SOUP_MESSAGE_IO_STATE_NOT_STARTED && \ + state != SOUP_MESSAGE_IO_STATE_BLOCKING && \ + state != SOUP_MESSAGE_IO_STATE_DONE) + +typedef struct { + SoupSocket *sock; + SoupMessageQueueItem *item; + SoupMessageIOMode mode; + GCancellable *cancellable; + + SoupMessageIOState read_state; + SoupEncoding read_encoding; + GByteArray *read_meta_buf; + SoupMessageBody *read_body; + goffset read_length; + gboolean read_eof_ok; + + gboolean need_content_sniffed, need_got_chunk; + SoupMessageBody *sniff_data; + + SoupMessageIOState write_state; + SoupEncoding write_encoding; + GString *write_buf; + SoupMessageBody *write_body; + SoupBuffer *write_chunk; + goffset write_body_offset; + goffset write_length; + goffset written; + + guint read_tag, write_tag, tls_signal_id; + GSource *unpause_source; + + SoupMessageGetHeadersFn 
get_headers_cb; + SoupMessageParseHeadersFn parse_headers_cb; + gpointer header_data; + SoupMessageCompletionFn completion_cb; + gpointer completion_data; +} SoupMessageIOData; + + +/* Put these around callback invocation if there is code afterward + * that depends on the IO having not been cancelled. + */ +#define dummy_to_make_emacs_happy { +#define SOUP_MESSAGE_IO_PREPARE_FOR_CALLBACK { gboolean cancelled; g_object_ref (msg); +#define SOUP_MESSAGE_IO_RETURN_IF_CANCELLED_OR_PAUSED cancelled = (priv->io_data != io); g_object_unref (msg); if (cancelled || (!io->read_tag && !io->write_tag)) return; } +#define SOUP_MESSAGE_IO_RETURN_VAL_IF_CANCELLED_OR_PAUSED(val) cancelled = (priv->io_data != io); g_object_unref (msg); if (cancelled || (!io->read_tag && !io->write_tag)) return val; } + +#define RESPONSE_BLOCK_SIZE 8192 + +void +soup_message_io_cleanup (SoupMessage *msg) +{ + SoupMessagePrivate *priv = SOUP_MESSAGE_GET_PRIVATE (msg); + SoupMessageIOData *io; + + soup_message_io_stop (msg); + + io = priv->io_data; + if (!io) + return; + priv->io_data = NULL; + + if (io->tls_signal_id) + g_signal_handler_disconnect (io->sock, io->tls_signal_id); + if (io->sock) + g_object_unref (io->sock); + if (io->item) + soup_message_queue_item_unref (io->item); + + g_byte_array_free (io->read_meta_buf, TRUE); + + g_string_free (io->write_buf, TRUE); + if (io->write_chunk) + soup_buffer_free (io->write_chunk); + + if (io->sniff_data) + soup_message_body_free (io->sniff_data); + + g_slice_free (SoupMessageIOData, io); +} + +void +soup_message_io_stop (SoupMessage *msg) +{ + SoupMessagePrivate *priv = SOUP_MESSAGE_GET_PRIVATE (msg); + SoupMessageIOData *io = priv->io_data; + + if (!io) + return; + + if (io->read_tag) { + g_signal_handler_disconnect (io->sock, io->read_tag); + io->read_tag = 0; + } + if (io->write_tag) { + g_signal_handler_disconnect (io->sock, io->write_tag); + io->write_tag = 0; + } + + if (io->unpause_source) { + g_source_destroy (io->unpause_source); + 
io->unpause_source = NULL; + } + + if (io->read_state < SOUP_MESSAGE_IO_STATE_FINISHING) + soup_socket_disconnect (io->sock); + else if (io->item && io->item->conn) + soup_connection_set_state (io->item->conn, SOUP_CONNECTION_IDLE); +} + +#define SOUP_MESSAGE_IO_EOL "\r\n" +#define SOUP_MESSAGE_IO_EOL_LEN 2 + +void +soup_message_io_finished (SoupMessage *msg) +{ + SoupMessagePrivate *priv = SOUP_MESSAGE_GET_PRIVATE (msg); + SoupMessageIOData *io = priv->io_data; + SoupMessageCompletionFn completion_cb = io->completion_cb; + gpointer completion_data = io->completion_data; + + g_object_ref (msg); + soup_message_io_cleanup (msg); + if (completion_cb) + completion_cb (msg, completion_data); + g_object_unref (msg); +} + +static void io_read (SoupSocket *sock, SoupMessage *msg); + +static gboolean +request_is_idempotent (SoupMessage *msg) +{ + /* FIXME */ + return (msg->method == SOUP_METHOD_GET); +} + +static void +io_error (SoupSocket *sock, SoupMessage *msg, GError *error) +{ + SoupMessagePrivate *priv = SOUP_MESSAGE_GET_PRIVATE (msg); + SoupMessageIOData *io = priv->io_data; + + if (error && error->domain == G_TLS_ERROR) { + soup_message_set_status_full (msg, + SOUP_STATUS_SSL_FAILED, + error->message); + } else if (io->mode == SOUP_MESSAGE_IO_CLIENT && + io->read_state <= SOUP_MESSAGE_IO_STATE_HEADERS && + io->read_meta_buf->len == 0 && + soup_connection_get_ever_used (io->item->conn) && + !g_error_matches (error, G_IO_ERROR, G_IO_ERROR_TIMED_OUT) && + request_is_idempotent (msg)) { + /* Connection got closed, but we can safely try again */ + io->item->state = SOUP_MESSAGE_RESTARTING; + } else if (!SOUP_STATUS_IS_TRANSPORT_ERROR (msg->status_code)) + soup_message_set_status (msg, SOUP_STATUS_IO_ERROR); + + if (error) + g_error_free (error); + + soup_message_io_finished (msg); +} + +static gboolean +io_handle_sniffing (SoupMessage *msg, gboolean done_reading) +{ + SoupMessagePrivate *priv = SOUP_MESSAGE_GET_PRIVATE (msg); + SoupMessageIOData *io = priv->io_data; + 
SoupBuffer *sniffed_buffer; + char *sniffed_mime_type; + GHashTable *params = NULL; + + if (!priv->sniffer) + return TRUE; + + if (!io->sniff_data) { + io->sniff_data = soup_message_body_new (); + io->need_content_sniffed = TRUE; + } + + if (io->need_content_sniffed) { + if (io->sniff_data->length < priv->bytes_for_sniffing && + !done_reading) + return TRUE; + + io->need_content_sniffed = FALSE; + sniffed_buffer = soup_message_body_flatten (io->sniff_data); + sniffed_mime_type = soup_content_sniffer_sniff (priv->sniffer, msg, sniffed_buffer, ¶ms); + + SOUP_MESSAGE_IO_PREPARE_FOR_CALLBACK; + soup_message_content_sniffed (msg, sniffed_mime_type, params); + g_free (sniffed_mime_type); + if (params) + g_hash_table_destroy (params); + if (sniffed_buffer) + soup_buffer_free (sniffed_buffer); + SOUP_MESSAGE_IO_RETURN_VAL_IF_CANCELLED_OR_PAUSED (FALSE); + } + + if (io->need_got_chunk) { + io->need_got_chunk = FALSE; + sniffed_buffer = soup_message_body_flatten (io->sniff_data); + + SOUP_MESSAGE_IO_PREPARE_FOR_CALLBACK; + soup_message_got_chunk (msg, sniffed_buffer); + soup_buffer_free (sniffed_buffer); + SOUP_MESSAGE_IO_RETURN_VAL_IF_CANCELLED_OR_PAUSED (FALSE); + } + + return TRUE; +} + +/* Reads data from io->sock into io->read_meta_buf. If @to_blank is + * %TRUE, it reads up until a blank line ("CRLF CRLF" or "LF LF"). + * Otherwise, it reads up until a single CRLF or LF. + * + * This function is used to read metadata, and read_body_chunk() is + * used to read the message body contents. + * + * read_metadata, read_body_chunk, and write_data all use the same + * convention for return values: if they return %TRUE, it means + * they've completely finished the requested read/write, and the + * caller should move on to the next step. 
If they return %FALSE, it + * means that either (a) the socket returned SOUP_SOCKET_WOULD_BLOCK, + * so the caller should give up for now and wait for the socket to + * emit a signal, or (b) the socket returned an error, and io_error() + * was called to process it and cancel the I/O. So either way, if the + * function returns %FALSE, the caller should return immediately. + */ +static gboolean +read_metadata (SoupMessage *msg, gboolean to_blank) +{ + SoupMessagePrivate *priv = SOUP_MESSAGE_GET_PRIVATE (msg); + SoupMessageIOData *io = priv->io_data; + SoupSocketIOStatus status; + guchar read_buf[RESPONSE_BLOCK_SIZE]; + gsize nread; + gboolean got_lf; + GError *error = NULL; + + while (1) { + status = soup_socket_read_until (io->sock, read_buf, + sizeof (read_buf), + "\n", 1, &nread, &got_lf, + io->cancellable, &error); + switch (status) { + case SOUP_SOCKET_OK: + g_byte_array_append (io->read_meta_buf, read_buf, nread); + break; + + case SOUP_SOCKET_EOF: + /* More lame server handling... deal with + * servers that don't send the final chunk. 
+ */ + if (io->read_state == SOUP_MESSAGE_IO_STATE_CHUNK_SIZE && + io->read_meta_buf->len == 0) { + g_byte_array_append (io->read_meta_buf, + (guchar *)"0\r\n", 3); + got_lf = TRUE; + break; + } else if (io->read_state == SOUP_MESSAGE_IO_STATE_TRAILERS && + io->read_meta_buf->len == 0) { + g_byte_array_append (io->read_meta_buf, + (guchar *)"\r\n", 2); + got_lf = TRUE; + break; + } + /* else fall through */ + + case SOUP_SOCKET_ERROR: + io_error (io->sock, msg, error); + return FALSE; + + case SOUP_SOCKET_WOULD_BLOCK: + return FALSE; + } + + if (got_lf) { + if (!to_blank) + break; + if (nread == 1 && + !strncmp ((char *)io->read_meta_buf->data + + io->read_meta_buf->len - 2, + "\n\n", 2)) + break; + else if (nread == 2 && + !strncmp ((char *)io->read_meta_buf->data + + io->read_meta_buf->len - 3, + "\n\r\n", 3)) + break; + } + } + + return TRUE; +} + +static SoupBuffer * +content_decode_one (SoupBuffer *buf, GConverter *converter, GError **error) +{ + gsize outbuf_length, outbuf_used, outbuf_cur, input_used, input_cur; + char *outbuf; + GConverterResult result; + + outbuf_length = MAX (buf->length * 2, 1024); + outbuf = g_malloc (outbuf_length); + outbuf_cur = input_cur = 0; + + do { + result = g_converter_convert ( + converter, + buf->data + input_cur, buf->length - input_cur, + outbuf + outbuf_cur, outbuf_length - outbuf_cur, + 0, &input_used, &outbuf_used, error); + input_cur += input_used; + outbuf_cur += outbuf_used; + + if (g_error_matches (*error, G_IO_ERROR, G_IO_ERROR_NO_SPACE) || + (!*error && outbuf_cur == outbuf_length)) { + g_clear_error (error); + outbuf_length *= 2; + outbuf = g_realloc (outbuf, outbuf_length); + } else if (*error) { + /* GZlibDecompressor can't ever return + * G_IO_ERROR_PARTIAL_INPUT unless we pass it + * input_length = 0, which we don't. Other + * converters might of course, so eventually + * this code needs to be rewritten to deal + * with that. 
+ */ + g_free (outbuf); + return NULL; + } + } while (input_cur < buf->length && result != G_CONVERTER_FINISHED); + + if (outbuf_cur) + return soup_buffer_new (SOUP_MEMORY_TAKE, outbuf, outbuf_cur); + else { + g_free (outbuf); + return NULL; + } +} + +static SoupBuffer * +content_decode (SoupMessage *msg, SoupBuffer *buf) +{ + SoupMessagePrivate *priv = SOUP_MESSAGE_GET_PRIVATE (msg); + GConverter *decoder; + SoupBuffer *decoded; + GError *error = NULL; + GSList *d; + + for (d = priv->decoders; d; d = d->next) { + decoder = d->data; + + decoded = content_decode_one (buf, decoder, &error); + if (error) { + if (g_error_matches (error, G_IO_ERROR, G_IO_ERROR_FAILED)) + g_warning ("Content-Decoding error: %s\n", error->message); + g_error_free (error); + + soup_message_set_flags (msg, priv->msg_flags & ~SOUP_MESSAGE_CONTENT_DECODED); + break; + } + if (buf) + soup_buffer_free (buf); + + if (decoded) + buf = decoded; + else + return NULL; + } + + return buf; +} + +/* Reads as much message body data as is available on io->sock (but no + * further than the end of the current message body or chunk). On a + * successful read, emits "got_chunk" (possibly multiple times), and + * (unless told not to) appends the chunk to io->read_body. + * + * See the note at read_metadata() for an explanation of the return + * value. 
 */
static gboolean
read_body_chunk (SoupMessage *msg)
{
	SoupMessagePrivate *priv = SOUP_MESSAGE_GET_PRIVATE (msg);
	SoupMessageIOData *io = priv->io_data;
	SoupSocketIOStatus status;
	guchar *stack_buf = NULL;
	gsize len;
	gboolean read_to_eof = (io->read_encoding == SOUP_ENCODING_EOF);
	gsize nread;
	GError *error = NULL;
	SoupBuffer *buffer;

	/* Deliver anything still pending from content sniffing first. */
	if (!io_handle_sniffing (msg, FALSE))
		return FALSE;

	while (read_to_eof || io->read_length > 0) {
		if (priv->chunk_allocator) {
			/* App-supplied buffer; a NULL return pauses I/O. */
			buffer = priv->chunk_allocator (msg, io->read_length, priv->chunk_allocator_data);
			if (!buffer) {
				soup_message_io_pause (msg);
				return FALSE;
			}
		} else {
			/* One stack block, allocated lazily and reused
			 * for every iteration of the loop.
			 */
			if (!stack_buf)
				stack_buf = alloca (RESPONSE_BLOCK_SIZE);
			buffer = soup_buffer_new (SOUP_MEMORY_TEMPORARY,
						  stack_buf,
						  RESPONSE_BLOCK_SIZE);
		}

		/* Never read past the end of the body/chunk unless the
		 * body is EOF-terminated.
		 */
		if (read_to_eof)
			len = buffer->length;
		else
			len = MIN (buffer->length, io->read_length);

		status = soup_socket_read (io->sock,
					   (guchar *)buffer->data, len,
					   &nread, io->cancellable, &error);

		if (status == SOUP_SOCKET_OK && nread) {
			buffer->length = nread;
			io->read_length -= nread;

			/* content_decode() takes ownership of buffer; a
			 * NULL result means there is nothing to emit yet.
			 */
			buffer = content_decode (msg, buffer);
			if (!buffer)
				continue;

			soup_message_body_got_chunk (io->read_body, buffer);

			if (io->need_content_sniffed) {
				/* Hold the data back until the sniffer
				 * has run; io_handle_sniffing() emits
				 * got_chunk for it later.
				 */
				soup_message_body_append_buffer (io->sniff_data, buffer);
				soup_buffer_free (buffer);
				io->need_got_chunk = TRUE;
				if (!io_handle_sniffing (msg, FALSE))
					return FALSE;
				continue;
			}

			SOUP_MESSAGE_IO_PREPARE_FOR_CALLBACK;
			soup_message_got_chunk (msg, buffer);
			soup_buffer_free (buffer);
			SOUP_MESSAGE_IO_RETURN_VAL_IF_CANCELLED_OR_PAUSED (FALSE);
			continue;
		}

		soup_buffer_free (buffer);
		switch (status) {
		case SOUP_SOCKET_OK:
			/* OK with nread == 0: just try again. */
			break;

		case SOUP_SOCKET_EOF:
			if (io->read_eof_ok) {
				io->read_length = 0;
				return TRUE;
			}
			/* else fall through */

		case SOUP_SOCKET_ERROR:
			io_error (io->sock, msg, error);
			return FALSE;

		case SOUP_SOCKET_WOULD_BLOCK:
			return FALSE;
		}
	}

	return TRUE;
}

/* Attempts to write @len bytes from @data. See the note at
 * read_metadata() for an explanation of the return value.
 *
 * Progress is kept in io->written, so after a %FALSE return the
 * caller must call again with the same @data and @len to resume;
 * io->written is reset to 0 once everything has been written. If
 * @body is %TRUE, each successful write also decrements
 * io->write_length (when non-zero) and emits "wrote_body_data" for
 * the bytes just written.
 */
static gboolean
write_data (SoupMessage *msg, const char *data, guint len, gboolean body)
{
	SoupMessagePrivate *priv = SOUP_MESSAGE_GET_PRIVATE (msg);
	SoupMessageIOData *io = priv->io_data;
	SoupSocketIOStatus status;
	gsize nwrote;
	GError *error = NULL;
	SoupBuffer *chunk;
	const char *start;

	while (len > io->written) {
		status = soup_socket_write (io->sock,
					    data + io->written,
					    len - io->written,
					    &nwrote,
					    io->cancellable, &error);
		switch (status) {
		case SOUP_SOCKET_EOF:
		case SOUP_SOCKET_ERROR:
			io_error (io->sock, msg, error);
			return FALSE;

		case SOUP_SOCKET_WOULD_BLOCK:
			return FALSE;

		case SOUP_SOCKET_OK:
			/* Remember where this write began before
			 * advancing, so the signal below can point at
			 * exactly the bytes that went out.
			 */
			start = data + io->written;
			io->written += nwrote;

			if (body) {
				if (io->write_length)
					io->write_length -= nwrote;

				chunk = soup_buffer_new (SOUP_MEMORY_TEMPORARY,
							 start, nwrote);
				SOUP_MESSAGE_IO_PREPARE_FOR_CALLBACK;
				soup_message_wrote_body_data (msg, chunk);
				soup_buffer_free (chunk);
				SOUP_MESSAGE_IO_RETURN_VAL_IF_CANCELLED_OR_PAUSED (FALSE);
			}
			break;
		}
	}

	io->written = 0;
	return TRUE;
}

/* Returns the first body-phase state for @encoding: CHUNK_SIZE for
 * chunked encoding, BODY for everything else.
 */
static inline SoupMessageIOState
io_body_state (SoupEncoding encoding)
{
	if (encoding == SOUP_ENCODING_CHUNKED)
		return SOUP_MESSAGE_IO_STATE_CHUNK_SIZE;
	else
		return SOUP_MESSAGE_IO_STATE_BODY;
}

/*
 * There are two request/response formats: the basic request/response,
 * possibly with one or more unsolicited informational responses (such
 * as the WebDAV "102 Processing" response):
 *
 *     Client                            Server
 *      W:HEADERS  / R:NOT_STARTED    ->  R:HEADERS  / W:NOT_STARTED
 *      W:BODY     / R:NOT_STARTED    ->  R:BODY     / W:NOT_STARTED
 *     [W:DONE     / R:HEADERS (1xx)  <-  R:DONE     / W:HEADERS (1xx) ...]
 *      W:DONE     / R:HEADERS        <-  R:DONE     / W:HEADERS
 *      W:DONE     / R:BODY           <-  R:DONE     / W:BODY
 *      W:DONE     / R:DONE               R:DONE     / W:DONE
 *
 * and the "Expect: 100-continue" request/response, with the client
 * blocking halfway through its request, and then either continuing or
 * aborting, depending on the server response:
 *
 *     Client                            Server
 *      W:HEADERS  / R:NOT_STARTED    ->  R:HEADERS  / W:NOT_STARTED
 *      W:BLOCKING / R:HEADERS        <-  R:BLOCKING / W:HEADERS
 *     [W:BODY     / R:BLOCKING       ->  R:BODY     / W:BLOCKING]
 *     [W:DONE     / R:HEADERS        <-  R:DONE     / W:HEADERS]
 *      W:DONE     / R:BODY           <-  R:DONE     / W:BODY
 *      W:DONE     / R:DONE               R:DONE     / W:DONE
 */

/* Write-side state machine; also connected as the socket's "writable"
 * handler (see new_iostate()). Each pass through the switch handles
 * io->write_state and loops via write_more until a helper returns
 * %FALSE, the message is paused, or the FINISHING state hands off to
 * io_read() / soup_message_io_finished().
 *
 * NOTE(review): SOUP_MESSAGE_IO_PREPARE_FOR_CALLBACK and
 * SOUP_MESSAGE_IO_RETURN_IF_CANCELLED_OR_PAUSED are defined outside
 * this excerpt; the latter presumably returns from this function, so
 * statements after it may not run. Confirm before reordering anything.
 */
static void
io_write (SoupSocket *sock, SoupMessage *msg)
{
	SoupMessagePrivate *priv = SOUP_MESSAGE_GET_PRIVATE (msg);
	SoupMessageIOData *io = priv->io_data;

 write_more:
	switch (io->write_state) {
	case SOUP_MESSAGE_IO_STATE_NOT_STARTED:
		return;


	case SOUP_MESSAGE_IO_STATE_HEADERS:
		/* An empty write_buf means the headers have not been
		 * generated yet; a non-empty one is a resumed write.
		 */
		if (!io->write_buf->len) {
			io->get_headers_cb (msg, io->write_buf,
					    &io->write_encoding,
					    io->header_data);
			if (!io->write_buf->len) {
				soup_message_io_pause (msg);
				return;
			}
		}

		if (!write_data (msg, io->write_buf->str,
				 io->write_buf->len, FALSE))
			return;

		g_string_truncate (io->write_buf, 0);

		if (io->write_encoding == SOUP_ENCODING_CONTENT_LENGTH) {
			SoupMessageHeaders *hdrs =
				(io->mode == SOUP_MESSAGE_IO_CLIENT) ?
				msg->request_headers : msg->response_headers;
			io->write_length = soup_message_headers_get_content_length (hdrs);
		}

		if (io->mode == SOUP_MESSAGE_IO_SERVER &&
		    SOUP_STATUS_IS_INFORMATIONAL (msg->status_code)) {
			if (msg->status_code == SOUP_STATUS_CONTINUE) {
				/* Stop and wait for the body now */
				io->write_state =
					SOUP_MESSAGE_IO_STATE_BLOCKING;
				io->read_state = io_body_state (io->read_encoding);
			} else {
				/* We just wrote a 1xx response
				 * header, so stay in STATE_HEADERS.
				 * (The caller will pause us from the
				 * wrote_informational callback if he
				 * is not ready to send the final
				 * response.)
				 */
			}
		} else if (io->mode == SOUP_MESSAGE_IO_CLIENT &&
			   soup_message_headers_get_expectations (msg->request_headers) & SOUP_EXPECTATION_CONTINUE) {
			/* Need to wait for the Continue response */
			io->write_state = SOUP_MESSAGE_IO_STATE_BLOCKING;
			io->read_state = SOUP_MESSAGE_IO_STATE_HEADERS;
		} else {
			io->write_state = io_body_state (io->write_encoding);

			/* If the client was waiting for a Continue
			 * but we sent something else, then they're
			 * now done writing.
			 */
			if (io->mode == SOUP_MESSAGE_IO_SERVER &&
			    io->read_state == SOUP_MESSAGE_IO_STATE_BLOCKING)
				io->read_state = SOUP_MESSAGE_IO_STATE_FINISHING;
		}

		SOUP_MESSAGE_IO_PREPARE_FOR_CALLBACK;
		if (SOUP_STATUS_IS_INFORMATIONAL (msg->status_code)) {
			soup_message_wrote_informational (msg);
			soup_message_cleanup_response (msg);
		} else
			soup_message_wrote_headers (msg);
		SOUP_MESSAGE_IO_RETURN_IF_CANCELLED_OR_PAUSED;
		break;


	case SOUP_MESSAGE_IO_STATE_BLOCKING:
		io_read (sock, msg);

		/* If io_read reached a point where we could write
		 * again, it would have recursively called io_write.
		 * So (a) we don't need to try to keep writing, and
		 * (b) we can't anyway, because msg may have been
		 * destroyed.
		 */
		return;


	case SOUP_MESSAGE_IO_STATE_BODY:
		if (!io->write_length && io->write_encoding != SOUP_ENCODING_EOF) {
		wrote_body:
			io->write_state = SOUP_MESSAGE_IO_STATE_FINISHING;

			SOUP_MESSAGE_IO_PREPARE_FOR_CALLBACK;
			soup_message_wrote_body (msg);
			SOUP_MESSAGE_IO_RETURN_IF_CANCELLED_OR_PAUSED;
			break;
		}

		if (!io->write_chunk) {
			io->write_chunk = soup_message_body_get_chunk (io->write_body, io->write_body_offset);
			if (!io->write_chunk) {
				/* Nothing available yet: wait for the app. */
				soup_message_io_pause (msg);
				return;
			}
			if (io->write_chunk->length > io->write_length &&
			    io->write_encoding != SOUP_ENCODING_EOF) {
				/* App is trying to write more than it
				 * claimed it would; we have to truncate.
				 */
				SoupBuffer *truncated =
					soup_buffer_new_subbuffer (io->write_chunk,
								   0, io->write_length);
				soup_buffer_free (io->write_chunk);
				io->write_chunk = truncated;
			} else if (io->write_encoding == SOUP_ENCODING_EOF &&
				   !io->write_chunk->length)
				goto wrote_body;
		}

		if (!write_data (msg, io->write_chunk->data,
				 io->write_chunk->length, TRUE))
			return;

		if (io->mode == SOUP_MESSAGE_IO_SERVER ||
		    priv->msg_flags & SOUP_MESSAGE_CAN_REBUILD)
			soup_message_body_wrote_chunk (io->write_body, io->write_chunk);
		io->write_body_offset += io->write_chunk->length;
		soup_buffer_free (io->write_chunk);
		io->write_chunk = NULL;

		SOUP_MESSAGE_IO_PREPARE_FOR_CALLBACK;
		soup_message_wrote_chunk (msg);
		SOUP_MESSAGE_IO_RETURN_IF_CANCELLED_OR_PAUSED;
		break;

	case SOUP_MESSAGE_IO_STATE_CHUNK_SIZE:
		/* write_chunk doubles as the "size line already
		 * formatted" marker, so a resumed write does not append
		 * the size line to write_buf a second time.
		 */
		if (!io->write_chunk) {
			io->write_chunk = soup_message_body_get_chunk (io->write_body, io->write_body_offset);
			if (!io->write_chunk) {
				soup_message_io_pause (msg);
				return;
			}
			g_string_append_printf (io->write_buf, "%lx\r\n",
						(unsigned long) io->write_chunk->length);
			io->write_body_offset += io->write_chunk->length;
		}

		if (!write_data (msg, io->write_buf->str,
				 io->write_buf->len, FALSE))
			return;

		g_string_truncate (io->write_buf, 0);

		if (io->write_chunk->length == 0) {
			/* The last chunk has no CHUNK_END... */
			io->write_state = SOUP_MESSAGE_IO_STATE_TRAILERS;
			break;
		}

		io->write_state = SOUP_MESSAGE_IO_STATE_CHUNK;
		/* fall through */


	case SOUP_MESSAGE_IO_STATE_CHUNK:
		if (!write_data (msg, io->write_chunk->data,
				 io->write_chunk->length, TRUE))
			return;

		if (io->mode == SOUP_MESSAGE_IO_SERVER ||
		    priv->msg_flags & SOUP_MESSAGE_CAN_REBUILD)
			soup_message_body_wrote_chunk (io->write_body, io->write_chunk);
		soup_buffer_free (io->write_chunk);
		io->write_chunk = NULL;

		io->write_state = SOUP_MESSAGE_IO_STATE_CHUNK_END;

		SOUP_MESSAGE_IO_PREPARE_FOR_CALLBACK;
		soup_message_wrote_chunk (msg);
		SOUP_MESSAGE_IO_RETURN_IF_CANCELLED_OR_PAUSED;

		/* fall through */


	case SOUP_MESSAGE_IO_STATE_CHUNK_END:
		if (!write_data (msg, SOUP_MESSAGE_IO_EOL,
				 SOUP_MESSAGE_IO_EOL_LEN, FALSE))
			return;

		io->write_state = SOUP_MESSAGE_IO_STATE_CHUNK_SIZE;
		break;


	case SOUP_MESSAGE_IO_STATE_TRAILERS:
		if (!write_data (msg, SOUP_MESSAGE_IO_EOL,
				 SOUP_MESSAGE_IO_EOL_LEN, FALSE))
			return;

		io->write_state = SOUP_MESSAGE_IO_STATE_FINISHING;

		SOUP_MESSAGE_IO_PREPARE_FOR_CALLBACK;
		soup_message_wrote_body (msg);
		SOUP_MESSAGE_IO_RETURN_IF_CANCELLED_OR_PAUSED;
		/* fall through */


	case SOUP_MESSAGE_IO_STATE_FINISHING:
		if (io->write_tag) {
			g_signal_handler_disconnect (io->sock, io->write_tag);
			io->write_tag = 0;
		}
		io->write_state = SOUP_MESSAGE_IO_STATE_DONE;

		/* A client now reads the response; a server that has
		 * finished writing is done with the message.
		 */
		if (io->mode == SOUP_MESSAGE_IO_CLIENT) {
			io->read_state = SOUP_MESSAGE_IO_STATE_HEADERS;
			io_read (sock, msg);
		} else
			soup_message_io_finished (msg);
		return;


	case SOUP_MESSAGE_IO_STATE_DONE:
	default:
		g_return_if_reached ();
	}

	goto write_more;
}

/* Read-side counterpart of io_write(); also connected as the socket's
 * "readable" handler (see new_iostate()). Each pass through the
 * switch handles io->read_state and loops via read_more until a
 * helper returns %FALSE, the message is paused, or the FINISHING
 * state hands off to io_write() / soup_message_io_finished().
 */
static void
io_read (SoupSocket *sock, SoupMessage *msg)
{
	SoupMessagePrivate *priv = SOUP_MESSAGE_GET_PRIVATE (msg);
	SoupMessageIOData *io = priv->io_data;
	guint status;

 read_more:
	switch (io->read_state) {
	case SOUP_MESSAGE_IO_STATE_NOT_STARTED:
		return;


	case SOUP_MESSAGE_IO_STATE_HEADERS:
		if (!read_metadata (msg, TRUE))
			return;

		/* We need to "rewind" io->read_meta_buf back one line.
		 * That SHOULD be two characters (CR LF), but if the
		 * web server was stupid, it might only be one.
		 */
		if (io->read_meta_buf->len < 3 ||
		    io->read_meta_buf->data[io->read_meta_buf->len - 2] == '\n')
			io->read_meta_buf->len--;
		else
			io->read_meta_buf->len -= 2;
		/* NUL-terminate in place so the parser sees a C string. */
		io->read_meta_buf->data[io->read_meta_buf->len] = '\0';
		status = io->parse_headers_cb (msg, (char *)io->read_meta_buf->data,
					       io->read_meta_buf->len,
					       &io->read_encoding,
					       io->header_data);
		g_byte_array_set_size (io->read_meta_buf, 0);

		if (status != SOUP_STATUS_OK) {
			/* Either we couldn't parse the headers, or they
			 * indicated something that would mean we wouldn't
			 * be able to parse the body. (Eg, unknown
			 * Transfer-Encoding.). Skip the rest of the
			 * reading, and make sure the connection gets
			 * closed when we're done.
			 */
			soup_message_set_status (msg, status);
			soup_message_headers_append (msg->request_headers,
						     "Connection", "close");
			io->read_state = SOUP_MESSAGE_IO_STATE_FINISHING;
			break;
		}

		if (io->read_encoding == SOUP_ENCODING_EOF)
			io->read_eof_ok = TRUE;

		if (io->read_encoding == SOUP_ENCODING_CONTENT_LENGTH) {
			SoupMessageHeaders *hdrs =
				(io->mode == SOUP_MESSAGE_IO_CLIENT) ?
				msg->response_headers : msg->request_headers;
			io->read_length = soup_message_headers_get_content_length (hdrs);

			if (io->mode == SOUP_MESSAGE_IO_CLIENT &&
			    !soup_message_is_keepalive (msg)) {
				/* Some servers suck and send
				 * incorrect Content-Length values, so
				 * allow EOF termination in this case
				 * (iff the message is too short) too.
				 */
				io->read_eof_ok = TRUE;
			}
		}

		if (io->mode == SOUP_MESSAGE_IO_CLIENT &&
		    SOUP_STATUS_IS_INFORMATIONAL (msg->status_code)) {
			if (msg->status_code == SOUP_STATUS_CONTINUE &&
			    io->write_state == SOUP_MESSAGE_IO_STATE_BLOCKING) {
				/* Pause the reader, unpause the writer */
				io->read_state =
					SOUP_MESSAGE_IO_STATE_BLOCKING;
				io->write_state =
					io_body_state (io->write_encoding);
			} else {
				/* Just stay in HEADERS */
				io->read_state = SOUP_MESSAGE_IO_STATE_HEADERS;
			}
		} else if (io->mode == SOUP_MESSAGE_IO_SERVER &&
			   soup_message_headers_get_expectations (msg->request_headers) & SOUP_EXPECTATION_CONTINUE) {
			/* The client requested a Continue response. The
			 * got_headers handler may change this to something
			 * else though.
			 */
			soup_message_set_status (msg, SOUP_STATUS_CONTINUE);
			io->write_state = SOUP_MESSAGE_IO_STATE_HEADERS;
			io->read_state = SOUP_MESSAGE_IO_STATE_BLOCKING;
		} else {
			io->read_state = io_body_state (io->read_encoding);

			/* If the client was waiting for a Continue
			 * but got something else, then it's done
			 * writing.
			 */
			if (io->mode == SOUP_MESSAGE_IO_CLIENT &&
			    io->write_state == SOUP_MESSAGE_IO_STATE_BLOCKING)
				io->write_state = SOUP_MESSAGE_IO_STATE_FINISHING;
		}

		if (io->mode == SOUP_MESSAGE_IO_CLIENT &&
		    SOUP_STATUS_IS_INFORMATIONAL (msg->status_code)) {
			SOUP_MESSAGE_IO_PREPARE_FOR_CALLBACK;
			soup_message_got_informational (msg);
			soup_message_cleanup_response (msg);
			SOUP_MESSAGE_IO_RETURN_IF_CANCELLED_OR_PAUSED;
		} else {
			SOUP_MESSAGE_IO_PREPARE_FOR_CALLBACK;
			soup_message_got_headers (msg);
			SOUP_MESSAGE_IO_RETURN_IF_CANCELLED_OR_PAUSED;
		}
		break;


	case SOUP_MESSAGE_IO_STATE_BLOCKING:
		io_write (sock, msg);

		/* As in the io_write case, we *must* return here. */
		return;


	case SOUP_MESSAGE_IO_STATE_BODY:
		if (!read_body_chunk (msg))
			return;

	got_body:
		if (!io_handle_sniffing (msg, TRUE)) {
			/* If the message was paused (as opposed to
			 * cancelled), we need to make sure we wind up
			 * back here when it's unpaused, even if it
			 * was doing a chunked or EOF-terminated read
			 * before.
			 */
			if (io == priv->io_data) {
				io->read_state = SOUP_MESSAGE_IO_STATE_BODY;
				io->read_encoding = SOUP_ENCODING_CONTENT_LENGTH;
				io->read_length = 0;
			}
			return;
		}

		io->read_state = SOUP_MESSAGE_IO_STATE_FINISHING;

		SOUP_MESSAGE_IO_PREPARE_FOR_CALLBACK;
		soup_message_got_body (msg);
		SOUP_MESSAGE_IO_RETURN_IF_CANCELLED_OR_PAUSED;
		break;


	case SOUP_MESSAGE_IO_STATE_CHUNK_SIZE:
		if (!read_metadata (msg, FALSE))
			return;

		/* The chunk-size line is hexadecimal. */
		io->read_length = strtoul ((char *)io->read_meta_buf->data, NULL, 16);
		g_byte_array_set_size (io->read_meta_buf, 0);

		if (io->read_length > 0)
			io->read_state = SOUP_MESSAGE_IO_STATE_CHUNK;
		else
			io->read_state = SOUP_MESSAGE_IO_STATE_TRAILERS;
		break;


	case SOUP_MESSAGE_IO_STATE_CHUNK:
		if (!read_body_chunk (msg))
			return;

		io->read_state = SOUP_MESSAGE_IO_STATE_CHUNK_END;
		break;


	case SOUP_MESSAGE_IO_STATE_CHUNK_END:
		if (!read_metadata (msg, FALSE))
			return;

		g_byte_array_set_size (io->read_meta_buf, 0);
		io->read_state = SOUP_MESSAGE_IO_STATE_CHUNK_SIZE;
		break;


	case SOUP_MESSAGE_IO_STATE_TRAILERS:
		if (!read_metadata (msg, FALSE))
			return;

		/* A line no longer than a bare EOL ends the trailers. */
		if (io->read_meta_buf->len <= SOUP_MESSAGE_IO_EOL_LEN)
			goto got_body;

		/* FIXME: process trailers */
		g_byte_array_set_size (io->read_meta_buf, 0);
		break;


	case SOUP_MESSAGE_IO_STATE_FINISHING:
		if (io->read_tag) {
			g_signal_handler_disconnect (io->sock, io->read_tag);
			io->read_tag = 0;
		}
		io->read_state = SOUP_MESSAGE_IO_STATE_DONE;

		/* A server now writes its response; a client that has
		 * finished reading is done with the message.
		 */
		if (io->mode == SOUP_MESSAGE_IO_SERVER) {
			io->write_state = SOUP_MESSAGE_IO_STATE_HEADERS;
			io_write (sock, msg);
		} else
			soup_message_io_finished (msg);
		return;


	case SOUP_MESSAGE_IO_STATE_DONE:
	default:
		g_return_if_reached ();
	}

	goto read_more;
}

/* "notify::tls-certificate" handler: copies the socket's TLS
 * certificate and TLS error flags onto the message. @msg is the
 * #SoupMessage passed as user data when the handler was connected.
 */
static void
socket_tls_certificate_changed (GObject *sock, GParamSpec *pspec,
				gpointer msg)
{
	GTlsCertificate *certificate;
	GTlsCertificateFlags errors;

	g_object_get (sock,
		      SOUP_SOCKET_TLS_CERTIFICATE, &certificate,
		      SOUP_SOCKET_TLS_ERRORS, &errors,
		      NULL);
	g_object_set (msg,
		      SOUP_MESSAGE_TLS_CERTIFICATE, certificate,
		      SOUP_MESSAGE_TLS_ERRORS, errors,
		      NULL);
	/* Drop the reference obtained through g_object_get(). */
	if (certificate)
		g_object_unref (certificate);
}

/* Allocates and initialises the per-message I/O state shared by the
 * client and server entry points: takes a ref on @sock, stores the
 * callbacks, connects io_read()/io_write() to the socket's
 * "readable"/"writable" signals, and installs the result as
 * priv->io_data (cleaning up any previous I/O state on @msg first).
 * Both directions start in STATE_NOT_STARTED; the caller selects the
 * one to begin with.
 */
static SoupMessageIOData *
new_iostate (SoupMessage *msg, SoupSocket *sock, SoupMessageIOMode mode,
	     SoupMessageGetHeadersFn get_headers_cb,
	     SoupMessageParseHeadersFn parse_headers_cb,
	     gpointer header_data,
	     SoupMessageCompletionFn completion_cb,
	     gpointer completion_data)
{
	SoupMessagePrivate *priv = SOUP_MESSAGE_GET_PRIVATE (msg);
	SoupMessageIOData *io;

	io = g_slice_new0 (SoupMessageIOData);
	io->sock = g_object_ref (sock);
	io->mode = mode;
	io->get_headers_cb = get_headers_cb;
	io->parse_headers_cb = parse_headers_cb;
	io->header_data = header_data;
	io->completion_cb = completion_cb;
	io->completion_data = completion_data;

	io->read_meta_buf = g_byte_array_new ();
	io->write_buf = g_string_new (NULL);

	io->read_tag = g_signal_connect (io->sock, "readable",
					 G_CALLBACK (io_read), msg);
	io->write_tag = g_signal_connect (io->sock, "writable",
					  G_CALLBACK (io_write), msg);

	io->read_state = SOUP_MESSAGE_IO_STATE_NOT_STARTED;
	io->write_state = SOUP_MESSAGE_IO_STATE_NOT_STARTED;

	if (soup_socket_is_ssl (io->sock)) {
		io->tls_signal_id = g_signal_connect (io->sock, "notify::tls-certificate",
						      G_CALLBACK (socket_tls_certificate_changed), msg);
	}

	if (priv->io_data)
		soup_message_io_cleanup (msg);
	priv->io_data = io;
	return io;
}

/* Starts client-side I/O for @item's message on the socket of
 * @item->conn: the request body is written and the response body is
 * read. Takes a ref on @item, shares its cancellable, and immediately
 * starts writing the request headers.
 */
void
soup_message_io_client (SoupMessageQueueItem *item,
			SoupMessageGetHeadersFn get_headers_cb,
			SoupMessageParseHeadersFn parse_headers_cb,
			gpointer header_data,
			SoupMessageCompletionFn completion_cb,
			gpointer completion_data)
{
	SoupMessageIOData *io;
	SoupSocket *sock = soup_connection_get_socket (item->conn);

	io = new_iostate (item->msg, sock, SOUP_MESSAGE_IO_CLIENT,
			  get_headers_cb, parse_headers_cb, header_data,
			  completion_cb, completion_data);

	io->item = item;
	soup_message_queue_item_ref (item);
	io->cancellable = item->cancellable;

	io->read_body = item->msg->response_body;
	io->write_body = item->msg->request_body;

	io->write_state = SOUP_MESSAGE_IO_STATE_HEADERS;
	io_write (sock, item->msg);
}

/* Starts server-side I/O for @msg on @sock: the request body is read
 * and the response body is written. Immediately starts reading the
 * request headers.
 */
void
soup_message_io_server (SoupMessage *msg, SoupSocket *sock,
			SoupMessageGetHeadersFn get_headers_cb,
			SoupMessageParseHeadersFn parse_headers_cb,
			gpointer header_data,
			SoupMessageCompletionFn completion_cb,
			gpointer completion_data)
{
	SoupMessageIOData *io;

	io = new_iostate (msg, sock, SOUP_MESSAGE_IO_SERVER,
			  get_headers_cb, parse_headers_cb, header_data,
			  completion_cb, completion_data);

	io->read_body = msg->request_body;
	io->write_body = msg->response_body;

	io->read_state = SOUP_MESSAGE_IO_STATE_HEADERS;
	io_read (sock, msg);
}

/* Pauses I/O on @msg by disconnecting both socket signal handlers and
 * cancelling any pending unpause source. The read/write states are
 * left untouched so that io_unpause_internal() can resume from them.
 */
void
soup_message_io_pause (SoupMessage *msg)
{
	SoupMessagePrivate *priv = SOUP_MESSAGE_GET_PRIVATE (msg);
	SoupMessageIOData *io = priv->io_data;

	g_return_if_fail (io != NULL);

	if (io->write_tag) {
		g_signal_handler_disconnect (io->sock, io->write_tag);
		io->write_tag = 0;
	}
	if (io->read_tag) {
		g_signal_handler_disconnect (io->sock, io->read_tag);
		io->read_tag = 0;
	}

	if (io->unpause_source) {
		g_source_destroy (io->unpause_source);
		io->unpause_source = NULL;
	}
}

/* Does the actual work of unpausing: reconnects the socket handlers
 * for whichever directions are not yet DONE and restarts the active
 * state machine. Always returns %FALSE, so when it runs as a source
 * callback it is not invoked again.
 */
static gboolean
io_unpause_internal (gpointer msg)
{
	SoupMessagePrivate *priv = SOUP_MESSAGE_GET_PRIVATE (msg);
	SoupMessageIOData *io = priv->io_data;

	g_return_val_if_fail (io != NULL, FALSE);
	io->unpause_source = NULL;

	/* A handler is still connected, so we are not paused. */
	if (io->write_tag || io->read_tag)
		return FALSE;

	if (io->write_state != SOUP_MESSAGE_IO_STATE_DONE) {
		io->write_tag = g_signal_connect (io->sock, "writable",
						  G_CALLBACK (io_write), msg);
	}

	if (io->read_state != SOUP_MESSAGE_IO_STATE_DONE) {
		io->read_tag = g_signal_connect (io->sock, "readable",
						 G_CALLBACK (io_read), msg);
	}

	if (SOUP_MESSAGE_IO_STATE_ACTIVE (io->write_state))
		io_write (io->sock, msg);
	else if (SOUP_MESSAGE_IO_STATE_ACTIVE (io->read_state))
		io_read (io->sock, msg);

	return FALSE;
}

/* Resumes I/O on a paused @msg. On a non-blocking socket the resume
 * is deferred to the socket's async context via soup_add_completion()
 * (at most one pending source at a time); otherwise it happens
 * synchronously.
 */
void
soup_message_io_unpause (SoupMessage *msg)
{
	SoupMessagePrivate *priv = SOUP_MESSAGE_GET_PRIVATE (msg);
	SoupMessageIOData *io = priv->io_data;
	gboolean non_blocking;
	GMainContext *async_context;

	g_return_if_fail (io != NULL);

	g_object_get (io->sock,
		      SOUP_SOCKET_FLAG_NONBLOCKING, &non_blocking,
		      SOUP_SOCKET_ASYNC_CONTEXT, &async_context,
		      NULL);
	if (non_blocking) {
		if (!io->unpause_source) {
			io->unpause_source = soup_add_completion (
				async_context, io_unpause_internal, msg);
		}
	} else
		io_unpause_internal (msg);
	/* Drop the reference obtained through g_object_get(). */
	if (async_context)
		g_main_context_unref (async_context);
}

/**
 * soup_message_io_in_progress:
 * @msg: a #SoupMessage
 *
 * Tests whether or not I/O is currently in progress on @msg.
 *
 * Return value: whether or not I/O is currently in progress.
 **/
gboolean
soup_message_io_in_progress (SoupMessage *msg)
{
	SoupMessagePrivate *priv = SOUP_MESSAGE_GET_PRIVATE (msg);

	return priv->io_data != NULL;
}
diff --git a/libsoup/soup-message-private.h b/libsoup/soup-message-private.h new file mode 100644 index 0000000..ce866dc --- /dev/null +++ b/libsoup/soup-message-private.h @@ -0,0 +1,99 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2000-2003, Ximian, Inc. 
+ */ + +#ifndef SOUP_MESSAGE_PRIVATE_H +#define SOUP_MESSAGE_PRIVATE_H 1 + +#include "soup-message.h" +#include "soup-auth.h" +#include "soup-content-sniffer.h" + +typedef struct { + gpointer io_data; + + SoupChunkAllocator chunk_allocator; + gpointer chunk_allocator_data; + GDestroyNotify chunk_allocator_dnotify; + + guint msg_flags; + gboolean server_side; + + SoupContentSniffer *sniffer; + gsize bytes_for_sniffing; + + SoupHTTPVersion http_version, orig_http_version; + + SoupURI *uri; + SoupAddress *addr; + + SoupAuth *auth, *proxy_auth; + + GSList *disabled_features; + GSList *decoders; + + SoupURI *first_party; + + GTlsCertificate *tls_certificate; + GTlsCertificateFlags tls_errors; +} SoupMessagePrivate; +#define SOUP_MESSAGE_GET_PRIVATE(o) (G_TYPE_INSTANCE_GET_PRIVATE ((o), SOUP_TYPE_MESSAGE, SoupMessagePrivate)) + +void soup_message_cleanup_response (SoupMessage *req); + + +typedef void (*SoupMessageGetHeadersFn) (SoupMessage *msg, + GString *headers, + SoupEncoding *encoding, + gpointer user_data); +typedef guint (*SoupMessageParseHeadersFn)(SoupMessage *msg, + char *headers, + guint header_len, + SoupEncoding *encoding, + gpointer user_data); +typedef void (*SoupMessageCompletionFn) (SoupMessage *msg, + gpointer user_data); + + +void soup_message_send_request (SoupMessageQueueItem *item, + SoupMessageCompletionFn completion_cb, + gpointer user_data); +void soup_message_read_request (SoupMessage *req, + SoupSocket *sock, + SoupMessageCompletionFn completion_cb, + gpointer user_data); + +void soup_message_io_client (SoupMessageQueueItem *item, + SoupMessageGetHeadersFn get_headers_cb, + SoupMessageParseHeadersFn parse_headers_cb, + gpointer headers_data, + SoupMessageCompletionFn completion_cb, + gpointer user_data); +void soup_message_io_server (SoupMessage *msg, + SoupSocket *sock, + SoupMessageGetHeadersFn get_headers_cb, + SoupMessageParseHeadersFn parse_headers_cb, + gpointer headers_data, + SoupMessageCompletionFn completion_cb, + gpointer user_data); 
+void soup_message_io_cleanup (SoupMessage *msg); + +/* Auth handling */ +void soup_message_set_auth (SoupMessage *msg, + SoupAuth *auth); +SoupAuth *soup_message_get_auth (SoupMessage *msg); +void soup_message_set_proxy_auth (SoupMessage *msg, + SoupAuth *auth); +SoupAuth *soup_message_get_proxy_auth (SoupMessage *msg); + +/* I/O */ +void soup_message_io_stop (SoupMessage *msg); +void soup_message_io_finished (SoupMessage *msg); +void soup_message_io_pause (SoupMessage *msg); +void soup_message_io_unpause (SoupMessage *msg); +gboolean soup_message_io_in_progress (SoupMessage *msg); + +gboolean soup_message_disables_feature (SoupMessage *msg, + gpointer feature); +#endif /* SOUP_MESSAGE_PRIVATE_H */ diff --git a/libsoup/soup-message-queue.c b/libsoup/soup-message-queue.c new file mode 100644 index 0000000..58fea58 --- /dev/null +++ b/libsoup/soup-message-queue.c @@ -0,0 +1,300 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * soup-message-queue.c: Message queue + * + * Copyright (C) 2003 Novell, Inc. + * Copyright (C) 2008 Red Hat, Inc. + */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include "soup-message-queue.h" +#include "soup-uri.h" + +/** + * SECTION:soup-message-queue + * + * This is an internal structure used by #SoupSession and its + * subclasses to keep track of the status of messages currently being + * processed. + * + * The #SoupMessageQueue itself is mostly just a linked list of + * #SoupMessageQueueItem, with some added cleverness to allow the list + * to be walked safely while other threads / re-entrant loops are + * adding items to and removing items from it. In particular, this is + * handled by refcounting items and then keeping "removed" items in + * the list until their ref_count drops to 0, but skipping over the + * "removed" ones when walking the queue. 
+ **/ + +struct _SoupMessageQueue { + SoupSession *session; + + GMutex *mutex; + SoupMessageQueueItem *head, *tail; +}; + +SoupMessageQueue * +soup_message_queue_new (SoupSession *session) +{ + SoupMessageQueue *queue; + + queue = g_slice_new0 (SoupMessageQueue); + queue->session = session; + queue->mutex = g_mutex_new (); + return queue; +} + +void +soup_message_queue_destroy (SoupMessageQueue *queue) +{ + g_return_if_fail (queue->head == NULL); + + g_mutex_free (queue->mutex); + g_slice_free (SoupMessageQueue, queue); +} + +static void +queue_message_restarted (SoupMessage *msg, gpointer user_data) +{ + SoupMessageQueueItem *item = user_data; + + if (item->proxy_addr) { + g_object_unref (item->proxy_addr); + item->proxy_addr = NULL; + } + if (item->proxy_uri) { + soup_uri_free (item->proxy_uri); + item->proxy_uri = NULL; + } + + if (item->conn && + (!soup_message_is_keepalive (msg) || + SOUP_STATUS_IS_REDIRECTION (msg->status_code))) { + if (soup_connection_get_state (item->conn) == SOUP_CONNECTION_IN_USE) + soup_connection_set_state (item->conn, SOUP_CONNECTION_IDLE); + g_object_unref (item->conn); + item->conn = NULL; + } + + soup_message_cleanup_response (msg); + + g_cancellable_reset (item->cancellable); + + item->state = SOUP_MESSAGE_STARTING; +} + +/** + * soup_message_queue_append: + * @queue: a #SoupMessageQueue + * @msg: a #SoupMessage + * @callback: the callback for @msg + * @user_data: the data to pass to @callback + * + * Creates a new #SoupMessageQueueItem and appends it to @queue. + * + * Return value: the new item, which you must unref with + * soup_message_queue_unref_item() when you are done with. 
+ **/ +SoupMessageQueueItem * +soup_message_queue_append (SoupMessageQueue *queue, SoupMessage *msg, + SoupSessionCallback callback, gpointer user_data) +{ + SoupMessageQueueItem *item; + + item = g_slice_new0 (SoupMessageQueueItem); + item->session = queue->session; + item->queue = queue; + item->msg = g_object_ref (msg); + item->callback = callback; + item->callback_data = user_data; + item->cancellable = g_cancellable_new (); + + g_signal_connect (msg, "restarted", + G_CALLBACK (queue_message_restarted), item); + + /* Note: the initial ref_count of 1 represents the caller's + * ref; the queue's own ref is indicated by the absence of the + * "removed" flag. + */ + item->ref_count = 1; + + g_mutex_lock (queue->mutex); + if (queue->head) { + queue->tail->next = item; + item->prev = queue->tail; + queue->tail = item; + } else + queue->head = queue->tail = item; + + g_mutex_unlock (queue->mutex); + return item; +} + +/** + * soup_message_queue_item_ref: + * @item: a #SoupMessageQueueItem + * + * Refs @item. + **/ +void +soup_message_queue_item_ref (SoupMessageQueueItem *item) +{ + item->ref_count++; +} + +/** + * soup_message_queue_item_unref: + * @item: a #SoupMessageQueueItem + * + * Unrefs @item; use this on a #SoupMessageQueueItem that you are done + * with (but that you aren't passing to + * soup_message_queue_item_next()). + **/ +void +soup_message_queue_item_unref (SoupMessageQueueItem *item) +{ + g_mutex_lock (item->queue->mutex); + + /* Decrement the ref_count; if it's still non-zero OR if the + * item is still in the queue, then return. + */ + if (--item->ref_count || !item->removed) { + g_mutex_unlock (item->queue->mutex); + return; + } + + /* OK, @item is dead. 
Rewrite @queue around it */ + if (item->prev) + item->prev->next = item->next; + else + item->queue->head = item->next; + if (item->next) + item->next->prev = item->prev; + else + item->queue->tail = item->prev; + + g_mutex_unlock (item->queue->mutex); + + /* And free it */ + g_signal_handlers_disconnect_by_func (item->msg, + queue_message_restarted, item); + g_object_unref (item->msg); + g_object_unref (item->cancellable); + if (item->proxy_addr) + g_object_unref (item->proxy_addr); + if (item->proxy_uri) + soup_uri_free (item->proxy_uri); + if (item->conn) + g_object_unref (item->conn); + g_slice_free (SoupMessageQueueItem, item); +} + +/** + * soup_message_queue_lookup: + * @queue: a #SoupMessageQueue + * @msg: a #SoupMessage + * + * Finds the #SoupMessageQueueItem for @msg in @queue. You must unref + * the item with soup_message_queue_item_unref() when you are done + * with it. + * + * Return value: the queue item for @msg, or %NULL + **/ +SoupMessageQueueItem * +soup_message_queue_lookup (SoupMessageQueue *queue, SoupMessage *msg) +{ + SoupMessageQueueItem *item; + + g_mutex_lock (queue->mutex); + + item = queue->tail; + while (item && (item->removed || item->msg != msg)) + item = item->prev; + + if (item) + item->ref_count++; + + g_mutex_unlock (queue->mutex); + return item; +} + +/** + * soup_message_queue_first: + * @queue: a #SoupMessageQueue + * + * Gets the first item in @queue. You must unref the item by calling + * soup_message_queue_item_unref() on it when you are done. + * (soup_message_queue_next() does this for you automatically, so you + * only need to unref the item yourself if you are not going to + * finish walking the queue.) + * + * Return value: the first item in @queue.
+ **/ +SoupMessageQueueItem * +soup_message_queue_first (SoupMessageQueue *queue) +{ + SoupMessageQueueItem *item; + + g_mutex_lock (queue->mutex); + + item = queue->head; + while (item && item->removed) + item = item->next; + + if (item) + item->ref_count++; + + g_mutex_unlock (queue->mutex); + return item; +} + +/** + * soup_message_queue_next: + * @queue: a #SoupMessageQueue + * @item: a #SoupMessageQueueItem + * + * Unrefs @item and gets the next item after it in @queue. As with + * soup_message_queue_first(), you must unref the returned item + * yourself with soup_message_queue_item_unref() if you do not finish + * walking the queue. + * + * Return value: the next item in @queue. + **/ +SoupMessageQueueItem * +soup_message_queue_next (SoupMessageQueue *queue, SoupMessageQueueItem *item) +{ + SoupMessageQueueItem *next; + + g_mutex_lock (queue->mutex); + + next = item->next; + while (next && next->removed) + next = next->next; + if (next) + next->ref_count++; + + g_mutex_unlock (queue->mutex); + soup_message_queue_item_unref (item); + return next; +} + +/** + * soup_message_queue_remove: + * @queue: a #SoupMessageQueue + * @item: a #SoupMessageQueueItem + * + * Removes @item from @queue. Note that you probably also need to call + * soup_message_queue_item_unref() after this. + **/ +void +soup_message_queue_remove (SoupMessageQueue *queue, SoupMessageQueueItem *item) +{ + g_return_if_fail (!item->removed); + + g_mutex_lock (queue->mutex); + item->removed = TRUE; + g_mutex_unlock (queue->mutex); +} diff --git a/libsoup/soup-message-queue.h b/libsoup/soup-message-queue.h new file mode 100644 index 0000000..08cc6df --- /dev/null +++ b/libsoup/soup-message-queue.h @@ -0,0 +1,83 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2003 Novell, Inc. + * Copyright (C) 2008 Red Hat, Inc.
+ */ + +#ifndef SOUP_MESSAGE_QUEUE_H +#define SOUP_MESSAGE_QUEUE_H 1 + +#include "soup-connection.h" +#include "soup-message.h" +#include "soup-session.h" + +G_BEGIN_DECLS + +typedef enum { + SOUP_MESSAGE_STARTING, + SOUP_MESSAGE_RESOLVING_PROXY_URI, + SOUP_MESSAGE_RESOLVED_PROXY_URI, + SOUP_MESSAGE_RESOLVING_PROXY_ADDRESS, + SOUP_MESSAGE_RESOLVED_PROXY_ADDRESS, + SOUP_MESSAGE_AWAITING_CONNECTION, + SOUP_MESSAGE_GOT_CONNECTION, + SOUP_MESSAGE_CONNECTING, + SOUP_MESSAGE_CONNECTED, + SOUP_MESSAGE_TUNNELING, + SOUP_MESSAGE_TUNNELED, + SOUP_MESSAGE_READY, + SOUP_MESSAGE_RUNNING, + SOUP_MESSAGE_RESTARTING, + SOUP_MESSAGE_FINISHING, + SOUP_MESSAGE_FINISHED +} SoupMessageQueueItemState; + +struct _SoupMessageQueueItem { + /*< public >*/ + SoupSession *session; + SoupMessageQueue *queue; + SoupMessage *msg; + SoupSessionCallback callback; + gpointer callback_data; + + GCancellable *cancellable; + SoupAddress *proxy_addr; + SoupURI *proxy_uri; + SoupConnection *conn; + + guint redirection_count; + + SoupMessageQueueItemState state; + + /*< private >*/ + guint removed : 1; + guint ref_count : 31; + SoupMessageQueueItem *prev, *next; + SoupMessageQueueItem *related; +}; + +SoupMessageQueue *soup_message_queue_new (SoupSession *session); +SoupMessageQueueItem *soup_message_queue_append (SoupMessageQueue *queue, + SoupMessage *msg, + SoupSessionCallback callback, + gpointer user_data); + +SoupMessageQueueItem *soup_message_queue_lookup (SoupMessageQueue *queue, + SoupMessage *msg); + +SoupMessageQueueItem *soup_message_queue_first (SoupMessageQueue *queue); +SoupMessageQueueItem *soup_message_queue_next (SoupMessageQueue *queue, + SoupMessageQueueItem *item); + +void soup_message_queue_remove (SoupMessageQueue *queue, + SoupMessageQueueItem *item); + +void soup_message_queue_item_ref (SoupMessageQueueItem *item); +void soup_message_queue_item_unref (SoupMessageQueueItem *item); + +void soup_message_queue_destroy (SoupMessageQueue *queue); + + +G_END_DECLS + +#endif /* 
SOUP_MESSAGE_QUEUE_H */ diff --git a/libsoup/soup-message-server-io.c b/libsoup/soup-message-server-io.c new file mode 100644 index 0000000..573fcaf --- /dev/null +++ b/libsoup/soup-message-server-io.c @@ -0,0 +1,248 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * soup-message-server-io.c: server-side request/response + * + * Copyright (C) 2000-2003, Ximian, Inc. + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include + +#include "soup-message-private.h" +#include "soup-address.h" +#include "soup-auth.h" +#include "soup-headers.h" +#include "soup-multipart.h" +#include "soup-server.h" +#include "soup-socket.h" + +static guint +parse_request_headers (SoupMessage *msg, char *headers, guint headers_len, + SoupEncoding *encoding, gpointer sock) +{ + SoupMessagePrivate *priv = SOUP_MESSAGE_GET_PRIVATE (msg); + char *req_method, *req_path, *url; + SoupHTTPVersion version; + const char *req_host; + guint status; + SoupURI *uri; + + status = soup_headers_parse_request (headers, headers_len, + msg->request_headers, + &req_method, + &req_path, + &version); + if (!SOUP_STATUS_IS_SUCCESSFUL (status)) + return status; + + g_object_set (G_OBJECT (msg), + SOUP_MESSAGE_METHOD, req_method, + SOUP_MESSAGE_HTTP_VERSION, version, + NULL); + g_free (req_method); + + /* Handle request body encoding */ + *encoding = soup_message_headers_get_encoding (msg->request_headers); + if (*encoding == SOUP_ENCODING_UNRECOGNIZED) { + if (soup_message_headers_get_list (msg->request_headers, "Transfer-Encoding")) + return SOUP_STATUS_NOT_IMPLEMENTED; + else + return SOUP_STATUS_BAD_REQUEST; + } + + /* Generate correct context for request */ + req_host = soup_message_headers_get_one (msg->request_headers, "Host"); + if (req_host && strchr (req_host, '/')) { + g_free (req_path); + return SOUP_STATUS_BAD_REQUEST; + } + + if (!strcmp (req_path, "*") && req_host) { + /* Eg, "OPTIONS * HTTP/1.1" */ + url = g_strdup_printf ("%s://%s", + 
soup_socket_is_ssl (sock) ? "https" : "http", + req_host); + uri = soup_uri_new (url); + if (uri) + soup_uri_set_path (uri, "*"); + g_free (url); + } else if (*req_path != '/') { + /* Must be an absolute URI */ + uri = soup_uri_new (req_path); + } else if (req_host) { + url = g_strdup_printf ("%s://%s%s", + soup_socket_is_ssl (sock) ? "https" : "http", + req_host, req_path); + uri = soup_uri_new (url); + g_free (url); + } else if (priv->http_version == SOUP_HTTP_1_0) { + /* No Host header, no AbsoluteUri */ + SoupAddress *addr = soup_socket_get_local_address (sock); + const char *host = soup_address_get_physical (addr); + + url = g_strdup_printf ("%s://%s:%d%s", + soup_socket_is_ssl (sock) ? "https" : "http", + host, soup_address_get_port (addr), + req_path); + uri = soup_uri_new (url); + g_free (url); + } else + uri = NULL; + + g_free (req_path); + if (!uri) + return SOUP_STATUS_BAD_REQUEST; + soup_message_set_uri (msg, uri); + soup_uri_free (uri); + + return SOUP_STATUS_OK; +} + +static void +handle_partial_get (SoupMessage *msg) +{ + SoupRange *ranges; + int nranges; + SoupBuffer *full_response; + + /* Make sure the message is set up right for us to return a + * partial response; it has to be a GET, the status must be + * 200 OK (and in particular, NOT already 206 Partial + * Content), and the SoupServer must have already filled in + * the response body + */ + if (msg->method != SOUP_METHOD_GET || + msg->status_code != SOUP_STATUS_OK || + soup_message_headers_get_encoding (msg->response_headers) != + SOUP_ENCODING_CONTENT_LENGTH || + msg->response_body->length == 0 || + !soup_message_body_get_accumulate (msg->response_body)) + return; + + /* Oh, and there has to have been a valid Range header on the + * request, of course. 
+ */ + if (!soup_message_headers_get_ranges (msg->request_headers, + msg->response_body->length, + &ranges, &nranges)) + return; + + full_response = soup_message_body_flatten (msg->response_body); + if (!full_response) { + soup_message_headers_free_ranges (msg->request_headers, ranges); + return; + } + + soup_message_set_status (msg, SOUP_STATUS_PARTIAL_CONTENT); + soup_message_body_truncate (msg->response_body); + + if (nranges == 1) { + SoupBuffer *range_buf; + + /* Single range, so just set Content-Range and fix the body. */ + + soup_message_headers_set_content_range (msg->response_headers, + ranges[0].start, + ranges[0].end, + full_response->length); + range_buf = soup_buffer_new_subbuffer (full_response, + ranges[0].start, + ranges[0].end - ranges[0].start + 1); + soup_message_body_append_buffer (msg->response_body, range_buf); + soup_buffer_free (range_buf); + } else { + SoupMultipart *multipart; + SoupMessageHeaders *part_headers; + SoupBuffer *part_body; + const char *content_type; + int i; + + /* Multiple ranges, so build a multipart/byteranges response + * to replace msg->response_body with. 
+ */ + + multipart = soup_multipart_new ("multipart/byteranges"); + content_type = soup_message_headers_get_one (msg->response_headers, + "Content-Type"); + for (i = 0; i < nranges; i++) { + part_headers = soup_message_headers_new (SOUP_MESSAGE_HEADERS_MULTIPART); + if (content_type) { + soup_message_headers_append (part_headers, + "Content-Type", + content_type); + } + soup_message_headers_set_content_range (part_headers, + ranges[i].start, + ranges[i].end, + full_response->length); + part_body = soup_buffer_new_subbuffer (full_response, + ranges[i].start, + ranges[i].end - ranges[i].start + 1); + soup_multipart_append_part (multipart, part_headers, + part_body); + soup_message_headers_free (part_headers); + soup_buffer_free (part_body); + } + + soup_multipart_to_message (multipart, msg->response_headers, + msg->response_body); + soup_multipart_free (multipart); + } + + soup_buffer_free (full_response); + soup_message_headers_free_ranges (msg->request_headers, ranges); +} + +static void +get_response_headers (SoupMessage *msg, GString *headers, + SoupEncoding *encoding, gpointer user_data) +{ + SoupEncoding claimed_encoding; + SoupMessageHeadersIter iter; + const char *name, *value; + + handle_partial_get (msg); + + g_string_append_printf (headers, "HTTP/1.%c %d %s\r\n", + soup_message_get_http_version (msg) == SOUP_HTTP_1_0 ? 
'0' : '1', + msg->status_code, msg->reason_phrase); + + claimed_encoding = soup_message_headers_get_encoding (msg->response_headers); + if ((msg->method == SOUP_METHOD_HEAD || + msg->status_code == SOUP_STATUS_NO_CONTENT || + msg->status_code == SOUP_STATUS_NOT_MODIFIED || + SOUP_STATUS_IS_INFORMATIONAL (msg->status_code)) || + (msg->method == SOUP_METHOD_CONNECT && + SOUP_STATUS_IS_SUCCESSFUL (msg->status_code))) + *encoding = SOUP_ENCODING_NONE; + else + *encoding = claimed_encoding; + + if (claimed_encoding == SOUP_ENCODING_CONTENT_LENGTH && + !soup_message_headers_get_content_length (msg->response_headers)) { + soup_message_headers_set_content_length (msg->response_headers, + msg->response_body->length); + } + + soup_message_headers_iter_init (&iter, msg->response_headers); + while (soup_message_headers_iter_next (&iter, &name, &value)) + g_string_append_printf (headers, "%s: %s\r\n", name, value); + g_string_append (headers, "\r\n"); +} + +void +soup_message_read_request (SoupMessage *msg, + SoupSocket *sock, + SoupMessageCompletionFn completion_cb, + gpointer user_data) +{ + soup_message_io_server (msg, sock, + get_response_headers, + parse_request_headers, + sock, + completion_cb, user_data); +} diff --git a/libsoup/soup-message.c b/libsoup/soup-message.c new file mode 100644 index 0000000..9aa1209 --- /dev/null +++ b/libsoup/soup-message.c @@ -0,0 +1,1905 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * soup-message.c: HTTP request/response + * + * Copyright (C) 2000-2003, Ximian, Inc. + */ + +#include +#include + +#include "soup-address.h" +#include "soup-auth.h" +#include "soup-enum-types.h" +#include "soup-marshal.h" +#include "soup-message.h" +#include "soup-message-private.h" +#include "soup-misc.h" +#include "soup-uri.h" + +/** + * SECTION:soup-message + * @short_description: An HTTP request and response. 
+ * @see_also: #SoupMessageHeaders, #SoupMessageBody + * + * A #SoupMessage represents an HTTP message that is being sent or + * received. + * + * For client-side usage, you would create a #SoupMessage with + * soup_message_new() or soup_message_new_from_uri(), set up its + * fields appropriately, and send it via a #SoupSession. + * + * For server-side usage, #SoupServer will create #SoupMessages automatically for incoming requests, which your application + * will receive via handlers. + * + * Note that libsoup's terminology here does not quite match the HTTP + * specification: in RFC 2616, an "HTTP-message" is + * either a Request, or a + * Response. In libsoup, a #SoupMessage combines both the request and + * the response. + **/ + +/** + * SoupMessage: + * @method: the HTTP method + * @status_code: the HTTP status code + * @reason_phrase: the status phrase associated with @status_code + * @request_body: the request body + * @request_headers: the request headers + * @response_body: the response body + * @response_headers: the response headers + * + * Represents an HTTP message being sent or received. + * + * @status_code will normally be a #SoupKnownStatusCode, eg, + * %SOUP_STATUS_OK, though of course it might actually be an unknown + * status code. @reason_phrase is the actual text returned from the + * server, which may or may not correspond to the "standard" + * description of @status_code. At any rate, it is almost certainly + * not localized, and not very descriptive even if it is in the user's + * language; you should not use @reason_phrase in user-visible + * messages. Rather, you should look at @status_code, and determine an + * end-user-appropriate message based on that and on what you were + * trying to do. + * + * As described in the #SoupMessageBody documentation, the + * @request_body and @response_body %data fields will not necessarily + * be filled in at all times.
When they are filled in, they will be + * terminated with a '\0' byte (which is not included in the %length), + * so you can use them as ordinary C strings (assuming that you know + * that the body doesn't have any other '\0' bytes). + * + * For a client-side #SoupMessage, @request_body's %data is usually + * filled in right before libsoup writes the request to the network, + * but you should not count on this; use soup_message_body_flatten() + * if you want to ensure that %data is filled in. @response_body's + * %data will be filled in before #SoupMessage::finished is emitted. + * + * For a server-side #SoupMessage, @request_body's %data will be + * filled in before #SoupMessage::got_body is emitted. + * + * To prevent the %data field from being filled in at all (eg, if you + * are handling the data from a #SoupMessage::got_chunk, and so don't + * need to see it all at the end), call + * soup_message_body_set_accumulate() on @response_body or + * @request_body as appropriate, passing %FALSE. 
+ **/ + +G_DEFINE_TYPE (SoupMessage, soup_message, G_TYPE_OBJECT) + +enum { + WROTE_INFORMATIONAL, + WROTE_HEADERS, + WROTE_CHUNK, + WROTE_BODY_DATA, + WROTE_BODY, + + GOT_INFORMATIONAL, + GOT_HEADERS, + GOT_CHUNK, + GOT_BODY, + CONTENT_SNIFFED, + + RESTARTED, + FINISHED, + + LAST_SIGNAL +}; + +static guint signals[LAST_SIGNAL] = { 0 }; + +enum { + PROP_0, + + PROP_METHOD, + PROP_URI, + PROP_HTTP_VERSION, + PROP_FLAGS, + PROP_SERVER_SIDE, + PROP_STATUS_CODE, + PROP_REASON_PHRASE, + PROP_FIRST_PARTY, + PROP_REQUEST_BODY, + PROP_REQUEST_HEADERS, + PROP_RESPONSE_BODY, + PROP_RESPONSE_HEADERS, + PROP_TLS_CERTIFICATE, + PROP_TLS_ERRORS, + + LAST_PROP +}; + +static void got_body (SoupMessage *req); + +static void set_property (GObject *object, guint prop_id, + const GValue *value, GParamSpec *pspec); +static void get_property (GObject *object, guint prop_id, + GValue *value, GParamSpec *pspec); + +static void +soup_message_init (SoupMessage *msg) +{ + SoupMessagePrivate *priv = SOUP_MESSAGE_GET_PRIVATE (msg); + + priv->http_version = priv->orig_http_version = SOUP_HTTP_1_1; + + msg->request_body = soup_message_body_new (); + msg->request_headers = soup_message_headers_new (SOUP_MESSAGE_HEADERS_REQUEST); + msg->response_body = soup_message_body_new (); + msg->response_headers = soup_message_headers_new (SOUP_MESSAGE_HEADERS_RESPONSE); +} + +static void +finalize (GObject *object) +{ + SoupMessage *msg = SOUP_MESSAGE (object); + SoupMessagePrivate *priv = SOUP_MESSAGE_GET_PRIVATE (msg); + + soup_message_io_cleanup (msg); + if (priv->chunk_allocator_dnotify) + priv->chunk_allocator_dnotify (priv->chunk_allocator_data); + + if (priv->uri) + soup_uri_free (priv->uri); + if (priv->first_party) + soup_uri_free (priv->first_party); + if (priv->addr) + g_object_unref (priv->addr); + + if (priv->auth) + g_object_unref (priv->auth); + if (priv->proxy_auth) + g_object_unref (priv->proxy_auth); + + g_slist_free (priv->disabled_features); + + while (priv->decoders) { + g_object_unref 
(priv->decoders->data); + priv->decoders = g_slist_delete_link (priv->decoders, priv->decoders); + } + + if (priv->tls_certificate) + g_object_unref (priv->tls_certificate); + + soup_message_body_free (msg->request_body); + soup_message_headers_free (msg->request_headers); + soup_message_body_free (msg->response_body); + soup_message_headers_free (msg->response_headers); + + g_free (msg->reason_phrase); + + G_OBJECT_CLASS (soup_message_parent_class)->finalize (object); +} + +static void +soup_message_class_init (SoupMessageClass *message_class) +{ + GObjectClass *object_class = G_OBJECT_CLASS (message_class); + + g_type_class_add_private (message_class, sizeof (SoupMessagePrivate)); + + /* virtual method definition */ + message_class->got_body = got_body; + + /* virtual method override */ + object_class->finalize = finalize; + object_class->set_property = set_property; + object_class->get_property = get_property; + + /* signals */ + + /** + * SoupMessage::wrote-informational: + * @msg: the message + * + * Emitted immediately after writing a 1xx (Informational) + * response for a (server-side) message. + **/ + signals[WROTE_INFORMATIONAL] = + g_signal_new ("wrote_informational", + G_OBJECT_CLASS_TYPE (object_class), + G_SIGNAL_RUN_FIRST, + G_STRUCT_OFFSET (SoupMessageClass, wrote_informational), + NULL, NULL, + soup_marshal_NONE__NONE, + G_TYPE_NONE, 0); + + /** + * SoupMessage::wrote-headers: + * @msg: the message + * + * Emitted immediately after writing the headers for a + * message. (For a client-side message, this is after writing + * the request headers; for a server-side message, it is after + * writing the response headers.) 
+ **/ + signals[WROTE_HEADERS] = + g_signal_new ("wrote_headers", + G_OBJECT_CLASS_TYPE (object_class), + G_SIGNAL_RUN_FIRST, + G_STRUCT_OFFSET (SoupMessageClass, wrote_headers), + NULL, NULL, + soup_marshal_NONE__NONE, + G_TYPE_NONE, 0); + + /** + * SoupMessage::wrote-chunk: + * @msg: the message + * + * Emitted immediately after writing a body chunk for a message. + * + * Note that this signal is not parallel to + * #SoupMessage::got_chunk; it is emitted only when a complete + * chunk (added with soup_message_body_append() or + * soup_message_body_append_buffer()) has been written. To get + * more useful continuous progress information, use + * #SoupMessage::wrote_body_data. + **/ + signals[WROTE_CHUNK] = + g_signal_new ("wrote_chunk", + G_OBJECT_CLASS_TYPE (object_class), + G_SIGNAL_RUN_FIRST, + G_STRUCT_OFFSET (SoupMessageClass, wrote_chunk), + NULL, NULL, + soup_marshal_NONE__NONE, + G_TYPE_NONE, 0); + + /** + * SoupMessage::wrote-body-data: + * @msg: the message + * @chunk: the data written + * + * Emitted immediately after writing a portion of the message + * body to the network. + * + * Unlike #SoupMessage::wrote_chunk, this is emitted after + * every successful write() call, not only after finishing a + * complete "chunk". + * + * Since: 2.4.1 + **/ + signals[WROTE_BODY_DATA] = + g_signal_new ("wrote_body_data", + G_OBJECT_CLASS_TYPE (object_class), + G_SIGNAL_RUN_FIRST, + 0, /* FIXME after next ABI break */ + NULL, NULL, + soup_marshal_NONE__BOXED, + G_TYPE_NONE, 1, + SOUP_TYPE_BUFFER); + + /** + * SoupMessage::wrote-body: + * @msg: the message + * + * Emitted immediately after writing the complete body for a + * message. (For a client-side message, this means that + * libsoup is done writing and is now waiting for the response + * from the server. For a server-side message, this means that + * libsoup has finished writing the response and is nearly + * done with the message.) 
+ **/ + signals[WROTE_BODY] = + g_signal_new ("wrote_body", + G_OBJECT_CLASS_TYPE (object_class), + G_SIGNAL_RUN_FIRST, + G_STRUCT_OFFSET (SoupMessageClass, wrote_body), + NULL, NULL, + soup_marshal_NONE__NONE, + G_TYPE_NONE, 0); + + /** + * SoupMessage::got-informational: + * @msg: the message + * + * Emitted after receiving a 1xx (Informational) response for + * a (client-side) message. The response_headers will be + * filled in with the headers associated with the + * informational response; however, those header values will + * be erased after this signal is done. + * + * If you cancel or requeue @msg while processing this signal, + * then the current HTTP I/O will be stopped after this signal + * emission finished, and @msg's connection will be closed. + **/ + signals[GOT_INFORMATIONAL] = + g_signal_new ("got_informational", + G_OBJECT_CLASS_TYPE (object_class), + G_SIGNAL_RUN_FIRST, + G_STRUCT_OFFSET (SoupMessageClass, got_informational), + NULL, NULL, + soup_marshal_NONE__NONE, + G_TYPE_NONE, 0); + + /** + * SoupMessage::got-headers: + * @msg: the message + * + * Emitted after receiving all message headers for a message. + * (For a client-side message, this is after receiving the + * Status-Line and response headers; for a server-side + * message, it is after receiving the Request-Line and request + * headers.) + * + * See also soup_message_add_header_handler() and + * soup_message_add_status_code_handler(), which can be used + * to connect to a subset of emissions of this signal. + * + * If you cancel or requeue @msg while processing this signal, + * then the current HTTP I/O will be stopped after this signal + * emission finished, and @msg's connection will be closed. + * (If you need to requeue a message--eg, after handling + * authentication or redirection--it is usually better to + * requeue it from a #SoupMessage::got_body handler rather + * than a #SoupMessage::got_headers handler, so that the + * existing HTTP connection can be reused.)
+ **/ + signals[GOT_HEADERS] = + g_signal_new ("got_headers", + G_OBJECT_CLASS_TYPE (object_class), + G_SIGNAL_RUN_FIRST, + G_STRUCT_OFFSET (SoupMessageClass, got_headers), + NULL, NULL, + soup_marshal_NONE__NONE, + G_TYPE_NONE, 0); + + /** + * SoupMessage::got-chunk: + * @msg: the message + * @chunk: the just-read chunk + * + * Emitted after receiving a chunk of a message body. Note + * that "chunk" in this context means any subpiece of the + * body, not necessarily the specific HTTP 1.1 chunks sent by + * the other side. + * + * If you cancel or requeue @msg while processing this signal, + * then the current HTTP I/O will be stopped after this signal + * emission finished, and @msg's connection will be closed. + **/ + signals[GOT_CHUNK] = + g_signal_new ("got_chunk", + G_OBJECT_CLASS_TYPE (object_class), + G_SIGNAL_RUN_FIRST, + G_STRUCT_OFFSET (SoupMessageClass, got_chunk), + NULL, NULL, + soup_marshal_NONE__BOXED, + G_TYPE_NONE, 1, + /* Use %G_SIGNAL_TYPE_STATIC_SCOPE so that + * the %SOUP_MEMORY_TEMPORARY buffers used + * by soup-message-io.c when emitting this + * signal don't get forcibly copied by + * g_signal_emit(). + */ + SOUP_TYPE_BUFFER | G_SIGNAL_TYPE_STATIC_SCOPE); + + /** + * SoupMessage::got-body: + * @msg: the message + * + * Emitted after receiving the complete message body. (For a + * server-side message, this means it has received the request + * body. For a client-side message, this means it has received + * the response body and is nearly done with the message.) + * + * See also soup_message_add_header_handler() and + * soup_message_add_status_code_handler(), which can be used + * to connect to a subset of emissions of this signal. 
+ **/ + signals[GOT_BODY] = + g_signal_new ("got_body", + G_OBJECT_CLASS_TYPE (object_class), + G_SIGNAL_RUN_FIRST, + G_STRUCT_OFFSET (SoupMessageClass, got_body), + NULL, NULL, + soup_marshal_NONE__NONE, + G_TYPE_NONE, 0); + + /** + * SoupMessage::content-sniffed: + * @msg: the message + * @type: the content type that we got from sniffing + * @params: (element-type utf8 utf8): a #GHashTable with the parameters + * + * This signal is emitted after %got-headers, and before the + * first %got-chunk. If content sniffing is disabled, or no + * content sniffing will be performed, due to the sniffer + * deciding to trust the Content-Type sent by the server, this + * signal is emitted immediately after %got_headers, and @type + * is %NULL. + * + * If the #SoupContentSniffer feature is enabled, and the + * sniffer decided to perform sniffing, the first %got_chunk + * emission may be delayed, so that the sniffer has enough + * data to correctly sniff the content. It notifies the + * library user that the content has been sniffed, and allows + * it to change the header contents in the message, if + * desired. + * + * After this signal is emitted, the data that was spooled so + * that sniffing could be done is delivered on the first + * emission of %got_chunk. + * + * Since: 2.27.3 + **/ + signals[CONTENT_SNIFFED] = + g_signal_new ("content_sniffed", + G_OBJECT_CLASS_TYPE (object_class), + G_SIGNAL_RUN_FIRST, + 0, + NULL, NULL, + soup_marshal_NONE__STRING_BOXED, + G_TYPE_NONE, 2, + G_TYPE_STRING, + G_TYPE_HASH_TABLE); + + /** + * SoupMessage::restarted: + * @msg: the message + * + * Emitted when a request that was already sent once is now + * being sent again (eg, because the first attempt received a + * redirection response, or because we needed to use + * authentication).
+ **/ + signals[RESTARTED] = + g_signal_new ("restarted", + G_OBJECT_CLASS_TYPE (object_class), + G_SIGNAL_RUN_FIRST, + G_STRUCT_OFFSET (SoupMessageClass, restarted), + NULL, NULL, + soup_marshal_NONE__NONE, + G_TYPE_NONE, 0); + + /** + * SoupMessage::finished: + * @msg: the message + * + * Emitted when all HTTP processing is finished for a message. + * (After #SoupMessage::got_body for client-side messages, or + * after #SoupMessage::wrote_body for server-side messages.) + **/ + signals[FINISHED] = + g_signal_new ("finished", + G_OBJECT_CLASS_TYPE (object_class), + G_SIGNAL_RUN_FIRST, + G_STRUCT_OFFSET (SoupMessageClass, finished), + NULL, NULL, + soup_marshal_NONE__NONE, + G_TYPE_NONE, 0); + + /* properties */ + /** + * SOUP_MESSAGE_METHOD: + * + * Alias for the #SoupMessage:method property. (The message's + * HTTP method.) + **/ + g_object_class_install_property ( + object_class, PROP_METHOD, + g_param_spec_string (SOUP_MESSAGE_METHOD, + "Method", + "The message's HTTP method", + SOUP_METHOD_GET, + G_PARAM_READWRITE)); + /** + * SOUP_MESSAGE_URI: + * + * Alias for the #SoupMessage:uri property. (The message's + * #SoupURI.) + **/ + g_object_class_install_property ( + object_class, PROP_URI, + g_param_spec_boxed (SOUP_MESSAGE_URI, + "URI", + "The message's Request-URI", + SOUP_TYPE_URI, + G_PARAM_READWRITE)); + /** + * SOUP_MESSAGE_HTTP_VERSION: + * + * Alias for the #SoupMessage:http-version property. (The + * message's #SoupHTTPVersion.) + **/ + g_object_class_install_property ( + object_class, PROP_HTTP_VERSION, + g_param_spec_enum (SOUP_MESSAGE_HTTP_VERSION, + "HTTP Version", + "The HTTP protocol version to use", + SOUP_TYPE_HTTP_VERSION, + SOUP_HTTP_1_1, + G_PARAM_READWRITE)); + /** + * SOUP_MESSAGE_FLAGS: + * + * Alias for the #SoupMessage:flags property. (The message's + * #SoupMessageFlags.) 
+ **/ + g_object_class_install_property ( + object_class, PROP_FLAGS, + g_param_spec_flags (SOUP_MESSAGE_FLAGS, + "Flags", + "Various message options", + SOUP_TYPE_MESSAGE_FLAGS, + 0, + G_PARAM_READWRITE)); + /** + * SOUP_MESSAGE_SERVER_SIDE: + * + * Alias for the #SoupMessage:server-side property. (%TRUE if + * the message was created by #SoupServer.) + **/ + g_object_class_install_property ( + object_class, PROP_SERVER_SIDE, + g_param_spec_boolean (SOUP_MESSAGE_SERVER_SIDE, + "Server-side", + "Whether or not the message is server-side rather than client-side", + FALSE, + G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY)); + /** + * SOUP_MESSAGE_STATUS_CODE: + * + * Alias for the #SoupMessage:status-code property. (The + * message's HTTP response status code.) + **/ + g_object_class_install_property ( + object_class, PROP_STATUS_CODE, + g_param_spec_uint (SOUP_MESSAGE_STATUS_CODE, + "Status code", + "The HTTP response status code", + 0, 599, 0, + G_PARAM_READWRITE)); + /** + * SOUP_MESSAGE_REASON_PHRASE: + * + * Alias for the #SoupMessage:reason-phrase property. (The + * message's HTTP response reason phrase.) + **/ + g_object_class_install_property ( + object_class, PROP_REASON_PHRASE, + g_param_spec_string (SOUP_MESSAGE_REASON_PHRASE, + "Reason phrase", + "The HTTP response reason phrase", + NULL, + G_PARAM_READWRITE)); + /** + * SOUP_MESSAGE_FIRST_PARTY: + * + * Alias for the #SoupMessage:first-party property. (The + * #SoupURI loaded in the application when the message was + * queued.) + * + * Since: 2.30 + **/ + /** + * SoupMessage:first-party: + * + * The #SoupURI loaded in the application when the message was + * queued. 
+ * + * Since: 2.30 + */ + g_object_class_install_property ( + object_class, PROP_FIRST_PARTY, + g_param_spec_boxed (SOUP_MESSAGE_FIRST_PARTY, + "First party", + "The URI loaded in the application when the message was requested.", + SOUP_TYPE_URI, + G_PARAM_READWRITE)); + /** + * SOUP_MESSAGE_REQUEST_BODY: + * + * Alias for the #SoupMessage:request-body property. (The + * message's HTTP request body.) + **/ + g_object_class_install_property ( + object_class, PROP_REQUEST_BODY, + g_param_spec_boxed (SOUP_MESSAGE_REQUEST_BODY, + "Request Body", + "The HTTP request content", + SOUP_TYPE_MESSAGE_BODY, + G_PARAM_READABLE)); + /** + * SOUP_MESSAGE_REQUEST_HEADERS: + * + * Alias for the #SoupMessage:request-headers property. (The + * message's HTTP request headers.) + **/ + g_object_class_install_property ( + object_class, PROP_REQUEST_HEADERS, + g_param_spec_boxed (SOUP_MESSAGE_REQUEST_HEADERS, + "Request Headers", + "The HTTP request headers", + SOUP_TYPE_MESSAGE_HEADERS, + G_PARAM_READABLE)); + /** + * SOUP_MESSAGE_RESPONSE_BODY: + * + * Alias for the #SoupMessage:response-body property. (The + * message's HTTP response body.) + **/ + g_object_class_install_property ( + object_class, PROP_RESPONSE_BODY, + g_param_spec_boxed (SOUP_MESSAGE_RESPONSE_BODY, + "Response Body", + "The HTTP response content", + SOUP_TYPE_MESSAGE_BODY, + G_PARAM_READABLE)); + /** + * SOUP_MESSAGE_RESPONSE_HEADERS: + * + * Alias for the #SoupMessage:response-headers property. (The + * message's HTTP response headers.) + **/ + g_object_class_install_property ( + object_class, PROP_RESPONSE_HEADERS, + g_param_spec_boxed (SOUP_MESSAGE_RESPONSE_HEADERS, + "Response Headers", + "The HTTP response headers", + SOUP_TYPE_MESSAGE_HEADERS, + G_PARAM_READABLE)); + /** + * SOUP_MESSAGE_TLS_CERTIFICATE: + * + * Alias for the #SoupMessage:tls-certificate property. (The + * TLS certificate associated with the message, if any.) 
+ * + * Since: 2.34 + **/ + /** + * SoupMessage:tls-certificate: + * + * The #GTlsCertificate associated with the message + * + * Since: 2.34 + */ + g_object_class_install_property ( + object_class, PROP_TLS_CERTIFICATE, + g_param_spec_object (SOUP_MESSAGE_TLS_CERTIFICATE, + "TLS Certificate", + "The TLS certificate associated with the message", + G_TYPE_TLS_CERTIFICATE, + G_PARAM_READWRITE)); + /** + * SOUP_MESSAGE_TLS_ERRORS: + * + * Alias for the #SoupMessage:tls-errors property. (The + * verification errors on #SoupMessage:tls-certificate.) + * + * Since: 2.34 + **/ + /** + * SoupMessage:tls-certificate: + * + * The verification errors on #SoupMessage:tls-certificate + * + * Since: 2.34 + */ + g_object_class_install_property ( + object_class, PROP_TLS_ERRORS, + g_param_spec_flags (SOUP_MESSAGE_TLS_ERRORS, + "TLS Errors", + "The verification errors on the message's TLS certificate", + G_TYPE_TLS_CERTIFICATE_FLAGS, 0, + G_PARAM_READWRITE)); +} + +static void +set_property (GObject *object, guint prop_id, + const GValue *value, GParamSpec *pspec) +{ + SoupMessage *msg = SOUP_MESSAGE (object); + SoupMessagePrivate *priv = SOUP_MESSAGE_GET_PRIVATE (msg); + + switch (prop_id) { + case PROP_METHOD: + msg->method = g_intern_string (g_value_get_string (value)); + break; + case PROP_URI: + soup_message_set_uri (msg, g_value_get_boxed (value)); + break; + case PROP_HTTP_VERSION: + soup_message_set_http_version (msg, g_value_get_enum (value)); + break; + case PROP_FLAGS: + soup_message_set_flags (msg, g_value_get_flags (value)); + break; + case PROP_SERVER_SIDE: + priv->server_side = g_value_get_boolean (value); + if (priv->server_side) { + soup_message_headers_set_encoding (msg->response_headers, + SOUP_ENCODING_CONTENT_LENGTH); + } + break; + case PROP_STATUS_CODE: + soup_message_set_status (msg, g_value_get_uint (value)); + break; + case PROP_REASON_PHRASE: + soup_message_set_status_full (msg, msg->status_code, + g_value_get_string (value)); + break; + case 
PROP_FIRST_PARTY: + soup_message_set_first_party (msg, g_value_get_boxed (value)); + break; + case PROP_TLS_CERTIFICATE: + if (priv->tls_certificate) + g_object_unref (priv->tls_certificate); + priv->tls_certificate = g_value_dup_object (value); + if (priv->tls_certificate && !priv->tls_errors) + priv->msg_flags |= SOUP_MESSAGE_CERTIFICATE_TRUSTED; + break; + case PROP_TLS_ERRORS: + priv->tls_errors = g_value_get_flags (value); + if (priv->tls_errors) + priv->msg_flags &= ~SOUP_MESSAGE_CERTIFICATE_TRUSTED; + else if (priv->tls_certificate) + priv->msg_flags |= SOUP_MESSAGE_CERTIFICATE_TRUSTED; + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); + break; + } +} + +static void +get_property (GObject *object, guint prop_id, + GValue *value, GParamSpec *pspec) +{ + SoupMessage *msg = SOUP_MESSAGE (object); + SoupMessagePrivate *priv = SOUP_MESSAGE_GET_PRIVATE (msg); + + switch (prop_id) { + case PROP_METHOD: + g_value_set_string (value, msg->method); + break; + case PROP_URI: + g_value_set_boxed (value, priv->uri); + break; + case PROP_HTTP_VERSION: + g_value_set_enum (value, priv->http_version); + break; + case PROP_FLAGS: + g_value_set_flags (value, priv->msg_flags); + break; + case PROP_SERVER_SIDE: + g_value_set_boolean (value, priv->server_side); + break; + case PROP_STATUS_CODE: + g_value_set_uint (value, msg->status_code); + break; + case PROP_REASON_PHRASE: + g_value_set_string (value, msg->reason_phrase); + break; + case PROP_FIRST_PARTY: + g_value_set_boxed (value, priv->first_party); + break; + case PROP_REQUEST_BODY: + g_value_set_boxed (value, msg->request_body); + break; + case PROP_REQUEST_HEADERS: + g_value_set_boxed (value, msg->request_headers); + break; + case PROP_RESPONSE_BODY: + g_value_set_boxed (value, msg->response_body); + break; + case PROP_RESPONSE_HEADERS: + g_value_set_boxed (value, msg->response_headers); + break; + case PROP_TLS_CERTIFICATE: + g_value_set_object (value, priv->tls_certificate); + break; + 
case PROP_TLS_ERRORS: + g_value_set_flags (value, priv->tls_errors); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); + break; + } +} + + +/** + * soup_message_new: + * @method: the HTTP method for the created request + * @uri_string: the destination endpoint (as a string) + * + * Creates a new empty #SoupMessage, which will connect to @uri + * + * Return value: the new #SoupMessage (or %NULL if @uri could not + * be parsed). + */ +SoupMessage * +soup_message_new (const char *method, const char *uri_string) +{ + SoupMessage *msg; + SoupURI *uri; + + g_return_val_if_fail (method != NULL, NULL); + g_return_val_if_fail (uri_string != NULL, NULL); + + uri = soup_uri_new (uri_string); + if (!uri) + return NULL; + if (!uri->host) { + soup_uri_free (uri); + return NULL; + } + + msg = soup_message_new_from_uri (method, uri); + soup_uri_free (uri); + return msg; +} + +/** + * soup_message_new_from_uri: + * @method: the HTTP method for the created request + * @uri: the destination endpoint (as a #SoupURI) + * + * Creates a new empty #SoupMessage, which will connect to @uri + * + * Return value: the new #SoupMessage + */ +SoupMessage * +soup_message_new_from_uri (const char *method, SoupURI *uri) +{ + return g_object_new (SOUP_TYPE_MESSAGE, + SOUP_MESSAGE_METHOD, method, + SOUP_MESSAGE_URI, uri, + NULL); +} + +/** + * soup_message_set_request: + * @msg: the message + * @content_type: MIME Content-Type of the body + * @req_use: a #SoupMemoryUse describing how to handle @req_body + * @req_body: a data buffer containing the body of the message request. + * @req_length: the byte length of @req_body. + * + * Convenience function to set the request body of a #SoupMessage. If + * @content_type is %NULL, the request body must be empty as well. 
+ */ +void +soup_message_set_request (SoupMessage *msg, + const char *content_type, + SoupMemoryUse req_use, + const char *req_body, + gsize req_length) +{ + g_return_if_fail (SOUP_IS_MESSAGE (msg)); + g_return_if_fail (content_type != NULL || req_length == 0); + + if (content_type) { + soup_message_headers_replace (msg->request_headers, + "Content-Type", content_type); + soup_message_body_append (msg->request_body, req_use, + req_body, req_length); + } else { + soup_message_headers_remove (msg->request_headers, + "Content-Type"); + soup_message_body_truncate (msg->request_body); + } +} + +/** + * soup_message_set_response: + * @msg: the message + * @content_type: (allow-none): MIME Content-Type of the body + * @resp_use: a #SoupMemoryUse describing how to handle @resp_body + * @resp_body: (array length=resp_length) (element-type guint8): a data buffer + * containing the body of the message response. + * @resp_length: the byte length of @resp_body. + * + * Convenience function to set the response body of a #SoupMessage. If + * @content_type is %NULL, the response body must be empty as well. + */ +void +soup_message_set_response (SoupMessage *msg, + const char *content_type, + SoupMemoryUse resp_use, + const char *resp_body, + gsize resp_length) +{ + g_return_if_fail (SOUP_IS_MESSAGE (msg)); + g_return_if_fail (content_type != NULL || resp_length == 0); + + if (content_type) { + soup_message_headers_replace (msg->response_headers, + "Content-Type", content_type); + soup_message_body_append (msg->response_body, resp_use, + resp_body, resp_length); + } else { + soup_message_headers_remove (msg->response_headers, + "Content-Type"); + soup_message_body_truncate (msg->response_body); + } +} + +/** + * soup_message_wrote_informational: + * @msg: a #SoupMessage + * + * Emits the %wrote_informational signal, indicating that the IO layer + * finished writing an informational (1xx) response for @msg. 
 **/
void
soup_message_wrote_informational (SoupMessage *msg)
{
	/* Thin wrapper: no detail and no extra signal arguments. */
	g_signal_emit (msg, signals[WROTE_INFORMATIONAL], 0);
}

/**
 * soup_message_wrote_headers:
 * @msg: a #SoupMessage
 *
 * Emits the %wrote_headers signal, indicating that the IO layer
 * finished writing the (non-informational) headers for @msg.
 **/
void
soup_message_wrote_headers (SoupMessage *msg)
{
	/* Thin wrapper: no detail and no extra signal arguments. */
	g_signal_emit (msg, signals[WROTE_HEADERS], 0);
}

/**
 * soup_message_wrote_chunk:
 * @msg: a #SoupMessage
 *
 * Emits the %wrote_chunk signal, indicating that the IO layer
 * finished writing a chunk of @msg's body.
 **/
void
soup_message_wrote_chunk (SoupMessage *msg)
{
	/* Thin wrapper: no detail and no extra signal arguments. */
	g_signal_emit (msg, signals[WROTE_CHUNK], 0);
}

/**
 * soup_message_wrote_body_data:
 * @msg: a #SoupMessage
 * @chunk: the data written
 *
 * Emits the %wrote_body_data signal, indicating that the IO layer
 * finished writing a portion of @msg's body.
 **/
void
soup_message_wrote_body_data (SoupMessage *msg, SoupBuffer *chunk)
{
	/* @chunk is forwarded to handlers as the only signal argument. */
	g_signal_emit (msg, signals[WROTE_BODY_DATA], 0, chunk);
}

/**
 * soup_message_wrote_body:
 * @msg: a #SoupMessage
 *
 * Emits the %wrote_body signal, indicating that the IO layer finished
 * writing the body for @msg.
 **/
void
soup_message_wrote_body (SoupMessage *msg)
{
	/* Thin wrapper: no detail and no extra signal arguments. */
	g_signal_emit (msg, signals[WROTE_BODY], 0);
}

/**
 * soup_message_got_informational:
 * @msg: a #SoupMessage
 *
 * Emits the %got_informational signal, indicating that the IO layer
 * read a complete informational (1xx) response for @msg.
 **/
void
soup_message_got_informational (SoupMessage *msg)
{
	/* Thin wrapper: no detail and no extra signal arguments. */
	g_signal_emit (msg, signals[GOT_INFORMATIONAL], 0);
}

/**
 * soup_message_got_headers:
 * @msg: a #SoupMessage
 *
 * Emits the %got_headers signal, indicating that the IO layer
 * finished reading the (non-informational) headers for @msg.
 **/
void
soup_message_got_headers (SoupMessage *msg)
{
	/* Thin wrapper: no detail and no extra signal arguments. */
	g_signal_emit (msg, signals[GOT_HEADERS], 0);
}

/**
 * soup_message_got_chunk:
 * @msg: a #SoupMessage
 * @chunk: the newly-read chunk
 *
 * Emits the %got_chunk signal, indicating that the IO layer finished
 * reading a chunk of @msg's body.
 **/
void
soup_message_got_chunk (SoupMessage *msg, SoupBuffer *chunk)
{
	/* @chunk is forwarded to handlers as the only signal argument. */
	g_signal_emit (msg, signals[GOT_CHUNK], 0, chunk);
}

/*
 * Flattens the incoming body of @req (the request body on the server
 * side, the response body otherwise) when that body is accumulating.
 * The buffer returned by soup_message_body_flatten() is released
 * immediately; only the call itself is wanted here.
 * NOTE(review): presumably installed as the class handler for the
 * got_body signal so that the flattened data is available to later
 * handlers -- confirm against the class-init code.
 */
static void
got_body (SoupMessage *req)
{
	SoupMessagePrivate *priv = SOUP_MESSAGE_GET_PRIVATE (req);
	SoupMessageBody *body;

	body = priv->server_side ? req->request_body : req->response_body;
	if (soup_message_body_get_accumulate (body)) {
		SoupBuffer *buffer;

		buffer = soup_message_body_flatten (body);
		soup_buffer_free (buffer);
	}
}

/**
 * soup_message_got_body:
 * @msg: a #SoupMessage
 *
 * Emits the %got_body signal, indicating that the IO layer finished
 * reading the body for @msg.
 **/
void
soup_message_got_body (SoupMessage *msg)
{
	/* Thin wrapper: no detail and no extra signal arguments. */
	g_signal_emit (msg, signals[GOT_BODY], 0);
}

/**
 * soup_message_content_sniffed:
 * @msg: a #SoupMessage
 * @content_type: a string with the sniffed content type
 * @params: a #GHashTable with the parameters
 *
 * Emits the %content_sniffed signal, indicating that the IO layer
 * finished sniffing the content type for @msg. If content sniffing
 * will not be performed, due to the sniffer deciding to trust the
 * Content-Type sent by the server, this signal is emitted immediately
 * after %got_headers, with %NULL as @content_type.
 **/
void
soup_message_content_sniffed (SoupMessage *msg, const char *content_type, GHashTable *params)
{
	/* Both @content_type and @params are passed through unchanged. */
	g_signal_emit (msg, signals[CONTENT_SNIFFED], 0, content_type, params);
}

/**
 * soup_message_restarted:
 * @msg: a #SoupMessage
 *
 * Emits the %restarted signal, indicating that @msg should be
 * requeued.
+ **/ +void +soup_message_restarted (SoupMessage *msg) +{ + SoupMessagePrivate *priv = SOUP_MESSAGE_GET_PRIVATE (msg); + + if (priv->msg_flags & SOUP_MESSAGE_CAN_REBUILD) + soup_message_body_truncate (msg->request_body); + + g_signal_emit (msg, signals[RESTARTED], 0); +} + +/** + * soup_message_finished: + * @msg: a #SoupMessage + * + * Emits the %finished signal, indicating that @msg has been completely + * processed. + **/ +void +soup_message_finished (SoupMessage *msg) +{ + g_signal_emit (msg, signals[FINISHED], 0); +} + +static void +header_handler_free (gpointer header_name, GClosure *closure) +{ + g_free (header_name); +} + +static void +header_handler_metamarshal (GClosure *closure, GValue *return_value, + guint n_param_values, const GValue *param_values, + gpointer invocation_hint, gpointer marshal_data) +{ + SoupMessage *msg = g_value_get_object (¶m_values[0]); + SoupMessagePrivate *priv = SOUP_MESSAGE_GET_PRIVATE (msg); + const char *header_name = marshal_data; + SoupMessageHeaders *hdrs; + +#ifdef FIXME + if (priv->io_status != SOUP_MESSAGE_IO_STATUS_RUNNING) + return; +#endif + + hdrs = priv->server_side ? msg->request_headers : msg->response_headers; + if (soup_message_headers_get_one (hdrs, header_name)) { + closure->marshal (closure, return_value, n_param_values, + param_values, invocation_hint, + ((GCClosure *)closure)->callback); + } +} + +/** + * soup_message_add_header_handler: + * @msg: a #SoupMessage + * @signal: signal to connect the handler to. + * @header: HTTP response header to match against + * @callback: the header handler + * @user_data: data to pass to @handler_cb + * + * Adds a signal handler to @msg for @signal, as with + * g_signal_connect(), but with two differences: the @callback will + * only be run if @msg has a header named @header, and it will only be + * run if no earlier handler cancelled or requeued the message. 
+ * + * If @signal is one of the "got" signals (eg, "got_headers"), or + * "finished" or "restarted", then @header is matched against the + * incoming message headers (that is, the #request_headers for a + * client #SoupMessage, or the #response_headers for a server + * #SoupMessage). If @signal is one of the "wrote" signals, then + * @header is matched against the outgoing message headers. + * + * Return value: the handler ID from g_signal_connect() + **/ +guint +soup_message_add_header_handler (SoupMessage *msg, + const char *signal, + const char *header, + GCallback callback, + gpointer user_data) +{ + GClosure *closure; + char *header_name; + + g_return_val_if_fail (SOUP_IS_MESSAGE (msg), 0); + g_return_val_if_fail (signal != NULL, 0); + g_return_val_if_fail (header != NULL, 0); + g_return_val_if_fail (callback != NULL, 0); + + closure = g_cclosure_new (callback, user_data, NULL); + + header_name = g_strdup (header); + g_closure_set_meta_marshal (closure, header_name, + header_handler_metamarshal); + g_closure_add_finalize_notifier (closure, header_name, + header_handler_free); + + return g_signal_connect_closure (msg, signal, closure, FALSE); +} + +static void +status_handler_metamarshal (GClosure *closure, GValue *return_value, + guint n_param_values, const GValue *param_values, + gpointer invocation_hint, gpointer marshal_data) +{ + SoupMessage *msg = g_value_get_object (¶m_values[0]); + guint status = GPOINTER_TO_UINT (marshal_data); + +#ifdef FIXME + if (priv->io_status != SOUP_MESSAGE_IO_STATUS_RUNNING) + return; +#endif + + if (msg->status_code == status) { + closure->marshal (closure, return_value, n_param_values, + param_values, invocation_hint, + ((GCClosure *)closure)->callback); + } +} + +/** + * soup_message_add_status_code_handler: + * @msg: a #SoupMessage + * @signal: signal to connect the handler to. 
+ * @status_code: status code to match against + * @callback: the header handler + * @user_data: data to pass to @handler_cb + * + * Adds a signal handler to @msg for @signal, as with + * g_signal_connect() but with two differences: the @callback will + * only be run if @msg has the status @status_code, and it will only + * be run if no earlier handler cancelled or requeued the message. + * + * @signal must be a signal that will be emitted after @msg's status + * is set. For a client #SoupMessage, this means it can't be a "wrote" + * signal. For a server #SoupMessage, this means it can't be a "got" + * signal. + * + * Return value: the handler ID from g_signal_connect() + **/ +guint +soup_message_add_status_code_handler (SoupMessage *msg, + const char *signal, + guint status_code, + GCallback callback, + gpointer user_data) +{ + GClosure *closure; + + g_return_val_if_fail (SOUP_IS_MESSAGE (msg), 0); + g_return_val_if_fail (signal != NULL, 0); + g_return_val_if_fail (callback != NULL, 0); + + closure = g_cclosure_new (callback, user_data, NULL); + g_closure_set_meta_marshal (closure, GUINT_TO_POINTER (status_code), + status_handler_metamarshal); + + return g_signal_connect_closure (msg, signal, closure, FALSE); +} + + +/** + * soup_message_set_auth: + * @msg: a #SoupMessage + * @auth: a #SoupAuth, or %NULL + * + * Sets @msg to authenticate to its destination using @auth, which + * must have already been fully authenticated. If @auth is %NULL, @msg + * will not authenticate to its destination. 
+ **/ +void +soup_message_set_auth (SoupMessage *msg, SoupAuth *auth) +{ + SoupMessagePrivate *priv; + char *token; + + g_return_if_fail (SOUP_IS_MESSAGE (msg)); + g_return_if_fail (auth == NULL || SOUP_IS_AUTH (auth)); + g_return_if_fail (auth == NULL || soup_auth_is_authenticated (auth)); + + priv = SOUP_MESSAGE_GET_PRIVATE (msg); + + if (priv->auth) { + g_object_unref (priv->auth); + soup_message_headers_remove (msg->request_headers, + "Authorization"); + } + priv->auth = auth; + if (!priv->auth) + return; + + g_object_ref (priv->auth); + token = soup_auth_get_authorization (auth, msg); + soup_message_headers_replace (msg->request_headers, + "Authorization", token); + g_free (token); +} + +/** + * soup_message_get_auth: + * @msg: a #SoupMessage + * + * Gets the #SoupAuth used by @msg for authentication. + * + * Return value: (transfer none): the #SoupAuth used by @msg for + * authentication, or %NULL if @msg is unauthenticated. + **/ +SoupAuth * +soup_message_get_auth (SoupMessage *msg) +{ + g_return_val_if_fail (SOUP_IS_MESSAGE (msg), NULL); + + return SOUP_MESSAGE_GET_PRIVATE (msg)->auth; +} + +/** + * soup_message_set_proxy_auth: + * @msg: a #SoupMessage + * @auth: a #SoupAuth, or %NULL + * + * Sets @msg to authenticate to its proxy using @auth, which must have + * already been fully authenticated. If @auth is %NULL, @msg will not + * authenticate to its proxy. 
+ **/ +void +soup_message_set_proxy_auth (SoupMessage *msg, SoupAuth *auth) +{ + SoupMessagePrivate *priv; + char *token; + + g_return_if_fail (SOUP_IS_MESSAGE (msg)); + g_return_if_fail (auth == NULL || SOUP_IS_AUTH (auth)); + g_return_if_fail (auth == NULL || soup_auth_is_authenticated (auth)); + + priv = SOUP_MESSAGE_GET_PRIVATE (msg); + + if (priv->proxy_auth) { + g_object_unref (priv->proxy_auth); + soup_message_headers_remove (msg->request_headers, + "Proxy-Authorization"); + } + priv->proxy_auth = auth; + if (!priv->proxy_auth) + return; + + g_object_ref (priv->proxy_auth); + token = soup_auth_get_authorization (auth, msg); + soup_message_headers_replace (msg->request_headers, + "Proxy-Authorization", token); + g_free (token); +} + +/** + * soup_message_get_proxy_auth: + * @msg: a #SoupMessage + * + * Gets the #SoupAuth used by @msg for authentication to its proxy.. + * + * Return value: the #SoupAuth used by @msg for authentication to its + * proxy, or %NULL if @msg isn't authenticated to its proxy. + **/ +SoupAuth * +soup_message_get_proxy_auth (SoupMessage *msg) +{ + g_return_val_if_fail (SOUP_IS_MESSAGE (msg), NULL); + + return SOUP_MESSAGE_GET_PRIVATE (msg)->proxy_auth; +} + +/** + * soup_message_cleanup_response: + * @req: a #SoupMessage + * + * Cleans up all response data on @req, so that the request can be sent + * again and receive a new response. (Eg, as a result of a redirect or + * authorization request.) 
+ **/ +void +soup_message_cleanup_response (SoupMessage *req) +{ + SoupMessagePrivate *priv = SOUP_MESSAGE_GET_PRIVATE (req); + + soup_message_body_truncate (req->response_body); + soup_message_headers_clear (req->response_headers); + if (priv->server_side) { + soup_message_headers_set_encoding (req->response_headers, + SOUP_ENCODING_CONTENT_LENGTH); + } + + while (priv->decoders) { + g_object_unref (priv->decoders->data); + priv->decoders = g_slist_delete_link (priv->decoders, priv->decoders); + } + priv->msg_flags &= ~SOUP_MESSAGE_CONTENT_DECODED; + priv->msg_flags &= ~SOUP_MESSAGE_CERTIFICATE_TRUSTED; + + req->status_code = SOUP_STATUS_NONE; + if (req->reason_phrase) { + g_free (req->reason_phrase); + req->reason_phrase = NULL; + } + priv->http_version = priv->orig_http_version; + + if (priv->tls_certificate) { + g_object_unref (priv->tls_certificate); + priv->tls_certificate = NULL; + } + priv->tls_errors = 0; + + g_object_notify (G_OBJECT (req), SOUP_MESSAGE_STATUS_CODE); + g_object_notify (G_OBJECT (req), SOUP_MESSAGE_REASON_PHRASE); + g_object_notify (G_OBJECT (req), SOUP_MESSAGE_HTTP_VERSION); + g_object_notify (G_OBJECT (req), SOUP_MESSAGE_FLAGS); + g_object_notify (G_OBJECT (req), SOUP_MESSAGE_TLS_CERTIFICATE); + g_object_notify (G_OBJECT (req), SOUP_MESSAGE_TLS_ERRORS); +} + +/** + * SoupMessageFlags: + * @SOUP_MESSAGE_NO_REDIRECT: The session should not follow redirect + * (3xx) responses received by this message. + * @SOUP_MESSAGE_CAN_REBUILD: The caller will rebuild the request + * body if the message is restarted; see + * soup_message_body_set_accumulate() for more details. + * @SOUP_MESSAGE_OVERWRITE_CHUNKS: Deprecated: equivalent to calling + * soup_message_body_set_accumulate() on the incoming message body + * (ie, %response_body for a client-side request), passing %FALSE. 
+ * @SOUP_MESSAGE_CONTENT_DECODED: Set by #SoupContentDecoder to + * indicate that it has removed the Content-Encoding on a message (and + * so headers such as Content-Length may no longer accurately describe + * the body). + * @SOUP_MESSAGE_CERTIFICATE_TRUSTED: if %TRUE after an https response + * has been received, indicates that the server's SSL certificate is + * trusted according to the session's CA. + * + * Various flags that can be set on a #SoupMessage to alter its + * behavior. + **/ + +/** + * soup_message_set_flags: + * @msg: a #SoupMessage + * @flags: a set of #SoupMessageFlags values + * + * Sets the specified flags on @msg. + **/ +void +soup_message_set_flags (SoupMessage *msg, SoupMessageFlags flags) +{ + SoupMessagePrivate *priv; + + g_return_if_fail (SOUP_IS_MESSAGE (msg)); + priv = SOUP_MESSAGE_GET_PRIVATE (msg); + + if ((priv->msg_flags ^ flags) & SOUP_MESSAGE_OVERWRITE_CHUNKS) { + soup_message_body_set_accumulate ( + priv->server_side ? msg->request_body : msg->response_body, + !(flags & SOUP_MESSAGE_OVERWRITE_CHUNKS)); + } + + priv->msg_flags = flags; + g_object_notify (G_OBJECT (msg), SOUP_MESSAGE_FLAGS); +} + +/** + * soup_message_get_flags: + * @msg: a #SoupMessage + * + * Gets the flags on @msg + * + * Return value: the flags + **/ +SoupMessageFlags +soup_message_get_flags (SoupMessage *msg) +{ + g_return_val_if_fail (SOUP_IS_MESSAGE (msg), 0); + + return SOUP_MESSAGE_GET_PRIVATE (msg)->msg_flags; +} + +/** + * SoupHTTPVersion: + * @SOUP_HTTP_1_0: HTTP 1.0 (RFC 1945) + * @SOUP_HTTP_1_1: HTTP 1.1 (RFC 2616) + * + * Indicates the HTTP protocol version being used. + **/ + +/** + * soup_message_set_http_version: + * @msg: a #SoupMessage + * @version: the HTTP version + * + * Sets the HTTP version on @msg. The default version is + * %SOUP_HTTP_1_1. Setting it to %SOUP_HTTP_1_0 will prevent certain + * functionality from being used. 
+ **/ +void +soup_message_set_http_version (SoupMessage *msg, SoupHTTPVersion version) +{ + SoupMessagePrivate *priv; + + g_return_if_fail (SOUP_IS_MESSAGE (msg)); + priv = SOUP_MESSAGE_GET_PRIVATE (msg); + + priv->http_version = version; + if (msg->status_code == SOUP_STATUS_NONE) + priv->orig_http_version = version; + g_object_notify (G_OBJECT (msg), SOUP_MESSAGE_HTTP_VERSION); +} + +/** + * soup_message_get_http_version: + * @msg: a #SoupMessage + * + * Gets the HTTP version of @msg. This is the minimum of the + * version from the request and the version from the response. + * + * Return value: the HTTP version + **/ +SoupHTTPVersion +soup_message_get_http_version (SoupMessage *msg) +{ + g_return_val_if_fail (SOUP_IS_MESSAGE (msg), SOUP_HTTP_1_0); + + return SOUP_MESSAGE_GET_PRIVATE (msg)->http_version; +} + +/** + * soup_message_is_keepalive: + * @msg: a #SoupMessage + * + * Determines whether or not @msg's connection can be kept alive for + * further requests after processing @msg, based on the HTTP version, + * Connection header, etc. + * + * Return value: %TRUE or %FALSE. + **/ +gboolean +soup_message_is_keepalive (SoupMessage *msg) +{ + const char *c_conn, *s_conn; + + c_conn = soup_message_headers_get_list (msg->request_headers, + "Connection"); + s_conn = soup_message_headers_get_list (msg->response_headers, + "Connection"); + + if (msg->status_code == SOUP_STATUS_OK && + msg->method == SOUP_METHOD_CONNECT) + return TRUE; + + /* Not persistent if the server sent a terminate-by-EOF response */ + if (soup_message_headers_get_encoding (msg->response_headers) == SOUP_ENCODING_EOF) + return FALSE; + + if (SOUP_MESSAGE_GET_PRIVATE (msg)->http_version == SOUP_HTTP_1_0) { + /* In theory, HTTP/1.0 connections are only persistent + * if the client requests it, and the server agrees. + * But some servers do keep-alive even if the client + * doesn't request it. So ignore c_conn. 
+ */ + + if (!s_conn || !soup_header_contains (s_conn, "Keep-Alive")) + return FALSE; + } else { + /* Normally persistent unless either side requested otherwise */ + if (c_conn && soup_header_contains (c_conn, "close")) + return FALSE; + if (s_conn && soup_header_contains (s_conn, "close")) + return FALSE; + + return TRUE; + } + + return TRUE; +} + +/** + * soup_message_set_uri: + * @msg: a #SoupMessage + * @uri: the new #SoupURI + * + * Sets @msg's URI to @uri. If @msg has already been sent and you want + * to re-send it with the new URI, you need to call + * soup_session_requeue_message(). + **/ +void +soup_message_set_uri (SoupMessage *msg, SoupURI *uri) +{ + SoupMessagePrivate *priv; + + g_return_if_fail (SOUP_IS_MESSAGE (msg)); + priv = SOUP_MESSAGE_GET_PRIVATE (msg); + + if (priv->uri) + soup_uri_free (priv->uri); + if (priv->addr) { + g_object_unref (priv->addr); + priv->addr = NULL; + } + priv->uri = soup_uri_copy (uri); + + g_object_notify (G_OBJECT (msg), SOUP_MESSAGE_URI); +} + +/** + * soup_message_get_uri: + * @msg: a #SoupMessage + * + * Gets @msg's URI + * + * Return value: (transfer none): the URI @msg is targeted for. + **/ +SoupURI * +soup_message_get_uri (SoupMessage *msg) +{ + g_return_val_if_fail (SOUP_IS_MESSAGE (msg), NULL); + + return SOUP_MESSAGE_GET_PRIVATE (msg)->uri; +} + +/** + * soup_message_get_address: + * @msg: a #SoupMessage + * + * Gets the address @msg's URI points to. After first setting the + * URI on a message, this will be unresolved, although the message's + * session will resolve it before sending the message. 
+ * + * Return value: (transfer none): the address @msg's URI points to + * + * Since: 2.26 + **/ +SoupAddress * +soup_message_get_address (SoupMessage *msg) +{ + SoupMessagePrivate *priv; + + g_return_val_if_fail (SOUP_IS_MESSAGE (msg), NULL); + + priv = SOUP_MESSAGE_GET_PRIVATE (msg); + if (!priv->addr) { + priv->addr = soup_address_new (priv->uri->host, + priv->uri->port); + } + return priv->addr; +} + +/** + * soup_message_set_status: + * @msg: a #SoupMessage + * @status_code: an HTTP status code + * + * Sets @msg's status code to @status_code. If @status_code is a + * known value, it will also set @msg's reason_phrase. + **/ +void +soup_message_set_status (SoupMessage *msg, guint status_code) +{ + g_return_if_fail (SOUP_IS_MESSAGE (msg)); + g_return_if_fail (status_code != 0); + + g_free (msg->reason_phrase); + + msg->status_code = status_code; + msg->reason_phrase = g_strdup (soup_status_get_phrase (status_code)); + g_object_notify (G_OBJECT (msg), SOUP_MESSAGE_STATUS_CODE); + g_object_notify (G_OBJECT (msg), SOUP_MESSAGE_REASON_PHRASE); +} + +/** + * soup_message_set_status_full: + * @msg: a #SoupMessage + * @status_code: an HTTP status code + * @reason_phrase: a description of the status + * + * Sets @msg's status code and reason phrase. + **/ +void +soup_message_set_status_full (SoupMessage *msg, + guint status_code, + const char *reason_phrase) +{ + g_return_if_fail (SOUP_IS_MESSAGE (msg)); + g_return_if_fail (status_code != 0); + g_return_if_fail (reason_phrase != NULL); + + g_free (msg->reason_phrase); + + msg->status_code = status_code; + msg->reason_phrase = g_strdup (reason_phrase); + g_object_notify (G_OBJECT (msg), SOUP_MESSAGE_STATUS_CODE); + g_object_notify (G_OBJECT (msg), SOUP_MESSAGE_REASON_PHRASE); +} + +/** + * SoupChunkAllocator: + * @msg: the #SoupMessage the chunk is being allocated for + * @max_len: the maximum length that will be read, or 0. 
 * @user_data: the data passed to soup_message_set_chunk_allocator()
 *
 * The prototype for a chunk allocation callback. This should allocate
 * a new #SoupBuffer and return it for the I/O layer to read message
 * body data off the network into.
 *
 * If @max_len is non-0, it indicates the maximum number of bytes that
 * could be read, based on what is known about the message size. Note
 * that this might be a very large number, and you should not simply
 * try to allocate that many bytes blindly. If @max_len is 0, that
 * means that libsoup does not know how many bytes remain to be read,
 * and the allocator should return a buffer of a size that it finds
 * convenient.
 *
 * If the allocator returns %NULL, the message will be paused. It is
 * up to the application to make sure that it gets unpaused when it
 * becomes possible to allocate a new buffer.
 *
 * Return value: the new buffer (or %NULL)
 **/

/**
 * soup_message_set_chunk_allocator:
 * @msg: a #SoupMessage
 * @allocator: the chunk allocator callback
 * @user_data: data to pass to @allocator
 * @destroy_notify: destroy notifier to free @user_data when @msg is
 * destroyed
 *
 * Sets an alternate chunk-allocation function to use when reading
 * @msg's body. Every time data is available to read, libsoup will
 * call @allocator, which should return a #SoupBuffer. (See
 * #SoupChunkAllocator for additional details.) Libsoup will then read
 * data from the network into that buffer, and update the buffer's
 * %length to indicate how much data it read.
 *
 * Generally, a custom chunk allocator would be used in conjunction
 * with soup_message_body_set_accumulate() %FALSE and
 * #SoupMessage::got_chunk, as part of a strategy to avoid unnecessary
 * copying of data. However, you cannot assume that every call to the
 * allocator will be followed by a call to your %got_chunk handler; if
 * an I/O error occurs, then the buffer will be unreffed without ever
 * having been used. If your buffer-allocation strategy requires
 * special cleanup, use soup_buffer_new_with_owner() rather than doing
 * the cleanup from the %got_chunk handler.
 *
 * The other thing to remember when using non-accumulating message
 * bodies is that the buffer passed to the %got_chunk handler will be
 * unreffed after the handler returns, just as it would be in the
 * non-custom-allocated case. If you want to hand the chunk data off
 * to some other part of your program to use later, you'll need to ref
 * the #SoupBuffer (or its owner, in the soup_buffer_new_with_owner()
 * case) to ensure that the data remains valid.
 *
 * If an allocator with a destroy notifier was set earlier, that
 * notifier is called on the earlier user data before the new values
 * are stored.
 **/
void
soup_message_set_chunk_allocator (SoupMessage *msg,
				  SoupChunkAllocator allocator,
				  gpointer user_data,
				  GDestroyNotify destroy_notify)
{
	SoupMessagePrivate *priv;

	g_return_if_fail (SOUP_IS_MESSAGE (msg));

	priv = SOUP_MESSAGE_GET_PRIVATE (msg);

	/* Release the previous allocator's user data before replacing it */
	if (priv->chunk_allocator_dnotify)
		priv->chunk_allocator_dnotify (priv->chunk_allocator_data);

	priv->chunk_allocator         = allocator;
	priv->chunk_allocator_data    = user_data;
	priv->chunk_allocator_dnotify = destroy_notify;
}

/**
 * soup_message_disable_feature:
 * @msg: a #SoupMessage
 * @feature_type: the #GType of a #SoupSessionFeature
 *
 * This disables the actions of #SoupSessionFeatures with the
 * given @feature_type (or a subclass of that type) on @msg, so that
 * @msg is processed as though the feature(s) hadn't been added to the
 * session. Eg, passing #SOUP_TYPE_PROXY_RESOLVER for @feature_type
 * will disable proxy handling and cause @msg to be sent directly to
 * the indicated origin server, regardless of system proxy
 * configuration.
+ * + * You must call this before queueing @msg on a session; calling it on + * a message that has already been queued is undefined. In particular, + * you cannot call this on a message that is being requeued after a + * redirect or authentication. + * + * Since: 2.28 + **/ +void +soup_message_disable_feature (SoupMessage *msg, GType feature_type) +{ + SoupMessagePrivate *priv; + + g_return_if_fail (SOUP_IS_MESSAGE (msg)); + + priv = SOUP_MESSAGE_GET_PRIVATE (msg); + + priv->disabled_features = g_slist_prepend (priv->disabled_features, + GSIZE_TO_POINTER (feature_type)); +} + +gboolean +soup_message_disables_feature (SoupMessage *msg, gpointer feature) +{ + SoupMessagePrivate *priv; + GSList *f; + + g_return_val_if_fail (SOUP_IS_MESSAGE (msg), FALSE); + + priv = SOUP_MESSAGE_GET_PRIVATE (msg); + + for (f = priv->disabled_features; f; f = f->next) { + if (G_TYPE_CHECK_INSTANCE_TYPE (feature, (GType) GPOINTER_TO_SIZE (f->data))) + return TRUE; + } + return FALSE; +} + +/** + * soup_message_get_first_party: + * @msg: a #SoupMessage + * + * Returns: (transfer none): the @msg's first party #SoupURI + * + * Since: 2.30 + **/ +SoupURI * +soup_message_get_first_party (SoupMessage *msg) +{ + SoupMessagePrivate *priv; + + g_return_val_if_fail (SOUP_IS_MESSAGE (msg), NULL); + + priv = SOUP_MESSAGE_GET_PRIVATE (msg); + return priv->first_party; +} + +/** + * soup_message_set_first_party: + * @msg: a #SoupMessage + * @first_party: the #SoupURI for the @msg's first party + * + * Sets @first_party as the main document #SoupURI for @msg. For + * details of when and how this is used refer to the documentation for + * #SoupCookieJarAcceptPolicy. 
+ * + * Since: 2.30 + **/ +void +soup_message_set_first_party (SoupMessage *msg, + SoupURI *first_party) +{ + SoupMessagePrivate *priv; + + g_return_if_fail (SOUP_IS_MESSAGE (msg)); + g_return_if_fail (first_party != NULL); + + priv = SOUP_MESSAGE_GET_PRIVATE (msg); + + if (priv->first_party) { + if (soup_uri_equal (priv->first_party, first_party)) + return; + + soup_uri_free (priv->first_party); + } + + priv->first_party = soup_uri_copy (first_party); + g_object_notify (G_OBJECT (msg), SOUP_MESSAGE_FIRST_PARTY); +} + +/** + * soup_message_get_https_status: + * @msg: a #SoupMessage + * @certificate: (out) (transfer none): @msg's TLS certificate + * @errors: (out): the verification status of @certificate + * + * If @msg is using https, this retrieves the #GTlsCertificate + * associated with its connection, and the #GTlsCertificateFlags showing + * what problems, if any, have been found with that certificate. + * + * Return value: %TRUE if @msg uses https, %FALSE if not + * + * Since: 2.34 + */ +gboolean +soup_message_get_https_status (SoupMessage *msg, + GTlsCertificate **certificate, + GTlsCertificateFlags *errors) +{ + SoupMessagePrivate *priv; + + g_return_val_if_fail (SOUP_IS_MESSAGE (msg), FALSE); + + priv = SOUP_MESSAGE_GET_PRIVATE (msg); + + if (certificate) + *certificate = priv->tls_certificate; + if (errors) + *errors = priv->tls_errors; + return priv->tls_certificate != NULL; +} diff --git a/libsoup/soup-message.h b/libsoup/soup-message.h new file mode 100644 index 0000000..8505f00 --- /dev/null +++ b/libsoup/soup-message.h @@ -0,0 +1,186 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2000-2003, Ximian, Inc. 
+ */
+
+#ifndef SOUP_MESSAGE_H
+#define SOUP_MESSAGE_H 1
+
+#include
+#include
+#include
+#include
+
+G_BEGIN_DECLS
+
+#define SOUP_TYPE_MESSAGE (soup_message_get_type ())
+#define SOUP_MESSAGE(obj) (G_TYPE_CHECK_INSTANCE_CAST ((obj), SOUP_TYPE_MESSAGE, SoupMessage))
+#define SOUP_MESSAGE_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST ((klass), SOUP_TYPE_MESSAGE, SoupMessageClass))
+#define SOUP_IS_MESSAGE(obj) (G_TYPE_CHECK_INSTANCE_TYPE ((obj), SOUP_TYPE_MESSAGE))
+#define SOUP_IS_MESSAGE_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), SOUP_TYPE_MESSAGE))
+#define SOUP_MESSAGE_GET_CLASS(obj) (G_TYPE_INSTANCE_GET_CLASS ((obj), SOUP_TYPE_MESSAGE, SoupMessageClass))
+
+struct _SoupMessage {
+	GObject parent;
+
+	/*< public >*/
+	const char *method;
+
+	guint status_code;
+	char *reason_phrase;
+
+	SoupMessageBody *request_body;
+	SoupMessageHeaders *request_headers;
+
+	SoupMessageBody *response_body;
+	SoupMessageHeaders *response_headers;
+};
+
+typedef struct {
+	GObjectClass parent_class;
+
+	/* signals */
+	void (*wrote_informational) (SoupMessage *msg);
+	void (*wrote_headers) (SoupMessage *msg);
+	void (*wrote_chunk) (SoupMessage *msg);
+	void (*wrote_body) (SoupMessage *msg);
+	void (*got_informational) (SoupMessage *msg);
+	void (*got_headers) (SoupMessage *msg);
+	void (*got_chunk) (SoupMessage *msg, SoupBuffer *chunk);
+	void (*got_body) (SoupMessage *msg);
+	void (*restarted) (SoupMessage *msg);
+	void (*finished) (SoupMessage *msg);
+
+	/* Padding for future expansion */
+	void (*_libsoup_reserved1) (void);
+	void (*_libsoup_reserved2) (void);
+	void (*_libsoup_reserved3) (void);
+	void (*_libsoup_reserved4) (void);
+} SoupMessageClass;
+
+GType soup_message_get_type (void);
+
+#define SOUP_MESSAGE_METHOD "method"
+#define SOUP_MESSAGE_URI "uri"
+#define SOUP_MESSAGE_HTTP_VERSION "http-version"
+#define SOUP_MESSAGE_FLAGS "flags"
+#define SOUP_MESSAGE_SERVER_SIDE "server-side"
+#define SOUP_MESSAGE_STATUS_CODE "status-code"
+#define SOUP_MESSAGE_REASON_PHRASE
"reason-phrase" +#define SOUP_MESSAGE_FIRST_PARTY "first-party" +#define SOUP_MESSAGE_REQUEST_BODY "request-body" +#define SOUP_MESSAGE_REQUEST_HEADERS "request-headers" +#define SOUP_MESSAGE_RESPONSE_BODY "response-body" +#define SOUP_MESSAGE_RESPONSE_HEADERS "response-headers" +#define SOUP_MESSAGE_TLS_CERTIFICATE "tls-certificate" +#define SOUP_MESSAGE_TLS_ERRORS "tls-errors" + +SoupMessage *soup_message_new (const char *method, + const char *uri_string); +SoupMessage *soup_message_new_from_uri (const char *method, + SoupURI *uri); + +void soup_message_set_request (SoupMessage *msg, + const char *content_type, + SoupMemoryUse req_use, + const char *req_body, + gsize req_length); +void soup_message_set_response (SoupMessage *msg, + const char *content_type, + SoupMemoryUse resp_use, + const char *resp_body, + gsize resp_length); + +typedef enum { + SOUP_HTTP_1_0 = 0, /*< nick=http-1-0 >*/ + SOUP_HTTP_1_1 = 1 /*< nick=http-1-1 >*/ +} SoupHTTPVersion; + +void soup_message_set_http_version (SoupMessage *msg, + SoupHTTPVersion version); +SoupHTTPVersion soup_message_get_http_version (SoupMessage *msg); + +gboolean soup_message_is_keepalive (SoupMessage *msg); + +SoupURI *soup_message_get_uri (SoupMessage *msg); +void soup_message_set_uri (SoupMessage *msg, + SoupURI *uri); +SoupAddress *soup_message_get_address (SoupMessage *msg); + +SoupURI *soup_message_get_first_party (SoupMessage *msg); +void soup_message_set_first_party (SoupMessage *msg, + SoupURI *first_party); + +typedef enum { + SOUP_MESSAGE_NO_REDIRECT = (1 << 1), + SOUP_MESSAGE_CAN_REBUILD = (1 << 2), +#ifndef LIBSOUP_DISABLE_DEPRECATED + SOUP_MESSAGE_OVERWRITE_CHUNKS = (1 << 3), +#endif + SOUP_MESSAGE_CONTENT_DECODED = (1 << 4), + SOUP_MESSAGE_CERTIFICATE_TRUSTED = (1 << 5) +} SoupMessageFlags; + +void soup_message_set_flags (SoupMessage *msg, + SoupMessageFlags flags); + +SoupMessageFlags soup_message_get_flags (SoupMessage *msg); + +gboolean soup_message_get_https_status (SoupMessage *msg, + 
GTlsCertificate **certificate, + GTlsCertificateFlags *errors); + + +/* Specialized signal handlers */ +guint soup_message_add_header_handler (SoupMessage *msg, + const char *signal, + const char *header, + GCallback callback, + gpointer user_data); + +guint soup_message_add_status_code_handler ( + SoupMessage *msg, + const char *signal, + guint status_code, + GCallback callback, + gpointer user_data); + +/* + * Status Setting + */ +void soup_message_set_status (SoupMessage *msg, + guint status_code); + +void soup_message_set_status_full (SoupMessage *msg, + guint status_code, + const char *reason_phrase); + +/* I/O */ +typedef SoupBuffer * (*SoupChunkAllocator) (SoupMessage *msg, + gsize max_len, + gpointer user_data); + +void soup_message_set_chunk_allocator (SoupMessage *msg, + SoupChunkAllocator allocator, + gpointer user_data, + GDestroyNotify destroy_notify); + +void soup_message_disable_feature (SoupMessage *msg, + GType feature_type); + +void soup_message_wrote_informational (SoupMessage *msg); +void soup_message_wrote_headers (SoupMessage *msg); +void soup_message_wrote_chunk (SoupMessage *msg); +void soup_message_wrote_body_data (SoupMessage *msg, SoupBuffer *chunk); +void soup_message_wrote_body (SoupMessage *msg); +void soup_message_got_informational (SoupMessage *msg); +void soup_message_got_headers (SoupMessage *msg); +void soup_message_got_chunk (SoupMessage *msg, SoupBuffer *chunk); +void soup_message_got_body (SoupMessage *msg); +void soup_message_content_sniffed (SoupMessage *msg, const char *content_type, GHashTable *params); +void soup_message_restarted (SoupMessage *msg); +void soup_message_finished (SoupMessage *msg); + +G_END_DECLS + +#endif /*SOUP_MESSAGE_H*/ diff --git a/libsoup/soup-method.c b/libsoup/soup-method.c new file mode 100644 index 0000000..de7d589 --- /dev/null +++ b/libsoup/soup-method.c @@ -0,0 +1,107 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * soup-method.c: declarations of 
_SOUP_METHOD_* variables + * + * Copyright (C) 2008 Red Hat, Inc. + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include "soup-method.h" + +/* Explicit assignment to NULL is to help the OS X linker not be + * stupid. #522957 + */ +gpointer _SOUP_METHOD_CONNECT = NULL; +gpointer _SOUP_METHOD_COPY = NULL; +gpointer _SOUP_METHOD_DELETE = NULL; +gpointer _SOUP_METHOD_GET = NULL; +gpointer _SOUP_METHOD_HEAD = NULL; +gpointer _SOUP_METHOD_LOCK = NULL; +gpointer _SOUP_METHOD_MKCOL = NULL; +gpointer _SOUP_METHOD_MOVE = NULL; +gpointer _SOUP_METHOD_OPTIONS = NULL; +gpointer _SOUP_METHOD_POST = NULL; +gpointer _SOUP_METHOD_PROPFIND = NULL; +gpointer _SOUP_METHOD_PROPPATCH = NULL; +gpointer _SOUP_METHOD_PUT = NULL; +gpointer _SOUP_METHOD_TRACE = NULL; +gpointer _SOUP_METHOD_UNLOCK = NULL; + +/** + * SOUP_METHOD_OPTIONS: + * + * "OPTIONS" as an interned string. + **/ +/** + * SOUP_METHOD_GET: + * + * "GET" as an interned string. + **/ +/** + * SOUP_METHOD_HEAD: + * + * "HEAD" as an interned string. + **/ +/** + * SOUP_METHOD_POST: + * + * "POST" as an interned string. + **/ +/** + * SOUP_METHOD_PUT: + * + * "PUT" as an interned string. + **/ +/** + * SOUP_METHOD_DELETE: + * + * "DELETE" as an interned string. + **/ +/** + * SOUP_METHOD_TRACE: + * + * "TRACE" as an interned string. + **/ +/** + * SOUP_METHOD_CONNECT: + * + * "CONNECT" as an interned string. + **/ +/** + * SOUP_METHOD_PROPFIND: + * + * "PROPFIND" as an interned string. + **/ +/** + * SOUP_METHOD_PROPPATCH: + * + * "PROPPATCH" as an interned string. + **/ +/** + * SOUP_METHOD_MKCOL: + * + * "MKCOL" as an interned string. + **/ +/** + * SOUP_METHOD_COPY: + * + * "COPY" as an interned string. + **/ +/** + * SOUP_METHOD_MOVE: + * + * "MOVE" as an interned string. + **/ +/** + * SOUP_METHOD_LOCK: + * + * "LOCK" as an interned string. + **/ +/** + * SOUP_METHOD_UNLOCK: + * + * "UNLOCK" as an interned string. 
+ **/
diff --git a/libsoup/soup-method.h b/libsoup/soup-method.h
new file mode 100644
index 0000000..2934cca
--- /dev/null
+++ b/libsoup/soup-method.h
@@ -0,0 +1,80 @@
+/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+/*
+ * Copyright (C) 2008 Red Hat, Inc.
+ */
+
+#ifndef SOUP_METHOD_H
+#define SOUP_METHOD_H 1
+
+#include
+#include
+
+G_BEGIN_DECLS
+
+/**
+ * SECTION:soup-method
+ * @short_description: HTTP method definitions
+ *
+ * soup-method.h contains a number of defines for standard HTTP and
+ * WebDAV methods. You do not need to use these defines; you can pass
+ * arbitrary strings to soup_message_new() if you prefer.
+ *
+ * The thing that these defines are useful for is
+ * performing quick comparisons against #SoupMessage's %method field;
+ * because that field always contains an interned string, and these
+ * macros return interned strings, you can compare %method directly
+ * against these macros rather than needing to use strcmp(). This is
+ * most useful in SoupServer handlers. Eg:
+ *
+ *
+ * 	if (msg->method != SOUP_METHOD_GET && msg->method != SOUP_METHOD_HEAD) {
+ * 		soup_message_set_status (msg, SOUP_STATUS_NOT_IMPLEMENTED);
+ * 		return;
+ * 	}
+ *
+ **/
+
+#define _SOUP_INTERN_METHOD(method) (_SOUP_ATOMIC_INTERN_STRING (_SOUP_METHOD_##method, #method))
+
+/* HTTP/1.1 methods */
+#define SOUP_METHOD_OPTIONS _SOUP_INTERN_METHOD (OPTIONS)
+#define SOUP_METHOD_GET _SOUP_INTERN_METHOD (GET)
+#define SOUP_METHOD_HEAD _SOUP_INTERN_METHOD (HEAD)
+#define SOUP_METHOD_POST _SOUP_INTERN_METHOD (POST)
+#define SOUP_METHOD_PUT _SOUP_INTERN_METHOD (PUT)
+#define SOUP_METHOD_DELETE _SOUP_INTERN_METHOD (DELETE)
+#define SOUP_METHOD_TRACE _SOUP_INTERN_METHOD (TRACE)
+#define SOUP_METHOD_CONNECT _SOUP_INTERN_METHOD (CONNECT)
+
+/* WebDAV methods */
+#define SOUP_METHOD_PROPFIND _SOUP_INTERN_METHOD (PROPFIND)
+#define SOUP_METHOD_PROPPATCH _SOUP_INTERN_METHOD (PROPPATCH)
+#define SOUP_METHOD_MKCOL _SOUP_INTERN_METHOD (MKCOL)
+#define SOUP_METHOD_COPY _SOUP_INTERN_METHOD (COPY)
+#define SOUP_METHOD_MOVE _SOUP_INTERN_METHOD (MOVE)
+#define SOUP_METHOD_LOCK _SOUP_INTERN_METHOD (LOCK)
+#define SOUP_METHOD_UNLOCK _SOUP_INTERN_METHOD (UNLOCK)
+
+/* Do not use these variables directly; use the macros above, which
+ * ensure that they get initialized properly.
+ */ +extern gpointer _SOUP_METHOD_OPTIONS; +extern gpointer _SOUP_METHOD_GET; +extern gpointer _SOUP_METHOD_HEAD; +extern gpointer _SOUP_METHOD_POST; +extern gpointer _SOUP_METHOD_PUT; +extern gpointer _SOUP_METHOD_DELETE; +extern gpointer _SOUP_METHOD_TRACE; +extern gpointer _SOUP_METHOD_CONNECT; + +extern gpointer _SOUP_METHOD_PROPFIND; +extern gpointer _SOUP_METHOD_PROPPATCH; +extern gpointer _SOUP_METHOD_MKCOL; +extern gpointer _SOUP_METHOD_COPY; +extern gpointer _SOUP_METHOD_MOVE; +extern gpointer _SOUP_METHOD_LOCK; +extern gpointer _SOUP_METHOD_UNLOCK; + +G_END_DECLS + +#endif /* SOUP_METHOD_H */ diff --git a/libsoup/soup-misc-private.h b/libsoup/soup-misc-private.h new file mode 100644 index 0000000..8407101 --- /dev/null +++ b/libsoup/soup-misc-private.h @@ -0,0 +1,21 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright 2011 Igalia, S.L. + * Copyright 2011 Red Hat, Inc. + */ + +#ifndef SOUP_URI_PRIVATE_H +#define SOUP_URI_PRIVATE_H 1 + +#include "soup-socket.h" + +char *uri_decoded_copy (const char *str, int length); + +guint soup_socket_handshake_sync (SoupSocket *sock, + GCancellable *cancellable); +void soup_socket_handshake_async (SoupSocket *sock, + GCancellable *cancellable, + SoupSocketCallback callback, + gpointer user_data); + +#endif /* SOUP_URI_PRIVATE_H */ diff --git a/libsoup/soup-misc.c b/libsoup/soup-misc.c new file mode 100644 index 0000000..3136645 --- /dev/null +++ b/libsoup/soup-misc.c @@ -0,0 +1,225 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * soup-misc.c: Miscellaneous functions + + * Copyright (C) 2000-2003, Ximian, Inc. + */ + +#include +#include + +#include "soup-misc.h" + +/** + * SECTION:soup-misc + * @short_description: Miscellaneous functions + * + **/ + +/** + * soup_str_case_hash: + * @key: ASCII string to hash + * + * Hashes @key in a case-insensitive manner. + * + * Return value: the hash code. 
+ **/
+guint
+soup_str_case_hash (gconstpointer key)
+{
+	const char *p = key;
+	guint h = g_ascii_toupper(*p);
+
+	if (h)
+		for (p += 1; *p != '\0'; p++)
+			h = (h << 5) - h + g_ascii_toupper(*p); /* h * 31 + c */
+
+	return h;
+}
+
+/**
+ * soup_str_case_equal:
+ * @v1: an ASCII string
+ * @v2: another ASCII string
+ *
+ * Compares @v1 and @v2 in a case-insensitive manner
+ *
+ * Return value: %TRUE if they are equal (modulo case)
+ **/
+gboolean
+soup_str_case_equal (gconstpointer v1,
+		     gconstpointer v2)
+{
+	const char *string1 = v1;
+	const char *string2 = v2;
+
+	return g_ascii_strcasecmp (string1, string2) == 0;
+}
+
+/**
+ * soup_add_io_watch: (skip)
+ * @async_context: (allow-none): the #GMainContext to dispatch the I/O
+ * watch in, or %NULL for the default context
+ * @chan: the #GIOChannel to watch
+ * @condition: the condition to watch for
+ * @function: the callback to invoke when @condition occurs
+ * @data: user data to pass to @function
+ *
+ * Adds an I/O watch as with g_io_add_watch(), but using the given
+ * @async_context.
+ *
+ * Return value: a #GSource, which can be removed from @async_context
+ * with g_source_destroy().
+ **/
+GSource *
+soup_add_io_watch (GMainContext *async_context,
+		   GIOChannel *chan, GIOCondition condition,
+		   GIOFunc function, gpointer data)
+{
+	GSource *watch = g_io_create_watch (chan, condition);
+	g_source_set_callback (watch, (GSourceFunc) function, data, NULL);
+	g_source_attach (watch, async_context);
+	g_source_unref (watch); /* drop our ref; the context keeps its own once attached */
+	return watch;
+}
+
+/**
+ * soup_add_idle: (skip)
+ * @async_context: (allow-none): the #GMainContext to dispatch the idle
+ * event in, or %NULL for the default context
+ * @function: the callback to invoke at idle time
+ * @data: user data to pass to @function
+ *
+ * Adds an idle event as with g_idle_add(), but using the given
+ * @async_context.
+ *
+ * If you want @function to run "right away", use
+ * soup_add_completion(), since that sets a higher priority on the
+ * #GSource than soup_add_idle() does.
+ *
+ * Return value: a #GSource, which can be removed from @async_context
+ * with g_source_destroy().
+ **/
+GSource *
+soup_add_idle (GMainContext *async_context,
+	       GSourceFunc function, gpointer data)
+{
+	GSource *source = g_idle_source_new ();
+	g_source_set_callback (source, function, data, NULL);
+	g_source_attach (source, async_context);
+	g_source_unref (source); /* drop our ref; the context keeps its own once attached */
+	return source;
+}
+
+/**
+ * soup_add_completion: (skip)
+ * @async_context: (allow-none): the #GMainContext to dispatch
+ * @function in, or %NULL for the default context
+ * @function: the callback to invoke
+ * @data: user data to pass to @function
+ *
+ * Adds @function to be executed from inside @async_context with the
+ * default priority. Use this when you want to complete an action in
+ * @async_context's main loop, as soon as possible.
+ *
+ * Return value: a #GSource, which can be removed from @async_context
+ * with g_source_destroy().
+ *
+ * Since: 2.24
+ **/
+GSource *
+soup_add_completion (GMainContext *async_context,
+		     GSourceFunc function, gpointer data)
+{
+	GSource *source = g_idle_source_new ();
+	g_source_set_priority (source, G_PRIORITY_DEFAULT);
+	g_source_set_callback (source, function, data, NULL);
+	g_source_attach (source, async_context);
+	g_source_unref (source); /* drop our ref; the context keeps its own once attached */
+	return source;
+}
+
+/**
+ * soup_add_timeout: (skip)
+ * @async_context: (allow-none): the #GMainContext to dispatch the
+ * timeout in, or %NULL for the default context
+ * @interval: the timeout interval, in milliseconds
+ * @function: the callback to invoke at timeout time
+ * @data: user data to pass to @function
+ *
+ * Adds a timeout as with g_timeout_add(), but using the given
+ * @async_context.
+ *
+ * Return value: a #GSource, which can be removed from @async_context
+ * with g_source_destroy().
+ **/ +GSource * +soup_add_timeout (GMainContext *async_context, + guint interval, + GSourceFunc function, gpointer data) +{ + GSource *source = g_timeout_source_new (interval); + g_source_set_callback (source, function, data, NULL); + g_source_attach (source, async_context); + g_source_unref (source); + return source; +} + +/* 00 URI_UNRESERVED + * 01 URI_PCT_ENCODED + * 02 URI_GEN_DELIMS + * 04 URI_SUB_DELIMS + * 08 HTTP_SEPARATOR + * 10 HTTP_CTL + */ +const char soup_char_attributes[] = { + /* 0x00 - 0x07 */ + 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, + /* 0x08 - 0x0f */ + 0x11, 0x19, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, + /* 0x10 - 0x17 */ + 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, + /* 0x18 - 0x1f */ + 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, + /* !"#$%&' */ + 0x09, 0x04, 0x09, 0x02, 0x04, 0x01, 0x04, 0x04, + /* ()*+,-./ */ + 0x0c, 0x0c, 0x04, 0x04, 0x0c, 0x00, 0x00, 0x0a, + /* 01234567 */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + /* 89:;<=>? */ + 0x00, 0x00, 0x0a, 0x0c, 0x09, 0x0a, 0x09, 0x0a, + /* @ABCDEFG */ + 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + /* HIJKLMNO */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + /* PQRSTUVW */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + /* XYZ[\]^_ */ + 0x00, 0x00, 0x00, 0x0a, 0x09, 0x0a, 0x01, 0x00, + /* `abcdefg */ + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + /* hijklmno */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + /* pqrstuvw */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + /* xyz{|}~ */ + 0x00, 0x00, 0x00, 0x09, 0x01, 0x09, 0x00, 0x11, + /* 0x80 - 0xFF */ + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01 +}; diff --git a/libsoup/soup-misc.h b/libsoup/soup-misc.h new file mode 100644 index 0000000..45c8883 --- /dev/null +++ b/libsoup/soup-misc.h @@ -0,0 +1,79 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2000-2003, Ximian, Inc. + */ + +#ifndef SOUP_MISC_H +#define SOUP_MISC_H 1 + +#include + +G_BEGIN_DECLS + +/* Non-default-GMainContext operations */ +GSource *soup_add_io_watch (GMainContext *async_context, + GIOChannel *chan, + GIOCondition condition, + GIOFunc function, + gpointer data); +GSource *soup_add_idle (GMainContext *async_context, + GSourceFunc function, + gpointer data); +GSource *soup_add_completion (GMainContext *async_context, + GSourceFunc function, + gpointer data); +GSource *soup_add_timeout (GMainContext *async_context, + guint interval, + GSourceFunc function, + gpointer data); + +/* Misc utils */ + +guint soup_str_case_hash (gconstpointer key); +gboolean soup_str_case_equal (gconstpointer v1, + gconstpointer v2); + +#define _SOUP_ATOMIC_INTERN_STRING(variable, value) ((const char *)(g_atomic_pointer_get (&(variable)) ? 
(variable) : (g_atomic_pointer_set (&(variable), (gpointer)g_intern_static_string (value)), (variable))))
+
+/* character classes (flag bits stored per byte in soup_char_attributes[]) */
+
+extern const char soup_char_attributes[];
+#define SOUP_CHAR_URI_PERCENT_ENCODED 0x01
+#define SOUP_CHAR_URI_GEN_DELIMS 0x02
+#define SOUP_CHAR_URI_SUB_DELIMS 0x04
+#define SOUP_CHAR_HTTP_SEPARATOR 0x08
+#define SOUP_CHAR_HTTP_CTL 0x10
+
+#define soup_char_is_uri_percent_encoded(ch) (soup_char_attributes[(guchar)(ch)] & SOUP_CHAR_URI_PERCENT_ENCODED)
+#define soup_char_is_uri_gen_delims(ch) (soup_char_attributes[(guchar)(ch)] & SOUP_CHAR_URI_GEN_DELIMS)
+#define soup_char_is_uri_sub_delims(ch) (soup_char_attributes[(guchar)(ch)] & SOUP_CHAR_URI_SUB_DELIMS)
+#define soup_char_is_uri_unreserved(ch) (!(soup_char_attributes[(guchar)(ch)] & (SOUP_CHAR_URI_PERCENT_ENCODED | SOUP_CHAR_URI_GEN_DELIMS | SOUP_CHAR_URI_SUB_DELIMS)))
+#define soup_char_is_token(ch) (!(soup_char_attributes[(guchar)(ch)] & (SOUP_CHAR_HTTP_SEPARATOR | SOUP_CHAR_HTTP_CTL)))
+
+/* SSL stuff */
+
+extern const gboolean soup_ssl_supported;
+
+#define SOUP_SSL_ERROR soup_ssl_error_quark()
+
+GQuark soup_ssl_error_quark (void);
+
+typedef enum {
+	SOUP_SSL_ERROR_HANDSHAKE_NEEDS_READ,
+	SOUP_SSL_ERROR_HANDSHAKE_NEEDS_WRITE,
+	SOUP_SSL_ERROR_CERTIFICATE,
+	SOUP_SSL_ERROR_HANDSHAKE_FAILED
+} SoupSSLError;
+
+typedef enum {
+	SOUP_CONNECTION_NEW,
+	SOUP_CONNECTION_CONNECTING,
+	SOUP_CONNECTION_IDLE,
+	SOUP_CONNECTION_IN_USE,
+	SOUP_CONNECTION_REMOTE_DISCONNECTED,
+	SOUP_CONNECTION_DISCONNECTED
+} SoupConnectionState;
+
+G_END_DECLS
+
+#endif /* SOUP_MISC_H */
diff --git a/libsoup/soup-multipart.c b/libsoup/soup-multipart.c
new file mode 100644
index 0000000..ee99920
--- /dev/null
+++ b/libsoup/soup-multipart.c
@@ -0,0 +1,510 @@
+/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+/*
+ * soup-multipart.c: multipart HTTP message bodies
+ *
+ * Copyright (C) 2008 Red Hat, Inc.
+ */ + +#include + +#include "soup-multipart.h" +#include "soup-headers.h" + +/** + * SECTION:soup-multipart + * @short_description: multipart HTTP message bodies + * @see_also: #SoupMessageBody, #SoupMessageHeaders + * + **/ + +/** + * SoupMultipart: + * + * Represents a multipart HTTP message body, parsed according to the + * syntax of RFC 2046. Of particular interest to HTTP are + * multipart/byte-ranges and + * multipart/form-data. + * + * Although the headers of a #SoupMultipart body part will contain the + * full headers from that body part, libsoup does not interpret them + * according to MIME rules. For example, each body part is assumed to + * have "binary" Content-Transfer-Encoding, even if its headers + * explicitly state otherwise. In other words, don't try to use + * #SoupMultipart for handling real MIME multiparts. + * + * Since: 2.26 + **/ + +struct SoupMultipart { + char *mime_type, *boundary; + GPtrArray *headers, *bodies; +}; + +static SoupMultipart * +soup_multipart_new_internal (char *mime_type, char *boundary) +{ + SoupMultipart *multipart; + + multipart = g_slice_new (SoupMultipart); + multipart->mime_type = mime_type; + multipart->boundary = boundary; + multipart->headers = g_ptr_array_new (); + multipart->bodies = g_ptr_array_new (); + + return multipart; +} + +static char * +generate_boundary (void) +{ + static int counter; + struct { + GTimeVal timeval; + int counter; + } data; + + /* avoid valgrind warning */ + if (sizeof (data) != sizeof (data.timeval) + sizeof (data.counter)) + memset (&data, 0, sizeof (data)); + + g_get_current_time (&data.timeval); + data.counter = counter++; + + /* The maximum boundary string length is 69 characters, and a + * stringified SHA256 checksum is 64 bytes long. + */ + return g_compute_checksum_for_data (G_CHECKSUM_SHA256, + (const guchar *)&data, + sizeof (data)); +} + +/** + * soup_multipart_new: + * @mime_type: the MIME type of the multipart to create. 
+ *
+ * Creates a new empty #SoupMultipart with a randomly-generated
+ * boundary string. Note that @mime_type must be the full MIME type,
+ * including "multipart/".
+ *
+ * Return value: a new empty #SoupMultipart of the given @mime_type
+ *
+ * Since: 2.26
+ **/
+SoupMultipart *
+soup_multipart_new (const char *mime_type)
+{
+	return soup_multipart_new_internal (g_strdup (mime_type),
+					    generate_boundary ());
+}
+
+static const char *
+find_boundary (const char *start, const char *end,
+	       const char *boundary, int boundary_len)
+{ /* Returns the first line-initial "--boundary" in [@start, @end), or NULL */
+	const char *b;
+
+	for (b = memchr (start, '-', end - start);
+	     b && b + boundary_len + 4 < end;
+	     b = memchr (b + 2, '-', end - (b + 2))) {
+		/* Check for "--boundary" */
+		if (b[1] != '-' ||
+		    memcmp (b + 2, boundary, boundary_len) != 0)
+			continue;
+
+		/* Check that it's at start of line (never reading before @start) */
+		if (!(b == start || (b - start >= 2 && b[-1] == '\n' && b[-2] == '\r')))
+			continue;
+
+		/* Check for "--" or "\r\n" after boundary */
+		if ((b[boundary_len + 2] == '-' && b[boundary_len + 3] == '-') ||
+		    (b[boundary_len + 2] == '\r' && b[boundary_len + 3] == '\n'))
+			return b;
+	}
+	return NULL;
+}
+
+/**
+ * soup_multipart_new_from_message:
+ * @headers: the headers of the HTTP message to parse
+ * @body: the body of the HTTP message to parse
+ *
+ * Parses @headers and @body to form a new #SoupMultipart
+ *
+ * Return value: a new #SoupMultipart (or %NULL if the message couldn't
+ * be parsed or wasn't multipart).
+ *
+ * Since: 2.26
+ **/
+SoupMultipart *
+soup_multipart_new_from_message (SoupMessageHeaders *headers,
+				 SoupMessageBody *body)
+{
+	SoupMultipart *multipart;
+	const char *content_type, *boundary;
+	GHashTable *params;
+	int boundary_len;
+	SoupBuffer *flattened;
+	const char *start, *split, *end, *body_end;
+	SoupMessageHeaders *part_headers;
+	SoupBuffer *part_body;
+
+	content_type = soup_message_headers_get_content_type (headers, &params);
+	if (!content_type)
+		return NULL;
+
+	boundary = g_hash_table_lookup (params, "boundary");
+	if (strncmp (content_type, "multipart/", 10) != 0 || !boundary) {
+		g_hash_table_destroy (params);
+		return NULL;
+	}
+
+	multipart = soup_multipart_new_internal (
+		g_strdup (content_type), g_strdup (boundary));
+	g_hash_table_destroy (params);
+
+	flattened = soup_message_body_flatten (body);
+	body_end = flattened->data + flattened->length;
+	boundary = multipart->boundary;
+	boundary_len = strlen (boundary);
+
+	/* skip preamble */
+	start = find_boundary (flattened->data, body_end,
+			       boundary, boundary_len);
+	if (!start) {
+		soup_multipart_free (multipart);
+		soup_buffer_free (flattened);
+		return NULL;
+	}
+
+	while (start[2 + boundary_len] != '-') {
+		end = find_boundary (start + 2 + boundary_len, body_end,
+				     boundary, boundary_len);
+		if (!end) {
+			soup_multipart_free (multipart);
+			soup_buffer_free (flattened);
+			return NULL;
+		}
+
+		split = strstr (start, "\r\n\r\n");
+		if (!split || split > end) {
+			soup_multipart_free (multipart);
+			soup_buffer_free (flattened);
+			return NULL;
+		}
+		split += 4;
+
+		/* @start points to the start of the boundary line
+		 * preceding this part, and @split points to the end
+		 * of the headers / start of the body.
+		 *
+		 * We tell soup_headers_parse() to start parsing at
+		 * @start, because it skips the first line of the
+		 * input anyway (expecting it to be either a
+		 * Request-Line or Status-Line).
+		 */
+		part_headers = soup_message_headers_new (SOUP_MESSAGE_HEADERS_MULTIPART);
+		g_ptr_array_add (multipart->headers, part_headers);
+		if (!soup_headers_parse (start, split - 2 - start,
+					 part_headers)) {
+			soup_multipart_free (multipart);
+			soup_buffer_free (flattened);
+			return NULL;
+		}
+
+		/* @split, as previously mentioned, points to the
+		 * start of the body, and @end points to the start of
+		 * the following boundary line, which is to say 2 bytes
+		 * after the end of the body (length clamped to 0 if @split is past that).
+		 */
+		part_body = soup_buffer_new_subbuffer (flattened,
+						       split - flattened->data,
+						       end - 2 >= split ? end - 2 - split : 0);
+		g_ptr_array_add (multipart->bodies, part_body);
+
+		start = end;
+	}
+
+	soup_buffer_free (flattened);
+	return multipart;
+}
+
+/**
+ * soup_multipart_get_length:
+ * @multipart: a #SoupMultipart
+ *
+ * Gets the number of body parts in @multipart
+ *
+ * Return value: the number of body parts in @multipart
+ *
+ * Since: 2.26
+ **/
+int
+soup_multipart_get_length (SoupMultipart *multipart)
+{
+	return multipart->bodies->len;
+}
+
+/**
+ * soup_multipart_get_part:
+ * @multipart: a #SoupMultipart
+ * @part: the part number to get (counting from 0)
+ * @headers: (out) (transfer none): return location for the MIME part
+ * headers
+ * @body: (out) (transfer none): return location for the MIME part
+ * body
+ *
+ * Gets the indicated body part from @multipart.
+ * + * Return value: %TRUE on success, %FALSE if @part is out of range (in + * which case @headers and @body won't be set) + * + * Since: 2.26 + **/ +gboolean +soup_multipart_get_part (SoupMultipart *multipart, int part, + SoupMessageHeaders **headers, SoupBuffer **body) +{ + if (part < 0 || part >= multipart->bodies->len) + return FALSE; + *headers = multipart->headers->pdata[part]; + *body = multipart->bodies->pdata[part]; + return TRUE; +} + +/** + * soup_multipart_append_part: + * @multipart: a #SoupMultipart + * @headers: the MIME part headers + * @body: the MIME part body + * + * Adds a new MIME part to @multipart with the given headers and body. + * (The multipart will make its own copies of @headers and @body, so + * you should free your copies if you are not using them for anything + * else.) + * + * Since: 2.26 + **/ +void +soup_multipart_append_part (SoupMultipart *multipart, + SoupMessageHeaders *headers, + SoupBuffer *body) +{ + SoupMessageHeaders *headers_copy; + SoupMessageHeadersIter iter; + const char *name, *value; + + /* Copying @headers is annoying, but the alternatives seem + * worse: + * + * 1) We don't want to use g_boxed_copy, because + * SoupMessageHeaders actually implements that as just a + * ref, which would be confusing since SoupMessageHeaders + * is mutable and the caller might modify @headers after + * appending it. + * + * 2) We can't change SoupMessageHeaders to not just do a ref + * from g_boxed_copy, because that would break language + * bindings (which need to be able to hold a ref on + * msg->request_headers, but don't want to duplicate it). + * + * 3) We don't want to steal the reference to @headers, + * because then we'd have to either also steal the + * reference to @body (which would be inconsistent with + * other SoupBuffer methods), or NOT steal the reference to + * @body, in which case there'd be inconsistency just + * between the two arguments of this method! 
+ */ + headers_copy = soup_message_headers_new (SOUP_MESSAGE_HEADERS_MULTIPART); + soup_message_headers_iter_init (&iter, headers); + while (soup_message_headers_iter_next (&iter, &name, &value)) + soup_message_headers_append (headers_copy, name, value); + + g_ptr_array_add (multipart->headers, headers_copy); + g_ptr_array_add (multipart->bodies, soup_buffer_copy (body)); +} + +/** + * soup_multipart_append_form_string: + * @multipart: a multipart (presumably of type "multipart/form-data") + * @control_name: the name of the control associated with @data + * @data: the body data + * + * Adds a new MIME part containing @data to @multipart, using + * "Content-Disposition: form-data", as per the HTML forms + * specification. See soup_form_request_new_from_multipart() for more + * details. + * + * Since: 2.26 + **/ +void +soup_multipart_append_form_string (SoupMultipart *multipart, + const char *control_name, const char *data) +{ + SoupBuffer *body; + + body = soup_buffer_new (SOUP_MEMORY_COPY, data, strlen (data)); + soup_multipart_append_form_file (multipart, control_name, + NULL, NULL, body); + soup_buffer_free (body); +} + +/** + * soup_multipart_append_form_file: + * @multipart: a multipart (presumably of type "multipart/form-data") + * @control_name: the name of the control associated with this file + * @filename: the name of the file, or %NULL if not known + * @content_type: the MIME type of the file, or %NULL if not known + * @body: the file data + * + * Adds a new MIME part containing @body to @multipart, using + * "Content-Disposition: form-data", as per the HTML forms + * specification. See soup_form_request_new_from_multipart() for more + * details. 
+ * + * Since: 2.26 + **/ +void +soup_multipart_append_form_file (SoupMultipart *multipart, + const char *control_name, const char *filename, + const char *content_type, SoupBuffer *body) +{ + SoupMessageHeaders *headers; + GString *disposition; + + headers = soup_message_headers_new (SOUP_MESSAGE_HEADERS_MULTIPART); + disposition = g_string_new ("form-data; "); + soup_header_g_string_append_param_quoted (disposition, "name", control_name); + if (filename) { + g_string_append (disposition, "; "); + soup_header_g_string_append_param_quoted (disposition, "filename", filename); + } + soup_message_headers_append (headers, "Content-Disposition", + disposition->str); + g_string_free (disposition, TRUE); + + if (content_type) { + soup_message_headers_append (headers, "Content-Type", + content_type); + } + + g_ptr_array_add (multipart->headers, headers); + g_ptr_array_add (multipart->bodies, soup_buffer_copy (body)); +} + +/** + * soup_multipart_to_message: + * @multipart: a #SoupMultipart + * @dest_headers: the headers of the HTTP message to serialize @multipart to + * @dest_body: the body of the HTTP message to serialize @multipart to + * + * Serializes @multipart to @dest_headers and @dest_body. + * + * Since: 2.26 + **/ +void +soup_multipart_to_message (SoupMultipart *multipart, + SoupMessageHeaders *dest_headers, + SoupMessageBody *dest_body) +{ + SoupMessageHeaders *part_headers; + SoupBuffer *part_body; + SoupMessageHeadersIter iter; + const char *name, *value; + GString *str; + GHashTable *params; + int i; + + params = g_hash_table_new (g_str_hash, g_str_equal); + g_hash_table_insert (params, "boundary", multipart->boundary); + soup_message_headers_set_content_type (dest_headers, + multipart->mime_type, + params); + g_hash_table_destroy (params); + + for (i = 0; i < multipart->bodies->len; i++) { + part_headers = multipart->headers->pdata[i]; + part_body = multipart->bodies->pdata[i]; + + str = g_string_new (i == 0 ? 
NULL : "\r\n"); + g_string_append (str, "--"); + g_string_append (str, multipart->boundary); + g_string_append (str, "\r\n"); + soup_message_headers_iter_init (&iter, part_headers); + while (soup_message_headers_iter_next (&iter, &name, &value)) + g_string_append_printf (str, "%s: %s\r\n", name, value); + g_string_append (str, "\r\n"); + soup_message_body_append (dest_body, SOUP_MEMORY_TAKE, + str->str, str->len); + g_string_free (str, FALSE); + + soup_message_body_append_buffer (dest_body, part_body); + } + + str = g_string_new ("\r\n--"); + g_string_append (str, multipart->boundary); + g_string_append (str, "--\r\n"); + soup_message_body_append (dest_body, SOUP_MEMORY_TAKE, + str->str, str->len); + g_string_free (str, FALSE); + + /* (The "\r\n" after the close-delimiter seems wrong according + * to my reading of RFCs 2046 and 2616, but that's what + * everyone else does.) + */ +} + +/** + * soup_multipart_free: + * @multipart: a #SoupMultipart + * + * Frees @multipart + * + * Since: 2.26 + **/ +void +soup_multipart_free (SoupMultipart *multipart) +{ + int i; + + g_free (multipart->mime_type); + g_free (multipart->boundary); + for (i = 0; i < multipart->headers->len; i++) + soup_message_headers_free (multipart->headers->pdata[i]); + g_ptr_array_free (multipart->headers, TRUE); + for (i = 0; i < multipart->bodies->len; i++) + soup_buffer_free (multipart->bodies->pdata[i]); + g_ptr_array_free (multipart->bodies, TRUE); + + g_slice_free (SoupMultipart, multipart); +} + +static SoupMultipart * +soup_multipart_copy (SoupMultipart *multipart) +{ + SoupMultipart *copy; + int i; + + copy = soup_multipart_new_internal (g_strdup (multipart->mime_type), + g_strdup (multipart->boundary)); + for (i = 0; i < multipart->bodies->len; i++) { + soup_multipart_append_part (copy, + multipart->headers->pdata[i], + multipart->bodies->pdata[i]); + } + return copy; +} + +GType +soup_multipart_get_type (void) +{ + static volatile gsize type_volatile = 0; + + if (g_once_init_enter 
(&type_volatile)) { + GType type = g_boxed_type_register_static ( + g_intern_static_string ("SoupMultipart"), + (GBoxedCopyFunc) soup_multipart_copy, + (GBoxedFreeFunc) soup_multipart_free); + g_once_init_leave (&type_volatile, type); + } + return type_volatile; +} diff --git a/libsoup/soup-multipart.h b/libsoup/soup-multipart.h new file mode 100644 index 0000000..cc01eec --- /dev/null +++ b/libsoup/soup-multipart.h @@ -0,0 +1,51 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2008 Red Hat, Inc. + */ + +#ifndef SOUP_MULTIPART_H +#define SOUP_MULTIPART_H 1 + +#include +#include +#include + +G_BEGIN_DECLS + +typedef struct SoupMultipart SoupMultipart; + +GType soup_multipart_get_type (void); +#define SOUP_TYPE_MULTIPART (soup_multipart_get_type ()) + +SoupMultipart *soup_multipart_new (const char *mime_type); +SoupMultipart *soup_multipart_new_from_message (SoupMessageHeaders *headers, + SoupMessageBody *body); + +int soup_multipart_get_length (SoupMultipart *multipart); +gboolean soup_multipart_get_part (SoupMultipart *multipart, + int part, + SoupMessageHeaders **headers, + SoupBuffer **body); + +void soup_multipart_append_part (SoupMultipart *multipart, + SoupMessageHeaders *headers, + SoupBuffer *body); + +void soup_multipart_append_form_string (SoupMultipart *multipart, + const char *control_name, + const char *data); +void soup_multipart_append_form_file (SoupMultipart *multipart, + const char *control_name, + const char *filename, + const char *content_type, + SoupBuffer *body); + +void soup_multipart_to_message (SoupMultipart *multipart, + SoupMessageHeaders *dest_headers, + SoupMessageBody *dest_body); + +void soup_multipart_free (SoupMultipart *multipart); + +G_END_DECLS + +#endif /* SOUP_MULTIPART_H */ diff --git a/libsoup/soup-password-manager-gnome.c b/libsoup/soup-password-manager-gnome.c new file mode 100644 index 0000000..ae65564 --- /dev/null +++ b/libsoup/soup-password-manager-gnome.c @@ -0,0 
+1,233 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * soup-password-manager-gnome.c: GNOME-keyring-based password manager + * + * Copyright (C) 2008 Red Hat, Inc. + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#define LIBSOUP_I_HAVE_READ_BUG_594377_AND_KNOW_SOUP_PASSWORD_MANAGER_MIGHT_GO_AWAY + +#include "soup-password-manager-gnome.h" +#include "soup-auth.h" +#include "soup-session-feature.h" +#include "soup-uri.h" + +#include + +static void soup_password_manager_gnome_interface_init (SoupPasswordManagerInterface *password_manager_interface); + +G_DEFINE_TYPE_EXTENDED (SoupPasswordManagerGNOME, soup_password_manager_gnome, G_TYPE_OBJECT, 0, + G_IMPLEMENT_INTERFACE (SOUP_TYPE_SESSION_FEATURE, NULL) + G_IMPLEMENT_INTERFACE (SOUP_TYPE_PASSWORD_MANAGER, soup_password_manager_gnome_interface_init)) + +static void get_passwords_async (SoupPasswordManager *password_manager, + SoupMessage *msg, + SoupAuth *auth, + gboolean retrying, + GMainContext *async_context, + GCancellable *cancellable, + SoupPasswordManagerCallback callback, + gpointer user_data); +static void get_passwords_sync (SoupPasswordManager *password_manager, + SoupMessage *msg, + SoupAuth *auth, + GCancellable *cancellable); + +static void +soup_password_manager_gnome_init (SoupPasswordManagerGNOME *manager_gnome) +{ +} + +static void +soup_password_manager_gnome_class_init (SoupPasswordManagerGNOMEClass *gnome_class) +{ +} + +static void +soup_password_manager_gnome_interface_init (SoupPasswordManagerInterface *password_manager_interface) +{ + password_manager_interface->get_passwords_async = get_passwords_async; + password_manager_interface->get_passwords_sync = get_passwords_sync; +} + + +static void +save_password_callback (GnomeKeyringResult result, guint32 val, gpointer data) +{ +} + +static void +async_save_password (SoupAuth *auth, const char *username, + const char *password, gpointer user_data) +{ + SoupURI *uri = user_data; + + 
gnome_keyring_set_network_password ( + NULL, /* use default keyring */ + username, + soup_auth_get_realm (auth), + uri->host, + NULL, + uri->scheme, + soup_auth_get_scheme_name (auth), + uri->port, + password, + save_password_callback, NULL, NULL); +} + +static void +sync_save_password (SoupAuth *auth, const char *username, + const char *password, gpointer user_data) +{ + SoupURI *uri = user_data; + guint32 item_id; + + gnome_keyring_set_network_password_sync ( + NULL, /* use default keyring */ + username, + soup_auth_get_realm (auth), + uri->host, + NULL, + uri->scheme, + soup_auth_get_scheme_name (auth), + uri->port, + password, + &item_id); +} + +static void +update_auth_for_passwords (SoupAuth *auth, SoupMessage *msg, + GList *passwords, gboolean async) +{ + GnomeKeyringNetworkPasswordData *pdata; + SoupURI *uri; + + while (passwords) { + pdata = passwords->data; + soup_auth_has_saved_password (auth, pdata->user, + pdata->password); + passwords = passwords->next; + } + + uri = g_object_get_data (G_OBJECT (auth), + "SoupPasswordManagerGNOME-save_password-uri"); + if (uri) { + g_signal_handlers_disconnect_by_func (auth, async_save_password, uri); + g_signal_handlers_disconnect_by_func (auth, sync_save_password, uri); + } + + uri = soup_uri_copy (soup_message_get_uri (msg)); + g_signal_connect (auth, "save_password", + G_CALLBACK (async ? async_save_password : sync_save_password), + uri); + g_object_set_data_full (G_OBJECT (auth), + "SoupPasswordManagerGNOME-save_password-uri", + uri, (GDestroyNotify)soup_uri_free); +} + +typedef struct { + SoupPasswordManager *password_manager; + SoupMessage *msg; + SoupAuth *auth; + gboolean retrying; + + SoupPasswordManagerCallback callback; + gpointer user_data; + + gpointer request; +} SoupPasswordManagerGNOMEAuthData; + +static void +find_password_callback (GnomeKeyringResult result, GList *list, + gpointer user_data) +{ + SoupPasswordManagerGNOMEAuthData *auth_data = user_data; + + /* FIXME: check result? 
*/ + + update_auth_for_passwords (auth_data->auth, auth_data->msg, list, TRUE); + auth_data->callback (auth_data->password_manager, + auth_data->msg, auth_data->auth, + auth_data->retrying, auth_data->user_data); + + /* gnome-keyring will call free_auth_data to clean up for us. */ +} + +static void +free_auth_data (gpointer data) +{ + SoupPasswordManagerGNOMEAuthData *auth_data = data; + + g_object_unref (auth_data->auth); + g_object_unref (auth_data->msg); + g_slice_free (SoupPasswordManagerGNOMEAuthData, auth_data); +} + +static void +get_passwords_async (SoupPasswordManager *password_manager, + SoupMessage *msg, + SoupAuth *auth, + gboolean retrying, + GMainContext *async_context, + GCancellable *cancellable, + SoupPasswordManagerCallback callback, + gpointer user_data) +{ + SoupPasswordManagerGNOMEAuthData *auth_data; + SoupURI *uri = soup_message_get_uri (msg); + + auth_data = g_slice_new (SoupPasswordManagerGNOMEAuthData); + auth_data->password_manager = password_manager; + auth_data->msg = g_object_ref (msg); + auth_data->auth = g_object_ref (auth); + auth_data->retrying = retrying; + + /* FIXME: async_context, cancellable */ + + auth_data->callback = callback; + auth_data->user_data = user_data; + + /* FIXME: should we be specifying protocol and port here, or + * leaving them NULL/0 and filtering results in the callback? + * We don't want to send https passwords to http, but the + * reverse might be OK (if that's how other clients tend to + * behave). 
+ */ + auth_data->request = gnome_keyring_find_network_password ( + NULL, /* user -- accept any */ + soup_auth_get_realm (auth), /* domain */ + uri->host, /* server */ + NULL, /* object -- unused */ + uri->scheme, /* protocol */ + soup_auth_get_scheme_name (auth), /* authtype */ + uri->port, /* port */ + find_password_callback, auth_data, free_auth_data); +} + +static void +get_passwords_sync (SoupPasswordManager *password_manager, + SoupMessage *msg, + SoupAuth *auth, + GCancellable *cancellable) +{ + SoupURI *uri = soup_message_get_uri (msg); + GList *results = NULL; + + /* FIXME: cancellable */ + + gnome_keyring_find_network_password_sync ( + NULL, /* user -- accept any */ + soup_auth_get_realm (auth), /* domain */ + uri->host, /* server */ + NULL, /* object -- unused */ + uri->scheme, /* protocol */ + soup_auth_get_scheme_name (auth), /* authtype */ + uri->port, /* port */ + &results); + + update_auth_for_passwords (auth, msg, results, FALSE); +} diff --git a/libsoup/soup-password-manager-gnome.h b/libsoup/soup-password-manager-gnome.h new file mode 100644 index 0000000..290e3ec --- /dev/null +++ b/libsoup/soup-password-manager-gnome.h @@ -0,0 +1,28 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2008 Red Hat, Inc. 
+ */ + +#ifndef SOUP_PASSWORD_MANAGER_GNOME_H +#define SOUP_PASSWORD_MANAGER_GNOME_H 1 + +#include "soup-password-manager.h" +#include "soup-gnome-features.h" + +#define SOUP_PASSWORD_MANAGER_GNOME(object) (G_TYPE_CHECK_INSTANCE_CAST ((object), SOUP_TYPE_PASSWORD_MANAGER_GNOME, SoupPasswordManagerGNOME)) +#define SOUP_PASSWORD_MANAGER_GNOME_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST ((klass), SOUP_TYPE_PASSWORD_MANAGER_GNOME, SoupPasswordManagerGNOMEClass)) +#define SOUP_IS_PASSWORD_MANAGER_GNOME(object) (G_TYPE_CHECK_INSTANCE_TYPE ((object), SOUP_TYPE_PASSWORD_MANAGER_GNOME)) +#define SOUP_IS_PASSWORD_MANAGER_GNOME_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), SOUP_TYPE_PASSWORD_MANAGER_GNOME)) +#define SOUP_PASSWORD_MANAGER_GNOME_GET_CLASS(obj) (G_TYPE_INSTANCE_GET_CLASS ((obj), SOUP_TYPE_PASSWORD_MANAGER_GNOME, SoupPasswordManagerGNOMEClass)) + +typedef struct { + GObject parent; + +} SoupPasswordManagerGNOME; + +typedef struct { + GObjectClass parent_class; + +} SoupPasswordManagerGNOMEClass; + +#endif /* SOUP_PASSWORD_MANAGER_GNOME_H */ diff --git a/libsoup/soup-password-manager.c b/libsoup/soup-password-manager.c new file mode 100644 index 0000000..5f629ee --- /dev/null +++ b/libsoup/soup-password-manager.c @@ -0,0 +1,99 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * soup-password-manager.c: HTTP auth password manager interface + * + * Copyright (C) 2008 Red Hat, Inc. 
+ */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#define LIBSOUP_I_HAVE_READ_BUG_594377_AND_KNOW_SOUP_PASSWORD_MANAGER_MIGHT_GO_AWAY + +#include "soup-password-manager.h" +#include "soup-session-feature.h" + +GType +soup_password_manager_get_type (void) +{ + static volatile gsize g_define_type_id__volatile = 0; + if (g_once_init_enter (&g_define_type_id__volatile)) + { + GType g_define_type_id = + g_type_register_static_simple (G_TYPE_INTERFACE, + g_intern_static_string ("SoupPasswordManager"), + sizeof (SoupPasswordManagerInterface), + (GClassInitFunc)NULL, + 0, + (GInstanceInitFunc)NULL, + (GTypeFlags) 0); + g_type_interface_add_prerequisite (g_define_type_id, G_TYPE_OBJECT); + g_type_interface_add_prerequisite (g_define_type_id, SOUP_TYPE_SESSION_FEATURE); + g_once_init_leave (&g_define_type_id__volatile, g_define_type_id); + } + return g_define_type_id__volatile; +} + +/** + * soup_password_manager_get_passwords_async: + * @password_manager: the #SoupPasswordManager + * @msg: the #SoupMessage being authenticated + * @auth: the #SoupAuth being authenticated + * @retrying: whether or not this is a re-attempt to authenticate + * @async_context: (allow-none): the #GMainContext to invoke @callback in + * @cancellable: a #GCancellable, or %NULL + * @callback: callback to invoke after fetching passwords + * @user_data: data for @callback + * + * Asynchronously attempts to look up saved passwords for @auth/@msg + * and then calls @callback after updating @auth with the information. + * Also registers @auth with @password_manager so that if the caller + * calls soup_auth_save_password() on it, the password will be saved. + * + * #SoupPasswordManager does not actually use the @retrying flag itself; + * it just passes its value on to @callback. + * + * If @cancellable is cancelled, @callback will still be invoked. 
+ * + * Since: 2.28 + **/ +void +soup_password_manager_get_passwords_async (SoupPasswordManager *password_manager, + SoupMessage *msg, + SoupAuth *auth, + gboolean retrying, + GMainContext *async_context, + GCancellable *cancellable, + SoupPasswordManagerCallback callback, + gpointer user_data) +{ + SOUP_PASSWORD_MANAGER_GET_CLASS (password_manager)-> + get_passwords_async (password_manager, msg, auth, retrying, + async_context, cancellable, + callback, user_data); +} + +/** + * soup_password_manager_get_passwords_sync: + * @password_manager: the #SoupPasswordManager + * @msg: the #SoupMessage being authenticated + * @auth: the #SoupAuth being authenticated + * @cancellable: a #GCancellable, or %NULL + * + * Synchronously attempts to look up saved passwords for @auth/@msg + * and updates @auth with the information. Also registers @auth with + * @password_manager so that if the caller calls + * soup_auth_save_password() on it, the password will be saved. + * + * Since: 2.28 + **/ +void +soup_password_manager_get_passwords_sync (SoupPasswordManager *password_manager, + SoupMessage *msg, + SoupAuth *auth, + GCancellable *cancellable) +{ + SOUP_PASSWORD_MANAGER_GET_CLASS (password_manager)-> + get_passwords_sync (password_manager, msg, auth, cancellable); +} diff --git a/libsoup/soup-password-manager.h b/libsoup/soup-password-manager.h new file mode 100644 index 0000000..775f84c --- /dev/null +++ b/libsoup/soup-password-manager.h @@ -0,0 +1,58 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2008 Red Hat, Inc. 
+ */ + +#ifndef SOUP_PASSWORD_MANAGER_H +#define SOUP_PASSWORD_MANAGER_H 1 + +#ifdef LIBSOUP_I_HAVE_READ_BUG_594377_AND_KNOW_SOUP_PASSWORD_MANAGER_MIGHT_GO_AWAY + +#include + +#define SOUP_TYPE_PASSWORD_MANAGER (soup_password_manager_get_type ()) +#define SOUP_PASSWORD_MANAGER(object) (G_TYPE_CHECK_INSTANCE_CAST ((object), SOUP_TYPE_PASSWORD_MANAGER, SoupPasswordManager)) +#define SOUP_PASSWORD_MANAGER_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST ((klass), SOUP_TYPE_PASSWORD_MANAGER, SoupPasswordManagerInterface)) +#define SOUP_IS_PASSWORD_MANAGER(object) (G_TYPE_CHECK_INSTANCE_TYPE ((object), SOUP_TYPE_PASSWORD_MANAGER)) +#define SOUP_IS_PASSWORD_MANAGER_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), SOUP_TYPE_PASSWORD_MANAGER)) +#define SOUP_PASSWORD_MANAGER_GET_CLASS(obj) (G_TYPE_INSTANCE_GET_INTERFACE ((obj), SOUP_TYPE_PASSWORD_MANAGER, SoupPasswordManagerInterface)) + +typedef struct _SoupPasswordManager SoupPasswordManager; + +typedef void (*SoupPasswordManagerCallback) (SoupPasswordManager *, + SoupMessage *, SoupAuth *, + gboolean retrying, + gpointer user_data); + +typedef struct { + GTypeInterface base; + + /* virtual methods */ + void (*get_passwords_async) (SoupPasswordManager *, SoupMessage *, + SoupAuth *, gboolean, + GMainContext *, GCancellable *, + SoupPasswordManagerCallback, gpointer); + void (*get_passwords_sync) (SoupPasswordManager *, SoupMessage *, + SoupAuth *, GCancellable *); + +} SoupPasswordManagerInterface; + +GType soup_password_manager_get_type (void); + +void soup_password_manager_get_passwords_async (SoupPasswordManager *password_manager, + SoupMessage *msg, + SoupAuth *auth, + gboolean retrying, + GMainContext *async_context, + GCancellable *cancellable, + SoupPasswordManagerCallback callback, + gpointer user_data); + +void soup_password_manager_get_passwords_sync (SoupPasswordManager *password_manager, + SoupMessage *msg, + SoupAuth *auth, + GCancellable *cancellable); + +#endif /* 
LIBSOUP_I_HAVE_READ_BUG_594377_AND_KNOW_SOUP_PASSWORD_MANAGER_MIGHT_GO_AWAY */ + +#endif /* SOUP_PASSWORD_MANAGER_H */ diff --git a/libsoup/soup-path-map.c b/libsoup/soup-path-map.c new file mode 100644 index 0000000..60191b4 --- /dev/null +++ b/libsoup/soup-path-map.c @@ -0,0 +1,186 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * soup-path-map.c: URI path prefix-matcher + * + * Copyright (C) 2007 Novell, Inc. + */ + +#include "soup-path-map.h" +#include + +/* This could be replaced with something more clever, like a Patricia + * trie, but it's probably not worth it since the total number of + * mappings is likely to always be small. So we keep an array of + * paths, sorted by decreasing length. (The first prefix match will + * therefore be the longest.) + */ + +typedef struct { + char *path; + int len; + gpointer data; +} SoupPathMapping; + +struct SoupPathMap { + GArray *mappings; + GDestroyNotify free_func; +}; + +/** + * soup_path_map_new: + * @data_free_func: function to use to free data added with + * soup_path_map_add(). + * + * Creates a new %SoupPathMap. + * + * Return value: the new %SoupPathMap + **/ +SoupPathMap * +soup_path_map_new (GDestroyNotify data_free_func) +{ + SoupPathMap *map; + + map = g_slice_new0 (SoupPathMap); + map->mappings = g_array_new (FALSE, FALSE, sizeof (SoupPathMapping)); + map->free_func = data_free_func; + + return map; +} + +/** + * soup_path_map_free: + * @map: a %SoupPathMap + * + * Frees @map and all data stored in it. + **/ +void +soup_path_map_free (SoupPathMap *map) +{ + SoupPathMapping *mappings = (SoupPathMapping *)map->mappings->data; + int i; + + for (i = 0; i < map->mappings->len; i++) { + g_free (mappings[i].path); + if (map->free_func) + map->free_func (mappings[i].data); + } + g_array_free (map->mappings, TRUE); + + g_slice_free (SoupPathMap, map); +} + +/* Scan @map looking for @path or one of its ancestors. 
+ * Sets *@match to the index of a match, or -1 if no match is found. + * Sets *@insert to the index to insert @path at if a new mapping is + * desired. Returns %TRUE if *@match is an exact match. + */ +static gboolean +mapping_lookup (SoupPathMap *map, const char *path, int *match, int *insert) +{ + SoupPathMapping *mappings = (SoupPathMapping *)map->mappings->data; + int i, path_len; + gboolean exact = FALSE; + + *match = -1; + + path_len = strcspn (path, "?"); + for (i = 0; i < map->mappings->len; i++) { + if (mappings[i].len > path_len) + continue; + + if (insert && mappings[i].len < path_len) { + *insert = i; + /* Clear insert so we don't try to set it again */ + insert = NULL; + } + + if (!strncmp (mappings[i].path, path, mappings[i].len)) { + *match = i; + if (path_len == mappings[i].len) + exact = TRUE; + if (!insert) + return exact; + } + } + + if (insert) + *insert = i; + return exact; +} + +/** + * soup_path_map_add: + * @map: a %SoupPathMap + * @path: the path + * @data: the data + * + * Adds @data to @map at @path. If there was already data at @path it + * will be freed. + **/ +void +soup_path_map_add (SoupPathMap *map, const char *path, gpointer data) +{ + SoupPathMapping *mappings = (SoupPathMapping *)map->mappings->data; + int match, insert; + + if (mapping_lookup (map, path, &match, &insert)) { + if (map->free_func) + map->free_func (mappings[match].data); + mappings[match].data = data; + } else { + SoupPathMapping mapping; + + mapping.path = g_strdup (path); + mapping.len = strlen (path); + mapping.data = data; + g_array_insert_val (map->mappings, insert, mapping); + } +} + +/** + * soup_path_map_remove: + * @map: a %SoupPathMap + * @path: the path + * + * Removes @data from @map at @path. (This must be called with the same + * path the data was originally added with, not a subdirectory.) 
+ **/ +void +soup_path_map_remove (SoupPathMap *map, const char *path) +{ + SoupPathMapping *mappings = (SoupPathMapping *)map->mappings->data; + int match; + + if (!mapping_lookup (map, path, &match, NULL)) + return; + + if (map->free_func) + map->free_func (mappings[match].data); + g_free (mappings[match].path); + g_array_remove_index (map->mappings, match); +} + +/** + * soup_path_map_lookup: + * @map: a %SoupPathMap + * @path: the path + * + * Finds the data associated with @path in @map. If there is no data + * specifically associated with @path, it will return the data for the + * closest parent directory of @path that has data associated with it. + * + * Return value: the data set with soup_path_map_add(), or %NULL if no + * data could be found for @path or any of its ancestors. + **/ +gpointer +soup_path_map_lookup (SoupPathMap *map, const char *path) +{ + SoupPathMapping *mappings = (SoupPathMapping *)map->mappings->data; + int match; + + mapping_lookup (map, path, &match, NULL); + if (match == -1) + return NULL; + else + return mappings[match].data; +} diff --git a/libsoup/soup-path-map.h b/libsoup/soup-path-map.h new file mode 100644 index 0000000..f5a8ba8 --- /dev/null +++ b/libsoup/soup-path-map.h @@ -0,0 +1,26 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2007 Novell, Inc. 
+ */ + +#ifndef SOUP_PATH_MAP_H +#define SOUP_PATH_MAP_H 1 + +#include + +typedef struct SoupPathMap SoupPathMap; + +SoupPathMap *soup_path_map_new (GDestroyNotify data_free_func); +void soup_path_map_free (SoupPathMap *map); + +void soup_path_map_add (SoupPathMap *map, + const char *path, + gpointer data); +void soup_path_map_remove (SoupPathMap *map, + const char *path); + +gpointer soup_path_map_lookup (SoupPathMap *map, + const char *path); + + +#endif /* SOUP_PATH_MAP_H */ diff --git a/libsoup/soup-portability.h b/libsoup/soup-portability.h new file mode 100644 index 0000000..1814efb --- /dev/null +++ b/libsoup/soup-portability.h @@ -0,0 +1,26 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2005, Novell, Inc. + */ + +#ifndef SOUP_PORTABILITY_H +#define SOUP_PORTABILITY_H + +#include + +#ifdef G_OS_WIN32 + +#include +#include + +#else + +#include +#include +#include +#include +#include + +#endif + +#endif /* SOUP_PORTABILITY_H */ diff --git a/libsoup/soup-proxy-resolver-default.c b/libsoup/soup-proxy-resolver-default.c new file mode 100644 index 0000000..58be8ab --- /dev/null +++ b/libsoup/soup-proxy-resolver-default.c @@ -0,0 +1,250 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * soup-proxy-resolver-default.c: proxy resolution based on GIO's GProxyResolver + * + * Copyright (C) 2011 Collabora Ltd. + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include + +#include "soup-proxy-resolver-default.h" +#include "soup-proxy-uri-resolver.h" +#include "soup-session-feature.h" +#include "soup-uri.h" + +/** + * SoupProxyResolverDefault: + * + * A #SoupProxyURIResolver implementation that uses the default gio + * #GProxyResolver to resolve proxies. 
+ * + * Since: 2.34 + */ + +static void soup_proxy_resolver_default_interface_init (SoupProxyURIResolverInterface *proxy_resolver_interface); + +G_DEFINE_TYPE_EXTENDED (SoupProxyResolverDefault, soup_proxy_resolver_default, G_TYPE_OBJECT, 0, + G_IMPLEMENT_INTERFACE (SOUP_TYPE_SESSION_FEATURE, NULL) + G_IMPLEMENT_INTERFACE (SOUP_TYPE_PROXY_URI_RESOLVER, soup_proxy_resolver_default_interface_init)) + +enum { + PROP_0, + PROP_GPROXY_RESOLVER +}; + +typedef struct { + GProxyResolver *gproxy_resolver; +} SoupProxyResolverDefaultPrivate; +#define SOUP_PROXY_RESOLVER_DEFAULT_GET_PRIVATE(o) (G_TYPE_INSTANCE_GET_PRIVATE ((o), SOUP_TYPE_PROXY_RESOLVER_DEFAULT, SoupProxyResolverDefaultPrivate)) + +static void +soup_proxy_resolver_default_init (SoupProxyResolverDefault *resolver) +{ +} + +static void +soup_proxy_resolver_default_set_property (GObject *object, guint prop_id, + const GValue *value, GParamSpec *pspec) +{ + SoupProxyResolverDefaultPrivate *priv = SOUP_PROXY_RESOLVER_DEFAULT_GET_PRIVATE (object); + + switch (prop_id) { + case PROP_GPROXY_RESOLVER: + if (priv->gproxy_resolver) + g_object_unref (priv->gproxy_resolver); + priv->gproxy_resolver = g_value_dup_object (value); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); + break; + } +} + +static void +soup_proxy_resolver_default_constructed (GObject *object) +{ + SoupProxyResolverDefaultPrivate *priv = SOUP_PROXY_RESOLVER_DEFAULT_GET_PRIVATE (object); + + if (!priv->gproxy_resolver) { + priv->gproxy_resolver = g_proxy_resolver_get_default (); + g_object_ref (priv->gproxy_resolver); + } + + G_OBJECT_CLASS (soup_proxy_resolver_default_parent_class)->constructed (object); +} + +static void +soup_proxy_resolver_default_finalize (GObject *object) +{ + SoupProxyResolverDefaultPrivate *priv = SOUP_PROXY_RESOLVER_DEFAULT_GET_PRIVATE (object); + + if (priv->gproxy_resolver) + g_object_unref (priv->gproxy_resolver); + + G_OBJECT_CLASS (soup_proxy_resolver_default_parent_class)->finalize 
(object); +} + +static void +soup_proxy_resolver_default_class_init (SoupProxyResolverDefaultClass *klass) +{ + GObjectClass *object_class = G_OBJECT_CLASS (klass); + + g_type_class_add_private (klass, sizeof (SoupProxyResolverDefaultPrivate)); + + object_class->set_property = soup_proxy_resolver_default_set_property; + object_class->constructed = soup_proxy_resolver_default_constructed; + object_class->finalize = soup_proxy_resolver_default_finalize; + + g_object_class_install_property ( + object_class, PROP_GPROXY_RESOLVER, + g_param_spec_object ("gproxy-resolver", + "GProxyResolver", + "The underlying GProxyResolver", + G_TYPE_PROXY_RESOLVER, + G_PARAM_WRITABLE)); +} + +typedef struct { + SoupProxyURIResolver *resolver; + GCancellable *cancellable; + SoupProxyURIResolverCallback callback; + gpointer user_data; +} SoupAsyncData; + +static void +resolved_proxy (GObject *object, GAsyncResult *result, gpointer data) +{ + GProxyResolver *proxy_resolver = G_PROXY_RESOLVER (object); + SoupAsyncData *async_data = data; + GError *error = NULL; + char **proxy_uris = NULL; + SoupURI *proxy_uri = NULL; + guint status = SOUP_STATUS_OK; + + proxy_uris = g_proxy_resolver_lookup_finish (proxy_resolver, + result, + &error); + + if (error || proxy_uris == NULL || proxy_uris[0] == NULL) { + status = SOUP_STATUS_CANT_RESOLVE_PROXY; + goto finish; + } + + /* We need to handle direct:// specially, otherwise + * SoupSession will try to resolve it as the proxy address. 
+ */ + if (!g_strcmp0 (proxy_uris[0], "direct://")) + goto finish; + + proxy_uri = soup_uri_new (proxy_uris[0]); + if (proxy_uri == NULL) + status = SOUP_STATUS_CANT_RESOLVE_PROXY; + +finish: + async_data->callback (async_data->resolver, + status, + proxy_uri, + async_data->user_data); + + if (async_data->cancellable) + g_object_unref (async_data->cancellable); + + g_strfreev (proxy_uris); + + if (proxy_uri) + soup_uri_free (proxy_uri); + + g_object_unref (async_data->resolver); + g_slice_free (SoupAsyncData, async_data); +} + +static void +get_proxy_uri_async (SoupProxyURIResolver *resolver, + SoupURI *uri, + GMainContext *async_context, + GCancellable *cancellable, + SoupProxyURIResolverCallback callback, + gpointer user_data) +{ + SoupProxyResolverDefaultPrivate *priv = SOUP_PROXY_RESOLVER_DEFAULT_GET_PRIVATE (resolver); + SoupAsyncData *async_data; + char *uri_string; + + async_data = g_slice_new0 (SoupAsyncData); + async_data->resolver = (SoupProxyURIResolver*) g_object_ref (resolver); + async_data->cancellable = cancellable; + async_data->callback = callback; + async_data->user_data = user_data; + + uri_string = soup_uri_to_string (uri, FALSE); + + if (async_context) + g_main_context_push_thread_default (async_context); + + g_proxy_resolver_lookup_async (priv->gproxy_resolver, + uri_string, + cancellable ? 
g_object_ref (cancellable) : NULL, + resolved_proxy, + async_data); + + if (async_context) + g_main_context_pop_thread_default (async_context); + + g_free (uri_string); +} + +/* Synchronous half of the SoupProxyURIResolver interface. + * + * Returns SOUP_STATUS_OK with *proxy_uri set to a newly created SoupURI, + * or to NULL when the resolver answers direct://. Returns + * SOUP_STATUS_CANT_RESOLVE_PROXY, with *proxy_uri NULL, when the lookup + * fails, yields no entries, or yields a string soup_uri_new() rejects. + */ +static guint +get_proxy_uri_sync (SoupProxyURIResolver *resolver, + SoupURI *uri, + GCancellable *cancellable, + SoupURI **proxy_uri) +{ + SoupProxyResolverDefaultPrivate *priv = SOUP_PROXY_RESOLVER_DEFAULT_GET_PRIVATE (resolver); + GError *error = NULL; + char** proxy_uris = NULL; + char *uri_string; + guint status = SOUP_STATUS_OK; + + /* The public contract says *proxy_uri is NULL unless a proxy is + * to be used; clear it up front so the direct:// and error paths + * never leave the caller's variable unwritten. + */ + *proxy_uri = NULL; + + uri_string = soup_uri_to_string (uri, FALSE); + + proxy_uris = g_proxy_resolver_lookup (priv->gproxy_resolver, + uri_string, + cancellable, + &error); + + g_free (uri_string); + + if (error || proxy_uris == NULL || proxy_uris[0] == NULL) { + status = SOUP_STATUS_CANT_RESOLVE_PROXY; + goto cleanup; + } + + /* We need to handle direct:// specially, otherwise + * SoupSession will try to resolve it as the proxy address. + */ + if (!g_strcmp0 (proxy_uris[0], "direct://")) + goto cleanup; + + *proxy_uri = soup_uri_new (proxy_uris[0]); + + if (!*proxy_uri) + status = SOUP_STATUS_CANT_RESOLVE_PROXY; + +cleanup: + g_strfreev (proxy_uris); + if (error) + g_clear_error (&error); + return status; +} + +static void +soup_proxy_resolver_default_interface_init (SoupProxyURIResolverInterface *iface) +{ + iface->get_proxy_uri_async = get_proxy_uri_async; + iface->get_proxy_uri_sync = get_proxy_uri_sync; +} diff --git a/libsoup/soup-proxy-resolver-default.h b/libsoup/soup-proxy-resolver-default.h new file mode 100644 index 0000000..6c9715c --- /dev/null +++ b/libsoup/soup-proxy-resolver-default.h @@ -0,0 +1,30 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2011 Collabora Ltd.
+ */ + +#ifndef SOUP_PROXY_RESOLVER_DEFAULT_H +#define SOUP_PROXY_RESOLVER_DEFAULT_H 1 + +#include + +#define SOUP_PROXY_RESOLVER_DEFAULT(object) (G_TYPE_CHECK_INSTANCE_CAST ((object), SOUP_TYPE_PROXY_RESOLVER_DEFAULT, SoupProxyResolverDefault)) +#define SOUP_PROXY_RESOLVER_DEFAULT_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST ((klass), SOUP_TYPE_PROXY_RESOLVER_DEFAULT, SoupProxyResolverDefaultClass)) +#define SOUP_IS_PROXY_RESOLVER_DEFAULT(object) (G_TYPE_CHECK_INSTANCE_TYPE ((object), SOUP_TYPE_PROXY_RESOLVER_DEFAULT)) +#define SOUP_IS_PROXY_RESOLVER_DEFAULT_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), SOUP_TYPE_PROXY_RESOLVER_DEFAULT)) +#define SOUP_PROXY_RESOLVER_DEFAULT_GET_CLASS(obj) (G_TYPE_INSTANCE_GET_CLASS ((obj), SOUP_TYPE_PROXY_RESOLVER_DEFAULT, SoupProxyResolverDefaultClass)) + +typedef struct { + GObject parent; + +} SoupProxyResolverDefault; + +typedef struct { + GObjectClass parent_class; + +} SoupProxyResolverDefaultClass; + +GType soup_proxy_resolver_default_get_type (void); +#define SOUP_TYPE_PROXY_RESOLVER_DEFAULT (soup_proxy_resolver_default_get_type ()) + +#endif /*SOUP_PROXY_RESOLVER_DEFAULT_H*/ diff --git a/libsoup/soup-proxy-resolver-gnome.c b/libsoup/soup-proxy-resolver-gnome.c new file mode 100644 index 0000000..e01dd6a --- /dev/null +++ b/libsoup/soup-proxy-resolver-gnome.c @@ -0,0 +1,54 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * soup-proxy-resolver-gnome.c: GNOME proxy resolution + * + * Copyright (C) 2008 Red Hat, Inc. 
+ */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include + +#include "soup-proxy-resolver-gnome.h" + +G_DEFINE_TYPE (SoupProxyResolverGNOME, soup_proxy_resolver_gnome, SOUP_TYPE_PROXY_RESOLVER_DEFAULT) + +static void +soup_proxy_resolver_gnome_init (SoupProxyResolverGNOME *resolver_gnome) +{ + GProxyResolver *gproxyresolver; + GIOExtensionPoint *ep; + GIOExtension *ext; + GType type; + + /* FIXME: there is no way to force _g_io_modules_ensure_loaded() + * to be run other than by requesting some extension that we + * don't necessarily want. + */ + gproxyresolver = g_proxy_resolver_get_default (); + if (strcmp (G_OBJECT_TYPE_NAME (gproxyresolver), + "GProxyResolverGnome") == 0) + return; + + ep = g_io_extension_point_lookup (G_PROXY_RESOLVER_EXTENSION_POINT_NAME); + if (!ep) + return; + + ext = g_io_extension_point_get_extension_by_name (ep, "gnome"); + if (!ext) + return; + + type = g_io_extension_get_type (ext); + gproxyresolver = g_object_new (type, NULL); + g_object_set (G_OBJECT (resolver_gnome), + "gproxy-resolver", gproxyresolver, + NULL); + g_object_unref (gproxyresolver); +} + +static void +soup_proxy_resolver_gnome_class_init (SoupProxyResolverGNOMEClass *gnome_class) +{ +} diff --git a/libsoup/soup-proxy-resolver-gnome.h b/libsoup/soup-proxy-resolver-gnome.h new file mode 100644 index 0000000..809e9dd --- /dev/null +++ b/libsoup/soup-proxy-resolver-gnome.h @@ -0,0 +1,25 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2008 Red Hat, Inc. + */ + +#ifndef SOUP_PROXY_RESOLVER_GNOME_H +#define SOUP_PROXY_RESOLVER_GNOME_H 1 + +#include "soup-gnome-features.h" +#include "soup-proxy-resolver-default.h" + +/* SOUP_TYPE_PROXY_RESOLVER_GNOME and soup_proxy_resolver_gnome_get_type() + * are declared in soup-gnome-features.h. 
+ */ + +#define SOUP_PROXY_RESOLVER_GNOME(object) (G_TYPE_CHECK_INSTANCE_CAST ((object), SOUP_TYPE_PROXY_RESOLVER_GNOME, SoupProxyResolverGNOME)) +#define SOUP_PROXY_RESOLVER_GNOME_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST ((klass), SOUP_TYPE_PROXY_RESOLVER_GNOME, SoupProxyResolverGNOMEClass)) +#define SOUP_IS_PROXY_RESOLVER_GNOME(object) (G_TYPE_CHECK_INSTANCE_TYPE ((object), SOUP_TYPE_PROXY_RESOLVER_GNOME)) +#define SOUP_IS_PROXY_RESOLVER_GNOME_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), SOUP_TYPE_PROXY_RESOLVER_GNOME)) +#define SOUP_PROXY_RESOLVER_GNOME_GET_CLASS(obj) (G_TYPE_INSTANCE_GET_CLASS ((obj), SOUP_TYPE_PROXY_RESOLVER_GNOME, SoupProxyResolverGNOMEClass)) + +typedef SoupProxyResolverDefault SoupProxyResolverGNOME; +typedef SoupProxyResolverDefaultClass SoupProxyResolverGNOMEClass; + +#endif /*SOUP_PROXY_RESOLVER_GNOME_H*/ diff --git a/libsoup/soup-proxy-resolver-static.c b/libsoup/soup-proxy-resolver-static.c new file mode 100644 index 0000000..2c1f3e9 --- /dev/null +++ b/libsoup/soup-proxy-resolver-static.c @@ -0,0 +1,197 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * soup-proxy-resolver-static.c: Static proxy "resolution" + * + * Copyright (C) 2008 Red Hat, Inc. 
+ */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include + +#include "soup-proxy-resolver-static.h" +#include "soup-address.h" +#include "soup-message.h" +#include "soup-misc.h" +#include "soup-session-feature.h" + +typedef struct { + SoupURI *proxy_uri; + +} SoupProxyResolverStaticPrivate; +#define SOUP_PROXY_RESOLVER_STATIC_GET_PRIVATE(o) (G_TYPE_INSTANCE_GET_PRIVATE ((o), SOUP_TYPE_PROXY_RESOLVER_STATIC, SoupProxyResolverStaticPrivate)) + +static void soup_proxy_resolver_static_interface_init (SoupProxyURIResolverInterface *proxy_resolver_interface); + +G_DEFINE_TYPE_EXTENDED (SoupProxyResolverStatic, soup_proxy_resolver_static, G_TYPE_OBJECT, 0, + G_IMPLEMENT_INTERFACE (SOUP_TYPE_SESSION_FEATURE, NULL) + G_IMPLEMENT_INTERFACE (SOUP_TYPE_PROXY_URI_RESOLVER, soup_proxy_resolver_static_interface_init)) + +enum { + PROP_0, + + PROP_PROXY_URI, + + LAST_PROP +}; + +static void set_property (GObject *object, guint prop_id, + const GValue *value, GParamSpec *pspec); +static void get_property (GObject *object, guint prop_id, + GValue *value, GParamSpec *pspec); + +static void get_proxy_uri_async (SoupProxyURIResolver *proxy_resolver, + SoupURI *uri, + GMainContext *async_context, + GCancellable *cancellable, + SoupProxyURIResolverCallback callback, + gpointer user_data); +static guint get_proxy_uri_sync (SoupProxyURIResolver *proxy_resolver, + SoupURI *uri, + GCancellable *cancellable, + SoupURI **proxy_uri); + +static void +soup_proxy_resolver_static_init (SoupProxyResolverStatic *resolver_static) +{ +} + +static void +finalize (GObject *object) +{ + SoupProxyResolverStaticPrivate *priv = + SOUP_PROXY_RESOLVER_STATIC_GET_PRIVATE (object); + + if (priv->proxy_uri) + soup_uri_free (priv->proxy_uri); + + G_OBJECT_CLASS (soup_proxy_resolver_static_parent_class)->finalize (object); +} + +static void +soup_proxy_resolver_static_class_init (SoupProxyResolverStaticClass *static_class) +{ + GObjectClass *object_class = G_OBJECT_CLASS (static_class); + + 
g_type_class_add_private (static_class, sizeof (SoupProxyResolverStaticPrivate)); + + object_class->set_property = set_property; + object_class->get_property = get_property; + object_class->finalize = finalize; + + g_object_class_install_property ( + object_class, PROP_PROXY_URI, + g_param_spec_boxed (SOUP_PROXY_RESOLVER_STATIC_PROXY_URI, + "Proxy URI", + "The HTTP Proxy to use", + SOUP_TYPE_URI, + G_PARAM_READWRITE)); +} + +static void +set_property (GObject *object, guint prop_id, + const GValue *value, GParamSpec *pspec) +{ + SoupProxyResolverStaticPrivate *priv = + SOUP_PROXY_RESOLVER_STATIC_GET_PRIVATE (object); + SoupURI *uri; + + switch (prop_id) { + case PROP_PROXY_URI: + uri = g_value_get_boxed (value); + if (priv->proxy_uri) + soup_uri_free (priv->proxy_uri); + + priv->proxy_uri = uri ? soup_uri_copy (uri) : NULL; + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); + break; + } +} + +static void +get_property (GObject *object, guint prop_id, + GValue *value, GParamSpec *pspec) +{ + SoupProxyResolverStaticPrivate *priv = + SOUP_PROXY_RESOLVER_STATIC_GET_PRIVATE (object); + + switch (prop_id) { + case PROP_PROXY_URI: + g_value_set_boxed (value, priv->proxy_uri); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); + break; + } +} + +static void +soup_proxy_resolver_static_interface_init (SoupProxyURIResolverInterface *proxy_uri_resolver_interface) +{ + proxy_uri_resolver_interface->get_proxy_uri_async = get_proxy_uri_async; + proxy_uri_resolver_interface->get_proxy_uri_sync = get_proxy_uri_sync; +} + +SoupProxyURIResolver * +soup_proxy_resolver_static_new (SoupURI *proxy_uri) +{ + return g_object_new (SOUP_TYPE_PROXY_RESOLVER_STATIC, + SOUP_PROXY_RESOLVER_STATIC_PROXY_URI, proxy_uri, + NULL); +} + +typedef struct { + SoupProxyURIResolver *proxy_resolver; + SoupProxyURIResolverCallback callback; + gpointer user_data; +} SoupStaticAsyncData; + +static gboolean +idle_return_proxy_uri (gpointer data) +{ 
+ SoupStaticAsyncData *ssad = data; + SoupProxyResolverStaticPrivate *priv = + SOUP_PROXY_RESOLVER_STATIC_GET_PRIVATE (ssad->proxy_resolver); + + ssad->callback (ssad->proxy_resolver, + SOUP_STATUS_OK, priv->proxy_uri, + ssad->user_data); + g_object_unref (ssad->proxy_resolver); + g_slice_free (SoupStaticAsyncData, ssad); + + return FALSE; +} + +/* Asynchronous half of the SoupProxyURIResolver interface: nothing has + * to be looked up, so this only queues idle_return_proxy_uri() through + * soup_add_completion() to deliver the configured URI to @callback. + */ +static void +get_proxy_uri_async (SoupProxyURIResolver *proxy_resolver, + SoupURI *uri, + GMainContext *async_context, + GCancellable *cancellable, + SoupProxyURIResolverCallback callback, + gpointer user_data) +{ + SoupStaticAsyncData *ssad; + + ssad = g_slice_new0 (SoupStaticAsyncData); + ssad->proxy_resolver = g_object_ref (proxy_resolver); + ssad->callback = callback; + ssad->user_data = user_data; + soup_add_completion (async_context, idle_return_proxy_uri, ssad); +} + +/* Synchronous half of the SoupProxyURIResolver interface: always returns + * SOUP_STATUS_OK and stores a copy of the configured proxy URI in + * *proxy_uri, or NULL when no proxy URI is set. + */ +static guint +get_proxy_uri_sync (SoupProxyURIResolver *proxy_resolver, + SoupURI *uri, + GCancellable *cancellable, + SoupURI **proxy_uri) +{ + SoupProxyResolverStaticPrivate *priv = + SOUP_PROXY_RESOLVER_STATIC_GET_PRIVATE (proxy_resolver); + + /* The proxy-uri property may be unset (set_property stores NULL); + * apply the same NULL guard used there instead of handing NULL to + * soup_uri_copy(). + */ + *proxy_uri = priv->proxy_uri ? soup_uri_copy (priv->proxy_uri) : NULL; + return SOUP_STATUS_OK; +} diff --git a/libsoup/soup-proxy-resolver-static.h b/libsoup/soup-proxy-resolver-static.h new file mode 100644 index 0000000..090b84b --- /dev/null +++ b/libsoup/soup-proxy-resolver-static.h @@ -0,0 +1,35 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2008 Red Hat, Inc.
+ */ + +#ifndef SOUP_PROXY_RESOLVER_STATIC_H +#define SOUP_PROXY_RESOLVER_STATIC_H 1 + +#include "soup-proxy-uri-resolver.h" +#include "soup-uri.h" + +#define SOUP_TYPE_PROXY_RESOLVER_STATIC (soup_proxy_resolver_static_get_type ()) +#define SOUP_PROXY_RESOLVER_STATIC(object) (G_TYPE_CHECK_INSTANCE_CAST ((object), SOUP_TYPE_PROXY_RESOLVER_STATIC, SoupProxyResolverStatic)) +#define SOUP_PROXY_RESOLVER_STATIC_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST ((klass), SOUP_TYPE_PROXY_RESOLVER_STATIC, SoupProxyResolverStaticClass)) +#define SOUP_IS_PROXY_RESOLVER_STATIC(object) (G_TYPE_CHECK_INSTANCE_TYPE ((object), SOUP_TYPE_PROXY_RESOLVER_STATIC)) +#define SOUP_IS_PROXY_RESOLVER_STATIC_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), SOUP_TYPE_PROXY_RESOLVER_STATIC)) +#define SOUP_PROXY_RESOLVER_STATIC_GET_CLASS(obj) (G_TYPE_INSTANCE_GET_CLASS ((obj), SOUP_TYPE_PROXY_RESOLVER_STATIC, SoupProxyResolverStaticClass)) + +typedef struct { + GObject parent; + +} SoupProxyResolverStatic; + +typedef struct { + GObjectClass parent_class; + +} SoupProxyResolverStaticClass; + +GType soup_proxy_resolver_static_get_type (void); + +#define SOUP_PROXY_RESOLVER_STATIC_PROXY_URI "proxy-uri" + +SoupProxyURIResolver *soup_proxy_resolver_static_new (SoupURI *proxy_uri); + +#endif /* SOUP_PROXY_RESOLVER_STATIC_H */ diff --git a/libsoup/soup-proxy-resolver.c b/libsoup/soup-proxy-resolver.c new file mode 100644 index 0000000..c356ab1 --- /dev/null +++ b/libsoup/soup-proxy-resolver.c @@ -0,0 +1,188 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * soup-proxy-resolver.c: HTTP proxy resolver interface + * + * Copyright (C) 2008 Red Hat, Inc. 
+ */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include "soup-proxy-resolver.h" +#include "soup-proxy-uri-resolver.h" +#include "soup-address.h" +#include "soup-message.h" +#include "soup-session-feature.h" +#include "soup-uri.h" + +static void soup_proxy_resolver_interface_init (GTypeInterface *iface); +static void soup_proxy_resolver_uri_resolver_interface_init (SoupProxyURIResolverInterface *uri_resolver_interface); + +GType +soup_proxy_resolver_get_type (void) +{ + static volatile gsize g_define_type_id__volatile = 0; + if (g_once_init_enter (&g_define_type_id__volatile)) + { + GType g_define_type_id = + g_type_register_static_simple (G_TYPE_INTERFACE, + g_intern_static_string ("SoupProxyResolver"), + sizeof (SoupProxyResolverInterface), + (GClassInitFunc)soup_proxy_resolver_interface_init, + 0, + (GInstanceInitFunc)NULL, + (GTypeFlags) 0); + g_type_interface_add_prerequisite (g_define_type_id, G_TYPE_OBJECT); + g_type_interface_add_prerequisite (g_define_type_id, SOUP_TYPE_SESSION_FEATURE); + g_once_init_leave (&g_define_type_id__volatile, g_define_type_id); + } + return g_define_type_id__volatile; +} + +static void +proxy_resolver_interface_check (gpointer func_data, gpointer g_iface) +{ + GTypeInterface *iface = g_iface; + + if (iface->g_type != SOUP_TYPE_PROXY_RESOLVER) + return; + + /* If the class hasn't already declared that it implements + * SoupProxyURIResolver, add our own compat implementation. + */ + if (!g_type_is_a (iface->g_instance_type, SOUP_TYPE_PROXY_URI_RESOLVER)) { + const GInterfaceInfo uri_resolver_interface_info = { + (GInterfaceInitFunc) soup_proxy_resolver_uri_resolver_interface_init, NULL, NULL + }; + g_type_add_interface_static (iface->g_instance_type, + SOUP_TYPE_PROXY_URI_RESOLVER, + &uri_resolver_interface_info); + } +} + + +static void +soup_proxy_resolver_interface_init (GTypeInterface *iface) +{ + /* Add an interface_check where we can kludgily add the + * SoupProxyURIResolver interface to all SoupProxyResolvers. 
+ * (SoupProxyResolver can't just implement + * SoupProxyURIResolver itself because interface types can't + * implement other interfaces.) This is an ugly hack, but it + * only gets used if someone actually creates a + * SoupProxyResolver... + */ + g_type_add_interface_check (NULL, proxy_resolver_interface_check); +} + +void +soup_proxy_resolver_get_proxy_async (SoupProxyResolver *proxy_resolver, + SoupMessage *msg, + GMainContext *async_context, + GCancellable *cancellable, + SoupProxyResolverCallback callback, + gpointer user_data) +{ + SOUP_PROXY_RESOLVER_GET_CLASS (proxy_resolver)-> + get_proxy_async (proxy_resolver, msg, + async_context, cancellable, + callback, user_data); +} + +guint +soup_proxy_resolver_get_proxy_sync (SoupProxyResolver *proxy_resolver, + SoupMessage *msg, + GCancellable *cancellable, + SoupAddress **addr) +{ + return SOUP_PROXY_RESOLVER_GET_CLASS (proxy_resolver)-> + get_proxy_sync (proxy_resolver, msg, cancellable, addr); +} + +/* SoupProxyURIResolver implementation */ + +static SoupURI * +uri_from_address (SoupAddress *addr) +{ + SoupURI *proxy_uri; + + proxy_uri = soup_uri_new (NULL); + soup_uri_set_scheme (proxy_uri, SOUP_URI_SCHEME_HTTP); + soup_uri_set_host (proxy_uri, soup_address_get_name (addr)); + soup_uri_set_port (proxy_uri, soup_address_get_port (addr)); + return proxy_uri; +} + +typedef struct { + SoupProxyURIResolverCallback callback; + gpointer user_data; +} ProxyURIResolverAsyncData; + +static void +compat_got_proxy (SoupProxyResolver *proxy_resolver, + SoupMessage *msg, guint status, SoupAddress *proxy_addr, + gpointer user_data) +{ + ProxyURIResolverAsyncData *purad = user_data; + SoupURI *proxy_uri; + + proxy_uri = proxy_addr ? 
uri_from_address (proxy_addr) : NULL; + purad->callback (SOUP_PROXY_URI_RESOLVER (proxy_resolver), + status, proxy_uri, purad->user_data); + /* msg is the dummy message created by compat_get_proxy_uri_async(). */ + g_object_unref (msg); + if (proxy_uri) + soup_uri_free (proxy_uri); + g_slice_free (ProxyURIResolverAsyncData, purad); +} + +/* Compat get_proxy_uri_async: wraps @uri in a throwaway GET message so + * the old SoupProxyResolver API can be used, and lets compat_got_proxy() + * translate the resulting address into a URI for @callback. + */ +static void +compat_get_proxy_uri_async (SoupProxyURIResolver *proxy_uri_resolver, + SoupURI *uri, GMainContext *async_context, + GCancellable *cancellable, + SoupProxyURIResolverCallback callback, + gpointer user_data) +{ + SoupMessage *dummy_msg; + ProxyURIResolverAsyncData *purad; + + dummy_msg = soup_message_new_from_uri (SOUP_METHOD_GET, uri); + + purad = g_slice_new (ProxyURIResolverAsyncData); + purad->callback = callback; + purad->user_data = user_data; + + soup_proxy_resolver_get_proxy_async ( + SOUP_PROXY_RESOLVER (proxy_uri_resolver), dummy_msg, + async_context, cancellable, + compat_got_proxy, purad); +} + +/* Compat get_proxy_uri_sync: same translation as above, synchronously. + * Returns whatever status the old-style resolver reported; *proxy_uri is + * a new URI built from the returned address, or NULL when the resolver + * returned no address. + */ +static guint +compat_get_proxy_uri_sync (SoupProxyURIResolver *proxy_uri_resolver, + SoupURI *uri, GCancellable *cancellable, + SoupURI **proxy_uri) +{ + SoupMessage *dummy_msg; + SoupAddress *proxy_addr = NULL; + guint status; + + dummy_msg = soup_message_new_from_uri (SOUP_METHOD_GET, uri); + status = soup_proxy_resolver_get_proxy_sync ( + SOUP_PROXY_RESOLVER (proxy_uri_resolver), dummy_msg, + cancellable, &proxy_addr); + g_object_unref (dummy_msg); + if (!proxy_addr) { + /* No proxy (or failure): honour the documented contract + * that *proxy_uri is set to NULL rather than left unwritten. + */ + *proxy_uri = NULL; + return status; + } + + *proxy_uri = uri_from_address (proxy_addr); + g_object_unref (proxy_addr); + return status; +} + +static void +soup_proxy_resolver_uri_resolver_interface_init (SoupProxyURIResolverInterface *uri_resolver_interface) +{ + uri_resolver_interface->get_proxy_uri_async = compat_get_proxy_uri_async; + uri_resolver_interface->get_proxy_uri_sync = compat_get_proxy_uri_sync; +} diff --git a/libsoup/soup-proxy-resolver.h b/libsoup/soup-proxy-resolver.h new file mode 100644 index 0000000..dd78714 --- /dev/null +++ b/libsoup/soup-proxy-resolver.h @@ -0,0 +1,59 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t;
c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2008 Red Hat, Inc. + */ + +#ifndef SOUP_PROXY_RESOLVER_H +#define SOUP_PROXY_RESOLVER_H 1 + +#include + +G_BEGIN_DECLS + +#ifndef LIBSOUP_DISABLE_DEPRECATED + +#define SOUP_TYPE_PROXY_RESOLVER (soup_proxy_resolver_get_type ()) +#define SOUP_PROXY_RESOLVER(object) (G_TYPE_CHECK_INSTANCE_CAST ((object), SOUP_TYPE_PROXY_RESOLVER, SoupProxyResolver)) +#define SOUP_PROXY_RESOLVER_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST ((klass), SOUP_TYPE_PROXY_RESOLVER, SoupProxyResolverInterface)) +#define SOUP_IS_PROXY_RESOLVER(object) (G_TYPE_CHECK_INSTANCE_TYPE ((object), SOUP_TYPE_PROXY_RESOLVER)) +#define SOUP_IS_PROXY_RESOLVER_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), SOUP_TYPE_PROXY_RESOLVER)) +#define SOUP_PROXY_RESOLVER_GET_CLASS(obj) (G_TYPE_INSTANCE_GET_INTERFACE ((obj), SOUP_TYPE_PROXY_RESOLVER, SoupProxyResolverInterface)) + +typedef struct _SoupProxyResolver SoupProxyResolver; + +typedef void (*SoupProxyResolverCallback) (SoupProxyResolver *, SoupMessage *, + guint, SoupAddress *, gpointer); + +typedef struct { + GTypeInterface base; + + /* virtual methods */ + void (*get_proxy_async) (SoupProxyResolver *proxy_resolver, + SoupMessage *msg, GMainContext *async_context, + GCancellable *cancellable, + SoupProxyResolverCallback callaback, + gpointer user_data); + guint (*get_proxy_sync) (SoupProxyResolver *proxy_resolver, + SoupMessage *msg, GCancellable *cancellable, + SoupAddress **addr); + +} SoupProxyResolverInterface; + +GType soup_proxy_resolver_get_type (void); + +void soup_proxy_resolver_get_proxy_async (SoupProxyResolver *proxy_resolver, + SoupMessage *msg, + GMainContext *async_context, + GCancellable *cancellable, + SoupProxyResolverCallback callback, + gpointer user_data); +guint soup_proxy_resolver_get_proxy_sync (SoupProxyResolver *proxy_resolver, + SoupMessage *msg, + GCancellable *cancellable, + SoupAddress **addr); + +#endif + +G_END_DECLS + +#endif /*SOUP_PROXY_RESOLVER_H*/ diff --git 
a/libsoup/soup-proxy-uri-resolver.c b/libsoup/soup-proxy-uri-resolver.c new file mode 100644 index 0000000..556ba78 --- /dev/null +++ b/libsoup/soup-proxy-uri-resolver.c @@ -0,0 +1,97 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * soup-proxy-uri-resolver.c: HTTP proxy resolver interface, take 2 + * + * Copyright (C) 2009 Red Hat, Inc. + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include "soup-proxy-uri-resolver.h" +#include "soup-session-feature.h" + +GType +soup_proxy_uri_resolver_get_type (void) +{ + static volatile gsize g_define_type_id__volatile = 0; + if (g_once_init_enter (&g_define_type_id__volatile)) + { + GType g_define_type_id = + g_type_register_static_simple (G_TYPE_INTERFACE, + g_intern_static_string ("SoupProxyURIResolver"), + sizeof (SoupProxyURIResolverInterface), + (GClassInitFunc)NULL, + 0, + (GInstanceInitFunc)NULL, + (GTypeFlags) 0); + g_type_interface_add_prerequisite (g_define_type_id, G_TYPE_OBJECT); + g_once_init_leave (&g_define_type_id__volatile, g_define_type_id); + } + return g_define_type_id__volatile; +} + +/** + * SoupProxyURIResolverCallback: + * @resolver: the #SoupProxyURIResolver + * @status: a #SoupKnownStatusCode + * @proxy_uri: the resolved proxy URI, or %NULL + * @user_data: data passed to soup_proxy_uri_resolver_get_proxy_uri_async() + * + * Callback for soup_proxy_uri_resolver_get_proxy_uri_async() + **/ + +/** + * soup_proxy_uri_resolver_get_proxy_uri_async: + * @proxy_uri_resolver: the #SoupProxyURIResolver + * @uri: the #SoupURI you want a proxy for + * @async_context: (allow-none): the #GMainContext to invoke @callback in + * @cancellable: a #GCancellable, or %NULL + * @callback: (scope async): callback to invoke with the proxy URI + * @user_data: data for @callback + * + * Asynchronously determines a proxy URI to use for @uri and calls + * @callback.
+ * + * Since: 2.26.3 + **/ +void +soup_proxy_uri_resolver_get_proxy_uri_async (SoupProxyURIResolver *proxy_uri_resolver, + SoupURI *uri, + GMainContext *async_context, + GCancellable *cancellable, + SoupProxyURIResolverCallback callback, + gpointer user_data) +{ + SOUP_PROXY_URI_RESOLVER_GET_CLASS (proxy_uri_resolver)-> + get_proxy_uri_async (proxy_uri_resolver, uri, + async_context, cancellable, + callback, user_data); +} + +/** + * soup_proxy_uri_resolver_get_proxy_uri_sync: + * @proxy_uri_resolver: the #SoupProxyURIResolver + * @uri: the #SoupURI you want a proxy for + * @cancellable: a #GCancellable, or %NULL + * @proxy_uri: (out): on return, will contain the proxy URI + * + * Synchronously determines a proxy URI to use for @uri. If @uri + * should be sent via proxy, *@proxy_uri will be set to the URI of the + * proxy, else it will be set to %NULL. + * + * Return value: %SOUP_STATUS_OK if successful, or a transport-level + * error. + * + * Since: 2.26.3 + **/ +guint +soup_proxy_uri_resolver_get_proxy_uri_sync (SoupProxyURIResolver *proxy_uri_resolver, + SoupURI *uri, + GCancellable *cancellable, + SoupURI **proxy_uri) +{ + return SOUP_PROXY_URI_RESOLVER_GET_CLASS (proxy_uri_resolver)-> + get_proxy_uri_sync (proxy_uri_resolver, uri, cancellable, proxy_uri); +} diff --git a/libsoup/soup-proxy-uri-resolver.h b/libsoup/soup-proxy-uri-resolver.h new file mode 100644 index 0000000..1d15b8d --- /dev/null +++ b/libsoup/soup-proxy-uri-resolver.h @@ -0,0 +1,61 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2009 Red Hat, Inc. 
+ */ + +#ifndef SOUP_PROXY_URI_RESOLVER_H +#define SOUP_PROXY_URI_RESOLVER_H 1 + +#include + +G_BEGIN_DECLS + +#define SOUP_TYPE_PROXY_URI_RESOLVER (soup_proxy_uri_resolver_get_type ()) +#define SOUP_PROXY_URI_RESOLVER(object) (G_TYPE_CHECK_INSTANCE_CAST ((object), SOUP_TYPE_PROXY_URI_RESOLVER, SoupProxyURIResolver)) +#define SOUP_PROXY_URI_RESOLVER_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST ((klass), SOUP_TYPE_PROXY_URI_RESOLVER, SoupProxyURIResolverInterface)) +#define SOUP_IS_PROXY_URI_RESOLVER(object) (G_TYPE_CHECK_INSTANCE_TYPE ((object), SOUP_TYPE_PROXY_URI_RESOLVER)) +#define SOUP_IS_PROXY_URI_RESOLVER_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), SOUP_TYPE_PROXY_URI_RESOLVER)) +#define SOUP_PROXY_URI_RESOLVER_GET_CLASS(obj) (G_TYPE_INSTANCE_GET_INTERFACE ((obj), SOUP_TYPE_PROXY_URI_RESOLVER, SoupProxyURIResolverInterface)) + +typedef struct _SoupProxyURIResolver SoupProxyURIResolver; + +typedef void (*SoupProxyURIResolverCallback) (SoupProxyURIResolver *resolver, + guint status, SoupURI *proxy_uri, + gpointer user_data); + +typedef struct { + GTypeInterface base; + + /* virtual methods */ + void (*get_proxy_uri_async) (SoupProxyURIResolver *proxy_uri_resolver, + SoupURI *uri, GMainContext *async_context, + GCancellable *cancellable, + SoupProxyURIResolverCallback callback, + gpointer user_data); + guint (*get_proxy_uri_sync) (SoupProxyURIResolver *proxy_uri_resolver, + SoupURI *uri, GCancellable *cancellable, + SoupURI **proxy_uri); + + /* Padding for future expansion */ + void (*_libsoup_reserved1) (void); + void (*_libsoup_reserved2) (void); + void (*_libsoup_reserved3) (void); + void (*_libsoup_reserved4) (void); +} SoupProxyURIResolverInterface; + +GType soup_proxy_uri_resolver_get_type (void); + +void soup_proxy_uri_resolver_get_proxy_uri_async (SoupProxyURIResolver *proxy_uri_resolver, + SoupURI *uri, + GMainContext *async_context, + GCancellable *cancellable, + SoupProxyURIResolverCallback callback, + gpointer user_data); +guint 
soup_proxy_uri_resolver_get_proxy_uri_sync (SoupProxyURIResolver *proxy_uri_resolver, + SoupURI *uri, + GCancellable *cancellable, + SoupURI **proxy_uri); + +G_END_DECLS + +#endif /*SOUP_PROXY_URI_RESOLVER_H*/ diff --git a/libsoup/soup-request-data.c b/libsoup/soup-request-data.c new file mode 100644 index 0000000..8a2e065 --- /dev/null +++ b/libsoup/soup-request-data.c @@ -0,0 +1,158 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * soup-request-data.c: data: URI request object + * + * Copyright (C) 2009, 2010 Red Hat, Inc. + * Copyright (C) 2010 Igalia, S.L. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#define LIBSOUP_USE_UNSTABLE_REQUEST_API + +#include "soup-request-data.h" + +#include "soup-requester.h" +#include "soup-misc-private.h" +#include +#include + +G_DEFINE_TYPE (SoupRequestData, soup_request_data, SOUP_TYPE_REQUEST) + +struct _SoupRequestDataPrivate { + gsize content_length; + char *content_type; +}; + +static void +soup_request_data_init (SoupRequestData *data) +{ + data->priv = G_TYPE_INSTANCE_GET_PRIVATE (data, SOUP_TYPE_REQUEST_DATA, SoupRequestDataPrivate); +} + +static void +soup_request_data_finalize (GObject *object) +{ + SoupRequestData *data = SOUP_REQUEST_DATA (object); + + g_free (data->priv->content_type); + + G_OBJECT_CLASS (soup_request_data_parent_class)->finalize (object); +} + +static gboolean +soup_request_data_check_uri (SoupRequest *request, + SoupURI *uri, + GError **error) +{ + return uri->host == NULL; +} + +#define BASE64_INDICATOR ";base64" +#define BASE64_INDICATOR_LEN (sizeof (";base64") - 1) + +/* SoupRequest::send for data: URIs. Splits the URI at the first comma + * into "[mediatype][;base64]" and the payload, records the media type + * (if any) as the content type, decodes the payload and returns it as + * an in-memory input stream. + */ +static GInputStream * +soup_request_data_send (SoupRequest *request, + GCancellable *cancellable, + GError **error) +{ + SoupRequestData *data = SOUP_REQUEST_DATA (request); + SoupURI *uri = soup_request_get_uri (request); + GInputStream *memstream; + const char *comma, *start, *end; + gboolean base64 = FALSE; + char *uristr; + + uristr = soup_uri_to_string (uri, FALSE); + /* Skip the 5-character "data:" scheme prefix. */ + start = uristr + 5; + comma = strchr (start, ','); + if (comma && comma != start) { + /* Deal with MIME type / params. + * + * ">=" rather than ">": in "data:;base64,..." the media + * type is omitted, so ";base64" begins exactly at start + * and must still be recognised. + */ + if (comma >= start + BASE64_INDICATOR_LEN && !g_ascii_strncasecmp (comma - BASE64_INDICATOR_LEN, BASE64_INDICATOR, BASE64_INDICATOR_LEN)) { + end = comma - BASE64_INDICATOR_LEN; + base64 = TRUE; + } else + end = comma; + + if (end != start) + data->priv->content_type = uri_decoded_copy (start, end - start); + } + + memstream = g_memory_input_stream_new (); + + if (comma) + start = comma + 1; + + if (*start) { + guchar *buf = (guchar *) soup_uri_decode (start); + + if (base64) + buf =
g_base64_decode_inplace ((gchar*) buf, &data->priv->content_length); + else + data->priv->content_length = strlen ((const char *) buf); + + g_memory_input_stream_add_data (G_MEMORY_INPUT_STREAM (memstream), + buf, data->priv->content_length, + g_free); + } + g_free (uristr); + + return memstream; +} + +static goffset +soup_request_data_get_content_length (SoupRequest *request) +{ + SoupRequestData *data = SOUP_REQUEST_DATA (request); + + return data->priv->content_length; +} + +static const char * +soup_request_data_get_content_type (SoupRequest *request) +{ + SoupRequestData *data = SOUP_REQUEST_DATA (request); + + if (data->priv->content_type) + return data->priv->content_type; + else + return "text/plain;charset=US-ASCII"; +} + +static const char *data_schemes[] = { "data", NULL }; + +static void +soup_request_data_class_init (SoupRequestDataClass *request_data_class) +{ + GObjectClass *object_class = G_OBJECT_CLASS (request_data_class); + SoupRequestClass *request_class = + SOUP_REQUEST_CLASS (request_data_class); + + g_type_class_add_private (request_data_class, sizeof (SoupRequestDataPrivate)); + + request_class->schemes = data_schemes; + + object_class->finalize = soup_request_data_finalize; + + request_class->check_uri = soup_request_data_check_uri; + request_class->send = soup_request_data_send; + request_class->get_content_length = soup_request_data_get_content_length; + request_class->get_content_type = soup_request_data_get_content_type; +} diff --git a/libsoup/soup-request-data.h b/libsoup/soup-request-data.h new file mode 100644 index 0000000..aeb9599 --- /dev/null +++ b/libsoup/soup-request-data.h @@ -0,0 +1,56 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2009, 2010 Red Hat, Inc. + * Copyright (C) 2010 Igalia, S.L. 
+ * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ + +#ifndef SOUP_REQUEST_DATA_H +#define SOUP_REQUEST_DATA_H 1 + +#ifdef LIBSOUP_USE_UNSTABLE_REQUEST_API + +#include "soup-request.h" + +G_BEGIN_DECLS + +#define SOUP_TYPE_REQUEST_DATA (soup_request_data_get_type ()) +#define SOUP_REQUEST_DATA(object) (G_TYPE_CHECK_INSTANCE_CAST ((object), SOUP_TYPE_REQUEST_DATA, SoupRequestData)) +#define SOUP_REQUEST_DATA_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST ((klass), SOUP_TYPE_REQUEST_DATA, SoupRequestDataClass)) +#define SOUP_IS_REQUEST_DATA(object) (G_TYPE_CHECK_INSTANCE_TYPE ((object), SOUP_TYPE_REQUEST_DATA)) +#define SOUP_IS_REQUEST_DATA_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), SOUP_TYPE_REQUEST_DATA)) +#define SOUP_REQUEST_DATA_GET_CLASS(obj) (G_TYPE_INSTANCE_GET_CLASS ((obj), SOUP_TYPE_REQUEST_DATA, SoupRequestDataClass)) + +typedef struct _SoupRequestDataPrivate SoupRequestDataPrivate; + +typedef struct { + SoupRequest parent; + + SoupRequestDataPrivate *priv; +} SoupRequestData; + +typedef struct { + SoupRequestClass parent; +} SoupRequestDataClass; + +GType soup_request_data_get_type (void); + +G_END_DECLS + +#endif /* LIBSOUP_USE_UNSTABLE_REQUEST_API */ + +#endif /* SOUP_REQUEST_DATA_H */ diff --git a/libsoup/soup-request-file.c 
b/libsoup/soup-request-file.c new file mode 100644 index 0000000..d49d374 --- /dev/null +++ b/libsoup/soup-request-file.c @@ -0,0 +1,257 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * soup-request-file.c: file: URI request object + * + * Copyright (C) 2009, 2010 Red Hat, Inc. + * Copyright (C) 2010 Igalia, S.L. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#define LIBSOUP_USE_UNSTABLE_REQUEST_API + +#include + +#include "soup-request-file.h" +#include "soup-directory-input-stream.h" +#include "soup-requester.h" +#include "soup-uri.h" + +G_DEFINE_TYPE (SoupRequestFile, soup_request_file, SOUP_TYPE_REQUEST) + +struct _SoupRequestFilePrivate { + GFile *gfile; + + char *mime_type; + goffset size; +}; + +static void +soup_request_file_init (SoupRequestFile *file) +{ + file->priv = G_TYPE_INSTANCE_GET_PRIVATE (file, SOUP_TYPE_REQUEST_FILE, SoupRequestFilePrivate); + + file->priv->size = -1; +} + +static void +soup_request_file_finalize (GObject *object) +{ + SoupRequestFile *file = SOUP_REQUEST_FILE (object); + + if (file->priv->gfile) + g_object_unref (file->priv->gfile); + g_free (file->priv->mime_type); + + G_OBJECT_CLASS (soup_request_file_parent_class)->finalize (object); +} + +static gboolean +soup_request_file_check_uri (SoupRequest *request, + SoupURI *uri, + GError **error) +{ + /* "file:/foo" is not valid */ + if (!uri->host) + return FALSE; + + /* but it must be "file:///..." or "file://localhost/..." 
*/ + if (uri->scheme == SOUP_URI_SCHEME_FILE && + *uri->host && + g_ascii_strcasecmp (uri->host, "localhost") != 0) + return FALSE; + + return TRUE; +} + +static gboolean +soup_request_file_ensure_file (SoupRequestFile *file, + GCancellable *cancellable, + GError **error) +{ + SoupURI *uri; + + if (file->priv->gfile) + return TRUE; + + uri = soup_request_get_uri (SOUP_REQUEST (file)); + if (uri->scheme == SOUP_URI_SCHEME_FILE) { + gchar *decoded_uri = soup_uri_decode (uri->path); + + if (decoded_uri) { + file->priv->gfile = g_file_new_for_path (decoded_uri); + g_free (decoded_uri); + } + + return TRUE; + } + + g_set_error (error, SOUP_REQUESTER_ERROR, SOUP_REQUESTER_ERROR_UNSUPPORTED_URI_SCHEME, + _("Unsupported URI scheme '%s'"), uri->scheme); + return FALSE; +} + +static GInputStream * +soup_request_file_send (SoupRequest *request, + GCancellable *cancellable, + GError **error) +{ + SoupRequestFile *file = SOUP_REQUEST_FILE (request); + GInputStream *stream; + GError *my_error = NULL; + + if (!soup_request_file_ensure_file (file, cancellable, error)) + return NULL; + + stream = G_INPUT_STREAM (g_file_read (file->priv->gfile, + cancellable, &my_error)); + if (stream == NULL) { + if (g_error_matches (my_error, G_IO_ERROR, G_IO_ERROR_IS_DIRECTORY)) { + GFileEnumerator *enumerator; + g_clear_error (&my_error); + enumerator = g_file_enumerate_children (file->priv->gfile, + "*", + G_FILE_QUERY_INFO_NONE, + cancellable, + error); + if (enumerator) { + stream = soup_directory_input_stream_new (enumerator, + soup_request_get_uri (request)); + g_object_unref (enumerator); + file->priv->mime_type = g_strdup ("text/html"); + } + } else + g_propagate_error (error, my_error); + } else { + GFileInfo *info = g_file_query_info (file->priv->gfile, + G_FILE_ATTRIBUTE_STANDARD_CONTENT_TYPE "," + G_FILE_ATTRIBUTE_STANDARD_SIZE, + 0, cancellable, NULL); + if (info) { + const char *content_type; + file->priv->size = g_file_info_get_size (info); + content_type = 
g_file_info_get_content_type (info); + + if (content_type) + file->priv->mime_type = g_content_type_get_mime_type (content_type); + g_object_unref (info); + } + } + + return stream; +} + +static void +soup_request_file_send_async_thread (GSimpleAsyncResult *res, + GObject *object, + GCancellable *cancellable) +{ + GInputStream *stream; + SoupRequest *request; + GError *error = NULL; + + request = SOUP_REQUEST (object); + + stream = soup_request_file_send (request, cancellable, &error); + + if (stream == NULL) { + g_simple_async_result_set_from_error (res, error); + g_error_free (error); + } else { + g_simple_async_result_set_op_res_gpointer (res, stream, g_object_unref); + } +} + +static void +soup_request_file_send_async (SoupRequest *request, + GCancellable *cancellable, + GAsyncReadyCallback callback, + gpointer user_data) +{ + GSimpleAsyncResult *res; + + res = g_simple_async_result_new (G_OBJECT (request), callback, user_data, soup_request_file_send_async); + + g_simple_async_result_run_in_thread (res, soup_request_file_send_async_thread, G_PRIORITY_DEFAULT, cancellable); + g_object_unref (res); +} + +static GInputStream * +soup_request_file_send_finish (SoupRequest *request, + GAsyncResult *result, + GError **error) +{ + GSimpleAsyncResult *simple = G_SIMPLE_ASYNC_RESULT (result); + + g_warn_if_fail (g_simple_async_result_get_source_tag (simple) == soup_request_file_send_async); + + if (g_simple_async_result_propagate_error (simple, error)) + return NULL; + + return g_object_ref (g_simple_async_result_get_op_res_gpointer (simple)); +} + +static goffset +soup_request_file_get_content_length (SoupRequest *request) +{ + SoupRequestFile *file = SOUP_REQUEST_FILE (request); + + return file->priv->size; +} + +static const char * +soup_request_file_get_content_type (SoupRequest *request) +{ + SoupRequestFile *file = SOUP_REQUEST_FILE (request); + + if (!file->priv->mime_type) + return "application/octet-stream"; + + return file->priv->mime_type; +} + +static const 
char *file_schemes[] = { "file", NULL }; + +static void +soup_request_file_class_init (SoupRequestFileClass *request_file_class) +{ + GObjectClass *object_class = G_OBJECT_CLASS (request_file_class); + SoupRequestClass *request_class = + SOUP_REQUEST_CLASS (request_file_class); + + g_type_class_add_private (request_file_class, sizeof (SoupRequestFilePrivate)); + + request_class->schemes = file_schemes; + + object_class->finalize = soup_request_file_finalize; + + request_class->check_uri = soup_request_file_check_uri; + request_class->send = soup_request_file_send; + request_class->send_async = soup_request_file_send_async; + request_class->send_finish = soup_request_file_send_finish; + request_class->get_content_length = soup_request_file_get_content_length; + request_class->get_content_type = soup_request_file_get_content_type; +} + +GFile * +soup_request_file_get_file (SoupRequestFile *file) +{ + return g_object_ref (file->priv->gfile); +} diff --git a/libsoup/soup-request-file.h b/libsoup/soup-request-file.h new file mode 100644 index 0000000..acb1a08 --- /dev/null +++ b/libsoup/soup-request-file.h @@ -0,0 +1,58 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2009, 2010 Red Hat, Inc. + * Copyright (C) 2010 Igalia, S.L. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. 
If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ + +#ifndef SOUP_REQUEST_FILE_H +#define SOUP_REQUEST_FILE_H 1 + +#ifdef LIBSOUP_USE_UNSTABLE_REQUEST_API + +#include "soup-request.h" + +G_BEGIN_DECLS + +#define SOUP_TYPE_REQUEST_FILE (soup_request_file_get_type ()) +#define SOUP_REQUEST_FILE(object) (G_TYPE_CHECK_INSTANCE_CAST ((object), SOUP_TYPE_REQUEST_FILE, SoupRequestFile)) +#define SOUP_REQUEST_FILE_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST ((klass), SOUP_TYPE_REQUEST_FILE, SoupRequestFileClass)) +#define SOUP_IS_REQUEST_FILE(object) (G_TYPE_CHECK_INSTANCE_TYPE ((object), SOUP_TYPE_REQUEST_FILE)) +#define SOUP_IS_REQUEST_FILE_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), SOUP_TYPE_REQUEST_FILE)) +#define SOUP_REQUEST_FILE_GET_CLASS(obj) (G_TYPE_INSTANCE_GET_CLASS ((obj), SOUP_TYPE_REQUEST_FILE, SoupRequestFileClass)) + +typedef struct _SoupRequestFilePrivate SoupRequestFilePrivate; + +typedef struct { + SoupRequest parent; + + SoupRequestFilePrivate *priv; +} SoupRequestFile; + +typedef struct { + SoupRequestClass parent; +} SoupRequestFileClass; + +GType soup_request_file_get_type (void); + +GFile *soup_request_file_get_file (SoupRequestFile *file); + +G_END_DECLS + +#endif /* LIBSOUP_USE_UNSTABLE_REQUEST_API */ + +#endif /* SOUP_REQUEST_FILE_H */ diff --git a/libsoup/soup-request-http.c b/libsoup/soup-request-http.c new file mode 100644 index 0000000..90a5c48 --- /dev/null +++ b/libsoup/soup-request-http.c @@ -0,0 +1,358 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * soup-request-http.c: http: URI request object + * + * Copyright (C) 2009, 2010 Red Hat, Inc. + * Copyright (C) 2010 Igalia, S.L. 
+ * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include + +#define LIBSOUP_USE_UNSTABLE_REQUEST_API + +#include "soup-request-http.h" +#include "soup-cache.h" +#include "soup-cache-private.h" +#include "soup-content-sniffer.h" +#include "soup-http-input-stream.h" +#include "soup-message.h" +#include "soup-session.h" +#include "soup-uri.h" + +G_DEFINE_TYPE (SoupRequestHTTP, soup_request_http, SOUP_TYPE_REQUEST) + +struct _SoupRequestHTTPPrivate { + SoupMessage *msg; +}; + +static void +soup_request_http_init (SoupRequestHTTP *http) +{ + http->priv = G_TYPE_INSTANCE_GET_PRIVATE (http, SOUP_TYPE_REQUEST_HTTP, SoupRequestHTTPPrivate); +} + +static gboolean +soup_request_http_check_uri (SoupRequest *request, + SoupURI *uri, + GError **error) +{ + SoupRequestHTTP *http = SOUP_REQUEST_HTTP (request); + + if (!SOUP_URI_VALID_FOR_HTTP (uri)) + return FALSE; + + http->priv->msg = soup_message_new_from_uri (SOUP_METHOD_GET, uri); + return TRUE; +} + +static void +soup_request_http_finalize (GObject *object) +{ + SoupRequestHTTP *http = SOUP_REQUEST_HTTP (object); + + if (http->priv->msg) + g_object_unref (http->priv->msg); + + G_OBJECT_CLASS (soup_request_http_parent_class)->finalize (object); +} + 
+static GInputStream * +soup_request_http_send (SoupRequest *request, + GCancellable *cancellable, + GError **error) +{ + SoupHTTPInputStream *httpstream; + SoupRequestHTTP *http = SOUP_REQUEST_HTTP (request); + + httpstream = soup_http_input_stream_new (soup_request_get_session (request), http->priv->msg); + if (!soup_http_input_stream_send (httpstream, cancellable, error)) { + g_object_unref (httpstream); + return NULL; + } + return (GInputStream *)httpstream; +} + + +static void +sent_async (GObject *source, GAsyncResult *result, gpointer user_data) +{ + SoupHTTPInputStream *httpstream = SOUP_HTTP_INPUT_STREAM (source); + GSimpleAsyncResult *simple = user_data; + GError *error = NULL; + + if (soup_http_input_stream_send_finish (httpstream, result, &error)) { + g_simple_async_result_set_op_res_gpointer (simple, httpstream, g_object_unref); + } else { + g_simple_async_result_set_from_error (simple, error); + g_error_free (error); + g_object_unref (httpstream); + } + g_simple_async_result_complete (simple); + g_object_unref (simple); +} + + +typedef struct { + SoupRequestHTTP *req; + SoupMessage *original; + GCancellable *cancellable; + GAsyncReadyCallback callback; + gpointer user_data; +} ConditionalHelper; + +static void +conditional_get_ready_cb (SoupSession *session, SoupMessage *msg, gpointer user_data) +{ + ConditionalHelper *helper = (ConditionalHelper *)user_data; + GSimpleAsyncResult *simple; + SoupHTTPInputStream *httpstream; + + simple = g_simple_async_result_new (G_OBJECT (helper->req), + helper->callback, helper->user_data, + conditional_get_ready_cb); + + if (msg->status_code == SOUP_STATUS_NOT_MODIFIED) { + SoupCache *cache = (SoupCache *)soup_session_get_feature (session, SOUP_TYPE_CACHE); + + httpstream = (SoupHTTPInputStream *)soup_cache_send_response (cache, helper->original); + if (httpstream) { + g_simple_async_result_set_op_res_gpointer (simple, httpstream, g_object_unref); + + soup_message_got_headers (helper->original); + + if 
(soup_session_get_feature_for_message (session, SOUP_TYPE_CONTENT_SNIFFER, helper->original)) { + const char *content_type = + soup_message_headers_get_content_type (helper->original->response_headers, NULL); + soup_message_content_sniffed (helper->original, content_type, NULL); + } + + g_simple_async_result_complete (simple); + + soup_message_finished (helper->original); + + g_object_unref (simple); + } else { + /* Ask again for the resource, somehow the cache cannot locate it */ + httpstream = soup_http_input_stream_new (session, helper->original); + soup_http_input_stream_send_async (httpstream, G_PRIORITY_DEFAULT, + helper->cancellable, sent_async, simple); + } + } else { + /* It is in the cache but it was modified remotely */ + httpstream = soup_http_input_stream_new (session, helper->original); + soup_http_input_stream_send_async (httpstream, G_PRIORITY_DEFAULT, + helper->cancellable, sent_async, simple); + } + + g_object_unref (helper->req); + g_object_unref (helper->original); + g_slice_free (ConditionalHelper, helper); +} + +typedef struct { + SoupRequestHTTP *http; + GAsyncReadyCallback callback; + gpointer user_data; + SoupHTTPInputStream *httpstream; +} SendAsyncHelper; + +static void soup_request_http_send_async (SoupRequest *request, + GCancellable *cancellable, + GAsyncReadyCallback callback, + gpointer user_data); + +static gboolean +send_async_cb (gpointer data) +{ + GSimpleAsyncResult *simple; + SoupSession *session; + SendAsyncHelper *helper = (SendAsyncHelper *)data; + + session = soup_request_get_session (SOUP_REQUEST (helper->http)); + simple = g_simple_async_result_new (G_OBJECT (helper->http), + helper->callback, helper->user_data, + soup_request_http_send_async); + + g_simple_async_result_set_op_res_gpointer (simple, helper->httpstream, g_object_unref); + + /* Issue signals */ + soup_message_got_headers (helper->http->priv->msg); + + if (soup_session_get_feature_for_message (session, SOUP_TYPE_CONTENT_SNIFFER, helper->http->priv->msg)) { + 
const char *content_type = soup_message_headers_get_content_type (helper->http->priv->msg->response_headers, NULL); + soup_message_content_sniffed (helper->http->priv->msg, content_type, NULL); + } + + g_simple_async_result_complete (simple); + + soup_message_finished (helper->http->priv->msg); + + g_object_unref (simple); + + g_object_unref (helper->http); + g_slice_free (SendAsyncHelper, helper); + + return FALSE; +} + +static void +soup_request_http_send_async (SoupRequest *request, + GCancellable *cancellable, + GAsyncReadyCallback callback, + gpointer user_data) +{ + SoupRequestHTTP *http = SOUP_REQUEST_HTTP (request); + SoupHTTPInputStream *httpstream; + GSimpleAsyncResult *simple; + SoupSession *session; + SoupCache *cache; + + session = soup_request_get_session (request); + cache = (SoupCache *)soup_session_get_feature (session, SOUP_TYPE_CACHE); + + if (cache) { + SoupCacheResponse response; + + response = soup_cache_has_response (cache, http->priv->msg); + if (response == SOUP_CACHE_RESPONSE_FRESH) { + SoupHTTPInputStream *httpstream; + + httpstream = (SoupHTTPInputStream *) + soup_cache_send_response (cache, http->priv->msg); + + /* Cached resource file could have been deleted outside + */ + if (httpstream) { + /* Do return the stream asynchronously as in + * the other cases. 
It's not enough to use + * g_simple_async_result_complete_in_idle as + * the signals must be also emitted + * asynchronously + */ + SendAsyncHelper *helper = g_slice_new (SendAsyncHelper); + helper->http = g_object_ref (http); + helper->callback = callback; + helper->user_data = user_data; + helper->httpstream = httpstream; + soup_add_timeout (soup_session_get_async_context (session), + 0, send_async_cb, helper); + return; + } + } else if (response == SOUP_CACHE_RESPONSE_NEEDS_VALIDATION) { + SoupMessage *conditional_msg; + ConditionalHelper *helper; + + conditional_msg = soup_cache_generate_conditional_request (cache, http->priv->msg); + + helper = g_slice_new0 (ConditionalHelper); + helper->req = g_object_ref (http); + helper->original = g_object_ref (http->priv->msg); + helper->cancellable = cancellable; + helper->callback = callback; + helper->user_data = user_data; + soup_session_queue_message (session, conditional_msg, + conditional_get_ready_cb, + helper); + return; + } + } + + simple = g_simple_async_result_new (G_OBJECT (http), + callback, user_data, + soup_request_http_send_async); + httpstream = soup_http_input_stream_new (soup_request_get_session (request), + http->priv->msg); + soup_http_input_stream_send_async (httpstream, G_PRIORITY_DEFAULT, + cancellable, sent_async, simple); +} + +static GInputStream * +soup_request_http_send_finish (SoupRequest *request, + GAsyncResult *result, + GError **error) +{ + GSimpleAsyncResult *simple; + + g_return_val_if_fail (g_simple_async_result_is_valid (result, G_OBJECT (request), soup_request_http_send_async) || g_simple_async_result_is_valid (result, G_OBJECT (request), conditional_get_ready_cb), NULL); + + simple = G_SIMPLE_ASYNC_RESULT (result); + if (g_simple_async_result_propagate_error (simple, error)) + return NULL; + return g_object_ref (g_simple_async_result_get_op_res_gpointer (simple)); +} + +static goffset +soup_request_http_get_content_length (SoupRequest *request) +{ + SoupRequestHTTP *http = 
SOUP_REQUEST_HTTP (request); + + return soup_message_headers_get_content_length (http->priv->msg->response_headers); +} + +static const char * +soup_request_http_get_content_type (SoupRequest *request) +{ + SoupRequestHTTP *http = SOUP_REQUEST_HTTP (request); + + return soup_message_headers_get_content_type (http->priv->msg->response_headers, NULL); +} + +static const char *http_schemes[] = { "http", "https", NULL }; + +static void +soup_request_http_class_init (SoupRequestHTTPClass *request_http_class) +{ + GObjectClass *object_class = G_OBJECT_CLASS (request_http_class); + SoupRequestClass *request_class = + SOUP_REQUEST_CLASS (request_http_class); + + g_type_class_add_private (request_http_class, sizeof (SoupRequestHTTPPrivate)); + + request_class->schemes = http_schemes; + + object_class->finalize = soup_request_http_finalize; + + request_class->check_uri = soup_request_http_check_uri; + request_class->send = soup_request_http_send; + request_class->send_async = soup_request_http_send_async; + request_class->send_finish = soup_request_http_send_finish; + request_class->get_content_length = soup_request_http_get_content_length; + request_class->get_content_type = soup_request_http_get_content_type; +} + +/** + * soup_request_http_get_message: + * @http: a #SoupRequestHTTP object + * + * Gets a new reference to the #SoupMessage associated to this SoupRequest + * + * Returns: a new reference to the #SoupMessage + * + * Since: 2.34 + */ +SoupMessage * +soup_request_http_get_message (SoupRequestHTTP *http) +{ + g_return_val_if_fail (SOUP_IS_REQUEST_HTTP (http), NULL); + + return g_object_ref (http->priv->msg); +} diff --git a/libsoup/soup-request-http.h b/libsoup/soup-request-http.h new file mode 100644 index 0000000..6402646 --- /dev/null +++ b/libsoup/soup-request-http.h @@ -0,0 +1,58 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2009, 2010 Red Hat, Inc. + * Copyright (C) 2010 Igalia, S.L. 
+ * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ + +#ifndef SOUP_REQUEST_HTTP_H +#define SOUP_REQUEST_HTTP_H 1 + +#ifdef LIBSOUP_USE_UNSTABLE_REQUEST_API + +#include "soup-request.h" + +G_BEGIN_DECLS + +#define SOUP_TYPE_REQUEST_HTTP (soup_request_http_get_type ()) +#define SOUP_REQUEST_HTTP(object) (G_TYPE_CHECK_INSTANCE_CAST ((object), SOUP_TYPE_REQUEST_HTTP, SoupRequestHTTP)) +#define SOUP_REQUEST_HTTP_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST ((klass), SOUP_TYPE_REQUEST_HTTP, SoupRequestHTTPClass)) +#define SOUP_IS_REQUEST_HTTP(object) (G_TYPE_CHECK_INSTANCE_TYPE ((object), SOUP_TYPE_REQUEST_HTTP)) +#define SOUP_IS_REQUEST_HTTP_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), SOUP_TYPE_REQUEST_HTTP)) +#define SOUP_REQUEST_HTTP_GET_CLASS(obj) (G_TYPE_INSTANCE_GET_CLASS ((obj), SOUP_TYPE_REQUEST_HTTP, SoupRequestHTTPClass)) + +typedef struct _SoupRequestHTTPPrivate SoupRequestHTTPPrivate; + +typedef struct { + SoupRequest parent; + + SoupRequestHTTPPrivate *priv; +} SoupRequestHTTP; + +typedef struct { + SoupRequestClass parent; +} SoupRequestHTTPClass; + +GType soup_request_http_get_type (void); + +SoupMessage *soup_request_http_get_message (SoupRequestHTTP *http); + +G_END_DECLS + +#endif /* LIBSOUP_USE_UNSTABLE_REQUEST_API */ + +#endif 
/* SOUP_REQUEST_HTTP_H */ diff --git a/libsoup/soup-request.c b/libsoup/soup-request.c new file mode 100644 index 0000000..ac85615 --- /dev/null +++ b/libsoup/soup-request.c @@ -0,0 +1,284 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * soup-request.c: Protocol-independent streaming request interface + * + * Copyright (C) 2009, 2010 Red Hat, Inc. + * Copyright (C) 2010, Igalia S.L. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include + +#define LIBSOUP_USE_UNSTABLE_REQUEST_API + +#include "soup-request.h" +#include "soup-requester.h" +#include "soup-session.h" +#include "soup-uri.h" + +/** + * SECTION:soup-request + * @short_description: Protocol-independent streaming request interface + * + * FIXME + */ + +/** + * SoupRequest: + * + * FIXME + * + * Since: 2.34 + */ + +static void soup_request_initable_interface_init (GInitableIface *initable_interface); + +G_DEFINE_TYPE_WITH_CODE (SoupRequest, soup_request, G_TYPE_OBJECT, + G_IMPLEMENT_INTERFACE (G_TYPE_INITABLE, + soup_request_initable_interface_init)) + +enum { + PROP_0, + PROP_URI, + PROP_SESSION +}; + +struct _SoupRequestPrivate { + SoupURI *uri; + SoupSession *session; +}; + +static void +soup_request_init (SoupRequest *request) +{ + request->priv = G_TYPE_INSTANCE_GET_PRIVATE (request, SOUP_TYPE_REQUEST, SoupRequestPrivate); +} + +static void +soup_request_finalize (GObject *object) +{ + SoupRequest *request = SOUP_REQUEST (object); + + if (request->priv->uri) + soup_uri_free (request->priv->uri); + if (request->priv->session) + g_object_unref (request->priv->session); + + G_OBJECT_CLASS (soup_request_parent_class)->finalize (object); +} + +static void +soup_request_set_property (GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + SoupRequest *request = SOUP_REQUEST (object); + + switch (prop_id) { + case PROP_URI: + if (request->priv->uri) + soup_uri_free (request->priv->uri); + request->priv->uri = g_value_dup_boxed (value); + break; + case PROP_SESSION: + if (request->priv->session) + g_object_unref (request->priv->session); + request->priv->session = g_value_dup_object (value); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); + break; + } +} + +static void +soup_request_get_property (GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + SoupRequest *request = SOUP_REQUEST (object); + + 
switch (prop_id) { + case PROP_URI: + g_value_set_boxed (value, request->priv->uri); + break; + case PROP_SESSION: + g_value_set_object (value, request->priv->session); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); + break; + } +} + +static gboolean +soup_request_initable_init (GInitable *initable, + GCancellable *cancellable, + GError **error) +{ + SoupRequest *request = SOUP_REQUEST (initable); + gboolean ok; + + if (!request->priv->uri) { + g_set_error (error, SOUP_REQUESTER_ERROR, SOUP_REQUESTER_ERROR_BAD_URI, + _("No URI provided")); + return FALSE; + } + + ok = SOUP_REQUEST_GET_CLASS (initable)-> + check_uri (request, request->priv->uri, error); + + if (!ok && error) { + char *uri_string = soup_uri_to_string (request->priv->uri, FALSE); + g_set_error (error, SOUP_REQUESTER_ERROR, SOUP_REQUESTER_ERROR_BAD_URI, + _("Invalid '%s' URI: %s"), + request->priv->uri->scheme, + uri_string); + g_free (uri_string); + } + + return ok; +} + +static gboolean +soup_request_default_check_uri (SoupRequest *request, + SoupURI *uri, + GError **error) +{ + return TRUE; +} + +/* Default implementation: assume the sync implementation doesn't block */ +static void +soup_request_default_send_async (SoupRequest *request, + GCancellable *cancellable, + GAsyncReadyCallback callback, + gpointer user_data) +{ + GSimpleAsyncResult *simple; + + simple = g_simple_async_result_new (G_OBJECT (request), + callback, user_data, + soup_request_default_send_async); + g_simple_async_result_complete_in_idle (simple); + g_object_unref (simple); +} + +static GInputStream * +soup_request_default_send_finish (SoupRequest *request, + GAsyncResult *result, + GError **error) +{ + g_return_val_if_fail (g_simple_async_result_is_valid (result, G_OBJECT (request), soup_request_default_send_async), NULL); + + return soup_request_send (request, NULL, error); +} + +GInputStream * +soup_request_send (SoupRequest *request, + GCancellable *cancellable, + GError **error) +{ + 
return SOUP_REQUEST_GET_CLASS (request)-> + send (request, cancellable, error); +} + +void +soup_request_send_async (SoupRequest *request, + GCancellable *cancellable, + GAsyncReadyCallback callback, + gpointer user_data) +{ + SOUP_REQUEST_GET_CLASS (request)-> + send_async (request, cancellable, callback, user_data); +} + +GInputStream * +soup_request_send_finish (SoupRequest *request, + GAsyncResult *result, + GError **error) +{ + return SOUP_REQUEST_GET_CLASS (request)-> + send_finish (request, result, error); +} + +static void +soup_request_class_init (SoupRequestClass *request_class) +{ + GObjectClass *object_class = G_OBJECT_CLASS (request_class); + + g_type_class_add_private (request_class, sizeof (SoupRequestPrivate)); + + request_class->check_uri = soup_request_default_check_uri; + request_class->send_async = soup_request_default_send_async; + request_class->send_finish = soup_request_default_send_finish; + + object_class->finalize = soup_request_finalize; + object_class->set_property = soup_request_set_property; + object_class->get_property = soup_request_get_property; + + g_object_class_install_property ( + object_class, PROP_URI, + g_param_spec_boxed (SOUP_REQUEST_URI, + "URI", + "The request URI", + SOUP_TYPE_URI, + G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property ( + object_class, PROP_SESSION, + g_param_spec_object (SOUP_REQUEST_SESSION, + "Session", + "The request's session", + SOUP_TYPE_SESSION, + G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY)); +} + +static void +soup_request_initable_interface_init (GInitableIface *initable_interface) +{ + initable_interface->init = soup_request_initable_init; +} + +SoupURI * +soup_request_get_uri (SoupRequest *request) +{ + return request->priv->uri; +} + +SoupSession * +soup_request_get_session (SoupRequest *request) +{ + return request->priv->session; +} + +goffset +soup_request_get_content_length (SoupRequest *request) +{ + return SOUP_REQUEST_GET_CLASS 
(request)->get_content_length (request); +} + +const char * +soup_request_get_content_type (SoupRequest *request) +{ + return SOUP_REQUEST_GET_CLASS (request)->get_content_type (request); +} diff --git a/libsoup/soup-request.h b/libsoup/soup-request.h new file mode 100644 index 0000000..a48e917 --- /dev/null +++ b/libsoup/soup-request.h @@ -0,0 +1,100 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2009, 2010 Red Hat, Inc. + * Copyright (C) 2010 Igalia, S.L. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ */ + +#ifndef SOUP_REQUEST_H +#define SOUP_REQUEST_H 1 + +#ifdef LIBSOUP_USE_UNSTABLE_REQUEST_API + +#include + +#include + +G_BEGIN_DECLS + +#define SOUP_TYPE_REQUEST (soup_request_get_type ()) +#define SOUP_REQUEST(obj) (G_TYPE_CHECK_INSTANCE_CAST ((obj), SOUP_TYPE_REQUEST, SoupRequest)) +#define SOUP_REQUEST_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST ((klass), SOUP_TYPE_REQUEST, SoupRequestClass)) +#define SOUP_IS_REQUEST(obj) (G_TYPE_CHECK_INSTANCE_TYPE ((obj), SOUP_TYPE_REQUEST)) +#define SOUP_IS_REQUEST_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), SOUP_TYPE_REQUEST)) +#define SOUP_REQUEST_GET_CLASS(obj) (G_TYPE_INSTANCE_GET_CLASS ((obj), SOUP_TYPE_REQUEST, SoupRequestClass)) + +typedef struct _SoupRequest SoupRequest; +typedef struct _SoupRequestPrivate SoupRequestPrivate; +typedef struct _SoupRequestClass SoupRequestClass; + +struct _SoupRequest { + GObject parent; + + SoupRequestPrivate *priv; +}; + +struct _SoupRequestClass { + GObjectClass parent; + + const char **schemes; + + gboolean (*check_uri) (SoupRequest *req_base, + SoupURI *uri, + GError **error); + + GInputStream * (*send) (SoupRequest *request, + GCancellable *cancellable, + GError **error); + void (*send_async) (SoupRequest *request, + GCancellable *cancellable, + GAsyncReadyCallback callback, + gpointer user_data); + GInputStream * (*send_finish) (SoupRequest *request, + GAsyncResult *result, + GError **error); + + goffset (*get_content_length) (SoupRequest *request); + const char * (*get_content_type) (SoupRequest *request); +}; + +GType soup_request_get_type (void); + +#define SOUP_REQUEST_URI "uri" +#define SOUP_REQUEST_SESSION "session" + +GInputStream *soup_request_send (SoupRequest *request, + GCancellable *cancellable, + GError **error); +void soup_request_send_async (SoupRequest *request, + GCancellable *cancellable, + GAsyncReadyCallback callback, + gpointer user_data); +GInputStream *soup_request_send_finish (SoupRequest *request, + GAsyncResult *result, + GError **error); + 
+SoupURI *soup_request_get_uri (SoupRequest *request); +SoupSession *soup_request_get_session (SoupRequest *request); + +goffset soup_request_get_content_length (SoupRequest *request); +const char *soup_request_get_content_type (SoupRequest *request); + +G_END_DECLS + +#endif /* LIBSOUP_USE_UNSTABLE_REQUEST_API */ + +#endif /* SOUP_REQUEST_H */ diff --git a/libsoup/soup-requester.c b/libsoup/soup-requester.c new file mode 100644 index 0000000..3375a7a --- /dev/null +++ b/libsoup/soup-requester.c @@ -0,0 +1,235 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * soup-requester.c: + * + * Copyright (C) 2010, Igalia S.L. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ */ + +#include "config.h" + +#include + +#define LIBSOUP_USE_UNSTABLE_REQUEST_API + +#include "soup-requester.h" +#include "soup-request-data.h" +#include "soup-request-file.h" +#include "soup-request-http.h" +#include "soup-session-feature.h" +#include "soup-uri.h" + +static SoupSessionFeatureInterface *soup_requester_default_feature_interface; +static void soup_requester_session_feature_init (SoupSessionFeatureInterface *feature_interface, gpointer interface_data); + +struct _SoupRequesterPrivate { + SoupSession *session; + GHashTable *request_types; +}; + +G_DEFINE_TYPE_WITH_CODE (SoupRequester, soup_requester, G_TYPE_OBJECT, + G_IMPLEMENT_INTERFACE (SOUP_TYPE_SESSION_FEATURE, + soup_requester_session_feature_init)) + +static void +soup_requester_init (SoupRequester *requester) +{ + SoupSessionFeature *feature; + + requester->priv = G_TYPE_INSTANCE_GET_PRIVATE (requester, + SOUP_TYPE_REQUESTER, + SoupRequesterPrivate); + + requester->priv->request_types = g_hash_table_new (soup_str_case_hash, + soup_str_case_equal); + + feature = SOUP_SESSION_FEATURE (requester); + soup_session_feature_add_feature (feature, SOUP_TYPE_REQUEST_HTTP); + soup_session_feature_add_feature (feature, SOUP_TYPE_REQUEST_FILE); + soup_session_feature_add_feature (feature, SOUP_TYPE_REQUEST_DATA); +} + +static void +finalize (GObject *object) +{ + SoupRequester *requester = SOUP_REQUESTER (object); + + g_hash_table_destroy (requester->priv->request_types); + + G_OBJECT_CLASS (soup_requester_parent_class)->finalize (object); +} + +static void +soup_requester_class_init (SoupRequesterClass *requester_class) +{ + GObjectClass *object_class = G_OBJECT_CLASS (requester_class); + + g_type_class_add_private (requester_class, sizeof (SoupRequesterPrivate)); + + /* virtual method override */ + object_class->finalize = finalize; +} + +static void +attach (SoupSessionFeature *feature, SoupSession *session) +{ + SoupRequester *requester = SOUP_REQUESTER (feature); + + requester->priv->session = 
session; + + soup_requester_default_feature_interface->attach (feature, session); +} + +static void +detach (SoupSessionFeature *feature, SoupSession *session) +{ + SoupRequester *requester = SOUP_REQUESTER (feature); + + requester->priv->session = NULL; + + soup_requester_default_feature_interface->detach (feature, session); +} + +static gboolean +add_feature (SoupSessionFeature *feature, GType type) +{ + SoupRequester *requester = SOUP_REQUESTER (feature); + SoupRequestClass *request_class; + int i; + + if (!g_type_is_a (type, SOUP_TYPE_REQUEST)) + return FALSE; + + request_class = g_type_class_ref (type); + for (i = 0; request_class->schemes[i]; i++) { + g_hash_table_insert (requester->priv->request_types, + (char *)request_class->schemes[i], + GSIZE_TO_POINTER (type)); + } + return TRUE; +} + +static gboolean +remove_feature (SoupSessionFeature *feature, GType type) +{ + SoupRequester *requester = SOUP_REQUESTER (feature); + SoupRequestClass *request_class; + int i, orig_size; + + if (!g_type_is_a (type, SOUP_TYPE_REQUEST)) + return FALSE; + + request_class = g_type_class_peek (type); + if (!request_class) + return FALSE; + + orig_size = g_hash_table_size (requester->priv->request_types); + for (i = 0; request_class->schemes[i]; i++) { + g_hash_table_remove (requester->priv->request_types, + request_class->schemes[i]); + } + + return g_hash_table_size (requester->priv->request_types) != orig_size; +} + +static gboolean +has_feature (SoupSessionFeature *feature, GType type) +{ + SoupRequester *requester = SOUP_REQUESTER (feature); + GHashTableIter iter; + gpointer key, value; + + if (!g_type_is_a (type, SOUP_TYPE_REQUEST)) + return FALSE; + + g_hash_table_iter_init (&iter, requester->priv->request_types); + while (g_hash_table_iter_next (&iter, &key, &value)) { + if (value == GSIZE_TO_POINTER (type)) + return TRUE; + } + return FALSE; +} + +static void +soup_requester_session_feature_init (SoupSessionFeatureInterface *feature_interface, + gpointer 
interface_data) +{ + soup_requester_default_feature_interface = + g_type_default_interface_peek (SOUP_TYPE_SESSION_FEATURE); + + feature_interface->attach = attach; + feature_interface->detach = detach; + feature_interface->add_feature = add_feature; + feature_interface->remove_feature = remove_feature; + feature_interface->has_feature = has_feature; +} + +SoupRequester * +soup_requester_new (void) +{ + return g_object_new (SOUP_TYPE_REQUESTER, NULL); +} + +SoupRequest * +soup_requester_request (SoupRequester *requester, const char *uri_string, + GError **error) +{ + SoupURI *uri; + SoupRequest *req; + + uri = soup_uri_new (uri_string); + if (!uri) { + g_set_error (error, SOUP_REQUESTER_ERROR, SOUP_REQUESTER_ERROR_BAD_URI, + _("Could not parse URI '%s'"), uri_string); + return NULL; + } + + req = soup_requester_request_uri (requester, uri, error); + soup_uri_free (uri); + return req; +} + +SoupRequest * +soup_requester_request_uri (SoupRequester *requester, SoupURI *uri, + GError **error) +{ + GType request_type; + + g_return_val_if_fail (SOUP_IS_REQUESTER (requester), NULL); + + request_type = (GType)GPOINTER_TO_SIZE (g_hash_table_lookup (requester->priv->request_types, uri->scheme)); + if (!request_type) { + g_set_error (error, SOUP_REQUESTER_ERROR, + SOUP_REQUESTER_ERROR_UNSUPPORTED_URI_SCHEME, + _("Unsupported URI scheme '%s'"), uri->scheme); + return NULL; + } + + return g_initable_new (request_type, NULL, error, + "uri", uri, + "session", requester->priv->session, + NULL); +} + +GQuark +soup_requester_error_quark (void) +{ + static GQuark error; + if (!error) + error = g_quark_from_static_string ("soup_requester_error_quark"); + return error; +} diff --git a/libsoup/soup-requester.h b/libsoup/soup-requester.h new file mode 100644 index 0000000..a10f1e5 --- /dev/null +++ b/libsoup/soup-requester.h @@ -0,0 +1,75 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2010 Igalia S.L. 
+ * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ + +#ifndef SOUP_REQUESTER_H +#define SOUP_REQUESTER_H 1 + +#ifdef LIBSOUP_USE_UNSTABLE_REQUEST_API + +#include +#include + +G_BEGIN_DECLS + +#define SOUP_TYPE_REQUESTER (soup_requester_get_type ()) +#define SOUP_REQUESTER(obj) (G_TYPE_CHECK_INSTANCE_CAST ((obj), SOUP_TYPE_REQUESTER, SoupRequester)) +#define SOUP_REQUESTER_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST ((klass), SOUP_TYPE_REQUESTER, SoupRequesterClass)) +#define SOUP_IS_REQUESTER(obj) (G_TYPE_CHECK_INSTANCE_TYPE ((obj), SOUP_TYPE_REQUESTER)) +#define SOUP_IS_REQUESTER_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), SOUP_TYPE_REQUESTER)) +#define SOUP_REQUESTER_GET_CLASS(obj) (G_TYPE_INSTANCE_GET_CLASS ((obj), SOUP_TYPE_REQUESTER, SoupRequesterClass)) + +typedef struct _SoupRequester SoupRequester; +typedef struct _SoupRequesterPrivate SoupRequesterPrivate; + +struct _SoupRequester { + GObject parent; + + SoupRequesterPrivate *priv; +}; + +typedef struct { + GObjectClass parent_class; +} SoupRequesterClass; + +GType soup_requester_get_type (void); + +SoupRequester *soup_requester_new (void); + +SoupRequest *soup_requester_request (SoupRequester *requester, + const char *uri_string, + GError **error); + +SoupRequest 
*soup_requester_request_uri (SoupRequester *requester, + SoupURI *uri, + GError **error); + +GQuark soup_requester_error_quark (void); +#define SOUP_REQUESTER_ERROR soup_requester_error_quark () + +typedef enum { + SOUP_REQUESTER_ERROR_BAD_URI, + SOUP_REQUESTER_ERROR_UNSUPPORTED_URI_SCHEME +} SoupRequesterError; + +G_END_DECLS + +#endif /* LIBSOUP_USE_UNSTABLE_REQUEST_API */ + +#endif /* SOUP_REQUESTER_H */ diff --git a/libsoup/soup-server.c b/libsoup/soup-server.c new file mode 100644 index 0000000..92bcc50 --- /dev/null +++ b/libsoup/soup-server.c @@ -0,0 +1,1434 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * soup-server.c: Asynchronous HTTP server + * + * Copyright (C) 2001-2003, Ximian, Inc. + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include +#include +#include + +#include "soup-server.h" +#include "soup-address.h" +#include "soup-auth-domain.h" +#include "soup-date.h" +#include "soup-form.h" +#include "soup-headers.h" +#include "soup-message-private.h" +#include "soup-marshal.h" +#include "soup-path-map.h" +#include "soup-socket.h" +#include "soup-ssl.h" + +/** + * SECTION:soup-server + * @short_description: HTTP server + * @see_also: #SoupAuthDomain + * + * #SoupServer implements a simple HTTP server. + * + * To begin, create a server using soup_server_new(). Add at least one + * handler by calling soup_server_add_handler(); the handler will be + * called to process any requests underneath the path passed to + * soup_server_add_handler(). (If you want all requests to go to the + * same handler, just pass "/" (or %NULL) for the path.) Any request + * that does not match any handler will automatically be returned to + * the client with a 404 (Not Found) status. + * + * If you want to handle the special "*" URI (eg, "OPTIONS *"), you + * must explicitly register a handler for "*"; the default handler + * will not be used for that case. 
+ * + * To add authentication to some or all paths, create an appropriate + * #SoupAuthDomain (qv), and add it to the server via + * soup_server_add_auth_domain(). (As with handlers, you must + * explicitly add "*" to an auth domain if you want it to be covered.) + * + * Additional processing options are available via #SoupServer's + * signals; Connect to #SoupServer::request-started to be notified + * every time a new request is being processed. (This gives you a + * chance to connect to the #SoupMessage "got-" signals in case you + * want to do processing before the body has been fully read.) + * + * Once the server is set up, start it processing connections by + * calling soup_server_run_async() or soup_server_run(). #SoupServer + * runs via the glib main loop; if you need to have a server that runs + * in another thread (or merely isn't bound to the default main loop), + * create a #GMainContext for it to use, and set that via the + * #SOUP_SERVER_ASYNC_CONTEXT property. + **/ + +G_DEFINE_TYPE (SoupServer, soup_server, G_TYPE_OBJECT) + +enum { + REQUEST_STARTED, + REQUEST_READ, + REQUEST_FINISHED, + REQUEST_ABORTED, + LAST_SIGNAL +}; + +static guint signals[LAST_SIGNAL] = { 0 }; + +struct SoupClientContext { + SoupServer *server; + SoupSocket *sock; + SoupMessage *msg; + SoupAuthDomain *auth_domain; + char *auth_user; + + int ref_count; +}; + +typedef struct { + char *path; + + SoupServerCallback callback; + GDestroyNotify destroy; + gpointer user_data; +} SoupServerHandler; + +typedef struct { + SoupAddress *iface; + guint port; + + char *ssl_cert_file, *ssl_key_file; + SoupSSLCredentials *ssl_creds; + + char *server_header; + + GMainLoop *loop; + + SoupSocket *listen_sock; + GSList *clients; + + gboolean raw_paths; + SoupPathMap *handlers; + SoupServerHandler *default_handler; + + GSList *auth_domains; + + GMainContext *async_context; +} SoupServerPrivate; +#define SOUP_SERVER_GET_PRIVATE(o) (G_TYPE_INSTANCE_GET_PRIVATE ((o), SOUP_TYPE_SERVER, 
SoupServerPrivate)) + +#define SOUP_SERVER_SERVER_HEADER_BASE "libsoup/" PACKAGE_VERSION + +enum { + PROP_0, + + PROP_PORT, + PROP_INTERFACE, + PROP_SSL_CERT_FILE, + PROP_SSL_KEY_FILE, + PROP_ASYNC_CONTEXT, + PROP_RAW_PATHS, + PROP_SERVER_HEADER, + + LAST_PROP +}; + +static GObject *constructor (GType type, + guint n_construct_properties, + GObjectConstructParam *construct_properties); +static void set_property (GObject *object, guint prop_id, + const GValue *value, GParamSpec *pspec); +static void get_property (GObject *object, guint prop_id, + GValue *value, GParamSpec *pspec); + +static void +free_handler (SoupServerHandler *hand) +{ + g_free (hand->path); + g_slice_free (SoupServerHandler, hand); +} + +static void +soup_server_init (SoupServer *server) +{ + SoupServerPrivate *priv = SOUP_SERVER_GET_PRIVATE (server); + + priv->handlers = soup_path_map_new ((GDestroyNotify)free_handler); +} + +static void +finalize (GObject *object) +{ + SoupServer *server = SOUP_SERVER (object); + SoupServerPrivate *priv = SOUP_SERVER_GET_PRIVATE (server); + GSList *iter; + + if (priv->iface) + g_object_unref (priv->iface); + + g_free (priv->ssl_cert_file); + g_free (priv->ssl_key_file); + if (priv->ssl_creds) + soup_ssl_free_server_credentials (priv->ssl_creds); + + g_free (priv->server_header); + + if (priv->listen_sock) + g_object_unref (priv->listen_sock); + + while (priv->clients) { + SoupClientContext *client = priv->clients->data; + SoupSocket *sock = g_object_ref (client->sock); + + priv->clients = g_slist_remove (priv->clients, client); + + if (client->msg) { + soup_message_set_status (client->msg, SOUP_STATUS_IO_ERROR); + soup_message_io_finished (client->msg); + } + + soup_socket_disconnect (sock); + g_object_unref (sock); + } + + if (priv->default_handler) + free_handler (priv->default_handler); + soup_path_map_free (priv->handlers); + + for (iter = priv->auth_domains; iter; iter = iter->next) + g_object_unref (iter->data); + g_slist_free (priv->auth_domains); + + if 
(priv->loop) + g_main_loop_unref (priv->loop); + if (priv->async_context) + g_main_context_unref (priv->async_context); + + G_OBJECT_CLASS (soup_server_parent_class)->finalize (object); +} + +static void +soup_server_class_init (SoupServerClass *server_class) +{ + GObjectClass *object_class = G_OBJECT_CLASS (server_class); + + g_type_class_add_private (server_class, sizeof (SoupServerPrivate)); + + /* virtual method override */ + object_class->constructor = constructor; + object_class->finalize = finalize; + object_class->set_property = set_property; + object_class->get_property = get_property; + + /* signals */ + + /** + * SoupServer::request-started + * @server: the server + * @message: the new message + * @client: the client context + * + * Emitted when the server has started reading a new request. + * @message will be completely blank; not even the + * Request-Line will have been read yet. About the only thing + * you can usefully do with it is connect to its signals. + * + * If the request is read successfully, this will eventually + * be followed by a #SoupServer::request_read signal. If a + * response is then sent, the request processing will end with + * a #SoupServer::request_finished signal. If a network error + * occurs, the processing will instead end with + * #SoupServer::request_aborted. + **/ + signals[REQUEST_STARTED] = + g_signal_new ("request-started", + G_OBJECT_CLASS_TYPE (object_class), + G_SIGNAL_RUN_FIRST, + G_STRUCT_OFFSET (SoupServerClass, request_started), + NULL, NULL, + soup_marshal_NONE__OBJECT_POINTER, + G_TYPE_NONE, 2, + SOUP_TYPE_MESSAGE, + SOUP_TYPE_CLIENT_CONTEXT); + + /** + * SoupServer::request-read + * @server: the server + * @message: the message + * @client: the client context + * + * Emitted when the server has successfully read a request. + * @message will have all of its request-side information + * filled in, and if the message was authenticated, @client + * will have information about that. 
This signal is emitted + * before any handlers are called for the message, and if it + * sets the message's #status_code, then normal handler + * processing will be skipped. + **/ + signals[REQUEST_READ] = + g_signal_new ("request-read", + G_OBJECT_CLASS_TYPE (object_class), + G_SIGNAL_RUN_FIRST, + G_STRUCT_OFFSET (SoupServerClass, request_read), + NULL, NULL, + soup_marshal_NONE__OBJECT_POINTER, + G_TYPE_NONE, 2, + SOUP_TYPE_MESSAGE, + SOUP_TYPE_CLIENT_CONTEXT); + + /** + * SoupServer::request-finished + * @server: the server + * @message: the message + * @client: the client context + * + * Emitted when the server has finished writing a response to + * a request. + **/ + signals[REQUEST_FINISHED] = + g_signal_new ("request-finished", + G_OBJECT_CLASS_TYPE (object_class), + G_SIGNAL_RUN_FIRST, + G_STRUCT_OFFSET (SoupServerClass, request_finished), + NULL, NULL, + soup_marshal_NONE__OBJECT_POINTER, + G_TYPE_NONE, 2, + SOUP_TYPE_MESSAGE, + SOUP_TYPE_CLIENT_CONTEXT); + + /** + * SoupServer::request-aborted + * @server: the server + * @message: the message + * @client: the client context + * + * Emitted when processing has failed for a message; this + * could mean either that it could not be read (if + * #SoupServer::request_read has not been emitted for it yet), + * or that the response could not be written back (if + * #SoupServer::request_read has been emitted but + * #SoupServer::request_finished has not been). + * + * @message is in an undefined state when this signal is + * emitted; the signal exists primarily to allow the server to + * free any state that it may have allocated in + * #SoupServer::request_started. 
+ **/ + signals[REQUEST_ABORTED] = + g_signal_new ("request-aborted", + G_OBJECT_CLASS_TYPE (object_class), + G_SIGNAL_RUN_FIRST, + G_STRUCT_OFFSET (SoupServerClass, request_aborted), + NULL, NULL, + soup_marshal_NONE__OBJECT_POINTER, + G_TYPE_NONE, 2, + SOUP_TYPE_MESSAGE, + SOUP_TYPE_CLIENT_CONTEXT); + + /* properties */ + /** + * SOUP_SERVER_PORT: + * + * Alias for the #SoupServer:port property. (The port the + * server listens on.) + **/ + g_object_class_install_property ( + object_class, PROP_PORT, + g_param_spec_uint (SOUP_SERVER_PORT, + "Port", + "Port to listen on", + 0, 65536, 0, + G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY)); + /** + * SOUP_SERVER_INTERFACE: + * + * Alias for the #SoupServer:interface property. (The address + * of the network interface the server listens on.) + **/ + g_object_class_install_property ( + object_class, PROP_INTERFACE, + g_param_spec_object (SOUP_SERVER_INTERFACE, + "Interface", + "Address of interface to listen on", + SOUP_TYPE_ADDRESS, + G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY)); + /** + * SOUP_SERVER_SSL_CERT_FILE: + * + * Alias for the #SoupServer:ssl-cert-file property. (The file + * containing the SSL certificate for the server.) + **/ + g_object_class_install_property ( + object_class, PROP_SSL_CERT_FILE, + g_param_spec_string (SOUP_SERVER_SSL_CERT_FILE, + "SSL certificate file", + "File containing server SSL certificate", + NULL, + G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY)); + /** + * SOUP_SERVER_SSL_KEY_FILE: + * + * Alias for the #SoupServer:ssl-key-file property. (The file + * containing the SSL certificate key for the server.) + **/ + g_object_class_install_property ( + object_class, PROP_SSL_KEY_FILE, + g_param_spec_string (SOUP_SERVER_SSL_KEY_FILE, + "SSL key file", + "File containing server SSL key", + NULL, + G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY)); + /** + * SOUP_SERVER_ASYNC_CONTEXT: + * + * Alias for the #SoupServer:async-context property. (The + * server's #GMainContext.) 
+ **/ + g_object_class_install_property ( + object_class, PROP_ASYNC_CONTEXT, + g_param_spec_pointer (SOUP_SERVER_ASYNC_CONTEXT, + "Async GMainContext", + "The GMainContext to dispatch async I/O in", + G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY)); + /** + * SOUP_SERVER_RAW_PATHS: + * + * Alias for the #SoupServer:raw-paths property. (If %TRUE, + * percent-encoding in the Request-URI path will not be + * automatically decoded.) + **/ + g_object_class_install_property ( + object_class, PROP_RAW_PATHS, + g_param_spec_boolean (SOUP_SERVER_RAW_PATHS, + "Raw paths", + "If %TRUE, percent-encoding in the Request-URI path will not be automatically decoded.", + FALSE, + G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY)); + + /** + * SoupServer:server-header: + * + * If non-%NULL, the value to use for the "Server" header on + * #SoupMessages processed by this server. + * + * The Server header is the server equivalent of the + * User-Agent header, and provides information about the + * server and its components. It contains a list of one or + * more product tokens, separated by whitespace, with the most + * significant product token coming first. The tokens must be + * brief, ASCII, and mostly alphanumeric (although "-", "_", + * and "." are also allowed), and may optionally include a "/" + * followed by a version string. You may also put comments, + * enclosed in parentheses, between or after the tokens. + * + * Some HTTP server implementations intentionally do not use + * version numbers in their Server header, so that + * installations running older versions of the server don't + * end up advertising their vulnerability to specific security + * holes. + * + * As with #SoupSession:user_agent, if you set a + * %server_header property that has trailing whitespace, + * #SoupServer will append its own product token (eg, + * "libsoup/2.3.2") to the end of the + * header for you. + **/ + /** + * SOUP_SERVER_SERVER_HEADER: + * + * Alias for the #SoupServer:server-header property, qv. 
+ **/ + g_object_class_install_property ( + object_class, PROP_SERVER_HEADER, + g_param_spec_string (SOUP_SERVER_SERVER_HEADER, + "Server header", + "Server header", + NULL, + G_PARAM_READWRITE | G_PARAM_CONSTRUCT)); +} + +static GObject * +constructor (GType type, + guint n_construct_properties, + GObjectConstructParam *construct_properties) +{ + GObject *server; + SoupServerPrivate *priv; + + server = G_OBJECT_CLASS (soup_server_parent_class)->constructor ( + type, n_construct_properties, construct_properties); + if (!server) + return NULL; + priv = SOUP_SERVER_GET_PRIVATE (server); + + if (!priv->iface) { + priv->iface = + soup_address_new_any (SOUP_ADDRESS_FAMILY_IPV4, + priv->port); + } + + if (priv->ssl_cert_file && priv->ssl_key_file) { + priv->ssl_creds = soup_ssl_get_server_credentials ( + priv->ssl_cert_file, + priv->ssl_key_file); + if (!priv->ssl_creds) { + g_object_unref (server); + return NULL; + } + } + + priv->listen_sock = + soup_socket_new (SOUP_SOCKET_LOCAL_ADDRESS, priv->iface, + SOUP_SOCKET_SSL_CREDENTIALS, priv->ssl_creds, + SOUP_SOCKET_ASYNC_CONTEXT, priv->async_context, + NULL); + if (!soup_socket_listen (priv->listen_sock)) { + g_object_unref (server); + return NULL; + } + + /* Re-resolve the interface address, in particular in case + * the passed-in address had SOUP_ADDRESS_ANY_PORT. 
+ */ + g_object_unref (priv->iface); + priv->iface = soup_socket_get_local_address (priv->listen_sock); + g_object_ref (priv->iface); + priv->port = soup_address_get_port (priv->iface); + + return server; +} + +static void +set_property (GObject *object, guint prop_id, + const GValue *value, GParamSpec *pspec) +{ + SoupServerPrivate *priv = SOUP_SERVER_GET_PRIVATE (object); + const char *header; + + switch (prop_id) { + case PROP_PORT: + priv->port = g_value_get_uint (value); + break; + case PROP_INTERFACE: + if (priv->iface) + g_object_unref (priv->iface); + priv->iface = g_value_get_object (value); + if (priv->iface) + g_object_ref (priv->iface); + break; + case PROP_SSL_CERT_FILE: + priv->ssl_cert_file = + g_strdup (g_value_get_string (value)); + break; + case PROP_SSL_KEY_FILE: + priv->ssl_key_file = + g_strdup (g_value_get_string (value)); + break; + case PROP_ASYNC_CONTEXT: + priv->async_context = g_value_get_pointer (value); + if (priv->async_context) + g_main_context_ref (priv->async_context); + break; + case PROP_RAW_PATHS: + priv->raw_paths = g_value_get_boolean (value); + break; + case PROP_SERVER_HEADER: + g_free (priv->server_header); + header = g_value_get_string (value); + if (!header) + priv->server_header = NULL; + else if (!*header) { + priv->server_header = + g_strdup (SOUP_SERVER_SERVER_HEADER_BASE); + } else if (g_str_has_suffix (header, " ")) { + priv->server_header = + g_strdup_printf ("%s%s", header, + SOUP_SERVER_SERVER_HEADER_BASE); + } else + priv->server_header = g_strdup (header); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); + break; + } +} + +static void +get_property (GObject *object, guint prop_id, + GValue *value, GParamSpec *pspec) +{ + SoupServerPrivate *priv = SOUP_SERVER_GET_PRIVATE (object); + + switch (prop_id) { + case PROP_PORT: + g_value_set_uint (value, priv->port); + break; + case PROP_INTERFACE: + g_value_set_object (value, priv->iface); + break; + case PROP_SSL_CERT_FILE: + 
g_value_set_string (value, priv->ssl_cert_file); + break; + case PROP_SSL_KEY_FILE: + g_value_set_string (value, priv->ssl_key_file); + break; + case PROP_ASYNC_CONTEXT: + g_value_set_pointer (value, priv->async_context ? g_main_context_ref (priv->async_context) : NULL); + break; + case PROP_RAW_PATHS: + g_value_set_boolean (value, priv->raw_paths); + break; + case PROP_SERVER_HEADER: + g_value_set_string (value, priv->server_header); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); + break; + } +} + +/** + * soup_server_new: + * @optname1: name of first property to set + * @...: value of @optname1, followed by additional property/value pairs + * + * Creates a new #SoupServer. + * + * Return value: a new #SoupServer + **/ +SoupServer * +soup_server_new (const char *optname1, ...) +{ + SoupServer *server; + va_list ap; + + va_start (ap, optname1); + server = (SoupServer *)g_object_new_valist (SOUP_TYPE_SERVER, + optname1, ap); + va_end (ap); + + return server; +} + +/** + * soup_server_get_port: + * @server: a #SoupServer + * + * Gets the TCP port that @server is listening on. This is most useful + * when you did not request a specific port (or explicitly requested + * %SOUP_ADDRESS_ANY_PORT). + * + * Return value: the port @server is listening on. + **/ +guint +soup_server_get_port (SoupServer *server) +{ + g_return_val_if_fail (SOUP_IS_SERVER (server), 0); + + return SOUP_SERVER_GET_PRIVATE (server)->port; +} + +/** + * soup_server_is_https: + * @server: a #SoupServer + * + * Checks whether @server is running plain http or https. + * + * In order for a server to run https, you must set the + * %SOUP_SERVER_SSL_CERT_FILE and %SOUP_SERVER_SSL_KEY_FILE properties + * to provide it with an SSL certificate to use. + * + * Return value: %TRUE if @server is serving https. 
+ **/ +gboolean +soup_server_is_https (SoupServer *server) +{ + SoupServerPrivate *priv; + + g_return_val_if_fail (SOUP_IS_SERVER (server), 0); + priv = SOUP_SERVER_GET_PRIVATE (server); + + return (priv->ssl_cert_file && priv->ssl_key_file); +} + +/** + * soup_server_get_listener: + * @server: a #SoupServer + * + * Gets @server's listening socket. You should treat this as + * read-only; writing to it or modifying it may cause @server to + * malfunction. + * + * Return value: (transfer none): the listening socket. + **/ +SoupSocket * +soup_server_get_listener (SoupServer *server) +{ + SoupServerPrivate *priv; + + g_return_val_if_fail (SOUP_IS_SERVER (server), NULL); + priv = SOUP_SERVER_GET_PRIVATE (server); + + return priv->listen_sock; +} + +static void start_request (SoupServer *, SoupClientContext *); + +static SoupClientContext * +soup_client_context_new (SoupServer *server, SoupSocket *sock) +{ + SoupClientContext *client = g_slice_new0 (SoupClientContext); + + client->server = server; + client->sock = sock; + client->ref_count = 1; + + return client; +} + +static void +soup_client_context_cleanup (SoupClientContext *client) +{ + if (client->auth_domain) { + g_object_unref (client->auth_domain); + client->auth_domain = NULL; + } + if (client->auth_user) { + g_free (client->auth_user); + client->auth_user = NULL; + } + client->msg = NULL; +} + +static SoupClientContext * +soup_client_context_ref (SoupClientContext *client) +{ + client->ref_count++; + return client; +} + +static void +soup_client_context_unref (SoupClientContext *client) +{ + if (--client->ref_count == 0) { + soup_client_context_cleanup (client); + g_slice_free (SoupClientContext, client); + } +} + +static void +request_finished (SoupMessage *msg, gpointer user_data) +{ + SoupClientContext *client = user_data; + SoupServer *server = client->server; + SoupSocket *sock = client->sock; + + soup_message_finished (msg); + g_signal_emit (server, + msg->status_code == SOUP_STATUS_IO_ERROR ? 
+ signals[REQUEST_ABORTED] : signals[REQUEST_FINISHED], + 0, msg, client); + + soup_client_context_cleanup (client); + if (soup_socket_is_connected (sock) && soup_message_is_keepalive (msg)) { + /* Start a new request */ + start_request (server, client); + } else { + soup_socket_disconnect (sock); + soup_client_context_unref (client); + } + g_object_unref (msg); + g_object_unref (sock); +} + +static SoupServerHandler * +soup_server_get_handler (SoupServer *server, const char *path) +{ + SoupServerPrivate *priv; + SoupServerHandler *hand; + + g_return_val_if_fail (SOUP_IS_SERVER (server), NULL); + priv = SOUP_SERVER_GET_PRIVATE (server); + + if (path) { + hand = soup_path_map_lookup (priv->handlers, path); + if (hand) + return hand; + if (!strcmp (path, "*")) + return NULL; + } + return priv->default_handler; +} + +static void +got_headers (SoupMessage *req, SoupClientContext *client) +{ + SoupServer *server = client->server; + SoupServerPrivate *priv = SOUP_SERVER_GET_PRIVATE (server); + SoupURI *uri; + SoupDate *date; + char *date_string; + SoupAuthDomain *domain; + GSList *iter; + gboolean rejected = FALSE; + char *auth_user; + + if (!priv->raw_paths) { + char *decoded_path; + + uri = soup_message_get_uri (req); + decoded_path = soup_uri_decode (uri->path); + + if (strstr (decoded_path, "/../") || + g_str_has_suffix (decoded_path, "/..")) { + /* Introducing new ".." segments is not allowed */ + g_free (decoded_path); + soup_message_set_status (req, SOUP_STATUS_BAD_REQUEST); + return; + } + + soup_uri_set_path (uri, decoded_path); + g_free (decoded_path); + } + + /* Add required response headers */ + date = soup_date_new_from_now (0); + date_string = soup_date_to_string (date, SOUP_DATE_HTTP); + soup_message_headers_replace (req->response_headers, "Date", + date_string); + g_free (date_string); + soup_date_free (date); + + /* Now handle authentication. 
(We do this here so that if + * the request uses "Expect: 100-continue", we can reject it + * immediately rather than waiting for the request body to + * be sent.) + */ + for (iter = priv->auth_domains; iter; iter = iter->next) { + domain = iter->data; + + if (soup_auth_domain_covers (domain, req)) { + auth_user = soup_auth_domain_accepts (domain, req); + if (auth_user) { + client->auth_domain = g_object_ref (domain); + client->auth_user = auth_user; + return; + } + + rejected = TRUE; + } + } + + /* If no auth domain rejected it, then it's ok. */ + if (!rejected) + return; + + for (iter = priv->auth_domains; iter; iter = iter->next) { + domain = iter->data; + + if (soup_auth_domain_covers (domain, req)) + soup_auth_domain_challenge (domain, req); + } +} + +static void +call_handler (SoupMessage *req, SoupClientContext *client) +{ + SoupServer *server = client->server; + SoupServerHandler *hand; + SoupURI *uri; + + g_signal_emit (server, signals[REQUEST_READ], 0, req, client); + + if (req->status_code != 0) + return; + + uri = soup_message_get_uri (req); + hand = soup_server_get_handler (server, uri->path); + if (!hand) { + soup_message_set_status (req, SOUP_STATUS_NOT_FOUND); + return; + } + + if (hand->callback) { + GHashTable *form_data_set; + + if (uri->query) + form_data_set = soup_form_decode (uri->query); + else + form_data_set = NULL; + + /* Call method handler */ + (*hand->callback) (server, req, + uri->path, form_data_set, + client, hand->user_data); + + if (form_data_set) + g_hash_table_destroy (form_data_set); + } +} + +static void +start_request (SoupServer *server, SoupClientContext *client) +{ + SoupServerPrivate *priv = SOUP_SERVER_GET_PRIVATE (server); + SoupMessage *msg; + + soup_client_context_cleanup (client); + + /* Listen for another request on this connection */ + msg = g_object_new (SOUP_TYPE_MESSAGE, + SOUP_MESSAGE_SERVER_SIDE, TRUE, + NULL); + client->msg = msg; + + if (priv->server_header) { + soup_message_headers_append 
(msg->response_headers, "Server", + priv->server_header); + } + + g_signal_connect (msg, "got_headers", G_CALLBACK (got_headers), client); + g_signal_connect (msg, "got_body", G_CALLBACK (call_handler), client); + + g_signal_emit (server, signals[REQUEST_STARTED], 0, + msg, client); + + g_object_ref (client->sock); + soup_message_read_request (msg, client->sock, + request_finished, client); +} + +static void +socket_disconnected (SoupSocket *sock, SoupClientContext *client) +{ + SoupServerPrivate *priv = SOUP_SERVER_GET_PRIVATE (client->server); + + priv->clients = g_slist_remove (priv->clients, client); + g_signal_handlers_disconnect_by_func (sock, socket_disconnected, client); + g_object_unref (sock); +} + +static void +new_connection (SoupSocket *listner, SoupSocket *sock, gpointer user_data) +{ + SoupServer *server = user_data; + SoupServerPrivate *priv = SOUP_SERVER_GET_PRIVATE (server); + SoupClientContext *client; + + client = soup_client_context_new (server, g_object_ref (sock)); + priv->clients = g_slist_prepend (priv->clients, client); + g_signal_connect (sock, "disconnected", + G_CALLBACK (socket_disconnected), client); + start_request (server, client); +} + +/** + * soup_server_run_async: + * @server: a #SoupServer + * + * Starts @server, causing it to listen for and process incoming + * connections. + * + * The server actually runs in @server's #GMainContext. It will not + * actually perform any processing unless the appropriate main loop is + * running. In the simple case where you did not set the server's + * %SOUP_SERVER_ASYNC_CONTEXT property, this means the server will run + * whenever the glib main loop is running. 
+ **/ +void +soup_server_run_async (SoupServer *server) +{ + SoupServerPrivate *priv; + + g_return_if_fail (SOUP_IS_SERVER (server)); + priv = SOUP_SERVER_GET_PRIVATE (server); + + if (!priv->listen_sock) { + if (priv->loop) { + g_main_loop_unref (priv->loop); + priv->loop = NULL; + } + return; + } + + g_signal_connect (priv->listen_sock, "new_connection", + G_CALLBACK (new_connection), server); + + return; + +} + +/** + * soup_server_run: + * @server: a #SoupServer + * + * Starts @server, causing it to listen for and process incoming + * connections. Unlike soup_server_run_async(), this creates a + * #GMainLoop and runs it, and it will not return until someone calls + * soup_server_quit() to stop the server. + **/ +void +soup_server_run (SoupServer *server) +{ + SoupServerPrivate *priv; + + g_return_if_fail (SOUP_IS_SERVER (server)); + priv = SOUP_SERVER_GET_PRIVATE (server); + + if (!priv->loop) { + priv->loop = g_main_loop_new (priv->async_context, TRUE); + soup_server_run_async (server); + } + + if (priv->loop) + g_main_loop_run (priv->loop); +} + +/** + * soup_server_quit: + * @server: a #SoupServer + * + * Stops processing for @server. Call this to clean up after + * soup_server_run_async(), or to terminate a call to soup_server_run(). + * + * @server is still in a working state after this call; you can start + * and stop a server as many times as you want. + **/ +void +soup_server_quit (SoupServer *server) +{ + SoupServerPrivate *priv; + + g_return_if_fail (SOUP_IS_SERVER (server)); + priv = SOUP_SERVER_GET_PRIVATE (server); + + /* listen_sock may be NULL (soup_server_run_async() allows for + * that, and soup_server_disconnect() clears it); in that case + * there is no "new_connection" handler to detach. + */ + if (priv->listen_sock) { + g_signal_handlers_disconnect_by_func (priv->listen_sock, + G_CALLBACK (new_connection), + server); + } + if (priv->loop) + g_main_loop_quit (priv->loop); +} + +/** + * soup_server_disconnect: + * @server: a #SoupServer + * + * Stops processing for @server and closes its socket. This implies + * the effects of soup_server_quit(), but additionally closes the + * listening socket. 
Note that messages currently in progress will + * continue to be handled, if the main loop associated with the + * server is resumed or kept running. + * + * After calling this function, @server is no longer functional, so it + * has nearly the same effect as destroying @server entirely. The + * function is thus useful mainly for language bindings without + * explicit control over object lifetime. + **/ +void +soup_server_disconnect (SoupServer *server) +{ + SoupServerPrivate *priv; + + g_return_if_fail (SOUP_IS_SERVER (server)); + priv = SOUP_SERVER_GET_PRIVATE (server); + + soup_server_quit (server); + + if (priv->listen_sock) { + soup_socket_disconnect (priv->listen_sock); + g_object_unref (priv->listen_sock); + priv->listen_sock = NULL; + } +} + +/** + * soup_server_get_async_context: + * @server: a #SoupServer + * + * Gets @server's async_context. This does not add a ref to the + * context, so you will need to ref it yourself if you want it to + * outlive its server. + * + * Return value: (transfer none): @server's #GMainContext, which may be %NULL + **/ +GMainContext * +soup_server_get_async_context (SoupServer *server) +{ + SoupServerPrivate *priv; + + g_return_val_if_fail (SOUP_IS_SERVER (server), NULL); + priv = SOUP_SERVER_GET_PRIVATE (server); + + return priv->async_context; +} + +/** + * SoupClientContext: + * + * A #SoupClientContext provides additional information about the + * client making a particular request. In particular, you can use + * soup_client_context_get_auth_domain() and + * soup_client_context_get_auth_user() to determine if HTTP + * authentication was used successfully. + * + * soup_client_context_get_address() and/or + * soup_client_context_get_host() can be used to get information for + * logging or debugging purposes. soup_client_context_get_socket() may + * also be of use in some situations (eg, tracking when multiple + * requests are made on the same connection). 
+ **/ +GType +soup_client_context_get_type (void) +{ + static volatile gsize type_volatile = 0; + + if (g_once_init_enter (&type_volatile)) { + GType type = g_boxed_type_register_static ( + g_intern_static_string ("SoupClientContext"), + (GBoxedCopyFunc) soup_client_context_ref, + (GBoxedFreeFunc) soup_client_context_unref); + g_once_init_leave (&type_volatile, type); + } + return type_volatile; +} + +/** + * soup_client_context_get_socket: + * @client: a #SoupClientContext + * + * Retrieves the #SoupSocket that @client is associated with. + * + * If you are using this method to observe when multiple requests are + * made on the same persistent HTTP connection (eg, as the ntlm-test + * test program does), you will need to pay attention to socket + * destruction as well (either by using weak references, or by + * connecting to the #SoupSocket::disconnected signal), so that you do + * not get fooled when the allocator reuses the memory address of a + * previously-destroyed socket to represent a new socket. + * + * Return value: (transfer none): the #SoupSocket that @client is + * associated with. + **/ +SoupSocket * +soup_client_context_get_socket (SoupClientContext *client) +{ + g_return_val_if_fail (client != NULL, NULL); + + return client->sock; +} + +/** + * soup_client_context_get_address: + * @client: a #SoupClientContext + * + * Retrieves the #SoupAddress associated with the remote end + * of a connection. + * + * Return value: (transfer none): the #SoupAddress associated with the + * remote end of a connection. + **/ +SoupAddress * +soup_client_context_get_address (SoupClientContext *client) +{ + g_return_val_if_fail (client != NULL, NULL); + + return soup_socket_get_remote_address (client->sock); +} + +/** + * soup_client_context_get_host: + * @client: a #SoupClientContext + * + * Retrieves the IP address associated with the remote end of a + * connection. 
(If you want the actual hostname, you'll have to call + * soup_client_context_get_address() and then call the appropriate + * #SoupAddress method to resolve it.) + * + * Return value: the IP address associated with the remote end of a + * connection. + **/ +const char * +soup_client_context_get_host (SoupClientContext *client) +{ + SoupAddress *address; + + address = soup_client_context_get_address (client); + return soup_address_get_physical (address); +} + +/** + * soup_client_context_get_auth_domain: + * @client: a #SoupClientContext + * + * Checks whether the request associated with @client has been + * authenticated, and if so returns the #SoupAuthDomain that + * authenticated it. + * + * Return value: (transfer none) (allow-none): a #SoupAuthDomain, or + * %NULL if the request was not authenticated. + **/ +SoupAuthDomain * +soup_client_context_get_auth_domain (SoupClientContext *client) +{ + g_return_val_if_fail (client != NULL, NULL); + + return client->auth_domain; +} + +/** + * soup_client_context_get_auth_user: + * @client: a #SoupClientContext + * + * Checks whether the request associated with @client has been + * authenticated, and if so returns the username that the client + * authenticated as. + * + * Return value: the authenticated-as user, or %NULL if the request + * was not authenticated. + **/ +const char * +soup_client_context_get_auth_user (SoupClientContext *client) +{ + g_return_val_if_fail (client != NULL, NULL); + + return client->auth_user; +} + +/** + * SoupServerCallback: + * @server: the #SoupServer + * @msg: the message being processed + * @path: the path component of @msg's Request-URI + * @query: (element-type utf8 utf8) (allow-none): the parsed query + * component of @msg's Request-URI + * @client: additional contextual information about the client + * @user_data: the data passed to @soup_server_add_handler + * + * A callback used to handle requests to a #SoupServer. 
The callback + * will be invoked after receiving the request body; @msg's %method, + * %request_headers, and %request_body fields will be filled in. + * + * @path and @query contain the likewise-named components of the + * Request-URI, subject to certain assumptions. By default, + * #SoupServer decodes all percent-encoding in the URI path, such that + * "/foo%2Fbar" is treated the same as "/foo/bar". If your + * server is serving resources in some non-POSIX-filesystem namespace, + * you may want to distinguish those as two distinct paths. In that + * case, you can set the %SOUP_SERVER_RAW_PATHS property when creating + * the #SoupServer, and it will leave those characters undecoded. (You + * may want to call soup_uri_normalize() to decode any percent-encoded + * characters that you aren't handling specially.) + * + * @query contains the query component of the Request-URI parsed + * according to the rules for HTML form handling. Although this is the + * only commonly-used query string format in HTTP, there is nothing + * that actually requires that HTTP URIs use that format; if your + * server needs to use some other format, you can just ignore @query, + * and call soup_message_get_uri() and parse the URI's query field + * yourself. + * + * After determining what to do with the request, the callback must at + * a minimum call soup_message_set_status() (or + * soup_message_set_status_full()) on @msg to set the response status + * code. Additionally, it may set response headers and/or fill in the + * response body. + * + * If the callback cannot fully fill in the response before returning + * (eg, if it needs to wait for information from a database, or + * another network server), it should call soup_server_pause_message() + * to tell #SoupServer to not send the response right away. When the + * response is ready, call soup_server_unpause_message() to cause it + * to be sent. 
+ * + * To send the response body a bit at a time using "chunked" encoding, + * first call soup_message_headers_set_encoding() to set + * %SOUP_ENCODING_CHUNKED on the %response_headers. Then call + * soup_message_body_append() (or soup_message_body_append_buffer()) + * to append each chunk as it becomes ready, and + * soup_server_unpause_message() to make sure it's running. (The + * server will automatically pause the message if it is using chunked + * encoding but no more chunks are available.) When you are done, call + * soup_message_body_complete() to indicate that no more chunks are + * coming. + **/ + +/** + * soup_server_add_handler: + * @server: a #SoupServer + * @path: (allow-none): the toplevel path for the handler + * @callback: callback to invoke for requests under @path + * @user_data: data for @callback + * @destroy: destroy notifier to free @user_data + * + * Adds a handler to @server for requests under @path. See the + * documentation for #SoupServerCallback for information about + * how callbacks should behave. + * + * If @path is %NULL or "/", then this will be the default handler for + * all requests that don't have a more specific handler. Note though + * that if you want to handle requests to the special "*" URI, you + * must explicitly register a handler for "*"; the default handler + * will not be used for that case. + **/ +void +soup_server_add_handler (SoupServer *server, + const char *path, + SoupServerCallback callback, + gpointer user_data, + GDestroyNotify destroy) +{ + SoupServerPrivate *priv; + SoupServerHandler *hand; + + g_return_if_fail (SOUP_IS_SERVER (server)); + g_return_if_fail (callback != NULL); + priv = SOUP_SERVER_GET_PRIVATE (server); + + /* "" was never documented as meaning the same thing as "/", + * but it effectively was. We have to special case it now or + * otherwise it would match "*" too. 
+ */ + if (path && (!*path || !strcmp (path, "/"))) + path = NULL; + + hand = g_slice_new0 (SoupServerHandler); + hand->path = g_strdup (path); + hand->callback = callback; + hand->destroy = destroy; + hand->user_data = user_data; + + soup_server_remove_handler (server, path); + if (path) + soup_path_map_add (priv->handlers, path, hand); + else + priv->default_handler = hand; +} + +static void +unregister_handler (SoupServerHandler *handler) +{ + if (handler->destroy) + handler->destroy (handler->user_data); +} + +/** + * soup_server_remove_handler: + * @server: a #SoupServer + * @path: the toplevel path for the handler + * + * Removes the handler registered at @path. + **/ +void +soup_server_remove_handler (SoupServer *server, const char *path) +{ + SoupServerPrivate *priv; + SoupServerHandler *hand; + + g_return_if_fail (SOUP_IS_SERVER (server)); + priv = SOUP_SERVER_GET_PRIVATE (server); + + if (!path || !*path || !strcmp (path, "/")) { + if (priv->default_handler) { + unregister_handler (priv->default_handler); + free_handler (priv->default_handler); + priv->default_handler = NULL; + } + return; + } + + hand = soup_path_map_lookup (priv->handlers, path); + if (hand && !strcmp (path, hand->path)) { + unregister_handler (hand); + soup_path_map_remove (priv->handlers, path); + } +} + +/** + * soup_server_add_auth_domain: + * @server: a #SoupServer + * @auth_domain: a #SoupAuthDomain + * + * Adds an authentication domain to @server. Each auth domain will + * have the chance to require authentication for each request that + * comes in; normally auth domains will require authentication for + * requests on certain paths that they have been set up to watch, or + * that meet other criteria set by the caller. If an auth domain + * determines that a request requires authentication (and the request + * doesn't contain authentication), @server will automatically reject + * the request with an appropriate status (401 Unauthorized or 407 + * Proxy Authentication Required). 
If the request used the + * "100-continue" Expectation, @server will reject it before the + * request body is sent. + **/ +void +soup_server_add_auth_domain (SoupServer *server, SoupAuthDomain *auth_domain) +{ + SoupServerPrivate *priv; + + g_return_if_fail (SOUP_IS_SERVER (server)); + priv = SOUP_SERVER_GET_PRIVATE (server); + + priv->auth_domains = g_slist_append (priv->auth_domains, auth_domain); + g_object_ref (auth_domain); +} + +/** + * soup_server_remove_auth_domain: + * @server: a #SoupServer + * @auth_domain: a #SoupAuthDomain + * + * Removes @auth_domain from @server. Does nothing if @auth_domain + * is not currently registered with @server. + **/ +void +soup_server_remove_auth_domain (SoupServer *server, SoupAuthDomain *auth_domain) +{ + SoupServerPrivate *priv; + + g_return_if_fail (SOUP_IS_SERVER (server)); + priv = SOUP_SERVER_GET_PRIVATE (server); + + /* Only drop the reference taken by soup_server_add_auth_domain(); + * unreffing a domain that is not in the list would release a + * reference the server never held. + */ + if (!g_slist_find (priv->auth_domains, auth_domain)) + return; + + priv->auth_domains = g_slist_remove (priv->auth_domains, auth_domain); + g_object_unref (auth_domain); +} + +/** + * soup_server_pause_message: + * @server: a #SoupServer + * @msg: a #SoupMessage associated with @server. + * + * Pauses I/O on @msg. This can be used when you need to return from + * the server handler without having the full response ready yet. Use + * soup_server_unpause_message() to resume I/O. + **/ +void +soup_server_pause_message (SoupServer *server, + SoupMessage *msg) +{ + g_return_if_fail (SOUP_IS_SERVER (server)); + g_return_if_fail (SOUP_IS_MESSAGE (msg)); + + soup_message_io_pause (msg); +} + +/** + * soup_server_unpause_message: + * @server: a #SoupServer + * @msg: a #SoupMessage associated with @server. + * + * Resumes I/O on @msg. Use this to resume after calling + * soup_server_pause_message(), or after adding a new chunk to a + * chunked response. + * + * I/O won't actually resume until you return to the main loop. 
+ **/ +void +soup_server_unpause_message (SoupServer *server, + SoupMessage *msg) +{ + g_return_if_fail (SOUP_IS_SERVER (server)); + g_return_if_fail (SOUP_IS_MESSAGE (msg)); + + soup_message_io_unpause (msg); +} + diff --git a/libsoup/soup-server.h b/libsoup/soup-server.h new file mode 100644 index 0000000..4ea17ad --- /dev/null +++ b/libsoup/soup-server.h @@ -0,0 +1,114 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2000-2003, Ximian, Inc. + */ + +#ifndef SOUP_SERVER_H +#define SOUP_SERVER_H 1 + +#include +#include + +G_BEGIN_DECLS + +#define SOUP_TYPE_SERVER (soup_server_get_type ()) +#define SOUP_SERVER(obj) (G_TYPE_CHECK_INSTANCE_CAST ((obj), SOUP_TYPE_SERVER, SoupServer)) +#define SOUP_SERVER_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST ((klass), SOUP_TYPE_SERVER, SoupServerClass)) +#define SOUP_IS_SERVER(obj) (G_TYPE_CHECK_INSTANCE_TYPE ((obj), SOUP_TYPE_SERVER)) +#define SOUP_IS_SERVER_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), SOUP_TYPE_SERVER)) +#define SOUP_SERVER_GET_CLASS(obj) (G_TYPE_INSTANCE_GET_CLASS ((obj), SOUP_TYPE_SERVER, SoupServerClass)) + +typedef struct SoupClientContext SoupClientContext; +GType soup_client_context_get_type (void); +#define SOUP_TYPE_CLIENT_CONTEXT (soup_client_context_get_type ()) + +struct _SoupServer { + GObject parent; + +}; + +typedef struct { + GObjectClass parent_class; + + /* signals */ + void (*request_started) (SoupServer *server, SoupMessage *msg, + SoupClientContext *client); + void (*request_read) (SoupServer *server, SoupMessage *msg, + SoupClientContext *client); + void (*request_finished) (SoupServer *server, SoupMessage *msg, + SoupClientContext *client); + void (*request_aborted) (SoupServer *server, SoupMessage *msg, + SoupClientContext *client); + + /* Padding for future expansion */ + void (*_libsoup_reserved1) (void); + void (*_libsoup_reserved2) (void); + void (*_libsoup_reserved3) (void); + void (*_libsoup_reserved4) (void); +} SoupServerClass; + 
+GType soup_server_get_type (void); + +typedef void (*SoupServerCallback) (SoupServer *server, + SoupMessage *msg, + const char *path, + GHashTable *query, + SoupClientContext *client, + gpointer user_data); + +#define SOUP_SERVER_PORT "port" +#define SOUP_SERVER_INTERFACE "interface" +#define SOUP_SERVER_SSL_CERT_FILE "ssl-cert-file" +#define SOUP_SERVER_SSL_KEY_FILE "ssl-key-file" +#define SOUP_SERVER_ASYNC_CONTEXT "async-context" +#define SOUP_SERVER_RAW_PATHS "raw-paths" +#define SOUP_SERVER_SERVER_HEADER "server-header" + +SoupServer *soup_server_new (const char *optname1, + ...) G_GNUC_NULL_TERMINATED; + +gboolean soup_server_is_https (SoupServer *server); +guint soup_server_get_port (SoupServer *server); + +SoupSocket *soup_server_get_listener (SoupServer *server); + +void soup_server_run (SoupServer *server); +void soup_server_run_async (SoupServer *server); +void soup_server_quit (SoupServer *server); +void soup_server_disconnect (SoupServer *server); + +GMainContext *soup_server_get_async_context (SoupServer *server); + +/* Handlers and auth */ + +void soup_server_add_handler (SoupServer *server, + const char *path, + SoupServerCallback callback, + gpointer user_data, + GDestroyNotify destroy); +void soup_server_remove_handler (SoupServer *server, + const char *path); + +void soup_server_add_auth_domain (SoupServer *server, + SoupAuthDomain *auth_domain); +void soup_server_remove_auth_domain (SoupServer *server, + SoupAuthDomain *auth_domain); + +/* I/O */ + +void soup_server_pause_message (SoupServer *server, + SoupMessage *msg); +void soup_server_unpause_message (SoupServer *server, + SoupMessage *msg); + +/* Client context */ + +SoupSocket *soup_client_context_get_socket (SoupClientContext *client); +SoupAddress *soup_client_context_get_address (SoupClientContext *client); +const char *soup_client_context_get_host (SoupClientContext *client); +SoupAuthDomain *soup_client_context_get_auth_domain (SoupClientContext *client); +const char 
*soup_client_context_get_auth_user (SoupClientContext *client); + +G_END_DECLS + +#endif /* SOUP_SERVER_H */ diff --git a/libsoup/soup-session-async.c b/libsoup/soup-session-async.c new file mode 100644 index 0000000..661883b --- /dev/null +++ b/libsoup/soup-session-async.c @@ -0,0 +1,577 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * soup-session-async.c + * + * Copyright (C) 2000-2003, Ximian, Inc. + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#define LIBSOUP_I_HAVE_READ_BUG_594377_AND_KNOW_SOUP_PASSWORD_MANAGER_MIGHT_GO_AWAY + +#include "soup-address.h" +#include "soup-session-async.h" +#include "soup-session-private.h" +#include "soup-address.h" +#include "soup-message-private.h" +#include "soup-message-queue.h" +#include "soup-misc.h" +#include "soup-password-manager.h" +#include "soup-proxy-uri-resolver.h" +#include "soup-uri.h" + +/** + * SECTION:soup-session-async + * @short_description: Soup session for asynchronous (main-loop-based) I/O. + * + * #SoupSessionAsync is an implementation of #SoupSession that uses + * non-blocking I/O via the glib main loop. It is intended for use in + * single-threaded programs. 
+ **/ + +static void run_queue (SoupSessionAsync *sa); +static void do_idle_run_queue (SoupSession *session); + +static void queue_message (SoupSession *session, SoupMessage *req, + SoupSessionCallback callback, gpointer user_data); +static guint send_message (SoupSession *session, SoupMessage *req); +static void cancel_message (SoupSession *session, SoupMessage *msg, + guint status_code); + +static void auth_required (SoupSession *session, SoupMessage *msg, + SoupAuth *auth, gboolean retrying); + +G_DEFINE_TYPE (SoupSessionAsync, soup_session_async, SOUP_TYPE_SESSION) + +typedef struct { + GSource *idle_run_queue_source; +} SoupSessionAsyncPrivate; +#define SOUP_SESSION_ASYNC_GET_PRIVATE(o) (G_TYPE_INSTANCE_GET_PRIVATE ((o), SOUP_TYPE_SESSION_ASYNC, SoupSessionAsyncPrivate)) + +static void +soup_session_async_init (SoupSessionAsync *sa) +{ +} + +static void +finalize (GObject *object) +{ + SoupSessionAsyncPrivate *priv = SOUP_SESSION_ASYNC_GET_PRIVATE (object); + + if (priv->idle_run_queue_source) + g_source_destroy (priv->idle_run_queue_source); + + G_OBJECT_CLASS (soup_session_async_parent_class)->finalize (object); +} + +static void +soup_session_async_class_init (SoupSessionAsyncClass *soup_session_async_class) +{ + SoupSessionClass *session_class = SOUP_SESSION_CLASS (soup_session_async_class); + GObjectClass *object_class = G_OBJECT_CLASS (session_class); + + g_type_class_add_private (soup_session_async_class, + sizeof (SoupSessionAsyncPrivate)); + + /* virtual method override */ + session_class->queue_message = queue_message; + session_class->send_message = send_message; + session_class->cancel_message = cancel_message; + session_class->auth_required = auth_required; + + object_class->finalize = finalize; +} + + +/** + * soup_session_async_new: + * + * Creates an asynchronous #SoupSession with the default options. + * + * Return value: the new session. 
+ **/ +SoupSession * +soup_session_async_new (void) +{ + return g_object_new (SOUP_TYPE_SESSION_ASYNC, NULL); +} + +/** + * soup_session_async_new_with_options: + * @optname1: name of first property to set + * @...: value of @optname1, followed by additional property/value pairs + * + * Creates an asynchronous #SoupSession with the specified options. + * + * Return value: the new session. + **/ +SoupSession * +soup_session_async_new_with_options (const char *optname1, ...) +{ + SoupSession *session; + va_list ap; + + va_start (ap, optname1); + session = (SoupSession *)g_object_new_valist (SOUP_TYPE_SESSION_ASYNC, + optname1, ap); + va_end (ap); + + return session; +} + +static gboolean +item_failed (SoupMessageQueueItem *item, guint status) +{ + if (item->removed) { + soup_message_queue_item_unref (item); + return TRUE; + } + + if (!SOUP_STATUS_IS_SUCCESSFUL (status)) { + item->state = SOUP_MESSAGE_FINISHING; + if (!item->msg->status_code) + soup_session_set_item_status (item->session, item, status); + do_idle_run_queue (item->session); + soup_message_queue_item_unref (item); + return TRUE; + } + + return FALSE; +} + +static void +resolved_proxy_addr (SoupAddress *addr, guint status, gpointer user_data) +{ + SoupMessageQueueItem *item = user_data; + SoupSession *session = item->session; + + if (item_failed (item, soup_status_proxify (status))) + return; + + item->proxy_addr = g_object_ref (addr); + item->state = SOUP_MESSAGE_AWAITING_CONNECTION; + + soup_message_queue_item_unref (item); + + /* If we got here we know session still exists */ + run_queue ((SoupSessionAsync *)session); +} + +static void +resolved_proxy_uri (SoupProxyURIResolver *proxy_resolver, + guint status, SoupURI *proxy_uri, gpointer user_data) +{ + SoupMessageQueueItem *item = user_data; + SoupSession *session = item->session; + + if (item_failed (item, status)) + return; + + if (proxy_uri) { + SoupAddress *proxy_addr; + + item->state = SOUP_MESSAGE_RESOLVING_PROXY_ADDRESS; + + item->proxy_uri = 
soup_uri_copy (proxy_uri); + proxy_addr = soup_address_new (proxy_uri->host, + proxy_uri->port); + soup_address_resolve_async (proxy_addr, + soup_session_get_async_context (session), + item->cancellable, + resolved_proxy_addr, item); + g_object_unref (proxy_addr); + return; + } + + item->state = SOUP_MESSAGE_AWAITING_CONNECTION; + soup_message_queue_item_unref (item); + + /* If we got here we know session still exists */ + run_queue ((SoupSessionAsync *)session); +} + +static void +resolve_proxy_addr (SoupMessageQueueItem *item, + SoupProxyURIResolver *proxy_resolver) +{ + item->state = SOUP_MESSAGE_RESOLVING_PROXY_URI; + + soup_message_queue_item_ref (item); + soup_proxy_uri_resolver_get_proxy_uri_async ( + proxy_resolver, soup_message_get_uri (item->msg), + soup_session_get_async_context (item->session), + item->cancellable, resolved_proxy_uri, item); +} + +static void +connection_closed (SoupConnection *conn, gpointer session) +{ + /* Run the queue in case anyone was waiting for a connection + * to be closed. 
+ */ + do_idle_run_queue (session); +} + +static void +message_completed (SoupMessage *msg, gpointer user_data) +{ + SoupMessageQueueItem *item = user_data; + + if (item->state != SOUP_MESSAGE_RESTARTING) + item->state = SOUP_MESSAGE_FINISHING; + do_idle_run_queue (item->session); +} + +static void +tunnel_complete (SoupMessageQueueItem *item) +{ + SoupSession *session = item->session; + + soup_message_finished (item->msg); + if (item->related->msg->status_code) + item->related->state = SOUP_MESSAGE_FINISHING; + + do_idle_run_queue (session); + soup_message_queue_item_unref (item->related); + soup_session_unqueue_item (session, item); + soup_message_queue_item_unref (item); + g_object_unref (session); +} + +static void +ssl_tunnel_completed (SoupConnection *conn, guint status, gpointer user_data) +{ + SoupMessageQueueItem *item = user_data; + + if (SOUP_STATUS_IS_SUCCESSFUL (status)) { + g_signal_connect (item->conn, "disconnected", + G_CALLBACK (connection_closed), item->session); + soup_connection_set_state (item->conn, SOUP_CONNECTION_IDLE); + soup_connection_set_state (item->conn, SOUP_CONNECTION_IN_USE); + + item->related->state = SOUP_MESSAGE_READY; + } else { + if (item->conn) + soup_connection_disconnect (item->conn); + soup_message_set_status (item->related->msg, SOUP_STATUS_SSL_FAILED); + } + + tunnel_complete (item); +} + +static void +tunnel_message_completed (SoupMessage *msg, gpointer user_data) +{ + SoupMessageQueueItem *item = user_data; + SoupSession *session = item->session; + + if (item->state == SOUP_MESSAGE_RESTARTING) { + soup_message_restarted (msg); + if (item->conn) { + soup_session_send_queue_item (session, item, tunnel_message_completed); + return; + } + + soup_message_set_status (msg, SOUP_STATUS_TRY_AGAIN); + } + + item->state = SOUP_MESSAGE_FINISHED; + + if (!SOUP_STATUS_IS_SUCCESSFUL (msg->status_code)) { + if (item->conn) + soup_connection_disconnect (item->conn); + if (msg->status_code == SOUP_STATUS_TRY_AGAIN) { + 
item->related->state = SOUP_MESSAGE_AWAITING_CONNECTION;
+			g_object_unref (item->related->conn);
+			item->related->conn = NULL;
+		} else
+			soup_message_set_status (item->related->msg, msg->status_code);
+
+		tunnel_complete (item);
+		return;
+	}
+
+	soup_connection_start_ssl_async (item->conn, item->cancellable,
+					 ssl_tunnel_completed, item);
+}
+/* Callback passed to soup_connection_connect_async(); @user_data is the queue item being connected. */
+static void
+got_connection (SoupConnection *conn, guint status, gpointer user_data)
+{
+	SoupMessageQueueItem *item = user_data;
+	SoupSession *session = item->session;
+	SoupAddress *tunnel_addr;
+
+	if (item->state != SOUP_MESSAGE_CONNECTING) { /* item left CONNECTING while the connect was pending */
+		soup_connection_disconnect (conn);
+		do_idle_run_queue (session);
+		soup_message_queue_item_unref (item); /* balances the item ref taken before connect_async */
+		g_object_unref (session); /* balances the session ref taken before connect_async */
+		return;
+	}
+
+	if (status != SOUP_STATUS_OK) {
+		soup_connection_disconnect (conn);
+
+		if (status == SOUP_STATUS_TRY_AGAIN) { /* drop this connection and go back to waiting for one */
+			g_object_unref (item->conn);
+			item->conn = NULL;
+			item->state = SOUP_MESSAGE_AWAITING_CONNECTION;
+		} else { /* any other failure finishes the message with that status */
+			soup_session_set_item_status (session, item, status);
+			item->state = SOUP_MESSAGE_FINISHING;
+		}
+
+		do_idle_run_queue (session);
+		soup_message_queue_item_unref (item);
+		g_object_unref (session);
+		return;
+	}
+
+	tunnel_addr = soup_connection_get_tunnel_addr (conn);
+	if (tunnel_addr) {
+		SoupMessageQueueItem *tunnel_item;
+
+		item->state = SOUP_MESSAGE_TUNNELING;
+
+		tunnel_item = soup_session_make_connect_message (session, conn);
+		tunnel_item->related = item; /* lets the CONNECT completion find the original item */
+		soup_session_send_queue_item (session, tunnel_item, tunnel_message_completed);
+		return; /* NOTE(review): item/session refs are not dropped on this path; presumably the tunnel completion does it - confirm */
+	}
+
+	item->state = SOUP_MESSAGE_READY;
+	g_signal_connect (conn, "disconnected",
+			  G_CALLBACK (connection_closed), session);
+	run_queue ((SoupSessionAsync *)session);
+	soup_message_queue_item_unref (item);
+	g_object_unref (session);
+}
+/* Advances @item through its states; when @loop is FALSE only a single state is processed. */
+static void
+process_queue_item (SoupMessageQueueItem *item,
+		    gboolean             *should_prune,
+		    gboolean              loop)
+{
+	SoupSession *session = item->session;
+	SoupProxyURIResolver *proxy_resolver;
+
+	do {
+		switch (item->state) {
+		case SOUP_MESSAGE_STARTING:
+			proxy_resolver = (SoupProxyURIResolver *)soup_session_get_feature_for_message (session, SOUP_TYPE_PROXY_URI_RESOLVER, item->msg);
+			if (!proxy_resolver) { /* no resolver feature for this message: skip proxy resolution */
+				item->state = SOUP_MESSAGE_AWAITING_CONNECTION;
+				break;
+			}
+			resolve_proxy_addr (item, proxy_resolver);
+			return;
+
+		case SOUP_MESSAGE_AWAITING_CONNECTION:
+			if (!soup_session_get_connection (session, item, should_prune))
+				return;
+
+			if (soup_connection_get_state (item->conn) != SOUP_CONNECTION_NEW) { /* connection is not new: no connect step */
+				item->state = SOUP_MESSAGE_READY;
+				break;
+			}
+
+			item->state = SOUP_MESSAGE_CONNECTING;
+			soup_message_queue_item_ref (item); /* released in got_connection() */
+			g_object_ref (session); /* released in got_connection() */
+			soup_connection_connect_async (item->conn, item->cancellable,
+						       got_connection, item);
+			return;
+
+		case SOUP_MESSAGE_READY:
+			item->state = SOUP_MESSAGE_RUNNING;
+			soup_session_send_queue_item (session, item, message_completed);
+			break;
+
+		case SOUP_MESSAGE_RESTARTING:
+			item->state = SOUP_MESSAGE_STARTING;
+			soup_message_restarted (item->msg);
+			break;
+
+		case SOUP_MESSAGE_FINISHING:
+			item->state = SOUP_MESSAGE_FINISHED;
+			soup_message_finished (item->msg);
+			if (item->state != SOUP_MESSAGE_FINISHED) /* state was changed again during soup_message_finished() */
+				break;
+
+			g_object_ref (session); /* keep the session alive across the user callback */
+			soup_session_unqueue_item (session, item);
+			if (item->callback)
+				item->callback (session, item->msg, item->callback_data);
+			g_object_unref (item->msg);
+			do_idle_run_queue (session);
+			g_object_unref (session);
+			return;
+
+		default:
+			/* Nothing to do with this message in any
+			 * other state.
+			 */
+			return;
+		}
+	} while (loop && item->state != SOUP_MESSAGE_FINISHED);
+}
+/* Processes every non-CONNECT queue item; repeats once if pruning idle connections freed something. */
+static void
+run_queue (SoupSessionAsync *sa)
+{
+	SoupSession *session = SOUP_SESSION (sa);
+	SoupMessageQueue *queue = soup_session_get_queue (session);
+	SoupMessageQueueItem *item;
+	SoupMessage *msg;
+	gboolean try_pruning = TRUE, should_prune = FALSE;
+
+	g_object_ref (session);
+	soup_session_cleanup_connections (session, FALSE);
+
+ try_again:
+	for (item = soup_message_queue_first (queue);
+	     item;
+	     item = soup_message_queue_next (queue, item)) {
+		msg = item->msg;
+
+		/* CONNECT messages are handled specially */
+		if (msg->method != SOUP_METHOD_CONNECT)
+			process_queue_item (item, &should_prune, TRUE);
+	}
+
+	if (try_pruning && should_prune) {
+		/* There is at least one message in the queue that
+		 * could be sent if we pruned an idle connection from
+		 * some other server.
+		 */
+		if (soup_session_cleanup_connections (session, TRUE)) {
+			try_pruning = should_prune = FALSE; /* clearing try_pruning limits this to one extra pass */
+			goto try_again;
+		}
+	}
+
+	g_object_unref (session);
+}
+/* Callback installed by do_idle_run_queue(): clears the pending-source marker and runs the queue. */
+static gboolean
+idle_run_queue (gpointer sa)
+{
+	SoupSessionAsyncPrivate *priv = SOUP_SESSION_ASYNC_GET_PRIVATE (sa);
+
+	priv->idle_run_queue_source = NULL;
+	run_queue (sa);
+	return FALSE;
+}
+/* Schedules idle_run_queue() on the session's async context unless one is already pending. */
+static void
+do_idle_run_queue (SoupSession *session)
+{
+	SoupSessionAsyncPrivate *priv = SOUP_SESSION_ASYNC_GET_PRIVATE (session);
+
+	if (!priv->idle_run_queue_source) {
+		priv->idle_run_queue_source = soup_add_completion (
+			soup_session_get_async_context (session),
+			idle_run_queue, session);
+	}
+}
+/* Queues @req through the parent class, then schedules a queue run. */
+static void
+queue_message (SoupSession *session, SoupMessage *req,
+	       SoupSessionCallback callback, gpointer user_data)
+{
+	SOUP_SESSION_CLASS (soup_session_async_parent_class)->queue_message (session, req, callback, user_data);
+
+	do_idle_run_queue (session);
+}
+/* Queues @req and iterates the async context until its item is FINISHED; returns req->status_code. */
+static guint
+send_message (SoupSession *session, SoupMessage *req)
+{
+	SoupMessageQueueItem *item;
+	GMainContext *async_context =
+		soup_session_get_async_context (session);
+
+	/* Balance out the unref that queuing will eventually do */
+	g_object_ref (req);
+
+	queue_message (session, req, NULL, NULL);
+
+	item = soup_message_queue_lookup (soup_session_get_queue (session), req);
+	g_return_val_if_fail (item != NULL, SOUP_STATUS_MALFORMED);
+
+	while (item->state != SOUP_MESSAGE_FINISHED)
+		g_main_context_iteration (async_context, TRUE);
+
+	soup_message_queue_item_unref (item);
+
+	return req->status_code;
+}
+/* Chains up, then drives the item for @msg (if still queued) towards FINISHED within this call. */
+static void
+cancel_message (SoupSession *session, SoupMessage *msg,
+		guint status_code)
+{
+	SoupMessageQueue *queue;
+	SoupMessageQueueItem *item;
+	gboolean dummy; /* receives should_prune from process_queue_item(); the value is not used here */
+
+	SOUP_SESSION_CLASS (soup_session_async_parent_class)->
+		cancel_message (session, msg, status_code);
+
+	queue = soup_session_get_queue (session);
+	item = soup_message_queue_lookup (queue, msg);
+	if (!item)
+		return;
+
+	/* Force it to finish immediately, so that
+	 * soup_session_abort (session); g_object_unref (session);
+	 * will work. (The soup_session_cancel_message() docs
+	 * point out that the callback will be invoked from
+	 * within the cancel call.)
+	 */
+	if (soup_message_io_in_progress (msg))
+		soup_message_io_finished (msg);
+	else if (item->state != SOUP_MESSAGE_FINISHED)
+		item->state = SOUP_MESSAGE_FINISHING;
+
+	if (item->state != SOUP_MESSAGE_FINISHED)
+		process_queue_item (item, &dummy, FALSE);
+
+	soup_message_queue_item_unref (item);
+}
+/* Callback for soup_password_manager_get_passwords_async(): unpauses @msg and chains up auth_required. */
+static void
+got_passwords (SoupPasswordManager *password_manager, SoupMessage *msg,
+	       SoupAuth *auth, gboolean retrying, gpointer session)
+{
+	soup_session_unpause_message (session, msg);
+	SOUP_SESSION_CLASS (soup_session_async_parent_class)->
+		auth_required (session, msg, auth, retrying);
+	g_object_unref (auth); /* balances the ref taken in auth_required() */
+}
+/* With a password-manager feature for @msg: pause it and query the manager first; otherwise chain up. */
+static void
+auth_required (SoupSession *session, SoupMessage *msg,
+	       SoupAuth *auth, gboolean retrying)
+{
+	SoupSessionFeature *password_manager;
+
+	password_manager = soup_session_get_feature_for_message (
+		session, SOUP_TYPE_PASSWORD_MANAGER, msg);
+	if (password_manager) {
+		soup_session_pause_message (session, msg);
+		g_object_ref (auth); /* released in got_passwords() */
+		soup_password_manager_get_passwords_async (
+			SOUP_PASSWORD_MANAGER (password_manager),
+			msg, auth, retrying,
+			soup_session_get_async_context (session),
+			NULL, /* FIXME cancellable */
+			got_passwords, session);
+	} else {
+		SOUP_SESSION_CLASS (soup_session_async_parent_class)->
+			auth_required (session, msg, auth, retrying);
+	}
+}
diff --git a/libsoup/soup-session-async.h b/libsoup/soup-session-async.h
new file mode 100644
index 0000000..9fb9cfe
--- /dev/null
+++ b/libsoup/soup-session-async.h
@@ -0,0 +1,44 @@
+/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+/*
+ * Copyright (C) 2000-2003, Ximian, Inc.
+ */
+
+#ifndef SOUP_SESSION_ASYNC_H
+#define SOUP_SESSION_ASYNC_H 1
+
+#include <libsoup/soup-types.h>
+#include <libsoup/soup-session.h>
+
+G_BEGIN_DECLS
+/* Standard GObject cast/check macros for SoupSessionAsync. */
+#define SOUP_TYPE_SESSION_ASYNC            (soup_session_async_get_type ())
+#define SOUP_SESSION_ASYNC(obj)            (G_TYPE_CHECK_INSTANCE_CAST ((obj), SOUP_TYPE_SESSION_ASYNC, SoupSessionAsync))
+#define SOUP_SESSION_ASYNC_CLASS(klass)    (G_TYPE_CHECK_CLASS_CAST ((klass), SOUP_TYPE_SESSION_ASYNC, SoupSessionAsyncClass))
+#define SOUP_IS_SESSION_ASYNC(obj)         (G_TYPE_CHECK_INSTANCE_TYPE ((obj), SOUP_TYPE_SESSION_ASYNC))
+#define SOUP_IS_SESSION_ASYNC_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), SOUP_TYPE_SESSION_ASYNC)) /* must expand its own parameter, klass */
+#define SOUP_SESSION_ASYNC_GET_CLASS(obj)  (G_TYPE_INSTANCE_GET_CLASS ((obj), SOUP_TYPE_SESSION_ASYNC, SoupSessionAsyncClass))
+/* Instance struct: declares no fields beyond the SoupSession parent. */
+struct _SoupSessionAsync {
+	SoupSession parent;
+
+};
+/* Class struct: the SoupSessionClass parent plus reserved function-pointer padding. */
+typedef struct {
+	SoupSessionClass parent_class;
+
+	/* Padding for future expansion */
+	void (*_libsoup_reserved1) (void);
+	void (*_libsoup_reserved2) (void);
+	void (*_libsoup_reserved3) (void);
+	void (*_libsoup_reserved4) (void);
+} SoupSessionAsyncClass;
+
+GType soup_session_async_get_type (void);
+
+SoupSession *soup_session_async_new              (void);
+SoupSession *soup_session_async_new_with_options (const char *optname1,
+						  ...) G_GNUC_NULL_TERMINATED;
+
+G_END_DECLS
+
+#endif /* SOUP_SESSION_ASYNC_H */
diff --git a/libsoup/soup-session-feature.c b/libsoup/soup-session-feature.c
new file mode 100644
index 0000000..97061ad
--- /dev/null
+++ b/libsoup/soup-session-feature.c
@@ -0,0 +1,243 @@
+/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+/*
+ * soup-session-feature.c: Miscellaneous session feature-provider interface
+ *
+ * Copyright (C) 2008 Red Hat, Inc.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include "soup-session-feature.h"
+#include "soup-message-private.h"
+
+/**
+ * SECTION:soup-session-feature
+ * @short_description: Interface for miscellaneous session features
+ *
+ * #SoupSessionFeature is the interface used by classes that extend
+ * the functionality of a #SoupSession. Some features like HTTP
+ * authentication handling are implemented internally via
+ * #SoupSessionFeatures. Other features can be added to the session
+ * by the application. (Eg, #SoupLogger, #SoupCookieJar.)
+ *
+ * See soup_session_add_feature(), etc, to add a feature to a session.
+ **/
+
+/**
+ * SoupSessionFeature:
+ *
+ * An object that implements some sort of optional feature for
+ * #SoupSession.
+ *
+ * Since: 2.24
+ **/
+
+/**
+ * SoupSessionFeatureInterface:
+ * @parent: The parent interface.
+ * @attach: Perform setup when a feature is added to a session
+ * @detach: Perform cleanup when a feature is removed from a session
+ * @request_queued: Proxies the session's #SoupSession::request_queued signal
+ * @request_started: Proxies the session's #SoupSession::request_started signal
+ * @request_unqueued: Proxies the session's #SoupSession::request_unqueued signal
+ *
+ * The interface implemented by #SoupSessionFeatures.
+ *
+ * Since: 2.24
+ **/
+
+static void soup_session_feature_interface_init (SoupSessionFeatureInterface *interface);
+
+static void attach (SoupSessionFeature *feature, SoupSession *session);
+static void detach (SoupSessionFeature *feature, SoupSession *session);
+/* Registers the "SoupSessionFeature" interface type on first call and returns its GType. */
+GType
+soup_session_feature_get_type (void)
+{
+	static volatile gsize g_define_type_id__volatile = 0;
+	if (g_once_init_enter (&g_define_type_id__volatile))
+		{
+			GType g_define_type_id =
+				g_type_register_static_simple (G_TYPE_INTERFACE,
+							       g_intern_static_string ("SoupSessionFeature"),
+							       sizeof (SoupSessionFeatureInterface),
+							       (GClassInitFunc)soup_session_feature_interface_init,
+							       0,
+							       (GInstanceInitFunc)NULL,
+							       (GTypeFlags) 0);
+			g_type_interface_add_prerequisite (g_define_type_id, G_TYPE_OBJECT); /* implementers must be GObjects */
+			g_once_init_leave (&g_define_type_id__volatile, g_define_type_id);
+		}
+	return g_define_type_id__volatile;
+}
+/* Interface init: installs the default attach/detach implementations defined in this file. */
+static void
+soup_session_feature_interface_init (SoupSessionFeatureInterface *interface)
+{
+	interface->attach = attach;
+	interface->detach = detach;
+}
+/* Weak-ref notify registered on the session by attach(): drops the feature ref taken there. */
+static void
+weak_notify_unref (gpointer feature, GObject *ex_object)
+{
+	g_object_unref (feature);
+}
+/* "request_queued" handler: forwards to the feature's vfunc unless @msg disables the feature. */
+static void
+request_queued (SoupSession *session, SoupMessage *msg, gpointer feature)
+{
+	if (soup_message_disables_feature (msg, feature))
+		return;
+
+	SOUP_SESSION_FEATURE_GET_CLASS (feature)->
+		request_queued (feature, session, msg);
+}
+/* "request_started" handler: forwards to the feature's vfunc unless @msg disables the feature. */
+static void
+request_started (SoupSession *session, SoupMessage *msg,
+		 SoupSocket *socket, gpointer feature)
+{
+	if (soup_message_disables_feature (msg, feature))
+		return;
+
+	SOUP_SESSION_FEATURE_GET_CLASS (feature)->
+		request_started (feature, session, msg, socket);
+}
+/* "request_unqueued" handler: forwards to the feature's vfunc unless @msg disables the feature. */
+static void
+request_unqueued (SoupSession *session, SoupMessage *msg, gpointer feature)
+{
+	if (soup_message_disables_feature (msg, feature))
+		return;
+
+	SOUP_SESSION_FEATURE_GET_CLASS (feature)->
+		request_unqueued (feature, session, msg);
+}
+/* Default attach: refs @feature (dropped by weak_notify_unref or detach) and connects implemented hooks. */
+static void
+attach (SoupSessionFeature *feature, SoupSession *session)
+{
+	g_object_weak_ref (G_OBJECT (session),
+			   weak_notify_unref, g_object_ref (feature));
+
+	if (SOUP_SESSION_FEATURE_GET_CLASS (feature)->request_queued) { /* connect only the hooks the feature implements */
+		g_signal_connect (session, "request_queued",
+				  G_CALLBACK (request_queued), feature);
+	}
+
+	if (SOUP_SESSION_FEATURE_GET_CLASS (feature)->request_started) {
+		g_signal_connect (session, "request_started",
+				  G_CALLBACK (request_started), feature);
+	}
+
+	if (SOUP_SESSION_FEATURE_GET_CLASS (feature)->request_unqueued) {
+		g_signal_connect (session, "request_unqueued",
+				  G_CALLBACK (request_unqueued), feature);
+	}
+}
+/* Public entry point: calls @feature's attach vfunc for @session. */
+void
+soup_session_feature_attach (SoupSessionFeature *feature,
+			     SoupSession        *session)
+{
+	SOUP_SESSION_FEATURE_GET_CLASS (feature)->attach (feature, session);
+}
+/* Default detach: removes the weak ref and the three signal handlers, then drops the feature ref. */
+static void
+detach (SoupSessionFeature *feature, SoupSession *session)
+{
+	g_object_weak_unref (G_OBJECT (session), weak_notify_unref, feature);
+
+	g_signal_handlers_disconnect_by_func (session, request_queued, feature);
+	g_signal_handlers_disconnect_by_func (session, request_started, feature);
+	g_signal_handlers_disconnect_by_func (session, request_unqueued, feature);
+
+	g_object_unref (feature); /* the weak notify was removed above, so release attach()'s ref here */
+}
+/* Public entry point: calls @feature's detach vfunc for @session. */
+void
+soup_session_feature_detach (SoupSessionFeature *feature,
+			     SoupSession        *session)
+{
+	SOUP_SESSION_FEATURE_GET_CLASS (feature)->detach (feature, session);
+}
+
+/**
+ * soup_session_feature_add_feature:
+ * @feature: the "base" feature
+ * @type: the #GType of a "sub-feature"
+ *
+ * Adds a "sub-feature" of type @type to the base feature @feature.
+ * This is used for features that can be extended with multiple
+ * different types. Eg, the authentication manager can be extended
+ * with subtypes of #SoupAuth.
+ *
+ * Return value: %TRUE if @feature accepted @type as a subfeature.
+ *
+ * Since: 2.34
+ */
+gboolean
+soup_session_feature_add_feature (SoupSessionFeature *feature,
+				  GType               type)
+{
+	SoupSessionFeatureInterface *feature_iface =
+		SOUP_SESSION_FEATURE_GET_CLASS (feature);
+
+	if (feature_iface->add_feature) /* the vfunc may be left unset by an implementation */
+		return feature_iface->add_feature (feature, type);
+	else
+		return FALSE; /* no add_feature vfunc: the type is not accepted */
+}
+
+/**
+ * soup_session_feature_remove_feature:
+ * @feature: the "base" feature
+ * @type: the #GType of a "sub-feature"
+ *
+ * Removes the "sub-feature" of type @type from the base feature
+ * @feature. See soup_session_feature_add_feature().
+ *
+ * Return value: %TRUE if @type was removed from @feature
+ *
+ * Since: 2.34
+ */
+gboolean
+soup_session_feature_remove_feature (SoupSessionFeature *feature,
+				     GType               type)
+{
+	SoupSessionFeatureInterface *feature_iface =
+		SOUP_SESSION_FEATURE_GET_CLASS (feature);
+
+	if (feature_iface->remove_feature) /* the vfunc may be left unset by an implementation */
+		return feature_iface->remove_feature (feature, type);
+	else
+		return FALSE; /* no remove_feature vfunc: nothing was removed */
+}
+
+/**
+ * soup_session_feature_has_feature:
+ * @feature: the "base" feature
+ * @type: the #GType of a "sub-feature"
+ *
+ * Tests if @feature has a "sub-feature" of type @type. See
+ * soup_session_feature_add_feature().
+ *
+ * Return value: %TRUE if @feature has a subfeature of type @type
+ *
+ * Since: 2.34
+ */
+gboolean
+soup_session_feature_has_feature (SoupSessionFeature *feature,
+				  GType               type)
+{
+	SoupSessionFeatureInterface *feature_iface =
+		SOUP_SESSION_FEATURE_GET_CLASS (feature);
+
+	if (feature_iface->has_feature) /* the vfunc may be left unset by an implementation */
+		return feature_iface->has_feature (feature, type);
+	else
+		return FALSE; /* no has_feature vfunc: report the sub-feature as absent */
+}
diff --git a/libsoup/soup-session-feature.h b/libsoup/soup-session-feature.h
new file mode 100644
index 0000000..aa64aa0
--- /dev/null
+++ b/libsoup/soup-session-feature.h
@@ -0,0 +1,65 @@
+/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+/*
+ * Copyright (C) 2008 Red Hat, Inc.
+ */
+
+#ifndef SOUP_SESSION_FEATURE_H
+#define SOUP_SESSION_FEATURE_H 1
+
+#include <libsoup/soup-types.h>
+
+G_BEGIN_DECLS
+/* Standard GObject cast/check macros for the SoupSessionFeature interface. */
+#define SOUP_TYPE_SESSION_FEATURE            (soup_session_feature_get_type ())
+#define SOUP_SESSION_FEATURE(obj)            (G_TYPE_CHECK_INSTANCE_CAST ((obj), SOUP_TYPE_SESSION_FEATURE, SoupSessionFeature))
+#define SOUP_SESSION_FEATURE_CLASS(klass)    (G_TYPE_CHECK_CLASS_CAST ((klass), SOUP_TYPE_SESSION_FEATURE, SoupSessionFeatureInterface))
+#define SOUP_IS_SESSION_FEATURE(obj)         (G_TYPE_CHECK_INSTANCE_TYPE ((obj), SOUP_TYPE_SESSION_FEATURE))
+#define SOUP_IS_SESSION_FEATURE_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), SOUP_TYPE_SESSION_FEATURE))
+#define SOUP_SESSION_FEATURE_GET_CLASS(obj)  (G_TYPE_INSTANCE_GET_INTERFACE ((obj), SOUP_TYPE_SESSION_FEATURE, SoupSessionFeatureInterface))
+/* Interface vtable; the public wrappers declared below dispatch through these pointers. */
+typedef struct {
+	GTypeInterface parent;
+
+	/* methods */
+	void     (*attach)           (SoupSessionFeature *feature,
+				      SoupSession        *session);
+	void     (*detach)           (SoupSessionFeature *feature,
+				      SoupSession        *session);
+
+	void     (*request_queued)   (SoupSessionFeature *feature,
+				      SoupSession        *session,
+				      SoupMessage        *msg);
+	void     (*request_started)  (SoupSessionFeature *feature,
+				      SoupSession        *session,
+				      SoupMessage        *msg,
+				      SoupSocket         *socket);
+	void     (*request_unqueued) (SoupSessionFeature *feature,
+				      SoupSession        *session,
+				      SoupMessage        *msg);
+
+	gboolean (*add_feature)      (SoupSessionFeature *feature,
+				      GType               type);
+	gboolean (*remove_feature)   (SoupSessionFeature *feature,
+				      GType               type);
+	gboolean (*has_feature)      (SoupSessionFeature *feature,
+				      GType               type);
+
+} SoupSessionFeatureInterface;
+
+GType    soup_session_feature_get_type       (void);
+
+void     soup_session_feature_attach         (SoupSessionFeature *feature,
+					      SoupSession        *session);
+void     soup_session_feature_detach         (SoupSessionFeature *feature,
+					      SoupSession        *session);
+
+gboolean soup_session_feature_add_feature    (SoupSessionFeature *feature,
+					      GType               type);
+gboolean soup_session_feature_remove_feature (SoupSessionFeature *feature,
+					      GType               type);
+gboolean soup_session_feature_has_feature    (SoupSessionFeature *feature,
+					      GType               type);
+
+G_END_DECLS
+
+#endif /* SOUP_SESSION_FEATURE_H */
diff --git a/libsoup/soup-session-private.h b/libsoup/soup-session-private.h
new file mode 100644
index 0000000..7462c61
--- /dev/null
+++ b/libsoup/soup-session-private.h
@@ -0,0 +1,36 @@
+/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+/*
+ * Copyright (C) 2000-2003, Ximian, Inc.
+ */
+
+#ifndef SOUP_SESSION_PRIVATE_H
+#define SOUP_SESSION_PRIVATE_H 1
+
+#include "soup-session.h"
+#include "soup-message-private.h"
+#include "soup-proxy-uri-resolver.h"
+
+G_BEGIN_DECLS
+
+/* "protected" methods for subclasses */
+SoupMessageQueue     *soup_session_get_queue            (SoupSession          *session);
+
+SoupMessageQueueItem *soup_session_make_connect_message (SoupSession          *session,
+							 SoupConnection       *conn);
+gboolean              soup_session_get_connection       (SoupSession          *session,
+							 SoupMessageQueueItem *item,
+							 gboolean             *try_pruning);
+gboolean              soup_session_cleanup_connections  (SoupSession          *session,
+							 gboolean              prune_idle);
+void                  soup_session_send_queue_item      (SoupSession          *session,
+							 SoupMessageQueueItem *item,
+							 SoupMessageCompletionFn completion_cb);
+void                  soup_session_unqueue_item         (SoupSession          *session,
+							 SoupMessageQueueItem *item);
+void                  soup_session_set_item_status      (SoupSession          *session,
+							 SoupMessageQueueItem *item,
+							 guint                 status_code);
+
+G_END_DECLS
+
+#endif /* SOUP_SESSION_PRIVATE_H */
diff --git a/libsoup/soup-session-sync.c b/libsoup/soup-session-sync.c
new file mode 100644
index 0000000..373b1bd
--- /dev/null
+++ b/libsoup/soup-session-sync.c
@@ -0,0 +1,456 @@
+/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+/*
+ * soup-session-sync.c
+ *
+ * Copyright (C) 2000-2003, Ximian, Inc.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#define LIBSOUP_I_HAVE_READ_BUG_594377_AND_KNOW_SOUP_PASSWORD_MANAGER_MIGHT_GO_AWAY
+
+#include "soup-address.h"
+#include "soup-session-sync.h"
+#include "soup-session-private.h"
+#include "soup-address.h"
+#include "soup-message-private.h"
+#include "soup-message-queue.h"
+#include "soup-misc.h"
+#include "soup-password-manager.h"
+#include "soup-proxy-uri-resolver.h"
+#include "soup-uri.h"
+
+/**
+ * SECTION:soup-session-sync
+ * @short_description: Soup session for blocking I/O in multithreaded
+ * programs.
+ *
+ * #SoupSessionSync is an implementation of #SoupSession that uses
+ * synchronous I/O, intended for use in multi-threaded programs.
+ *
+ * You can use #SoupSessionSync from multiple threads concurrently.
+ * Eg, you can send a #SoupMessage in one thread, and then while
+ * waiting for the response, send another #SoupMessage from another
+ * thread. You can also send a message from one thread and then call
+ * soup_session_cancel_message() on it from any other thread (although
+ * you need to be careful to avoid race conditions, where the message
+ * finishes and is then unreffed by the sending thread just before you
+ * cancel it).
+ *
+ * However, the majority of other types and methods in libsoup are not
+ * MT-safe. In particular, you cannot modify or
+ * examine a #SoupMessage while it is being transmitted by
+ * #SoupSessionSync in another thread. Once a message has been handed
+ * off to #SoupSessionSync, it can only be manipulated from its signal
+ * handler callbacks, until I/O is complete.
+ **/
+/* Per-instance private data: the mutex/condition pair used by the functions in this file. */
+typedef struct {
+	GMutex *lock;
+	GCond *cond;
+} SoupSessionSyncPrivate;
+#define SOUP_SESSION_SYNC_GET_PRIVATE(o) (G_TYPE_INSTANCE_GET_PRIVATE ((o), SOUP_TYPE_SESSION_SYNC, SoupSessionSyncPrivate))
+
+static void  queue_message  (SoupSession *session, SoupMessage *msg,
+			     SoupSessionCallback callback, gpointer user_data);
+static guint send_message   (SoupSession *session, SoupMessage *msg);
+static void  cancel_message (SoupSession *session, SoupMessage *msg,
+			     guint status_code);
+static void  auth_required  (SoupSession *session, SoupMessage *msg,
+			     SoupAuth *auth, gboolean retrying);
+static void  flush_queue    (SoupSession *session);
+
+G_DEFINE_TYPE (SoupSessionSync, soup_session_sync, SOUP_TYPE_SESSION)
+/* Instance init: allocates the private mutex and condition variable. */
+static void
+soup_session_sync_init (SoupSessionSync *ss)
+{
+	SoupSessionSyncPrivate *priv = SOUP_SESSION_SYNC_GET_PRIVATE (ss);
+
+	priv->lock = g_mutex_new ();
+	priv->cond = g_cond_new ();
+}
+/* GObject finalize: frees the mutex and condition created in init, then chains up. */
+static void
+finalize (GObject *object)
+{
+	SoupSessionSyncPrivate *priv = SOUP_SESSION_SYNC_GET_PRIVATE (object);
+
+	g_mutex_free (priv->lock);
+	g_cond_free (priv->cond);
+
+	G_OBJECT_CLASS (soup_session_sync_parent_class)->finalize (object);
+}
+/* Class init: registers the private struct and installs this file's vfunc overrides. */
+static void
+soup_session_sync_class_init (SoupSessionSyncClass *session_sync_class)
+{
+	GObjectClass *object_class = G_OBJECT_CLASS (session_sync_class);
+	SoupSessionClass *session_class = SOUP_SESSION_CLASS (session_sync_class);
+
+	g_type_class_add_private (session_sync_class, sizeof (SoupSessionSyncPrivate));
+
+	/* virtual method override */
+	session_class->queue_message = queue_message;
+	session_class->send_message = send_message;
+	session_class->cancel_message = cancel_message;
+	session_class->auth_required = auth_required;
+	session_class->flush_queue = flush_queue;
+
+	object_class->finalize = finalize;
+}
+
+
+/**
+ * soup_session_sync_new:
+ *
+ * Creates a synchronous #SoupSession with the default options.
+ *
+ * Return value: the new session.
+ **/
+SoupSession *
+soup_session_sync_new (void)
+{
+	return g_object_new (SOUP_TYPE_SESSION_SYNC, NULL);
+}
+
+/**
+ * soup_session_sync_new_with_options:
+ * @optname1: name of first property to set
+ * @...: value of @optname1, followed by additional property/value pairs
+ *
+ * Creates a synchronous #SoupSession with the specified options.
+ *
+ * Return value: the new session.
+ **/
+SoupSession *
+soup_session_sync_new_with_options (const char *optname1, ...)
+{
+	SoupSession *session;
+	va_list ap;
+
+	va_start (ap, optname1);
+	session = (SoupSession *)g_object_new_valist (SOUP_TYPE_SESSION_SYNC,
+						      optname1, ap);
+	va_end (ap);
+
+	return session;
+}
+/* Sends a CONNECT message over @related's connection, then starts SSL on success; returns the status. */
+static guint
+tunnel_connect (SoupSession *session, SoupMessageQueueItem *related)
+{
+	SoupConnection *conn = related->conn;
+	SoupMessageQueueItem *item;
+	guint status;
+
+	g_object_ref (conn); /* hold the connection until the end of this function */
+
+	item = soup_session_make_connect_message (session, conn);
+	do {
+		soup_session_send_queue_item (session, item, NULL);
+		status = item->msg->status_code;
+		if (item->state == SOUP_MESSAGE_RESTARTING &&
+		    soup_connection_get_state (conn) != SOUP_CONNECTION_DISCONNECTED) { /* restart on the same, still-connected conn */
+			item->state = SOUP_MESSAGE_STARTING;
+			soup_message_restarted (item->msg);
+		} else {
+			if (item->state == SOUP_MESSAGE_RESTARTING) /* restart wanted but the connection is gone */
+				status = SOUP_STATUS_TRY_AGAIN;
+			item->state = SOUP_MESSAGE_FINISHED;
+			soup_message_finished (item->msg);
+		}
+	} while (item->state == SOUP_MESSAGE_STARTING);
+	soup_session_unqueue_item (session, item);
+	soup_message_queue_item_unref (item);
+
+	if (SOUP_STATUS_IS_SUCCESSFUL (status)) {
+		if (!soup_connection_start_ssl_sync (conn, related->cancellable))
+			status = SOUP_STATUS_SSL_FAILED;
+	}
+
+	if (!SOUP_STATUS_IS_SUCCESSFUL (status))
+		soup_connection_disconnect (conn);
+
+	g_object_unref (conn);
+	return status;
+}
+/* Tries to get a usable connection for @item (connecting and tunnelling if needed); updates item->state. */
+static void
+get_connection (SoupMessageQueueItem *item)
+{
+	SoupSession *session = item->session;
+	SoupMessage *msg = item->msg;
+	gboolean try_pruning = FALSE;
+	guint status;
+
+try_again:
+	soup_session_cleanup_connections (session, FALSE);
+
+	if (!soup_session_get_connection (session, item, &try_pruning)) {
+		if (!try_pruning) /* no connection and pruning would not help: leave the state unchanged */
+			return;
+		soup_session_cleanup_connections (session, TRUE);
+		if (!soup_session_get_connection (session, item, &try_pruning))
+			return;
+		try_pruning = FALSE;
+	}
+
+	if (soup_connection_get_state (item->conn) != SOUP_CONNECTION_NEW) { /* connection is not new: no connect step */
+		item->state = SOUP_MESSAGE_READY;
+		return;
+	}
+
+	status = soup_connection_connect_sync (item->conn, item->cancellable);
+	if (status == SOUP_STATUS_TRY_AGAIN) {
+		soup_connection_disconnect (item->conn);
+		g_object_unref (item->conn);
+		item->conn = NULL;
+		goto try_again;
+	}
+
+	if (!SOUP_STATUS_IS_SUCCESSFUL (status)) {
+		if (!msg->status_code) /* only set the status if the message has none yet */
+			soup_session_set_item_status (session, item, status);
+		item->state = SOUP_MESSAGE_FINISHING;
+		soup_connection_disconnect (item->conn);
+		g_object_unref (item->conn);
+		item->conn = NULL;
+		return;
+	}
+
+	if (soup_connection_get_tunnel_addr (item->conn)) {
+		status = tunnel_connect (session, item);
+		if (!SOUP_STATUS_IS_SUCCESSFUL (status)) {
+			soup_connection_disconnect (item->conn);
+			g_object_unref (item->conn);
+			item->conn = NULL;
+			if (status == SOUP_STATUS_TRY_AGAIN)
+				goto try_again;
+			soup_session_set_item_status (session, item, status);
+			item->state = SOUP_MESSAGE_FINISHING;
+			return;
+		}
+	}
+
+	item->state = SOUP_MESSAGE_READY;
+}
+/* Runs @item from STARTING through every state, blocking, until it reaches SOUP_MESSAGE_FINISHED. */
+static void
+process_queue_item (SoupMessageQueueItem *item)
+{
+	SoupSession *session = item->session;
+	SoupSessionSyncPrivate *priv = SOUP_SESSION_SYNC_GET_PRIVATE (session);
+	SoupMessage *msg = item->msg;
+	SoupProxyURIResolver *proxy_resolver;
+	guint status;
+
+	item->state = SOUP_MESSAGE_STARTING;
+	do {
+		switch (item->state) {
+		case SOUP_MESSAGE_STARTING:
+			proxy_resolver = (SoupProxyURIResolver *)soup_session_get_feature_for_message (session, SOUP_TYPE_PROXY_URI_RESOLVER, msg);
+			if (!proxy_resolver) { /* no resolver feature for this message: skip proxy resolution */
+				item->state = SOUP_MESSAGE_AWAITING_CONNECTION;
+				break;
+			}
+
+			status = soup_proxy_uri_resolver_get_proxy_uri_sync (
+				proxy_resolver, soup_message_get_uri (msg),
+				item->cancellable, &item->proxy_uri);
+			if (!SOUP_STATUS_IS_SUCCESSFUL (status)) {
+				soup_session_set_item_status (session, item, status);
+				item->state = SOUP_MESSAGE_FINISHING;
+				break;
+			}
+			if (!item->proxy_uri) { /* resolver returned no proxy URI: no proxy address to resolve */
+				item->state = SOUP_MESSAGE_AWAITING_CONNECTION;
+				break;
+			}
+
+			item->proxy_addr = soup_address_new (
+				item->proxy_uri->host, item->proxy_uri->port);
+			status = soup_address_resolve_sync (item->proxy_addr,
+							    item->cancellable);
+			if (SOUP_STATUS_IS_SUCCESSFUL (status))
+				item->state = SOUP_MESSAGE_AWAITING_CONNECTION;
+			else {
+				soup_session_set_item_status (session, item, soup_status_proxify (status));
+				item->state = SOUP_MESSAGE_FINISHING;
+			}
+			break;
+
+		case SOUP_MESSAGE_AWAITING_CONNECTION:
+			g_mutex_lock (priv->lock);
+			do {
+				get_connection (item);
+				if (item->state == SOUP_MESSAGE_AWAITING_CONNECTION) /* none available: wait for a broadcast on cond */
+					g_cond_wait (priv->cond, priv->lock);
+			} while (item->state == SOUP_MESSAGE_AWAITING_CONNECTION);
+			g_mutex_unlock (priv->lock);
+			break;
+
+		case SOUP_MESSAGE_READY:
+			item->state = SOUP_MESSAGE_RUNNING;
+			soup_session_send_queue_item (item->session, item, NULL);
+			if (item->state != SOUP_MESSAGE_RESTARTING)
+				item->state = SOUP_MESSAGE_FINISHING;
+			break;
+
+		case SOUP_MESSAGE_RESTARTING:
+			item->state = SOUP_MESSAGE_STARTING;
+			soup_message_restarted (item->msg);
+			break;
+
+		case SOUP_MESSAGE_FINISHING:
+			item->state = SOUP_MESSAGE_FINISHED;
+			soup_message_finished (item->msg);
+			soup_session_unqueue_item (session, item);
+			g_cond_broadcast (priv->cond); /* wake threads blocked in g_cond_wait() on this cond */
+			break;
+
+		default:
+			g_warn_if_reached ();
+			item->state = SOUP_MESSAGE_FINISHING;
+			break;
+		}
+	} while (item->state != SOUP_MESSAGE_FINISHED);
+}
+/* Completion callback: invokes the item's callback, then drops the session, message and item refs. */
+static gboolean
+queue_message_callback (gpointer data)
+{
+	SoupMessageQueueItem *item = data;
+
+	item->callback (item->session, item->msg, item->callback_data);
+	g_object_unref (item->session);
+	g_object_unref (item->msg);
+	soup_message_queue_item_unref (item);
+	return FALSE;
+}
+/* Thread body started by queue_message(): processes the item, then hands off to the callback if set. */
+static gpointer
+queue_message_thread (gpointer data)
+{
+	SoupMessageQueueItem *item = data;
+
+	process_queue_item (item);
+	if (item->callback) {
+		soup_add_completion (soup_session_get_async_context (item->session),
+				     queue_message_callback, item);
+	} else { /* no callback: release the same three refs queue_message_callback() would */
+		g_object_unref (item->session);
+		g_object_unref (item->msg);
+		soup_message_queue_item_unref (item);
+	}
+
+	return NULL;
+}
+/* Queues @msg through the parent class (taking a session ref) and processes it in a new thread. */
+static void
+queue_message (SoupSession *session, SoupMessage *msg,
+	       SoupSessionCallback callback, gpointer user_data)
+{
+	SoupMessageQueueItem *item;
+
+	SOUP_SESSION_CLASS (soup_session_sync_parent_class)->
+		queue_message (g_object_ref (session), msg, callback, user_data);
+
+	item = soup_message_queue_lookup (soup_session_get_queue (session), msg);
+	g_return_if_fail (item != NULL);
+
+	g_thread_create (queue_message_thread, item, FALSE, NULL);
+}
+/* Queues @msg through the parent class and processes it to completion in the calling thread. */
+static guint
+send_message (SoupSession *session, SoupMessage *msg)
+{
+	SoupMessageQueueItem *item;
+	guint status;
+
+	SOUP_SESSION_CLASS (soup_session_sync_parent_class)->queue_message (session, msg, NULL, NULL);
+
+	item = soup_message_queue_lookup (soup_session_get_queue (session), msg);
+	g_return_val_if_fail (item != NULL, SOUP_STATUS_MALFORMED);
+
+	process_queue_item (item);
+	status = msg->status_code; /* read the status before dropping the item reference */
+	soup_message_queue_item_unref (item);
+	return status;
+}
+/* Chains up while holding priv->lock, then broadcasts priv->cond. */
+static void
+cancel_message (SoupSession *session, SoupMessage *msg, guint status_code)
+{
+	SoupSessionSyncPrivate *priv = SOUP_SESSION_SYNC_GET_PRIVATE (session);
+
+	g_mutex_lock (priv->lock);
+	SOUP_SESSION_CLASS (soup_session_sync_parent_class)->cancel_message (session, msg, status_code);
+	g_cond_broadcast (priv->cond);
+	g_mutex_unlock (priv->lock);
+}
+/* Calls the password-manager feature for @msg (if any) synchronously, then always chains up. */
+static void
+auth_required (SoupSession *session, SoupMessage *msg,
+	       SoupAuth *auth, gboolean retrying)
+{
+	SoupSessionFeature *password_manager;
+
+	password_manager = soup_session_get_feature_for_message (
+		session, SOUP_TYPE_PASSWORD_MANAGER, msg);
+	if (password_manager) {
+		soup_password_manager_get_passwords_sync (
+			SOUP_PASSWORD_MANAGER (password_manager),
+			msg, auth, NULL); /* FIXME cancellable */
+	}
+
+	SOUP_SESSION_CLASS (soup_session_sync_parent_class)->
+		auth_required (session, msg, auth, retrying);
+}
+/* Cancels everything queued now and blocks until those items (not later ones) have left the queue. */
+static void
+flush_queue (SoupSession *session)
+{
+	SoupSessionSyncPrivate *priv = SOUP_SESSION_SYNC_GET_PRIVATE (session);
+	SoupMessageQueue *queue;
+	SoupMessageQueueItem *item;
+	GHashTable *current;
+	gboolean done = FALSE;
+
+	/* Record the current contents of the queue */
+	current = g_hash_table_new (NULL, NULL);
+	queue = soup_session_get_queue (session);
+	for (item = soup_message_queue_first (queue);
+	     item;
+	     item = soup_message_queue_next (queue, item))
+		g_hash_table_insert (current, item, item);
+
+	/* Cancel everything */
+	SOUP_SESSION_CLASS (soup_session_sync_parent_class)->flush_queue (session);
+
+	/* Wait until all of the items in @current have been removed
+	 * from the queue. (This is not the same as "wait for the
+	 * queue to be empty", because the app may queue new requests
+	 * in response to the cancellation of the old ones. We don't
+	 * try to cancel those requests as well, since we'd likely
+	 * just end up looping forever.)
+	 */
+	g_mutex_lock (priv->lock);
+	do {
+		done = TRUE;
+		for (item = soup_message_queue_first (queue);
+		     item;
+		     item = soup_message_queue_next (queue, item)) {
+			if (g_hash_table_lookup (current, item)) /* an item recorded above is still queued */
+				done = FALSE;
+		}
+
+		if (!done)
+			g_cond_wait (priv->cond, priv->lock);
+	} while (!done);
+	g_mutex_unlock (priv->lock);
+
+	g_hash_table_destroy (current);
+}
diff --git a/libsoup/soup-session-sync.h b/libsoup/soup-session-sync.h
new file mode 100644
index 0000000..845e01c
--- /dev/null
+++ b/libsoup/soup-session-sync.h
@@ -0,0 +1,44 @@
+/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+/*
+ * Copyright (C) 2000-2003, Ximian, Inc.
+ */
+
+#ifndef SOUP_SESSION_SYNC_H
+#define SOUP_SESSION_SYNC_H 1
+
+#include <libsoup/soup-types.h>
+#include <libsoup/soup-session.h>
+
+G_BEGIN_DECLS
+/* Standard GObject cast/check macros for SoupSessionSync. */
+#define SOUP_TYPE_SESSION_SYNC            (soup_session_sync_get_type ())
+#define SOUP_SESSION_SYNC(obj)            (G_TYPE_CHECK_INSTANCE_CAST ((obj), SOUP_TYPE_SESSION_SYNC, SoupSessionSync))
+#define SOUP_SESSION_SYNC_CLASS(klass)    (G_TYPE_CHECK_CLASS_CAST ((klass), SOUP_TYPE_SESSION_SYNC, SoupSessionSyncClass))
+#define SOUP_IS_SESSION_SYNC(obj)         (G_TYPE_CHECK_INSTANCE_TYPE ((obj), SOUP_TYPE_SESSION_SYNC))
+#define SOUP_IS_SESSION_SYNC_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), SOUP_TYPE_SESSION_SYNC)) /* must expand its own parameter, klass */
+#define SOUP_SESSION_SYNC_GET_CLASS(obj)  (G_TYPE_INSTANCE_GET_CLASS ((obj), SOUP_TYPE_SESSION_SYNC, SoupSessionSyncClass))
+/* Instance struct: declares no fields beyond the SoupSession parent. */
+struct _SoupSessionSync {
+	SoupSession parent;
+
+};
+/* Class struct: the SoupSessionClass parent plus reserved function-pointer padding. */
+typedef struct {
+	SoupSessionClass parent_class;
+
+	/* Padding for future expansion */
+	void (*_libsoup_reserved1) (void);
+	void (*_libsoup_reserved2) (void);
+	void (*_libsoup_reserved3) (void);
+	void (*_libsoup_reserved4) (void);
+} SoupSessionSyncClass;
+
+GType soup_session_sync_get_type (void);
+
+SoupSession *soup_session_sync_new              (void);
+SoupSession *soup_session_sync_new_with_options (const char *optname1,
+						 ...) G_GNUC_NULL_TERMINATED;
+
+G_END_DECLS
+
+#endif /* SOUP_SESSION_SYNC_H */
diff --git a/libsoup/soup-session.c b/libsoup/soup-session.c
new file mode 100644
index 0000000..fd568ef
--- /dev/null
+++ b/libsoup/soup-session.c
@@ -0,0 +1,2066 @@
+/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+/*
+ * soup-session.c
+ *
+ * Copyright (C) 2000-2003, Ximian, Inc.
+ */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include +#include + +#include "soup-address.h" +#include "soup-auth.h" +#include "soup-auth-basic.h" +#include "soup-auth-digest.h" +#include "soup-auth-manager-ntlm.h" +#include "soup-connection.h" +#include "soup-marshal.h" +#include "soup-message-private.h" +#include "soup-message-queue.h" +#include "soup-misc.h" +#include "soup-proxy-resolver-static.h" +#include "soup-proxy-uri-resolver.h" +#include "soup-session.h" +#include "soup-session-feature.h" +#include "soup-session-private.h" +#include "soup-socket.h" +#include "soup-ssl.h" +#include "soup-uri.h" +/*TIZEN patch*/ +#include "TIZEN.h" + +/** + * SECTION:soup-session + * @short_description: Soup session state object + * + * #SoupSession is the object that controls client-side HTTP. A + * #SoupSession encapsulates all of the state that libsoup is keeping + * on behalf of your program; cached HTTP connections, authentication + * information, etc. + * + * Most applications will only need a single #SoupSession; the primary + * reason you might need multiple sessions is if you need to have + * multiple independent authentication contexts. (Eg, you are + * connecting to a server and authenticating as two different users at + * different times; the easiest way to ensure that each #SoupMessage + * is sent with the authentication information you intended is to use + * one session for the first user, and a second session for the other + * user.) + * + * #SoupSession itself is an abstract class, with two subclasses. If + * you are using the glib main loop, you will generally want to use + * #SoupSessionAsync, which uses non-blocking I/O and callbacks. On + * the other hand, if your application is threaded and you want to do + * synchronous I/O in a separate thread from the UI, use + * #SoupSessionSync. 
+ **/ + +typedef struct { + SoupURI *uri; + SoupAddress *addr; + + GSList *connections; /* CONTAINS: SoupConnection */ + guint num_conns; + + guint num_messages; + + gboolean ssl_fallback; +} SoupSessionHost; + +typedef struct { + char *ssl_ca_file; + SoupSSLCredentials *ssl_creds; + gboolean ssl_strict; + + SoupMessageQueue *queue; + + char *user_agent; + char *accept_language; + gboolean accept_language_auto; + + GSList *features; + GHashTable *features_cache; + + GHashTable *hosts; /* char* -> SoupSessionHost */ + GHashTable *conns; /* SoupConnection -> SoupSessionHost */ + guint num_conns; + guint max_conns, max_conns_per_host; + guint io_timeout, idle_timeout; + + /* Must hold the host_lock before potentially creating a + * new SoupSessionHost, or adding/removing a connection. + * Must not emit signals or destroy objects while holding it. + */ + GMutex *host_lock; + + GMainContext *async_context; + + GResolver *resolver; +} SoupSessionPrivate; +#define SOUP_SESSION_GET_PRIVATE(o) (G_TYPE_INSTANCE_GET_PRIVATE ((o), SOUP_TYPE_SESSION, SoupSessionPrivate)) + +static void free_host (SoupSessionHost *host); + +static void queue_message (SoupSession *session, SoupMessage *msg, + SoupSessionCallback callback, gpointer user_data); +static void requeue_message (SoupSession *session, SoupMessage *msg); +static void cancel_message (SoupSession *session, SoupMessage *msg, + guint status_code); +static void auth_required (SoupSession *session, SoupMessage *msg, + SoupAuth *auth, gboolean retrying); +static void flush_queue (SoupSession *session); + +static void auth_manager_authenticate (SoupAuthManager *manager, + SoupMessage *msg, SoupAuth *auth, + gboolean retrying, gpointer user_data); + +#define SOUP_SESSION_MAX_CONNS_DEFAULT 10 +#define SOUP_SESSION_MAX_CONNS_PER_HOST_DEFAULT 2 + +#define SOUP_SESSION_MAX_REDIRECTION_COUNT 20 + +#define SOUP_SESSION_USER_AGENT_BASE "libsoup/" PACKAGE_VERSION + +G_DEFINE_ABSTRACT_TYPE (SoupSession, soup_session, G_TYPE_OBJECT) + +enum 
{ + REQUEST_QUEUED, + REQUEST_STARTED, + REQUEST_UNQUEUED, + AUTHENTICATE, + CONNECTION_CREATED, + TUNNELING, + LAST_SIGNAL +}; + +static guint signals[LAST_SIGNAL] = { 0 }; + +enum { + PROP_0, + + PROP_PROXY_URI, + PROP_MAX_CONNS, + PROP_MAX_CONNS_PER_HOST, + PROP_USE_NTLM, + PROP_SSL_CA_FILE, + PROP_SSL_STRICT, + PROP_ASYNC_CONTEXT, + PROP_TIMEOUT, + PROP_USER_AGENT, + PROP_ACCEPT_LANGUAGE, + PROP_ACCEPT_LANGUAGE_AUTO, + PROP_IDLE_TIMEOUT, + PROP_ADD_FEATURE, + PROP_ADD_FEATURE_BY_TYPE, + PROP_REMOVE_FEATURE_BY_TYPE, + + LAST_PROP +}; + +static void set_property (GObject *object, guint prop_id, + const GValue *value, GParamSpec *pspec); +static void get_property (GObject *object, guint prop_id, + GValue *value, GParamSpec *pspec); + +static void +soup_session_init (SoupSession *session) +{ + SoupSessionPrivate *priv = SOUP_SESSION_GET_PRIVATE (session); + SoupAuthManager *auth_manager; + + priv->queue = soup_message_queue_new (session); + + priv->host_lock = g_mutex_new (); + priv->hosts = g_hash_table_new_full (soup_uri_host_hash, + soup_uri_host_equal, + NULL, (GDestroyNotify)free_host); + priv->conns = g_hash_table_new (NULL, NULL); + + priv->max_conns = SOUP_SESSION_MAX_CONNS_DEFAULT; + priv->max_conns_per_host = SOUP_SESSION_MAX_CONNS_PER_HOST_DEFAULT; + + priv->features_cache = g_hash_table_new (NULL, NULL); + + auth_manager = g_object_new (SOUP_TYPE_AUTH_MANAGER_NTLM, NULL); + g_signal_connect (auth_manager, "authenticate", + G_CALLBACK (auth_manager_authenticate), session); + soup_session_feature_add_feature (SOUP_SESSION_FEATURE (auth_manager), + SOUP_TYPE_AUTH_BASIC); + soup_session_feature_add_feature (SOUP_SESSION_FEATURE (auth_manager), + SOUP_TYPE_AUTH_DIGEST); + soup_session_add_feature (session, SOUP_SESSION_FEATURE (auth_manager)); + g_object_unref (auth_manager); + + /* We'll be doing DNS continuously-ish while the session is active, + * so hold a ref on the default GResolver. 
+ */ + priv->resolver = g_resolver_get_default (); + + priv->ssl_strict = TRUE; +} + +static void +dispose (GObject *object) +{ + SoupSession *session = SOUP_SESSION (object); + SoupSessionPrivate *priv = SOUP_SESSION_GET_PRIVATE (session); + + soup_session_abort (session); + + while (priv->features) + soup_session_remove_feature (session, priv->features->data); + + G_OBJECT_CLASS (soup_session_parent_class)->dispose (object); +} + +static void +finalize (GObject *object) +{ + SoupSession *session = SOUP_SESSION (object); + SoupSessionPrivate *priv = SOUP_SESSION_GET_PRIVATE (session); + + soup_message_queue_destroy (priv->queue); + + g_mutex_free (priv->host_lock); + g_hash_table_destroy (priv->hosts); + g_hash_table_destroy (priv->conns); + + g_free (priv->user_agent); + g_free (priv->accept_language); + + if (priv->ssl_ca_file) + g_free (priv->ssl_ca_file); + if (priv->ssl_creds) + soup_ssl_free_client_credentials (priv->ssl_creds); + + if (priv->async_context) + g_main_context_unref (priv->async_context); + + g_hash_table_destroy (priv->features_cache); + + g_object_unref (priv->resolver); + + G_OBJECT_CLASS (soup_session_parent_class)->finalize (object); +} + +static void +soup_session_class_init (SoupSessionClass *session_class) +{ + GObjectClass *object_class = G_OBJECT_CLASS (session_class); + + g_type_class_add_private (session_class, sizeof (SoupSessionPrivate)); + + /* virtual method definition */ + session_class->queue_message = queue_message; + session_class->requeue_message = requeue_message; + session_class->cancel_message = cancel_message; + session_class->auth_required = auth_required; + session_class->flush_queue = flush_queue; + + /* virtual method override */ + object_class->dispose = dispose; + object_class->finalize = finalize; + object_class->set_property = set_property; + object_class->get_property = get_property; + + /* signals */ + + /** + * SoupSession::request-queued: + * @session: the session + * @msg: the request that was queued + * + 
* Emitted when a request is queued on @session. (Note that + * "queued" doesn't just mean soup_session_queue_message(); + * soup_session_send_message() implicitly queues the message + * as well.) + * + * When sending a request, first #SoupSession::request_queued + * is emitted, indicating that the session has become aware of + * the request. + * + * Once a connection is available to send the request on, the + * session emits #SoupSession::request_started. Then, various + * #SoupMessage signals are emitted as the message is + * processed. If the message is requeued, it will emit + * #SoupMessage::restarted, which will then be followed by + * another #SoupSession::request_started and another set of + * #SoupMessage signals when the message is re-sent. + * + * Eventually, the message will emit #SoupMessage::finished. + * Normally, this signals the completion of message + * processing. However, it is possible that the application + * will requeue the message from the "finished" handler (or + * equivalently, from the soup_session_queue_message() + * callback). In that case, the process will loop back to + * #SoupSession::request_started. + * + * Eventually, a message will reach "finished" and not be + * requeued. At that point, the session will emit + * #SoupSession::request_unqueued to indicate that it is done + * with the message. + * + * To sum up: #SoupSession::request_queued and + * #SoupSession::request_unqueued are guaranteed to be emitted + * exactly once, but #SoupSession::request_started and + * #SoupMessage::finished (and all of the other #SoupMessage + * signals) may be invoked multiple times for a given message. + * + * Since: 2.4.1 + **/ + signals[REQUEST_QUEUED] = + g_signal_new ("request-queued", + G_OBJECT_CLASS_TYPE (object_class), + G_SIGNAL_RUN_FIRST, + 0, /* FIXME? 
*/ + NULL, NULL, + soup_marshal_NONE__OBJECT, + G_TYPE_NONE, 1, + SOUP_TYPE_MESSAGE); + + /** + * SoupSession::request-started: + * @session: the session + * @msg: the request being sent + * @socket: the socket the request is being sent on + * + * Emitted just before a request is sent. See + * #SoupSession::request_queued for a detailed description of + * the message lifecycle within a session. + **/ + signals[REQUEST_STARTED] = + g_signal_new ("request-started", + G_OBJECT_CLASS_TYPE (object_class), + G_SIGNAL_RUN_FIRST, + G_STRUCT_OFFSET (SoupSessionClass, request_started), + NULL, NULL, + soup_marshal_NONE__OBJECT_OBJECT, + G_TYPE_NONE, 2, + SOUP_TYPE_MESSAGE, + SOUP_TYPE_SOCKET); + + /** + * SoupSession::request-unqueued: + * @session: the session + * @msg: the request that was unqueued + * + * Emitted when a request is removed from @session's queue, + * indicating that @session is done with it. See + * #SoupSession::request_queued for a detailed description of the + * message lifecycle within a session. + * + * Since: 2.4.1 + **/ + signals[REQUEST_UNQUEUED] = + g_signal_new ("request-unqueued", + G_OBJECT_CLASS_TYPE (object_class), + G_SIGNAL_RUN_FIRST, + 0, /* FIXME? */ + NULL, NULL, + soup_marshal_NONE__OBJECT, + G_TYPE_NONE, 1, + SOUP_TYPE_MESSAGE); + + /** + * SoupSession::authenticate: + * @session: the session + * @msg: the #SoupMessage being sent + * @auth: the #SoupAuth to authenticate + * @retrying: %TRUE if this is the second (or later) attempt + * + * Emitted when the session requires authentication. If + * credentials are available call soup_auth_authenticate() on + * @auth. If these credentials fail, the signal will be + * emitted again, with @retrying set to %TRUE, which will + * continue until you return without calling + * soup_auth_authenticate() on @auth. + * + * Note that this may be emitted before @msg's body has been + * fully read. 
+ * + * If you call soup_session_pause_message() on @msg before + * returning, then you can authenticate @auth asynchronously + * (as long as you g_object_ref() it to make sure it doesn't + * get destroyed), and then unpause @msg when you are ready + * for it to continue. + **/ + signals[AUTHENTICATE] = + g_signal_new ("authenticate", + G_OBJECT_CLASS_TYPE (object_class), + G_SIGNAL_RUN_FIRST, + G_STRUCT_OFFSET (SoupSessionClass, authenticate), + NULL, NULL, + soup_marshal_NONE__OBJECT_OBJECT_BOOLEAN, + G_TYPE_NONE, 3, + SOUP_TYPE_MESSAGE, + SOUP_TYPE_AUTH, + G_TYPE_BOOLEAN); + + signals[CONNECTION_CREATED] = + g_signal_new ("connection-created", + G_OBJECT_CLASS_TYPE (object_class), + G_SIGNAL_RUN_FIRST, + 0, + NULL, NULL, + soup_marshal_NONE__OBJECT, + G_TYPE_NONE, 1, + /* SoupConnection is private, so we can't use + * SOUP_TYPE_CONNECTION here. + */ + G_TYPE_OBJECT); + + signals[TUNNELING] = + g_signal_new ("tunneling", + G_OBJECT_CLASS_TYPE (object_class), + G_SIGNAL_RUN_FIRST, + 0, + NULL, NULL, + soup_marshal_NONE__OBJECT, + G_TYPE_NONE, 1, + /* SoupConnection is private, so we can't use + * SOUP_TYPE_CONNECTION here. + */ + G_TYPE_OBJECT); + + + /* properties */ + /** + * SOUP_SESSION_PROXY_URI: + * + * Alias for the #SoupSession:proxy-uri property. (The HTTP + * proxy to use for this session.) + **/ + g_object_class_install_property ( + object_class, PROP_PROXY_URI, + g_param_spec_boxed (SOUP_SESSION_PROXY_URI, + "Proxy URI", + "The HTTP Proxy to use for this session", + SOUP_TYPE_URI, + G_PARAM_READWRITE)); + /** + * SOUP_SESSION_MAX_CONNS: + * + * Alias for the #SoupSession:max-conns property. (The maximum + * number of connections that the session can open at once.) 
+ **/ + g_object_class_install_property ( + object_class, PROP_MAX_CONNS, + g_param_spec_int (SOUP_SESSION_MAX_CONNS, + "Max Connection Count", + "The maximum number of connections that the session can open at once", + 1, + G_MAXINT, + SOUP_SESSION_MAX_CONNS_DEFAULT, + G_PARAM_READWRITE)); + /** + * SOUP_SESSION_MAX_CONNS_PER_HOST: + * + * Alias for the #SoupSession:max-conns-per-host property. + * (The maximum number of connections that the session can + * open at once to a given host.) + **/ + g_object_class_install_property ( + object_class, PROP_MAX_CONNS_PER_HOST, + g_param_spec_int (SOUP_SESSION_MAX_CONNS_PER_HOST, + "Max Per-Host Connection Count", + "The maximum number of connections that the session can open at once to a given host", + 1, + G_MAXINT, + SOUP_SESSION_MAX_CONNS_PER_HOST_DEFAULT, + G_PARAM_READWRITE)); + /** + * SoupSession:idle-timeout: + * + * Connection lifetime when idle + * + * Since: 2.4.1 + **/ + /** + * SOUP_SESSION_IDLE_TIMEOUT: + * + * Alias for the #SoupSession:idle-timeout property. (The idle + * connection lifetime.) + * + * Since: 2.4.1 + **/ + g_object_class_install_property ( + object_class, PROP_IDLE_TIMEOUT, + g_param_spec_uint (SOUP_SESSION_IDLE_TIMEOUT, + "Idle Timeout", + "Connection lifetime when idle", + 0, G_MAXUINT, 0, + G_PARAM_READWRITE)); + /** + * SoupSession:use-ntlm: + * + * Whether or not to use NTLM authentication. + * + * Deprecated: use soup_session_add_feature_by_type() with + * #SOUP_TYPE_AUTH_NTLM. + **/ + /** + * SOUP_SESSION_USE_NTLM: + * + * Alias for the #SoupSession:use-ntlm property. (Whether or + * not to use NTLM authentication.) + **/ + g_object_class_install_property ( + object_class, PROP_USE_NTLM, + g_param_spec_boolean (SOUP_SESSION_USE_NTLM, + "Use NTLM", + "Whether or not to use NTLM authentication", + FALSE, + G_PARAM_READWRITE)); + /** + * SOUP_SESSION_SSL_CA_FILE: + * + * Alias for the #SoupSession:ssl-ca-file property. (File + * containing SSL CA certificates.) 
+ **/ + g_object_class_install_property ( + object_class, PROP_SSL_CA_FILE, + g_param_spec_string (SOUP_SESSION_SSL_CA_FILE, + "SSL CA file", + "File containing SSL CA certificates", + NULL, + G_PARAM_READWRITE)); + /** + * SOUP_SESSION_SSL_STRICT: + * + * Alias for the #SoupSession:ssl-strict + * property. By default, when validating certificates against + * a CA file, Soup will consider invalid certificates as a + * connection error. Setting this property to %FALSE makes soup + * ignore the errors, and make the connection. + * + * Since: 2.30 + **/ + g_object_class_install_property ( + object_class, PROP_SSL_STRICT, + g_param_spec_boolean (SOUP_SESSION_SSL_STRICT, + "Strictly validate SSL certificates", + "Whether certificate errors should be considered a connection error", + TRUE, + G_PARAM_READWRITE)); + /** + * SOUP_SESSION_ASYNC_CONTEXT: + * + * Alias for the #SoupSession:async-context property. (The + * session's #GMainContext.) + **/ + g_object_class_install_property ( + object_class, PROP_ASYNC_CONTEXT, + g_param_spec_pointer (SOUP_SESSION_ASYNC_CONTEXT, + "Async GMainContext", + "The GMainContext to dispatch async I/O in", + G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY)); + /** + * SOUP_SESSION_TIMEOUT: + * + * Alias for the #SoupSession:timeout property. (The timeout + * in seconds for blocking socket I/O operations.) + **/ + g_object_class_install_property ( + object_class, PROP_TIMEOUT, + g_param_spec_uint (SOUP_SESSION_TIMEOUT, + "Timeout value", + "Value in seconds to timeout a blocking I/O", + 0, G_MAXUINT, 0, + G_PARAM_READWRITE)); + + /** + * SoupSession:user-agent: + * + * If non-%NULL, the value to use for the "User-Agent" header + * on #SoupMessages sent from this session. + * + * RFC 2616 says: "The User-Agent request-header field + * contains information about the user agent originating the + * request.
This is for statistical purposes, the tracing of + * protocol violations, and automated recognition of user + * agents for the sake of tailoring responses to avoid + * particular user agent limitations. User agents SHOULD + * include this field with requests." + * + * The User-Agent header contains a list of one or more + * product tokens, separated by whitespace, with the most + * significant product token coming first. The tokens must be + * brief, ASCII, and mostly alphanumeric (although "-", "_", + * and "." are also allowed), and may optionally include a "/" + * followed by a version string. You may also put comments, + * enclosed in parentheses, between or after the tokens. + * + * If you set a %user_agent property that has trailing + * whitespace, #SoupSession will append its own product token + * (eg, "libsoup/2.3.2") to the end of the + * header for you. + **/ + /** + * SOUP_SESSION_USER_AGENT: + * + * Alias for the #SoupSession:user-agent property, qv. + **/ + g_object_class_install_property ( + object_class, PROP_USER_AGENT, + g_param_spec_string (SOUP_SESSION_USER_AGENT, + "User-Agent string", + "User-Agent string", + NULL, + G_PARAM_READWRITE)); + + /** + * SoupSession:accept-language: + * + * If non-%NULL, the value to use for the "Accept-Language" header + * on #SoupMessages sent from this session. + * + * Setting this will disable + * #SoupSession:accept-language-auto. + * + * Since: 2.30 + **/ + /** + * SOUP_SESSION_ACCEPT_LANGUAGE: + * + * Alias for the #SoupSession:accept-language property, qv. 
+ * + * Since: 2.30 + **/ + g_object_class_install_property ( + object_class, PROP_ACCEPT_LANGUAGE, + g_param_spec_string (SOUP_SESSION_ACCEPT_LANGUAGE, + "Accept-Language string", + "Accept-Language string", + NULL, + G_PARAM_READWRITE)); + + /** + * SoupSession:accept-language-auto: + * + * If %TRUE, #SoupSession will automatically set the string + * for the "Accept-Language" header on every #SoupMessage + * sent, based on the return value of g_get_language_names(). + * + * Setting this will override any previous value of + * #SoupSession:accept-language. + * + * Since: 2.30 + **/ + /** + * SOUP_SESSION_ACCEPT_LANGUAGE_AUTO: + * + * Alias for the #SoupSession:accept-language-auto property, qv. + * + * Since: 2.30 + **/ + g_object_class_install_property ( + object_class, PROP_ACCEPT_LANGUAGE_AUTO, + g_param_spec_boolean (SOUP_SESSION_ACCEPT_LANGUAGE_AUTO, + "Accept-Language automatic mode", + "Accept-Language automatic mode", + FALSE, + G_PARAM_READWRITE)); + + /** + * SoupSession:add-feature: (skip) + * + * Add a feature object to the session. (Shortcut for calling + * soup_session_add_feature().) + * + * Since: 2.24 + **/ + /** + * SOUP_SESSION_ADD_FEATURE: (skip) + * + * Alias for the #SoupSession:add-feature property. (Shortcut + * for calling soup_session_add_feature().) + * + * Since: 2.24 + **/ + g_object_class_install_property ( + object_class, PROP_ADD_FEATURE, + g_param_spec_object (SOUP_SESSION_ADD_FEATURE, + "Add Feature", + "Add a feature object to the session", + SOUP_TYPE_SESSION_FEATURE, + G_PARAM_READWRITE)); + /** + * SoupSession:add-feature-by-type: (skip) + * + * Add a feature object of the given type to the session. + * (Shortcut for calling soup_session_add_feature_by_type().) + * + * Since: 2.24 + **/ + /** + * SOUP_SESSION_ADD_FEATURE_BY_TYPE: (skip) + * + * Alias for the #SoupSession:add-feature-by-type property. + * (Shortcut for calling soup_session_add_feature_by_type().)
+ * + * Since: 2.24 + **/ + g_object_class_install_property ( + object_class, PROP_ADD_FEATURE_BY_TYPE, + g_param_spec_gtype (SOUP_SESSION_ADD_FEATURE_BY_TYPE, + "Add Feature By Type", + "Add a feature object of the given type to the session", + SOUP_TYPE_SESSION_FEATURE, + G_PARAM_READWRITE)); + /** + * SoupSession:remove-feature-by-type: (skip) + * + * Remove feature objects from the session. (Shortcut for + * calling soup_session_remove_feature_by_type().) + * + * Since: 2.24 + **/ + /** + * SOUP_SESSION_REMOVE_FEATURE_BY_TYPE: (skip) + * + * Alias for the #SoupSession:remove-feature-by-type + * property. (Shortcut for calling + * soup_session_remove_feature_by_type().) + * + * Since: 2.24 + **/ + g_object_class_install_property ( + object_class, PROP_REMOVE_FEATURE_BY_TYPE, + g_param_spec_gtype (SOUP_SESSION_REMOVE_FEATURE_BY_TYPE, + "Remove Feature By Type", + "Remove features of the given type from the session", + SOUP_TYPE_SESSION_FEATURE, + G_PARAM_READWRITE)); +} + +static gboolean +safe_str_equal (const char *a, const char *b) +{ + if (!a && !b) + return TRUE; + + if ((a && !b) || (b && !a)) + return FALSE; + + return strcmp (a, b) == 0; +} + +/* Converts a POSIX-format language name to its RFC 2616 form (lowercase, "_" replaced by "-"); returns NULL for names that are skipped */ +/* Based on code from epiphany-webkit (ephy_langs_append_languages()) */ +static gchar * +posix_lang_to_rfc2616 (const gchar *language) +{ + /* Don't include charset variants, etc */ + if (strchr (language, '.') || strchr (language, '@')) + return NULL; + + /* Ignore "C" locale, which g_get_language_names() always + * includes as a fallback. + */ + if (!strcmp (language, "C")) + return NULL; + + return g_strdelimit (g_ascii_strdown (language, -1), "_", '-'); +} + +/* Returns a new string: @str with @quality (0-100, scaled to 0.0-1.0) appended as ";q="; a plain copy of @str if @quality is outside 0-99 */ +static gchar * +add_quality_value (const gchar *str, int quality) +{ + g_return_val_if_fail (str != NULL, NULL); + + if (quality >= 0 && quality < 100) { + /* We don't use %.02g because of "."
vs "," locale issues */ + if (quality % 10) + return g_strdup_printf ("%s;q=0.%02d", str, quality); + else + return g_strdup_printf ("%s;q=0.%d", str, quality / 10); + } else + return g_strdup (str); +} + +/* Returns a RFC2616 compliant languages list from system locales */ +static gchar * +accept_languages_from_system (void) +{ + const char * const * lang_names; + GPtrArray *langs = NULL; + char *lang, **langs_array, *langs_str; + int delta; + int i; + + lang_names = g_get_language_names (); + g_return_val_if_fail (lang_names != NULL, NULL); + + /* Build the array of languages */ + langs = g_ptr_array_new (); + for (i = 0; lang_names[i] != NULL; i++) { + lang = posix_lang_to_rfc2616 (lang_names[i]); + if (lang) + g_ptr_array_add (langs, lang); + } + + /* Add quality values */ + if (langs->len < 10) + delta = 10; + else if (langs->len < 20) + delta = 5; + else + delta = 1; + + for (i = 0; i < langs->len; i++) { + lang = langs->pdata[i]; + langs->pdata[i] = add_quality_value (lang, 100 - i * delta); + g_free (lang); + } + + /* Fallback: add "en" if list is empty */ + if (langs->len == 0) + g_ptr_array_add (langs, g_strdup ("en")); + + g_ptr_array_add (langs, NULL); + langs_array = (char **)langs->pdata; + langs_str = g_strjoinv (", ", langs_array); + + g_strfreev (langs_array); + g_ptr_array_free (langs, FALSE); + + return langs_str; +} + +static void +set_property (GObject *object, guint prop_id, + const GValue *value, GParamSpec *pspec) +{ + SoupSession *session = SOUP_SESSION (object); + SoupSessionPrivate *priv = SOUP_SESSION_GET_PRIVATE (session); + SoupURI *uri; + gboolean ca_file_changed = FALSE; + const char *new_ca_file, *user_agent; + SoupSessionFeature *feature; + + switch (prop_id) { + case PROP_PROXY_URI: + uri = g_value_get_boxed (value); + + if (uri) { + soup_session_remove_feature_by_type (session, SOUP_TYPE_PROXY_RESOLVER); + feature = SOUP_SESSION_FEATURE (soup_proxy_resolver_static_new (uri)); + soup_session_add_feature (session, feature); + 
g_object_unref (feature); + } else + soup_session_remove_feature_by_type (session, SOUP_TYPE_PROXY_RESOLVER_STATIC); + + soup_session_abort (session); + break; + case PROP_MAX_CONNS: + priv->max_conns = g_value_get_int (value); + break; + case PROP_MAX_CONNS_PER_HOST: + priv->max_conns_per_host = g_value_get_int (value); + break; + case PROP_USE_NTLM: + feature = soup_session_get_feature (session, SOUP_TYPE_AUTH_MANAGER_NTLM); + if (feature) { + if (g_value_get_boolean (value)) + soup_session_feature_add_feature (feature, SOUP_TYPE_AUTH_NTLM); + else + soup_session_feature_remove_feature (feature, SOUP_TYPE_AUTH_NTLM); + } else + g_warning ("Trying to set use-ntlm on session with no auth-manager"); + break; + case PROP_SSL_CA_FILE: + new_ca_file = g_value_get_string (value); + + if (!safe_str_equal (priv->ssl_ca_file, new_ca_file)) + ca_file_changed = TRUE; + + g_free (priv->ssl_ca_file); + priv->ssl_ca_file = g_strdup (new_ca_file); + + if (ca_file_changed && priv->ssl_creds) { + soup_ssl_free_client_credentials (priv->ssl_creds); + priv->ssl_creds = NULL; + } + + break; + case PROP_SSL_STRICT: + priv->ssl_strict = g_value_get_boolean (value); + break; + case PROP_ASYNC_CONTEXT: + priv->async_context = g_value_get_pointer (value); + if (priv->async_context) + g_main_context_ref (priv->async_context); + break; + case PROP_TIMEOUT: + priv->io_timeout = g_value_get_uint (value); + break; + case PROP_USER_AGENT: + g_free (priv->user_agent); + user_agent = g_value_get_string (value); + if (!user_agent) + priv->user_agent = NULL; + else if (!*user_agent) { + priv->user_agent = + g_strdup (SOUP_SESSION_USER_AGENT_BASE); + } else if (g_str_has_suffix (user_agent, " ")) { + priv->user_agent = + g_strdup_printf ("%s%s", user_agent, + SOUP_SESSION_USER_AGENT_BASE); + } else + priv->user_agent = g_strdup (user_agent); + break; + case PROP_ACCEPT_LANGUAGE: + g_free (priv->accept_language); + priv->accept_language = g_strdup (g_value_get_string (value)); + 
priv->accept_language_auto = FALSE; + break; + case PROP_ACCEPT_LANGUAGE_AUTO: + priv->accept_language_auto = g_value_get_boolean (value); + if (priv->accept_language) { + g_free (priv->accept_language); + priv->accept_language = NULL; + } + + /* Get languages from system if needed */ + if (priv->accept_language_auto) + priv->accept_language = accept_languages_from_system (); + break; + case PROP_IDLE_TIMEOUT: + priv->idle_timeout = g_value_get_uint (value); + break; + case PROP_ADD_FEATURE: + soup_session_add_feature (session, g_value_get_object (value)); + break; + case PROP_ADD_FEATURE_BY_TYPE: + soup_session_add_feature_by_type (session, g_value_get_gtype (value)); + break; + case PROP_REMOVE_FEATURE_BY_TYPE: + soup_session_remove_feature_by_type (session, g_value_get_gtype (value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); + break; + } +} + +static void +get_property (GObject *object, guint prop_id, + GValue *value, GParamSpec *pspec) +{ + SoupSession *session = SOUP_SESSION (object); + SoupSessionPrivate *priv = SOUP_SESSION_GET_PRIVATE (session); + SoupSessionFeature *feature; + + switch (prop_id) { + case PROP_PROXY_URI: + feature = soup_session_get_feature (session, SOUP_TYPE_PROXY_RESOLVER_STATIC); + if (feature) { + g_object_get_property (G_OBJECT (feature), + SOUP_PROXY_RESOLVER_STATIC_PROXY_URI, + value); + } else + g_value_set_boxed (value, NULL); + break; + case PROP_MAX_CONNS: + g_value_set_int (value, priv->max_conns); + break; + case PROP_MAX_CONNS_PER_HOST: + g_value_set_int (value, priv->max_conns_per_host); + break; + case PROP_USE_NTLM: + feature = soup_session_get_feature (session, SOUP_TYPE_AUTH_MANAGER_NTLM); + if (feature) + g_value_set_boolean (value, soup_session_feature_has_feature (feature, SOUP_TYPE_AUTH_NTLM)); + else + g_value_set_boolean (value, FALSE); + break; + case PROP_SSL_CA_FILE: + g_value_set_string (value, priv->ssl_ca_file); + break; + case PROP_SSL_STRICT: + g_value_set_boolean 
(value, priv->ssl_strict); + break; + case PROP_ASYNC_CONTEXT: + g_value_set_pointer (value, priv->async_context ? g_main_context_ref (priv->async_context) : NULL); + break; + case PROP_TIMEOUT: + g_value_set_uint (value, priv->io_timeout); + break; + case PROP_USER_AGENT: + g_value_set_string (value, priv->user_agent); + break; + case PROP_ACCEPT_LANGUAGE: + g_value_set_string (value, priv->accept_language); + break; + case PROP_ACCEPT_LANGUAGE_AUTO: + g_value_set_boolean (value, priv->accept_language_auto); + break; + case PROP_IDLE_TIMEOUT: + g_value_set_uint (value, priv->idle_timeout); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); + break; + } +} + + +/** + * soup_session_get_async_context: + * @session: a #SoupSession + * + * Gets @session's async_context. This does not add a ref to the + * context, so you will need to ref it yourself if you want it to + * outlive its session. + * + * Return value: (transfer none): @session's #GMainContext, which may + * be %NULL + **/ +GMainContext * +soup_session_get_async_context (SoupSession *session) +{ + SoupSessionPrivate *priv; + + g_return_val_if_fail (SOUP_IS_SESSION (session), NULL); + priv = SOUP_SESSION_GET_PRIVATE (session); + + return priv->async_context; +} + +/* Hosts */ + +static SoupSessionHost * +soup_session_host_new (SoupSession *session, SoupURI *uri) +{ + SoupSessionHost *host; + + host = g_slice_new0 (SoupSessionHost); + host->uri = soup_uri_copy_host (uri); + host->addr = soup_address_new (host->uri->host, host->uri->port); + + return host; +} + +/* Requires host_lock to be locked */ +static SoupSessionHost * +get_host_for_uri (SoupSession *session, SoupURI *uri) +{ + SoupSessionPrivate *priv = SOUP_SESSION_GET_PRIVATE (session); + SoupSessionHost *host; + + host = g_hash_table_lookup (priv->hosts, uri); + if (host) + return host; + + host = soup_session_host_new (session, uri); + g_hash_table_insert (priv->hosts, host->uri, host); + + return host; +} + +/* Note: 
get_host_for_message doesn't lock the host_lock. The caller + * must do it itself if there's a chance the host doesn't already + * exist. + */ +static SoupSessionHost * +get_host_for_message (SoupSession *session, SoupMessage *msg) +{ + return get_host_for_uri (session, soup_message_get_uri (msg)); +} + +static void +free_host (SoupSessionHost *host) +{ + while (host->connections) { + SoupConnection *conn = host->connections->data; + + host->connections = g_slist_remove (host->connections, conn); + soup_connection_disconnect (conn); + } + + soup_uri_free (host->uri); + g_object_unref (host->addr); + g_slice_free (SoupSessionHost, host); +} + +static void +auth_required (SoupSession *session, SoupMessage *msg, + SoupAuth *auth, gboolean retrying) +{ + g_signal_emit (session, signals[AUTHENTICATE], 0, msg, auth, retrying); +} + +static void +auth_manager_authenticate (SoupAuthManager *manager, SoupMessage *msg, + SoupAuth *auth, gboolean retrying, + gpointer session) +{ + SOUP_SESSION_GET_CLASS (session)->auth_required ( + session, msg, auth, retrying); +} + +#define SOUP_METHOD_IS_SAFE(method) (method == SOUP_METHOD_GET || \ + method == SOUP_METHOD_HEAD || \ + method == SOUP_METHOD_OPTIONS || \ + method == SOUP_METHOD_PROPFIND) + +static void +redirect_handler (SoupMessage *msg, gpointer user_data) +{ + SoupMessageQueueItem *item = user_data; + SoupSession *session = item->session; + const char *new_loc; + SoupURI *new_uri; + + new_loc = soup_message_headers_get_one (msg->response_headers, + "Location"); + g_return_if_fail (new_loc != NULL); + + if (item->redirection_count >= SOUP_SESSION_MAX_REDIRECTION_COUNT) { + soup_session_cancel_message (session, msg, SOUP_STATUS_TOO_MANY_REDIRECTS); + return; + } + item->redirection_count++; + + if (msg->status_code == SOUP_STATUS_SEE_OTHER || + (msg->status_code == SOUP_STATUS_FOUND && + !SOUP_METHOD_IS_SAFE (msg->method)) || + (msg->status_code == SOUP_STATUS_MOVED_PERMANENTLY && + msg->method == SOUP_METHOD_POST)) { + if 
(msg->method != SOUP_METHOD_HEAD) { + /* Redirect using a GET */ + g_object_set (msg, + SOUP_MESSAGE_METHOD, SOUP_METHOD_GET, + NULL); + } + soup_message_set_request (msg, NULL, + SOUP_MEMORY_STATIC, NULL, 0); + soup_message_headers_set_encoding (msg->request_headers, + SOUP_ENCODING_NONE); + } else if (msg->status_code == SOUP_STATUS_MOVED_PERMANENTLY || + msg->status_code == SOUP_STATUS_TEMPORARY_REDIRECT || + msg->status_code == SOUP_STATUS_FOUND) { + /* Don't redirect non-safe methods */ + if (!SOUP_METHOD_IS_SAFE (msg->method)) + return; + } else { + /* Three possibilities: + * + * 1) This was a non-3xx response that happened to + * have a "Location" header + * 2) It's a non-redirecty 3xx response (300, 304, + * 305, 306) + * 3) It's some newly-defined 3xx response (308+) + * + * We ignore all of these cases. In the first two, + * redirecting would be explicitly wrong, and in the + * last case, we have no clue if the 3xx response is + * supposed to be redirecty or non-redirecty. Plus, + * 2616 says unrecognized status codes should be + * treated as the equivalent to the x00 code, and we + * don't redirect on 300, so therefore we shouldn't + * redirect on 308+ either. + */ + return; + } + + /* Location is supposed to be an absolute URI, but some sites + * are lame, so we use soup_uri_new_with_base(). 
+ */ + new_uri = soup_uri_new_with_base (soup_message_get_uri (msg), new_loc); + if (!new_uri || !new_uri->host) { + if (new_uri) + soup_uri_free (new_uri); + soup_message_set_status_full (msg, + SOUP_STATUS_MALFORMED, + "Invalid Redirect URL"); + return; + } + + soup_message_set_uri (msg, new_uri); + soup_uri_free (new_uri); + + soup_session_requeue_message (session, msg); +} + +void +soup_session_send_queue_item (SoupSession *session, + SoupMessageQueueItem *item, + SoupMessageCompletionFn completion_cb) +{ + SoupSessionPrivate *priv = SOUP_SESSION_GET_PRIVATE (session); + const char *conn_header; + + if (priv->user_agent) { + soup_message_headers_replace (item->msg->request_headers, + "User-Agent", priv->user_agent); + } + + if (priv->accept_language && + !soup_message_headers_get_list (item->msg->request_headers, + "Accept-Language")) { + soup_message_headers_append (item->msg->request_headers, + "Accept-Language", + priv->accept_language); + } + + /* Force keep alive connections for HTTP 1.0. Performance will + * improve when issuing multiple requests to the same host in + * a short period of time, as we wouldn't need to establish + * new connections. Keep alive is implicit for HTTP 1.1. 
+ */ + conn_header = soup_message_headers_get_list (item->msg->request_headers, "Connection"); + if (!conn_header || + (!soup_header_contains (conn_header, "Keep-Alive") && + !soup_header_contains (conn_header, "close"))) + soup_message_headers_append (item->msg->request_headers, + "Connection", "Keep-Alive"); + + g_signal_emit (session, signals[REQUEST_STARTED], 0, + item->msg, soup_connection_get_socket (item->conn)); + soup_connection_send_request (item->conn, item, completion_cb, item); +} + +gboolean +soup_session_cleanup_connections (SoupSession *session, + gboolean prune_idle) +{ + SoupSessionPrivate *priv = SOUP_SESSION_GET_PRIVATE (session); + GSList *conns = NULL, *c; + GHashTableIter iter; + gpointer conn, host; + SoupConnectionState state; + + g_mutex_lock (priv->host_lock); + g_hash_table_iter_init (&iter, priv->conns); + while (g_hash_table_iter_next (&iter, &conn, &host)) { + state = soup_connection_get_state (conn); + if (state == SOUP_CONNECTION_REMOTE_DISCONNECTED || + (prune_idle && state == SOUP_CONNECTION_IDLE)) + conns = g_slist_prepend (conns, g_object_ref (conn)); + } + g_mutex_unlock (priv->host_lock); + + if (!conns) + return FALSE; + + for (c = conns; c; c = c->next) { + conn = c->data; + soup_connection_disconnect (conn); + g_object_unref (conn); + } + g_slist_free (conns); + + return TRUE; +} + +static void +connection_disconnected (SoupConnection *conn, gpointer user_data) +{ + SoupSession *session = user_data; + SoupSessionPrivate *priv = SOUP_SESSION_GET_PRIVATE (session); + SoupSessionHost *host; + + g_mutex_lock (priv->host_lock); + + host = g_hash_table_lookup (priv->conns, conn); + if (host) { + g_hash_table_remove (priv->conns, conn); + host->connections = g_slist_remove (host->connections, conn); + host->num_conns--; + + if (soup_connection_get_ssl_fallback (conn)) + host->ssl_fallback = TRUE; + } + + g_signal_handlers_disconnect_by_func (conn, connection_disconnected, session); + priv->num_conns--; + + g_mutex_unlock 
(priv->host_lock); + g_object_unref (conn); +} + +SoupMessageQueueItem * +soup_session_make_connect_message (SoupSession *session, + SoupConnection *conn) +{ + SoupSessionPrivate *priv = SOUP_SESSION_GET_PRIVATE (session); + SoupAddress *server_addr = soup_connection_get_tunnel_addr (conn); + SoupURI *uri; + SoupMessage *msg; + SoupMessageQueueItem *item; + + uri = soup_uri_new (NULL); + soup_uri_set_scheme (uri, SOUP_URI_SCHEME_HTTPS); + soup_uri_set_host (uri, soup_address_get_name (server_addr)); + soup_uri_set_port (uri, soup_address_get_port (server_addr)); + soup_uri_set_path (uri, ""); + msg = soup_message_new_from_uri (SOUP_METHOD_CONNECT, uri); + soup_message_set_flags (msg, SOUP_MESSAGE_NO_REDIRECT); + soup_uri_free (uri); + + /* Call the base implementation of soup_session_queue_message + * directly, to add msg to the SoupMessageQueue and cause all + * the right signals to be emitted. + */ + queue_message (session, msg, NULL, NULL); + item = soup_message_queue_lookup (priv->queue, msg); + item->conn = g_object_ref (conn); + g_object_unref (msg); + + g_signal_emit (session, signals[TUNNELING], 0, conn); + return item; +} + +gboolean +soup_session_get_connection (SoupSession *session, + SoupMessageQueueItem *item, + gboolean *try_pruning) +{ + SoupSessionPrivate *priv = SOUP_SESSION_GET_PRIVATE (session); + SoupConnection *conn; + SoupSessionHost *host; + SoupAddress *remote_addr, *tunnel_addr; + SoupSSLCredentials *ssl_creds; + GSList *conns; + int num_pending = 0; + SoupURI *uri; + + if (item->conn) { + g_return_val_if_fail (soup_connection_get_state (item->conn) != SOUP_CONNECTION_DISCONNECTED, FALSE); + return TRUE; + } + + g_mutex_lock (priv->host_lock); + + host = get_host_for_message (session, item->msg); + for (conns = host->connections; conns; conns = conns->next) { + if (soup_connection_get_state (conns->data) == SOUP_CONNECTION_IDLE) { + soup_connection_set_state (conns->data, SOUP_CONNECTION_IN_USE); + g_mutex_unlock (priv->host_lock); + 
item->conn = g_object_ref (conns->data); + return TRUE; + } else if (soup_connection_get_state (conns->data) == SOUP_CONNECTION_CONNECTING) + num_pending++; + } + + /* Limit the number of pending connections; num_messages / 2 + * is somewhat arbitrary... + */ + if (num_pending > host->num_messages / 2) { + g_mutex_unlock (priv->host_lock); + return FALSE; + } + + if (host->num_conns >= priv->max_conns_per_host) { + g_mutex_unlock (priv->host_lock); + return FALSE; + } + + if (priv->num_conns >= priv->max_conns) { + *try_pruning = TRUE; + g_mutex_unlock (priv->host_lock); + return FALSE; + } + + if (item->proxy_addr) { + remote_addr = item->proxy_addr; + tunnel_addr = NULL; + } else { + remote_addr = host->addr; + tunnel_addr = NULL; + } + + uri = soup_message_get_uri (item->msg); + if (uri->scheme == SOUP_URI_SCHEME_HTTPS) { + if (!priv->ssl_creds) + priv->ssl_creds = soup_ssl_get_client_credentials (priv->ssl_ca_file); + ssl_creds = priv->ssl_creds; + + if (item->proxy_addr) + tunnel_addr = host->addr; + } else + ssl_creds = NULL; + + conn = soup_connection_new ( + SOUP_CONNECTION_REMOTE_ADDRESS, remote_addr, + SOUP_CONNECTION_TUNNEL_ADDRESS, tunnel_addr, + SOUP_CONNECTION_PROXY_URI, item->proxy_uri, + SOUP_CONNECTION_SSL_CREDENTIALS, ssl_creds, + SOUP_CONNECTION_SSL_STRICT, priv->ssl_strict, + SOUP_CONNECTION_ASYNC_CONTEXT, priv->async_context, + SOUP_CONNECTION_TIMEOUT, priv->io_timeout, + SOUP_CONNECTION_IDLE_TIMEOUT, priv->idle_timeout, + SOUP_CONNECTION_SSL_FALLBACK, host->ssl_fallback, + NULL); + g_signal_connect (conn, "disconnected", + G_CALLBACK (connection_disconnected), + session); + + g_signal_emit (session, signals[CONNECTION_CREATED], 0, conn); + + g_hash_table_insert (priv->conns, conn, host); + + priv->num_conns++; + host->num_conns++; + host->connections = g_slist_prepend (host->connections, conn); + + g_mutex_unlock (priv->host_lock); + item->conn = g_object_ref (conn); + return TRUE; +} + +SoupMessageQueue * +soup_session_get_queue (SoupSession 
*session) +{ + SoupSessionPrivate *priv = SOUP_SESSION_GET_PRIVATE (session); + + return priv->queue; +} + +void +soup_session_unqueue_item (SoupSession *session, + SoupMessageQueueItem *item) +{ + SoupSessionPrivate *priv = SOUP_SESSION_GET_PRIVATE (session); + SoupSessionHost *host; + + if (item->conn) { + g_object_unref (item->conn); + item->conn = NULL; + } + + if (item->state != SOUP_MESSAGE_FINISHED) { + g_warning ("finished an item with state %d", item->state); + return; + } + + soup_message_queue_remove (priv->queue, item); + + g_mutex_lock (priv->host_lock); + host = get_host_for_message (session, item->msg); + host->num_messages--; + g_mutex_unlock (priv->host_lock); + + /* g_signal_handlers_disconnect_by_func doesn't work if you + * have a metamarshal, meaning it doesn't work with + * soup_message_add_header_handler() + */ + g_signal_handlers_disconnect_matched (item->msg, G_SIGNAL_MATCH_DATA, + 0, 0, NULL, NULL, item); + g_signal_emit (session, signals[REQUEST_UNQUEUED], 0, item->msg); + soup_message_queue_item_unref (item); +} + +void +soup_session_set_item_status (SoupSession *session, + SoupMessageQueueItem *item, + guint status_code) +{ + SoupURI *uri; + char *msg; + + switch (status_code) { + case SOUP_STATUS_CANT_RESOLVE: + case SOUP_STATUS_CANT_CONNECT: + uri = soup_message_get_uri (item->msg); + msg = g_strdup_printf ("%s (%s)", + soup_status_get_phrase (status_code), + uri->host); + soup_message_set_status_full (item->msg, status_code, msg); + g_free (msg); + break; + + case SOUP_STATUS_CANT_RESOLVE_PROXY: + case SOUP_STATUS_CANT_CONNECT_PROXY: + if (item->proxy_uri && item->proxy_uri->host) { + msg = g_strdup_printf ("%s (%s)", + soup_status_get_phrase (status_code), + item->proxy_uri->host); + soup_message_set_status_full (item->msg, status_code, msg); + g_free (msg); + break; + } + soup_message_set_status (item->msg, status_code); + break; + + case SOUP_STATUS_SSL_FAILED: + if (!g_tls_backend_supports_tls (g_tls_backend_get_default ())) { + 
soup_message_set_status_full (item->msg, status_code, + "TLS/SSL support not available; install glib-networking"); + } else + soup_message_set_status (item->msg, status_code); + break; + + default: + soup_message_set_status (item->msg, status_code); + break; + } +} + +static void +queue_message (SoupSession *session, SoupMessage *msg, + SoupSessionCallback callback, gpointer user_data) +{ + SoupSessionPrivate *priv = SOUP_SESSION_GET_PRIVATE (session); + SoupMessageQueueItem *item; + SoupSessionHost *host; + + item = soup_message_queue_append (priv->queue, msg, callback, user_data); + + g_mutex_lock (priv->host_lock); + host = get_host_for_message (session, item->msg); + host->num_messages++; + g_mutex_unlock (priv->host_lock); + + if (!(soup_message_get_flags (msg) & SOUP_MESSAGE_NO_REDIRECT)) { + soup_message_add_header_handler ( + msg, "got_body", "Location", + G_CALLBACK (redirect_handler), item); + } + + g_signal_emit (session, signals[REQUEST_QUEUED], 0, msg); +} + +/** + * SoupSessionCallback: + * @session: the session + * @msg: the message that has finished + * @user_data: the data passed to soup_session_queue_message + * + * Prototype for the callback passed to soup_session_queue_message(), + * qv. + **/ + +/** + * soup_session_queue_message: + * @session: a #SoupSession + * @msg: (transfer full): the message to queue + * @callback: (allow-none) (scope async): a #SoupSessionCallback which will + * be called after the message completes or when an unrecoverable error occurs. + * @user_data: (allow-none): a pointer passed to @callback. + * + * Queues the message @msg for sending. All messages are processed + * while the glib main loop runs. If @msg has been processed before, + * any resources related to the time it was last sent are freed. + * + * Upon message completion, the callback specified in @callback will + * be invoked (in the thread associated with @session's async + * context). 
If after returning from this callback the message has not + * been requeued, @msg will be unreffed. + */ +void +soup_session_queue_message (SoupSession *session, SoupMessage *msg, + SoupSessionCallback callback, gpointer user_data) +{ + g_return_if_fail (SOUP_IS_SESSION (session)); + g_return_if_fail (SOUP_IS_MESSAGE (msg)); + + SOUP_SESSION_GET_CLASS (session)->queue_message (session, msg, + callback, user_data); +} + +static void +requeue_message (SoupSession *session, SoupMessage *msg) +{ + SoupSessionPrivate *priv = SOUP_SESSION_GET_PRIVATE (session); + SoupMessageQueueItem *item; + + item = soup_message_queue_lookup (priv->queue, msg); + g_return_if_fail (item != NULL); + item->state = SOUP_MESSAGE_RESTARTING; + soup_message_queue_item_unref (item); +} + +/** + * soup_session_requeue_message: + * @session: a #SoupSession + * @msg: the message to requeue + * + * This causes @msg to be placed back on the queue to be attempted + * again. + **/ +void +soup_session_requeue_message (SoupSession *session, SoupMessage *msg) +{ + g_return_if_fail (SOUP_IS_SESSION (session)); + g_return_if_fail (SOUP_IS_MESSAGE (msg)); + + SOUP_SESSION_GET_CLASS (session)->requeue_message (session, msg); +} + + +/** + * soup_session_send_message: + * @session: a #SoupSession + * @msg: the message to send + * + * Synchronously send @msg. This call will not return until the + * transfer is finished successfully or there is an unrecoverable + * error. + * + * @msg is not freed upon return. 
+ * + * Return value: the HTTP status code of the response + */ +guint +soup_session_send_message (SoupSession *session, SoupMessage *msg) +{ + g_return_val_if_fail (SOUP_IS_SESSION (session), SOUP_STATUS_MALFORMED); + g_return_val_if_fail (SOUP_IS_MESSAGE (msg), SOUP_STATUS_MALFORMED); + + return SOUP_SESSION_GET_CLASS (session)->send_message (session, msg); +} + + +/** + * soup_session_pause_message: + * @session: a #SoupSession + * @msg: a #SoupMessage currently running on @session + * + * Pauses HTTP I/O on @msg. Call soup_session_unpause_message() to + * resume I/O. + **/ +void +soup_session_pause_message (SoupSession *session, + SoupMessage *msg) +{ + g_return_if_fail (SOUP_IS_SESSION (session)); + g_return_if_fail (SOUP_IS_MESSAGE (msg)); + +#if ENABLE(TIZEN_FIX_PAUSE_MESSAGE) + if(soup_message_io_in_progress (msg)) + soup_message_io_pause (msg); +#else + soup_message_io_pause (msg); +#endif +} + +/** + * soup_session_unpause_message: + * @session: a #SoupSession + * @msg: a #SoupMessage currently running on @session + * + * Resumes HTTP I/O on @msg. Use this to resume after calling + * soup_session_pause_message(). + * + * If @msg is being sent via blocking I/O, this will resume reading or + * writing immediately. If @msg is using non-blocking I/O, then + * reading or writing won't resume until you return to the main loop. 
+ **/ +void +soup_session_unpause_message (SoupSession *session, + SoupMessage *msg) +{ + g_return_if_fail (SOUP_IS_SESSION (session)); + g_return_if_fail (SOUP_IS_MESSAGE (msg)); + +#if ENABLE(TIZEN_FIX_PAUSE_MESSAGE) + if(soup_message_io_in_progress (msg)) + soup_message_io_unpause (msg); +#else + soup_message_io_unpause (msg); +#endif +} + + +static void +cancel_message (SoupSession *session, SoupMessage *msg, guint status_code) +{ + SoupSessionPrivate *priv = SOUP_SESSION_GET_PRIVATE (session); + SoupMessageQueueItem *item; + + item = soup_message_queue_lookup (priv->queue, msg); + g_return_if_fail (item != NULL); + + soup_message_set_status (msg, status_code); + g_cancellable_cancel (item->cancellable); + + soup_message_queue_item_unref (item); +} + +/** + * soup_session_cancel_message: + * @session: a #SoupSession + * @msg: the message to cancel + * @status_code: status code to set on @msg (generally + * %SOUP_STATUS_CANCELLED) + * + * Causes @session to immediately finish processing @msg (regardless + * of its current state) with a final status_code of @status_code. You + * may call this at any time after handing @msg off to @session; if + * @session has started sending the request but has not yet received + * the complete response, then it will close the request's connection. + * Note that with non-idempotent requests (eg, %POST, %PUT, %DELETE) + * it is possible that you might cancel the request after the server + * acts on it, but before it returns a response, leaving the remote + * resource in an unknown state. + * + * If the message is cancelled while its response body is being read, + * then the response body in @msg will be left partially-filled-in. + * The response headers, on the other hand, will always be either + * empty or complete. + * + * For messages queued with soup_session_queue_message() (and + * cancelled from the same thread), the callback will be invoked + * before soup_session_cancel_message() returns. 
+ **/ +void +soup_session_cancel_message (SoupSession *session, SoupMessage *msg, + guint status_code) +{ + SoupSessionPrivate *priv; + SoupMessageQueueItem *item; + + g_return_if_fail (SOUP_IS_SESSION (session)); + g_return_if_fail (SOUP_IS_MESSAGE (msg)); + + priv = SOUP_SESSION_GET_PRIVATE (session); + item = soup_message_queue_lookup (priv->queue, msg); + /* If the message is already ending, don't do anything */ + if (!item) + return; + if (item->state == SOUP_MESSAGE_FINISHED) { + soup_message_queue_item_unref (item); + return; + } + + SOUP_SESSION_GET_CLASS (session)->cancel_message (session, msg, status_code); + soup_message_queue_item_unref (item); +} + +static void +gather_conns (gpointer key, gpointer host, gpointer data) +{ + SoupConnection *conn = key; + GSList **conns = data; + + *conns = g_slist_prepend (*conns, g_object_ref (conn)); +} + +static void +flush_queue (SoupSession *session) +{ + SoupSessionPrivate *priv = SOUP_SESSION_GET_PRIVATE (session); + SoupMessageQueueItem *item; + + for (item = soup_message_queue_first (priv->queue); + item; + item = soup_message_queue_next (priv->queue, item)) { + soup_session_cancel_message (session, item->msg, + SOUP_STATUS_CANCELLED); + } +} + +/** + * soup_session_abort: + * @session: the session + * + * Cancels all pending requests in @session. 
+ **/ +void +soup_session_abort (SoupSession *session) +{ + SoupSessionPrivate *priv; + GSList *conns, *c; + + g_return_if_fail (SOUP_IS_SESSION (session)); + priv = SOUP_SESSION_GET_PRIVATE (session); + + SOUP_SESSION_GET_CLASS (session)->flush_queue (session); + + /* Close all connections */ + g_mutex_lock (priv->host_lock); + conns = NULL; + g_hash_table_foreach (priv->conns, gather_conns, &conns); + + g_mutex_unlock (priv->host_lock); + for (c = conns; c; c = c->next) { + soup_connection_disconnect (c->data); + g_object_unref (c->data); + } + + g_slist_free (conns); +} + +/** +* soup_session_prepare_for_uri: +* @session: a #SoupSession +* @uri: a #SoupURI which may be required +* +* Tells @session that @uri may be requested shortly, and so the +* session can try to prepare (resolving the domain name, obtaining +* proxy address, etc.) in order to work more quickly once the URI is +* actually requested. +* +* This method acts asynchronously, in @session's %async_context. +* If you are using #SoupSessionSync and do not have a main loop running, +* then you can't use this method. +* +* Since: 2.30 +**/ +void +soup_session_prepare_for_uri (SoupSession *session, SoupURI *uri) +{ + SoupSessionPrivate *priv; + SoupSessionHost *host; + SoupAddress *addr; + + g_return_if_fail (SOUP_IS_SESSION (session)); + g_return_if_fail (uri != NULL); + + if (!uri->host) + return; + + priv = SOUP_SESSION_GET_PRIVATE (session); + + g_mutex_lock (priv->host_lock); + host = get_host_for_uri (session, uri); + addr = g_object_ref (host->addr); + g_mutex_unlock (priv->host_lock); + + soup_address_resolve_async (addr, priv->async_context, + NULL, NULL, NULL); + g_object_unref (addr); +} + +/** + * soup_session_add_feature: + * @session: a #SoupSession + * @feature: an object that implements #SoupSessionFeature + * + * Adds @feature's functionality to @session. You can also add a + * feature to the session at construct time by using the + * %SOUP_SESSION_ADD_FEATURE property. 
+ * + * Since: 2.24 + **/ +void +soup_session_add_feature (SoupSession *session, SoupSessionFeature *feature) +{ + SoupSessionPrivate *priv; + + g_return_if_fail (SOUP_IS_SESSION (session)); + g_return_if_fail (SOUP_IS_SESSION_FEATURE (feature)); + + priv = SOUP_SESSION_GET_PRIVATE (session); + priv->features = g_slist_prepend (priv->features, g_object_ref (feature)); + g_hash_table_remove_all (priv->features_cache); + soup_session_feature_attach (feature, session); +} + +/** + * soup_session_add_feature_by_type: + * @session: a #SoupSession + * @feature_type: a #GType + * + * If @feature_type is the type of a class that implements + * #SoupSessionFeature, this creates a new feature of that type and + * adds it to @session as with soup_session_add_feature(). You can use + * this when you don't need to customize the new feature in any way. + * + * If @feature_type is not a #SoupSessionFeature type, this gives + * each existing feature on @session the chance to accept @feature_type + * as a "subfeature". This can be used to add new #SoupAuth types, + * for instance. + * + * You can also add a feature to the session at construct time by + * using the %SOUP_SESSION_ADD_FEATURE_BY_TYPE property. 
+ * + * Since: 2.24 + **/ +void +soup_session_add_feature_by_type (SoupSession *session, GType feature_type) +{ + g_return_if_fail (SOUP_IS_SESSION (session)); + + if (g_type_is_a (feature_type, SOUP_TYPE_SESSION_FEATURE)) { + SoupSessionFeature *feature; + + feature = g_object_new (feature_type, NULL); + soup_session_add_feature (session, feature); + g_object_unref (feature); + } else { + SoupSessionPrivate *priv = SOUP_SESSION_GET_PRIVATE (session); + GSList *f; + + for (f = priv->features; f; f = f->next) { + if (soup_session_feature_add_feature (f->data, feature_type)) + return; + } + g_warning ("No feature manager for feature of type '%s'", g_type_name (feature_type)); + } +} + +/** + * soup_session_remove_feature: + * @session: a #SoupSession + * @feature: a feature that has previously been added to @session + * + * Removes @feature's functionality from @session. + * + * Since: 2.24 + **/ +void +soup_session_remove_feature (SoupSession *session, SoupSessionFeature *feature) +{ + SoupSessionPrivate *priv; + + g_return_if_fail (SOUP_IS_SESSION (session)); + + priv = SOUP_SESSION_GET_PRIVATE (session); + if (g_slist_find (priv->features, feature)) { + priv->features = g_slist_remove (priv->features, feature); + g_hash_table_remove_all (priv->features_cache); + soup_session_feature_detach (feature, session); + g_object_unref (feature); + } +} + +/** + * soup_session_remove_feature_by_type: + * @session: a #SoupSession + * @feature_type: a #GType + * + * Removes all features of type @feature_type (or any subclass of + * @feature_type) from @session. You can also remove standard features + * from the session at construct time by using the + * %SOUP_SESSION_REMOVE_FEATURE_BY_TYPE property. 
+ * + * Since: 2.24 + **/ +void +soup_session_remove_feature_by_type (SoupSession *session, GType feature_type) +{ + SoupSessionPrivate *priv; + GSList *f; + + g_return_if_fail (SOUP_IS_SESSION (session)); + + priv = SOUP_SESSION_GET_PRIVATE (session); + + if (g_type_is_a (feature_type, SOUP_TYPE_SESSION_FEATURE)) { + restart: /* soup_session_remove_feature() edits priv->features, so rescan from the head after each removal */ + for (f = priv->features; f; f = f->next) { + if (G_TYPE_CHECK_INSTANCE_TYPE (f->data, feature_type)) { + soup_session_remove_feature (session, f->data); + goto restart; + } + } + } else { + for (f = priv->features; f; f = f->next) { + if (soup_session_feature_remove_feature (f->data, feature_type)) + return; + } + g_warning ("No feature manager for feature of type '%s'", g_type_name (feature_type)); + } +} + +/** + * soup_session_get_features: + * @session: a #SoupSession + * @feature_type: the #GType of the class of features to get + * + * Generates a list of @session's features of type @feature_type. (If + * you want to see all features, you can pass %SOUP_TYPE_SESSION_FEATURE + * for @feature_type.) + * + * Return value: (transfer container) (element-type Soup.SessionFeature): + * a list of features. You must free the list, but not its contents + * + * Since: 2.26 + **/ +GSList * +soup_session_get_features (SoupSession *session, GType feature_type) +{ + SoupSessionPrivate *priv; + GSList *f, *ret; + + g_return_val_if_fail (SOUP_IS_SESSION (session), NULL); + + priv = SOUP_SESSION_GET_PRIVATE (session); + for (f = priv->features, ret = NULL; f; f = f->next) { + if (G_TYPE_CHECK_INSTANCE_TYPE (f->data, feature_type)) + ret = g_slist_prepend (ret, f->data); + } + return g_slist_reverse (ret); +} + +/** + * soup_session_get_feature: + * @session: a #SoupSession + * @feature_type: the #GType of the feature to get + * + * Gets the first feature in @session of type @feature_type. For + * features where there may be more than one feature of a given type, + * use soup_session_get_features(). 
+ * + * Return value: (transfer none): a #SoupSessionFeature, or %NULL. The + * feature is owned by @session. + * + * Since: 2.26 + **/ +SoupSessionFeature * +soup_session_get_feature (SoupSession *session, GType feature_type) +{ + SoupSessionPrivate *priv; + SoupSessionFeature *feature; + GSList *f; + + g_return_val_if_fail (SOUP_IS_SESSION (session), NULL); + + priv = SOUP_SESSION_GET_PRIVATE (session); + + feature = g_hash_table_lookup (priv->features_cache, + GSIZE_TO_POINTER (feature_type)); + if (feature) + return feature; + + for (f = priv->features; f; f = f->next) { + feature = f->data; + if (G_TYPE_CHECK_INSTANCE_TYPE (feature, feature_type)) { + g_hash_table_insert (priv->features_cache, + GSIZE_TO_POINTER (feature_type), + feature); + return feature; + } + } + return NULL; +} + +/** + * soup_session_get_feature_for_message: + * @session: a #SoupSession + * @feature_type: the #GType of the feature to get + * @msg: a #SoupMessage + * + * Gets the first feature in @session of type @feature_type, provided + * that it is not disabled for @msg. As with + * soup_session_get_feature(), this should only be used for features + * where @feature_type is only expected to match a single feature. In + * particular, if there are two matching features, and the first is + * disabled on @msg, and the second is not, then this will return + * %NULL, not the second feature. + * + * Return value: (transfer none): a #SoupSessionFeature, or %NULL. The + * feature is owned by @session. 
+ * + * Since: 2.28 + **/ +SoupSessionFeature * +soup_session_get_feature_for_message (SoupSession *session, GType feature_type, + SoupMessage *msg) +{ + SoupSessionFeature *feature; + + feature = soup_session_get_feature (session, feature_type); + if (feature && soup_message_disables_feature (msg, feature)) + return NULL; + return feature; +} diff --git a/libsoup/soup-session.h b/libsoup/soup-session.h new file mode 100644 index 0000000..4b6661f --- /dev/null +++ b/libsoup/soup-session.h @@ -0,0 +1,120 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2000-2003, Ximian, Inc. + */ + +#ifndef SOUP_SESSION_H +#define SOUP_SESSION_H 1 + +#include +#include + +G_BEGIN_DECLS + +#define SOUP_TYPE_SESSION (soup_session_get_type ()) +#define SOUP_SESSION(obj) (G_TYPE_CHECK_INSTANCE_CAST ((obj), SOUP_TYPE_SESSION, SoupSession)) +#define SOUP_SESSION_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST ((klass), SOUP_TYPE_SESSION, SoupSessionClass)) +#define SOUP_IS_SESSION(obj) (G_TYPE_CHECK_INSTANCE_TYPE ((obj), SOUP_TYPE_SESSION)) +#define SOUP_IS_SESSION_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), SOUP_TYPE_SESSION)) +#define SOUP_SESSION_GET_CLASS(obj) (G_TYPE_INSTANCE_GET_CLASS ((obj), SOUP_TYPE_SESSION, SoupSessionClass)) + +typedef void (*SoupSessionCallback) (SoupSession *session, + SoupMessage *msg, + gpointer user_data); + +struct _SoupSession { + GObject parent; + +}; + +typedef struct { + GObjectClass parent_class; + + /* signals */ + void (*request_started) (SoupSession *session, SoupMessage *msg, + SoupSocket *socket); + void (*authenticate) (SoupSession *session, SoupMessage *msg, + SoupAuth *auth, gboolean retrying); + + /* methods */ + void (*queue_message) (SoupSession *session, SoupMessage *msg, + SoupSessionCallback callback, + gpointer user_data); + void (*requeue_message) (SoupSession *session, SoupMessage *msg); + guint (*send_message) (SoupSession *session, SoupMessage *msg); + + void (*cancel_message) 
(SoupSession *session, SoupMessage *msg, + guint status_code); + + void (*auth_required) (SoupSession *session, SoupMessage *msg, + SoupAuth *auth, gboolean retrying); + + void (*flush_queue) (SoupSession *session); + + /* Padding for future expansion */ + void (*_libsoup_reserved3) (void); + void (*_libsoup_reserved4) (void); +} SoupSessionClass; + +GType soup_session_get_type (void); + +#define SOUP_SESSION_PROXY_URI "proxy-uri" +#define SOUP_SESSION_MAX_CONNS "max-conns" +#define SOUP_SESSION_MAX_CONNS_PER_HOST "max-conns-per-host" +#define SOUP_SESSION_USE_NTLM "use-ntlm" +#define SOUP_SESSION_SSL_CA_FILE "ssl-ca-file" +#define SOUP_SESSION_SSL_STRICT "ssl-strict" +#define SOUP_SESSION_ASYNC_CONTEXT "async-context" +#define SOUP_SESSION_TIMEOUT "timeout" +#define SOUP_SESSION_USER_AGENT "user-agent" +#define SOUP_SESSION_ACCEPT_LANGUAGE "accept-language" +#define SOUP_SESSION_ACCEPT_LANGUAGE_AUTO "accept-language-auto" +#define SOUP_SESSION_IDLE_TIMEOUT "idle-timeout" +#define SOUP_SESSION_ADD_FEATURE "add-feature" +#define SOUP_SESSION_ADD_FEATURE_BY_TYPE "add-feature-by-type" +#define SOUP_SESSION_REMOVE_FEATURE_BY_TYPE "remove-feature-by-type" + +GMainContext *soup_session_get_async_context(SoupSession *session); + +void soup_session_queue_message (SoupSession *session, + SoupMessage *msg, + SoupSessionCallback callback, + gpointer user_data); +void soup_session_requeue_message (SoupSession *session, + SoupMessage *msg); + +guint soup_session_send_message (SoupSession *session, + SoupMessage *msg); + +void soup_session_pause_message (SoupSession *session, + SoupMessage *msg); +void soup_session_unpause_message (SoupSession *session, + SoupMessage *msg); + +void soup_session_cancel_message (SoupSession *session, + SoupMessage *msg, + guint status_code); +void soup_session_abort (SoupSession *session); + +void soup_session_prepare_for_uri (SoupSession *session, + SoupURI *uri); + +void soup_session_add_feature (SoupSession *session, + SoupSessionFeature 
*feature); +void soup_session_add_feature_by_type (SoupSession *session, + GType feature_type); +void soup_session_remove_feature (SoupSession *session, + SoupSessionFeature *feature); +void soup_session_remove_feature_by_type (SoupSession *session, + GType feature_type); +GSList *soup_session_get_features (SoupSession *session, + GType feature_type); +SoupSessionFeature *soup_session_get_feature (SoupSession *session, + GType feature_type); +SoupSessionFeature *soup_session_get_feature_for_message(SoupSession *session, + GType feature_type, + SoupMessage *msg); + +G_END_DECLS + +#endif /* SOUP_SESSION_H */ diff --git a/libsoup/soup-socket.c b/libsoup/soup-socket.c new file mode 100644 index 0000000..49939e3 --- /dev/null +++ b/libsoup/soup-socket.c @@ -0,0 +1,1549 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * soup-socket.c: Socket networking code. + * + * Copyright (C) 2000-2003, Ximian, Inc. + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include +#include +#include +#include +#include + +#include "soup-address.h" +#include "soup-socket.h" +#include "soup-marshal.h" +#include "soup-misc.h" +#include "soup-misc-private.h" +#include "soup-ssl.h" + +/** + * SECTION:soup-socket + * @short_description: A network socket + * + * #SoupSocket is libsoup's TCP socket type. While it is primarily + * intended for internal use, #SoupSockets are exposed in the + * API in various places, and some of their methods (eg, + * soup_socket_get_remote_address()) may be useful to applications. 
+ **/ + +G_DEFINE_TYPE (SoupSocket, soup_socket, G_TYPE_OBJECT) + +enum { + READABLE, + WRITABLE, + DISCONNECTED, + NEW_CONNECTION, + LAST_SIGNAL +}; + +static guint signals[LAST_SIGNAL] = { 0 }; + +enum { + PROP_0, + + PROP_LOCAL_ADDRESS, + PROP_REMOTE_ADDRESS, + PROP_NON_BLOCKING, + PROP_IS_SERVER, + PROP_SSL_CREDENTIALS, + PROP_SSL_STRICT, + PROP_SSL_FALLBACK, + PROP_ASYNC_CONTEXT, + PROP_TIMEOUT, + PROP_TRUSTED_CERTIFICATE, + PROP_CLEAN_DISPOSE, + PROP_TLS_CERTIFICATE, + PROP_TLS_ERRORS, + + LAST_PROP +}; + +typedef struct { + SoupAddress *local_addr, *remote_addr; + GIOStream *conn; + GSocket *gsock; + GPollableInputStream *istream; + GPollableOutputStream *ostream; + GTlsCertificateFlags tls_errors; + + guint non_blocking:1; + guint is_server:1; + guint ssl_strict:1; + guint ssl_fallback:1; + guint ssl_ca_in_creds:1; + guint clean_dispose:1; + gpointer ssl_creds; + + GMainContext *async_context; + GSource *watch_src; + GSource *read_src, *write_src; + GByteArray *read_buf; + + GMutex *iolock, *addrlock; + guint timeout; + + GCancellable *connect_cancel; +} SoupSocketPrivate; +#define SOUP_SOCKET_GET_PRIVATE(o) (G_TYPE_INSTANCE_GET_PRIVATE ((o), SOUP_TYPE_SOCKET, SoupSocketPrivate)) + +static void set_property (GObject *object, guint prop_id, + const GValue *value, GParamSpec *pspec); +static void get_property (GObject *object, guint prop_id, + GValue *value, GParamSpec *pspec); + +static void soup_socket_peer_certificate_changed (GObject *conn, + GParamSpec *pspec, + gpointer user_data); + +static void +soup_socket_init (SoupSocket *sock) +{ + SoupSocketPrivate *priv = SOUP_SOCKET_GET_PRIVATE (sock); + + priv->non_blocking = TRUE; + priv->addrlock = g_mutex_new (); + priv->iolock = g_mutex_new (); +} + +static void +disconnect_internal (SoupSocket *sock) +{ + SoupSocketPrivate *priv = SOUP_SOCKET_GET_PRIVATE (sock); + + if (priv->gsock) { + g_socket_close (priv->gsock, NULL); + g_object_unref (priv->gsock); + priv->gsock = NULL; + } + if (priv->conn) { + if 
(G_IS_TLS_CONNECTION (priv->conn)) + g_signal_handlers_disconnect_by_func (priv->conn, soup_socket_peer_certificate_changed, sock); + g_object_unref (priv->conn); + priv->conn = NULL; + priv->istream = NULL; + priv->ostream = NULL; + } + + if (priv->read_src) { + g_source_destroy (priv->read_src); + priv->read_src = NULL; + } + if (priv->write_src) { + g_source_destroy (priv->write_src); + priv->write_src = NULL; + } +} + +static void +finalize (GObject *object) +{ + SoupSocketPrivate *priv = SOUP_SOCKET_GET_PRIVATE (object); + + if (priv->connect_cancel) { + if (priv->clean_dispose) + g_warning ("Disposing socket %p during connect", object); + g_object_unref (priv->connect_cancel); + } + if (priv->conn) { + if (priv->clean_dispose) + g_warning ("Disposing socket %p while still connected", object); + disconnect_internal (SOUP_SOCKET (object)); + } + + if (priv->local_addr) + g_object_unref (priv->local_addr); + if (priv->remote_addr) + g_object_unref (priv->remote_addr); + + if (priv->watch_src) { + if (priv->clean_dispose && !priv->is_server) + g_warning ("Disposing socket %p during async op", object); + g_source_destroy (priv->watch_src); + } + if (priv->async_context) + g_main_context_unref (priv->async_context); + + if (priv->read_buf) + g_byte_array_free (priv->read_buf, TRUE); + + g_mutex_free (priv->addrlock); + g_mutex_free (priv->iolock); + + G_OBJECT_CLASS (soup_socket_parent_class)->finalize (object); +} + +static void +soup_socket_class_init (SoupSocketClass *socket_class) +{ + GObjectClass *object_class = G_OBJECT_CLASS (socket_class); + + g_type_class_add_private (socket_class, sizeof (SoupSocketPrivate)); + + /* virtual method override */ + object_class->finalize = finalize; + object_class->set_property = set_property; + object_class->get_property = get_property; + + /* signals */ + + /** + * SoupSocket::readable: + * @sock: the socket + * + * Emitted when an async socket is readable. 
See + * soup_socket_read(), soup_socket_read_until() and + * #SoupSocket:non-blocking. + **/ + signals[READABLE] = + g_signal_new ("readable", + G_OBJECT_CLASS_TYPE (object_class), + G_SIGNAL_RUN_LAST, + G_STRUCT_OFFSET (SoupSocketClass, readable), + NULL, NULL, + soup_marshal_NONE__NONE, + G_TYPE_NONE, 0); + + /** + * SoupSocket::writable: + * @sock: the socket + * + * Emitted when an async socket is writable. See + * soup_socket_write() and #SoupSocket:non-blocking. + **/ + signals[WRITABLE] = + g_signal_new ("writable", + G_OBJECT_CLASS_TYPE (object_class), + G_SIGNAL_RUN_LAST, + G_STRUCT_OFFSET (SoupSocketClass, writable), + NULL, NULL, + soup_marshal_NONE__NONE, + G_TYPE_NONE, 0); + + /** + * SoupSocket::disconnected: + * @sock: the socket + * + * Emitted when the socket is disconnected, for whatever + * reason. + **/ + signals[DISCONNECTED] = + g_signal_new ("disconnected", + G_OBJECT_CLASS_TYPE (object_class), + G_SIGNAL_RUN_LAST, + G_STRUCT_OFFSET (SoupSocketClass, disconnected), + NULL, NULL, + soup_marshal_NONE__NONE, + G_TYPE_NONE, 0); + + /** + * SoupSocket::new-connection: + * @sock: the socket + * @new: the new socket + * + * Emitted when a listening socket (set up with + * soup_socket_listen()) receives a new connection. + * + * You must ref the @new if you want to keep it; otherwise it + * will be destroyed after the signal is emitted. + **/ + signals[NEW_CONNECTION] = + g_signal_new ("new_connection", + G_OBJECT_CLASS_TYPE (object_class), + G_SIGNAL_RUN_FIRST, + G_STRUCT_OFFSET (SoupSocketClass, new_connection), + NULL, NULL, + soup_marshal_NONE__OBJECT, + G_TYPE_NONE, 1, + SOUP_TYPE_SOCKET); + + /* properties */ + /** + * SOUP_SOCKET_LOCAL_ADDRESS: + * + * Alias for the #SoupSocket:local-address property. (Address + * of local end of socket.) 
+ **/ + g_object_class_install_property ( + object_class, PROP_LOCAL_ADDRESS, + g_param_spec_object (SOUP_SOCKET_LOCAL_ADDRESS, + "Local address", + "Address of local end of socket", + SOUP_TYPE_ADDRESS, + G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY)); + /** + * SOUP_SOCKET_REMOTE_ADDRESS: + * + * Alias for the #SoupSocket:remote-address property. (Address + * of remote end of socket.) + **/ + g_object_class_install_property ( + object_class, PROP_REMOTE_ADDRESS, + g_param_spec_object (SOUP_SOCKET_REMOTE_ADDRESS, + "Remote address", + "Address of remote end of socket", + SOUP_TYPE_ADDRESS, + G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY)); + /** + * SoupSocket:non-blocking: + * + * Whether or not the socket uses non-blocking I/O. + * + * #SoupSocket's I/O methods are designed around the idea of + * using a single codepath for both synchronous and + * asynchronous I/O. If you want to read off a #SoupSocket, + * the "correct" way to do it is to call soup_socket_read() or + * soup_socket_read_until() repeatedly until you have read + * everything you want. If it returns %SOUP_SOCKET_WOULD_BLOCK + * at any point, stop reading and wait for it to emit the + * #SoupSocket::readable signal. Then go back to the + * reading-as-much-as-you-can loop. Likewise, for writing to a + * #SoupSocket, you should call soup_socket_write() either + * until you have written everything, or it returns + * %SOUP_SOCKET_WOULD_BLOCK (in which case you wait for + * #SoupSocket::writable and then go back into the loop). + * + * Code written this way will work correctly with both + * blocking and non-blocking sockets; blocking sockets will + * simply never return %SOUP_SOCKET_WOULD_BLOCK, and so the + * code that handles that case just won't get used for them. + **/ + /** + * SOUP_SOCKET_FLAG_NONBLOCKING: + * + * Alias for the #SoupSocket:non-blocking property. (Whether + * or not the socket uses non-blocking I/O.) 
+ **/ + g_object_class_install_property ( + object_class, PROP_NON_BLOCKING, + g_param_spec_boolean (SOUP_SOCKET_FLAG_NONBLOCKING, + "Non-blocking", + "Whether or not the socket uses non-blocking I/O", + TRUE, + G_PARAM_READWRITE)); + /** + * SOUP_SOCKET_IS_SERVER: + * + * Alias for the #SoupSocket:is-server property. (Whether or + * not the socket is a server socket.) + **/ + g_object_class_install_property ( + object_class, PROP_IS_SERVER, + g_param_spec_boolean (SOUP_SOCKET_IS_SERVER, + "Server", + "Whether or not the socket is a server socket", + FALSE, + G_PARAM_READABLE)); + /** + * SOUP_SOCKET_SSL_CREDENTIALS: + * + * Alias for the #SoupSocket:ssl-credentials property. + * (SSL credential information.) + **/ + g_object_class_install_property ( + object_class, PROP_SSL_CREDENTIALS, + g_param_spec_pointer (SOUP_SOCKET_SSL_CREDENTIALS, + "SSL credentials", + "SSL credential information, passed from the session to the SSL implementation", + G_PARAM_READWRITE)); + /** + * SOUP_SOCKET_SSL_STRICT: + * + * Alias for the #SoupSocket:ssl-strict property. + **/ + g_object_class_install_property ( + object_class, PROP_SSL_STRICT, + g_param_spec_boolean (SOUP_SOCKET_SSL_STRICT, + "Strictly validate SSL certificates", + "Whether certificate errors should be considered a connection error", + TRUE, + G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY)); + /** + * SOUP_SOCKET_SSL_FALLBACK: + * + * Alias for the #SoupSocket:ssl-fallback property. + **/ + g_object_class_install_property ( + object_class, PROP_SSL_FALLBACK, + g_param_spec_boolean (SOUP_SOCKET_SSL_FALLBACK, + "SSLv3 fallback", + "Use SSLv3 instead of TLS (client-side only)", + FALSE, + G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY)); + /** + * SOUP_SOCKET_TRUSTED_CERTIFICATE: + * + * Alias for the #SoupSocket:trusted-certificate + * property. 
+ **/ + g_object_class_install_property ( + object_class, PROP_TRUSTED_CERTIFICATE, + g_param_spec_boolean (SOUP_SOCKET_TRUSTED_CERTIFICATE, + "Trusted Certificate", + "Whether the server certificate is trusted, if this is an SSL socket", + FALSE, + G_PARAM_READABLE)); + /** + * SOUP_SOCKET_ASYNC_CONTEXT: + * + * Alias for the #SoupSocket:async-context property. (The + * socket's #GMainContext.) + **/ + g_object_class_install_property ( + object_class, PROP_ASYNC_CONTEXT, + g_param_spec_pointer (SOUP_SOCKET_ASYNC_CONTEXT, + "Async GMainContext", + "The GMainContext to dispatch this socket's async I/O in", + G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY)); + + /** + * SOUP_SOCKET_TIMEOUT: + * + * Alias for the #SoupSocket:timeout property. (The timeout + * in seconds for blocking socket I/O operations.) + **/ + g_object_class_install_property ( + object_class, PROP_TIMEOUT, + g_param_spec_uint (SOUP_SOCKET_TIMEOUT, + "Timeout value", + "Value in seconds to timeout a blocking I/O", + 0, G_MAXUINT, 0, + G_PARAM_READWRITE)); + + g_object_class_install_property ( + object_class, PROP_CLEAN_DISPOSE, + g_param_spec_boolean ("clean-dispose", + "Clean dispose", + "Warn on unclean dispose", + FALSE, + G_PARAM_WRITABLE | G_PARAM_CONSTRUCT_ONLY)); + /** + * SOUP_SOCKET_TLS_CERTIFICATE: + * + * Alias for the #SoupSocket:tls-certificate + * property. Note that this property's value is only useful + * if the socket is for a TLS connection, and only reliable + * after some data has been transferred to or from it. + * + * Since: 2.34 + **/ + g_object_class_install_property ( + object_class, PROP_TLS_CERTIFICATE, + g_param_spec_object (SOUP_SOCKET_TLS_CERTIFICATE, + "TLS certificate", + "The peer's TLS certificate", + G_TYPE_TLS_CERTIFICATE, + G_PARAM_READABLE)); + /** + * SOUP_SOCKET_TLS_ERRORS: + * + * Alias for the #SoupSocket:tls-errors + * property. 
Note that this property's value is only useful + * if the socket is for a TLS connection, and only reliable + * after some data has been transferred to or from it. + * + * Since: 2.34 + **/ + g_object_class_install_property ( + object_class, PROP_TLS_ERRORS, + g_param_spec_flags (SOUP_SOCKET_TLS_ERRORS, + "TLS errors", + "Errors with the peer's TLS certificate", + G_TYPE_TLS_CERTIFICATE_FLAGS, 0, + G_PARAM_READABLE)); +} + + +static void +finish_socket_setup (SoupSocketPrivate *priv) +{ + if (!priv->gsock) + return; + + if (!priv->conn) + priv->conn = (GIOStream *)g_socket_connection_factory_create_connection (priv->gsock); + if (!priv->istream) + priv->istream = G_POLLABLE_INPUT_STREAM (g_io_stream_get_input_stream (priv->conn)); + if (!priv->ostream) + priv->ostream = G_POLLABLE_OUTPUT_STREAM (g_io_stream_get_output_stream (priv->conn)); + + g_socket_set_timeout (priv->gsock, priv->timeout); +} + +static void +set_property (GObject *object, guint prop_id, + const GValue *value, GParamSpec *pspec) +{ + SoupSocketPrivate *priv = SOUP_SOCKET_GET_PRIVATE (object); + + switch (prop_id) { + case PROP_LOCAL_ADDRESS: + priv->local_addr = (SoupAddress *)g_value_dup_object (value); + break; + case PROP_REMOTE_ADDRESS: + priv->remote_addr = (SoupAddress *)g_value_dup_object (value); + break; + case PROP_NON_BLOCKING: + priv->non_blocking = g_value_get_boolean (value); + break; + case PROP_SSL_CREDENTIALS: + priv->ssl_creds = g_value_get_pointer (value); + break; + case PROP_SSL_STRICT: + priv->ssl_strict = g_value_get_boolean (value); + break; + case PROP_SSL_FALLBACK: + priv->ssl_fallback = g_value_get_boolean (value); + break; + case PROP_ASYNC_CONTEXT: + priv->async_context = g_value_get_pointer (value); + if (priv->async_context) + g_main_context_ref (priv->async_context); + break; + case PROP_TIMEOUT: + priv->timeout = g_value_get_uint (value); + if (priv->conn) + g_socket_set_timeout (priv->gsock, priv->timeout); + break; + case PROP_CLEAN_DISPOSE: + 
priv->clean_dispose = g_value_get_boolean (value); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); + break; + } +} + +static void +get_property (GObject *object, guint prop_id, + GValue *value, GParamSpec *pspec) +{ + SoupSocketPrivate *priv = SOUP_SOCKET_GET_PRIVATE (object); + + switch (prop_id) { + case PROP_LOCAL_ADDRESS: + g_value_set_object (value, soup_socket_get_local_address (SOUP_SOCKET (object))); + break; + case PROP_REMOTE_ADDRESS: + g_value_set_object (value, soup_socket_get_remote_address (SOUP_SOCKET (object))); + break; + case PROP_NON_BLOCKING: + g_value_set_boolean (value, priv->non_blocking); + break; + case PROP_IS_SERVER: + g_value_set_boolean (value, priv->is_server); + break; + case PROP_SSL_CREDENTIALS: + g_value_set_pointer (value, priv->ssl_creds); + break; + case PROP_SSL_STRICT: + g_value_set_boolean (value, priv->ssl_strict); + break; + case PROP_SSL_FALLBACK: + g_value_set_boolean (value, priv->ssl_fallback); + break; + case PROP_TRUSTED_CERTIFICATE: + g_value_set_boolean (value, priv->tls_errors == 0); + break; + case PROP_ASYNC_CONTEXT: + g_value_set_pointer (value, priv->async_context ? 
g_main_context_ref (priv->async_context) : NULL); + break; + case PROP_TIMEOUT: + g_value_set_uint (value, priv->timeout); + break; + case PROP_TLS_CERTIFICATE: + if (G_IS_TLS_CONNECTION (priv->conn)) + g_value_set_object (value, g_tls_connection_get_peer_certificate (G_TLS_CONNECTION (priv->conn))); + else + g_value_set_object (value, NULL); + break; + case PROP_TLS_ERRORS: + g_value_set_flags (value, priv->tls_errors); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); + break; + } +} + + +/** + * soup_socket_new: + * @optname1: name of first property to set (or %NULL) + * @...: value of @optname1, followed by additional property/value pairs + * + * Creates a new (disconnected) socket + * + * Return value: the new socket + **/ +SoupSocket * +soup_socket_new (const char *optname1, ...) +{ + SoupSocket *sock; + va_list ap; + + va_start (ap, optname1); + sock = (SoupSocket *)g_object_new_valist (SOUP_TYPE_SOCKET, + optname1, ap); + va_end (ap); + + return sock; +} + +static guint +socket_connected (SoupSocket *sock, GSocketConnection *conn, GError *error) +{ + SoupSocketPrivate *priv = SOUP_SOCKET_GET_PRIVATE (sock); + + g_object_unref (priv->connect_cancel); + priv->connect_cancel = NULL; + + if (error) { + if (error->domain == G_RESOLVER_ERROR) { + g_error_free (error); + return SOUP_STATUS_CANT_RESOLVE; + } else { + g_error_free (error); + return SOUP_STATUS_CANT_CONNECT; + } + } + + priv->conn = (GIOStream *)conn; + priv->gsock = g_object_ref (g_socket_connection_get_socket (conn)); + finish_socket_setup (priv); + + return SOUP_STATUS_OK; +} + +/** + * SoupSocketCallback: + * @sock: the #SoupSocket + * @status: an HTTP status code indicating success or failure + * @user_data: the data passed to soup_socket_connect_async() + * + * The callback function passed to soup_socket_connect_async(). 
+ **/ + +typedef struct { + SoupSocket *sock; + SoupSocketCallback callback; + gpointer user_data; +} SoupSocketAsyncConnectData; + +static void +async_connected (GObject *client, GAsyncResult *result, gpointer data) +{ + SoupSocketAsyncConnectData *sacd = data; + SoupSocketPrivate *priv = SOUP_SOCKET_GET_PRIVATE (sacd->sock); + GError *error = NULL; + GSocketConnection *conn; + guint status; + + if (priv->async_context) + g_main_context_pop_thread_default (priv->async_context); + + conn = g_socket_client_connect_finish (G_SOCKET_CLIENT (client), + result, &error); + status = socket_connected (sacd->sock, conn, error); + + sacd->callback (sacd->sock, status, sacd->user_data); + g_object_unref (sacd->sock); + g_slice_free (SoupSocketAsyncConnectData, sacd); +} + +/** + * soup_socket_connect_async: + * @sock: a client #SoupSocket (which must not already be connected) + * @cancellable: a #GCancellable, or %NULL + * @callback: (scope async): callback to call after connecting + * @user_data: data to pass to @callback + * + * Begins asynchronously connecting to @sock's remote address. The + * socket will call @callback when it succeeds or fails (but not + * before returning from this function). + * + * If @cancellable is non-%NULL, it can be used to cancel the + * connection. @callback will still be invoked in this case, with a + * status of %SOUP_STATUS_CANCELLED. + **/ +void +soup_socket_connect_async (SoupSocket *sock, GCancellable *cancellable, + SoupSocketCallback callback, gpointer user_data) +{ + SoupSocketPrivate *priv; + SoupSocketAsyncConnectData *sacd; + GSocketClient *client; + + g_return_if_fail (SOUP_IS_SOCKET (sock)); + priv = SOUP_SOCKET_GET_PRIVATE (sock); + g_return_if_fail (priv->remote_addr != NULL); + + sacd = g_slice_new0 (SoupSocketAsyncConnectData); + sacd->sock = g_object_ref (sock); + sacd->callback = callback; + sacd->user_data = user_data; + + priv->connect_cancel = cancellable ? 
g_object_ref (cancellable) : g_cancellable_new (); + + if (priv->async_context) + g_main_context_push_thread_default (priv->async_context); + + client = g_socket_client_new (); + if (priv->timeout) + g_socket_client_set_timeout (client, priv->timeout); + g_socket_client_connect_async (client, + G_SOCKET_CONNECTABLE (priv->remote_addr), + priv->connect_cancel, + async_connected, sacd); + g_object_unref (client); +} + +/** + * soup_socket_connect_sync: + * @sock: a client #SoupSocket (which must not already be connected) + * @cancellable: a #GCancellable, or %NULL + * + * Attempt to synchronously connect @sock to its remote address. + * + * If @cancellable is non-%NULL, it can be used to cancel the + * connection, in which case soup_socket_connect_sync() will return + * %SOUP_STATUS_CANCELLED. + * + * Return value: a success or failure code. + **/ +guint +soup_socket_connect_sync (SoupSocket *sock, GCancellable *cancellable) +{ + SoupSocketPrivate *priv; + GSocketClient *client; + GSocketConnection *conn; + GError *error = NULL; + + g_return_val_if_fail (SOUP_IS_SOCKET (sock), SOUP_STATUS_MALFORMED); + priv = SOUP_SOCKET_GET_PRIVATE (sock); + g_return_val_if_fail (!priv->is_server, SOUP_STATUS_MALFORMED); + g_return_val_if_fail (priv->gsock == NULL, SOUP_STATUS_MALFORMED); + g_return_val_if_fail (priv->remote_addr != NULL, SOUP_STATUS_MALFORMED); + + if (cancellable) + g_object_ref (cancellable); + else + cancellable = g_cancellable_new (); + priv->connect_cancel = cancellable; + + client = g_socket_client_new (); + if (priv->timeout) + g_socket_client_set_timeout (client, priv->timeout); + conn = g_socket_client_connect (client, + G_SOCKET_CONNECTABLE (priv->remote_addr), + priv->connect_cancel, &error); + g_object_unref (client); + + return socket_connected (sock, conn, error); +} + +int +soup_socket_get_fd (SoupSocket *sock) +{ + g_return_val_if_fail (SOUP_IS_SOCKET (sock), -1); + + return g_socket_get_fd (SOUP_SOCKET_GET_PRIVATE (sock)->gsock); +} + +static 
GSource * +soup_socket_create_watch (SoupSocketPrivate *priv, GIOCondition cond, + GPollableSourceFunc callback, gpointer user_data, + GCancellable *cancellable) +{ + GSource *watch; + + if (cond == G_IO_IN) + watch = g_pollable_input_stream_create_source (priv->istream, cancellable); + else + watch = g_pollable_output_stream_create_source (priv->ostream, cancellable); + g_source_set_callback (watch, (GSourceFunc)callback, user_data, NULL); + g_source_attach (watch, priv->async_context); + g_source_unref (watch); + + return watch; +} + +static gboolean +listen_watch (GObject *pollable, gpointer data) +{ + SoupSocket *sock = data, *new; + SoupSocketPrivate *priv = SOUP_SOCKET_GET_PRIVATE (sock), *new_priv; + GSocket *new_gsock; + + new_gsock = g_socket_accept (priv->gsock, NULL, NULL); + if (!new_gsock) + return FALSE; + + new = g_object_new (SOUP_TYPE_SOCKET, NULL); + new_priv = SOUP_SOCKET_GET_PRIVATE (new); + new_priv->gsock = new_gsock; + if (priv->async_context) + new_priv->async_context = g_main_context_ref (priv->async_context); + new_priv->non_blocking = priv->non_blocking; + new_priv->is_server = TRUE; + if (priv->ssl_creds) + new_priv->ssl_creds = priv->ssl_creds; + finish_socket_setup (new_priv); + + if (new_priv->ssl_creds) { + if (!soup_socket_start_proxy_ssl (new, NULL, NULL)) { + g_object_unref (new); + return TRUE; + } + } + + g_signal_emit (sock, signals[NEW_CONNECTION], 0, new); + g_object_unref (new); + + return TRUE; +} + +/** + * soup_socket_listen: + * @sock: a server #SoupSocket (which must not already be connected or + * listening) + * + * Makes @sock start listening on its local address. When connections + * come in, @sock will emit %new_connection. + * + * Return value: whether or not @sock is now listening. 
+ **/ +gboolean +soup_socket_listen (SoupSocket *sock) + +{ + SoupSocketPrivate *priv; + GSocketAddress *addr; + + g_return_val_if_fail (SOUP_IS_SOCKET (sock), FALSE); + priv = SOUP_SOCKET_GET_PRIVATE (sock); + g_return_val_if_fail (priv->gsock == NULL, FALSE); + g_return_val_if_fail (priv->local_addr != NULL, FALSE); + + priv->is_server = TRUE; + + /* @local_addr may have its port set to 0. So we intentionally + * don't store it in priv->local_addr, so that if the + * caller calls soup_socket_get_local_address() later, we'll + * have to make a new addr by calling getsockname(), which + * will have the right port number. + */ + addr = soup_address_get_gsockaddr (priv->local_addr); + g_return_val_if_fail (addr != NULL, FALSE); + + priv->gsock = g_socket_new (g_socket_address_get_family (addr), + G_SOCKET_TYPE_STREAM, + G_SOCKET_PROTOCOL_DEFAULT, + NULL); + if (!priv->gsock) + goto cant_listen; + finish_socket_setup (priv); + + /* Bind */ + if (!g_socket_bind (priv->gsock, addr, TRUE, NULL)) + goto cant_listen; + /* Force local_addr to be re-resolved now */ + g_object_unref (priv->local_addr); + priv->local_addr = NULL; + + /* Listen */ + if (!g_socket_listen (priv->gsock, NULL)) + goto cant_listen; + + priv->watch_src = soup_socket_create_watch (priv, G_IO_IN, + listen_watch, sock, + NULL); + g_object_unref (addr); + return TRUE; + + cant_listen: + if (priv->conn) + disconnect_internal (sock); + g_object_unref (addr); + + return FALSE; +} + +static void +soup_socket_peer_certificate_changed (GObject *conn, GParamSpec *pspec, + gpointer sock) +{ + SoupSocketPrivate *priv = SOUP_SOCKET_GET_PRIVATE (sock); + + priv->tls_errors = g_tls_connection_get_peer_certificate_errors (G_TLS_CONNECTION (priv->conn)); + if (priv->ssl_ca_in_creds) + priv->tls_errors &= ~G_TLS_CERTIFICATE_UNKNOWN_CA; + + g_object_notify (sock, "tls-certificate"); + g_object_notify (sock, "tls-errors"); +} + +static gboolean +soup_socket_accept_certificate (GTlsConnection *conn, GTlsCertificate *cert, 
+ GTlsCertificateFlags errors, gpointer sock) +{ + SoupSocketPrivate *priv = SOUP_SOCKET_GET_PRIVATE (sock); + + if (soup_ssl_credentials_verify_certificate (priv->ssl_creds, + cert, errors)) { + priv->ssl_ca_in_creds = TRUE; + return TRUE; + } + + return !priv->ssl_strict; +} + +/** + * soup_socket_start_ssl: + * @sock: the socket + * @cancellable: a #GCancellable + * + * Starts using SSL on @socket. + * + * Return value: success or failure + **/ +gboolean +soup_socket_start_ssl (SoupSocket *sock, GCancellable *cancellable) +{ + SoupSocketPrivate *priv = SOUP_SOCKET_GET_PRIVATE (sock); + + return soup_socket_start_proxy_ssl (sock, soup_address_get_name (priv->remote_addr), cancellable); +} + +/** + * soup_socket_start_proxy_ssl: + * @sock: the socket + * @ssl_host: hostname of the SSL server + * @cancellable: a #GCancellable + * + * Starts using SSL on @socket, expecting to find a host named + * @ssl_host. + * + * If @sock is already wrapped in a TLS connection this is a no-op + * that returns %TRUE; if no SSL credentials have been set on @sock it + * returns %FALSE without touching the connection. For client sockets, + * the #SoupSocket:ssl-fallback property decides whether the new + * connection is asked to use SSLv3. + * + * Return value: success or failure + **/ +gboolean +soup_socket_start_proxy_ssl (SoupSocket *sock, const char *ssl_host, + GCancellable *cancellable) +{ + SoupSocketPrivate *priv = SOUP_SOCKET_GET_PRIVATE (sock); + GTlsBackend *backend = g_tls_backend_get_default (); + + if (G_IS_TLS_CONNECTION (priv->conn)) + return TRUE; + if (!priv->ssl_creds) + return FALSE; + + if (!priv->is_server) { + GTlsClientConnection *conn; + GSocketConnectable *identity; + + identity = g_network_address_new (ssl_host, 0); + /* Only request SSLv3 when the caller opted in via + * SOUP_SOCKET_SSL_FALLBACK; never force it unconditionally. + */ + conn = g_initable_new (g_tls_backend_get_client_connection_type (backend), + NULL, NULL, + "base-io-stream", priv->conn, + "server-identity", identity, + "use-system-certdb", FALSE, + "require-close-notify", FALSE, + "use-ssl3", priv->ssl_fallback, + NULL); + g_object_unref (identity); + + if (!conn) + return FALSE; + + g_object_unref (priv->conn); + priv->conn = G_IO_STREAM (conn); + + g_signal_connect (conn, "accept-certificate", + G_CALLBACK (soup_socket_accept_certificate), + sock); + } else { + GTlsServerConnection *conn; + + conn = g_initable_new 
(g_tls_backend_get_server_connection_type (backend), + NULL, NULL, + "base-io-stream", priv->conn, + "certificate", soup_ssl_credentials_get_certificate (priv->ssl_creds), + "use-system-certdb", FALSE, + "require-close-notify", FALSE, + NULL); + if (!conn) + return FALSE; + + g_object_unref (priv->conn); + priv->conn = G_IO_STREAM (conn); + } + + priv->ssl_ca_in_creds = FALSE; + g_signal_connect (priv->conn, "notify::peer-certificate", + G_CALLBACK (soup_socket_peer_certificate_changed), sock); + + priv->istream = G_POLLABLE_INPUT_STREAM (g_io_stream_get_input_stream (priv->conn)); + priv->ostream = G_POLLABLE_OUTPUT_STREAM (g_io_stream_get_output_stream (priv->conn)); + return TRUE; +} + +guint +soup_socket_handshake_sync (SoupSocket *sock, + GCancellable *cancellable) +{ + SoupSocketPrivate *priv = SOUP_SOCKET_GET_PRIVATE (sock); + GError *error = NULL; + + if (g_tls_connection_handshake (G_TLS_CONNECTION (priv->conn), + cancellable, &error)) + return SOUP_STATUS_OK; + else if (!priv->ssl_fallback && + g_error_matches (error, G_TLS_ERROR, G_TLS_ERROR_NOT_TLS)) { + g_error_free (error); + return SOUP_STATUS_TLS_FAILED; + } else { + g_error_free (error); + return SOUP_STATUS_SSL_FAILED; + } +} + +static void +handshake_async_ready (GObject *source, GAsyncResult *result, gpointer user_data) +{ + SoupSocketAsyncConnectData *data = user_data; + SoupSocketPrivate *priv = SOUP_SOCKET_GET_PRIVATE (data->sock); + GError *error = NULL; + guint status; + + if (priv->async_context) + g_main_context_pop_thread_default (priv->async_context); + + if (g_tls_connection_handshake_finish (G_TLS_CONNECTION (priv->conn), + result, &error)) + status = SOUP_STATUS_OK; + else if (!priv->ssl_fallback && + g_error_matches (error, G_TLS_ERROR, G_TLS_ERROR_NOT_TLS)) + status = SOUP_STATUS_TLS_FAILED; + else + status = SOUP_STATUS_SSL_FAILED; + g_clear_error (&error); + + data->callback (data->sock, status, data->user_data); + g_object_unref (data->sock); + g_slice_free 
(SoupSocketAsyncConnectData, data); +} + +void +soup_socket_handshake_async (SoupSocket *sock, + GCancellable *cancellable, + SoupSocketCallback callback, + gpointer user_data) +{ + SoupSocketPrivate *priv = SOUP_SOCKET_GET_PRIVATE (sock); + SoupSocketAsyncConnectData *data; + + data = g_slice_new (SoupSocketAsyncConnectData); + data->sock = g_object_ref (sock); + data->callback = callback; + data->user_data = user_data; + + if (priv->async_context) + g_main_context_push_thread_default (priv->async_context); + g_tls_connection_handshake_async (G_TLS_CONNECTION (priv->conn), + G_PRIORITY_DEFAULT, + cancellable, handshake_async_ready, + data); +} + +/** + * soup_socket_is_ssl: + * @sock: a #SoupSocket + * + * Tests if @sock is set up to do SSL. Note that this simply means + * that the %SOUP_SOCKET_SSL_CREDENTIALS property has been set; it + * does not mean that soup_socket_start_ssl() has been called. + * + * Return value: %TRUE if @sock has SSL credentials set, %FALSE + * otherwise (including when @sock is not a #SoupSocket) + **/ +gboolean +soup_socket_is_ssl (SoupSocket *sock) +{ + SoupSocketPrivate *priv; + + /* Must return a value on the failure path: this function is + * not void. + */ + g_return_val_if_fail (SOUP_IS_SOCKET (sock), FALSE); + priv = SOUP_SOCKET_GET_PRIVATE (sock); + + return priv->ssl_creds != NULL; +} + +/** + * soup_socket_disconnect: + * @sock: a #SoupSocket + * + * Disconnects @sock. Any further read or write attempts on it will + * fail. + **/ +void +soup_socket_disconnect (SoupSocket *sock) +{ + SoupSocketPrivate *priv; + gboolean already_disconnected = FALSE; + + g_return_if_fail (SOUP_IS_SOCKET (sock)); + priv = SOUP_SOCKET_GET_PRIVATE (sock); + + if (priv->connect_cancel) { + g_cancellable_cancel (priv->connect_cancel); + return; + } else if (g_mutex_trylock (priv->iolock)) { + if (priv->conn) + disconnect_internal (sock); + else + already_disconnected = TRUE; + g_mutex_unlock (priv->iolock); + } else { + /* Another thread is currently doing IO, so + * we can't close the socket. So just shutdown + * the file descriptor to force the I/O to fail. 
+ * (It will actually be closed when the socket + * is destroyed.) + */ + g_socket_shutdown (priv->gsock, TRUE, TRUE, NULL); + } + + if (already_disconnected) + return; + + /* Keep ref around signals in case the object is unreferenced + * in a handler + */ + g_object_ref (sock); + + /* Give all readers a chance to notice the connection close */ + g_signal_emit (sock, signals[READABLE], 0); + + /* FIXME: can't disconnect until all data is read */ + + /* Then let everyone know we're disconnected */ + g_signal_emit (sock, signals[DISCONNECTED], 0); + + g_object_unref (sock); +} + +/** + * soup_socket_is_connected: + * @sock: a #SoupSocket + * + * Tests if @sock is connected to another host + * + * Return value: %TRUE or %FALSE. + **/ +gboolean +soup_socket_is_connected (SoupSocket *sock) +{ + SoupSocketPrivate *priv; + + g_return_val_if_fail (SOUP_IS_SOCKET (sock), FALSE); + priv = SOUP_SOCKET_GET_PRIVATE (sock); + + return priv->conn != NULL; +} + +/** + * soup_socket_get_local_address: + * @sock: a #SoupSocket + * + * Returns the #SoupAddress corresponding to the local end of @sock. + * + * Return value: (transfer none): the #SoupAddress + **/ +SoupAddress * +soup_socket_get_local_address (SoupSocket *sock) +{ + SoupSocketPrivate *priv; + + g_return_val_if_fail (SOUP_IS_SOCKET (sock), NULL); + priv = SOUP_SOCKET_GET_PRIVATE (sock); + + g_mutex_lock (priv->addrlock); + if (!priv->local_addr) { + GSocketAddress *addr; + struct sockaddr_storage sa; + gssize sa_len; + + addr = g_socket_get_local_address (priv->gsock, NULL); + sa_len = g_socket_address_get_native_size (addr); + g_socket_address_to_native (addr, &sa, sa_len, NULL); + priv->local_addr = soup_address_new_from_sockaddr ((struct sockaddr *)&sa, sa_len); + g_object_unref (addr); + } + g_mutex_unlock (priv->addrlock); + + return priv->local_addr; +} + +/** + * soup_socket_get_remote_address: + * @sock: a #SoupSocket + * + * Returns the #SoupAddress corresponding to the remote end of @sock. 
+ * + * Return value: (transfer none): the #SoupAddress + **/ +SoupAddress * +soup_socket_get_remote_address (SoupSocket *sock) +{ + SoupSocketPrivate *priv; + + g_return_val_if_fail (SOUP_IS_SOCKET (sock), NULL); + priv = SOUP_SOCKET_GET_PRIVATE (sock); + + g_mutex_lock (priv->addrlock); + if (!priv->remote_addr) { + GSocketAddress *addr; + struct sockaddr_storage sa; + gssize sa_len; + + addr = g_socket_get_remote_address (priv->gsock, NULL); + sa_len = g_socket_address_get_native_size (addr); + g_socket_address_to_native (addr, &sa, sa_len, NULL); + priv->remote_addr = soup_address_new_from_sockaddr ((struct sockaddr *)&sa, sa_len); + g_object_unref (addr); + } + g_mutex_unlock (priv->addrlock); + + return priv->remote_addr; +} + + +static gboolean +socket_read_watch (GObject *pollable, gpointer user_data) +{ + SoupSocket *sock = user_data; + SoupSocketPrivate *priv = SOUP_SOCKET_GET_PRIVATE (sock); + + priv->read_src = NULL; + g_signal_emit (sock, signals[READABLE], 0); + return FALSE; +} + +static SoupSocketIOStatus +read_from_network (SoupSocket *sock, gpointer buffer, gsize len, + gsize *nread, GCancellable *cancellable, GError **error) +{ + SoupSocketPrivate *priv = SOUP_SOCKET_GET_PRIVATE (sock); + GError *my_err = NULL; + gssize my_nread; + + *nread = 0; + + if (!priv->conn) + return SOUP_SOCKET_EOF; + + if (!priv->non_blocking) { + my_nread = g_input_stream_read (G_INPUT_STREAM (priv->istream), + buffer, len, + cancellable, &my_err); + } else { + my_nread = g_pollable_input_stream_read_nonblocking ( + priv->istream, buffer, len, + cancellable, &my_err); + } + + if (my_nread > 0) { + g_clear_error (&my_err); + *nread = my_nread; + return SOUP_SOCKET_OK; + } else if (my_nread == 0) { + g_clear_error (&my_err); + *nread = my_nread; + return SOUP_SOCKET_EOF; + } else if (g_error_matches (my_err, G_IO_ERROR, G_IO_ERROR_WOULD_BLOCK)) { + g_clear_error (&my_err); + if (!priv->read_src) { + priv->read_src = + soup_socket_create_watch (priv, G_IO_IN, + 
socket_read_watch, sock, + cancellable); + } + return SOUP_SOCKET_WOULD_BLOCK; + } else if (g_error_matches (my_err, G_TLS_ERROR, G_TLS_ERROR_HANDSHAKE)) { + my_err->domain = SOUP_SSL_ERROR; + my_err->code = SOUP_SSL_ERROR_CERTIFICATE; + } + + g_propagate_error (error, my_err); + return SOUP_SOCKET_ERROR; +} + +static SoupSocketIOStatus +read_from_buf (SoupSocket *sock, gpointer buffer, gsize len, gsize *nread) +{ + SoupSocketPrivate *priv = SOUP_SOCKET_GET_PRIVATE (sock); + GByteArray *read_buf = priv->read_buf; + + *nread = MIN (read_buf->len, len); + memcpy (buffer, read_buf->data, *nread); + + if (*nread == read_buf->len) { + g_byte_array_free (read_buf, TRUE); + priv->read_buf = NULL; + } else { + memmove (read_buf->data, read_buf->data + *nread, + read_buf->len - *nread); + g_byte_array_set_size (read_buf, read_buf->len - *nread); + } + + return SOUP_SOCKET_OK; +} + +/** + * SoupSocketIOStatus: + * @SOUP_SOCKET_OK: Success + * @SOUP_SOCKET_WOULD_BLOCK: Cannot read/write any more at this time + * @SOUP_SOCKET_EOF: End of file + * @SOUP_SOCKET_ERROR: Other error + * + * Return value from the #SoupSocket IO methods. + **/ + +/** + * soup_socket_read: + * @sock: the socket + * @buffer: buffer to read into + * @len: size of @buffer in bytes + * @nread: (out): on return, the number of bytes read into @buffer + * @cancellable: a #GCancellable, or %NULL + * @error: error pointer + * + * Attempts to read up to @len bytes from @sock into @buffer. If some + * data is successfully read, soup_socket_read() will return + * %SOUP_SOCKET_OK, and *@nread will contain the number of bytes + * actually read (which may be less than @len). + * + * If @sock is non-blocking, and no data is available, the return + * value will be %SOUP_SOCKET_WOULD_BLOCK. In this case, the caller + * can connect to the #SoupSocket::readable signal to know when there + * is more data to read. (NB: You MUST read all available data off the + * socket first. 
#SoupSocket::readable is only emitted after + * soup_socket_read() returns %SOUP_SOCKET_WOULD_BLOCK, and it is only + * emitted once. See the documentation for #SoupSocket:non-blocking.) + * + * Return value: a #SoupSocketIOStatus, as described above (or + * %SOUP_SOCKET_EOF if the socket is no longer connected, or + * %SOUP_SOCKET_ERROR on any other error, in which case @error will + * also be set). + **/ +SoupSocketIOStatus +soup_socket_read (SoupSocket *sock, gpointer buffer, gsize len, + gsize *nread, GCancellable *cancellable, GError **error) +{ + SoupSocketPrivate *priv; + SoupSocketIOStatus status; + + g_return_val_if_fail (SOUP_IS_SOCKET (sock), SOUP_SOCKET_ERROR); + g_return_val_if_fail (nread != NULL, SOUP_SOCKET_ERROR); + + priv = SOUP_SOCKET_GET_PRIVATE (sock); + + g_mutex_lock (priv->iolock); + if (priv->read_buf) + status = read_from_buf (sock, buffer, len, nread); + else + status = read_from_network (sock, buffer, len, nread, cancellable, error); + g_mutex_unlock (priv->iolock); + + return status; +} + +/** + * soup_socket_read_until: + * @sock: the socket + * @buffer: buffer to read into + * @len: size of @buffer in bytes + * @boundary: boundary to read until + * @boundary_len: length of @boundary in bytes + * @nread: (out): on return, the number of bytes read into @buffer + * @got_boundary: on return, whether or not the data in @buffer + * ends with the boundary string + * @cancellable: a #GCancellable, or %NULL + * @error: error pointer + * + * Like soup_socket_read(), but reads no further than the first + * occurrence of @boundary. (If the boundary is found, it will be + * included in the returned data, and *@got_boundary will be set to + * %TRUE.) Any data after the boundary will returned in future reads. 
+ * + * soup_socket_read_until() will almost always return fewer than @len + * bytes: if the boundary is found, then it will only return the bytes + * up until the end of the boundary, and if the boundary is not found, + * then it will leave the last (boundary_len - 1) + * bytes in its internal buffer, in case they form the start of the + * boundary string. Thus, @len normally needs to be at least 1 byte + * longer than @boundary_len if you want to make any progress at all. + * + * Return value: as for soup_socket_read() + **/ +SoupSocketIOStatus +soup_socket_read_until (SoupSocket *sock, gpointer buffer, gsize len, + gconstpointer boundary, gsize boundary_len, + gsize *nread, gboolean *got_boundary, + GCancellable *cancellable, GError **error) +{ + SoupSocketPrivate *priv; + SoupSocketIOStatus status; + GByteArray *read_buf; + guint match_len, prev_len; + guint8 *p, *end; + + g_return_val_if_fail (SOUP_IS_SOCKET (sock), SOUP_SOCKET_ERROR); + g_return_val_if_fail (nread != NULL, SOUP_SOCKET_ERROR); + g_return_val_if_fail (len >= boundary_len, SOUP_SOCKET_ERROR); + + priv = SOUP_SOCKET_GET_PRIVATE (sock); + + g_mutex_lock (priv->iolock); + + *got_boundary = FALSE; + + if (!priv->read_buf) + priv->read_buf = g_byte_array_new (); + read_buf = priv->read_buf; + + if (read_buf->len < boundary_len) { + prev_len = read_buf->len; + g_byte_array_set_size (read_buf, len); + status = read_from_network (sock, + read_buf->data + prev_len, + len - prev_len, nread, cancellable, error); + read_buf->len = prev_len + *nread; + + if (status != SOUP_SOCKET_OK) { + g_mutex_unlock (priv->iolock); + return status; + } + } + + /* Scan for the boundary */ + end = read_buf->data + read_buf->len; + for (p = read_buf->data; p <= end - boundary_len; p++) { + if (!memcmp (p, boundary, boundary_len)) { + p += boundary_len; + *got_boundary = TRUE; + break; + } + } + + /* Return everything up to 'p' (which is either just after the + * boundary, or @boundary_len - 1 bytes before the end of the + * 
buffer). + */ + match_len = p - read_buf->data; + status = read_from_buf (sock, buffer, MIN (len, match_len), nread); + + g_mutex_unlock (priv->iolock); + return status; +} + +static gboolean +socket_write_watch (GObject *pollable, gpointer user_data) +{ + SoupSocket *sock = user_data; + SoupSocketPrivate *priv = SOUP_SOCKET_GET_PRIVATE (sock); + + priv->write_src = NULL; + g_signal_emit (sock, signals[WRITABLE], 0); + return FALSE; +} + +/** + * soup_socket_write: + * @sock: the socket + * @buffer: data to write + * @len: size of @buffer, in bytes + * @nwrote: (out): on return, number of bytes written + * @cancellable: a #GCancellable, or %NULL + * @error: error pointer + * + * Attempts to write @len bytes from @buffer to @sock. If some data is + * successfully written, the return status will be %SOUP_SOCKET_OK, + * and *@nwrote will contain the number of bytes actually written + * (which may be less than @len). + * + * If @sock is non-blocking, and no data could be written right away, + * the return value will be %SOUP_SOCKET_WOULD_BLOCK. In this case, + * the caller can connect to the #SoupSocket::writable signal to know + * when more data can be written. (NB: #SoupSocket::writable is only + * emitted after soup_socket_write() returns %SOUP_SOCKET_WOULD_BLOCK, + * and it is only emitted once. See the documentation for + * #SoupSocket:non-blocking.) + * + * Return value: a #SoupSocketIOStatus, as described above (or + * %SOUP_SOCKET_EOF or %SOUP_SOCKET_ERROR. @error will be set if the + * return value is %SOUP_SOCKET_ERROR.) 
+ **/ +SoupSocketIOStatus +soup_socket_write (SoupSocket *sock, gconstpointer buffer, + gsize len, gsize *nwrote, + GCancellable *cancellable, GError **error) +{ + SoupSocketPrivate *priv; + GError *my_err = NULL; + gssize my_nwrote; + + g_return_val_if_fail (SOUP_IS_SOCKET (sock), SOUP_SOCKET_ERROR); + g_return_val_if_fail (nwrote != NULL, SOUP_SOCKET_ERROR); + + priv = SOUP_SOCKET_GET_PRIVATE (sock); + + g_mutex_lock (priv->iolock); + + if (!priv->conn) { + g_mutex_unlock (priv->iolock); + return SOUP_SOCKET_EOF; + } + if (priv->write_src) { + g_mutex_unlock (priv->iolock); + return SOUP_SOCKET_WOULD_BLOCK; + } + + if (!priv->non_blocking) { + my_nwrote = g_output_stream_write (G_OUTPUT_STREAM (priv->ostream), + buffer, len, + cancellable, &my_err); + } else { + my_nwrote = g_pollable_output_stream_write_nonblocking ( + priv->ostream, buffer, len, + cancellable, &my_err); + } + + if (my_nwrote > 0) { + g_mutex_unlock (priv->iolock); + g_clear_error (&my_err); + *nwrote = my_nwrote; + return SOUP_SOCKET_OK; + } + + if (g_error_matches (my_err, G_IO_ERROR, G_IO_ERROR_WOULD_BLOCK)) { + g_mutex_unlock (priv->iolock); + g_clear_error (&my_err); + + priv->write_src = + soup_socket_create_watch (priv, + G_IO_OUT, + socket_write_watch, sock, cancellable); + return SOUP_SOCKET_WOULD_BLOCK; + } else if (g_error_matches (my_err, G_TLS_ERROR, G_TLS_ERROR_HANDSHAKE)) { + my_err->domain = SOUP_SSL_ERROR; + my_err->code = SOUP_SSL_ERROR_CERTIFICATE; + } + + g_mutex_unlock (priv->iolock); + g_propagate_error (error, my_err); + return SOUP_SOCKET_ERROR; +} diff --git a/libsoup/soup-socket.h b/libsoup/soup-socket.h new file mode 100644 index 0000000..4d1550f --- /dev/null +++ b/libsoup/soup-socket.h @@ -0,0 +1,120 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2000-2003, Ximian, Inc. 
+ */ + +#ifndef SOUP_SOCKET_H +#define SOUP_SOCKET_H 1 + +#include + +G_BEGIN_DECLS + +#define SOUP_TYPE_SOCKET (soup_socket_get_type ()) +#define SOUP_SOCKET(obj) (G_TYPE_CHECK_INSTANCE_CAST ((obj), SOUP_TYPE_SOCKET, SoupSocket)) +#define SOUP_SOCKET_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST ((klass), SOUP_TYPE_SOCKET, SoupSocketClass)) +#define SOUP_IS_SOCKET(obj) (G_TYPE_CHECK_INSTANCE_TYPE ((obj), SOUP_TYPE_SOCKET)) +#define SOUP_IS_SOCKET_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((obj), SOUP_TYPE_SOCKET)) +#define SOUP_SOCKET_GET_CLASS(obj) (G_TYPE_INSTANCE_GET_CLASS ((obj), SOUP_TYPE_SOCKET, SoupSocketClass)) + +struct _SoupSocket { + GObject parent; + +}; + +typedef struct { + GObjectClass parent_class; + + /* signals */ + void (*readable) (SoupSocket *); + void (*writable) (SoupSocket *); + void (*disconnected) (SoupSocket *); + + void (*new_connection) (SoupSocket *, SoupSocket *); + + /* Padding for future expansion */ + void (*_libsoup_reserved1) (void); + void (*_libsoup_reserved2) (void); + void (*_libsoup_reserved3) (void); + void (*_libsoup_reserved4) (void); +} SoupSocketClass; + +#define SOUP_SOCKET_LOCAL_ADDRESS "local-address" +#define SOUP_SOCKET_REMOTE_ADDRESS "remote-address" +#define SOUP_SOCKET_FLAG_NONBLOCKING "non-blocking" +#define SOUP_SOCKET_IS_SERVER "is-server" +#define SOUP_SOCKET_SSL_CREDENTIALS "ssl-creds" +#define SOUP_SOCKET_SSL_STRICT "ssl-strict" +#define SOUP_SOCKET_SSL_FALLBACK "ssl-fallback" +#define SOUP_SOCKET_TRUSTED_CERTIFICATE "trusted-certificate" +#define SOUP_SOCKET_ASYNC_CONTEXT "async-context" +#define SOUP_SOCKET_TIMEOUT "timeout" +#define SOUP_SOCKET_TLS_CERTIFICATE "tls-certificate" +#define SOUP_SOCKET_TLS_ERRORS "tls-errors" + +typedef void (*SoupSocketCallback) (SoupSocket *sock, + guint status, + gpointer user_data); + +GType soup_socket_get_type (void); + +SoupSocket *soup_socket_new (const char *optname1, + ...) 
G_GNUC_NULL_TERMINATED; + +void soup_socket_connect_async (SoupSocket *sock, + GCancellable *cancellable, + SoupSocketCallback callback, + gpointer user_data); +guint soup_socket_connect_sync (SoupSocket *sock, + GCancellable *cancellable); +int soup_socket_get_fd (SoupSocket *sock); + +gboolean soup_socket_listen (SoupSocket *sock); + +gboolean soup_socket_start_ssl (SoupSocket *sock, + GCancellable *cancellable); +gboolean soup_socket_start_proxy_ssl (SoupSocket *sock, + const char *ssl_host, + GCancellable *cancellable); +gboolean soup_socket_is_ssl (SoupSocket *sock); + +void soup_socket_disconnect (SoupSocket *sock); +gboolean soup_socket_is_connected (SoupSocket *sock); + +SoupAddress *soup_socket_get_local_address (SoupSocket *sock); +SoupAddress *soup_socket_get_remote_address (SoupSocket *sock); + + +typedef enum { + SOUP_SOCKET_OK, + SOUP_SOCKET_WOULD_BLOCK, + SOUP_SOCKET_EOF, + SOUP_SOCKET_ERROR +} SoupSocketIOStatus; + +SoupSocketIOStatus soup_socket_read (SoupSocket *sock, + gpointer buffer, + gsize len, + gsize *nread, + GCancellable *cancellable, + GError **error); +SoupSocketIOStatus soup_socket_read_until (SoupSocket *sock, + gpointer buffer, + gsize len, + gconstpointer boundary, + gsize boundary_len, + gsize *nread, + gboolean *got_boundary, + GCancellable *cancellable, + GError **error); + +SoupSocketIOStatus soup_socket_write (SoupSocket *sock, + gconstpointer buffer, + gsize len, + gsize *nwrote, + GCancellable *cancellable, + GError **error); + +G_END_DECLS + +#endif /* SOUP_SOCKET_H */ diff --git a/libsoup/soup-ssl.c b/libsoup/soup-ssl.c new file mode 100644 index 0000000..74d87f2 --- /dev/null +++ b/libsoup/soup-ssl.c @@ -0,0 +1,145 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * soup-ssl.c: temporary ssl integration + * + * Copyright (C) 2010 Red Hat, Inc. 
+ */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include + +#include "soup-ssl.h" +#include "soup-misc.h" + +const gboolean soup_ssl_supported = TRUE; + +struct SoupSSLCredentials { + GList *ca_list; + GTlsCertificateFlags validation_flags; + GTlsCertificate *certificate; +}; + +SoupSSLCredentials * +soup_ssl_get_client_credentials (const char *ca_file) +{ + SoupSSLCredentials *creds; + + creds = g_slice_new0 (SoupSSLCredentials); + + if (ca_file) { + GError *error = NULL; + + creds->ca_list = g_tls_certificate_list_new_from_file (ca_file, &error); + if (error) { + if (!g_error_matches (error, G_TLS_ERROR, G_TLS_ERROR_UNAVAILABLE)) { + g_warning ("Could not set SSL credentials from '%s': %s", + ca_file, error->message); + } + g_error_free (error); + } + creds->validation_flags = G_TLS_CERTIFICATE_VALIDATE_ALL; + } + + return creds; +} + +gboolean +soup_ssl_credentials_verify_certificate (SoupSSLCredentials *creds, + GTlsCertificate *cert, + GTlsCertificateFlags errors) +{ + errors = errors & creds->validation_flags; + + if (errors & G_TLS_CERTIFICATE_UNKNOWN_CA) { + GList *ca; + + for (ca = creds->ca_list; ca; ca = ca->next) { + if ((g_tls_certificate_verify (cert, NULL, ca->data) & G_TLS_CERTIFICATE_UNKNOWN_CA) == 0) { + errors &= ~G_TLS_CERTIFICATE_UNKNOWN_CA; + break; + } + } + } + + return errors == 0; +} + +void +soup_ssl_free_client_credentials (SoupSSLCredentials *client_creds) +{ + GList *c; + + for (c = client_creds->ca_list; c; c = c->next) + g_object_unref (c->data); + g_list_free (client_creds->ca_list); + g_slice_free (SoupSSLCredentials, client_creds); +} + +SoupSSLCredentials * +soup_ssl_get_server_credentials (const char *cert_file, const char *key_file) +{ + SoupSSLCredentials *creds; + GError *error = NULL; + + creds = g_slice_new0 (SoupSSLCredentials); + + creds->certificate = g_tls_certificate_new_from_files (cert_file, key_file, &error); + if (!creds->certificate) { + g_warning ("Could not read SSL certificate from '%s': %s", + cert_file, 
error->message); + g_error_free (error); + g_slice_free (SoupSSLCredentials, creds); + return NULL; + } + + return creds; +} + +GTlsCertificate * +soup_ssl_credentials_get_certificate (SoupSSLCredentials *creds) +{ + return creds->certificate; +} + +void +soup_ssl_free_server_credentials (SoupSSLCredentials *server_creds) +{ + g_object_unref (server_creds->certificate); + g_slice_free (SoupSSLCredentials, server_creds); +} + +/** + * SOUP_SSL_ERROR: + * + * A #GError domain representing an SSL error. Used with #SoupSSLError. + **/ +/** + * soup_ssl_error_quark: + * + * The quark used as %SOUP_SSL_ERROR + * + * Return value: The quark used as %SOUP_SSL_ERROR + **/ +GQuark +soup_ssl_error_quark (void) +{ + static GQuark error; + if (!error) + error = g_quark_from_static_string ("soup_ssl_error_quark"); + return error; +} + +/** + * SoupSSLError: + * @SOUP_SSL_ERROR_HANDSHAKE_NEEDS_READ: Internal error. Never exposed + * outside of libsoup. + * @SOUP_SSL_ERROR_HANDSHAKE_NEEDS_WRITE: Internal error. Never exposed + * outside of libsoup. + * @SOUP_SSL_ERROR_CERTIFICATE: Indicates an error validating an SSL + * certificate + * + * SSL-related I/O errors. + **/ diff --git a/libsoup/soup-ssl.h b/libsoup/soup-ssl.h new file mode 100644 index 0000000..5858199 --- /dev/null +++ b/libsoup/soup-ssl.h @@ -0,0 +1,29 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2000-2003, Ximian, Inc. 
+ */ + +#ifndef SOUP_SSL_H +#define SOUP_SSL_H 1 + +#include "soup-types.h" + +typedef enum { + SOUP_SSL_TYPE_CLIENT = 0, + SOUP_SSL_TYPE_SERVER +} SoupSSLType; + +typedef struct SoupSSLCredentials SoupSSLCredentials; + +SoupSSLCredentials *soup_ssl_get_client_credentials (const char *ca_file); +void soup_ssl_free_client_credentials (SoupSSLCredentials *creds); +gboolean soup_ssl_credentials_verify_certificate (SoupSSLCredentials *creds, + GTlsCertificate *cert, + GTlsCertificateFlags errors); + +SoupSSLCredentials *soup_ssl_get_server_credentials (const char *cert_file, + const char *key_file); +void soup_ssl_free_server_credentials (SoupSSLCredentials *creds); +GTlsCertificate *soup_ssl_credentials_get_certificate (SoupSSLCredentials *creds); + +#endif /* SOUP_SSL_H */ diff --git a/libsoup/soup-status.c b/libsoup/soup-status.c new file mode 100644 index 0000000..11fe51b --- /dev/null +++ b/libsoup/soup-status.c @@ -0,0 +1,314 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * soup-status.c: Status code descriptions + * + * Copyright (C) 2001-2003, Ximian, Inc. + */ + +#include "config.h" + +#include + +#include "soup-status.h" + +/** + * SECTION:soup-status + * @short_description: HTTP (and libsoup) status codes + * + **/ + +/** + * SOUP_STATUS_IS_TRANSPORT_ERROR: + * @status: a status code + * + * Tests if @status is a libsoup transport error. + * + * Return value: %TRUE or %FALSE + **/ +/** + * SOUP_STATUS_IS_INFORMATIONAL: + * @status: an HTTP status code + * + * Tests if @status is an Informational (1xx) response. + * + * Return value: %TRUE or %FALSE + **/ +/** + * SOUP_STATUS_IS_SUCCESSFUL: + * @status: an HTTP status code + * + * Tests if @status is a Successful (2xx) response. + * + * Return value: %TRUE or %FALSE + **/ +/** + * SOUP_STATUS_IS_REDIRECTION: + * @status: an HTTP status code + * + * Tests if @status is a Redirection (3xx) response. 
+ * + * Return value: %TRUE or %FALSE + **/ +/** + * SOUP_STATUS_IS_CLIENT_ERROR: + * @status: an HTTP status code + * + * Tests if @status is a Client Error (4xx) response. + * + * Return value: %TRUE or %FALSE + **/ +/** + * SOUP_STATUS_IS_SERVER_ERROR: + * @status: an HTTP status code + * + * Tests if @status is a Server Error (5xx) response. + * + * Return value: %TRUE or %FALSE + **/ + +/** + * SoupKnownStatusCode: + * @SOUP_STATUS_NONE: No status available. (Eg, the message has not + * been sent yet) + * @SOUP_STATUS_CANCELLED: Message was cancelled locally + * @SOUP_STATUS_CANT_RESOLVE: Unable to resolve destination host name + * @SOUP_STATUS_CANT_RESOLVE_PROXY: Unable to resolve proxy host name + * @SOUP_STATUS_CANT_CONNECT: Unable to connect to remote host + * @SOUP_STATUS_CANT_CONNECT_PROXY: Unable to connect to proxy + * @SOUP_STATUS_SSL_FAILED: SSL/TLS negotiation failed + * @SOUP_STATUS_IO_ERROR: A network error occurred, or the other end + * closed the connection unexpectedly + * @SOUP_STATUS_MALFORMED: Malformed data (usually a programmer error) + * @SOUP_STATUS_TRY_AGAIN: Used internally + * @SOUP_STATUS_TOO_MANY_REDIRECTS: There were too many redirections + * @SOUP_STATUS_TLS_FAILED: Used internally + * @SOUP_STATUS_CONTINUE: 100 Continue (HTTP) + * @SOUP_STATUS_SWITCHING_PROTOCOLS: 101 Switching Protocols (HTTP) + * @SOUP_STATUS_PROCESSING: 102 Processing (WebDAV) + * @SOUP_STATUS_OK: 200 Success (HTTP). Also used by many lower-level + * soup routines to indicate success. 
+ * @SOUP_STATUS_CREATED: 201 Created (HTTP) + * @SOUP_STATUS_ACCEPTED: 202 Accepted (HTTP) + * @SOUP_STATUS_NON_AUTHORITATIVE: 203 Non-Authoritative Information + * (HTTP) + * @SOUP_STATUS_NO_CONTENT: 204 No Content (HTTP) + * @SOUP_STATUS_RESET_CONTENT: 205 Reset Content (HTTP) + * @SOUP_STATUS_PARTIAL_CONTENT: 206 Partial Content (HTTP) + * @SOUP_STATUS_MULTI_STATUS: 207 Multi-Status (WebDAV) + * @SOUP_STATUS_MULTIPLE_CHOICES: 300 Multiple Choices (HTTP) + * @SOUP_STATUS_MOVED_PERMANENTLY: 301 Moved Permanently (HTTP) + * @SOUP_STATUS_FOUND: 302 Found (HTTP) + * @SOUP_STATUS_MOVED_TEMPORARILY: 302 Moved Temporarily (old name, + * RFC 2068) + * @SOUP_STATUS_SEE_OTHER: 303 See Other (HTTP) + * @SOUP_STATUS_NOT_MODIFIED: 304 Not Modified (HTTP) + * @SOUP_STATUS_USE_PROXY: 305 Use Proxy (HTTP) + * @SOUP_STATUS_NOT_APPEARING_IN_THIS_PROTOCOL: 306 [Unused] (HTTP) + * @SOUP_STATUS_TEMPORARY_REDIRECT: 307 Temporary Redirect (HTTP) + * @SOUP_STATUS_BAD_REQUEST: 400 Bad Request (HTTP) + * @SOUP_STATUS_UNAUTHORIZED: 401 Unauthorized (HTTP) + * @SOUP_STATUS_PAYMENT_REQUIRED: 402 Payment Required (HTTP) + * @SOUP_STATUS_FORBIDDEN: 403 Forbidden (HTTP) + * @SOUP_STATUS_NOT_FOUND: 404 Not Found (HTTP) + * @SOUP_STATUS_METHOD_NOT_ALLOWED: 405 Method Not Allowed (HTTP) + * @SOUP_STATUS_NOT_ACCEPTABLE: 406 Not Acceptable (HTTP) + * @SOUP_STATUS_PROXY_AUTHENTICATION_REQUIRED: 407 Proxy Authentication + * Required (HTTP) + * @SOUP_STATUS_PROXY_UNAUTHORIZED: shorter alias for + * %SOUP_STATUS_PROXY_AUTHENTICATION_REQUIRED + * @SOUP_STATUS_REQUEST_TIMEOUT: 408 Request Timeout (HTTP) + * @SOUP_STATUS_CONFLICT: 409 Conflict (HTTP) + * @SOUP_STATUS_GONE: 410 Gone (HTTP) + * @SOUP_STATUS_LENGTH_REQUIRED: 411 Length Required (HTTP) + * @SOUP_STATUS_PRECONDITION_FAILED: 412 Precondition Failed (HTTP) + * @SOUP_STATUS_REQUEST_ENTITY_TOO_LARGE: 413 Request Entity Too Large + * (HTTP) + * @SOUP_STATUS_REQUEST_URI_TOO_LONG: 414 Request-URI Too Long (HTTP) + * 
@SOUP_STATUS_UNSUPPORTED_MEDIA_TYPE: 415 Unsupported Media Type + * (HTTP) + * @SOUP_STATUS_REQUESTED_RANGE_NOT_SATISFIABLE: 416 Requested Range + * Not Satisfiable (HTTP) + * @SOUP_STATUS_INVALID_RANGE: shorter alias for + * %SOUP_STATUS_REQUESTED_RANGE_NOT_SATISFIABLE + * @SOUP_STATUS_EXPECTATION_FAILED: 417 Expectation Failed (HTTP) + * @SOUP_STATUS_UNPROCESSABLE_ENTITY: 422 Unprocessable Entity + * (WebDAV) + * @SOUP_STATUS_LOCKED: 423 Locked (WebDAV) + * @SOUP_STATUS_FAILED_DEPENDENCY: 424 Failed Dependency (WebDAV) + * @SOUP_STATUS_INTERNAL_SERVER_ERROR: 500 Internal Server Error + * (HTTP) + * @SOUP_STATUS_NOT_IMPLEMENTED: 501 Not Implemented (HTTP) + * @SOUP_STATUS_BAD_GATEWAY: 502 Bad Gateway (HTTP) + * @SOUP_STATUS_SERVICE_UNAVAILABLE: 503 Service Unavailable (HTTP) + * @SOUP_STATUS_GATEWAY_TIMEOUT: 504 Gateway Timeout (HTTP) + * @SOUP_STATUS_HTTP_VERSION_NOT_SUPPORTED: 505 HTTP Version Not + * Supported (HTTP) + * @SOUP_STATUS_INSUFFICIENT_STORAGE: 507 Insufficient Storage + * (WebDAV) + * @SOUP_STATUS_NOT_EXTENDED: 510 Not Extended (RFC 2774) + * + * These represent the known HTTP status code values, plus various + * network and internal errors. + **/ + +/** + * SOUP_HTTP_ERROR: + * + * A #GError domain representing an HTTP status. Use a + * #SoupKnownStatusCode for the code + * value. + **/ + + +/* The reason_phrases are not localized because: + * + * 1. Only ASCII can be used portably in the HTTP Status-Line, so we + * would not be able to return localized reason phrases from + * SoupServer anyway. + * + * 2. Having a way for clients to get a localized version of a status + * code would just encourage them to present those strings to the + * user, which is bad because many of them are fairly + * incomprehensible anyway. 
+ */ + +static const struct { + guint code; + const char *phrase; +} reason_phrases [] = { + /* Transport errors */ + { SOUP_STATUS_CANCELLED, "Cancelled" }, + { SOUP_STATUS_CANT_RESOLVE, "Cannot resolve hostname" }, + { SOUP_STATUS_CANT_RESOLVE_PROXY, "Cannot resolve proxy hostname" }, + { SOUP_STATUS_CANT_CONNECT, "Cannot connect to destination" }, + { SOUP_STATUS_CANT_CONNECT_PROXY, "Cannot connect to proxy" }, + { SOUP_STATUS_SSL_FAILED, "SSL handshake failed" }, + { SOUP_STATUS_IO_ERROR, "Connection terminated unexpectedly" }, + { SOUP_STATUS_MALFORMED, "Message Corrupt" }, + { SOUP_STATUS_TOO_MANY_REDIRECTS, "Too many redirects" }, + + /* Informational */ + { SOUP_STATUS_CONTINUE, "Continue" }, + { SOUP_STATUS_SWITCHING_PROTOCOLS, "Switching Protocols" }, + { SOUP_STATUS_PROCESSING, "Processing" }, + + /* Success */ + { SOUP_STATUS_OK, "OK" }, + { SOUP_STATUS_CREATED, "Created" }, + { SOUP_STATUS_ACCEPTED, "Accepted" }, + { SOUP_STATUS_NON_AUTHORITATIVE, "Non-Authoritative Information" }, + { SOUP_STATUS_NO_CONTENT, "No Content" }, + { SOUP_STATUS_RESET_CONTENT, "Reset Content" }, + { SOUP_STATUS_PARTIAL_CONTENT, "Partial Content" }, + { SOUP_STATUS_MULTI_STATUS, "Multi-Status" }, + + /* Redirection */ + { SOUP_STATUS_MULTIPLE_CHOICES, "Multiple Choices" }, + { SOUP_STATUS_MOVED_PERMANENTLY, "Moved Permanently" }, + { SOUP_STATUS_FOUND, "Found" }, + { SOUP_STATUS_SEE_OTHER, "See Other" }, + { SOUP_STATUS_NOT_MODIFIED, "Not Modified" }, + { SOUP_STATUS_USE_PROXY, "Use Proxy" }, + { SOUP_STATUS_TEMPORARY_REDIRECT, "Temporary Redirect" }, + + /* Client error */ + { SOUP_STATUS_BAD_REQUEST, "Bad Request" }, + { SOUP_STATUS_UNAUTHORIZED, "Unauthorized" }, + { SOUP_STATUS_PAYMENT_REQUIRED, "Payment Required" }, + { SOUP_STATUS_FORBIDDEN, "Forbidden" }, + { SOUP_STATUS_NOT_FOUND, "Not Found" }, + { SOUP_STATUS_METHOD_NOT_ALLOWED, "Method Not Allowed" }, + { SOUP_STATUS_NOT_ACCEPTABLE, "Not Acceptable" }, + { SOUP_STATUS_PROXY_UNAUTHORIZED, "Proxy Authentication 
Required" }, + { SOUP_STATUS_REQUEST_TIMEOUT, "Request Timeout" }, + { SOUP_STATUS_CONFLICT, "Conflict" }, + { SOUP_STATUS_GONE, "Gone" }, + { SOUP_STATUS_LENGTH_REQUIRED, "Length Required" }, + { SOUP_STATUS_PRECONDITION_FAILED, "Precondition Failed" }, + { SOUP_STATUS_REQUEST_ENTITY_TOO_LARGE, "Request Entity Too Large" }, + { SOUP_STATUS_REQUEST_URI_TOO_LONG, "Request-URI Too Long" }, + { SOUP_STATUS_UNSUPPORTED_MEDIA_TYPE, "Unsupported Media Type" }, + { SOUP_STATUS_INVALID_RANGE, "Requested Range Not Satisfiable" }, + { SOUP_STATUS_EXPECTATION_FAILED, "Expectation Failed" }, + { SOUP_STATUS_UNPROCESSABLE_ENTITY, "Unprocessable Entity" }, + { SOUP_STATUS_LOCKED, "Locked" }, + { SOUP_STATUS_FAILED_DEPENDENCY, "Failed Dependency" }, + + /* Server error */ + { SOUP_STATUS_INTERNAL_SERVER_ERROR, "Internal Server Error" }, + { SOUP_STATUS_NOT_IMPLEMENTED, "Not Implemented" }, + { SOUP_STATUS_BAD_GATEWAY, "Bad Gateway" }, + { SOUP_STATUS_SERVICE_UNAVAILABLE, "Service Unavailable" }, + { SOUP_STATUS_GATEWAY_TIMEOUT, "Gateway Timeout" }, + { SOUP_STATUS_HTTP_VERSION_NOT_SUPPORTED, "HTTP Version Not Supported" }, + { SOUP_STATUS_INSUFFICIENT_STORAGE, "Insufficient Storage" }, + { SOUP_STATUS_NOT_EXTENDED, "Not Extended" }, + + { 0 } +}; + +/** + * soup_status_get_phrase: + * @status_code: an HTTP status code + * + * Looks up the stock HTTP description of @status_code. This is used + * by soup_message_set_status() to get the correct text to go with a + * given status code. + * + * There is no reason for you to ever use this + * function. If you wanted the textual description for the + * %status_code of a given #SoupMessage, you should just look at the + * message's %reason_phrase. However, you should only do that for use + * in debugging messages; HTTP reason phrases are not localized, and + * are not generally very descriptive anyway, and so they should never + * be presented to the user directly. 
Instead, you should create you + * own error messages based on the status code, and on what you were + * trying to do. + * + * Return value: the (terse, English) description of @status_code + **/ +const char * +soup_status_get_phrase (guint status_code) +{ + int i; + + for (i = 0; reason_phrases [i].code; i++) { + if (reason_phrases [i].code == status_code) + return reason_phrases [i].phrase; + } + + return "Unknown Error"; +} + +/** + * soup_status_proxify: + * @status_code: a status code + * + * Turns %SOUP_STATUS_CANT_RESOLVE into + * %SOUP_STATUS_CANT_RESOLVE_PROXY and %SOUP_STATUS_CANT_CONNECT into + * %SOUP_STATUS_CANT_CONNECT_PROXY. Other status codes are passed + * through unchanged. + * + * Return value: the "proxified" equivalent of @status_code. + * + * Since: 2.26 + **/ +guint +soup_status_proxify (guint status_code) +{ + if (status_code == SOUP_STATUS_CANT_RESOLVE) + return SOUP_STATUS_CANT_RESOLVE_PROXY; + else if (status_code == SOUP_STATUS_CANT_CONNECT) + return SOUP_STATUS_CANT_CONNECT_PROXY; + else + return status_code; +} + +GQuark +soup_http_error_quark (void) +{ + static GQuark error; + if (!error) + error = g_quark_from_static_string ("soup_http_error_quark"); + return error; +} diff --git a/libsoup/soup-status.h b/libsoup/soup-status.h new file mode 100644 index 0000000..fc8d5a3 --- /dev/null +++ b/libsoup/soup-status.h @@ -0,0 +1,104 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * soup-status.h: HTTP status code and status class definitions + * + * Copyright (C) 2001-2003, Ximian, Inc. 
+ */ + +#ifndef SOUP_STATUS_H +#define SOUP_STATUS_H 1 + +#include + +G_BEGIN_DECLS + +#define SOUP_STATUS_IS_TRANSPORT_ERROR(status) ((status) > 0 && (status) < 100) +#define SOUP_STATUS_IS_INFORMATIONAL(status) ((status) >= 100 && (status) < 200) +#define SOUP_STATUS_IS_SUCCESSFUL(status) ((status) >= 200 && (status) < 300) +#define SOUP_STATUS_IS_REDIRECTION(status) ((status) >= 300 && (status) < 400) +#define SOUP_STATUS_IS_CLIENT_ERROR(status) ((status) >= 400 && (status) < 500) +#define SOUP_STATUS_IS_SERVER_ERROR(status) ((status) >= 500 && (status) < 600) + +typedef enum { + SOUP_STATUS_NONE, + + /* Transport Errors */ + SOUP_STATUS_CANCELLED = 1, + SOUP_STATUS_CANT_RESOLVE, + SOUP_STATUS_CANT_RESOLVE_PROXY, + SOUP_STATUS_CANT_CONNECT, + SOUP_STATUS_CANT_CONNECT_PROXY, + SOUP_STATUS_SSL_FAILED, + SOUP_STATUS_IO_ERROR, + SOUP_STATUS_MALFORMED, + SOUP_STATUS_TRY_AGAIN, + SOUP_STATUS_TOO_MANY_REDIRECTS, + SOUP_STATUS_TLS_FAILED, + + /* HTTP Status Codes */ + SOUP_STATUS_CONTINUE = 100, + SOUP_STATUS_SWITCHING_PROTOCOLS = 101, + SOUP_STATUS_PROCESSING = 102, /* WebDAV */ + + SOUP_STATUS_OK = 200, + SOUP_STATUS_CREATED = 201, + SOUP_STATUS_ACCEPTED = 202, + SOUP_STATUS_NON_AUTHORITATIVE = 203, + SOUP_STATUS_NO_CONTENT = 204, + SOUP_STATUS_RESET_CONTENT = 205, + SOUP_STATUS_PARTIAL_CONTENT = 206, + SOUP_STATUS_MULTI_STATUS = 207, /* WebDAV */ + + SOUP_STATUS_MULTIPLE_CHOICES = 300, + SOUP_STATUS_MOVED_PERMANENTLY = 301, + SOUP_STATUS_FOUND = 302, + SOUP_STATUS_MOVED_TEMPORARILY = 302, /* RFC 2068 */ + SOUP_STATUS_SEE_OTHER = 303, + SOUP_STATUS_NOT_MODIFIED = 304, + SOUP_STATUS_USE_PROXY = 305, + SOUP_STATUS_NOT_APPEARING_IN_THIS_PROTOCOL = 306, /* (reserved) */ + SOUP_STATUS_TEMPORARY_REDIRECT = 307, + + SOUP_STATUS_BAD_REQUEST = 400, + SOUP_STATUS_UNAUTHORIZED = 401, + SOUP_STATUS_PAYMENT_REQUIRED = 402, /* (reserved) */ + SOUP_STATUS_FORBIDDEN = 403, + SOUP_STATUS_NOT_FOUND = 404, + SOUP_STATUS_METHOD_NOT_ALLOWED = 405, + SOUP_STATUS_NOT_ACCEPTABLE = 406, + 
SOUP_STATUS_PROXY_AUTHENTICATION_REQUIRED = 407, + SOUP_STATUS_PROXY_UNAUTHORIZED = SOUP_STATUS_PROXY_AUTHENTICATION_REQUIRED, + SOUP_STATUS_REQUEST_TIMEOUT = 408, + SOUP_STATUS_CONFLICT = 409, + SOUP_STATUS_GONE = 410, + SOUP_STATUS_LENGTH_REQUIRED = 411, + SOUP_STATUS_PRECONDITION_FAILED = 412, + SOUP_STATUS_REQUEST_ENTITY_TOO_LARGE = 413, + SOUP_STATUS_REQUEST_URI_TOO_LONG = 414, + SOUP_STATUS_UNSUPPORTED_MEDIA_TYPE = 415, + SOUP_STATUS_REQUESTED_RANGE_NOT_SATISFIABLE = 416, + SOUP_STATUS_INVALID_RANGE = SOUP_STATUS_REQUESTED_RANGE_NOT_SATISFIABLE, + SOUP_STATUS_EXPECTATION_FAILED = 417, + SOUP_STATUS_UNPROCESSABLE_ENTITY = 422, /* WebDAV */ + SOUP_STATUS_LOCKED = 423, /* WebDAV */ + SOUP_STATUS_FAILED_DEPENDENCY = 424, /* WebDAV */ + + SOUP_STATUS_INTERNAL_SERVER_ERROR = 500, + SOUP_STATUS_NOT_IMPLEMENTED = 501, + SOUP_STATUS_BAD_GATEWAY = 502, + SOUP_STATUS_SERVICE_UNAVAILABLE = 503, + SOUP_STATUS_GATEWAY_TIMEOUT = 504, + SOUP_STATUS_HTTP_VERSION_NOT_SUPPORTED = 505, + SOUP_STATUS_INSUFFICIENT_STORAGE = 507, /* WebDAV search */ + SOUP_STATUS_NOT_EXTENDED = 510 /* RFC 2774 */ +} SoupKnownStatusCode; + +const char *soup_status_get_phrase (guint status_code); +guint soup_status_proxify (guint status_code); + +#define SOUP_HTTP_ERROR soup_http_error_quark() +GQuark soup_http_error_quark (void); + +G_END_DECLS + +#endif /* SOUP_STATUS_H */ diff --git a/libsoup/soup-types.h b/libsoup/soup-types.h new file mode 100644 index 0000000..d022039 --- /dev/null +++ b/libsoup/soup-types.h @@ -0,0 +1,37 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2003, Ximian, Inc. 
+ */ + +#ifndef SOUP_TYPES_H +#define SOUP_TYPES_H 1 + +#include + +#include + +G_BEGIN_DECLS + +typedef struct _SoupAddress SoupAddress; +typedef struct _SoupAuth SoupAuth; +typedef struct _SoupAuthDomain SoupAuthDomain; +typedef struct _SoupCookie SoupCookie; +typedef struct _SoupCookieJar SoupCookieJar; +typedef struct _SoupDate SoupDate; +typedef struct _SoupMessage SoupMessage; +typedef struct _SoupServer SoupServer; +typedef struct _SoupSession SoupSession; +typedef struct _SoupSessionAsync SoupSessionAsync; +typedef struct _SoupSessionFeature SoupSessionFeature; +typedef struct _SoupSessionSync SoupSessionSync; +typedef struct _SoupSocket SoupSocket; +typedef struct _SoupURI SoupURI; + +/*< private >*/ +typedef struct _SoupConnection SoupConnection; +typedef struct _SoupMessageQueue SoupMessageQueue; +typedef struct _SoupMessageQueueItem SoupMessageQueueItem; + +G_END_DECLS + +#endif diff --git a/libsoup/soup-uri.c b/libsoup/soup-uri.c new file mode 100644 index 0000000..b5c247d --- /dev/null +++ b/libsoup/soup-uri.c @@ -0,0 +1,1143 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* soup-uri.c : utility functions to parse URLs */ + +/* + * Copyright 1999-2003 Ximian, Inc. + */ + +#include +#include +#include + +#include "soup-uri.h" +#include "soup-misc-private.h" +#include "soup-form.h" +#include "soup-misc.h" + +/** + * SECTION:soup-uri + * @short_description: URIs + * + * A #SoupURI represents a (parsed) URI. + * + * Many applications will not need to use #SoupURI directly at all; on + * the client side, soup_message_new() takes a stringified URI, and on + * the server side, the path and query components are provided for you + * in the server callback. 
+ **/ + +/** + * SoupURI: + * @scheme: the URI scheme (eg, "http") + * @user: a username, or %NULL + * @password: a password, or %NULL + * @host: the hostname or IP address + * @port: the port number on @host + * @path: the path on @host + * @query: a query for @path, or %NULL + * @fragment: a fragment identifier within @path, or %NULL + * + * A #SoupURI represents a (parsed) URI. #SoupURI supports RFC 3986 + * (URI Generic Syntax), and can parse any valid URI. However, libsoup + * only uses "http" and "https" URIs internally; You can use + * SOUP_URI_VALID_FOR_HTTP() to test if a #SoupURI is a valid HTTP + * URI. + * + * @scheme will always be set in any URI. It is an interned string and + * is always all lowercase. (If you parse a URI with a non-lowercase + * scheme, it will be converted to lowercase.) The macros + * %SOUP_URI_SCHEME_HTTP and %SOUP_URI_SCHEME_HTTPS provide the + * interned values for "http" and "https" and can be compared against + * URI @scheme values. + * + * @user and @password are parsed as defined in the older URI specs + * (ie, separated by a colon; RFC 3986 only talks about a single + * "userinfo" field). Note that @password is not included in the + * output of soup_uri_to_string(). libsoup does not normally use these + * fields; authentication is handled via #SoupSession signals. + * + * @host contains the hostname, and @port the port specified in the + * URI. If the URI doesn't contain a hostname, @host will be %NULL, + * and if it doesn't specify a port, @port may be 0. However, for + * "http" and "https" URIs, @host is guaranteed to be non-%NULL + * (trying to parse an http URI with no @host will return %NULL), and + * @port will always be non-0 (because libsoup knows the default value + * to use when it is not specified in the URI). + * + * @path is always non-%NULL. For http/https URIs, @path will never be + * an empty string either; if the input URI has no path, the parsed + * #SoupURI will have a @path of "/". 
+ *
+ * @query and @fragment are optional for all URI types.
+ * soup_form_decode() may be useful for parsing @query.
+ *
+ * Note that @path, @query, and @fragment may contain
+ * %-encoded characters. soup_uri_new() calls
+ * soup_uri_normalize() on them, but not soup_uri_decode(). This is
+ * necessary to ensure that soup_uri_to_string() will generate a URI
+ * that has exactly the same meaning as the original. (In theory,
+ * #SoupURI should leave @user, @password, and @host partially-encoded
+ * as well, but this would be more annoying than useful.)
+ **/
+
+/**
+ * SOUP_URI_VALID_FOR_HTTP:
+ * @uri: a #SoupURI
+ *
+ * Tests if @uri is a valid #SoupURI for HTTP communication; that is, if
+ * it can be used to construct a #SoupMessage.
+ *
+ * Return value: %TRUE if @uri is a valid "http" or "https" URI.
+ *
+ * Since: 2.24
+ **/
+
+static void append_uri_encoded (GString *str, const char *in, const char *extra_enc_chars);
+static char *uri_normalized_copy (const char *str, int length, const char *unescape_extra);
+
+gpointer _SOUP_URI_SCHEME_HTTP, _SOUP_URI_SCHEME_HTTPS;
+gpointer _SOUP_URI_SCHEME_FTP;
+gpointer _SOUP_URI_SCHEME_FILE, _SOUP_URI_SCHEME_DATA;
+
+/* Maps the first @len bytes of @scheme to an interned, all-lowercase
+ * scheme string. "http" and "https" are matched case-insensitively and
+ * return SOUP_URI_SCHEME_HTTP / SOUP_URI_SCHEME_HTTPS, which is what
+ * lets the rest of this file compare schemes by pointer.
+ */
+static inline const char *
+soup_uri_parse_scheme (const char *scheme, int len)
+{
+	if (len == 4 && !g_ascii_strncasecmp (scheme, "http", len)) {
+		return SOUP_URI_SCHEME_HTTP;
+	} else if (len == 5 && !g_ascii_strncasecmp (scheme, "https", len)) {
+		return SOUP_URI_SCHEME_HTTPS;
+	} else {
+		char *lower_scheme;
+
+		lower_scheme = g_ascii_strdown (scheme, len);
+		scheme = g_intern_static_string (lower_scheme);
+		/* If the interned pointer is our own buffer, the intern
+		 * table now owns it and it must not be freed. Otherwise an
+		 * equal string was already interned and our lowercase copy
+		 * is redundant.
+		 */
+		if (scheme != (const char *)lower_scheme)
+			g_free (lower_scheme);
+		return scheme;
+	}
+}
+
+/* Returns the well-known port for the interned @scheme (http 80,
+ * https 443, ftp 21), or 0 when the scheme has no default known here.
+ * @scheme is compared by pointer, so it must be an interned value.
+ */
+static inline guint
+soup_scheme_default_port (const char *scheme)
+{
+	if (scheme == SOUP_URI_SCHEME_HTTP)
+		return 80;
+	else if (scheme == SOUP_URI_SCHEME_HTTPS)
+		return 443;
+	else if (scheme == SOUP_URI_SCHEME_FTP)
+		return 21;
+	else
+		return 0;
+}
+
+/**
+ * soup_uri_new_with_base:
+ * @base: a base URI
+ * @uri_string: the URI
+ *
+ * Parses @uri_string relative to @base.
+ *
+ * Leading and trailing whitespace in @uri_string is ignored, and
+ * embedded tabs and line breaks are stripped before parsing.
+ *
+ * Return value: a parsed #SoupURI, or %NULL if @uri_string could not
+ * be parsed (malformed IPv6 literal or port, or an http, https or ftp
+ * URI with no host).
+ **/
+SoupURI *
+soup_uri_new_with_base (SoupURI *base, const char *uri_string)
+{
+	SoupURI *uri;
+	const char *end, *hash, *colon, *at, *path, *question;
+	const char *p, *hostend;
+	gboolean remove_dot_segments = TRUE;
+	int len;
+
+	/* First some cleanup steps (which are supposed to all be no-ops,
+	 * but...). Skip initial whitespace, strip out internal tabs and
+	 * line breaks, and ignore trailing whitespace.
+	 */
+	while (g_ascii_isspace (*uri_string))
+		uri_string++;
+
+	len = strcspn (uri_string, "\t\n\r");
+	if (uri_string[len]) {
+		/* There is at least one tab/CR/LF inside the string: parse a
+		 * cleaned copy instead (the recursion is at most one deep,
+		 * since the copy contains none of those characters).
+		 */
+		char *clean = g_malloc (strlen (uri_string) + 1), *d;
+		const char *s;
+
+		for (s = uri_string, d = clean; *s; s++) {
+			if (*s != '\t' && *s != '\n' && *s != '\r')
+				*d++ = *s;
+		}
+		*d = '\0';
+
+		uri = soup_uri_new_with_base (base, clean);
+		g_free (clean);
+		return uri;
+	}
+	end = uri_string + len;
+	while (end > uri_string && g_ascii_isspace (end[-1]))
+		end--;
+
+	uri = g_slice_new0 (SoupURI);
+
+	/* Find fragment. It runs from just past '#' up to (but not
+	 * including) @end, so its length is end - (hash + 1). A longer
+	 * length would pull already-trimmed trailing whitespace back
+	 * into the fragment.
+	 */
+	hash = strchr (uri_string, '#');
+	if (hash) {
+		uri->fragment = uri_normalized_copy (hash + 1, end - hash - 1,
+						     NULL);
+		end = hash;
+	}
+
+	/* Find scheme: initial [a-z+.-]* substring until ":" */
+	p = uri_string;
+	while (p < end && (g_ascii_isalnum (*p) ||
+			   *p == '.' || *p == '+' || *p == '-'))
+		p++;
+
+	if (p > uri_string && *p == ':') {
+		uri->scheme = soup_uri_parse_scheme (uri_string, p - uri_string);
+		uri_string = p + 1;
+	}
+
+	/* Nothing left to parse and nothing to inherit from a base. */
+	if (uri_string == end && !base && !uri->fragment)
+		return uri;
+
+	/* Check for authority */
+	if (strncmp (uri_string, "//", 2) == 0) {
+		uri_string += 2;
+
+		/* The authority ends at the first '/', '?' or '#', or at
+		 * @end, whichever comes first.
+		 */
+		path = uri_string + strcspn (uri_string, "/?#");
+		if (path > end)
+			path = end;
+		at = strchr (uri_string, '@');
+		if (at && at < path) {
+			colon = strchr (uri_string, ':');
+			if (colon && colon < at) {
+				uri->password = uri_decoded_copy (colon + 1,
+								  at - colon - 1);
+			} else {
+				uri->password = NULL;
+				colon = at;
+			}
+
+			uri->user = uri_decoded_copy (uri_string,
+						      colon - uri_string);
+			uri_string = at + 1;
+		} else
+			uri->user = uri->password = NULL;
+
+		/* Find host and port. */
+		if (*uri_string == '[') {
+			/* Bracketed (IPv6) literal: the host is the text
+			 * between the brackets.
+			 */
+			uri_string++;
+			hostend = strchr (uri_string, ']');
+			if (!hostend || hostend > path) {
+				soup_uri_free (uri);
+				return NULL;
+			}
+			if (*(hostend + 1) == ':')
+				colon = hostend + 1;
+			else
+				colon = NULL;
+		} else {
+			colon = memchr (uri_string, ':', path - uri_string);
+			hostend = colon ? colon : path;
+		}
+
+		uri->host = uri_decoded_copy (uri_string, hostend - uri_string);
+
+		/* A trailing ':' with no digits is treated as "no port". */
+		if (colon && colon != path - 1) {
+			char *portend;
+			uri->port = strtoul (colon + 1, &portend, 10);
+			if (portend != (char *)path) {
+				soup_uri_free (uri);
+				return NULL;
+			}
+		}
+
+		uri_string = path;
+	}
+
+	/* Find query */
+	question = memchr (uri_string, '?', end - uri_string);
+	if (question) {
+		uri->query = uri_normalized_copy (question + 1,
+						  end - (question + 1),
+						  NULL);
+		end = question;
+	}
+
+	if (end != uri_string) {
+		uri->path = uri_normalized_copy (uri_string, end - uri_string,
+						 NULL);
+	}
+
+	/* Apply base URI. This is spelled out in RFC 3986. */
+	if (base && !uri->scheme && uri->host)
+		uri->scheme = base->scheme;
+	else if (base && !uri->scheme) {
+		uri->scheme = base->scheme;
+		uri->user = g_strdup (base->user);
+		uri->password = g_strdup (base->password);
+		uri->host = g_strdup (base->host);
+		uri->port = base->port;
+
+		if (!uri->path) {
+			/* No path of our own: take the base's path verbatim
+			 * (and its query, unless we have one).
+			 */
+			uri->path = g_strdup (base->path);
+			if (!uri->query)
+				uri->query = g_strdup (base->query);
+			remove_dot_segments = FALSE;
+		} else if (*uri->path != '/') {
+			/* Relative path: resolve it against the directory
+			 * part of the base path.
+			 */
+			char *newpath, *last;
+
+			last = strrchr (base->path, '/');
+			if (last) {
+				newpath = g_strdup_printf ("%.*s%s",
+							   (int)(last + 1 - base->path),
+							   base->path,
+							   uri->path);
+			} else
+				newpath = g_strdup_printf ("/%s", uri->path);
+
+			g_free (uri->path);
+			uri->path = newpath;
+		}
+	}
+
+	if (remove_dot_segments && uri->path && *uri->path) {
+		char *p, *q;
+
+		/* Remove "./" where "." is a complete segment. */
+		for (p = uri->path + 1; *p; ) {
+			if (*(p - 1) == '/' &&
+			    *p == '.' && *(p + 1) == '/')
+				memmove (p, p + 2, strlen (p + 2) + 1);
+			else
+				p++;
+		}
+		/* Remove "." at end. */
+		if (p > uri->path + 2 &&
+		    *(p - 1) == '.' && *(p - 2) == '/')
+			*(p - 1) = '\0';
+
+		/* Remove "<segment>/../" where <segment> != ".." */
+		for (p = uri->path + 1; *p; ) {
+			if (!strncmp (p, "../", 3)) {
+				p += 3;
+				continue;
+			}
+			q = strchr (p + 1, '/');
+			if (!q)
+				break;
+			if (strncmp (q, "/../", 4) != 0) {
+				p = q + 1;
+				continue;
+			}
+			memmove (p, q + 4, strlen (q + 4) + 1);
+			p = uri->path + 1;
+		}
+		/* Remove "<segment>/.." at end where <segment> != "..".
+		 * A path of exactly "/.." has no preceding segment: skip it
+		 * here, because stepping back from the leading '/' would
+		 * read before the start of the buffer. The "/.." check
+		 * below turns it into "/".
+		 */
+		q = strrchr (uri->path, '/');
+		if (q && q != uri->path && !strcmp (q, "/..")) {
+			p = q - 1;
+			while (p > uri->path && *p != '/')
+				p--;
+			if (strncmp (p, "/../", 4) != 0)
+				*(p + 1) = 0;
+		}
+
+		/* Remove extraneous initial "/.."s */
+		while (!strncmp (uri->path, "/../", 4))
+			memmove (uri->path, uri->path + 3, strlen (uri->path) - 2);
+		if (!strcmp (uri->path, "/.."))
+			uri->path[1] = '\0';
+	}
+
+	/* HTTP-specific stuff */
+	if (uri->scheme == SOUP_URI_SCHEME_HTTP ||
+	    uri->scheme == SOUP_URI_SCHEME_HTTPS) {
+		if (!uri->path)
+			uri->path = g_strdup ("/");
+		if (!SOUP_URI_VALID_FOR_HTTP (uri)) {
+			soup_uri_free (uri);
+			return NULL;
+		}
+	}
+
+	if (uri->scheme == SOUP_URI_SCHEME_FTP) {
+		if (!uri->host) {
+			soup_uri_free (uri);
+			return NULL;
+		}
+	}
+
+	if (!uri->port)
+		uri->port = soup_scheme_default_port (uri->scheme);
+	if (!uri->path)
+		uri->path = g_strdup ("");
+
+	return uri;
+}
+
+/**
+ * soup_uri_new:
+ * @uri_string: a URI
+ *
+ * Parses an absolute URI.
+ *
+ * You can also pass %NULL for @uri_string if you want to get back an
+ * "empty" #SoupURI that you can fill in by hand. (You will need to
+ * call at least soup_uri_set_scheme() and soup_uri_set_path(), since
+ * those fields are required.)
+ *
+ * Return value: a #SoupURI, or %NULL if @uri_string could not be
+ * parsed or has no scheme.
+ **/
+SoupURI *
+soup_uri_new (const char *uri_string)
+{
+	SoupURI *uri;
+
+	if (!uri_string)
+		return g_slice_new0 (SoupURI);
+
+	uri = soup_uri_new_with_base (NULL, uri_string);
+	if (!uri)
+		return NULL;
+	/* Without a base, a URI that carries no scheme is not absolute. */
+	if (!uri->scheme) {
+		soup_uri_free (uri);
+		return NULL;
+	}
+
+	return uri;
+}
+
+
+/**
+ * soup_uri_to_string:
+ * @uri: a #SoupURI
+ * @just_path_and_query: if %TRUE, output just the path and query portions
+ *
+ * Returns a string representing @uri.
+ *
+ * If @just_path_and_query is %TRUE, this concatenates the path and query
+ * together. That is, it constructs the string that would be needed in
+ * the Request-Line of an HTTP request for @uri.
+ *
+ * Return value: a string representing @uri, which the caller must free.
+ **/
+char *
+soup_uri_to_string (SoupURI *uri, gboolean just_path_and_query)
+{
+	GString *str;
+	char *return_result;
+
+	g_return_val_if_fail (uri != NULL, NULL);
+
+	/* IF YOU CHANGE ANYTHING IN THIS FUNCTION, RUN
+	 * tests/uri-parsing AFTERWARD.
+	 */
+
+	str = g_string_sized_new (20);
+
+	if (uri->scheme && !just_path_and_query)
+		g_string_append_printf (str, "%s:", uri->scheme);
+	if (uri->host && !just_path_and_query) {
+		g_string_append (str, "//");
+		/* Only the user is written out; the password is never
+		 * included in the string form.
+		 */
+		if (uri->user) {
+			append_uri_encoded (str, uri->user, ":;@?/");
+			g_string_append_c (str, '@');
+		}
+		/* A ':' in the host means an IPv6 literal, which must be
+		 * bracketed and is emitted without %-encoding.
+		 */
+		if (strchr (uri->host, ':')) {
+			g_string_append_c (str, '[');
+			g_string_append (str, uri->host);
+			g_string_append_c (str, ']');
+		} else
+			append_uri_encoded (str, uri->host, ":/");
+		/* The scheme's default port is left implicit. */
+		if (uri->port && uri->port != soup_scheme_default_port (uri->scheme))
+			g_string_append_printf (str, ":%u", uri->port);
+		if (!uri->path && (uri->query || uri->fragment))
+			g_string_append_c (str, '/');
+	}
+
+	if (uri->path && *uri->path)
+		g_string_append (str, uri->path);
+
+	if (uri->query) {
+		g_string_append_c (str, '?');
+		g_string_append (str, uri->query);
+	}
+	if (uri->fragment && !just_path_and_query) {
+		g_string_append_c (str, '#');
+		g_string_append (str, uri->fragment);
+	}
+
+	/* Free only the GString wrapper; the character data is returned. */
+	return_result = str->str;
+	g_string_free (str, FALSE);
+
+	return return_result;
+}
+
+/**
+ * soup_uri_copy:
+ * @uri: a #SoupURI
+ *
+ * Copies @uri
+ *
+ * Return value: a copy of @uri, which must be freed with soup_uri_free()
+ **/
+SoupURI *
+soup_uri_copy (SoupURI *uri)
+{
+	SoupURI *dup;
+
+	g_return_val_if_fail (uri != NULL, NULL);
+
+	dup = g_slice_new0 (SoupURI);
+	/* The scheme is an interned string, so the pointer is shared
+	 * rather than duplicated.
+	 */
+	dup->scheme   = uri->scheme;
+	dup->user     = g_strdup (uri->user);
+	dup->password = g_strdup (uri->password);
+	dup->host     = g_strdup (uri->host);
+	dup->port     = uri->port;
+	dup->path     = g_strdup (uri->path);
+	dup->query    = g_strdup (uri->query);
+	dup->fragment = g_strdup (uri->fragment);
+
+	return dup;
+}
+
+/* Compares two optional URI components. Two %NULL parts are equal, a
+ * %NULL and a non-%NULL part are not; otherwise the strings are
+ * compared, ASCII-case-insensitively when @insensitive is set.
+ */
+static inline gboolean
+parts_equal (const char *one, const char *two, gboolean insensitive)
+{
+	if (!one && !two)
+		return TRUE;
+	if (!one || !two)
+		return FALSE;
+	return insensitive ? !g_ascii_strcasecmp (one, two) : !strcmp (one, two);
+}
+
+/**
+ * soup_uri_equal:
+ * @uri1: a #SoupURI
+ * @uri2: another #SoupURI
+ *
+ * Tests whether or not @uri1 and @uri2 are equal in all parts. The
+ * host is compared case-insensitively; every other string part is
+ * compared exactly.
+ *
+ * Return value: %TRUE or %FALSE
+ **/
+gboolean
+soup_uri_equal (SoupURI *uri1, SoupURI *uri2)
+{
+	/* Guard like soup_uri_copy() and soup_uri_free() do, instead of
+	 * dereferencing a NULL URI.
+	 */
+	g_return_val_if_fail (uri1 != NULL, FALSE);
+	g_return_val_if_fail (uri2 != NULL, FALSE);
+
+	/* Schemes are interned, so pointer comparison is sufficient. */
+	if (uri1->scheme != uri2->scheme                         ||
+	    uri1->port   != uri2->port                           ||
+	    !parts_equal (uri1->user, uri2->user, FALSE)         ||
+	    !parts_equal (uri1->password, uri2->password, FALSE) ||
+	    !parts_equal (uri1->host, uri2->host, TRUE)          ||
+	    !parts_equal (uri1->path, uri2->path, FALSE)         ||
+	    !parts_equal (uri1->query, uri2->query, FALSE)       ||
+	    !parts_equal (uri1->fragment, uri2->fragment, FALSE))
+		return FALSE;
+
+	return TRUE;
+}
+
+/**
+ * soup_uri_free:
+ * @uri: a #SoupURI
+ *
+ * Frees @uri.
+ **/
+void
+soup_uri_free (SoupURI *uri)
+{
+	g_return_if_fail (uri != NULL);
+
+	/* scheme is interned and therefore not freed here. */
+	g_free (uri->user);
+	g_free (uri->password);
+	g_free (uri->host);
+	g_free (uri->path);
+	g_free (uri->query);
+	g_free (uri->fragment);
+
+	g_slice_free (SoupURI, uri);
+}
+
+/* Appends @in to @str, writing as "%XX" every byte that
+ * soup_char_is_uri_percent_encoded() or soup_char_is_uri_gen_delims()
+ * accepts, or that appears in @extra_enc_chars (which may be %NULL).
+ */
+static void
+append_uri_encoded (GString *str, const char *in, const char *extra_enc_chars)
+{
+	const unsigned char *s = (const unsigned char *)in;
+
+	while (*s) {
+		if (soup_char_is_uri_percent_encoded (*s) ||
+		    soup_char_is_uri_gen_delims (*s) ||
+		    (extra_enc_chars && strchr (extra_enc_chars, *s)))
+			g_string_append_printf (str, "%%%02X", (int)*s++);
+		else
+			g_string_append_c (str, *s++);
+	}
+}
+
+/**
+ * soup_uri_encode:
+ * @part: a URI part
+ * @escape_extra: (allow-none): additional reserved characters to
+ * escape (or %NULL)
+ *
+ * This %-encodes the given URI part and returns the escaped
+ * version in allocated memory, which the caller must free when it is
+ * done.
+ *
+ * Return value: the encoded URI part
+ **/
+char *
+soup_uri_encode (const char *part, const char *escape_extra)
+{
+	GString *str;
+	char *encoded;
+
+	str = g_string_new (NULL);
+	append_uri_encoded (str, part, escape_extra);
+	/* Keep the character data, free only the GString wrapper. */
+	encoded = str->str;
+	g_string_free (str, FALSE);
+
+	return encoded;
+}
+
+/* XDIGIT: numeric value of hex digit character @c. Masking with 0x4F
+ * folds 'a'-'f' onto 'A'-'F'. The result is only meaningful when @c
+ * really is a hex digit; both users below check g_ascii_isxdigit()
+ * first.
+ * HEXCHAR: byte value of the "%XX" triplet that starts at @s.
+ */
+#define XDIGIT(c) ((c) <= '9' ? (c) - '0' : ((c) & 0x4F) - 'A' + 10)
+#define HEXCHAR(s) ((XDIGIT (s[1]) << 4) + XDIGIT (s[2]))
+
+/* Returns a newly-allocated copy of the first @length bytes of @part
+ * with every valid "%XX" triplet replaced by the byte it encodes. A '%'
+ * that is not followed by two hex digits is copied through unchanged.
+ * The caller frees the result with g_free().
+ */
+char *
+uri_decoded_copy (const char *part, int length)
+{
+	unsigned char *s, *d;
+	char *decoded = g_strndup (part, length);
+
+	/* Decode in place: @d (write) never gets ahead of @s (read). The
+	 * loop condition is tested after the copy, so the terminating NUL
+	 * is copied as well.
+	 */
+	s = d = (unsigned char *)decoded;
+	do {
+		if (*s == '%') {
+			if (!g_ascii_isxdigit (s[1]) ||
+			    !g_ascii_isxdigit (s[2])) {
+				*d++ = *s;
+				/* continue still evaluates the while
+				 * condition, which advances @s.
+				 */
+				continue;
+			}
+			*d++ = HEXCHAR (s);
+			s += 2;
+		} else
+			*d++ = *s;
+	} while (*s++);
+
+	return decoded;
+}
+
+/**
+ * soup_uri_decode:
+ * @part: a URI part
+ *
+ * Fully %-decodes @part.
+ *
+ * In the past, this would return %NULL if @part contained invalid
+ * percent-encoding, but now it just ignores the problem (as
+ * soup_uri_new() already did).
+ *
+ * Return value: the decoded URI part.
+ */
+char *
+soup_uri_decode (const char *part)
+{
+	return uri_decoded_copy (part, strlen (part));
+}
+
+/* Returns a newly-allocated, normalized copy of the first @length bytes
+ * of @part: "%XX" triplets that encode an unreserved character, or a
+ * character listed in @unescape_extra (may be %NULL), are decoded; all
+ * other triplets are kept as written; literal spaces become "%20".
+ * The caller frees the result with g_free().
+ */
+static char *
+uri_normalized_copy (const char *part, int length,
+		     const char *unescape_extra)
+{
+	unsigned char *s, *d, c;
+	char *normalized = g_strndup (part, length);
+	gboolean need_fixup = FALSE;
+
+	/* First pass, in place; same read/write-pointer scheme as
+	 * uri_decoded_copy().
+	 */
+	s = d = (unsigned char *)normalized;
+	do {
+		if (*s == '%') {
+			if (!g_ascii_isxdigit (s[1]) ||
+			    !g_ascii_isxdigit (s[2])) {
+				*d++ = *s;
+				continue;
+			}
+
+			c = HEXCHAR (s);
+			if (soup_char_is_uri_unreserved (c) ||
+			    (unescape_extra && strchr (unescape_extra, c))) {
+				*d++ = c;
+				s += 2;
+			} else {
+				/* We leave it unchanged. We used to uppercase percent-encoded
+				 * triplets but we do not do it any more as RFC3986 Section 6.2.2.1
+				 * says that they only SHOULD be case normalized.
+				 */
+				*d++ = *s++;
+				*d++ = *s++;
+				*d++ = *s;
+			}
+		} else {
+			/* Spaces cannot be expanded in place (the result
+			 * would be longer), so only note that a second pass
+			 * is needed.
+			 */
+			if (*s == ' ')
+				need_fixup = TRUE;
+			*d++ = *s;
+		}
+	} while (*s++);
+
+	if (need_fixup) {
+		/* Second pass: rebuild the string with each ' ' replaced
+		 * by "%20".
+		 */
+		GString *fixed;
+		char *sp, *p;
+
+		fixed = g_string_new (NULL);
+		p = normalized;
+		while ((sp = strchr (p, ' '))) {
+			g_string_append_len (fixed, p, sp - p);
+			g_string_append (fixed, "%20");
+			p = sp + 1;
+		}
+		g_string_append (fixed, p);
+		g_free (normalized);
+		normalized = g_string_free (fixed, FALSE);
+	}
+
+	return normalized;
+}
+
+/**
+ * soup_uri_normalize:
+ * @part: a URI part
+ * @unescape_extra: reserved characters to unescape (or %NULL)
+ *
+ * %-decodes any "unreserved" characters (or characters in
+ * @unescape_extra) in @part.
+ *
+ * "Unreserved" characters are those that are not allowed to be used
+ * for punctuation according to the URI spec. For example, letters are
+ * unreserved, so soup_uri_normalize() will turn
+ * http://example.com/foo/b%61r into
+ * http://example.com/foo/bar, which is guaranteed
+ * to mean the same thing. However, "/" is "reserved", so
+ * http://example.com/foo%2Fbar would not
+ * be changed, because it might mean something different to the
+ * server.
+ *
+ * In the past, this would return %NULL if @part contained invalid
+ * percent-encoding, but now it just ignores the problem (as
+ * soup_uri_new() already did).
+ *
+ * Return value: the normalized URI part
+ */
+char *
+soup_uri_normalize (const char *part, const char *unescape_extra)
+{
+	return uri_normalized_copy (part, strlen (part), unescape_extra);
+}
+
+
+/**
+ * soup_uri_uses_default_port:
+ * @uri: a #SoupURI
+ *
+ * Tests if @uri uses the default port for its scheme. (Eg, 80 for
+ * http.) (This only works for http, https and ftp; libsoup does not
+ * know the default ports of other protocols.)
+ *
+ * Return value: %TRUE or %FALSE
+ **/
+gboolean
+soup_uri_uses_default_port (SoupURI *uri)
+{
+	g_return_val_if_fail (uri != NULL, FALSE);
+	g_return_val_if_fail (uri->scheme == SOUP_URI_SCHEME_HTTP ||
+			      uri->scheme == SOUP_URI_SCHEME_HTTPS ||
+			      uri->scheme == SOUP_URI_SCHEME_FTP, FALSE);
+
+	return uri->port == soup_scheme_default_port (uri->scheme);
+}
+
+/**
+ * SOUP_URI_SCHEME_HTTP:
+ *
+ * "http" as an interned string. This can be compared directly against
+ * the value of a #SoupURI's scheme
+ **/
+
+/**
+ * SOUP_URI_SCHEME_HTTPS:
+ *
+ * "https" as an interned string. This can be compared directly
+ * against the value of a #SoupURI's scheme
+ **/
+
+/**
+ * soup_uri_get_scheme:
+ * @uri: a #SoupURI
+ *
+ * Gets @uri's scheme.
+ *
+ * Return value: @uri's scheme.
+ *
+ * Since: 2.32
+ **/
+const char *
+soup_uri_get_scheme (SoupURI *uri)
+{
+	g_return_val_if_fail (uri != NULL, NULL);
+
+	return uri->scheme;
+}
+
+/**
+ * soup_uri_set_scheme:
+ * @uri: a #SoupURI
+ * @scheme: the URI scheme
+ *
+ * Sets @uri's scheme to @scheme. This will also set @uri's port to
+ * the default port for @scheme, if known.
+ **/
+void
+soup_uri_set_scheme (SoupURI *uri, const char *scheme)
+{
+	g_return_if_fail (uri != NULL);
+	/* strlen() below requires a real string. */
+	g_return_if_fail (scheme != NULL);
+
+	uri->scheme = soup_uri_parse_scheme (scheme, strlen (scheme));
+	/* Note: this overwrites any port set earlier; the port becomes
+	 * 0 when the scheme has no known default.
+	 */
+	uri->port = soup_scheme_default_port (uri->scheme);
+}
+
+/**
+ * soup_uri_get_user:
+ * @uri: a #SoupURI
+ *
+ * Gets @uri's user.
+ *
+ * Return value: @uri's user.
+ *
+ * Since: 2.32
+ **/
+const char *
+soup_uri_get_user (SoupURI *uri)
+{
+	g_return_val_if_fail (uri != NULL, NULL);
+
+	return uri->user;
+}
+
+/**
+ * soup_uri_set_user:
+ * @uri: a #SoupURI
+ * @user: the username, or %NULL
+ *
+ * Sets @uri's user to @user.
+ **/
+void
+soup_uri_set_user (SoupURI *uri, const char *user)
+{
+	g_return_if_fail (uri != NULL);
+
+	g_free (uri->user);
+	uri->user = g_strdup (user);
+}
+
+/**
+ * soup_uri_get_password:
+ * @uri: a #SoupURI
+ *
+ * Gets @uri's password.
+ *
+ * Return value: @uri's password.
+ *
+ * Since: 2.32
+ **/
+const char *
+soup_uri_get_password (SoupURI *uri)
+{
+	g_return_val_if_fail (uri != NULL, NULL);
+
+	return uri->password;
+}
+
+/**
+ * soup_uri_set_password:
+ * @uri: a #SoupURI
+ * @password: the password, or %NULL
+ *
+ * Sets @uri's password to @password.
+ **/
+void
+soup_uri_set_password (SoupURI *uri, const char *password)
+{
+	g_return_if_fail (uri != NULL);
+
+	g_free (uri->password);
+	uri->password = g_strdup (password);
+}
+
+/**
+ * soup_uri_get_host:
+ * @uri: a #SoupURI
+ *
+ * Gets @uri's host.
+ *
+ * Return value: @uri's host.
+ *
+ * Since: 2.32
+ **/
+const char *
+soup_uri_get_host (SoupURI *uri)
+{
+	g_return_val_if_fail (uri != NULL, NULL);
+
+	return uri->host;
+}
+
+/**
+ * soup_uri_set_host:
+ * @uri: a #SoupURI
+ * @host: the hostname or IP address, or %NULL
+ *
+ * Sets @uri's host to @host.
+ *
+ * If @host is an IPv6 IP address, it should not include the brackets
+ * required by the URI syntax; they will be added automatically when
+ * converting @uri to a string.
+ **/
+void
+soup_uri_set_host (SoupURI *uri, const char *host)
+{
+	g_return_if_fail (uri != NULL);
+
+	g_free (uri->host);
+	uri->host = g_strdup (host);
+}
+
+/**
+ * soup_uri_get_port:
+ * @uri: a #SoupURI
+ *
+ * Gets @uri's port.
+ *
+ * Return value: @uri's port.
+ *
+ * Since: 2.32
+ **/
+guint
+soup_uri_get_port (SoupURI *uri)
+{
+	g_return_val_if_fail (uri != NULL, 0);
+
+	return uri->port;
+}
+
+/**
+ * soup_uri_set_port:
+ * @uri: a #SoupURI
+ * @port: the port, or 0
+ *
+ * Sets @uri's port to @port. If @port is 0, @uri will not have an
+ * explicitly-specified port.
+ **/
+void
+soup_uri_set_port (SoupURI *uri, guint port)
+{
+	g_return_if_fail (uri != NULL);
+
+	uri->port = port;
+}
+
+/**
+ * soup_uri_get_path:
+ * @uri: a #SoupURI
+ *
+ * Gets @uri's path.
+ *
+ * Return value: @uri's path.
+ *
+ * Since: 2.32
+ **/
+const char *
+soup_uri_get_path (SoupURI *uri)
+{
+	g_return_val_if_fail (uri != NULL, NULL);
+
+	return uri->path;
+}
+
+/**
+ * soup_uri_set_path:
+ * @uri: a #SoupURI
+ * @path: the path
+ *
+ * Sets @uri's path to @path.
+ **/
+void
+soup_uri_set_path (SoupURI *uri, const char *path)
+{
+	g_return_if_fail (uri != NULL);
+
+	g_free (uri->path);
+	uri->path = g_strdup (path);
+}
+
+/**
+ * soup_uri_get_query:
+ * @uri: a #SoupURI
+ *
+ * Gets @uri's query.
+ *
+ * Return value: @uri's query.
+ *
+ * Since: 2.32
+ **/
+const char *
+soup_uri_get_query (SoupURI *uri)
+{
+	g_return_val_if_fail (uri != NULL, NULL);
+
+	return uri->query;
+}
+
+/**
+ * soup_uri_set_query:
+ * @uri: a #SoupURI
+ * @query: the query
+ *
+ * Sets @uri's query to @query.
+ **/
+void
+soup_uri_set_query (SoupURI *uri, const char *query)
+{
+	g_return_if_fail (uri != NULL);
+
+	g_free (uri->query);
+	uri->query = g_strdup (query);
+}
+
+/**
+ * soup_uri_set_query_from_form:
+ * @uri: a #SoupURI
+ * @form: (element-type utf8 utf8): a #GHashTable containing HTML form
+ * information
+ *
+ * Sets @uri's query to the result of encoding @form according to the
+ * HTML form rules. See soup_form_encode_hash() for more information.
+ **/
+void
+soup_uri_set_query_from_form (SoupURI *uri, GHashTable *form)
+{
+	g_return_if_fail (uri != NULL);
+
+	g_free (uri->query);
+	uri->query = soup_form_encode_hash (form);
+}
+
+/**
+ * soup_uri_set_query_from_fields:
+ * @uri: a #SoupURI
+ * @first_field: name of the first form field to encode into query
+ * @...: value of @first_field, followed by additional field names
+ * and values, terminated by %NULL.
+ *
+ * Sets @uri's query to the result of encoding the given form fields
+ * and values according to the HTML form rules. See
+ * soup_form_encode() for more information.
+ **/
+void
+soup_uri_set_query_from_fields (SoupURI    *uri,
+				const char *first_field,
+				...)
+{
+	va_list args;
+
+	g_return_if_fail (uri != NULL);
+
+	g_free (uri->query);
+	va_start (args, first_field);
+	uri->query = soup_form_encode_valist (first_field, args);
+	va_end (args);
+}
+
+/**
+ * soup_uri_get_fragment:
+ * @uri: a #SoupURI
+ *
+ * Gets @uri's fragment.
+ *
+ * Return value: @uri's fragment.
+ *
+ * Since: 2.32
+ **/
+const char *
+soup_uri_get_fragment (SoupURI *uri)
+{
+	g_return_val_if_fail (uri != NULL, NULL);
+
+	return uri->fragment;
+}
+
+/**
+ * soup_uri_set_fragment:
+ * @uri: a #SoupURI
+ * @fragment: the fragment
+ *
+ * Sets @uri's fragment to @fragment.
+ **/
+void
+soup_uri_set_fragment (SoupURI *uri, const char *fragment)
+{
+	g_return_if_fail (uri != NULL);
+
+	g_free (uri->fragment);
+	uri->fragment = g_strdup (fragment);
+}
+
+/**
+ * soup_uri_copy_host:
+ * @uri: a #SoupURI
+ *
+ * Makes a copy of @uri, considering only the protocol, host, and port
+ *
+ * Return value: the new #SoupURI
+ *
+ * Since: 2.26.3
+ **/
+SoupURI *
+soup_uri_copy_host (SoupURI *uri)
+{
+	SoupURI *dup;
+
+	g_return_val_if_fail (uri != NULL, NULL);
+
+	dup = soup_uri_new (NULL);
+	dup->scheme = uri->scheme;
+	dup->host   = g_strdup (uri->host);
+	dup->port   = uri->port;
+	/* http/https URIs get a non-NULL (empty) path so that the copy
+	 * still satisfies SOUP_URI_VALID_FOR_HTTP().
+	 */
+	if (dup->scheme == SOUP_URI_SCHEME_HTTP ||
+	    dup->scheme == SOUP_URI_SCHEME_HTTPS)
+		dup->path = g_strdup ("");
+
+	return dup;
+}
+
+/**
+ * soup_uri_host_hash:
+ * @key: (type Soup.URI): a #SoupURI
+ *
+ * Hashes @key, considering only the scheme, host, and port.
+ *
+ * Return value: a hash
+ *
+ * Since: 2.26.3
+ **/
+guint
+soup_uri_host_hash (gconstpointer key)
+{
+	const SoupURI *uri = key;
+
+	g_return_val_if_fail (uri != NULL && uri->host != NULL, 0);
+
+	/* The scheme is interned, so its address identifies it. */
+	return GPOINTER_TO_UINT (uri->scheme) + uri->port +
+		soup_str_case_hash (uri->host);
+}
+
+/**
+ * soup_uri_host_equal:
+ * @v1: (type Soup.URI): a #SoupURI
+ * @v2: (type Soup.URI): a #SoupURI
+ *
+ * Compares @v1 and @v2, considering only the scheme, host, and port.
+ *
+ * Return value: whether or not the URIs are equal in scheme, host,
+ * and port.
+ *
+ * Since: 2.26.3
+ **/
+gboolean
+soup_uri_host_equal (gconstpointer v1, gconstpointer v2)
+{
+	const SoupURI *one = v1, *two = v2;
+
+	g_return_val_if_fail (one != NULL && two != NULL, one == two);
+	g_return_val_if_fail (one->host != NULL && two->host != NULL, one->host == two->host);
+
+	/* Scheme (interned pointer) and port must match exactly; the
+	 * host name is compared without regard to ASCII case.
+	 */
+	return one->scheme == two->scheme &&
+		one->port == two->port &&
+		g_ascii_strcasecmp (one->host, two->host) == 0;
+}
+
+
+/* Returns the boxed #GType for #SoupURI, registering it on the first
+ * call with soup_uri_copy() / soup_uri_free() as its copy and free
+ * functions.
+ */
+GType
+soup_uri_get_type (void)
+{
+	static volatile gsize type_volatile = 0;
+	GType registered;
+
+	/* Already registered (or another caller completed it). */
+	if (!g_once_init_enter (&type_volatile))
+		return type_volatile;
+
+	registered = g_boxed_type_register_static (
+		g_intern_static_string ("SoupURI"),
+		(GBoxedCopyFunc) soup_uri_copy,
+		(GBoxedFreeFunc) soup_uri_free);
+	g_once_init_leave (&type_volatile, registered);
+
+	return type_volatile;
+}
diff --git a/libsoup/soup-uri.h b/libsoup/soup-uri.h
new file mode 100644
index 0000000..a5c54a6
--- /dev/null
+++ b/libsoup/soup-uri.h
@@ -0,0 +1,104 @@
+/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+
+/*
+ * Copyright 1999-2002 Ximian, Inc.
+ */
+
+
+#ifndef SOUP_URI_H
+#define SOUP_URI_H 1
+
+#include
+#include
+
+G_BEGIN_DECLS
+
+/* A parsed URI. See the #SoupURI documentation in soup-uri.c for the
+ * meaning of each field.
+ */
+struct _SoupURI {
+	const char *scheme;	/* interned, lowercase; compared by pointer */
+
+	char       *user;	/* or NULL */
+	char       *password;	/* or NULL */
+
+	char       *host;
+	guint       port;	/* 0 when not specified and no default is known */
+
+	char       *path;
+	char       *query;	/* or NULL */
+
+	char       *fragment;	/* or NULL */
+};
+
+GType soup_uri_get_type (void);
+#define SOUP_TYPE_URI (soup_uri_get_type ())
+
+/* Interned scheme strings; each macro caches its value in the matching
+ * _SOUP_URI_SCHEME_* variable declared below.
+ */
+#define SOUP_URI_SCHEME_HTTP  _SOUP_ATOMIC_INTERN_STRING (_SOUP_URI_SCHEME_HTTP, "http")
+#define SOUP_URI_SCHEME_HTTPS _SOUP_ATOMIC_INTERN_STRING (_SOUP_URI_SCHEME_HTTPS, "https")
+#define SOUP_URI_SCHEME_FTP   _SOUP_ATOMIC_INTERN_STRING (_SOUP_URI_SCHEME_FTP, "ftp")
+#define SOUP_URI_SCHEME_FILE  _SOUP_ATOMIC_INTERN_STRING (_SOUP_URI_SCHEME_FILE, "file")
+#define SOUP_URI_SCHEME_DATA  _SOUP_ATOMIC_INTERN_STRING (_SOUP_URI_SCHEME_DATA, "data")
+extern gpointer _SOUP_URI_SCHEME_HTTP, _SOUP_URI_SCHEME_HTTPS;
+extern gpointer _SOUP_URI_SCHEME_FTP;
+extern gpointer _SOUP_URI_SCHEME_FILE, _SOUP_URI_SCHEME_DATA;
+
+SoupURI    *soup_uri_new_with_base         (SoupURI    *base,
+					    const char *uri_string);
+SoupURI    *soup_uri_new                   (const char *uri_string);
+
+char       *soup_uri_to_string             (SoupURI    *uri,
+					    gboolean    just_path_and_query);
+
+SoupURI    *soup_uri_copy                  (SoupURI    *uri);
+
+gboolean    soup_uri_equal                 (SoupURI    *uri1,
+					    SoupURI    *uri2);
+
+void        soup_uri_free                  (SoupURI    *uri);
+
+char       *soup_uri_encode                (const char *part,
+					    const char *escape_extra);
+char       *soup_uri_decode                (const char *part);
+char       *soup_uri_normalize             (const char *part,
+					    const char *unescape_extra);
+
+gboolean    soup_uri_uses_default_port     (SoupURI    *uri);
+
+const char *soup_uri_get_scheme            (SoupURI    *uri);
+void        soup_uri_set_scheme            (SoupURI    *uri,
+					    const char *scheme);
+const char *soup_uri_get_user              (SoupURI    *uri);
+void        soup_uri_set_user              (SoupURI    *uri,
+					    const char *user);
+const char *soup_uri_get_password          (SoupURI    *uri);
+void        soup_uri_set_password          (SoupURI    *uri,
+					    const char *password);
+const char *soup_uri_get_host              (SoupURI    *uri);
+void        soup_uri_set_host              (SoupURI    *uri,
+					    const char *host);
+guint       soup_uri_get_port              (SoupURI    *uri);
+void        soup_uri_set_port              (SoupURI    *uri,
+					    guint       port);
+const char *soup_uri_get_path              (SoupURI    *uri);
+void        soup_uri_set_path              (SoupURI    *uri,
+					    const char *path);
+const char *soup_uri_get_query             (SoupURI    *uri);
+void        soup_uri_set_query             (SoupURI    *uri,
+					    const char *query);
+void        soup_uri_set_query_from_form   (SoupURI    *uri,
+					    GHashTable *form);
+void        soup_uri_set_query_from_fields (SoupURI    *uri,
+					    const char *first_field,
+					    ...) G_GNUC_NULL_TERMINATED;
+const char *soup_uri_get_fragment          (SoupURI    *uri);
+void        soup_uri_set_fragment          (SoupURI    *uri,
+					    const char *fragment);
+
+SoupURI    *soup_uri_copy_host             (SoupURI    *uri);
+guint       soup_uri_host_hash             (gconstpointer key);
+gboolean    soup_uri_host_equal            (gconstpointer v1,
+					    gconstpointer v2);
+
+/* TRUE when @uri is non-NULL, has an http or https scheme, and has
+ * both a host and a path. Note that @uri is evaluated several times.
+ */
+#define SOUP_URI_VALID_FOR_HTTP(uri) ((uri) && ((uri)->scheme == SOUP_URI_SCHEME_HTTP || (uri)->scheme == SOUP_URI_SCHEME_HTTPS) && (uri)->host && (uri)->path)
+
+G_END_DECLS
+
+#endif /*SOUP_URI_H*/
diff --git a/libsoup/soup-value-utils.c b/libsoup/soup-value-utils.c
new file mode 100644
index 0000000..351b3da
--- /dev/null
+++ b/libsoup/soup-value-utils.c
@@ -0,0 +1,494 @@
+/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+/*
+ * soup-value-utils.c: GValue and GType-related utilities
+ *
+ * Copyright (C) 2007 Red Hat, Inc.
+ */
+
+#include "soup-value-utils.h"
+
+#include
+
+/**
+ * SECTION:soup-value-utils
+ * @short_description: #GValue utilities
+ *
+ * These methods are useful for manipulating #GValues, and in
+ * particular, arrays and hash tables of #GValues, in a
+ * slightly nicer way than the standard #GValue API.
+ *
+ * They are written for use with soup-xmlrpc, but they also work with
+ * types not used by XML-RPC.
+ **/
+
+/**
+ * SOUP_VALUE_SETV:
+ * @val: a #GValue
+ * @type: a #GType
+ * @args: #va_list pointing to a value of type @type
+ *
+ * Copies an argument of type @type from @args into @val.
@val will + * point directly to the value in @args rather than copying it, so you + * must g_value_copy() it if you want it to remain valid. + **/ + +/** + * SOUP_VALUE_GETV: + * @val: a #GValue + * @type: a #GType + * @args: #va_list pointing to a value of type pointer-to-@type + * + * Extracts a value of type @type from @val into @args. The return + * value will point to the same data as @val rather than being a copy + * of it. + **/ + +static void +soup_value_hash_value_free (gpointer val) +{ + g_value_unset (val); + g_free (val); +} + +/** + * soup_value_hash_new: + * + * Creates a #GHashTable whose keys are strings and whose values + * are #GValue. + * + * Return value: (element-type utf8 GValue) (transfer full): a new + * empty #GHashTable + **/ +GHashTable * +soup_value_hash_new (void) +{ + return g_hash_table_new_full (g_str_hash, g_str_equal, + g_free, soup_value_hash_value_free); +} + +static void +soup_value_hash_insert_valist (GHashTable *hash, const char *first_key, + va_list args) +{ + const char *key; + GType type; + GValue value; + + key = first_key; + while (key) { + type = va_arg (args, GType); + SOUP_VALUE_SETV (&value, type, args); + + soup_value_hash_insert_value (hash, key, &value); + key = va_arg (args, const char *); + } +} + +/** + * soup_value_hash_new_with_vals: + * @first_key: the key for the first value + * @...: the type of @first_key, followed by the value, followed + * by additional key/type/value triplets, terminated by %NULL + * + * Creates a #GHashTable whose keys are strings and whose values + * are #GValue, and initializes it with the provided data. As + * with soup_value_hash_insert(), the keys and values are copied + * rather than being inserted directly. + * + * Return value: (element-type utf8 GValue) (transfer full): a new + * #GHashTable, initialized with the given values + **/ +GHashTable * +soup_value_hash_new_with_vals (const char *first_key, ...) 
+{ + GHashTable *hash = soup_value_hash_new (); + va_list args; + + va_start (args, first_key); + soup_value_hash_insert_valist (hash, first_key, args); + va_end (args); + + return hash; +} + +/** + * soup_value_hash_insert_value: + * @hash: (element-type utf8 GValue): a value hash + * @key: the key + * @value: a value + * + * Inserts @value into @hash. (Unlike with g_hash_table_insert(), both + * the key and the value are copied). + **/ +void +soup_value_hash_insert_value (GHashTable *hash, const char *key, GValue *value) +{ + GValue *copy = g_new0 (GValue, 1); + + g_value_init (copy, G_VALUE_TYPE (value)); + g_value_copy (value, copy); + g_hash_table_insert (hash, g_strdup (key), copy); +} + +/** + * soup_value_hash_insert: + * @hash: (element-type utf8 GValue): a value hash + * @key: the key + * @type: a #GType + * @...: a value of type @type + * + * Inserts the provided value of type @type into @hash. (Unlike with + * g_hash_table_insert(), both the key and the value are copied). + **/ +void +soup_value_hash_insert (GHashTable *hash, const char *key, GType type, ...) +{ + va_list args; + GValue val; + + va_start (args, type); + SOUP_VALUE_SETV (&val, type, args); + va_end (args); + soup_value_hash_insert_value (hash, key, &val); +} + +/** + * soup_value_hash_insert_vals: + * @hash: (element-type utf8 GValue): a value hash + * @first_key: the key for the first value + * @...: the type of @first_key, followed by the value, followed + * by additional key/type/value triplets, terminated by %NULL + * + * Inserts the given data into @hash. As with + * soup_value_hash_insert(), the keys and values are copied rather + * than being inserted directly. + **/ +void +soup_value_hash_insert_vals (GHashTable *hash, const char *first_key, ...) 
+{ + va_list args; + + va_start (args, first_key); + soup_value_hash_insert_valist (hash, first_key, args); + va_end (args); +} + +/** + * soup_value_hash_lookup: + * @hash: (element-type utf8 GValue): a value hash + * @key: the key to look up + * @type: a #GType + * @...: a value of type pointer-to-@type + * + * Looks up @key in @hash and stores its value into the provided + * location. + * + * Return value: %TRUE if @hash contained a value with key @key and + * type @type, %FALSE if not. + **/ +gboolean +soup_value_hash_lookup (GHashTable *hash, const char *key, GType type, ...) +{ + va_list args; + GValue *value; + + value = g_hash_table_lookup (hash, key); + if (!value || !G_VALUE_HOLDS (value, type)) + return FALSE; + + va_start (args, type); + SOUP_VALUE_GETV (value, type, args); + va_end (args); + + return TRUE; +} + +/** + * soup_value_hash_lookup_vals: + * @hash: (element-type utf8 GValue): a value hash + * @first_key: the first key to look up + * @...: the type of @first_key, a pointer to that type, and + * then additional key/type/pointer triplets, terminated + * by %NULL. + * + * Looks up a number of keys in @hash and returns their values. + * + * Return value: %TRUE if all of the keys were found, %FALSE + * if any were missing; note that you will generally need to + * initialize each destination variable to a reasonable default + * value, since there is no way to tell which keys were found + * and which were not. + **/ +gboolean +soup_value_hash_lookup_vals (GHashTable *hash, const char *first_key, ...) 
+{ + va_list args; + GValue *value; + const char *key; + GType type; + gboolean found_all = TRUE; + + va_start (args, first_key); + key = first_key; + while (key) { + type = va_arg (args, GType); + + value = g_hash_table_lookup (hash, key); + if (!value || !G_VALUE_HOLDS (value, type)) { + found_all = FALSE; + /* skip a pointer */ + va_arg (args, gpointer); + } else + SOUP_VALUE_GETV (value, type, args); + + key = va_arg (args, const char *); + } + va_end (args); + + return found_all; +} + + +/** + * soup_value_array_from_args: + * @args: arguments to create a #GValueArray from + * + * Creates a #GValueArray from the provided arguments, which must + * consist of pairs of a #GType and a value of that type, terminated + * by %G_TYPE_INVALID. (The array will contain copies of the provided + * data rather than pointing to the passed-in data directly.) + * + * Return value: a new #GValueArray, or %NULL if an error occurred. + **/ +GValueArray * +soup_value_array_from_args (va_list args) +{ + GValueArray *array; + GType type; + GValue val; + + array = g_value_array_new (1); + while ((type = va_arg (args, GType)) != G_TYPE_INVALID) { + SOUP_VALUE_SETV (&val, type, args); + g_value_array_append (array, &val); + } + return array; +} + +/** + * soup_value_array_to_args: + * @array: a #GValueArray + * @args: arguments to extract @array into + * + * Extracts a #GValueArray into the provided arguments, which must + * consist of pairs of a #GType and a value of pointer-to-that-type, + * terminated by %G_TYPE_INVALID. The returned values will point to the + * same memory as the values in the array. 
+ * + * Return value: success or failure + **/ +gboolean +soup_value_array_to_args (GValueArray *array, va_list args) +{ + GType type; + GValue *value; + int i; + + for (i = 0; i < array->n_values; i++) { + type = va_arg (args, GType); + if (type == G_TYPE_INVALID) + return FALSE; + value = g_value_array_get_nth (array, i); + if (!G_VALUE_HOLDS (value, type)) + return FALSE; + SOUP_VALUE_GETV (value, type, args); + } + return TRUE; +} + +/** + * soup_value_array_new: + * + * Creates a new %GValueArray. (This is just a wrapper around + * g_value_array_new(), for naming consistency purposes.) + * + * Return value: a new %GValueArray + **/ +GValueArray * +soup_value_array_new (void) +{ + return g_value_array_new (1); +} + +static void +soup_value_array_append_valist (GValueArray *array, + GType first_type, va_list args) +{ + GType type; + GValue value; + + type = first_type; + while (type != G_TYPE_INVALID) { + SOUP_VALUE_SETV (&value, type, args); + + g_value_array_append (array, &value); + type = va_arg (args, GType); + } +} + +/** + * soup_value_array_new_with_vals: + * @first_type: the type of the first value to add + * @...: the first value to add, followed by other type/value + * pairs, terminated by %G_TYPE_INVALID + * + * Creates a new %GValueArray and copies the provided values + * into it. + * + * Return value: a new %GValueArray + **/ +GValueArray * +soup_value_array_new_with_vals (GType first_type, ...) +{ + GValueArray *array = soup_value_array_new (); + va_list args; + + va_start (args, first_type); + soup_value_array_append_valist (array, first_type, args); + va_end (args); + + return array; +} + +/** + * soup_value_array_insert: + * @array: a #GValueArray + * @index_: the index to insert at + * @type: a #GType + * @...: a value of type @type + * + * Inserts the provided value of type @type into @array as with + * g_value_array_insert(). (The provided data is copied rather than + * being inserted directly.) 
+ **/ +void +soup_value_array_insert (GValueArray *array, guint index_, GType type, ...) +{ + va_list args; + GValue val; + + va_start (args, type); + SOUP_VALUE_SETV (&val, type, args); + va_end (args); + g_value_array_insert (array, index_, &val); +} + +/** + * soup_value_array_append: + * @array: a #GValueArray + * @type: a #GType + * @...: a value of type @type + * + * Appends the provided value of type @type to @array as with + * g_value_array_append(). (The provided data is copied rather than + * being inserted directly.) + **/ +void +soup_value_array_append (GValueArray *array, GType type, ...) +{ + va_list args; + GValue val; + + va_start (args, type); + SOUP_VALUE_SETV (&val, type, args); + va_end (args); + g_value_array_append (array, &val); +} + +/** + * soup_value_array_append_vals: + * @array: a #GValueArray + * @first_type: the type of the first value to add + * @...: the first value to add, followed by other type/value + * pairs, terminated by %G_TYPE_INVALID + * + * Appends the provided values into @array as with + * g_value_array_append(). (The provided data is copied rather than + * being inserted directly.) + **/ +void +soup_value_array_append_vals (GValueArray *array, GType first_type, ...) +{ + va_list args; + + va_start (args, first_type); + soup_value_array_append_valist (array, first_type, args); + va_end (args); +} + +/** + * soup_value_array_get_nth: + * @array: a #GValueArray + * @index_: the index to look up + * @type: a #GType + * @...: a value of type pointer-to-@type + * + * Gets the @index_ element of @array and stores its value into the + * provided location. + * + * Return value: %TRUE if @array contained a value with index @index_ + * and type @type, %FALSE if not. + **/ +gboolean +soup_value_array_get_nth (GValueArray *array, guint index_, GType type, ...) 
+{ + GValue *value; + va_list args; + + value = g_value_array_get_nth (array, index_); + if (!value || !G_VALUE_HOLDS (value, type)) + return FALSE; + + va_start (args, type); + SOUP_VALUE_GETV (value, type, args); + va_end (args); + return TRUE; +} + + +static GByteArray * +soup_byte_array_copy (GByteArray *ba) +{ + GByteArray *copy; + + copy = g_byte_array_sized_new (ba->len); + g_byte_array_append (copy, ba->data, ba->len); + return copy; +} + +static void +soup_byte_array_free (GByteArray *ba) +{ + g_byte_array_free (ba, TRUE); +} + +/** + * SOUP_TYPE_BYTE_ARRAY: + * + * glib does not define a #GType for #GByteArray, so libsoup + * defines this one itself. + **/ +GType +soup_byte_array_get_type (void) +{ + static volatile gsize type_volatile = 0; + + if (g_once_init_enter (&type_volatile)) { + GType type = g_boxed_type_register_static ( + g_intern_static_string ("SoupByteArray"), + (GBoxedCopyFunc) soup_byte_array_copy, + (GBoxedFreeFunc) soup_byte_array_free); + g_once_init_leave (&type_volatile, type); + } + return type_volatile; +} diff --git a/libsoup/soup-value-utils.h b/libsoup/soup-value-utils.h new file mode 100644 index 0000000..1d26526 --- /dev/null +++ b/libsoup/soup-value-utils.h @@ -0,0 +1,84 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2007 Red Hat, Inc. 
+ */ + +#ifndef SOUP_VALUE_UTILS_H +#define SOUP_VALUE_UTILS_H 1 + +#include +#include + +G_BEGIN_DECLS + +#define SOUP_VALUE_SETV(val, type, args) \ +G_STMT_START { \ + char *error = NULL; \ + \ + memset (val, 0, sizeof (GValue)); \ + g_value_init (val, type); \ + G_VALUE_COLLECT (val, args, G_VALUE_NOCOPY_CONTENTS, &error); \ + if (error) \ + g_free (error); \ +} G_STMT_END + +#define SOUP_VALUE_GETV(val, type, args) \ +G_STMT_START { \ + char *error = NULL; \ + \ + G_VALUE_LCOPY (val, args, G_VALUE_NOCOPY_CONTENTS, &error); \ + if (error) \ + g_free (error); \ +} G_STMT_END + +GHashTable *soup_value_hash_new (void); +GHashTable *soup_value_hash_new_with_vals (const char *first_key, + ...) G_GNUC_NULL_TERMINATED; +void soup_value_hash_insert_value (GHashTable *hash, + const char *key, + GValue *value); +void soup_value_hash_insert (GHashTable *hash, + const char *key, + GType type, + ...); +void soup_value_hash_insert_vals (GHashTable *hash, + const char *first_key, + ...) G_GNUC_NULL_TERMINATED; +gboolean soup_value_hash_lookup (GHashTable *hash, + const char *key, + GType type, + ...); +gboolean soup_value_hash_lookup_vals (GHashTable *hash, + const char *first_key, + ...) G_GNUC_NULL_TERMINATED; + +GValueArray *soup_value_array_from_args (va_list args); +gboolean soup_value_array_to_args (GValueArray *array, + va_list args); + +GValueArray *soup_value_array_new (void); +GValueArray *soup_value_array_new_with_vals (GType first_type, + ...) G_GNUC_NULL_TERMINATED; + +void soup_value_array_insert (GValueArray *array, + guint index_, + GType type, + ...); +void soup_value_array_append (GValueArray *array, + GType type, + ...); +void soup_value_array_append_vals (GValueArray *array, + GType first_type, + ...) 
G_GNUC_NULL_TERMINATED; +gboolean soup_value_array_get_nth (GValueArray *array, + guint index_, + GType type, + ...); + + +GType soup_byte_array_get_type (void); +#define SOUP_TYPE_BYTE_ARRAY (soup_byte_array_get_type ()) + +G_END_DECLS + +#endif /* SOUP_VALUE_UTILS_H */ diff --git a/libsoup/soup-xmlrpc.c b/libsoup/soup-xmlrpc.c new file mode 100644 index 0000000..71bf270 --- /dev/null +++ b/libsoup/soup-xmlrpc.c @@ -0,0 +1,801 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * soup-xmlrpc.c: XML-RPC parser/generator + * + * Copyright (C) 2007 Red Hat, Inc. + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include + +#include + +#include "soup-xmlrpc.h" +#include "soup-value-utils.h" +#include "soup-date.h" +#include "soup-message.h" +#include "soup-misc.h" +#include "soup-session.h" + +/** + * SECTION:soup-xmlrpc + * @short_description: XML-RPC support + * + **/ + +static xmlNode *find_real_node (xmlNode *node); + +static gboolean insert_value (xmlNode *parent, GValue *value); + +static void +insert_member (gpointer name, gpointer value, gpointer data) +{ + xmlNode *member, **struct_node = data; + + if (!*struct_node) + return; + + member = xmlNewChild (*struct_node, NULL, + (const xmlChar *)"member", NULL); + xmlNewTextChild (member, NULL, + (const xmlChar *)"name", (const xmlChar *)name); + if (!insert_value (member, value)) { + xmlFreeNode (*struct_node); + *struct_node = NULL; + } +} + +static gboolean +insert_value (xmlNode *parent, GValue *value) +{ + GType type = G_VALUE_TYPE (value); + xmlNode *xvalue; + char buf[128]; + + xvalue = xmlNewChild (parent, NULL, (const xmlChar *)"value", NULL); + + if (type == G_TYPE_INT) { + snprintf (buf, sizeof (buf), "%d", g_value_get_int (value)); + xmlNewChild (xvalue, NULL, + (const xmlChar *)"int", + (const xmlChar *)buf); + } else if (type == G_TYPE_BOOLEAN) { + snprintf (buf, sizeof (buf), "%d", g_value_get_boolean (value)); + xmlNewChild (xvalue, NULL, + (const xmlChar 
*)"boolean", + (const xmlChar *)buf); + } else if (type == G_TYPE_STRING) { + xmlNewTextChild (xvalue, NULL, + (const xmlChar *)"string", + (const xmlChar *)g_value_get_string (value)); + } else if (type == G_TYPE_DOUBLE) { + g_ascii_dtostr (buf, sizeof (buf), g_value_get_double (value)); + xmlNewChild (xvalue, NULL, + (const xmlChar *)"double", + (const xmlChar *)buf); + } else if (type == SOUP_TYPE_DATE) { + SoupDate *date = g_value_get_boxed (value); + char *timestamp = soup_date_to_string (date, SOUP_DATE_ISO8601_XMLRPC); + xmlNewChild (xvalue, NULL, + (const xmlChar *)"dateTime.iso8601", + (const xmlChar *)timestamp); + g_free (timestamp); + } else if (type == SOUP_TYPE_BYTE_ARRAY) { + GByteArray *ba = g_value_get_boxed (value); + char *encoded; + + encoded = g_base64_encode (ba->data, ba->len); + xmlNewChild (xvalue, NULL, + (const xmlChar *)"base64", + (const xmlChar *)encoded); + g_free (encoded); + } else if (type == G_TYPE_HASH_TABLE) { + GHashTable *hash = g_value_get_boxed (value); + xmlNode *struct_node; + + struct_node = xmlNewChild (xvalue, NULL, + (const xmlChar *)"struct", NULL); + g_hash_table_foreach (hash, insert_member, &struct_node); + if (!struct_node) + return FALSE; + } else if (type == G_TYPE_VALUE_ARRAY) { + GValueArray *va = g_value_get_boxed (value); + xmlNode *node; + int i; + + node = xmlNewChild (xvalue, NULL, + (const xmlChar *)"array", NULL); + node = xmlNewChild (node, NULL, + (const xmlChar *)"data", NULL); + for (i = 0; i < va->n_values; i++) { + if (!insert_value (node, &va->values[i])) + return FALSE; + } + } else + return FALSE; + + return TRUE; +} + +/** + * soup_xmlrpc_build_method_call: + * @method_name: the name of the XML-RPC method + * @params: (array length=n_params): arguments to @method + * @n_params: length of @params + * + * This creates an XML-RPC methodCall and returns it as a string. + * This is the low-level method that soup_xmlrpc_request_new() and + * soup_xmlrpc_call() are built on. 
+ * + * @params is an array of #GValue representing the parameters to + * @method. (It is *not* a #GValueArray, although if you have a + * #GValueArray, you can just pass its %values and %n_values fields.) + * + * The correspondence between glib types and XML-RPC types is: + * + * int: #int (%G_TYPE_INT) + * boolean: #gboolean (%G_TYPE_BOOLEAN) + * string: #char* (%G_TYPE_STRING) + * double: #double (%G_TYPE_DOUBLE) + * datetime.iso8601: #SoupDate (%SOUP_TYPE_DATE) + * base64: #GByteArray (%SOUP_TYPE_BYTE_ARRAY) + * struct: #GHashTable (%G_TYPE_HASH_TABLE) + * array: #GValueArray (%G_TYPE_VALUE_ARRAY) + * + * For structs, use a #GHashTable that maps strings to #GValue; + * soup_value_hash_new() and related methods can help with this. + * + * Return value: the text of the methodCall, or %NULL on error + **/ +char * +soup_xmlrpc_build_method_call (const char *method_name, + GValue *params, int n_params) +{ + xmlDoc *doc; + xmlNode *node, *param; + xmlChar *xmlbody; + int i, len; + char *body; + + doc = xmlNewDoc ((const xmlChar *)"1.0"); + doc->standalone = FALSE; + doc->encoding = xmlCharStrdup ("UTF-8"); + + node = xmlNewDocNode (doc, NULL, (const xmlChar *)"methodCall", NULL); + xmlDocSetRootElement (doc, node); + xmlNewChild (node, NULL, (const xmlChar *)"methodName", + (const xmlChar *)method_name); + + node = xmlNewChild (node, NULL, (const xmlChar *)"params", NULL); + for (i = 0; i < n_params; i++) { + param = xmlNewChild (node, NULL, + (const xmlChar *)"param", NULL); + if (!insert_value (param, ¶ms[i])) { + xmlFreeDoc (doc); + return NULL; + } + } + + xmlDocDumpMemory (doc, &xmlbody, &len); + body = g_strndup ((char *)xmlbody, len); + xmlFree (xmlbody); + xmlFreeDoc (doc); + return body; +} + +static SoupMessage * +soup_xmlrpc_request_newv (const char *uri, const char *method_name, va_list args) +{ + SoupMessage *msg; + GValueArray *params; + char *body; + + params = soup_value_array_from_args (args); + if (!params) + return NULL; + + body = 
soup_xmlrpc_build_method_call (method_name, params->values, + params->n_values); + g_value_array_free (params); + if (!body) + return NULL; + + msg = soup_message_new ("POST", uri); + soup_message_set_request (msg, "text/xml", SOUP_MEMORY_TAKE, + body, strlen (body)); + return msg; +} + +/** + * soup_xmlrpc_request_new: + * @uri: URI of the XML-RPC service + * @method_name: the name of the XML-RPC method to invoke at @uri + * @...: parameters for @method + * + * Creates an XML-RPC methodCall and returns a #SoupMessage, ready + * to send, for that method call. + * + * The parameters are passed as type/value pairs; ie, first a #GType, + * and then a value of the appropriate type, finally terminated by + * %G_TYPE_INVALID. + * + * Return value: (transfer full): a #SoupMessage encoding the + * indicated XML-RPC request. + **/ +SoupMessage * +soup_xmlrpc_request_new (const char *uri, const char *method_name, ...) +{ + SoupMessage *msg; + va_list args; + + va_start (args, method_name); + msg = soup_xmlrpc_request_newv (uri, method_name, args); + va_end (args); + return msg; +} + +/** + * soup_xmlrpc_build_method_response: + * @value: the return value + * + * This creates a (successful) XML-RPC methodResponse and returns it + * as a string. To create a fault response, use + * soup_xmlrpc_build_fault(). + * + * The glib type to XML-RPC type mapping is as with + * soup_xmlrpc_build_method_call(), qv. 
+ * + * Return value: the text of the methodResponse, or %NULL on error + **/ +char * +soup_xmlrpc_build_method_response (GValue *value) +{ + xmlDoc *doc; + xmlNode *node; + xmlChar *xmlbody; + char *body; + int len; + + doc = xmlNewDoc ((const xmlChar *)"1.0"); + doc->standalone = FALSE; + doc->encoding = xmlCharStrdup ("UTF-8"); + + node = xmlNewDocNode (doc, NULL, + (const xmlChar *)"methodResponse", NULL); + xmlDocSetRootElement (doc, node); + + node = xmlNewChild (node, NULL, (const xmlChar *)"params", NULL); + node = xmlNewChild (node, NULL, (const xmlChar *)"param", NULL); + if (!insert_value (node, value)) { + xmlFreeDoc (doc); + return NULL; + } + + xmlDocDumpMemory (doc, &xmlbody, &len); + body = g_strndup ((char *)xmlbody, len); + xmlFree (xmlbody); + xmlFreeDoc (doc); + return body; +} + +static char * +soup_xmlrpc_build_faultv (int fault_code, const char *fault_format, va_list args) +{ + xmlDoc *doc; + xmlNode *node, *member; + GValue value; + xmlChar *xmlbody; + char *fault_string, *body; + int len; + + fault_string = g_strdup_vprintf (fault_format, args); + + doc = xmlNewDoc ((const xmlChar *)"1.0"); + doc->standalone = FALSE; + doc->encoding = xmlCharStrdup ("UTF-8"); + + node = xmlNewDocNode (doc, NULL, + (const xmlChar *)"methodResponse", NULL); + xmlDocSetRootElement (doc, node); + node = xmlNewChild (node, NULL, (const xmlChar *)"fault", NULL); + node = xmlNewChild (node, NULL, (const xmlChar *)"value", NULL); + node = xmlNewChild (node, NULL, (const xmlChar *)"struct", NULL); + + memset (&value, 0, sizeof (value)); + + member = xmlNewChild (node, NULL, (const xmlChar *)"member", NULL); + xmlNewChild (member, NULL, + (const xmlChar *)"name", (const xmlChar *)"faultCode"); + g_value_init (&value, G_TYPE_INT); + g_value_set_int (&value, fault_code); + insert_value (member, &value); + g_value_unset (&value); + + member = xmlNewChild (node, NULL, (const xmlChar *)"member", NULL); + xmlNewChild (member, NULL, + (const xmlChar *)"name", (const xmlChar 
*)"faultString"); + g_value_init (&value, G_TYPE_STRING); + g_value_take_string (&value, fault_string); + insert_value (member, &value); + g_value_unset (&value); + + xmlDocDumpMemory (doc, &xmlbody, &len); + body = g_strndup ((char *)xmlbody, len); + xmlFree (xmlbody); + xmlFreeDoc (doc); + + return body; +} + +/** + * soup_xmlrpc_build_fault: + * @fault_code: the fault code + * @fault_format: a printf()-style format string + * @...: the parameters to @fault_format + * + * This creates an XML-RPC fault response and returns it as a string. + * (To create a successful response, use + * soup_xmlrpc_build_method_response().) + * + * Return value: the text of the fault + **/ +char * +soup_xmlrpc_build_fault (int fault_code, const char *fault_format, ...) +{ + va_list args; + char *body; + + va_start (args, fault_format); + body = soup_xmlrpc_build_faultv (fault_code, fault_format, args); + va_end (args); + return body; +} + +/** + * soup_xmlrpc_set_response: + * @msg: an XML-RPC request + * @type: the type of the response value + * @...: the response value + * + * Sets the status code and response body of @msg to indicate a + * successful XML-RPC call, with a return value given by @type and the + * following varargs argument, of the type indicated by @type. + **/ +void +soup_xmlrpc_set_response (SoupMessage *msg, GType type, ...) 
+{ + va_list args; + GValue value; + char *body; + + va_start (args, type); + SOUP_VALUE_SETV (&value, type, args); + va_end (args); + + body = soup_xmlrpc_build_method_response (&value); + g_value_unset (&value); + soup_message_set_status (msg, SOUP_STATUS_OK); + soup_message_set_response (msg, "text/xml", SOUP_MEMORY_TAKE, + body, strlen (body)); +} + +/** + * soup_xmlrpc_set_fault: + * @msg: an XML-RPC request + * @fault_code: the fault code + * @fault_format: a printf()-style format string + * @...: the parameters to @fault_format + * + * Sets the status code and response body of @msg to indicate an + * unsuccessful XML-RPC call, with the error described by @fault_code + * and @fault_format. + **/ +void +soup_xmlrpc_set_fault (SoupMessage *msg, int fault_code, + const char *fault_format, ...) +{ + va_list args; + char *body; + + va_start (args, fault_format); + body = soup_xmlrpc_build_faultv (fault_code, fault_format, args); + va_end (args); + + soup_message_set_status (msg, SOUP_STATUS_OK); + soup_message_set_response (msg, "text/xml", SOUP_MEMORY_TAKE, + body, strlen (body)); +} + + + +static gboolean +parse_value (xmlNode *xmlvalue, GValue *value) +{ + xmlNode *typenode; + const char *typename; + xmlChar *content; + + memset (value, 0, sizeof (GValue)); + + typenode = find_real_node (xmlvalue->children); + if (!typenode) { + /* If no type node, it's a string */ + content = xmlNodeGetContent (typenode); + g_value_init (value, G_TYPE_STRING); + g_value_set_string (value, (char *)content); + xmlFree (content); + return TRUE; + } + + typename = (const char *)typenode->name; + + if (!strcmp (typename, "i4") || !strcmp (typename, "int")) { + content = xmlNodeGetContent (typenode); + g_value_init (value, G_TYPE_INT); + g_value_set_int (value, atoi ((char *)content)); + xmlFree (content); + } else if (!strcmp (typename, "boolean")) { + content = xmlNodeGetContent (typenode); + g_value_init (value, G_TYPE_BOOLEAN); + g_value_set_boolean (value, atoi ((char 
*)content)); + xmlFree (content); + } else if (!strcmp (typename, "string")) { + content = xmlNodeGetContent (typenode); + g_value_init (value, G_TYPE_STRING); + g_value_set_string (value, (char *)content); + xmlFree (content); + } else if (!strcmp (typename, "double")) { + content = xmlNodeGetContent (typenode); + g_value_init (value, G_TYPE_DOUBLE); + g_value_set_double (value, g_ascii_strtod ((char *)content, NULL)); + xmlFree (content); + } else if (!strcmp (typename, "dateTime.iso8601")) { + content = xmlNodeGetContent (typenode); + g_value_init (value, SOUP_TYPE_DATE); + g_value_take_boxed (value, soup_date_new_from_string ((char *)content)); + xmlFree (content); + } else if (!strcmp (typename, "base64")) { + GByteArray *ba; + guchar *decoded; + gsize len; + + content = xmlNodeGetContent (typenode); + decoded = g_base64_decode ((char *)content, &len); + ba = g_byte_array_sized_new (len); + g_byte_array_append (ba, decoded, len); + g_free (decoded); + xmlFree (content); + g_value_init (value, SOUP_TYPE_BYTE_ARRAY); + g_value_take_boxed (value, ba); + } else if (!strcmp (typename, "struct")) { + xmlNode *member, *child, *mname, *mxval; + GHashTable *hash; + GValue mgval; + + hash = soup_value_hash_new (); + for (member = find_real_node (typenode->children); + member; + member = find_real_node (member->next)) { + if (strcmp ((const char *)member->name, "member") != 0) { + g_hash_table_destroy (hash); + return FALSE; + } + mname = mxval = NULL; + memset (&mgval, 0, sizeof (mgval)); + + for (child = find_real_node (member->children); + child; + child = find_real_node (child->next)) { + if (!strcmp ((const char *)child->name, "name")) + mname = child; + else if (!strcmp ((const char *)child->name, "value")) + mxval = child; + else + break; + } + + if (!mname || !mxval || !parse_value (mxval, &mgval)) { + g_hash_table_destroy (hash); + return FALSE; + } + + content = xmlNodeGetContent (mname); + soup_value_hash_insert_value (hash, (char *)content, &mgval); + xmlFree 
(content); + g_value_unset (&mgval); + } + g_value_init (value, G_TYPE_HASH_TABLE); + g_value_take_boxed (value, hash); + } else if (!strcmp (typename, "array")) { + xmlNode *data, *xval; + GValueArray *array; + GValue gval; + + data = find_real_node (typenode->children); + if (!data || strcmp ((const char *)data->name, "data") != 0) + return FALSE; + + array = g_value_array_new (1); + for (xval = find_real_node (data->children); + xval; + xval = find_real_node (xval->next)) { + memset (&gval, 0, sizeof (gval)); + if (strcmp ((const char *)xval->name, "value") != 0 || + !parse_value (xval, &gval)) { + g_value_array_free (array); + return FALSE; + } + + g_value_array_append (array, &gval); + g_value_unset (&gval); + } + g_value_init (value, G_TYPE_VALUE_ARRAY); + g_value_take_boxed (value, array); + } else + return FALSE; + + return TRUE; +} + +/** + * soup_xmlrpc_parse_method_call: + * @method_call: the XML-RPC methodCall string + * @length: the length of @method_call, or -1 if it is NUL-terminated + * @method_name: (out): on return, the methodName from @method_call + * @params: (out): on return, the parameters from @method_call + * + * Parses @method_call to get the name and parameters, and returns the + * parameter values in a #GValueArray; see also + * soup_xmlrpc_extract_method_call(), which is more convenient if you + * know in advance what the types of the parameters will be. + * + * Return value: success or failure. + **/ +gboolean +soup_xmlrpc_parse_method_call (const char *method_call, int length, + char **method_name, GValueArray **params) +{ + xmlDoc *doc; + xmlNode *node, *param, *xval; + xmlChar *xmlMethodName = NULL; + gboolean success = FALSE; + GValue value; + + doc = xmlParseMemory (method_call, + length == -1 ? 
strlen (method_call) : length); + if (!doc) + return FALSE; + + node = xmlDocGetRootElement (doc); + if (!node || strcmp ((const char *)node->name, "methodCall") != 0) + goto fail; + + node = find_real_node (node->children); + if (!node || strcmp ((const char *)node->name, "methodName") != 0) + goto fail; + xmlMethodName = xmlNodeGetContent (node); + + node = find_real_node (node->next); + if (!node || strcmp ((const char *)node->name, "params") != 0) + goto fail; + + *params = g_value_array_new (1); + param = find_real_node (node->children); + while (param && !strcmp ((const char *)param->name, "param")) { + xval = find_real_node (param->children); + if (!xval || strcmp ((const char *)xval->name, "value") != 0 || + !parse_value (xval, &value)) { + g_value_array_free (*params); + goto fail; + } + g_value_array_append (*params, &value); + g_value_unset (&value); + + param = find_real_node (param->next); + } + + success = TRUE; + *method_name = g_strdup ((char *)xmlMethodName); + +fail: + xmlFreeDoc (doc); + if (xmlMethodName) + xmlFree (xmlMethodName); + return success; +} + +/** + * soup_xmlrpc_extract_method_call: + * @method_call: the XML-RPC methodCall string + * @length: the length of @method_call, or -1 if it is NUL-terminated + * @method_name: (out): on return, the methodName from @method_call + * @...: return types and locations for parameters + * + * Parses @method_call to get the name and parameters, and puts + * the parameters into variables of the appropriate types. + * + * The parameters are handled similarly to + * @soup_xmlrpc_build_method_call, with pairs of types and values, + * terminated by %G_TYPE_INVALID, except that values are pointers to + * variables of the indicated type, rather than values of the type. + * + * See also soup_xmlrpc_parse_method_call(), which can be used if + * you don't know the types of the parameters. + * + * Return value: success or failure. 
+ **/ +gboolean +soup_xmlrpc_extract_method_call (const char *method_call, int length, + char **method_name, ...) +{ + GValueArray *params; + gboolean success; + va_list args; + + if (!soup_xmlrpc_parse_method_call (method_call, length, + method_name, ¶ms)) + return FALSE; + + va_start (args, method_name); + success = soup_value_array_to_args (params, args); + va_end (args); + + g_value_array_free (params); + return success; +} + +/** + * soup_xmlrpc_parse_method_response: + * @method_response: the XML-RPC methodResponse string + * @length: the length of @method_response, or -1 if it is NUL-terminated + * @value: (out): on return, the return value from @method_call + * @error: error return value + * + * Parses @method_response and returns the return value in @value. If + * @method_response is a fault, @value will be unchanged, and @error + * will be set to an error of type %SOUP_XMLRPC_FAULT, with the error + * #code containing the fault code, and the error #message containing + * the fault string. (If @method_response cannot be parsed at all, + * soup_xmlrpc_parse_method_response() will return %FALSE, but @error + * will be unset.) + * + * Return value: %TRUE if a return value was parsed, %FALSE if the + * response could not be parsed, or contained a fault. + **/ +gboolean +soup_xmlrpc_parse_method_response (const char *method_response, int length, + GValue *value, GError **error) +{ + xmlDoc *doc; + xmlNode *node; + gboolean success = FALSE; + + doc = xmlParseMemory (method_response, + length == -1 ? 
strlen (method_response) : length); + if (!doc) + return FALSE; + + node = xmlDocGetRootElement (doc); + if (!node || strcmp ((const char *)node->name, "methodResponse") != 0) + goto fail; + + node = find_real_node (node->children); + if (!node) + goto fail; + + if (!strcmp ((const char *)node->name, "fault") && error) { + int fault_code; + char *fault_string; + GValue fault_val; + GHashTable *fault_hash; + + node = find_real_node (node->children); + if (!node || strcmp ((const char *)node->name, "value") != 0) + goto fail; + if (!parse_value (node, &fault_val)) + goto fail; + if (!G_VALUE_HOLDS (&fault_val, G_TYPE_HASH_TABLE)) { + g_value_unset (&fault_val); + goto fail; + } + fault_hash = g_value_get_boxed (&fault_val); + if (!soup_value_hash_lookup (fault_hash, "faultCode", + G_TYPE_INT, &fault_code) || + !soup_value_hash_lookup (fault_hash, "faultString", + G_TYPE_STRING, &fault_string)) { + g_value_unset (&fault_val); + goto fail; + } + + g_set_error (error, SOUP_XMLRPC_FAULT, + fault_code, "%s", fault_string); + g_value_unset (&fault_val); + } else if (!strcmp ((const char *)node->name, "params")) { + node = find_real_node (node->children); + if (!node || strcmp ((const char *)node->name, "param") != 0) + goto fail; + node = find_real_node (node->children); + if (!node || strcmp ((const char *)node->name, "value") != 0) + goto fail; + if (!parse_value (node, value)) + goto fail; + success = TRUE; + } + +fail: + xmlFreeDoc (doc); + return success; +} + +/** + * soup_xmlrpc_extract_method_response: + * @method_response: the XML-RPC methodResponse string + * @length: the length of @method_response, or -1 if it is NUL-terminated + * @error: error return value + * @type: the expected type of the return value + * @...: location for return value + * + * Parses @method_response and extracts the return value into + * a variable of the correct type. 
+ * + * If @method_response is a fault, the return value will be unset, + * and @error will be set to an error of type %SOUP_XMLRPC_FAULT, with + * the error #code containing the fault code, and the error #message + * containing the fault string. (If @method_response cannot be parsed + * at all, soup_xmlrpc_extract_method_response() will return %FALSE, + * but @error will be unset.) + * + * Return value: %TRUE if a return value was parsed, %FALSE if the + * response was of the wrong type, or contained a fault. + **/ +gboolean +soup_xmlrpc_extract_method_response (const char *method_response, int length, + GError **error, GType type, ...) +{ + GValue value; + va_list args; + + if (!soup_xmlrpc_parse_method_response (method_response, length, + &value, error)) + return FALSE; + if (!G_VALUE_HOLDS (&value, type)) + return FALSE; + + va_start (args, type); + SOUP_VALUE_GETV (&value, type, args); + va_end (args); + + return TRUE; +} + + +GQuark +soup_xmlrpc_error_quark (void) +{ + static GQuark error; + if (!error) + error = g_quark_from_static_string ("soup_xmlrpc_error_quark"); + return error; +} + +GQuark +soup_xmlrpc_fault_quark (void) +{ + static GQuark error; + if (!error) + error = g_quark_from_static_string ("soup_xmlrpc_fault_quark"); + return error; +} + +static xmlNode * +find_real_node (xmlNode *node) +{ + while (node && (node->type == XML_COMMENT_NODE || + xmlIsBlankNode (node))) + node = node->next; + return node; +} diff --git a/libsoup/soup-xmlrpc.h b/libsoup/soup-xmlrpc.h new file mode 100644 index 0000000..380a31e --- /dev/null +++ b/libsoup/soup-xmlrpc.h @@ -0,0 +1,83 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2007 Red Hat, Inc. 
+ */ + +#ifndef SOUP_XMLRPC_H +#define SOUP_XMLRPC_H 1 + +#include + +G_BEGIN_DECLS + +/* XML-RPC client */ +char *soup_xmlrpc_build_method_call (const char *method_name, + GValue *params, + int n_params); +SoupMessage *soup_xmlrpc_request_new (const char *uri, + const char *method_name, + ...); +gboolean soup_xmlrpc_parse_method_response (const char *method_response, + int length, + GValue *value, + GError **error); +gboolean soup_xmlrpc_extract_method_response (const char *method_response, + int length, + GError **error, + GType type, + ...); + +/* XML-RPC server */ +gboolean soup_xmlrpc_parse_method_call (const char *method_call, + int length, + char **method_name, + GValueArray **params); +gboolean soup_xmlrpc_extract_method_call (const char *method_call, + int length, + char **method_name, + ...); +char *soup_xmlrpc_build_method_response (GValue *value); +char *soup_xmlrpc_build_fault (int fault_code, + const char *fault_format, + ...) G_GNUC_PRINTF (2, 3); +void soup_xmlrpc_set_response (SoupMessage *msg, + GType type, + ...); +void soup_xmlrpc_set_fault (SoupMessage *msg, + int fault_code, + const char *fault_format, + ...) G_GNUC_PRINTF (3, 4); + + +/* Errors */ +#define SOUP_XMLRPC_ERROR soup_xmlrpc_error_quark() +GQuark soup_xmlrpc_error_quark (void); + +typedef enum { + SOUP_XMLRPC_ERROR_ARGUMENTS, + SOUP_XMLRPC_ERROR_RETVAL +} SoupXMLRPCError; + +#define SOUP_XMLRPC_FAULT soup_xmlrpc_fault_quark() +GQuark soup_xmlrpc_fault_quark (void); + +/* From http://xmlrpc-epi.sourceforge.net/specs/rfc.fault_codes.php. + * These are an extension, not part of the XML-RPC spec; you can't + * assume servers will use them. 
+ */ +typedef enum { + SOUP_XMLRPC_FAULT_PARSE_ERROR_NOT_WELL_FORMED = -32700, + SOUP_XMLRPC_FAULT_PARSE_ERROR_UNSUPPORTED_ENCODING = -32701, + SOUP_XMLRPC_FAULT_PARSE_ERROR_INVALID_CHARACTER_FOR_ENCODING = -32702, + SOUP_XMLRPC_FAULT_SERVER_ERROR_INVALID_XML_RPC = -32600, + SOUP_XMLRPC_FAULT_SERVER_ERROR_REQUESTED_METHOD_NOT_FOUND = -32601, + SOUP_XMLRPC_FAULT_SERVER_ERROR_INVALID_METHOD_PARAMETERS = -32602, + SOUP_XMLRPC_FAULT_SERVER_ERROR_INTERNAL_XML_RPC_ERROR = -32603, + SOUP_XMLRPC_FAULT_APPLICATION_ERROR = -32500, + SOUP_XMLRPC_FAULT_SYSTEM_ERROR = -32400, + SOUP_XMLRPC_FAULT_TRANSPORT_ERROR = -32300 +} SoupXMLRPCFault; + +G_END_DECLS + +#endif /* SOUP_XMLRPC_H */ diff --git a/libsoup/soup.h b/libsoup/soup.h new file mode 100644 index 0000000..97170a8 --- /dev/null +++ b/libsoup/soup.h @@ -0,0 +1,50 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2000-2003, Ximian, Inc. + */ + +#ifndef SOUP_H +#define SOUP_H 1 + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef __cplusplus +} +#endif + +#endif /* SOUP_H */ diff --git a/m4/introspection.m4 b/m4/introspection.m4 new file mode 100644 index 0000000..589721c --- /dev/null +++ b/m4/introspection.m4 @@ -0,0 +1,94 @@ +dnl -*- mode: autoconf -*- +dnl Copyright 2009 Johan Dahlin +dnl +dnl This file is free software; the author(s) gives unlimited +dnl permission to copy and/or distribute it, with or without +dnl modifications, as long as this notice is preserved. 
+dnl + +# serial 1 + +m4_define([_GOBJECT_INTROSPECTION_CHECK_INTERNAL], +[ + AC_BEFORE([AC_PROG_LIBTOOL],[$0])dnl setup libtool first + AC_BEFORE([AM_PROG_LIBTOOL],[$0])dnl setup libtool first + AC_BEFORE([LT_INIT],[$0])dnl setup libtool first + + dnl enable/disable introspection + m4_if([$2], [require], + [dnl + enable_introspection=yes + ],[dnl + AC_ARG_ENABLE(introspection, + AS_HELP_STRING([--enable-introspection[=@<:@no/auto/yes@:>@]], + [Enable introspection for this build]),, + [enable_introspection=auto]) + ])dnl + + AC_MSG_CHECKING([for gobject-introspection]) + + dnl presence/version checking + AS_CASE([$enable_introspection], + [no], [dnl + found_introspection="no (disabled, use --enable-introspection to enable)" + ],dnl + [yes],[dnl + PKG_CHECK_EXISTS([gobject-introspection-1.0],, + AC_MSG_ERROR([gobject-introspection-1.0 is not installed])) + PKG_CHECK_EXISTS([gobject-introspection-1.0 >= $1], + found_introspection=yes, + AC_MSG_ERROR([You need to have gobject-introspection >= $1 installed to build AC_PACKAGE_NAME])) + ],dnl + [auto],[dnl + PKG_CHECK_EXISTS([gobject-introspection-1.0 >= $1], found_introspection=yes, found_introspection=no) + ],dnl + [dnl + AC_MSG_ERROR([invalid argument passed to --enable-introspection, should be one of @<:@no/auto/yes@:>@]) + ])dnl + + AC_MSG_RESULT([$found_introspection]) + + INTROSPECTION_SCANNER= + INTROSPECTION_COMPILER= + INTROSPECTION_GENERATE= + INTROSPECTION_GIRDIR= + INTROSPECTION_TYPELIBDIR= + if test "x$found_introspection" = "xyes"; then + INTROSPECTION_SCANNER=`$PKG_CONFIG --variable=g_ir_scanner gobject-introspection-1.0` + INTROSPECTION_COMPILER=`$PKG_CONFIG --variable=g_ir_compiler gobject-introspection-1.0` + INTROSPECTION_GENERATE=`$PKG_CONFIG --variable=g_ir_generate gobject-introspection-1.0` + INTROSPECTION_GIRDIR=`$PKG_CONFIG --variable=girdir gobject-introspection-1.0` + INTROSPECTION_TYPELIBDIR="$($PKG_CONFIG --variable=typelibdir gobject-introspection-1.0)" + INTROSPECTION_CFLAGS=`$PKG_CONFIG 
--cflags gobject-introspection-1.0` + INTROSPECTION_LIBS=`$PKG_CONFIG --libs gobject-introspection-1.0` + INTROSPECTION_MAKEFILE=`$PKG_CONFIG --variable=datadir gobject-introspection-1.0`/gobject-introspection-1.0/Makefile.introspection + fi + AC_SUBST(INTROSPECTION_SCANNER) + AC_SUBST(INTROSPECTION_COMPILER) + AC_SUBST(INTROSPECTION_GENERATE) + AC_SUBST(INTROSPECTION_GIRDIR) + AC_SUBST(INTROSPECTION_TYPELIBDIR) + AC_SUBST(INTROSPECTION_CFLAGS) + AC_SUBST(INTROSPECTION_LIBS) + AC_SUBST(INTROSPECTION_MAKEFILE) + + AM_CONDITIONAL(HAVE_INTROSPECTION, test "x$found_introspection" = "xyes") +]) + + +dnl Usage: +dnl GOBJECT_INTROSPECTION_CHECK([minimum-g-i-version]) + +AC_DEFUN([GOBJECT_INTROSPECTION_CHECK], +[ + _GOBJECT_INTROSPECTION_CHECK_INTERNAL([$1]) +]) + +dnl Usage: +dnl GOBJECT_INTROSPECTION_REQUIRE([minimum-g-i-version]) + + +AC_DEFUN([GOBJECT_INTROSPECTION_REQUIRE], +[ + _GOBJECT_INTROSPECTION_CHECK_INTERNAL([$1], [require]) +]) diff --git a/m4/libgcrypt.m4 b/m4/libgcrypt.m4 new file mode 100644 index 0000000..854eaaa --- /dev/null +++ b/m4/libgcrypt.m4 @@ -0,0 +1,108 @@ +dnl Autoconf macros for libgcrypt +dnl Copyright (C) 2002, 2004 Free Software Foundation, Inc. +dnl +dnl This file is free software; as a special exception the author gives +dnl unlimited permission to copy and/or distribute it, with or without +dnl modifications, as long as this notice is preserved. +dnl +dnl This file is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY, to the extent permitted by law; without even the +dnl implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + + +dnl AM_PATH_LIBGCRYPT([MINIMUM-VERSION, +dnl [ACTION-IF-FOUND [, ACTION-IF-NOT-FOUND ]]]) +dnl Test for libgcrypt and define LIBGCRYPT_CFLAGS and LIBGCRYPT_LIBS. +dnl MINIMUM-VERSION is a string with the version number optionally prefixed +dnl with the API version to also check the API compatibility. 
Example: +dnl a MINIMUM-VERSION of 1:1.2.5 won't pass the test unless the installed +dnl version of libgcrypt is at least 1.2.5 *and* the API number is 1. Using +dnl this feature allows one to prevent building against newer versions of libgcrypt +dnl with a changed API. +dnl +AC_DEFUN([AM_PATH_LIBGCRYPT], +[ AC_ARG_WITH(libgcrypt-prefix, + AC_HELP_STRING([--with-libgcrypt-prefix=PFX], + [prefix where LIBGCRYPT is installed (optional)]), + libgcrypt_config_prefix="$withval", libgcrypt_config_prefix="") + if test x$libgcrypt_config_prefix != x ; then + if test x${LIBGCRYPT_CONFIG+set} != xset ; then + LIBGCRYPT_CONFIG=$libgcrypt_config_prefix/bin/libgcrypt-config + fi + fi + + AC_PATH_PROG(LIBGCRYPT_CONFIG, libgcrypt-config, no) + tmp=ifelse([$1], ,1:1.2.0,$1) + if echo "$tmp" | grep ':' >/dev/null 2>/dev/null ; then + req_libgcrypt_api=`echo "$tmp" | sed 's/\(.*\):\(.*\)/\1/'` + min_libgcrypt_version=`echo "$tmp" | sed 's/\(.*\):\(.*\)/\2/'` + else + req_libgcrypt_api=0 + min_libgcrypt_version="$tmp" + fi + + AC_MSG_CHECKING(for LIBGCRYPT - version >= $min_libgcrypt_version) + ok=no + if test "$LIBGCRYPT_CONFIG" != "no" ; then + req_major=`echo $min_libgcrypt_version | \ + sed 's/\([[0-9]]*\)\.\([[0-9]]*\)\.\([[0-9]]*\)/\1/'` + req_minor=`echo $min_libgcrypt_version | \ + sed 's/\([[0-9]]*\)\.\([[0-9]]*\)\.\([[0-9]]*\)/\2/'` + req_micro=`echo $min_libgcrypt_version | \ + sed 's/\([[0-9]]*\)\.\([[0-9]]*\)\.\([[0-9]]*\)/\3/'` + libgcrypt_config_version=`$LIBGCRYPT_CONFIG --version` + major=`echo $libgcrypt_config_version | \ + sed 's/\([[0-9]]*\)\.\([[0-9]]*\)\.\([[0-9]]*\).*/\1/'` + minor=`echo $libgcrypt_config_version | \ + sed 's/\([[0-9]]*\)\.\([[0-9]]*\)\.\([[0-9]]*\).*/\2/'` + micro=`echo $libgcrypt_config_version | \ + sed 's/\([[0-9]]*\)\.\([[0-9]]*\)\.\([[0-9]]*\).*/\3/'` + if test "$major" -gt "$req_major"; then + ok=yes + else + if test "$major" -eq "$req_major"; then + if test "$minor" -gt "$req_minor"; then + ok=yes + else + if test "$minor" -eq "$req_minor"; 
then + if test "$micro" -ge "$req_micro"; then + ok=yes + fi + fi + fi + fi + fi + fi + if test $ok = yes; then + AC_MSG_RESULT([yes ($libgcrypt_config_version)]) + else + AC_MSG_RESULT(no) + fi + if test $ok = yes; then + # If we have a recent libgcrypt, we should also check that the + # API is compatible + if test "$req_libgcrypt_api" -gt 0 ; then + tmp=`$LIBGCRYPT_CONFIG --api-version 2>/dev/null || echo 0` + if test "$tmp" -gt 0 ; then + AC_MSG_CHECKING([LIBGCRYPT API version]) + if test "$req_libgcrypt_api" -eq "$tmp" ; then + AC_MSG_RESULT([okay]) + else + ok=no + AC_MSG_RESULT([does not match. want=$req_libgcrypt_api got=$tmp]) + fi + fi + fi + fi + if test $ok = yes; then + LIBGCRYPT_CFLAGS=`$LIBGCRYPT_CONFIG --cflags` + LIBGCRYPT_LIBS=`$LIBGCRYPT_CONFIG --libs` + ifelse([$2], , :, [$2]) + else + LIBGCRYPT_CFLAGS="" + LIBGCRYPT_LIBS="" + ifelse([$3], , :, [$3]) + fi + AC_SUBST(LIBGCRYPT_CFLAGS) + AC_SUBST(LIBGCRYPT_LIBS) +]) diff --git a/tests/Makefile.am b/tests/Makefile.am new file mode 100644 index 0000000..06f6c58 --- /dev/null +++ b/tests/Makefile.am @@ -0,0 +1,134 @@ +INCLUDES = \ + -I$(top_srcdir) \ + -DSRCDIR=\""$(srcdir)"\" \ + -DLIBSOUP_DISABLE_DEPRECATED \ + $(SOUP_MAINTAINER_FLAGS) \ + $(XML_CFLAGS) \ + $(GLIB_CFLAGS) + +LIBS = \ + $(top_builddir)/libsoup/libsoup-2.4.la \ + $(LIBGNUTLS_LIBS) \ + $(GLIB_LIBS) + +noinst_PROGRAMS = \ + chunk-test \ + coding-test \ + context-test \ + continue-test \ + cookies-test \ + date \ + dns \ + forms-test \ + get \ + getbug \ + header-parsing \ + misc-test \ + ntlm-test \ + redirect-test \ + requester-test \ + simple-httpd \ + simple-proxy \ + sniffing-test \ + streaming-test \ + timeout-test \ + uri-parsing \ + $(CURL_TESTS) \ + $(APACHE_TESTS) \ + $(XMLRPC_TESTS) + +TEST_SRCS = test-utils.c test-utils.h + +auth_test_SOURCES = auth-test.c $(TEST_SRCS) +chunk_test_SOURCES = chunk-test.c $(TEST_SRCS) +coding_test_SOURCES = coding-test.c $(TEST_SRCS) +context_test_SOURCES = context-test.c $(TEST_SRCS) 
+continue_test_SOURCES = continue-test.c $(TEST_SRCS) +cookies_test_SOURCES = cookies-test.c $(TEST_SRCS) +date_SOURCES = date.c $(TEST_SRCS) +dns_SOURCES = dns.c +forms_test_SOURCES = forms-test.c $(TEST_SRCS) +get_SOURCES = get.c +if BUILD_LIBSOUP_GNOME +get_LDADD = $(top_builddir)/libsoup/libsoup-gnome-2.4.la +endif +getbug_SOURCES = getbug.c +header_parsing_SOURCES = header-parsing.c $(TEST_SRCS) +misc_test_SOURCES = misc-test.c $(TEST_SRCS) +ntlm_test_SOURCES = ntlm-test.c $(TEST_SRCS) +proxy_test_SOURCES = proxy-test.c $(TEST_SRCS) +pull_api_SOURCES = pull-api.c $(TEST_SRCS) +range_test_SOURCES = range-test.c $(TEST_SRCS) +redirect_test_SOURCES = redirect-test.c $(TEST_SRCS) +requester_test_SOURCES = requester-test.c $(TEST_SRCS) +server_auth_test_SOURCES = server-auth-test.c $(TEST_SRCS) +simple_httpd_SOURCES = simple-httpd.c +simple_proxy_SOURCES = simple-proxy.c +sniffing_test_SOURCES = sniffing-test.c $(TEST_SRCS) +streaming_test_SOURCES = streaming-test.c $(TEST_SRCS) +timeout_test_SOURCES = timeout-test.c $(TEST_SRCS) +uri_parsing_SOURCES = uri-parsing.c $(TEST_SRCS) +xmlrpc_test_SOURCES = xmlrpc-test.c $(TEST_SRCS) +xmlrpc_server_test_SOURCES = xmlrpc-server-test.c $(TEST_SRCS) + +if HAVE_APACHE +APACHE_TESTS = auth-test proxy-test pull-api range-test +endif +if HAVE_CURL +CURL_TESTS = forms-test server-auth-test +endif +if HAVE_XMLRPC_EPI_PHP +XMLRPC_TESTS = xmlrpc-test xmlrpc-server-test +endif + +TESTS = \ + chunk-test \ + coding-test \ + context-test \ + continue-test \ + cookies-test \ + date \ + header-parsing \ + misc-test \ + ntlm-test \ + redirect-test \ + requester-test \ + sniffing-test \ + streaming-test \ + timeout-test \ + uri-parsing \ + $(APACHE_TESTS) \ + $(CURL_TESTS) \ + $(XMLRPC_TESTS) + +SNIFFING_FILES = \ + resources/atom.xml \ + resources/home.gif \ + resources/html_binary.html \ + resources/mbox \ + resources/mbox.gz \ + resources/ps_binary.ps \ + resources/rss20.xml \ + resources/test.html \ + resources/text_binary.txt + 
+EXTRA_DIST = \ + htdigest \ + htpasswd \ + httpd.conf.in \ + index.txt \ + libsoup.supp \ + test-cert.pem \ + test-key.pem \ + xmlrpc-server.php \ + $(SNIFFING_FILES) + +if MISSING_REGRESSION_TEST_PACKAGES +check-local: check-TESTS + @echo "" + @echo "NOTE: some tests were not run due to missing packages:" $(MISSING_REGRESSION_TEST_PACKAGES) + @echo "" +endif + +kill-httpd: + $(APACHE_HTTPD) -d `pwd` -f httpd.conf -k stop diff --git a/tests/auth-test.c b/tests/auth-test.c new file mode 100644 index 0000000..5b45a73 --- /dev/null +++ b/tests/auth-test.c @@ -0,0 +1,1106 @@ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include +#include +#include + +#include "libsoup/soup.h" +#include "libsoup/soup-auth.h" +#include "libsoup/soup-session.h" + +#include "test-utils.h" + +static GMainLoop *loop; + +typedef struct { + /* Explanation of what you should see */ + const char *explanation; + + /* URL to test against */ + const char *url; + + /* Provided passwords, 1 character each. ('1', '2', and '3' + * mean the correct passwords for "realm1", "realm2", and + * "realm3" respectively. '4' means "use the wrong password".) + * The first password (if present) will be used by + * authenticate(), and the second (if present) will be used by + * reauthenticate(). + */ + const char *provided; + + /* Whether to pass user and password in the URL or not. + */ + gboolean url_auth; + + /* Expected passwords, 1 character each. (As with the provided + * passwords, with the addition that '0' means "no + * Authorization header expected".) Used to verify that soup + * used the password it was supposed to at each step. + */ + const char *expected; + + /* What the final status code should be. 
*/ + guint final_status; +} SoupAuthTest; + +/* Will either point to main_tests or relogin_tests + */ +static SoupAuthTest *current_tests; + +static SoupAuthTest main_tests[] = { + { "No auth available, should fail", + "Basic/realm1/", "", FALSE, "0", SOUP_STATUS_UNAUTHORIZED }, + + { "Should fail with no auth, fail again with bad password, and give up", + "Basic/realm2/", "4", FALSE, "04", SOUP_STATUS_UNAUTHORIZED }, + + { "Auth provided this time, so should succeed", + "Basic/realm1/", "1", FALSE, "01", SOUP_STATUS_OK }, + + { "Now should automatically reuse previous auth", + "Basic/realm1/", "", FALSE, "1", SOUP_STATUS_OK }, + + { "Subdir should also automatically reuse auth", + "Basic/realm1/subdir/", "", FALSE, "1", SOUP_STATUS_OK }, + + { "Subdir should retry last auth, but will fail this time", + "Basic/realm1/realm2/", "", FALSE, "1", SOUP_STATUS_UNAUTHORIZED }, + + { "Now should use provided auth", + "Basic/realm1/realm2/", "2", FALSE, "02", SOUP_STATUS_OK }, + + { "Reusing last auth. Should succeed on first try", + "Basic/realm1/realm2/", "", FALSE, "2", SOUP_STATUS_OK }, + + { "Reuse will fail, but 2nd try will succeed because it's a known realm", + "Basic/realm1/realm2/realm1/", "", FALSE, "21", SOUP_STATUS_OK }, + + { "Should succeed on first try. 
(Known realm with cached password)", + "Basic/realm2/", "", FALSE, "2", SOUP_STATUS_OK }, + + { "Fail once, then use typoed password, then use right password", + "Basic/realm3/", "43", FALSE, "043", SOUP_STATUS_OK }, + + + { "No auth available, should fail", + "Digest/realm1/", "", FALSE, "0", SOUP_STATUS_UNAUTHORIZED }, + + { "Should fail with no auth, fail again with bad password, and give up", + "Digest/realm2/", "4", FALSE, "04", SOUP_STATUS_UNAUTHORIZED }, + + { "Known realm, auth provided, so should succeed", + "Digest/realm1/", "1", FALSE, "01", SOUP_STATUS_OK }, + + { "Now should automatically reuse previous auth", + "Digest/realm1/", "", FALSE, "1", SOUP_STATUS_OK }, + + { "Subdir should also automatically reuse auth", + "Digest/realm1/subdir/", "", FALSE, "1", SOUP_STATUS_OK }, + + { "Password provided, should succeed", + "Digest/realm2/", "2", FALSE, "02", SOUP_STATUS_OK }, + + { "Should already know correct domain and use provided auth on first try", + "Digest/realm1/realm2/", "2", FALSE, "2", SOUP_STATUS_OK }, + + { "Reusing last auth. 
Should succeed on first try", + "Digest/realm1/realm2/", "", FALSE, "2", SOUP_STATUS_OK }, + + { "Should succeed on first try because of earlier domain directive", + "Digest/realm1/realm2/realm1/", "", FALSE, "1", SOUP_STATUS_OK }, + + { "Fail once, then use typoed password, then use right password", + "Digest/realm3/", "43", FALSE, "043", SOUP_STATUS_OK }, + + + { "Make sure we haven't forgotten anything", + "Basic/realm1/", "", FALSE, "1", SOUP_STATUS_OK }, + + { "Make sure we haven't forgotten anything", + "Basic/realm1/realm2/", "", FALSE, "2", SOUP_STATUS_OK }, + + { "Make sure we haven't forgotten anything", + "Basic/realm1/realm2/realm1/", "", FALSE, "1", SOUP_STATUS_OK }, + + { "Make sure we haven't forgotten anything", + "Basic/realm2/", "", FALSE, "2", SOUP_STATUS_OK }, + + { "Make sure we haven't forgotten anything", + "Basic/realm3/", "", FALSE, "3", SOUP_STATUS_OK }, + + + { "Make sure we haven't forgotten anything", + "Digest/realm1/", "", FALSE, "1", SOUP_STATUS_OK }, + + { "Make sure we haven't forgotten anything", + "Digest/realm1/realm2/", "", FALSE, "2", SOUP_STATUS_OK }, + + { "Make sure we haven't forgotten anything", + "Digest/realm1/realm2/realm1/", "", FALSE, "1", SOUP_STATUS_OK }, + + { "Make sure we haven't forgotten anything", + "Digest/realm2/", "", FALSE, "2", SOUP_STATUS_OK }, + + { "Make sure we haven't forgotten anything", + "Digest/realm3/", "", FALSE, "3", SOUP_STATUS_OK }, + + { "Now the server will reject the formerly-good password", + "Basic/realm1/not/", "1", FALSE, /* should not be used */ "1", SOUP_STATUS_UNAUTHORIZED }, + + { "Make sure we've forgotten it", + "Basic/realm1/", "", FALSE, "0", SOUP_STATUS_UNAUTHORIZED }, + + { "Likewise, reject the formerly-good Digest password", + "Digest/realm1/not/", "1", FALSE, /* should not be used */ "1", SOUP_STATUS_UNAUTHORIZED }, + + { "Make sure we've forgotten it", + "Digest/realm1/", "", FALSE, "0", SOUP_STATUS_UNAUTHORIZED } +}; + +static const char *auths[] = { + "no password", 
"password 1", + "password 2", "password 3", + "intentionally wrong password", +}; + +static int +identify_auth (SoupMessage *msg) +{ + const char *header; + int num; + + header = soup_message_headers_get_one (msg->request_headers, + "Authorization"); + if (!header) + return 0; + + if (!g_ascii_strncasecmp (header, "Basic ", 6)) { + char *token; + gsize len; + + token = (char *)g_base64_decode (header + 6, &len); + num = token[len - 1] - '0'; + g_free (token); + } else { + const char *user; + + user = strstr (header, "username=\"user"); + if (user) + num = user[14] - '0'; + else + num = 0; + } + + g_assert (num >= 0 && num <= 4); + + return num; +} + +static void +handler (SoupMessage *msg, gpointer data) +{ + char *expected = data; + int auth, exp; + + auth = identify_auth (msg); + + debug_printf (1, " %d %s (using %s)\n", + msg->status_code, msg->reason_phrase, + auths[auth]); + + if (*expected) { + exp = *expected - '0'; + if (auth != exp) { + debug_printf (1, " expected %s!\n", auths[exp]); + errors++; + } + memmove (expected, expected + 1, strlen (expected)); + } else { + debug_printf (1, " expected to be finished\n"); + errors++; + } +} + +static void +authenticate (SoupSession *session, SoupMessage *msg, + SoupAuth *auth, gboolean retrying, gpointer data) +{ + int *i = data; + char *username, *password; + char num; + + if (!current_tests[*i].provided[0]) + return; + if (retrying) { + if (!current_tests[*i].provided[1]) + return; + num = current_tests[*i].provided[1]; + } else + num = current_tests[*i].provided[0]; + + username = g_strdup_printf ("user%c", num); + password = g_strdup_printf ("realm%c", num); + soup_auth_authenticate (auth, username, password); + g_free (username); + g_free (password); +} + +static void +bug271540_sent (SoupMessage *msg, gpointer data) +{ + int n = GPOINTER_TO_INT (g_object_get_data (G_OBJECT (msg), "#")); + gboolean *authenticated = data; + int auth = identify_auth (msg); + + if (!*authenticated && auth) { + debug_printf (1, " 
using auth on message %d before authenticating!!??\n", n); + errors++; + } else if (*authenticated && !auth) { + debug_printf (1, " sent unauthenticated message %d after authenticating!\n", n); + errors++; + } +} + +static void +bug271540_authenticate (SoupSession *session, SoupMessage *msg, + SoupAuth *auth, gboolean retrying, gpointer data) +{ + int n = GPOINTER_TO_INT (g_object_get_data (G_OBJECT (msg), "#")); + gboolean *authenticated = data; + + if (strcmp (soup_auth_get_scheme_name (auth), "Basic") != 0 || + strcmp (soup_auth_get_realm (auth), "realm1") != 0) + return; + + if (!*authenticated) { + debug_printf (1, " authenticating message %d\n", n); + soup_auth_authenticate (auth, "user1", "realm1"); + *authenticated = TRUE; + } else { + debug_printf (1, " asked to authenticate message %d after authenticating!\n", n); + errors++; + } +} + +static void +bug271540_finished (SoupSession *session, SoupMessage *msg, gpointer data) +{ + int *left = data; + int n = GPOINTER_TO_INT (g_object_get_data (G_OBJECT (msg), "#")); + + if (!SOUP_STATUS_IS_SUCCESSFUL (msg->status_code)) { + debug_printf (1, " got status '%d %s' on message %d!\n", + msg->status_code, msg->reason_phrase, n); + errors++; + } + + (*left)--; + if (!*left) + g_main_loop_quit (loop); +} + +static void +digest_nonce_authenticate (SoupSession *session, SoupMessage *msg, + SoupAuth *auth, gboolean retrying, gpointer data) +{ + if (retrying) + return; + + if (strcmp (soup_auth_get_scheme_name (auth), "Digest") != 0 || + strcmp (soup_auth_get_realm (auth), "realm1") != 0) + return; + + soup_auth_authenticate (auth, "user1", "realm1"); +} + +static void +digest_nonce_unauthorized (SoupMessage *msg, gpointer data) +{ + gboolean *got_401 = data; + *got_401 = TRUE; +} + +static void +do_digest_nonce_test (SoupSession *session, + const char *nth, const char *uri, + gboolean expect_401, gboolean expect_signal) +{ + SoupMessage *msg; + gboolean got_401; + + msg = soup_message_new (SOUP_METHOD_GET, uri); + if 
(expect_signal) { + g_signal_connect (session, "authenticate", + G_CALLBACK (digest_nonce_authenticate), + NULL); + } + soup_message_add_status_code_handler (msg, "got_headers", + SOUP_STATUS_UNAUTHORIZED, + G_CALLBACK (digest_nonce_unauthorized), + &got_401); + got_401 = FALSE; + soup_session_send_message (session, msg); + if (got_401 != expect_401) { + debug_printf (1, " %s request %s a 401 Unauthorized!\n", nth, + got_401 ? "got" : "did not get"); + errors++; + } + if (msg->status_code != SOUP_STATUS_OK) { + debug_printf (1, " %s request got status %d %s!\n", nth, + msg->status_code, msg->reason_phrase); + errors++; + } + if (errors == 0) + debug_printf (1, " %s request succeeded\n", nth); + g_object_unref (msg); +} + +/* Async auth test. We queue three requests to /Basic/realm1, ensuring + * that they are sent in order. The first and third ones will be + * paused from the authentication callback. The second will be allowed + * to fail. Shortly after the third one requests auth, we'll provide + * the auth and unpause the two remaining messages, allowing them to + * succeed. + */ + +static void +async_authenticate (SoupSession *session, SoupMessage *msg, + SoupAuth *auth, gboolean retrying, gpointer data) +{ + SoupAuth **saved_auth = data; + int id = GPOINTER_TO_INT (g_object_get_data (G_OBJECT (msg), "id")); + + debug_printf (2, " async_authenticate msg%d\n", id); + + /* The session will try to authenticate msg3 *before* sending + * it, because it already knows it's going to need the auth. + * Ignore that. 
+ */ + if (msg->status_code != SOUP_STATUS_UNAUTHORIZED) { + debug_printf (2, " (ignoring)\n"); + return; + } + + soup_session_pause_message (session, msg); + if (saved_auth) + *saved_auth = g_object_ref (auth); + g_main_loop_quit (loop); +} + +static void +async_finished (SoupSession *session, SoupMessage *msg, gpointer user_data) +{ + int *remaining = user_data; + int id = GPOINTER_TO_INT (g_object_get_data (G_OBJECT (msg), "id")); + + debug_printf (2, " async_finished msg%d\n", id); + + (*remaining)--; + if (!*remaining) + g_main_loop_quit (loop); +} + +static void +async_authenticate_assert_once (SoupSession *session, SoupMessage *msg, + SoupAuth *auth, gboolean retrying, gpointer data) +{ + gboolean *been_here = data; + + debug_printf (2, " async_authenticate_assert_once\n"); + + if (*been_here) { + debug_printf (1, " ERROR: async_authenticate_assert_once called twice\n"); + errors++; + } + *been_here = TRUE; +} + +static void +async_authenticate_assert_once_and_stop (SoupSession *session, SoupMessage *msg, + SoupAuth *auth, gboolean retrying, gpointer data) +{ + gboolean *been_here = data; + + debug_printf (2, " async_authenticate_assert_once_and_stop\n"); + + if (*been_here) { + debug_printf (1, " ERROR: async_authenticate_assert_once called twice\n"); + errors++; + } + *been_here = TRUE; + + soup_session_pause_message (session, msg); + g_main_loop_quit (loop); +} + +static void +do_async_auth_test (const char *base_uri) +{ + SoupSession *session; + SoupMessage *msg1, *msg2, *msg3, msg2_bak; + guint auth_id; + char *uri; + SoupAuth *auth = NULL; + int remaining; + gboolean been_there; + + debug_printf (1, "\nTesting async auth:\n"); + + session = soup_test_session_new (SOUP_TYPE_SESSION_ASYNC, NULL); + remaining = 0; + + uri = g_strconcat (base_uri, "Basic/realm1/", NULL); + + msg1 = soup_message_new ("GET", uri); + g_object_set_data (G_OBJECT (msg1), "id", GINT_TO_POINTER (1)); + auth_id = g_signal_connect (session, "authenticate", + G_CALLBACK 
(async_authenticate), &auth); + g_object_ref (msg1); + remaining++; + soup_session_queue_message (session, msg1, async_finished, &remaining); + g_main_loop_run (loop); + g_signal_handler_disconnect (session, auth_id); + + /* async_authenticate will pause msg1 and quit loop */ + + msg2 = soup_message_new ("GET", uri); + g_object_set_data (G_OBJECT (msg2), "id", GINT_TO_POINTER (2)); + soup_session_send_message (session, msg2); + + if (msg2->status_code == SOUP_STATUS_UNAUTHORIZED) + debug_printf (1, " msg2 failed as expected\n"); + else { + debug_printf (1, " msg2 got wrong status! (%u)\n", + msg2->status_code); + errors++; + } + + /* msg2 should be done at this point; assuming everything is + * working correctly, the session won't look at it again; we + * ensure that if it does, it will crash the test program. + */ + memcpy (&msg2_bak, msg2, sizeof (SoupMessage)); + memset (msg2, 0, sizeof (SoupMessage)); + + msg3 = soup_message_new ("GET", uri); + g_object_set_data (G_OBJECT (msg3), "id", GINT_TO_POINTER (3)); + auth_id = g_signal_connect (session, "authenticate", + G_CALLBACK (async_authenticate), NULL); + g_object_ref (msg3); + remaining++; + soup_session_queue_message (session, msg3, async_finished, &remaining); + g_main_loop_run (loop); + g_signal_handler_disconnect (session, auth_id); + + /* async_authenticate will pause msg3 and quit loop */ + + /* Now do the auth, and restart */ + if (auth) { + soup_auth_authenticate (auth, "user1", "realm1"); + g_object_unref (auth); + soup_session_unpause_message (session, msg1); + soup_session_unpause_message (session, msg3); + + g_main_loop_run (loop); + + /* async_finished will quit the loop */ + } else { + debug_printf (1, " msg1 didn't get authenticate signal!\n"); + errors++; + } + + if (msg1->status_code == SOUP_STATUS_OK) + debug_printf (1, " msg1 succeeded\n"); + else { + debug_printf (1, " msg1 FAILED! 
(%u %s)\n", + msg1->status_code, msg1->reason_phrase); + errors++; + } + if (msg3->status_code == SOUP_STATUS_OK) + debug_printf (1, " msg3 succeeded\n"); + else { + debug_printf (1, " msg3 FAILED! (%u %s)\n", + msg3->status_code, msg3->reason_phrase); + errors++; + } + + soup_test_session_abort_unref (session); + + g_object_unref (msg1); + g_object_unref (msg3); + memcpy (msg2, &msg2_bak, sizeof (SoupMessage)); + g_object_unref (msg2); + + /* Test that giving the wrong password doesn't cause multiple + * authenticate signals the second time. + */ + debug_printf (1, "\nTesting async auth with wrong password (#522601):\n"); + + session = soup_test_session_new (SOUP_TYPE_SESSION_ASYNC, NULL); + remaining = 0; + auth = NULL; + + msg1 = soup_message_new ("GET", uri); + g_object_set_data (G_OBJECT (msg1), "id", GINT_TO_POINTER (1)); + auth_id = g_signal_connect (session, "authenticate", + G_CALLBACK (async_authenticate), &auth); + g_object_ref (msg1); + remaining++; + soup_session_queue_message (session, msg1, async_finished, &remaining); + g_main_loop_run (loop); + g_signal_handler_disconnect (session, auth_id); + soup_auth_authenticate (auth, "user1", "wrong"); + g_object_unref (auth); + soup_session_unpause_message (session, msg1); + + been_there = FALSE; + auth_id = g_signal_connect (session, "authenticate", + G_CALLBACK (async_authenticate_assert_once), + &been_there); + g_main_loop_run (loop); + g_signal_handler_disconnect (session, auth_id); + + if (!been_there) { + debug_printf (1, " authenticate not emitted?\n"); + errors++; + } + + soup_test_session_abort_unref (session); + g_object_unref (msg1); + + /* Test that giving no password doesn't cause multiple + * authenticate signals the second time. 
+ */ + debug_printf (1, "\nTesting async auth with no password (#583462):\n"); + + session = soup_test_session_new (SOUP_TYPE_SESSION_ASYNC, NULL); + remaining = 0; + + /* Send a message that doesn't actually authenticate + */ + msg1 = soup_message_new ("GET", uri); + g_object_set_data (G_OBJECT (msg1), "id", GINT_TO_POINTER (1)); + auth_id = g_signal_connect (session, "authenticate", + G_CALLBACK (async_authenticate), NULL); + g_object_ref (msg1); + remaining++; + soup_session_queue_message (session, msg1, async_finished, &remaining); + g_main_loop_run (loop); + g_signal_handler_disconnect (session, auth_id); + soup_session_unpause_message (session, msg1); + g_main_loop_run (loop); + g_object_unref(msg1); + + /* Now send a second message */ + msg1 = soup_message_new ("GET", uri); + g_object_set_data (G_OBJECT (msg1), "id", GINT_TO_POINTER (2)); + g_object_ref (msg1); + been_there = FALSE; + auth_id = g_signal_connect (session, "authenticate", + G_CALLBACK (async_authenticate_assert_once_and_stop), + &been_there); + remaining++; + soup_session_queue_message (session, msg1, async_finished, &remaining); + g_main_loop_run (loop); + soup_session_unpause_message (session, msg1); + + g_main_loop_run (loop); + g_signal_handler_disconnect (session, auth_id); + + soup_test_session_abort_unref (session); + g_object_unref (msg1); + + g_free (uri); +} + +typedef struct { + const char *password; + struct { + const char *headers; + const char *response; + } round[2]; +} SelectAuthData; + +static void +select_auth_authenticate (SoupSession *session, SoupMessage *msg, + SoupAuth *auth, gboolean retrying, gpointer data) +{ + SelectAuthData *sad = data; + const char *header, *basic, *digest; + int round = retrying ? 
1 : 0; + + header = soup_message_headers_get_list (msg->response_headers, + "WWW-Authenticate"); + basic = strstr (header, "Basic"); + digest = strstr (header, "Digest"); + if (basic && digest) { + if (basic < digest) + sad->round[round].headers = "Basic, Digest"; + else + sad->round[round].headers = "Digest, Basic"; + } else if (basic) + sad->round[round].headers = "Basic"; + else if (digest) + sad->round[round].headers = "Digest"; + + sad->round[round].response = soup_auth_get_scheme_name (auth); + if (sad->password && !retrying) + soup_auth_authenticate (auth, "user", sad->password); +} + +static void +select_auth_test_one (SoupURI *uri, + gboolean disable_digest, const char *password, + const char *first_headers, const char *first_response, + const char *second_headers, const char *second_response, + guint final_status) +{ + SelectAuthData sad; + SoupMessage *msg; + SoupSession *session; + + session = soup_test_session_new (SOUP_TYPE_SESSION_ASYNC, NULL); + if (disable_digest) + soup_session_remove_feature_by_type (session, SOUP_TYPE_AUTH_DIGEST); + + g_signal_connect (session, "authenticate", + G_CALLBACK (select_auth_authenticate), &sad); + memset (&sad, 0, sizeof (sad)); + sad.password = password; + + msg = soup_message_new_from_uri ("GET", uri); + soup_session_send_message (session, msg); + + if (strcmp (sad.round[0].headers, first_headers) != 0) { + debug_printf (1, " Header order wrong: expected %s, got %s\n", + first_headers, sad.round[0].headers); + errors++; + } + if (strcmp (sad.round[0].response, first_response) != 0) { + debug_printf (1, " Selected auth type wrong: expected %s, got %s\n", + first_response, sad.round[0].response); + errors++; + } + + if (second_headers && !sad.round[1].headers) { + debug_printf (1, " Expected a second round!\n"); + errors++; + } else if (!second_headers && sad.round[1].headers) { + debug_printf (1, " Didn't expect a second round!\n"); + errors++; + } else if (second_headers) { + if (strcmp (sad.round[1].headers, 
second_headers) != 0) { + debug_printf (1, " Second round header order wrong: expected %s, got %s\n", + second_headers, sad.round[1].headers); + errors++; + } + if (strcmp (sad.round[1].response, second_response) != 0) { + debug_printf (1, " Second round selected auth type wrong: expected %s, got %s\n", + second_response, sad.round[1].response); + errors++; + } + } + + if (msg->status_code != final_status) { + debug_printf (1, " Final status wrong: expected %u, got %u\n", + final_status, msg->status_code); + errors++; + } + + g_object_unref (msg); + soup_test_session_abort_unref (session); +} + +static void +server_callback (SoupServer *server, SoupMessage *msg, + const char *path, GHashTable *query, + SoupClientContext *context, gpointer data) +{ + soup_message_set_response (msg, "text/plain", + SOUP_MEMORY_STATIC, + "OK\r\n", 4); + soup_message_set_status (msg, SOUP_STATUS_OK); +} + +static gboolean +server_basic_auth_callback (SoupAuthDomain *auth_domain, SoupMessage *msg, + const char *username, const char *password, gpointer data) +{ + if (strcmp (username, "user") != 0) + return FALSE; + return strcmp (password, "good-basic") == 0; +} + +static char * +server_digest_auth_callback (SoupAuthDomain *auth_domain, SoupMessage *msg, + const char *username, gpointer data) +{ + if (strcmp (username, "user") != 0) + return NULL; + return soup_auth_domain_digest_encode_password ("user", + "auth-test", + "good"); +} + +static void +do_select_auth_test (void) +{ + SoupServer *server; + SoupAuthDomain *basic_auth_domain, *digest_auth_domain; + SoupURI *uri; + + debug_printf (1, "\nTesting selection among multiple auths:\n"); + + /* It doesn't seem to be possible to configure Apache to serve + * multiple auth types for a single URL. So we have to use + * SoupServer here. We know that SoupServer handles the server + * side of this scenario correctly, because we test it against + * curl in server-auth-test. 
+ */ + server = soup_test_server_new (FALSE); + soup_server_add_handler (server, NULL, + server_callback, NULL, NULL); + + uri = soup_uri_new ("http://127.0.0.1/"); + soup_uri_set_port (uri, soup_server_get_port (server)); + + basic_auth_domain = soup_auth_domain_basic_new ( + SOUP_AUTH_DOMAIN_REALM, "auth-test", + SOUP_AUTH_DOMAIN_ADD_PATH, "/", + SOUP_AUTH_DOMAIN_BASIC_AUTH_CALLBACK, server_basic_auth_callback, + NULL); + soup_server_add_auth_domain (server, basic_auth_domain); + + digest_auth_domain = soup_auth_domain_digest_new ( + SOUP_AUTH_DOMAIN_REALM, "auth-test", + SOUP_AUTH_DOMAIN_ADD_PATH, "/", + SOUP_AUTH_DOMAIN_DIGEST_AUTH_CALLBACK, server_digest_auth_callback, + NULL); + soup_server_add_auth_domain (server, digest_auth_domain); + + debug_printf (1, " Testing with no auth\n"); + select_auth_test_one (uri, FALSE, NULL, + "Basic, Digest", "Digest", + NULL, NULL, + SOUP_STATUS_UNAUTHORIZED); + + debug_printf (1, " Testing with bad password\n"); + select_auth_test_one (uri, FALSE, "bad", + "Basic, Digest", "Digest", + "Basic, Digest", "Digest", + SOUP_STATUS_UNAUTHORIZED); + + debug_printf (1, " Testing with good password\n"); + select_auth_test_one (uri, FALSE, "good", + "Basic, Digest", "Digest", + NULL, NULL, + SOUP_STATUS_OK); + + /* Test with Digest disabled in the client. 
*/ + debug_printf (1, " Testing without Digest with no auth\n"); + select_auth_test_one (uri, TRUE, NULL, + "Basic, Digest", "Basic", + NULL, NULL, + SOUP_STATUS_UNAUTHORIZED); + + debug_printf (1, " Testing without Digest with bad password\n"); + select_auth_test_one (uri, TRUE, "bad", + "Basic, Digest", "Basic", + "Basic, Digest", "Basic", + SOUP_STATUS_UNAUTHORIZED); + + debug_printf (1, " Testing without Digest with good password\n"); + select_auth_test_one (uri, TRUE, "good-basic", + "Basic, Digest", "Basic", + NULL, NULL, + SOUP_STATUS_OK); + + /* Now flip the order of the domains, verify that this flips + * the order of the headers, and make sure that digest auth + * *still* gets used. + */ + + soup_server_remove_auth_domain (server, basic_auth_domain); + soup_server_remove_auth_domain (server, digest_auth_domain); + soup_server_add_auth_domain (server, digest_auth_domain); + soup_server_add_auth_domain (server, basic_auth_domain); + + debug_printf (1, " Testing flipped with no auth\n"); + select_auth_test_one (uri, FALSE, NULL, + "Digest, Basic", "Digest", + NULL, NULL, + SOUP_STATUS_UNAUTHORIZED); + + debug_printf (1, " Testing flipped with bad password\n"); + select_auth_test_one (uri, FALSE, "bad", + "Digest, Basic", "Digest", + "Digest, Basic", "Digest", + SOUP_STATUS_UNAUTHORIZED); + + debug_printf (1, " Testing flipped with good password\n"); + select_auth_test_one (uri, FALSE, "good", + "Digest, Basic", "Digest", + NULL, NULL, + SOUP_STATUS_OK); + + g_object_unref (basic_auth_domain); + g_object_unref (digest_auth_domain); + soup_uri_free (uri); + soup_test_server_quit_unref (server); +} + +static SoupAuthTest relogin_tests[] = { + { "Auth provided via URL, should succeed", + "Basic/realm12/", "1", TRUE, "01", SOUP_STATUS_OK }, + + { "Now should automatically reuse previous auth", + "Basic/realm12/", "", FALSE, "1", SOUP_STATUS_OK }, + + { "Different auth provided via URL for the same realm, should succeed", + "Basic/realm12/", "2", TRUE, "2", 
SOUP_STATUS_OK }, + + { "Subdir should also automatically reuse auth", + "Basic/realm12/subdir/", "", FALSE, "2", SOUP_STATUS_OK }, + + { "Should fail with no auth", + "Basic/realm12/", "4", TRUE, "4", SOUP_STATUS_UNAUTHORIZED }, + + { "Make sure we've forgotten it", + "Basic/realm12/", "", FALSE, "0", SOUP_STATUS_UNAUTHORIZED }, + + { "Should fail with no auth, fail again with bad password, and give up", + "Basic/realm12/", "3", FALSE, "03", SOUP_STATUS_UNAUTHORIZED }, +}; + +static void +do_batch_tests (const gchar *base_uri_str, gint ntests) +{ + SoupSession *session; + SoupMessage *msg; + char *expected, *uristr; + SoupURI *base_uri; + int i; + + session = soup_test_session_new (SOUP_TYPE_SESSION_ASYNC, NULL); + g_signal_connect (session, "authenticate", + G_CALLBACK (authenticate), &i); + + base_uri = soup_uri_new (base_uri_str); + + for (i = 0; i < ntests; i++) { + SoupURI *soup_uri = soup_uri_new_with_base (base_uri, current_tests[i].url); + + debug_printf (1, "Test %d: %s\n", i + 1, current_tests[i].explanation); + + if (current_tests[i].url_auth) { + gchar *username = g_strdup_printf ("user%c", current_tests[i].provided[0]); + gchar *password = g_strdup_printf ("realm%c", current_tests[i].provided[0]); + soup_uri_set_user (soup_uri, username); + soup_uri_set_password (soup_uri, password); + g_free (username); + g_free (password); + } + + msg = soup_message_new_from_uri (SOUP_METHOD_GET, soup_uri); + soup_uri_free (soup_uri); + if (!msg) { + fprintf (stderr, "auth-test: Could not parse URI\n"); + exit (1); + } + + uristr = soup_uri_to_string (soup_message_get_uri (msg), FALSE); + debug_printf (1, " GET %s\n", uristr); + g_free (uristr); + + expected = g_strdup (current_tests[i].expected); + soup_message_add_status_code_handler ( + msg, "got_headers", SOUP_STATUS_UNAUTHORIZED, + G_CALLBACK (handler), expected); + soup_message_add_status_code_handler ( + msg, "got_headers", SOUP_STATUS_OK, + G_CALLBACK (handler), expected); + soup_session_send_message 
(session, msg); + if (msg->status_code != SOUP_STATUS_UNAUTHORIZED && + msg->status_code != SOUP_STATUS_OK) { + debug_printf (1, " %d %s !\n", msg->status_code, + msg->reason_phrase); + errors++; + } + if (*expected) { + debug_printf (1, " expected %d more round(s)\n", + (int)strlen (expected)); + errors++; + } + g_free (expected); + + if (msg->status_code != current_tests[i].final_status) { + debug_printf (1, " expected %d\n", + current_tests[i].final_status); + } + + debug_printf (1, "\n"); + + g_object_unref (msg); + } + soup_uri_free (base_uri); + + soup_test_session_abort_unref (session); +} + +int +main (int argc, char **argv) +{ + SoupSession *session; + SoupMessage *msg; + const char *base_uri; + char *uri; + gboolean authenticated; + int i, ntests; + + test_init (argc, argv, NULL); + apache_init (); + + base_uri = "http://127.0.0.1:47524/"; + + /* Main tests */ + current_tests = main_tests; + ntests = G_N_ELEMENTS (main_tests); + do_batch_tests (base_uri, ntests); + + /* Re-login tests */ + current_tests = relogin_tests; + ntests = G_N_ELEMENTS (relogin_tests); + do_batch_tests (base_uri, ntests); + + /* And now for some regression tests */ + loop = g_main_loop_new (NULL, TRUE); + + debug_printf (1, "Testing pipelined auth (bug 271540):\n"); + session = soup_test_session_new (SOUP_TYPE_SESSION_ASYNC, NULL); + + authenticated = FALSE; + g_signal_connect (session, "authenticate", + G_CALLBACK (bug271540_authenticate), &authenticated); + + uri = g_strconcat (base_uri, "Basic/realm1/", NULL); + for (i = 0; i < 10; i++) { + msg = soup_message_new (SOUP_METHOD_GET, uri); + g_object_set_data (G_OBJECT (msg), "#", GINT_TO_POINTER (i + 1)); + g_signal_connect (msg, "wrote_headers", + G_CALLBACK (bug271540_sent), &authenticated); + + soup_session_queue_message (session, msg, + bug271540_finished, &i); + } + g_free (uri); + + g_main_loop_run (loop); + soup_test_session_abort_unref (session); + + debug_printf (1, "\nTesting digest nonce expiration:\n"); + + /* We test 
two different things here: + * + * 1. If we get a 401 response with + * "WWW-Authenticate: Digest stale=true...", we should + * retry and succeed *without* the session asking for a + * password again. + * + * 2. If we get a successful response with + * "Authentication-Info: nextnonce=...", we should update + * the nonce automatically so as to avoid getting a + * stale nonce error on the next request. + * + * In our Apache config, /Digest/realm1 and + * /Digest/realm1/expire are set up to use the same auth info, + * but only the latter has an AuthDigestNonceLifetime (of 2 + * seconds). The way nonces work in Apache, a nonce received + * from /Digest/realm1 will still expire in + * /Digest/realm1/expire, but it won't issue a nextnonce for a + * request in /Digest/realm1. This lets us test both + * behaviors. + * + * The expected conversation is: + * + * First message + * GET /Digest/realm1 + * + * 401 Unauthorized + * WWW-Authenticate: Digest nonce=A + * + * [emit 'authenticate'] + * + * GET /Digest/realm1 + * Authorization: Digest nonce=A + * + * 200 OK + * [No Authentication-Info] + * + * [sleep 2 seconds: nonce A is no longer valid, but we have no + * way of knowing that] + * + * Second message + * GET /Digest/realm1/expire/ + * Authorization: Digest nonce=A + * + * 401 Unauthorized + * WWW-Authenticate: Digest stale=true nonce=B + * + * GET /Digest/realm1/expire/ + * Authorization: Digest nonce=B + * + * 200 OK + * Authentication-Info: nextnonce=C + * + * [sleep 1 second] + * + * Third message + * GET /Digest/realm1/expire/ + * Authorization: Digest nonce=C + * [nonce=B would work here too] + * + * 200 OK + * Authentication-Info: nextnonce=D + * + * [sleep 1 second; nonces B and C are no longer valid, but D is] + * + * Fourth message + * GET /Digest/realm1/expire/ + * Authorization: Digest nonce=D + * + * 200 OK + * Authentication-Info: nextnonce=D + * + */ + + session = soup_test_session_new (SOUP_TYPE_SESSION_ASYNC, NULL); + + uri = g_strconcat (base_uri, 
"Digest/realm1/", NULL); + do_digest_nonce_test (session, "First", uri, TRUE, TRUE); + g_free (uri); + sleep (2); + uri = g_strconcat (base_uri, "Digest/realm1/expire/", NULL); + do_digest_nonce_test (session, "Second", uri, TRUE, FALSE); + sleep (1); + do_digest_nonce_test (session, "Third", uri, FALSE, FALSE); + sleep (1); + do_digest_nonce_test (session, "Fourth", uri, FALSE, FALSE); + g_free (uri); + + soup_test_session_abort_unref (session); + + /* Async auth */ + do_async_auth_test (base_uri); + + /* Selecting correct auth when multiple auth types are available */ + do_select_auth_test (); + + g_main_loop_unref (loop); + + test_cleanup (); + return errors != 0; +} diff --git a/tests/chunk-test.c b/tests/chunk-test.c new file mode 100644 index 0000000..435f7fa --- /dev/null +++ b/tests/chunk-test.c @@ -0,0 +1,494 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2008 Red Hat, Inc. + */ + +#include "config.h" + +#include +#include +#include + +#include +#include + +#include "test-utils.h" + +typedef struct { + SoupSession *session; + SoupBuffer *chunks[3]; + int next, nwrote, nfreed; + gboolean streaming; +} PutTestData; + +static SoupBuffer * +error_chunk_allocator (SoupMessage *msg, gsize max_len, gpointer user_data) +{ + /* This should never be called, because there is no response body. 
*/ + debug_printf (1, " error_chunk_allocator called!\n"); + errors++; + return soup_buffer_new (SOUP_MEMORY_TAKE, g_malloc (100), 100); +} + +static void +write_next_chunk (SoupMessage *msg, gpointer user_data) +{ + PutTestData *ptd = user_data; + + debug_printf (2, " writing chunk %d\n", ptd->next); + + if (ptd->streaming && ptd->next > 0 && ptd->chunks[ptd->next - 1]) { + debug_printf (1, " error: next chunk requested before last one freed!\n"); + errors++; + } + + if (ptd->next < G_N_ELEMENTS (ptd->chunks)) { + soup_message_body_append_buffer (msg->request_body, + ptd->chunks[ptd->next]); + soup_buffer_free (ptd->chunks[ptd->next]); + ptd->next++; + } else + soup_message_body_complete (msg->request_body); + soup_session_unpause_message (ptd->session, msg); +} + +/* This is not a supported part of the API. Use SOUP_MESSAGE_CAN_REBUILD + * instead. + */ +static void +write_next_chunk_streaming_hack (SoupMessage *msg, gpointer user_data) +{ + PutTestData *ptd = user_data; + SoupBuffer *chunk; + + debug_printf (2, " freeing chunk at %d\n", ptd->nfreed); + chunk = soup_message_body_get_chunk (msg->request_body, ptd->nfreed); + if (chunk) { + ptd->nfreed += chunk->length; + soup_message_body_wrote_chunk (msg->request_body, chunk); + soup_buffer_free (chunk); + } else { + debug_printf (1, " error: written chunk does not exist!\n"); + errors++; + } + write_next_chunk (msg, user_data); +} + +static void +wrote_body_data (SoupMessage *msg, SoupBuffer *chunk, gpointer user_data) +{ + PutTestData *ptd = user_data; + + debug_printf (2, " wrote_body_data, %d bytes\n", + (int)chunk->length); + ptd->nwrote += chunk->length; +} + +static void +clear_buffer_ptr (gpointer data) +{ + SoupBuffer **buffer_ptr = data; + + debug_printf (2, " clearing chunk\n"); + if (*buffer_ptr) { + (*buffer_ptr)->length = 0; + g_free ((char *)(*buffer_ptr)->data); + *buffer_ptr = NULL; + } else { + debug_printf (2, " chunk is already clear!\n"); + errors++; + } +} + +/* Put a chunk containing @text 
into *@buffer, set up so that it will + * clear out *@buffer when the chunk is freed, allowing us to make sure + * the set_accumulate(FALSE) is working. + */ +static void +make_put_chunk (SoupBuffer **buffer, const char *text) +{ + *buffer = soup_buffer_new_with_owner (g_strdup (text), strlen (text), + buffer, clear_buffer_ptr); +} + +static void +setup_request_body (PutTestData *ptd) +{ + make_put_chunk (&ptd->chunks[0], "one\r\n"); + make_put_chunk (&ptd->chunks[1], "two\r\n"); + make_put_chunk (&ptd->chunks[2], "three\r\n"); + ptd->next = ptd->nwrote = ptd->nfreed = 0; +} + +static void +restarted_streaming (SoupMessage *msg, gpointer user_data) +{ + PutTestData *ptd = user_data; + + debug_printf (2, " --restarting--\n"); + + /* We're streaming, and we had to restart. So the data need + * to be regenerated. + */ + setup_request_body (ptd); + + /* The 302 redirect will turn it into a GET request and + * reset the body encoding back to "NONE". Fix that. + */ + soup_message_headers_set_encoding (msg->request_headers, + SOUP_ENCODING_CHUNKED); + msg->method = SOUP_METHOD_PUT; +} + +static void +restarted_streaming_hack (SoupMessage *msg, gpointer user_data) +{ + restarted_streaming (msg, user_data); + soup_message_body_truncate (msg->request_body); +} + +typedef enum { + HACKY_STREAMING = (1 << 0), + PROPER_STREAMING = (1 << 1), + RESTART = (1 << 2) +} RequestTestFlags; + +static void +do_request_test (SoupSession *session, SoupURI *base_uri, RequestTestFlags flags) +{ + SoupURI *uri = base_uri; + PutTestData ptd; + SoupMessage *msg; + const char *client_md5, *server_md5; + GChecksum *check; + int i, length; + + debug_printf (1, "PUT"); + if (flags & HACKY_STREAMING) + debug_printf (1, " w/ hacky streaming"); + else if (flags & PROPER_STREAMING) + debug_printf (1, " w/ proper streaming"); + if (flags & RESTART) { + debug_printf (1, " and restart"); + uri = soup_uri_copy (base_uri); + soup_uri_set_path (uri, "/redirect"); + } + debug_printf (1, "\n"); + + ptd.session 
= session; + setup_request_body (&ptd); + ptd.streaming = flags & (HACKY_STREAMING | PROPER_STREAMING); + + check = g_checksum_new (G_CHECKSUM_MD5); + length = 0; + for (i = 0; i < 3; i++) { + g_checksum_update (check, (guchar *)ptd.chunks[i]->data, + ptd.chunks[i]->length); + length += ptd.chunks[i]->length; + } + client_md5 = g_checksum_get_string (check); + + msg = soup_message_new_from_uri ("PUT", uri); + soup_message_headers_set_encoding (msg->request_headers, SOUP_ENCODING_CHUNKED); + soup_message_body_set_accumulate (msg->request_body, FALSE); + soup_message_set_chunk_allocator (msg, error_chunk_allocator, NULL, NULL); + if (flags & HACKY_STREAMING) { + g_signal_connect (msg, "wrote_chunk", + G_CALLBACK (write_next_chunk_streaming_hack), &ptd); + if (flags & RESTART) { + g_signal_connect (msg, "restarted", + G_CALLBACK (restarted_streaming_hack), &ptd); + } + } else { + g_signal_connect (msg, "wrote_chunk", + G_CALLBACK (write_next_chunk), &ptd); + } + + if (flags & PROPER_STREAMING) { + soup_message_set_flags (msg, SOUP_MESSAGE_CAN_REBUILD); + if (flags & RESTART) { + g_signal_connect (msg, "restarted", + G_CALLBACK (restarted_streaming), &ptd); + } + } + + g_signal_connect (msg, "wrote_headers", + G_CALLBACK (write_next_chunk), &ptd); + g_signal_connect (msg, "wrote_body_data", + G_CALLBACK (wrote_body_data), &ptd); + soup_session_send_message (session, msg); + + if (!SOUP_STATUS_IS_SUCCESSFUL (msg->status_code)) { + debug_printf (1, " message failed: %d %s\n", + msg->status_code, msg->reason_phrase); + errors++; + } + + if (msg->request_body->data) { + debug_printf (1, " msg->request_body set!\n"); + errors++; + } + if (msg->request_body->length != length || length != ptd.nwrote) { + debug_printf (1, " sent length mismatch: %d vs %d vs %d\n", + (int)msg->request_body->length, length, ptd.nwrote); + errors++; + } + + server_md5 = soup_message_headers_get_one (msg->response_headers, + "Content-MD5"); + if (!server_md5 || strcmp (client_md5, server_md5) != 
0) { + debug_printf (1, " client/server data mismatch: %s vs %s\n", + client_md5, server_md5 ? server_md5 : "(null)"); + errors++; + } + + g_object_unref (msg); + g_checksum_free (check); + + if (uri != base_uri) + soup_uri_free (uri); +} + +typedef struct { + SoupBuffer *current_chunk; + GChecksum *check; + int length; +} GetTestData; + +static SoupBuffer * +chunk_allocator (SoupMessage *msg, gsize max_len, gpointer user_data) +{ + GetTestData *gtd = user_data; + + debug_printf (2, " allocating chunk\n"); + + if (gtd->current_chunk) { + debug_printf (1, " error: next chunk allocated before last one freed!\n"); + errors++; + } + gtd->current_chunk = soup_buffer_new_with_owner (g_malloc (6), 6, + >d->current_chunk, + clear_buffer_ptr); + return gtd->current_chunk; +} + +static void +got_chunk (SoupMessage *msg, SoupBuffer *chunk, gpointer user_data) +{ + GetTestData *gtd = user_data; + + debug_printf (2, " got chunk, %d bytes\n", + (int)chunk->length); + if (chunk != gtd->current_chunk) { + debug_printf (1, "chunk mismatch! 
%p vs %p\n", + chunk, gtd->current_chunk); + } + + g_checksum_update (gtd->check, (guchar *)chunk->data, chunk->length); + gtd->length += chunk->length; +} + +/* GET base_uri with response-body accumulation turned off, receiving the + * body through chunk_allocator/got_chunk, then check the status code, that + * msg->response_body->data stayed unset, the received length against + * Content-Length, and the received MD5 against the Content-MD5 header. + */ +static void +do_response_test (SoupSession *session, SoupURI *base_uri) +{ + GetTestData gtd; + SoupMessage *msg; + const char *client_md5, *server_md5; + + debug_printf (1, "GET\n"); + + gtd.current_chunk = NULL; + gtd.length = 0; + gtd.check = g_checksum_new (G_CHECKSUM_MD5); + + msg = soup_message_new_from_uri ("GET", base_uri); + soup_message_body_set_accumulate (msg->response_body, FALSE); + soup_message_set_chunk_allocator (msg, chunk_allocator, &gtd, NULL); + g_signal_connect (msg, "got_chunk", + G_CALLBACK (got_chunk), &gtd); + soup_session_send_message (session, msg); + + if (!SOUP_STATUS_IS_SUCCESSFUL (msg->status_code)) { + debug_printf (1, " message failed: %d %s\n", + msg->status_code, msg->reason_phrase); + errors++; + } + + if (msg->response_body->data) { + debug_printf (1, " msg->response_body set!\n"); + errors++; + } + if (soup_message_headers_get_content_length (msg->response_headers) != gtd.length) { + debug_printf (1, " received length mismatch: %d vs %d\n", + (int)soup_message_headers_get_content_length (msg->response_headers), gtd.length); + errors++; + } + + client_md5 = g_checksum_get_string (gtd.check); + server_md5 = soup_message_headers_get_one (msg->response_headers, + "Content-MD5"); + if (!server_md5 || strcmp (client_md5, server_md5) != 0) { + debug_printf (1, " client/server data mismatch: %s vs %s\n", + client_md5, server_md5 ? server_md5 : "(null)"); + errors++; + } + + g_object_unref (msg); + g_checksum_free (gtd.check); +} + +/* Make sure TEMPORARY buffers are handled properly with non-accumulating + * message bodies. 
Part of https://bugs.webkit.org/show_bug.cgi?id=18343 + */ + +static void +temp_test_wrote_chunk (SoupMessage *msg, gpointer session) +{ + SoupBuffer *chunk; + + chunk = soup_message_body_get_chunk (msg->request_body, 5); + + /* When the bug is present, the second chunk will also be + * discarded after the first is written, which will cause + * the I/O to stall since soup-message-io will think it's + * done, but it hasn't written Content-Length bytes yet. + */ + if (!chunk) { + debug_printf (1, " Lost second chunk!\n"); + errors++; + soup_session_abort (session); + } else + soup_buffer_free (chunk); + + g_signal_handlers_disconnect_by_func (msg, temp_test_wrote_chunk, session); +} + +static void +do_temporary_test (SoupSession *session, SoupURI *base_uri) +{ + SoupMessage *msg; + char *client_md5; + const char *server_md5; + + debug_printf (1, "PUT w/ temporary buffers\n"); + + msg = soup_message_new_from_uri ("PUT", base_uri); + soup_message_body_append (msg->request_body, SOUP_MEMORY_TEMPORARY, + "one\r\n", 5); + soup_message_body_append (msg->request_body, SOUP_MEMORY_STATIC, + "two\r\n", 5); + soup_message_body_set_accumulate (msg->request_body, FALSE); + + client_md5 = g_compute_checksum_for_string (G_CHECKSUM_MD5, + "one\r\ntwo\r\n", 10); + g_signal_connect (msg, "wrote_chunk", + G_CALLBACK (temp_test_wrote_chunk), session); + soup_session_send_message (session, msg); + + if (!SOUP_STATUS_IS_SUCCESSFUL (msg->status_code)) { + debug_printf (1, " message failed: %d %s\n", + msg->status_code, msg->reason_phrase); + errors++; + } + + server_md5 = soup_message_headers_get_one (msg->response_headers, + "Content-MD5"); + if (!server_md5 || strcmp (client_md5, server_md5) != 0) { + debug_printf (1, " client/server data mismatch: %s vs %s\n", + client_md5, server_md5 ? 
server_md5 : "(null)"); + errors++; + } + + g_free (client_md5); + g_object_unref (msg); +} + +static void +do_chunk_tests (SoupURI *base_uri) +{ + SoupSession *session; + + session = soup_test_session_new (SOUP_TYPE_SESSION_ASYNC, NULL); + do_request_test (session, base_uri, 0); + debug_printf (2, "\n\n"); + do_request_test (session, base_uri, PROPER_STREAMING); + debug_printf (2, "\n\n"); + do_request_test (session, base_uri, PROPER_STREAMING | RESTART); + debug_printf (2, "\n\n"); + do_request_test (session, base_uri, HACKY_STREAMING); + debug_printf (2, "\n\n"); + do_request_test (session, base_uri, HACKY_STREAMING | RESTART); + debug_printf (2, "\n\n"); + do_response_test (session, base_uri); + debug_printf (2, "\n\n"); + do_temporary_test (session, base_uri); + soup_test_session_abort_unref (session); +} + +static void +server_callback (SoupServer *server, SoupMessage *msg, + const char *path, GHashTable *query, + SoupClientContext *context, gpointer data) +{ + SoupMessageBody *md5_body; + char *md5; + + if (g_str_has_prefix (path, "/redirect")) { + soup_message_set_status (msg, SOUP_STATUS_FOUND); + soup_message_headers_replace (msg->response_headers, + "Location", "/"); + return; + } + + if (msg->method == SOUP_METHOD_GET) { + soup_message_set_response (msg, "text/plain", + SOUP_MEMORY_STATIC, + "three\r\ntwo\r\none\r\n", + strlen ("three\r\ntwo\r\none\r\n")); + soup_buffer_free (soup_message_body_flatten (msg->response_body)); + md5_body = msg->response_body; + soup_message_set_status (msg, SOUP_STATUS_OK); + } else if (msg->method == SOUP_METHOD_PUT) { + soup_message_set_status (msg, SOUP_STATUS_CREATED); + md5_body = msg->request_body; + } else { + soup_message_set_status (msg, SOUP_STATUS_METHOD_NOT_ALLOWED); + return; + } + + md5 = g_compute_checksum_for_data (G_CHECKSUM_MD5, + (guchar *)md5_body->data, + md5_body->length); + soup_message_headers_append (msg->response_headers, + "Content-MD5", md5); + g_free (md5); +} + +int +main (int argc, char 
**argv) +{ + GMainLoop *loop; + SoupServer *server; + guint port; + SoupURI *base_uri; + + test_init (argc, argv, NULL); + + server = soup_test_server_new (TRUE); + soup_server_add_handler (server, NULL, + server_callback, NULL, NULL); + port = soup_server_get_port (server); + + loop = g_main_loop_new (NULL, TRUE); + + base_uri = soup_uri_new ("http://127.0.0.1"); + soup_uri_set_port (base_uri, port); + do_chunk_tests (base_uri); + soup_uri_free (base_uri); + + g_main_loop_unref (loop); + soup_test_server_quit_unref (server); + + test_cleanup (); + return errors != 0; +} diff --git a/tests/coding-test.c b/tests/coding-test.c new file mode 100644 index 0000000..14b046b --- /dev/null +++ b/tests/coding-test.c @@ -0,0 +1,360 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2007 Red Hat, Inc. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "test-utils.h" + +SoupServer *server; +SoupURI *base_uri; + +static void +server_callback (SoupServer *server, SoupMessage *msg, + const char *path, GHashTable *query, + SoupClientContext *context, gpointer data) +{ + const char *accept_encoding, *options; + GSList *codings; + char *file = NULL, *contents; + gsize length; + + options = soup_message_headers_get_one (msg->request_headers, + "X-Test-Options"); + if (!options) + options = ""; + + accept_encoding = soup_message_headers_get_list (msg->request_headers, + "Accept-Encoding"); + if (accept_encoding && !soup_header_contains (options, "force-encode")) + codings = soup_header_parse_quality_list (accept_encoding, NULL); + else + codings = NULL; + + if (codings && g_slist_find_custom (codings, "gzip", (GCompareFunc)g_ascii_strcasecmp)) { + file = g_strdup_printf (SRCDIR "/resources%s.gz", path); + if (g_file_test (file, G_FILE_TEST_EXISTS)) { + soup_message_headers_append (msg->response_headers, + "Content-Encoding", + "gzip"); + } else { + g_free 
(file); + file = NULL; + } + } + soup_header_free_list (codings); + + if (!file) + file = g_strdup_printf (SRCDIR "/resources%s", path); + if (!g_file_get_contents (file, &contents, &length, NULL)) { + /* If path.gz exists but can't be read, we'll send back + * the error with "Content-Encoding: gzip" but there's + * no body, so, eh. + */ + g_free (file); + soup_message_set_status (msg, SOUP_STATUS_NOT_FOUND); + return; + } + g_free (file); + + if (soup_header_contains (options, "force-encode")) { + soup_message_headers_replace (msg->response_headers, + "Content-Encoding", + "gzip"); + } + + /* Content-Type matches the "real" format, not the sent format */ + if (g_str_has_suffix (path, ".gz")) { + soup_message_headers_append (msg->response_headers, + "Content-Type", + "application/gzip"); + } else { + soup_message_headers_append (msg->response_headers, + "Content-Type", + "text/plain"); + } + + soup_message_set_status (msg, SOUP_STATUS_OK); + soup_message_body_append (msg->response_body, + SOUP_MEMORY_TAKE, contents, length); + + if (soup_header_contains (options, "trailing-junk")) { + soup_message_body_append (msg->response_body, SOUP_MEMORY_COPY, + options, strlen (options)); + } +} + +static void +do_coding_test (void) +{ + SoupSession *session; + SoupMessage *msg, *msgz, *msgj, *msge; + SoupURI *uri; + const char *coding, *type; + + session = soup_test_session_new (SOUP_TYPE_SESSION_ASYNC, NULL); + uri = soup_uri_new_with_base (base_uri, "/mbox"); + + + debug_printf (1, "GET /mbox, plain\n"); + msg = soup_message_new_from_uri ("GET", uri); + soup_session_send_message (session, msg); + if (!SOUP_STATUS_IS_SUCCESSFUL (msg->status_code)) { + debug_printf (1, " Unexpected status %d %s\n", + msg->status_code, msg->reason_phrase); + errors++; + } + coding = soup_message_headers_get_one (msg->response_headers, "Content-Encoding"); + if (coding) { + debug_printf (1, " Unexpected Content-Encoding: %s\n", + coding); + errors++; + } + if (soup_message_get_flags (msg) & 
SOUP_MESSAGE_CONTENT_DECODED) { + debug_printf (1, " SOUP_MESSAGE_CONTENT_DECODED set!\n"); + errors++; + } + type = soup_message_headers_get_one (msg->response_headers, "Content-Type"); + if (!type || g_ascii_strcasecmp (type, "text/plain") != 0) { + debug_printf (1, " Unexpected Content-Type: %s\n", + type ? type : "(none)"); + errors++; + } + + debug_printf (1, "GET /mbox, Accept-Encoding: gzip\n"); + soup_session_add_feature_by_type (session, SOUP_TYPE_CONTENT_DECODER); + msgz = soup_message_new_from_uri ("GET", uri); + soup_session_send_message (session, msgz); + if (!SOUP_STATUS_IS_SUCCESSFUL (msgz->status_code)) { + debug_printf (1, " Unexpected status %d %s\n", + msgz->status_code, msgz->reason_phrase); + errors++; + } + coding = soup_message_headers_get_one (msgz->response_headers, "Content-Encoding"); + if (!coding || g_ascii_strcasecmp (coding, "gzip") != 0) { + debug_printf (1, " Unexpected Content-Encoding: %s\n", + coding ? coding : "(none)"); + errors++; + } + if (!(soup_message_get_flags (msgz) & SOUP_MESSAGE_CONTENT_DECODED)) { + debug_printf (1, " SOUP_MESSAGE_CONTENT_DECODED not set!\n"); + errors++; + } + type = soup_message_headers_get_one (msgz->response_headers, "Content-Type"); + if (!type || g_ascii_strcasecmp (type, "text/plain") != 0) { + debug_printf (1, " Unexpected Content-Type: %s\n", + type ? 
type : "(none)"); + errors++; + } + + if (msg->response_body->length != msgz->response_body->length) { + debug_printf (1, " Message length mismatch: %lu (plain) vs %lu (compressed)\n", + (gulong)msg->response_body->length, + (gulong)msgz->response_body->length); + errors++; + } else if (memcmp (msg->response_body->data, + msgz->response_body->data, + msg->response_body->length) != 0) { + debug_printf (1, " Message data mismatch (plain/compressed)\n"); + errors++; + } + + + debug_printf (1, "GET /mbox, Accept-Encoding: gzip, plus trailing junk\n"); + msgj = soup_message_new_from_uri ("GET", uri); + soup_message_headers_append (msgj->request_headers, + "X-Test-Options", "trailing-junk"); + soup_session_send_message (session, msgj); + if (!SOUP_STATUS_IS_SUCCESSFUL (msgj->status_code)) { + debug_printf (1, " Unexpected status %d %s\n", + msgj->status_code, msgj->reason_phrase); + errors++; + } + coding = soup_message_headers_get_one (msgj->response_headers, "Content-Encoding"); + if (!coding || g_ascii_strcasecmp (coding, "gzip") != 0) { + debug_printf (1, " Unexpected Content-Encoding: %s\n", + coding ? coding : "(none)"); + errors++; + } + if (!(soup_message_get_flags (msgj) & SOUP_MESSAGE_CONTENT_DECODED)) { + debug_printf (1, " SOUP_MESSAGE_CONTENT_DECODED not set!\n"); + errors++; + } + type = soup_message_headers_get_one (msgj->response_headers, "Content-Type"); + if (!type || g_ascii_strcasecmp (type, "text/plain") != 0) { + debug_printf (1, " Unexpected Content-Type: %s\n", + type ? 
type : "(none)"); + errors++; + } + + if (msg->response_body->length != msgj->response_body->length) { + debug_printf (1, " Message length mismatch: %lu (plain) vs %lu (compressed w/ junk)\n", + (gulong)msg->response_body->length, + (gulong)msgj->response_body->length); + errors++; + } else if (memcmp (msg->response_body->data, + msgj->response_body->data, + msg->response_body->length) != 0) { + debug_printf (1, " Message data mismatch (plain/compressed w/ junk)\n"); + errors++; + } + + + debug_printf (1, "GET /mbox, Accept-Encoding: gzip, with server error\n"); + msge = soup_message_new_from_uri ("GET", uri); + soup_message_headers_append (msge->request_headers, + "X-Test-Options", "force-encode"); + soup_session_send_message (session, msge); + if (!SOUP_STATUS_IS_SUCCESSFUL (msge->status_code)) { + debug_printf (1, " Unexpected status %d %s\n", + msge->status_code, msge->reason_phrase); + errors++; + } + coding = soup_message_headers_get_one (msge->response_headers, "Content-Encoding"); + if (!coding || g_ascii_strcasecmp (coding, "gzip") != 0) { + debug_printf (1, " Unexpected Content-Encoding: %s\n", + coding ? coding : "(none)"); + errors++; + } + /* Since the content wasn't actually gzip-encoded, decoding it + * should have failed and so the flag won't be set. + */ + if (soup_message_get_flags (msge) & SOUP_MESSAGE_CONTENT_DECODED) { + debug_printf (1, " SOUP_MESSAGE_CONTENT_DECODED set!\n"); + errors++; + } + type = soup_message_headers_get_one (msge->response_headers, "Content-Type"); + if (!type || g_ascii_strcasecmp (type, "text/plain") != 0) { + debug_printf (1, " Unexpected Content-Type: %s\n", + type ? type : "(none)"); + errors++; + } + + /* Failed content-decoding should have left the body untouched + * from what the server sent... which happens to be the + * uncompressed data. 
+ */ + if (msg->response_body->length != msge->response_body->length) { + debug_printf (1, " Message length mismatch: %lu (plain) vs %lu (mis-encoded)\n", + (gulong)msg->response_body->length, + (gulong)msge->response_body->length); + errors++; + } else if (memcmp (msg->response_body->data, + msge->response_body->data, + msg->response_body->length) != 0) { + debug_printf (1, " Message data mismatch (plain/misencoded)\n"); + errors++; + } + + + g_object_unref (msg); + g_object_unref (msgz); + g_object_unref (msgj); + g_object_unref (msge); + soup_uri_free (uri); + + + uri = soup_uri_new_with_base (base_uri, "/mbox.gz"); + + debug_printf (1, "GET /mbox.gz, Accept-Encoding: gzip\n"); + soup_session_add_feature_by_type (session, SOUP_TYPE_CONTENT_DECODER); + msgz = soup_message_new_from_uri ("GET", uri); + soup_session_send_message (session, msgz); + if (!SOUP_STATUS_IS_SUCCESSFUL (msgz->status_code)) { + debug_printf (1, " Unexpected status %d %s\n", + msgz->status_code, msgz->reason_phrase); + errors++; + } + coding = soup_message_headers_get_one (msgz->response_headers, "Content-Encoding"); + if (coding) { + debug_printf (1, " Unexpected Content-Encoding: %s\n", coding); + errors++; + } + type = soup_message_headers_get_one (msgz->response_headers, "Content-Type"); + if (!type || g_ascii_strcasecmp (type, "application/gzip") != 0) { + debug_printf (1, " Unexpected Content-Type: %s\n", + type ? 
type : "(none)"); + errors++; + } + + + debug_printf (1, "GET /mbox.gz, Accept-Encoding: gzip, with server error\n"); + msge = soup_message_new_from_uri ("GET", uri); + soup_message_headers_append (msge->request_headers, + "X-Test-Options", "force-encode"); + soup_session_send_message (session, msge); + if (!SOUP_STATUS_IS_SUCCESSFUL (msge->status_code)) { + debug_printf (1, " Unexpected status %d %s\n", + msge->status_code, msge->reason_phrase); + errors++; + } + coding = soup_message_headers_get_one (msge->response_headers, "Content-Encoding"); + if (!coding || g_ascii_strcasecmp (coding, "gzip") != 0) { + debug_printf (1, " Unexpected Content-Encoding: %s\n", + coding ? coding : "(none)"); + errors++; + } + /* SoupContentDecoder should have recognized the bug and thus + * not decoded it + */ + if (soup_message_get_flags (msge) & SOUP_MESSAGE_CONTENT_DECODED) { + debug_printf (1, " SOUP_MESSAGE_CONTENT_DECODED set!\n"); + errors++; + } + type = soup_message_headers_get_one (msge->response_headers, "Content-Type"); + if (!type || g_ascii_strcasecmp (type, "application/gzip") != 0) { + debug_printf (1, " Unexpected Content-Type: %s\n", + type ? 
type : "(none)"); + errors++; + } + + if (msgz->response_body->length != msge->response_body->length) { + debug_printf (1, " Message length mismatch: %lu (.gz) vs %lu (mis-encoded)\n", + (gulong)msgz->response_body->length, + (gulong)msge->response_body->length); + errors++; + } else if (memcmp (msgz->response_body->data, + msge->response_body->data, + msgz->response_body->length) != 0) { + debug_printf (1, " Message data mismatch (gz/misencoded)\n"); + errors++; + } + + + g_object_unref (msgz); + g_object_unref (msge); + soup_uri_free (uri); + + soup_test_session_abort_unref (session); +} + +int +main (int argc, char **argv) +{ + test_init (argc, argv, NULL); + + server = soup_test_server_new (TRUE); + soup_server_add_handler (server, NULL, server_callback, NULL, NULL); + base_uri = soup_uri_new ("http://127.0.0.1/"); + soup_uri_set_port (base_uri, soup_server_get_port (server)); + + do_coding_test (); + + soup_uri_free (base_uri); + soup_test_server_quit_unref (server); + + test_cleanup (); + return errors != 0; +} diff --git a/tests/context-test.c b/tests/context-test.c new file mode 100644 index 0000000..fe9af31 --- /dev/null +++ b/tests/context-test.c @@ -0,0 +1,277 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2007 Red Hat, Inc. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +static char *base_uri; + +typedef struct { + SoupServer *server; + SoupMessage *msg; + GSource *timeout; +} SlowData; + +static void +request_failed (SoupMessage *msg, gpointer data) +{ + SlowData *sd = data; + + if (SOUP_STATUS_IS_TRANSPORT_ERROR (msg->status_code)) + g_source_destroy (sd->timeout); + g_free (sd); +} + +static gboolean +add_body_chunk (gpointer data) +{ + SlowData *sd = data; + + soup_message_body_append (sd->msg->response_body, + SOUP_MEMORY_STATIC, "OK\r\n", 4); + soup_message_body_complete (sd->msg->response_body); + soup_server_unpause_message (sd->server, sd->msg); + g_object_unref (sd->msg); + + return FALSE; +} + +static void +server_callback (SoupServer *server, SoupMessage *msg, + const char *path, GHashTable *query, + SoupClientContext *context, gpointer data) +{ + SlowData *sd; + + if (msg->method != SOUP_METHOD_GET) { + soup_message_set_status (msg, SOUP_STATUS_NOT_IMPLEMENTED); + return; + } + + soup_message_set_status (msg, SOUP_STATUS_OK); + if (!strcmp (path, "/fast")) { + soup_message_set_response (msg, "text/plain", + SOUP_MEMORY_STATIC, "OK\r\n", 4); + return; + } + + soup_message_headers_set_encoding (msg->response_headers, + SOUP_ENCODING_CHUNKED); + g_object_ref (msg); + soup_server_pause_message (server, msg); + + sd = g_new (SlowData, 1); + sd->server = server; + sd->msg = msg; + sd->timeout = soup_add_timeout ( + soup_server_get_async_context (server), + 200, add_body_chunk, sd); + g_signal_connect (msg, "finished", + G_CALLBACK (request_failed), sd); +} + +/* Test 1: An async session in another thread with its own + * async_context can complete a request while the main thread's main + * loop is stopped. 
+ */ + +static gboolean idle_start_test1_thread (gpointer loop); +static gpointer test1_thread (gpointer user_data); + +static GCond *test1_cond; +static GMutex *test1_mutex; + +static void +do_test1 (void) +{ + GMainLoop *loop; + + debug_printf (1, "Test 1: blocking the main thread does not block other thread\n"); + + test1_cond = g_cond_new (); + test1_mutex = g_mutex_new (); + + loop = g_main_loop_new (NULL, FALSE); + g_idle_add (idle_start_test1_thread, loop); + g_main_loop_run (loop); + g_main_loop_unref (loop); + + g_mutex_free (test1_mutex); + g_cond_free (test1_cond); +} + +static gboolean +idle_start_test1_thread (gpointer loop) +{ + GTimeVal time; + GThread *thread; + + g_mutex_lock (test1_mutex); + thread = g_thread_create (test1_thread, base_uri, TRUE, NULL); + + g_get_current_time (&time); + time.tv_sec += 5; + if (g_cond_timed_wait (test1_cond, test1_mutex, &time)) + g_thread_join (thread); + else { + debug_printf (1, " timeout!\n"); + errors++; + } + + g_mutex_unlock (test1_mutex); + g_main_loop_quit (loop); + return FALSE; +} + +static void +test1_finished (SoupSession *session, SoupMessage *msg, gpointer loop) +{ + g_main_loop_quit (loop); +} + +static gpointer +test1_thread (gpointer user_data) +{ + SoupSession *session; + GMainContext *async_context; + char *uri; + SoupMessage *msg; + GMainLoop *loop; + + /* Wait for main thread to be waiting on test1_cond */ + g_mutex_lock (test1_mutex); + g_mutex_unlock (test1_mutex); + + async_context = g_main_context_new (); + session = soup_test_session_new ( + SOUP_TYPE_SESSION_ASYNC, + SOUP_SESSION_ASYNC_CONTEXT, async_context, + NULL); + g_main_context_unref (async_context); + + uri = g_build_filename (base_uri, "slow", NULL); + + debug_printf (1, " send_message\n"); + msg = soup_message_new ("GET", uri); + soup_session_send_message (session, msg); + if (msg->status_code != SOUP_STATUS_OK) { + debug_printf (1, " unexpected status: %d %s\n", + msg->status_code, msg->reason_phrase); + errors++; + } + 
g_object_unref (msg); + + debug_printf (1, " queue_message\n"); + msg = soup_message_new ("GET", uri); + loop = g_main_loop_new (async_context, FALSE); + g_object_ref (msg); + soup_session_queue_message (session, msg, test1_finished, loop); + g_main_loop_run (loop); + g_main_loop_unref (loop); + if (msg->status_code != SOUP_STATUS_OK) { + debug_printf (1, " unexpected status: %d %s\n", + msg->status_code, msg->reason_phrase); + errors++; + } + g_object_unref (msg); + + soup_test_session_abort_unref (session); + g_free (uri); + + g_cond_signal (test1_cond); + return NULL; +} + +/* Test 2: An async session in the main thread with its own + * async_context runs independently of the default main loop. + */ + +static gboolean idle_test2_fail (gpointer user_data); + +static void +do_test2 (void) +{ + guint idle; + GMainContext *async_context; + SoupSession *session; + char *uri; + SoupMessage *msg; + + debug_printf (1, "Test 2: a session with its own context is independent of the main loop.\n"); + + idle = g_idle_add_full (G_PRIORITY_HIGH, idle_test2_fail, NULL, NULL); + + async_context = g_main_context_new (); + session = soup_test_session_new ( + SOUP_TYPE_SESSION_ASYNC, + SOUP_SESSION_ASYNC_CONTEXT, async_context, + NULL); + g_main_context_unref (async_context); + + uri = g_build_filename (base_uri, "slow", NULL); + + debug_printf (1, " send_message\n"); + msg = soup_message_new ("GET", uri); + soup_session_send_message (session, msg); + if (msg->status_code != SOUP_STATUS_OK) { + debug_printf (1, " unexpected status: %d %s\n", + msg->status_code, msg->reason_phrase); + errors++; + } + g_object_unref (msg); + + soup_test_session_abort_unref (session); + g_free (uri); + + g_source_remove (idle); +} + +static gboolean +idle_test2_fail (gpointer user_data) +{ + debug_printf (1, " idle ran!\n"); + errors++; + return FALSE; +} + + +int +main (int argc, char **argv) +{ + SoupServer *server; + + test_init (argc, argv, NULL); + + server = soup_test_server_new (TRUE); + 
soup_server_add_handler (server, NULL, server_callback, NULL, NULL); + base_uri = g_strdup_printf ("http://127.0.0.1:%u/", + soup_server_get_port (server)); + + do_test1 (); + do_test2 (); + + g_free (base_uri); + soup_test_server_quit_unref (server); + + test_cleanup (); + return errors != 0; +} diff --git a/tests/continue-test.c b/tests/continue-test.c new file mode 100644 index 0000000..dafe182 --- /dev/null +++ b/tests/continue-test.c @@ -0,0 +1,457 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2007 Novell, Inc. + */ + +#include +#include +#include +#include + +#include +#include +#include + +#include "test-utils.h" + +#define SHORT_BODY "This is a test.\r\n" +#define LONG_BODY (SHORT_BODY SHORT_BODY) + +#define MAX_POST_LENGTH (sizeof (SHORT_BODY)) + +static int port; +static GSList *events; + +static void +event (SoupMessage *msg, const char *side, const char *message) +{ + char *data = g_strdup_printf ("%s-%s", side, message); + gboolean record_status = + (!strcmp (data, "server-wrote_headers") || + !strcmp (data, "server-wrote_informational")); + + debug_printf (2, " %s", data); + if (record_status) + debug_printf (2, " (%s)", msg->reason_phrase); + debug_printf (2, "\n"); + + events = g_slist_append (events, data); + if (record_status) + events = g_slist_append (events, GUINT_TO_POINTER (msg->status_code)); +} + +#define EVENT_HANDLER(name) \ +static void \ +name (SoupMessage *msg, gpointer side) \ +{ \ + event (msg, side, #name); \ +} + +EVENT_HANDLER (got_informational) +EVENT_HANDLER (got_headers) +EVENT_HANDLER (got_body) +EVENT_HANDLER (wrote_informational) +EVENT_HANDLER (wrote_headers) +EVENT_HANDLER (wrote_body) +EVENT_HANDLER (finished) + +static void +do_message (const char *path, gboolean long_body, + gboolean expect_continue, gboolean auth, + ...) 
+{ + SoupSession *session; + SoupMessage *msg; + const char *body; + char *uri; + va_list ap; + const char *expected_event; + char *actual_event; + int expected_status, actual_status; + static int count = 1; + + debug_printf (1, "%d. /%s, %s body, %sExpect, %s password\n", + count++, path, + long_body ? "long" : "short", + expect_continue ? "" : "no ", + auth ? "with" : "without"); + + uri = g_strdup_printf ("http://%s127.0.0.1:%d/%s", + auth ? "user:pass@" : "", + port, path); + msg = soup_message_new ("POST", uri); + g_free (uri); + + body = long_body ? LONG_BODY : SHORT_BODY; + soup_message_set_request (msg, "text/plain", SOUP_MEMORY_STATIC, + body, strlen (body)); + soup_message_headers_append (msg->request_headers, "Connection", "close"); + if (expect_continue) { + soup_message_headers_set_expectations (msg->request_headers, + SOUP_EXPECTATION_CONTINUE); + } + + g_signal_connect (msg, "got_informational", + G_CALLBACK (got_informational), "client"); + g_signal_connect (msg, "got_headers", + G_CALLBACK (got_headers), "client"); + g_signal_connect (msg, "got_body", + G_CALLBACK (got_body), "client"); + g_signal_connect (msg, "wrote_informational", + G_CALLBACK (wrote_informational), "client"); + g_signal_connect (msg, "wrote_headers", + G_CALLBACK (wrote_headers), "client"); + g_signal_connect (msg, "wrote_body", + G_CALLBACK (wrote_body), "client"); + g_signal_connect (msg, "finished", + G_CALLBACK (finished), "client"); + + events = NULL; + session = soup_test_session_new (SOUP_TYPE_SESSION_ASYNC, NULL); + soup_session_send_message (session, msg); + soup_test_session_abort_unref (session); + + va_start (ap, auth); + while ((expected_event = va_arg (ap, const char *))) { + + if (!events) { + actual_event = g_strdup (""); + debug_printf (1, " Expected '%s', got end of list\n", + expected_event); + errors++; + } else { + actual_event = events->data; + if (strcmp (expected_event, actual_event) != 0) { + debug_printf (1, " Expected '%s', got '%s'\n", + 
expected_event, actual_event); + errors++; + } + events = g_slist_delete_link (events, events); + } + + /* Status-bearing events carry an extra status: the expected one as the + * next vararg, the actual one as the next list link. -1 means "none". + */ + if (!strcmp (expected_event, "server-wrote_headers") || + !strcmp (expected_event, "server-wrote_informational")) + expected_status = va_arg (ap, int); + else + expected_status = -1; + if (!strcmp (actual_event, "server-wrote_headers") || + !strcmp (actual_event, "server-wrote_informational")) { + actual_status = GPOINTER_TO_INT (events->data); + events = g_slist_delete_link (events, events); + } else + actual_status = -1; + + if (expected_status != -1 && actual_status != -1 && + expected_status != actual_status) { + debug_printf (1, " Expected status '%s', got '%s'\n", + soup_status_get_phrase (expected_status), + soup_status_get_phrase (actual_status)); + errors++; + } + + g_free (actual_event); + } + va_end (ap); + /* Anything still queued was not expected; report it and release it. */ + while (events) { + actual_event = events->data; + debug_printf (1, " Expected to be done, got '%s'\n", actual_event); + errors++; + events = g_slist_delete_link (events, events); + + if (!strcmp (actual_event, "server-wrote_headers") || + !strcmp (actual_event, "server-wrote_informational")) + events = g_slist_delete_link (events, events); + /* the event string was allocated in event (); free it like the main loop does */ + g_free (actual_event); + } + g_object_unref (msg); +} + +static void +run_tests (void) +{ + do_message ("unauth", FALSE, FALSE, FALSE, + "client-wrote_headers", + "client-wrote_body", + "server-got_headers", + "server-got_body", + "server-wrote_headers", SOUP_STATUS_CREATED, + "server-wrote_body", + "server-finished", + "client-got_headers", + "client-got_body", + "client-finished", + NULL); + do_message ("unauth", TRUE, FALSE, FALSE, + "client-wrote_headers", + "client-wrote_body", + "server-got_headers", + "server-got_body", + "server-wrote_headers", SOUP_STATUS_REQUEST_ENTITY_TOO_LARGE, + "server-wrote_body", + "server-finished", + "client-got_headers", + "client-got_body", + "client-finished", + NULL); + do_message ("unauth", FALSE, TRUE, FALSE, + "client-wrote_headers", + "server-got_headers", + "server-wrote_informational", 
SOUP_STATUS_CONTINUE, + "client-got_informational", + "client-wrote_body", + "server-got_body", + "server-wrote_headers", SOUP_STATUS_CREATED, + "server-wrote_body", + "server-finished", + "client-got_headers", + "client-got_body", + "client-finished", + NULL); + do_message ("unauth", TRUE, TRUE, FALSE, + "client-wrote_headers", + "server-got_headers", + "server-wrote_headers", SOUP_STATUS_REQUEST_ENTITY_TOO_LARGE, + "server-wrote_body", + "server-finished", + "client-got_headers", + "client-got_body", + "client-finished", + NULL); + + do_message ("auth", FALSE, FALSE, FALSE, + "client-wrote_headers", + "client-wrote_body", + "server-got_headers", + "server-got_body", + "server-wrote_headers", SOUP_STATUS_UNAUTHORIZED, + "server-wrote_body", + "server-finished", + "client-got_headers", + "client-got_body", + "client-finished", + NULL); + do_message ("auth", TRUE, FALSE, FALSE, + "client-wrote_headers", + "client-wrote_body", + "server-got_headers", + "server-got_body", + "server-wrote_headers", SOUP_STATUS_UNAUTHORIZED, + "server-wrote_body", + "server-finished", + "client-got_headers", + "client-got_body", + "client-finished", + NULL); + do_message ("auth", FALSE, TRUE, FALSE, + "client-wrote_headers", + "server-got_headers", + "server-wrote_headers", SOUP_STATUS_UNAUTHORIZED, + "server-wrote_body", + "server-finished", + "client-got_headers", + "client-got_body", + "client-finished", + NULL); + do_message ("auth", TRUE, TRUE, FALSE, + "client-wrote_headers", + "server-got_headers", + "server-wrote_headers", SOUP_STATUS_UNAUTHORIZED, + "server-wrote_body", + "server-finished", + "client-got_headers", + "client-got_body", + "client-finished", + NULL); + + do_message ("auth", FALSE, FALSE, TRUE, + "client-wrote_headers", + "client-wrote_body", + "server-got_headers", + "server-got_body", + "server-wrote_headers", SOUP_STATUS_UNAUTHORIZED, + "server-wrote_body", + "server-finished", + "client-got_headers", + "client-got_body", + "client-wrote_headers", + 
"client-wrote_body", + "server-got_headers", + "server-got_body", + "server-wrote_headers", SOUP_STATUS_CREATED, + "server-wrote_body", + "server-finished", + "client-got_headers", + "client-got_body", + "client-finished", + NULL); + do_message ("auth", TRUE, FALSE, TRUE, + "client-wrote_headers", + "client-wrote_body", + "server-got_headers", + "server-got_body", + "server-wrote_headers", SOUP_STATUS_UNAUTHORIZED, + "server-wrote_body", + "server-finished", + "client-got_headers", + "client-got_body", + "client-wrote_headers", + "client-wrote_body", + "server-got_headers", + "server-got_body", + "server-wrote_headers", SOUP_STATUS_REQUEST_ENTITY_TOO_LARGE, + "server-wrote_body", + "server-finished", + "client-got_headers", + "client-got_body", + "client-finished", + NULL); + do_message ("auth", FALSE, TRUE, TRUE, + "client-wrote_headers", + "server-got_headers", + "server-wrote_headers", SOUP_STATUS_UNAUTHORIZED, + "server-wrote_body", + "server-finished", + "client-got_headers", + "client-got_body", + "client-wrote_headers", + "server-got_headers", + "server-wrote_informational", SOUP_STATUS_CONTINUE, + "client-got_informational", + "client-wrote_body", + "server-got_body", + "server-wrote_headers", SOUP_STATUS_CREATED, + "server-wrote_body", + "server-finished", + "client-got_headers", + "client-got_body", + "client-finished", + NULL); + do_message ("auth", TRUE, TRUE, TRUE, + "client-wrote_headers", + "server-got_headers", + "server-wrote_headers", SOUP_STATUS_UNAUTHORIZED, + "server-wrote_body", + "server-finished", + "client-got_headers", + "client-got_body", + "client-wrote_headers", + "server-got_headers", + "server-wrote_headers", SOUP_STATUS_REQUEST_ENTITY_TOO_LARGE, + "server-wrote_body", + "server-finished", + "client-got_headers", + "client-got_body", + "client-finished", + NULL); +} + + +/* SERVER */ + +static void +server_got_headers (SoupMessage *msg, gpointer server) +{ + /* FIXME */ + if (msg->status_code != SOUP_STATUS_CONTINUE && + 
msg->status_code != 0) + return; + + if (soup_message_headers_get_expectations (msg->request_headers) & + SOUP_EXPECTATION_CONTINUE) { + const char *length; + + length = soup_message_headers_get_one (msg->request_headers, + "Content-Length"); + if (length && atoi (length) > MAX_POST_LENGTH) { + soup_message_set_status (msg, SOUP_STATUS_REQUEST_ENTITY_TOO_LARGE); + soup_message_headers_append (msg->response_headers, "Connection", "close"); + } + } +} + +static void +request_started (SoupServer *server, SoupMessage *msg, + SoupClientContext *client, gpointer user_data) +{ + g_signal_connect (msg, "got_headers", + G_CALLBACK (server_got_headers), server); + + g_signal_connect (msg, "got_informational", + G_CALLBACK (got_informational), "server"); + g_signal_connect (msg, "got_headers", + G_CALLBACK (got_headers), "server"); + g_signal_connect (msg, "got_body", + G_CALLBACK (got_body), "server"); + g_signal_connect (msg, "wrote_informational", + G_CALLBACK (wrote_informational), "server"); + g_signal_connect (msg, "wrote_headers", + G_CALLBACK (wrote_headers), "server"); + g_signal_connect (msg, "wrote_body", + G_CALLBACK (wrote_body), "server"); + g_signal_connect (msg, "finished", + G_CALLBACK (finished), "server"); +} + +static gboolean +auth_callback (SoupAuthDomain *auth_domain, SoupMessage *msg, + const char *username, const char *password, gpointer user_data) +{ + return !strcmp (username, "user") && !strcmp (password, "pass"); +} + +static void +server_callback (SoupServer *server, SoupMessage *msg, + const char *path, GHashTable *query, + SoupClientContext *context, gpointer data) +{ + if (msg->method != SOUP_METHOD_POST) { + soup_message_set_status (msg, SOUP_STATUS_NOT_IMPLEMENTED); + soup_message_headers_append (msg->response_headers, "Connection", "close"); + } else if (msg->request_body->length > MAX_POST_LENGTH) { + soup_message_set_status (msg, SOUP_STATUS_REQUEST_ENTITY_TOO_LARGE); + soup_message_headers_append (msg->response_headers, "Connection", 
"close"); + } else + soup_message_set_status (msg, SOUP_STATUS_CREATED); +} + +static SoupServer * +setup_server (void) +{ + SoupServer *server; + SoupAuthDomain *auth_domain; + + server = soup_test_server_new (FALSE); + + g_signal_connect (server, "request-started", + G_CALLBACK (request_started), NULL); + + soup_server_add_handler (server, NULL, server_callback, NULL, NULL); + + auth_domain = soup_auth_domain_basic_new ( + SOUP_AUTH_DOMAIN_REALM, "continue-test", + SOUP_AUTH_DOMAIN_ADD_PATH, "/auth", + SOUP_AUTH_DOMAIN_BASIC_AUTH_CALLBACK, auth_callback, + NULL); + soup_server_add_auth_domain (server, auth_domain); + g_object_unref (auth_domain); + + return server; +} + +/* MAIN */ + +int +main (int argc, char **argv) +{ + SoupServer *server; + + test_init (argc, argv, NULL); + + server = setup_server (); + port = soup_server_get_port (server); + + run_tests (); + + soup_test_server_quit_unref (server); + test_cleanup (); + return errors != 0; +} diff --git a/tests/cookies-test.c b/tests/cookies-test.c new file mode 100644 index 0000000..2b8ebb1 --- /dev/null +++ b/tests/cookies-test.c @@ -0,0 +1,122 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2010 Igalia S.L. 
+ */ + +#include +#include + +#include "test-utils.h" + +SoupServer *server; +SoupURI *first_party_uri, *third_party_uri; +const char *first_party = "http://127.0.0.1/"; +const char *third_party = "http://localhost/"; + +static void +server_callback (SoupServer *server, SoupMessage *msg, + const char *path, GHashTable *query, + SoupClientContext *context, gpointer data) +{ + if (g_str_equal(path, "/index.html")) + soup_message_headers_replace (msg->response_headers, + "Set-Cookie", + "foo=bar"); + else if (g_str_equal (path, "/foo.jpg")) + soup_message_headers_replace (msg->response_headers, + "Set-Cookie", + "baz=qux"); + else + g_return_if_reached (); + + soup_message_set_status (msg, SOUP_STATUS_OK); +} + +typedef struct { + SoupCookieJarAcceptPolicy policy; + int n_cookies; +} CookiesForPolicy; + +static const CookiesForPolicy validResults[] = { + { SOUP_COOKIE_JAR_ACCEPT_ALWAYS, 2 }, + { SOUP_COOKIE_JAR_ACCEPT_NEVER, 0 }, + { SOUP_COOKIE_JAR_ACCEPT_NO_THIRD_PARTY, 1 } +}; + +static void +do_cookies_accept_policy_test (void) +{ + SoupSession *session; + SoupMessage *msg; + SoupURI *uri; + SoupCookieJar *jar; + GSList *l, *p; + int i; + + session = soup_test_session_new (SOUP_TYPE_SESSION_ASYNC, NULL); + soup_session_add_feature_by_type (session, SOUP_TYPE_COOKIE_JAR); + jar = SOUP_COOKIE_JAR (soup_session_get_feature (session, SOUP_TYPE_COOKIE_JAR)); + + for (i = 0; i < G_N_ELEMENTS (validResults); i++) { + soup_cookie_jar_set_accept_policy (jar, validResults[i].policy); + + uri = soup_uri_new_with_base (first_party_uri, "/index.html"); + msg = soup_message_new_from_uri ("GET", uri); + soup_message_set_first_party (msg, first_party_uri); + soup_session_send_message (session, msg); + soup_uri_free (uri); + g_object_unref (msg); + + /* We can't use to servers due to limitations in + * test_server, so let's swap first and third party here + * to simulate a cookie coming from a third party. 
+ */ + uri = soup_uri_new_with_base (first_party_uri, "/foo.jpg"); + msg = soup_message_new_from_uri ("GET", uri); + soup_message_set_first_party (msg, third_party_uri); + soup_session_send_message (session, msg); + soup_uri_free (uri); + g_object_unref (msg); + + l = soup_cookie_jar_all_cookies (jar); + if (g_slist_length (l) < validResults[i].n_cookies) { + debug_printf (1, " accepted less cookies than it should have\n"); + errors++; + } else if (g_slist_length (l) > validResults[i].n_cookies) { + debug_printf (1, " accepted more cookies than it should have\n"); + errors++; + } + + for (p = l; p; p = p->next) { + soup_cookie_jar_delete_cookie (jar, p->data); + soup_cookie_free (p->data); + } + + g_slist_free (l); + } + + soup_test_session_abort_unref (session); +} + +int +main (int argc, char **argv) +{ + test_init (argc, argv, NULL); + + server = soup_test_server_new (TRUE); + soup_server_add_handler (server, NULL, server_callback, NULL, NULL); + first_party_uri = soup_uri_new (first_party); + third_party_uri = soup_uri_new (third_party); + soup_uri_set_port (first_party_uri, soup_server_get_port (server)); + soup_uri_set_port (third_party_uri, soup_server_get_port (server)); + + do_cookies_accept_policy_test (); + + soup_uri_free (first_party_uri); + soup_uri_free (third_party_uri); + soup_test_server_quit_unref (server); + + test_cleanup (); + + return errors != 0; +} diff --git a/tests/date.c b/tests/date.c new file mode 100644 index 0000000..f35c41b --- /dev/null +++ b/tests/date.c @@ -0,0 +1,393 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2005 Novell, Inc. 
+ */ + +#include +#include + +#include +#include + +#include "test-utils.h" + +static gboolean check_ok (const char *strdate, SoupDate *date); + +static SoupDate * +make_date (const char *strdate) +{ + char *dup; + SoupDate *date; + + /* We do it this way so that if soup_date_new_from_string() + * reads off the end of the string, it will trigger an error + * when valgrinding, rather than just reading the start of the + * next const string. + */ + dup = g_strdup (strdate); + date = soup_date_new_from_string (dup); + g_free (dup); + return date; +} + +static const struct { + SoupDateFormat format; + const char *date; +} good_dates[] = { + { SOUP_DATE_HTTP, "Sat, 06 Nov 2004 08:09:07 GMT" }, + { SOUP_DATE_COOKIE, "Sat, 06-Nov-2004 08:09:07 GMT" }, + { SOUP_DATE_RFC2822, "Sat, 6 Nov 2004 08:09:07 -0430" }, + { SOUP_DATE_ISO8601_COMPACT, "20041106T080907" }, + { SOUP_DATE_ISO8601_FULL, "2004-11-06T08:09:07" }, + { SOUP_DATE_ISO8601_XMLRPC, "20041106T08:09:07" } +}; + +/* Parse strdate, validate the result with check_ok(), then check that + * formatting the parsed date as 'format' reproduces strdate exactly. + */ +static void +check_good (SoupDateFormat format, const char *strdate) +{ + SoupDate *date; + char *strdate2 = NULL; + + date = make_date (strdate); + if (date) + strdate2 = soup_date_to_string (date, format); + if (!check_ok (strdate, date)) { + /* check_ok() has already freed date; release the string + * built from it too (g_free (NULL) is a no-op). + */ + g_free (strdate2); + return; + } + + if (strcmp (strdate, strdate2) != 0) { + debug_printf (1, " restringification failed: '%s' -> '%s'\n", + strdate, strdate2); + errors++; + } + g_free (strdate2); +} + +static const char *ok_dates[] = { + /* rfc1123-date, and broken variants */ + "Sat, 06 Nov 2004 08:09:07 GMT", + "Sat, 6 Nov 2004 08:09:07 GMT", + "Sat, 6 Nov 2004 08:09:07 GMT", + "Sat, 06 Nov 2004 08:09:07", + "06 Nov 2004 08:09:07 GMT", + "SAT, 06 NOV 2004 08:09:07 +1000", + + /* rfc850-date, and broken variants */ + "Saturday, 06-Nov-04 08:09:07 GMT", + "Saturday, 6-Nov-04 08:09:07 GMT", + "Saturday, 6-Nov-04 08:09:07 GMT", + "Saturday, 06-Nov-104 08:09:07 GMT", + "Saturday, 06-Nov-04 08:09:07", + "06-Nov-04 08:09:07 GMT", + + /* asctime-date, and broken variants */ + "Sat Nov 6 08:09:07 2004", 
+ "Sat Nov 06 08:09:07 2004", + "Sat Nov 6 08:09:07 2004", + "Sat Nov 6 08:09:07 2004 GMT", + + /* ISO 8601 */ + "2004-11-06T08:09:07Z", + "20041106T08:09:07Z", + "20041106T08:09:07+00:00", + "20041106T080907+00:00", + + /* Netscape cookie spec date, and broken variants */ + "Sat, 06-Nov-2004 08:09:07 GMT", + "Sat, 6-Nov-2004 08:09:07 GMT", + "Sat, 6-Nov-2004 08:09:07 GMT", + "Sat, 06-Nov-2004 08:09:07", + + /* Original version of Netscape cookie spec, and broken variants */ + "Sat, 06-Nov-04 08:09:07 GMT", + "Sat, 6-Nov-04 08:09:07 GMT", + "Sat, 6-Nov-04 08:09:07 GMT", + "Sat, 06-Nov-104 08:09:07 GMT", + "Sat, 06-Nov-04 08:09:07", + + /* Netscape cookie spec example syntax, and broken variants */ + "Saturday, 06-Nov-04 08:09:07 GMT", + "Saturday, 6-Nov-04 08:09:07 GMT", + "Saturday, 6-Nov-04 08:09:07 GMT", + "Saturday, 06-Nov-104 08:09:07 GMT", + "Saturday, 06-Nov-2004 08:09:07 GMT", + "Saturday, 6-Nov-2004 08:09:07 GMT", + "Saturday, 6-Nov-2004 08:09:07 GMT", + "Saturday, 06-Nov-04 08:09:07", + + /* Miscellaneous broken formats seen on the web */ + "Sat 06-Nov-2004 08:9:07", + "Saturday, 06-Nov-04 8:9:07 GMT", + "Sat, 06 Nov 2004 08:09:7 GMT" +}; + +#define TIME_T 1099728547L +#define TIME_T_STRING "1099728547" + +static gboolean +check_ok (const char *strdate, SoupDate *date) +{ + debug_printf (2, "%s\n", strdate); + + if (date && + date->year == 2004 && date->month == 11 && date->day == 6 && + date->hour == 8 && date->minute == 9 && date->second == 7) { + soup_date_free (date); + return TRUE; + } + + debug_printf (1, " date parsing failed for '%s'.\n", strdate); + if (date) { + debug_printf (1, " got: %d %d %d - %d %d %d\n\n", + date->year, date->month, date->day, + date->hour, date->minute, date->second); + soup_date_free (date); + } + errors++; + return FALSE; +} + +static const char *bad_dates[] = { + /* broken rfc1123-date */ + ", 06 Nov 2004 08:09:07 GMT", + "Sat, Nov 2004 08:09:07 GMT", + "Sat, 06 2004 08:09:07 GMT", + "Sat, 06 Nov 08:09:07 GMT", + "Sat, 
06 Nov 2004 :09:07 GMT", + "Sat, 06 Nov 2004 09:07 GMT", + "Sat, 06 Nov 2004 08::07 GMT", + "Sat, 06 Nov 2004 08:09: GMT", + + /* broken rfc850-date */ + ", 06-Nov-04 08:09:07 GMT", + "Saturday, -Nov-04 08:09:07 GMT", + "Saturday, Nov-04 08:09:07 GMT", + "Saturday, 06-04 08:09:07 GMT", + "Saturday, 06--04 08:09:07 GMT", + "Saturday, 06-Nov- 08:09:07 GMT", + "Saturday, 06-Nov 08:09:07 GMT", + "Saturday, 06-Nov-04 :09:07 GMT", + "Saturday, 06-Nov-04 09:07 GMT", + "Saturday, 06-Nov-04 08::07 GMT", + "Saturday, 06-Nov-04 08:09: GMT", + + /* broken asctime-date */ + "Nov 6 08:09:07 2004", + "Sat 6 08:09:07 2004", + "Sat Nov 08:09:07 2004", + "Sat Nov 6 :09:07 2004", + "Sat Nov 6 09:07 2004", + "Sat Nov 6 08::07 2004", + "Sat Nov 6 08:09: 2004", + "Sat Nov 6 08:09:07", + "Sat Nov 6 08:09:07 GMT 2004" +}; + +static void +check_bad (const char *strdate, SoupDate *date) +{ + debug_printf (2, "%s\n", strdate); + + if (!date) + return; + errors++; + + debug_printf (1, " date parsing succeeded for '%s'!\n", strdate); + debug_printf (1, " got: %d %d %d - %d %d %d\n\n", + date->year, date->month, date->day, + date->hour, date->minute, date->second); + soup_date_free (date); +} + +static const struct conversion { + const char *source; + const char *http, *cookie, *rfc2822, *compact, *full, *xmlrpc; +} conversions[] = { + /* SOUP_DATE_HTTP */ + { "Sat, 06 Nov 2004 08:09:07 GMT", + + "Sat, 06 Nov 2004 08:09:07 GMT", + "Sat, 06-Nov-2004 08:09:07 GMT", + "Sat, 6 Nov 2004 08:09:07 +0000", + "20041106T080907Z", + "2004-11-06T08:09:07Z", + "20041106T08:09:07" }, + + /* RFC2822 GMT */ + { "Sat, 6 Nov 2004 08:09:07 +0000", + + "Sat, 06 Nov 2004 08:09:07 GMT", + "Sat, 06-Nov-2004 08:09:07 GMT", + "Sat, 6 Nov 2004 08:09:07 +0000", + "20041106T080907Z", + "2004-11-06T08:09:07Z", + "20041106T08:09:07" }, + + /* RFC2822 with positive offset */ + { "Sat, 6 Nov 2004 08:09:07 +0430", + + "Sat, 06 Nov 2004 04:39:07 GMT", + "Sat, 06-Nov-2004 04:39:07 GMT", + "Sat, 6 Nov 2004 08:09:07 +0430", + 
"20041106T080907+0430", + "2004-11-06T08:09:07+04:30", + "20041106T08:09:07" }, + + /* RFC2822 with negative offset */ + { "Sat, 6 Nov 2004 08:09:07 -0430", + + "Sat, 06 Nov 2004 12:39:07 GMT", + "Sat, 06-Nov-2004 12:39:07 GMT", + "Sat, 6 Nov 2004 08:09:07 -0430", + "20041106T080907-0430", + "2004-11-06T08:09:07-04:30", + "20041106T08:09:07" }, + + /* RFC2822 floating */ + { "Sat, 6 Nov 2004 08:09:07 -0000", + + "Sat, 06 Nov 2004 08:09:07 GMT", + "Sat, 06-Nov-2004 08:09:07 GMT", + "Sat, 6 Nov 2004 08:09:07 -0000", + "20041106T080907", + "2004-11-06T08:09:07", + "20041106T08:09:07" }, + + /* ISO GMT */ + { "2004-11-06T08:09:07Z", + + "Sat, 06 Nov 2004 08:09:07 GMT", + "Sat, 06-Nov-2004 08:09:07 GMT", + "Sat, 6 Nov 2004 08:09:07 +0000", + "20041106T080907Z", + "2004-11-06T08:09:07Z", + "20041106T08:09:07" }, + + /* ISO with positive offset */ + { "2004-11-06T08:09:07+04:30", + + "Sat, 06 Nov 2004 04:39:07 GMT", + "Sat, 06-Nov-2004 04:39:07 GMT", + "Sat, 6 Nov 2004 08:09:07 +0430", + "20041106T080907+0430", + "2004-11-06T08:09:07+04:30", + "20041106T08:09:07" }, + + /* ISO with negative offset */ + { "2004-11-06T08:09:07-04:30", + + "Sat, 06 Nov 2004 12:39:07 GMT", + "Sat, 06-Nov-2004 12:39:07 GMT", + "Sat, 6 Nov 2004 08:09:07 -0430", + "20041106T080907-0430", + "2004-11-06T08:09:07-04:30", + "20041106T08:09:07" }, + + /* ISO floating */ + { "2004-11-06T08:09:07", + + "Sat, 06 Nov 2004 08:09:07 GMT", + "Sat, 06-Nov-2004 08:09:07 GMT", + "Sat, 6 Nov 2004 08:09:07 -0000", + "20041106T080907", + "2004-11-06T08:09:07", + "20041106T08:09:07" } +}; + +static void +check_conversion (const struct conversion *conv) +{ + SoupDate *date; + char *str; + + debug_printf (2, "%s\n", conv->source); + date = make_date (conv->source); + if (!date) { + debug_printf (1, " date parsing failed for '%s'.\n", conv->source); + errors++; + return; + } + + str = soup_date_to_string (date, SOUP_DATE_HTTP); + if (!str || strcmp (str, conv->http) != 0) { + debug_printf (1, " conversion of '%s' to 
HTTP failed:\n" + " wanted: %s\n got: %s\n", + conv->source, conv->http, str ? str : "(null)"); + errors++; + } + g_free (str); + + str = soup_date_to_string (date, SOUP_DATE_COOKIE); + if (!str || strcmp (str, conv->cookie) != 0) { + debug_printf (1, " conversion of '%s' to COOKIE failed:\n" + " wanted: %s\n got: %s\n", + conv->source, conv->cookie, str ? str : "(null)"); + errors++; + } + g_free (str); + + str = soup_date_to_string (date, SOUP_DATE_RFC2822); + if (!str || strcmp (str, conv->rfc2822) != 0) { + debug_printf (1, " conversion of '%s' to RFC2822 failed:\n" + " wanted: %s\n got: %s\n", + conv->source, conv->rfc2822, str ? str : "(null)"); + errors++; + } + g_free (str); + + str = soup_date_to_string (date, SOUP_DATE_ISO8601_COMPACT); + if (!str || strcmp (str, conv->compact) != 0) { + debug_printf (1, " conversion of '%s' to COMPACT failed:\n" + " wanted: %s\n got: %s\n", + conv->source, conv->compact, str ? str : "(null)"); + errors++; + } + g_free (str); + + str = soup_date_to_string (date, SOUP_DATE_ISO8601_FULL); + if (!str || strcmp (str, conv->full) != 0) { + debug_printf (1, " conversion of '%s' to FULL failed:\n" + " wanted: %s\n got: %s\n", + conv->source, conv->full, str ? str : "(null)"); + errors++; + } + g_free (str); + + str = soup_date_to_string (date, SOUP_DATE_ISO8601_XMLRPC); + if (!str || strcmp (str, conv->xmlrpc) != 0) { + debug_printf (1, " conversion of '%s' to XMLRPC failed:\n" + " wanted: %s\n got: %s\n", + conv->source, conv->xmlrpc, str ? 
str : "(null)"); + errors++; + } + g_free (str); + + soup_date_free (date); +} + +int +main (int argc, char **argv) +{ + int i; + + test_init (argc, argv, NULL); + + debug_printf (1, "Good dates:\n"); + for (i = 0; i < G_N_ELEMENTS (good_dates); i++) + check_good (good_dates[i].format, good_dates[i].date); + + debug_printf (1, "\nOK dates:\n"); + for (i = 0; i < G_N_ELEMENTS (ok_dates); i++) + check_ok (ok_dates[i], make_date (ok_dates[i])); + check_ok (TIME_T_STRING, soup_date_new_from_time_t (TIME_T)); + + debug_printf (1, "\nBad dates:\n"); + for (i = 0; i < G_N_ELEMENTS (bad_dates); i++) + check_bad (bad_dates[i], make_date (bad_dates[i])); + + debug_printf (1, "\nConversions:\n"); + for (i = 0; i < G_N_ELEMENTS (conversions); i++) + check_conversion (&conversions[i] ); + + test_cleanup (); + return errors != 0; +} diff --git a/tests/dns.c b/tests/dns.c new file mode 100644 index 0000000..6c44800 --- /dev/null +++ b/tests/dns.c @@ -0,0 +1,66 @@ +#include + +#include +#include + +#include "libsoup/soup-address.h" + +static GMainLoop *loop; +static int nlookups = 0; + +static void +resolve_callback (SoupAddress *addr, guint status, gpointer data) +{ + if (status == SOUP_STATUS_OK) { + printf ("Name: %s\n", soup_address_get_name (addr)); + printf ("Address: %s\n", soup_address_get_physical (addr)); + } else { + printf ("Name: %s\n", soup_address_get_name (addr)); + printf ("Error: %s\n", soup_status_get_phrase (status)); + } + printf ("\n"); + + g_object_unref (addr); + + nlookups--; + if (nlookups == 0) + g_main_loop_quit (loop); +} + +static void +usage (void) +{ + fprintf (stderr, "Usage: dns hostname ...\n"); + exit (1); +} + +int +main (int argc, char **argv) +{ + SoupAddress *addr; + int i; + + if (argc < 2) + usage (); + + g_thread_init (NULL); + g_type_init (); + + for (i = 1; i < argc; i++) { + addr = soup_address_new (argv[i], 0); + if (!addr) { + /* Name the argument that failed, not always the first one. */ + fprintf (stderr, "Could not parse address %s\n", argv[i]); + exit (1); + } + + soup_address_resolve_async 
(addr, NULL, NULL, + resolve_callback, NULL); + nlookups++; + } + + loop = g_main_loop_new (NULL, TRUE); + g_main_loop_run (loop); + g_main_loop_unref (loop); + + return 0; +} diff --git a/tests/forms-test.c b/tests/forms-test.c new file mode 100644 index 0000000..4c2846e --- /dev/null +++ b/tests/forms-test.c @@ -0,0 +1,471 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2007, 2008 Red Hat, Inc. + */ + +#include "config.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "test-utils.h" + +static struct { + const char *title, *name; + const char *result; +} tests[] = { + /* Both fields must be filled in */ + { NULL, "Name", "" }, + { "Mr.", NULL, "" }, + + /* Filled-in but empty is OK */ + { "", "", "Hello, " }, + { "", "Name", "Hello, Name" }, + { "Mr.", "", "Hello, MR. " }, + + /* Simple */ + { "Mr.", "Name", "Hello, MR. Name" }, + + /* Encoding of spaces */ + { "Mr.", "Full Name", "Hello, MR. Full Name" }, + { "Mr. and Mrs.", "Full Name", "Hello, MR. AND MRS. Full Name" }, + + /* Encoding of "+" */ + { "Mr.+Mrs.", "Full Name", "Hello, MR.+MRS. Full Name" }, + + /* Encoding of non-ASCII. */ + { "Se\xC3\xB1or", "Nombre", "Hello, SE\xC3\xB1OR Nombre" }, + + /* Encoding of '%' */ + { "Mr.", "Foo %2f Bar", "Hello, MR. Foo %2f Bar" }, +}; + +static void +do_hello_test (int n, gboolean extra, const char *uri) +{ + GPtrArray *args; + char *title_arg = NULL, *name_arg = NULL; + char *str_stdout = NULL; + + debug_printf (1, "%2d. '%s' '%s'%s: ", n * 2 + (extra ? 2 : 1), + tests[n].title ? tests[n].title : "(null)", + tests[n].name ? tests[n].name : "(null)", + extra ? 
" + extra" : ""); + + args = g_ptr_array_new (); + g_ptr_array_add (args, "curl"); + g_ptr_array_add (args, "-G"); + if (tests[n].title) { + title_arg = soup_form_encode ("title", tests[n].title, NULL); + g_ptr_array_add (args, "-d"); + g_ptr_array_add (args, title_arg); + } + if (tests[n].name) { + name_arg = soup_form_encode ("n@me", tests[n].name, NULL); + g_ptr_array_add (args, "-d"); + g_ptr_array_add (args, name_arg); + } + if (extra) { + g_ptr_array_add (args, "-d"); + g_ptr_array_add (args, "extra=something"); + } + g_ptr_array_add (args, (char *)uri); + g_ptr_array_add (args, NULL); + + if (g_spawn_sync (NULL, (char **)args->pdata, NULL, + G_SPAWN_SEARCH_PATH | G_SPAWN_STDERR_TO_DEV_NULL, + NULL, NULL, + &str_stdout, NULL, NULL, NULL)) { + if (str_stdout && !strcmp (str_stdout, tests[n].result)) + debug_printf (1, "OK!\n"); + else { + debug_printf (1, "WRONG!\n"); + debug_printf (1, " expected '%s', got '%s'\n", + tests[n].result, + str_stdout ? str_stdout : "(error)"); + errors++; + } + g_free (str_stdout); + } else { + debug_printf (1, "ERROR!\n"); + errors++; + } + g_ptr_array_free (args, TRUE); + g_free (title_arg); + g_free (name_arg); +} + +static void +do_hello_tests (const char *uri) +{ + int n; + + debug_printf (1, "Hello tests (GET, application/x-www-form-urlencoded)\n"); + for (n = 0; n < G_N_ELEMENTS (tests); n++) { + do_hello_test (n, FALSE, uri); + do_hello_test (n, TRUE, uri); + } +} + +static void +do_md5_test_curl (const char *uri, const char *file, const char *md5) +{ + GPtrArray *args; + char *file_arg, *str_stdout; + + debug_printf (1, " via curl: "); + + args = g_ptr_array_new (); + g_ptr_array_add (args, "curl"); + g_ptr_array_add (args, "-L"); + g_ptr_array_add (args, "-F"); + file_arg = g_strdup_printf ("file=@%s", file); + g_ptr_array_add (args, file_arg); + g_ptr_array_add (args, "-F"); + g_ptr_array_add (args, "fmt=txt"); + g_ptr_array_add (args, (char *)uri); + g_ptr_array_add (args, NULL); + + if (g_spawn_sync (NULL, (char 
**)args->pdata, NULL, + G_SPAWN_SEARCH_PATH | G_SPAWN_STDERR_TO_DEV_NULL, + NULL, NULL, + &str_stdout, NULL, NULL, NULL)) { + if (str_stdout && !strcmp (str_stdout, md5)) + debug_printf (1, "OK!\n"); + else { + debug_printf (1, "WRONG!\n"); + debug_printf (1, " expected '%s', got '%s'\n", + md5, str_stdout ? str_stdout : "(error)"); + errors++; + } + g_free (str_stdout); + } else { + debug_printf (1, "ERROR!\n"); + errors++; + } + g_ptr_array_free (args, TRUE); + g_free (file_arg); +} + +#define MD5_TEST_FILE SRCDIR "/resources/home.gif" +#define MD5_TEST_FILE_BASENAME "home.gif" +#define MD5_TEST_FILE_MIME_TYPE "image/gif" + +static void +do_md5_test_libsoup (const char *uri, const char *contents, + gsize length, const char *md5) +{ + SoupMultipart *multipart; + SoupBuffer *buffer; + SoupMessage *msg; + SoupSession *session; + + debug_printf (1, " via libsoup: "); + + multipart = soup_multipart_new (SOUP_FORM_MIME_TYPE_MULTIPART); + buffer = soup_buffer_new (SOUP_MEMORY_COPY, contents, length); + soup_multipart_append_form_file (multipart, "file", + MD5_TEST_FILE_BASENAME, + MD5_TEST_FILE_MIME_TYPE, + buffer); + soup_buffer_free (buffer); + soup_multipart_append_form_string (multipart, "fmt", "text"); + + msg = soup_form_request_new_from_multipart (uri, multipart); + soup_multipart_free (multipart); + + session = soup_test_session_new (SOUP_TYPE_SESSION_ASYNC, NULL); + soup_session_send_message (session, msg); + + if (!SOUP_STATUS_IS_SUCCESSFUL (msg->status_code)) { + debug_printf (1, "ERROR: Unexpected status %d %s\n", + msg->status_code, msg->reason_phrase); + errors++; + } else if (strcmp (msg->response_body->data, md5) != 0) { + debug_printf (1, "ERROR: Incorrect response: expected '%s' got '%s'\n", + md5, msg->response_body->data); + errors++; + } else + debug_printf (1, "OK!\n"); + + g_object_unref (msg); + soup_test_session_abort_unref (session); +} + +static void +do_md5_tests (const char *uri) +{ + char *contents, *md5; + gsize length; + GError *error = 
NULL; + + debug_printf (1, "\nMD5 tests (POST, multipart/form-data)\n"); + + if (!g_file_get_contents (MD5_TEST_FILE, &contents, &length, &error)) { + debug_printf (1, " ERROR: Could not read " MD5_TEST_FILE ": %s\n", error->message); + g_error_free (error); + errors++; + return; + } + + md5 = g_compute_checksum_for_string (G_CHECKSUM_MD5, contents, length); + + do_md5_test_curl (uri, MD5_TEST_FILE, md5); + do_md5_test_libsoup (uri, contents, length, md5); + + g_free (contents); + g_free (md5); +} + + +static void +do_form_decode_test (void) +{ + GHashTable *table; + const gchar *value; + gchar *tmp; + + debug_printf (1, "\nDecode tests\n"); + + /* Test that the code handles multiple values with the same key. */ + table = soup_form_decode ("foo=first&foo=second&foo=third"); + + /* Allocate some memory. We do this to test for a bug in + * soup_form_decode() that resulted in values from the hash + * table pointing to memory that is already released. + */ + tmp = g_strdup ("other"); + + value = g_hash_table_lookup (table, "foo"); + if (g_strcmp0 (value, "third") != 0) { + debug_printf (1, " ERROR: expected '%s', got '%s'\n", + "third", value ? 
value : "(null)"); + errors++; + } + + g_free (tmp); + g_hash_table_destroy (table); +} + +static void +hello_callback (SoupServer *server, SoupMessage *msg, + const char *path, GHashTable *query, + SoupClientContext *context, gpointer data) +{ + char *title, *name, *fmt; + const char *content_type; + GString *buf; + + if (msg->method != SOUP_METHOD_GET && msg->method != SOUP_METHOD_HEAD) { + soup_message_set_status (msg, SOUP_STATUS_NOT_IMPLEMENTED); + return; + } + + if (query) { + title = g_hash_table_lookup (query, "title"); + name = g_hash_table_lookup (query, "n@me"); + fmt = g_hash_table_lookup (query, "fmt"); + } else + title = name = fmt = NULL; + + buf = g_string_new (NULL); + if (!query || (fmt && !strcmp (fmt, "html"))) { + content_type = "text/html"; + g_string_append (buf, "forms-test: hello\r\n"); + if (title && name) { + /* mumble mumble html-escape... */ + g_string_append_printf (buf, "

Hello, %s %s

\r\n", + title, name); + } + g_string_append (buf, "
" + "

Title:

" + "

Name:

" + "

" + "

" + "
\r\n"); + g_string_append (buf, "\r\n"); + } else { + content_type = "text/plain"; + if (title && name) { + char *uptitle = g_ascii_strup (title, -1); + g_string_append_printf (buf, "Hello, %s %s", + uptitle, name); + g_free (uptitle); + } + } + + soup_message_set_response (msg, content_type, + SOUP_MEMORY_TAKE, + buf->str, buf->len); + g_string_free (buf, FALSE); + soup_message_set_status (msg, SOUP_STATUS_OK); +} + +static void +md5_get_callback (SoupServer *server, SoupMessage *msg, + const char *path, GHashTable *query, + SoupClientContext *context, gpointer data) +{ + const char *file = NULL, *md5sum = NULL, *fmt = NULL; + const char *content_type; + GString *buf; + + if (query) { + file = g_hash_table_lookup (query, "file"); + md5sum = g_hash_table_lookup (query, "md5sum"); + fmt = g_hash_table_lookup (query, "fmt"); + } + + /* Fall back to HTML both when there is no query at all and when the + * query has no "fmt" field; strcmp() below must never see NULL. + */ + if (!fmt) + fmt = "html"; + + buf = g_string_new (NULL); + if (!strcmp (fmt, "html")) { + content_type = "text/html"; + g_string_append (buf, "forms-test: md5\r\n"); + if (file && md5sum) { + /* mumble mumble html-escape... */ + g_string_append_printf (buf, "

File: %s
MD5: %s

\r\n", + file, md5sum); + } + g_string_append (buf, "
" + "

File:

" + "

" + "

" + "
\r\n"); + g_string_append (buf, "\r\n"); + } else { + content_type = "text/plain"; + if (md5sum) + g_string_append_printf (buf, "%s", md5sum); + } + + soup_message_set_response (msg, content_type, + SOUP_MEMORY_TAKE, + buf->str, buf->len); + g_string_free (buf, FALSE); + soup_message_set_status (msg, SOUP_STATUS_OK); +} + +static void +md5_post_callback (SoupServer *server, SoupMessage *msg, + const char *path, GHashTable *query, + SoupClientContext *context, gpointer data) +{ + const char *content_type; + GHashTable *params; + const char *fmt; + char *filename, *md5sum, *redirect_uri; + SoupBuffer *file; + SoupURI *uri; + + content_type = soup_message_headers_get_content_type (msg->request_headers, NULL); + if (!content_type || strcmp (content_type, "multipart/form-data") != 0) { + soup_message_set_status (msg, SOUP_STATUS_BAD_REQUEST); + return; + } + + params = soup_form_decode_multipart (msg, "file", + &filename, NULL, &file); + if (!params) { + soup_message_set_status (msg, SOUP_STATUS_BAD_REQUEST); + return; + } + fmt = g_hash_table_lookup (params, "fmt"); + + md5sum = g_compute_checksum_for_data (G_CHECKSUM_MD5, + (gpointer)file->data, + file->length); + soup_buffer_free (file); + + uri = soup_uri_copy (soup_message_get_uri (msg)); + soup_uri_set_query_from_fields (uri, + "file", filename ? filename : "", + "md5sum", md5sum, + "fmt", fmt ? 
fmt : "html", + NULL); + redirect_uri = soup_uri_to_string (uri, FALSE); + + soup_message_set_status (msg, SOUP_STATUS_SEE_OTHER); + soup_message_headers_replace (msg->response_headers, "Location", + redirect_uri); + + g_free (redirect_uri); + soup_uri_free (uri); + g_free (md5sum); + g_free (filename); + g_hash_table_destroy (params); +} + +static void +md5_callback (SoupServer *server, SoupMessage *msg, + const char *path, GHashTable *query, + SoupClientContext *context, gpointer data) +{ + if (msg->method == SOUP_METHOD_GET || msg->method == SOUP_METHOD_HEAD) + md5_get_callback (server, msg, path, query, context, data); + else if (msg->method == SOUP_METHOD_POST) + md5_post_callback (server, msg, path, query, context, data); + else + soup_message_set_status (msg, SOUP_STATUS_METHOD_NOT_ALLOWED); +} + +static gboolean run_tests = TRUE; + +static GOptionEntry no_test_entry[] = { + { "no-tests", 'n', G_OPTION_FLAG_REVERSE, + G_OPTION_ARG_NONE, &run_tests, + "Don't run tests, just run the test server", NULL }, + { NULL } +}; + +int +main (int argc, char **argv) +{ + GMainLoop *loop; + SoupServer *server; + guint port; + char *uri_str; + + test_init (argc, argv, no_test_entry); + + server = soup_test_server_new (TRUE); + soup_server_add_handler (server, "/hello", + hello_callback, NULL, NULL); + soup_server_add_handler (server, "/md5", + md5_callback, NULL, NULL); + port = soup_server_get_port (server); + + loop = g_main_loop_new (NULL, TRUE); + + if (run_tests) { + uri_str = g_strdup_printf ("http://127.0.0.1:%u/hello", port); + do_hello_tests (uri_str); + g_free (uri_str); + + uri_str = g_strdup_printf ("http://127.0.0.1:%u/md5", port); + do_md5_tests (uri_str); + g_free (uri_str); + + do_form_decode_test (); + } else { + /* port is a guint, so print it with %u as the URIs above do. */ + printf ("Listening on port %u\n", port); + g_main_loop_run (loop); + } + + g_main_loop_unref (loop); + + soup_test_server_quit_unref (server); + if (run_tests) + test_cleanup (); + return errors != 0; +} diff --git a/tests/get.c b/tests/get.c new 
file mode 100644 index 0000000..79d6e80 --- /dev/null +++ b/tests/get.c @@ -0,0 +1,202 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2001-2003, Ximian, Inc. + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef HAVE_GNOME +#include +#else +#include +#endif + +static SoupSession *session; +static GMainLoop *loop; +static gboolean debug = FALSE, quiet = FALSE; +static const char *method; + +static void +get_url (const char *url) +{ + const char *name; + SoupMessage *msg; + const char *header; + + msg = soup_message_new (method, url); + soup_message_set_flags (msg, SOUP_MESSAGE_NO_REDIRECT); + + soup_session_send_message (session, msg); + + name = soup_message_get_uri (msg)->path; + + if (debug) { + SoupMessageHeadersIter iter; + const char *hname, *value; + char *path = soup_uri_to_string (soup_message_get_uri (msg), TRUE); + + printf ("%s %s HTTP/1.%d\n", method, path, + soup_message_get_http_version (msg)); + soup_message_headers_iter_init (&iter, msg->request_headers); + while (soup_message_headers_iter_next (&iter, &hname, &value)) + printf ("%s: %s\r\n", hname, value); + printf ("\n"); + + printf ("HTTP/1.%d %d %s\n", + soup_message_get_http_version (msg), + msg->status_code, msg->reason_phrase); + + soup_message_headers_iter_init (&iter, msg->response_headers); + while (soup_message_headers_iter_next (&iter, &hname, &value)) + printf ("%s: %s\r\n", hname, value); + printf ("\n"); + } else if (!quiet || SOUP_STATUS_IS_TRANSPORT_ERROR (msg->status_code)) + printf ("%s: %d %s\n", name, msg->status_code, msg->reason_phrase); + + if (SOUP_STATUS_IS_REDIRECTION (msg->status_code)) { + header = soup_message_headers_get_one (msg->response_headers, + "Location"); + if (header) { + SoupURI *uri; + char *uri_string; + + if (!debug && !quiet) + printf (" -> %s\n", header); + + uri = soup_uri_new_with_base (soup_message_get_uri (msg), 
header); + uri_string = soup_uri_to_string (uri, FALSE); + get_url (uri_string); + g_free (uri_string); + soup_uri_free (uri); + } + } else if (SOUP_STATUS_IS_SUCCESSFUL (msg->status_code)) { + fwrite (msg->response_body->data, 1, + msg->response_body->length, stdout); + } +} + +static void +usage (void) +{ + fprintf (stderr, "Usage: get [-c CAfile] [-p proxy URL] [-h] [-d] URL\n"); + exit (1); +} + +int +main (int argc, char **argv) +{ + const char *cafile = NULL, *url; + SoupURI *proxy = NULL, *parsed; + gboolean synchronous = FALSE; + int opt; + + g_thread_init (NULL); + g_type_init (); + + method = SOUP_METHOD_GET; + + while ((opt = getopt (argc, argv, "c:dhp:qs")) != -1) { + switch (opt) { + case 'c': + cafile = optarg; + break; + + case 'd': + debug = TRUE; + break; + + case 'h': + method = SOUP_METHOD_HEAD; + debug = TRUE; + break; + + case 'p': + proxy = soup_uri_new (optarg); + if (!proxy) { + fprintf (stderr, "Could not parse %s as URI\n", + optarg); + exit (1); + } + break; + + case 'q': + quiet = TRUE; + break; + + case 's': + synchronous = TRUE; + break; + + case '?': + usage (); + break; + } + } + argc -= optind; + argv += optind; + + if (argc != 1) + usage (); + url = argv[0]; + parsed = soup_uri_new (url); + if (!parsed) { + fprintf (stderr, "Could not parse '%s' as a URL\n", url); + exit (1); + } + soup_uri_free (parsed); + + if (synchronous) { + session = soup_session_sync_new_with_options ( + SOUP_SESSION_SSL_CA_FILE, cafile, +#ifdef HAVE_GNOME + SOUP_SESSION_ADD_FEATURE_BY_TYPE, SOUP_TYPE_GNOME_FEATURES_2_26, +#endif + SOUP_SESSION_ADD_FEATURE_BY_TYPE, SOUP_TYPE_CONTENT_DECODER, + SOUP_SESSION_ADD_FEATURE_BY_TYPE, SOUP_TYPE_COOKIE_JAR, + SOUP_SESSION_USER_AGENT, "get ", + SOUP_SESSION_ACCEPT_LANGUAGE_AUTO, TRUE, + NULL); + } else { + session = soup_session_async_new_with_options ( + SOUP_SESSION_SSL_CA_FILE, cafile, +#ifdef HAVE_GNOME + SOUP_SESSION_ADD_FEATURE_BY_TYPE, SOUP_TYPE_GNOME_FEATURES_2_26, +#endif + SOUP_SESSION_ADD_FEATURE_BY_TYPE, 
SOUP_TYPE_CONTENT_DECODER, + SOUP_SESSION_ADD_FEATURE_BY_TYPE, SOUP_TYPE_COOKIE_JAR, + SOUP_SESSION_USER_AGENT, "get ", + SOUP_SESSION_ACCEPT_LANGUAGE_AUTO, TRUE, + NULL); + } + + /* Need to do this after creating the session, since adding + * SOUP_TYPE_GNOME_FEATURE_2_26 will add a proxy resolver, thereby + * bashing over the manually-set proxy. + */ + if (proxy) { + g_object_set (G_OBJECT (session), + SOUP_SESSION_PROXY_URI, proxy, + NULL); + } + + if (!synchronous) + loop = g_main_loop_new (NULL, TRUE); + + get_url (url); + + if (!synchronous) + g_main_loop_unref (loop); + + return 0; +} diff --git a/tests/getbug.c b/tests/getbug.c new file mode 100644 index 0000000..86ae304 --- /dev/null +++ b/tests/getbug.c @@ -0,0 +1,148 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2001-2003, Ximian, Inc. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +static GMainLoop *loop; + +static void +print_value (GValue *value) +{ + if (G_VALUE_HOLDS_STRING (value)) + printf ("%s", g_value_get_string (value)); + else if (G_VALUE_HOLDS_INT (value)) + printf ("%d", g_value_get_int (value)); + else if (G_VALUE_HOLDS_DOUBLE (value)) + printf ("%f", g_value_get_double (value)); + else if (G_VALUE_TYPE (value) == G_TYPE_VALUE_ARRAY) { + GValueArray *array = g_value_get_boxed (value); + int i; + printf ("[ "); + for (i = 0; i < array->n_values; i++) { + if (i != 0) + printf (", "); + print_value (&array->values[i]); + } + printf (" ]"); + } else + printf ("(%s)", g_type_name (G_VALUE_TYPE (value))); +} + +static void +print_struct_field (gpointer key, gpointer value, gpointer data) +{ + printf ("%s: ", (char *)key); + print_value (value); + printf ("\n"); +} + +static void +got_response (SoupSession *session, SoupMessage *msg, gpointer user_data) +{ + GHashTable *hash; + GError *error = NULL; + + if (!SOUP_STATUS_IS_SUCCESSFUL (msg->status_code)) { + fprintf (stderr, "%d %s\n", 
msg->status_code, msg->reason_phrase); + exit (1); + } + + if (!soup_xmlrpc_extract_method_response (msg->response_body->data, + msg->response_body->length, + &error, + G_TYPE_HASH_TABLE, &hash)) { + if (!error) { + fprintf (stderr, "Could not parse XMLRPC response:\n%d %s\n\n", + msg->status_code, msg->reason_phrase); + fprintf (stderr, "%s\n", msg->response_body->data); + } else { + fprintf (stderr, "XML-RPC error: %d %s", + error->code, error->message); + } + exit (1); + } + + g_hash_table_foreach (hash, print_struct_field, NULL); + g_hash_table_destroy (hash); + + g_main_loop_quit (loop); +} + +static void +usage (void) +{ + fprintf (stderr, "Usage: getbug [-p proxy_uri] [bugzilla-uri] bug-number\n"); + exit (1); +} + +int +main (int argc, char **argv) +{ + SoupSession *session; + SoupURI *proxy = NULL; + SoupMessage *msg; + const char *uri = "http://bugzilla.redhat.com/bugzilla/xmlrpc.cgi"; + int opt, bug; + + g_thread_init (NULL); + g_type_init (); + + while ((opt = getopt (argc, argv, "p:")) != -1) { + switch (opt) { + case 'p': + proxy = soup_uri_new (optarg); + if (!proxy) { + fprintf (stderr, "Could not parse %s as URI\n", + optarg); + exit (1); + } + break; + + case '?': + usage (); + break; + } + } + argc -= optind; + argv += optind; + + if (argc > 1) { + uri = argv[0]; + argc--; + argv++; + } + + if (argc != 1 || (bug = atoi (argv[0])) == 0) + usage (); + + session = soup_session_async_new_with_options ( + SOUP_SESSION_PROXY_URI, proxy, + NULL); + + msg = soup_xmlrpc_request_new (uri, "bugzilla.getBug", + G_TYPE_INT, bug, + G_TYPE_INVALID); + if (!msg) { + fprintf (stderr, "Could not create web service request to '%s'\n", uri); + exit (1); + } + soup_session_queue_message (session, SOUP_MESSAGE (msg), + got_response, NULL); + + loop = g_main_loop_new (NULL, TRUE); + g_main_loop_run (loop); + g_main_loop_unref (loop); + + return 0; +} diff --git a/tests/header-parsing.c b/tests/header-parsing.c new file mode 100644 index 0000000..63a29bb --- /dev/null 
+++ b/tests/header-parsing.c @@ -0,0 +1,1130 @@ +#include + +#include +#include +#include + +#include + +#include "test-utils.h" + +typedef struct { + const char *name, *value; +} Header; + +static struct RequestTest { + const char *description; + const char *request; + int length; + guint status; + const char *method, *path; + SoupHTTPVersion version; + Header headers[4]; +} reqtests[] = { + /**********************/ + /*** VALID REQUESTS ***/ + /**********************/ + + { "HTTP 1.0 request with no headers", + "GET / HTTP/1.0\r\n", -1, + SOUP_STATUS_OK, + "GET", "/", SOUP_HTTP_1_0, + { { NULL } } + }, + + { "Req w/ 1 header", + "GET / HTTP/1.1\r\nHost: example.com\r\n", -1, + SOUP_STATUS_OK, + "GET", "/", SOUP_HTTP_1_1, + { { "Host", "example.com" }, + { NULL } + } + }, + + { "Req w/ 1 header, no leading whitespace", + "GET / HTTP/1.1\r\nHost:example.com\r\n", -1, + SOUP_STATUS_OK, + "GET", "/", SOUP_HTTP_1_1, + { { "Host", "example.com" }, + { NULL } + } + }, + + { "Req w/ 1 header including trailing whitespace", + "GET / HTTP/1.1\r\nHost: example.com \r\n", -1, + SOUP_STATUS_OK, + "GET", "/", SOUP_HTTP_1_1, + { { "Host", "example.com" }, + { NULL } + } + }, + + { "Req w/ 1 header, wrapped", + "GET / HTTP/1.1\r\nFoo: bar\r\n baz\r\n", -1, + SOUP_STATUS_OK, + "GET", "/", SOUP_HTTP_1_1, + { { "Foo", "bar baz" }, + { NULL } + } + }, + + { "Req w/ 1 header, wrapped with additional whitespace", + "GET / HTTP/1.1\r\nFoo: bar \r\n baz\r\n", -1, + SOUP_STATUS_OK, + "GET", "/", SOUP_HTTP_1_1, + { { "Foo", "bar baz" }, + { NULL } + } + }, + + { "Req w/ 1 header, wrapped with tab", + "GET / HTTP/1.1\r\nFoo: bar\r\n\tbaz\r\n", -1, + SOUP_STATUS_OK, + "GET", "/", SOUP_HTTP_1_1, + { { "Foo", "bar baz" }, + { NULL } + } + }, + + { "Req w/ 1 header, wrapped before value", + "GET / HTTP/1.1\r\nFoo:\r\n bar baz\r\n", -1, + SOUP_STATUS_OK, + "GET", "/", SOUP_HTTP_1_1, + { { "Foo", "bar baz" }, + { NULL } + } + }, + + { "Req w/ 1 header with empty value", + "GET / 
HTTP/1.1\r\nHost:\r\n", -1, + SOUP_STATUS_OK, + "GET", "/", SOUP_HTTP_1_1, + { { "Host", "" }, + { NULL } + } + }, + + { "Req w/ 2 headers", + "GET / HTTP/1.1\r\nHost: example.com\r\nConnection: close\r\n", -1, + SOUP_STATUS_OK, + "GET", "/", SOUP_HTTP_1_1, + { { "Host", "example.com" }, + { "Connection", "close" }, + { NULL } + } + }, + + { "Req w/ 3 headers", + "GET / HTTP/1.1\r\nHost: example.com\r\nConnection: close\r\nBlah: blah\r\n", -1, + SOUP_STATUS_OK, + "GET", "/", SOUP_HTTP_1_1, + { { "Host", "example.com" }, + { "Connection", "close" }, + { "Blah", "blah" }, + { NULL } + } + }, + + { "Req w/ 3 headers, 1st wrapped", + "GET / HTTP/1.1\r\nFoo: bar\r\n baz\r\nConnection: close\r\nBlah: blah\r\n", -1, + SOUP_STATUS_OK, + "GET", "/", SOUP_HTTP_1_1, + { { "Foo", "bar baz" }, + { "Connection", "close" }, + { "Blah", "blah" }, + { NULL } + } + }, + + { "Req w/ 3 headers, 2nd wrapped", + "GET / HTTP/1.1\r\nConnection: close\r\nBlah: blah\r\nFoo: bar\r\n baz\r\n", -1, + SOUP_STATUS_OK, + "GET", "/", SOUP_HTTP_1_1, + { { "Connection", "close" }, + { "Blah", "blah" }, + { "Foo", "bar baz" }, + { NULL } + } + }, + + { "Req w/ 3 headers, 3rd wrapped", + "GET / HTTP/1.1\r\nConnection: close\r\nBlah: blah\r\nFoo: bar\r\n baz\r\n", -1, + SOUP_STATUS_OK, + "GET", "/", SOUP_HTTP_1_1, + { { "Connection", "close" }, + { "Blah", "blah" }, + { "Foo", "bar baz" }, + { NULL } + } + }, + + { "Req w/ same header multiple times", + "GET / HTTP/1.1\r\nFoo: bar\r\nFoo: baz\r\nFoo: quux\r\n", -1, + SOUP_STATUS_OK, + "GET", "/", SOUP_HTTP_1_1, + { { "Foo", "bar, baz, quux" }, + { NULL } + } + }, + + { "Connection header on HTTP/1.0 message", + "GET / HTTP/1.0\r\nFoo: bar\r\nConnection: Bar, Quux\r\nBar: baz\r\nQuux: foo\r\n", -1, + SOUP_STATUS_OK, + "GET", "/", SOUP_HTTP_1_0, + { { "Foo", "bar" }, + { "Connection", "Bar, Quux" }, + { NULL } + } + }, + + /****************************/ + /*** RECOVERABLE REQUESTS ***/ + /****************************/ + + /* RFC 2616 section 4.1 says we 
SHOULD accept this */ + + { "Spurious leading CRLF", + "\r\nGET / HTTP/1.1\r\nHost: example.com\r\n", -1, + SOUP_STATUS_OK, + "GET", "/", SOUP_HTTP_1_1, + { { "Host", "example.com" }, + { NULL } + } + }, + + /* RFC 2616 section 3.1 says we MUST accept this */ + + { "HTTP/01.01 request", + "GET / HTTP/01.01\r\nHost: example.com\r\n", -1, + SOUP_STATUS_OK, + "GET", "/", SOUP_HTTP_1_1, + { { "Host", "example.com" }, + { NULL } + } + }, + + /* RFC 2616 section 19.3 says we SHOULD accept these */ + + { "LF instead of CRLF after header", + "GET / HTTP/1.1\nHost: example.com\nConnection: close\n", -1, + SOUP_STATUS_OK, + "GET", "/", SOUP_HTTP_1_1, + { { "Host", "example.com" }, + { "Connection", "close" }, + { NULL } + } + }, + + { "LF instead of CRLF after Request-Line", + "GET / HTTP/1.1\nHost: example.com\r\n", -1, + SOUP_STATUS_OK, + "GET", "/", SOUP_HTTP_1_1, + { { "Host", "example.com" }, + { NULL } + } + }, + + { "Req w/ incorrect whitespace in Request-Line", + "GET /\tHTTP/1.1\r\nHost: example.com\r\n", -1, + SOUP_STATUS_OK, + "GET", "/", SOUP_HTTP_1_1, + { { "Host", "example.com" }, + { NULL } + } + }, + + { "Req w/ incorrect whitespace after Request-Line", + "GET / HTTP/1.1 \r\nHost: example.com\r\n", -1, + SOUP_STATUS_OK, + "GET", "/", SOUP_HTTP_1_1, + { { "Host", "example.com" }, + { NULL } + } + }, + + /* qv bug 579318, do_bad_header_tests() below */ + { "Req w/ mangled header", + "GET / HTTP/1.1\r\nHost: example.com\r\nFoo one\r\nBar: two\r\n", -1, + SOUP_STATUS_OK, + "GET", "/", SOUP_HTTP_1_1, + { { "Host", "example.com" }, + { "Bar", "two" }, + { NULL } + } + }, + + /************************/ + /*** INVALID REQUESTS ***/ + /************************/ + + { "HTTP 0.9 request; not supported", + "GET /\r\n", -1, + SOUP_STATUS_BAD_REQUEST, + NULL, NULL, -1, + { { NULL } } + }, + + { "HTTP 1.2 request (no such thing)", + "GET / HTTP/1.2\r\n", -1, + SOUP_STATUS_HTTP_VERSION_NOT_SUPPORTED, + NULL, NULL, -1, + { { NULL } } + }, + + { "HTTP 2000 request (no such 
thing)", + "GET / HTTP/2000.0\r\n", -1, + SOUP_STATUS_HTTP_VERSION_NOT_SUPPORTED, + NULL, NULL, -1, + { { NULL } } + }, + + { "Non-HTTP request", + "GET / SOUP/1.1\r\nHost: example.com\r\n", -1, + SOUP_STATUS_BAD_REQUEST, + NULL, NULL, -1, + { { NULL } } + }, + + { "Junk after Request-Line", + "GET / HTTP/1.1 blah\r\nHost: example.com\r\n", -1, + SOUP_STATUS_BAD_REQUEST, + NULL, NULL, -1, + { { NULL } } + }, + + { "NUL in Method", + "G\x00T / HTTP/1.1\r\nHost: example.com\r\n", 37, + SOUP_STATUS_BAD_REQUEST, + NULL, NULL, -1, + { { NULL } } + }, + + { "NUL in Path", + "GET /\x00 HTTP/1.1\r\nHost: example.com\r\n", 38, + SOUP_STATUS_BAD_REQUEST, + NULL, NULL, -1, + { { NULL } } + }, + + { "NUL in Header", + "GET / HTTP/1.1\r\nHost: example\x00com\r\n", 37, + SOUP_STATUS_BAD_REQUEST, + NULL, NULL, -1, + { { NULL } } + }, + + { "No terminating CRLF", + "GET / HTTP/1.1\r\nHost: example.com", -1, + SOUP_STATUS_BAD_REQUEST, + NULL, NULL, -1, + { { NULL } } + }, + + { "Unrecognized expectation", + "GET / HTTP/1.1\r\nHost: example.com\r\nExpect: the-impossible\r\n", -1, + SOUP_STATUS_EXPECTATION_FAILED, + NULL, NULL, -1, + { { NULL } } + } +}; +static const int num_reqtests = G_N_ELEMENTS (reqtests); + +static struct ResponseTest { + const char *description; + const char *response; + int length; + SoupHTTPVersion version; + guint status_code; + const char *reason_phrase; + Header headers[4]; +} resptests[] = { + /***********************/ + /*** VALID RESPONSES ***/ + /***********************/ + + { "HTTP 1.0 response w/ no headers", + "HTTP/1.0 200 ok\r\n", -1, + SOUP_HTTP_1_0, SOUP_STATUS_OK, "ok", + { { NULL } } + }, + + { "HTTP 1.1 response w/ no headers", + "HTTP/1.1 200 ok\r\n", -1, + SOUP_HTTP_1_1, SOUP_STATUS_OK, "ok", + { { NULL } } + }, + + { "Response w/ multi-word Reason-Phrase", + "HTTP/1.1 400 bad request\r\n", -1, + SOUP_HTTP_1_1, SOUP_STATUS_BAD_REQUEST, "bad request", + { { NULL } } + }, + + { "Response w/ 1 header", + "HTTP/1.1 200 ok\r\nFoo: bar\r\n", -1, 
+ SOUP_HTTP_1_1, SOUP_STATUS_OK, "ok", + { { "Foo", "bar" }, + { NULL } + } + }, + + { "Response w/ 2 headers", + "HTTP/1.1 200 ok\r\nFoo: bar\r\nBaz: quux\r\n", -1, + SOUP_HTTP_1_1, SOUP_STATUS_OK, "ok", + { { "Foo", "bar" }, + { "Baz", "quux" }, + { NULL } + } + }, + + { "Response w/ same header multiple times", + "HTTP/1.1 200 ok\r\nFoo: bar\r\nFoo: baz\r\nFoo: quux\r\n", -1, + SOUP_HTTP_1_1, SOUP_STATUS_OK, "ok", + { { "Foo", "bar, baz, quux" }, + { NULL } + } + }, + + { "Response w/ no reason phrase", + "HTTP/1.1 200 \r\nFoo: bar\r\n", -1, + SOUP_HTTP_1_1, SOUP_STATUS_OK, "", + { { "Foo", "bar" }, + { NULL } + } + }, + + { "Connection header on HTTP/1.0 message", + "HTTP/1.0 200 ok\r\nFoo: bar\r\nConnection: Bar\r\nBar: quux\r\n", -1, + SOUP_HTTP_1_0, SOUP_STATUS_OK, "ok", + { { "Foo", "bar" }, + { "Connection", "Bar" }, + { NULL } + } + }, + + /*****************************/ + /*** RECOVERABLE RESPONSES ***/ + /*****************************/ + + /* RFC 2616 section 3.1 says we MUST accept this */ + + { "HTTP/01.01 response", + "HTTP/01.01 200 ok\r\nFoo: bar\r\n", -1, + SOUP_HTTP_1_1, SOUP_STATUS_OK, "ok", + { { "Foo", "bar" }, + { NULL } + } + }, + + /* RFC 2616 section 19.3 says we SHOULD accept these */ + + { "Response w/ LF instead of CRLF after Status-Line", + "HTTP/1.1 200 ok\nFoo: bar\r\n", -1, + SOUP_HTTP_1_1, SOUP_STATUS_OK, "ok", + { { "Foo", "bar" }, + { NULL } + } + }, + + { "Response w/ incorrect spacing in Status-Line", + "HTTP/1.1 200\tok\r\nFoo: bar\r\n", -1, + SOUP_HTTP_1_1, SOUP_STATUS_OK, "ok", + { { "Foo", "bar" }, + { NULL } + } + }, + + { "Response w/ no reason phrase or preceding SP", + "HTTP/1.1 200\r\nFoo: bar\r\n", -1, + SOUP_HTTP_1_1, SOUP_STATUS_OK, "", + { { "Foo", "bar" }, + { NULL } + } + }, + + { "Response w/ no whitespace after status code", + "HTTP/1.1 200ok\r\nFoo: bar\r\n", -1, + SOUP_HTTP_1_1, SOUP_STATUS_OK, "ok", + { { "Foo", "bar" }, + { NULL } + } + }, + + /* Shoutcast support */ + { "Shoutcast server not-quite-HTTP", + 
"ICY 200 OK\r\nFoo: bar\r\n", -1, + SOUP_HTTP_1_0, SOUP_STATUS_OK, "OK", + { { "Foo", "bar" }, + { NULL } + } + }, + + /* qv bug 579318, do_bad_header_tests() below */ + { "Response w/ mangled header", + "HTTP/1.1 200 ok\r\nFoo: one\r\nBar two:2\r\nBaz: three\r\n", -1, + SOUP_HTTP_1_1, SOUP_STATUS_OK, "ok", + { { "Foo", "one" }, + { "Baz", "three" }, + { NULL } + } + }, + + /* qv bug 602863 */ + { "HTTP 1.1 response with leading line break", + "\nHTTP/1.1 200 ok\r\nFoo: bar\r\n", -1, + SOUP_HTTP_1_1, SOUP_STATUS_OK, "ok", + { { "Foo", "bar" }, + { NULL } } + }, + + /*************************/ + /*** INVALID RESPONSES ***/ + /*************************/ + + { "Invalid HTTP version", + "HTTP/1.2 200 OK\r\nFoo: bar\r\n", -1, + -1, 0, NULL, + { { NULL } } + }, + + { "Non-HTTP response", + "SOUP/1.1 200 OK\r\nFoo: bar\r\n", -1, + -1, 0, NULL, + { { NULL } } + }, + + { "Non-numeric status code", + "HTTP/1.1 XXX OK\r\nFoo: bar\r\n", -1, + -1, 0, NULL, + { { NULL } } + }, + + { "No status code", + "HTTP/1.1 OK\r\nFoo: bar\r\n", -1, + -1, 0, NULL, + { { NULL } } + }, + + { "One-digit status code", + "HTTP/1.1 2 OK\r\nFoo: bar\r\n", -1, + -1, 0, NULL, + { { NULL } } + }, + + { "Two-digit status code", + "HTTP/1.1 20 OK\r\nFoo: bar\r\n", -1, + -1, 0, NULL, + { { NULL } } + }, + + { "Four-digit status code", + "HTTP/1.1 2000 OK\r\nFoo: bar\r\n", -1, + -1, 0, NULL, + { { NULL } } + }, + + { "Status code < 100", + "HTTP/1.1 001 OK\r\nFoo: bar\r\n", -1, + -1, 0, NULL, + { { NULL } } + }, + + { "Status code > 599", + "HTTP/1.1 600 OK\r\nFoo: bar\r\n", -1, + -1, 0, NULL, + { { NULL } } + }, + + { "NUL in Reason Phrase", + "HTTP/1.1 200 O\x00K\r\nFoo: bar\r\n", 28, + -1, 0, NULL, + { { NULL } } + }, + + { "NUL in Header", + "HTTP/1.1 200 OK\r\nFoo: b\x00ar\r\n", 28, + -1, 0, NULL, + { { NULL } } + }, +}; +static const int num_resptests = G_N_ELEMENTS (resptests); + +static struct QValueTest { + const char *header_value; + const char *acceptable[7]; + const char *unacceptable[2]; +} 
qvaluetests[] = { + { "text/plain; q=0.5, text/html,\t text/x-dvi; q=0.8, text/x-c", + { "text/html", "text/x-c", "text/x-dvi", "text/plain", NULL }, + { NULL }, + }, + + { "text/*;q=0.3, text/html;q=0.7, text/html;level=1, text/html;level=2;q=0.4, */*;q=0.5", + { "text/html;level=1", "text/html", "*/*", "text/html;level=2", + "text/*", NULL }, + { NULL } + }, + + { "gzip;q=1.0, identity; q=0.5, *;q=0", + { "gzip", "identity", NULL }, + { "*", NULL }, + } +}; +static const int num_qvaluetests = G_N_ELEMENTS (qvaluetests); + +static void +print_header (const char *name, const char *value, gpointer data) +{ + debug_printf (1, " '%s': '%s'\n", name, value); +} + +static gboolean +check_headers (Header *headers, SoupMessageHeaders *hdrs) +{ + GSList *header_names, *h; + SoupMessageHeadersIter iter; + const char *name, *value; + gboolean ok = TRUE; + int i; + + header_names = NULL; + soup_message_headers_iter_init (&iter, hdrs); + while (soup_message_headers_iter_next (&iter, &name, &value)) { + if (!g_slist_find_custom (header_names, name, + (GCompareFunc)strcmp)) + header_names = g_slist_append (header_names, (char *)name); + } + + for (i = 0, h = header_names; headers[i].name && h; i++, h = h->next) { + if (strcmp (h->data, headers[i].name) != 0) { + ok = FALSE; + break; + } + value = soup_message_headers_get_list (hdrs, headers[i].name); + if (strcmp (value, headers[i].value) != 0) { + ok = FALSE; + break; + } + } + if (headers[i].name || h) + ok = FALSE; + g_slist_free (header_names); + return ok; +} + +static void +do_request_tests (void) +{ + int i, len, h; + char *method, *path; + SoupHTTPVersion version; + SoupMessageHeaders *headers; + guint status; + + debug_printf (1, "Request tests\n"); + for (i = 0; i < num_reqtests; i++) { + gboolean ok = TRUE; + + debug_printf (1, "%2d. 
%s (%s): ", i + 1, reqtests[i].description, + soup_status_get_phrase (reqtests[i].status)); + + headers = soup_message_headers_new (SOUP_MESSAGE_HEADERS_REQUEST); + method = path = NULL; + + if (reqtests[i].length == -1) + len = strlen (reqtests[i].request); + else + len = reqtests[i].length; + status = soup_headers_parse_request (reqtests[i].request, len, + headers, &method, &path, + &version); + if (SOUP_STATUS_IS_SUCCESSFUL (status)) { + if ((reqtests[i].method && strcmp (reqtests[i].method, method) != 0) || !reqtests[i].method) + ok = FALSE; + if ((reqtests[i].path && strcmp (reqtests[i].path, path) != 0) || !reqtests[i].path) + ok = FALSE; + if (reqtests[i].version != version) + ok = FALSE; + + if (!check_headers (reqtests[i].headers, headers)) + ok = FALSE; + } else { + if (status != reqtests[i].status) + ok = FALSE; + } + + if (ok) + debug_printf (1, "OK!\n"); + else { + debug_printf (1, "BAD!\n"); + errors++; + if (reqtests[i].method) { + debug_printf (1, " expected: '%s' '%s' 'HTTP/1.%d'\n", + reqtests[i].method, + reqtests[i].path, + reqtests[i].version); + for (h = 0; reqtests[i].headers[h].name; h++) { + debug_printf (1, " '%s': '%s'\n", + reqtests[i].headers[h].name, + reqtests[i].headers[h].value); + } + } else { + debug_printf (1, " expected: %s\n", + soup_status_get_phrase (reqtests[i].status)); + } + if (method) { + debug_printf (1, " got: '%s' '%s' 'HTTP/1.%d'\n", + method, path, version); + soup_message_headers_foreach (headers, print_header, NULL); + } else { + debug_printf (1, " got: %s\n", + soup_status_get_phrase (status)); + } + } + + g_free (method); + g_free (path); + soup_message_headers_free (headers); + } + debug_printf (1, "\n"); +} + +static void +do_response_tests (void) +{ + int i, len, h; + guint status_code; + char *reason_phrase; + SoupHTTPVersion version; + SoupMessageHeaders *headers; + + debug_printf (1, "Response tests\n"); + for (i = 0; i < num_resptests; i++) { + gboolean ok = TRUE; + + debug_printf (1, "%2d. 
%s (%s): ", i + 1, resptests[i].description, + resptests[i].reason_phrase ? "should parse" : "should NOT parse"); + + headers = soup_message_headers_new (SOUP_MESSAGE_HEADERS_RESPONSE); + reason_phrase = NULL; + + if (resptests[i].length == -1) + len = strlen (resptests[i].response); + else + len = resptests[i].length; + if (soup_headers_parse_response (resptests[i].response, len, + headers, &version, + &status_code, &reason_phrase)) { + if (resptests[i].version != version) + ok = FALSE; + if (resptests[i].status_code != status_code) + ok = FALSE; + if ((resptests[i].reason_phrase && strcmp (resptests[i].reason_phrase, reason_phrase) != 0) || !resptests[i].reason_phrase) + ok = FALSE; + + if (!check_headers (resptests[i].headers, headers)) + ok = FALSE; + } else { + if (resptests[i].reason_phrase) + ok = FALSE; + } + + if (ok) + debug_printf (1, "OK!\n"); + else { + debug_printf (1, "BAD!\n"); + errors++; + if (resptests[i].reason_phrase) { + debug_printf (1, " expected: 'HTTP/1.%d' '%03d' '%s'\n", + resptests[i].version, + resptests[i].status_code, + resptests[i].reason_phrase); + for (h = 0; resptests[i].headers[h].name; h++) { + debug_printf (1, " '%s': '%s'\n", + resptests[i].headers[h].name, + resptests[i].headers[h].value); + } + } else + debug_printf (1, " expected: parse error\n"); + if (reason_phrase) { + debug_printf (1, " got: 'HTTP/1.%d' '%03d' '%s'\n", + version, status_code, reason_phrase); + soup_message_headers_foreach (headers, print_header, NULL); + } else + debug_printf (1, " got: parse error\n"); + } + + g_free (reason_phrase); + soup_message_headers_free (headers); + } + debug_printf (1, "\n"); +} + +static void +do_qvalue_tests (void) +{ + int i, j; + GSList *acceptable, *unacceptable, *iter; + gboolean wrong; + + debug_printf (1, "qvalue tests\n"); + for (i = 0; i < num_qvaluetests; i++) { + debug_printf (1, "%2d. 
%s:\n", i + 1, qvaluetests[i].header_value); + + unacceptable = NULL; + acceptable = soup_header_parse_quality_list (qvaluetests[i].header_value, + &unacceptable); + + debug_printf (1, " acceptable: "); + wrong = FALSE; + if (acceptable) { + for (iter = acceptable, j = 0; iter; iter = iter->next, j++) { + debug_printf (1, "%s ", (char *)iter->data); + if (!qvaluetests[i].acceptable[j] || + strcmp (iter->data, qvaluetests[i].acceptable[j]) != 0) + wrong = TRUE; + } + debug_printf (1, "\n"); + soup_header_free_list (acceptable); + } else + debug_printf (1, "(none)\n"); + if (wrong) { + debug_printf (1, " WRONG! expected: "); + for (j = 0; qvaluetests[i].acceptable[j]; j++) + debug_printf (1, "%s ", qvaluetests[i].acceptable[j]); + debug_printf (1, "\n"); + errors++; + } + + debug_printf (1, " unacceptable: "); + wrong = FALSE; + if (unacceptable) { + for (iter = unacceptable, j = 0; iter; iter = iter->next, j++) { + debug_printf (1, "%s ", (char *)iter->data); + if (!qvaluetests[i].unacceptable[j] || + strcmp (iter->data, qvaluetests[i].unacceptable[j]) != 0) + wrong = TRUE; + } + debug_printf (1, "\n"); + soup_header_free_list (unacceptable); + } else + debug_printf (1, "(none)\n"); + if (wrong) { + debug_printf (1, " WRONG! 
expected: "); + for (j = 0; qvaluetests[i].unacceptable[j]; j++) + debug_printf (1, "%s ", qvaluetests[i].unacceptable[j]); + debug_printf (1, "\n"); + errors++; + } + + debug_printf (1, "\n"); + } +} + +#define RFC5987_TEST_FILENAME "t\xC3\xA9st.txt" +#define RFC5987_TEST_FALLBACK_FILENAME "test.txt" + +#define RFC5987_TEST_HEADER_ENCODED "attachment; filename*=UTF-8''t%C3%A9st.txt" + +#define RFC5987_TEST_HEADER_UTF8 "attachment; filename*=UTF-8''t%C3%A9st.txt; filename=\"test.txt\"" +#define RFC5987_TEST_HEADER_ISO "attachment; filename=\"test.txt\"; filename*=iso-8859-1''t%E9st.txt" +#define RFC5987_TEST_HEADER_FALLBACK "attachment; filename*=Unknown''t%FF%FF%FFst.txt; filename=\"test.txt\"" + +static void +do_content_disposition_tests (void) +{ + SoupMessageHeaders *hdrs; + GHashTable *params; + const char *header, *filename; + char *disposition; + SoupBuffer *buffer; + SoupMultipart *multipart; + SoupMessageBody *body; + + debug_printf (1, "Content-Disposition tests\n"); + + hdrs = soup_message_headers_new (SOUP_MESSAGE_HEADERS_MULTIPART); + params = g_hash_table_new (g_str_hash, g_str_equal); + g_hash_table_insert (params, "filename", RFC5987_TEST_FILENAME); + soup_message_headers_set_content_disposition (hdrs, "attachment", params); + g_hash_table_destroy (params); + + header = soup_message_headers_get_one (hdrs, "Content-Disposition"); + if (!strcmp (header, RFC5987_TEST_HEADER_ENCODED)) + debug_printf (1, " encoded OK\n"); + else { + debug_printf (1, " encoding FAILED!\n expected: %s\n got: %s\n", + RFC5987_TEST_HEADER_ENCODED, header); + errors++; + } + + /* UTF-8 decoding */ + soup_message_headers_clear (hdrs); + soup_message_headers_append (hdrs, "Content-Disposition", + RFC5987_TEST_HEADER_UTF8); + if (!soup_message_headers_get_content_disposition (hdrs, + &disposition, + ¶ms)) { + debug_printf (1, " UTF-8 decoding FAILED!\n could not parse\n"); + errors++; + return; + } + g_free (disposition); + + filename = g_hash_table_lookup (params, "filename"); 
+ if (!filename) { + debug_printf (1, " UTF-8 decoding FAILED!\n could not find filename\n"); + errors++; + } else if (strcmp (filename, RFC5987_TEST_FILENAME) != 0) { + debug_printf (1, " UTF-8 decoding FAILED!\n expected: %s\n got: %s\n", + RFC5987_TEST_FILENAME, filename); + errors++; + } else + debug_printf (1, " UTF-8 decoded OK\n"); + g_hash_table_destroy (params); + + /* ISO-8859-1 decoding */ + soup_message_headers_clear (hdrs); + soup_message_headers_append (hdrs, "Content-Disposition", + RFC5987_TEST_HEADER_ISO); + if (!soup_message_headers_get_content_disposition (hdrs, + &disposition, + ¶ms)) { + debug_printf (1, " iso-8859-1 decoding FAILED!\n could not parse\n"); + errors++; + return; + } + g_free (disposition); + + filename = g_hash_table_lookup (params, "filename"); + if (!filename) { + debug_printf (1, " iso-8859-1 decoding FAILED!\n could not find filename\n"); + errors++; + } else if (strcmp (filename, RFC5987_TEST_FILENAME) != 0) { + debug_printf (1, " iso-8859-1 decoding FAILED!\n expected: %s\n got: %s\n", + RFC5987_TEST_FILENAME, filename); + errors++; + } else + debug_printf (1, " iso-8859-1 decoded OK\n"); + g_hash_table_destroy (params); + + /* Fallback */ + soup_message_headers_clear (hdrs); + soup_message_headers_append (hdrs, "Content-Disposition", + RFC5987_TEST_HEADER_FALLBACK); + if (!soup_message_headers_get_content_disposition (hdrs, + &disposition, + ¶ms)) { + debug_printf (1, " fallback decoding FAILED!\n could not parse\n"); + errors++; + return; + } + g_free (disposition); + + filename = g_hash_table_lookup (params, "filename"); + if (!filename) { + debug_printf (1, " fallback decoding FAILED!\n could not find filename\n"); + errors++; + } else if (strcmp (filename, RFC5987_TEST_FALLBACK_FILENAME) != 0) { + debug_printf (1, " fallback decoding FAILED!\n expected: %s\n got: %s\n", + RFC5987_TEST_FALLBACK_FILENAME, filename); + errors++; + } else + debug_printf (1, " fallback decoded OK\n"); + g_hash_table_destroy (params); + + 
soup_message_headers_free (hdrs); + + /* Ensure that soup-multipart always quotes filename (bug 641280) */ + multipart = soup_multipart_new (SOUP_FORM_MIME_TYPE_MULTIPART); + buffer = soup_buffer_new (SOUP_MEMORY_STATIC, "foo", 3); + soup_multipart_append_form_file (multipart, "test", "token", + "text/plain", buffer); + soup_buffer_free (buffer); + + hdrs = soup_message_headers_new (SOUP_MESSAGE_HEADERS_MULTIPART); + body = soup_message_body_new (); + soup_multipart_to_message (multipart, hdrs, body); + soup_message_headers_free (hdrs); + soup_multipart_free (multipart); + + buffer = soup_message_body_flatten (body); + soup_message_body_free (body); + + if (strstr (buffer->data, "filename=\"token\"")) + debug_printf (1, " SoupMultipart encoded filename correctly\n"); + else { + debug_printf (1, " SoupMultipart encoded filename incorrectly!\n"); + errors++; + } + soup_buffer_free (buffer); + + debug_printf (1, "\n"); +} + +#define CONTENT_TYPE_TEST_MIME_TYPE "text/plain" +#define CONTENT_TYPE_TEST_ATTRIBUTE "charset" +#define CONTENT_TYPE_TEST_VALUE "US-ASCII" +#define CONTENT_TYPE_TEST_HEADER "text/plain; charset=US-ASCII" + +#define CONTENT_TYPE_BAD_HEADER "plain text, not text/html" + +static void +do_content_type_tests (void) +{ + SoupMessageHeaders *hdrs; + GHashTable *params; + const char *header, *mime_type; + + debug_printf (1, "Content-Type tests\n"); + + hdrs = soup_message_headers_new (SOUP_MESSAGE_HEADERS_MULTIPART); + params = g_hash_table_new (g_str_hash, g_str_equal); + g_hash_table_insert (params, CONTENT_TYPE_TEST_ATTRIBUTE, + CONTENT_TYPE_TEST_VALUE); + soup_message_headers_set_content_type (hdrs, CONTENT_TYPE_TEST_MIME_TYPE, params); + g_hash_table_destroy (params); + + header = soup_message_headers_get_one (hdrs, "Content-Type"); + if (!strcmp (header, CONTENT_TYPE_TEST_HEADER)) + debug_printf (1, " encoded OK\n"); + else { + debug_printf (1, " encoding FAILED!\n expected: %s\n got: %s\n", + CONTENT_TYPE_TEST_HEADER, header); + errors++; + } + + 
soup_message_headers_clear (hdrs); + soup_message_headers_append (hdrs, "Content-Type", + CONTENT_TYPE_TEST_MIME_TYPE); + /* Add a second Content-Type header: should be ignored */ + soup_message_headers_append (hdrs, "Content-Type", + CONTENT_TYPE_TEST_MIME_TYPE); + + mime_type = soup_message_headers_get_content_type (hdrs, ¶ms); + if (!mime_type) { + debug_printf (1, " decoding FAILED!\n could not parse\n"); + errors++; + } + + if (mime_type && strcmp (mime_type, CONTENT_TYPE_TEST_MIME_TYPE) != 0) { + debug_printf (1, " decoding FAILED!\n bad returned MIME type: %s\n", + mime_type); + errors++; + } else if (params && g_hash_table_size (params) != 0) { + debug_printf (1, " decoding FAILED!\n params contained %d params (should be 0)\n", + g_hash_table_size (params)); + errors++; + } else + debug_printf (1, " decoded OK\n"); + + if (params) + g_hash_table_destroy (params); + + soup_message_headers_clear (hdrs); + soup_message_headers_append (hdrs, "Content-Type", + CONTENT_TYPE_BAD_HEADER); + mime_type = soup_message_headers_get_content_type (hdrs, ¶ms); + if (mime_type) { + debug_printf (1, " Bad content rejection FAILED!\n"); + errors++; + } else + debug_printf (1, " Bad content rejection OK\n"); + + soup_message_headers_free (hdrs); + + debug_printf (1, "\n"); +} + +struct { + const char *name, *value; +} test_params[] = { + { "one", "foo" }, + { "two", "test with spaces" }, + { "three", "test with \"quotes\" and \\s" }, + { "four", NULL }, + { "five", "test with \xC3\xA1\xC3\xA7\xC4\x89\xC3\xA8\xC3\xB1\xC5\xA3\xC5\xA1" } +}; + +#define TEST_PARAMS_RESULT "one=foo, two=\"test with spaces\", three=\"test with \\\"quotes\\\" and \\\\s\", four, five*=UTF-8''test%20with%20%C3%A1%C3%A7%C4%89%C3%A8%C3%B1%C5%A3%C5%A1" + +static void +do_append_param_tests (void) +{ + GString *params; + int i; + + debug_printf (1, "soup_header_g_string_append_param() tests\n"); + + params = g_string_new (NULL); + for (i = 0; i < G_N_ELEMENTS (test_params); i++) { + if (i > 0) + 
g_string_append (params, ", "); + soup_header_g_string_append_param (params, + test_params[i].name, + test_params[i].value); + } + if (strcmp (params->str, TEST_PARAMS_RESULT) != 0) { + debug_printf (1, " FAILED!\n expected: %s\n got: %s\n", + TEST_PARAMS_RESULT, params->str); + errors++; + } else + debug_printf (1, " OK\n"); + g_string_free (params, TRUE); + + debug_printf (1, "\n"); +} + +static const struct { + const char *description, *name, *value; +} bad_headers[] = { + { "Empty name", "", "value" }, + { "Name with spaces", "na me", "value" }, + { "Name with colon", "na:me", "value" }, + { "Name with CR", "na\rme", "value" }, + { "Name with LF", "na\nme", "value" }, + { "Name with tab", "na\tme", "value" }, + { "Value with CR", "name", "val\rue" }, + { "Value with LF", "name", "val\nue" }, + { "Value with LWS", "name", "val\r\n ue" } +}; + +static void +do_bad_header_tests (void) +{ + SoupMessageHeaders *hdrs; + int i; + + debug_printf (1, "bad header rejection tests\n"); + + hdrs = soup_message_headers_new (SOUP_MESSAGE_HEADERS_MULTIPART); + for (i = 0; i < G_N_ELEMENTS (bad_headers); i++) { + debug_printf (1, " %s\n", bad_headers[i].description); + expect_warning = TRUE; + soup_message_headers_append (hdrs, bad_headers[i].name, + bad_headers[i].value); + if (expect_warning) { + expect_warning = FALSE; + debug_printf (1, " FAILED: soup_message_headers_append() did not reject it\n"); + errors++; + } + } + soup_message_headers_free (hdrs); +} + +int +main (int argc, char **argv) +{ + test_init (argc, argv, NULL); + + do_request_tests (); + do_response_tests (); + do_qvalue_tests (); + do_content_disposition_tests (); + do_content_type_tests (); + do_append_param_tests (); + do_bad_header_tests (); + + test_cleanup (); + return errors != 0; +} diff --git a/tests/htdigest b/tests/htdigest new file mode 100644 index 0000000..352520f --- /dev/null +++ b/tests/htdigest @@ -0,0 +1,3 @@ +user1:realm1:69cb1fa0285304a71f8975aecd027008 
+user2:realm2:b67d8ee3c2e271abba78f71d12fe472e +user3:realm3:601c319693279abbc07d332bd7637239 diff --git a/tests/htpasswd b/tests/htpasswd new file mode 100644 index 0000000..04e9ce2 --- /dev/null +++ b/tests/htpasswd @@ -0,0 +1,3 @@ +user1:sTFk2g6n8RsWY +user2:N.Dlbd.xU4K1w +user3:c2vSU/3eQHy.w diff --git a/tests/httpd.conf.in b/tests/httpd.conf.in new file mode 100644 index 0000000..de0b75f --- /dev/null +++ b/tests/httpd.conf.in @@ -0,0 +1,293 @@ +# http.conf used for testing auth-test + +ServerName 127.0.0.1 +Listen 127.0.0.1:47524 + +PidFile @builddir@/httpd.pid +DocumentRoot @srcdir@ + +# The tests shut down apache with "graceful-stop", because that makes +# it close its listening socket right away. But it seems to sometimes +# result in apache never fully exiting. This fixes that. +GracefulShutdownTimeout 1 + +# Change this to "./error.log" if it's failing and you don't know why +ErrorLog /dev/null + +LoadModule alias_module @APACHE_MODULE_DIR@/mod_alias.so +LoadModule auth_basic_module @APACHE_MODULE_DIR@/mod_auth_basic.so +LoadModule auth_digest_module @APACHE_MODULE_DIR@/mod_auth_digest.so +LoadModule authn_file_module @APACHE_MODULE_DIR@/mod_authn_file.so +LoadModule authz_host_module @APACHE_MODULE_DIR@/mod_authz_host.so +LoadModule authz_user_module @APACHE_MODULE_DIR@/mod_authz_user.so +LoadModule dir_module @APACHE_MODULE_DIR@/mod_dir.so +LoadModule mime_module @APACHE_MODULE_DIR@/mod_mime.so +@IF_HAVE_PHP@LoadModule php5_module @APACHE_PHP_MODULE_DIR@/@APACHE_PHP_MODULE@ +LoadModule proxy_module @APACHE_MODULE_DIR@/mod_proxy.so +LoadModule proxy_http_module @APACHE_MODULE_DIR@/mod_proxy_http.so +LoadModule proxy_connect_module @APACHE_MODULE_DIR@/mod_proxy_connect.so +LoadModule ssl_module @APACHE_SSL_MODULE_DIR@/mod_ssl.so + +DirectoryIndex index.txt +TypesConfig /dev/null +AddType application/x-httpd-php .php +Redirect permanent /redirected /index.txt + +# Proxy #1: unauthenticated +Listen 127.0.0.1:47526 + + ProxyRequests On + AllowCONNECT 47525 
+ + # Deny proxying by default + + Order Deny,Allow + Deny from all + + + # Allow local http connections + + Order Allow,Deny + Allow from all + + + # Allow CONNECT to local https port + + Order Allow,Deny + Allow from all + + + # Deny non-proxy requests + + Order Deny,Allow + Deny from all + + + +# Proxy #2: authenticated +Listen 127.0.0.1:47527 + + ProxyRequests On + AllowCONNECT 47525 + + # Deny proxying by default + + Order Deny,Allow + Deny from all + + + # Allow local http connections with authentication + + Order Allow,Deny + Allow from all + + AuthType Basic + AuthName realm1 + AuthUserFile @srcdir@/htpasswd + Require valid-user + + + # Allow CONNECT to local https port with authentication + + Order Allow,Deny + Allow from all + + AuthType Basic + AuthName realm1 + AuthUserFile @srcdir@/htpasswd + Require valid-user + + + # Fail non-proxy requests + + Order Deny,Allow + Deny from all + + + +# Proxy #3: unauthenticatable-to +Listen 127.0.0.1:47528 + + ProxyRequests On + AllowCONNECT 47525 + + # Deny proxying by default + + Order Deny,Allow + Deny from all + + + # Allow local http connections with authentication + + Order Allow,Deny + Allow from all + + AuthType Basic + AuthName realm1 + AuthUserFile @srcdir@/htpasswd + Require user no-such-user + + + # Allow CONNECT to local https port with authentication + + Order Allow,Deny + Allow from all + + AuthType Basic + AuthName realm1 + AuthUserFile @srcdir@/htpasswd + Require user no-such-user + + + # Fail non-proxy requests + + Order Deny,Allow + Deny from all + + + + +# SSL setup + + Listen 127.0.0.1:47525 + + + SSLEngine on + + SSLCertificateFile @srcdir@/test-cert.pem + SSLCertificateKeyFile @srcdir@/test-key.pem + + + + + +# Basic auth tests +Alias /Basic/realm1/realm2/realm1 @srcdir@ +Alias /Basic/realm1/realm2 @srcdir@ +Alias /Basic/realm1/subdir @srcdir@ +Alias /Basic/realm1/not @srcdir@ +Alias /Basic/realm1 @srcdir@ +Alias /Basic/realm12/subdir @srcdir@ +Alias /Basic/realm12 @srcdir@ +Alias /Basic/realm2 
@srcdir@ +Alias /Basic/realm3 @srcdir@ +Alias /Basic @srcdir@ + + + AuthType Basic + AuthName realm1 + AuthUserFile @srcdir@/htpasswd + Require user user1 + + + + AuthType Basic + AuthName realm1 + AuthUserFile @srcdir@/htpasswd + Require user user2 + + + + AuthType Basic + AuthName realm12 + AuthUserFile @srcdir@/htpasswd + Require user user1 user2 + + + + AuthType Basic + AuthName realm2 + AuthUserFile @srcdir@/htpasswd + Require user user2 + + + + AuthType Basic + AuthName realm1 + AuthUserFile @srcdir@/htpasswd + Require user user1 + + + + AuthType Basic + AuthName realm2 + AuthUserFile @srcdir@/htpasswd + Require user user2 + + + + AuthType Basic + AuthName realm3 + AuthUserFile @srcdir@/htpasswd + Require user user3 + + +# Digest auth tests +Alias /Digest/realm1/realm2/realm1 @srcdir@ +Alias /Digest/realm1/realm2 @srcdir@ +Alias /Digest/realm1/subdir @srcdir@ +Alias /Digest/realm1/expire @srcdir@ +Alias /Digest/realm1/not @srcdir@ +Alias /Digest/realm1 @srcdir@ +Alias /Digest/realm2 @srcdir@ +Alias /Digest/realm3 @srcdir@ +Alias /Digest @srcdir@ + + + AuthType Digest + AuthName realm1 + AuthUserFile @srcdir@/htdigest + AuthDigestDomain /Digest/realm1 /Digest/realm1/realm2/realm1 + Require valid-user + + + + AuthType Digest + AuthName realm1 + AuthUserFile @srcdir@/htdigest + AuthDigestDomain /Digest/realm1 /Digest/realm1/realm2/realm1 + AuthDigestNonceLifetime 2 + Require valid-user + + + + AuthType Digest + AuthName realm1 + AuthUserFile @srcdir@/htdigest + AuthDigestDomain /Digest/realm1 /Digest/realm1/realm2/realm1 + Require user user2 + + + + AuthType Digest + AuthName realm2 + AuthUserFile @srcdir@/htdigest + AuthDigestDomain /Digest/realm2 /Digest/realm1/realm2 + Require valid-user + + + + AuthType Digest + AuthName realm1 + AuthUserFile @srcdir@/htdigest + AuthDigestDomain /Digest/realm1 /Digest/realm1/realm2/realm1 + Require valid-user + + + + AuthType Digest + AuthName realm2 + AuthUserFile @srcdir@/htdigest + AuthDigestDomain /Digest/realm2 
/Digest/realm1/realm2 + Require valid-user + + + + AuthType Digest + AuthName realm3 + AuthUserFile @srcdir@/htdigest + AuthDigestDomain /Digest/realm3 + Require valid-user + # test RFC2069-style Digest + AuthDigestQop none + diff --git a/tests/index.txt b/tests/index.txt new file mode 100644 index 0000000..931a6aa --- /dev/null +++ b/tests/index.txt @@ -0,0 +1,430 @@ +Directory index file for test httpd. + +pull-api wants this file to be larger than 3 times +soup-message-io.c:RESPONSE_BUFFER_SIZE. Also, the contents of the file +should not repeat themselves. (There are no duplicated lines here +other than the two blank lines.) + + !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdef +g hijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMN +OP QRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./0123456 +789 :;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{| +}~!" #$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcd +efghi jklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKL +MNOPQR STUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./01234 +56789:; <=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz +{|}~!"#$ %&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`ab +cdefghijk lmnopqrstuvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJ +KLMNOPQRST UVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./012 +3456789:;<= >?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwx +yz{|}~!"#$%& '()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_` +abcdefghijklm nopqrstuvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGH +IJKLMNOPQRSTUV WXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./0 +123456789:;<=>? 
@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuv +wxyz{|}~!"#$%&'( )*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^ +_`abcdefghijklmno pqrstuvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ABCDEF +GHIJKLMNOPQRSTUVWX YZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-. +/0123456789:;<=>?@A BCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrst +uvwxyz{|}~!"#$%&'()* +,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\ +]^_`abcdefghijklmnopq rstuvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ABCD +EFGHIJKLMNOPQRSTUVWXYZ [\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()*+, +-./0123456789:;<=>?@ABC DEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqr +stuvwxyz{|}~!"#$%&'()*+, -./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ +[\]^_`abcdefghijklmnopqrs tuvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@AB +CDEFGHIJKLMNOPQRSTUVWXYZ[\ ]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()* ++,-./0123456789:;<=>?@ABCDE FGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnop +qrstuvwxyz{|}~!"#$%&'()*+,-. /0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWX +YZ[\]^_`abcdefghijklmnopqrstu vwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ +ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^ _`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'( +)*+,-./0123456789:;<=>?@ABCDEFG HIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmn +opqrstuvwxyz{|}~!"#$%&'()*+,-./0 123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUV +WXYZ[\]^_`abcdefghijklmnopqrstuvw xyz{|}~!"#$%&'()*+,-./0123456789:;<=> +?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_` abcdefghijklmnopqrstuvwxyz{|}~!"#$%& +'()*+,-./0123456789:;<=>?@ABCDEFGHI JKLMNOPQRSTUVWXYZ[\]^_`abcdefghijkl +mnopqrstuvwxyz{|}~!"#$%&'()*+,-./012 3456789:;<=>?@ABCDEFGHIJKLMNOPQRST +UVWXYZ[\]^_`abcdefghijklmnopqrstuvwxy z{|}~!"#$%&'()*+,-./0123456789:;< +=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`ab cdefghijklmnopqrstuvwxyz{|}~!"#$ +%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJK LMNOPQRSTUVWXYZ[\]^_`abcdefghij +klmnopqrstuvwxyz{|}~!"#$%&'()*+,-./01234 56789:;<=>?@ABCDEFGHIJKLMNOPQR +STUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{ |}~!"#$%&'()*+,-./0123456789: +;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcd 
efghijklmnopqrstuvwxyz{|}~!" +#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLM NOPQRSTUVWXYZ[\]^_`abcdefgh +ijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./0123456 789:;<=>?@ABCDEFGHIJKLMNOP +QRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|} ~!"#$%&'()*+,-./012345678 +9:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdef ghijklmnopqrstuvwxyz{|}~ +!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNO PQRSTUVWXYZ[\]^_`abcdef +ghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./012345678 9:;<=>?@ABCDEFGHIJKLMN +OPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~! "#$%&'()*+,-./0123456 +789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefgh ijklmnopqrstuvwxyz{| +}~!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQ RSTUVWXYZ[\]^_`abcd +efghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./0123456789: ;<=>?@ABCDEFGHIJKL +MNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"# $%&'()*+,-./01234 +56789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghij klmnopqrstuvwxyz +{|}~!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRS TUVWXYZ[\]^_`ab +cdefghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./0123456789:;< =>?@ABCDEFGHIJ +KLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$% &'()*+,-./012 +3456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijkl mnopqrstuvwx +yz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTU VWXYZ[\]^_` +abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=> ?@ABCDEFGH +IJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&' ()*+,-./0 +123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmn opqrstuv +wxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVW XYZ[\]^ +_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ ABCDEF +GHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'() *+,-. 
+/0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnop qrst +uvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXY Z[\ +]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@AB CD +EFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()*+ , +-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqr + stuvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ +[ \]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@AB +CD EFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()* ++,- ./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnop +qrst uvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWX +YZ[\] ^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ +ABCDEF GHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'( +)*+,-./ 0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmn +opqrstuv wxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUV +WXYZ[\]^_ `abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=> +?@ABCDEFGH IJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%& +'()*+,-./01 23456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijkl +mnopqrstuvwx yz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRST +UVWXYZ[\]^_`a bcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./0123456789:;< +=>?@ABCDEFGHIJ KLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$ +%&'()*+,-./0123 456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghij +klmnopqrstuvwxyz {|}~!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQR +STUVWXYZ[\]^_`abc defghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./0123456789: +;<=>?@ABCDEFGHIJKL MNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!" 
+#$%&'()*+,-./012345 6789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefgh +ijklmnopqrstuvwxyz{| }~!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOP +QRSTUVWXYZ[\]^_`abcde fghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./012345678 +9:;<=>?@ABCDEFGHIJKLMN OPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ +!"#$%&'()*+,-./01234567 89:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdef +ghijklmnopqrstuvwxyz{|}~ !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMN +OPQRSTUVWXYZ[\]^_`abcdefg hijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./0123456 +789:;<=>?@ABCDEFGHIJKLMNOP QRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{| +}~!"#$%&'()*+,-./0123456789 :;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcd +efghijklmnopqrstuvwxyz{|}~!" #$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKL +MNOPQRSTUVWXYZ[\]^_`abcdefghi jklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./01234 +56789:;<=>?@ABCDEFGHIJKLMNOPQR STUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz +{|}~!"#$%&'()*+,-./0123456789:; <=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`ab +cdefghijklmnopqrstuvwxyz{|}~!"#$ %&'()*+,-./0123456789:;<=>?@ABCDEFGHIJ +KLMNOPQRSTUVWXYZ[\]^_`abcdefghijk lmnopqrstuvwxyz{|}~!"#$%&'()*+,-./012 +3456789:;<=>?@ABCDEFGHIJKLMNOPQRST UVWXYZ[\]^_`abcdefghijklmnopqrstuvwx +yz{|}~!"#$%&'()*+,-./0123456789:;<= >?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_` +abcdefghijklmnopqrstuvwxyz{|}~!"#$%& '()*+,-./0123456789:;<=>?@ABCDEFGH +IJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklm nopqrstuvwxyz{|}~!"#$%&'()*+,-./0 +123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUV WXYZ[\]^_`abcdefghijklmnopqrstuv +wxyz{|}~!"#$%&'()*+,-./0123456789:;<=>? @ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^ +_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'( )*+,-./0123456789:;<=>?@ABCDEF +GHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmno pqrstuvwxyz{|}~!"#$%&'()*+,-. 
+/0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWX YZ[\]^_`abcdefghijklmnopqrst +uvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@A BCDEFGHIJKLMNOPQRSTUVWXYZ[\ +]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()* +,-./0123456789:;<=>?@ABCD +EFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopq rstuvwxyz{|}~!"#$%&'()*+, +-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ [\]^_`abcdefghijklmnopqr +stuvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ABC DEFGHIJKLMNOPQRSTUVWXYZ +[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()*+, -./0123456789:;<=>?@AB +CDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrs tuvwxyz{|}~!"#$%&'()* ++,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\ ]^_`abcdefghijklmnop +qrstuvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ABCDE FGHIJKLMNOPQRSTUVWX +YZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-. /0123456789:;<=>?@ +ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstu vwxyz{|}~!"#$%&'( +)*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^ _`abcdefghijklmn +opqrstuvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFG HIJKLMNOPQRSTUV +WXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./0 123456789:;<=> +?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvw xyz{|}~!"#$%& +'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_` abcdefghijkl +mnopqrstuvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHI JKLMNOPQRST +UVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./012 3456789:;< +=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxy z{|}~!"#$ +%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`ab cdefghij +klmnopqrstuvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJK LMNOPQR +STUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./01234 56789: +;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{ |}~!" 
+#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcd efgh +ijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLM NOP +QRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./0123456 78 +9:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|} ~ +!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdef + ghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMN +O PQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./0123456 +78 9:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{| +}~! "#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcd +efgh ijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKL +MNOPQ RSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./01234 +56789: ;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz +{|}~!"# $%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`ab +cdefghij klmnopqrstuvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJ +KLMNOPQRS TUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./012 +3456789:;< =>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwx +yz{|}~!"#$% &'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_` +abcdefghijkl mnopqrstuvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGH +IJKLMNOPQRSTU VWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./0 +123456789:;<=> ?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuv +wxyz{|}~!"#$%&' ()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^ +_`abcdefghijklmn opqrstuvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ABCDEF +GHIJKLMNOPQRSTUVW XYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-. 
+/0123456789:;<=>?@ ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrst +uvwxyz{|}~!"#$%&'() *+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\ +]^_`abcdefghijklmnop qrstuvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ABCD +EFGHIJKLMNOPQRSTUVWXY Z[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()*+, +-./0123456789:;<=>?@AB CDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqr +stuvwxyz{|}~!"#$%&'()*+ ,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ +[\]^_`abcdefghijklmnopqr stuvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@AB +CDEFGHIJKLMNOPQRSTUVWXYZ[ \]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()* ++,-./0123456789:;<=>?@ABCD EFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnop +qrstuvwxyz{|}~!"#$%&'()*+,- ./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWX +YZ[\]^_`abcdefghijklmnopqrst uvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ +ABCDEFGHIJKLMNOPQRSTUVWXYZ[\] ^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'( +)*+,-./0123456789:;<=>?@ABCDEF GHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmn +opqrstuvwxyz{|}~!"#$%&'()*+,-./ 0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUV +WXYZ[\]^_`abcdefghijklmnopqrstuv wxyz{|}~!"#$%&'()*+,-./0123456789:;<=> +?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_ `abcdefghijklmnopqrstuvwxyz{|}~!"#$%& +'()*+,-./0123456789:;<=>?@ABCDEFGH IJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijkl +mnopqrstuvwxyz{|}~!"#$%&'()*+,-./01 23456789:;<=>?@ABCDEFGHIJKLMNOPQRST +UVWXYZ[\]^_`abcdefghijklmnopqrstuvwx yz{|}~!"#$%&'()*+,-./0123456789:;< +=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`a bcdefghijklmnopqrstuvwxyz{|}~!"#$ +%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJ KLMNOPQRSTUVWXYZ[\]^_`abcdefghij +klmnopqrstuvwxyz{|}~!"#$%&'()*+,-./0123 456789:;<=>?@ABCDEFGHIJKLMNOPQR +STUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz {|}~!"#$%&'()*+,-./0123456789: +;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abc defghijklmnopqrstuvwxyz{|}~!" 
+#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKL MNOPQRSTUVWXYZ[\]^_`abcdefgh +ijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./012345 6789:;<=>?@ABCDEFGHIJKLMNOP +QRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{| }~!"#$%&'()*+,-./012345678 +9:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcde fghijklmnopqrstuvwxyz{|}~ +!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMN OPQRSTUVWXYZ[\]^_`abcdef +ghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./01234567 89:;<=>?@ABCDEFGHIJKLMN +OPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ !"#$%&'()*+,-./0123456 +789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefg hijklmnopqrstuvwxyz{| +}~!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOP QRSTUVWXYZ[\]^_`abcd +efghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./0123456789 :;<=>?@ABCDEFGHIJKL +MNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!" #$%&'()*+,-./01234 +56789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghi jklmnopqrstuvwxyz +{|}~!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQR STUVWXYZ[\]^_`ab +cdefghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./0123456789:; <=>?@ABCDEFGHIJ +KLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$ %&'()*+,-./012 +3456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijk lmnopqrstuvwx +yz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRST UVWXYZ[\]^_` +abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./0123456789:;<= >?@ABCDEFGH +IJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%& '()*+,-./0 +123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklm nopqrstuv +wxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUV WXYZ[\]^ +_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>? @ABCDEF +GHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'( )*+,-. 
+/0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmno pqrst +uvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWX YZ[\ +]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@A BCD +EFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()* +, +-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopq r +stuvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ + [\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@AB +C DEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()* ++, -./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnop +qrs tuvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWX +YZ[\ ]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ +ABCDE FGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'( +)*+,-. /0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmn +opqrstu vwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUV +WXYZ[\]^ _`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=> +?@ABCDEFG HIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%& +'()*+,-./0 123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijkl +mnopqrstuvw xyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRST +UVWXYZ[\]^_` abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./0123456789:;< +=>?@ABCDEFGHI JKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$ +%&'()*+,-./012 3456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghij +klmnopqrstuvwxy z{|}~!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQR +STUVWXYZ[\]^_`ab cdefghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./0123456789: +;<=>?@ABCDEFGHIJK LMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!" 
+#$%&'()*+,-./01234 56789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefgh +ijklmnopqrstuvwxyz{ |}~!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOP +QRSTUVWXYZ[\]^_`abcd efghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./012345678 +9:;<=>?@ABCDEFGHIJKLM NOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ +!"#$%&'()*+,-./0123456 789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdef +ghijklmnopqrstuvwxyz{|} ~!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMN +OPQRSTUVWXYZ[\]^_`abcdef ghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./0123456 +789:;<=>?@ABCDEFGHIJKLMNO PQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{| +}~!"#$%&'()*+,-./012345678 9:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcd +efghijklmnopqrstuvwxyz{|}~! "#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKL +MNOPQRSTUVWXYZ[\]^_`abcdefgh ijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./01234 +56789:;<=>?@ABCDEFGHIJKLMNOPQ RSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz +{|}~!"#$%&'()*+,-./0123456789: ;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`ab +cdefghijklmnopqrstuvwxyz{|}~!"# $%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJ +KLMNOPQRSTUVWXYZ[\]^_`abcdefghij klmnopqrstuvwxyz{|}~!"#$%&'()*+,-./012 +3456789:;<=>?@ABCDEFGHIJKLMNOPQRS TUVWXYZ[\]^_`abcdefghijklmnopqrstuvwx +yz{|}~!"#$%&'()*+,-./0123456789:;< =>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_` +abcdefghijklmnopqrstuvwxyz{|}~!"#$% &'()*+,-./0123456789:;<=>?@ABCDEFGH +IJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijkl mnopqrstuvwxyz{|}~!"#$%&'()*+,-./0 +123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTU VWXYZ[\]^_`abcdefghijklmnopqrstuv +wxyz{|}~!"#$%&'()*+,-./0123456789:;<=> ?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^ +_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&' ()*+,-./0123456789:;<=>?@ABCDEF +GHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmn opqrstuvwxyz{|}~!"#$%&'()*+,-. 
+/0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVW XYZ[\]^_`abcdefghijklmnopqrst +uvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ ABCDEFGHIJKLMNOPQRSTUVWXYZ[\ +]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'() *+,-./0123456789:;<=>?@ABCD +EFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnop qrstuvwxyz{|}~!"#$%&'()*+, +-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXY Z[\]^_`abcdefghijklmnopqr +stuvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@AB CDEFGHIJKLMNOPQRSTUVWXYZ +[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()*+ ,-./0123456789:;<=>?@AB +CDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqr stuvwxyz{|}~!"#$%&'()* ++,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[ \]^_`abcdefghijklmnop +qrstuvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ABCD EFGHIJKLMNOPQRSTUVWX +YZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,- ./0123456789:;<=>?@ +ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrst uvwxyz{|}~!"#$%&'( +)*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\] ^_`abcdefghijklmn +opqrstuvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ABCDEF GHIJKLMNOPQRSTUV +WXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./ 0123456789:;<=> +?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuv wxyz{|}~!"#$%& +'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_ `abcdefghijkl +mnopqrstuvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGH IJKLMNOPQRST +UVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./01 23456789:;< +=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwx yz{|}~!"#$ +%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`a bcdefghij +klmnopqrstuvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJ KLMNOPQR +STUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./0123 456789: +;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz {|}~!" 
+#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abc defgh +ijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKL MNOP +QRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./012345 678 +9:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{| }~ +!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcde f +ghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMN + OPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./0123456 +7 89:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{| +}~ !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcd +efg hijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKL +MNOP QRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./01234 +56789 :;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz +{|}~!" #$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`ab +cdefghi jklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJ +KLMNOPQR STUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./012 +3456789:; <=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwx +yz{|}~!"#$ %&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_` +abcdefghijk lmnopqrstuvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGH +IJKLMNOPQRST UVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./0 +123456789:;<= >?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuv +wxyz{|}~!"#$%& '()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^ +_`abcdefghijklm nopqrstuvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ABCDEF +GHIJKLMNOPQRSTUV WXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-. +/0123456789:;<=>? 
@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrst +uvwxyz{|}~!"#$%&'( )*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\ +]^_`abcdefghijklmno pqrstuvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ABCD +EFGHIJKLMNOPQRSTUVWX YZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()*+, +-./0123456789:;<=>?@A BCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqr +stuvwxyz{|}~!"#$%&'()* +,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ +[\]^_`abcdefghijklmnopq rstuvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@AB +CDEFGHIJKLMNOPQRSTUVWXYZ [\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()* ++,-./0123456789:;<=>?@ABC DEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnop +qrstuvwxyz{|}~!"#$%&'()*+, -./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWX +YZ[\]^_`abcdefghijklmnopqrs tuvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ +ABCDEFGHIJKLMNOPQRSTUVWXYZ[\ ]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'( +)*+,-./0123456789:;<=>?@ABCDE FGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmn +opqrstuvwxyz{|}~!"#$%&'()*+,-. /0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUV +WXYZ[\]^_`abcdefghijklmnopqrstu vwxyz{|}~!"#$%&'()*+,-./0123456789:;<=> +?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^ _`abcdefghijklmnopqrstuvwxyz{|}~!"#$%& +'()*+,-./0123456789:;<=>?@ABCDEFG HIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijkl +mnopqrstuvwxyz{|}~!"#$%&'()*+,-./0 123456789:;<=>?@ABCDEFGHIJKLMNOPQRST +UVWXYZ[\]^_`abcdefghijklmnopqrstuvw xyz{|}~!"#$%&'()*+,-./0123456789:;< +=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_` abcdefghijklmnopqrstuvwxyz{|}~!"#$ +%&'()*+,-./0123456789:;<=>?@ABCDEFGHI JKLMNOPQRSTUVWXYZ[\]^_`abcdefghij +klmnopqrstuvwxyz{|}~!"#$%&'()*+,-./012 3456789:;<=>?@ABCDEFGHIJKLMNOPQR +STUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxy z{|}~!"#$%&'()*+,-./0123456789: +;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`ab cdefghijklmnopqrstuvwxyz{|}~!" 
+#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJK LMNOPQRSTUVWXYZ[\]^_`abcdefgh +ijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./01234 56789:;<=>?@ABCDEFGHIJKLMNOP +QRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{ |}~!"#$%&'()*+,-./012345678 +9:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcd efghijklmnopqrstuvwxyz{|}~ +!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLM NOPQRSTUVWXYZ[\]^_`abcdef +ghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./0123456 789:;<=>?@ABCDEFGHIJKLMN +OPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|} ~!"#$%&'()*+,-./0123456 +789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdef ghijklmnopqrstuvwxyz{| +}~!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNO PQRSTUVWXYZ[\]^_`abcd +efghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./012345678 9:;<=>?@ABCDEFGHIJKL +MNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~! "#$%&'()*+,-./01234 +56789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefgh ijklmnopqrstuvwxyz +{|}~!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQ RSTUVWXYZ[\]^_`ab +cdefghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./0123456789: ;<=>?@ABCDEFGHIJ +KLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"# $%&'()*+,-./012 +3456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghij klmnopqrstuvwx +yz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRS TUVWXYZ[\]^_` +abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./0123456789:;< =>?@ABCDEFGH +IJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$% &'()*+,-./0 +123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijkl mnopqrstuv +wxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTU VWXYZ[\]^ +_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=> ?@ABCDEF +GHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&' ()*+,-. 
+/0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmn opqrst +uvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVW XYZ[\ +]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ ABCD +EFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'() *+, +-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnop qr +stuvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXY Z +[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@AB + CDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()* ++ ,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnop +qr stuvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWX +YZ[ \]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ +ABCD EFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'( +)*+,- ./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmn +opqrst uvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUV +WXYZ[\] ^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=> +?@ABCDEF GHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%& +'()*+,-./ 0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijkl +mnopqrstuv wxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRST +UVWXYZ[\]^_ `abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./0123456789:;< +=>?@ABCDEFGH IJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$ +%&'()*+,-./01 23456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghij +klmnopqrstuvwx yz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQR +STUVWXYZ[\]^_`a bcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./0123456789: +;<=>?@ABCDEFGHIJ KLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!" 
+#$%&'()*+,-./0123 456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefgh +ijklmnopqrstuvwxyz {|}~!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOP +QRSTUVWXYZ[\]^_`abc defghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./012345678 +9:;<=>?@ABCDEFGHIJKL MNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ +!"#$%&'()*+,-./012345 6789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdef +ghijklmnopqrstuvwxyz{| }~!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMN +OPQRSTUVWXYZ[\]^_`abcde fghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./0123456 +789:;<=>?@ABCDEFGHIJKLMN OPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{| +}~!"#$%&'()*+,-./01234567 89:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcd +efghijklmnopqrstuvwxyz{|}~ !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKL +MNOPQRSTUVWXYZ[\]^_`abcdefg hijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./01234 +56789:;<=>?@ABCDEFGHIJKLMNOP QRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz +{|}~!"#$%&'()*+,-./0123456789 :;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`ab +cdefghijklmnopqrstuvwxyz{|}~!" #$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJ +KLMNOPQRSTUVWXYZ[\]^_`abcdefghi jklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./012 +3456789:;<=>?@ABCDEFGHIJKLMNOPQR STUVWXYZ[\]^_`abcdefghijklmnopqrstuvwx +yz{|}~!"#$%&'()*+,-./0123456789:; <=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_` +abcdefghijklmnopqrstuvwxyz{|}~!"#$ %&'()*+,-./0123456789:;<=>?@ABCDEFGH +IJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijk lmnopqrstuvwxyz{|}~!"#$%&'()*+,-./0 +123456789:;<=>?@ABCDEFGHIJKLMNOPQRST UVWXYZ[\]^_`abcdefghijklmnopqrstuv +wxyz{|}~!"#$%&'()*+,-./0123456789:;<= >?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^ +_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%& '()*+,-./0123456789:;<=>?@ABCDEF +GHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklm nopqrstuvwxyz{|}~!"#$%&'()*+,-. +/0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUV WXYZ[\]^_`abcdefghijklmnopqrst +uvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>? 
@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\ +]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'( )*+,-./0123456789:;<=>?@ABCD +EFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmno pqrstuvwxyz{|}~!"#$%&'()*+, +-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWX YZ[\]^_`abcdefghijklmnopqr +stuvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@A BCDEFGHIJKLMNOPQRSTUVWXYZ +[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()* +,-./0123456789:;<=>?@AB +CDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopq rstuvwxyz{|}~!"#$%&'()* ++,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ [\]^_`abcdefghijklmnop +qrstuvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ABC DEFGHIJKLMNOPQRSTUVWX +YZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()*+, -./0123456789:;<=>?@ +ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrs tuvwxyz{|}~!"#$%&'( +)*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\ ]^_`abcdefghijklmn +opqrstuvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ABCDE FGHIJKLMNOPQRSTUV +WXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-. /0123456789:;<=> +?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstu vwxyz{|}~!"#$%& +'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^ _`abcdefghijkl +mnopqrstuvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFG HIJKLMNOPQRST +UVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./0 123456789:;< +=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvw xyz{|}~!"#$ +%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_` abcdefghij +klmnopqrstuvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHI JKLMNOPQR +STUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./012 3456789: +;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxy z{|}~!" 
+#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`ab cdefgh +ijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJK LMNOP +QRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./01234 5678 +9:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{ |}~ diff --git a/tests/libsoup.supp b/tests/libsoup.supp new file mode 100644 index 0000000..69a72ac --- /dev/null +++ b/tests/libsoup.supp @@ -0,0 +1,246 @@ +# valgrind suppressions file + +{ + ld.so/map + Memcheck:Cond + fun:index + fun:expand_dynamic_string_token + fun:_dl_map_object +} +{ + ld.so/relocate + Memcheck:Cond + fun:_dl_relocate_object + fun:dl_main + fun:_dl_sysdep_start + fun:_dl_start +} + +{ + glib/g_type_init + Memcheck:Leak + ... + fun:g_type_init +} +{ + glib/g_type_init_with_debug_flags + Memcheck:Leak + ... + fun:g_type_init_with_debug_flags +} +{ + glib/g_thread_init + Memcheck:Leak + ... + fun:g_thread_init +} +{ + glib/g_thread_init_glib + Memcheck:Leak + ... + fun:g_thread_init_glib +} +{ + glib/g_type_register_static + Memcheck:Leak + ... + fun:g_type_register_static +} +{ + glib/g_boxed_type_register_static + Memcheck:Leak + ... + fun:g_boxed_type_register_static +} +{ + glib/g_type_add_interface_static + Memcheck:Leak + ... + fun:g_type_add_interface_static +} +{ + glib/g_type_interface_add_prerequisite + Memcheck:Leak + ... + fun:g_type_interface_add_prerequisite +} +{ + glib/g_type_class_ref + Memcheck:Leak + ... + fun:g_type_class_ref +} +{ + glib/g_set_prgname + Memcheck:Leak + ... + fun:g_set_prgname +} +{ + glib/g_intern_string + Memcheck:Leak + ... + fun:g_intern_string +} +{ + glib/g_intern_static_string + Memcheck:Leak + ... + fun:g_intern_static_string +} +{ + glib/g_quark_from_string + Memcheck:Leak + ... + fun:g_quark_from_string +} +{ + glib/g_quark_from_static_string + Memcheck:Leak + ... + fun:g_quark_from_static_string +} +{ + glib/get_dispatch + Memcheck:Leak + ... 
+ fun:get_dispatch +} +{ + glib/g_signal_handlers_destroy + Memcheck:Leak + ... + fun:g_signal_handlers_destroy +} +{ + glib/g_data_initialize + Memcheck:Leak + ... + fun:g_data_initialize +} +{ + glib/g_static_private_set + Memcheck:Leak + ... + fun:g_static_private_set +} +{ + glib/g_child_watch_source_init_multi_threaded + Memcheck:Leak + ... + fun:g_child_watch_source_init_multi_threaded +} +{ + glib/xdg_mime_init + Memcheck:Leak + ... + fun:xdg_mime_init +} +{ + glib/GResolver + Memcheck:Leak + ... + fun:g_resolver_get_default +} +{ + glib/g_main_context_push_thread_default + Memcheck:Leak + ... + fun:g_main_context_push_thread_default +} +{ + glib/g_socket_connection_factory + Memcheck:Leak + ... + fun:g_socket_connection_factory_register_type +} +{ + glib/g_get_language_names + Memcheck:Leak + ... + fun:g_get_language_names +} +{ + glib/giomodules + Memcheck:Leak + ... + fun:_g_io_modules_ensure_loaded +} +{ + glib/tlsinit + Memcheck:Leak + ... + fun:get_default_tls_backend +} +{ + glib/tlscrypto + Memcheck:Leak + ... + fun:gcry_pthread_mutex_init +} +{ + glib/tlscache + Memcheck:Leak + ... + fun:g_tls_backend_gnutls_cache_session_data +} +{ + glib/tlspriority + Memcheck:Leak + ... + fun:g_tls_connection_gnutls_init_priorities +} +{ + glib/gfileinfo + Memcheck:Leak + ... + fun:ensure_attribute_hash + fun:lookup_attribute +} + +# probably using uninitialized memory as padding or something +{ + gnutls/handshake + Memcheck:Cond + ... + fun:gnutls_handshake +} + +{ + libxml2/xmlInitParser + Memcheck:Leak + ... + fun:xmlInitParser +} +{ + libxml2/xmlInitializeDict + Memcheck:Leak + ... + fun:xmlInitializeDict +} +{ + libxml2/xmlInitCharEncodingHandlers + Memcheck:Leak + ... + fun:xmlInitCharEncodingHandlers +} +{ + libxml2/xmlNewCharEncodingHandler + Memcheck:Leak + ... + fun:xmlNewCharEncodingHandler +} + +{ + libsoup/interned_uri_schemes + Memcheck:Leak + ... 
+ fun:g_ascii_strdown + fun:soup_uri_parse_scheme +} +{ + libsoup/interned_headers + Memcheck:Leak + ... + fun:intern_header_name +} diff --git a/tests/misc-test.c b/tests/misc-test.c new file mode 100644 index 0000000..411cb11 --- /dev/null +++ b/tests/misc-test.c @@ -0,0 +1,1047 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2007 Red Hat, Inc. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "test-utils.h" + +SoupServer *server; +SoupURI *base_uri; +GMutex *server_mutex; + +static gboolean +auth_callback (SoupAuthDomain *auth_domain, SoupMessage *msg, + const char *username, const char *password, gpointer data) +{ + return !strcmp (username, "user") && !strcmp (password, "password"); +} + +static void +forget_close (SoupMessage *msg, gpointer user_data) +{ + soup_message_headers_remove (msg->response_headers, "Connection"); +} + +static void +close_socket (SoupMessage *msg, gpointer user_data) +{ + SoupSocket *sock = user_data; + + soup_socket_disconnect (sock); +} + +static void +timeout_socket (SoupSocket *sock, gpointer user_data) +{ + soup_socket_disconnect (sock); +} + +static void +timeout_request_started (SoupServer *server, SoupMessage *msg, + SoupClientContext *client, gpointer user_data) +{ + SoupSocket *sock; + GMainContext *context = soup_server_get_async_context (server); + guint readable; + + sock = soup_client_context_get_socket (client); + readable = g_signal_connect (sock, "readable", + G_CALLBACK (timeout_socket), NULL); + while (soup_socket_is_connected (sock)) + g_main_context_iteration (context, TRUE); + g_signal_handler_disconnect (sock, readable); + g_signal_handlers_disconnect_by_func (server, timeout_request_started, NULL); +} + +static void +setup_timeout_persistent (SoupServer *server, SoupSocket *sock) +{ + char buf[1]; + gsize nread; + + /* In order for the test to work correctly, we have to + * 
close the connection *after* the client side writes + * the request. To ensure that this happens reliably, + * regardless of thread scheduling, we: + * + * 1. Try to read off the socket now, knowing it will + * fail (since the client is waiting for us to + * return a response). This will cause it to + * emit "readable" later. + * 2. Connect to the server's request-started signal. + * 3. Run an inner main loop from that signal handler + * until the socket emits "readable". (If we don't + * do this then it's possible the client's next + * request would be ready before we returned to + * the main loop, and so the signal would never be + * emitted.) + * 4. Close the socket. + */ + + soup_socket_read (sock, buf, 1, &nread, NULL, NULL); + g_signal_connect (server, "request-started", + G_CALLBACK (timeout_request_started), NULL); +} + +static gboolean +timeout_finish_message (gpointer msg) +{ + SoupServer *server = g_object_get_data (G_OBJECT (msg), "server"); + + soup_server_unpause_message (server, msg); + return FALSE; +} + +static void +server_callback (SoupServer *server, SoupMessage *msg, + const char *path, GHashTable *query, + SoupClientContext *context, gpointer data) +{ + SoupURI *uri = soup_message_get_uri (msg); + + /* The way this gets used in the tests, we don't actually + * need to hold it through the whole function, so it's simpler + * to just release it right away. 
+ */ + g_mutex_lock (server_mutex); + g_mutex_unlock (server_mutex); + + soup_message_headers_append (msg->response_headers, + "X-Handled-By", "server_callback"); + + if (!strcmp (path, "*")) { + debug_printf (1, " default server_callback got request for '*'!\n"); + errors++; + soup_message_set_status (msg, SOUP_STATUS_INTERNAL_SERVER_ERROR); + return; + } + + if (msg->method != SOUP_METHOD_GET) { + soup_message_set_status (msg, SOUP_STATUS_NOT_IMPLEMENTED); + return; + } + + if (!strcmp (path, "/redirect")) { + soup_message_set_status (msg, SOUP_STATUS_FOUND); + soup_message_headers_append (msg->response_headers, + /* Kids: don't try this at home! + * RFC2616 says to use an + * absolute URI! + */ + "Location", "/"); + return; + } + + if (g_str_has_prefix (path, "/content-length/")) { + gboolean too_long = strcmp (path, "/content-length/long") == 0; + gboolean no_close = strcmp (path, "/content-length/noclose") == 0; + + soup_message_set_status (msg, SOUP_STATUS_OK); + soup_message_set_response (msg, "text/plain", + SOUP_MEMORY_STATIC, "foobar", 6); + if (too_long) + soup_message_headers_set_content_length (msg->response_headers, 9); + soup_message_headers_append (msg->response_headers, + "Connection", "close"); + + if (too_long) { + SoupSocket *sock; + + /* soup-message-io will wait for us to add + * another chunk after the first, to fill out + * the declared Content-Length. Instead, we + * forcibly close the socket at that point. + */ + sock = soup_client_context_get_socket (context); + g_signal_connect (msg, "wrote-chunk", + G_CALLBACK (close_socket), sock); + } else if (no_close) { + /* Remove the 'Connection: close' after writing + * the headers, so that when we check it after + * writing the body, we'll think we aren't + * supposed to close it. 
+ */ + g_signal_connect (msg, "wrote-headers", + G_CALLBACK (forget_close), NULL); + } + return; + } + + if (!strcmp (path, "/timeout-persistent")) { + SoupSocket *sock; + + sock = soup_client_context_get_socket (context); + setup_timeout_persistent (server, sock); + } + + if (!strcmp (path, "/slow")) { + soup_server_pause_message (server, msg); + g_object_set_data (G_OBJECT (msg), "server", server); + soup_add_timeout (soup_server_get_async_context (server), + 1000, timeout_finish_message, msg); + } + + soup_message_set_status (msg, SOUP_STATUS_OK); + if (!strcmp (uri->host, "foo")) { + soup_message_set_response (msg, "text/plain", + SOUP_MEMORY_STATIC, "foo-index", 9); + return; + } else { + soup_message_set_response (msg, "text/plain", + SOUP_MEMORY_STATIC, "index", 5); + return; + } +} + +static void +server_star_callback (SoupServer *server, SoupMessage *msg, + const char *path, GHashTable *query, + SoupClientContext *context, gpointer data) +{ + soup_message_headers_append (msg->response_headers, + "X-Handled-By", "star_callback"); + + if (strcmp (path, "*") != 0) { + debug_printf (1, " server_star_callback got request for '%s'!\n", path); + errors++; + soup_message_set_status (msg, SOUP_STATUS_INTERNAL_SERVER_ERROR); + return; + } + + if (msg->method != SOUP_METHOD_OPTIONS) { + soup_message_set_status (msg, SOUP_STATUS_METHOD_NOT_ALLOWED); + return; + } + + soup_message_set_status (msg, SOUP_STATUS_OK); +} + +/* Host header handling: client must be able to override the default + * value, server must be able to recognize different Host values. + * #539803. 
+ */ +static void +do_host_test (void) +{ + SoupSession *session; + SoupMessage *one, *two; + + debug_printf (1, "Host handling\n"); + + session = soup_test_session_new (SOUP_TYPE_SESSION_SYNC, NULL); + + one = soup_message_new_from_uri ("GET", base_uri); + two = soup_message_new_from_uri ("GET", base_uri); + soup_message_headers_replace (two->request_headers, "Host", "foo"); + + soup_session_send_message (session, one); + soup_session_send_message (session, two); + + soup_test_session_abort_unref (session); + + if (!SOUP_STATUS_IS_SUCCESSFUL (one->status_code)) { + debug_printf (1, " Message 1 failed: %d %s\n", + one->status_code, one->reason_phrase); + errors++; + } else if (strcmp (one->response_body->data, "index") != 0) { + debug_printf (1, " Unexpected response to message 1: '%s'\n", + one->response_body->data); + errors++; + } + g_object_unref (one); + + if (!SOUP_STATUS_IS_SUCCESSFUL (two->status_code)) { + debug_printf (1, " Message 2 failed: %d %s\n", + two->status_code, two->reason_phrase); + errors++; + } else if (strcmp (two->response_body->data, "foo-index") != 0) { + debug_printf (1, " Unexpected response to message 2: '%s'\n", + two->response_body->data); + errors++; + } + g_object_unref (two); +} + +/* Dropping the application's ref on the session from a callback + * should not cause the session to be freed at an incorrect time. + * (This test will crash if it fails.) 
#533473 + */ +static void +cu_one_completed (SoupSession *session, SoupMessage *msg, gpointer loop) +{ + debug_printf (2, " Message 1 completed\n"); + if (msg->status_code != SOUP_STATUS_CANT_CONNECT) { + debug_printf (1, " Unexpected status on Message 1: %d %s\n", + msg->status_code, msg->reason_phrase); + errors++; + } + g_object_unref (session); +} + +static gboolean +cu_idle_quit (gpointer loop) +{ + g_main_loop_quit (loop); + return FALSE; +} + +static void +cu_two_completed (SoupSession *session, SoupMessage *msg, gpointer loop) +{ + debug_printf (2, " Message 2 completed\n"); + if (msg->status_code != SOUP_STATUS_CANT_CONNECT) { + debug_printf (1, " Unexpected status on Message 2: %d %s\n", + msg->status_code, msg->reason_phrase); + errors++; + } + g_idle_add (cu_idle_quit, loop); +} + +static void +do_callback_unref_test (void) +{ + SoupServer *bad_server; + SoupAddress *addr; + SoupSession *session; + SoupMessage *one, *two; + GMainLoop *loop; + char *bad_uri; + + debug_printf (1, "\nCallback unref handling\n"); + + /* Get a guaranteed-bad URI */ + addr = soup_address_new ("127.0.0.1", SOUP_ADDRESS_ANY_PORT); + soup_address_resolve_sync (addr, NULL); + bad_server = soup_server_new (SOUP_SERVER_INTERFACE, addr, + NULL); + g_object_unref (addr); + + bad_uri = g_strdup_printf ("http://127.0.0.1:%u/", + soup_server_get_port (bad_server)); + g_object_unref (bad_server); + + session = soup_test_session_new (SOUP_TYPE_SESSION_ASYNC, NULL); + g_object_add_weak_pointer (G_OBJECT (session), (gpointer *)&session); + + loop = g_main_loop_new (NULL, TRUE); + + one = soup_message_new ("GET", bad_uri); + g_object_add_weak_pointer (G_OBJECT (one), (gpointer *)&one); + two = soup_message_new ("GET", bad_uri); + g_object_add_weak_pointer (G_OBJECT (two), (gpointer *)&two); + g_free (bad_uri); + + soup_session_queue_message (session, one, cu_one_completed, loop); + soup_session_queue_message (session, two, cu_two_completed, loop); + + g_main_loop_run (loop); + 
g_main_loop_unref (loop); + + if (session) { + g_object_remove_weak_pointer (G_OBJECT (session), (gpointer *)&session); + debug_printf (1, " Session not destroyed?\n"); + errors++; + g_object_unref (session); + } + if (one) { + g_object_remove_weak_pointer (G_OBJECT (one), (gpointer *)&one); + debug_printf (1, " Message 1 not destroyed?\n"); + errors++; + g_object_unref (one); + } + if (two) { + g_object_remove_weak_pointer (G_OBJECT (two), (gpointer *)&two); + debug_printf (1, " Message 2 not destroyed?\n"); + errors++; + g_object_unref (two); + } + + /* Otherwise, if we haven't crashed, we're ok. */ +} + +/* SoupSession should clean up all signal handlers on a message after + * it is finished, allowing the message to be reused if desired. + * #559054 + */ +static void +ensure_no_signal_handlers (SoupMessage *msg, guint *signal_ids, guint n_signal_ids) +{ + int i; + + for (i = 0; i < n_signal_ids; i++) { + if (g_signal_handler_find (msg, G_SIGNAL_MATCH_ID, signal_ids[i], + 0, NULL, NULL, NULL)) { + debug_printf (1, " Message has handler for '%s'\n", + g_signal_name (signal_ids[i])); + errors++; + } + } +} + +static void +reuse_test_authenticate (SoupSession *session, SoupMessage *msg, + SoupAuth *auth, gboolean retrying) +{ + /* Get it wrong the first time, then succeed */ + if (!retrying) + soup_auth_authenticate (auth, "user", "wrong password"); + else + soup_auth_authenticate (auth, "user", "password"); +} + +static void +do_msg_reuse_test (void) +{ + SoupSession *session; + SoupMessage *msg; + SoupURI *uri; + guint *signal_ids, n_signal_ids; + + debug_printf (1, "\nSoupMessage reuse\n"); + + signal_ids = g_signal_list_ids (SOUP_TYPE_MESSAGE, &n_signal_ids); + + session = soup_test_session_new (SOUP_TYPE_SESSION_ASYNC, NULL); + g_signal_connect (session, "authenticate", + G_CALLBACK (reuse_test_authenticate), NULL); + + debug_printf (1, " First message\n"); + msg = soup_message_new_from_uri ("GET", base_uri); + soup_session_send_message (session, msg); + 
ensure_no_signal_handlers (msg, signal_ids, n_signal_ids); + + debug_printf (1, " Redirect message\n"); + uri = soup_uri_new_with_base (base_uri, "/redirect"); + soup_message_set_uri (msg, uri); + soup_uri_free (uri); + soup_session_send_message (session, msg); + if (!soup_uri_equal (soup_message_get_uri (msg), base_uri)) { + debug_printf (1, " Message did not get redirected!\n"); + errors++; + } + ensure_no_signal_handlers (msg, signal_ids, n_signal_ids); + + debug_printf (1, " Auth message\n"); + uri = soup_uri_new_with_base (base_uri, "/auth"); + soup_message_set_uri (msg, uri); + soup_uri_free (uri); + soup_session_send_message (session, msg); + if (!SOUP_STATUS_IS_SUCCESSFUL (msg->status_code)) { + debug_printf (1, " Message did not get authenticated!\n"); + errors++; + } + ensure_no_signal_handlers (msg, signal_ids, n_signal_ids); + + /* One last try to make sure the auth stuff got cleaned up */ + debug_printf (1, " Last message\n"); + soup_message_set_uri (msg, base_uri); + soup_session_send_message (session, msg); + ensure_no_signal_handlers (msg, signal_ids, n_signal_ids); + + soup_test_session_abort_unref (session); + g_object_unref (msg); + g_free (signal_ids); +} + +/* Server handlers for "*" work but are separate from handlers for + * all other URIs. 
#590751 + */ +static void +do_star_test (void) +{ + SoupSession *session; + SoupMessage *msg; + SoupURI *star_uri; + const char *handled_by; + + debug_printf (1, "\nOPTIONS *\n"); + + session = soup_test_session_new (SOUP_TYPE_SESSION_SYNC, NULL); + star_uri = soup_uri_copy (base_uri); + soup_uri_set_path (star_uri, "*"); + + debug_printf (1, " Testing with no handler\n"); + msg = soup_message_new_from_uri ("OPTIONS", star_uri); + soup_session_send_message (session, msg); + + if (msg->status_code != SOUP_STATUS_NOT_FOUND) { + debug_printf (1, " Unexpected response: %d %s\n", + msg->status_code, msg->reason_phrase); + errors++; + } + handled_by = soup_message_headers_get_one (msg->response_headers, + "X-Handled-By"); + if (handled_by) { + /* Should have been rejected by SoupServer directly */ + debug_printf (1, " Message reached handler '%s'\n", + handled_by); + errors++; + } + g_object_unref (msg); + + soup_server_add_handler (server, "*", server_star_callback, NULL, NULL); + + debug_printf (1, " Testing with handler\n"); + msg = soup_message_new_from_uri ("OPTIONS", star_uri); + soup_session_send_message (session, msg); + + if (msg->status_code != SOUP_STATUS_OK) { + debug_printf (1, " Unexpected response: %d %s\n", + msg->status_code, msg->reason_phrase); + errors++; + } + handled_by = soup_message_headers_get_one (msg->response_headers, + "X-Handled-By"); + if (!handled_by) { + debug_printf (1, " Message did not reach handler!\n"); + errors++; + } else if (strcmp (handled_by, "star_callback") != 0) { + debug_printf (1, " Message reached incorrect handler '%s'\n", + handled_by); + errors++; + } + g_object_unref (msg); + + soup_test_session_abort_unref (session); + soup_uri_free (star_uri); +} + +/* Handle unexpectedly-early aborts. 
#596074, #618641 */ +static void +ea_msg_completed_one (SoupSession *session, SoupMessage *msg, gpointer loop) +{ + debug_printf (2, " Message 1 completed\n"); + if (msg->status_code != SOUP_STATUS_CANCELLED) { + debug_printf (1, " Unexpected status on Message 1: %d %s\n", + msg->status_code, msg->reason_phrase); + errors++; + } + g_main_loop_quit (loop); +} + +static gboolean +ea_abort_session (gpointer session) +{ + soup_session_abort (session); + return FALSE; +} + +static void +ea_connection_state_changed (GObject *conn, GParamSpec *pspec, gpointer session) +{ + SoupConnectionState state; + + g_object_get (conn, "state", &state, NULL); + if (state == SOUP_CONNECTION_CONNECTING) { + g_idle_add_full (G_PRIORITY_HIGH, + ea_abort_session, + session, NULL); + g_signal_handlers_disconnect_by_func (conn, ea_connection_state_changed, session); + } +} + +static void +ea_connection_created (SoupSession *session, GObject *conn, gpointer user_data) +{ + g_signal_connect (conn, "notify::state", + G_CALLBACK (ea_connection_state_changed), session); + g_signal_handlers_disconnect_by_func (session, ea_connection_created, user_data); +} + +static void +do_early_abort_test (void) +{ + SoupSession *session; + SoupMessage *msg; + GMainContext *context; + GMainLoop *loop; + + debug_printf (1, "\nAbort with pending connection\n"); + + session = soup_test_session_new (SOUP_TYPE_SESSION_ASYNC, NULL); + msg = soup_message_new_from_uri ("GET", base_uri); + + context = g_main_context_default (); + loop = g_main_loop_new (context, TRUE); + soup_session_queue_message (session, msg, ea_msg_completed_one, loop); + g_main_context_iteration (context, FALSE); + + soup_session_abort (session); + while (g_main_context_pending (context)) + g_main_context_iteration (context, FALSE); + g_main_loop_unref (loop); + soup_test_session_abort_unref (session); + + session = soup_test_session_new (SOUP_TYPE_SESSION_ASYNC, NULL); + msg = soup_message_new_from_uri ("GET", base_uri); + + g_signal_connect 
(session, "connection-created", + G_CALLBACK (ea_connection_created), NULL); + soup_session_send_message (session, msg); + debug_printf (2, " Message 2 completed\n"); + + if (msg->status_code != SOUP_STATUS_CANCELLED) { + debug_printf (1, " Unexpected response: %d %s\n", + msg->status_code, msg->reason_phrase); + errors++; + } + g_object_unref (msg); + + while (g_main_context_pending (context)) + g_main_context_iteration (context, FALSE); + + soup_test_session_abort_unref (session); +} + +static void +do_content_length_framing_test (void) +{ + SoupSession *session; + SoupMessage *msg; + SoupURI *request_uri; + goffset declared_length; + + debug_printf (1, "\nInvalid Content-Length framing tests\n"); + + session = soup_test_session_new (SOUP_TYPE_SESSION_ASYNC, NULL); + + debug_printf (1, " Content-Length larger than message body length\n"); + request_uri = soup_uri_new_with_base (base_uri, "/content-length/long"); + msg = soup_message_new_from_uri ("GET", request_uri); + soup_session_send_message (session, msg); + if (msg->status_code != SOUP_STATUS_OK) { + debug_printf (1, " Unexpected response: %d %s\n", + msg->status_code, msg->reason_phrase); + errors++; + } else { + declared_length = soup_message_headers_get_content_length (msg->response_headers); + debug_printf (2, " Content-Length: %lu, body: %s\n", + (gulong)declared_length, msg->response_body->data); + if (msg->response_body->length >= declared_length) { + debug_printf (1, " Body length %lu >= declared length %lu\n", + (gulong)msg->response_body->length, + (gulong)declared_length); + errors++; + } + } + soup_uri_free (request_uri); + g_object_unref (msg); + + debug_printf (1, " Server claims 'Connection: close' but doesn't\n"); + request_uri = soup_uri_new_with_base (base_uri, "/content-length/noclose"); + msg = soup_message_new_from_uri ("GET", request_uri); + soup_session_send_message (session, msg); + if (msg->status_code != SOUP_STATUS_OK) { + debug_printf (1, " Unexpected response: %d %s\n", + 
msg->status_code, msg->reason_phrase); + errors++; + } else { + declared_length = soup_message_headers_get_content_length (msg->response_headers); + debug_printf (2, " Content-Length: %lu, body: %s\n", + (gulong)declared_length, msg->response_body->data); + if (msg->response_body->length != declared_length) { + debug_printf (1, " Body length %lu != declared length %lu\n", + (gulong)msg->response_body->length, + (gulong)declared_length); + errors++; + } + } + soup_uri_free (request_uri); + g_object_unref (msg); + + soup_test_session_abort_unref (session); +} + +static void +do_one_accept_language_test (const char *language, const char *expected_header) +{ + SoupSession *session; + SoupMessage *msg; + const char *val; + + debug_printf (1, " LANGUAGE=%s\n", language); + g_setenv ("LANGUAGE", language, TRUE); + session = soup_test_session_new (SOUP_TYPE_SESSION_SYNC, + SOUP_SESSION_ACCEPT_LANGUAGE_AUTO, TRUE, + NULL); + msg = soup_message_new_from_uri ("GET", base_uri); + soup_session_send_message (session, msg); + soup_test_session_abort_unref (session); + + if (!SOUP_STATUS_IS_SUCCESSFUL (msg->status_code)) { + debug_printf (1, " Message failed? 
%d %s\n", + msg->status_code, msg->reason_phrase); + errors++; + } + val = soup_message_headers_get_list (msg->request_headers, + "Accept-Language"); + if (!val) { + debug_printf (1, " No Accept-Language set!\n"); + errors++; + } else if (strcmp (val, expected_header) != 0) { + debug_printf (1, " Wrong Accept-Language: expected '%s', got '%s'\n", + expected_header, val); + errors++; + } + + g_object_unref (msg); +} + +/* Runs do_one_accept_language_test() for several LANGUAGE values and + * then puts the LANGUAGE environment variable back the way it was. + */ +static void +do_accept_language_test (void) +{ + char *orig_language; + + debug_printf (1, "\nAutomatic Accept-Language processing\n"); + + /* Copy the value: the string returned by g_getenv() may be + * overwritten by the g_setenv() calls made for each test case. + * g_strdup (NULL) yields NULL, so "unset" is still detectable. + */ + orig_language = g_strdup (g_getenv ("LANGUAGE")); + do_one_accept_language_test ("C", "en"); + do_one_accept_language_test ("fr_FR", "fr-fr, fr;q=0.9"); + do_one_accept_language_test ("fr_FR:de:en_US", "fr-fr, fr;q=0.9, de;q=0.8, en-us;q=0.7, en;q=0.6"); + + if (orig_language) { + g_setenv ("LANGUAGE", orig_language, TRUE); + g_free (orig_language); + } else + g_unsetenv ("LANGUAGE"); +} + +static void +timeout_test_request_started (SoupSession *session, SoupMessage *msg, + SoupSocket *socket, gpointer user_data) +{ + SoupSocket **sockets = user_data; + int i; + + debug_printf (2, " msg %p => socket %p\n", msg, socket); + for (i = 0; i < 4; i++) { + if (!sockets[i]) { + /* We ref the socket to make sure that even if + * it gets disconnected, it doesn't get freed, + * since our checks would get messed up if the + * slice allocator reused the same address for + * two consecutive sockets. 
+ */ + sockets[i] = g_object_ref (socket); + return; + } + } + + debug_printf (1, " socket queue overflowed!\n"); + errors++; + soup_session_cancel_message (session, msg, SOUP_STATUS_CANCELLED); +} + +/* Sends two requests on @session while timeout_test_request_started() + * records (and refs) the socket used for each attempt in sockets[]. + * The first request must use a single socket; the second must start on + * that same socket and then be retried exactly once on a different one. + */ +static void +do_timeout_test_for_session (SoupSession *session) +{ + SoupMessage *msg; + SoupSocket *sockets[4] = { NULL, NULL, NULL, NULL }; + SoupURI *timeout_uri; + int i; + + g_signal_connect (session, "request-started", + G_CALLBACK (timeout_test_request_started), + &sockets); + + debug_printf (1, " First message\n"); + timeout_uri = soup_uri_new_with_base (base_uri, "/timeout-persistent"); + msg = soup_message_new_from_uri ("GET", timeout_uri); + soup_uri_free (timeout_uri); + soup_session_send_message (session, msg); + if (msg->status_code != SOUP_STATUS_OK) { + debug_printf (1, " Unexpected response: %d %s\n", + msg->status_code, msg->reason_phrase); + errors++; + } + if (sockets[1]) { + debug_printf (1, " Message was retried??\n"); + errors++; + /* Forget the unexpected retries so the second message + * starts from a known state, dropping the refs taken in + * timeout_test_request_started() instead of leaking them. + */ + for (i = 1; i < 4; i++) { + if (sockets[i]) { + g_object_unref (sockets[i]); + sockets[i] = NULL; + } + } + } + g_object_unref (msg); + + debug_printf (1, " Second message\n"); + msg = soup_message_new_from_uri ("GET", base_uri); + soup_session_send_message (session, msg); + if (msg->status_code != SOUP_STATUS_OK) { + debug_printf (1, " Unexpected response: %d %s\n", + msg->status_code, msg->reason_phrase); + errors++; + } + if (sockets[1] != sockets[0]) { + debug_printf (1, " Message was not retried on existing connection\n"); + errors++; + } else if (!sockets[2]) { + debug_printf (1, " Message was not retried after disconnect\n"); + errors++; + } else if (sockets[2] == sockets[1]) { + debug_printf (1, " Message was retried on closed connection??\n"); + errors++; + } else if (sockets[3]) { + debug_printf (1, " Message was retried again??\n"); + errors++; + } + g_object_unref (msg); + + /* Bound the scan: when all four slots are in use there is no + * NULL terminator inside the array to stop on. + */ + for (i = 0; i < 4 && sockets[i]; i++) + g_object_unref (sockets[i]); +} + +static void +do_persistent_connection_timeout_test (void) +{ + SoupSession *session; + + debug_printf (1, "\nUnexpected timing 
out of persistent connections\n"); + + debug_printf (1, " Async session\n"); + session = soup_test_session_new (SOUP_TYPE_SESSION_ASYNC, NULL); + do_timeout_test_for_session (session); + soup_test_session_abort_unref (session); + + debug_printf (1, " Sync session\n"); + session = soup_test_session_new (SOUP_TYPE_SESSION_SYNC, NULL); + do_timeout_test_for_session (session); + soup_test_session_abort_unref (session); +} + +static GMainLoop *max_conns_loop; +static int msgs_done; +#define MAX_CONNS 2 +#define TEST_CONNS (MAX_CONNS * 2) + +static gboolean +idle_start_server (gpointer data) +{ + g_mutex_unlock (server_mutex); + return FALSE; +} + +static gboolean +quit_loop (gpointer data) +{ + g_main_loop_quit (max_conns_loop); + return FALSE; +} + +static void +max_conns_request_started (SoupSession *session, SoupMessage *msg, + SoupSocket *socket, gpointer user_data) +{ + if (++msgs_done == MAX_CONNS) + g_timeout_add (100, quit_loop, NULL); +} + +static void +max_conns_message_complete (SoupSession *session, SoupMessage *msg, gpointer user_data) +{ + if (++msgs_done == TEST_CONNS) + g_main_loop_quit (max_conns_loop); +} + +/* Queues TEST_CONNS messages on @session while server_mutex is held and + * checks that exactly MAX_CONNS requests have started 100ms after the + * MAX_CONNS-th "request-started". It then schedules idle_start_server() + * (which releases server_mutex) and waits up to 1000ms for every + * message to complete successfully. + */ +static void +do_max_conns_test_for_session (SoupSession *session) +{ + SoupMessage *msgs[TEST_CONNS]; + int i; + guint timeout_id; + GSource *timeout_source; + + max_conns_loop = g_main_loop_new (NULL, TRUE); + + g_mutex_lock (server_mutex); + + g_signal_connect (session, "request-started", + G_CALLBACK (max_conns_request_started), NULL); + msgs_done = 0; + for (i = 0; i < TEST_CONNS; i++) { + msgs[i] = soup_message_new_from_uri ("GET", base_uri); + g_object_ref (msgs[i]); + soup_session_queue_message (session, msgs[i], + max_conns_message_complete, NULL); + } + + g_main_loop_run (max_conns_loop); + if (msgs_done != MAX_CONNS) { + debug_printf (1, " Queued %d connections out of max %d?\n", + msgs_done, MAX_CONNS); + errors++; + } + g_signal_handlers_disconnect_by_func (session, max_conns_request_started, NULL); + + msgs_done = 0; + g_idle_add (idle_start_server, NULL); + 
timeout_id = g_timeout_add (1000, quit_loop, NULL); + g_main_loop_run (max_conns_loop); + + for (i = 0; i < TEST_CONNS; i++) { + if (!SOUP_STATUS_IS_SUCCESSFUL (msgs[i]->status_code)) { + debug_printf (1, " Message %d failed? %d %s\n", + i, msgs[i]->status_code, + msgs[i]->reason_phrase ? msgs[i]->reason_phrase : "-"); + errors++; + } + } + + if (msgs_done != TEST_CONNS) { + /* Clean up so we don't get a spurious "Leaked + * session" error. + */ + for (i = 0; i < TEST_CONNS; i++) + soup_session_cancel_message (session, msgs[i], SOUP_STATUS_CANCELLED); + g_main_loop_run (max_conns_loop); + } + + /* If every message completed before the 1000ms timeout fired, + * that timeout is still pending and would later call quit_loop() + * on whatever max_conns_loop points to by then. If it did fire, it + * returned FALSE and is already gone. Look the id up so that only + * a still-existing source gets destroyed. + */ + timeout_source = g_main_context_find_source_by_id (NULL, timeout_id); + if (timeout_source) + g_source_destroy (timeout_source); + + g_main_loop_unref (max_conns_loop); + + for (i = 0; i < TEST_CONNS; i++) + g_object_unref (msgs[i]); +} + +static void +do_max_conns_test (void) +{ + SoupSession *session; + + debug_printf (1, "\nExceeding max-conns\n"); + + debug_printf (1, " Async session\n"); + session = soup_test_session_new (SOUP_TYPE_SESSION_ASYNC, + SOUP_SESSION_MAX_CONNS, MAX_CONNS, + NULL); + do_max_conns_test_for_session (session); + soup_test_session_abort_unref (session); + + debug_printf (1, " Sync session\n"); + session = soup_test_session_new (SOUP_TYPE_SESSION_SYNC, + SOUP_SESSION_MAX_CONNS, MAX_CONNS, + NULL); + do_max_conns_test_for_session (session); + soup_test_session_abort_unref (session); +} + +static gboolean +cancel_message_timeout (gpointer msg) +{ + SoupSession *session = g_object_get_data (G_OBJECT (msg), "session"); + + soup_session_cancel_message (session, msg, SOUP_STATUS_CANCELLED); + g_object_unref (msg); + g_object_unref (session); + return FALSE; +} + +static gpointer +cancel_message_thread (gpointer msg) +{ + SoupSession *session = g_object_get_data (G_OBJECT (msg), "session"); + + g_usleep (100000); /* .1s */ + soup_session_cancel_message (session, msg, SOUP_STATUS_CANCELLED); + g_object_unref (msg); + g_object_unref (session); + return NULL; +} + +static void +do_cancel_while_reading_test_for_session (SoupSession *session) +{ + SoupMessage 
*msg; + GThread *thread = NULL; + SoupURI *uri; + + uri = soup_uri_new_with_base (base_uri, "/slow"); + msg = soup_message_new_from_uri ("GET", uri); + soup_uri_free (uri); + + g_object_set_data (G_OBJECT (msg), "session", session); + g_object_ref (msg); + g_object_ref (session); + if (SOUP_IS_SESSION_ASYNC (session)) + g_timeout_add (100, cancel_message_timeout, msg); + else + thread = g_thread_create (cancel_message_thread, msg, TRUE, NULL); + + soup_session_send_message (session, msg); + + if (msg->status_code != SOUP_STATUS_CANCELLED) { + debug_printf (1, " FAILED: %d %s (expected Cancelled)\n", + msg->status_code, msg->reason_phrase); + errors++; + } + g_object_unref (msg); + + if (thread) + g_thread_join (thread); +} + +static void +do_cancel_while_reading_test (void) +{ + SoupSession *session; + + debug_printf (1, "\nCancelling message while reading response\n"); + + debug_printf (1, " Async session\n"); + session = soup_test_session_new (SOUP_TYPE_SESSION_ASYNC, NULL); + do_cancel_while_reading_test_for_session (session); + soup_test_session_abort_unref (session); + + debug_printf (1, " Sync session\n"); + session = soup_test_session_new (SOUP_TYPE_SESSION_SYNC, NULL); + do_cancel_while_reading_test_for_session (session); + soup_test_session_abort_unref (session); +} + +int +main (int argc, char **argv) +{ + SoupAuthDomain *auth_domain; + + test_init (argc, argv, NULL); + + server_mutex = g_mutex_new (); + + server = soup_test_server_new (TRUE); + soup_server_add_handler (server, NULL, server_callback, NULL, NULL); + base_uri = soup_uri_new ("http://127.0.0.1/"); + soup_uri_set_port (base_uri, soup_server_get_port (server)); + + auth_domain = soup_auth_domain_basic_new ( + SOUP_AUTH_DOMAIN_REALM, "misc-test", + SOUP_AUTH_DOMAIN_ADD_PATH, "/auth", + SOUP_AUTH_DOMAIN_BASIC_AUTH_CALLBACK, auth_callback, + NULL); + soup_server_add_auth_domain (server, auth_domain); + g_object_unref (auth_domain); + + do_host_test (); + do_callback_unref_test (); + 
do_msg_reuse_test (); + do_star_test (); + do_early_abort_test (); + do_content_length_framing_test (); + do_accept_language_test (); + do_persistent_connection_timeout_test (); + do_max_conns_test (); + do_cancel_while_reading_test (); + + soup_uri_free (base_uri); + soup_test_server_quit_unref (server); + + test_cleanup (); + return errors != 0; +} diff --git a/tests/ntlm-test.c b/tests/ntlm-test.c new file mode 100644 index 0000000..cd74b15 --- /dev/null +++ b/tests/ntlm-test.c @@ -0,0 +1,435 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2007 Red Hat, Inc. + */ + +/* This doesn't implement full server-side NTLM, and it mostly doesn't + * even test that the client is doing the crypto/encoding/etc parts of + * NTLM correctly. It only tests that the right message headers get + * set in the right messages. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +typedef enum { + NTLM_UNAUTHENTICATED, + NTLM_RECEIVED_REQUEST, + NTLM_SENT_CHALLENGE, + NTLM_AUTHENTICATED_ALICE, + NTLM_AUTHENTICATED_BOB +} NTLMServerState; + +#define NTLM_REQUEST_START "TlRMTVNTUAABAAAA" +#define NTLM_RESPONSE_START "TlRMTVNTUAADAAAA" + +#define NTLM_CHALLENGE "TlRMTVNTUAACAAAADAAMADAAAAABAoEAASNFZ4mrze8AAAAAAAAAAGIAYgA8AAAARABPAE0AQQBJAE4AAgAMAEQATwBNAEEASQBOAAEADABTAEUAUgBWAEUAUgAEABQAZABvAG0AYQBpAG4ALgBjAG8AbQADACIAcwBlAHIAdgBlAHIALgBkAG8AbQBhAGkAbgAuAGMAbwBtAAAAAAA=" + +#define NTLM_RESPONSE_USER(response) ((response)[87] == 'h' ? 
NTLM_AUTHENTICATED_ALICE : NTLM_AUTHENTICATED_BOB) + +static void +clear_state (gpointer connections, GObject *ex_connection) +{ + g_hash_table_remove (connections, ex_connection); +} + +/* Test server handler. Only GET is handled (anything else gets 501). + * The start of the request path selects what is demanded: "/alice" and + * "/bob" need NTLM as that user, "/either" accepts NTLM or Basic, + * "/basic" accepts Basic only, and other paths need no authentication. + * A path containing "/404" is answered with 404 once access is granted. + * The NTLM handshake state is kept per socket in the @data hash table. + */ +static void +server_callback (SoupServer *server, SoupMessage *msg, + const char *path, GHashTable *query, + SoupClientContext *client, gpointer data) +{ + GHashTable *connections = data; + SoupSocket *socket; + const char *auth; + NTLMServerState state, required_user = 0; + gboolean auth_required = FALSE, not_found = FALSE; + gboolean basic_allowed = FALSE, ntlm_allowed = FALSE; + + if (msg->method != SOUP_METHOD_GET) { + soup_message_set_status (msg, SOUP_STATUS_NOT_IMPLEMENTED); + return; + } + + if (!strncmp (path, "/alice", 6)) { + auth_required = TRUE; + ntlm_allowed = TRUE; + required_user = NTLM_AUTHENTICATED_ALICE; + } else if (!strncmp (path, "/bob", 4)) { + auth_required = TRUE; + ntlm_allowed = TRUE; + required_user = NTLM_AUTHENTICATED_BOB; + } else if (!strncmp (path, "/either", 7)) { + auth_required = TRUE; + ntlm_allowed = basic_allowed = TRUE; + } else if (!strncmp (path, "/basic", 6)) { + auth_required = TRUE; + basic_allowed = TRUE; + } + + if (strstr (path, "/404")) + not_found = TRUE; + + socket = soup_client_context_get_socket (client); + state = GPOINTER_TO_INT (g_hash_table_lookup (connections, socket)); + auth = soup_message_headers_get_one (msg->request_headers, + "Authorization"); + + if (auth) { + if (!strncmp (auth, "NTLM ", 5)) { + if (!strncmp (auth + 5, NTLM_REQUEST_START, + strlen (NTLM_REQUEST_START))) { + state = NTLM_RECEIVED_REQUEST; + /* If they start, they must finish */ + auth_required = ntlm_allowed = TRUE; + basic_allowed = FALSE; + } else if (state == NTLM_SENT_CHALLENGE && + !strncmp (auth + 5, NTLM_RESPONSE_START, + strlen (NTLM_RESPONSE_START))) { + state = NTLM_RESPONSE_USER (auth + 5); + } else + state = NTLM_UNAUTHENTICATED; + } else if (!strncmp (auth, "Basic ", 6) && basic_allowed) { + gsize len; + char *decoded = (char 
*)g_base64_decode (auth + 6, &len); + + /* Require an exact match. A length-limited strncmp() + * would also accept any prefix of the expected + * credentials, including an empty decoded value. + */ + if ((len == strlen ("alice:password") && + !memcmp (decoded, "alice:password", len)) || + (len == strlen ("bob:password") && + !memcmp (decoded, "bob:password", len))) + auth_required = FALSE; + g_free (decoded); + } + } + + if (ntlm_allowed && state > NTLM_SENT_CHALLENGE && + (!required_user || required_user == state)) + auth_required = FALSE; + + if (auth_required) { + soup_message_set_status (msg, SOUP_STATUS_UNAUTHORIZED); + + if (basic_allowed) { + soup_message_headers_append (msg->response_headers, + "WWW-Authenticate", + "Basic realm=\"ntlm-test\""); + } + + if (state == NTLM_RECEIVED_REQUEST) { + soup_message_headers_append (msg->response_headers, + "WWW-Authenticate", + "NTLM " NTLM_CHALLENGE); + state = NTLM_SENT_CHALLENGE; + } else if (ntlm_allowed) { + soup_message_headers_append (msg->response_headers, + "WWW-Authenticate", "NTLM"); + soup_message_headers_append (msg->response_headers, + "Connection", "close"); + } + } else { + if (not_found) + soup_message_set_status (msg, SOUP_STATUS_NOT_FOUND); + else { + soup_message_set_response (msg, "text/plain", + SOUP_MEMORY_STATIC, + "OK\r\n", 4); + soup_message_set_status (msg, SOUP_STATUS_OK); + } + } + + g_hash_table_insert (connections, socket, GINT_TO_POINTER (state)); + g_object_weak_ref (G_OBJECT (socket), clear_state, connections); +} + +static void +authenticate (SoupSession *session, SoupMessage *msg, + SoupAuth *auth, gboolean retrying, gpointer user) +{ + soup_auth_authenticate (auth, user, "password"); +} + +typedef struct { + gboolean got_ntlm_prompt; + gboolean got_basic_prompt; + gboolean sent_ntlm_request; + gboolean got_ntlm_challenge; + gboolean sent_ntlm_response; + gboolean sent_basic_response; +} NTLMState; + +static void +prompt_check (SoupMessage *msg, gpointer user_data) +{ + NTLMState *state = user_data; + const char *header; + + header = soup_message_headers_get_list (msg->response_headers, + "WWW-Authenticate"); + if (header && strstr (header, "Basic ")) + state->got_basic_prompt = TRUE; + if 
(!state->sent_ntlm_request) { + if (header && strstr (header, "NTLM") && + !strstr (header, NTLM_CHALLENGE)) + state->got_ntlm_prompt = TRUE; + } +} + +static void +challenge_check (SoupMessage *msg, gpointer user_data) +{ + NTLMState *state = user_data; + const char *header; + + header = soup_message_headers_get_list (msg->response_headers, + "WWW-Authenticate"); + if (header && !strncmp (header, "NTLM ", 5)) + state->got_ntlm_challenge = TRUE; +} + +static void +request_check (SoupMessage *msg, gpointer user_data) +{ + NTLMState *state = user_data; + const char *header; + + header = soup_message_headers_get_one (msg->request_headers, + "Authorization"); + if (header && !strncmp (header, "NTLM " NTLM_REQUEST_START, + strlen ("NTLM " NTLM_REQUEST_START))) + state->sent_ntlm_request = TRUE; +} + +static void +response_check (SoupMessage *msg, gpointer user_data) +{ + NTLMState *state = user_data; + const char *header; + + header = soup_message_headers_get_one (msg->request_headers, + "Authorization"); + if (header && !strncmp (header, "NTLM " NTLM_RESPONSE_START, + strlen ("NTLM " NTLM_RESPONSE_START))) + state->sent_ntlm_response = TRUE; + if (header && !strncmp (header, "Basic ", 6)) + state->sent_basic_response = TRUE; +} + +static void +do_message (SoupSession *session, SoupURI *base_uri, const char *path, + gboolean get_ntlm_prompt, gboolean do_ntlm, + gboolean get_basic_prompt, gboolean do_basic, + guint status_code) +{ + SoupURI *uri; + SoupMessage *msg; + NTLMState state = { FALSE, FALSE, FALSE, FALSE }; + + uri = soup_uri_new_with_base (base_uri, path); + msg = soup_message_new_from_uri ("GET", uri); + soup_uri_free (uri); + + g_signal_connect (msg, "got_headers", + G_CALLBACK (prompt_check), &state); + g_signal_connect (msg, "got_headers", + G_CALLBACK (challenge_check), &state); + g_signal_connect (msg, "wrote-headers", + G_CALLBACK (request_check), &state); + g_signal_connect (msg, "wrote-headers", + G_CALLBACK (response_check), &state); + + 
soup_session_send_message (session, msg); + debug_printf (1, " %-10s -> ", path); + + if (state.got_ntlm_prompt) { + debug_printf (1, " NTLM_PROMPT"); + if (!get_ntlm_prompt) { + debug_printf (1, "???"); + errors++; + } + } else if (get_ntlm_prompt) { + debug_printf (1, " no-ntlm-prompt???"); + errors++; + } + + if (state.got_basic_prompt) { + debug_printf (1, " BASIC_PROMPT"); + if (!get_basic_prompt) { + debug_printf (1, "???"); + errors++; + } + } else if (get_basic_prompt) { + debug_printf (1, " no-basic-prompt???"); + errors++; + } + + if (state.sent_ntlm_request) { + debug_printf (1, " REQUEST"); + if (!do_ntlm) { + debug_printf (1, "???"); + errors++; + } + } else if (do_ntlm) { + debug_printf (1, " no-request???"); + errors++; + } + + if (state.got_ntlm_challenge) { + debug_printf (1, " CHALLENGE"); + if (!do_ntlm) { + debug_printf (1, "???"); + errors++; + } + } else if (do_ntlm) { + debug_printf (1, " no-challenge???"); + errors++; + } + + if (state.sent_ntlm_response) { + debug_printf (1, " NTLM_RESPONSE"); + if (!do_ntlm) { + debug_printf (1, "???"); + errors++; + } + } else if (do_ntlm) { + debug_printf (1, " no-ntlm-response???"); + errors++; + } + + if (state.sent_basic_response) { + debug_printf (1, " BASIC_RESPONSE"); + if (!do_basic) { + debug_printf (1, "???"); + errors++; + } + } else if (do_basic) { + debug_printf (1, " no-basic-response???"); + errors++; + } + + debug_printf (1, " -> %s", msg->reason_phrase); + if (msg->status_code != status_code) { + debug_printf (1, "???"); + errors++; + } + debug_printf (1, "\n"); + + g_object_unref (msg); +} + +static void +do_ntlm_round (SoupURI *base_uri, gboolean use_ntlm, const char *user) +{ + SoupSession *session; + gboolean alice = use_ntlm && !strcmp (user, "alice"); + gboolean bob = use_ntlm && !strcmp (user, "bob"); + + g_return_if_fail (use_ntlm || !alice); + + session = soup_test_session_new (SOUP_TYPE_SESSION_ASYNC, NULL); + if (use_ntlm) + soup_session_add_feature_by_type (session, 
SOUP_TYPE_AUTH_NTLM); + + if (user) { + g_signal_connect (session, "authenticate", + G_CALLBACK (authenticate), (char *)user); + } + + do_message (session, base_uri, "/noauth", + FALSE, use_ntlm, + FALSE, FALSE, + SOUP_STATUS_OK); + do_message (session, base_uri, "/alice", + !use_ntlm || bob, FALSE, + FALSE, FALSE, + alice ? SOUP_STATUS_OK : + SOUP_STATUS_UNAUTHORIZED); + do_message (session, base_uri, "/alice/404", + !use_ntlm, bob, + FALSE, FALSE, + alice ? SOUP_STATUS_NOT_FOUND : + SOUP_STATUS_UNAUTHORIZED); + do_message (session, base_uri, "/alice", + !use_ntlm, bob, + FALSE, FALSE, + alice ? SOUP_STATUS_OK : + SOUP_STATUS_UNAUTHORIZED); + do_message (session, base_uri, "/bob", + !use_ntlm || alice, bob, + FALSE, FALSE, + bob ? SOUP_STATUS_OK : + SOUP_STATUS_UNAUTHORIZED); + do_message (session, base_uri, "/alice", + !use_ntlm || bob, alice, + FALSE, FALSE, + alice ? SOUP_STATUS_OK : + SOUP_STATUS_UNAUTHORIZED); + do_message (session, base_uri, "/basic", + FALSE, bob, + TRUE, user != NULL, + user != NULL ? SOUP_STATUS_OK : + SOUP_STATUS_UNAUTHORIZED); + do_message (session, base_uri, "/either", + !use_ntlm, FALSE, + !use_ntlm, !use_ntlm && user != NULL, + user != NULL ? 
SOUP_STATUS_OK : + SOUP_STATUS_UNAUTHORIZED); + + soup_test_session_abort_unref (session); +} + +static void +do_ntlm_tests (SoupURI *base_uri) +{ + debug_printf (1, "Round 1: Non-NTLM Connection, no auth\n"); + do_ntlm_round (base_uri, FALSE, NULL); + debug_printf (1, "Round 2: NTLM Connection, user=alice\n"); + do_ntlm_round (base_uri, TRUE, "alice"); + debug_printf (1, "Round 3: NTLM Connection, user=bob\n"); + do_ntlm_round (base_uri, TRUE, "bob"); + debug_printf (1, "Round 4: Non-NTLM Connection, user=alice\n"); + do_ntlm_round (base_uri, FALSE, "alice"); +} + +int +main (int argc, char **argv) +{ + GMainLoop *loop; + SoupServer *server; + GHashTable *connections; + SoupURI *uri; + + test_init (argc, argv, NULL); + + server = soup_test_server_new (FALSE); + connections = g_hash_table_new (NULL, NULL); + soup_server_add_handler (server, NULL, + server_callback, connections, NULL); + + loop = g_main_loop_new (NULL, TRUE); + + uri = soup_uri_new ("http://127.0.0.1/"); + soup_uri_set_port (uri, soup_server_get_port (server)); + do_ntlm_tests (uri); + soup_uri_free (uri); + + g_main_loop_unref (loop); + + soup_test_server_quit_unref (server); + test_cleanup (); + g_hash_table_destroy (connections); + + return errors != 0; +} diff --git a/tests/proxy-test.c b/tests/proxy-test.c new file mode 100644 index 0000000..27d8d2e --- /dev/null +++ b/tests/proxy-test.c @@ -0,0 +1,270 @@ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include +#include + +#include "libsoup/soup.h" +#include "test-utils.h" + +typedef struct { + const char *explanation; + const char *url; + const guint final_status; +} SoupProxyTest; + +static SoupProxyTest tests[] = { + { "GET -> 200", "", SOUP_STATUS_OK }, + { "GET -> 404", "/not-found", SOUP_STATUS_NOT_FOUND }, + { "GET -> 401 -> 200", "/Basic/realm1/", SOUP_STATUS_OK }, + { "GET -> 401 -> 401", "/Basic/realm2/", SOUP_STATUS_UNAUTHORIZED }, + { "GET -> 403", "http://no-such-hostname.xx/", SOUP_STATUS_FORBIDDEN }, 
+}; +static int ntests = sizeof (tests) / sizeof (tests[0]); + +#define HTTP_SERVER "http://127.0.0.1:47524" +#define HTTPS_SERVER "https://127.0.0.1:47525" + +enum { + SIMPLE_PROXY, + AUTH_PROXY, + UNAUTH_PROXY +}; +static const char *proxies[] = { + "http://127.0.0.1:47526", + "http://127.0.0.1:47527", + "http://127.0.0.1:47528" +}; +static const char *proxy_names[] = { + "simple proxy", + "authenticated proxy", + "unauthenticatable-to proxy" +}; + +static void +authenticate (SoupSession *session, SoupMessage *msg, + SoupAuth *auth, gboolean retrying, gpointer data) +{ + if (msg->status_code == SOUP_STATUS_UNAUTHORIZED) { + if (soup_auth_is_for_proxy (auth)) { + debug_printf (1, " got proxy auth object for 401!\n"); + errors++; + } + } else if (msg->status_code == SOUP_STATUS_PROXY_UNAUTHORIZED) { + if (!soup_auth_is_for_proxy (auth)) { + debug_printf (1, " got regular auth object for 407!\n"); + errors++; + } + } else { + debug_printf (1, " got authenticate signal with status %d\n", + msg->status_code); + errors++; + } + + if (!retrying) + soup_auth_authenticate (auth, "user1", "realm1"); +} + +static void +set_close_on_connect (SoupSession *session, SoupMessage *msg, + SoupSocket *sock, gpointer user_data) +{ + /* This is used to test that we can handle the server closing + * the connection when returning a 407 in response to a + * CONNECT. (Rude!) + */ + if (msg->method == SOUP_METHOD_CONNECT) { + soup_message_headers_append (msg->request_headers, + "Connection", "close"); + } +} + +static void +test_url (const char *url, int proxy, guint expected, + gboolean sync, gboolean close) +{ + SoupSession *session; + SoupURI *proxy_uri; + SoupMessage *msg; + + if (!tls_available && g_str_has_prefix (url, "https:")) + return; + + debug_printf (1, " GET %s via %s%s\n", url, proxy_names[proxy], + close ? 
" (with Connection: close)" : ""); + if (proxy == UNAUTH_PROXY && expected != SOUP_STATUS_FORBIDDEN) + expected = SOUP_STATUS_PROXY_UNAUTHORIZED; + + /* We create a new session for each request to ensure that + * connections/auth aren't cached between tests. + */ + proxy_uri = soup_uri_new (proxies[proxy]); + session = soup_test_session_new (sync ? SOUP_TYPE_SESSION_SYNC : SOUP_TYPE_SESSION_ASYNC, + SOUP_SESSION_PROXY_URI, proxy_uri, + NULL); + soup_uri_free (proxy_uri); + g_signal_connect (session, "authenticate", + G_CALLBACK (authenticate), NULL); + if (close) { + g_signal_connect (session, "request-started", + G_CALLBACK (set_close_on_connect), NULL); + } + + msg = soup_message_new (SOUP_METHOD_GET, url); + if (!msg) { + fprintf (stderr, "proxy-test: Could not parse URI\n"); + exit (1); + } + + soup_session_send_message (session, msg); + + debug_printf (1, " %d %s\n", msg->status_code, msg->reason_phrase); + if (msg->status_code != expected) { + debug_printf (1, " EXPECTED %d!\n", expected); + errors++; + } + + g_object_unref (msg); + soup_test_session_abort_unref (session); +} + +static void +run_test (int i, gboolean sync) +{ + char *http_url, *https_url; + + debug_printf (1, "Test %d: %s (%s)\n", i + 1, tests[i].explanation, + sync ? 
"sync" : "async"); + + if (!strncmp (tests[i].url, "http", 4)) { + http_url = g_strdup (tests[i].url); + https_url = g_strdup_printf ("https%s", tests[i].url + 4); + } else { + http_url = g_strconcat (HTTP_SERVER, tests[i].url, NULL); + https_url = g_strconcat (HTTPS_SERVER, tests[i].url, NULL); + } + test_url (http_url, SIMPLE_PROXY, tests[i].final_status, sync, FALSE); + test_url (https_url, SIMPLE_PROXY, tests[i].final_status, sync, FALSE); + test_url (http_url, AUTH_PROXY, tests[i].final_status, sync, FALSE); + test_url (https_url, AUTH_PROXY, tests[i].final_status, sync, FALSE); + test_url (https_url, AUTH_PROXY, tests[i].final_status, sync, TRUE); + test_url (http_url, UNAUTH_PROXY, tests[i].final_status, sync, FALSE); + test_url (https_url, UNAUTH_PROXY, tests[i].final_status, sync, FALSE); + + g_free (http_url); + g_free (https_url); + + debug_printf (1, "\n"); +} + +static void +server_callback (SoupServer *server, SoupMessage *msg, + const char *path, GHashTable *query, + SoupClientContext *context, gpointer data) +{ + SoupURI *uri = soup_message_get_uri (msg); + + soup_message_set_status (msg, uri->fragment ? 
SOUP_STATUS_BAD_REQUEST : SOUP_STATUS_OK); +} + +static void +do_proxy_fragment_test (SoupURI *base_uri) +{ + SoupSession *session; + SoupURI *proxy_uri, *req_uri; + SoupMessage *msg; + + debug_printf (1, "\nTesting request with fragment via proxy\n"); + + proxy_uri = soup_uri_new (proxies[SIMPLE_PROXY]); + session = soup_test_session_new (SOUP_TYPE_SESSION_ASYNC, + SOUP_SESSION_PROXY_URI, proxy_uri, + NULL); + soup_uri_free (proxy_uri); + + req_uri = soup_uri_new_with_base (base_uri, "/#foo"); + msg = soup_message_new_from_uri (SOUP_METHOD_GET, req_uri); + soup_uri_free (req_uri); + soup_session_send_message (session, msg); + + if (!SOUP_STATUS_IS_SUCCESSFUL (msg->status_code)) { + debug_printf (1, " unexpected status %d %s!\n", + msg->status_code, msg->reason_phrase); + errors++; + } + + g_object_unref (msg); + soup_test_session_abort_unref (session); +} + +static void +do_proxy_redirect_test (void) +{ + SoupSession *session; + SoupURI *proxy_uri, *req_uri, *new_uri; + SoupMessage *msg; + + debug_printf (1, "\nTesting redirection through proxy\n"); + + proxy_uri = soup_uri_new (proxies[SIMPLE_PROXY]); + session = soup_test_session_new (SOUP_TYPE_SESSION_ASYNC, + SOUP_SESSION_PROXY_URI, proxy_uri, + NULL); + soup_uri_free (proxy_uri); + + req_uri = soup_uri_new (HTTPS_SERVER); + soup_uri_set_path (req_uri, "/redirected"); + msg = soup_message_new_from_uri (SOUP_METHOD_GET, req_uri); + soup_message_headers_append (msg->request_headers, + "Connection", "close"); + soup_session_send_message (session, msg); + + new_uri = soup_message_get_uri (msg); + if (!strcmp (req_uri->path, new_uri->path)) { + debug_printf (1, " message was not redirected!\n"); + errors++; + } + soup_uri_free (req_uri); + + if (!SOUP_STATUS_IS_SUCCESSFUL (msg->status_code)) { + debug_printf (1, " unexpected status %d %s!\n", + msg->status_code, msg->reason_phrase); + errors++; + } + + g_object_unref (msg); + soup_test_session_abort_unref (session); +} + +int +main (int argc, char **argv) +{ + 
SoupServer *server; + SoupURI *base_uri; + int i; + + test_init (argc, argv, NULL); + apache_init (); + + for (i = 0; i < ntests; i++) { + run_test (i, FALSE); + run_test (i, TRUE); + } + + server = soup_test_server_new (TRUE); + soup_server_add_handler (server, NULL, server_callback, NULL, NULL); + base_uri = soup_uri_new ("http://127.0.0.1/"); + soup_uri_set_port (base_uri, soup_server_get_port (server)); + + do_proxy_fragment_test (base_uri); + do_proxy_redirect_test (); + + soup_uri_free (base_uri); + soup_test_server_quit_unref (server); + + test_cleanup (); + return errors != 0; +} diff --git a/tests/pull-api.c b/tests/pull-api.c new file mode 100644 index 0000000..febe490 --- /dev/null +++ b/tests/pull-api.c @@ -0,0 +1,540 @@ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include +#include +#include + +#include "libsoup/soup.h" +#include "libsoup/soup-session.h" + +#include "test-utils.h" + +static SoupBuffer *correct_response; + +static void +authenticate (SoupSession *session, SoupMessage *msg, + SoupAuth *auth, gboolean retrying, gpointer data) +{ + if (!retrying) + soup_auth_authenticate (auth, "user2", "realm2"); +} + +static void +get_correct_response (const char *uri) +{ + SoupSession *session; + SoupMessage *msg; + + session = soup_test_session_new (SOUP_TYPE_SESSION_ASYNC, NULL); + msg = soup_message_new (SOUP_METHOD_GET, uri); + soup_session_send_message (session, msg); + if (msg->status_code != SOUP_STATUS_OK) { + fprintf (stderr, "Could not fetch %s: %d %s\n", uri, + msg->status_code, msg->reason_phrase); + exit (1); + } + + correct_response = soup_message_body_flatten (msg->response_body); + + g_object_unref (msg); + soup_test_session_abort_unref (session); +} + +/* Pull API version 1: fully-async. More like a "poke" API. 
Rather + * than having SoupMessage emit "got_chunk" signals whenever it wants, + * we stop it after it finishes reading the message headers, and then + * tell it when we want to hear about new chunks. + */ + +typedef struct { + GMainLoop *loop; + SoupSession *session; + SoupMessage *msg; + guint timeout; + gboolean chunks_ready; + gboolean chunk_wanted; + gboolean did_first_timeout; + gsize read_so_far; + guint expected_status; +} FullyAsyncData; + +static void fully_async_got_headers (SoupMessage *msg, gpointer user_data); +static void fully_async_got_chunk (SoupMessage *msg, SoupBuffer *chunk, + gpointer user_data); +static void fully_async_finished (SoupSession *session, SoupMessage *msg, + gpointer user_data); +static gboolean fully_async_request_chunk (gpointer user_data); + +static void +do_fully_async_test (SoupSession *session, + const char *base_uri, const char *sub_uri, + gboolean fast_request, guint expected_status) +{ + GMainLoop *loop; + FullyAsyncData ad; + SoupMessage *msg; + char *uri; + + loop = g_main_loop_new (NULL, FALSE); + + uri = g_build_filename (base_uri, sub_uri, NULL); + debug_printf (1, "GET %s\n", uri); + + msg = soup_message_new (SOUP_METHOD_GET, uri); + g_free (uri); + + ad.loop = loop; + ad.session = session; + ad.msg = msg; + ad.chunks_ready = FALSE; + ad.chunk_wanted = FALSE; + ad.did_first_timeout = FALSE; + ad.read_so_far = 0; + ad.expected_status = expected_status; + + /* Since we aren't going to look at the final value of + * msg->response_body, we tell libsoup to not even bother + * generating it. + */ + soup_message_body_set_accumulate (msg->response_body, FALSE); + + /* Connect to "got_headers", from which we'll decide where to + * go next. + */ + g_signal_connect (msg, "got_headers", + G_CALLBACK (fully_async_got_headers), &ad); + + /* Queue the request */ + soup_session_queue_message (session, msg, fully_async_finished, &ad); + + /* In a real program, we'd probably just return at this point. 
+ * Eventually the caller would return all the way to the main + * loop, and then eventually, some event would cause the + * application to request a chunk of data from the message + * response. + * + * In our test program, there is no "real" main loop, so we + * had to create our own. We use a timeout to represent the + * event that causes the app to decide to request another body + * chunk. We use short timeouts in one set of tests, and long + * ones in another, to test both the + * chunk-requested-before-its-been-read and + * chunk-read-before-its-been-requested cases. + */ + ad.timeout = g_timeout_add (fast_request ? 0 : 100, + fully_async_request_chunk, &ad); + g_main_loop_run (ad.loop); + g_main_loop_unref (ad.loop); +} + +static gboolean +fully_async_request_chunk (gpointer user_data) +{ + FullyAsyncData *ad = user_data; + + if (!ad->did_first_timeout) { + debug_printf (1, " first timeout\n"); + ad->did_first_timeout = TRUE; + } else + debug_printf (2, " timeout\n"); + ad->timeout = 0; + + /* ad->chunks_ready and ad->chunk_wanted are used because + * there's a race condition between the application requesting + * the first chunk, and the message reaching a point where + * it's actually ready to read chunks. If chunks_ready has + * been set, we can just call soup_session_unpause_message() to + * cause the first chunk to be read. But if it's not, we just + * set chunk_wanted, to let the got_headers handler below know + * that a chunk has already been requested. + */ + if (ad->chunks_ready) + soup_session_unpause_message (ad->session, ad->msg); + else + ad->chunk_wanted = TRUE; + + return FALSE; +} + +static void +fully_async_got_headers (SoupMessage *msg, gpointer user_data) +{ + FullyAsyncData *ad = user_data; + + debug_printf (1, " %d %s\n", msg->status_code, msg->reason_phrase); + if (msg->status_code == SOUP_STATUS_UNAUTHORIZED) { + /* Let soup handle this one; this got_headers handler + * will get called again next time around. 
+ */ + return; + } else if (msg->status_code != SOUP_STATUS_OK) { + debug_printf (1, " unexpected status: %d %s\n", + msg->status_code, msg->reason_phrase); + errors++; + return; + } + + /* OK, we're happy with the response. So, we connect to + * "got_chunk". If there has already been a chunk requested, + * we let I/O continue; but if there hasn't, we pause now + * until one is requested. + */ + ad->chunks_ready = TRUE; + g_signal_connect (msg, "got_chunk", + G_CALLBACK (fully_async_got_chunk), ad); + if (!ad->chunk_wanted) + soup_session_pause_message (ad->session, msg); +} + +static void +fully_async_got_chunk (SoupMessage *msg, SoupBuffer *chunk, gpointer user_data) +{ + FullyAsyncData *ad = user_data; + + debug_printf (2, " got chunk from %lu - %lu\n", + (unsigned long) ad->read_so_far, + (unsigned long) ad->read_so_far + chunk->length); + + /* We've got a chunk, let's process it. In the case of the + * test program, that means comparing it against + * correct_response to make sure that we got the right data. + */ + if (ad->read_so_far + chunk->length > correct_response->length) { + debug_printf (1, " read too far! (%lu > %lu)\n", + (unsigned long) (ad->read_so_far + chunk->length), + (unsigned long) correct_response->length); + errors++; + } else if (memcmp (chunk->data, + correct_response->data + ad->read_so_far, + chunk->length) != 0) { + debug_printf (1, " data mismatch in block starting at %lu\n", + (unsigned long) ad->read_so_far); + errors++; + } + ad->read_so_far += chunk->length; + + /* Now pause I/O, and prepare to read another chunk later. + * (Again, the timeout just abstractly represents the idea of + * the application requesting another chunk at some random + * point in the future. You wouldn't be using a timeout in a + * real program.) 
+ */ + soup_session_pause_message (ad->session, msg); + ad->chunk_wanted = FALSE; + + ad->timeout = g_timeout_add (10, fully_async_request_chunk, ad); +} + +static void +fully_async_finished (SoupSession *session, SoupMessage *msg, + gpointer user_data) +{ + FullyAsyncData *ad = user_data; + + if (msg->status_code != ad->expected_status) { + debug_printf (1, " unexpected final status: %d %s !\n", + msg->status_code, msg->reason_phrase); + errors++; + } + + if (ad->timeout != 0) + g_source_remove (ad->timeout); + + /* Since our test program is only running the loop for the + * purpose of this one test, we quit the loop once the + * test is done. + */ + g_main_loop_quit (ad->loop); +} + + +/* Pull API version 2: synchronous pull API via async I/O. */ + +typedef struct { + GMainLoop *loop; + SoupSession *session; + SoupBuffer *chunk; +} SyncAsyncData; + +static void sync_async_send (SoupSession *session, + SoupMessage *msg); +static gboolean sync_async_is_finished(SoupMessage *msg); +static SoupBuffer *sync_async_read_chunk (SoupMessage *msg); +static void sync_async_cleanup (SoupMessage *msg); + +static void sync_async_got_headers (SoupMessage *msg, gpointer user_data); +static void sync_async_copy_chunk (SoupMessage *msg, SoupBuffer *chunk, + gpointer user_data); +static void sync_async_finished (SoupSession *session, SoupMessage *msg, + gpointer user_data); + +static void +do_synchronously_async_test (SoupSession *session, + const char *base_uri, const char *sub_uri, + guint expected_status) +{ + SoupMessage *msg; + char *uri; + gsize read_so_far; + SoupBuffer *chunk; + + uri = g_build_filename (base_uri, sub_uri, NULL); + debug_printf (1, "GET %s\n", uri); + + msg = soup_message_new (SOUP_METHOD_GET, uri); + g_free (uri); + + /* As in the fully-async case, we turn off accumulate, as an + * optimization. 
+ */ + soup_message_body_set_accumulate (msg->response_body, FALSE); + + /* Send the message, get back headers */ + sync_async_send (session, msg); + if (sync_async_is_finished (msg) && + expected_status == SOUP_STATUS_OK) { + debug_printf (1, " finished without reading response!\n"); + errors++; + } else if (!sync_async_is_finished (msg) && + expected_status != SOUP_STATUS_OK) { + debug_printf (1, " request failed to fail!\n"); + errors++; + } + + /* Now we're ready to read the response body (though we could + * put that off until later if we really wanted). + */ + read_so_far = 0; + while ((chunk = sync_async_read_chunk (msg))) { + debug_printf (2, " read chunk from %lu - %lu\n", + (unsigned long) read_so_far, + (unsigned long) read_so_far + chunk->length); + + if (read_so_far + chunk->length > correct_response->length) { + debug_printf (1, " read too far! (%lu > %lu)\n", + (unsigned long) read_so_far + chunk->length, + (unsigned long) correct_response->length); + errors++; + } else if (memcmp (chunk->data, + correct_response->data + read_so_far, + chunk->length) != 0) { + debug_printf (1, " data mismatch in block starting at %lu\n", + (unsigned long) read_so_far); + errors++; + } + read_so_far += chunk->length; + soup_buffer_free (chunk); + } + + if (!sync_async_is_finished (msg) || + (msg->status_code == SOUP_STATUS_OK && + read_so_far != correct_response->length)) { + debug_printf (1, " loop ended before message was fully read!\n"); + errors++; + } else if (msg->status_code != expected_status) { + debug_printf (1, " unexpected final status: %d %s !\n", + msg->status_code, msg->reason_phrase); + errors++; + } + + sync_async_cleanup (msg); + g_object_unref (msg); +} + +/* Sends @msg on async session @session and returns after the headers + * of a successful response (or the complete body of a failed + * response) have been read. 
+ */ +static void +sync_async_send (SoupSession *session, SoupMessage *msg) +{ + SyncAsyncData *ad; + + ad = g_new0 (SyncAsyncData, 1); + g_object_set_data (G_OBJECT (msg), "SyncAsyncData", ad); + + /* In this case, unlike the fully-async case, the loop + * actually belongs to us, not the application; it will only + * be run when we're waiting for chunks, not at other times. + * + * If session has an async_context associated with it, we'd + * want to pass that, rather than NULL, here. + */ + ad->loop = g_main_loop_new (NULL, FALSE); + ad->session = session; + + g_signal_connect (msg, "got_headers", + G_CALLBACK (sync_async_got_headers), ad); + + /* Start the request by queuing it and then running our main + * loop. Note: we have to use soup_session_queue_message() + * here; soup_session_send_message() won't work, for several + * reasons. Also, since soup_session_queue_message() steals a + * ref to the message and then unrefs it after invoking the + * callback, we have to add an extra ref before calling it. + */ + g_object_ref (msg); + soup_session_queue_message (session, msg, sync_async_finished, ad); + g_main_loop_run (ad->loop); + + /* At this point, one of two things has happened; either the + * got_headers handler got headers it liked, and so stopped + * the loop, or else the message was fully processed without + * the got_headers handler interrupting it, and so the final + * callback (sync_async_finished) was invoked, and stopped the + * loop. + * + * Either way, we're done, so we return to the caller. + */ +} + +static void +sync_async_got_headers (SoupMessage *msg, gpointer user_data) +{ + SyncAsyncData *ad = user_data; + + debug_printf (1, " %d %s\n", msg->status_code, msg->reason_phrase); + if (msg->status_code == SOUP_STATUS_UNAUTHORIZED) { + /* Let soup handle this one; this got_headers handler + * will get called again next time around. 
+ */ + return; + } else if (msg->status_code != SOUP_STATUS_OK) { + debug_printf (1, " unexpected status: %d %s\n", + msg->status_code, msg->reason_phrase); + errors++; + return; + } + + /* Stop I/O and return to the caller */ + soup_session_pause_message (ad->session, msg); + g_main_loop_quit (ad->loop); +} + +static gboolean +sync_async_is_finished (SoupMessage *msg) +{ + SyncAsyncData *ad = g_object_get_data (G_OBJECT (msg), "SyncAsyncData"); + + /* sync_async_finished clears ad->loop */ + return ad->loop == NULL; +} + +/* Tries to read a chunk. Returns %NULL on error/end-of-response. */ +static SoupBuffer * +sync_async_read_chunk (SoupMessage *msg) +{ + SyncAsyncData *ad = g_object_get_data (G_OBJECT (msg), "SyncAsyncData"); + guint handler; + + if (sync_async_is_finished (msg)) + return NULL; + + ad->chunk = NULL; + handler = g_signal_connect (msg, "got_chunk", + G_CALLBACK (sync_async_copy_chunk), + ad); + soup_session_unpause_message (ad->session, msg); + g_main_loop_run (ad->loop); + g_signal_handler_disconnect (msg, handler); + + return ad->chunk; +} + +static void +sync_async_copy_chunk (SoupMessage *msg, SoupBuffer *chunk, gpointer user_data) +{ + SyncAsyncData *ad = user_data; + + ad->chunk = soup_buffer_copy (chunk); + + /* Now pause and return from the g_main_loop_run() call in + * sync_async_read_chunk(). + */ + soup_session_pause_message (ad->session, msg); + g_main_loop_quit (ad->loop); +} + +static void +sync_async_finished (SoupSession *session, SoupMessage *msg, gpointer user_data) +{ + SyncAsyncData *ad = user_data; + + /* Unlike in the fully_async_case, we don't need to do much + * here, because control will return to + * do_synchronously_async_test() when we're done, and we do + * the final tests there. 
+ */ + g_main_loop_quit (ad->loop); + g_main_loop_unref (ad->loop); + ad->loop = NULL; +} + +static void +sync_async_cleanup (SoupMessage *msg) +{ + SyncAsyncData *ad = g_object_get_data (G_OBJECT (msg), "SyncAsyncData"); + + if (ad->loop) + g_main_loop_unref (ad->loop); + g_free (ad); +} + + +int +main (int argc, char **argv) +{ + SoupSession *session; + const char *base_uri; + + test_init (argc, argv, NULL); + apache_init (); + + base_uri = "http://127.0.0.1:47524/"; + get_correct_response (base_uri); + + debug_printf (1, "\nFully async, fast requests\n"); + session = soup_test_session_new (SOUP_TYPE_SESSION_ASYNC, NULL); + g_signal_connect (session, "authenticate", + G_CALLBACK (authenticate), NULL); + do_fully_async_test (session, base_uri, "/", + TRUE, SOUP_STATUS_OK); + do_fully_async_test (session, base_uri, "/Basic/realm1/", + TRUE, SOUP_STATUS_UNAUTHORIZED); + do_fully_async_test (session, base_uri, "/Basic/realm2/", + TRUE, SOUP_STATUS_OK); + soup_test_session_abort_unref (session); + + debug_printf (1, "\nFully async, slow requests\n"); + session = soup_test_session_new (SOUP_TYPE_SESSION_ASYNC, NULL); + g_signal_connect (session, "authenticate", + G_CALLBACK (authenticate), NULL); + do_fully_async_test (session, base_uri, "/", + FALSE, SOUP_STATUS_OK); + do_fully_async_test (session, base_uri, "/Basic/realm1/", + FALSE, SOUP_STATUS_UNAUTHORIZED); + do_fully_async_test (session, base_uri, "/Basic/realm2/", + FALSE, SOUP_STATUS_OK); + soup_test_session_abort_unref (session); + + debug_printf (1, "\nSynchronously async\n"); + session = soup_test_session_new (SOUP_TYPE_SESSION_ASYNC, NULL); + g_signal_connect (session, "authenticate", + G_CALLBACK (authenticate), NULL); + do_synchronously_async_test (session, base_uri, "/", + SOUP_STATUS_OK); + do_synchronously_async_test (session, base_uri, "/Basic/realm1/", + SOUP_STATUS_UNAUTHORIZED); + do_synchronously_async_test (session, base_uri, "/Basic/realm2/", + SOUP_STATUS_OK); + soup_test_session_abort_unref 
(session); + + soup_buffer_free (correct_response); + + test_cleanup (); + return errors != 0; +} diff --git a/tests/range-test.c b/tests/range-test.c new file mode 100644 index 0000000..d7ba922 --- /dev/null +++ b/tests/range-test.c @@ -0,0 +1,377 @@ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include +#include +#include + +#include "libsoup/soup.h" + +#include "test-utils.h" + +SoupBuffer *full_response; +int total_length; +char *test_response; + +static void +get_full_response (void) +{ + char *contents; + gsize length; + GError *error = NULL; + + if (!g_file_get_contents (SRCDIR "/index.txt", &contents, &length, &error)) { + fprintf (stderr, "Could not read index.txt: %s\n", + error->message); + exit (1); + } + + full_response = soup_buffer_new (SOUP_MEMORY_TAKE, contents, length); + debug_printf (1, "Total response length is %d\n\n", (int)length); +} + +static void +check_part (SoupMessageHeaders *headers, const char *body, gsize body_len, + gboolean check_start_end, int expected_start, int expected_end) +{ + goffset start, end, total_length; + + debug_printf (1, " Content-Range: %s\n", + soup_message_headers_get_one (headers, "Content-Range")); + + if (!soup_message_headers_get_content_range (headers, &start, &end, &total_length)) { + debug_printf (1, " Could not find/parse Content-Range\n"); + errors++; + return; + } + + if (total_length != full_response->length && total_length != -1) { + debug_printf (1, " Unexpected total length %" G_GINT64_FORMAT " in response\n", + total_length); + errors++; + return; + } + + if (check_start_end) { + if ((expected_start >= 0 && start != expected_start) || + (expected_start < 0 && start != full_response->length + expected_start)) { + debug_printf (1, " Unexpected range start %" G_GINT64_FORMAT " in response\n", + start); + errors++; + return; + } + + if ((expected_end >= 0 && end != expected_end) || + (expected_end < 0 && end != full_response->length - 1)) { + debug_printf (1, " Unexpected 
range end %" G_GINT64_FORMAT " in response\n", + end); + errors++; + return; + } + } + + if (end - start + 1 != body_len) { + debug_printf (1, " Range length (%d) does not match body length (%d)\n", + (int)(end - start) + 1, + (int)body_len); + errors++; + return; + } + + /* start and end come from the response's Content-Range header. + * test_response is only full_response->length bytes long, so + * refuse any part that would land outside it instead of + * writing past the buffer. + */ + if (start < 0 || end >= (goffset) full_response->length) { + debug_printf (1, " Range %" G_GINT64_FORMAT "-%" G_GINT64_FORMAT " lies outside the full response\n", + start, end); + errors++; + return; + } + + memcpy (test_response + start, body, body_len); +} + +static void +do_single_range (SoupSession *session, SoupMessage *msg, + int start, int end) +{ + const char *content_type; + + debug_printf (1, " Range: %s\n", + soup_message_headers_get_one (msg->request_headers, "Range")); + + soup_session_send_message (session, msg); + + if (msg->status_code != SOUP_STATUS_PARTIAL_CONTENT) { + debug_printf (1, " Unexpected status %d %s\n", + msg->status_code, msg->reason_phrase); + g_object_unref (msg); + errors++; + return; + } + + content_type = soup_message_headers_get_content_type ( + msg->response_headers, NULL); + if (content_type && !strcmp (content_type, "multipart/byteranges")) { + debug_printf (1, " Response body should not have been multipart/byteranges\n"); + g_object_unref (msg); + errors++; + return; + } + + check_part (msg->response_headers, msg->response_body->data, + msg->response_body->length, TRUE, start, end); + g_object_unref (msg); +} + +static void +request_single_range (SoupSession *session, const char *uri, + int start, int end) +{ + SoupMessage *msg; + + msg = soup_message_new ("GET", uri); + soup_message_headers_set_range (msg->request_headers, start, end); + do_single_range (session, msg, start, end); +} + +static void +do_multi_range (SoupSession *session, SoupMessage *msg, + int expected_return_ranges) +{ + SoupMultipart *multipart; + const char *content_type; + int i, length; + + debug_printf (1, " Range: %s\n", + soup_message_headers_get_one (msg->request_headers, "Range")); + + soup_session_send_message (session, msg); + + if (msg->status_code != SOUP_STATUS_PARTIAL_CONTENT) { + debug_printf (1, " Unexpected status %d %s\n", + msg->status_code, msg->reason_phrase); + 
g_object_unref (msg); + errors++; + return; + } + + content_type = soup_message_headers_get_content_type (msg->response_headers, NULL); + if (!content_type || strcmp (content_type, "multipart/byteranges") != 0) { + debug_printf (1, " Response Content-Type (%s) was not multipart/byteranges\n", + content_type); + g_object_unref (msg); + errors++; + return; + } + + multipart = soup_multipart_new_from_message (msg->response_headers, + msg->response_body); + if (!multipart) { + debug_printf (1, " Could not parse multipart\n"); + g_object_unref (msg); + errors++; + return; + } + + length = soup_multipart_get_length (multipart); + if (length != expected_return_ranges) { + debug_printf (1, " Expected %d ranges, got %d\n", + expected_return_ranges, length); + errors++; + } + + for (i = 0; i < length; i++) { + SoupMessageHeaders *headers; + SoupBuffer *body; + + debug_printf (1, " Part %d\n", i + 1); + soup_multipart_get_part (multipart, i, &headers, &body); + check_part (headers, body->data, body->length, FALSE, 0, 0); + } + + soup_multipart_free (multipart); + g_object_unref (msg); +} + +static void +request_double_range (SoupSession *session, const char *uri, + int first_start, int first_end, + int second_start, int second_end, + int expected_return_ranges) +{ + SoupMessage *msg; + SoupRange ranges[2]; + + msg = soup_message_new ("GET", uri); + ranges[0].start = first_start; + ranges[0].end = first_end; + ranges[1].start = second_start; + ranges[1].end = second_end; + soup_message_headers_set_ranges (msg->request_headers, ranges, 2); + + if (expected_return_ranges == 1) { + do_single_range (session, msg, + MIN (first_start, second_start), + MAX (first_end, second_end)); + } else + do_multi_range (session, msg, expected_return_ranges); +} + +static void +request_triple_range (SoupSession *session, const char *uri, + int first_start, int first_end, + int second_start, int second_end, + int third_start, int third_end, + int expected_return_ranges) +{ + SoupMessage *msg; + 
SoupRange ranges[3]; + + msg = soup_message_new ("GET", uri); + ranges[0].start = first_start; + ranges[0].end = first_end; + ranges[1].start = second_start; + ranges[1].end = second_end; + ranges[2].start = third_start; + ranges[2].end = third_end; + soup_message_headers_set_ranges (msg->request_headers, ranges, 3); + + if (expected_return_ranges == 1) { + do_single_range (session, msg, + MIN (first_start, MIN (second_start, third_start)), + MAX (first_end, MAX (second_end, third_end))); + } else + do_multi_range (session, msg, expected_return_ranges); +} + +static void +do_range_test (SoupSession *session, const char *uri, gboolean expect_coalesce) +{ + int twelfths = full_response->length / 12; + + memset (test_response, 0, full_response->length); + + /* We divide the response into 12 ranges and request them + * as follows: + * + * 0: A (first single request) + * 1: D (2nd part of triple request) + * 2: C (1st part of double request) + * 3: D (1st part of triple request) + * 4: F (trickier overlapping request) + * 5: C (2nd part of double request) + * 6: D (3rd part of triple request) + * 7: E (overlapping request) + * 8: E (overlapping request) + * 9: F (trickier overlapping request) + * 10: F (trickier overlapping request) + * 11: B (second and third single requests) + */ + + /* A: 0, simple request */ + debug_printf (1, "Requesting %d-%d\n", 0 * twelfths, 1 * twelfths); + request_single_range (session, uri, + 0 * twelfths, 1 * twelfths); + + /* B: 11, end-relative request. These two are mostly redundant + * in terms of data coverage, but they may still catch + * Range-header-generating bugs. 
+ */ + debug_printf (1, "Requesting %d-\n", 11 * twelfths); + request_single_range (session, uri, + 11 * twelfths, -1); + debug_printf (1, "Requesting -%d\n", 1 * twelfths); + request_single_range (session, uri, + -1 * twelfths, -1); + + /* C: 2 and 5 */ + debug_printf (1, "Requesting %d-%d,%d-%d\n", + 2 * twelfths, 3 * twelfths, + 5 * twelfths, 6 * twelfths); + request_double_range (session, uri, + 2 * twelfths, 3 * twelfths, + 5 * twelfths, 6 * twelfths, + 2); + + /* D: 1, 3, 6 */ + debug_printf (1, "Requesting %d-%d,%d-%d,%d-%d\n", + 3 * twelfths, 4 * twelfths, + 1 * twelfths, 2 * twelfths, + 6 * twelfths, 7 * twelfths); + request_triple_range (session, uri, + 3 * twelfths, 4 * twelfths, + 1 * twelfths, 2 * twelfths, + 6 * twelfths, 7 * twelfths, + 3); + + /* E: 7 and 8: should coalesce into a single response */ + debug_printf (1, "Requesting %d-%d,%d-%d (can coalesce)\n", + 7 * twelfths, 8 * twelfths, + 8 * twelfths, 9 * twelfths); + request_double_range (session, uri, + 7 * twelfths, 8 * twelfths, + 8 * twelfths, 9 * twelfths, + expect_coalesce ? 1 : 2); + + /* F: 4, 9, 10: 9 and 10 should coalesce even though 4 was + * requested between them. (Also, they actually overlap in + * this case, as opposed to just touching.) + */ + debug_printf (1, "Requesting %d-%d,%d-%d,%d-%d (can partially coalesce)\n", + 9 * twelfths, 10 * twelfths + 5, + 4 * twelfths, 5 * twelfths, + 10 * twelfths - 5, 11 * twelfths); + request_triple_range (session, uri, + 9 * twelfths, 10 * twelfths + 5, + 4 * twelfths, 5 * twelfths, + 10 * twelfths - 5, 11 * twelfths, + expect_coalesce ? 
2 : 3); + + if (memcmp (full_response->data, test_response, full_response->length) != 0) { + debug_printf (1, "\nfull_response and test_response don't match\n"); + errors++; + } +} + +static void +server_handler (SoupServer *server, + SoupMessage *msg, + const char *path, + GHashTable *query, + SoupClientContext *client, + gpointer user_data) +{ + soup_message_set_status (msg, SOUP_STATUS_OK); + soup_message_body_append_buffer (msg->response_body, + full_response); +} + +int +main (int argc, char **argv) +{ + SoupSession *session; + SoupServer *server; + char *base_uri; + + test_init (argc, argv, NULL); + apache_init (); + + get_full_response (); + test_response = g_malloc0 (full_response->length); + + session = soup_test_session_new (SOUP_TYPE_SESSION_ASYNC, NULL); + + debug_printf (1, "1. Testing against apache\n"); + do_range_test (session, "http://127.0.0.1:47524/", FALSE); + + debug_printf (1, "\n2. Testing against SoupServer\n"); + server = soup_test_server_new (FALSE); + soup_server_add_handler (server, NULL, server_handler, NULL, NULL); + base_uri = g_strdup_printf ("http://127.0.0.1:%u/", + soup_server_get_port (server)); + do_range_test (session, base_uri, TRUE); + g_free (base_uri); + soup_test_server_quit_unref (server); + + soup_test_session_abort_unref (session); + + soup_buffer_free (full_response); + g_free (test_response); + + test_cleanup (); + return errors != 0; +} diff --git a/tests/redirect-test.c b/tests/redirect-test.c new file mode 100644 index 0000000..f99d9c7 --- /dev/null +++ b/tests/redirect-test.c @@ -0,0 +1,384 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2008 Red Hat, Inc. 
+ */ + +#include "config.h" + +#include +#include +#include + +#include +#include + +#include "test-utils.h" + +char *server2_uri; + +typedef struct { + const char *method; + const char *path; + guint status_code; + gboolean repeat; +} TestRequest; + +static struct { + /* One slot more than the longest chain below (three requests), + * so that every chain, including the double-redirect ones, + * ends in a zero-initialized { NULL } entry. restarted() + * advances through this array and stops on a NULL method. + */ + TestRequest requests[4]; + guint final_status; +} tests[] = { + /* A redirecty response to a GET or HEAD should cause a redirect */ + + { { { "GET", "/301", 301 }, + { "GET", "/", 200 }, + { NULL } }, 200 }, + { { { "GET", "/302", 302 }, + { "GET", "/", 200 }, + { NULL } }, 200 }, + { { { "GET", "/303", 303 }, + { "GET", "/", 200 }, + { NULL } }, 200 }, + { { { "GET", "/307", 307 }, + { "GET", "/", 200 }, + { NULL } }, 200 }, + { { { "HEAD", "/301", 301 }, + { "HEAD", "/", 200 }, + { NULL } }, 200 }, + { { { "HEAD", "/302", 302 }, + { "HEAD", "/", 200 }, + { NULL } }, 200 }, + /* 303 is a nonsensical response to HEAD, but some sites do + * it anyway. :-/ + */ + { { { "HEAD", "/303", 303 }, + { "HEAD", "/", 200 }, + { NULL } }, 200 }, + { { { "HEAD", "/307", 307 }, + { "HEAD", "/", 200 }, + { NULL } }, 200 }, + + /* A non-redirecty response to a GET or HEAD should not */ + + { { { "GET", "/300", 300 }, + { NULL } }, 300 }, + { { { "GET", "/304", 304 }, + { NULL } }, 304 }, + { { { "GET", "/305", 305 }, + { NULL } }, 305 }, + { { { "GET", "/306", 306 }, + { NULL } }, 306 }, + { { { "GET", "/308", 308 }, + { NULL } }, 308 }, + { { { "HEAD", "/300", 300 }, + { NULL } }, 300 }, + { { { "HEAD", "/304", 304 }, + { NULL } }, 304 }, + { { { "HEAD", "/305", 305 }, + { NULL } }, 305 }, + { { { "HEAD", "/306", 306 }, + { NULL } }, 306 }, + { { { "HEAD", "/308", 308 }, + { NULL } }, 308 }, + + /* Test double-redirect */ + + { { { "GET", "/301/302", 301 }, + { "GET", "/302", 302 }, + { "GET", "/", 200 } }, 200 }, + { { { "HEAD", "/301/302", 301 }, + { "HEAD", "/302", 302 }, + { "HEAD", "/", 200 } }, 200 }, + + /* POST should only automatically redirect on 301, 302 and 303 */ + + { { { "POST", "/301", 301 }, + { 
"GET", "/", 200 }, + { NULL } }, 200 }, + { { { "POST", "/302", 302 }, + { "GET", "/", 200 }, + { NULL } }, 200 }, + { { { "POST", "/303", 303 }, + { "GET", "/", 200 }, + { NULL } }, 200 }, + { { { "POST", "/307", 307 }, + { NULL } }, 307 }, + + /* Test behavior with recoverably-bad Location header */ + { { { "GET", "/bad", 302 }, + { "GET", "/bad%20with%20spaces", 200 }, + { NULL } }, 200 }, + + /* Test behavior with irrecoverably-bad Location header */ + { { { "GET", "/bad-no-host", 302 }, + { NULL } }, SOUP_STATUS_MALFORMED }, + + /* Test infinite redirection */ + { { { "GET", "/bad-recursive", 302, TRUE }, + { NULL } }, SOUP_STATUS_TOO_MANY_REDIRECTS }, + + /* Test redirection to a different server */ + { { { "GET", "/server2", 302 }, + { "GET", "/on-server2", 200 }, + { NULL } }, 200 }, +}; +static const int n_tests = G_N_ELEMENTS (tests); + +static void +got_headers (SoupMessage *msg, gpointer user_data) +{ + TestRequest **req = user_data; + const char *location; + + debug_printf (2, " -> %d %s\n", msg->status_code, + msg->reason_phrase); + location = soup_message_headers_get_one (msg->response_headers, + "Location"); + if (location) + debug_printf (2, " Location: %s\n", location); + + if (!(*req)->method) + return; + + if (msg->status_code != (*req)->status_code) { + debug_printf (1, " - Expected %d !\n", + (*req)->status_code); + errors++; + } +} + +static void +restarted (SoupMessage *msg, gpointer user_data) +{ + TestRequest **req = user_data; + SoupURI *uri = soup_message_get_uri (msg); + + debug_printf (2, " %s %s\n", msg->method, uri->path); + + if ((*req)->method && !(*req)->repeat) + (*req)++; + + if (!(*req)->method) { + debug_printf (1, " - Expected to be done!\n"); + errors++; + return; + } + + if (strcmp (msg->method, (*req)->method) != 0) { + debug_printf (1, " - Expected %s !\n", (*req)->method); + errors++; + } + if (strcmp (uri->path, (*req)->path) != 0) { + debug_printf (1, " - Expected %s !\n", (*req)->path); + errors++; + } +} + +static 
void +do_test (SoupSession *session, SoupURI *base_uri, int n) +{ + SoupURI *uri; + SoupMessage *msg; + TestRequest *req; + + debug_printf (1, "%2d. %s %s\n", n + 1, + tests[n].requests[0].method, + tests[n].requests[0].path); + + uri = soup_uri_new_with_base (base_uri, tests[n].requests[0].path); + msg = soup_message_new_from_uri (tests[n].requests[0].method, uri); + soup_uri_free (uri); + + if (msg->method == SOUP_METHOD_POST) { + soup_message_set_request (msg, "text/plain", + SOUP_MEMORY_STATIC, + "post body", + strlen ("post body")); + } + + req = &tests[n].requests[0]; + g_signal_connect (msg, "got_headers", + G_CALLBACK (got_headers), &req); + g_signal_connect (msg, "restarted", + G_CALLBACK (restarted), &req); + + soup_session_send_message (session, msg); + + if (msg->status_code != tests[n].final_status) { + debug_printf (1, " - Expected final status of %d, got %d !\n", + tests[n].final_status, msg->status_code); + errors++; + } + + g_object_unref (msg); + debug_printf (2, "\n"); +} + +static void +do_redirect_tests (SoupURI *base_uri) +{ + SoupSession *session; + int n; + + session = soup_test_session_new (SOUP_TYPE_SESSION_ASYNC, NULL); + debug_printf (1, "Async session\n"); + for (n = 0; n < n_tests; n++) + do_test (session, base_uri, n); + soup_test_session_abort_unref (session); + + session = soup_test_session_new (SOUP_TYPE_SESSION_SYNC, NULL); + debug_printf (1, "Sync session\n"); + for (n = 0; n < n_tests; n++) + do_test (session, base_uri, n); + soup_test_session_abort_unref (session); +} + +static void +server_callback (SoupServer *server, SoupMessage *msg, + const char *path, GHashTable *query, + SoupClientContext *context, gpointer data) +{ + char *remainder; + guint status_code; + + if (g_str_has_prefix (path, "/bad")) { + if (!strcmp (path, "/bad")) { + soup_message_set_status (msg, SOUP_STATUS_FOUND); + soup_message_headers_replace (msg->response_headers, + "Location", + "/bad with spaces"); + } else if (!strcmp (path, "/bad-recursive")) { + 
soup_message_set_status (msg, SOUP_STATUS_FOUND); + soup_message_headers_replace (msg->response_headers, + "Location", + "/bad-recursive"); + } else if (!strcmp (path, "/bad-no-host")) { + soup_message_set_status (msg, SOUP_STATUS_FOUND); + soup_message_headers_replace (msg->response_headers, + "Location", + "about:blank"); + } else if (!strcmp (path, "/bad with spaces")) + soup_message_set_status (msg, SOUP_STATUS_OK); + else + soup_message_set_status (msg, SOUP_STATUS_NOT_FOUND); + return; + } else if (!strcmp (path, "/server2")) { + soup_message_set_status (msg, SOUP_STATUS_FOUND); + soup_message_headers_replace (msg->response_headers, + "Location", + server2_uri); + return; + } else if (!strcmp (path, "/")) { + if (msg->method != SOUP_METHOD_GET && + msg->method != SOUP_METHOD_HEAD) { + soup_message_set_status (msg, SOUP_STATUS_METHOD_NOT_ALLOWED); + return; + } + + /* Make sure that redirecting a POST clears the body */ + if (msg->request_body->length) { + soup_message_set_status (msg, SOUP_STATUS_BAD_REQUEST); + return; + } + + /* Make sure that a HTTP/1.0 redirect doesn't cause an + * HTTP/1.0 re-request. (#521848) + */ + if (soup_message_get_http_version (msg) == SOUP_HTTP_1_0) { + soup_message_set_status (msg, SOUP_STATUS_BAD_REQUEST); + return; + } + + soup_message_set_status (msg, SOUP_STATUS_OK); + + /* FIXME: this is wrong, though it doesn't matter for + * the purposes of this test, and to do the right + * thing currently we'd have to set Content-Length by + * hand. + */ + if (msg->method != SOUP_METHOD_HEAD) { + soup_message_set_response (msg, "text/plain", + SOUP_MEMORY_STATIC, + "OK\r\n", 4); + } + return; + } + + status_code = strtoul (path + 1, &remainder, 10); + if (!SOUP_STATUS_IS_REDIRECTION (status_code) || + (*remainder && *remainder != '/')) { + soup_message_set_status (msg, SOUP_STATUS_NOT_FOUND); + return; + } + + /* See above comment re bug 521848. 
*/ + soup_message_set_http_version (msg, SOUP_HTTP_1_0); + + soup_message_set_status (msg, status_code); + if (*remainder) { + soup_message_headers_replace (msg->response_headers, + "Location", remainder); + } else { + soup_message_headers_replace (msg->response_headers, + "Location", "/"); + } +} + +static void +server2_callback (SoupServer *server, SoupMessage *msg, + const char *path, GHashTable *query, + SoupClientContext *context, gpointer data) +{ + soup_message_set_status (msg, SOUP_STATUS_OK); +} + +static gboolean run_tests = TRUE; + +static GOptionEntry no_test_entry[] = { + { "no-tests", 'n', G_OPTION_FLAG_REVERSE, + G_OPTION_ARG_NONE, &run_tests, + "Don't run tests, just run the test server", NULL }, + { NULL } +}; + +int +main (int argc, char **argv) +{ + GMainLoop *loop; + SoupServer *server, *server2; + guint port; + SoupURI *base_uri; + + test_init (argc, argv, no_test_entry); + + server = soup_test_server_new (TRUE); + soup_server_add_handler (server, NULL, + server_callback, NULL, NULL); + port = soup_server_get_port (server); + + server2 = soup_test_server_new (TRUE); + soup_server_add_handler (server2, NULL, + server2_callback, NULL, NULL); + server2_uri = g_strdup_printf ("http://127.0.0.1:%d/on-server2", + soup_server_get_port (server2)); + + loop = g_main_loop_new (NULL, TRUE); + + if (run_tests) { + base_uri = soup_uri_new ("http://127.0.0.1"); + soup_uri_set_port (base_uri, port); + do_redirect_tests (base_uri); + soup_uri_free (base_uri); + } else { + printf ("Listening on port %d\n", port); + g_main_loop_run (loop); + } + + g_main_loop_unref (loop); + g_free (server2_uri); + soup_test_server_quit_unref (server); + soup_test_server_quit_unref (server2); + + if (run_tests) + test_cleanup (); + return errors != 0; +} diff --git a/tests/requester-test.c b/tests/requester-test.c new file mode 100644 index 0000000..d303865 --- /dev/null +++ b/tests/requester-test.c @@ -0,0 +1,213 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; 
c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2011 Red Hat, Inc. + */ + +#include +#include +#include + +#define LIBSOUP_USE_UNSTABLE_REQUEST_API +#include +#include +#include + +#include "test-utils.h" + +SoupServer *server; +GMainLoop *loop; +char buf[1024]; + +SoupBuffer *response; + +static void +get_index (void) +{ + char *contents; + gsize length; + GError *error = NULL; + + if (!g_file_get_contents (SRCDIR "/index.txt", &contents, &length, &error)) { + fprintf (stderr, "Could not read index.txt: %s\n", + error->message); + exit (1); + } + + response = soup_buffer_new (SOUP_MEMORY_TAKE, contents, length); +} + +static void +server_callback (SoupServer *server, SoupMessage *msg, + const char *path, GHashTable *query, + SoupClientContext *context, gpointer data) +{ + soup_message_set_status (msg, SOUP_STATUS_OK); + soup_message_set_response (msg, "text/plain", + SOUP_MEMORY_STATIC, NULL, 0); + soup_message_body_append_buffer (msg->response_body, response); +} + +static void +test_read_ready (GObject *source, GAsyncResult *res, gpointer user_data) +{ + GInputStream *stream = G_INPUT_STREAM (source); + GString *body = user_data; + GError *error = NULL; + gsize nread; + + nread = g_input_stream_read_finish (stream, res, &error); + if (nread == -1) { + debug_printf (1, " read_async failed: %s", error->message); + errors++; + g_object_unref (stream); + g_main_loop_quit (loop); + return; + } else if (nread == 0) { + g_object_unref (stream); + g_main_loop_quit (loop); + return; + } + + g_string_append_len (body, buf, nread); + g_input_stream_read_async (stream, buf, sizeof (buf), + G_PRIORITY_DEFAULT, NULL, + test_read_ready, body); +} + +static void +test_sent (GObject *source, GAsyncResult *res, gpointer user_data) +{ + GString *body = user_data; + GInputStream *stream; + GError *error = NULL; + SoupMessage *msg; + + stream = soup_request_send_finish (SOUP_REQUEST (source), res, &error); + if (!stream) { + debug_printf (1, " send_async failed: %s", error->message); 
+ errors++; + g_main_loop_quit (loop); + return; + } + + msg = soup_request_http_get_message (SOUP_REQUEST_HTTP (source)); + if (!SOUP_STATUS_IS_SUCCESSFUL (msg->status_code)) { + debug_printf (1, " GET failed: %d %s", msg->status_code, + msg->reason_phrase); + errors++; + g_main_loop_quit (loop); + return; + } + + g_input_stream_read_async (stream, buf, sizeof (buf), + G_PRIORITY_DEFAULT, NULL, + test_read_ready, body); +} + +static void +do_test_for_thread_and_context (SoupSession *session, const char *uri) +{ + SoupRequester *requester; + SoupRequest *request; + GString *body; + + requester = soup_requester_new (); + soup_session_add_feature (session, SOUP_SESSION_FEATURE (requester)); + g_object_unref (requester); + + body = g_string_new (NULL); + + request = soup_requester_request (requester, uri, NULL); + soup_request_send_async (request, NULL, test_sent, body); + g_object_unref (request); + + loop = g_main_loop_new (soup_session_get_async_context (session), TRUE); + g_main_loop_run (loop); + g_main_loop_unref (loop); + + if (body->len != response->length) { + debug_printf (1, " body length mismatch: expected %d, got %d\n", + (int)response->length, (int)body->len); + errors++; + } else if (memcmp (body->str, response->data, response->length) != 0) { + debug_printf (1, " body data mismatch\n"); + errors++; + } + + g_string_free (body, TRUE); +} + +static void +do_simple_test (const char *uri) +{ + SoupSession *session; + + debug_printf (1, "Simple streaming test\n"); + + session = soup_test_session_new (SOUP_TYPE_SESSION_ASYNC, NULL); + do_test_for_thread_and_context (session, uri); + soup_test_session_abort_unref (session); +} + +static gpointer +do_test_with_context (const char *uri) +{ + GMainContext *async_context; + SoupSession *session; + + async_context = g_main_context_new (); + g_main_context_push_thread_default (async_context); + + session = soup_test_session_new (SOUP_TYPE_SESSION_ASYNC, + SOUP_SESSION_ASYNC_CONTEXT, async_context, + NULL); + 
g_main_context_unref (async_context); + + do_test_for_thread_and_context (session, uri); + soup_test_session_abort_unref (session); + + return NULL; +} + +static void +do_context_test (const char *uri) +{ + debug_printf (1, "Streaming with a non-default-context\n"); + do_test_with_context (uri); +} + +static void +do_thread_test (const char *uri) +{ + GThread *thread; + + debug_printf (1, "Streaming in another thread\n"); + + thread = g_thread_create ((GThreadFunc)do_test_with_context, + (gpointer)uri, TRUE, NULL); + g_thread_join (thread); +} + +int +main (int argc, char **argv) +{ + char *uri; + + test_init (argc, argv, NULL); + get_index (); + + server = soup_test_server_new (TRUE); + soup_server_add_handler (server, NULL, server_callback, NULL, NULL); + uri = g_strdup_printf ("http://127.0.0.1:%u/", soup_server_get_port (server)); + + do_simple_test (uri); + do_thread_test (uri); + do_context_test (uri); + + g_free (uri); + soup_buffer_free (response); + soup_test_server_quit_unref (server); + + test_cleanup (); + return errors != 0; +} diff --git a/tests/resources/atom.xml b/tests/resources/atom.xml new file mode 100644 index 0000000..962ecf4 --- /dev/null +++ b/tests/resources/atom.xml @@ -0,0 +1,35 @@ + + + A small ATOM feed + 2009-07-02T10:27:44Z + kov + + Anonymous Coward + + http://libsoup.rocks/atom.xml + + + + + http://libsoup.rocks/so/much/ + + One post too many + woo [...] +

woohoo

+
+ 2009-07-02T10:38:28Z + + + + kov + + + http://libsoup.rocks/blog + + + Just stuff to test libsoup + Random stuff to test libsoup + 2009-07-02T00:38:29Z + +
+
diff --git a/tests/resources/home.gif b/tests/resources/home.gif new file mode 100644 index 0000000..55e1d59 Binary files /dev/null and b/tests/resources/home.gif differ diff --git a/tests/resources/html_binary.html b/tests/resources/html_binary.html new file mode 100644 index 0000000..9200dd4 --- /dev/null +++ b/tests/resources/html_binary.html @@ -0,0 +1 @@ + +Envelope-to: email@here +Delivery-date: Wed, 17 Jun 2009 21:20:48 -0300 +Received: from email by here.domain with local (Exim 4.69) + (envelope-from ) + id 1MH5N2-0008Lq-7c + for email@here; Wed, 17 Jun 2009 21:20:48 -0300 +To: email@here +Subject: This is just so that I have a mailbox +Message-Id: +From: A Nice User +Date: Wed, 17 Jun 2009 21:20:48 -0300 + +This is a dumb email. + +From email@here Wed Jun 17 21:20:48 2009 +Return-path: +Envelope-to: email@here +Delivery-date: Wed, 17 Jun 2009 21:20:48 -0300 +Received: from email by here.domain with local (Exim 4.69) + (envelope-from ) + id 1MH5N2-0008Lq-7c + for email@here; Wed, 17 Jun 2009 21:20:48 -0300 +To: email@here +Subject: This is just so that I have a mailbox +Message-Id: +From: A Nice User +Date: Wed, 17 Jun 2009 21:20:48 -0300 + +This is a dumb email. + +From email@here Wed Jun 17 21:20:48 2009 +Return-path: +Envelope-to: email@here +Delivery-date: Wed, 17 Jun 2009 21:20:48 -0300 +Received: from email by here.domain with local (Exim 4.69) + (envelope-from ) + id 1MH5N2-0008Lq-7c + for email@here; Wed, 17 Jun 2009 21:20:48 -0300 +To: email@here +Subject: This is just so that I have a mailbox +Message-Id: +From: A Nice User +Date: Wed, 17 Jun 2009 21:20:48 -0300 + +This is a dumb email. 
+ +From email@here Wed Jun 17 21:20:48 2009 +Return-path: +Envelope-to: email@here +Delivery-date: Wed, 17 Jun 2009 21:20:48 -0300 +Received: from email by here.domain with local (Exim 4.69) + (envelope-from ) + id 1MH5N2-0008Lq-7c + for email@here; Wed, 17 Jun 2009 21:20:48 -0300 +To: email@here +Subject: This is just so that I have a mailbox +Message-Id: +From: A Nice User +Date: Wed, 17 Jun 2009 21:20:48 -0300 + +This is a dumb email. + diff --git a/tests/resources/mbox.gz b/tests/resources/mbox.gz new file mode 100644 index 0000000..1a70d06 Binary files /dev/null and b/tests/resources/mbox.gz differ diff --git a/tests/resources/ps_binary.ps b/tests/resources/ps_binary.ps new file mode 100644 index 0000000..3d210ed --- /dev/null +++ b/tests/resources/ps_binary.ps @@ -0,0 +1 @@ +%!PS-Adobe-"  diff --git a/tests/resources/rss20.xml b/tests/resources/rss20.xml new file mode 100644 index 0000000..d64bdda --- /dev/null +++ b/tests/resources/rss20.xml @@ -0,0 +1,26 @@ + + + + + A small RSS + http://libsoup.rocks/ + en + A small RSS to test libsoup + + + One post too many + http://libsoup.rocks/so/much/ + http://libsoup.rocks/so/much/ + <p>woohoo</p> + Wed, 02 Jul 2009 10:26:28 +0000 + + + GCDS will rock + http://libsoup.rocks/so/much/again/ + http://libsoup.rocks/so/much/again/ + <p>I mean, really.</p> + Wed, 02 Jul 2009 10:26:28 +0000 + + + + diff --git a/tests/resources/test.html b/tests/resources/test.html new file mode 100644 index 0000000..5a6cc0c --- /dev/null +++ b/tests/resources/test.html @@ -0,0 +1,10 @@ + + + + + + + +

GNOME!

+ + diff --git a/tests/resources/text_binary.txt b/tests/resources/text_binary.txt new file mode 100644 index 0000000..113bfdd --- /dev/null +++ b/tests/resources/text_binary.txt @@ -0,0 +1 @@ +abc \ No newline at end of file diff --git a/tests/server-auth-test.c b/tests/server-auth-test.c new file mode 100644 index 0000000..89e5943 --- /dev/null +++ b/tests/server-auth-test.c @@ -0,0 +1,383 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2001-2003, Ximian, Inc. + */ + +#include "config.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +static struct { + gboolean client_sent_basic, client_sent_digest; + gboolean server_requested_basic, server_requested_digest; + gboolean succeeded; +} test_data; + +static void +curl_exited (GPid pid, int status, gpointer data) +{ + gboolean *done = data; + + *done = TRUE; + test_data.succeeded = (status == 0); +} + +static void +do_test (int n, SoupURI *base_uri, const char *path, + gboolean good_user, gboolean good_password, + gboolean offer_basic, gboolean offer_digest, + gboolean client_sends_basic, gboolean client_sends_digest, + gboolean server_requests_basic, gboolean server_requests_digest, + gboolean success) +{ + SoupURI *uri; + char *uri_str; + GPtrArray *args; + GPid pid; + gboolean done; + + debug_printf (1, "%2d. %s, %soffer Basic, %soffer Digest, %s user, %s password\n", + n, path, offer_basic ? "" : "don't ", + offer_digest ? "" : "don't ", + good_user ? "good" : "bad", + good_password ? 
"good" : "bad"); + + uri = soup_uri_new_with_base (base_uri, path); + uri_str = soup_uri_to_string (uri, FALSE); + soup_uri_free (uri); + + args = g_ptr_array_new (); + g_ptr_array_add (args, "curl"); + g_ptr_array_add (args, "-f"); + g_ptr_array_add (args, "-s"); + if (offer_basic || offer_digest) { + g_ptr_array_add (args, "-u"); + if (good_user) { + if (good_password) + g_ptr_array_add (args, "user:password"); + else + g_ptr_array_add (args, "user:badpassword"); + } else { + if (good_password) + g_ptr_array_add (args, "baduser:password"); + else + g_ptr_array_add (args, "baduser:badpassword"); + } + + if (offer_basic && offer_digest) + g_ptr_array_add (args, "--anyauth"); + else if (offer_basic) + g_ptr_array_add (args, "--basic"); + else + g_ptr_array_add (args, "--digest"); + } + g_ptr_array_add (args, uri_str); + g_ptr_array_add (args, NULL); + + memset (&test_data, 0, sizeof (test_data)); + if (g_spawn_async (NULL, (char **)args->pdata, NULL, + G_SPAWN_SEARCH_PATH | G_SPAWN_STDOUT_TO_DEV_NULL | G_SPAWN_STDERR_TO_DEV_NULL | G_SPAWN_DO_NOT_REAP_CHILD, + NULL, NULL, &pid, NULL)) { + done = FALSE; + g_child_watch_add (pid, curl_exited, &done); + + while (!done) + g_main_context_iteration (NULL, TRUE); + } else + test_data.succeeded = FALSE; + g_ptr_array_free (args, TRUE); + g_free (uri_str); + + if (server_requests_basic != test_data.server_requested_basic) { + errors++; + if (test_data.server_requested_basic) + debug_printf (1, " Server sent WWW-Authenticate: Basic, but shouldn't have!\n"); + else + debug_printf (1, " Server didn't send WWW-Authenticate: Basic, but should have!\n"); + } + if (server_requests_digest != test_data.server_requested_digest) { + errors++; + if (test_data.server_requested_digest) + debug_printf (1, " Server sent WWW-Authenticate: Digest, but shouldn't have!\n"); + else + debug_printf (1, " Server didn't send WWW-Authenticate: Digest, but should have!\n"); + } + if (client_sends_basic != test_data.client_sent_basic) { + errors++; + if 
(test_data.client_sent_basic) + debug_printf (1, " Client sent Authorization: Basic, but shouldn't have!\n"); + else + debug_printf (1, " Client didn't send Authorization: Basic, but should have!\n"); + } + if (client_sends_digest != test_data.client_sent_digest) { + errors++; + if (test_data.client_sent_digest) + debug_printf (1, " Client sent Authorization: Digest, but shouldn't have!\n"); + else + debug_printf (1, " Client didn't send Authorization: Digest, but should have!\n"); + } + if (success && !test_data.succeeded) { + errors++; + debug_printf (1, " Should have succeeded, but didn't!\n"); + } else if (!success && test_data.succeeded) { + errors++; + debug_printf (1, " Should not have succeeded, but did!\n"); + } +} + +static void +do_auth_tests (SoupURI *base_uri) +{ + int i, n = 1; + gboolean use_basic, use_digest, good_user, good_password; + gboolean preemptive_basic, good_auth; + + for (i = 0; i < 16; i++) { + use_basic = (i & 1) == 1; + use_digest = (i & 2) == 2; + good_user = (i & 4) == 4; + good_password = (i & 8) == 8; + + good_auth = good_user && good_password; + + /* Curl will preemptively send Basic if it's told to + * use Basic but not Digest. + */ + preemptive_basic = use_basic && !use_digest; + + /* 1. No auth required. The server will ignore the + * Authorization headers completely, and the request + * will always succeed. + */ + do_test (n++, base_uri, "/foo", + good_user, good_password, + /* request */ + use_basic, use_digest, + /* expected from client */ + preemptive_basic, FALSE, + /* expected from server */ + FALSE, FALSE, + /* success? */ + TRUE); + + /* 2. Basic auth required. The server will send + * "WWW-Authenticate: Basic" if the client fails to + * send an Authorization: Basic on the first request, + * or if it sends a bad password. 
+ */ + do_test (n++, base_uri, "/Basic/foo", + good_user, good_password, + /* request */ + use_basic, use_digest, + /* expected from client */ + use_basic, FALSE, + /* expected from server */ + !preemptive_basic || !good_auth, FALSE, + /* success? */ + use_basic && good_auth); + + /* 3. Digest auth required. Simpler than the basic + * case because the client can't send Digest auth + * premptively. + */ + do_test (n++, base_uri, "/Digest/foo", + good_user, good_password, + /* request */ + use_basic, use_digest, + /* expected from client */ + preemptive_basic, use_digest, + /* expected from server */ + FALSE, TRUE, + /* success? */ + use_digest && good_auth); + + /* 4. Any auth required. */ + do_test (n++, base_uri, "/Any/foo", + good_user, good_password, + /* request */ + use_basic, use_digest, + /* expected from client */ + preemptive_basic, use_digest, + /* expected from server */ + !preemptive_basic || !good_auth, !preemptive_basic || !good_auth, + /* success? */ + (use_basic || use_digest) && good_auth); + + /* 5. No auth required again. (Makes sure that + * SOUP_AUTH_DOMAIN_REMOVE_PATH works.) + */ + do_test (n++, base_uri, "/Any/Not/foo", + good_user, good_password, + /* request */ + use_basic, use_digest, + /* expected from client */ + preemptive_basic, FALSE, + /* expected from server */ + FALSE, FALSE, + /* success? */ + TRUE); + } +} + +static gboolean +basic_auth_callback (SoupAuthDomain *auth_domain, SoupMessage *msg, + const char *username, const char *password, gpointer data) +{ + return !strcmp (username, "user") && !strcmp (password, "password"); +} + +static char * +digest_auth_callback (SoupAuthDomain *auth_domain, SoupMessage *msg, + const char *username, gpointer data) +{ + if (strcmp (username, "user") != 0) + return NULL; + + /* Note: this is exactly how you *shouldn't* do it in the real + * world; you should have the pre-encoded password stored in a + * database of some sort rather than using the cleartext + * password in the callback. 
+ */ + return soup_auth_domain_digest_encode_password ("user", + "server-auth-test", + "password"); +} + +static void +server_callback (SoupServer *server, SoupMessage *msg, + const char *path, GHashTable *query, + SoupClientContext *context, gpointer data) +{ + if (msg->method != SOUP_METHOD_GET && msg->method != SOUP_METHOD_HEAD) { + soup_message_set_status (msg, SOUP_STATUS_NOT_IMPLEMENTED); + return; + } + + soup_message_set_response (msg, "text/plain", + SOUP_MEMORY_STATIC, + "OK\r\n", 4); + soup_message_set_status (msg, SOUP_STATUS_OK); +} + +static void +got_headers_callback (SoupMessage *msg, gpointer data) +{ + const char *header; + + header = soup_message_headers_get_one (msg->request_headers, + "Authorization"); + if (header) { + if (strstr (header, "Basic ")) + test_data.client_sent_basic = TRUE; + if (strstr (header, "Digest ")) + test_data.client_sent_digest = TRUE; + } +} + +static void +wrote_headers_callback (SoupMessage *msg, gpointer data) +{ + const char *header; + + header = soup_message_headers_get_list (msg->response_headers, + "WWW-Authenticate"); + if (header) { + if (strstr (header, "Basic ")) + test_data.server_requested_basic = TRUE; + if (strstr (header, "Digest ")) + test_data.server_requested_digest = TRUE; + } +} + +static void +request_started_callback (SoupServer *server, SoupMessage *msg, + SoupClientContext *client, gpointer data) +{ + g_signal_connect (msg, "got_headers", + G_CALLBACK (got_headers_callback), NULL); + g_signal_connect (msg, "wrote_headers", + G_CALLBACK (wrote_headers_callback), NULL); +} + +static gboolean run_tests = TRUE; + +static GOptionEntry no_test_entry[] = { + { "no-tests", 'n', G_OPTION_FLAG_REVERSE, + G_OPTION_ARG_NONE, &run_tests, + "Don't run tests, just run the test server", NULL }, + { NULL } +}; + +int +main (int argc, char **argv) +{ + GMainLoop *loop; + SoupServer *server; + SoupURI *uri; + SoupAuthDomain *auth_domain; + + test_init (argc, argv, no_test_entry); + + server = soup_test_server_new 
(FALSE); + g_signal_connect (server, "request_started", + G_CALLBACK (request_started_callback), NULL); + soup_server_add_handler (server, NULL, + server_callback, NULL, NULL); + + auth_domain = soup_auth_domain_basic_new ( + SOUP_AUTH_DOMAIN_REALM, "server-auth-test", + SOUP_AUTH_DOMAIN_ADD_PATH, "/Basic", + SOUP_AUTH_DOMAIN_ADD_PATH, "/Any", + SOUP_AUTH_DOMAIN_REMOVE_PATH, "/Any/Not", + SOUP_AUTH_DOMAIN_BASIC_AUTH_CALLBACK, basic_auth_callback, + NULL); + soup_server_add_auth_domain (server, auth_domain); + g_object_unref (auth_domain); + + auth_domain = soup_auth_domain_digest_new ( + SOUP_AUTH_DOMAIN_REALM, "server-auth-test", + SOUP_AUTH_DOMAIN_ADD_PATH, "/Digest", + SOUP_AUTH_DOMAIN_ADD_PATH, "/Any", + SOUP_AUTH_DOMAIN_REMOVE_PATH, "/Any/Not", + SOUP_AUTH_DOMAIN_DIGEST_AUTH_CALLBACK, digest_auth_callback, + NULL); + soup_server_add_auth_domain (server, auth_domain); + g_object_unref (auth_domain); + + loop = g_main_loop_new (NULL, TRUE); + + if (run_tests) { + uri = soup_uri_new ("http://127.0.0.1"); + soup_uri_set_port (uri, soup_server_get_port (server)); + do_auth_tests (uri); + soup_uri_free (uri); + } else { + printf ("Listening on port %d\n", soup_server_get_port (server)); + g_main_loop_run (loop); + } + + g_main_loop_unref (loop); + soup_test_server_quit_unref (server); + + if (run_tests) + test_cleanup (); + return errors != 0; +} diff --git a/tests/simple-httpd.c b/tests/simple-httpd.c new file mode 100644 index 0000000..b63ad04 --- /dev/null +++ b/tests/simple-httpd.c @@ -0,0 +1,329 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2001-2003, Ximian, Inc. 
+ */ + +#include "config.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef HAVE_MMAP +#include +#endif + +#include + +#ifdef HAVE_MMAP +struct mapping { + void *start; + size_t length; +}; + +static void +free_mapping (gpointer data) +{ + struct mapping *mapping = data; + munmap (mapping->start, mapping->length); + g_slice_free (struct mapping, mapping); +} +#endif + +static int +compare_strings (gconstpointer a, gconstpointer b) +{ + const char **sa = (const char **)a; + const char **sb = (const char **)b; + + return strcmp (*sa, *sb); +} + +static GString * +get_directory_listing (const char *path) +{ + GPtrArray *entries; + GString *listing; + char *escaped; + DIR *dir; + struct dirent *dent; + int i; + + entries = g_ptr_array_new (); + dir = opendir (path); + if (dir) { + while ((dent = readdir (dir))) { + if (!strcmp (dent->d_name, ".") || + (!strcmp (dent->d_name, "..") && + !strcmp (path, "./"))) + continue; + escaped = g_markup_escape_text (dent->d_name, -1); + g_ptr_array_add (entries, escaped); + } + closedir (dir); + } + + g_ptr_array_sort (entries, (GCompareFunc)compare_strings); + + listing = g_string_new ("\r\n"); + escaped = g_markup_escape_text (strchr (path, '/'), -1); + g_string_append_printf (listing, "Index of %s\r\n", escaped); + g_string_append_printf (listing, "

Index of %s

\r\n

\r\n", escaped); + g_free (escaped); + for (i = 0; i < entries->len; i++) { + g_string_append_printf (listing, "%s
\r\n", + (char *)entries->pdata[i], + (char *)entries->pdata[i]); + g_free (entries->pdata[i]); + } + g_string_append (listing, "\r\n\r\n"); + + g_ptr_array_free (entries, TRUE); + return listing; +} + +static void +do_get (SoupServer *server, SoupMessage *msg, const char *path) +{ + char *slash; + struct stat st; + int fd; + + if (stat (path, &st) == -1) { + if (errno == EPERM) + soup_message_set_status (msg, SOUP_STATUS_FORBIDDEN); + else if (errno == ENOENT) + soup_message_set_status (msg, SOUP_STATUS_NOT_FOUND); + else + soup_message_set_status (msg, SOUP_STATUS_INTERNAL_SERVER_ERROR); + return; + } + + if (S_ISDIR (st.st_mode)) { + GString *listing; + char *index_path; + + slash = strrchr (path, '/'); + if (!slash || slash[1]) { + char *uri, *redir_uri; + + uri = soup_uri_to_string (soup_message_get_uri (msg), FALSE); + redir_uri = g_strdup_printf ("%s/", uri); + soup_message_headers_append (msg->response_headers, + "Location", redir_uri); + soup_message_set_status (msg, SOUP_STATUS_MOVED_PERMANENTLY); + g_free (redir_uri); + g_free (uri); + return; + } + + index_path = g_strdup_printf ("%s/index.html", path); + if (stat (index_path, &st) != -1) { + do_get (server, msg, index_path); + g_free (index_path); + return; + } + + listing = get_directory_listing (path); + soup_message_set_response (msg, "text/html", + SOUP_MEMORY_TAKE, + listing->str, listing->len); + g_string_free (listing, FALSE); + return; + } + + fd = open (path, O_RDONLY); + if (fd == -1) { + soup_message_set_status (msg, SOUP_STATUS_INTERNAL_SERVER_ERROR); + return; + } + + if (msg->method == SOUP_METHOD_GET) { +#ifdef HAVE_MMAP + struct mapping *mapping = g_slice_new (struct mapping); + SoupBuffer *buffer; + + mapping->start = mmap (NULL, st.st_size, PROT_READ, + MAP_PRIVATE, fd, 0); + mapping->length = st.st_size; + buffer = soup_buffer_new_with_owner (mapping->start, + mapping->length, + mapping, free_mapping); + soup_message_body_append_buffer (msg->response_body, buffer); + soup_buffer_free 
(buffer); +#else + char *buf; + + buf = g_malloc (st.st_size); + read (fd, buf, st.st_size); + close (fd); + soup_message_body_append (msg->response_body, SOUP_MEMORY_TAKE, + buf, st.st_size); +#endif + } else /* msg->method == SOUP_METHOD_HEAD */ { + char *length; + + /* We could just use the same code for both GET and + * HEAD. But we'll optimize and avoid the extra + * malloc. + */ + length = g_strdup_printf ("%lu", (gulong)st.st_size); + soup_message_headers_append (msg->response_headers, + "Content-Length", length); + g_free (length); + } + + soup_message_set_status (msg, SOUP_STATUS_OK); +} + +static void +do_put (SoupServer *server, SoupMessage *msg, const char *path) +{ + struct stat st; + FILE *f; + gboolean created = TRUE; + + if (stat (path, &st) != -1) { + const char *match = soup_message_headers_get_one (msg->request_headers, "If-None-Match"); + if (match && !strcmp (match, "*")) { + soup_message_set_status (msg, SOUP_STATUS_CONFLICT); + return; + } + + if (!S_ISREG (st.st_mode)) { + soup_message_set_status (msg, SOUP_STATUS_FORBIDDEN); + return; + } + + created = FALSE; + } + + f = fopen (path, "w"); + if (!f) { + soup_message_set_status (msg, SOUP_STATUS_INTERNAL_SERVER_ERROR); + return; + } + + fwrite (msg->request_body->data, 1, msg->request_body->length, f); + fclose (f); + + soup_message_set_status (msg, created ? 
SOUP_STATUS_CREATED : SOUP_STATUS_OK); +} + +static void +server_callback (SoupServer *server, SoupMessage *msg, + const char *path, GHashTable *query, + SoupClientContext *context, gpointer data) +{ + char *file_path; + SoupMessageHeadersIter iter; + const char *name, *value; + + printf ("%s %s HTTP/1.%d\n", msg->method, path, + soup_message_get_http_version (msg)); + soup_message_headers_iter_init (&iter, msg->request_headers); + while (soup_message_headers_iter_next (&iter, &name, &value)) + printf ("%s: %s\n", name, value); + if (msg->request_body->length) + printf ("%s\n", msg->request_body->data); + + file_path = g_strdup_printf (".%s", path); + + if (msg->method == SOUP_METHOD_GET || msg->method == SOUP_METHOD_HEAD) + do_get (server, msg, file_path); + else if (msg->method == SOUP_METHOD_PUT) + do_put (server, msg, file_path); + else + soup_message_set_status (msg, SOUP_STATUS_NOT_IMPLEMENTED); + + g_free (file_path); + printf (" -> %d %s\n\n", msg->status_code, msg->reason_phrase); +} + +static void +quit (int sig) +{ + /* Exit cleanly on ^C in case we're valgrinding. 
*/ + exit (0); +} + +int +main (int argc, char **argv) +{ + GMainLoop *loop; + SoupServer *server, *ssl_server; + int opt; + int port = SOUP_ADDRESS_ANY_PORT; + int ssl_port = SOUP_ADDRESS_ANY_PORT; + const char *ssl_cert_file = NULL, *ssl_key_file = NULL; + + g_thread_init (NULL); + g_type_init (); + signal (SIGINT, quit); + + while ((opt = getopt (argc, argv, "p:k:c:s:")) != -1) { + switch (opt) { + case 'p': + port = atoi (optarg); + break; + case 'k': + ssl_key_file = optarg; + break; + case 'c': + ssl_cert_file = optarg; + break; + case 's': + ssl_port = atoi (optarg); + break; + default: + fprintf (stderr, "Usage: %s [-p port] [-c ssl-cert-file -k ssl-key-file [-s ssl-port]]\n", + argv[0]); + exit (1); + } + } + + server = soup_server_new (SOUP_SERVER_PORT, port, + SOUP_SERVER_SERVER_HEADER, "simple-httpd ", + NULL); + if (!server) { + fprintf (stderr, "Unable to bind to server port %d\n", port); + exit (1); + } + soup_server_add_handler (server, NULL, + server_callback, NULL, NULL); + printf ("\nStarting Server on port %d\n", + soup_server_get_port (server)); + soup_server_run_async (server); + + if (ssl_cert_file && ssl_key_file) { + ssl_server = soup_server_new ( + SOUP_SERVER_PORT, ssl_port, + SOUP_SERVER_SSL_CERT_FILE, ssl_cert_file, + SOUP_SERVER_SSL_KEY_FILE, ssl_key_file, + NULL); + + if (!ssl_server) { + fprintf (stderr, "Unable to bind to SSL server port %d\n", ssl_port); + exit (1); + } + soup_server_add_handler (ssl_server, NULL, + server_callback, NULL, NULL); + printf ("Starting SSL Server on port %d\n", + soup_server_get_port (ssl_server)); + soup_server_run_async (ssl_server); + } + + printf ("\nWaiting for requests...\n"); + + loop = g_main_loop_new (NULL, TRUE); + g_main_loop_run (loop); + + return 0; +} diff --git a/tests/simple-proxy.c b/tests/simple-proxy.c new file mode 100644 index 0000000..0101b63 --- /dev/null +++ b/tests/simple-proxy.c @@ -0,0 +1,191 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* 
+ * Copyright (C) 2001-2003, Ximian, Inc. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +/* WARNING: this is really really really not especially compliant with + * RFC 2616. But it does work for basic stuff. + */ + +static SoupSession *session; +static SoupServer *server; + +static void +copy_header (const char *name, const char *value, gpointer dest_headers) +{ + soup_message_headers_append (dest_headers, name, value); +} + +static void +send_headers (SoupMessage *from, SoupMessage *to) +{ + printf ("[%p] HTTP/1.%d %d %s\n", to, + soup_message_get_http_version (from), + from->status_code, from->reason_phrase); + + soup_message_set_status_full (to, from->status_code, + from->reason_phrase); + soup_message_headers_foreach (from->response_headers, copy_header, + to->response_headers); + soup_message_headers_remove (to->response_headers, "Content-Length"); + soup_server_unpause_message (server, to); +} + +static void +send_chunk (SoupMessage *from, SoupBuffer *chunk, SoupMessage *to) +{ + printf ("[%p] writing chunk of %lu bytes\n", to, + (unsigned long)chunk->length); + + soup_message_body_append_buffer (to->response_body, chunk); + soup_server_unpause_message (server, to); +} + +static void +client_msg_failed (SoupMessage *msg, gpointer msg2) +{ + soup_session_cancel_message (session, msg2, SOUP_STATUS_IO_ERROR); +} + +static void +finish_msg (SoupSession *session, SoupMessage *msg2, gpointer data) +{ + SoupMessage *msg = data; + + printf ("[%p] done\n\n", msg); + g_signal_handlers_disconnect_by_func (msg, client_msg_failed, msg2); + + soup_message_body_complete (msg->response_body); + soup_server_unpause_message (server, msg); + g_object_unref (msg); +} + +static void +server_callback (SoupServer *server, SoupMessage *msg, + const char *path, GHashTable *query, + SoupClientContext *context, gpointer data) +{ + SoupMessage *msg2; + char *uristr; + + uristr = soup_uri_to_string 
(soup_message_get_uri (msg), FALSE); + printf ("[%p] %s %s HTTP/1.%d\n", msg, msg->method, uristr, + soup_message_get_http_version (msg)); + + if (msg->method == SOUP_METHOD_CONNECT) { + soup_message_set_status (msg, SOUP_STATUS_NOT_IMPLEMENTED); + return; + } + + msg2 = soup_message_new (msg->method, uristr); + soup_message_headers_foreach (msg->request_headers, copy_header, + msg2->request_headers); + soup_message_headers_remove (msg2->request_headers, "Host"); + soup_message_headers_remove (msg2->request_headers, "Connection"); + + if (msg->request_body->length) { + SoupBuffer *request = soup_message_body_flatten (msg->request_body); + soup_message_body_append_buffer (msg2->request_body, request); + soup_buffer_free (request); + } + soup_message_headers_set_encoding (msg->response_headers, + SOUP_ENCODING_CHUNKED); + + g_signal_connect (msg2, "got_headers", + G_CALLBACK (send_headers), msg); + g_signal_connect (msg2, "got_chunk", + G_CALLBACK (send_chunk), msg); + + g_signal_connect (msg, "finished", G_CALLBACK (client_msg_failed), msg2); + + soup_session_queue_message (session, msg2, finish_msg, msg); + + g_object_ref (msg); + soup_server_pause_message (server, msg); +} + +static gboolean +auth_callback (SoupAuthDomain *auth_domain, SoupMessage *msg, + const char *username, const char *password, gpointer data) +{ + return !strcmp (username, "user") && !strcmp (password, "password"); +} + +static void +quit (int sig) +{ + /* Exit cleanly on ^C in case we're valgrinding. 
*/ + exit (0); +} + +int +main (int argc, char **argv) +{ + GMainLoop *loop; + int opt; + int port = SOUP_ADDRESS_ANY_PORT; + SoupAuthDomain *auth_domain = NULL; + + g_thread_init (NULL); + g_type_init (); + signal (SIGINT, quit); + + while ((opt = getopt (argc, argv, "ap:")) != -1) { + switch (opt) { + case 'a': + auth_domain = soup_auth_domain_basic_new ( + SOUP_AUTH_DOMAIN_REALM, "simple-proxy", + SOUP_AUTH_DOMAIN_PROXY, TRUE, + SOUP_AUTH_DOMAIN_BASIC_AUTH_CALLBACK, auth_callback, + NULL); + break; + case 'p': + port = atoi (optarg); + break; + default: + fprintf (stderr, "Usage: %s [-p port] [-n]\n", + argv[0]); + exit (1); + } + } + + server = soup_server_new (SOUP_SERVER_PORT, port, + NULL); + if (!server) { + fprintf (stderr, "Unable to bind to server port %d\n", port); + exit (1); + } + soup_server_add_handler (server, NULL, + server_callback, NULL, NULL); + if (auth_domain) { + soup_server_add_auth_domain (server, auth_domain); + g_object_unref (auth_domain); + } + + printf ("\nStarting proxy on port %d\n", + soup_server_get_port (server)); + soup_server_run_async (server); + + session = soup_session_async_new (); + + printf ("\nWaiting for requests...\n"); + + loop = g_main_loop_new (NULL, TRUE); + g_main_loop_run (loop); + g_main_loop_unref (loop); + + return 0; +} diff --git a/tests/sniffing-test.c b/tests/sniffing-test.c new file mode 100644 index 0000000..828f1d5 --- /dev/null +++ b/tests/sniffing-test.c @@ -0,0 +1,568 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2009 Gustavo Noronha Silva . 
+ */ + +#include +#include +#include + +#include + +#include "test-utils.h" + +SoupSession *session; +SoupURI *base_uri; +SoupMessageBody *chunk_data; + +static void +server_callback (SoupServer *server, SoupMessage *msg, + const char *path, GHashTable *query, + SoupClientContext *context, gpointer data) +{ + GError *error = NULL; + char *query_key; + char *contents; + gsize length, offset; + gboolean empty_response = FALSE; + + if (msg->method != SOUP_METHOD_GET) { + soup_message_set_status (msg, SOUP_STATUS_NOT_IMPLEMENTED); + return; + } + + soup_message_set_status (msg, SOUP_STATUS_OK); + + if (query) { + query_key = g_hash_table_lookup (query, "chunked"); + if (query_key && g_str_equal (query_key, "yes")) { + soup_message_headers_set_encoding (msg->response_headers, + SOUP_ENCODING_CHUNKED); + } + + query_key = g_hash_table_lookup (query, "empty_response"); + if (query_key && g_str_equal (query_key, "yes")) + empty_response = TRUE; + } + + if (!strcmp (path, "/mbox")) { + if (empty_response) { + contents = g_strdup (""); + length = 0; + } else { + g_file_get_contents (SRCDIR "/resources/mbox", + &contents, &length, + &error); + } + + if (error) { + g_error ("%s", error->message); + g_error_free (error); + exit (1); + } + + soup_message_headers_append (msg->response_headers, + "Content-Type", "text/plain"); + } + + if (g_str_has_prefix (path, "/text_or_binary/")) { + char *base_name = g_path_get_basename (path); + char *file_name = g_strdup_printf (SRCDIR "/resources/%s", base_name); + + g_file_get_contents (file_name, + &contents, &length, + &error); + + g_free (base_name); + g_free (file_name); + + if (error) { + g_error ("%s", error->message); + g_error_free (error); + exit (1); + } + + soup_message_headers_append (msg->response_headers, + "Content-Type", "text/plain"); + } + + if (g_str_has_prefix (path, "/unknown/")) { + char *base_name = g_path_get_basename (path); + char *file_name = g_strdup_printf (SRCDIR "/resources/%s", base_name); + + 
g_file_get_contents (file_name, + &contents, &length, + &error); + + g_free (base_name); + g_free (file_name); + + if (error) { + g_error ("%s", error->message); + g_error_free (error); + exit (1); + } + + soup_message_headers_append (msg->response_headers, + "Content-Type", "UNKNOWN/unknown"); + } + + if (g_str_has_prefix (path, "/type/")) { + char **components = g_strsplit (path, "/", 4); + char *ptr; + + char *base_name = g_path_get_basename (path); + char *file_name = g_strdup_printf (SRCDIR "/resources/%s", base_name); + + g_file_get_contents (file_name, + &contents, &length, + &error); + + g_free (base_name); + g_free (file_name); + + if (error) { + g_error ("%s", error->message); + g_error_free (error); + exit (1); + } + + /* Hack to allow passing type in the URI */ + ptr = g_strrstr (components[2], "_"); + *ptr = '/'; + + soup_message_headers_append (msg->response_headers, + "Content-Type", components[2]); + g_strfreev (components); + } + + if (g_str_has_prefix (path, "/multiple_headers/")) { + char *base_name = g_path_get_basename (path); + char *file_name = g_strdup_printf (SRCDIR "/resources/%s", base_name); + + g_file_get_contents (file_name, + &contents, &length, + &error); + + g_free (base_name); + g_free (file_name); + + if (error) { + g_error ("%s", error->message); + g_error_free (error); + exit (1); + } + + soup_message_headers_append (msg->response_headers, + "Content-Type", "text/xml"); + soup_message_headers_append (msg->response_headers, + "Content-Type", "text/plain"); + } + + for (offset = 0; offset < length; offset += 500) { + soup_message_body_append (msg->response_body, + SOUP_MEMORY_COPY, + contents + offset, + MIN(500, length - offset)); + } + soup_message_body_complete (msg->response_body); + + g_free (contents); +} + +static gboolean +unpause_msg (gpointer data) +{ + SoupMessage *msg = (SoupMessage*)data; + debug_printf (2, " unpause\n"); + soup_session_unpause_message (session, msg); + return FALSE; +} + + +static void 
+content_sniffed (SoupMessage *msg, char *content_type, GHashTable *params, gpointer data) +{ + gboolean should_pause = GPOINTER_TO_INT (data); + + debug_printf (2, " content-sniffed -> %s\n", content_type); + + if (g_object_get_data (G_OBJECT (msg), "got-chunk")) { + debug_printf (1, " got-chunk got emitted before content-sniffed\n"); + errors++; + } + + g_object_set_data (G_OBJECT (msg), "content-sniffed", GINT_TO_POINTER (TRUE)); + + if (should_pause) { + debug_printf (2, " pause\n"); + soup_session_pause_message (session, msg); + g_idle_add (unpause_msg, msg); + } +} + +static void +got_headers (SoupMessage *msg, gpointer data) +{ + gboolean should_pause = GPOINTER_TO_INT (data); + + debug_printf (2, " got-headers\n"); + + if (g_object_get_data (G_OBJECT (msg), "content-sniffed")) { + debug_printf (1, " content-sniffed got emitted before got-headers\n"); + errors++; + } + + g_object_set_data (G_OBJECT (msg), "got-headers", GINT_TO_POINTER (TRUE)); + + if (should_pause) { + debug_printf (2, " pause\n"); + soup_session_pause_message (session, msg); + g_idle_add (unpause_msg, msg); + } +} + +static void +got_chunk (SoupMessage *msg, SoupBuffer *chunk, gpointer data) +{ + gboolean should_accumulate = GPOINTER_TO_INT (data); + + debug_printf (2, " got-chunk\n"); + + g_object_set_data (G_OBJECT (msg), "got-chunk", GINT_TO_POINTER (TRUE)); + + if (!should_accumulate) { + if (!chunk_data) + chunk_data = soup_message_body_new (); + soup_message_body_append_buffer (chunk_data, chunk); + } +} + +static void +finished (SoupSession *session, SoupMessage *msg, gpointer data) +{ + GMainLoop *loop = (GMainLoop*)data; + g_main_loop_quit (loop); +} + +static void +do_signals_test (gboolean should_content_sniff, + gboolean should_pause, + gboolean should_accumulate, + gboolean chunked_encoding, + gboolean empty_response) +{ + SoupURI *uri = soup_uri_new_with_base (base_uri, "/mbox"); + SoupMessage *msg = soup_message_new_from_uri ("GET", uri); + GMainLoop *loop = g_main_loop_new 
(NULL, TRUE); + char *contents; + gsize length; + GError *error = NULL; + SoupBuffer *body = NULL; + + debug_printf (1, "do_signals_test(%ssniff, %spause, %saccumulate, %schunked, %sempty)\n", + should_content_sniff ? "" : "!", + should_pause ? "" : "!", + should_accumulate ? "" : "!", + chunked_encoding ? "" : "!", + empty_response ? "" : "!"); + + if (chunked_encoding) + soup_uri_set_query (uri, "chunked=yes"); + + if (empty_response) { + if (uri->query) { + char *tmp = uri->query; + uri->query = g_strdup_printf ("%s&empty_response=yes", tmp); + g_free (tmp); + } else + soup_uri_set_query (uri, "empty_response=yes"); + } + + soup_message_set_uri (msg, uri); + + soup_message_body_set_accumulate (msg->response_body, should_accumulate); + + g_object_connect (msg, + "signal::got-headers", got_headers, GINT_TO_POINTER (should_pause), + "signal::got-chunk", got_chunk, GINT_TO_POINTER (should_accumulate), + "signal::content_sniffed", content_sniffed, GINT_TO_POINTER (should_pause), + NULL); + + g_object_ref (msg); + soup_session_queue_message (session, msg, finished, loop); + + g_main_loop_run (loop); + + if (!should_content_sniff && + g_object_get_data (G_OBJECT (msg), "content-sniffed")) { + debug_printf (1, " content-sniffed got emitted without a sniffer\n"); + errors++; + } else if (should_content_sniff && + !g_object_get_data (G_OBJECT (msg), "content-sniffed")) { + debug_printf (1, " content-sniffed did not get emitted\n"); + errors++; + } + + if (empty_response) { + contents = g_strdup (""); + length = 0; + } else { + g_file_get_contents (SRCDIR "/resources/mbox", + &contents, &length, + &error); + } + + if (error) { + g_error ("%s", error->message); + g_error_free (error); + exit (1); + } + + if (!should_accumulate && chunk_data) + body = soup_message_body_flatten (chunk_data); + else if (msg->response_body) + body = soup_message_body_flatten (msg->response_body); + + if (body && body->length != length) { + debug_printf (1, " lengths do not match\n"); + 
errors++; + } + + if (body && memcmp (body->data, contents, length)) { + debug_printf (1, " downloaded data does not match\n"); + errors++; + } + + g_free (contents); + if (body) + soup_buffer_free (body); + if (chunk_data) { + soup_message_body_free (chunk_data); + chunk_data = NULL; + } + + soup_uri_free (uri); + g_object_unref (msg); + g_main_loop_unref (loop); +} + +static void +sniffing_content_sniffed (SoupMessage *msg, const char *content_type, + GHashTable *params, gpointer data) +{ + char **sniffed_type = (char **)data; + GString *full_header; + GList *keys; + GList *iter; + + if (params == NULL) { + *sniffed_type = g_strdup (content_type); + return; + } + + full_header = g_string_new (content_type); + g_string_append (full_header, "; "); + + keys = g_hash_table_get_keys (params); + for (iter = keys; iter != NULL; iter = iter->next) { + const gchar *value = (const gchar*) g_hash_table_lookup (params, iter->data); + + soup_header_g_string_append_param (full_header, + (const gchar*) iter->data, + value); + } + + *sniffed_type = full_header->str; + + g_string_free (full_header, FALSE); + g_list_free (keys); +} + +static void +test_sniffing (const char *path, const char *expected_type) +{ + SoupURI *uri = soup_uri_new_with_base (base_uri, path); + SoupMessage *msg = soup_message_new_from_uri ("GET", uri); + GMainLoop *loop = g_main_loop_new (NULL, TRUE); + char *sniffed_type = NULL; + + debug_printf (1, "test_sniffing(\"%s\", \"%s\")\n", path, expected_type); + + g_signal_connect (msg, "content-sniffed", + G_CALLBACK (sniffing_content_sniffed), &sniffed_type); + + g_object_ref (msg); + + soup_session_queue_message (session, msg, finished, loop); + + g_main_loop_run (loop); + + if (!sniffed_type) { + debug_printf (1, " message was not sniffed!\n"); + errors++; + } else if (strcmp (sniffed_type, expected_type) != 0) { + debug_printf (1, " sniffing failed! 
expected %s, got %s\n", + expected_type, sniffed_type); + errors++; + } + g_free (sniffed_type); + + soup_uri_free (uri); + g_object_unref (msg); + g_main_loop_unref (loop); +} + +static void +test_disabled (const char *path) +{ + SoupURI *uri = soup_uri_new_with_base (base_uri, path); + SoupMessage *msg = soup_message_new_from_uri ("GET", uri); + GMainLoop *loop = g_main_loop_new (NULL, TRUE); + char *sniffed_type = NULL; + + soup_message_disable_feature (msg, SOUP_TYPE_CONTENT_SNIFFER); + + debug_printf (1, "test_disabled(\"%s\")\n", path); + + g_signal_connect (msg, "content-sniffed", + G_CALLBACK (sniffing_content_sniffed), &sniffed_type); + + g_object_ref (msg); + + soup_session_queue_message (session, msg, finished, loop); + + g_main_loop_run (loop); + + if (sniffed_type) { + debug_printf (1, " message was sniffed!\n"); + errors++; + g_free (sniffed_type); + } + + soup_uri_free (uri); + g_object_unref (msg); + g_main_loop_unref (loop); +} + +int +main (int argc, char **argv) +{ + SoupServer *server; + + test_init (argc, argv, NULL); + + server = soup_test_server_new (TRUE); + soup_server_add_handler (server, NULL, server_callback, NULL, NULL); + base_uri = soup_uri_new ("http://127.0.0.1/"); + soup_uri_set_port (base_uri, soup_server_get_port (server)); + + session = soup_test_session_new (SOUP_TYPE_SESSION_ASYNC, NULL); + + /* No sniffer, no content_sniffed should be emitted */ + do_signals_test (FALSE, FALSE, FALSE, FALSE, FALSE); + do_signals_test (FALSE, FALSE, FALSE, TRUE, FALSE); + do_signals_test (FALSE, FALSE, TRUE, FALSE, FALSE); + do_signals_test (FALSE, FALSE, TRUE, TRUE, FALSE); + + do_signals_test (FALSE, TRUE, TRUE, FALSE, FALSE); + do_signals_test (FALSE, TRUE, TRUE, TRUE, FALSE); + do_signals_test (FALSE, TRUE, FALSE, FALSE, FALSE); + do_signals_test (FALSE, TRUE, FALSE, TRUE, FALSE); + + /* Tests that the signals are correctly emitted for empty + * responses; see + * http://bugzilla.gnome.org/show_bug.cgi?id=587907 */ + + do_signals_test 
(FALSE, TRUE, TRUE, FALSE, TRUE); + do_signals_test (FALSE, TRUE, TRUE, TRUE, TRUE); + + soup_session_add_feature_by_type (session, SOUP_TYPE_CONTENT_SNIFFER); + + /* Now, with a sniffer, content_sniffed must be emitted after + * got-headers, and before got-chunk. + */ + do_signals_test (TRUE, FALSE, FALSE, FALSE, FALSE); + do_signals_test (TRUE, FALSE, FALSE, TRUE, FALSE); + do_signals_test (TRUE, FALSE, TRUE, FALSE, FALSE); + do_signals_test (TRUE, FALSE, TRUE, TRUE, FALSE); + + do_signals_test (TRUE, TRUE, TRUE, FALSE, FALSE); + do_signals_test (TRUE, TRUE, TRUE, TRUE, FALSE); + do_signals_test (TRUE, TRUE, FALSE, FALSE, FALSE); + do_signals_test (TRUE, TRUE, FALSE, TRUE, FALSE); + + /* Empty response tests */ + do_signals_test (TRUE, TRUE, TRUE, FALSE, TRUE); + do_signals_test (TRUE, TRUE, TRUE, TRUE, TRUE); + + /* Test the text_or_binary sniffing path */ + + /* GIF is a 'safe' type */ + test_sniffing ("/text_or_binary/home.gif", "image/gif"); + + /* With our current code, no sniffing is done using GIO, so + * the mbox will be identified as text/plain; should we change + * this? 
+ */ + test_sniffing ("/text_or_binary/mbox", "text/plain"); + + /* HTML is considered unsafe for this algorithm, since it is + * scriptable, so going from text/plain to text/html is + * considered 'privilege escalation' + */ + test_sniffing ("/text_or_binary/test.html", "text/plain"); + + /* text/plain with binary content and unknown pattern should be + * application/octet-stream */ + test_sniffing ("/text_or_binary/text_binary.txt", "application/octet-stream"); + + /* text/plain with binary content and scriptable pattern should be + * application/octet-stream to avoid 'privilege escalation' */ + test_sniffing ("/text_or_binary/html_binary.html", "application/octet-stream"); + + /* text/plain with binary content and non scriptable known pattern should + * be the given type */ + test_sniffing ("/text_or_binary/ps_binary.ps", "application/postscript"); + + /* Test the unknown sniffing path */ + + test_sniffing ("/unknown/test.html", "text/html"); + test_sniffing ("/unknown/home.gif", "image/gif"); + test_sniffing ("/unknown/mbox", "text/plain"); + test_sniffing ("/unknown/text_binary.txt", "application/octet-stream"); + + /* Test the XML sniffing path */ + + test_sniffing ("/type/text_xml/home.gif", "text/xml"); + test_sniffing ("/type/anice_type+xml/home.gif", "anice/type+xml"); + test_sniffing ("/type/application_xml/home.gif", "application/xml"); + + /* Test the image sniffing path */ + + test_sniffing ("/type/image_png/home.gif", "image/gif"); + + /* Test the feed or html path */ + + test_sniffing ("/type/text_html/test.html", "text/html"); + test_sniffing ("/type/text_html/rss20.xml", "application/rss+xml"); + test_sniffing ("/type/text_html/atom.xml", "application/atom+xml"); + + /* The spec tells us to only use the last Content-Type header */ + + test_sniffing ("/multiple_headers/home.gif", "image/gif"); + + /* Test that we keep the parameters when sniffing */ + test_sniffing ("/type/text_html; charset=UTF-8/test.html", "text/html; charset=UTF-8"); + + /* 
Test that disabling the sniffer works correctly */ + + test_disabled ("/text_or_binary/home.gif"); + + soup_uri_free (base_uri); + + soup_test_session_abort_unref (session); + soup_test_server_quit_unref (server); + test_cleanup (); + return errors != 0; +} diff --git a/tests/streaming-test.c b/tests/streaming-test.c new file mode 100644 index 0000000..869a7b6 --- /dev/null +++ b/tests/streaming-test.c @@ -0,0 +1,190 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2008 Red Hat, Inc. + */ + +#include "config.h" + +#include +#include +#include + +#include +#include + +#include "test-utils.h" + +#define RESPONSE_CHUNK_SIZE 1024 + +char *full_response, *full_response_md5; +gsize full_response_length; + +static void +get_full_response (void) +{ + GError *error = NULL; + + if (!g_file_get_contents (SRCDIR "/index.txt", + &full_response, + &full_response_length, + &error)) { + fprintf (stderr, "Could not read index file %s: %s\n", + SRCDIR "/index.txt", error->message); + g_error_free (error); + exit (1); + } + + full_response_md5 = g_compute_checksum_for_data (G_CHECKSUM_MD5, + (guchar *)full_response, + full_response_length); +} + +static void +write_next_chunk (SoupMessage *msg, gpointer user_data) +{ + gsize *offset = user_data; + gsize chunk_length; + + chunk_length = MIN (RESPONSE_CHUNK_SIZE, full_response_length - *offset); + if (chunk_length > 0) { + debug_printf (2, " writing chunk\n"); + soup_message_body_append (msg->response_body, + SOUP_MEMORY_STATIC, + full_response + *offset, + chunk_length); + *offset += chunk_length; + } else { + debug_printf (2, " done\n"); + /* This is only actually needed in the chunked and eof + * cases, but it's harmless in the content-length + * case. 
+ */ + soup_message_body_complete (msg->response_body); + } +} + +static void +free_offset (SoupMessage *msg, gpointer offset) +{ + g_free (offset); +} + +static void +server_callback (SoupServer *server, SoupMessage *msg, + const char *path, GHashTable *query, + SoupClientContext *context, gpointer data) +{ + gsize *offset; + + if (!strcmp (path, "/chunked")) { + soup_message_headers_set_encoding (msg->response_headers, + SOUP_ENCODING_CHUNKED); + } else if (!strcmp (path, "/content-length")) { + soup_message_headers_set_encoding (msg->response_headers, + SOUP_ENCODING_CONTENT_LENGTH); + soup_message_headers_set_content_length (msg->response_headers, + full_response_length); + } else if (!strcmp (path, "/eof")) { + soup_message_headers_set_encoding (msg->response_headers, + SOUP_ENCODING_EOF); + } else { + soup_message_set_status (msg, SOUP_STATUS_NOT_FOUND); + return; + } + soup_message_set_status (msg, SOUP_STATUS_OK); + + offset = g_new0 (gsize, 1); + g_signal_connect (msg, "wrote_headers", + G_CALLBACK (write_next_chunk), offset); + g_signal_connect (msg, "wrote_chunk", + G_CALLBACK (write_next_chunk), offset); + g_signal_connect (msg, "finished", + G_CALLBACK (free_offset), offset); +} + +static void +do_request (SoupSession *session, SoupURI *base_uri, char *path) +{ + SoupURI *uri; + SoupMessage *msg; + char *md5; + + uri = soup_uri_new_with_base (base_uri, path); + msg = soup_message_new_from_uri ("GET", uri); + soup_uri_free (uri); + + soup_session_send_message (session, msg); + + if (!SOUP_STATUS_IS_SUCCESSFUL (msg->status_code)) { + debug_printf (1, " message failed: %d %s\n", + msg->status_code, msg->reason_phrase); + errors++; + } + + if (msg->response_body->length != full_response_length) { + debug_printf (1, " received length mismatch: expected %d, got %d\n", + (int)full_response_length, (int)msg->request_body->length); + errors++; + } + + md5 = g_compute_checksum_for_data (G_CHECKSUM_MD5, + (guchar *)msg->response_body->data, + 
msg->response_body->length); + if (strcmp (md5, full_response_md5) != 0) { + debug_printf (1, " data mismatch: expected %s, got %s\n", + full_response_md5, md5); + errors++; + } + g_free (md5); + + g_object_unref (msg); +} + +static void +do_tests (SoupURI *base_uri) +{ + SoupSession *session; + + session = soup_test_session_new (SOUP_TYPE_SESSION_ASYNC, NULL); + debug_printf (1, "Chunked encoding\n"); + do_request (session, base_uri, "chunked"); + debug_printf (1, "\n"); + debug_printf (1, "Content-Length encoding\n"); + do_request (session, base_uri, "content-length"); + debug_printf (1, "\n"); + debug_printf (1, "EOF encoding\n"); + do_request (session, base_uri, "eof"); + soup_test_session_abort_unref (session); +} + +int +main (int argc, char **argv) +{ + GMainLoop *loop; + SoupServer *server; + guint port; + SoupURI *base_uri; + + test_init (argc, argv, NULL); + get_full_response (); + + server = soup_test_server_new (FALSE); + soup_server_add_handler (server, NULL, + server_callback, NULL, NULL); + port = soup_server_get_port (server); + + loop = g_main_loop_new (NULL, TRUE); + + base_uri = soup_uri_new ("http://127.0.0.1"); + soup_uri_set_port (base_uri, port); + do_tests (base_uri); + soup_uri_free (base_uri); + + g_main_loop_unref (loop); + + g_free (full_response); + g_free (full_response_md5); + soup_test_server_quit_unref (server); + test_cleanup (); + return errors != 0; +} diff --git a/tests/test-cert.pem b/tests/test-cert.pem new file mode 100644 index 0000000..a6b6608 --- /dev/null +++ b/tests/test-cert.pem @@ -0,0 +1,22 @@ +-----BEGIN CERTIFICATE----- +MIIDjzCCAvigAwIBAgIBADANBgkqhkiG9w0BAQQFADCBkjELMAkGA1UEBhMCVVMx +FjAUBgNVBAgTDU1hc3NhY2h1c2V0dHMxDzANBgNVBAcTBkJvc3RvbjEPMA0GA1UE +ChMGWGltaWFuMRUwEwYDVQQLEwxTb3VwIEtpdGNoZW4xEjAQBgNVBAMTCWxvY2Fs +aG9zdDEeMBwGCSqGSIb3DQEJARYPc291cEB4aW1pYW4uY29tMB4XDTAzMDkyMzE4 +Mzc0MVoXDTEzMDkyMzE4Mzc0MVowgZIxCzAJBgNVBAYTAlVTMRYwFAYDVQQIEw1N +YXNzYWNodXNldHRzMQ8wDQYDVQQHEwZCb3N0b24xDzANBgNVBAoTBlhpbWlhbjEV 
+MBMGA1UECxMMU291cCBLaXRjaGVuMRIwEAYDVQQDEwlsb2NhbGhvc3QxHjAcBgkq +hkiG9w0BCQEWD3NvdXBAeGltaWFuLmNvbTCBnzANBgkqhkiG9w0BAQEFAAOBjQAw +gYkCgYEAwzT/WxfdXqb2hbyjQav3FtN7tLxj3UbZKCKDYlizBsNLxb9exfebhV4h +CoAcaSNvLUnk3tAXnk+BDsIC1V4SbwqHYR17PnO3YZ8fkNwh5RGZwNx+zafdfFyu ++3Sh+mE03bljpDlTsgPL8CiFCd68MPRnuHoKt5iTpSyLC6Df0qcCAwEAAaOB8jCB +7zAdBgNVHQ4EFgQU9A9omrgBK5Kkl6FRxrgJU2voj4Uwgb8GA1UdIwSBtzCBtIAU +9A9omrgBK5Kkl6FRxrgJU2voj4WhgZikgZUwgZIxCzAJBgNVBAYTAlVTMRYwFAYD +VQQIEw1NYXNzYWNodXNldHRzMQ8wDQYDVQQHEwZCb3N0b24xDzANBgNVBAoTBlhp +bWlhbjEVMBMGA1UECxMMU291cCBLaXRjaGVuMRIwEAYDVQQDEwlsb2NhbGhvc3Qx +HjAcBgkqhkiG9w0BCQEWD3NvdXBAeGltaWFuLmNvbYIBADAMBgNVHRMEBTADAQH/ +MA0GCSqGSIb3DQEBBAUAA4GBAGCV56N7bEDNdE76T8i68gS00NIVVosVQjS39Ojd +ED+rvq0YYvuc2UXlzAonuCJfwFc73g4wSIjS0xijF5rnugZ+aay0LNv2y+Rf34CQ +RNswrwurFjlxgTOO+Wx2IM64mAnBfj43M8uKEZFqAiGKrZZ0xIqyUMlku0FgXDH2 +Jvpg +-----END CERTIFICATE----- diff --git a/tests/test-key.pem b/tests/test-key.pem new file mode 100644 index 0000000..9bea9bf --- /dev/null +++ b/tests/test-key.pem @@ -0,0 +1,15 @@ +-----BEGIN RSA PRIVATE KEY----- +MIICWwIBAAKBgQDDNP9bF91epvaFvKNBq/cW03u0vGPdRtkoIoNiWLMGw0vFv17F +95uFXiEKgBxpI28tSeTe0BeeT4EOwgLVXhJvCodhHXs+c7dhnx+Q3CHlEZnA3H7N +p918XK77dKH6YTTduWOkOVOyA8vwKIUJ3rww9Ge4egq3mJOlLIsLoN/SpwIDAQAB +AoGAOGAi6zzuKrrPcXo0L/ApEQeMr3rE4I/ogUXOaeWx9l8KkBafmU7UNGUl57Fu +AxM/tXWkypCQcaEGZau0Q8jCS5wKgynNi72F4OzBqgjgW4vvtrjfC1LagnCd2ZMX +V5XVECjO/sEDg0hJeOsXlKbECAgvHMU3dSCGO7DmuG9tIxkCQQDsth1VvVjOdfp6 +klOfYzbAM1p9HIcNPJMeuBFqq//UHX4aPqh/6G6W06TOTN+bjZBmitG9yjV958t2 +rPxl64f7AkEA0x0WOLm5S0LNsv7zwjXuTcj+NCHL36b3dK90oxX8Gq69PANL/EJY +ItpHNLgzzo4DRmQy8q0WZlC9HYk1YljERQJAEN7+AkFnlfeErb3GJgMNQO+oEGi7 +G29o0PSvkRnHNxgPB9HVcqBfWXKmOWnzOgQB+b0FK/DAlUOzFbdImf8KhwJAFLty +hzeV/tIcqUtoXNY3BOSMMkpvXxNikc75QVrTWzt10gLw32EUjreo7oB4dfx0TeFh +L3vYC0w6hkAHQhU9kQJAPSEQ+Bqzlk6BrQNrNFEVzi1Rwpz7LOzhOjuYW6bsiAdX +axA4r6Xh25x08ZU7cqX7gwVLHL6pgrEKuUs0Nc5Klg== +-----END RSA PRIVATE KEY----- diff --git a/tests/test-utils.c b/tests/test-utils.c 
new file mode 100644 index 0000000..15495a0 --- /dev/null +++ b/tests/test-utils.c @@ -0,0 +1,361 @@ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "test-utils.h" +#include "libsoup/soup.h" + +#include +#include +#include +#include +#include + +#ifdef HAVE_APACHE +static gboolean apache_running; +#endif + +static SoupLogger *logger; + +int debug_level, errors; +gboolean expect_warning, tls_available; +static int http_debug_level; + +static gboolean +increment_debug_level (const char *option_name, const char *value, + gpointer data, GError **error) +{ + debug_level++; + return TRUE; +} + +static gboolean +increment_http_debug_level (const char *option_name, const char *value, + gpointer data, GError **error) +{ + http_debug_level++; + return TRUE; +} + +static GOptionEntry debug_entry[] = { + { "debug", 'd', G_OPTION_FLAG_NO_ARG, + G_OPTION_ARG_CALLBACK, increment_debug_level, + "Enable (or increase) test-specific debugging", NULL }, + { "http-debug", 'h', G_OPTION_FLAG_NO_ARG, + G_OPTION_ARG_CALLBACK, increment_http_debug_level, + "Enable (or increase) HTTP-level debugging", NULL }, + { NULL } +}; + +static void +quit (int sig) +{ +#ifdef HAVE_APACHE + if (apache_running) + apache_cleanup (); +#endif + + exit (1); +} + +static void +test_log_handler (const char *log_domain, GLogLevelFlags log_level, + const char *message, gpointer user_data) +{ + if (log_level & (G_LOG_LEVEL_WARNING | G_LOG_LEVEL_CRITICAL)) { + if (expect_warning) { + expect_warning = FALSE; + debug_printf (2, "Got expected warning: %s\n", message); + return; + } else + errors++; + } + g_log_default_handler (log_domain, log_level, message, user_data); +} + +void +test_init (int argc, char **argv, GOptionEntry *entries) +{ + GOptionContext *opts; + char *name; + GError *error = NULL; + GTlsBackend *tls_backend; + + g_thread_init (NULL); + g_type_init (); + + name = strrchr (argv[0], '/'); + if (!name++) + name = argv[0]; + if (!strncmp (name, "lt-", 3)) + name += 3; + g_set_prgname 
(name); + + opts = g_option_context_new (NULL); + g_option_context_add_main_entries (opts, debug_entry, NULL); + if (entries) + g_option_context_add_main_entries (opts, entries, NULL); + + if (!g_option_context_parse (opts, &argc, &argv, &error)) { + fprintf (stderr, "Could not parse arguments: %s\n", + error->message); + fprintf (stderr, "%s", + g_option_context_get_help (opts, TRUE, NULL)); + exit (1); + } + g_option_context_free (opts); + + /* Exit cleanly on ^C in case we're valgrinding. */ + signal (SIGINT, quit); + + g_log_set_default_handler (test_log_handler, NULL); + + tls_backend = g_tls_backend_get_default (); + tls_available = g_tls_backend_supports_tls (tls_backend); +} + +void +test_cleanup (void) +{ +#ifdef HAVE_APACHE + if (apache_running) + apache_cleanup (); +#endif + + if (logger) + g_object_unref (logger); + + g_main_context_unref (g_main_context_default ()); + + debug_printf (1, "\n"); + if (errors) { + printf ("%s: %d error(s).%s\n", + g_get_prgname (), errors, + debug_level == 0 ? " Run with '-d' for details" : ""); + } else + printf ("%s: OK\n", g_get_prgname ()); +} + +void +debug_printf (int level, const char *format, ...) 
+{ + va_list args; + + if (debug_level < level) + return; + + va_start (args, format); + vprintf (format, args); + va_end (args); +} + +#ifdef HAVE_APACHE + +static gboolean +apache_cmd (const char *cmd) +{ + const char *argv[8]; + char *cwd, *conf; + int status; + gboolean ok; + + cwd = g_get_current_dir (); + conf = g_build_filename (cwd, "httpd.conf", NULL); + + argv[0] = APACHE_HTTPD; + argv[1] = "-d"; + argv[2] = cwd; + argv[3] = "-f"; + argv[4] = conf; + argv[5] = "-k"; + argv[6] = cmd; + argv[7] = NULL; + + ok = g_spawn_sync (cwd, (char **)argv, NULL, 0, NULL, NULL, + NULL, NULL, &status, NULL); + if (ok) + ok = (status == 0); + + g_free (cwd); + g_free (conf); + + return ok; +} + +void +apache_init (void) +{ + if (!apache_cmd ("start")) { + fprintf (stderr, "Could not start apache\n"); + exit (1); + } + apache_running = TRUE; +} + +void +apache_cleanup (void) +{ + pid_t pid; + char *contents; + + if (g_file_get_contents ("httpd.pid", &contents, NULL, NULL)) { + pid = strtoul (contents, NULL, 10); + g_free (contents); + } else + pid = 0; + + if (!apache_cmd ("graceful-stop")) + return; + apache_running = FALSE; + + if (pid) { + while (kill (pid, 0) == 0) + g_usleep (100); + } +} + +#endif /* HAVE_APACHE */ + +SoupSession * +soup_test_session_new (GType type, ...) 
+{ + va_list args; + const char *propname; + SoupSession *session; + + va_start (args, type); + propname = va_arg (args, const char *); + session = (SoupSession *)g_object_new_valist (type, propname, args); + va_end (args); + + g_object_set (G_OBJECT (session), + SOUP_SESSION_SSL_CA_FILE, SRCDIR "/test-cert.pem", + SOUP_SESSION_SSL_STRICT, FALSE, + NULL); + + if (http_debug_level && !logger) { + SoupLoggerLogLevel level = MIN ((SoupLoggerLogLevel)http_debug_level, SOUP_LOGGER_LOG_BODY); + + logger = soup_logger_new (level, -1); + } + + if (logger) + soup_session_add_feature (session, SOUP_SESSION_FEATURE (logger)); + + return session; +} + +void +soup_test_session_abort_unref (SoupSession *session) +{ + g_object_add_weak_pointer (G_OBJECT (session), (gpointer *)&session); + + soup_session_abort (session); + g_object_unref (session); + + if (session) { + errors++; + debug_printf (1, "leaked SoupSession!\n"); + g_object_remove_weak_pointer (G_OBJECT (session), (gpointer *)&session); + } +} + +static gpointer run_server_thread (gpointer user_data); + +static SoupServer * +test_server_new (gboolean in_own_thread, gboolean ssl) +{ + SoupServer *server; + GMainContext *async_context; + const char *ssl_cert_file, *ssl_key_file; + SoupAddress *addr; + + async_context = in_own_thread ? 
g_main_context_new () : NULL; + + if (ssl) { + ssl_cert_file = SRCDIR "/test-cert.pem"; + ssl_key_file = SRCDIR "/test-key.pem"; + } else + ssl_cert_file = ssl_key_file = NULL; + + addr = soup_address_new ("127.0.0.1", SOUP_ADDRESS_ANY_PORT); + soup_address_resolve_sync (addr, NULL); + + server = soup_server_new (SOUP_SERVER_INTERFACE, addr, + SOUP_SERVER_ASYNC_CONTEXT, async_context, + SOUP_SERVER_SSL_CERT_FILE, ssl_cert_file, + SOUP_SERVER_SSL_KEY_FILE, ssl_key_file, + NULL); + g_object_unref (addr); + if (async_context) + g_main_context_unref (async_context); + + if (!server) { + fprintf (stderr, "Unable to create server\n"); + exit (1); + } + + if (in_own_thread) { + GThread *thread; + + thread = g_thread_create (run_server_thread, server, + TRUE, NULL); + g_object_set_data (G_OBJECT (server), "thread", thread); + } else + soup_server_run_async (server); + + return server; +} + +SoupServer * +soup_test_server_new (gboolean in_own_thread) +{ + return test_server_new (in_own_thread, FALSE); +} + +SoupServer * +soup_test_server_new_ssl (gboolean in_own_thread) +{ + return test_server_new (in_own_thread, TRUE); +} + +static gpointer +run_server_thread (gpointer user_data) +{ + SoupServer *server = user_data; + + soup_server_run (server); + return NULL; +} + +static gboolean +idle_quit_server (gpointer server) +{ + soup_server_quit (server); + return FALSE; +} + +void +soup_test_server_quit_unref (SoupServer *server) +{ + GThread *thread; + + g_object_add_weak_pointer (G_OBJECT (server), + (gpointer *)&server); + + thread = g_object_get_data (G_OBJECT (server), "thread"); + if (thread) { + soup_add_completion (soup_server_get_async_context (server), + idle_quit_server, server); + g_thread_join (thread); + } else + soup_server_quit (server); + g_object_unref (server); + + if (server) { + errors++; + debug_printf (1, "leaked SoupServer!\n"); + g_object_remove_weak_pointer (G_OBJECT (server), + (gpointer *)&server); + } +} diff --git a/tests/test-utils.h 
b/tests/test-utils.h new file mode 100644 index 0000000..98c12fd --- /dev/null +++ b/tests/test-utils.h @@ -0,0 +1,24 @@ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "libsoup/soup-types.h" + +void test_init (int argc, char **argv, GOptionEntry *entries); +void test_cleanup (void); + +extern int debug_level, errors; +extern gboolean expect_warning, tls_available; +void debug_printf (int level, const char *format, ...) G_GNUC_PRINTF (2, 3); + +#ifdef HAVE_APACHE +void apache_init (void); +void apache_cleanup (void); +#endif + +SoupSession *soup_test_session_new (GType type, ...); +void soup_test_session_abort_unref (SoupSession *session); + +SoupServer *soup_test_server_new (gboolean in_own_thread); +SoupServer *soup_test_server_new_ssl (gboolean in_own_thread); +void soup_test_server_quit_unref (SoupServer *server); diff --git a/tests/timeout-test.c b/tests/timeout-test.c new file mode 100644 index 0000000..d3b6279 --- /dev/null +++ b/tests/timeout-test.c @@ -0,0 +1,206 @@ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include +#include +#include + +#include "libsoup/soup.h" + +#include "test-utils.h" + +static void +do_message_to_session (SoupSession *session, const char *uri, + const char *comment, guint expected_status) +{ + SoupMessage *msg; + + debug_printf (1, " %s\n", comment); + msg = soup_message_new ("GET", uri); + soup_session_send_message (session, msg); + + if (msg->status_code != expected_status) { + debug_printf (1, " FAILED: %d %s (expected %d %s)\n", + msg->status_code, msg->reason_phrase, + expected_status, + soup_status_get_phrase (expected_status)); + errors++; + } + + if (SOUP_STATUS_IS_SUCCESSFUL (msg->status_code) && + !soup_message_is_keepalive (msg)) { + debug_printf (1, " ERROR: message is not keepalive!"); + errors++; + } + + g_object_unref (msg); +} + +static void +request_started_cb (SoupSession *session, SoupMessage *msg, + SoupSocket *socket, gpointer user_data) +{ + SoupSocket **ret = 
user_data; + + *ret = socket; +} + +static void +do_tests_for_session (SoupSession *timeout_session, + SoupSession *idle_session, + SoupSession *plain_session, + char *fast_uri, char *slow_uri) +{ + SoupSocket *ret, *idle_first, *idle_second; + SoupSocket *plain_first, *plain_second; + + if (idle_session) { + g_signal_connect (idle_session, "request-started", + G_CALLBACK (request_started_cb), &ret); + do_message_to_session (idle_session, fast_uri, "fast to idle", SOUP_STATUS_OK); + idle_first = ret; + } + + if (plain_session) { + g_signal_connect (plain_session, "request-started", + G_CALLBACK (request_started_cb), &ret); + do_message_to_session (plain_session, fast_uri, "fast to plain", SOUP_STATUS_OK); + plain_first = ret; + } + + do_message_to_session (timeout_session, fast_uri, "fast to timeout", SOUP_STATUS_OK); + do_message_to_session (timeout_session, slow_uri, "slow to timeout", SOUP_STATUS_IO_ERROR); + + if (idle_session) { + do_message_to_session (idle_session, fast_uri, "fast to idle", SOUP_STATUS_OK); + idle_second = ret; + g_signal_handlers_disconnect_by_func (idle_session, + (gpointer)request_started_cb, + &ret); + + if (idle_first == idle_second) { + debug_printf (1, " ERROR: idle_session did not close first connection\n"); + errors++; + } + } + + if (plain_session) { + do_message_to_session (plain_session, fast_uri, "fast to plain", SOUP_STATUS_OK); + plain_second = ret; + g_signal_handlers_disconnect_by_func (plain_session, + (gpointer)request_started_cb, + &ret); + + if (plain_first != plain_second) { + debug_printf (1, " ERROR: plain_session closed connection\n"); + errors++; + } + } +} + +static void +do_timeout_tests (char *fast_uri, char *slow_uri) +{ + SoupSession *timeout_session, *idle_session, *plain_session; + + debug_printf (1, " async\n"); + timeout_session = soup_test_session_new (SOUP_TYPE_SESSION_ASYNC, + SOUP_SESSION_TIMEOUT, 1, + NULL); + idle_session = soup_test_session_new (SOUP_TYPE_SESSION_ASYNC, + SOUP_SESSION_IDLE_TIMEOUT, 
1, + NULL); + /* The "plain" session also has an idle timeout, but it's longer + * than the test takes, so for our purposes it should behave like + * it has no timeout. + */ + plain_session = soup_test_session_new (SOUP_TYPE_SESSION_ASYNC, + SOUP_SESSION_IDLE_TIMEOUT, 2, + NULL); + do_tests_for_session (timeout_session, idle_session, plain_session, + fast_uri, slow_uri); + soup_test_session_abort_unref (timeout_session); + soup_test_session_abort_unref (idle_session); + soup_test_session_abort_unref (plain_session); + + debug_printf (1, " sync\n"); + timeout_session = soup_test_session_new (SOUP_TYPE_SESSION_SYNC, + SOUP_SESSION_TIMEOUT, 1, + NULL); + /* SOUP_SESSION_TIMEOUT doesn't work with sync sessions */ + plain_session = soup_test_session_new (SOUP_TYPE_SESSION_SYNC, + NULL); + do_tests_for_session (timeout_session, NULL, plain_session, fast_uri, slow_uri); + soup_test_session_abort_unref (timeout_session); +} + +static gboolean +timeout_finish_message (gpointer msg) +{ + SoupServer *server = g_object_get_data (G_OBJECT (msg), "server"); + + soup_server_unpause_message (server, msg); + return FALSE; +} + +static void +server_handler (SoupServer *server, + SoupMessage *msg, + const char *path, + GHashTable *query, + SoupClientContext *client, + gpointer user_data) +{ + soup_message_set_status (msg, SOUP_STATUS_OK); + soup_message_set_response (msg, "text/plain", + SOUP_MEMORY_STATIC, + "ok\r\n", 4); + + if (!strcmp (path, "/slow")) { + soup_server_pause_message (server, msg); + g_object_set_data (G_OBJECT (msg), "server", server); + soup_add_timeout (soup_server_get_async_context (server), + 1100, timeout_finish_message, msg); + } +} + +int +main (int argc, char **argv) +{ + SoupServer *server; + char *fast_uri, *slow_uri; + + test_init (argc, argv, NULL); + + debug_printf (1, "http\n"); + server = soup_test_server_new (TRUE); + soup_server_add_handler (server, NULL, server_handler, NULL, NULL); + fast_uri = g_strdup_printf ("http://127.0.0.1:%u/", + 
soup_server_get_port (server)); + slow_uri = g_strdup_printf ("http://127.0.0.1:%u/slow", + soup_server_get_port (server)); + do_timeout_tests (fast_uri, slow_uri); + g_free (fast_uri); + g_free (slow_uri); + soup_test_server_quit_unref (server); + + if (tls_available) { + debug_printf (1, "\nhttps\n"); + server = soup_test_server_new_ssl (TRUE); + soup_server_add_handler (server, NULL, server_handler, NULL, NULL); + fast_uri = g_strdup_printf ("https://127.0.0.1:%u/", + soup_server_get_port (server)); + slow_uri = g_strdup_printf ("https://127.0.0.1:%u/slow", + soup_server_get_port (server)); + do_timeout_tests (fast_uri, slow_uri); + g_free (fast_uri); + g_free (slow_uri); + soup_test_server_quit_unref (server); + } + + test_cleanup (); + return errors != 0; +} diff --git a/tests/uri-parsing.c b/tests/uri-parsing.c new file mode 100644 index 0000000..e8568a9 --- /dev/null +++ b/tests/uri-parsing.c @@ -0,0 +1,256 @@ +#include + +#include +#include +#include +#include +#include + +#include "libsoup/soup-uri.h" + +#include "test-utils.h" + +static struct { + const char *uri_string, *result; +} abs_tests[] = { + { "foo:", "foo:" }, + { "file:/dev/null", "file:/dev/null" }, + { "file:///dev/null", "file:///dev/null" }, + { "ftp://user@host/path", "ftp://user@host/path" }, + { "ftp://user@host:9999/path", "ftp://user@host:9999/path" }, + { "ftp://user:password@host/path", "ftp://user@host/path" }, + { "ftp://user:password@host:9999/path", "ftp://user@host:9999/path" }, + { "ftp://user:password@host", "ftp://user@host" }, + { "http://us%65r@host", "http://user@host/" }, + { "http://us%40r@host", "http://us%40r@host/" }, + { "http://us%3ar@host", "http://us%3Ar@host/" }, + { "http://us%2fr@host", "http://us%2Fr@host/" }, + { "http://us%3fr@host", "http://us%3Fr@host/" }, + { "http://host?query", "http://host/?query" }, + { "http://host/path?query=http%3A%2F%2Fhost%2Fpath%3Fchildparam%3Dchildvalue¶m=value", + 
"http://host/path?query=http%3A%2F%2Fhost%2Fpath%3Fchildparam%3Dchildvalue¶m=value" }, + { "http://control-chars/%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F%7F", + "http://control-chars/%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F%7F"}, + { "http://space/%20", + "http://space/%20" }, + { "http://delims/%3C%3E%23%25%22", + "http://delims/%3C%3E%23%25%22" }, + { "http://unwise-chars/%7B%7D%7C%5C%5E%5B%5D%60", + "http://unwise-chars/%7B%7D%7C%5C%5E%5B%5D%60" }, + + /* From RFC 2732 */ + { "http://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:80/index.html", + "http://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]/index.html" }, + { "http://[1080:0:0:0:8:800:200C:417A]/index.html", + "http://[1080:0:0:0:8:800:200C:417A]/index.html" }, + { "http://[3ffe:2a00:100:7031::1]", + "http://[3ffe:2a00:100:7031::1]/" }, + { "http://[1080::8:800:200C:417A]/foo", + "http://[1080::8:800:200C:417A]/foo" }, + { "http://[::192.9.5.5]/ipng", + "http://[::192.9.5.5]/ipng" }, + { "http://[::FFFF:129.144.52.38]:80/index.html", + "http://[::FFFF:129.144.52.38]/index.html" }, + { "http://[2010:836B:4179::836B:4179]", + "http://[2010:836B:4179::836B:4179]/" }, + + /* Try to recover certain kinds of invalid URIs */ + { "http://host/path with spaces", + "http://host/path%20with%20spaces" }, + { " http://host/path", "http://host/path" }, + { "http://host/path ", "http://host/path" }, + { "http://host ", "http://host/" }, + { "http://host:999 ", "http://host:999/" }, + { "http://host/pa\nth", "http://host/path" }, + { "http:\r\n//host/path", "http://host/path" }, + { "http://\thost/path", "http://host/path" }, + + /* Bug 594405; 0-length is different from not-present */ + { "http://host/path?", "http://host/path?" 
}, + { "http://host/path#", "http://host/path#" }, + + /* Bug 590524; ignore badly-%-encoding */ + { "http://host/path%", "http://host/path%" }, + { "http://h%ost/path", "http://h%25ost/path" }, + { "http://host/path%%", "http://host/path%%" }, + { "http://host/path%%%", "http://host/path%%%" }, + { "http://host/path%/x/", "http://host/path%/x/" }, + { "http://host/path%0x/", "http://host/path%0x/" }, + { "http://host/path%ax", "http://host/path%ax" } +}; +static int num_abs_tests = G_N_ELEMENTS(abs_tests); + +/* From RFC 3986. */ +static const char *base = "http://a/b/c/d;p?q"; +static struct { + const char *uri_string, *result; +} rel_tests[] = { + { "g:h", "g:h" }, + { "g", "http://a/b/c/g" }, + { "./g", "http://a/b/c/g" }, + { "g/", "http://a/b/c/g/" }, + { "/g", "http://a/g" }, + { "//g", "http://g/" }, + { "?y", "http://a/b/c/d;p?y" }, + { "g?y", "http://a/b/c/g?y" }, + { "#s", "http://a/b/c/d;p?q#s" }, + { "g#s", "http://a/b/c/g#s" }, + { "g?y#s", "http://a/b/c/g?y#s" }, + { ";x", "http://a/b/c/;x" }, + { "g;x", "http://a/b/c/g;x" }, + { "g;x?y#s", "http://a/b/c/g;x?y#s" }, + { ".", "http://a/b/c/" }, + { "./", "http://a/b/c/" }, + { "..", "http://a/b/" }, + { "../", "http://a/b/" }, + { "../g", "http://a/b/g" }, + { "../..", "http://a/" }, + { "../../", "http://a/" }, + { "../../g", "http://a/g" }, + { "", "http://a/b/c/d;p?q" }, + { "../../../g", "http://a/g" }, + { "../../../../g", "http://a/g" }, + { "/./g", "http://a/g" }, + { "/../g", "http://a/g" }, + { "g.", "http://a/b/c/g." }, + { ".g", "http://a/b/c/.g" }, + { "g..", "http://a/b/c/g.." 
}, + { "..g", "http://a/b/c/..g" }, + { "./../g", "http://a/b/g" }, + { "./g/.", "http://a/b/c/g/" }, + { "g/./h", "http://a/b/c/g/h" }, + { "g/../h", "http://a/b/c/h" }, + { "g;x=1/./y", "http://a/b/c/g;x=1/y" }, + { "g;x=1/../y", "http://a/b/c/y" }, + { "g?y/./x", "http://a/b/c/g?y/./x" }, + { "g?y/../x", "http://a/b/c/g?y/../x" }, + { "g#s/./x", "http://a/b/c/g#s/./x" }, + { "g#s/../x", "http://a/b/c/g#s/../x" }, + + /* RFC 3986 notes that some old parsers will parse this as + * a relative URL ("http://a/b/c/g"), but it should be + * interpreted as absolute. libsoup should parse it + * correctly as being absolute, but then reject it since it's + * an http URL with no host. + */ + { "http:g", NULL } +}; +static int num_rel_tests = G_N_ELEMENTS(rel_tests); + +static struct { + const char *one, *two; +} eq_tests[] = { + { "example://a/b/c/%7Bfoo%7D", "eXAMPLE://a/./b/../b/%63/%7Bfoo%7D" }, + { "http://example.com", "http://example.com/" }, + /* From RFC 2616 */ + { "http://abc.com:80/~smith/home.html", "http://abc.com:80/~smith/home.html" }, + { "http://abc.com:80/~smith/home.html", "http://ABC.com/%7Esmith/home.html" }, + { "http://abc.com:80/~smith/home.html", "http://ABC.com:/%7esmith/home.html" }, +}; +static int num_eq_tests = G_N_ELEMENTS(eq_tests); + +static gboolean +do_uri (SoupURI *base_uri, const char *base_str, + const char *in_uri, const char *out_uri) +{ + SoupURI *uri; + char *uri_string; + + if (base_uri) { + debug_printf (1, "<%s> + <%s> = <%s>? ", base_str, in_uri, + out_uri ? out_uri : "ERR"); + uri = soup_uri_new_with_base (base_uri, in_uri); + } else { + debug_printf (1, "<%s> => <%s>? ", in_uri, + out_uri ? 
out_uri : "ERR"); + uri = soup_uri_new (in_uri); + } + + if (!uri) { + if (out_uri) { + debug_printf (1, "ERR\n Could not parse %s\n", in_uri); + return FALSE; + } else { + debug_printf (1, "OK\n"); + return TRUE; + } + } + + uri_string = soup_uri_to_string (uri, FALSE); + soup_uri_free (uri); + + if (!out_uri) { + debug_printf (1, "ERR\n Got %s\n", uri_string); + return FALSE; + } + + if (strcmp (uri_string, out_uri) != 0) { + debug_printf (1, "NO\n Unparses to <%s>\n", uri_string); + g_free (uri_string); + return FALSE; + } + g_free (uri_string); + + debug_printf (1, "OK\n"); + return TRUE; +} + +int +main (int argc, char **argv) +{ + SoupURI *base_uri, *uri1, *uri2; + char *uri_string; + int i; + + test_init (argc, argv, NULL); + + debug_printf (1, "Absolute URI parsing\n"); + for (i = 0; i < num_abs_tests; i++) { + if (!do_uri (NULL, NULL, abs_tests[i].uri_string, + abs_tests[i].result)) + errors++; + } + + debug_printf (1, "\nRelative URI parsing\n"); + base_uri = soup_uri_new (base); + if (!base_uri) { + fprintf (stderr, "Could not parse %s!\n", base); + exit (1); + } + + uri_string = soup_uri_to_string (base_uri, FALSE); + if (strcmp (uri_string, base) != 0) { + fprintf (stderr, "URI <%s> unparses to <%s>\n", + base, uri_string); + errors++; + } + g_free (uri_string); + + for (i = 0; i < num_rel_tests; i++) { + if (!do_uri (base_uri, base, rel_tests[i].uri_string, + rel_tests[i].result)) + errors++; + } + soup_uri_free (base_uri); + + debug_printf (1, "\nURI equality testing\n"); + for (i = 0; i < num_eq_tests; i++) { + uri1 = soup_uri_new (eq_tests[i].one); + uri2 = soup_uri_new (eq_tests[i].two); + debug_printf (1, "<%s> == <%s>? 
", eq_tests[i].one, eq_tests[i].two); + if (soup_uri_equal (uri1, uri2)) + debug_printf (1, "OK\n"); + else { + debug_printf (1, "NO\n"); + debug_printf (1, "%s : %s : %s\n%s : %s : %s\n", + uri1->scheme, uri1->host, uri1->path, + uri2->scheme, uri2->host, uri2->path); + errors++; + } + soup_uri_free (uri1); + soup_uri_free (uri2); + } + + test_cleanup (); + return errors != 0; +} diff --git a/tests/xmlrpc-server-test.c b/tests/xmlrpc-server-test.c new file mode 100644 index 0000000..9eae702 --- /dev/null +++ b/tests/xmlrpc-server-test.c @@ -0,0 +1,339 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2008 Red Hat, Inc. + */ + +#include "config.h" + +#include +#include +#include + +#include + +#include "test-utils.h" + +GMainLoop *loop; + +static void +type_error (SoupMessage *msg, GType expected, GValueArray *params, int bad_value) +{ + soup_xmlrpc_set_fault (msg, + SOUP_XMLRPC_FAULT_SERVER_ERROR_INVALID_METHOD_PARAMETERS, + "Bad parameter #%d: expected %s, got %s", + bad_value + 1, g_type_name (expected), + g_type_name (G_VALUE_TYPE (¶ms->values[bad_value]))); +} + +static void +args_error (SoupMessage *msg, GValueArray *params, int expected) +{ + soup_xmlrpc_set_fault (msg, + SOUP_XMLRPC_FAULT_SERVER_ERROR_INVALID_METHOD_PARAMETERS, + "Wrong number of parameters: expected %d, got %d", + expected, params->n_values); +} + +static void +do_sum (SoupMessage *msg, GValueArray *params) +{ + int sum = 0, i, val; + GValueArray *nums; + + if (params->n_values != 1) { + args_error (msg, params, 1); + return; + } + if (!soup_value_array_get_nth (params, 0, G_TYPE_VALUE_ARRAY, &nums)) { + type_error (msg, G_TYPE_VALUE_ARRAY, params, 0); + return; + } + + for (i = 0; i < nums->n_values; i++) { + if (!soup_value_array_get_nth (nums, i, G_TYPE_INT, &val)) { + type_error (msg, G_TYPE_INT, nums, i); + return; + } + sum += val; + } + + soup_xmlrpc_set_response (msg, G_TYPE_INT, sum); + +} + +static void +do_countBools 
(SoupMessage *msg, GValueArray *params) +{ + int i, trues = 0, falses = 0; + GValueArray *bools; + GHashTable *ret = soup_value_hash_new (); + gboolean val; + + if (params->n_values != 1) { + args_error (msg, params, 1); + return; + } + if (!soup_value_array_get_nth (params, 0, G_TYPE_VALUE_ARRAY, &bools)) { + type_error (msg, G_TYPE_VALUE_ARRAY, params, 0); + return; + } + + for (i = 0; i < bools->n_values; i++) { + if (!soup_value_array_get_nth (bools, i, G_TYPE_BOOLEAN, &val)) { + type_error (msg, G_TYPE_BOOLEAN, params, i); + return; + } + if (val) + trues++; + else + falses++; + } + + soup_value_hash_insert (ret, "true", G_TYPE_INT, trues); + soup_value_hash_insert (ret, "false", G_TYPE_INT, falses); + soup_xmlrpc_set_response (msg, G_TYPE_HASH_TABLE, ret); + g_hash_table_destroy (ret); + +} + +static void +do_md5sum (SoupMessage *msg, GValueArray *params) +{ + GChecksum *checksum; + GByteArray *data, *digest; + gsize digest_len = 16; + + if (params->n_values != 1) { + args_error (msg, params, 1); + return; + } + + if (!soup_value_array_get_nth (params, 0, SOUP_TYPE_BYTE_ARRAY, &data)) { + type_error (msg, SOUP_TYPE_BYTE_ARRAY, params, 0); + return; + } + checksum = g_checksum_new (G_CHECKSUM_MD5); + g_checksum_update (checksum, data->data, data->len); + digest = g_byte_array_new (); + g_byte_array_set_size (digest, digest_len); + g_checksum_get_digest (checksum, digest->data, &digest_len); + g_checksum_free (checksum); + + soup_xmlrpc_set_response (msg, SOUP_TYPE_BYTE_ARRAY, digest); + g_byte_array_free (digest, TRUE); +} + + +static void +do_dateChange (SoupMessage *msg, GValueArray *params) +{ + GHashTable *arg; + SoupDate *date; + int val; + + if (params->n_values != 2) { + args_error (msg, params, 2); + return; + } + + if (!soup_value_array_get_nth (params, 0, SOUP_TYPE_DATE, &date)) { + type_error (msg, SOUP_TYPE_DATE, params, 0); + return; + } + if (!soup_value_array_get_nth (params, 1, G_TYPE_HASH_TABLE, &arg)) { + type_error (msg, G_TYPE_HASH_TABLE, 
params, 1); + return; + } + + if (soup_value_hash_lookup (arg, "tm_year", G_TYPE_INT, &val)) + date->year = val + 1900; + if (soup_value_hash_lookup (arg, "tm_mon", G_TYPE_INT, &val)) + date->month = val + 1; + if (soup_value_hash_lookup (arg, "tm_mday", G_TYPE_INT, &val)) + date->day = val; + if (soup_value_hash_lookup (arg, "tm_hour", G_TYPE_INT, &val)) + date->hour = val; + if (soup_value_hash_lookup (arg, "tm_min", G_TYPE_INT, &val)) + date->minute = val; + if (soup_value_hash_lookup (arg, "tm_sec", G_TYPE_INT, &val)) + date->second = val; + + soup_xmlrpc_set_response (msg, SOUP_TYPE_DATE, date); +} + +static void +do_echo (SoupMessage *msg, GValueArray *params) +{ + int i; + const char *val; + GValueArray *in, *out; + + if (!soup_value_array_get_nth (params, 0, G_TYPE_VALUE_ARRAY, &in)) { + type_error (msg, G_TYPE_VALUE_ARRAY, params, 0); + return; + } + + out = g_value_array_new (in->n_values); + for (i = 0; i < in->n_values; i++) { + if (!soup_value_array_get_nth (in, i, G_TYPE_STRING, &val)) { + type_error (msg, G_TYPE_STRING, in, i); + return; + } + soup_value_array_append (out, G_TYPE_STRING, val); + } + + soup_xmlrpc_set_response (msg, G_TYPE_VALUE_ARRAY, out); + g_value_array_free (out); + +} + +static void +server_callback (SoupServer *server, SoupMessage *msg, + const char *path, GHashTable *query, + SoupClientContext *context, gpointer data) +{ + char *method_name; + GValueArray *params; + + if (msg->method != SOUP_METHOD_POST) { + soup_message_set_status (msg, SOUP_STATUS_NOT_IMPLEMENTED); + return; + } + + soup_message_set_status (msg, SOUP_STATUS_OK); + + if (!soup_xmlrpc_parse_method_call (msg->request_body->data, + msg->request_body->length, + &method_name, ¶ms)) { + soup_xmlrpc_set_fault (msg, SOUP_XMLRPC_FAULT_PARSE_ERROR_NOT_WELL_FORMED, + "Could not parse method call"); + return; + } + + if (!strcmp (method_name, "sum")) + do_sum (msg, params); + else if (!strcmp (method_name, "countBools")) + do_countBools (msg, params); + else if (!strcmp 
(method_name, "md5sum")) + do_md5sum (msg, params); + else if (!strcmp (method_name, "dateChange")) + do_dateChange (msg, params); + else if (!strcmp (method_name, "echo")) + do_echo (msg, params); + else { + soup_xmlrpc_set_fault (msg, SOUP_XMLRPC_FAULT_SERVER_ERROR_REQUESTED_METHOD_NOT_FOUND, + "Unknown method %s", method_name); + } + + g_free (method_name); + g_value_array_free (params); +} + +static void +xmlrpc_test_exited (GPid pid, int status, gpointer data) +{ + errors = WIFEXITED (status) ? WEXITSTATUS (status) : 1; + g_main_loop_quit (loop); +} + +static gboolean +xmlrpc_test_print (GIOChannel *io, GIOCondition cond, gpointer data) +{ + char *line; + gsize len; + GIOStatus status; + + if (!(cond & G_IO_IN)) + return FALSE; + + status = g_io_channel_read_line (io, &line, &len, NULL, NULL); + if (status == G_IO_STATUS_NORMAL) { + /* Don't print the exit status, just the debug stuff */ + if (strncmp (line, "xmlrpc-test:", strlen ("xmlrpc-test:")) != 0) + printf ("%s", line); + g_free (line); + return TRUE; + } else if (status == G_IO_STATUS_AGAIN) + return TRUE; + else + return FALSE; +} + +static void +do_xmlrpc_tests (SoupURI *uri) +{ + char *argv[8]; + int arg, out; + gboolean ok; + GPid pid; + GError *error = NULL; + GIOChannel *child_out; + + argv[0] = "./xmlrpc-test"; + argv[1] = "-s"; + argv[2] = "-u"; + argv[3] = soup_uri_to_string (uri, FALSE); + + for (arg = 0; arg < debug_level && arg < 3; arg++) + argv[arg + 4] = "-d"; + argv[arg + 4] = NULL; + + ok = g_spawn_async_with_pipes (NULL, argv, NULL, + G_SPAWN_DO_NOT_REAP_CHILD, + NULL, NULL, &pid, + NULL, &out, NULL, + &error); + g_free (argv[3]); + + if (!ok) { + printf ("Could not run xmlrpc-test: %s\n", error->message); + errors++; + return; + } + + g_child_watch_add (pid, xmlrpc_test_exited, NULL); + child_out = g_io_channel_unix_new (out); + g_io_add_watch (child_out, G_IO_IN | G_IO_ERR | G_IO_HUP, + xmlrpc_test_print, NULL); + g_io_channel_unref (child_out); +} + +gboolean run_tests = TRUE; + 
+static GOptionEntry no_test_entry[] = { + { "no-tests", 'n', G_OPTION_FLAG_REVERSE, + G_OPTION_ARG_NONE, &run_tests, + "Don't run tests, just run the test server", NULL }, + { NULL } +}; + +int +main (int argc, char **argv) +{ + SoupServer *server; + SoupURI *uri; + + test_init (argc, argv, no_test_entry); + + server = soup_test_server_new (FALSE); + soup_server_add_handler (server, "/xmlrpc-server.php", + server_callback, NULL, NULL); + + loop = g_main_loop_new (NULL, TRUE); + + if (run_tests) { + uri = soup_uri_new ("http://127.0.0.1/xmlrpc-server.php"); + soup_uri_set_port (uri, soup_server_get_port (server)); + do_xmlrpc_tests (uri); + soup_uri_free (uri); + } else + printf ("Listening on port %d\n", soup_server_get_port (server)); + + g_main_loop_run (loop); + g_main_loop_unref (loop); + + soup_test_server_quit_unref (server); + if (run_tests) + test_cleanup (); + return errors != 0; +} diff --git a/tests/xmlrpc-server.php b/tests/xmlrpc-server.php new file mode 100644 index 0000000..2e3dd38 --- /dev/null +++ b/tests/xmlrpc-server.php @@ -0,0 +1,90 @@ + + $fault["faultCode"] = -32602; + $fault["faultString"] = "bad parameter"; + return $fault; +} + +# We only check the params in sum(), because that's the one that +# xmlrpc-test tests will fail if given bad args + +function sum ($method_name, $params, $app_data) +{ + if (xmlrpc_get_type ($params[0]) != "array") + return paramfault(); + + $sum = 0; + foreach ($params[0] as $val) { + if (xmlrpc_get_type ($val) != "int") + return paramfault(); + + $sum = $sum + $val; + } + return $sum; +} + +function countBools ($method_name, $params, $app_data) +{ + $counts["true"] = $counts["false"] = 0; + foreach ($params[0] as $val) { + if ($val) + $counts["true"] = $counts["true"] + 1; + else + $counts["false"] = $counts["false"] + 1; + } + return $counts; +} + +function md5sum ($method_name, $params, $app_data) +{ + $val = md5 ($params[0]->scalar, true); + xmlrpc_set_type ($val, "base64"); + return $val; +} + +function 
dateChange ($method_name, $params, $app_data) +{ + $date_str = $params[0]->scalar; + $date = strptime ($date_str, "%Y%m%dT%H:%M:%S"); + + foreach ($params[1] as $name => $val) { + if ($name == "date") + continue; + $date[$name] = $val; + } + + $ret = sprintf ("%04d%02d%02dT%02d:%02d:%02d", + $date["tm_year"] + 1900, $date["tm_mon"] + 1, + $date["tm_mday"], $date["tm_hour"], + $date["tm_min"], $date["tm_sec"]); + xmlrpc_set_type ($ret, "datetime"); + return $ret; +} + +function echo_ ($method_name, $params, $app_data) +{ + return $params[0]; +} + +# Work around xmlrpc-epi-php lossage; otherwise the datetime values +# we return will sometimes get a DST adjustment we don't want. +putenv ("TZ="); + +$xmlrpc_server = xmlrpc_server_create (); +xmlrpc_server_register_method($xmlrpc_server, "sum", "sum"); +xmlrpc_server_register_method($xmlrpc_server, "countBools", "countBools"); +xmlrpc_server_register_method($xmlrpc_server, "md5sum", "md5sum"); +xmlrpc_server_register_method($xmlrpc_server, "dateChange", "dateChange"); +xmlrpc_server_register_method($xmlrpc_server, "echo", "echo_"); + +$response = xmlrpc_server_call_method ($xmlrpc_server, + implode("\r\n", file('php://input')), + 0, array ("output_type" => "xml")); +echo ($response); + +xmlrpc_server_destroy ($xmlrpc_server); + +?> diff --git a/tests/xmlrpc-test.c b/tests/xmlrpc-test.c new file mode 100644 index 0000000..c7c1774 --- /dev/null +++ b/tests/xmlrpc-test.c @@ -0,0 +1,493 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* + * Copyright (C) 2001-2003, Ximian, Inc. 
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include <libsoup/soup.h>
+
+#include "test-utils.h"
+
+/* Session shared by every request in this program; created in main(). */
+static SoupSession *session;
+/* Used when no --uri option is given (main() then also calls apache_init()). */
+static const char *default_uri = "http://127.0.0.1:47524/xmlrpc-server.php";
+static const char *uri = NULL;
+/* Set by --server-test; disables the tolerance for the PHP echo bug. */
+static gboolean server_test = FALSE;
+
+/* Names of XML-RPC value types; not referenced in this part of the file. */
+static const char *const value_type[] = {
+	"BAD",
+	"int",
+	"boolean",
+	"string",
+	"double",
+	"datetime",
+	"base64",
+	"struct",
+	"array"
+};
+
+/*
+ * do_xmlrpc:
+ * @method: name of the remote method to call
+ * @retval: filled in with the parsed return value on success
+ * @...: (GType, value) pairs, terminated by G_TYPE_INVALID
+ *
+ * Builds an XML-RPC method call from the variable arguments, POSTs it
+ * to @uri on @session and parses the response into @retval.
+ *
+ * Returns: TRUE if a successful response was received and parsed.
+ * FALSE if the request could not be built or sent, the HTTP status was
+ * not successful, the server returned a fault, or the response could
+ * not be parsed; all but a request-building failure are logged at
+ * debug level 1.
+ */
+static gboolean
+do_xmlrpc (const char *method, GValue *retval, ...)
+{
+	SoupMessage *msg;
+	va_list args;
+	GValueArray *params;
+	GError *err = NULL;
+	char *body;
+	gboolean ok = FALSE;
+
+	va_start (args, retval);
+	params = soup_value_array_from_args (args);
+	va_end (args);
+	if (!params)
+		return FALSE;
+
+	body = soup_xmlrpc_build_method_call (method, params->values,
+					      params->n_values);
+	g_value_array_free (params);
+	if (!body)
+		return FALSE;
+
+	/* soup_message_new() yields NULL when the URI cannot be parsed,
+	 * which can happen with a bad --uri argument.
+	 */
+	msg = soup_message_new ("POST", uri);
+	if (!msg) {
+		debug_printf (1, "ERROR: could not parse URI '%s'\n", uri);
+		g_free (body);
+		return FALSE;
+	}
+
+	/* SOUP_MEMORY_TAKE: the message now owns (and frees) body. */
+	soup_message_set_request (msg, "text/xml", SOUP_MEMORY_TAKE,
+				  body, strlen (body));
+	soup_session_send_message (session, msg);
+
+	if (!SOUP_STATUS_IS_SUCCESSFUL (msg->status_code)) {
+		debug_printf (1, "ERROR: %d %s\n", msg->status_code,
+			      msg->reason_phrase);
+	} else if (!soup_xmlrpc_parse_method_response (msg->response_body->data,
+						       msg->response_body->length,
+						       retval, &err)) {
+		if (err) {
+			debug_printf (1, "FAULT: %d %s\n", err->code, err->message);
+			g_error_free (err);
+		} else
+			debug_printf (1, "ERROR: could not parse response\n");
+	} else
+		ok = TRUE;
+
+	g_object_unref (msg);
+	return ok;
+}
+
+/*
+ * check_xmlrpc:
+ * @value: a value returned by do_xmlrpc()
+ * @type: the GType that @value is expected to hold
+ * @...: location in which to store the contents of @value
+ *
+ * Verifies that @value holds @type and, if so, extracts its contents
+ * into the location passed in the variable arguments. On a type
+ * mismatch @value is unset and an error is logged.
+ *
+ * Returns: TRUE if @value held @type, FALSE otherwise.
+ */
+static gboolean
+check_xmlrpc (GValue *value, GType type, ...)
+{
+	va_list args;
+
+	if (!G_VALUE_HOLDS (value, type)) {
+		debug_printf (1, "ERROR: could not parse response\n");
+		g_value_unset (value);
+		return FALSE;
+	}
+
+	va_start (args, type);
+	SOUP_VALUE_GETV (value, type, args);
+	va_end (args);
+	return TRUE;
+}
+
+/*
+ * Calls "sum" with ten random integers in [0, 99] and checks that the
+ * returned int equals the locally computed total.
+ */
+static gboolean
+test_sum (void)
+{
+	GValueArray *ints;
+	int i, val, sum, result;
+	GValue retval;
+	gboolean ok;
+
+	debug_printf (1, "sum (array of int -> int): ");
+
+	ints = g_value_array_new (10);
+	for (i = sum = 0; i < 10; i++) {
+		val = rand () % 100;
+		debug_printf (2, "%s%d", i == 0 ? "[" : ", ", val);
+		soup_value_array_append (ints, G_TYPE_INT, val);
+		sum += val;
+	}
+	debug_printf (2, "] -> ");
+
+	ok = (do_xmlrpc ("sum", &retval,
+			 G_TYPE_VALUE_ARRAY, ints,
+			 G_TYPE_INVALID) &&
+	      check_xmlrpc (&retval, G_TYPE_INT, &result));
+	g_value_array_free (ints);
+
+	if (!ok)
+		return FALSE;
+
+	debug_printf (2, "%d: ", result);
+	debug_printf (1, "%s\n", result == sum ? "OK!" : "WRONG!");
+	return result == sum;
+}
+
+/*
+ * Calls "countBools" with ten random booleans and checks that the
+ * returned struct's "true" and "false" members match the local counts.
+ */
+static gboolean
+test_countBools (void)
+{
+	GValueArray *bools;
+	int i, trues, falses;
+	GValue retval;
+	int ret_trues, ret_falses;
+	gboolean val, ok;
+	GHashTable *result;
+
+	debug_printf (1, "countBools (array of boolean -> struct of ints): ");
+
+	bools = g_value_array_new (10);
+	for (i = trues = falses = 0; i < 10; i++) {
+		val = rand () > (RAND_MAX / 2);
+		debug_printf (2, "%s%c", i == 0 ? "[" : ", ", val ? 'T' : 'F');
+		soup_value_array_append (bools, G_TYPE_BOOLEAN, val);
+		if (val)
+			trues++;
+		else
+			falses++;
+	}
+	debug_printf (2, "] -> ");
+
+	ok = (do_xmlrpc ("countBools", &retval,
+			 G_TYPE_VALUE_ARRAY, bools,
+			 G_TYPE_INVALID) &&
+	      check_xmlrpc (&retval, G_TYPE_HASH_TABLE, &result));
+	g_value_array_free (bools);
+	if (!ok)
+		return FALSE;
+
+	/* The result table is released on every path, not only on success. */
+	if (!soup_value_hash_lookup (result, "true", G_TYPE_INT, &ret_trues)) {
+		debug_printf (1, "NO 'true' value in response\n");
+		g_hash_table_destroy (result);
+		return FALSE;
+	}
+	if (!soup_value_hash_lookup (result, "false", G_TYPE_INT, &ret_falses)) {
+		debug_printf (1, "NO 'false' value in response\n");
+		g_hash_table_destroy (result);
+		return FALSE;
+	}
+	g_hash_table_destroy (result);
+
+	debug_printf (2, "{ true: %d, false: %d } ", ret_trues, ret_falses);
+	ok = (trues == ret_trues) && (falses == ret_falses);
+	debug_printf (1, "%s\n", ok ? "OK!" : "WRONG!");
+	return ok;
+}
+
+/*
+ * Calls "md5sum" with 256 random bytes and checks that the returned
+ * byte array equals the MD5 digest computed locally with GChecksum.
+ */
+static gboolean
+test_md5sum (void)
+{
+	GByteArray *data, *result;
+	guint i;
+	GChecksum *checksum;
+	guchar digest[16];
+	gsize digest_len = sizeof (digest);
+	GValue retval;
+	gboolean ok;
+
+	debug_printf (1, "md5sum (base64 -> base64): ");
+
+	data = g_byte_array_new ();
+	g_byte_array_set_size (data, 256);
+	for (i = 0; i < data->len; i++)
+		data->data[i] = (guint8) rand ();
+
+	checksum = g_checksum_new (G_CHECKSUM_MD5);
+	g_checksum_update (checksum, data->data, data->len);
+	g_checksum_get_digest (checksum, digest, &digest_len);
+	g_checksum_free (checksum);
+
+	ok = (do_xmlrpc ("md5sum", &retval,
+			 SOUP_TYPE_BYTE_ARRAY, data,
+			 G_TYPE_INVALID) &&
+	      check_xmlrpc (&retval, SOUP_TYPE_BYTE_ARRAY, &result));
+	g_byte_array_free (data, TRUE);
+	if (!ok)
+		return FALSE;
+
+	if (result->len != digest_len) {
+		debug_printf (1, "result has WRONG length (%u)\n", result->len);
+		g_byte_array_free (result, TRUE);
+		return FALSE;
+	}
+
+	ok = (memcmp (digest, result->data, digest_len) == 0);
+	debug_printf (1, "%s\n", ok ? "OK!" : "WRONG!");
+	g_byte_array_free (result, TRUE);
+	return ok;
+}
+
+/*
+ * Calls "dateChange" with a random date and a struct holding a random
+ * subset of tm_* fields. The same field changes are applied to the
+ * local date, which is then compared field by field with the date the
+ * server returns.
+ */
+static gboolean
+test_dateChange (void)
+{
+	GHashTable *structval;
+	SoupDate *date, *result;
+	char *timestamp;
+	GValue retval;
+	gboolean ok;
+
+	debug_printf (1, "dateChange (date, struct of ints -> time): ");
+
+	date = soup_date_new (1970 + (rand () % 50),
+			      1 + rand () % 12,
+			      1 + rand () % 28,
+			      rand () % 24,
+			      rand () % 60,
+			      rand () % 60);
+	if (debug_level >= 2) {
+		timestamp = soup_date_to_string (date, SOUP_DATE_ISO8601_XMLRPC);
+		debug_printf (2, "date: %s, {", timestamp);
+		g_free (timestamp);
+	}
+
+	structval = soup_value_hash_new ();
+
+	/* Each field is overridden with probability 2/3. Year and month
+	 * are sent in struct tm convention (years since 1900, months
+	 * from 0).
+	 */
+	if (rand () % 3) {
+		date->year = 1970 + (rand () % 50);
+		debug_printf (2, "tm_year: %d, ", date->year - 1900);
+		soup_value_hash_insert (structval, "tm_year",
+					G_TYPE_INT, date->year - 1900);
+	}
+	if (rand () % 3) {
+		date->month = 1 + rand () % 12;
+		debug_printf (2, "tm_mon: %d, ", date->month - 1);
+		soup_value_hash_insert (structval, "tm_mon",
+					G_TYPE_INT, date->month - 1);
+	}
+	if (rand () % 3) {
+		date->day = 1 + rand () % 28;
+		debug_printf (2, "tm_mday: %d, ", date->day);
+		soup_value_hash_insert (structval, "tm_mday",
+					G_TYPE_INT, date->day);
+	}
+	if (rand () % 3) {
+		date->hour = rand () % 24;
+		debug_printf (2, "tm_hour: %d, ", date->hour);
+		soup_value_hash_insert (structval, "tm_hour",
+					G_TYPE_INT, date->hour);
+	}
+	if (rand () % 3) {
+		date->minute = rand () % 60;
+		debug_printf (2, "tm_min: %d, ", date->minute);
+		soup_value_hash_insert (structval, "tm_min",
+					G_TYPE_INT, date->minute);
+	}
+	if (rand () % 3) {
+		date->second = rand () % 60;
+		debug_printf (2, "tm_sec: %d, ", date->second);
+		soup_value_hash_insert (structval, "tm_sec",
+					G_TYPE_INT, date->second);
+	}
+
+	debug_printf (2, "} -> ");
+
+	ok = (do_xmlrpc ("dateChange", &retval,
+			 SOUP_TYPE_DATE, date,
+			 G_TYPE_HASH_TABLE, structval,
+			 G_TYPE_INVALID) &&
+	      check_xmlrpc (&retval, SOUP_TYPE_DATE, &result));
+	g_hash_table_destroy (structval);
+	if (!ok) {
+		soup_date_free (date);
+		return FALSE;
+	}
+
+	if (debug_level >= 2) {
+		timestamp = soup_date_to_string (result, SOUP_DATE_ISO8601_XMLRPC);
+		debug_printf (2, "%s: ", timestamp);
+		g_free (timestamp);
+	}
+
+	ok = ((date->year == result->year) &&
+	      (date->month == result->month) &&
+	      (date->day == result->day) &&
+	      (date->hour == result->hour) &&
+	      (date->minute == result->minute) &&
+	      (date->second == result->second));
+	soup_date_free (date);
+	soup_date_free (result);
+
+	debug_printf (1, "%s\n", ok ? "OK!" : "WRONG!");
+	return ok;
+}
+
+/* Strings sent to "echo"; they exercise '&', '<', '>' and entity text. */
+static const char *const echo_strings[] = {
+	"This is a test",
+	"& so is this",
+	"and so is <this>",
+	"&amp; so is &lt;this&gt;"
+};
+#define N_ECHO_STRINGS G_N_ELEMENTS (echo_strings)
+
+/*
+ * The entries of echo_strings with every '&', '<' and '>' removed.
+ * test_echo() accepts these as a known PHP-side bug unless the
+ * program runs with --server-test.
+ */
+static const char *const echo_strings_broken[] = {
+	"This is a test",
+	" so is this",
+	"and so is this",
+	"amp; so is lt;thisgt;"
+};
+
+/* Returns the string held by @value, or NULL if it holds no string. */
+static const char *
+echoed_string (GValue *value)
+{
+	return G_VALUE_HOLDS_STRING (value) ? g_value_get_string (value) : NULL;
+}
+
+/*
+ * Calls "echo" with echo_strings and checks that the same strings come
+ * back in the same order. Answers matching echo_strings_broken are
+ * tolerated (and reported as PHP's fault) when not in server-test mode.
+ */
+static gboolean
+test_echo (void)
+{
+	GValueArray *originals, *echoes;
+	GValue retval;
+	guint i;
+	const char *echoed;
+	gboolean php_bug = FALSE;
+
+	debug_printf (1, "echo (array of string -> array of string): ");
+
+	originals = g_value_array_new (N_ECHO_STRINGS);
+	for (i = 0; i < N_ECHO_STRINGS; i++) {
+		soup_value_array_append (originals, G_TYPE_STRING, echo_strings[i]);
+		debug_printf (2, "%s\"%s\"", i == 0 ? "[" : ", ", echo_strings[i]);
+	}
+	debug_printf (2, "] -> ");
+
+	if (!(do_xmlrpc ("echo", &retval,
+			 G_TYPE_VALUE_ARRAY, originals,
+			 G_TYPE_INVALID) &&
+	      check_xmlrpc (&retval, G_TYPE_VALUE_ARRAY, &echoes))) {
+		g_value_array_free (originals);
+		return FALSE;
+	}
+	g_value_array_free (originals);
+
+	if (debug_level >= 2) {
+		for (i = 0; i < echoes->n_values; i++) {
+			echoed = echoed_string (&echoes->values[i]);
+			debug_printf (2, "%s\"%s\"", i == 0 ? "[" : ", ",
+				      echoed ? echoed : "(not a string)");
+		}
+		debug_printf (2, "] -> ");
+	}
+
+	if (echoes->n_values != N_ECHO_STRINGS) {
+		debug_printf (1, " WRONG! Wrong number of return strings\n");
+		g_value_array_free (echoes);
+		return FALSE;
+	}
+
+	for (i = 0; i < echoes->n_values; i++) {
+		/* A non-string element can never match; treat it as a mismatch
+		 * instead of handing NULL to strcmp().
+		 */
+		echoed = echoed_string (&echoes->values[i]);
+		if (echoed && strcmp (echo_strings[i], echoed) == 0)
+			continue;
+		if (echoed && !server_test &&
+		    strcmp (echo_strings_broken[i], echoed) == 0) {
+			php_bug = TRUE;
+			continue;
+		}
+
+		debug_printf (1, " WRONG! Mismatch at %u\n", i + 1);
+		g_value_array_free (echoes);
+		return FALSE;
+	}
+
+	if (php_bug)
+		debug_printf (1, "WRONG, but it's php's fault\n");
+	else
+		debug_printf (1, "OK!\n");
+	g_value_array_free (echoes);
+	return TRUE;
+}
+
+/*
+ * do_bad_xmlrpc:
+ * @body: raw request body to POST as text/xml
+ *
+ * Sends a deliberately invalid request and expects the server to
+ * answer with an XML-RPC fault.
+ *
+ * Returns: TRUE only if the response parsed as a fault. An HTTP error,
+ * an unparseable response or a successful method response all count
+ * as failure.
+ */
+static gboolean
+do_bad_xmlrpc (const char *body)
+{
+	SoupMessage *msg;
+	GError *err = NULL;
+	GValue retval;
+	gboolean got_fault = FALSE;
+
+	/* soup_message_new() yields NULL when the URI cannot be parsed. */
+	msg = soup_message_new ("POST", uri);
+	if (!msg) {
+		debug_printf (1, "ERROR: could not parse URI '%s'\n", uri);
+		return FALSE;
+	}
+
+	soup_message_set_request (msg, "text/xml", SOUP_MEMORY_COPY,
+				  body, strlen (body));
+	soup_session_send_message (session, msg);
+
+	if (!SOUP_STATUS_IS_SUCCESSFUL (msg->status_code)) {
+		debug_printf (1, "ERROR: %d %s\n", msg->status_code,
+			      msg->reason_phrase);
+	} else if (soup_xmlrpc_parse_method_response (msg->response_body->data,
+						      msg->response_body->length,
+						      &retval, &err)) {
+		debug_printf (1, "Unexpectedly got successful response!\n");
+		/* The parsed value is not used; release it. */
+		g_value_unset (&retval);
+	} else if (err) {
+		debug_printf (1, "FAULT: %d %s (OK!)\n",
+			      err->code, err->message);
+		g_error_free (err);
+		got_fault = TRUE;
+	} else
+		debug_printf (1, "ERROR: could not parse response\n");
+
+	g_object_unref (msg);
+	return got_fault;
+}
+
+/* A methodCall element with no method name must produce a fault. */
+static gboolean
+test_fault_malformed (void)
+{
+	debug_printf (1, "malformed request: ");
+
+	return do_bad_xmlrpc ("<methodCall/>");
+}
+
+/* Calling a method the server does not implement must produce a fault. */
+static gboolean
+test_fault_method (void)
+{
+	debug_printf (1, "request to non-existent method: ");
+
+	return do_bad_xmlrpc ("<methodCall><methodName>no_such_method</methodName><params><param><value><int>1</int></value></param></params></methodCall>");
+}
+
+/* "sum" is called with a bare int where test_sum() passes an array. */
+static gboolean
+test_fault_args (void)
+{
+	debug_printf (1, "request with invalid args: ");
+
+	return do_bad_xmlrpc ("<methodCall><methodName>sum</methodName><params><param><value><int>1</int></value></param></params></methodCall>");
+}
+
+/* Command-line options handled in addition to those of test_init(). */
+static GOptionEntry xmlrpc_entries[] = {
+	{ "uri", 'u', 0, G_OPTION_ARG_STRING, &uri,
+	  "Alternate URI for server", NULL },
+	{ "server-test", 's', 0, G_OPTION_ARG_NONE, &server_test,
+	  "If this is being run from xmlrpc-server-test", NULL },
+	{ NULL }
+};
+
+/*
+ * Runs every test against the server at --uri (or, by default, against
+ * the URI in default_uri after calling apache_init()) and counts the
+ * failures in errors.
+ *
+ * Returns: 0 if all tests passed, 1 otherwise.
+ */
+int
+main (int argc, char **argv)
+{
+	test_init (argc, argv, xmlrpc_entries);
+
+	if (!uri) {
+		apache_init ();
+		uri = default_uri;
+	}
+
+	srand (time (NULL));
+
+	session = soup_test_session_new (SOUP_TYPE_SESSION_SYNC, NULL);
+
+	if (!test_sum ())
+		errors++;
+	if (!test_countBools ())
+		errors++;
+	if (!test_md5sum ())
+		errors++;
+	if (!test_dateChange ())
+		errors++;
+	if (!test_echo ())
+		errors++;
+	if (!test_fault_malformed ())
+		errors++;
+	if (!test_fault_method ())
+		errors++;
+	if (!test_fault_args ())
+		errors++;
+
+	soup_test_session_abort_unref (session);
+
+	test_cleanup ();
+	return errors != 0;
+}