1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // Package url parses URLs and implements query escaping.
// Error reports an error and the operation and URL that caused it.
type Error struct {
	Op  string // operation that failed, e.g. "parse"
	URL string // URL the operation was applied to
	Err error  // underlying cause; must be non-nil when Error is called
}

// Error formats the failure as "<Op> <URL>: <cause>".
func (e *Error) Error() string {
	return e.Op + " " + e.URL + ": " + e.Err.Error()
}
// ishex reports whether c is an ASCII hexadecimal digit
// (0-9, a-f or A-F).
func ishex(c byte) bool {
	switch {
	case '0' <= c && c <= '9':
		return true
	case 'a' <= c && c <= 'f':
		return true
	case 'A' <= c && c <= 'F':
		return true
	}
	return false
}
// unhex returns the numeric value (0-15) of the ASCII hexadecimal
// digit c. It returns 0 for any byte that is not a hex digit, so
// callers are expected to validate with ishex first.
func unhex(c byte) byte {
	switch {
	case '0' <= c && c <= '9':
		return c - '0'
	case 'a' <= c && c <= 'f':
		return c - 'a' + 10
	case 'A' <= c && c <= 'F':
		return c - 'A' + 10
	}
	return 0
}
// encoding selects which part of a URL is being escaped or unescaped;
// each part permits a different set of characters to appear unescaped.
type encoding int

const (
	encodePath encoding = 1 + iota
	encodeUserPassword
	encodeQueryComponent
	encodeFragment
)
// EscapeError is returned when a string contains a malformed
// percent-escape; its value is the offending text.
type EscapeError string

// Error returns the message "invalid URL escape" followed by the
// offending text in Go-quoted form.
func (e EscapeError) Error() string {
	return "invalid URL escape " + strconv.Quote(string(e))
}
65 // Return true if the specified character should be escaped when
66 // appearing in a URL string, according to RFC 3986.
67 // When 'all' is true the full range of reserved characters are matched.
68 func shouldEscape(c byte, mode encoding) bool {
69 // §2.3 Unreserved characters (alphanum)
70 if 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' {
75 case '-', '_', '.', '~': // §2.3 Unreserved characters (mark)
78 case '$', '&', '+', ',', '/', ':', ';', '=', '?', '@': // §2.2 Reserved characters (reserved)
79 // Different sections of the URL allow a few of
80 // the reserved characters to appear unescaped.
82 case encodePath: // §3.3
83 // The RFC allows : @ & = + $ but saves / ; , for assigning
84 // meaning to individual path segments. This package
85 // only manipulates the path as a whole, so we allow those
86 // last two as well. That leaves only ? to escape.
89 case encodeUserPassword: // §3.2.2
90 // The RFC allows ; : & = + $ , in userinfo, so we must escape only @ and /.
91 // The parsing of userinfo treats : as special so we must escape that too.
92 return c == '@' || c == '/' || c == ':'
94 case encodeQueryComponent: // §3.4
95 // The RFC reserves (so we must escape) everything.
98 case encodeFragment: // §4.1
99 // The RFC text is silent but the grammar allows
100 // everything, so escape nothing.
105 // Everything else must be escaped.
109 // QueryUnescape does the inverse transformation of QueryEscape, converting
110 // %AB into the byte 0xAB and '+' into ' ' (space). It returns an error if
111 // any % is not followed by two hexadecimal digits.
112 func QueryUnescape(s string) (string, error) {
113 return unescape(s, encodeQueryComponent)
116 // unescape unescapes a string; the mode specifies
117 // which section of the URL string is being unescaped.
118 func unescape(s string, mode encoding) (string, error) {
119 // Count %, check that they're well-formed.
122 for i := 0; i < len(s); {
126 if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) {
131 return "", EscapeError(s)
135 hasPlus = mode == encodeQueryComponent
142 if n == 0 && !hasPlus {
146 t := make([]byte, len(s)-2*n)
148 for i := 0; i < len(s); {
151 t[j] = unhex(s[i+1])<<4 | unhex(s[i+2])
155 if mode == encodeQueryComponent {
168 return string(t), nil
171 // QueryEscape escapes the string so it can be safely placed
172 // inside a URL query.
173 func QueryEscape(s string) string {
174 return escape(s, encodeQueryComponent)
177 func escape(s string, mode encoding) string {
178 spaceCount, hexCount := 0, 0
179 for i := 0; i < len(s); i++ {
181 if shouldEscape(c, mode) {
182 if c == ' ' && mode == encodeQueryComponent {
190 if spaceCount == 0 && hexCount == 0 {
194 t := make([]byte, len(s)+2*hexCount)
196 for i := 0; i < len(s); i++ {
198 case c == ' ' && mode == encodeQueryComponent:
201 case shouldEscape(c, mode):
203 t[j+1] = "0123456789ABCDEF"[c>>4]
204 t[j+2] = "0123456789ABCDEF"[c&15]
214 // A URL represents a parsed URL (technically, a URI reference).
215 // The general form represented is:
217 // scheme://[userinfo@]host/path[?query][#fragment]
219 // URLs that do not start with a slash after the scheme are interpreted as:
221 // scheme:opaque[?query][#fragment]
223 // Note that the Path field is stored in decoded form: /%47%6f%2f becomes /Go/.
224 // A consequence is that it is impossible to tell which slashes in the Path were
225 // slashes in the raw URL and which were %2f. This distinction is rarely important,
226 // but when it is a client must use other routines to parse the raw URL or construct
227 // the parsed URL. For example, an HTTP server can consult req.RequestURI, and
228 // an HTTP client can use URL{Host: "example.com", Opaque: "//example.com/Go%2f"}
229 // instead of URL{Host: "example.com", Path: "/Go/"}.
232 Opaque string // encoded opaque data
233 User *Userinfo // username and password information
234 Host string // host or host:port
236 RawQuery string // encoded query values, without '?'
237 Fragment string // fragment for references, without '#'
// User returns a Userinfo containing the provided username
// and no password set.
func User(username string) *Userinfo {
	return &Userinfo{username, "", false}
}

// UserPassword returns a Userinfo containing the provided username
// and password.
// This functionality should only be used with legacy web sites.
// RFC 2396 warns that interpreting Userinfo this way
// ``is NOT RECOMMENDED, because the passing of authentication
// information in clear text (such as URI) has proven to be a
// security risk in almost every case where it has been used.''
func UserPassword(username, password string) *Userinfo {
	return &Userinfo{username, password, true}
}

// The Userinfo type is an immutable encapsulation of username and
// password details for a URL. An existing Userinfo value is guaranteed
// to have a username set (potentially empty, as allowed by RFC 2396),
// and optionally a password.
type Userinfo struct {
	username    string
	password    string
	passwordSet bool // distinguishes an empty password from no password
}

// Username returns the username.
func (u *Userinfo) Username() string {
	return u.username
}

// Password returns the password in case it is set, and whether it is set.
func (u *Userinfo) Password() (string, bool) {
	if u.passwordSet {
		return u.password, true
	}
	return "", false
}
280 // String returns the encoded userinfo information in the standard form
281 // of "username[:password]".
282 func (u *Userinfo) String() string {
283 s := escape(u.username, encodeUserPassword)
285 s += ":" + escape(u.password, encodeUserPassword)
// Maybe rawurl is of the form scheme:path.
// (Scheme must be [a-zA-Z][a-zA-Z0-9+-.]*)
// If so, return scheme, path; else return "", rawurl.
// A rawurl that begins with ':' is rejected with an error.
func getscheme(rawurl string) (scheme, path string, err error) {
	for i := 0; i < len(rawurl); i++ {
		c := rawurl[i]
		switch {
		case 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z':
			// Letters are valid anywhere in a scheme; keep scanning.
		case '0' <= c && c <= '9' || c == '+' || c == '-' || c == '.':
			// These are valid only after the first character.
			if i == 0 {
				return "", rawurl, nil
			}
		case c == ':':
			if i == 0 {
				return "", "", errors.New("missing protocol scheme")
			}
			return rawurl[0:i], rawurl[i+1:], nil
		default:
			// we have encountered an invalid character,
			// so there is no valid scheme
			return "", rawurl, nil
		}
	}
	return "", rawurl, nil
}
// Maybe s is of the form t c u.
// If so, return t, c u (or t, u if cutc == true).
// If not, return s, "".
// Only the first occurrence of c in s is considered.
func split(s string, c string, cutc bool) (string, string) {
	i := strings.Index(s, c)
	if i < 0 {
		return s, ""
	}
	if cutc {
		return s[0:i], s[i+len(c):]
	}
	return s[0:i], s[i:]
}
331 // Parse parses rawurl into a URL structure.
332 // The rawurl may be relative or absolute.
333 func Parse(rawurl string) (url *URL, err error) {
335 u, frag := split(rawurl, "#", true)
336 if url, err = parse(u, false); err != nil {
342 if url.Fragment, err = unescape(frag, encodeFragment); err != nil {
343 return nil, &Error{"parse", rawurl, err}
348 // ParseRequestURI parses rawurl into a URL structure. It assumes that
349 // rawurl was received in an HTTP request, so the rawurl is interpreted
350 // only as an absolute URI or an absolute path.
351 // The string rawurl is assumed not to have a #fragment suffix.
352 // (Web browsers strip #fragment before sending the URL to a web server.)
353 func ParseRequestURI(rawurl string) (url *URL, err error) {
354 return parse(rawurl, true)
357 // parse parses a URL from a string in one of two contexts. If
358 // viaRequest is true, the URL is assumed to have arrived via an HTTP request,
359 // in which case only absolute URLs or path-absolute relative URLs are allowed.
360 // If viaRequest is false, all forms of relative URLs are allowed.
361 func parse(rawurl string, viaRequest bool) (url *URL, err error) {
364 if rawurl == "" && viaRequest {
365 err = errors.New("empty url")
375 // Split off possible leading "http:", "mailto:", etc.
376 // Cannot contain escaped characters.
377 if url.Scheme, rest, err = getscheme(rawurl); err != nil {
380 url.Scheme = strings.ToLower(url.Scheme)
382 rest, url.RawQuery = split(rest, "?", true)
384 if !strings.HasPrefix(rest, "/") {
385 if url.Scheme != "" {
386 // We consider rootless paths per RFC 3986 as opaque.
391 err = errors.New("invalid URI for request")
396 if (url.Scheme != "" || !viaRequest && !strings.HasPrefix(rest, "///")) && strings.HasPrefix(rest, "//") {
398 authority, rest = split(rest[2:], "/", false)
399 url.User, url.Host, err = parseAuthority(authority)
403 if strings.Contains(url.Host, "%") {
404 err = errors.New("hexadecimal escape in host")
408 if url.Path, err = unescape(rest, encodePath); err != nil {
414 return nil, &Error{"parse", rawurl, err}
417 func parseAuthority(authority string) (user *Userinfo, host string, err error) {
418 i := strings.LastIndex(authority, "@")
423 userinfo, host := authority[:i], authority[i+1:]
424 if strings.Index(userinfo, ":") < 0 {
425 if userinfo, err = unescape(userinfo, encodeUserPassword); err != nil {
428 user = User(userinfo)
430 username, password := split(userinfo, ":", true)
431 if username, err = unescape(username, encodeUserPassword); err != nil {
434 if password, err = unescape(password, encodeUserPassword); err != nil {
437 user = UserPassword(username, password)
442 // String reassembles the URL into a valid URL string.
443 func (u *URL) String() string {
446 buf.WriteString(u.Scheme)
450 buf.WriteString(u.Opaque)
452 if u.Scheme != "" || u.Host != "" || u.User != nil {
453 buf.WriteString("//")
454 if u := u.User; u != nil {
455 buf.WriteString(u.String())
458 if h := u.Host; h != "" {
462 buf.WriteString(escape(u.Path, encodePath))
464 if u.RawQuery != "" {
466 buf.WriteString(u.RawQuery)
468 if u.Fragment != "" {
470 buf.WriteString(escape(u.Fragment, encodeFragment))
// Values maps a string key to a list of values.
// It is typically used for query parameters and form values.
// Unlike in the http.Header map, the keys in a Values map
// are case-sensitive.
type Values map[string][]string

// Get gets the first value associated with the given key.
// If there are no values associated with the key, Get returns
// the empty string. To access multiple values, use the map
// directly. Get is safe to call on a nil Values.
func (v Values) Get(key string) string {
	if v == nil {
		return ""
	}
	vs, ok := v[key]
	if !ok || len(vs) == 0 {
		return ""
	}
	return vs[0]
}

// Set sets the key to value. It replaces any existing
// values.
func (v Values) Set(key, value string) {
	v[key] = []string{value}
}

// Add adds the value to key. It appends to any existing
// values associated with key.
func (v Values) Add(key, value string) {
	v[key] = append(v[key], value)
}

// Del deletes the values associated with key.
func (v Values) Del(key string) {
	delete(v, key)
}
513 // ParseQuery parses the URL-encoded query string and returns
514 // a map listing the values specified for each key.
515 // ParseQuery always returns a non-nil map containing all the
516 // valid query parameters found; err describes the first decoding error
517 // encountered, if any.
518 func ParseQuery(query string) (m Values, err error) {
520 err = parseQuery(m, query)
524 func parseQuery(m Values, query string) (err error) {
527 if i := strings.IndexAny(key, "&;"); i >= 0 {
528 key, query = key[:i], key[i+1:]
536 if i := strings.Index(key, "="); i >= 0 {
537 key, value = key[:i], key[i+1:]
539 key, err1 := QueryUnescape(key)
546 value, err1 = QueryUnescape(value)
553 m[key] = append(m[key], value)
558 // Encode encodes the values into ``URL encoded'' form.
559 // e.g. "foo=bar&bar=baz"
560 func (v Values) Encode() string {
565 keys := make([]string, 0, len(v))
567 keys = append(keys, k)
570 for _, k := range keys {
572 prefix := QueryEscape(k) + "="
573 for _, v := range vs {
577 buf.WriteString(prefix)
578 buf.WriteString(QueryEscape(v))
// resolvePath applies special path segments from refs and applies
// them to base, per RFC 3986.
// An empty ref selects base; a ref not starting with '/' is appended to
// base's directory (everything up to its last '/'); any other ref
// replaces base. "." segments are then dropped and ".." segments remove
// the preceding segment. The result starts with exactly one '/', except
// that an empty combined path yields "".
func resolvePath(base, ref string) string {
	var full string
	if ref == "" {
		full = base
	} else if ref[0] != '/' {
		i := strings.LastIndex(base, "/")
		full = base[:i+1] + ref
	} else {
		full = ref
	}
	if full == "" {
		return ""
	}
	var dst []string
	src := strings.Split(full, "/")
	for _, elem := range src {
		switch elem {
		case ".":
			// drop
		case "..":
			if len(dst) > 0 {
				dst = dst[:len(dst)-1]
			}
		default:
			dst = append(dst, elem)
		}
	}
	if last := src[len(src)-1]; last == "." || last == ".." {
		// Add final slash to the joined path.
		dst = append(dst, "")
	}
	return "/" + strings.TrimLeft(strings.Join(dst, "/"), "/")
}
620 // IsAbs returns true if the URL is absolute.
621 func (u *URL) IsAbs() bool {
622 return u.Scheme != ""
625 // Parse parses a URL in the context of the receiver. The provided URL
626 // may be relative or absolute. Parse returns nil, err on parse
627 // failure, otherwise its return value is the same as ResolveReference.
628 func (u *URL) Parse(ref string) (*URL, error) {
629 refurl, err := Parse(ref)
633 return u.ResolveReference(refurl), nil
636 // ResolveReference resolves a URI reference to an absolute URI from
637 // an absolute base URI, per RFC 3986 Section 5.2. The URI reference
638 // may be relative or absolute. ResolveReference always returns a new
639 // URL instance, even if the returned URL is identical to either the
640 // base or reference. If ref is an absolute URL, then ResolveReference
641 // ignores base and returns a copy of ref.
642 func (u *URL) ResolveReference(ref *URL) *URL {
644 if ref.Scheme == "" {
645 url.Scheme = u.Scheme
647 if ref.Scheme != "" || ref.Host != "" || ref.User != nil {
648 // The "absoluteURI" or "net_path" cases.
649 url.Path = resolvePath(ref.Path, "")
652 if ref.Opaque != "" {
659 if ref.RawQuery == "" {
660 url.RawQuery = u.RawQuery
661 if ref.Fragment == "" {
662 url.Fragment = u.Fragment
666 // The "abs_path" or "rel_path" cases.
669 url.Path = resolvePath(u.Path, ref.Path)
673 // Query parses RawQuery and returns the corresponding values.
674 func (u *URL) Query() Values {
675 v, _ := ParseQuery(u.RawQuery)
679 // RequestURI returns the encoded path?query or opaque?query
680 // string that would be used in an HTTP request for u.
681 func (u *URL) RequestURI() string {
684 result = escape(u.Path, encodePath)
689 if strings.HasPrefix(result, "//") {
690 result = u.Scheme + ":" + result
693 if u.RawQuery != "" {
694 result += "?" + u.RawQuery