1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // Package url parses URLs and implements query escaping.
17 // Error reports an error and the operation and URL that caused it.
24 func (e *Error) Error() string { return e.Op + " " + e.URL + ": " + e.Err.Error() }
// ishex tests c against the three ASCII ranges of hexadecimal digits:
// '0'-'9', 'a'-'f' and 'A'-'F'. unescape uses it to validate the two bytes
// that follow a '%'.
// NOTE(review): the statements under each case are not visible in this
// listing; presumably each returns true and the fall-through returns false.
26 func ishex(c byte) bool {
28 case '0' <= c && c <= '9':
30 case 'a' <= c && c <= 'f':
32 case 'A' <= c && c <= 'F':
// unhex converts a hexadecimal digit byte to a byte value. unescape combines
// two results as the high and low nibble of one decoded byte.
// NOTE(review): the per-case return expressions are not visible in this
// listing; presumably c-'0', c-'a'+10 and c-'A'+10 — confirm.
38 func unhex(c byte) byte {
40 case '0' <= c && c <= '9':
42 case 'a' <= c && c <= 'f':
44 case 'A' <= c && c <= 'F':
// encodePath is the encoding mode for the path section of a URL; it is
// passed to escape, unescape and shouldEscape.
// NOTE(review): the enclosing const block and the other modes used in this
// file (encodeUserPassword, encodeQueryComponent, encodeFragment) are not
// visible here; 1 + iota presumably keeps the zero value of encoding from
// naming a valid mode — confirm.
53 encodePath encoding = 1 + iota
// EscapeError is the error returned by unescape for a malformed '%' escape.
// The string it carries is the text that its Error method reports, quoted.
59 type EscapeError string
// Error returns "invalid URL escape " followed by the carried text, quoted
// with strconv.Quote so that unprintable bytes show up escaped.
61 func (e EscapeError) Error() string {
62 return "invalid URL escape " + strconv.Quote(string(e))
65 // shouldEscape reports whether the specified character should be escaped when
66 // appearing in a URL string, according to RFC 3986.
67 // The mode names the section of the URL being encoded; each section lets a
// different subset of the reserved characters appear unescaped.
68 func shouldEscape(c byte, mode encoding) bool {
69 // §2.3 Unreserved characters (alphanum)
70 if 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' {
75 case '-', '_', '.', '~': // §2.3 Unreserved characters (mark)
78 case '$', '&', '+', ',', '/', ':', ';', '=', '?', '@': // §2.2 Reserved characters (reserved)
79 // Different sections of the URL allow a few of
80 // the reserved characters to appear unescaped.
82 case encodePath: // §3.3
83 // The RFC allows : @ & = + $ but saves / ; , for assigning
84 // meaning to individual path segments. This package
85 // only manipulates the path as a whole, so we allow those
86 // last three as well. That leaves only ? to escape.
89 case encodeUserPassword: // §3.2.2
90 // The RFC allows ; : & = + $ , in userinfo, so we must escape only @ and /.
91 // The parsing of userinfo treats : as special so we must escape that too.
92 return c == '@' || c == '/' || c == ':'
94 case encodeQueryComponent: // §3.4
95 // The RFC reserves (so we must escape) everything.
98 case encodeFragment: // §4.1
99 // The RFC text is silent but the grammar allows
100 // everything, so escape nothing.
105 // Everything else must be escaped.
109 // QueryUnescape does the inverse transformation of QueryEscape, converting
110 // %AB into the byte 0xAB and '+' into ' ' (space). It returns an error if
111 // any % is not followed by two hexadecimal digits.
// It is a thin wrapper around unescape in encodeQueryComponent mode.
112 func QueryUnescape(s string) (string, error) {
113 return unescape(s, encodeQueryComponent)
116 // unescape unescapes a string; the mode specifies
117 // which section of the URL string is being unescaped.
// It makes two passes over s: the first validates and counts the '%'
// escapes, the second writes the decoded bytes into a buffer sized from
// that count. A malformed escape is reported as an EscapeError.
118 func unescape(s string, mode encoding) (string, error) {
119 // Count %, check that they're well-formed.
122 for i := 0; i < len(s); {
// A '%' needs two more bytes inside s, and both must be hex digits.
126 if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) {
131 return "", EscapeError(s)
// hasPlus can only become true in query-component mode.
// NOTE(review): presumably this sits under a '+' case, matching the
// '+' to space rule documented on QueryUnescape — confirm.
135 hasPlus = mode == encodeQueryComponent
// NOTE(review): no escapes and nothing to rewrite; presumably s is
// returned unchanged here — the body is not visible in this listing.
142 if n == 0 && !hasPlus {
// Each %XX escape collapses three input bytes into one output byte,
// so the result is 2*n bytes shorter than s.
146 t := make([]byte, len(s)-2*n)
148 for i := 0; i < len(s); {
// First hex digit supplies the high nibble, second the low nibble.
151 t[j] = unhex(s[i+1])<<4 | unhex(s[i+2])
155 if mode == encodeQueryComponent {
168 return string(t), nil
171 // QueryEscape escapes the string so it can be safely placed
172 // inside a URL query.
// It is a thin wrapper around escape in encodeQueryComponent mode.
173 func QueryEscape(s string) string {
174 return escape(s, encodeQueryComponent)
// escape returns s with every byte for which shouldEscape(c, mode) holds
// replaced by an escaped form. It makes two passes: the first counts what
// needs escaping, the second fills a buffer sized from those counts.
177 func escape(s string, mode encoding) string {
178 spaceCount, hexCount := 0, 0
179 for i := 0; i < len(s); i++ {
181 if shouldEscape(c, mode) {
// Spaces in a query component are singled out from the bytes that
// need a %XX escape.
// NOTE(review): presumably they are counted in spaceCount and later
// written as '+', the inverse of QueryUnescape — confirm.
182 if c == ' ' && mode == encodeQueryComponent {
// NOTE(review): nothing to escape; presumably s is returned as is.
190 if spaceCount == 0 && hexCount == 0 {
// Each %XX escape grows one input byte into three output bytes.
194 t := make([]byte, len(s)+2*hexCount)
196 for i := 0; i < len(s); i++ {
198 case c == ' ' && mode == encodeQueryComponent:
201 case shouldEscape(c, mode):
// Upper-case hex digits: high nibble first, then low nibble.
203 t[j+1] = "0123456789ABCDEF"[c>>4]
204 t[j+2] = "0123456789ABCDEF"[c&15]
214 // A URL represents a parsed URL (technically, a URI reference).
215 // The general form represented is:
217 // scheme://[userinfo@]host/path[?query][#fragment]
219 // URLs that do not start with a slash after the scheme are interpreted as:
221 // scheme:opaque[?query][#fragment]
// NOTE(review): the struct header and the Scheme and Path fields used
// elsewhere in this file (u.Scheme, u.Path) are not visible in this listing.
225 Opaque string // encoded opaque data
226 User *Userinfo // username and password information
227 Host string // host or host:port
// RawQuery is kept in encoded form: String and RequestURI write it as is.
229 RawQuery string // encoded query values, without '?'
// Fragment is held decoded: Parse unescapes it and String re-escapes it.
230 Fragment string // fragment for references, without '#'
233 // User returns a Userinfo containing the provided username
234 // and no password set.
235 func User(username string) *Userinfo {
// Positional literal: username, empty password, and false.
// NOTE(review): the third field is presumably the "password set" flag;
// the Userinfo field list is not visible here — confirm.
236 return &Userinfo{username, "", false}
239 // UserPassword returns a Userinfo containing the provided username
// and password.
241 // This functionality should only be used with legacy web sites.
242 // RFC 2396 warns that interpreting Userinfo this way
243 // ``is NOT RECOMMENDED, because the passing of authentication
244 // information in clear text (such as URI) has proven to be a
245 // security risk in almost every case where it has been used.''
246 func UserPassword(username, password string) *Userinfo {
// Positional literal: username, password, and true (contrast with User,
// which passes an empty password and false).
247 return &Userinfo{username, password, true}
250 // The Userinfo type is an immutable encapsulation of username and
251 // password details for a URL. An existing Userinfo value is guaranteed
252 // to have a username set (potentially empty, as allowed by RFC 2396),
253 // and optionally a password.
// Values are built with User or UserPassword.
// NOTE(review): the field list is not visible in this listing; other code in
// the file reads u.username and u.password and constructs the struct with
// three positional values (string, string, bool).
254 type Userinfo struct {
260 // Username returns the username.
// NOTE(review): the body is not visible in this listing; presumably it
// returns u.username — confirm.
261 func (u *Userinfo) Username() string {
265 // Password returns the password in case it is set, and whether it is set.
266 func (u *Userinfo) Password() (string, bool) {
// This is the "password is set" result.
// NOTE(review): the guarding condition and the unset result (presumably
// "", false) are not visible in this listing.
268 return u.password, true
273 // String returns the encoded userinfo information in the standard form
274 // of "username[:password]".
// Both parts are escaped in encodeUserPassword mode, so '@', '/' and ':'
// inside either part come out escaped.
275 func (u *Userinfo) String() string {
276 s := escape(u.username, encodeUserPassword)
// NOTE(review): presumably only reached when a password is set — the
// guarding condition is not visible in this listing.
278 s += ":" + escape(u.password, encodeUserPassword)
283 // Maybe rawurl is of the form scheme:path.
284 // (Scheme must be [a-zA-Z][a-zA-Z0-9+-.]*)
285 // If so, return scheme, path; else return "", rawurl.
// One path returns the error "missing protocol scheme" instead.
286 func getscheme(rawurl string) (scheme, path string, err error) {
287 for i := 0; i < len(rawurl); i++ {
290 case 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z':
// Digits, '+', '-' and '.' are scheme characters too.
// NOTE(review): per the pattern above they may not lead the scheme, so
// the "no scheme" return below is presumably guarded by i == 0.
292 case '0' <= c && c <= '9' || c == '+' || c == '-' || c == '.':
294 return "", rawurl, nil
// NOTE(review): presumably raised for a ':' at index 0, i.e. an empty
// scheme — the guarding condition is not visible in this listing.
298 return "", "", errors.New("missing protocol scheme")
// Scheme is everything before index i; the byte at i itself is dropped.
300 return rawurl[0:i], rawurl[i+1:], nil
302 // we have encountered an invalid character,
303 // so there is no valid scheme
304 return "", rawurl, nil
307 return "", rawurl, nil
310 // Maybe s is of the form t c u.
311 // If so, return t, c u (or t, u if cutc == true).
312 // If not, return s, "".
// Callers in this file use it to cut off the fragment, the query, the
// authority and the password.
313 func split(s string, c byte, cutc bool) (string, string) {
314 for i := 0; i < len(s); i++ {
// The second result starts after index i, so the byte at i is dropped:
// this is the cutc == true form described above.
317 return s[0:i], s[i+1:]
325 // Parse parses rawurl into a URL structure.
326 // The rawurl may be relative or absolute.
// A failure to unescape the fragment is returned as an *Error whose
// operation is "parse" and whose URL is rawurl.
327 func Parse(rawurl string) (url *URL, err error) {
// Cut the fragment off first; the '#' itself is discarded.
329 u, frag := split(rawurl, '#', true)
// viaRequest is false, so all forms of relative URL are accepted.
330 if url, err = parse(u, false); err != nil {
336 if url.Fragment, err = unescape(frag, encodeFragment); err != nil {
337 return nil, &Error{"parse", rawurl, err}
342 // ParseRequestURI parses rawurl into a URL structure. It assumes that
343 // rawurl was received in an HTTP request, so the rawurl is interpreted
344 // only as an absolute URI or an absolute path.
345 // The string rawurl is assumed not to have a #fragment suffix.
346 // (Web browsers strip #fragment before sending the URL to a web server.)
// Unlike Parse, it hands rawurl to parse without splitting off a fragment.
347 func ParseRequestURI(rawurl string) (url *URL, err error) {
348 return parse(rawurl, true)
351 // parse parses a URL from a string in one of two contexts. If
352 // viaRequest is true, the URL is assumed to have arrived via an HTTP request,
353 // in which case only absolute URLs or path-absolute relative URLs are allowed.
354 // If viaRequest is false, all forms of relative URLs are allowed.
// Failures are returned as an *Error with operation "parse" and the
// original rawurl.
355 func parse(rawurl string, viaRequest bool) (url *URL, err error) {
359 err = errors.New("empty url")
369 // Split off possible leading "http:", "mailto:", etc.
370 // Cannot contain escaped characters.
371 if url.Scheme, rest, err = getscheme(rawurl); err != nil {
// The query stays encoded; only the '?' separator is dropped.
375 rest, url.RawQuery = split(rest, '?', true)
377 if !strings.HasPrefix(rest, "/") {
378 if url.Scheme != "" {
379 // We consider rootless paths per RFC 3986 as opaque.
384 err = errors.New("invalid URI for request")
// An authority is parsed only when rest starts with "//" and either a
// scheme is present, or the URL did not come from a request and rest does
// not start with "///".
389 if (url.Scheme != "" || !viaRequest && !strings.HasPrefix(rest, "///")) && strings.HasPrefix(rest, "//") {
// cutc is false, so the '/' that ends the authority stays on rest.
391 authority, rest = split(rest[2:], '/', false)
392 url.User, url.Host, err = parseAuthority(authority)
// The host is never unescaped; a '%' in it is rejected outright.
396 if strings.Contains(url.Host, "%") {
397 err = errors.New("hexadecimal escape in host")
401 if url.Path, err = unescape(rest, encodePath); err != nil {
407 return nil, &Error{"parse", rawurl, err}
// parseAuthority splits authority into userinfo and host at the last '@'.
// The username, and the password when the userinfo contains a ':', are
// unescaped in encodeUserPassword mode; the host is returned as found.
410 func parseAuthority(authority string) (user *Userinfo, host string, err error) {
// The last '@' is the separator, so earlier ones fall inside the userinfo.
411 i := strings.LastIndex(authority, "@")
416 userinfo, host := authority[:i], authority[i+1:]
// No ':' means the userinfo is a bare username with no password.
417 if strings.Index(userinfo, ":") < 0 {
418 if userinfo, err = unescape(userinfo, encodeUserPassword); err != nil {
421 user = User(userinfo)
// Cut at the first ':'; each half is unescaped separately.
423 username, password := split(userinfo, ':', true)
424 if username, err = unescape(username, encodeUserPassword); err != nil {
427 if password, err = unescape(password, encodeUserPassword); err != nil {
430 user = UserPassword(username, password)
435 // String reassembles the URL into a valid URL string.
// The path and fragment are escaped on the way out, while Opaque and
// RawQuery are written as stored.
436 func (u *URL) String() string {
439 buf.WriteString(u.Scheme)
443 buf.WriteString(u.Opaque)
// "//" is written whenever a scheme, host or user is present.
445 if u.Scheme != "" || u.Host != "" || u.User != nil {
446 buf.WriteString("//")
// Within this if statement u is shadowed and refers to the Userinfo.
447 if u := u.User; u != nil {
448 buf.WriteString(u.String())
451 if h := u.Host; h != "" {
455 buf.WriteString(escape(u.Path, encodePath))
457 if u.RawQuery != "" {
459 buf.WriteString(u.RawQuery)
461 if u.Fragment != "" {
463 buf.WriteString(escape(u.Fragment, encodeFragment))
468 // Values maps a string key to a list of values.
469 // It is typically used for query parameters and form values.
470 // Unlike in the http.Header map, the keys in a Values map
471 // are case-sensitive.
// Set and Add write to the map, so they need a non-nil Values.
472 type Values map[string][]string
474 // Get gets the first value associated with the given key.
475 // If there are no values associated with the key, Get returns
476 // the empty string. To access multiple values, use the map
// directly.
478 func (v Values) Get(key string) string {
// A missing key and a key holding an empty slice are treated alike.
483 if !ok || len(vs) == 0 {
489 // Set sets the key to value. It replaces any existing
// values.
491 func (v Values) Set(key, value string) {
// A fresh one-element slice overwrites whatever was stored for key.
492 v[key] = []string{value}
495 // Add adds the value to key. It appends to any existing
496 // values associated with key.
497 func (v Values) Add(key, value string) {
// Appending to the nil slice of a missing key creates the entry.
498 v[key] = append(v[key], value)
501 // Del deletes the values associated with key.
// NOTE(review): the body is not visible in this listing; presumably it
// removes the key from the map — confirm.
502 func (v Values) Del(key string) {
506 // ParseQuery parses the URL-encoded query string and returns
507 // a map listing the values specified for each key.
508 // ParseQuery always returns a non-nil map containing all the
509 // valid query parameters found; err describes the first decoding error
510 // encountered, if any.
511 func ParseQuery(query string) (m Values, err error) {
// parseQuery fills m in place; m must already be allocated here.
// NOTE(review): the allocation line is not visible in this listing.
513 err = parseQuery(m, query)
// parseQuery decodes query into m, appending each decoded value to the
// slice stored under its decoded key.
// NOTE(review): how decoding errors reach the err result is not visible in
// this listing; ParseQuery documents that the first one is reported.
517 func parseQuery(m Values, query string) (err error) {
// Pairs are separated by '&' or ';'. Take the text before the first
// separator as this pair and leave the remainder in query.
520 if i := strings.IndexAny(key, "&;"); i >= 0 {
521 key, query = key[:i], key[i+1:]
// The first '=' divides the pair into key and value.
529 if i := strings.Index(key, "="); i >= 0 {
530 key, value = key[:i], key[i+1:]
532 key, err1 := QueryUnescape(key)
539 value, err1 = QueryUnescape(value)
546 m[key] = append(m[key], value)
551 // Encode encodes the values into ``URL encoded'' form.
552 // e.g. "foo=bar&bar=baz"
// Keys and values are both escaped with QueryEscape.
553 func (v Values) Encode() string {
// Gather the keys first so they can be walked as a slice.
// NOTE(review): presumably sorted before the loop for stable output; no
// sort call is visible in this listing — confirm.
558 keys := make([]string, 0, len(v))
560 keys = append(keys, k)
563 for _, k := range keys {
// The escaped "key=" prefix is built once and reused for every value.
565 prefix := QueryEscape(k) + "="
// Inside this loop v names one value string and shadows the receiver.
566 for _, v := range vs {
570 buf.WriteString(prefix)
571 buf.WriteString(QueryEscape(v))
577 // resolvePath takes the special path segments from refpath and applies
578 // them to basepath, per RFC 2396.
// Both paths are split on "/", the base segment list is edited while walking
// the reference segments, and the result is joined with "/" again.
// NOTE(review): the case labels that pick each action are not visible in
// this listing, so the notes below describe only the visible statements.
579 func resolvePath(basepath string, refpath string) string {
580 base := strings.Split(basepath, "/")
581 refs := strings.Split(refpath, "/")
587 for idx, ref := range refs {
591 base[len(base)-1] = ""
597 newLen := len(base) - 1
601 base = base[0:newLen]
603 base[len(base)-1] = ""
// The first reference segment, or any segment arriving while the last base
// slot is empty, overwrites that slot; otherwise the segment is appended.
606 if idx == 0 || base[len(base)-1] == "" {
607 base[len(base)-1] = ref
609 base = append(base, ref)
614 return strings.Join(base, "/")
617 // IsAbs reports whether the URL is absolute, that is, whether it has a
// non-empty Scheme.
618 func (u *URL) IsAbs() bool {
619 return u.Scheme != ""
622 // Parse parses a URL in the context of the receiver. The provided URL
623 // may be relative or absolute. Parse returns nil, err on parse
624 // failure, otherwise its return value is the same as ResolveReference.
625 func (u *URL) Parse(ref string) (*URL, error) {
// This is the package-level Parse function, not a recursive method call.
626 refurl, err := Parse(ref)
630 return u.ResolveReference(refurl), nil
633 // ResolveReference resolves a URI reference to an absolute URI from
634 // an absolute base URI, per RFC 2396 Section 5.2. The URI reference
635 // may be relative or absolute. ResolveReference always returns a new
636 // URL instance, even if the returned URL is identical to either the
637 // base or reference. If ref is an absolute URL, then ResolveReference
638 // ignores base and returns a copy of ref.
639 func (u *URL) ResolveReference(ref *URL) *URL {
644 // relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
// The query and fragment are taken from the reference.
646 url.RawQuery = ref.RawQuery
647 url.Fragment = ref.Fragment
648 if ref.Opaque != "" {
649 url.Opaque = ref.Opaque
655 if ref.Host != "" || ref.User != nil {
656 // The "net_path" case.
660 if strings.HasPrefix(ref.Path, "/") {
661 // The "abs_path" case.
664 // The "rel_path" case.
665 path := resolvePath(u.Path, ref.Path)
// NOTE(review): presumably a leading "/" is added to the merged path
// when it lacks one — the body is not visible in this listing.
666 if !strings.HasPrefix(path, "/") {
674 // Query parses RawQuery and returns the corresponding values.
675 func (u *URL) Query() Values {
// The decoding error is discarded: ParseQuery documents that it always
// returns a non-nil map holding the parameters that decoded cleanly.
676 v, _ := ParseQuery(u.RawQuery)
680 // RequestURI returns the encoded path?query or opaque?query
681 // string that would be used in an HTTP request for u.
682 func (u *URL) RequestURI() string {
// The path is escaped in encodePath mode.
// NOTE(review): the branch that selects Opaque instead is not visible in
// this listing.
685 result = escape(u.Path, encodePath)
// RawQuery is already encoded, so it is appended verbatim after a '?'.
690 if u.RawQuery != "" {
691 result += "?" + u.RawQuery