1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
// lowerhex holds the lowercase hexadecimal digits in order, so indexing it
// with a 4-bit value yields that value's hex character.
15 const lowerhex = "0123456789abcdef"
// quoteWith renders s as a quoted literal delimited by the quote byte. It is
// the shared worker behind Quote, QuoteToASCII, QuoteRune and
// QuoteRuneToASCII, which pass it different delimiters and ASCIIonly
// settings (the ...ToASCII entry points pass true).
17 func quoteWith(s string, quote byte, ASCIIonly bool) string {
// Consume s one character at a time: width is the number of bytes the
// current character occupied, and s is advanced by that amount.
20 for width := 0; len(s) > 0; s = s[width:] {
// Only values at or above utf8.RuneSelf need a full UTF-8 decode.
// NOTE(review): rune presumably starts out as the first byte of s — confirm.
23 if rune >= utf8.RuneSelf {
24 rune, width = utf8.DecodeRuneInString(s)
// RuneError with width 1 is how DecodeRuneInString reports an invalid
// encoding; the offending byte is written out as two hex digits.
26 if width == 1 && rune == utf8.RuneError {
28 buf.WriteByte(lowerhex[s[0]>>4])
29 buf.WriteByte(lowerhex[s[0]&0xF])
32 if rune == int(quote) || rune == '\\' { // always backslashed
34 buf.WriteByte(byte(rune))
// Printability is decided by unicode.IsPrint; this first test additionally
// restricts the character to the ASCII range.
38 if rune <= unicode.MaxASCII && unicode.IsPrint(rune) {
42 } else if unicode.IsPrint(rune) {
// High nibble first, then low nibble, of the raw byte.
65 buf.WriteByte(lowerhex[s[0]>>4])
66 buf.WriteByte(lowerhex[s[0]&0xF])
// Values beyond the largest valid code point get their own case.
67 case rune > unicode.MaxRune:
// Four hex digits, most significant nibble first. The loop variable s is a
// shift count that shadows the string parameter s inside the loop.
72 for s := 12; s >= 0; s -= 4 {
73 buf.WriteByte(lowerhex[rune>>uint(s)&0xF])
// Eight hex digits, most significant nibble first (same shadowing of s).
77 for s := 28; s >= 0; s -= 4 {
78 buf.WriteByte(lowerhex[rune>>uint(s)&0xF])
88 // Quote returns a double-quoted Go string literal representing s. The
89 // returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for
90 // control characters and non-printable characters as defined by unicode.IsPrint.
92 func Quote(s string) string {
// Double-quote delimiter, ASCIIonly off.
93 return quoteWith(s, '"', false)
96 // QuoteToASCII returns a double-quoted Go string literal representing s.
97 // The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for
98 // non-ASCII characters and non-printable characters as defined by unicode.IsPrint.
100 func QuoteToASCII(s string) string {
// Double-quote delimiter, ASCIIonly on.
101 return quoteWith(s, '"', true)
104 // QuoteRune returns a single-quoted Go character literal representing the
105 // rune. The returned string uses Go escape sequences (\t, \n, \xFF, \u0100)
106 // for control characters and non-printable characters as defined by unicode.IsPrint.
108 func QuoteRune(rune int) string {
109 // TODO: avoid the allocation here.
// string(rune) turns the code point into its UTF-8 string form so that the
// string-based quoteWith can be reused; presumably this conversion is the
// allocation the TODO refers to. Single-quote delimiter, ASCIIonly off.
110 return quoteWith(string(rune), '\'', false)
113 // QuoteRuneToASCII returns a single-quoted Go character literal representing
114 // the rune. The returned string uses Go escape sequences (\t, \n, \xFF,
115 // \u0100) for non-ASCII characters and non-printable characters as defined
116 // by unicode.IsPrint.
117 func QuoteRuneToASCII(rune int) string {
118 // TODO: avoid the allocation here.
// string(rune) turns the code point into its UTF-8 string form so that the
// string-based quoteWith can be reused; presumably this conversion is the
// allocation the TODO refers to. Single-quote delimiter, ASCIIonly on.
119 return quoteWith(string(rune), '\'', true)
122 // CanBackquote returns whether the string s would be
123 // a valid Go string literal if enclosed in backquotes.
124 func CanBackquote(s string) bool {
// The scan is byte-wise (s[i]), not rune-wise.
125 for i := 0; i < len(s); i++ {
// Matches a byte below space other than tab, or a backquote itself.
// NOTE(review): presumably these are the bytes that disqualify s — confirm
// against the branch body.
126 if (s[i] < ' ' && s[i] != '\t') || s[i] == '`' {
// unhex maps a single hexadecimal digit character to its numeric value v.
// The visible cases accept '0'-'9', 'a'-'f' and 'A'-'F'; letters of either
// case map to 10 through 15 and report ok as true.
// NOTE(review): c is presumably b converted to int, and any other input
// presumably yields ok == false — confirm.
133 func unhex(b byte) (v int, ok bool) {
136 case '0' <= c && c <= '9':
138 case 'a' <= c && c <= 'f':
139 return c - 'a' + 10, true
140 case 'A' <= c && c <= 'F':
141 return c - 'A' + 10, true
146 // UnquoteChar decodes the first character or byte in the escaped string
147 // or character literal represented by the string s.
148 // It returns four values:
150 // 1) value, the decoded Unicode code point or byte value;
151 // 2) multibyte, a boolean indicating whether the decoded character requires a multibyte UTF-8 representation;
152 // 3) tail, the remainder of the string after the character; and
153 // 4) an error that will be nil if the character is syntactically valid.
155 // The second argument, quote, specifies the type of literal being parsed
156 // and therefore which escaped quote character is permitted.
157 // If set to a single quote, it permits the sequence \' and disallows unescaped '.
158 // If set to a double quote, it permits \" and disallows unescaped ".
159 // If set to zero, it does not permit either escape and allows both quote characters to appear unescaped.
160 func UnquoteChar(s string, quote byte) (value int, multibyte bool, tail string, err os.Error) {
// An unescaped occurrence of the active quote character; the doc comment
// above states that this is disallowed. The second operand keeps a zero
// quote from matching here.
163 case c == quote && (quote == '\'' || quote == '"'):
// Non-ASCII lead byte: decode one full rune, report it as multibyte and
// return the rest of s as the tail.
166 case c >= utf8.RuneSelf:
167 r, size := utf8.DecodeRuneInString(s)
168 return r, true, s[size:], nil
// A single byte taken verbatim and reported as not multibyte.
// NOTE(review): presumably reached only when c is not a backslash, given
// the "hard case" note that follows — confirm.
170 return int(s[0]), false, s[1:], nil
173 // hard case: c is backslash
// NOTE(review): n is presumably the number of digits the escape form
// requires — confirm.
211 for j := 0; j < n; j++ {
221 // single-byte string, possibly not UTF-8
// Guards against values above the largest valid Unicode code point.
225 if v > unicode.MaxRune {
// A leading octal digit.
231 case '0', '1', '2', '3', '4', '5', '6', '7':
237 for j := 0; j < 2; j++ { // one digit already; two more
266 // Unquote interprets s as a single-quoted, double-quoted,
267 // or backquoted Go string literal, returning the string value
268 // that s quotes. (If s is single-quoted, it would be a Go
269 // character literal; Unquote returns the corresponding
270 // one-character string.)
271 func Unquote(s string) (t string, err os.Error) {
// Looks for a backquote inside s.
// NOTE(review): presumably this is the backquoted-literal path, where an
// embedded backquote cannot be represented — confirm.
283 if strings.Contains(s, "`") {
// True when the delimiter is neither a double nor a single quote.
288 if quote != '"' && quote != '\'' {
// Looks for a newline inside s.
291 if strings.Index(s, "\n") >= 0 {
295 // Is it trivial? Avoid allocation.
// Trivial means s contains neither a backslash nor the quote character
// itself, so nothing needs unescaping.
296 if strings.Index(s, `\`) < 0 && strings.IndexRune(s, int(quote)) < 0 {
// True when s consists of exactly one rune and that rune is not the
// (RuneError, 1) result DecodeRuneInString gives for an invalid encoding.
301 r, size := utf8.DecodeRuneInString(s)
302 if size == len(s) && (r != utf8.RuneError || size != 1) {
// Decode the next character or escape sequence via UnquoteChar.
310 c, multibyte, ss, err := UnquoteChar(s, quote)
// Values below utf8.RuneSelf, and values UnquoteChar reported as not
// multibyte, are appended as a single byte.
315 if c < utf8.RuneSelf || !multibyte {
316 buf.WriteByte(byte(c))
// Appends the code point in its UTF-8 string form.
// NOTE(review): presumably the else arm of the test above — confirm.
318 buf.WriteString(string(c))
320 if quote == '\'' && len(s) != 0 {
321 // single-quoted must be single character
325 return buf.String(), nil