1 /* String (str/bytes) object implementation */
3 #define PY_SSIZE_T_CLEAN
10 Py_ssize_t null_strings, one_strings;
13 static PyStringObject *characters[UCHAR_MAX + 1];
14 static PyStringObject *nullstring;
16 /* This dictionary holds all interned strings. Note that references to
17 strings in this dictionary are *not* counted in the string's ob_refcnt.
18 When the interned string reaches a refcnt of 0 the string deallocation
19 function will delete the reference from this dictionary.
21 Another way to look at this is to say that the actual reference
22 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
24 static PyObject *interned;
26 /* PyStringObject_SIZE gives the basic size of a string; any memory allocation
27 for a string of length n should request PyStringObject_SIZE + n bytes.
29 Using PyStringObject_SIZE instead of sizeof(PyStringObject) saves
30 3 bytes per string allocation on a typical system.
32 #define PyStringObject_SIZE (offsetof(PyStringObject, ob_sval) + 1)
35 For PyString_FromString(), the parameter `str' points to a null-terminated
36 string containing exactly `size' bytes.
38 For PyString_FromStringAndSize(), the parameter `str' is
39 either NULL or else points to a string containing at least `size' bytes.
40 For PyString_FromStringAndSize(), the string in the `str' parameter does
41 not have to be null-terminated. (Therefore it is safe to construct a
42 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
43 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
44 bytes (setting the last byte to the null terminating character) and you can
45 fill in the data yourself. If `str' is non-NULL then the resulting
46 PyString object must be treated as immutable and you must not fill in nor
47 alter the data yourself, since the strings may be shared.
49 The PyObject member `op->ob_size', which denotes the number of "extra
50 items" in a variable-size object, will contain the number of bytes
51 allocated for string data, not counting the null terminating character.
52 It is therefore equal to the `size' parameter (for
53 PyString_FromStringAndSize()) or the length of the string in the `str'
54 parameter (for PyString_FromString()).
57 PyString_FromStringAndSize(const char *str, Py_ssize_t size)
59 register PyStringObject *op;
61 PyErr_SetString(PyExc_SystemError,
62 "Negative size passed to PyString_FromStringAndSize");
65 if (size == 0 && (op = nullstring) != NULL) {
70 return (PyObject *)op;
72 if (size == 1 && str != NULL &&
73 (op = characters[*str & UCHAR_MAX]) != NULL)
79 return (PyObject *)op;
82 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
83 PyErr_SetString(PyExc_OverflowError, "string is too large");
87 /* Inline PyObject_NewVar */
88 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
90 return PyErr_NoMemory();
91 PyObject_INIT_VAR(op, &PyString_Type, size);
93 op->ob_sstate = SSTATE_NOT_INTERNED;
95 Py_MEMCPY(op->ob_sval, str, size);
96 op->ob_sval[size] = '\0';
97 /* share short strings */
99 PyObject *t = (PyObject *)op;
100 PyString_InternInPlace(&t);
101 op = (PyStringObject *)t;
104 } else if (size == 1 && str != NULL) {
105 PyObject *t = (PyObject *)op;
106 PyString_InternInPlace(&t);
107 op = (PyStringObject *)t;
108 characters[*str & UCHAR_MAX] = op;
111 return (PyObject *) op;
115 PyString_FromString(const char *str)
117 register size_t size;
118 register PyStringObject *op;
122 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
123 PyErr_SetString(PyExc_OverflowError,
124 "string is too long for a Python string");
127 if (size == 0 && (op = nullstring) != NULL) {
132 return (PyObject *)op;
134 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
139 return (PyObject *)op;
142 /* Inline PyObject_NewVar */
143 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
145 return PyErr_NoMemory();
146 PyObject_INIT_VAR(op, &PyString_Type, size);
148 op->ob_sstate = SSTATE_NOT_INTERNED;
149 Py_MEMCPY(op->ob_sval, str, size+1);
150 /* share short strings */
152 PyObject *t = (PyObject *)op;
153 PyString_InternInPlace(&t);
154 op = (PyStringObject *)t;
157 } else if (size == 1) {
158 PyObject *t = (PyObject *)op;
159 PyString_InternInPlace(&t);
160 op = (PyStringObject *)t;
161 characters[*str & UCHAR_MAX] = op;
164 return (PyObject *) op;
168 PyString_FromFormatV(const char *format, va_list vargs)
176 #ifdef VA_LIST_IS_ARRAY
177 Py_MEMCPY(count, vargs, sizeof(va_list));
180 __va_copy(count, vargs);
185 /* step 1: figure out how large a buffer we need */
186 for (f = format; *f; f++) {
188 #ifdef HAVE_LONG_LONG
189 int longlongflag = 0;
192 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
195 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
196 * they don't affect the amount of space we reserve.
199 if (f[1] == 'd' || f[1] == 'u') {
202 #ifdef HAVE_LONG_LONG
203 else if (f[1] == 'l' &&
204 (f[2] == 'd' || f[2] == 'u')) {
210 else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
216 (void)va_arg(count, int);
217 /* fall through... */
221 case 'd': case 'u': case 'i': case 'x':
222 (void) va_arg(count, int);
223 #ifdef HAVE_LONG_LONG
225 ceil(log10(256)*SIZEOF_LONG_LONG) digits,
226 plus 1 for the sign. 53/22 is an upper
227 bound for log10(256). */
229 n += 2 + (SIZEOF_LONG_LONG*53-1) / 22;
232 /* 20 bytes is enough to hold a 64-bit
233 integer. Decimal takes the most
234 space. This isn't enough for
240 s = va_arg(count, char*);
244 (void) va_arg(count, int);
245 /* maximum 64-bit pointer representation:
247 * so 19 characters is enough.
248 * XXX I count 18 -- what's the extra for?
253 /* if we stumble upon an unknown
254 formatting code, copy the rest of
255 the format string to the output
256 string. (we cannot just skip the
257 code, since there's no way to know
258 what's in the argument list) */
266 /* step 2: fill the buffer */
267 /* Since we've analyzed how much space we need for the worst case,
268 use sprintf directly instead of the slower PyOS_snprintf. */
269 string = PyString_FromStringAndSize(NULL, n);
273 s = PyString_AsString(string);
275 for (f = format; *f; f++) {
280 #ifdef HAVE_LONG_LONG
281 int longlongflag = 0;
284 /* parse the width.precision part (we're only
285 interested in the precision value, if any) */
287 while (isdigit(Py_CHARMASK(*f)))
288 n = (n*10) + *f++ - '0';
292 while (isdigit(Py_CHARMASK(*f)))
293 n = (n*10) + *f++ - '0';
295 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
297 /* Handle %ld, %lu, %lld and %llu. */
299 if (f[1] == 'd' || f[1] == 'u') {
303 #ifdef HAVE_LONG_LONG
304 else if (f[1] == 'l' &&
305 (f[2] == 'd' || f[2] == 'u')) {
311 /* handle the size_t flag. */
312 else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
319 *s++ = va_arg(vargs, int);
323 sprintf(s, "%ld", va_arg(vargs, long));
324 #ifdef HAVE_LONG_LONG
325 else if (longlongflag)
326 sprintf(s, "%" PY_FORMAT_LONG_LONG "d",
327 va_arg(vargs, PY_LONG_LONG));
330 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
331 va_arg(vargs, Py_ssize_t));
333 sprintf(s, "%d", va_arg(vargs, int));
339 va_arg(vargs, unsigned long));
340 #ifdef HAVE_LONG_LONG
341 else if (longlongflag)
342 sprintf(s, "%" PY_FORMAT_LONG_LONG "u",
343 va_arg(vargs, PY_LONG_LONG));
346 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
347 va_arg(vargs, size_t));
350 va_arg(vargs, unsigned int));
354 sprintf(s, "%i", va_arg(vargs, int));
358 sprintf(s, "%x", va_arg(vargs, int));
362 p = va_arg(vargs, char*);
370 sprintf(s, "%p", va_arg(vargs, void*));
371 /* %p is ill-defined: ensure leading 0x. */
374 else if (s[1] != 'x') {
375 memmove(s+2, s, strlen(s)+1);
394 if (_PyString_Resize(&string, s - PyString_AS_STRING(string)))
400 PyString_FromFormat(const char *format, ...)
405 #ifdef HAVE_STDARG_PROTOTYPES
406 va_start(vargs, format);
410 ret = PyString_FromFormatV(format, vargs);
416 PyObject *PyString_Decode(const char *s,
418 const char *encoding,
423 str = PyString_FromStringAndSize(s, size);
426 v = PyString_AsDecodedString(str, encoding, errors);
431 PyObject *PyString_AsDecodedObject(PyObject *str,
432 const char *encoding,
437 if (!PyString_Check(str)) {
442 if (encoding == NULL) {
443 #ifdef Py_USING_UNICODE
444 encoding = PyUnicode_GetDefaultEncoding();
446 PyErr_SetString(PyExc_ValueError, "no encoding specified");
451 /* Decode via the codec registry */
452 v = PyCodec_Decode(str, encoding, errors);
462 PyObject *PyString_AsDecodedString(PyObject *str,
463 const char *encoding,
468 v = PyString_AsDecodedObject(str, encoding, errors);
472 #ifdef Py_USING_UNICODE
473 /* Convert Unicode to a string using the default encoding */
474 if (PyUnicode_Check(v)) {
476 v = PyUnicode_AsEncodedString(v, NULL, NULL);
482 if (!PyString_Check(v)) {
483 PyErr_Format(PyExc_TypeError,
484 "decoder did not return a string object (type=%.400s)",
485 Py_TYPE(v)->tp_name);
496 PyObject *PyString_Encode(const char *s,
498 const char *encoding,
503 str = PyString_FromStringAndSize(s, size);
506 v = PyString_AsEncodedString(str, encoding, errors);
511 PyObject *PyString_AsEncodedObject(PyObject *str,
512 const char *encoding,
517 if (!PyString_Check(str)) {
522 if (encoding == NULL) {
523 #ifdef Py_USING_UNICODE
524 encoding = PyUnicode_GetDefaultEncoding();
526 PyErr_SetString(PyExc_ValueError, "no encoding specified");
531 /* Encode via the codec registry */
532 v = PyCodec_Encode(str, encoding, errors);
542 PyObject *PyString_AsEncodedString(PyObject *str,
543 const char *encoding,
548 v = PyString_AsEncodedObject(str, encoding, errors);
552 #ifdef Py_USING_UNICODE
553 /* Convert Unicode to a string using the default encoding */
554 if (PyUnicode_Check(v)) {
556 v = PyUnicode_AsEncodedString(v, NULL, NULL);
562 if (!PyString_Check(v)) {
563 PyErr_Format(PyExc_TypeError,
564 "encoder did not return a string object (type=%.400s)",
565 Py_TYPE(v)->tp_name);
577 string_dealloc(PyObject *op)
579 switch (PyString_CHECK_INTERNED(op)) {
580 case SSTATE_NOT_INTERNED:
583 case SSTATE_INTERNED_MORTAL:
584 /* revive dead object temporarily for DelItem */
586 if (PyDict_DelItem(interned, op) != 0)
588 "deletion of interned string failed");
591 case SSTATE_INTERNED_IMMORTAL:
592 Py_FatalError("Immortal interned string died.");
595 Py_FatalError("Inconsistent interned string state.");
597 Py_TYPE(op)->tp_free(op);
600 /* Unescape a backslash-escaped string. If unicode is non-zero,
601 the string is a u-literal. If recode_encoding is non-zero,
602 the string is UTF-8 encoded and should be re-encoded in the
603 specified encoding. */
605 PyObject *PyString_DecodeEscape(const char *s,
609 const char *recode_encoding)
615 Py_ssize_t newlen = recode_encoding ? 4*len:len;
616 v = PyString_FromStringAndSize((char *)NULL, newlen);
619 p = buf = PyString_AsString(v);
624 #ifdef Py_USING_UNICODE
625 if (recode_encoding && (*s & 0x80)) {
631 /* Decode non-ASCII bytes as UTF-8. */
632 while (t < end && (*t & 0x80)) t++;
633 u = PyUnicode_DecodeUTF8(s, t - s, errors);
636 /* Recode them in target encoding. */
637 w = PyUnicode_AsEncodedString(
638 u, recode_encoding, errors);
642 /* Append bytes to output buffer. */
643 assert(PyString_Check(w));
644 r = PyString_AS_STRING(w);
645 rn = PyString_GET_SIZE(w);
660 PyErr_SetString(PyExc_ValueError,
661 "Trailing \\ in string");
665 /* XXX This assumes ASCII! */
667 case '\\': *p++ = '\\'; break;
668 case '\'': *p++ = '\''; break;
669 case '\"': *p++ = '\"'; break;
670 case 'b': *p++ = '\b'; break;
671 case 'f': *p++ = '\014'; break; /* FF */
672 case 't': *p++ = '\t'; break;
673 case 'n': *p++ = '\n'; break;
674 case 'r': *p++ = '\r'; break;
675 case 'v': *p++ = '\013'; break; /* VT */
676 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
677 case '0': case '1': case '2': case '3':
678 case '4': case '5': case '6': case '7':
680 if (s < end && '0' <= *s && *s <= '7') {
681 c = (c<<3) + *s++ - '0';
682 if (s < end && '0' <= *s && *s <= '7')
683 c = (c<<3) + *s++ - '0';
689 isxdigit(Py_CHARMASK(s[0])) &&
690 isxdigit(Py_CHARMASK(s[1])))
713 if (!errors || strcmp(errors, "strict") == 0) {
714 PyErr_SetString(PyExc_ValueError,
715 "invalid \\x escape");
718 if (strcmp(errors, "replace") == 0) {
720 } else if (strcmp(errors, "ignore") == 0)
723 PyErr_Format(PyExc_ValueError,
725 "unknown error handling code: %.400s",
729 #ifndef Py_USING_UNICODE
734 PyErr_SetString(PyExc_ValueError,
735 "Unicode escapes not legal "
736 "when Unicode disabled");
743 goto non_esc; /* an arbitrary number of unescaped
744 UTF-8 bytes may follow. */
747 if (p-buf < newlen && _PyString_Resize(&v, p - buf))
755 /* -------------------------------------------------------------------- */
759 string_getsize(register PyObject *op)
763 if (PyString_AsStringAndSize(op, &s, &len))
768 static /*const*/ char *
769 string_getbuffer(register PyObject *op)
773 if (PyString_AsStringAndSize(op, &s, &len))
779 PyString_Size(register PyObject *op)
781 if (!PyString_Check(op))
782 return string_getsize(op);
787 PyString_AsString(register PyObject *op)
789 if (!PyString_Check(op))
790 return string_getbuffer(op);
791 return ((PyStringObject *)op) -> ob_sval;
795 PyString_AsStringAndSize(register PyObject *obj,
797 register Py_ssize_t *len)
800 PyErr_BadInternalCall();
804 if (!PyString_Check(obj)) {
805 #ifdef Py_USING_UNICODE
806 if (PyUnicode_Check(obj)) {
807 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
814 PyErr_Format(PyExc_TypeError,
815 "expected string or Unicode object, "
816 "%.200s found", Py_TYPE(obj)->tp_name);
821 *s = PyString_AS_STRING(obj);
823 *len = PyString_GET_SIZE(obj);
824 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
825 PyErr_SetString(PyExc_TypeError,
826 "expected string without null bytes");
832 /* -------------------------------------------------------------------- */
835 #include "stringlib/stringdefs.h"
836 #include "stringlib/fastsearch.h"
838 #include "stringlib/count.h"
839 #include "stringlib/find.h"
840 #include "stringlib/partition.h"
841 #include "stringlib/split.h"
843 #define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
844 #include "stringlib/localeutil.h"
849 string_print(PyStringObject *op, FILE *fp, int flags)
851 Py_ssize_t i, str_len;
855 /* XXX Ought to check for interrupts when writing long strings */
856 if (! PyString_CheckExact(op)) {
858 /* A str subclass may have its own __str__ method. */
859 op = (PyStringObject *) PyObject_Str((PyObject *)op);
862 ret = string_print(op, fp, flags);
866 if (flags & Py_PRINT_RAW) {
867 char *data = op->ob_sval;
868 Py_ssize_t size = Py_SIZE(op);
869 Py_BEGIN_ALLOW_THREADS
870 while (size > INT_MAX) {
871 /* Very long strings cannot be written atomically.
872 * But don't write exactly INT_MAX bytes at a time
873 * to avoid memory alignment issues.
875 const int chunk_size = INT_MAX & ~0x3FFF;
876 fwrite(data, 1, chunk_size, fp);
881 if (size) fwrite(data, (int)size, 1, fp);
883 fwrite(data, 1, (int)size, fp);
889 /* figure out which quote to use; single is preferred */
891 if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
892 !memchr(op->ob_sval, '"', Py_SIZE(op)))
895 str_len = Py_SIZE(op);
896 Py_BEGIN_ALLOW_THREADS
898 for (i = 0; i < str_len; i++) {
899 /* Since strings are immutable and the caller should have a
900 reference, accessing the internal buffer should not be an issue
901 with the GIL released. */
903 if (c == quote || c == '\\')
904 fprintf(fp, "\\%c", c);
911 else if (c < ' ' || c >= 0x7f)
912 fprintf(fp, "\\x%02x", c & 0xff);
922 PyString_Repr(PyObject *obj, int smartquotes)
924 register PyStringObject* op = (PyStringObject*) obj;
925 size_t newsize = 2 + 4 * Py_SIZE(op);
927 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_SIZE(op)) {
928 PyErr_SetString(PyExc_OverflowError,
929 "string is too large to make repr");
932 v = PyString_FromStringAndSize((char *)NULL, newsize);
937 register Py_ssize_t i;
942 /* figure out which quote to use; single is preferred */
945 memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
946 !memchr(op->ob_sval, '"', Py_SIZE(op)))
949 p = PyString_AS_STRING(v);
951 for (i = 0; i < Py_SIZE(op); i++) {
952 /* There's at least enough room for a hex escape
953 and a closing quote. */
954 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
956 if (c == quote || c == '\\')
957 *p++ = '\\', *p++ = c;
959 *p++ = '\\', *p++ = 't';
961 *p++ = '\\', *p++ = 'n';
963 *p++ = '\\', *p++ = 'r';
964 else if (c < ' ' || c >= 0x7f) {
965 /* For performance, we don't want to call
966 PyOS_snprintf here (extra layers of
968 sprintf(p, "\\x%02x", c & 0xff);
974 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
977 if (_PyString_Resize(&v, (p - PyString_AS_STRING(v))))
984 string_repr(PyObject *op)
986 return PyString_Repr(op, 1);
990 string_str(PyObject *s)
992 assert(PyString_Check(s));
993 if (PyString_CheckExact(s)) {
998 /* Subtype -- return genuine string with the same value. */
999 PyStringObject *t = (PyStringObject *) s;
1000 return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t));
1005 string_length(PyStringObject *a)
1011 string_concat(register PyStringObject *a, register PyObject *bb)
1013 register Py_ssize_t size;
1014 register PyStringObject *op;
1015 if (!PyString_Check(bb)) {
1016 #ifdef Py_USING_UNICODE
1017 if (PyUnicode_Check(bb))
1018 return PyUnicode_Concat((PyObject *)a, bb);
1020 if (PyByteArray_Check(bb))
1021 return PyByteArray_Concat((PyObject *)a, bb);
1022 PyErr_Format(PyExc_TypeError,
1023 "cannot concatenate 'str' and '%.200s' objects",
1024 Py_TYPE(bb)->tp_name);
1027 #define b ((PyStringObject *)bb)
1028 /* Optimize cases with empty left or right operand */
1029 if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&
1030 PyString_CheckExact(a) && PyString_CheckExact(b)) {
1031 if (Py_SIZE(a) == 0) {
1036 return (PyObject *)a;
1038 size = Py_SIZE(a) + Py_SIZE(b);
1039 /* Check that string sizes are not negative, to prevent an
1040 overflow in cases where we are passed incorrectly-created
1041 strings with negative lengths (due to a bug in other code).
1043 if (Py_SIZE(a) < 0 || Py_SIZE(b) < 0 ||
1044 Py_SIZE(a) > PY_SSIZE_T_MAX - Py_SIZE(b)) {
1045 PyErr_SetString(PyExc_OverflowError,
1046 "strings are too large to concat");
1050 /* Inline PyObject_NewVar */
1051 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
1052 PyErr_SetString(PyExc_OverflowError,
1053 "strings are too large to concat");
1056 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
1058 return PyErr_NoMemory();
1059 PyObject_INIT_VAR(op, &PyString_Type, size);
1061 op->ob_sstate = SSTATE_NOT_INTERNED;
1062 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1063 Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));
1064 op->ob_sval[size] = '\0';
1065 return (PyObject *) op;
1070 string_repeat(register PyStringObject *a, register Py_ssize_t n)
1072 register Py_ssize_t i;
1073 register Py_ssize_t j;
1074 register Py_ssize_t size;
1075 register PyStringObject *op;
1079 /* watch out for overflows: the size can overflow int,
1080 * and the # of bytes needed can overflow size_t
1082 size = Py_SIZE(a) * n;
1083 if (n && size / n != Py_SIZE(a)) {
1084 PyErr_SetString(PyExc_OverflowError,
1085 "repeated string is too long");
1088 if (size == Py_SIZE(a) && PyString_CheckExact(a)) {
1090 return (PyObject *)a;
1092 nbytes = (size_t)size;
1093 if (nbytes + PyStringObject_SIZE <= nbytes) {
1094 PyErr_SetString(PyExc_OverflowError,
1095 "repeated string is too long");
1098 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + nbytes);
1100 return PyErr_NoMemory();
1101 PyObject_INIT_VAR(op, &PyString_Type, size);
1103 op->ob_sstate = SSTATE_NOT_INTERNED;
1104 op->ob_sval[size] = '\0';
1105 if (Py_SIZE(a) == 1 && n > 0) {
1106 memset(op->ob_sval, a->ob_sval[0] , n);
1107 return (PyObject *) op;
1111 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1115 j = (i <= size-i) ? i : size-i;
1116 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1119 return (PyObject *) op;
1122 /* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1125 string_slice(register PyStringObject *a, register Py_ssize_t i,
1126 register Py_ssize_t j)
1127 /* j -- may be negative! */
1132 j = 0; /* Avoid signed/unsigned bug in next line */
1135 if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) {
1136 /* It's the same as a */
1138 return (PyObject *)a;
1142 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
1146 string_contains(PyObject *str_obj, PyObject *sub_obj)
1148 if (!PyString_CheckExact(sub_obj)) {
1149 #ifdef Py_USING_UNICODE
1150 if (PyUnicode_Check(sub_obj))
1151 return PyUnicode_Contains(str_obj, sub_obj);
1153 if (!PyString_Check(sub_obj)) {
1154 PyErr_Format(PyExc_TypeError,
1155 "'in <string>' requires string as left operand, "
1156 "not %.200s", Py_TYPE(sub_obj)->tp_name);
1161 return stringlib_contains_obj(str_obj, sub_obj);
1165 string_item(PyStringObject *a, register Py_ssize_t i)
1169 if (i < 0 || i >= Py_SIZE(a)) {
1170 PyErr_SetString(PyExc_IndexError, "string index out of range");
1173 pchar = a->ob_sval[i];
1174 v = (PyObject *)characters[pchar & UCHAR_MAX];
1176 v = PyString_FromStringAndSize(&pchar, 1);
1187 string_richcompare(PyStringObject *a, PyStringObject *b, int op)
1190 Py_ssize_t len_a, len_b;
1194 /* Make sure both arguments are strings. */
1195 if (!(PyString_Check(a) && PyString_Check(b))) {
1196 result = Py_NotImplemented;
1201 case Py_EQ:case Py_LE:case Py_GE:
1204 case Py_NE:case Py_LT:case Py_GT:
1210 /* Supporting Py_NE here as well does not save
1211 much time, since Py_NE is rarely used. */
1212 if (Py_SIZE(a) == Py_SIZE(b)
1213 && (a->ob_sval[0] == b->ob_sval[0]
1214 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
1221 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
1222 min_len = (len_a < len_b) ? len_a : len_b;
1224 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1226 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1230 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1232 case Py_LT: c = c < 0; break;
1233 case Py_LE: c = c <= 0; break;
1234 case Py_EQ: assert(0); break; /* unreachable */
1235 case Py_NE: c = c != 0; break;
1236 case Py_GT: c = c > 0; break;
1237 case Py_GE: c = c >= 0; break;
1239 result = Py_NotImplemented;
1242 result = c ? Py_True : Py_False;
1249 _PyString_Eq(PyObject *o1, PyObject *o2)
1251 PyStringObject *a = (PyStringObject*) o1;
1252 PyStringObject *b = (PyStringObject*) o2;
1253 return Py_SIZE(a) == Py_SIZE(b)
1254 && *a->ob_sval == *b->ob_sval
1255 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;
1259 string_hash(PyStringObject *a)
1261 register Py_ssize_t len;
1262 register unsigned char *p;
1266 assert(_Py_HashSecret_Initialized);
1268 if (a->ob_shash != -1)
1272 We make the hash of the empty string be 0, rather than using
1273 (prefix ^ suffix), since this slightly obfuscates the hash secret
1279 p = (unsigned char *) a->ob_sval;
1280 x = _Py_HashSecret.prefix;
1283 x = (1000003*x) ^ *p++;
1285 x ^= _Py_HashSecret.suffix;
1293 string_subscript(PyStringObject* self, PyObject* item)
1295 if (PyIndex_Check(item)) {
1296 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1297 if (i == -1 && PyErr_Occurred())
1300 i += PyString_GET_SIZE(self);
1301 return string_item(self, i);
1303 else if (PySlice_Check(item)) {
1304 Py_ssize_t start, stop, step, slicelength, cur, i;
1309 if (PySlice_GetIndicesEx((PySliceObject*)item,
1310 PyString_GET_SIZE(self),
1311 &start, &stop, &step, &slicelength) < 0) {
1315 if (slicelength <= 0) {
1316 return PyString_FromStringAndSize("", 0);
1318 else if (start == 0 && step == 1 &&
1319 slicelength == PyString_GET_SIZE(self) &&
1320 PyString_CheckExact(self)) {
1322 return (PyObject *)self;
1324 else if (step == 1) {
1325 return PyString_FromStringAndSize(
1326 PyString_AS_STRING(self) + start,
1330 source_buf = PyString_AsString((PyObject*)self);
1331 result_buf = (char *)PyMem_Malloc(slicelength);
1332 if (result_buf == NULL)
1333 return PyErr_NoMemory();
1335 for (cur = start, i = 0; i < slicelength;
1337 result_buf[i] = source_buf[cur];
1340 result = PyString_FromStringAndSize(result_buf,
1342 PyMem_Free(result_buf);
1347 PyErr_Format(PyExc_TypeError,
1348 "string indices must be integers, not %.200s",
1349 Py_TYPE(item)->tp_name);
1355 string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
1358 PyErr_SetString(PyExc_SystemError,
1359 "accessing non-existent string segment");
1362 *ptr = (void *)self->ob_sval;
1363 return Py_SIZE(self);
1367 string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
1369 PyErr_SetString(PyExc_TypeError,
1370 "Cannot use string as modifiable buffer");
1375 string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
1378 *lenp = Py_SIZE(self);
1383 string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
1386 PyErr_SetString(PyExc_SystemError,
1387 "accessing non-existent string segment");
1390 *ptr = self->ob_sval;
1391 return Py_SIZE(self);
1395 string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
1397 return PyBuffer_FillInfo(view, (PyObject*)self,
1398 (void *)self->ob_sval, Py_SIZE(self),
1402 static PySequenceMethods string_as_sequence = {
1403 (lenfunc)string_length, /*sq_length*/
1404 (binaryfunc)string_concat, /*sq_concat*/
1405 (ssizeargfunc)string_repeat, /*sq_repeat*/
1406 (ssizeargfunc)string_item, /*sq_item*/
1407 (ssizessizeargfunc)string_slice, /*sq_slice*/
1410 (objobjproc)string_contains /*sq_contains*/
1413 static PyMappingMethods string_as_mapping = {
1414 (lenfunc)string_length,
1415 (binaryfunc)string_subscript,
1419 static PyBufferProcs string_as_buffer = {
1420 (readbufferproc)string_buffer_getreadbuf,
1421 (writebufferproc)string_buffer_getwritebuf,
1422 (segcountproc)string_buffer_getsegcount,
1423 (charbufferproc)string_buffer_getcharbuf,
1424 (getbufferproc)string_buffer_getbuffer,
1431 #define RIGHTSTRIP 1
1434 /* Arrays indexed by above */
1435 static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};
1437 #define STRIPNAME(i) (stripformat[i]+3)
1439 PyDoc_STRVAR(split__doc__,
1440 "S.split([sep [,maxsplit]]) -> list of strings\n\
1442 Return a list of the words in the string S, using sep as the\n\
1443 delimiter string. If maxsplit is given, at most maxsplit\n\
1444 splits are done. If sep is not specified or is None, any\n\
1445 whitespace string is a separator and empty strings are removed\n\
1449 string_split(PyStringObject *self, PyObject *args)
1451 Py_ssize_t len = PyString_GET_SIZE(self), n;
1452 Py_ssize_t maxsplit = -1;
1453 const char *s = PyString_AS_STRING(self), *sub;
1454 PyObject *subobj = Py_None;
1456 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1459 maxsplit = PY_SSIZE_T_MAX;
1460 if (subobj == Py_None)
1461 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1462 if (PyString_Check(subobj)) {
1463 sub = PyString_AS_STRING(subobj);
1464 n = PyString_GET_SIZE(subobj);
1466 #ifdef Py_USING_UNICODE
1467 else if (PyUnicode_Check(subobj))
1468 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
1470 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1473 return stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1476 PyDoc_STRVAR(partition__doc__,
1477 "S.partition(sep) -> (head, sep, tail)\n\
1479 Search for the separator sep in S, and return the part before it,\n\
1480 the separator itself, and the part after it. If the separator is not\n\
1481 found, return S and two empty strings.");
1484 string_partition(PyStringObject *self, PyObject *sep_obj)
1489 if (PyString_Check(sep_obj)) {
1490 sep = PyString_AS_STRING(sep_obj);
1491 sep_len = PyString_GET_SIZE(sep_obj);
1493 #ifdef Py_USING_UNICODE
1494 else if (PyUnicode_Check(sep_obj))
1495 return PyUnicode_Partition((PyObject *) self, sep_obj);
1497 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1500 return stringlib_partition(
1502 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1503 sep_obj, sep, sep_len
1507 PyDoc_STRVAR(rpartition__doc__,
1508 "S.rpartition(sep) -> (head, sep, tail)\n\
1510 Search for the separator sep in S, starting at the end of S, and return\n\
1511 the part before it, the separator itself, and the part after it. If the\n\
1512 separator is not found, return two empty strings and S.");
1515 string_rpartition(PyStringObject *self, PyObject *sep_obj)
1520 if (PyString_Check(sep_obj)) {
1521 sep = PyString_AS_STRING(sep_obj);
1522 sep_len = PyString_GET_SIZE(sep_obj);
1524 #ifdef Py_USING_UNICODE
1525 else if (PyUnicode_Check(sep_obj))
1526 return PyUnicode_RPartition((PyObject *) self, sep_obj);
1528 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1531 return stringlib_rpartition(
1533 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1534 sep_obj, sep, sep_len
1538 PyDoc_STRVAR(rsplit__doc__,
1539 "S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1541 Return a list of the words in the string S, using sep as the\n\
1542 delimiter string, starting at the end of the string and working\n\
1543 to the front. If maxsplit is given, at most maxsplit splits are\n\
1544 done. If sep is not specified or is None, any whitespace string\n\
1548 string_rsplit(PyStringObject *self, PyObject *args)
1550 Py_ssize_t len = PyString_GET_SIZE(self), n;
1551 Py_ssize_t maxsplit = -1;
1552 const char *s = PyString_AS_STRING(self), *sub;
1553 PyObject *subobj = Py_None;
1555 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1558 maxsplit = PY_SSIZE_T_MAX;
1559 if (subobj == Py_None)
1560 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1561 if (PyString_Check(subobj)) {
1562 sub = PyString_AS_STRING(subobj);
1563 n = PyString_GET_SIZE(subobj);
1565 #ifdef Py_USING_UNICODE
1566 else if (PyUnicode_Check(subobj))
1567 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1569 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1572 return stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1576 PyDoc_STRVAR(join__doc__,
1577 "S.join(iterable) -> string\n\
1579 Return a string which is the concatenation of the strings in the\n\
1580 iterable. The separator between elements is S.");
1583 string_join(PyStringObject *self, PyObject *orig)
1585 char *sep = PyString_AS_STRING(self);
1586 const Py_ssize_t seplen = PyString_GET_SIZE(self);
1587 PyObject *res = NULL;
1589 Py_ssize_t seqlen = 0;
1592 PyObject *seq, *item;
1594 seq = PySequence_Fast(orig, "");
1599 seqlen = PySequence_Size(seq);
1602 return PyString_FromString("");
1605 item = PySequence_Fast_GET_ITEM(seq, 0);
1606 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1613 /* There are at least two things to join, or else we have a subclass
1614 * of the builtin types in the sequence.
1615 * Do a pre-pass to figure out the total amount of space we'll
1616 * need (sz), see whether any argument is absurd, and defer to
1617 * the Unicode join if appropriate.
1619 for (i = 0; i < seqlen; i++) {
1620 const size_t old_sz = sz;
1621 item = PySequence_Fast_GET_ITEM(seq, i);
1622 if (!PyString_Check(item)){
1623 #ifdef Py_USING_UNICODE
1624 if (PyUnicode_Check(item)) {
1625 /* Defer to Unicode join.
1626 * CAUTION: There's no guarantee that the
1627 * original sequence can be iterated over
1628 * again, so we must pass seq here.
1631 result = PyUnicode_Join((PyObject *)self, seq);
1636 PyErr_Format(PyExc_TypeError,
1637 "sequence item %zd: expected string,"
1639 i, Py_TYPE(item)->tp_name);
1643 sz += PyString_GET_SIZE(item);
1646 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1647 PyErr_SetString(PyExc_OverflowError,
1648 "join() result is too long for a Python string");
1654 /* Allocate result space. */
1655 res = PyString_FromStringAndSize((char*)NULL, sz);
1661 /* Catenate everything. */
1662 p = PyString_AS_STRING(res);
1663 for (i = 0; i < seqlen; ++i) {
1665 item = PySequence_Fast_GET_ITEM(seq, i);
1666 n = PyString_GET_SIZE(item);
1667 Py_MEMCPY(p, PyString_AS_STRING(item), n);
1669 if (i < seqlen - 1) {
1670 Py_MEMCPY(p, sep, seplen);
1680 _PyString_Join(PyObject *sep, PyObject *x)
1682 assert(sep != NULL && PyString_Check(sep));
1684 return string_join((PyStringObject *)sep, x);
/* Helper macro to fix up start/end slice values.
 *
 * `end` is clamped to `len` when it exceeds it; a negative `end` or `start`
 * is taken relative to `len`, and becomes 0 if it is still negative after
 * that adjustment.  `start` is not clamped against `len`.
 *
 * `start` and `end` must be modifiable lvalues.  Every argument is expanded
 * more than once, so none of them may have side effects.
 *
 * The body is wrapped in do { } while (0) so the macro expands to exactly
 * one statement and stays correct inside an unbraced if/else; every
 * argument is parenthesized so that arbitrary expressions can be passed.
 */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
1702 Py_LOCAL_INLINE(Py_ssize_t)
1703 string_find_internal(PyStringObject *self, PyObject *args, int dir)
1708 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1710 if (!stringlib_parse_args_finds("find/rfind/index/rindex",
1711 args, &subobj, &start, &end))
1714 if (PyString_Check(subobj)) {
1715 sub = PyString_AS_STRING(subobj);
1716 sub_len = PyString_GET_SIZE(subobj);
1718 #ifdef Py_USING_UNICODE
1719 else if (PyUnicode_Check(subobj))
1720 return PyUnicode_Find(
1721 (PyObject *)self, subobj, start, end, dir);
1723 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1724 /* XXX - the "expected a character buffer object" is pretty
1725 confusing for a non-expert. remap to something else ? */
1729 return stringlib_find_slice(
1730 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1731 sub, sub_len, start, end);
1733 return stringlib_rfind_slice(
1734 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1735 sub, sub_len, start, end);
1739 PyDoc_STRVAR(find__doc__,
1740 "S.find(sub [,start [,end]]) -> int\n\
1742 Return the lowest index in S where substring sub is found,\n\
1743 such that sub is contained within S[start:end]. Optional\n\
1744 arguments start and end are interpreted as in slice notation.\n\
1746 Return -1 on failure.");
1749 string_find(PyStringObject *self, PyObject *args)
1751 Py_ssize_t result = string_find_internal(self, args, +1);
1754 return PyInt_FromSsize_t(result);
1758 PyDoc_STRVAR(index__doc__,
1759 "S.index(sub [,start [,end]]) -> int\n\
1761 Like S.find() but raise ValueError when the substring is not found.");
1764 string_index(PyStringObject *self, PyObject *args)
1766 Py_ssize_t result = string_find_internal(self, args, +1);
1770 PyErr_SetString(PyExc_ValueError,
1771 "substring not found");
1774 return PyInt_FromSsize_t(result);
1778 PyDoc_STRVAR(rfind__doc__,
1779 "S.rfind(sub [,start [,end]]) -> int\n\
1781 Return the highest index in S where substring sub is found,\n\
1782 such that sub is contained within S[start:end]. Optional\n\
1783 arguments start and end are interpreted as in slice notation.\n\
1785 Return -1 on failure.");
1788 string_rfind(PyStringObject *self, PyObject *args)
1790 Py_ssize_t result = string_find_internal(self, args, -1);
1793 return PyInt_FromSsize_t(result);
1797 PyDoc_STRVAR(rindex__doc__,
1798 "S.rindex(sub [,start [,end]]) -> int\n\
1800 Like S.rfind() but raise ValueError when the substring is not found.");
1803 string_rindex(PyStringObject *self, PyObject *args)
1805 Py_ssize_t result = string_find_internal(self, args, -1);
1809 PyErr_SetString(PyExc_ValueError,
1810 "substring not found");
1813 return PyInt_FromSsize_t(result);
1817 Py_LOCAL_INLINE(PyObject *)
1818 do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1820 char *s = PyString_AS_STRING(self);
1821 Py_ssize_t len = PyString_GET_SIZE(self);
1822 char *sep = PyString_AS_STRING(sepobj);
1823 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1827 if (striptype != RIGHTSTRIP) {
1828 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1834 if (striptype != LEFTSTRIP) {
1837 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1841 if (i == 0 && j == len && PyString_CheckExact(self)) {
1843 return (PyObject*)self;
1846 return PyString_FromStringAndSize(s+i, j-i);
1850 Py_LOCAL_INLINE(PyObject *)
1851 do_strip(PyStringObject *self, int striptype)
1853 char *s = PyString_AS_STRING(self);
1854 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
1857 if (striptype != RIGHTSTRIP) {
1858 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1864 if (striptype != LEFTSTRIP) {
1867 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1871 if (i == 0 && j == len && PyString_CheckExact(self)) {
1873 return (PyObject*)self;
1876 return PyString_FromStringAndSize(s+i, j-i);
1880 Py_LOCAL_INLINE(PyObject *)
1881 do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1883 PyObject *sep = NULL;
1885 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1888 if (sep != NULL && sep != Py_None) {
1889 if (PyString_Check(sep))
1890 return do_xstrip(self, striptype, sep);
1891 #ifdef Py_USING_UNICODE
1892 else if (PyUnicode_Check(sep)) {
1893 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1897 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1903 PyErr_Format(PyExc_TypeError,
1904 #ifdef Py_USING_UNICODE
1905 "%s arg must be None, str or unicode",
1907 "%s arg must be None or str",
1909 STRIPNAME(striptype));
1913 return do_strip(self, striptype);
1917 PyDoc_STRVAR(strip__doc__,
1918 "S.strip([chars]) -> string or unicode\n\
1920 Return a copy of the string S with leading and trailing\n\
1921 whitespace removed.\n\
1922 If chars is given and not None, remove characters in chars instead.\n\
1923 If chars is unicode, S will be converted to unicode before stripping");
1926 string_strip(PyStringObject *self, PyObject *args)
1928 if (PyTuple_GET_SIZE(args) == 0)
1929 return do_strip(self, BOTHSTRIP); /* Common case */
1931 return do_argstrip(self, BOTHSTRIP, args);
1935 PyDoc_STRVAR(lstrip__doc__,
1936 "S.lstrip([chars]) -> string or unicode\n\
1938 Return a copy of the string S with leading whitespace removed.\n\
1939 If chars is given and not None, remove characters in chars instead.\n\
1940 If chars is unicode, S will be converted to unicode before stripping");
1943 string_lstrip(PyStringObject *self, PyObject *args)
1945 if (PyTuple_GET_SIZE(args) == 0)
1946 return do_strip(self, LEFTSTRIP); /* Common case */
1948 return do_argstrip(self, LEFTSTRIP, args);
1952 PyDoc_STRVAR(rstrip__doc__,
1953 "S.rstrip([chars]) -> string or unicode\n\
1955 Return a copy of the string S with trailing whitespace removed.\n\
1956 If chars is given and not None, remove characters in chars instead.\n\
1957 If chars is unicode, S will be converted to unicode before stripping");
1960 string_rstrip(PyStringObject *self, PyObject *args)
1962 if (PyTuple_GET_SIZE(args) == 0)
1963 return do_strip(self, RIGHTSTRIP); /* Common case */
1965 return do_argstrip(self, RIGHTSTRIP, args);
1969 PyDoc_STRVAR(lower__doc__,
1970 "S.lower() -> string\n\
1972 Return a copy of the string S converted to lowercase.");
1974 /* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
1976 #define _tolower tolower
1980 string_lower(PyStringObject *self)
1983 Py_ssize_t i, n = PyString_GET_SIZE(self);
1986 newobj = PyString_FromStringAndSize(NULL, n);
1990 s = PyString_AS_STRING(newobj);
1992 Py_MEMCPY(s, PyString_AS_STRING(self), n);
1994 for (i = 0; i < n; i++) {
1995 int c = Py_CHARMASK(s[i]);
2003 PyDoc_STRVAR(upper__doc__,
2004 "S.upper() -> string\n\
2006 Return a copy of the string S converted to uppercase.");
2009 #define _toupper toupper
2013 string_upper(PyStringObject *self)
2016 Py_ssize_t i, n = PyString_GET_SIZE(self);
2019 newobj = PyString_FromStringAndSize(NULL, n);
2023 s = PyString_AS_STRING(newobj);
2025 Py_MEMCPY(s, PyString_AS_STRING(self), n);
2027 for (i = 0; i < n; i++) {
2028 int c = Py_CHARMASK(s[i]);
2036 PyDoc_STRVAR(title__doc__,
2037 "S.title() -> string\n\
2039 Return a titlecased version of S, i.e. words start with uppercase\n\
2040 characters, all remaining cased characters have lowercase.");
2043 string_title(PyStringObject *self)
2045 char *s = PyString_AS_STRING(self), *s_new;
2046 Py_ssize_t i, n = PyString_GET_SIZE(self);
2047 int previous_is_cased = 0;
2050 newobj = PyString_FromStringAndSize(NULL, n);
2053 s_new = PyString_AsString(newobj);
2054 for (i = 0; i < n; i++) {
2055 int c = Py_CHARMASK(*s++);
2057 if (!previous_is_cased)
2059 previous_is_cased = 1;
2060 } else if (isupper(c)) {
2061 if (previous_is_cased)
2063 previous_is_cased = 1;
2065 previous_is_cased = 0;
2071 PyDoc_STRVAR(capitalize__doc__,
2072 "S.capitalize() -> string\n\
2074 Return a copy of the string S with only its first character\n\
2078 string_capitalize(PyStringObject *self)
2080 char *s = PyString_AS_STRING(self), *s_new;
2081 Py_ssize_t i, n = PyString_GET_SIZE(self);
2084 newobj = PyString_FromStringAndSize(NULL, n);
2087 s_new = PyString_AsString(newobj);
2089 int c = Py_CHARMASK(*s++);
2091 *s_new = toupper(c);
2096 for (i = 1; i < n; i++) {
2097 int c = Py_CHARMASK(*s++);
2099 *s_new = tolower(c);
2108 PyDoc_STRVAR(count__doc__,
2109 "S.count(sub[, start[, end]]) -> int\n\
2111 Return the number of non-overlapping occurrences of substring sub in\n\
2112 string S[start:end]. Optional arguments start and end are interpreted\n\
2113 as in slice notation.");
2116 string_count(PyStringObject *self, PyObject *args)
2119 const char *str = PyString_AS_STRING(self), *sub;
2121 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
2123 if (!stringlib_parse_args_finds("count", args, &sub_obj, &start, &end))
2126 if (PyString_Check(sub_obj)) {
2127 sub = PyString_AS_STRING(sub_obj);
2128 sub_len = PyString_GET_SIZE(sub_obj);
2130 #ifdef Py_USING_UNICODE
2131 else if (PyUnicode_Check(sub_obj)) {
2133 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
2137 return PyInt_FromSsize_t(count);
2140 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
2143 ADJUST_INDICES(start, end, PyString_GET_SIZE(self));
2145 return PyInt_FromSsize_t(
2146 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
2150 PyDoc_STRVAR(swapcase__doc__,
2151 "S.swapcase() -> string\n\
2153 Return a copy of the string S with uppercase characters\n\
2154 converted to lowercase and vice versa.");
2157 string_swapcase(PyStringObject *self)
2159 char *s = PyString_AS_STRING(self), *s_new;
2160 Py_ssize_t i, n = PyString_GET_SIZE(self);
2163 newobj = PyString_FromStringAndSize(NULL, n);
2166 s_new = PyString_AsString(newobj);
2167 for (i = 0; i < n; i++) {
2168 int c = Py_CHARMASK(*s++);
2170 *s_new = toupper(c);
2172 else if (isupper(c)) {
2173 *s_new = tolower(c);
2183 PyDoc_STRVAR(translate__doc__,
2184 "S.translate(table [,deletechars]) -> string\n\
2186 Return a copy of the string S, where all characters occurring\n\
2187 in the optional argument deletechars are removed, and the\n\
2188 remaining characters have been mapped through the given\n\
2189 translation table, which must be a string of length 256 or None.\n\
2190 If the table argument is None, no translation is applied and\n\
2191 the operation simply removes the characters in deletechars.");
2194 string_translate(PyStringObject *self, PyObject *args)
2196 register char *input, *output;
2198 register Py_ssize_t i, c, changed = 0;
2199 PyObject *input_obj = (PyObject*)self;
2200 const char *output_start, *del_table=NULL;
2201 Py_ssize_t inlen, tablen, dellen = 0;
2203 int trans_table[256];
2204 PyObject *tableobj, *delobj = NULL;
2206 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
2207 &tableobj, &delobj))
2210 if (PyString_Check(tableobj)) {
2211 table = PyString_AS_STRING(tableobj);
2212 tablen = PyString_GET_SIZE(tableobj);
2214 else if (tableobj == Py_None) {
2218 #ifdef Py_USING_UNICODE
2219 else if (PyUnicode_Check(tableobj)) {
2220 /* Unicode .translate() does not support the deletechars
2221 parameter; instead a mapping to None will cause characters
2223 if (delobj != NULL) {
2224 PyErr_SetString(PyExc_TypeError,
2225 "deletions are implemented differently for unicode");
2228 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2231 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
2234 if (tablen != 256) {
2235 PyErr_SetString(PyExc_ValueError,
2236 "translation table must be 256 characters long");
2240 if (delobj != NULL) {
2241 if (PyString_Check(delobj)) {
2242 del_table = PyString_AS_STRING(delobj);
2243 dellen = PyString_GET_SIZE(delobj);
2245 #ifdef Py_USING_UNICODE
2246 else if (PyUnicode_Check(delobj)) {
2247 PyErr_SetString(PyExc_TypeError,
2248 "deletions are implemented differently for unicode");
2252 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2260 inlen = PyString_GET_SIZE(input_obj);
2261 result = PyString_FromStringAndSize((char *)NULL, inlen);
2264 output_start = output = PyString_AsString(result);
2265 input = PyString_AS_STRING(input_obj);
2267 if (dellen == 0 && table != NULL) {
2268 /* If no deletions are required, use faster code */
2269 for (i = inlen; --i >= 0; ) {
2270 c = Py_CHARMASK(*input++);
2271 if (Py_CHARMASK((*output++ = table[c])) != c)
2274 if (changed || !PyString_CheckExact(input_obj))
2277 Py_INCREF(input_obj);
2281 if (table == NULL) {
2282 for (i = 0; i < 256; i++)
2283 trans_table[i] = Py_CHARMASK(i);
2285 for (i = 0; i < 256; i++)
2286 trans_table[i] = Py_CHARMASK(table[i]);
2289 for (i = 0; i < dellen; i++)
2290 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2292 for (i = inlen; --i >= 0; ) {
2293 c = Py_CHARMASK(*input++);
2294 if (trans_table[c] != -1)
2295 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2299 if (!changed && PyString_CheckExact(input_obj)) {
2301 Py_INCREF(input_obj);
2304 /* Fix the size of the resulting string */
2305 if (inlen > 0 && _PyString_Resize(&result, output - output_start))
/* find and count characters and substrings */

/* Return a char * to the first byte equal to `c` among the first
 * `target_len` bytes starting at `target`, or NULL when there is none.
 * This is memchr() with the result cast back to a writable char *.
 */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2316 /* String ops must return a string. */
2317 /* If the object is subclass of string, create a copy */
2318 Py_LOCAL(PyStringObject *)
2319 return_self(PyStringObject *self)
2321 if (PyString_CheckExact(self)) {
2325 return (PyStringObject *)PyString_FromStringAndSize(
2326 PyString_AS_STRING(self),
2327 PyString_GET_SIZE(self));
2330 Py_LOCAL_INLINE(Py_ssize_t)
2331 countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
2334 const char *start=target;
2335 const char *end=target+target_len;
2337 while ( (start=findchar(start, end-start, c)) != NULL ) {
2339 if (count >= maxcount)
2347 /* Algorithms for different cases of string replacement */
2349 /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2350 Py_LOCAL(PyStringObject *)
2351 replace_interleave(PyStringObject *self,
2352 const char *to_s, Py_ssize_t to_len,
2353 Py_ssize_t maxcount)
2355 char *self_s, *result_s;
2356 Py_ssize_t self_len, result_len;
2357 Py_ssize_t count, i, product;
2358 PyStringObject *result;
2360 self_len = PyString_GET_SIZE(self);
2362 /* 1 at the end plus 1 after every character */
2364 if (maxcount < count)
2367 /* Check for overflow */
2368 /* result_len = count * to_len + self_len; */
2369 product = count * to_len;
2370 if (product / to_len != count) {
2371 PyErr_SetString(PyExc_OverflowError,
2372 "replace string is too long");
2375 result_len = product + self_len;
2376 if (result_len < 0) {
2377 PyErr_SetString(PyExc_OverflowError,
2378 "replace string is too long");
2382 if (! (result = (PyStringObject *)
2383 PyString_FromStringAndSize(NULL, result_len)) )
2386 self_s = PyString_AS_STRING(self);
2387 result_s = PyString_AS_STRING(result);
2389 /* TODO: special case single character, which doesn't need memcpy */
2391 /* Lay the first one down (guaranteed this will occur) */
2392 Py_MEMCPY(result_s, to_s, to_len);
2396 for (i=0; i<count; i++) {
2397 *result_s++ = *self_s++;
2398 Py_MEMCPY(result_s, to_s, to_len);
2402 /* Copy the rest of the original string */
2403 Py_MEMCPY(result_s, self_s, self_len-i);
2408 /* Special case for deleting a single character */
2409 /* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2410 Py_LOCAL(PyStringObject *)
2411 replace_delete_single_character(PyStringObject *self,
2412 char from_c, Py_ssize_t maxcount)
2414 char *self_s, *result_s;
2415 char *start, *next, *end;
2416 Py_ssize_t self_len, result_len;
2418 PyStringObject *result;
2420 self_len = PyString_GET_SIZE(self);
2421 self_s = PyString_AS_STRING(self);
2423 count = countchar(self_s, self_len, from_c, maxcount);
2425 return return_self(self);
2428 result_len = self_len - count; /* from_len == 1 */
2429 assert(result_len>=0);
2431 if ( (result = (PyStringObject *)
2432 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2434 result_s = PyString_AS_STRING(result);
2437 end = self_s + self_len;
2438 while (count-- > 0) {
2439 next = findchar(start, end-start, from_c);
2442 Py_MEMCPY(result_s, start, next-start);
2443 result_s += (next-start);
2446 Py_MEMCPY(result_s, start, end-start);
2451 /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2453 Py_LOCAL(PyStringObject *)
2454 replace_delete_substring(PyStringObject *self,
2455 const char *from_s, Py_ssize_t from_len,
2456 Py_ssize_t maxcount) {
2457 char *self_s, *result_s;
2458 char *start, *next, *end;
2459 Py_ssize_t self_len, result_len;
2460 Py_ssize_t count, offset;
2461 PyStringObject *result;
2463 self_len = PyString_GET_SIZE(self);
2464 self_s = PyString_AS_STRING(self);
2466 count = stringlib_count(self_s, self_len,
2472 return return_self(self);
2475 result_len = self_len - (count * from_len);
2476 assert (result_len>=0);
2478 if ( (result = (PyStringObject *)
2479 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2482 result_s = PyString_AS_STRING(result);
2485 end = self_s + self_len;
2486 while (count-- > 0) {
2487 offset = stringlib_find(start, end-start,
2492 next = start + offset;
2494 Py_MEMCPY(result_s, start, next-start);
2496 result_s += (next-start);
2497 start = next+from_len;
2499 Py_MEMCPY(result_s, start, end-start);
2503 /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2504 Py_LOCAL(PyStringObject *)
2505 replace_single_character_in_place(PyStringObject *self,
2506 char from_c, char to_c,
2507 Py_ssize_t maxcount)
2509 char *self_s, *result_s, *start, *end, *next;
2510 Py_ssize_t self_len;
2511 PyStringObject *result;
2513 /* The result string will be the same size */
2514 self_s = PyString_AS_STRING(self);
2515 self_len = PyString_GET_SIZE(self);
2517 next = findchar(self_s, self_len, from_c);
2520 /* No matches; return the original string */
2521 return return_self(self);
2524 /* Need to make a new string */
2525 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2528 result_s = PyString_AS_STRING(result);
2529 Py_MEMCPY(result_s, self_s, self_len);
2531 /* change everything in-place, starting with this one */
2532 start = result_s + (next-self_s);
2535 end = result_s + self_len;
2537 while (--maxcount > 0) {
2538 next = findchar(start, end-start, from_c);
2548 /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2549 Py_LOCAL(PyStringObject *)
2550 replace_substring_in_place(PyStringObject *self,
2551 const char *from_s, Py_ssize_t from_len,
2552 const char *to_s, Py_ssize_t to_len,
2553 Py_ssize_t maxcount)
2555 char *result_s, *start, *end;
2557 Py_ssize_t self_len, offset;
2558 PyStringObject *result;
2560 /* The result string will be the same size */
2562 self_s = PyString_AS_STRING(self);
2563 self_len = PyString_GET_SIZE(self);
2565 offset = stringlib_find(self_s, self_len,
2569 /* No matches; return the original string */
2570 return return_self(self);
2573 /* Need to make a new string */
2574 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2577 result_s = PyString_AS_STRING(result);
2578 Py_MEMCPY(result_s, self_s, self_len);
2580 /* change everything in-place, starting with this one */
2581 start = result_s + offset;
2582 Py_MEMCPY(start, to_s, from_len);
2584 end = result_s + self_len;
2586 while ( --maxcount > 0) {
2587 offset = stringlib_find(start, end-start,
2592 Py_MEMCPY(start+offset, to_s, from_len);
2593 start += offset+from_len;
2599 /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2600 Py_LOCAL(PyStringObject *)
2601 replace_single_character(PyStringObject *self,
2603 const char *to_s, Py_ssize_t to_len,
2604 Py_ssize_t maxcount)
2606 char *self_s, *result_s;
2607 char *start, *next, *end;
2608 Py_ssize_t self_len, result_len;
2609 Py_ssize_t count, product;
2610 PyStringObject *result;
2612 self_s = PyString_AS_STRING(self);
2613 self_len = PyString_GET_SIZE(self);
2615 count = countchar(self_s, self_len, from_c, maxcount);
2617 /* no matches, return unchanged */
2618 return return_self(self);
2621 /* use the difference between current and new, hence the "-1" */
2622 /* result_len = self_len + count * (to_len-1) */
2623 product = count * (to_len-1);
2624 if (product / (to_len-1) != count) {
2625 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2628 result_len = self_len + product;
2629 if (result_len < 0) {
2630 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2634 if ( (result = (PyStringObject *)
2635 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2637 result_s = PyString_AS_STRING(result);
2640 end = self_s + self_len;
2641 while (count-- > 0) {
2642 next = findchar(start, end-start, from_c);
2646 if (next == start) {
2647 /* replace with the 'to' */
2648 Py_MEMCPY(result_s, to_s, to_len);
2652 /* copy the unchanged old then the 'to' */
2653 Py_MEMCPY(result_s, start, next-start);
2654 result_s += (next-start);
2655 Py_MEMCPY(result_s, to_s, to_len);
2660 /* Copy the remainder of the remaining string */
2661 Py_MEMCPY(result_s, start, end-start);
2666 /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2667 Py_LOCAL(PyStringObject *)
2668 replace_substring(PyStringObject *self,
2669 const char *from_s, Py_ssize_t from_len,
2670 const char *to_s, Py_ssize_t to_len,
2671 Py_ssize_t maxcount) {
2672 char *self_s, *result_s;
2673 char *start, *next, *end;
2674 Py_ssize_t self_len, result_len;
2675 Py_ssize_t count, offset, product;
2676 PyStringObject *result;
2678 self_s = PyString_AS_STRING(self);
2679 self_len = PyString_GET_SIZE(self);
2681 count = stringlib_count(self_s, self_len,
2686 /* no matches, return unchanged */
2687 return return_self(self);
2690 /* Check for overflow */
2691 /* result_len = self_len + count * (to_len-from_len) */
2692 product = count * (to_len-from_len);
2693 if (product / (to_len-from_len) != count) {
2694 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2697 result_len = self_len + product;
2698 if (result_len < 0) {
2699 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2703 if ( (result = (PyStringObject *)
2704 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2706 result_s = PyString_AS_STRING(result);
2709 end = self_s + self_len;
2710 while (count-- > 0) {
2711 offset = stringlib_find(start, end-start,
2716 next = start+offset;
2717 if (next == start) {
2718 /* replace with the 'to' */
2719 Py_MEMCPY(result_s, to_s, to_len);
2723 /* copy the unchanged old then the 'to' */
2724 Py_MEMCPY(result_s, start, next-start);
2725 result_s += (next-start);
2726 Py_MEMCPY(result_s, to_s, to_len);
2728 start = next+from_len;
2731 /* Copy the remainder of the remaining string */
2732 Py_MEMCPY(result_s, start, end-start);
2738 Py_LOCAL(PyStringObject *)
2739 replace(PyStringObject *self,
2740 const char *from_s, Py_ssize_t from_len,
2741 const char *to_s, Py_ssize_t to_len,
2742 Py_ssize_t maxcount)
2745 maxcount = PY_SSIZE_T_MAX;
2746 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2747 /* nothing to do; return the original string */
2748 return return_self(self);
2751 if (maxcount == 0 ||
2752 (from_len == 0 && to_len == 0)) {
2753 /* nothing to do; return the original string */
2754 return return_self(self);
2757 /* Handle zero-length special cases */
2759 if (from_len == 0) {
2760 /* insert the 'to' string everywhere. */
2761 /* >>> "Python".replace("", ".") */
2762 /* '.P.y.t.h.o.n.' */
2763 return replace_interleave(self, to_s, to_len, maxcount);
2766 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2767 /* point for an empty self string to generate a non-empty string */
2768 /* Special case so the remaining code always gets a non-empty string */
2769 if (PyString_GET_SIZE(self) == 0) {
2770 return return_self(self);
2774 /* delete all occurances of 'from' string */
2775 if (from_len == 1) {
2776 return replace_delete_single_character(
2777 self, from_s[0], maxcount);
2779 return replace_delete_substring(self, from_s, from_len, maxcount);
2783 /* Handle special case where both strings have the same length */
2785 if (from_len == to_len) {
2786 if (from_len == 1) {
2787 return replace_single_character_in_place(
2793 return replace_substring_in_place(
2794 self, from_s, from_len, to_s, to_len, maxcount);
2798 /* Otherwise use the more generic algorithms */
2799 if (from_len == 1) {
2800 return replace_single_character(self, from_s[0],
2801 to_s, to_len, maxcount);
2803 /* len('from')>=2, len('to')>=1 */
2804 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
2808 PyDoc_STRVAR(replace__doc__,
2809 "S.replace(old, new[, count]) -> string\n\
2811 Return a copy of string S with all occurrences of substring\n\
2812 old replaced by new. If the optional argument count is\n\
2813 given, only the first count occurrences are replaced.");
2816 string_replace(PyStringObject *self, PyObject *args)
2818 Py_ssize_t count = -1;
2819 PyObject *from, *to;
2820 const char *from_s, *to_s;
2821 Py_ssize_t from_len, to_len;
2823 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2826 if (PyString_Check(from)) {
2827 from_s = PyString_AS_STRING(from);
2828 from_len = PyString_GET_SIZE(from);
2830 #ifdef Py_USING_UNICODE
2831 if (PyUnicode_Check(from))
2832 return PyUnicode_Replace((PyObject *)self,
2835 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2838 if (PyString_Check(to)) {
2839 to_s = PyString_AS_STRING(to);
2840 to_len = PyString_GET_SIZE(to);
2842 #ifdef Py_USING_UNICODE
2843 else if (PyUnicode_Check(to))
2844 return PyUnicode_Replace((PyObject *)self,
2847 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2850 return (PyObject *)replace((PyStringObject *) self,
2852 to_s, to_len, count);
2857 /* Matches the end (direction >= 0) or start (direction < 0) of self
2858 * against substr, using the start and end arguments. Returns
2859 * -1 on error, 0 if not found and 1 if found.
2862 _string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
2863 Py_ssize_t end, int direction)
2865 Py_ssize_t len = PyString_GET_SIZE(self);
2870 if (PyString_Check(substr)) {
2871 sub = PyString_AS_STRING(substr);
2872 slen = PyString_GET_SIZE(substr);
2874 #ifdef Py_USING_UNICODE
2875 else if (PyUnicode_Check(substr))
2876 return PyUnicode_Tailmatch((PyObject *)self,
2877 substr, start, end, direction);
2879 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2881 str = PyString_AS_STRING(self);
2883 ADJUST_INDICES(start, end, len);
2885 if (direction < 0) {
2887 if (start+slen > len)
2891 if (end-start < slen || start > len)
2894 if (end-slen > start)
2897 if (end-start >= slen)
2898 return ! memcmp(str+start, sub, slen);
2903 PyDoc_STRVAR(startswith__doc__,
2904 "S.startswith(prefix[, start[, end]]) -> bool\n\
2906 Return True if S starts with the specified prefix, False otherwise.\n\
2907 With optional start, test S beginning at that position.\n\
2908 With optional end, stop comparing S at that position.\n\
2909 prefix can also be a tuple of strings to try.");
2912 string_startswith(PyStringObject *self, PyObject *args)
2914 Py_ssize_t start = 0;
2915 Py_ssize_t end = PY_SSIZE_T_MAX;
2919 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
2921 if (PyTuple_Check(subobj)) {
2923 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2924 result = _string_tailmatch(self,
2925 PyTuple_GET_ITEM(subobj, i),
2935 result = _string_tailmatch(self, subobj, start, end, -1);
2937 if (PyErr_ExceptionMatches(PyExc_TypeError))
2938 PyErr_Format(PyExc_TypeError, "startswith first arg must be str, "
2939 "unicode, or tuple, not %s", Py_TYPE(subobj)->tp_name);
2943 return PyBool_FromLong(result);
2947 PyDoc_STRVAR(endswith__doc__,
2948 "S.endswith(suffix[, start[, end]]) -> bool\n\
2950 Return True if S ends with the specified suffix, False otherwise.\n\
2951 With optional start, test S beginning at that position.\n\
2952 With optional end, stop comparing S at that position.\n\
2953 suffix can also be a tuple of strings to try.");
2956 string_endswith(PyStringObject *self, PyObject *args)
2958 Py_ssize_t start = 0;
2959 Py_ssize_t end = PY_SSIZE_T_MAX;
2963 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
2965 if (PyTuple_Check(subobj)) {
2967 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2968 result = _string_tailmatch(self,
2969 PyTuple_GET_ITEM(subobj, i),
2979 result = _string_tailmatch(self, subobj, start, end, +1);
2981 if (PyErr_ExceptionMatches(PyExc_TypeError))
2982 PyErr_Format(PyExc_TypeError, "endswith first arg must be str, "
2983 "unicode, or tuple, not %s", Py_TYPE(subobj)->tp_name);
2987 return PyBool_FromLong(result);
2991 PyDoc_STRVAR(encode__doc__,
2992 "S.encode([encoding[,errors]]) -> object\n\
2994 Encodes S using the codec registered for encoding. encoding defaults\n\
2995 to the default encoding. errors may be given to set a different error\n\
2996 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2997 a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
2998 'xmlcharrefreplace' as well as any other name registered with\n\
2999 codecs.register_error that is able to handle UnicodeEncodeErrors.");
3002 string_encode(PyStringObject *self, PyObject *args, PyObject *kwargs)
3004 static char *kwlist[] = {"encoding", "errors", 0};
3005 char *encoding = NULL;
3006 char *errors = NULL;
3009 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:encode",
3010 kwlist, &encoding, &errors))
3012 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
3015 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3016 PyErr_Format(PyExc_TypeError,
3017 "encoder did not return a string/unicode object "
3019 Py_TYPE(v)->tp_name);
3030 PyDoc_STRVAR(decode__doc__,
3031 "S.decode([encoding[,errors]]) -> object\n\
3033 Decodes S using the codec registered for encoding. encoding defaults\n\
3034 to the default encoding. errors may be given to set a different error\n\
3035 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3036 a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3037 as well as any other name registered with codecs.register_error that is\n\
3038 able to handle UnicodeDecodeErrors.");
3041 string_decode(PyStringObject *self, PyObject *args, PyObject *kwargs)
3043 static char *kwlist[] = {"encoding", "errors", 0};
3044 char *encoding = NULL;
3045 char *errors = NULL;
3048 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode",
3049 kwlist, &encoding, &errors))
3051 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
3054 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3055 PyErr_Format(PyExc_TypeError,
3056 "decoder did not return a string/unicode object "
3058 Py_TYPE(v)->tp_name);
3069 PyDoc_STRVAR(expandtabs__doc__,
3070 "S.expandtabs([tabsize]) -> string\n\
3072 Return a copy of S where all tab characters are expanded using spaces.\n\
3073 If tabsize is not given, a tab size of 8 characters is assumed.");
/* str.expandtabs([tabsize]): tabsize is an optional C int ("|i").
   The work is done in two passes over self: the first only computes the
   length of the output (i + j), checking its additions against
   PY_SSIZE_T_MAX; the second allocates a string of that length and fills
   it.  OverflowError("new string is too long") is the error set for an
   oversized result. */
3076 string_expandtabs(PyStringObject *self, PyObject *args)
3078 const char *e, *p, *qe;
3080 Py_ssize_t i, j, incr;
3084 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3087 /* First pass: determine size of output string */
3088 i = 0; /* chars up to and including most recent \n or \r */
3089 j = 0; /* chars since most recent \n or \r (use in tab calculations) */
3090 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */
3091 for (p = PyString_AS_STRING(self); p < e; p++)
/* incr is the distance from column j to the next multiple of tabsize. */
3094 incr = tabsize - (j % tabsize);
3095 if (j > PY_SSIZE_T_MAX - incr)
3101 if (j > PY_SSIZE_T_MAX - 1)
3104 if (*p == '\n' || *p == '\r') {
3105 if (i > PY_SSIZE_T_MAX - j)
3112 if (i > PY_SSIZE_T_MAX - j)
3115 /* Second pass: create output string and fill it */
3116 u = PyString_FromStringAndSize(NULL, i + j);
3120 j = 0; /* same as in first pass */
3121 q = PyString_AS_STRING(u); /* next output char */
3122 qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */
3124 for (p = PyString_AS_STRING(self); p < e; p++)
/* In this pass i is reused as the tab-stop distance for the current tab. */
3127 i = tabsize - (j % tabsize);
3141 if (*p == '\n' || *p == '\r')
3150 PyErr_SetString(PyExc_OverflowError, "new string is too long");
/* Shared helper for ljust/rjust/center/zfill.  Builds a string of length
   left + len(self) + right: `left` copies of `fill`, then the contents of
   self, then a trailing region starting at offset left + len(self).
   When no padding is requested and self is an exact str, self itself is
   returned instead of a copy. */
3154 Py_LOCAL_INLINE(PyObject *)
3155 pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
3164 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
3166 return (PyObject *)self;
/* Allocate the result uninitialised (NULL source) and fill it piecewise. */
3169 u = PyString_FromStringAndSize(NULL,
3170 left + PyString_GET_SIZE(self) + right);
3173 memset(PyString_AS_STRING(u), fill, left);
3174 Py_MEMCPY(PyString_AS_STRING(u) + left,
3175 PyString_AS_STRING(self),
3176 PyString_GET_SIZE(self));
3178 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3185 PyDoc_STRVAR(ljust__doc__,
3186 "S.ljust(width[, fillchar]) -> string\n"
3188 "Return S left-justified in a string of length width. Padding is\n"
3189 "done using the specified fill character (default is a space).");
/* str.ljust(width[, fillchar]): width is parsed as a Py_ssize_t ("n") and
   fillchar as an optional single char ("c") defaulting to a space.
   An exact str that is already at least `width` long is returned as-is;
   otherwise pad() is asked for width - len(self) fill characters on the
   right and none on the left. */
3192 string_ljust(PyStringObject *self, PyObject *args)
3195 char fillchar = ' ';
3197 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
3200 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3202 return (PyObject*) self;
3205 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
/* NOTE(review): unlike ljust__doc__, this docstring does not end with a
   period; left untouched here because it is user-visible text. */
3209 PyDoc_STRVAR(rjust__doc__,
3210 "S.rjust(width[, fillchar]) -> string\n"
3212 "Return S right-justified in a string of length width. Padding is\n"
3213 "done using the specified fill character (default is a space)");
/* str.rjust(width[, fillchar]): width is parsed as a Py_ssize_t ("n") and
   fillchar as an optional single char ("c") defaulting to a space.
   An exact str that is already at least `width` long is returned as-is;
   otherwise pad() is asked for width - len(self) fill characters on the
   left and none on the right. */
3216 string_rjust(PyStringObject *self, PyObject *args)
3219 char fillchar = ' ';
3221 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
3224 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3226 return (PyObject*) self;
3229 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
/* NOTE(review): unlike ljust__doc__, this docstring does not end with a
   period; left untouched here because it is user-visible text. */
3233 PyDoc_STRVAR(center__doc__,
3234 "S.center(width[, fillchar]) -> string\n"
3236 "Return S centered in a string of length width. Padding is\n"
3237 "done using the specified fill character (default is a space)");
/* str.center(width[, fillchar]): width is parsed as a Py_ssize_t ("n") and
   fillchar as an optional single char ("c") defaulting to a space.
   An exact str that is already at least `width` long is returned as-is.
   Otherwise the total margin is split between both sides and handed to
   pad(). */
3240 string_center(PyStringObject *self, PyObject *args)
3242 Py_ssize_t marg, left;
3244 char fillchar = ' ';
3246 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
3249 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3251 return (PyObject*) self;
3254 marg = width - PyString_GET_SIZE(self);
/* Half of the margin goes left; the extra +1 is added only when both the
   margin and the requested width are odd (low bit of marg & width). */
3255 left = marg / 2 + (marg & width & 1);
3257 return pad(self, left, marg - left, fillchar);
3260 PyDoc_STRVAR(zfill__doc__,
3261 "S.zfill(width) -> string\n"
3263 "Pad a numeric string S with zeros on the left, to fill a field\n"
3264 "of the specified width. The string S is never truncated.");
/* str.zfill(width): width is parsed as a Py_ssize_t ("n").
   If self is already at least `width` long, an exact str is returned
   as-is and anything else is copied into a new plain str.  Otherwise
   pad() prepends width - len(self) '0' characters, and a leading '+' or
   '-' found just after the padding is moved to the front. */
3267 string_zfill(PyStringObject *self, PyObject *args)
3274 if (!PyArg_ParseTuple(args, "n:zfill", &width))
3277 if (PyString_GET_SIZE(self) >= width) {
3278 if (PyString_CheckExact(self)) {
3280 return (PyObject*) self;
3283 return PyString_FromStringAndSize(
3284 PyString_AS_STRING(self),
3285 PyString_GET_SIZE(self)
3289 fill = width - PyString_GET_SIZE(self);
3291 s = pad(self, fill, 0, '0');
3296 p = PyString_AS_STRING(s);
/* p[fill] is the first character of the original string inside s. */
3297 if (p[fill] == '+' || p[fill] == '-') {
3298 /* move sign to beginning of string */
3303 return (PyObject*) s;
3306 PyDoc_STRVAR(isspace__doc__,
3307 "S.isspace() -> bool\n\
3309 Return True if all characters in S are whitespace\n\
3310 and there is at least one character in S, False otherwise.");
/* str.isspace(): the bytes of self are examined as unsigned char.
   A one-character string has a fast path, an empty string yields False,
   and otherwise the scan from p to e yields False from inside the loop
   or True once it runs to the end. */
3313 string_isspace(PyStringObject *self)
3315 register const unsigned char *p
3316 = (unsigned char *) PyString_AS_STRING(self);
3317 register const unsigned char *e;
3319 /* Shortcut for single character strings */
3320 if (PyString_GET_SIZE(self) == 1 &&
3322 return PyBool_FromLong(1);
3324 /* Special case for empty strings */
3325 if (PyString_GET_SIZE(self) == 0)
3326 return PyBool_FromLong(0);
3328 e = p + PyString_GET_SIZE(self);
3329 for (; p < e; p++) {
3331 return PyBool_FromLong(0);
3333 return PyBool_FromLong(1);
3337 PyDoc_STRVAR(isalpha__doc__,
3338 "S.isalpha() -> bool\n\
3340 Return True if all characters in S are alphabetic\n\
3341 and there is at least one character in S, False otherwise.");
/* str.isalpha(): the bytes of self are examined as unsigned char.
   A one-character string has a fast path, an empty string yields False,
   and otherwise the scan from p to e yields False from inside the loop
   or True once it runs to the end. */
3344 string_isalpha(PyStringObject *self)
3346 register const unsigned char *p
3347 = (unsigned char *) PyString_AS_STRING(self);
3348 register const unsigned char *e;
3350 /* Shortcut for single character strings */
3351 if (PyString_GET_SIZE(self) == 1 &&
3353 return PyBool_FromLong(1);
3355 /* Special case for empty strings */
3356 if (PyString_GET_SIZE(self) == 0)
3357 return PyBool_FromLong(0);
3359 e = p + PyString_GET_SIZE(self);
3360 for (; p < e; p++) {
3362 return PyBool_FromLong(0);
3364 return PyBool_FromLong(1);
3368 PyDoc_STRVAR(isalnum__doc__,
3369 "S.isalnum() -> bool\n\
3371 Return True if all characters in S are alphanumeric\n\
3372 and there is at least one character in S, False otherwise.");
/* str.isalnum(): the bytes of self are examined as unsigned char.
   A one-character string has a fast path, an empty string yields False,
   and otherwise the scan from p to e yields False from inside the loop
   or True once it runs to the end. */
3375 string_isalnum(PyStringObject *self)
3377 register const unsigned char *p
3378 = (unsigned char *) PyString_AS_STRING(self);
3379 register const unsigned char *e;
3381 /* Shortcut for single character strings */
3382 if (PyString_GET_SIZE(self) == 1 &&
3384 return PyBool_FromLong(1);
3386 /* Special case for empty strings */
3387 if (PyString_GET_SIZE(self) == 0)
3388 return PyBool_FromLong(0);
3390 e = p + PyString_GET_SIZE(self);
3391 for (; p < e; p++) {
3393 return PyBool_FromLong(0);
3395 return PyBool_FromLong(1);
3399 PyDoc_STRVAR(isdigit__doc__,
3400 "S.isdigit() -> bool\n\
3402 Return True if all characters in S are digits\n\
3403 and there is at least one character in S, False otherwise.");
/* str.isdigit(): the bytes of self are examined as unsigned char.
   A one-character string has a fast path, an empty string yields False,
   and otherwise the scan from p to e yields False from inside the loop
   or True once it runs to the end. */
3406 string_isdigit(PyStringObject *self)
3408 register const unsigned char *p
3409 = (unsigned char *) PyString_AS_STRING(self);
3410 register const unsigned char *e;
3412 /* Shortcut for single character strings */
3413 if (PyString_GET_SIZE(self) == 1 &&
3415 return PyBool_FromLong(1);
3417 /* Special case for empty strings */
3418 if (PyString_GET_SIZE(self) == 0)
3419 return PyBool_FromLong(0);
3421 e = p + PyString_GET_SIZE(self);
3422 for (; p < e; p++) {
3424 return PyBool_FromLong(0);
3426 return PyBool_FromLong(1);
3430 PyDoc_STRVAR(islower__doc__,
3431 "S.islower() -> bool\n\
3433 Return True if all cased characters in S are lowercase and there is\n\
3434 at least one cased character in S, False otherwise.");
/* str.islower(): the bytes of self are examined as unsigned char.
   A one-character string is answered directly by islower(); an empty
   string yields False.  For longer strings the scan can return False
   early; otherwise the result is the `cased` flag, which the loop
   consults together with islower() for each byte. */
3437 string_islower(PyStringObject *self)
3439 register const unsigned char *p
3440 = (unsigned char *) PyString_AS_STRING(self);
3441 register const unsigned char *e;
3444 /* Shortcut for single character strings */
3445 if (PyString_GET_SIZE(self) == 1)
3446 return PyBool_FromLong(islower(*p) != 0);
3448 /* Special case for empty strings */
3449 if (PyString_GET_SIZE(self) == 0)
3450 return PyBool_FromLong(0);
3452 e = p + PyString_GET_SIZE(self);
3454 for (; p < e; p++) {
3456 return PyBool_FromLong(0);
3457 else if (!cased && islower(*p))
3460 return PyBool_FromLong(cased);
3464 PyDoc_STRVAR(isupper__doc__,
3465 "S.isupper() -> bool\n\
3467 Return True if all cased characters in S are uppercase and there is\n\
3468 at least one cased character in S, False otherwise.");
/* str.isupper(): the bytes of self are examined as unsigned char.
   A one-character string is answered directly by isupper(); an empty
   string yields False.  For longer strings the scan can return False
   early; otherwise the result is the `cased` flag, which the loop
   consults together with isupper() for each byte. */
3471 string_isupper(PyStringObject *self)
3473 register const unsigned char *p
3474 = (unsigned char *) PyString_AS_STRING(self);
3475 register const unsigned char *e;
3478 /* Shortcut for single character strings */
3479 if (PyString_GET_SIZE(self) == 1)
3480 return PyBool_FromLong(isupper(*p) != 0);
3482 /* Special case for empty strings */
3483 if (PyString_GET_SIZE(self) == 0)
3484 return PyBool_FromLong(0);
3486 e = p + PyString_GET_SIZE(self);
3488 for (; p < e; p++) {
3490 return PyBool_FromLong(0);
3491 else if (!cased && isupper(*p))
3494 return PyBool_FromLong(cased);
3498 PyDoc_STRVAR(istitle__doc__,
3499 "S.istitle() -> bool\n\
3501 Return True if S is a titlecased string and there is at least one\n\
3502 character in S, i.e. uppercase characters may only follow uncased\n\
3503 characters and lowercase characters only cased ones. Return False\n\
/* str.istitle(): the bytes of self are examined as unsigned char.
   A one-character string is answered directly by isupper(); an empty
   string yields False.  The loop tracks in previous_is_cased whether the
   preceding byte was a cased character and returns False as soon as the
   ordering rule from the docstring is violated; otherwise the result is
   the `cased` flag.
   NOTE(review): the `uncased` parameter is not referenced by any of the
   statements shown here. */
3507 string_istitle(PyStringObject *self, PyObject *uncased)
3509 register const unsigned char *p
3510 = (unsigned char *) PyString_AS_STRING(self);
3511 register const unsigned char *e;
3512 int cased, previous_is_cased;
3514 /* Shortcut for single character strings */
3515 if (PyString_GET_SIZE(self) == 1)
3516 return PyBool_FromLong(isupper(*p) != 0);
3518 /* Special case for empty strings */
3519 if (PyString_GET_SIZE(self) == 0)
3520 return PyBool_FromLong(0);
3522 e = p + PyString_GET_SIZE(self);
3524 previous_is_cased = 0;
3525 for (; p < e; p++) {
3526 register const unsigned char ch = *p;
3529 if (previous_is_cased)
3530 return PyBool_FromLong(0);
3531 previous_is_cased = 1;
/* A lowercase byte is only acceptable directly after a cased byte. */
3534 else if (islower(ch)) {
3535 if (!previous_is_cased)
3536 return PyBool_FromLong(0);
3537 previous_is_cased = 1;
3541 previous_is_cased = 0;
3543 return PyBool_FromLong(cased);
3547 PyDoc_STRVAR(splitlines__doc__,
3548 "S.splitlines([keepends]) -> list of strings\n\
3550 Return a list of the lines in S, breaking at line boundaries.\n\
3551 Line breaks are not included in the resulting list unless keepends\n\
3552 is given and true.");
/* str.splitlines([keepends]): keepends is parsed as an optional C int
   ("|i"); the actual splitting is delegated to stringlib_splitlines(),
   which receives self together with its character buffer and length. */
3555 string_splitlines(PyStringObject *self, PyObject *args)
3559 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
3562 return stringlib_splitlines(
3563 (PyObject*) self, PyString_AS_STRING(self), PyString_GET_SIZE(self),
3568 PyDoc_STRVAR(sizeof__doc__,
3569 "S.__sizeof__() -> size of S in memory, in bytes");
/* str.__sizeof__(): returns, as a Python int, the fixed object size
   (PyStringObject_SIZE) plus the string length multiplied by the type's
   tp_itemsize. */
3572 string_sizeof(PyStringObject *v)
3575 res = PyStringObject_SIZE + PyString_GET_SIZE(v) * Py_TYPE(v)->tp_itemsize;
3576 return PyInt_FromSsize_t(res);
/* str.__getnewargs__(): returns a 1-tuple holding a string built from the
   raw buffer and its length ("s#"). */
3580 string_getnewargs(PyStringObject *v)
3582 return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
3586 #include "stringlib/string_format.h"
/* Docstring for S.format(); it is attached to do_string_format in
   string_methods. */
3588 PyDoc_STRVAR(format__doc__,
3589 "S.format(*args, **kwargs) -> string\n\
3591 Return a formatted version of S, using substitutions from args and kwargs.\n\
3592 The substitutions are identified by braces ('{' and '}').");
/* str.__format__(format_spec): takes exactly one object argument.
   A format_spec that is neither str nor unicode sets a TypeError.
   Otherwise it is converted with PyObject_Str() (the result is kept in
   tmp) and the formatting itself is done by _PyBytes_FormatAdvanced()
   on the spec's character buffer and length. */
3595 string__format__(PyObject* self, PyObject* args)
3597 PyObject *format_spec;
3598 PyObject *result = NULL;
3599 PyObject *tmp = NULL;
3601 /* If 2.x, convert format_spec to the same type as value */
3602 /* This is to allow things like u''.format('') */
3603 if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))
3605 if (!(PyString_Check(format_spec) || PyUnicode_Check(format_spec))) {
3606 PyErr_Format(PyExc_TypeError, "__format__ arg must be str "
3607 "or unicode, not %s", Py_TYPE(format_spec)->tp_name);
3610 tmp = PyObject_Str(format_spec);
3615 result = _PyBytes_FormatAdvanced(self,
3616 PyString_AS_STRING(format_spec),
3617 PyString_GET_SIZE(format_spec));
/* Docstring for S.__format__(); it is attached to string__format__ in
   string_methods. */
3623 PyDoc_STRVAR(p_format__doc__,
3624 "S.__format__(format_spec) -> string\n\
3626 Return a formatted version of S as described by format_spec.");
/* Method table for the str type (installed as tp_methods of
   PyString_Type).  Each entry gives the Python-visible name, the C
   implementation, its calling convention flags and, where present, its
   docstring.  The table is terminated by the {NULL, NULL} sentinel. */
3630 string_methods[] = {
3631 /* Counterparts of the obsolete stropmodule functions; except
3632 string.maketrans(). */
3633 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3634 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
3635 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
3636 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3637 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
3638 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3639 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3640 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3641 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3642 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3643 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3644 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
3645 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3647 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3648 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3650 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
3651 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3652 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3653 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3654 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3655 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3656 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3657 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3658 {"rpartition", (PyCFunction)string_rpartition, METH_O,
3660 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3662 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3663 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3665 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3667 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3668 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3669 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3670 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3671 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3672 {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
3673 {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},
3674 {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
3675 {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
3676 {"encode", (PyCFunction)string_encode, METH_VARARGS | METH_KEYWORDS, encode__doc__},
3677 {"decode", (PyCFunction)string_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
3678 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3680 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3682 {"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS,
3684 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
3685 {NULL, NULL} /* sentinel */
/* Forward declaration: string_new() and str_subtype_new() call each
   other. */
3689 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
/* tp_new for str.  Requests for a subtype are handed to
   str_subtype_new().  For str itself the single optional argument
   "object" is parsed; the visible return paths are an empty string and
   PyObject_Str(x). */
3692 string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3695 static char *kwlist[] = {"object", 0};
3697 if (type != &PyString_Type)
3698 return str_subtype_new(type, args, kwds);
3699 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3702 return PyString_FromString("");
3703 return PyObject_Str(x);
/* Construct an instance of a str subtype.  A plain str is first built via
   string_new(&PyString_Type, ...); then an object of `type` with room for
   n characters is allocated through tp_alloc and the contents are copied
   over, including the terminating byte (n+1) and the cached hash.  The
   new object is marked as not interned. */
3707 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3709 PyObject *tmp, *pnew;
3712 assert(PyType_IsSubtype(type, &PyString_Type));
3713 tmp = string_new(&PyString_Type, args, kwds);
3716 assert(PyString_CheckExact(tmp));
3717 n = PyString_GET_SIZE(tmp);
3718 pnew = type->tp_alloc(type, n);
3720 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
3721 ((PyStringObject *)pnew)->ob_shash =
3722 ((PyStringObject *)tmp)->ob_shash;
3723 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
/* tp_new for basestring: sets a TypeError saying the type cannot be
   instantiated; none of the arguments are used by the visible code. */
3730 basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3732 PyErr_SetString(PyExc_TypeError,
3733 "The basestring type cannot be instantiated");
/* nb_remainder slot (the % operator).  When the left operand is not a
   string, Py_NotImplemented is returned with its reference count
   incremented; otherwise the work is done by PyString_Format(v, w). */
3738 string_mod(PyObject *v, PyObject *w)
3740 if (!PyString_Check(v)) {
3741 Py_INCREF(Py_NotImplemented);
3742 return Py_NotImplemented;
3744 return PyString_Format(v, w);
/* Docstring installed as tp_doc of PyBaseString_Type. */
3747 PyDoc_STRVAR(basestring_doc,
3748 "Type basestring cannot be instantiated; it is the base for str and unicode.");
/* Number protocol table for str (installed as tp_as_number of
   PyString_Type); the slot shown here, nb_remainder, implements the %
   operator via string_mod(). */
3750 static PyNumberMethods string_as_number = {
3755 string_mod, /*nb_remainder*/
/* Type object for basestring.  It derives from PyBaseObject_Type, may be
   subclassed (Py_TPFLAGS_BASETYPE), and uses basestring_new as tp_new,
   which always fails with a TypeError.  The protocol tables shown here
   (number, sequence, mapping, buffer) are all left empty. */
3759 PyTypeObject PyBaseString_Type = {
3760 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3770 0, /* tp_as_number */
3771 0, /* tp_as_sequence */
3772 0, /* tp_as_mapping */
3776 0, /* tp_getattro */
3777 0, /* tp_setattro */
3778 0, /* tp_as_buffer */
3779 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3780 basestring_doc, /* tp_doc */
3781 0, /* tp_traverse */
3783 0, /* tp_richcompare */
3784 0, /* tp_weaklistoffset */
3786 0, /* tp_iternext */
3790 &PyBaseObject_Type, /* tp_base */
3792 0, /* tp_descr_get */
3793 0, /* tp_descr_set */
3794 0, /* tp_dictoffset */
3797 basestring_new, /* tp_new */
/* Docstring installed as tp_doc of PyString_Type. */
3801 PyDoc_STRVAR(string_doc,
3802 "str(object) -> string\n\
3804 Return a nice string representation of the object.\n\
3805 If the argument is a string, the return value is the same object.");
/* Type object for str.  Its basic size is PyStringObject_SIZE, it derives
   from PyBaseString_Type, exposes string_methods, and wires in the
   number, sequence, mapping and buffer protocol tables defined in this
   file.  Instances are created by string_new and released with
   PyObject_Del. */
3807 PyTypeObject PyString_Type = {
3808 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3810 PyStringObject_SIZE,
3812 string_dealloc, /* tp_dealloc */
3813 (printfunc)string_print, /* tp_print */
3817 string_repr, /* tp_repr */
3818 &string_as_number, /* tp_as_number */
3819 &string_as_sequence, /* tp_as_sequence */
3820 &string_as_mapping, /* tp_as_mapping */
3821 (hashfunc)string_hash, /* tp_hash */
3823 string_str, /* tp_str */
3824 PyObject_GenericGetAttr, /* tp_getattro */
3825 0, /* tp_setattro */
3826 &string_as_buffer, /* tp_as_buffer */
3827 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
3828 Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS |
3829 Py_TPFLAGS_HAVE_NEWBUFFER, /* tp_flags */
3830 string_doc, /* tp_doc */
3831 0, /* tp_traverse */
3833 (richcmpfunc)string_richcompare, /* tp_richcompare */
3834 0, /* tp_weaklistoffset */
3836 0, /* tp_iternext */
3837 string_methods, /* tp_methods */
3840 &PyBaseString_Type, /* tp_base */
3842 0, /* tp_descr_get */
3843 0, /* tp_descr_set */
3844 0, /* tp_dictoffset */
3847 string_new, /* tp_new */
3848 PyObject_Del, /* tp_free */
/* Concatenate w onto the string referenced through pv.  A NULL w, or a
   *pv that is not a string, takes a separate early branch; otherwise the
   concatenation is computed by string_concat(*pv, w).
   NOTE(review): how *pv is updated afterwards is not shown by the
   statements here - confirm against the full function. */
3852 PyString_Concat(register PyObject **pv, register PyObject *w)
3854 register PyObject *v;
3857 if (w == NULL || !PyString_Check(*pv)) {
3862 v = string_concat((PyStringObject *) *pv, w);
/* Variant of PyString_Concat(): performs the same concatenation by
   calling PyString_Concat(pv, w).
   NOTE(review): presumably also releases the reference to w, as the name
   suggests - confirm against the full function. */
3868 PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
3870 PyString_Concat(pv, w);
3875 /* The following function breaks the notion that strings are immutable:
3876 it changes the size of a string. We get away with this only if there
3877 is only one module referencing the object. You can also think of it
3878 as creating a new string object and destroying the old one, only
3879 more efficiently. In any case, don't use this if the string may
3880 already be known to some other part of the code...
3881 Note that if there's not enough memory to resize the string, the original
3882 string object at *pv is deallocated, *pv is set to NULL, an "out of
3883 memory" exception is set, and -1 is returned. Else (on success) 0 is
3884 returned, and the value in *pv may or may not be the same as on input.
3885 As always, an extra byte is allocated for a trailing \0 byte (newsize
3886 does *not* include that), and a trailing \0 byte is stored.
/* The preconditions enforced in code are: *pv is a string, its reference
   count is exactly 1, newsize is non-negative, and the string is not
   interned.  Violating any of them ends in PyErr_BadInternalCall(). */
3890 _PyString_Resize(PyObject **pv, Py_ssize_t newsize)
3892 register PyObject *v;
3893 register PyStringObject *sv;
3895 if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 ||
3896 PyString_CHECK_INTERNED(v)) {
3899 PyErr_BadInternalCall();
3902 /* XXX UNREF/NEWREF interface should be more symmetrical */
3904 _Py_ForgetReference(v);
/* The object is reallocated to the fixed header size plus newsize. */
3906 PyObject_REALLOC((char *)v, PyStringObject_SIZE + newsize);
3912 _Py_NewReference(*pv);
3913 sv = (PyStringObject *) *pv;
3914 Py_SIZE(sv) = newsize;
3915 sv->ob_sval[newsize] = '\0';
3916 sv->ob_shash = -1; /* invalidate cached hash value */
3920 /* Helpers for formatstring */
/* Fetch the next positional argument for PyString_Format().  *p_argidx is
   the index of the next unused argument and arglen the number available.
   While arguments remain, one visible path returns
   PyTuple_GetItem(args, argidx); once they are exhausted a TypeError
   ("not enough arguments for format string") is set. */
3922 Py_LOCAL_INLINE(PyObject *)
3923 getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
3925 Py_ssize_t argidx = *p_argidx;
3926 if (argidx < arglen) {
3931 return PyTuple_GetItem(args, argidx);
3933 PyErr_SetString(PyExc_TypeError,
3934 "not enough arguments for format string");
/* Bit flags collected by PyString_Format() while it parses a conversion
   specifier.  Each one is set by a single flag character in the format:
   '-' -> F_LJUST, '+' -> F_SIGN, ' ' -> F_BLANK, '#' -> F_ALT,
   '0' -> F_ZERO. */
3945 #define F_LJUST (1<<0)
3946 #define F_SIGN (1<<1)
3947 #define F_BLANK (1<<2)
3948 #define F_ALT (1<<3)
3949 #define F_ZERO (1<<4)
3951 /* Returns a new reference to a PyString object, or NULL on failure. */
/* v is converted to a C double with PyFloat_AsDouble(); if that fails the
   error is replaced by TypeError("float argument required, not <type>").
   The text is produced by PyOS_double_to_string() using `type` as the
   format code and `prec` as the precision, with Py_DTSF_ALT requested
   when F_ALT is set in flags, and is then copied into a new string. */
3954 formatfloat(PyObject *v, int flags, int prec, int type)
3960 x = PyFloat_AsDouble(v);
/* -1.0 is a legitimate value, so the error indicator is checked too. */
3961 if (x == -1.0 && PyErr_Occurred()) {
3962 PyErr_Format(PyExc_TypeError, "float argument required, "
3963 "not %.200s", Py_TYPE(v)->tp_name);
3970 p = PyOS_double_to_string(x, type, prec,
3971 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
3975 result = PyString_FromStringAndSize(p, strlen(p));
3980 /* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3981 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3982 * Python's regular ints.
3983 * Return value: a new PyString*, or NULL if error.
3984 * . *pbuf is set to point into it,
3985 * *plen set to the # of chars following that.
3986 * Caller must decref it when done using pbuf.
3987 * The string starting at *pbuf is of the form
3988 * "-"? ("0x" | "0X")? digit+
3989 * "0x"/"0X" are present only for x and X conversions, with F_ALT
3990 * set in flags. The case of hex digits will be correct,
3991 * There will be at least prec digits, zero-filled on the left if
3992 * necessary to get that many.
3993 * val object to be converted
3994 * flags bitmask of format flags; only F_ALT is looked at
3995 * prec minimum number of digits; 0-fill on left if needed
3996 * type a character in [duoxX]; u acts the same as d
3998 * CAUTION: o, x and X conversions on regular ints can never
3999 * produce a '-' sign, but can for Python's unbounded ints.
4002 _PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4003 char **pbuf, int *plen)
4005 PyObject *result = NULL;
4008 int sign; /* 1 if '-', else 0 */
4009 int len; /* number of characters */
4011 int numdigits; /* len == numnondigits + numdigits */
4012 int numnondigits = 0;
/* The initial text comes from the value's own type slots: tp_str, nb_oct
   or nb_hex, selected by `type`. */
4017 result = Py_TYPE(val)->tp_str(val);
4020 result = Py_TYPE(val)->tp_as_number->nb_oct(val);
4025 result = Py_TYPE(val)->tp_as_number->nb_hex(val);
4028 assert(!"'type' not in [duoxX]");
4033 buf = PyString_AsString(result);
4039 /* To modify the string in-place, there can only be one reference. */
4040 if (Py_REFCNT(result) != 1) {
4041 PyErr_BadInternalCall();
/* The length is tracked in an int, so longer strings are rejected. */
4044 llen = PyString_Size(result);
4045 if (llen > INT_MAX) {
4046 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
/* A trailing 'L' in the converted text gets special handling. */
4050 if (buf[len-1] == 'L') {
4054 sign = buf[0] == '-';
4055 numnondigits += sign;
4056 numdigits = len - numnondigits;
4057 assert(numdigits > 0);
4059 /* Get rid of base marker unless F_ALT */
4060 if ((flags & F_ALT) == 0) {
4061 /* Need to skip 0x, 0X or 0. */
4065 assert(buf[sign] == '0');
4066 /* If 0 is only digit, leave it alone. */
4067 if (numdigits > 1) {
4074 assert(buf[sign] == '0');
4075 assert(buf[sign + 1] == 'x');
4086 assert(len == numnondigits + numdigits);
4087 assert(numdigits > 0);
4090 /* Fill with leading zeroes to meet minimum width. */
4091 if (prec > numdigits) {
/* A fresh string of numnondigits + prec characters is built in three
   runs: the non-digit prefix, the zero fill, then the digits. */
4092 PyObject *r1 = PyString_FromStringAndSize(NULL,
4093 numnondigits + prec);
4099 b1 = PyString_AS_STRING(r1);
4100 for (i = 0; i < numnondigits; ++i)
4102 for (i = 0; i < prec - numdigits; i++)
4104 for (i = 0; i < numdigits; i++)
4109 buf = PyString_AS_STRING(result);
4110 len = numnondigits + prec;
4113 /* Fix up case for hex conversions. */
4115 /* Need to convert all lower case letters to upper case.
4116 and need to convert 0x to 0X (and -0x to -0X). */
/* The range 'a'..'x' covers the hex digits a-f as well as the 'x' of
   the base marker. */
4117 for (i = 0; i < len; i++)
4118 if (buf[i] >= 'a' && buf[i] <= 'x')
/* Format a regular Python int into the caller's buffer `buf` of `buflen`
   bytes according to `flags`, `prec` and the conversion character `type`.
   v is converted with PyInt_AsLong(); failure is reported as
   TypeError("int argument required, not <type>").  A printf-style format
   is assembled in `fmt` and then applied with PyOS_snprintf().  The
   visible success path returns the length of the text written to buf; a
   buffer too small for the requested precision sets OverflowError. */
4126 Py_LOCAL_INLINE(int)
4127 formatint(char *buf, size_t buflen, int flags,
4128 int prec, int type, PyObject *v)
4130 /* fmt = '%#.' + `prec` + 'l' + `type`
4131 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4133 char fmt[64]; /* plenty big enough! */
4137 x = PyInt_AsLong(v);
/* -1 is a legitimate value, so the error indicator is checked too. */
4138 if (x == -1 && PyErr_Occurred()) {
4139 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
4140 Py_TYPE(v)->tp_name);
/* Negative values under the unsigned-style conversions (u, x, X, o) are
   singled out here; note the -x passed to PyOS_snprintf further down. */
4143 if (x < 0 && type == 'u') {
4146 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4153 if ((flags & F_ALT) &&
4154 (type == 'x' || type == 'X')) {
4155 /* When converting under %#x or %#X, there are a number
4156 * of issues that cause pain:
4157 * - when 0 is being converted, the C standard leaves off
4158 * the '0x' or '0X', which is inconsistent with other
4159 * %#x/%#X conversions and inconsistent with Python's
4161 * - there are platforms that violate the standard and
4162 * convert 0 with the '0x' or '0X'
4163 * (Metrowerks, Compaq Tru64)
4164 * - there are platforms that give '0x' when converting
4165 * under %#X, but convert 0 in accordance with the
4166 * standard (OS/2 EMX)
4168 * We can achieve the desired consistency by inserting our
4169 * own '0x' or '0X' prefix, and substituting %x/%X in place
4172 * Note that this is the same approach as used in
4173 * formatint() in unicodeobject.c
4175 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4176 sign, type, prec, type);
4179 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4180 sign, (flags&F_ALT) ? "#" : "",
4184 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4185 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4187 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
4188 PyErr_SetString(PyExc_OverflowError,
4189 "formatted integer is too long (precision too large?)");
4193 PyOS_snprintf(buf, buflen, fmt, -x);
4195 PyOS_snprintf(buf, buflen, fmt, x);
4196 return (int)strlen(buf);
/* Implement the %c conversion: store one character derived from v in
   buf[0].  A string argument is parsed with the "c" format; any other
   argument is parsed with the "b" format.  Both parses carry the error
   message "%c requires int or char". */
4199 Py_LOCAL_INLINE(int)
4200 formatchar(char *buf, size_t buflen, PyObject *v)
4202 /* presume that the buffer is at least 2 characters long */
4203 if (PyString_Check(v)) {
4204 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
4208 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
4215 /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
4217 FORMATBUFLEN is the length of the buffer in which the ints &
4218 chars are formatted. XXX This is a magic number. Each formatting
4219 routine does bounds checking to ensure no overflow, but a better
4220 solution may be to malloc a buffer of appropriate size for each
4221 format. For now, the current solution is sufficient.
4223 #define FORMATBUFLEN (size_t)120
/* Implementation of `format % args` for str.
   format must be a string and args non-NULL, otherwise
   PyErr_BadInternalCall() is raised.  The result string starts out
   100 bytes larger than the format and is grown with _PyString_Resize()
   whenever a conversion needs more room; `res` is the write cursor and
   `rescnt` the free space left.  A mapping argument (anything with
   tp_as_mapping that is neither a tuple nor a basestring) enables
   "%(key)s" lookups.  When a unicode argument turns up, the portion
   formatted so far is kept and the remainder is delegated to
   PyUnicode_Format(). */
4226 PyString_Format(PyObject *format, PyObject *args)
4229 Py_ssize_t arglen, argidx;
4230 Py_ssize_t reslen, rescnt, fmtcnt;
4232 PyObject *result, *orig_args;
4233 #ifdef Py_USING_UNICODE
4236 PyObject *dict = NULL;
4237 if (format == NULL || !PyString_Check(format) || args == NULL) {
4238 PyErr_BadInternalCall();
4242 fmt = PyString_AS_STRING(format);
4243 fmtcnt = PyString_GET_SIZE(format);
4244 reslen = rescnt = fmtcnt + 100;
4245 result = PyString_FromStringAndSize((char *)NULL, reslen);
4248 res = PyString_AsString(result);
4249 if (PyTuple_Check(args)) {
4250 arglen = PyTuple_GET_SIZE(args);
4257 if (Py_TYPE(args)->tp_as_mapping && !PyTuple_Check(args) &&
4258 !PyObject_TypeCheck(args, &PyBaseString_Type))
/* Main loop: one iteration per remaining character of the format. */
4260 while (--fmtcnt >= 0) {
4263 rescnt = fmtcnt + 100;
4265 if (_PyString_Resize(&result, reslen))
4267 res = PyString_AS_STRING(result)
4274 /* Got a format specifier */
4276 Py_ssize_t width = -1;
4282 PyObject *temp = NULL;
4286 char formatbuf[FORMATBUFLEN];
4287 /* For format{int,char}() */
4288 #ifdef Py_USING_UNICODE
/* Remembered so the specifier can be re-parsed from its start when the
   argument turns out to be unicode. */
4289 char *fmt_start = fmt;
4290 Py_ssize_t argidx_start = argidx;
4301 PyErr_SetString(PyExc_TypeError,
4302 "format requires a mapping");
4308 /* Skip over balanced parentheses */
4309 while (pcount > 0 && --fmtcnt >= 0) {
4312 else if (*fmt == '(')
4316 keylen = fmt - keystart - 1;
4317 if (fmtcnt < 0 || pcount > 0) {
4318 PyErr_SetString(PyExc_ValueError,
4319 "incomplete format key");
4322 key = PyString_FromStringAndSize(keystart,
4330 args = PyObject_GetItem(dict, key);
/* Collect the flag characters that may follow '%'. */
4339 while (--fmtcnt >= 0) {
4340 switch (c = *fmt++) {
4341 case '-': flags |= F_LJUST; continue;
4342 case '+': flags |= F_SIGN; continue;
4343 case ' ': flags |= F_BLANK; continue;
4344 case '#': flags |= F_ALT; continue;
4345 case '0': flags |= F_ZERO; continue;
/* Field width: either taken from the next argument (which must be an
   int) or parsed from decimal digits in the format. */
4350 v = getnextarg(args, arglen, &argidx);
4353 if (!PyInt_Check(v)) {
4354 PyErr_SetString(PyExc_TypeError,
4358 width = PyInt_AsLong(v);
4366 else if (c >= 0 && isdigit(c)) {
4368 while (--fmtcnt >= 0) {
4369 c = Py_CHARMASK(*fmt++);
/* NOTE(review): this overflow test relies on the result of a signed
   multiplication that may itself have overflowed. */
4372 if ((width*10) / 10 != width) {
4378 width = width*10 + (c - '0');
/* Precision: same two sources as the width. */
4386 v = getnextarg(args, arglen, &argidx);
4389 if (!PyInt_Check(v)) {
4395 prec = PyInt_AsLong(v);
4401 else if (c >= 0 && isdigit(c)) {
4403 while (--fmtcnt >= 0) {
4404 c = Py_CHARMASK(*fmt++);
/* NOTE(review): same signed-overflow concern as for the width above. */
4407 if ((prec*10) / 10 != prec) {
4413 prec = prec*10 + (c - '0');
/* C length modifiers are recognised here. */
4418 if (c == 'h' || c == 'l' || c == 'L') {
4424 PyErr_SetString(PyExc_ValueError,
4425 "incomplete format");
4429 v = getnextarg(args, arglen, &argidx);
4441 #ifdef Py_USING_UNICODE
4442 if (PyUnicode_Check(v)) {
4444 argidx = argidx_start;
4448 temp = _PyObject_Str(v);
4449 #ifdef Py_USING_UNICODE
4450 if (temp != NULL && PyUnicode_Check(temp)) {
4453 argidx = argidx_start;
4460 temp = PyObject_Repr(v);
4463 if (!PyString_Check(temp)) {
4464 PyErr_SetString(PyExc_TypeError,
4465 "%s argument has non-string str()");
4469 pbuf = PyString_AS_STRING(temp);
4470 len = PyString_GET_SIZE(temp);
4471 if (prec >= 0 && len > prec)
/* Integer conversions: plain ints go through formatint(), longs through
   _PyString_FormatLong(); other numbers are first converted with
   PyNumber_Int() and, failing that, PyNumber_Long(). */
4483 if (PyNumber_Check(v)) {
4484 PyObject *iobj=NULL;
4486 if (PyInt_Check(v) || (PyLong_Check(v))) {
4491 iobj = PyNumber_Int(v);
4492 if (iobj==NULL) iobj = PyNumber_Long(v);
4495 if (PyInt_Check(iobj)) {
4498 len = formatint(pbuf,
4500 flags, prec, c, iobj);
4506 else if (PyLong_Check(iobj)) {
4510 temp = _PyString_FormatLong(iobj, flags,
4511 prec, c, &pbuf, &ilen);
4524 PyErr_Format(PyExc_TypeError,
4525 "%%%c format: a number is required, "
4526 "not %.200s", c, Py_TYPE(v)->tp_name);
4538 temp = formatfloat(v, flags, prec, c);
4541 pbuf = PyString_AS_STRING(temp);
4542 len = PyString_GET_SIZE(temp);
4548 #ifdef Py_USING_UNICODE
4549 if (PyUnicode_Check(v)) {
4551 argidx = argidx_start;
4556 len = formatchar(pbuf, sizeof(formatbuf), v);
4561 PyErr_Format(PyExc_ValueError,
4562 "unsupported format character '%c' (0x%x) "
4565 (Py_ssize_t)(fmt - 1 -
4566 PyString_AsString(format)));
/* Sign handling for the formatted text. */
4570 if (*pbuf == '-' || *pbuf == '+') {
4574 else if (flags & F_SIGN)
4576 else if (flags & F_BLANK)
/* Grow the result when the padded field does not fit in the free space. */
4583 if (rescnt - (sign != 0) < width) {
4585 rescnt = width + fmtcnt + 100;
4590 return PyErr_NoMemory();
4592 if (_PyString_Resize(&result, reslen)) {
4596 res = PyString_AS_STRING(result)
4606 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4607 assert(pbuf[0] == '0');
4608 assert(pbuf[1] == c);
/* Padding on the left happens unless left-justification was requested. */
4619 if (width > len && !(flags & F_LJUST)) {
4623 } while (--width > len);
4628 if ((flags & F_ALT) &&
4629 (c == 'x' || c == 'X')) {
4630 assert(pbuf[0] == '0');
4631 assert(pbuf[1] == c);
4636 Py_MEMCPY(res, pbuf, len);
4639 while (--width >= len) {
4643 if (dict && (argidx < arglen) && c != '%') {
4644 PyErr_SetString(PyExc_TypeError,
4645 "not all arguments converted during string formatting");
/* Left-over positional arguments are an error. */
4652 if (argidx < arglen && !dict) {
4653 PyErr_SetString(PyExc_TypeError,
4654 "not all arguments converted during string formatting");
/* Trim the result to the number of bytes actually written. */
4660 if (_PyString_Resize(&result, reslen - rescnt))
4664 #ifdef Py_USING_UNICODE
4670 /* Fiddle args right (remove the first argidx arguments) */
4671 if (PyTuple_Check(orig_args) && argidx > 0) {
4673 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
4678 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4680 PyTuple_SET_ITEM(v, n, w);
4684 Py_INCREF(orig_args);
4688 /* Take what we have of the result and let the Unicode formatting
4689 function format the rest of the input. */
4690 rescnt = res - PyString_AS_STRING(result);
4691 if (_PyString_Resize(&result, rescnt))
4693 fmtcnt = PyString_GET_SIZE(format) - \
4694 (fmt - PyString_AS_STRING(format));
4695 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4698 v = PyUnicode_Format(format, args);
4702 /* Paste what we have (result) to what the Unicode formatting
4703 function returned (v) and return the result (or error) */
4704 w = PyUnicode_Concat(result, v);
4709 #endif /* Py_USING_UNICODE */
/* Intern the string referenced through p.
   A NULL or non-string argument is a fatal error.  Subclass instances and
   strings that are already interned are recognised by the two checks
   after that.  The `interned` dictionary is created on first use; if
   that allocation fails the exception is cleared rather than propagated.
   The dictionary is consulted for an existing entry (t); when the string
   is instead added under itself as key and value, its state becomes
   SSTATE_INTERNED_MORTAL. */
4720 PyString_InternInPlace(PyObject **p)
4722 register PyStringObject *s = (PyStringObject *)(*p);
4724 if (s == NULL || !PyString_Check(s))
4725 Py_FatalError("PyString_InternInPlace: strings only please!");
4726 /* If it's a string subclass, we don't really know what putting
4727 it in the interned dict might do. */
4728 if (!PyString_CheckExact(s))
4730 if (PyString_CHECK_INTERNED(s))
4732 if (interned == NULL) {
4733 interned = PyDict_New();
4734 if (interned == NULL) {
4735 PyErr_Clear(); /* Don't leave an exception */
4739 t = PyDict_GetItem(interned, (PyObject *)s);
4747 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
4751 /* The two references in interned are not counted by refcnt.
4752 The string deallocator will take care of this */
4754 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
/* Intern *p with PyString_InternInPlace(), then set its interned state to
   SSTATE_INTERNED_IMMORTAL unless it already has that state.

   p: address of a reference to a string object; passed straight through
      to PyString_InternInPlace().

   NOTE(review): any reference-count adjustment that accompanies the state
   change is not visible in this excerpt -- confirm against the full
   source. */
4758 PyString_InternImmortal(PyObject **p)
4760 PyString_InternInPlace(p);
/* Only touch the state when it is not already immortal. */
4761 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4762 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
/* Create a string object from the null-terminated C string cp with
   PyString_FromString() and intern it with PyString_InternInPlace().

   cp: null-terminated C string to copy into the new string object.

   NOTE(review): the failure check on s and the return statement are not
   visible in this excerpt; presumably the (possibly replaced) s is
   returned, or NULL if the string could not be created -- confirm. */
4769 PyString_InternFromString(const char *cp)
4771 PyObject *s = PyString_FromString(cp);
/* Passing &s lets the interning call replace s with another object. */
4774 PyString_InternInPlace(&s);
/* Release the references held by the file-level caches: every slot of
   the `characters` table (UCHAR_MAX + 1 entries) is XDECREF'ed and reset
   to NULL, then the reference in `nullstring` is XDECREF'ed.
   Py_XDECREF is used because a slot may still be NULL.
   NOTE(review): the enclosing function header is not visible in this
   excerpt -- confirm which finalizer this belongs to. */
4782 for (i = 0; i < UCHAR_MAX + 1; i++) {
4783 Py_XDECREF(characters[i]);
4784 characters[i] = NULL;
4786 Py_XDECREF(nullstring);
/* Report on, and then release, the file-level `interned` dictionary.

   Takes no arguments and returns nothing.  Visible behaviour: fetch the
   keys of `interned`, print how many there are to stderr, walk them while
   tallying Py_SIZE() separately for mortal and immortal strings, reset
   each string's ob_sstate to SSTATE_NOT_INTERNED, print the two totals to
   stderr, then clear and DECREF `interned`.

   NOTE(review): the per-case reference-count restoration described in the
   comment below, and the early-exit statements, are not visible in this
   excerpt -- confirm against the full source. */
4790 void _Py_ReleaseInternedStrings(void)
4795 Py_ssize_t immortal_size = 0, mortal_size = 0;
/* There is nothing to do unless `interned` exists and is a dict. */
4797 if (interned == NULL || !PyDict_Check(interned))
4799 keys = PyDict_Keys(interned);
/* Guard against a failed or unexpected PyDict_Keys() result. */
4800 if (keys == NULL || !PyList_Check(keys)) {
4805 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4806 detector, interned strings are not forcibly deallocated; rather, we
4807 give them their stolen references back, and then clear and DECREF
4808 the interned dict. */
4810 n = PyList_GET_SIZE(keys);
4811 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
/* Visit every key, dispatching on its recorded interned state. */
4813 for (i = 0; i < n; i++) {
4814 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4815 switch (s->ob_sstate) {
4816 case SSTATE_NOT_INTERNED:
4817 /* XXX Shouldn't happen */
4819 case SSTATE_INTERNED_IMMORTAL:
/* Immortal strings are tallied separately from mortal ones. */
4821 immortal_size += Py_SIZE(s);
4823 case SSTATE_INTERNED_MORTAL:
4825 mortal_size += Py_SIZE(s);
/* Any other ob_sstate value is treated as corruption. */
4828 Py_FatalError("Inconsistent interned string state.");
/* Whatever its previous state, the string is no longer marked interned. */
4830 s->ob_sstate = SSTATE_NOT_INTERNED;
4832 fprintf(stderr, "total size of all interned strings: "
4833 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
4834 "mortal/immortal\n", mortal_size, immortal_size);
/* Empty the dict first, then drop this file's reference to it. */
4836 PyDict_Clear(interned);
4837 Py_DECREF(interned);