2 ** string.c - String class
4 ** See Copyright Notice in mruby.h
8 # define _CRT_NONSTDC_NO_DEPRECATE
11 #ifndef MRB_WITHOUT_FLOAT
20 #include <mruby/array.h>
21 #include <mruby/class.h>
22 #include <mruby/range.h>
23 #include <mruby/string.h>
24 #include <mruby/numeric.h>
26 typedef struct mrb_shared_string {
32 const char mrb_digitmap[] = "0123456789abcdefghijklmnopqrstuvwxyz";
34 #define mrb_obj_alloc_string(mrb) ((struct RString*)mrb_obj_alloc((mrb), MRB_TT_STRING, (mrb)->string_class))
36 static struct RString*
37 str_init_normal_capa(mrb_state *mrb, struct RString *s,
38 const char *p, size_t len, size_t capa)
40 char *dst = (char *)mrb_malloc(mrb, capa + 1);
41 if (p) memcpy(dst, p, len);
44 s->as.heap.len = (mrb_ssize)len;
45 s->as.heap.aux.capa = (mrb_ssize)capa;
46 RSTR_UNSET_TYPE_FLAG(s);
50 static struct RString*
51 str_init_normal(mrb_state *mrb, struct RString *s, const char *p, size_t len)
53 return str_init_normal_capa(mrb, s, p, len, len);
56 static struct RString*
57 str_init_embed(struct RString *s, const char *p, size_t len)
59 if (p) memcpy(RSTR_EMBED_PTR(s), p, len);
60 RSTR_EMBED_PTR(s)[len] = '\0';
61 RSTR_SET_TYPE_FLAG(s, EMBED);
62 RSTR_SET_EMBED_LEN(s, len);
66 static struct RString*
67 str_init_nofree(struct RString *s, const char *p, size_t len)
69 s->as.heap.ptr = (char *)p;
70 s->as.heap.len = (mrb_ssize)len;
71 s->as.heap.aux.capa = 0; /* nofree */
72 RSTR_SET_TYPE_FLAG(s, NOFREE);
76 static struct RString*
77 str_init_shared(mrb_state *mrb, const struct RString *orig, struct RString *s, mrb_shared_string *shared)
83 shared = (mrb_shared_string *)mrb_malloc(mrb, sizeof(mrb_shared_string));
85 shared->ptr = orig->as.heap.ptr;
86 shared->capa = orig->as.heap.aux.capa;
88 s->as.heap.ptr = orig->as.heap.ptr;
89 s->as.heap.len = orig->as.heap.len;
90 s->as.heap.aux.shared = shared;
91 RSTR_SET_TYPE_FLAG(s, SHARED);
95 static struct RString*
96 str_init_fshared(const struct RString *orig, struct RString *s, struct RString *fshared)
98 s->as.heap.ptr = orig->as.heap.ptr;
99 s->as.heap.len = orig->as.heap.len;
100 s->as.heap.aux.fshared = fshared;
101 RSTR_SET_TYPE_FLAG(s, FSHARED);
105 static struct RString*
106 str_init_modifiable(mrb_state *mrb, struct RString *s, const char *p, size_t len)
108 if (RSTR_EMBEDDABLE_P(len)) {
109 return str_init_embed(s, p, len);
112 return str_init_normal(mrb, s, p, len);
116 static struct RString*
117 str_new_static(mrb_state *mrb, const char *p, size_t len)
119 if (RSTR_EMBEDDABLE_P(len)) {
120 return str_init_embed(mrb_obj_alloc_string(mrb), p, len);
122 if (len >= MRB_SSIZE_MAX) {
123 mrb_raise(mrb, E_ARGUMENT_ERROR, "string size too big");
125 return str_init_nofree(mrb_obj_alloc_string(mrb), p, len);
128 static struct RString*
129 str_new(mrb_state *mrb, const char *p, size_t len)
131 if (RSTR_EMBEDDABLE_P(len)) {
132 return str_init_embed(mrb_obj_alloc_string(mrb), p, len);
134 if (len >= MRB_SSIZE_MAX) {
135 mrb_raise(mrb, E_ARGUMENT_ERROR, "string size too big");
137 if (p && mrb_ro_data_p(p)) {
138 return str_init_nofree(mrb_obj_alloc_string(mrb), p, len);
140 return str_init_normal(mrb, mrb_obj_alloc_string(mrb), p, len);
144 str_with_class(struct RString *s, mrb_value obj)
146 s->c = mrb_str_ptr(obj)->c;
150 mrb_str_new_empty(mrb_state *mrb, mrb_value str)
152 struct RString *s = str_new(mrb, 0, 0);
154 str_with_class(s, str);
155 return mrb_obj_value(s);
159 mrb_str_new_capa(mrb_state *mrb, size_t capa)
163 if (RSTR_EMBEDDABLE_P(capa)) {
164 s = str_init_embed(mrb_obj_alloc_string(mrb), NULL, 0);
166 else if (capa >= MRB_SSIZE_MAX) {
167 mrb_raise(mrb, E_ARGUMENT_ERROR, "string capacity size too big");
172 s = str_init_normal_capa(mrb, mrb_obj_alloc_string(mrb), NULL, 0, capa);
175 return mrb_obj_value(s);
178 #ifndef MRB_STR_BUF_MIN_SIZE
179 # define MRB_STR_BUF_MIN_SIZE 128
183 mrb_str_buf_new(mrb_state *mrb, size_t capa)
185 if (capa < MRB_STR_BUF_MIN_SIZE) {
186 capa = MRB_STR_BUF_MIN_SIZE;
188 return mrb_str_new_capa(mrb, capa);
192 resize_capa(mrb_state *mrb, struct RString *s, size_t capacity)
194 #if SIZE_MAX > MRB_SSIZE_MAX
195 mrb_assert(capacity < MRB_SSIZE_MAX);
197 if (RSTR_EMBED_P(s)) {
198 if (!RSTR_EMBEDDABLE_P(capacity)) {
199 str_init_normal_capa(mrb, s, RSTR_EMBED_PTR(s), RSTR_EMBED_LEN(s), capacity);
203 s->as.heap.ptr = (char*)mrb_realloc(mrb, RSTR_PTR(s), capacity+1);
204 s->as.heap.aux.capa = (mrb_ssize)capacity;
209 mrb_str_new(mrb_state *mrb, const char *p, size_t len)
211 return mrb_obj_value(str_new(mrb, p, len));
215 mrb_str_new_cstr(mrb_state *mrb, const char *p)
227 s = str_new(mrb, p, len);
229 return mrb_obj_value(s);
233 mrb_str_new_static(mrb_state *mrb, const char *p, size_t len)
235 struct RString *s = str_new_static(mrb, p, len);
236 return mrb_obj_value(s);
240 str_decref(mrb_state *mrb, mrb_shared_string *shared)
243 if (shared->refcnt == 0) {
244 mrb_free(mrb, shared->ptr);
245 mrb_free(mrb, shared);
250 str_modify_keep_ascii(mrb_state *mrb, struct RString *s)
252 if (RSTR_SHARED_P(s)) {
253 mrb_shared_string *shared = s->as.heap.aux.shared;
255 if (shared->refcnt == 1 && s->as.heap.ptr == shared->ptr) {
256 s->as.heap.aux.capa = shared->capa;
257 s->as.heap.ptr[s->as.heap.len] = '\0';
258 RSTR_UNSET_SHARED_FLAG(s);
259 mrb_free(mrb, shared);
262 str_init_modifiable(mrb, s, s->as.heap.ptr, (size_t)s->as.heap.len);
263 str_decref(mrb, shared);
266 else if (RSTR_NOFREE_P(s) || RSTR_FSHARED_P(s)) {
267 str_init_modifiable(mrb, s, s->as.heap.ptr, (size_t)s->as.heap.len);
272 check_null_byte(mrb_state *mrb, mrb_value str)
274 mrb_to_str(mrb, str);
275 if (memchr(RSTRING_PTR(str), '\0', RSTRING_LEN(str))) {
276 mrb_raise(mrb, E_ARGUMENT_ERROR, "string contains null byte");
281 mrb_gc_free_str(mrb_state *mrb, struct RString *str)
283 if (RSTR_EMBED_P(str))
285 else if (RSTR_SHARED_P(str))
286 str_decref(mrb, str->as.heap.aux.shared);
287 else if (!RSTR_NOFREE_P(str) && !RSTR_FSHARED_P(str))
288 mrb_free(mrb, str->as.heap.ptr);
291 #ifdef MRB_UTF8_STRING
292 static const char utf8len_codepage[256] =
294 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
295 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
296 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
297 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
298 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
299 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
300 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
301 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,1,1,1,1,1,1,1,1,1,1,1,
305 mrb_utf8len(const char* p, const char* e)
310 if ((unsigned char)*p < 0x80) return 1;
311 len = utf8len_codepage[(unsigned char)*p];
312 if (len == 1) return 1;
313 if (len > e - p) return 1;
314 for (i = 1; i < len; ++i)
315 if ((p[i] & 0xc0) != 0x80)
321 mrb_utf8_strlen(const char *str, mrb_int byte_len)
325 const char *e = p + byte_len;
328 p += mrb_utf8len(p, e);
335 utf8_strlen(mrb_value str)
337 struct RString *s = mrb_str_ptr(str);
338 mrb_int byte_len = RSTR_LEN(s);
340 if (RSTR_ASCII_P(s)) {
344 mrb_int utf8_len = mrb_utf8_strlen(RSTR_PTR(s), byte_len);
345 if (byte_len == utf8_len) RSTR_SET_ASCII_FLAG(s);
350 #define RSTRING_CHAR_LEN(s) utf8_strlen(s)
352 /* map character index to byte offset index */
354 chars2bytes(mrb_value s, mrb_int off, mrb_int idx)
356 if (RSTR_ASCII_P(mrb_str_ptr(s))) {
361 const char *p = RSTRING_PTR(s) + off;
362 const char *e = RSTRING_END(s);
364 for (b=i=0; p<e && i<idx; i++) {
365 n = mrb_utf8len(p, e);
373 /* map byte offset to character index */
375 bytes2chars(char *p, mrb_int len, mrb_int bi)
377 const char *e = p + (size_t)len;
378 const char *pivot = p + bi;
381 for (i = 0; p < pivot; i ++) {
382 p += mrb_utf8len(p, e);
384 if (p != pivot) return -1;
/*
 * If `ptr` points at a UTF-8 continuation byte, return the start of the
 * character that contains it; otherwise return `ptr` unchanged.
 *
 *   beg, end - bounds of the string data
 *   ptr      - position to adjust
 *
 * At most 3 bytes before `ptr` are examined.  The lead byte found is only
 * accepted when the character it starts actually extends past `ptr`.
 */
static const char *
char_adjust(const char *beg, const char *end, const char *ptr)
{
  /* Look at *ptr only when ptr is strictly inside the data: at `beg` there
     is nothing to back up to, and at `end` there is no character to read.
     (The previous `||` test was true for every ptr in a non-empty range.) */
  if (ptr > beg && ptr < end && (*ptr & 0xc0) == 0x80) {
    const int utf8_adjust_max = 3;
    const char *p;

    if (ptr - beg > utf8_adjust_max) {
      beg = ptr - utf8_adjust_max;
    }

    p = ptr;
    while (p > beg) {
      p --;
      if ((*p & 0xc0) != 0x80) {
        int clen = mrb_utf8len(p, end);
        if (clen > ptr - p) return p;
        break;
      }
    }
  }

  return ptr;
}
414 char_backtrack(const char *ptr, const char *end)
417 const int utf8_bytelen_max = 4;
420 if (end - ptr > utf8_bytelen_max) {
421 ptr = end - utf8_bytelen_max;
427 if ((*p & 0xc0) != 0x80) {
428 int clen = utf8len_codepage[(unsigned char)*p];
429 if (clen == end - p) { return p; }
439 str_index_str_by_char_search(mrb_state *mrb, const char *p, const char *pend, const char *s, const mrb_int slen, mrb_int off)
441 /* Based on Quick Search algorithm (Boyer-Moore-Horspool algorithm) */
443 ptrdiff_t qstable[1 << CHAR_BIT];
449 for (i = 0; i < 1 << CHAR_BIT; i ++) {
452 for (i = 0; i < slen; i ++) {
453 qstable[(unsigned char)s[i]] = slen - (i + 1);
458 while (p < pend && pend - p >= slen) {
461 if (memcmp(p, s, slen) == 0) {
465 pivot = p + qstable[(unsigned char)p[slen - 1]];
466 if (pivot >= pend || pivot < p /* overflowed */) { return -1; }
469 p += mrb_utf8len(p, pend);
478 str_index_str_by_char(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int pos)
480 const char *p = RSTRING_PTR(str);
481 const char *pend = p + RSTRING_LEN(str);
482 const char *s = RSTRING_PTR(sub);
483 const mrb_int slen = RSTRING_LEN(sub);
486 for (; pos > 0; pos --) {
487 if (pend - p < 1) { return -1; }
488 p += mrb_utf8len(p, pend);
491 if (slen < 1) { return off; }
493 return str_index_str_by_char_search(mrb, p, pend, s, slen, off);
496 #define BYTES_ALIGN_CHECK(pos) if (pos < 0) return mrb_nil_value();
498 #define RSTRING_CHAR_LEN(s) RSTRING_LEN(s)
499 #define chars2bytes(p, off, ci) (ci)
500 #define bytes2chars(p, end, bi) (bi)
501 #define char_adjust(beg, end, ptr) (ptr)
502 #define char_backtrack(ptr, end) ((end) - 1)
503 #define BYTES_ALIGN_CHECK(pos)
504 #define str_index_str_by_char(mrb, str, sub, pos) str_index_str(mrb, str, sub, pos)
507 #ifndef MRB_QS_SHORT_STRING_LENGTH
508 #define MRB_QS_SHORT_STRING_LENGTH 2048
511 static inline mrb_int
512 mrb_memsearch_qs(const unsigned char *xs, mrb_int m, const unsigned char *ys, mrb_int n)
514 if (n + m < MRB_QS_SHORT_STRING_LENGTH) {
515 const unsigned char *y = ys;
516 const unsigned char *ye = ys+n-m+1;
519 y = (const unsigned char*)memchr(y, xs[0], (size_t)(ye-y));
520 if (y == NULL) return -1;
521 if (memcmp(xs, y, m) == 0) {
522 return (mrb_int)(y - ys);
529 const unsigned char *x = xs, *xe = xs + m;
530 const unsigned char *y = ys;
532 ptrdiff_t qstable[256];
535 for (i = 0; i < 256; ++i)
538 qstable[*x] = xe - x;
540 for (; y + m <= ys + n; y += *(qstable + y[m])) {
541 if (*xs == *y && memcmp(xs, y, m) == 0)
542 return (mrb_int)(y - ys);
549 mrb_memsearch(const void *x0, mrb_int m, const void *y0, mrb_int n)
551 const unsigned char *x = (const unsigned char *)x0, *y = (const unsigned char *)y0;
553 if (m > n) return -1;
555 return memcmp(x0, y0, m) == 0 ? 0 : -1;
561 const unsigned char *ys = (const unsigned char *)memchr(y, *x, n);
564 return (mrb_int)(ys - y);
568 return mrb_memsearch_qs((const unsigned char *)x0, m, (const unsigned char *)y0, n);
572 str_share(mrb_state *mrb, struct RString *orig, struct RString *s)
574 size_t len = (size_t)orig->as.heap.len;
576 mrb_assert(!RSTR_EMBED_P(orig));
577 if (RSTR_NOFREE_P(orig)) {
578 str_init_nofree(s, orig->as.heap.ptr, len);
580 else if (RSTR_SHARED_P(orig)) {
581 str_init_shared(mrb, orig, s, orig->as.heap.aux.shared);
583 else if (RSTR_FSHARED_P(orig)) {
584 str_init_fshared(orig, s, orig->as.heap.aux.fshared);
586 else if (mrb_frozen_p(orig) && !RSTR_POOL_P(orig)) {
587 str_init_fshared(orig, s, orig);
590 if (orig->as.heap.aux.capa > orig->as.heap.len) {
591 orig->as.heap.ptr = (char *)mrb_realloc(mrb, orig->as.heap.ptr, len+1);
592 orig->as.heap.aux.capa = (mrb_ssize)len;
594 str_init_shared(mrb, orig, s, NULL);
595 str_init_shared(mrb, orig, orig, s->as.heap.aux.shared);
600 mrb_str_pool(mrb_state *mrb, const char *p, mrb_int len, mrb_bool nofree)
602 struct RString *s = (struct RString *)mrb_malloc(mrb, sizeof(struct RString));
604 s->tt = MRB_TT_STRING;
605 s->c = mrb->string_class;
608 if (RSTR_EMBEDDABLE_P(len)) {
609 str_init_embed(s, p, len);
612 str_init_nofree(s, p, len);
615 str_init_normal(mrb, s, p, len);
617 RSTR_SET_POOL_FLAG(s);
618 MRB_SET_FROZEN_FLAG(s);
619 return mrb_obj_value(s);
623 mrb_str_byte_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
625 struct RString *orig, *s;
627 orig = mrb_str_ptr(str);
628 s = mrb_obj_alloc_string(mrb);
629 if (RSTR_EMBEDDABLE_P(len)) {
630 str_init_embed(s, RSTR_PTR(orig)+beg, len);
633 str_share(mrb, orig, s);
634 s->as.heap.ptr += (mrb_ssize)beg;
635 s->as.heap.len = (mrb_ssize)len;
637 RSTR_COPY_ASCII_FLAG(s, orig);
638 return mrb_obj_value(s);
642 str_range_to_bytes(mrb_value str, mrb_int *pos, mrb_int *len)
644 *pos = chars2bytes(str, 0, *pos);
645 *len = chars2bytes(str, *pos, *len);
647 #ifdef MRB_UTF8_STRING
648 static inline mrb_value
649 str_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
651 str_range_to_bytes(str, &beg, &len);
652 return mrb_str_byte_subseq(mrb, str, beg, len);
655 #define str_subseq(mrb, str, beg, len) mrb_str_byte_subseq(mrb, str, beg, len)
659 mrb_str_beg_len(mrb_int str_len, mrb_int *begp, mrb_int *lenp)
661 if (str_len < *begp || *lenp < 0) return FALSE;
664 if (*begp < 0) return FALSE;
666 if (*lenp > str_len - *begp)
667 *lenp = str_len - *begp;
675 str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
677 return mrb_str_beg_len(RSTRING_CHAR_LEN(str), &beg, &len) ?
678 str_subseq(mrb, str, beg, len) : mrb_nil_value();
682 mrb_str_index(mrb_state *mrb, mrb_value str, const char *sptr, mrb_int slen, mrb_int offset)
688 len = RSTRING_LEN(str);
691 if (offset < 0) return -1;
693 if (len - offset < slen) return -1;
694 s = RSTRING_PTR(str);
698 if (slen == 0) return offset;
699 /* need proceed one character at a time */
700 len = RSTRING_LEN(str) - offset;
701 pos = mrb_memsearch(sptr, slen, s, len);
702 if (pos < 0) return pos;
707 str_index_str(mrb_state *mrb, mrb_value str, mrb_value str2, mrb_int offset)
712 ptr = RSTRING_PTR(str2);
713 len = RSTRING_LEN(str2);
715 return mrb_str_index(mrb, str, ptr, len, offset);
719 str_replace(mrb_state *mrb, struct RString *s1, struct RString *s2)
723 mrb_check_frozen(mrb, s1);
724 if (s1 == s2) return mrb_obj_value(s1);
725 RSTR_COPY_ASCII_FLAG(s1, s2);
726 if (RSTR_SHARED_P(s1)) {
727 str_decref(mrb, s1->as.heap.aux.shared);
729 else if (!RSTR_EMBED_P(s1) && !RSTR_NOFREE_P(s1) && !RSTR_FSHARED_P(s1)
730 && s1->as.heap.ptr) {
731 mrb_free(mrb, s1->as.heap.ptr);
734 len = (size_t)RSTR_LEN(s2);
735 if (RSTR_EMBEDDABLE_P(len)) {
736 str_init_embed(s1, RSTR_PTR(s2), len);
739 str_share(mrb, s2, s1);
742 return mrb_obj_value(s1);
746 str_rindex(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int pos)
748 const char *s, *sbeg, *t;
749 struct RString *ps = mrb_str_ptr(str);
750 mrb_int len = RSTRING_LEN(sub);
752 /* substring longer than string */
753 if (RSTR_LEN(ps) < len) return -1;
754 if (RSTR_LEN(ps) - pos < len) {
755 pos = RSTR_LEN(ps) - len;
758 s = RSTR_PTR(ps) + pos;
759 t = RSTRING_PTR(sub);
761 s = char_adjust(sbeg, sbeg + RSTR_LEN(ps), s);
763 if (memcmp(s, t, len) == 0) {
764 return (mrb_int)(s - RSTR_PTR(ps));
766 s = char_backtrack(sbeg, s);
776 mrb_str_strlen(mrb_state *mrb, struct RString *s)
778 mrb_int i, max = RSTR_LEN(s);
779 char *p = RSTR_PTR(s);
782 for (i=0; i<max; i++) {
784 mrb_raise(mrb, E_ARGUMENT_ERROR, "string contains null byte");
794 mrb_utf8_from_locale(const char *str, int len)
798 int mbssize, wcssize;
803 len = (int)strlen(str);
804 wcssize = MultiByteToWideChar(GetACP(), 0, str, len, NULL, 0);
805 wcsp = (wchar_t*) malloc((wcssize + 1) * sizeof(wchar_t));
808 wcssize = MultiByteToWideChar(GetACP(), 0, str, len, wcsp, wcssize + 1);
811 mbssize = WideCharToMultiByte(CP_UTF8, 0, (LPCWSTR) wcsp, -1, NULL, 0, NULL, NULL);
812 mbsp = (char*) malloc((mbssize + 1));
817 mbssize = WideCharToMultiByte(CP_UTF8, 0, (LPCWSTR) wcsp, -1, mbsp, mbssize, NULL, NULL);
824 mrb_locale_from_utf8(const char *utf8, int len)
828 int mbssize, wcssize;
833 len = (int)strlen(utf8);
834 wcssize = MultiByteToWideChar(CP_UTF8, 0, utf8, len, NULL, 0);
835 wcsp = (wchar_t*) malloc((wcssize + 1) * sizeof(wchar_t));
838 wcssize = MultiByteToWideChar(CP_UTF8, 0, utf8, len, wcsp, wcssize + 1);
840 mbssize = WideCharToMultiByte(GetACP(), 0, (LPCWSTR) wcsp, -1, NULL, 0, NULL, NULL);
841 mbsp = (char*) malloc((mbssize + 1));
846 mbssize = WideCharToMultiByte(GetACP(), 0, (LPCWSTR) wcsp, -1, mbsp, mbssize, NULL, NULL);
854 mrb_str_modify_keep_ascii(mrb_state *mrb, struct RString *s)
856 mrb_check_frozen(mrb, s);
857 str_modify_keep_ascii(mrb, s);
861 mrb_str_modify(mrb_state *mrb, struct RString *s)
863 mrb_str_modify_keep_ascii(mrb, s);
864 RSTR_UNSET_ASCII_FLAG(s);
868 mrb_str_resize(mrb_state *mrb, mrb_value str, mrb_int len)
871 struct RString *s = mrb_str_ptr(str);
874 mrb_raise(mrb, E_ARGUMENT_ERROR, "negative (or overflowed) string size");
876 mrb_str_modify(mrb, s);
879 if (slen < len || slen - len > 256) {
880 resize_capa(mrb, s, len);
882 RSTR_SET_LEN(s, len);
883 RSTR_PTR(s)[len] = '\0'; /* sentinel */
889 mrb_str_to_cstr(mrb_state *mrb, mrb_value str0)
893 check_null_byte(mrb, str0);
894 s = str_new(mrb, RSTRING_PTR(str0), RSTRING_LEN(str0));
899 mrb_str_concat(mrb_state *mrb, mrb_value self, mrb_value other)
901 other = mrb_str_to_str(mrb, other);
902 mrb_str_cat_str(mrb, self, other);
906 mrb_str_plus(mrb_state *mrb, mrb_value a, mrb_value b)
908 struct RString *s = mrb_str_ptr(a);
909 struct RString *s2 = mrb_str_ptr(b);
912 t = str_new(mrb, 0, RSTR_LEN(s) + RSTR_LEN(s2));
913 memcpy(RSTR_PTR(t), RSTR_PTR(s), RSTR_LEN(s));
914 memcpy(RSTR_PTR(t) + RSTR_LEN(s), RSTR_PTR(s2), RSTR_LEN(s2));
916 return mrb_obj_value(t);
923 * str + other_str -> new_str
925 * Concatenation---Returns a new <code>String</code> containing
926 * <i>other_str</i> concatenated to <i>str</i>.
928 * "Hello from " + self.to_s #=> "Hello from main"
931 mrb_str_plus_m(mrb_state *mrb, mrb_value self)
935 mrb_get_args(mrb, "S", &str);
936 return mrb_str_plus(mrb, self, str);
945 * Returns the length of string.
948 mrb_str_size(mrb_state *mrb, mrb_value self)
950 mrb_int len = RSTRING_CHAR_LEN(self);
951 return mrb_fixnum_value(len);
955 mrb_str_bytesize(mrb_state *mrb, mrb_value self)
957 mrb_int len = RSTRING_LEN(self);
958 return mrb_fixnum_value(len);
964 * str * integer => new_str
966 * Copy---Returns a new <code>String</code> containing <i>integer</i> copies of
969 * "Ho! " * 3 #=> "Ho! Ho! Ho! "
972 mrb_str_times(mrb_state *mrb, mrb_value self)
975 struct RString *str2;
978 mrb_get_args(mrb, "i", ×);
980 mrb_raise(mrb, E_ARGUMENT_ERROR, "negative argument");
982 if (times && MRB_SSIZE_MAX / times < RSTRING_LEN(self)) {
983 mrb_raise(mrb, E_ARGUMENT_ERROR, "argument too big");
986 len = RSTRING_LEN(self)*times;
987 str2 = str_new(mrb, 0, len);
988 str_with_class(str2, self);
991 n = RSTRING_LEN(self);
992 memcpy(p, RSTRING_PTR(self), n);
997 memcpy(p + n, p, len-n);
999 p[RSTR_LEN(str2)] = '\0';
1000 RSTR_COPY_ASCII_FLAG(str2, mrb_str_ptr(self));
1002 return mrb_obj_value(str2);
1004 /* -------------------------------------------------------------- */
1006 #define lesser(a,b) (((a)>(b))?(b):(a))
1008 /* ---------------------------*/
1011 * mrb_value str1 <=> mrb_value str2 => int
1017 mrb_str_cmp(mrb_state *mrb, mrb_value str1, mrb_value str2)
1021 struct RString *s1 = mrb_str_ptr(str1);
1022 struct RString *s2 = mrb_str_ptr(str2);
1024 len = lesser(RSTR_LEN(s1), RSTR_LEN(s2));
1025 retval = memcmp(RSTR_PTR(s1), RSTR_PTR(s2), len);
1027 if (RSTR_LEN(s1) == RSTR_LEN(s2)) return 0;
1028 if (RSTR_LEN(s1) > RSTR_LEN(s2)) return 1;
1031 if (retval > 0) return 1;
1039 * str <=> other_str => -1, 0, +1
1041 * Comparison---Returns -1 if <i>str</i> is less than, 0 if
1042 * <i>str</i> is equal to, and +1 if <i>str</i> is greater than
1043 * <i>other_str</i>. If the strings are of different lengths, and the strings are
1044 * equal when compared up to the shortest length, then the longer string is
1045 * considered greater than the shorter one. If the variable <code>$=</code> is
1046 * <code>false</code>, the comparison is based on comparing the binary values
1047 * of each character in the string. In older versions of Ruby, setting
1048 * <code>$=</code> allowed case-insensitive comparisons; this is now deprecated
1049 * in favor of using <code>String#casecmp</code>.
1051 * <code><=></code> is the basis for the methods <code><</code>,
1052 * <code><=</code>, <code>></code>, <code>>=</code>, and <code>between?</code>,
1053 * included from module <code>Comparable</code>. The method
1054 * <code>String#==</code> does not use <code>Comparable#==</code>.
1056 * "abcdef" <=> "abcde" #=> 1
1057 * "abcdef" <=> "abcdef" #=> 0
1058 * "abcdef" <=> "abcdefg" #=> -1
1059 * "abcdef" <=> "ABCDEF" #=> 1
1062 mrb_str_cmp_m(mrb_state *mrb, mrb_value str1)
1064 mrb_value str2 = mrb_get_arg1(mrb);
1067 if (!mrb_string_p(str2)) {
1068 return mrb_nil_value();
1071 result = mrb_str_cmp(mrb, str1, str2);
1073 return mrb_fixnum_value(result);
1077 str_eql(mrb_state *mrb, const mrb_value str1, const mrb_value str2)
1079 const mrb_int len = RSTRING_LEN(str1);
1081 if (len != RSTRING_LEN(str2)) return FALSE;
1082 if (memcmp(RSTRING_PTR(str1), RSTRING_PTR(str2), (size_t)len) == 0)
1088 mrb_str_equal(mrb_state *mrb, mrb_value str1, mrb_value str2)
1090 if (!mrb_string_p(str2)) return FALSE;
1091 return str_eql(mrb, str1, str2);
1097 * str == obj => true or false
1100 * If <i>obj</i> is not a <code>String</code>, returns <code>false</code>.
1101 * Otherwise, returns <code>false</code> or <code>true</code>
1103 * caution:if <i>str</i> <code><=></code> <i>obj</i> returns zero.
1106 mrb_str_equal_m(mrb_state *mrb, mrb_value str1)
1108 mrb_value str2 = mrb_get_arg1(mrb);
1110 return mrb_bool_value(mrb_str_equal(mrb, str1, str2));
1112 /* ---------------------------------- */
1115 mrb_str_to_str(mrb_state *mrb, mrb_value str)
1117 switch (mrb_type(str)) {
1121 return mrb_sym_str(mrb, mrb_symbol(str));
1123 return mrb_fixnum_to_str(mrb, str, 10);
1126 return mrb_mod_to_s(mrb, str);
1128 return mrb_convert_type(mrb, str, MRB_TT_STRING, "String", "to_s");
1132 /* obsolete: use RSTRING_PTR() */
1134 mrb_string_value_ptr(mrb_state *mrb, mrb_value str)
1136 str = mrb_str_to_str(mrb, str);
1137 return RSTRING_PTR(str);
1140 /* obsolete: use RSTRING_LEN() */
1142 mrb_string_value_len(mrb_state *mrb, mrb_value ptr)
1144 mrb_to_str(mrb, ptr);
1145 return RSTRING_LEN(ptr);
1149 mrb_str_dup(mrb_state *mrb, mrb_value str)
1151 struct RString *s = mrb_str_ptr(str);
1152 struct RString *dup = str_new(mrb, 0, 0);
1154 str_with_class(dup, str);
1155 return str_replace(mrb, dup, s);
1158 enum str_convert_range {
1159 /* `beg` and `len` are byte unit in `0 ... str.bytesize` */
1160 STR_BYTE_RANGE_CORRECTED = 1,
1162 /* `beg` and `len` are char unit in any range */
1165 /* `beg` and `len` are char unit in `0 ... str.size` */
1166 STR_CHAR_RANGE_CORRECTED = 3,
1168 /* `beg` is out of range */
1169 STR_OUT_OF_RANGE = -1
1172 static enum str_convert_range
1173 str_convert_range(mrb_state *mrb, mrb_value str, mrb_value indx, mrb_value alen, mrb_int *beg, mrb_int *len)
1175 if (!mrb_undef_p(alen)) {
1176 *beg = mrb_int(mrb, indx);
1177 *len = mrb_int(mrb, alen);
1178 return STR_CHAR_RANGE;
1181 switch (mrb_type(indx)) {
1183 *beg = mrb_fixnum(indx);
1185 return STR_CHAR_RANGE;
1188 *beg = str_index_str(mrb, str, indx, 0);
1189 if (*beg < 0) { break; }
1190 *len = RSTRING_LEN(indx);
1191 return STR_BYTE_RANGE_CORRECTED;
1197 indx = mrb_to_int(mrb, indx);
1198 if (mrb_fixnum_p(indx)) {
1199 *beg = mrb_fixnum(indx);
1201 return STR_CHAR_RANGE;
1204 *len = RSTRING_CHAR_LEN(str);
1205 switch (mrb_range_beg_len(mrb, indx, beg, len, *len, TRUE)) {
1207 return STR_CHAR_RANGE_CORRECTED;
1209 return STR_OUT_OF_RANGE;
1214 mrb_raise(mrb, E_TYPE_ERROR, "can't convert to Fixnum");
1217 return STR_OUT_OF_RANGE;
1221 mrb_str_aref(mrb_state *mrb, mrb_value str, mrb_value indx, mrb_value alen)
1225 switch (str_convert_range(mrb, str, indx, alen, &beg, &len)) {
1226 case STR_CHAR_RANGE_CORRECTED:
1227 return str_subseq(mrb, str, beg, len);
1228 case STR_CHAR_RANGE:
1229 str = str_substr(mrb, str, beg, len);
1230 if (mrb_undef_p(alen) && !mrb_nil_p(str) && RSTRING_LEN(str) == 0) return mrb_nil_value();
1232 case STR_BYTE_RANGE_CORRECTED:
1233 if (mrb_string_p(indx)) {
1234 return mrb_str_dup(mrb, indx);
1237 return mrb_str_byte_subseq(mrb, str, beg, len);
1239 case STR_OUT_OF_RANGE:
1241 return mrb_nil_value();
1249 * str[fixnum] => new_str or nil
1250 * str[fixnum, fixnum] => new_str or nil
1251 * str[range] => new_str or nil
1252 * str[other_str] => new_str or nil
1253 * str.slice(fixnum) => new_str or nil
1254 * str.slice(fixnum, fixnum) => new_str or nil
1255 * str.slice(range) => new_str or nil
1256 * str.slice(other_str) => new_str or nil
1258 * Element Reference---If passed a single <code>Fixnum</code>, returns a
1259 * substring of one character at that position. If passed two <code>Fixnum</code>
1260 * objects, returns a substring starting at the offset given by the first, and
1261 * a length given by the second. If given a range, a substring containing
1262 * characters at offsets given by the range is returned. In all three cases, if
1263 * an offset is negative, it is counted from the end of <i>str</i>. Returns
1264 * <code>nil</code> if the initial offset falls outside the string, the length
1265 * is negative, or the beginning of the range is greater than the end.
1267 * If a <code>String</code> is given, that string is returned if it occurs in
1268 * <i>str</i>. In both cases, <code>nil</code> is returned if there is no
1272 * a[1] #=> 101(1.8.7) "e"(1.9.2)
1273 * a[1.1] #=> "e"(1.9.2)
1277 * a[-4..-2] #=> "her"
1284 mrb_str_aref_m(mrb_state *mrb, mrb_value str)
1288 if (mrb_get_args(mrb, "o|o", &a1, &a2) == 1) {
1289 a2 = mrb_undef_value();
1292 return mrb_str_aref(mrb, str, a1, a2);
1295 static mrb_noreturn void
1296 str_out_of_index(mrb_state *mrb, mrb_value index)
1298 mrb_raisef(mrb, E_INDEX_ERROR, "index %v out of string", index);
1302 str_replace_partial(mrb_state *mrb, mrb_value src, mrb_int pos, mrb_int end, mrb_value rep)
1304 const mrb_int shrink_threshold = 256;
1305 struct RString *str = mrb_str_ptr(src);
1306 mrb_int len = RSTR_LEN(str);
1307 mrb_int replen, newlen;
1310 if (end > len) { end = len; }
1312 if (pos < 0 || pos > len) {
1313 str_out_of_index(mrb, mrb_fixnum_value(pos));
1316 replen = (mrb_nil_p(rep) ? 0 : RSTRING_LEN(rep));
1317 newlen = replen + len - (end - pos);
1319 if (newlen >= MRB_SSIZE_MAX || newlen < replen /* overflowed */) {
1320 mrb_raise(mrb, E_RUNTIME_ERROR, "string size too big");
1323 mrb_str_modify(mrb, str);
1326 resize_capa(mrb, str, newlen);
1329 strp = RSTR_PTR(str);
1331 memmove(strp + newlen - (len - end), strp + end, len - end);
1332 if (!mrb_nil_p(rep)) {
1333 memmove(strp + pos, RSTRING_PTR(rep), replen);
1335 RSTR_SET_LEN(str, newlen);
1336 strp[newlen] = '\0';
1338 if (len - newlen >= shrink_threshold) {
1339 resize_capa(mrb, str, newlen);
1345 #define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{'))
1348 str_escape(mrb_state *mrb, mrb_value str, mrb_bool inspect)
1350 const char *p, *pend;
1351 char buf[4]; /* `\x??` or UTF-8 character */
1352 mrb_value result = mrb_str_new_lit(mrb, "\"");
1353 #ifdef MRB_UTF8_STRING
1354 uint32_t ascii_flag = MRB_STR_ASCII;
1357 p = RSTRING_PTR(str); pend = RSTRING_END(str);
1358 for (;p < pend; p++) {
1359 unsigned char c, cc;
1360 #ifdef MRB_UTF8_STRING
1362 mrb_int clen = mrb_utf8len(p, pend);
1366 for (i=0; i<clen; i++) {
1369 mrb_str_cat(mrb, result, buf, clen);
1377 if (c == '"'|| c == '\\' || (c == '#' && IS_EVSTR(p+1, pend))) {
1378 buf[0] = '\\'; buf[1] = c;
1379 mrb_str_cat(mrb, result, buf, 2);
1384 mrb_str_cat(mrb, result, buf, 1);
1388 case '\n': cc = 'n'; break;
1389 case '\r': cc = 'r'; break;
1390 case '\t': cc = 't'; break;
1391 case '\f': cc = 'f'; break;
1392 case '\013': cc = 'v'; break;
1393 case '\010': cc = 'b'; break;
1394 case '\007': cc = 'a'; break;
1395 case 033: cc = 'e'; break;
1396 default: cc = 0; break;
1401 mrb_str_cat(mrb, result, buf, 2);
1407 buf[3] = mrb_digitmap[c % 16]; c /= 16;
1408 buf[2] = mrb_digitmap[c % 16];
1409 mrb_str_cat(mrb, result, buf, 4);
1413 mrb_str_cat_lit(mrb, result, "\"");
1414 #ifdef MRB_UTF8_STRING
1416 mrb_str_ptr(str)->flags |= ascii_flag;
1417 mrb_str_ptr(result)->flags |= ascii_flag;
1420 RSTR_SET_ASCII_FLAG(mrb_str_ptr(result));
1428 mrb_str_aset(mrb_state *mrb, mrb_value str, mrb_value indx, mrb_value alen, mrb_value replace)
1430 mrb_int beg, len, charlen;
1432 mrb_to_str(mrb, replace);
1434 switch (str_convert_range(mrb, str, indx, alen, &beg, &len)) {
1435 case STR_OUT_OF_RANGE:
1437 mrb_raise(mrb, E_INDEX_ERROR, "string not matched");
1438 case STR_CHAR_RANGE:
1440 mrb_raisef(mrb, E_INDEX_ERROR, "negative length %v", alen);
1442 charlen = RSTRING_CHAR_LEN(str);
1443 if (beg < 0) { beg += charlen; }
1444 if (beg < 0 || beg > charlen) { str_out_of_index(mrb, indx); }
1446 case STR_CHAR_RANGE_CORRECTED:
1447 str_range_to_bytes(str, &beg, &len);
1449 case STR_BYTE_RANGE_CORRECTED:
1450 str_replace_partial(mrb, str, beg, beg + len, replace);
1456 * str[fixnum] = replace
1457 * str[fixnum, fixnum] = replace
1458 * str[range] = replace
1459 * str[other_str] = replace
1461 * Modify +self+ by replacing the content of +self+.
1462 * The portion of the string affected is determined using the same criteria as +String#[]+.
1465 mrb_str_aset_m(mrb_state *mrb, mrb_value str)
1467 mrb_value indx, alen, replace;
1469 switch (mrb_get_args(mrb, "oo|S!", &indx, &alen, &replace)) {
1472 alen = mrb_undef_value();
1477 mrb_str_aset(mrb, str, indx, alen, replace);
1484 * str.capitalize! => str or nil
1486 * Modifies <i>str</i> by converting the first character to uppercase and the
1487 * remainder to lowercase. Returns <code>nil</code> if no changes are made.
1490 * a.capitalize! #=> "Hello"
1492 * a.capitalize! #=> nil
1495 mrb_str_capitalize_bang(mrb_state *mrb, mrb_value str)
1498 mrb_bool modify = FALSE;
1499 struct RString *s = mrb_str_ptr(str);
1501 mrb_str_modify_keep_ascii(mrb, s);
1502 if (RSTR_LEN(s) == 0 || !RSTR_PTR(s)) return mrb_nil_value();
1503 p = RSTR_PTR(s); pend = RSTR_PTR(s) + RSTR_LEN(s);
1508 while (++p < pend) {
1514 if (modify) return str;
1515 return mrb_nil_value();
1521 * str.capitalize => new_str
1523 * Returns a copy of <i>str</i> with the first character converted to uppercase
1524 * and the remainder to lowercase.
1526 * "hello".capitalize #=> "Hello"
1527 * "HELLO".capitalize #=> "Hello"
1528 * "123ABC".capitalize #=> "123abc"
1531 mrb_str_capitalize(mrb_state *mrb, mrb_value self)
1535 str = mrb_str_dup(mrb, self);
1536 mrb_str_capitalize_bang(mrb, str);
1543 * str.chomp!(separator="\n") => str or nil
1545 * Modifies <i>str</i> in place as described for <code>String#chomp</code>,
1546 * returning <i>str</i>, or <code>nil</code> if no modifications were made.
1549 mrb_str_chomp_bang(mrb_state *mrb, mrb_value str)
1557 struct RString *s = mrb_str_ptr(str);
1559 argc = mrb_get_args(mrb, "|S", &rs);
1560 mrb_str_modify_keep_ascii(mrb, s);
1563 if (len == 0) return mrb_nil_value();
1565 if (RSTR_PTR(s)[len-1] == '\n') {
1566 RSTR_SET_LEN(s, RSTR_LEN(s) - 1);
1567 if (RSTR_LEN(s) > 0 &&
1568 RSTR_PTR(s)[RSTR_LEN(s)-1] == '\r') {
1569 RSTR_SET_LEN(s, RSTR_LEN(s) - 1);
1572 else if (RSTR_PTR(s)[len-1] == '\r') {
1573 RSTR_SET_LEN(s, RSTR_LEN(s) - 1);
1576 return mrb_nil_value();
1578 RSTR_PTR(s)[RSTR_LEN(s)] = '\0';
1582 if (len == 0 || mrb_nil_p(rs)) return mrb_nil_value();
1584 rslen = RSTRING_LEN(rs);
1586 while (len>0 && p[len-1] == '\n') {
1588 if (len>0 && p[len-1] == '\r')
1591 if (len < RSTR_LEN(s)) {
1592 RSTR_SET_LEN(s, len);
1596 return mrb_nil_value();
1598 if (rslen > len) return mrb_nil_value();
1599 newline = RSTRING_PTR(rs)[rslen-1];
1600 if (rslen == 1 && newline == '\n')
1601 newline = RSTRING_PTR(rs)[rslen-1];
1602 if (rslen == 1 && newline == '\n')
1605 pp = p + len - rslen;
1606 if (p[len-1] == newline &&
1608 memcmp(RSTRING_PTR(rs), pp, rslen) == 0)) {
1609 RSTR_SET_LEN(s, len - rslen);
1610 p[RSTR_LEN(s)] = '\0';
1613 return mrb_nil_value();
1619 * str.chomp(separator="\n") => new_str
1621 * Returns a new <code>String</code> with the given record separator removed
1622 * from the end of <i>str</i> (if present). <code>chomp</code> also removes
1623 * carriage return characters (that is, it will remove <code>\n</code>,
1624 * <code>\r</code>, and <code>\r\n</code>).
1626 * "hello".chomp #=> "hello"
1627 * "hello\n".chomp #=> "hello"
1628 * "hello\r\n".chomp #=> "hello"
1629 * "hello\n\r".chomp #=> "hello\n"
1630 * "hello\r".chomp #=> "hello"
1631 * "hello \n there".chomp #=> "hello \n there"
1632 * "hello".chomp("llo") #=> "he"
1635 mrb_str_chomp(mrb_state *mrb, mrb_value self)
1639 str = mrb_str_dup(mrb, self);
1640 mrb_str_chomp_bang(mrb, str);
1647 * str.chop! => str or nil
1649 * Processes <i>str</i> as for <code>String#chop</code>, returning <i>str</i>,
1650 * or <code>nil</code> if <i>str</i> is the empty string. See also
1651 * <code>String#chomp!</code>.
1654 mrb_str_chop_bang(mrb_state *mrb, mrb_value str)
1656 struct RString *s = mrb_str_ptr(str);
1658 mrb_str_modify_keep_ascii(mrb, s);
1659 if (RSTR_LEN(s) > 0) {
1661 #ifdef MRB_UTF8_STRING
1662 const char* t = RSTR_PTR(s), *p = t;
1663 const char* e = p + RSTR_LEN(s);
1665 mrb_int clen = mrb_utf8len(p, e);
1666 if (p + clen>=e) break;
1671 len = RSTR_LEN(s) - 1;
1673 if (RSTR_PTR(s)[len] == '\n') {
1675 RSTR_PTR(s)[len-1] == '\r') {
1679 RSTR_SET_LEN(s, len);
1680 RSTR_PTR(s)[len] = '\0';
1683 return mrb_nil_value();
1689 * str.chop => new_str
1691 * Returns a new <code>String</code> with the last character removed. If the
1692 * string ends with <code>\r\n</code>, both characters are removed. Applying
1693 * <code>chop</code> to an empty string returns an empty
1694 * string. <code>String#chomp</code> is often a safer alternative, as it leaves
1695 * the string unchanged if it doesn't end in a record separator.
1697 * "string\r\n".chop #=> "string"
1698 * "string\n\r".chop #=> "string\n"
1699 * "string\n".chop #=> "string"
1700 * "string".chop #=> "strin"
1704 mrb_str_chop(mrb_state *mrb, mrb_value self)
1707 str = mrb_str_dup(mrb, self);
1708 mrb_str_chop_bang(mrb, str);
1715 * str.downcase! => str or nil
1717 * Downcases the contents of <i>str</i>, returning <code>nil</code> if no
1718 * changes were made.
1721 mrb_str_downcase_bang(mrb_state *mrb, mrb_value str)
1724 mrb_bool modify = FALSE;
1725 struct RString *s = mrb_str_ptr(str);
1727 mrb_str_modify_keep_ascii(mrb, s);
1729 pend = RSTR_PTR(s) + RSTR_LEN(s);
1738 if (modify) return str;
1739 return mrb_nil_value();
1745 * str.downcase => new_str
1747 * Returns a copy of <i>str</i> with all uppercase letters replaced with their
1748 * lowercase counterparts. The operation is locale insensitive---only
1749 * characters 'A' to 'Z' are affected.
1751 * "hEllO".downcase #=> "hello"
1754 mrb_str_downcase(mrb_state *mrb, mrb_value self)
1758 str = mrb_str_dup(mrb, self);
1759 mrb_str_downcase_bang(mrb, str);
1766 * str.empty? => true or false
1768 * Returns <code>true</code> if <i>str</i> has a length of zero.
1770 * "hello".empty? #=> false
1771 * "".empty? #=> true
1774 mrb_str_empty_p(mrb_state *mrb, mrb_value self)
1776 struct RString *s = mrb_str_ptr(self);
1778 return mrb_bool_value(RSTR_LEN(s) == 0);
1784 * str.eql?(other) => true or false
1786 * Two strings are equal if they have the same length and content.
1789 mrb_str_eql(mrb_state *mrb, mrb_value self)
1791 mrb_value str2 = mrb_get_arg1(mrb);
1794 eql_p = (mrb_string_p(str2)) && str_eql(mrb, self, str2);
1796 return mrb_bool_value(eql_p);
1800 mrb_str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
1802 return str_substr(mrb, str, beg, len);
1806 mrb_str_hash(mrb_state *mrb, mrb_value str)
1809 struct RString *s = mrb_str_ptr(str);
1810 mrb_int len = RSTR_LEN(s);
1811 char *p = RSTR_PTR(s);
1815 key = key*65599 + *p;
1818 return (uint32_t)(key + (key>>5));
1824 * str.hash => fixnum
1826 * Return a hash based on the string's length and content.
1829 mrb_str_hash_m(mrb_state *mrb, mrb_value self)
1831 mrb_int key = mrb_str_hash(mrb, self);
1832 return mrb_fixnum_value(key);
1838 * str.include? other_str => true or false
1839 * str.include? fixnum => true or false
1841 * Returns <code>true</code> if <i>str</i> contains the given string or
1844 * "hello".include? "lo" #=> true
1845 * "hello".include? "ol" #=> false
1846 * "hello".include? ?h #=> true
1849 mrb_str_include(mrb_state *mrb, mrb_value self)
1853 mrb_get_args(mrb, "S", &str2);
1854 if (str_index_str(mrb, self, str2, 0) < 0)
1855 return mrb_bool_value(FALSE);
1856 return mrb_bool_value(TRUE);
1862 * str.index(substring [, offset]) => fixnum or nil
1864 * Returns the index of the first occurrence of the given
1865 * <i>substring</i>. Returns <code>nil</code> if not found.
1866 * If the second parameter is present, it
1867 * specifies the position in the string to begin the search.
1869 * "hello".index('l') #=> 2
1870 * "hello".index('lo') #=> 3
1871 * "hello".index('a') #=> nil
1872 * "hello".index('l', -2) #=> 3
1875 mrb_str_index_m(mrb_state *mrb, mrb_value str)
1880 if (mrb_get_args(mrb, "S|i", &sub, &pos) == 1) {
1884 mrb_int clen = RSTRING_CHAR_LEN(str);
1887 return mrb_nil_value();
1890 pos = str_index_str_by_char(mrb, str, sub, pos);
1892 if (pos == -1) return mrb_nil_value();
1893 BYTES_ALIGN_CHECK(pos);
1894 return mrb_fixnum_value(pos);
1901 * str.replace(other_str) => str
1903 * s = "hello" #=> "hello"
1904 * s.replace "world" #=> "world"
1907 mrb_str_replace(mrb_state *mrb, mrb_value str)
1911 mrb_get_args(mrb, "S", &str2);
1912 return str_replace(mrb, mrb_str_ptr(str), mrb_str_ptr(str2));
1918 * String.new(str="") => new_str
1920 * Returns a new string object containing a copy of <i>str</i>.
1923 mrb_str_init(mrb_state *mrb, mrb_value self)
1927 if (mrb_get_args(mrb, "|S", &str2) == 0) {
1928 struct RString *s = str_new(mrb, 0, 0);
1929 str2 = mrb_obj_value(s);
1931 str_replace(mrb, mrb_str_ptr(self), mrb_str_ptr(str2));
1939 * str.intern => symbol
1940 * str.to_sym => symbol
1942 * Returns the <code>Symbol</code> corresponding to <i>str</i>, creating the
1943 * symbol if it did not previously exist. See <code>Symbol#id2name</code>.
1945 * "Koala".intern #=> :Koala
1946 * s = 'cat'.to_sym #=> :cat
1947 * s == :cat #=> true
1948 * s = '@cat'.to_sym #=> :@cat
1949 * s == :@cat #=> true
1951 * This can also be used to create symbols that cannot be represented using the
1952 * <code>:xxx</code> notation.
1954 * 'cat and dog'.to_sym #=> :"cat and dog"
1957 mrb_str_intern(mrb_state *mrb, mrb_value self)
1959 return mrb_symbol_value(mrb_intern_str(mrb, self));
1961 /* ---------------------------------- */
1963 mrb_obj_as_string(mrb_state *mrb, mrb_value obj)
1965 if (mrb_string_p(obj)) {
1968 return mrb_str_to_str(mrb, obj);
1972 mrb_ptr_to_str(mrb_state *mrb, void *p)
1974 struct RString *p_str;
1977 uintptr_t n = (uintptr_t)p;
1979 p_str = str_new(mrb, NULL, 2 + sizeof(uintptr_t) * CHAR_BIT / 4);
1980 p1 = RSTR_PTR(p_str);
1986 *p2++ = mrb_digitmap[n % 16];
1990 RSTR_SET_LEN(p_str, (mrb_int)(p2 - RSTR_PTR(p_str)));
1998 return mrb_obj_value(p_str);
2002 str_reverse(char *p, char *e)
2016 * str.reverse! => str
2018 * Reverses <i>str</i> in place.
2021 mrb_str_reverse_bang(mrb_state *mrb, mrb_value str)
2023 struct RString *s = mrb_str_ptr(str);
2026 #ifdef MRB_UTF8_STRING
2027 mrb_int utf8_len = RSTRING_CHAR_LEN(str);
2028 mrb_int len = RSTR_LEN(s);
2030 if (utf8_len < 2) return str;
2031 if (utf8_len < len) {
2032 mrb_str_modify(mrb, s);
2034 e = p + RSTR_LEN(s);
2036 mrb_int clen = mrb_utf8len(p, e);
2037 str_reverse(p, p + clen - 1);
2044 if (RSTR_LEN(s) > 1) {
2045 mrb_str_modify(mrb, s);
2052 e = p + RSTR_LEN(s) - 1;
2057 /* ---------------------------------- */
2061 * str.reverse => new_str
2063 * Returns a new string with the characters from <i>str</i> in reverse order.
2065 * "stressed".reverse #=> "desserts"
2068 mrb_str_reverse(mrb_state *mrb, mrb_value str)
2070 mrb_value str2 = mrb_str_dup(mrb, str);
2071 mrb_str_reverse_bang(mrb, str2);
2078 * str.rindex(substring [, offset]) => fixnum or nil
2080 * Returns the index of the last occurrence of the given <i>substring</i>.
2081 * Returns <code>nil</code> if not found. If the second parameter is
2082 * present, it specifies the position in the string to end the
2083 * search---characters beyond this point will not be considered.
2085 * "hello".rindex('e') #=> 1
2086 * "hello".rindex('l') #=> 3
2087 * "hello".rindex('a') #=> nil
2088 * "hello".rindex('l', 2) #=> 2
2091 mrb_str_rindex(mrb_state *mrb, mrb_value str)
2094 mrb_int pos, len = RSTRING_CHAR_LEN(str);
2096 if (mrb_get_args(mrb, "S|i", &sub, &pos) == 1) {
2103 return mrb_nil_value();
2106 if (pos > len) pos = len;
2108 pos = chars2bytes(str, 0, pos);
2109 pos = str_rindex(mrb, str, sub, pos);
2111 pos = bytes2chars(RSTRING_PTR(str), RSTRING_LEN(str), pos);
2112 BYTES_ALIGN_CHECK(pos);
2113 return mrb_fixnum_value(pos);
2115 return mrb_nil_value();
2122 * str.split(separator=nil, [limit]) => anArray
2124 * Divides <i>str</i> into substrings based on a delimiter, returning an array
2125 * of these substrings.
2127 * If <i>separator</i> is a <code>String</code>, then its contents are used as
2128 * the delimiter when splitting <i>str</i>. If <i>separator</i> is a single
2129 * space, <i>str</i> is split on whitespace, with leading whitespace and runs
2130 * of contiguous whitespace characters ignored.
2132 * If <i>separator</i> is omitted or <code>nil</code> (which is the default),
2133 * <i>str</i> is split on whitespace as if ' ' were specified.
2135 * If the <i>limit</i> parameter is omitted, trailing null fields are
2136 * suppressed. If <i>limit</i> is a positive number, at most that number of
2137 * fields will be returned (if <i>limit</i> is <code>1</code>, the entire
2138 * string is returned as the only entry in an array). If negative, there is no
2139 * limit to the number of fields returned, and trailing null fields are not
2142 * " now's the time".split #=> ["now's", "the", "time"]
2143 * " now's the time".split(' ') #=> ["now's", "the", "time"]
2145 * "mellow yellow".split("ello") #=> ["m", "w y", "w"]
2146 * "1,2,,3,4,,".split(',') #=> ["1", "2", "", "3", "4"]
2147 * "1,2,,3,4,,".split(',', 4) #=> ["1", "2", "", "3,4,,"]
2148 * "1,2,,3,4,,".split(',', -4) #=> ["1", "2", "", "3", "4", "", ""]
2152 mrb_str_split_m(mrb_state *mrb, mrb_value str)
2155 mrb_value spat = mrb_nil_value();
2156 enum {awk, string} split_type = string;
2162 mrb_value result, tmp;
2164 argc = mrb_get_args(mrb, "|oi", &spat, &lim);
2165 lim_p = (lim > 0 && argc == 2);
2168 if (RSTRING_LEN(str) == 0)
2169 return mrb_ary_new_capa(mrb, 0);
2170 return mrb_ary_new_from_values(mrb, 1, &str);
2175 if (argc == 0 || mrb_nil_p(spat)) {
2178 else if (!mrb_string_p(spat)) {
2179 mrb_raise(mrb, E_TYPE_ERROR, "expected String");
2181 else if (RSTRING_LEN(spat) == 1 && RSTRING_PTR(spat)[0] == ' ') {
2185 result = mrb_ary_new(mrb);
2187 if (split_type == awk) {
2188 mrb_bool skip = TRUE;
2190 mrb_int str_len = RSTRING_LEN(str);
2192 int ai = mrb_gc_arena_save(mrb);
2195 while (idx < str_len) {
2196 c = (unsigned char)RSTRING_PTR(str)[idx++];
2204 if (lim_p && lim <= i) break;
2207 else if (ISSPACE(c)) {
2208 mrb_ary_push(mrb, result, mrb_str_byte_subseq(mrb, str, beg, end-beg));
2209 mrb_gc_arena_restore(mrb, ai);
2219 else { /* split_type == string */
2220 mrb_int str_len = RSTRING_LEN(str);
2221 mrb_int pat_len = RSTRING_LEN(spat);
2223 int ai = mrb_gc_arena_save(mrb);
2225 while (idx < str_len) {
2227 end = mrb_memsearch(RSTRING_PTR(spat), pat_len, RSTRING_PTR(str)+idx, str_len - idx);
2231 end = chars2bytes(str, idx, 1);
2233 mrb_ary_push(mrb, result, mrb_str_byte_subseq(mrb, str, idx, end));
2234 mrb_gc_arena_restore(mrb, ai);
2235 idx += end + pat_len;
2236 if (lim_p && lim <= ++i) break;
2240 if (RSTRING_LEN(str) > 0 && (lim_p || RSTRING_LEN(str) > beg || lim < 0)) {
2241 if (RSTRING_LEN(str) == beg) {
2242 tmp = mrb_str_new_empty(mrb, str);
2245 tmp = mrb_str_byte_subseq(mrb, str, beg, RSTRING_LEN(str)-beg);
2247 mrb_ary_push(mrb, result, tmp);
2249 if (!lim_p && lim == 0) {
2251 while ((len = RARRAY_LEN(result)) > 0 &&
2252 (tmp = RARRAY_PTR(result)[len-1], RSTRING_LEN(tmp) == 0))
2253 mrb_ary_pop(mrb, result);
2260 mrb_str_len_to_inum(mrb_state *mrb, const char *str, mrb_int len, mrb_int base, int badcheck)
2262 const char *p = str;
2263 const char *pend = str + len;
2269 #define conv_digit(c) \
2270 (ISDIGIT(c) ? ((c) - '0') : \
2271 ISLOWER(c) ? ((c) - 'a' + 10) : \
2272 ISUPPER(c) ? ((c) - 'A' + 10) : \
2276 if (badcheck) goto bad;
2277 return mrb_fixnum_value(0);
2279 while (p<pend && ISSPACE(*p))
2285 else if (p[0] == '-') {
2309 else if (base < -1) {
2318 if (p[0] == '0' && (p[1] == 'b'||p[1] == 'B')) {
2325 if (p[0] == '0' && (p[1] == 'o'||p[1] == 'O')) {
2328 case 4: case 5: case 6: case 7:
2331 if (p[0] == '0' && (p[1] == 'd'||p[1] == 'D')) {
2334 case 9: case 11: case 12: case 13: case 14: case 15:
2337 if (p[0] == '0' && (p[1] == 'x'||p[1] == 'X')) {
2342 if (base < 2 || 36 < base) {
2343 mrb_raisef(mrb, E_ARGUMENT_ERROR, "illegal radix %i", base);
2346 } /* end of switch (base) { */
2348 if (badcheck) goto bad;
2349 return mrb_fixnum_value(0);
2351 if (*p == '0') { /* squeeze preceding 0s */
2356 if (p<pend && *p == '_') {
2357 if (badcheck) goto bad;
2367 if (*(p - 1) == '0')
2370 if (p == pend || *p == '_') {
2371 if (badcheck) goto bad;
2372 return mrb_fixnum_value(0);
2374 for ( ;p<pend;p++) {
2378 if (badcheck) goto bad;
2382 if (badcheck) goto bad;
2386 if (badcheck && *p == '\0') {
2390 if (c < 0 || c >= base) {
2395 if (n > (uint64_t)MRB_INT_MAX + (sign ? 0 : 1)) {
2396 #ifndef MRB_WITHOUT_FLOAT
2398 return mrb_float_value(mrb, mrb_str_to_dbl(mrb, mrb_str_new(mrb, str, len), badcheck));
2403 mrb_raisef(mrb, E_RANGE_ERROR, "string (%l) too big for integer", str, pend-str);
2409 if (p == str) goto bad; /* no number */
2410 if (*(p - 1) == '_') goto bad; /* trailing '_' */
2411 while (p<pend && ISSPACE(*p)) p++;
2412 if (p<pend) goto bad; /* trailing garbage */
2415 return mrb_fixnum_value(sign ? val : -val);
2417 mrb_raise(mrb, E_ARGUMENT_ERROR, "string contains null byte");
2420 mrb_raisef(mrb, E_ARGUMENT_ERROR, "invalid string for number(%!l)", str, pend-str);
2422 return mrb_fixnum_value(0);
2426 mrb_cstr_to_inum(mrb_state *mrb, const char *str, mrb_int base, mrb_bool badcheck)
2428 return mrb_str_len_to_inum(mrb, str, strlen(str), base, badcheck);
2431 /* obsolete: use RSTRING_CSTR() or mrb_string_cstr() */
2433 mrb_string_value_cstr(mrb_state *mrb, mrb_value *ptr)
2439 check_null_byte(mrb, *ptr);
2440 ps = mrb_str_ptr(*ptr);
2443 if (p[len] == '\0') {
2448 * Even after str_modify_keep_ascii(), NUL termination is not ensured if
2449 * RSTR_SET_LEN() is used explicitly (e.g. String#delete_suffix!).
2451 str_modify_keep_ascii(mrb, ps);
2452 RSTR_PTR(ps)[len] = '\0';
2453 return RSTR_PTR(ps);
2457 mrb_string_cstr(mrb_state *mrb, mrb_value str)
2459 return mrb_string_value_cstr(mrb, &str);
2463 mrb_str_to_inum(mrb_state *mrb, mrb_value str, mrb_int base, mrb_bool badcheck)
2468 mrb_to_str(mrb, str);
2469 s = RSTRING_PTR(str);
2470 len = RSTRING_LEN(str);
2471 return mrb_str_len_to_inum(mrb, s, len, base, badcheck);
2477 * str.to_i(base=10) => integer
2479 * Returns the result of interpreting leading characters in <i>str</i> as an
2480 * integer base <i>base</i> (between 2 and 36). Extraneous characters past the
2481 * end of a valid number are ignored. If there is not a valid number at the
2482 * start of <i>str</i>, <code>0</code> is returned. This method never raises an
2485 * "12345".to_i #=> 12345
2486 * "99 red balloons".to_i #=> 99
2488 * "0a".to_i(16) #=> 10
2489 * "hello".to_i #=> 0
2490 * "1100101".to_i(2) #=> 101
2491 * "1100101".to_i(8) #=> 294977
2492 * "1100101".to_i(10) #=> 1100101
2493 * "1100101".to_i(16) #=> 17826049
2496 mrb_str_to_i(mrb_state *mrb, mrb_value self)
2500 mrb_get_args(mrb, "|i", &base);
2502 mrb_raisef(mrb, E_ARGUMENT_ERROR, "illegal radix %i", base);
2504 return mrb_str_to_inum(mrb, self, base, FALSE);
2507 #ifndef MRB_WITHOUT_FLOAT
2509 mrb_str_len_to_dbl(mrb_state *mrb, const char *s, size_t len, mrb_bool badcheck)
2511 char buf[DBL_DIG * 4 + 20];
2512 const char *p = s, *p2;
2513 const char *pend = p + len;
2518 mrb_bool dot = FALSE;
2521 while (p<pend && ISSPACE(*p)) p++;
2524 if (pend - p > 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
2527 if (!badcheck) return 0.0;
2528 x = mrb_str_len_to_inum(mrb, p, pend-p, 0, badcheck);
2529 if (mrb_fixnum_p(x))
2530 d = (double)mrb_fixnum(x);
2531 else /* if (mrb_float_p(x)) */
2538 mrb_raise(mrb, E_ARGUMENT_ERROR, "string for Float contains null byte");
2545 if (!badcheck && *p == ' ') {
2550 if (*p == '_') break;
2557 if (c == '.') dot = TRUE;
2559 /* remove an underscore between digits */
2560 if (n == buf || !ISDIGIT(prev) || p == pend) {
2561 if (badcheck) goto bad;
2565 else if (badcheck && prev == '_' && !ISDIGIT(c)) goto bad;
2567 const char *bend = buf+sizeof(buf)-1;
2568 if (n==bend) { /* buffer overflow */
2569 if (dot) break; /* cut off remaining fractions */
2580 d = mrb_float_read(p, &end);
2584 mrb_raisef(mrb, E_ARGUMENT_ERROR, "invalid string for float(%!s)", s);
2590 if (!end || p == end) goto bad;
2591 while (end<pend && ISSPACE(*end)) end++;
2592 if (end<pend) goto bad;
2598 mrb_cstr_to_dbl(mrb_state *mrb, const char *s, mrb_bool badcheck)
2600 return mrb_str_len_to_dbl(mrb, s, strlen(s), badcheck);
2604 mrb_str_to_dbl(mrb_state *mrb, mrb_value str, mrb_bool badcheck)
2606 return mrb_str_len_to_dbl(mrb, RSTRING_PTR(str), RSTRING_LEN(str), badcheck);
2614 * Returns the result of interpreting leading characters in <i>str</i> as a
2615 * floating point number. Extraneous characters past the end of a valid number
2616 * are ignored. If there is not a valid number at the start of <i>str</i>,
2617 * <code>0.0</code> is returned. This method never raises an exception.
2619 * "123.45e1".to_f #=> 1234.5
2620 * "45.67 degrees".to_f #=> 45.67
2621 * "thx1138".to_f #=> 0.0
2624 mrb_str_to_f(mrb_state *mrb, mrb_value self)
2626 return mrb_float_value(mrb, mrb_str_to_dbl(mrb, self, FALSE));
2635 * Returns the receiver.
2638 mrb_str_to_s(mrb_state *mrb, mrb_value self)
2640 if (mrb_obj_class(mrb, self) != mrb->string_class) {
2641 return mrb_str_dup(mrb, self);
2649 * str.upcase! => str or nil
2651 * Upcases the contents of <i>str</i>, returning <code>nil</code> if no changes
2655 mrb_str_upcase_bang(mrb_state *mrb, mrb_value str)
2657 struct RString *s = mrb_str_ptr(str);
2659 mrb_bool modify = FALSE;
2661 mrb_str_modify_keep_ascii(mrb, s);
2662 p = RSTRING_PTR(str);
2663 pend = RSTRING_END(str);
2672 if (modify) return str;
2673 return mrb_nil_value();
2679 * str.upcase => new_str
2681 * Returns a copy of <i>str</i> with all lowercase letters replaced with their
2682 * uppercase counterparts. The operation is locale insensitive---only
2683 * characters 'a' to 'z' are affected.
2685 * "hEllO".upcase #=> "HELLO"
2688 mrb_str_upcase(mrb_state *mrb, mrb_value self)
2692 str = mrb_str_dup(mrb, self);
2693 mrb_str_upcase_bang(mrb, str);
2699 * str.dump -> new_str
2701 * Produces a version of <i>str</i> with all nonprinting characters replaced by
2702 * <code>\nnn</code> notation and all special characters escaped.
2705 mrb_str_dump(mrb_state *mrb, mrb_value str)
2707 return str_escape(mrb, str, FALSE);
2711 mrb_str_cat(mrb_state *mrb, mrb_value str, const char *ptr, size_t len)
2713 struct RString *s = mrb_str_ptr(str);
2718 if (len == 0) return str;
2719 mrb_str_modify(mrb, s);
2720 if (ptr >= RSTR_PTR(s) && ptr <= RSTR_PTR(s) + (size_t)RSTR_LEN(s)) {
2721 off = ptr - RSTR_PTR(s);
2724 capa = RSTR_CAPA(s);
2725 total = RSTR_LEN(s)+len;
2726 if (total >= MRB_SSIZE_MAX) {
2728 mrb_raise(mrb, E_ARGUMENT_ERROR, "string size too big");
2730 if (capa <= total) {
2731 if (capa == 0) capa = 1;
2732 while (capa <= total) {
2733 if (capa <= MRB_SSIZE_MAX / 2) {
2740 if (capa <= total || capa > MRB_SSIZE_MAX) {
2743 resize_capa(mrb, s, capa);
2746 ptr = RSTR_PTR(s) + off;
2748 memcpy(RSTR_PTR(s) + RSTR_LEN(s), ptr, len);
2749 mrb_assert_int_fit(size_t, total, mrb_ssize, MRB_SSIZE_MAX);
2750 RSTR_SET_LEN(s, total);
2751 RSTR_PTR(s)[total] = '\0'; /* sentinel */
2756 mrb_str_cat_cstr(mrb_state *mrb, mrb_value str, const char *ptr)
2758 return mrb_str_cat(mrb, str, ptr, ptr ? strlen(ptr) : 0);
2762 mrb_str_cat_str(mrb_state *mrb, mrb_value str, mrb_value str2)
2764 if (mrb_str_ptr(str) == mrb_str_ptr(str2)) {
2765 mrb_str_modify(mrb, mrb_str_ptr(str));
2767 return mrb_str_cat(mrb, str, RSTRING_PTR(str2), RSTRING_LEN(str2));
2771 mrb_str_append(mrb_state *mrb, mrb_value str1, mrb_value str2)
2773 mrb_to_str(mrb, str2);
2774 return mrb_str_cat_str(mrb, str1, str2);
2779 * str.inspect -> string
2781 * Returns a printable version of _str_, surrounded by quote marks,
2782 * with special characters escaped.
2786 * str.inspect #=> "\"hel\\bo\""
2789 mrb_str_inspect(mrb_state *mrb, mrb_value str)
2791 return str_escape(mrb, str, TRUE);
2796 * str.bytes -> array of fixnums
2798 * Returns an array of bytes in _str_.
2801 * str.bytes #=> [104, 101, 108, 108, 111]
2804 mrb_str_bytes(mrb_state *mrb, mrb_value str)
2806 struct RString *s = mrb_str_ptr(str);
2807 mrb_value a = mrb_ary_new_capa(mrb, RSTR_LEN(s));
2808 unsigned char *p = (unsigned char *)(RSTR_PTR(s)), *pend = p + RSTR_LEN(s);
2811 mrb_ary_push(mrb, a, mrb_fixnum_value(p[0]));
2819 * str.getbyte(index) -> 0 .. 255
2821 * Returns the <i>index</i>th byte as an integer.
2824 mrb_str_getbyte(mrb_state *mrb, mrb_value str)
2827 mrb_get_args(mrb, "i", &pos);
2830 pos += RSTRING_LEN(str);
2831 if (pos < 0 || RSTRING_LEN(str) <= pos)
2832 return mrb_nil_value();
2834 return mrb_fixnum_value((unsigned char)RSTRING_PTR(str)[pos]);
2839 * str.setbyte(index, integer) -> integer
2841 * Sets the <i>index</i>th byte to <i>integer</i>.
2844 mrb_str_setbyte(mrb_state *mrb, mrb_value str)
2849 mrb_get_args(mrb, "ii", &pos, &byte);
2851 len = RSTRING_LEN(str);
2852 if (pos < -len || len <= pos)
2853 mrb_raisef(mrb, E_INDEX_ERROR, "index %i out of string", pos);
2857 mrb_str_modify(mrb, mrb_str_ptr(str));
2859 RSTRING_PTR(str)[pos] = (unsigned char)byte;
2860 return mrb_fixnum_value((unsigned char)byte);
2865 * str.byteslice(integer) -> new_str or nil
2866 * str.byteslice(integer, integer) -> new_str or nil
2867 * str.byteslice(range) -> new_str or nil
2869 * Byte Reference---If passed a single Integer, returns a
2870 * substring of one byte at that position. If passed two Integer
2871 * objects, returns a substring starting at the offset given by the first, and
2872 * a length given by the second. If given a Range, a substring containing
2873 * bytes at offsets given by the range is returned. In all three cases, if
2874 * an offset is negative, it is counted from the end of <i>str</i>. Returns
2875 * <code>nil</code> if the initial offset falls outside the string, the length
2876 * is negative, or the beginning of the range is greater than the end.
2877 * The resulting string keeps the original encoding.
2879 * "hello".byteslice(1) #=> "e"
2880 * "hello".byteslice(-1) #=> "o"
2881 * "hello".byteslice(1, 2) #=> "el"
2882 * "\x80\u3042".byteslice(1, 3) #=> "\u3042"
2883 * "\x03\u3042\xff".byteslice(1..3) #=> "\u3042"
2886 mrb_str_byteslice(mrb_state *mrb, mrb_value str)
2889 mrb_int str_len = RSTRING_LEN(str), beg, len;
2890 mrb_bool empty = TRUE;
2892 len = mrb_get_argc(mrb);
2895 mrb_get_args(mrb, "ii", &beg, &len);
2898 a1 = mrb_get_arg1(mrb);
2899 if (mrb_range_p(a1)) {
2900 if (mrb_range_beg_len(mrb, a1, &beg, &len, str_len, TRUE) != MRB_RANGE_OK) {
2901 return mrb_nil_value();
2905 beg = mrb_fixnum(mrb_to_int(mrb, a1));
2911 mrb_argnum_error(mrb, len, 1, 2);
2914 if (mrb_str_beg_len(str_len, &beg, &len) && (empty || len != 0)) {
2915 return mrb_str_byte_subseq(mrb, str, beg, len);
2918 return mrb_nil_value();
2922 /* ---------------------------*/
2924 mrb_init_string(mrb_state *mrb)
2928 mrb_static_assert(RSTRING_EMBED_LEN_MAX < (1 << MRB_STR_EMBED_LEN_BIT),
2929 "pointer size too big for embedded string");
2931 mrb->string_class = s = mrb_define_class(mrb, "String", mrb->object_class); /* 15.2.10 */
2932 MRB_SET_INSTANCE_TT(s, MRB_TT_STRING);
2934 mrb_define_method(mrb, s, "bytesize", mrb_str_bytesize, MRB_ARGS_NONE());
2936 mrb_define_method(mrb, s, "<=>", mrb_str_cmp_m, MRB_ARGS_REQ(1)); /* 15.2.10.5.1 */
2937 mrb_define_method(mrb, s, "==", mrb_str_equal_m, MRB_ARGS_REQ(1)); /* 15.2.10.5.2 */
2938 mrb_define_method(mrb, s, "+", mrb_str_plus_m, MRB_ARGS_REQ(1)); /* 15.2.10.5.4 */
2939 mrb_define_method(mrb, s, "*", mrb_str_times, MRB_ARGS_REQ(1)); /* 15.2.10.5.5 */
2940 mrb_define_method(mrb, s, "[]", mrb_str_aref_m, MRB_ARGS_ANY()); /* 15.2.10.5.6 */
2941 mrb_define_method(mrb, s, "[]=", mrb_str_aset_m, MRB_ARGS_ANY());
2942 mrb_define_method(mrb, s, "capitalize", mrb_str_capitalize, MRB_ARGS_NONE()); /* 15.2.10.5.7 */
2943 mrb_define_method(mrb, s, "capitalize!", mrb_str_capitalize_bang, MRB_ARGS_NONE()); /* 15.2.10.5.8 */
2944 mrb_define_method(mrb, s, "chomp", mrb_str_chomp, MRB_ARGS_ANY()); /* 15.2.10.5.9 */
2945 mrb_define_method(mrb, s, "chomp!", mrb_str_chomp_bang, MRB_ARGS_ANY()); /* 15.2.10.5.10 */
2946 mrb_define_method(mrb, s, "chop", mrb_str_chop, MRB_ARGS_NONE()); /* 15.2.10.5.11 */
2947 mrb_define_method(mrb, s, "chop!", mrb_str_chop_bang, MRB_ARGS_NONE()); /* 15.2.10.5.12 */
2948 mrb_define_method(mrb, s, "downcase", mrb_str_downcase, MRB_ARGS_NONE()); /* 15.2.10.5.13 */
2949 mrb_define_method(mrb, s, "downcase!", mrb_str_downcase_bang, MRB_ARGS_NONE()); /* 15.2.10.5.14 */
2950 mrb_define_method(mrb, s, "empty?", mrb_str_empty_p, MRB_ARGS_NONE()); /* 15.2.10.5.16 */
2951 mrb_define_method(mrb, s, "eql?", mrb_str_eql, MRB_ARGS_REQ(1)); /* 15.2.10.5.17 */
2953 mrb_define_method(mrb, s, "hash", mrb_str_hash_m, MRB_ARGS_NONE()); /* 15.2.10.5.20 */
2954 mrb_define_method(mrb, s, "include?", mrb_str_include, MRB_ARGS_REQ(1)); /* 15.2.10.5.21 */
2955 mrb_define_method(mrb, s, "index", mrb_str_index_m, MRB_ARGS_ARG(1,1)); /* 15.2.10.5.22 */
2956 mrb_define_method(mrb, s, "initialize", mrb_str_init, MRB_ARGS_REQ(1)); /* 15.2.10.5.23 */
2957 mrb_define_method(mrb, s, "initialize_copy", mrb_str_replace, MRB_ARGS_REQ(1)); /* 15.2.10.5.24 */
2958 mrb_define_method(mrb, s, "intern", mrb_str_intern, MRB_ARGS_NONE()); /* 15.2.10.5.25 */
2959 mrb_define_method(mrb, s, "length", mrb_str_size, MRB_ARGS_NONE()); /* 15.2.10.5.26 */
2960 mrb_define_method(mrb, s, "replace", mrb_str_replace, MRB_ARGS_REQ(1)); /* 15.2.10.5.28 */
2961 mrb_define_method(mrb, s, "reverse", mrb_str_reverse, MRB_ARGS_NONE()); /* 15.2.10.5.29 */
2962 mrb_define_method(mrb, s, "reverse!", mrb_str_reverse_bang, MRB_ARGS_NONE()); /* 15.2.10.5.30 */
2963 mrb_define_method(mrb, s, "rindex", mrb_str_rindex, MRB_ARGS_ANY()); /* 15.2.10.5.31 */
2964 mrb_define_method(mrb, s, "size", mrb_str_size, MRB_ARGS_NONE()); /* 15.2.10.5.33 */
2965 mrb_define_method(mrb, s, "slice", mrb_str_aref_m, MRB_ARGS_ANY()); /* 15.2.10.5.34 */
2966 mrb_define_method(mrb, s, "split", mrb_str_split_m, MRB_ARGS_ANY()); /* 15.2.10.5.35 */
2968 #ifndef MRB_WITHOUT_FLOAT
2969 mrb_define_method(mrb, s, "to_f", mrb_str_to_f, MRB_ARGS_NONE()); /* 15.2.10.5.38 */
2971 mrb_define_method(mrb, s, "to_i", mrb_str_to_i, MRB_ARGS_ANY()); /* 15.2.10.5.39 */
2972 mrb_define_method(mrb, s, "to_s", mrb_str_to_s, MRB_ARGS_NONE()); /* 15.2.10.5.40 */
2973 mrb_define_method(mrb, s, "to_str", mrb_str_to_s, MRB_ARGS_NONE());
2974 mrb_define_method(mrb, s, "to_sym", mrb_str_intern, MRB_ARGS_NONE()); /* 15.2.10.5.41 */
2975 mrb_define_method(mrb, s, "upcase", mrb_str_upcase, MRB_ARGS_NONE()); /* 15.2.10.5.42 */
2976 mrb_define_method(mrb, s, "upcase!", mrb_str_upcase_bang, MRB_ARGS_NONE()); /* 15.2.10.5.43 */
2977 mrb_define_method(mrb, s, "inspect", mrb_str_inspect, MRB_ARGS_NONE()); /* 15.2.10.5.46(x) */
2978 mrb_define_method(mrb, s, "bytes", mrb_str_bytes, MRB_ARGS_NONE());
2980 mrb_define_method(mrb, s, "getbyte", mrb_str_getbyte, MRB_ARGS_REQ(1));
2981 mrb_define_method(mrb, s, "setbyte", mrb_str_setbyte, MRB_ARGS_REQ(2));
2982 mrb_define_method(mrb, s, "byteslice", mrb_str_byteslice, MRB_ARGS_ARG(1,1));
2985 #ifndef MRB_WITHOUT_FLOAT
2987 * Source code for the "strtod" library procedure.
2989 * Copyright (c) 1988-1993 The Regents of the University of California.
2990 * Copyright (c) 1994 Sun Microsystems, Inc.
2992 * Permission to use, copy, modify, and distribute this
2993 * software and its documentation for any purpose and without
2994 * fee is hereby granted, provided that the above copyright
2995 * notice appear in all copies. The University of California
2996 * makes no representations about the suitability of this
2997 * software for any purpose. It is provided "as is" without
2998 * express or implied warranty.
3000 * RCS: @(#) $Id: strtod.c 11708 2007-02-12 23:01:19Z shyouhei $
3006 static const int maxExponent = 511; /* Largest possible base 10 exponent. Any
3007 * exponent larger than this will already
3008 * produce underflow or overflow, so there's
3009 * no need to worry about additional digits.
3011 static const double powersOf10[] = {/* Table giving binary powers of 10. Entry */
3012 10., /* is 10^2^i. Used to convert decimal */
3013 100., /* exponents into floating-point numbers. */
3024 mrb_float_read(const char *string, char **endPtr)
3025 /* const char *string; A decimal ASCII floating-point number,
3026 * optionally preceded by white space.
3027 * Must have form "-I.FE-X", where I is the
3028 * integer part of the mantissa, F is the
3029 * fractional part of the mantissa, and X
3030 * is the exponent. Either of the signs
3031 * may be "+", "-", or omitted. Either I
3032 * or F may be omitted, or both. The decimal
3033 * point isn't necessary unless F is present.
3034 * The "E" may actually be an "e". E and X
3035 * may both be omitted (but not just one).
3037 /* char **endPtr; If non-NULL, store terminating character's
3040 int sign, expSign = FALSE;
3041 double fraction, dblExp;
3045 int exp = 0; /* Exponent read from "EX" field. */
3046 int fracExp = 0; /* Exponent that derives from the fractional
3047 * part. Under normal circumstances, it is
3048 * the negative of the number of digits in F.
3049 * However, if I is very long, the last digits
3050 * of I get dropped (otherwise a long I with a
3051 * large negative exponent could cause an
3052 * unnecessary overflow on I alone). In this
3053 * case, fracExp is incremented one for each
3055 int mantSize; /* Number of digits in mantissa. */
3056 int decPt; /* Number of mantissa digits BEFORE decimal
3058 const char *pExp; /* Temporarily holds location of exponent
3062 * Strip off leading blanks and check for a sign.
3066 while (ISSPACE(*p)) {
3081 * Count the number of digits in the mantissa (including the decimal
3082 * point), and also locate the decimal point.
3086 for (mantSize = 0; ; mantSize += 1)
3090 if ((c != '.') || (decPt >= 0)) {
3099 * Now suck up the digits in the mantissa. Use two integers to
3100 * collect 9 digits each (this is faster than using floating-point).
3101 * If the mantissa has more than 18 digits, ignore the extras, since
3102 * they can't affect the value anyway.
3111 mantSize -= 1; /* One of the digits was the point. */
3113 if (mantSize > 18) {
3114 if (decPt - 18 > 29999) {
3118 fracExp = decPt - 18;
3123 fracExp = decPt - mantSize;
3125 if (mantSize == 0) {
3133 for ( ; mantSize > 9; mantSize -= 1)
3141 frac1 = 10*frac1 + (c - '0');
3144 for (; mantSize > 0; mantSize -= 1)
3152 frac2 = 10*frac2 + (c - '0');
3154 fraction = (1.0e9 * frac1) + frac2;
3158 * Skim off the exponent.
3162 if ((*p == 'E') || (*p == 'e')) {
3174 while (ISDIGIT(*p)) {
3175 exp = exp * 10 + (*p - '0');
3183 exp = fracExp - exp;
3186 exp = fracExp + exp;
3190 * Generate a floating-point number that represents the exponent.
3191 * Do this by processing the exponent one bit at a time to combine
3192 * many powers of 2 of 10. Then combine the exponent with the
3203 if (exp > maxExponent) {
3208 for (d = powersOf10; exp != 0; exp >>= 1, d += 1) {
3221 if (endPtr != NULL) {
3222 *endPtr = (char *) p;