3 #include <mruby/array.h>
4 #include <mruby/class.h>
5 #include <mruby/string.h>
6 #include <mruby/range.h>
/*
 * String#getbyte: reads one integer argument and returns the byte at that
 * offset as a non-negative fixnum, or nil when the offset is out of range.
 */
9 mrb_str_getbyte(mrb_state *mrb, mrb_value str)
12 mrb_get_args(mrb, "i", &pos);
/* offset shifted by the string length; the guarding condition is not visible
   in this excerpt (presumably only for negative offsets -- confirm) */
15 pos += RSTRING_LEN(str);
16 if (pos < 0 || RSTRING_LEN(str) <= pos)
17 return mrb_nil_value();
/* go through unsigned char so bytes >= 0x80 do not become negative fixnums */
19 return mrb_fixnum_value((unsigned char)RSTRING_PTR(str)[pos]);
/*
 * String#setbyte: takes two integers (index, value), stores the value
 * truncated to unsigned char at that index and returns the stored byte as a
 * fixnum.  Raises IndexError when the index is outside [-len, len).
 */
23 mrb_str_setbyte(mrb_state *mrb, mrb_value str)
28 mrb_get_args(mrb, "ii", &pos, &byte);
30 len = RSTRING_LEN(str);
31 if (pos < -len || len <= pos)
/* NOTE(review): the message says "array" although the receiver is a string */
32 mrb_raisef(mrb, E_INDEX_ERROR, "index %S is out of array", mrb_fixnum_value(pos));
/* presumably makes the buffer safe to write in place -- confirm against mrb_str_modify */
36 mrb_str_modify(mrb, mrb_str_ptr(str));
38 RSTRING_PTR(str)[pos] = (unsigned char)byte;
39 return mrb_fixnum_value((unsigned char)byte);
/*
 * String#byteslice: with two arguments takes (pos, len) as integers; with
 * one argument dispatches on the argument's type.  The case labels are not
 * visible in this excerpt; the visible branches handle a range-like
 * argument, a float (when floats are enabled) and a fixnum, and an
 * unsupported type raises TypeError.
 */
43 mrb_str_byteslice(mrb_state *mrb, mrb_value str)
/* two-argument form */
48 if (mrb_get_argc(mrb) == 2) {
50 mrb_get_args(mrb, "ii", &pos, &len);
51 return mrb_str_substr(mrb, str, pos, len);
/* one-argument form: fetch it as a generic object and inspect its type */
53 mrb_get_args(mrb, "o|i", &a1, &len);
54 switch (mrb_type(a1)) {
/* resolve the range against the receiver's byte length */
59 len = RSTRING_LEN(str);
60 switch (mrb_range_beg_len(mrb, a1, &beg, &len, len, TRUE)) {
61 case 0: /* not range */
64 return mrb_str_substr(mrb, str, beg, len);
65 case 2: /* out of range */
66 mrb_raisef(mrb, E_RANGE_ERROR, "%S out of range", a1);
69 return mrb_nil_value();
71 #ifndef MRB_WITHOUT_FLOAT
/* float index is truncated to an integer */
73 a1 = mrb_fixnum_value((mrb_int)mrb_float(a1));
/* integer index: slice of length 1 */
77 return mrb_str_substr(mrb, str, mrb_fixnum(a1), 1);
79 mrb_raise(mrb, E_TYPE_ERROR, "wrong type of argument");
82 return mrb_nil_value();
87 * str.swapcase! -> str or nil
89 * Equivalent to <code>String#swapcase</code>, but modifies the receiver in
90 * place, returning <i>str</i>, or <code>nil</code> if no changes were made.
91 * Note: case conversion is effective only in ASCII region.
/* In-place case swap; returns the receiver when `modify` is set, nil otherwise. */
94 mrb_str_swapcase_bang(mrb_state *mrb, mrb_value str)
98 struct RString *s = mrb_str_ptr(str);
/* called before the buffer pointer is fetched; presumably it may replace the
   buffer -- confirm against mrb_str_modify */
100 mrb_str_modify(mrb, s);
101 p = RSTRING_PTR(str);
102 pend = p + RSTRING_LEN(str);
108 else if (ISLOWER(*p)) {
/* `modify` is assigned on lines not visible in this excerpt */
115 if (modify) return str;
116 return mrb_nil_value();
121 * str.swapcase -> new_str
123 * Returns a copy of <i>str</i> with uppercase alphabetic characters converted
124 * to lowercase and lowercase characters converted to uppercase.
125 * Note: case conversion is effective only in ASCII region.
127 * "Hello".swapcase #=> "hELLO"
128 * "cYbEr_PuNk11".swapcase #=> "CyBeR_pUnK11"
/* Non-destructive variant: duplicates the receiver and runs the in-place
   swapcase on the copy, ignoring its changed/nil result. */
131 mrb_str_swapcase(mrb_state *mrb, mrb_value self)
135 str = mrb_str_dup(mrb, self);
136 mrb_str_swapcase_bang(mrb, str);
/* forward declaration: used by mrb_str_concat_m before its definition further down */
140 static mrb_value mrb_fixnum_chr(mrb_state *mrb, mrb_value num);
144 * str << integer -> str
145 * str.concat(integer) -> str
147 * str.concat(obj) -> str
149 * Append---Concatenates the given object to <i>str</i>. If the object is a
150 * <code>Integer</code>, it is considered as a codepoint, and is converted
151 * to a character before concatenation.
154 * a << "world" #=> "hello world"
155 * a.concat(33) #=> "hello world!"
/* String#concat / String#<<: appends one argument to the receiver. */
158 mrb_str_concat_m(mrb_state *mrb, mrb_value self)
162 mrb_get_args(mrb, "o", &str);
/* a fixnum argument is first turned into a string by mrb_fixnum_chr */
163 if (mrb_fixnum_p(str))
164 str = mrb_fixnum_chr(mrb, str);
/* presumably the else branch (the `else` line is not visible here) */
166 str = mrb_ensure_string_type(mrb, str);
167 mrb_str_concat(mrb, self, str);
173 * str.start_with?([prefixes]+) -> true or false
175 * Returns true if +str+ starts with one of the +prefixes+ given.
177 * "hello".start_with?("hell") #=> true
179 * # returns true if one of the prefixes matches.
180 * "hello".start_with?("heaven", "hell") #=> true
181 * "hello".start_with?("heaven", "paradise") #=> false
182 * "h".start_with?("heaven", "hell") #=> false
/* String#start_with?: true as soon as one argument is a byte-wise prefix of
   the receiver; false when none matches (including the no-argument call). */
185 mrb_str_start_with(mrb_state *mrb, mrb_value self)
187 mrb_value *argv, sub;
189 mrb_get_args(mrb, "*", &argv, &argc);
191 for (i = 0; i < argc; i++) {
/* arena is saved/restored around the conversion of each argument */
193 int ai = mrb_gc_arena_save(mrb);
194 sub = mrb_ensure_string_type(mrb, argv[i]);
195 mrb_gc_arena_restore(mrb, ai);
196 len_l = RSTRING_LEN(self);
197 len_r = RSTRING_LEN(sub);
/* a candidate longer than the receiver can never match */
198 if (len_l >= len_r) {
199 if (memcmp(RSTRING_PTR(self), RSTRING_PTR(sub), len_r) == 0) {
200 return mrb_true_value();
204 return mrb_false_value();
209 * str.end_with?([suffixes]+) -> true or false
211 * Returns true if +str+ ends with one of the +suffixes+ given.
/* String#end_with?: true as soon as one argument matches the tail of the
   receiver byte-wise; false when none matches. */
214 mrb_str_end_with(mrb_state *mrb, mrb_value self)
216 mrb_value *argv, sub;
218 mrb_get_args(mrb, "*", &argv, &argc);
220 for (i = 0; i < argc; i++) {
/* arena is saved/restored around the conversion of each argument */
222 int ai = mrb_gc_arena_save(mrb);
223 sub = mrb_ensure_string_type(mrb, argv[i]);
224 mrb_gc_arena_restore(mrb, ai);
225 len_l = RSTRING_LEN(self);
226 len_r = RSTRING_LEN(sub);
/* comparison starts len_r bytes before the end of the receiver */
227 if (len_l >= len_r) {
228 if (memcmp(RSTRING_PTR(self) + (len_l - len_r),
231 return mrb_true_value();
235 return mrb_false_value();
/* Kinds of node in a parsed tr pattern; value 0 marks a node that has not
   been filled in yet (what a zero-initialized node holds). */
238 enum tr_pattern_type {
239 TR_UNINITIALIZED = 0,
247 <syntax> ::= (<pattern>)* | '^' (<pattern>)*
248 <pattern> ::= <in order> | <range>
249 <in order> ::= (<ch>)+
250 <range> ::= <ch> '-' <ch>
/* node kind; compared against TR_IN_ORDER / TR_RANGE / TR_UNINITIALIZED by the users of this struct */
253 uint8_t type; // 1:in-order, 2:range
/* copied from the parser's flag_reverse when a node is filled in */
254 mrb_bool flag_reverse : 1;
/* consulted by tr_free_pattern to decide how a node is released */
255 mrb_bool flag_on_heap : 1;
/* nodes form a singly linked list, NULL-terminated */
261 struct tr_pattern *next;
/* zero initializer for a caller-owned first node: type starts as TR_UNINITIALIZED (0) */
264 #define STATIC_TR_PATTERN { 0 }
/* Releases a pattern list.  Only nodes flagged flag_on_heap take the branch
   below; the actual release call and the loop are not visible here. */
267 tr_free_pattern(mrb_state *mrb, struct tr_pattern *pat)
/* successor is read before the current node is handled */
270 struct tr_pattern *p = pat->next;
271 if (pat->flag_on_heap) {
/*
 * Parses a tr-style pattern string into a linked list of nodes, each either
 * a character range ("a-z") or a run of literal characters.
 *
 * mrb                 - interpreter state, used for allocation and raising
 * ret                 - first node supplied by the caller; while its type is
 *                       TR_UNINITIALIZED it is eligible to be filled in place
 * v_pattern           - the pattern string
 * flag_reverse_enable - when true, a leading '^' (pattern length >= 2) is
 *                       recognized as a prefix
 *
 * Raises the out-of-memory exception when a node cannot be allocated and
 * ArgumentError when a literal run is longer than UINT16_MAX.
 */
278 static struct tr_pattern*
279 tr_parse_pattern(mrb_state *mrb, struct tr_pattern *ret, const mrb_value v_pattern, mrb_bool flag_reverse_enable)
281 const char *pattern = RSTRING_PTR(v_pattern);
282 mrb_int pattern_length = RSTRING_LEN(v_pattern);
283 mrb_bool flag_reverse = FALSE;
284 struct tr_pattern *pat1;
287 if(flag_reverse_enable && pattern_length >= 2 && pattern[0] == '^') {
292 while (i < pattern_length) {
293 /* is range pattern ? */
/* NOTE(review): ret is dereferenced here unconditionally, yet later lines
   test `ret` against NULL -- one of the two looks unnecessary or unsafe */
294 mrb_bool const ret_uninit = (ret->type == TR_UNINITIALIZED);
297 : (struct tr_pattern*)mrb_malloc_simple(mrb, sizeof(struct tr_pattern));
/* a range needs three characters "x-y" and must not start with a backslash */
298 if ((i+2) < pattern_length && pattern[i] != '\\' && pattern[i+1] == '-') {
/* allocation failed: release what was built so far, then raise */
299 if (pat1 == NULL && ret) {
301 tr_free_pattern(mrb, ret);
302 mrb_exc_raise(mrb, mrb_obj_value(mrb->nomem_err));
303 return NULL; /* not reached */
305 pat1->type = TR_RANGE;
306 pat1->flag_reverse = flag_reverse;
307 pat1->flag_on_heap = !ret_uninit;
/* number of characters covered, both ends included.
   NOTE(review): a descending range such as "z-a" yields a value <= 0 here -- confirm how that is meant to behave */
308 pat1->n = pattern[i+2] - pattern[i] + 1;
310 pat1->val.ch[0] = pattern[i];
311 pat1->val.ch[1] = pattern[i+2];
315 /* in order pattern. */
316 mrb_int start_pos = i++;
/* extend the literal run until the next range begins */
319 while (i < pattern_length) {
320 if ((i+2) < pattern_length && pattern[i] != '\\' && pattern[i+1] == '-')
/* NOTE(review): the check allows lengths up to UINT16_MAX (65535) while the message says 65536 */
326 if (len > UINT16_MAX) {
327 mrb_raise(mrb, E_ARGUMENT_ERROR, "tr pattern too long (max 65536)");
329 if (pat1 == NULL && ret) {
332 pat1->type = TR_IN_ORDER;
333 pat1->flag_reverse = flag_reverse;
334 pat1->flag_on_heap = !ret_uninit;
/* a literal run is stored as an offset into the pattern string, not as a copy */
337 pat1->val.start_pos = start_pos;
340 if (ret == NULL || ret_uninit) {
/* otherwise walk to the tail of the existing list */
344 struct tr_pattern *p = ret;
345 while (p->next != NULL) {
/*
 * Looks up ch in a parsed pattern and computes its position within the
 * expanded pattern (n_sum is the running offset of the current node; its
 * update is not visible in this excerpt).
 *
 * pat     - head of the pattern list (may be NULL)
 * pat_str - the pattern text that in-order nodes index into
 * ch      - character to look up
 */
355 static inline mrb_int
356 tr_find_character(const struct tr_pattern *pat, const char *pat_str, int ch)
360 mrb_int flag_reverse = pat ? pat->flag_reverse : 0;
362 while (pat != NULL) {
363 if (pat->type == TR_IN_ORDER) {
365 for (i = 0; i < pat->n; i++) {
366 if (pat_str[pat->val.start_pos + i] == ch) ret = n_sum + i;
369 else if (pat->type == TR_RANGE) {
/* both range ends are inclusive here */
370 if (pat->val.ch[0] <= ch && ch <= pat->val.ch[1])
371 ret = n_sum + ch - pat->val.ch[0];
374 mrb_assert(pat->type == TR_UNINITIALIZED);
/* inverted result: a miss becomes MRB_INT_MAX and a hit becomes -1
   (presumably only taken when flag_reverse is set -- the condition is not visible) */
381 return (ret < 0) ? MRB_INT_MAX : -1;
/*
 * Returns the n_th character of the expanded pattern.  When n_th lies beyond
 * the last node, the final character of that last node is returned instead.
 *
 * pat     - head of the pattern list
 * pat_str - the pattern text that in-order nodes index into
 * n_th    - zero-based position in the expanded pattern
 */
386 static inline mrb_int
387 tr_get_character(const struct tr_pattern *pat, const char *pat_str, mrb_int n_th)
391 while (pat != NULL) {
/* n_th falls inside the current node */
392 if (n_th < (n_sum + pat->n)) {
393 mrb_int i = (n_th - n_sum);
397 return pat_str[pat->val.start_pos + i];
399 return pat->val.ch[0]+i;
400 case TR_UNINITIALIZED:
/* last node reached without covering n_th: fall back to its last character */
404 if (pat->next == NULL) {
407 return pat_str[pat->val.start_pos + pat->n - 1];
409 return pat->val.ch[1];
410 case TR_UNINITIALIZED:
/* Marks byte value ch in a 256-bit set stored as 32 bytes. */
422 tr_bitmap_set(uint8_t bitmap[32], uint8_t ch)
/* idx1 selects the byte, idx2 the bit inside it */
424 uint8_t idx1 = ch / 8;
425 uint8_t idx2 = ch % 8;
426 bitmap[idx1] |= (1<<idx2);
/* Tests whether byte value ch is marked in the 256-bit set (same layout as tr_bitmap_set). */
429 static inline mrb_bool
430 tr_bitmap_detect(uint8_t bitmap[32], uint8_t ch)
432 uint8_t idx1 = ch / 8;
433 uint8_t idx2 = ch % 8;
434 if (bitmap[idx1] & (1<<idx2))
439 /* compile pattern to bitmap */
/*
 * Expands a parsed pattern into a 256-bit membership set.
 *
 * pat    - head of the parsed pattern list (may be NULL)
 * pstr   - the pattern string that in-order nodes index into
 * bitmap - output, 32 bytes; rewritten by this function
 */
441 tr_compile_pattern(const struct tr_pattern *pat, mrb_value pstr, uint8_t bitmap[32])
443 const char *pattern = RSTRING_PTR(pstr);
444 mrb_int flag_reverse = pat ? pat->flag_reverse : 0;
/* first pass over all 32 bytes (body not visible; presumably clears the set) */
447 for (i=0; i<32; i++) {
450 while (pat != NULL) {
451 if (pat->type == TR_IN_ORDER) {
452 for (i = 0; i < pat->n; i++) {
453 tr_bitmap_set(bitmap, pattern[pat->val.start_pos + i]);
456 else if (pat->type == TR_RANGE) {
/* NOTE(review): the upper bound is exclusive here, so the last character of
   a range is not marked, whereas tr_find_character treats both ends as
   inclusive -- looks like an off-by-one, confirm */
457 for (i = pat->val.ch[0]; i < pat->val.ch[1]; i++) {
458 tr_bitmap_set(bitmap, i);
462 mrb_assert(pat->type == TR_UNINITIALIZED);
/* second pass over all 32 bytes (body not visible; presumably complements the set when flag_reverse is set) */
468 for (i=0; i<32; i++) {
/*
 * Shared worker for tr / tr! / tr_s / tr_s!: rewrites str in place.
 *
 * str     - string to modify
 * p1      - "from" pattern (a leading '^' is honoured)
 * p2      - "to" pattern (no '^' handling)
 * squeeze - when true, a translated character equal to the previous one is
 *           handled by the same branch as a negative replacement
 *
 * The callers use the return value as a "changed" flag.
 */
475 str_tr(mrb_state *mrb, mrb_value str, mrb_value p1, mrb_value p2, mrb_bool squeeze)
477 struct tr_pattern pat = STATIC_TR_PATTERN;
478 struct tr_pattern rep_storage = STATIC_TR_PATTERN;
483 mrb_bool flag_changed = FALSE;
485 struct tr_pattern *rep;
487 mrb_str_modify(mrb, mrb_str_ptr(str));
488 tr_parse_pattern(mrb, &pat, p1, TRUE);
/* NOTE(review): if this second parse raises, heap nodes hanging off `pat` are
   presumably never released -- confirm */
489 rep = tr_parse_pattern(mrb, &rep_storage, p2, FALSE);
490 s = RSTRING_PTR(str);
491 len = RSTRING_LEN(str);
/* i reads, j writes; j can lag behind i, which compacts the string in place */
493 for (i=j=0; i<len; i++,j++) {
494 mrb_int n = tr_find_character(&pat, RSTRING_PTR(p1), s[i]);
496 if (i>j) s[j] = s[i];
503 mrb_int c = tr_get_character(rep, RSTRING_PTR(p2), n);
505 if (c < 0 || (squeeze && c == lastch)) {
/* NOTE(review): this raise sits before the tr_free_pattern calls below, so
   both pattern lists presumably leak on this path -- confirm */
510 mrb_raisef(mrb, E_ARGUMENT_ERROR, "character (%S) out of range",
511 mrb_fixnum_value((mrb_int)c));
519 tr_free_pattern(mrb, &pat);
520 tr_free_pattern(mrb, rep);
/* shrink to the number of bytes written and re-terminate */
523 RSTR_SET_LEN(RSTRING(str), j);
524 RSTRING_PTR(str)[j] = 0;
531 * str.tr(from_str, to_str) => new_str
533 * Returns a copy of str with the characters in from_str replaced by the
534 * corresponding characters in to_str. If to_str is shorter than from_str,
535 * it is padded with its last character in order to maintain the
538 * "hello".tr('el', 'ip') #=> "hippo"
539 * "hello".tr('aeiou', '*') #=> "h*ll*"
540 * "hello".tr('aeiou', 'AA*') #=> "hAll*"
542 * Both strings may use the c1-c2 notation to denote ranges of characters,
543 * and from_str may start with a ^, which denotes all characters except
546 * "hello".tr('a-y', 'b-z') #=> "ifmmp"
547 * "hello".tr('^aeiou', '*') #=> "*e**o"
549 * The backslash character \ can be used to escape ^ or - and is otherwise
550 * ignored unless it appears at the end of a range or the end of the
551 * from_str or to_str:
554 * "hello^world".tr("\\^aeiou", "*") #=> "h*ll**w*rld"
555 * "hello-world".tr("a\\-eo", "*") #=> "h*ll**w*rld"
557 * "hello\r\nworld".tr("\r", "") #=> "hello\nworld"
558 * "hello\r\nworld".tr("\\r", "") #=> "hello\r\nwold"
559 * "hello\r\nworld".tr("\\\r", "") #=> "hello\nworld"
561 * "X['\\b']".tr("X\\", "") #=> "['b']"
562 * "X['\\b']".tr("X-\\]", "") #=> "'b'"
564 * Note: conversion is effective only in ASCII region.
/* String#tr: requires two string arguments and translates a duplicate of
   the receiver, without squeezing. */
567 mrb_str_tr(mrb_state *mrb, mrb_value str)
572 mrb_get_args(mrb, "SS", &p1, &p2);
573 dup = mrb_str_dup(mrb, str);
574 str_tr(mrb, dup, p1, p2, FALSE);
580 * str.tr!(from_str, to_str) -> str or nil
582 * Translates str in place, using the same rules as String#tr.
583 * Returns str, or nil if no changes were made.
/* String#tr!: translates the receiver itself; nil is returned when str_tr
   reports no change. */
586 mrb_str_tr_bang(mrb_state *mrb, mrb_value str)
590 mrb_get_args(mrb, "SS", &p1, &p2);
591 if (str_tr(mrb, str, p1, p2, FALSE)) {
594 return mrb_nil_value();
599 * str.tr_s(from_str, to_str) -> new_str
601 * Processes a copy of str as described under String#tr, then removes
602 * duplicate characters in regions that were affected by the translation.
604 * "hello".tr_s('l', 'r') #=> "hero"
605 * "hello".tr_s('el', '*') #=> "h*o"
606 * "hello".tr_s('el', 'hx') #=> "hhxo"
/* String#tr_s: like tr, but runs str_tr on the duplicate with squeezing enabled. */
609 mrb_str_tr_s(mrb_state *mrb, mrb_value str)
614 mrb_get_args(mrb, "SS", &p1, &p2);
615 dup = mrb_str_dup(mrb, str);
616 str_tr(mrb, dup, p1, p2, TRUE);
622 * str.tr_s!(from_str, to_str) -> str or nil
624 * Performs String#tr_s processing on str in place, returning
625 * str, or nil if no changes were made.
/* String#tr_s!: squeezing translation of the receiver itself; nil is
   returned when str_tr reports no change. */
628 mrb_str_tr_s_bang(mrb_state *mrb, mrb_value str)
632 mrb_get_args(mrb, "SS", &p1, &p2);
633 if (str_tr(mrb, str, p1, p2, TRUE)) {
636 return mrb_nil_value();
/*
 * Shared worker for squeeze / squeeze!: collapses runs of a repeated byte in
 * place.
 *
 * str   - string to modify
 * v_pat - nil, or a pattern string restricting which bytes may be squeezed
 *
 * The callers use the return value as a "changed" flag.
 */
640 str_squeeze(mrb_state *mrb, mrb_value str, mrb_value v_pat)
642 struct tr_pattern pat_storage = STATIC_TR_PATTERN;
643 struct tr_pattern *pat = NULL;
647 mrb_bool flag_changed = FALSE;
651 mrb_str_modify(mrb, mrb_str_ptr(str));
/* the pattern is compiled to a bitmap once; the node list is freed right away */
652 if (!mrb_nil_p(v_pat)) {
653 pat = tr_parse_pattern(mrb, &pat_storage, v_pat, TRUE);
654 tr_compile_pattern(pat, v_pat, bitmap);
655 tr_free_pattern(mrb, pat);
657 s = RSTRING_PTR(str);
658 len = RSTRING_LEN(str);
/* variant with a pattern: only bytes present in the bitmap are considered */
661 for (i=j=0; i<len; i++,j++) {
662 if (i>j) s[j] = s[i];
663 if (tr_bitmap_detect(bitmap, s[i]) && s[i] == lastch) {
/* variant without a pattern */
671 for (i=j=0; i<len; i++,j++) {
672 if (i>j) s[j] = s[i];
/* NOTE(review): if s is a plain (signed) char pointer, `s[i] >= 0` excludes
   bytes >= 0x80 from squeezing -- confirm the declared type and the intent */
673 if (s[i] >= 0 && s[i] == lastch) {
/* shrink to the number of bytes kept and re-terminate */
682 RSTR_SET_LEN(RSTRING(str), j);
683 RSTRING_PTR(str)[j] = 0;
690 * str.squeeze([other_str]) -> new_str
692 * Builds a set of characters from the other_str
693 * parameter(s) using the procedure described for String#count. Returns a
694 * new string where runs of the same character that occur in this set are
695 * replaced by a single character. If no arguments are given, all runs of
696 * identical characters are replaced by a single character.
698 * "yellow moon".squeeze #=> "yelow mon"
699 * " now is the".squeeze(" ") #=> " now is the"
700 * "putters shoot balls".squeeze("m-z") #=> "puters shot balls"
/* String#squeeze: optional string pattern (nil when omitted); squeezes a
   duplicate of the receiver. */
703 mrb_str_squeeze(mrb_state *mrb, mrb_value str)
705 mrb_value pat = mrb_nil_value();
708 mrb_get_args(mrb, "|S", &pat);
709 dup = mrb_str_dup(mrb, str);
710 str_squeeze(mrb, dup, pat);
716 * str.squeeze!([other_str]) -> str or nil
718 * Squeezes str in place, returning either str, or nil if no
/* String#squeeze!: squeezes the receiver itself; nil is returned when
   str_squeeze reports no change. */
722 mrb_str_squeeze_bang(mrb_state *mrb, mrb_value str)
724 mrb_value pat = mrb_nil_value();
726 mrb_get_args(mrb, "|S", &pat);
727 if (str_squeeze(mrb, str, pat)) {
730 return mrb_nil_value();
/*
 * Shared worker for delete / delete!: works on str in place, testing each
 * byte against the set compiled from v_pat.  The callers use the return
 * value as a "changed" flag.
 */
734 str_delete(mrb_state *mrb, mrb_value str, mrb_value v_pat)
736 struct tr_pattern pat = STATIC_TR_PATTERN;
740 mrb_bool flag_changed = FALSE;
743 mrb_str_modify(mrb, mrb_str_ptr(str));
/* compile the pattern to a bitmap, then drop the node list immediately */
744 tr_parse_pattern(mrb, &pat, v_pat, TRUE);
745 tr_compile_pattern(&pat, v_pat, bitmap);
746 tr_free_pattern(mrb, &pat);
748 s = RSTRING_PTR(str);
749 len = RSTRING_LEN(str);
/* i reads, j writes; the handling of a matching byte is not visible here */
751 for (i=j=0; i<len; i++,j++) {
752 if (i>j) s[j] = s[i];
753 if (tr_bitmap_detect(bitmap, s[i])) {
/* shrink to the number of bytes kept and re-terminate */
759 RSTR_SET_LEN(RSTRING(str), j);
760 RSTRING_PTR(str)[j] = 0;
/* String#delete: requires one string pattern; runs str_delete on a duplicate of the receiver. */
766 mrb_str_delete(mrb_state *mrb, mrb_value str)
771 mrb_get_args(mrb, "S", &pat);
772 dup = mrb_str_dup(mrb, str);
773 str_delete(mrb, dup, pat);
/* String#delete!: runs str_delete on the receiver itself; nil is returned
   when it reports no change. */
778 mrb_str_delete_bang(mrb_state *mrb, mrb_value str)
782 mrb_get_args(mrb, "S", &pat);
783 if (str_delete(mrb, str, pat)) {
786 return mrb_nil_value();
791 * str.count([other_str]) -> integer
793 * Each other_str parameter defines a set of characters to count. The
794 * intersection of these sets defines the characters to count in str. Any
795 * other_str that starts with a caret ^ is negated. The sequence c1-c2
796 * means all characters between c1 and c2. The backslash character \ can
797 * be used to escape ^ or - and is otherwise ignored unless it appears at
798 * the end of a sequence or the end of an other_str.
/*
 * String#count: counts the bytes of the receiver that belong to the set
 * described by the pattern and returns the total as a fixnum.
 * NOTE(review): exactly one string argument is accepted here, so the
 * multi-set intersection of CRuby's String#count is not available.
 */
801 mrb_str_count(mrb_state *mrb, mrb_value str)
803 mrb_value v_pat = mrb_nil_value();
808 struct tr_pattern pat = STATIC_TR_PATTERN;
811 mrb_get_args(mrb, "S", &v_pat);
/* compile the pattern to a bitmap, then drop the node list immediately */
812 tr_parse_pattern(mrb, &pat, v_pat, TRUE);
813 tr_compile_pattern(&pat, v_pat, bitmap);
814 tr_free_pattern(mrb, &pat);
816 s = RSTRING_PTR(str);
817 len = RSTRING_LEN(str);
818 for (i = 0; i < len; i++) {
819 if (tr_bitmap_detect(bitmap, s[i])) count++;
821 return mrb_fixnum_value(count);
/* String#hex: converts the receiver with mrb_str_to_inum in base 16; the
   last argument is passed as FALSE. */
825 mrb_str_hex(mrb_state *mrb, mrb_value self)
827 return mrb_str_to_inum(mrb, self, 16, FALSE);
/* String#oct: converts the receiver with mrb_str_to_inum in base 8; the
   last argument is passed as FALSE. */
831 mrb_str_oct(mrb_state *mrb, mrb_value self)
833 return mrb_str_to_inum(mrb, self, 8, FALSE);
838 * string.chr -> string
840 * Returns a one-character string at the beginning of the string.
/* String#chr: substring of length 1 starting at position 0. */
846 mrb_str_chr(mrb_state *mrb, mrb_value self)
848 return mrb_str_substr(mrb, self, 0, 1);
/*
 * Fixnum#chr: turns an integer into a one-character string.
 * With MRB_UTF8_STRING the value is encoded as UTF-8 and must lie in
 * 0..0x10FFFF; otherwise it must lie in 0..0xff and becomes a single byte.
 * Raises RangeError for values outside the accepted range.
 */
852 mrb_fixnum_chr(mrb_state *mrb, mrb_value num)
854 mrb_int cp = mrb_fixnum(num);
855 #ifdef MRB_UTF8_STRING
859 if (cp < 0 || 0x10FFFF < cp) {
860 mrb_raisef(mrb, E_RANGE_ERROR, "%S out of char range", num);
/* two-byte form: 110xxxxx 10xxxxxx */
866 else if (cp < 0x800) {
867 utf8[0] = (char)(0xC0 | (cp >> 6));
868 utf8[1] = (char)(0x80 | (cp & 0x3F));
/* three-byte form: 1110xxxx 10xxxxxx 10xxxxxx */
871 else if (cp < 0x10000) {
872 utf8[0] = (char)(0xE0 | (cp >> 12));
873 utf8[1] = (char)(0x80 | ((cp >> 6) & 0x3F));
874 utf8[2] = (char)(0x80 | ( cp & 0x3F));
/* four-byte form: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
878 utf8[0] = (char)(0xF0 | (cp >> 18));
879 utf8[1] = (char)(0x80 | ((cp >> 12) & 0x3F));
880 utf8[2] = (char)(0x80 | ((cp >> 6) & 0x3F));
881 utf8[3] = (char)(0x80 | ( cp & 0x3F));
884 return mrb_str_new(mrb, utf8, len);
/* non-UTF-8 build (the preprocessor #else line is not visible in this excerpt) */
888 if (cp < 0 || 0xff < cp) {
889 mrb_raisef(mrb, E_RANGE_ERROR, "%S out of char range", num);
892 return mrb_str_new(mrb, &c, 1);
898 * string.succ -> string
900 * Returns next sequence of the string;
/*
 * String#succ!: builds the successor in a temporary string `result`, then
 * resizes the receiver and copies the result over it.  Large parts of the
 * carry logic are not visible in this excerpt.
 */
906 mrb_str_succ_bang(mrb_state *mrb, mrb_value self)
909 unsigned char *p, *e, *b, *t;
911 struct RString *s = mrb_str_ptr(self);
/* the empty string is special-cased (its handling is not visible here) */
914 if (RSTRING_LEN(self) == 0)
917 mrb_str_modify(mrb, s);
918 l = RSTRING_LEN(self);
919 b = p = (unsigned char*) RSTRING_PTR(self);
923 // find trailing ascii/number
931 result = mrb_str_new_lit(mrb, "");
934 // find leading letter of the ascii/number
/* true for a non-alphanumeric byte, or an alphanumeric one that will not wrap ('9', 'z', 'Z' wrap) */
937 if (!ISALNUM(*b) || (ISALNUM(*b) && *b != '9' && *b != 'z' && *b != 'Z'))
/* keep the untouched leading part p..b */
943 result = mrb_str_new(mrb, (char*) p, b - p);
949 mrb_str_cat_lit(mrb, result, "\x01");
/* when the wrap reaches the leftmost position b, an extra leading character is prepended */
958 if (e == b) prepend = "1";
961 else if (*e == 'z') {
962 if (e == b) prepend = "a";
965 else if (*e == 'Z') {
966 if (e == b) prepend = "A";
973 if (prepend) mrb_str_cat_cstr(mrb, result, prepend);
976 result = mrb_str_cat(mrb, result, (char*) b, t - b);
/* write the computed successor back into the receiver */
977 l = RSTRING_LEN(result);
978 mrb_str_resize(mrb, self, l);
979 memcpy(RSTRING_PTR(self), RSTRING_PTR(result), l);
/* String#succ: duplicates the receiver and applies the in-place successor to the copy. */
984 mrb_str_succ(mrb_state *mrb, mrb_value self)
988 str = mrb_str_dup(mrb, self);
989 mrb_str_succ_bang(mrb, str);
993 #ifdef MRB_UTF8_STRING
/*
 * Sequence length indexed by the first byte of a UTF-8 sequence:
 * 0x00-0x7F -> 1, 0x80-0xBF -> 0, 0xC0-0xDF -> 2, 0xE0-0xEF -> 3,
 * 0xF0-0xF4 -> 4, 0xF5-0xFF -> 0.  The largest entry is 4.
 */
994 static const char utf8len_codepage_zero[256] =
996 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
997 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
998 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
999 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1000 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1001 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1002 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
1003 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0,
/*
 * Decodes the UTF-8 sequence starting at p into a code point.  The sequence
 * length comes from utf8len_codepage_zero; each further byte must be a
 * continuation byte (10xxxxxx) for decoding to proceed.
 * NOTE(review): bytes past p[0] are read without a length argument --
 * presumably the buffer is NUL-terminated, confirm at the call site.
 */
1007 utf8code(unsigned char* p)
1014 len = utf8len_codepage_zero[p[0]];
1015 if (len > 1 && (p[1] & 0xc0) == 0x80) {
/* 2-byte sequence: 5 + 6 payload bits */
1017 return ((p[0] & 0x1f) << 6) + (p[1] & 0x3f);
1018 if ((p[2] & 0xc0) == 0x80) {
/* 3-byte sequence: 4 + 6 + 6 payload bits */
1020 return ((p[0] & 0x0f) << 12) + ((p[1] & 0x3f) << 6)
1022 if ((p[3] & 0xc0) == 0x80) {
/* 4-byte sequence: 3 + 6 + 6 + 6 payload bits */
1024 return ((p[0] & 0x07) << 18) + ((p[1] & 0x3f) << 12)
1025 + ((p[2] & 0x3f) << 6) + (p[3] & 0x3f);
/* NOTE(review): utf8len_codepage_zero never yields more than 4, so the 5- and
   6-byte branches below look unreachable -- confirm */
1026 if ((p[4] & 0xc0) == 0x80) {
1028 return ((p[0] & 0x03) << 24) + ((p[1] & 0x3f) << 18)
1029 + ((p[2] & 0x3f) << 12) + ((p[3] & 0x3f) << 6)
1031 if ((p[5] & 0xc0) == 0x80 && len == 6)
1032 return ((p[0] & 0x01) << 30) + ((p[1] & 0x3f) << 24)
1033 + ((p[2] & 0x3f) << 18) + ((p[3] & 0x3f) << 12)
1034 + ((p[4] & 0x3f) << 6) + (p[5] & 0x3f);
/* String#ord: code point decoded by utf8code from the start of the string;
   raises ArgumentError on an empty string. */
1043 mrb_str_ord(mrb_state* mrb, mrb_value str)
1045 if (RSTRING_LEN(str) == 0)
1046 mrb_raise(mrb, E_ARGUMENT_ERROR, "empty string");
1047 return mrb_fixnum_value(utf8code((unsigned char*) RSTRING_PTR(str)));
/* String#ord: value of the first byte (0..255); raises ArgumentError on an empty string. */
1051 mrb_str_ord(mrb_state* mrb, mrb_value str)
1053 if (RSTRING_LEN(str) == 0)
1054 mrb_raise(mrb, E_ARGUMENT_ERROR, "empty string");
1055 return mrb_fixnum_value((unsigned char)RSTRING_PTR(str)[0]);
1061 * str.delete_prefix!(prefix) -> self or nil
1063 * Deletes leading <code>prefix</code> from <i>str</i>, returning
1064 * <code>nil</code> if no change was made.
1066 * "hello".delete_prefix!("hel") #=> "lo"
1067 * "hello".delete_prefix!("llo") #=> nil
/*
 * String#delete_prefix!: removes a leading prefix from the receiver in place.
 * Returns nil when the prefix is longer than the string or does not match.
 * NOTE(review): an empty prefix passes both checks below, so it is not
 * reported as "no change" -- confirm that this is intended.
 */
1070 mrb_str_del_prefix_bang(mrb_state *mrb, mrb_value self)
1074 struct RString *str = RSTRING(self);
1076 mrb_get_args(mrb, "s", &ptr, &plen);
1077 slen = RSTR_LEN(str);
1078 if (plen > slen) return mrb_nil_value();
1080 if (memcmp(s, ptr, plen) != 0) return mrb_nil_value();
/* non-frozen shared string: advance the start pointer instead of moving bytes */
1081 if (!MRB_FROZEN_P(str) && (RSTR_SHARED_P(str) || RSTR_FSHARED_P(str))) {
1082 str->as.heap.ptr += plen;
/* otherwise: prepare the string for modification and shift the remainder to the front */
1085 mrb_str_modify(mrb, str);
1087 memmove(s, s+plen, slen-plen);
1089 RSTR_SET_LEN(str, slen-plen);
1095 * str.delete_prefix(prefix) -> new_str
1097 * Returns a copy of <i>str</i> with leading <code>prefix</code> deleted.
1099 * "hello".delete_prefix("hel") #=> "lo"
1100 * "hello".delete_prefix("llo") #=> "hello"
/* String#delete_prefix: returns the receiver without the given prefix, or a
   plain duplicate when the prefix is too long or does not match. */
1103 mrb_str_del_prefix(mrb_state *mrb, mrb_value self)
1108 mrb_get_args(mrb, "s", &ptr, &plen);
1109 slen = RSTRING_LEN(self);
1110 if (plen > slen) return mrb_str_dup(mrb, self);
1111 if (memcmp(RSTRING_PTR(self), ptr, plen) != 0)
1112 return mrb_str_dup(mrb, self);
/* everything after the prefix */
1113 return mrb_str_substr(mrb, self, plen, slen-plen);
1118 * str.delete_suffix!(suffix) -> self or nil
1120 * Deletes trailing <code>suffix</code> from <i>str</i>, returning
1121 * <code>nil</code> if no change was made.
1123 * "hello".delete_suffix!("llo") #=> "he"
1124 * "hello".delete_suffix!("hel") #=> nil
/*
 * String#delete_suffix!: removes a trailing suffix from the receiver in
 * place by shortening its length.  Returns nil when the suffix is longer
 * than the string or does not match.
 * NOTE(review): an empty suffix passes both checks below, so it is not
 * reported as "no change" -- confirm that this is intended.
 */
1127 mrb_str_del_suffix_bang(mrb_state *mrb, mrb_value self)
1131 struct RString *str = RSTRING(self);
1133 mrb_get_args(mrb, "s", &ptr, &plen);
1134 slen = RSTR_LEN(str);
1135 if (plen > slen) return mrb_nil_value();
/* compare against the last plen bytes */
1137 if (memcmp(s+slen-plen, ptr, plen) != 0) return mrb_nil_value();
1138 if (!MRB_FROZEN_P(str) && (RSTR_SHARED_P(str) || RSTR_FSHARED_P(str))) {
1139 /* no need to modify string */
1142 mrb_str_modify(mrb, str);
1144 RSTR_SET_LEN(str, slen-plen);
1150 * str.delete_suffix(suffix) -> new_str
1152 * Returns a copy of <i>str</i> with trailing <code>suffix</code> deleted.
1154 * "hello".delete_suffix("llo") #=> "he"
1155 * "hello".delete_suffix("hel") #=> "hello"
/* String#delete_suffix: returns the receiver without the given trailing
   suffix, or a plain duplicate when the suffix is too long or does not match. */
1158 mrb_str_del_suffix(mrb_state *mrb, mrb_value self)
1163 mrb_get_args(mrb, "s", &ptr, &plen);
1164 slen = RSTRING_LEN(self);
1165 if (plen > slen) return mrb_str_dup(mrb, self);
1166 if (memcmp(RSTRING_PTR(self)+slen-plen, ptr, plen) != 0)
1167 return mrb_str_dup(mrb, self);
/* everything before the suffix */
1168 return mrb_str_substr(mrb, self, 0, slen-plen);
/*
 * String#__lines: splits the receiver at '\n' and pushes each piece onto a
 * new array; a piece includes its terminating newline when there is one.
 * Takes no arguments.
 */
1172 mrb_str_lines(mrb_state *mrb, mrb_value self)
1177 char *b = RSTRING_PTR(self);
1179 char *e = b + RSTRING_LEN(self);
1181 mrb_get_args(mrb, "");
1183 result = mrb_ary_new(mrb);
/* arena index is saved once and restored after each pushed line */
1184 ai = mrb_gc_arena_save(mrb);
1187 while (p < e && *p != '\n') p++;
/* NOTE(review): when the scan stops at p == e this reads one byte past the
   string contents -- presumably safe because of a terminating NUL, confirm */
1188 if (*p == '\n') p++;
1189 len = (mrb_int) (p - t);
1190 mrb_ary_push(mrb, result, mrb_str_new(mrb, t, len));
1191 mrb_gc_arena_restore(mrb, ai);
/*
 * Gem initializer: registers the extension methods on the String class and
 * "chr" on the Fixnum class.  Each registration carries the argument
 * specification declared for that method.
 */
1197 mrb_mruby_string_ext_gem_init(mrb_state* mrb)
1199 struct RClass * s = mrb->string_class;
/* NOTE(review): mrb_str_dump is not defined in the visible part of this file -- presumably provided elsewhere */
1201 mrb_define_method(mrb, s, "dump", mrb_str_dump, MRB_ARGS_NONE());
1202 mrb_define_method(mrb, s, "getbyte", mrb_str_getbyte, MRB_ARGS_REQ(1));
1203 mrb_define_method(mrb, s, "setbyte", mrb_str_setbyte, MRB_ARGS_REQ(2));
1204 mrb_define_method(mrb, s, "byteslice", mrb_str_byteslice, MRB_ARGS_REQ(1)|MRB_ARGS_OPT(1));
1205 mrb_define_method(mrb, s, "swapcase!", mrb_str_swapcase_bang, MRB_ARGS_NONE());
1206 mrb_define_method(mrb, s, "swapcase", mrb_str_swapcase, MRB_ARGS_NONE());
/* "concat" and "<<" share one implementation */
1207 mrb_define_method(mrb, s, "concat", mrb_str_concat_m, MRB_ARGS_REQ(1));
1208 mrb_define_method(mrb, s, "<<", mrb_str_concat_m, MRB_ARGS_REQ(1));
1209 mrb_define_method(mrb, s, "count", mrb_str_count, MRB_ARGS_REQ(1));
1210 mrb_define_method(mrb, s, "tr", mrb_str_tr, MRB_ARGS_REQ(2));
1211 mrb_define_method(mrb, s, "tr!", mrb_str_tr_bang, MRB_ARGS_REQ(2));
1212 mrb_define_method(mrb, s, "tr_s", mrb_str_tr_s, MRB_ARGS_REQ(2));
1213 mrb_define_method(mrb, s, "tr_s!", mrb_str_tr_s_bang, MRB_ARGS_REQ(2));
1214 mrb_define_method(mrb, s, "squeeze", mrb_str_squeeze, MRB_ARGS_OPT(1));
1215 mrb_define_method(mrb, s, "squeeze!", mrb_str_squeeze_bang, MRB_ARGS_OPT(1));
1216 mrb_define_method(mrb, s, "delete", mrb_str_delete, MRB_ARGS_REQ(1));
1217 mrb_define_method(mrb, s, "delete!", mrb_str_delete_bang, MRB_ARGS_REQ(1));
1218 mrb_define_method(mrb, s, "start_with?", mrb_str_start_with, MRB_ARGS_REST());
1219 mrb_define_method(mrb, s, "end_with?", mrb_str_end_with, MRB_ARGS_REST());
1220 mrb_define_method(mrb, s, "hex", mrb_str_hex, MRB_ARGS_NONE());
1221 mrb_define_method(mrb, s, "oct", mrb_str_oct, MRB_ARGS_NONE());
1222 mrb_define_method(mrb, s, "chr", mrb_str_chr, MRB_ARGS_NONE());
1223 mrb_define_method(mrb, s, "succ", mrb_str_succ, MRB_ARGS_NONE());
1224 mrb_define_method(mrb, s, "succ!", mrb_str_succ_bang, MRB_ARGS_NONE());
/* "next" / "next!" are aliases of "succ" / "succ!" */
1225 mrb_define_alias(mrb, s, "next", "succ");
1226 mrb_define_alias(mrb, s, "next!", "succ!");
1227 mrb_define_method(mrb, s, "ord", mrb_str_ord, MRB_ARGS_NONE());
1228 mrb_define_method(mrb, s, "delete_prefix!", mrb_str_del_prefix_bang, MRB_ARGS_REQ(1));
1229 mrb_define_method(mrb, s, "delete_prefix", mrb_str_del_prefix, MRB_ARGS_REQ(1));
1230 mrb_define_method(mrb, s, "delete_suffix!", mrb_str_del_suffix_bang, MRB_ARGS_REQ(1));
1231 mrb_define_method(mrb, s, "delete_suffix", mrb_str_del_suffix, MRB_ARGS_REQ(1));
1233 mrb_define_method(mrb, s, "__lines", mrb_str_lines, MRB_ARGS_NONE());
/* registered on the Fixnum class, not on String */
1234 mrb_define_method(mrb, mrb->fixnum_class, "chr", mrb_fixnum_chr, MRB_ARGS_NONE());
1238 mrb_mruby_string_ext_gem_final(mrb_state* mrb)