2 ** symbol.c - Symbol class
4 ** See Copyright Notice in mruby.h
10 #include <mruby/khash.h>
11 #include <mruby/string.h>
12 #include <mruby/dump.h>
13 #include <mruby/class.h>
15 /* ------------------------------------------------------ */
/*
 * Record type for one slot of the symbol table (mrb->symtbl).  The field
 * declarations are not shown in this excerpt; elsewhere in this file slots
 * are accessed through the members .len, .name, .prev and .lit.
 */
16 typedef struct symbol_name {
/*
 * Reject over-long symbol names: calls mrb_raise() with E_ARGUMENT_ERROR
 * when `len` is greater than or equal to RITE_LV_NULL_MARK.
 */
24 sym_validate_len(mrb_state *mrb, size_t len)
26 if (len >= RITE_LV_NULL_MARK) {
27 mrb_raise(mrb, E_ARGUMENT_ERROR, "symbol length too long");
31 #ifndef MRB_ENABLE_ALL_SYMBOLS
/*
 * Alphabet for inline-packed symbols: '_' at index 0, 'a'-'z' at 1-26,
 * 'A'-'Z' at 27-52 and '0'-'9' at 53-62.  sym_inline_pack() stores a
 * character as (index + 1) and sym_inline_unpack() reads it back with
 * pack_table[bits-1].
 */
32 static const char pack_table[] = "_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
/*
 * Try to encode the `len` bytes at `name` directly into a symbol value
 * using pack_table.  Returns 0 when the name cannot be packed: it is too
 * long, contains a NUL byte, or contains a character missing from
 * pack_table.  The statements that build the packed value and the success
 * returns are not visible in this excerpt.
 */
35 sym_inline_pack(const char *name, uint16_t len)
/* Capacity in characters at 5 and at 6 bits per character; two bits of the
 * symbol value are left out of the character payload. */
37 const int lower_length_max = (MRB_SYMBOL_BITSIZE - 2) / 5;
38 const int mix_length_max = (MRB_SYMBOL_BITSIZE - 2) / 6;
46 if (len > lower_length_max) return 0; /* too long */
/* First pass: validate characters and track whether the 5-bit form fits. */
47 for (i=0; i<len; i++) {
51 if (c == 0) return 0; /* NUL in name */
52 p = strchr(pack_table, (int)c);
53 if (p == 0) return 0; /* non alnum char */
/* Code is the pack_table index plus one, so a valid code is never 0. */
54 bits = (uint32_t)(p - pack_table)+1;
/* Codes 1..27 are '_' and 'a'-'z'; a larger code rules out the 5-bit form. */
55 if (bits > 27) lower = 0;
/* NOTE(review): this break ends validation at index mix_length_max.  If
 * lower_length_max exceeds mix_length_max by more than one, characters past
 * that index are never checked here, yet the loop below uses strchr()
 * without a NULL test -- confirm against the value of MRB_SYMBOL_BITSIZE. */
56 if (i >= mix_length_max) break;
/* Second pass: recompute each character's code; relies on the first pass
 * having already rejected characters that are not in pack_table. */
61 for (i=0; i<len; i++) {
65 p = strchr(pack_table, (int)c);
66 bits = (uint32_t)(p - pack_table)+1;
/* Reject names longer than the 6-bit capacity. */
71 if (len > mix_length_max) return 0;
/*
 * Decode an inline-packed symbol value into `buf`, one pack_table character
 * per 5- or 6-bit field.  How `lenp` is filled and what is returned are not
 * visible in this excerpt.
 */
76 sym_inline_unpack(mrb_sym sym, char *buf, mrb_int *lenp)
78 int bit_per_char = sym&2 ? 5 : 6; /* all lower case if `sym&2` is true */
/* NOTE(review): 30 is hard-coded here while sym_inline_pack() derives its
 * limits from (MRB_SYMBOL_BITSIZE - 2); the two agree only when
 * MRB_SYMBOL_BITSIZE is 32 -- confirm. */
83 for (i=0; i<30/bit_per_char; i++) {
/* Skip the two low bits, then mask out field number i. */
84 uint32_t bits = sym>>(i*bit_per_char+2) & ((1<<bit_per_char)-1);
/* Codes are index + 1; presumably a line not shown here stops the loop when
 * bits == 0, otherwise this would index pack_table[-1] -- verify.
 * NOTE(review): the doubled ';' is a harmless empty statement. */
86 buf[i] = pack_table[bits-1];;
/*
 * Hash the `len` bytes at `key`.  Only part of the mixing is visible in
 * this excerpt: a per-byte `hash += hash << 10` step and the final
 * `hash ^= hash >> 11; hash += hash << 15` steps.  Callers in this file use
 * the result as an index into mrb->symhash[].
 * NOTE(review): the visible steps resemble Jenkins' one-at-a-time hash;
 * confirm against the lines not shown.
 */
95 symhash(const char *key, size_t len)
99 for(hash = i = 0; i < len; ++i) {
101 hash += (hash << 10);
105 hash ^= (hash >> 11);
106 hash += (hash << 15);
/*
 * Look up an existing symbol for the `len` bytes at `name`; `hash` selects
 * the bucket in mrb->symhash[].  Returns 0 when the bucket is empty.  Table
 * symbols are returned as (table index << 1).
 */
111 find_symbol(mrb_state *mrb, const char *name, uint16_t len, uint8_t hash)
116 #ifndef MRB_ENABLE_ALL_SYMBOLS
/* Try the inline-packed encoding first; handling of its result is on lines
 * not shown here. */
118 i = sym_inline_pack(name, len);
/* Bucket head: table index stored for this hash, 0 meaning empty. */
122 i = mrb->symhash[hash];
123 if (i == 0) return 0;
125 sname = &mrb->symtbl[i];
/* Match requires equal length and identical bytes. */
126 if (sname->len == len && memcmp(sname->name, name, len) == 0) {
/* presumably 0xff marks a link distance too large to store in `prev`, which
 * forces the entry-by-entry walk below -- verify against sym_intern(). */
129 if (sname->prev == 0xff) {
131 sname = &mrb->symtbl[i];
/* Walk entries while `sname` is still above the base of the table. */
132 while (mrb->symtbl < sname) {
133 if (sname->len == len && memcmp(sname->name, name, len) == 0) {
/* The cast applies to the pointer difference, then the index is shifted left
 * by one, leaving bit 0 clear (bit 0 set denotes an inline symbol in
 * sym2name_len()). */
134 return (mrb_sym)(sname - mrb->symtbl)<<1;
/* A `prev` of 0 ends the chain. */
141 } while (sname->prev > 0);
/*
 * Return the symbol for the `len` bytes at `name`, registering a new table
 * entry when none exists yet.  `lit` is supplied as TRUE by
 * mrb_intern_static() and FALSE by mrb_intern().  May raise through
 * sym_validate_len().
 */
146 sym_intern(mrb_state *mrb, const char *name, size_t len, mrb_bool lit)
152 sym_validate_len(mrb, len);
153 hash = symhash(name, len);
/* NOTE(review): `len` (size_t) is narrowed to find_symbol()'s uint16_t
 * parameter; presumably safe because sym_validate_len() bounds it -- confirm
 * the value of RITE_LV_NULL_MARK. */
154 sym = find_symbol(mrb, name, len, hash);
155 if (sym > 0) return sym;
157 /* registering a new symbol */
/* `sym` is assigned on a line not shown here before this capacity check. */
159 if (mrb->symcapa < sym) {
/* Capacity starts at 100 entries and then grows by a factor of 6/5. */
160 if (mrb->symcapa == 0) mrb->symcapa = 100;
161 else mrb->symcapa = (size_t)(mrb->symcapa * 6 / 5);
/* One slot more than symcapa is allocated. */
162 mrb->symtbl = (symbol_name*)mrb_realloc(mrb, mrb->symtbl, sizeof(symbol_name)*(mrb->symcapa+1));
164 sname = &mrb->symtbl[sym];
165 sname->len = (uint16_t)len;
/* presumably names that are literal or live in read-only data are referenced
 * without copying; the body of this branch is not shown -- verify. */
166 if (lit || mrb_ro_data_p(name)) {
/* Heap copy of the name with room for one extra byte. */
171 char *p = (char *)mrb_malloc(mrb, len+1);
172 memcpy(p, name, len);
174 sname->name = (const char*)p;
/* Bucket already occupied: compute the distance to the previous head. */
177 if (mrb->symhash[hash]) {
178 mrb_sym i = sym - mrb->symhash[hash];
/* The new entry becomes the head of its bucket. */
187 mrb->symhash[hash] = sym;
/* Intern the `len` bytes at `name`; forwards to sym_intern() with lit = FALSE. */
193 mrb_intern(mrb_state *mrb, const char *name, size_t len)
195 return sym_intern(mrb, name, len, FALSE);
/* Intern the `len` bytes at `name`; forwards to sym_intern() with lit = TRUE. */
199 mrb_intern_static(mrb_state *mrb, const char *name, size_t len)
201 return sym_intern(mrb, name, len, TRUE);
/* Intern a NUL-terminated C string; its length is taken with strlen(). */
205 mrb_intern_cstr(mrb_state *mrb, const char *name)
207 return mrb_intern(mrb, name, strlen(name));
/* Intern the contents of `str`, using RSTRING_PTR/RSTRING_LEN as name and length. */
211 mrb_intern_str(mrb_state *mrb, mrb_value str)
213 return mrb_intern(mrb, RSTRING_PTR(str), RSTRING_LEN(str));
/*
 * Look up a symbol without registering it: returns the symbol value when
 * find_symbol() finds the name, otherwise nil.  May raise through
 * sym_validate_len().
 */
217 mrb_check_intern(mrb_state *mrb, const char *name, size_t len)
221 sym_validate_len(mrb, len);
222 sym = find_symbol(mrb, name, len, symhash(name, len));
223 if (sym > 0) return mrb_symbol_value(sym);
224 return mrb_nil_value();
/* mrb_check_intern() for a NUL-terminated C string; length taken with strlen(). */
228 mrb_check_intern_cstr(mrb_state *mrb, const char *name)
230 return mrb_check_intern(mrb, name, strlen(name));
/* mrb_check_intern() on the contents of `str` (RSTRING_PTR/RSTRING_LEN). */
234 mrb_check_intern_str(mrb_state *mrb, mrb_value str)
236 return mrb_check_intern(mrb, RSTRING_PTR(str), RSTRING_LEN(str));
/*
 * Resolve `sym` to its name.  Inline-packed symbols (bit 0 set) are decoded
 * into `buf`; other symbols are read from mrb->symtbl.  When `lenp` is
 * non-NULL the table path stores the name length through it.
 */
240 sym2name_len(mrb_state *mrb, mrb_sym sym, char *buf, mrb_int *lenp)
242 #ifndef MRB_ENABLE_ALL_SYMBOLS
243 if (sym & 1) { /* inline packed symbol */
244 return sym_inline_unpack(sym, buf, lenp);
/* Range check: 0 and anything above mrb->symidx are treated as invalid; the
 * handling itself is on lines not shown here. */
249 if (sym == 0 || mrb->symidx < sym) {
254 if (lenp) *lenp = mrb->symtbl[sym].len;
255 return mrb->symtbl[sym].name;
/*
 * Public wrapper around sym2name_len() that passes mrb->symbuf as the
 * decode buffer.
 * NOTE(review): for inline symbols the result presumably points into
 * mrb->symbuf and is overwritten by the next call -- confirm.
 */
259 mrb_sym2name_len(mrb_state *mrb, mrb_sym sym, mrb_int *lenp)
261 return sym2name_len(mrb, sym, mrb->symbuf, lenp);
/*
 * Release the symbol table: frees the name of every entry from index 1
 * through mrb->symidx whose `lit` flag is clear, then frees the table itself.
 */
265 mrb_free_symtbl(mrb_state *mrb)
269 for (i=1, lim=mrb->symidx+1; i<lim; i++) {
/* Only names without the `lit` flag are freed here. */
270 if (!mrb->symtbl[i].lit) {
271 mrb_free(mrb, (char*)mrb->symtbl[i].name);
274 mrb_free(mrb, mrb->symtbl);
278 mrb_init_symtbl(mrb_state *mrb)
282 /**********************************************************************
283 * Document-class: Symbol
285 * <code>Symbol</code> objects represent names and some strings
287 * interpreter. They are generated using the <code>:name</code> and
288 * <code>:"string"</code> literals
289 * syntax, and by the various <code>to_sym</code> methods. The same
290 * <code>Symbol</code> object will be created for a given name or string
291 * for the duration of a program's execution, regardless of the context
292 * or meaning of that name. Thus if <code>Fred</code> is a constant in
293 * one context, a method in another, and a class in a third, the
294 * <code>Symbol</code> <code>:Fred</code> will be the same object in
295 * all three contexts.
309 * $f1.object_id #=> 2514190
310 * $f2.object_id #=> 2514190
311 * $f3.object_id #=> 2514190
319 * sym.id2name -> string
322 * Returns the name or string corresponding to <i>sym</i>.
324 * :fred.id2name #=> "fred"
/* Method body registered for both "id2name" and "to_s": converts the receiver symbol with mrb_sym2str(). */
327 sym_to_s(mrb_state *mrb, mrb_value sym)
329 return mrb_sym2str(mrb, mrb_symbol(sym));
338 * In general, <code>to_sym</code> returns the <code>Symbol</code> corresponding
339 * to an object. As <i>sym</i> is already a symbol, <code>self</code> is returned
344 sym_to_sym(mrb_state *mrb, mrb_value sym)
352 * sym.inspect -> string
354 * Returns the representation of <i>sym</i> as a symbol literal.
356 * :fred.inspect #=> ":fred"
/* Variant selected when the condition preceding the #else holds (the #else comment names it __STDC__): plain cast to signed char. */
360 # define SIGN_EXTEND_CHAR(c) ((signed char)(c))
361 #else /* not __STDC__ */
362 /* As in Harbison and Steele. */
/* Arithmetic form: take the value as unsigned char, then flip and subtract 128. */
363 # define SIGN_EXTEND_CHAR(c) ((((unsigned char)(c)) ^ 128) - 128)
/*
 * True when c is alphanumeric or '_' and its sign-extended value is not -1.
 * NOTE(review): `c` is expanded more than once, so the argument must be free
 * of side effects.
 */
365 #define is_identchar(c) (SIGN_EXTEND_CHAR(c)!=-1&&(ISALNUM(c) || (c) == '_'))
/*
 * Classify the text at `m` -- symname_p() passes the text that follows the
 * character it has just stepped over -- as a special global-variable name.
 * Only part of the logic is visible in this excerpt.
 */
368 is_special_global_name(const char* m)
/* Punctuation characters matched by the first group of case labels. */
371 case '~': case '*': case '$': case '?': case '!': case '@':
372 case '/': case '\\': case ';': case ',': case '.': case '=':
373 case ':': case '<': case '>': case '\"':
374 case '&': case '`': case '\'': case '+':
/* Step over a single identifier character if one is present. */
380 if (is_identchar(*m)) m += 1;
/* Numeric form: at least one digit is required, then all following digits
 * are consumed. */
383 if (!ISDIGIT(*m)) return FALSE;
384 do ++m; while (ISDIGIT(*m));
/*
 * Scan the NUL-terminated string `name` and report whether it has one of the
 * accepted symbol-name shapes.  Returns FALSE for a NULL pointer, and the
 * final return is TRUE only if the scan stopped on the terminating NUL.
 * Many of the case labels are on lines not shown in this excerpt.
 */
391 symname_p(const char *name)
393 const char *m = name;
394 mrb_bool localid = FALSE;
396 if (!m) return FALSE;
/* Special global names are delegated to is_special_global_name(). */
402 if (is_special_global_name(++m)) return TRUE;
/* Optional second '@'. */
406 if (*++m == '@') ++m;
/* Operator spellings: each branch consumes the accepted follow-up character. */
411 case '<': ++m; break;
412 case '=': if (*++m == '>') ++m; break;
419 case '>': case '=': ++m; break;
426 case '~': ++m; break;
427 case '=': if (*++m == '=') ++m; break;
428 default: return FALSE;
433 if (*++m == '*') ++m;
437 case '=': case '~': ++m;
441 if (*++m == '@') ++m;
444 if (*++m == '|') ++m;
447 if (*++m == '&') ++m;
450 case '^': case '/': case '%': case '~': case '`':
/* A closing ']' is required here, optionally followed by '='. */
455 if (*++m != ']') return FALSE;
456 if (*++m == '=') ++m;
/* Identifier path: `localid` records that the first character is not upper case. */
460 localid = !ISUPPER(*m);
462 if (*m != '_' && !ISALPHA(*m)) return FALSE;
463 while (is_identchar(*m)) m += 1;
/* Suffix characters that may follow an identifier. */
466 case '!': case '?': case '=': ++m;
/* Any leftover character makes the name invalid. */
472 return *m ? FALSE : TRUE;
/*
 * Symbol#inspect: builds a string consisting of ':' followed by the symbol's
 * name.  When the name is not accepted by symname_p(), or contains an
 * embedded NUL (strlen differs from the stored length), the string is passed
 * through mrb_str_dump().
 */
476 sym_inspect(mrb_state *mrb, mrb_value sym)
481 mrb_sym id = mrb_symbol(sym);
484 name = mrb_sym2name_len(mrb, id, &len);
/* One extra byte for the leading ':'. */
485 str = mrb_str_new(mrb, 0, len+1);
486 sp = RSTRING_PTR(str);
487 RSTRING_PTR(str)[0] = ':';
488 memcpy(sp+1, name, len);
/* NOTE(review): this fit check runs after `len` has already been used by
 * mrb_str_new() and memcpy() above. */
489 mrb_assert_int_fit(mrb_int, len, size_t, SIZE_MAX);
490 if (!symname_p(name) || strlen(name) != (size_t)len) {
/* mrb_str_dump() returns a new string value, so `sp` is re-read from it. */
491 str = mrb_str_dump(mrb, str);
492 sp = RSTRING_PTR(str);
/*
 * Return the name of `sym` as a string value.  Inline symbols (bit 0 set)
 * go through mrb_str_new(); table symbols go through mrb_str_new_static().
 * Returns the undef value when no name is found.
 */
500 mrb_sym2str(mrb_state *mrb, mrb_sym sym)
503 const char *name = mrb_sym2name_len(mrb, sym, &len);
505 if (!name) return mrb_undef_value(); /* can't happen */
/* presumably copied because an inline name sits in a reusable buffer
 * (mrb->symbuf) -- verify. */
506 if (sym&1) { /* inline symbol */
507 return mrb_str_new(mrb, name, len);
509 return mrb_str_new_static(mrb, name, len);
/*
 * Return a C-string name for `sym`, or NULL when no name is found.  Names
 * rejected by symname_p() or containing an embedded NUL are returned in
 * mrb_str_dump() form.
 * NOTE(review): on the dump path the returned pointer is the buffer of a
 * string value created here; its lifetime presumably depends on that object
 * staying alive -- confirm for callers.
 */
513 mrb_sym2name(mrb_state *mrb, mrb_sym sym)
516 const char *name = mrb_sym2name_len(mrb, sym, &len);
518 if (!name) return NULL;
/* Plain name with no embedded NUL; the body of this branch is not shown. */
519 if (symname_p(name) && strlen(name) == (size_t)len) {
524 if (sym&1) { /* inline symbol */
525 str = mrb_str_new(mrb, name, len);
528 str = mrb_str_new_static(mrb, name, len);
530 str = mrb_str_dump(mrb, str);
531 return RSTRING_PTR(str);
/* Smaller of a and b (b when they are equal); each argument is expanded twice, so pass only side-effect-free expressions. */
535 #define lesser(a,b) (((a)>(b))?(b):(a))
/*
 * Symbol#<=>: returns nil when the argument is not a Symbol, 0 for identical
 * symbols, and otherwise -1, 0 or 1 from a byte-wise comparison of the two
 * names, with the shorter name ordered first when one is a prefix of the
 * other.
 */
538 sym_cmp(mrb_state *mrb, mrb_value s1)
543 mrb_get_args(mrb, "o", &s2);
544 if (mrb_type(s2) != MRB_TT_SYMBOL) return mrb_nil_value();
545 sym1 = mrb_symbol(s1);
546 sym2 = mrb_symbol(s2);
547 if (sym1 == sym2) return mrb_fixnum_value(0);
551 mrb_int len, len1, len2;
/* Separate decode buffers so both names can be held at once.
 * NOTE(review): 8 bytes assumes an inline name never exceeds 7 characters
 * plus terminator -- confirm against sym_inline_unpack(). */
552 char buf1[8], buf2[8];
554 p1 = sym2name_len(mrb, sym1, buf1, &len1);
555 p2 = sym2name_len(mrb, sym2, buf2, &len2);
/* Compare only the common prefix length. */
556 len = lesser(len1, len2);
557 retval = memcmp(p1, p2, len);
/* Results when the common prefix is identical: decide by length. */
559 if (len1 == len2) return mrb_fixnum_value(0);
560 if (len1 > len2) return mrb_fixnum_value(1);
561 return mrb_fixnum_value(-1);
/* Otherwise the sign of memcmp() decides. */
563 if (retval > 0) return mrb_fixnum_value(1);
564 return mrb_fixnum_value(-1);
/*
 * Define the Symbol class as a subclass of mrb->object_class, store it in
 * mrb->symbol_class, remove the class method "new", and register the
 * instance methods listed below.
 */
569 mrb_init_symbol(mrb_state *mrb)
573 mrb->symbol_class = sym = mrb_define_class(mrb, "Symbol", mrb->object_class); /* 15.2.11 */
/* Instances of this class carry the MRB_TT_SYMBOL type tag. */
574 MRB_SET_INSTANCE_TT(sym, MRB_TT_SYMBOL);
575 mrb_undef_class_method(mrb, sym, "new");
/* "id2name" and "to_s" share one implementation. */
577 mrb_define_method(mrb, sym, "id2name", sym_to_s, MRB_ARGS_NONE()); /* 15.2.11.3.2 */
578 mrb_define_method(mrb, sym, "to_s", sym_to_s, MRB_ARGS_NONE()); /* 15.2.11.3.3 */
579 mrb_define_method(mrb, sym, "to_sym", sym_to_sym, MRB_ARGS_NONE()); /* 15.2.11.3.4 */
580 mrb_define_method(mrb, sym, "inspect", sym_inspect, MRB_ARGS_NONE()); /* 15.2.11.3.5(x) */
581 mrb_define_method(mrb, sym, "<=>", sym_cmp, MRB_ARGS_REQ(1));