/* The 1U keeps Solaris from griping when shifting sets the uppermost bit */
# define _CC_mask(classnum) (1U << (classnum))
# define _generic_isCC(c, classnum) cBOOL(FITS_IN_8_BITS(c) \
- && (PL_charclass[(U8) NATIVE_TO_LATIN1(c)] & _CC_mask(classnum)))
+ && (PL_charclass[(U8) (c)] & _CC_mask(classnum)))
/* The mask for the _A versions of the macros; it just adds in the bit for
* ASCII. */
/* The _A version makes sure that both the desired bit and the ASCII bit
* are present */
# define _generic_isCC_A(c, classnum) (FITS_IN_8_BITS(c) \
- && ((PL_charclass[(U8) NATIVE_TO_LATIN1(c)] & _CC_mask_A(classnum)) \
+ && ((PL_charclass[(U8) (c)] & _CC_mask_A(classnum)) \
== _CC_mask_A(classnum)))
# define isALPHA_A(c) _generic_isCC_A(c, _CC_ALPHA)
/* Either participates in a fold with a character above 255, or is a
* multi-char fold */
-# define _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(c) ((! cBOOL(FITS_IN_8_BITS(c))) || (PL_charclass[(U8) NATIVE_TO_LATIN1(c)] & _CC_mask(_CC_NONLATIN1_FOLD)))
+# define _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(c) ((! cBOOL(FITS_IN_8_BITS(c))) || (PL_charclass[(U8) (c)] & _CC_mask(_CC_NONLATIN1_FOLD)))
# define _isQUOTEMETA(c) _generic_isCC(c, _CC_QUOTEMETA)
# define _IS_NON_FINAL_FOLD_ONLY_FOR_USE_BY_REGCOMP_DOT_C(c) \
* out-of-range */
#define toLOWER_LATIN1(c) ((! FITS_IN_8_BITS(c)) \
? (c) \
- : LATIN1_TO_NATIVE(PL_latin1_lc[ \
- NATIVE_TO_LATIN1( (U8) (c)) ]))
+ : PL_latin1_lc[ (U8) (c) ])
#define toLOWER_L1(c) toLOWER_LATIN1(c) /* Synonym for consistency */
/* Modified uc. Is correct uc except for three non-ascii chars which are
* character for input out-of-range */
#define toUPPER_LATIN1_MOD(c) ((! FITS_IN_8_BITS(c)) \
? (c) \
- : LATIN1_TO_NATIVE(PL_mod_latin1_uc[ \
- NATIVE_TO_LATIN1( (U8) (c)) ]))
+ : PL_mod_latin1_uc[ (U8) (c) ])
#ifdef USE_NEXT_CTYPE
# define isALPHANUMERIC_LC(c) NXIsAlNum((unsigned int)(c))
if ( !UTF ) {
/* store first byte of utf8 representation of
variant codepoints */
- if (! UNI_IS_INVARIANT(uvc)) {
+ if (! NATIVE_IS_INVARIANT(uvc)) {
TRIE_BITMAP_SET(trie, UTF8_TWO_BYTE_HI(uvc));
}
}
if (!(data->start_class->flags & ANYOF_LOCALE)) {
ANYOF_CLASS_CLEAR(data->start_class, classnum_to_namedclass(classnum) + 1);
for (value = 0; value < loop_max; value++) {
- if (! _generic_isCC(LATIN1_TO_NATIVE(value), classnum)) {
- ANYOF_BITMAP_CLEAR(data->start_class, LATIN1_TO_NATIVE(value));
+ if (! _generic_isCC(value, classnum)) {
+ ANYOF_BITMAP_CLEAR(data->start_class, value);
}
}
}
* in case it isn't a true locale-node. This will
* create false positives if it truly is locale */
for (value = 0; value < loop_max; value++) {
- if (_generic_isCC(LATIN1_TO_NATIVE(value), classnum)) {
- ANYOF_BITMAP_SET(data->start_class, LATIN1_TO_NATIVE(value));
+ if (_generic_isCC(value, classnum)) {
+ ANYOF_BITMAP_SET(data->start_class, value);
}
}
}
if (!(data->start_class->flags & ANYOF_LOCALE)) {
ANYOF_CLASS_CLEAR(data->start_class, classnum_to_namedclass(classnum));
for (value = 0; value < loop_max; value++) {
- if (_generic_isCC(LATIN1_TO_NATIVE(value), classnum)) {
- ANYOF_BITMAP_CLEAR(data->start_class, LATIN1_TO_NATIVE(value));
+ if (_generic_isCC(value, classnum)) {
+ ANYOF_BITMAP_CLEAR(data->start_class, value);
}
}
}
* case it isn't a true locale-node. This will create
* false positives if it truly is locale */
for (value = 0; value < loop_max; value++) {
- if (! _generic_isCC(LATIN1_TO_NATIVE(value), classnum)) {
- ANYOF_BITMAP_SET(data->start_class, LATIN1_TO_NATIVE(value));
+ if (! _generic_isCC(value, classnum)) {
+ ANYOF_BITMAP_SET(data->start_class, value);
}
}
if (PL_regkind[OP(scan)] == NPOSIXD) {
if (! len_passed_in) {
if (UTF) {
if (FOLD && (! LOC || code_point > 255)) {
- _to_uni_fold_flags(NATIVE_TO_UNI(code_point),
+ _to_uni_fold_flags(code_point,
character,
&len,
FOLD_FLAGS_FULL | ((LOC)
UV uv = (p[0] << 8) + p[1]; /* UTF-16BE */
p += 2;
if (uv < 0x80) {
-#ifdef EBCDIC
- *d++ = LATIN1_TO_NATIVE(uv);
-#else
*d++ = (U8)uv;
-#endif
continue;
}
if (uv < 0x800) {
assert(S_or_s == 'S' || S_or_s == 's');
- if (UNI_IS_INVARIANT(converted)) { /* No difference between the two for
- characters in this range */
+ if (NATIVE_IS_INVARIANT(converted)) { /* No difference between the two for
+ characters in this range */
*p = (U8) converted;
*lenp = 1;
return converted;
U8 converted = toLOWER_LATIN1(c);
if (p != NULL) {
- if (UNI_IS_INVARIANT(converted)) {
+ if (NATIVE_IS_INVARIANT(converted)) {
*p = converted;
*lenp = 1;
}
converted = toLOWER_LATIN1(c);
}
- if (UNI_IS_INVARIANT(converted)) {
+ if (NATIVE_IS_INVARIANT(converted)) {
*p = (U8) converted;
*lenp = 1;
}
Perl_is_uni_alnum_lc(pTHX_ UV c)
{
if (c < 256) {
- return isALNUM_LC(UNI_TO_NATIVE(c));
+ return isALNUM_LC(c);
}
return _is_uni_FOO(_CC_WORDCHAR, c);
}
Perl_is_uni_alnumc_lc(pTHX_ UV c)
{
if (c < 256) {
- return isALPHANUMERIC_LC(UNI_TO_NATIVE(c));
+ return isALPHANUMERIC_LC(c);
}
return _is_uni_FOO(_CC_ALPHANUMERIC, c);
}
Perl_is_uni_idfirst_lc(pTHX_ UV c)
{
if (c < 256) {
- return isIDFIRST_LC(UNI_TO_NATIVE(c));
+ return isIDFIRST_LC(c);
}
return _is_uni_perl_idstart(c);
}
Perl_is_uni_alpha_lc(pTHX_ UV c)
{
if (c < 256) {
- return isALPHA_LC(UNI_TO_NATIVE(c));
+ return isALPHA_LC(c);
}
return _is_uni_FOO(_CC_ALPHA, c);
}
Perl_is_uni_ascii_lc(pTHX_ UV c)
{
if (c < 256) {
- return isASCII_LC(UNI_TO_NATIVE(c));
+ return isASCII_LC(c);
}
return 0;
}
Perl_is_uni_blank_lc(pTHX_ UV c)
{
if (c < 256) {
- return isBLANK_LC(UNI_TO_NATIVE(c));
+ return isBLANK_LC(c);
}
return isBLANK_uni(c);
}
Perl_is_uni_space_lc(pTHX_ UV c)
{
if (c < 256) {
- return isSPACE_LC(UNI_TO_NATIVE(c));
+ return isSPACE_LC(c);
}
return isSPACE_uni(c);
}
Perl_is_uni_digit_lc(pTHX_ UV c)
{
if (c < 256) {
- return isDIGIT_LC(UNI_TO_NATIVE(c));
+ return isDIGIT_LC(c);
}
return _is_uni_FOO(_CC_DIGIT, c);
}
Perl_is_uni_upper_lc(pTHX_ UV c)
{
if (c < 256) {
- return isUPPER_LC(UNI_TO_NATIVE(c));
+ return isUPPER_LC(c);
}
return _is_uni_FOO(_CC_UPPER, c);
}
Perl_is_uni_lower_lc(pTHX_ UV c)
{
if (c < 256) {
- return isLOWER_LC(UNI_TO_NATIVE(c));
+ return isLOWER_LC(c);
}
return _is_uni_FOO(_CC_LOWER, c);
}
Perl_is_uni_cntrl_lc(pTHX_ UV c)
{
if (c < 256) {
- return isCNTRL_LC(UNI_TO_NATIVE(c));
+ return isCNTRL_LC(c);
}
return 0;
}
Perl_is_uni_graph_lc(pTHX_ UV c)
{
if (c < 256) {
- return isGRAPH_LC(UNI_TO_NATIVE(c));
+ return isGRAPH_LC(c);
}
return _is_uni_FOO(_CC_GRAPH, c);
}
Perl_is_uni_print_lc(pTHX_ UV c)
{
if (c < 256) {
- return isPRINT_LC(UNI_TO_NATIVE(c));
+ return isPRINT_LC(c);
}
return _is_uni_FOO(_CC_PRINT, c);
}
Perl_is_uni_punct_lc(pTHX_ UV c)
{
if (c < 256) {
- return isPUNCT_LC(UNI_TO_NATIVE(c));
+ return isPUNCT_LC(c);
}
return _is_uni_FOO(_CC_PUNCT, c);
}
Perl_is_uni_xdigit_lc(pTHX_ UV c)
{
if (c < 256) {
- return isXDIGIT_LC(UNI_TO_NATIVE(c));
+ return isXDIGIT_LC(c);
}
return isXDIGIT_uni(c);
}
SV **swashp, const char *normal, const char *special)
{
dVAR;
- U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
STRLEN len = 0;
- const UV uv0 = valid_utf8_to_uvchr(p, NULL);
- /* The NATIVE_TO_UNI() and UNI_TO_NATIVE() mappings
- * are necessary in EBCDIC, they are redundant no-ops
- * in ASCII-ish platforms, and hopefully optimized away. */
- const UV uv1 = NATIVE_TO_UNI(uv0);
+ const UV uv1 = valid_utf8_to_uvchr(p, NULL);
PERL_ARGS_ASSERT_TO_UTF8_CASE;
* be given */
}
- uvuni_to_utf8(tmpbuf, uv1);
-
if (!*swashp) /* load on-demand */
*swashp = _core_swash_init("utf8", normal, &PL_sv_undef, 4, 0, NULL, NULL);
SV **svp;
if (hv &&
- (svp = hv_fetch(hv, (const char*)tmpbuf, UNISKIP(uv1), FALSE)) &&
+ (svp = hv_fetch(hv, (const char*)p, UNISKIP(uv1), FALSE)) &&
(*svp)) {
const char *s;
s = SvPV_const(*svp, len);
if (len == 1)
+ /* EIGHTBIT: the mapping string is a single byte, so encode that one character into ustrp as UTF-8 */
len = uvuni_to_utf8(ustrp, NATIVE_TO_UNI(*(U8*)s)) - ustrp;
else {
-#ifdef EBCDIC
- /* If we have EBCDIC we need to remap the characters
- * since any characters in the low 256 are Unicode
- * code points, not EBCDIC. */
- U8 *t = (U8*)s, *tend = t + len, *d;
-
- d = tmpbuf;
- if (SvUTF8(*svp)) {
- STRLEN tlen = 0;
-
- while (t < tend) {
- const UV c = utf8_to_uvchr_buf(t, tend, &tlen);
- if (tlen > 0) {
- d = uvchr_to_utf8(d, UNI_TO_NATIVE(c));
- t += tlen;
- }
- else
- break;
- }
- }
- else {
- while (t < tend) {
- d = uvchr_to_utf8(d, UNI_TO_NATIVE(*t));
- t++;
- }
- }
- len = d - tmpbuf;
- Copy(tmpbuf, ustrp, len, U8);
-#else
Copy(s, ustrp, len, U8);
-#endif
}
}
}
if (!len && *swashp) {
- const UV uv2 = swash_fetch(*swashp, tmpbuf, TRUE /* => is utf8 */);
+ const UV uv2 = swash_fetch(*swashp, p, TRUE /* => is utf8 */);
if (uv2) {
/* It was "normal" (a single character mapping). */
- const UV uv3 = UNI_TO_NATIVE(uv2);
- len = uvchr_to_utf8(ustrp, uv3) - ustrp;
+ len = uvchr_to_utf8(ustrp, uv2) - ustrp;
}
}
if (lenp)
*lenp = len;
- return uv0;
+ return uv1;
}
U32 bit;
SV *swatch;
U8 tmputf8[2];
- const UV c = NATIVE_TO_ASCII(*ptr);
+ const UV c = *ptr;
PERL_ARGS_ASSERT_SWASH_FETCH;
}
/* Convert to utf8 if not already */
- if (!do_utf8 && !UNI_IS_INVARIANT(c)) {
+ if (!do_utf8 && !NATIVE_IS_INVARIANT(c)) {
tmputf8[0] = (U8)UTF8_EIGHT_BIT_HI(c);
tmputf8[1] = (U8)UTF8_EIGHT_BIT_LO(c);
ptr = tmputf8;
to_utf8_fold(p1, foldbuf1, &n1);
}
else { /* Not utf8, get utf8 fold */
- to_uni_fold(NATIVE_TO_LATIN1(*p1), foldbuf1, &n1);
+ to_uni_fold(*p1, foldbuf1, &n1);
}
f1 = foldbuf1;
}
to_utf8_fold(p2, foldbuf2, &n2);
}
else {
- to_uni_fold(NATIVE_TO_LATIN1(*p2), foldbuf2, &n2);
+ to_uni_fold(*p2, foldbuf2, &n2);
}
f2 = foldbuf2;
}