From e294cc5d7355a434d6b698c777674e1b7d4d4583 Mon Sep 17 00:00:00 2001 From: Jarkko Hietaniemi Date: Sat, 22 Jul 2006 21:51:48 +0300 Subject: [PATCH] z/OS: pp_sys.c, reg*.c, toke.c, utf8.c Message-ID: <44C24994.6020008@iki.fi> p4raw-id: //depot/perl@28607 --- pp_sys.c | 11 +++++++ regcomp.c | 25 +++++++++++++++ regexec.c | 14 ++++++-- toke.c | 108 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++------ utf8.c | 4 +++ 5 files changed, 149 insertions(+), 13 deletions(-) diff --git a/pp_sys.c b/pp_sys.c index d0b3b10..4246e3c 100644 --- a/pp_sys.c +++ b/pp_sys.c @@ -2557,6 +2557,17 @@ PP(pp_accept) nstio = GvIOn(ngv); fd = PerlSock_accept(PerlIO_fileno(IoIFP(gstio)), (struct sockaddr *) namebuf, &len); +#if defined(OEMVS) + if (len == 0) { + /* Some platforms indicate zero length when an AF_UNIX client is + * not bound. Simulate a non-zero-length sockaddr structure in + * this case. */ + namebuf[0] = 0; /* sun_len */ + namebuf[1] = AF_UNIX; /* sun_family */ + len = 2; + } +#endif + if (fd < 0) goto badexit; if (IoIFP(nstio)) diff --git a/regcomp.c b/regcomp.c index f4821c0..8928c41 100644 --- a/regcomp.c +++ b/regcomp.c @@ -2005,14 +2005,23 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan, I32 *min, U32 flags char * const s0 = STRING(scan), *s, *t; char * const s1 = s0 + STR_LEN(scan) - 1; char * const s2 = s1 - 4; +#ifdef EBCDIC /* RD tunifold greek 0390 and 03B0 */ + const char t0[] = "\xaf\x49\xaf\x42"; +#else const char t0[] = "\xcc\x88\xcc\x81"; +#endif const char * const t1 = t0 + 3; for (s = s0 + 2; s < s2 && (t = ninstr(s, s1, t0, t1)); s = t + 4) { +#ifdef EBCDIC + if (((U8)t[-1] == 0x68 && (U8)t[-2] == 0xB4) || + ((U8)t[-1] == 0x46 && (U8)t[-2] == 0xB5)) +#else if (((U8)t[-1] == 0xB9 && (U8)t[-2] == 0xCE) || ((U8)t[-1] == 0x85 && (U8)t[-2] == 0xCF)) +#endif *min -= 4; } } @@ -5881,11 +5890,27 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, U32 depth) STRLEN foldlen; const UV f = to_uni_fold(natvalue, foldbuf, &foldlen); +#ifdef EBCDIC /* RD t/uni/fold ff and 6b */ + if (RExC_precomp[0] == ':' && + RExC_precomp[1] == '[' && + (f == 0xDF || f == 0x92)) { + f = NATIVE_TO_UNI(f); + } +#endif /* If folding and foldable and a single * character, insert also the folded version * to the charclass. */ if (f != value) { +#ifdef EBCDIC /* RD tunifold ligatures s,t fb05, fb06 */ + if ((RExC_precomp[0] == ':' && + RExC_precomp[1] == '[' && + (f == 0xA2 && + (value == 0xFB05 || value == 0xFB06))) ? + foldlen == ((STRLEN)UNISKIP(f) - 1) : + foldlen == (STRLEN)UNISKIP(f) ) +#else if (foldlen == (STRLEN)UNISKIP(f)) +#endif Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n", f); else { diff --git a/regexec.c b/regexec.c index 3eee31e..59d5624 100644 --- a/regexec.c +++ b/regexec.c @@ -3941,11 +3941,19 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog) to_utf8_lower((U8*)s, tmpbuf1, &ulen1); to_utf8_upper((U8*)s, tmpbuf2, &ulen2); - +#ifdef EBCDIC + ST.c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXLEN, 0, + ckWARN(WARN_UTF8) ? + 0 : UTF8_ALLOW_ANY); + ST.c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXLEN, 0, + ckWARN(WARN_UTF8) ? + 0 : UTF8_ALLOW_ANY); +#else ST.c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0, - uniflags); + uniflags); ST.c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0, - uniflags); + uniflags); +#endif } else { ST.c2 = ST.c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0, diff --git a/toke.c b/toke.c index 30a4548..2c6d8be 100644 --- a/toke.c +++ b/toke.c @@ -1787,6 +1787,7 @@ S_scan_const(pTHX_ char *start) UV uv; #ifdef EBCDIC UV literal_endpoint = 0; + bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */ #endif const char * const leaveit = /* set of acceptably-backslashed characters */ @@ -1810,7 +1811,15 @@ S_scan_const(pTHX_ char *start) I32 min; /* first character in range */ I32 max; /* last character in range */ - if (has_utf8) { +#ifdef EBCDIC + UV uvmax = 0; +#endif + + if (has_utf8 +#ifdef EBCDIC + && !native_range +#endif + ) { char * const c = (char*)utf8_hop((U8*)d, -1); char *e = d++; while (e-- > c) @@ -1823,12 +1832,43 @@ S_scan_const(pTHX_ char *start) } i = d - SvPVX_const(sv); /* remember current offset */ +#ifdef EBCDIC + SvGROW(sv, + SvLEN(sv) + (has_utf8 ? + (512 - UTF_CONTINUATION_MARK + + UNISKIP(0x100)) + : 256)); + /* How many two-byte within 0..255: 128 in UTF-8, + * 96 in UTF-8-mod. */ +#else SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */ +#endif d = SvPVX(sv) + i; /* refresh d after realloc */ - d -= 2; /* eat the first char and the - */ - - min = (U8)*d; /* first char in range */ - max = (U8)d[1]; /* last char in range */ +#ifdef EBCDIC + if (has_utf8) { + int j; + for (j = 0; j <= 1; j++) { + char * const c = (char*)utf8_hop((U8*)d, -1); + const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0); + if (j) + min = (U8)uv; + else if (uv < 256) + max = (U8)uv; + else { + max = (U8)0xff; /* only to \xff */ + uvmax = uv; /* \x{100} to uvmax */ + } + d = c; /* eat endpoint chars */ + } + } + else { +#endif + d -= 2; /* eat the first char and the - */ + min = (U8)*d; /* first char in range */ + max = (U8)d[1]; /* last char in range */ +#ifdef EBCDIC + } +#endif if (min > max) { Perl_croak(aTHX_ @@ -1853,7 +1893,29 @@ S_scan_const(pTHX_ char *start) else #endif for (i = min; i <= max; i++) - *d++ = (char)i; +#ifdef EBCDIC + if (has_utf8) { + const U8 ch = (U8)NATIVE_TO_UTF(i); + if (UNI_IS_INVARIANT(ch)) + *d++ = (U8)i; + else { + *d++ = (U8)UTF8_EIGHT_BIT_HI(ch); + *d++ = (U8)UTF8_EIGHT_BIT_LO(ch); + } + } + else +#endif + *d++ = (char)i; + +#ifdef EBCDIC + if (uvmax) { + d = (char*)uvchr_to_utf8((U8*)d, 0x100); + if (uvmax > 0x101) + *d++ = (char)UTF_TO_NATIVE(0xff); + if (uvmax > 0x100) + d = (char*)uvchr_to_utf8((U8*)d, uvmax); + } +#endif /* mark the range as done, and continue */ dorange = FALSE; @@ -1869,7 +1931,11 @@ S_scan_const(pTHX_ char *start) if (didrange) { Perl_croak(aTHX_ "Ambiguous range in transliteration operator"); } - if (has_utf8) { + if (has_utf8 +#ifdef EBCDIC + && !native_range +#endif + ) { *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */ s++; continue; @@ -1881,6 +1947,7 @@ S_scan_const(pTHX_ char *start) didrange = FALSE; #ifdef EBCDIC literal_endpoint = 0; + native_range = TRUE; #endif } } @@ -1985,8 +2052,8 @@ S_scan_const(pTHX_ char *start) if ((isALPHA(*s) || isDIGIT(*s)) && ckWARN(WARN_MISC)) Perl_warner(aTHX_ packWARN(WARN_MISC), - "Unrecognized escape \\%c passed through", - *s); + "Unrecognized escape \\%c passed through", + *s); /* default action is to copy the quoted character */ goto default_action; } @@ -2084,6 +2151,10 @@ S_scan_const(pTHX_ char *start) (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF); } +#ifdef EBCDIC + if (uv > 255 && !dorange) + native_range = FALSE; +#endif } else { *d++ = (char)uv; @@ -2161,6 +2232,10 @@ S_scan_const(pTHX_ char *start) SvGROW(sv, (SvLEN(sv) + len - (e - s + 4))); d = SvPVX(sv) + (d - odest); } +#ifdef EBCDIC + if (!dorange) + native_range = FALSE; /* \N{} is guessed to be Unicode */ +#endif Copy(str, d, len, char); d += len; SvREFCNT_dec(res); @@ -2234,6 +2309,10 @@ S_scan_const(pTHX_ char *start) } d = (char*)uvchr_to_utf8((U8*)d, nextuv); has_utf8 = TRUE; +#ifdef EBCDIC + if (uv > 255 && !dorange) + native_range = FALSE; +#endif } else { *d++ = NATIVE_TO_NEED(has_utf8,*s++); @@ -12343,7 +12422,7 @@ S_swallow_bom(pTHX_ U8 *s) filter_add(utf16rev_textfilter, NULL); Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8); utf16_to_utf8_reversed(s, news, - PL_bufend - (char*)s - 1, + PL_bufend - (char*)s, &newlen); sv_setpvn(PL_linestr, (const char*)news, newlen); #ifdef PERL_MAD @@ -12413,6 +12492,15 @@ S_swallow_bom(pTHX_ U8 *s) goto utf16be; } } +#ifdef EBCDIC + case 0xDD: + if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) { + if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n"); + s += 4; /* UTF-8 */ + } + break; +#endif + default: if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) { /* Leading bytes diff --git a/utf8.c b/utf8.c index b62e552..3b2297a 100644 --- a/utf8.c +++ b/utf8.c @@ -896,7 +896,11 @@ Perl_utf16_to_utf8(pTHX_ U8* p, U8* d, I32 bytelen, I32 *newlen) UV uv = (p[0] << 8) + p[1]; /* UTF-16BE */ p += 2; if (uv < 0x80) { +#ifdef EBCDIC + *d++ = UNI_TO_NATIVE(uv); +#else *d++ = (U8)uv; +#endif continue; } if (uv < 0x800) { -- 2.7.4