From 6940069f6d5beebb5f66572e358b4e7d0c3d1c43 Mon Sep 17 00:00:00 2001 From: Gurusamy Sarathy Date: Tue, 11 Jul 2000 17:48:28 +0000 Subject: [PATCH] integrate cfgperl changes#6220..6222 into mainline p4raw-link: @6222 on //depot/cfgperl: cb6e01d9fd93f1025bb60ed9c000931b2c8542a3 p4raw-link: @6220 on //depot/cfgperl: 94414bfbc497e71da32f6edca513d34725e3cae6 p4raw-id: //depot/perl@6350 p4raw-integrated: from //depot/cfgperl@6349 'copy in' lib/Pod/Usage.pm (@5717..) win32/win32.h (@6026..) pod/perlop.pod (@6206..) p4raw-integrated: from //depot/cfgperl@6221 'copy in' utf8.c (@6174..) doop.c (@6193..) toke.c (@6196..) 'merge in' embed.pl (@6217..) p4raw-integrated: from //depot/cfgperl@6220 'merge in' makedef.pl (@6156..) --- doop.c | 212 +++---------------------------------------------------- embed.pl | 4 +- lib/Pod/Usage.pm | 2 +- makedef.pl | 3 + pod/perlop.pod | 13 +--- toke.c | 29 +------- utf8.c | 66 +++++++++++++++++ win32/win32.h | 7 ++ 8 files changed, 92 insertions(+), 244 deletions(-) diff --git a/doop.c b/doop.c index ebac52f..fe2df46 100644 --- a/doop.c +++ b/doop.c @@ -214,188 +214,6 @@ S_do_trans_UU_count(pTHX_ SV *sv) } STATIC I32 -S_do_trans_UC_simple(pTHX_ SV *sv) -{ - dTHR; - U8 *s; - U8 *send; - U8 *d; - I32 matches = 0; - STRLEN len; - - SV* rv = (SV*)cSVOP->op_sv; - HV* hv = (HV*)SvRV(rv); - SV** svp = hv_fetch(hv, "NONE", 4, FALSE); - UV none = svp ? SvUV(*svp) : 0x7fffffff; - UV extra = none + 1; - UV final; - UV uv; - - s = (U8*)SvPV(sv, len); - send = s + len; - - svp = hv_fetch(hv, "FINAL", 5, FALSE); - if (svp) - final = SvUV(*svp); - - d = s; - while (s < send) { - if ((uv = swash_fetch(rv, s)) < none) { - s += UTF8SKIP(s); - matches++; - *d++ = (U8)uv; - } - else if (uv == none) { - I32 ulen; - uv = utf8_to_uv(s, &ulen); - s += ulen; - *d++ = (U8)uv; - } - else if (uv == extra) { - s += UTF8SKIP(s); - matches++; - *d++ = (U8)final; - } - else - s += UTF8SKIP(s); - } - *d = '\0'; - SvCUR_set(sv, d - (U8*)SvPVX(sv)); - SvSETMAGIC(sv); - - return matches; -} - -STATIC I32 -S_do_trans_CU_simple(pTHX_ SV *sv) -{ - dTHR; - U8 *s; - U8 *send; - U8 *d; - U8 *dst; - I32 matches = 0; - STRLEN len; - - SV* rv = (SV*)cSVOP->op_sv; - HV* hv = (HV*)SvRV(rv); - SV** svp = hv_fetch(hv, "NONE", 4, FALSE); - UV none = svp ? SvUV(*svp) : 0x7fffffff; - UV extra = none + 1; - UV final; - UV uv; - U8 tmpbuf[UTF8_MAXLEN]; - I32 bits = 16; - - s = (U8*)SvPV(sv, len); - send = s + len; - - svp = hv_fetch(hv, "BITS", 4, FALSE); - if (svp) - bits = (I32)SvIV(*svp); - - svp = hv_fetch(hv, "FINAL", 5, FALSE); - if (svp) - final = SvUV(*svp); - - Newz(801, d, len * (bits >> 3) + 1, U8); - dst = d; - - while (s < send) { - uv = *s++; - if (uv < 0x80) - tmpbuf[0] = uv; - else { - tmpbuf[0] = (( uv >> 6) | 0xc0); - tmpbuf[1] = (( uv & 0x3f) | 0x80); - } - - if ((uv = swash_fetch(rv, tmpbuf)) < none) { - matches++; - d = uv_to_utf8(d, uv); - } - else if (uv == none) - d = uv_to_utf8(d, s[-1]); - else if (uv == extra) { - matches++; - d = uv_to_utf8(d, final); - } - } - *d = '\0'; - sv_usepvn_mg(sv, (char*)dst, d - dst); - - return matches; -} - -/* utf-8 to latin-1 */ - -STATIC I32 -S_do_trans_UC_trivial(pTHX_ SV *sv) -{ - dTHR; - U8 *s; - U8 *send; - U8 *d; - STRLEN len; - - s = (U8*)SvPV(sv, len); - send = s + len; - - d = s; - while (s < send) { - if (*s < 0x80) - *d++ = *s++; - else { - I32 ulen; - UV uv = utf8_to_uv(s, &ulen); - s += ulen; - *d++ = (U8)uv; - } - } - *d = '\0'; - SvCUR_set(sv, d - (U8*)SvPVX(sv)); - SvSETMAGIC(sv); - - return SvCUR(sv); -} - -/* latin-1 to utf-8 */ - -STATIC I32 -S_do_trans_CU_trivial(pTHX_ SV *sv) -{ - dTHR; - U8 *s; - U8 *send; - U8 *d; - U8 *dst; - I32 matches; - STRLEN len; - - s = (U8*)SvPV(sv, len); - send = s + len; - - Newz(801, d, len * 2 + 1, U8); - dst = d; - - matches = send - s; - - while (s < send) { - if (*s < 0x80) - *d++ = *s++; - else { - UV uv = *s++; - *d++ = (( uv >> 6) | 0xc0); - *d++ = (( uv & 0x3f) | 0x80); - } - } - *d = '\0'; - sv_usepvn_mg(sv, (char*)dst, d - dst); - - return matches; -} - -STATIC I32 S_do_trans_UU_complex(pTHX_ SV *sv) { dTHR; @@ -601,31 +419,19 @@ Perl_do_trans(pTHX_ SV *sv) switch (PL_op->op_private & 63) { case 0: - return do_trans_CC_simple(sv); - - case OPpTRANS_FROM_UTF: - return do_trans_UC_simple(sv); - - case OPpTRANS_TO_UTF: - return do_trans_CU_simple(sv); - - case OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF: - return do_trans_UU_simple(sv); + if (SvUTF8(sv)) + return do_trans_UU_simple(sv); + else + return do_trans_CC_simple(sv); case OPpTRANS_IDENTICAL: - return do_trans_CC_count(sv); - - case OPpTRANS_FROM_UTF|OPpTRANS_IDENTICAL: - return do_trans_UC_trivial(sv); - - case OPpTRANS_TO_UTF|OPpTRANS_IDENTICAL: - return do_trans_CU_trivial(sv); - - case OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF|OPpTRANS_IDENTICAL: - return do_trans_UU_count(sv); + if (SvUTF8(sv)) + return do_trans_UU_count(sv); + else + return do_trans_CC_count(sv); default: - if (PL_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF)) + if (SvUTF8(sv)) return do_trans_UU_complex(sv); /* could be UC or CU too */ else return do_trans_CC_complex(sv); diff --git a/embed.pl b/embed.pl index 7afe36d..7d15916 100755 --- a/embed.pl +++ b/embed.pl @@ -2048,6 +2048,8 @@ Ap |U8* |utf16_to_utf8 |U16* p|U8 *d|I32 bytelen Ap |U8* |utf16_to_utf8_reversed|U16* p|U8 *d|I32 bytelen Ap |I32 |utf8_distance |U8 *a|U8 *b Ap |U8* |utf8_hop |U8 *s|I32 off +Ap |U8* |utf8_to_bytes |U8 *s|STRLEN len +Ap |U8* |bytes_to_utf8 |U8 *s|STRLEN len Ap |UV |utf8_to_uv |U8 *s|I32* retlen Ap |U8* |uv_to_utf8 |U8 *d|UV uv p |void |vivify_defelem |SV* sv @@ -2189,8 +2191,6 @@ s |I32 |do_trans_CC_complex |SV *sv s |I32 |do_trans_UU_simple |SV *sv s |I32 |do_trans_UU_count |SV *sv s |I32 |do_trans_UU_complex |SV *sv -s |I32 |do_trans_UC_simple |SV *sv -s |I32 |do_trans_CU_simple |SV *sv s |I32 |do_trans_UC_trivial |SV *sv s |I32 |do_trans_CU_trivial |SV *sv #endif diff --git a/lib/Pod/Usage.pm b/lib/Pod/Usage.pm index aa8f712..571588e 100644 --- a/lib/Pod/Usage.pm +++ b/lib/Pod/Usage.pm @@ -211,7 +211,7 @@ convenient to use as an innocent looking error message handling function: ## Check for too many filenames pod2usage("$0: Too many files given.\n") if (@ARGV > 1); -Some user's however may feel that the above "economy of expression" is +Some users however may feel that the above "economy of expression" is not particularly readable nor consistent and may instead choose to do something more like the following: diff --git a/makedef.pl b/makedef.pl index b47237c..108993c 100644 --- a/makedef.pl +++ b/makedef.pl @@ -403,6 +403,8 @@ unless ($define{'USE_5005THREADS'}) { PL_svref_mutex PL_cred_mutex PL_eval_mutex + PL_fdpid_mutex + PL_sv_lock_mutex PL_eval_cond PL_eval_owner PL_threads_mutex @@ -419,6 +421,7 @@ unless ($define{'USE_5005THREADS'}) { Perl_find_threadsv Perl_unlock_condpair Perl_magic_mutexfree + Perl_lock )]; } diff --git a/pod/perlop.pod b/pod/perlop.pod index b4caed9..3c84e60 100644 --- a/pod/perlop.pod +++ b/pod/perlop.pod @@ -1207,9 +1207,9 @@ to occur that you might want. Here are two common cases: # expand tabs to 8-column spacing 1 while s/\t+/' ' x (length($&)*8 - length($`)%8)/e; -=item tr/SEARCHLIST/REPLACEMENTLIST/cdsUC +=item tr/SEARCHLIST/REPLACEMENTLIST/cds -=item y/SEARCHLIST/REPLACEMENTLIST/cdsUC +=item y/SEARCHLIST/REPLACEMENTLIST/cds Transliterates all occurrences of the characters found in the search list with the corresponding character in the replacement list. It returns @@ -1243,8 +1243,6 @@ Options: c Complement the SEARCHLIST. d Delete found but unreplaced characters. s Squash duplicate replaced characters. - U Translate to/from UTF-8. - C Translate to/from 8-bit char (octet). If the C modifier is specified, the SEARCHLIST character set is complemented. If the C modifier is specified, any characters @@ -1262,10 +1260,6 @@ enough. If the REPLACEMENTLIST is empty, the SEARCHLIST is replicated. This latter is useful for counting characters in a class or for squashing character sequences in a class. -The first C or C modifier applies to the left side of the translation. -The second one applies to the right side. If present, these modifiers override -the current utf8 state. - Examples: $ARGV[1] =~ tr/A-Z/a-z/; # canonicalize to lower case @@ -1285,9 +1279,6 @@ Examples: tr [\200-\377] [\000-\177]; # delete 8th bit - tr/\0-\xFF//CU; # change Latin-1 to Unicode - tr/\0-\x{FF}//UC; # change Unicode to Latin-1 - If multiple transliterations are given for a character, only the first one is used: diff --git a/toke.c b/toke.c index 05822e1..fe14358 100644 --- a/toke.c +++ b/toke.c @@ -6130,45 +6130,20 @@ S_scan_trans(pTHX_ char *start) Perl_croak(aTHX_ "Transliteration replacement not terminated"); } - if (UTF) { - o = newSVOP(OP_TRANS, 0, 0); - utf8 = OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF; - } - else { New(803,tbl,256,short); o = newPVOP(OP_TRANS, 0, (char*)tbl); - utf8 = 0; - } complement = del = squash = 0; - while (strchr("cdsCU", *s)) { + while (strchr("cds", *s)) { if (*s == 'c') complement = OPpTRANS_COMPLEMENT; else if (*s == 'd') del = OPpTRANS_DELETE; else if (*s == 's') squash = OPpTRANS_SQUASH; - else { - switch (count++) { - case 0: - if (*s == 'C') - utf8 &= ~OPpTRANS_FROM_UTF; - else - utf8 |= OPpTRANS_FROM_UTF; - break; - case 1: - if (*s == 'C') - utf8 &= ~OPpTRANS_TO_UTF; - else - utf8 |= OPpTRANS_TO_UTF; - break; - default: - Perl_croak(aTHX_ "Too many /C and /U options"); - } - } s++; } - o->op_private = del|squash|complement|utf8; + o->op_private = del|squash|complement; PL_lex_op = o; yylval.ival = OP_TRANS; diff --git a/utf8.c b/utf8.c index 76eb932..b570b12 100644 --- a/utf8.c +++ b/utf8.c @@ -222,6 +222,72 @@ Perl_utf8_hop(pTHX_ U8 *s, I32 off) return s; } +/* +=for apidoc utf8_to_bytes + +Converts a string C of length C from UTF8 into ASCII encoding. +Unlike C, this over-writes the original string. + +=cut +*/ + +U8 * +Perl_utf8_to_bytes(pTHX_ U8* s, STRLEN len) +{ + dTHR; + U8 *send; + U8 *d; + U8 *save; + + send = s + len; + d = save = s; + while (s < send) { + if (*s < 0x80) + *d++ = *s++; + else { + I32 ulen; + UV uv = utf8_to_uv(s, &ulen); + s += ulen; + *d++ = (U8)uv; + } + } + *d = '\0'; + return save; +} + +/* +=for apidoc bytes_to_utf8 + +Converts a string C of length C from ASCII into UTF8 encoding. +Returns a pointer to the newly-created string. + +*/ + +U8* +Perl_bytes_to_utf8(pTHX_ U8* s, STRLEN len) +{ + dTHR; + U8 *send; + U8 *d; + U8 *dst; + send = s + len; + + Newz(801, d, len * 2 + 1, U8); + dst = d; + + while (s < send) { + if (*s < 0x80) + *d++ = *s++; + else { + UV uv = *s++; + *d++ = (( uv >> 6) | 0xc0); + *d++ = (( uv & 0x3f) | 0x80); + } + } + *d = '\0'; + return dst; +} + /* XXX NOTHING CALLS THE FOLLOWING TWO ROUTINES YET!!! */ /* * Convert native or reversed UTF-16 to UTF-8. diff --git a/win32/win32.h b/win32/win32.h index 3929bcc..2e5b074 100644 --- a/win32/win32.h +++ b/win32/win32.h @@ -135,6 +135,12 @@ struct utsname { #define USE_FIXED_OSFHANDLE #endif +/* Define PERL_WIN32_SOCK_DLOAD to have Perl dynamically load the winsock + DLL when needed. Don't use if your compiler supports delayloading (ie, VC++ 6.0) + -- BKS 5-29-2000 */ +#if !(defined(_M_IX86) && _MSC_VER >= 1200) +#define PERL_WIN32_SOCK_DLOAD +#endif #define ENV_IS_CASELESS #ifndef VER_PLATFORM_WIN32_WINDOWS /* VC-2.0 headers don't have this */ @@ -213,6 +219,7 @@ typedef long gid_t; #endif #define flushall _flushall #define fcloseall _fcloseall +#define isnan _isnan /* ...same libraries as MSVC */ #ifdef PERL_OBJECT # define MEMBER_TO_FPTR(name) &(name) -- 2.7.4