[Patch] doc patch on Unicode

author SADAHIRO Tomoyuki <BQW10602@nifty.com>

Sun, 19 May 2002 01:01:58 +0000 (10:01 +0900)

committer Jarkko Hietaniemi <jhi@iki.fi>

Sat, 18 May 2002 15:40:35 +0000 (15:40 +0000)
author SADAHIRO Tomoyuki <BQW10602@nifty.com>
Sun, 19 May 2002 01:01:58 +0000 (10:01 +0900)
committer Jarkko Hietaniemi <jhi@iki.fi>
Sat, 18 May 2002 15:40:35 +0000 (15:40 +0000)
diff --git a/pod/perlmodlib.pod b/pod/perlmodlib.pod

index 3a687070a39f0fd3b8106e34454443842ed938ab..a1ecea7df1f6f52c37fb358c0f2ba39f945c2a5a 100644 (file)
--- a/pod/perlmodlib.pod
+++ b/pod/perlmodlib.pod
@@ -53,6 +53,10 @@ The following pragmas are defined (and have their own documentation).
  
  Get/set subroutine or variable attributes
  
+=item attrs
+
+Set/get attributes of a subroutine (deprecated)
+
  =item autouse
  
  Postpone load of modules until a function is used
@@ -83,7 +87,7 @@ Force byte semantics rather than character semantics
  
  =item charnames
  
-Define character names for C<\N{named}> string literal escapes.
+Define character names for C<\N{named}> string literal escapes
  
  =item constant
  
@@ -95,7 +99,7 @@ Perl compiler pragma to force verbose warning diagnostics
  
  =item encoding
  
-Pragma to control the conversion of legacy data into Unicode
+Allows you to write your script in a non-ASCII or non-UTF-8 encoding
  
  =item fields
  
@@ -125,6 +129,10 @@ Use and avoid POSIX locales for built-in operations
  
  Set default disciplines for input and output
  
+=item ops
+
+Restrict unsafe operations when compiling
+
  =item overload
  
  Package for overloading perl operations
@@ -149,6 +157,10 @@ Restrict unsafe constructs
  
  Predeclare sub names
  
+=item threads
+
+Perl extension allowing use of interpreter-based threads from perl
+
  =item utf8
  
  Enable/disable UTF-8 (or UTF-EBCDIC) in source code
@@ -195,10 +207,82 @@ Load subroutines only on demand
  
  Split a package for autoloading
  
+=item B
+
+The Perl Compiler
+
+=item B::Asmdata
+
+Autogenerated data about Perl ops, used to generate bytecode
+
+=item B::Assembler
+
+Assemble Perl bytecode
+
+=item B::Bblock
+
+Walk basic blocks
+
+=item B::Bytecode
+
+Perl compiler's bytecode backend
+
+=item B::C
+
+Perl compiler's C backend
+
+=item B::CC
+
+Perl compiler's optimized C translation backend
+
+=item B::Concise
+
+Walk Perl syntax tree, printing concise info about ops
+
+=item B::Debug
+
+Walk Perl syntax tree, printing debug info about ops
+
+=item B::Deparse
+
+Perl compiler backend to produce perl code
+
+=item B::Disassembler
+
+Disassemble Perl bytecode
+
+=item B::Lint
+
+Perl lint
+
+=item B::Showlex
+
+Show lexical variables used in functions or files
+
+=item B::Stackobj
+
+Helper module for CC backend
+
+=item B::Stash
+
+Show what stashes are loaded
+
+=item B::Terse
+
+Walk Perl syntax tree, printing terse info about ops
+
+=item B::Xref
+
+Generates cross reference reports for Perl programs
+
  =item Benchmark
  
  Benchmark running times of Perl code
  
+=item ByteLoader
+
+Load byte compiled perl code
+
  =item CGI
  
  Simple Common Gateway Interface Class
@@ -271,6 +355,10 @@ Get pathname of current working directory
  
  Programmatic interface to the Perl debugging API (draft, subject to
  
+=item DB_File
+
+Perl5 access to Berkeley DB version 1.x
+
  =item Devel::SelfStubber
  
  Generate stubs for a SelfLoading module
@@ -287,6 +375,10 @@ Supply object methods for directory handles
  
  Provides screen dump of Perl data.
  
+=item Encode
+
+Character encodings
+
  =item English
  
  Use nice English (or awk) names for ugly punctuation variables
@@ -307,6 +399,10 @@ Exporter guts
  
  Utilities to replace common UNIX commands in Makefiles etc.
  
+=item ExtUtils::Command::MM
+
+Commands for the MM's to use in Makefiles
+
  =item ExtUtils::Constant
  
  Generate XS code to import C header constants
@@ -327,6 +423,14 @@ Inventory management of installed modules
  
  Determine libraries to use and how to use them
  
+=item ExtUtils::MM
+
+OS adjusted ExtUtils::MakeMaker subclass
+
+=item ExtUtils::MM_Any
+
+Platform agnostic MM methods
+
  =item ExtUtils::MM_BeOS
  
  Methods to override UN*X behaviour in ExtUtils::MakeMaker
@@ -335,6 +439,14 @@ Methods to override UN*X behaviour in ExtUtils::MakeMaker
  
  Methods to override UN*X behaviour in ExtUtils::MakeMaker
  
+=item ExtUtils::MM_DOS
+
+DOS specific subclass of ExtUtils::MM_Unix
+
+=item ExtUtils::MM_MacOS
+
+Methods to override UN*X behaviour in ExtUtils::MakeMaker
+
  =item ExtUtils::MM_NW5
  
  Methods to override UN*X behaviour in ExtUtils::MakeMaker
@@ -343,6 +455,10 @@ Methods to override UN*X behaviour in ExtUtils::MakeMaker
  
  Methods to override UN*X behaviour in ExtUtils::MakeMaker
  
+=item ExtUtils::MM_UWIN
+
+U/WIN specific subclass of ExtUtils::MM_Unix
+
  =item ExtUtils::MM_Unix
  
  Methods used by ExtUtils::MakeMaker
@@ -355,6 +471,14 @@ Methods to override UN*X behaviour in ExtUtils::MakeMaker
  
  Methods to override UN*X behaviour in ExtUtils::MakeMaker
  
+=item ExtUtils::MM_Win95
+
+Method to customize MakeMaker for Win9X
+
+=item ExtUtils::MY
+
+ExtUtils::MakeMaker subclass for customization
+
  =item ExtUtils::MakeMaker
  
  Create an extension Makefile
@@ -383,6 +507,10 @@ Add blib/* directories to @INC
  
  Replace functions with equivalents which succeed or die
  
+=item Fcntl
+
+Load the C Fcntl.h defines
+
  =item File::Basename
  
  Split a pathname into pieces
@@ -495,6 +623,10 @@ Functions for dealing with RFC3066-style language tags
  
  Tags and names for human languages
  
+=item IO
+
+Load various IO modules
+
  =item IPC::Open2
  
  Open a process for both reading and writing
@@ -557,7 +689,7 @@ Trigonometric functions
  
  =item Memoize
  
-Make your functions faster by trading space for time
+Make functions faster by trading space for time
  
  =item Memoize::AnyDBM_File
  
@@ -587,6 +719,10 @@ Glue to provide EXISTS for SDBM_File for Storable use
  
  Store Memoized data in Storable database
  
+=item NDBM_File
+
+Tied access to ndbm files
+
  =item NEXT
  
  Provide a pseudo-class NEXT that allows method redispatch
@@ -651,6 +787,22 @@ By-name interface to Perl's built-in getproto*() functions
  
  By-name interface to Perl's built-in getserv*() functions
  
+=item O
+
+Generic interface to Perl Compiler backends
+
+=item ODBM_File
+
+Tied access to odbm files
+
+=item Opcode
+
+Disable named opcodes when compiling perl code
+
+=item POSIX
+
+Perl interface to IEEE Std 1003.1
+
  =item PerlIO
  
  On demand loader for PerlIO layers and root of PerlIO::* name space
@@ -727,6 +879,14 @@ Print a usage message from embedded pod documentation
  
  Test of various basic POD features in translators.
  
+=item SDBM_File
+
+Tied access to sdbm files
+
+=item Safe
+
+Compile and execute code in restricted compartments
+
  =item Search::Dict
  
  Search for key in dictionary file
@@ -743,6 +903,14 @@ Load functions only on demand
  
  Run shell commands transparently within perl
  
+=item Socket
+
+Load the C socket.h defines and structure manipulators
+
+=item Storable
+
+Persistence for Perl data structures
+
  =item Switch
  
  A switch statement for Perl
@@ -831,6 +999,14 @@ Line wrapping to form simple paragraphs
  
  Manipulate threads in Perl
  
+=item Thread::Queue
+
+Thread-safe queues
+
+=item Thread::Semaphore
+
+Thread-safe semaphores
+
  =item Tie::Array
  
  Base class for tied arrays
@@ -885,7 +1061,7 @@ Base class for ALL classes (blessed references)
  
  =item Unicode::Collate
  
-Use UCA (Unicode Collation Algorithm)
+Unicode Collation Algorithm
  
  =item Unicode::UCD
  
diff --git a/pod/perlunicode.pod b/pod/perlunicode.pod

index d2c48e26b508f170fcaf25970a14c6bda579fe14..38cd9c7b20c56adb8c5737e5366d1913586e3929 100644 (file)
--- a/pod/perlunicode.pod
+++ b/pod/perlunicode.pod
@@ -162,7 +162,7 @@ Named Unicode properties, scripts, and block ranges may be used like
  character classes via the new C<\p{}> (matches property) and C<\P{}>
  (doesn't match property) constructs. For instance, C<\p{Lu}> matches any
  character with the Unicode "Lu" (Letter, uppercase) property, while
-C<\p{M}> matches any character with a "M" (mark -- accents and such)
+C<\p{M}> matches any character with an "M" (mark -- accents and such)
  property. Single letter properties may omit the brackets, so that can be
  written C<\pM> also. Many predefined properties are available, such
  as C<\p{Mirrored}> and C<\p{Tibetan}>.
@@ -814,11 +814,11 @@ The following table is from Unicode 3.2.
  
     U+0000..U+007F       00..7F
     U+0080..U+07FF       C2..DF    80..BF
-   U+0800..U+0FFF       E0        A0..BF    80..BF  
-   U+1000..U+CFFF       E1..EC    80..BF    80..BF  
-   U+D000..U+D7FF       ED        80..9F    80..BF  
+   U+0800..U+0FFF       E0        A0..BF    80..BF
+   U+1000..U+CFFF       E1..EC    80..BF    80..BF
+   U+D000..U+D7FF       ED        80..9F    80..BF
     U+D800..U+DFFF       ******* ill-formed *******
-   U+E000..U+FFFF       EE..EF    80..BF    80..BF  
+   U+E000..U+FFFF       EE..EF    80..BF    80..BF
    U+10000..U+3FFFF      F0        90..BF    80..BF    80..BF
    U+40000..U+FFFFF      F1..F3    80..BF    80..BF    80..BF
   U+100000..U+10FFFF     F4        80..8F    80..BF    80..BF
@@ -857,15 +857,15 @@ UTF-16, UTF-16BE, UTF-16LE, Surrogates, and BOMs (Byte Order Marks)
  use them internally.)
  
  UTF-16 is a 2 or 4 byte encoding.  The Unicode code points
-0x0000..0xFFFF are stored in two 16-bit units, and the code points
-0x010000..0x10FFFF in two 16-bit units.  The latter case is
+U+0000..U+FFFF are stored in a single 16-bit unit, and the code points
+U+10000..U+10FFFF in two 16-bit units.  The latter case is
  using I<surrogates>, the first 16-bit unit being the I<high
  surrogate>, and the second being the I<low surrogate>.
  
-Surrogates are code points set aside to encode the 0x01000..0x10FFFF
+Surrogates are code points set aside to encode the U+10000..U+10FFFF
  range of Unicode code points in pairs of 16-bit units.  The I<high
-surrogates> are the range 0xD800..0xDBFF, and the I<low surrogates>
-are the range 0xDC00..0xDFFFF.  The surrogate encoding is
+surrogates> are the range U+D800..U+DBFF, and the I<low surrogates>
+are the range U+DC00..U+DFFF.  The surrogate encoding is
  
         $hi = ($uni - 0x10000) / 0x400 + 0xD800;
         $lo = ($uni - 0x10000) % 0x400 + 0xDC00;
@@ -888,7 +888,7 @@ This introduces another problem: what if you just know that your data
  is UTF-16, but you don't know which endianness?  Byte Order Marks
  (BOMs) are a solution to this.  A special character has been reserved
  in Unicode to function as a byte order marker: the character with the
-code point 0xFEFF is the BOM.
+code point U+FEFF is the BOM.
  
  The trick is that if you read a BOM, you will know the byte order,
  since if it was written on a big endian platform, you will read the
@@ -897,9 +897,9 @@ you will read the bytes 0xFF 0xFE.  (And if the originating platform
  was writing in UTF-8, you will read the bytes 0xEF 0xBB 0xBF.)
  
  The way this trick works is that the character with the code point
-0xFFFE is guaranteed not to be a valid Unicode character, so the
+U+FFFE is guaranteed not to be a valid Unicode character, so the
  sequence of bytes 0xFF 0xFE is unambiguously "BOM, represented in
-little-endian format" and cannot be "0xFFFE, represented in big-endian
+little-endian format" and cannot be "U+FFFE, represented in big-endian
  format".
  
  =item *
@@ -916,7 +916,7 @@ needed.  The BOM signatures will be 0x00 0x00 0xFE 0xFF for BE and
  UCS-2, UCS-4
  
  Encodings defined by the ISO 10646 standard.  UCS-2 is a 16-bit
-encoding.  Unlike UTF-16, UCS-2 is not extensible beyond 0xFFFF,
+encoding.  Unlike UTF-16, UCS-2 is not extensible beyond U+FFFF,
  because it does not use surrogates.  UCS-4 is a 32-bit encoding,
  functionally identical to UTF-32.
  
diff --git a/pod/perluniintro.pod b/pod/perluniintro.pod

index d6eae60c4be41d4939466cfa63f887ef8f942ed5..743d4ed9166bd727c87743c82c7bdf78c2731f69 100644 (file)
--- a/pod/perluniintro.pod
+++ b/pod/perluniintro.pod
@@ -302,17 +302,23 @@ To ensure that the output is explicitly rendered in the encoding you
  desire (and to avoid the warning), open the stream with the desired
  encoding. Some examples:
  
-    open FH, ">:ucs2",      "file"
-    open FH, ">:utf8",      "file";
-    open FH, ">:Shift-JIS", "file";
+    open FH, ">:utf8", "file";
+
+    open FH, ">:encoding(ucs2)",      "file";
+    open FH, ">:encoding(UTF-8)",     "file";
+    open FH, ">:encoding(shift_jis)", "file";
  
  and on already open streams use C<binmode()>:
  
-    binmode(STDOUT, ":ucs2");
      binmode(STDOUT, ":utf8");
-    binmode(STDOUT, ":Shift-JIS");
  
-See documentation for the C<Encode> module for many supported encodings.
+    binmode(STDOUT, ":encoding(ucs2)");
+    binmode(STDOUT, ":encoding(UTF-8)");
+    binmode(STDOUT, ":encoding(shift_jis)");
+
+See L<PerlIO> for the C<:utf8> layer;
+L<PerlIO::encoding> and L<Encode::PerlIO> for the C<:encoding()> layer;
+and L<Encode::Supported> for the many encodings supported by the C<Encode> module.
  
  Reading in a file that you know happens to be encoded in one of the
  Unicode encodings does not magically turn the data into Unicode in
@@ -322,7 +328,7 @@ opening files
      open(my $fh,'<:utf8', 'anything');
      my $line_of_unicode = <$fh>;
  
-    open(my $fh,'<:Big5', 'anything');
+    open(my $fh,'<:encoding(Big5)', 'anything');
      my $line_of_unicode = <$fh>;
  
  The I/O disciplines can also be specified more flexibly with
author	SADAHIRO Tomoyuki <BQW10602@nifty.com>
	Sun, 19 May 2002 01:01:58 +0000 (10:01 +0900)
committer	Jarkko Hietaniemi <jhi@iki.fi>
	Sat, 18 May 2002 15:40:35 +0000 (15:40 +0000)
pod/perlmodlib.pod		patch | blob | history
pod/perlunicode.pod		patch | blob | history
pod/perluniintro.pod		patch | blob | history