Upstream version 11.40.271.0
[platform/framework/web/crosswalk.git] / src / third_party / icu / scripts / single_byte_gen.sh
1 #!/bin/bash
2 # Copyright (c) 2014 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
5
#######################################
# Print the header of a .ucm mapping file for one single-byte encoding,
# ending with the line that opens the CHARMAP section.
# Arguments: $1 - encoding name as used in index-<name>.txt (e.g. koi8-r)
# Outputs:   writes the header to stdout
#######################################
preamble() {
  # local, so that a call does not overwrite a global variable of this name.
  local encoding="$1"

  # The here-doc delimiter is unquoted on purpose: ${encoding} must expand,
  # while \x3F is passed through unchanged.
  cat <<PREAMBLE
# ***************************************************************************
# *
# *   Generated from index-$encoding.txt (
# *   https://encoding.spec.whatwg.org/index-${encoding}.txt )
# *   following the algorithm for the single byte legacy encoding
# *   described at http://encoding.spec.whatwg.org/#single-byte-decoder
# *
# ***************************************************************************
<code_set_name>               "${encoding}-html"
<char_name_mask>              "AXXXX"
<mb_cur_max>                  1
<mb_cur_min>                  1
<uconv_class>                 "SBCS"
<subchar>                     \x3F
<icu:charsetFamily>           "ASCII"

CHARMAP
PREAMBLE
}
30
# The list of html5 encodings. Note that iso-8859-8-i is not listed here
# because its mapping table is exactly the same as iso-8859-8. The difference
# is BiDi handling (logical vs visual).
encodings="ibm866 iso-8859-2 iso-8859-3 iso-8859-4 iso-8859-5 iso-8859-6\
           iso-8859-7 iso-8859-8 iso-8859-10 iso-8859-13 iso-8859-14\
           iso-8859-15 iso-8859-16 koi8-r koi8-u macintosh\
           windows-874 windows-1250 windows-1251 windows-1252 windows-1253\
           windows-1254 windows-1255 windows-1256 windows-1257 windows-1258\
           x-mac-cyrillic"

# Output directory for the generated .ucm files, relative to this script.
# "$0" is quoted so a checkout path containing spaces still works.
ENCODING_DIR="$(dirname "$0")/../source/data/mappings"

# ${encodings} is intentionally unquoted: word splitting yields one name per
# iteration.
for e in ${encodings}
do
  output="${ENCODING_DIR}/${e}-html.ucm"
  index="index-${e}.txt"
  indexurl="https://encoding.spec.whatwg.org/index-${e}.txt"

  # --fail makes curl exit non-zero on an HTTP error instead of saving the
  # error page; stop before an existing mapping file gets overwritten.
  if ! curl --fail -o "${index}" "${indexurl}"; then
    echo "Failed to download ${indexurl}" >&2
    rm -f "${index}"
    exit 1
  fi

  preamble "${e}" > "${output}"

  # The constant 128 is written in decimal because hexadecimal constants in
  # awk program text are a gawk extension. LC_ALL=C keeps the sort order
  # independent of the locale of whoever runs the script.
  awk '
    BEGIN {
      # Bytes 0x00-0x7F map to the code points with the same value.
      for (i = 0; i < 128; ++i) {
        printf("<U%04X> \\x%02X |0\n", i, i)
      }
    }
    # Remaining lines (not comments, not empty): field 1 is the index into
    # the upper half of the byte range, field 2 is the code point as 0xXXXX.
    !/^#/ && !/^$/ {
      printf("<U%4s> \\x%02X |0\n", substr($2, 3), $1 + 128)
    }
  ' "${index}" | LC_ALL=C sort >> "${output}"

  echo 'END CHARMAP' >> "${output}"
  rm "${index}"
done
62