2 * Copyright (C) 2014 Google Inc. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above
11 * copyright notice, this list of conditions and the following disclaimer
12 * in the documentation and/or other materials provided with the
14 * * Neither the name of Google Inc. nor the names of its
15 * contributors may be used to endorse or promote products derived from
16 * this software without specific prior written permission.
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 #include "platform/PlatformExport.h"
35 #include "platform/text/TextDirection.h"
36 #include "platform/text/TextPath.h"
37 #include "wtf/HashSet.h"
38 #include "wtf/text/WTFString.h"
39 #include "wtf/unicode/CharacterNames.h"
43 class PLATFORM_EXPORT Character {
// Code-path selection for a run of characters.
// The 8-bit (LChar) overload is defined inline and always answers SimplePath
// without looking at its arguments. The 16-bit (UChar) overload is only
// declared here; its definition is not part of this excerpt.
45 static CodePath characterRangeCodePath(const LChar*, unsigned) { return SimplePath; }
46 static CodePath characterRangeCodePath(const UChar*, unsigned len);
// Boolean predicates over a single UChar32 code point. Only the declarations
// are visible in this excerpt.
// NOTE(review): judging by the names, the first classifies CJK ideographs and
// the second additionally accepts CJK symbols — confirm against the definitions.
48 static bool isCJKIdeograph(UChar32);
49 static bool isCJKIdeographOrSymbol(UChar32);
// Declared for both 8-bit (LChar) and 16-bit (UChar) buffers of `length`
// characters; returns an unsigned count. isAfterExpansion is taken by
// non-const reference, so the callee is able to update the caller's flag.
// NOTE(review): presumably counts the positions where extra spacing may be
// inserted for the given TextDirection — confirm against the definitions,
// which are not part of this excerpt.
51 static unsigned expansionOpportunityCount(const LChar*, size_t length, TextDirection, bool& isAfterExpansion);
52 static unsigned expansionOpportunityCount(const UChar*, size_t length, TextDirection, bool& isAfterExpansion);
// Predicate on a single UChar. The visible part of the condition accepts
// characterTabulation and newlineCharacter.
// NOTE(review): the beginning and end of the return expression are missing
// from this excerpt, so any other accepted characters cannot be listed from
// here — check the full definition.
54 static bool treatAsSpace(UChar c)
57 || c == characterTabulation
58 || c == newlineCharacter
// Returns true when c satisfies treatAsZeroWidthSpaceInComplexScript(), or
// when c is zeroWidthNonJoiner or zeroWidthJoiner. The two joiner characters
// are the only difference from the complex-script variant.
61 static bool treatAsZeroWidthSpace(UChar c)
63 return treatAsZeroWidthSpaceInComplexScript(c)
64 || c == zeroWidthNonJoiner
65 || c == zeroWidthJoiner;
// Returns true when c is any of the following:
//   - below 0x20,
//   - in the half-open range [0x7F, 0xA0),
//   - zeroWidthSpace,
//   - between leftToRightMark and rightToLeftMark (inclusive),
//   - between leftToRightEmbed and rightToLeftOverride (inclusive),
//   - zeroWidthNoBreakSpace,
//   - objectReplacementCharacter.
// Unlike treatAsZeroWidthSpace(), this variant does not test for
// zeroWidthNonJoiner or zeroWidthJoiner.
// NOTE(review): this excerpt may omit a condition between the 0x7F range test
// and the zeroWidthSpace test — confirm against the full definition.
67 static bool treatAsZeroWidthSpaceInComplexScript(UChar c)
69 return c < 0x20 // ASCII Control Characters
70 || (c >= 0x7F && c < 0xA0) // ASCII Delete .. No-break space
72 || c == zeroWidthSpace
73 || (c >= leftToRightMark && c <= rightToLeftMark)
74 || (c >= leftToRightEmbed && c <= rightToLeftOverride)
75 || c == zeroWidthNoBreakSpace
76 || c == objectReplacementCharacter;
// Boolean predicate over a single UChar32 code point; declaration only.
// NOTE(review): presumably reports whether a text-emphasis mark may be drawn
// for the character — confirm against the definition.
78 static bool canReceiveTextEmphasis(UChar32);
// Per-character normalization. Checks treatAsSpace() first, then
// treatAsZeroWidthSpace(); a character matching the latter is mapped to
// zeroWidthSpace.
// NOTE(review): the value returned for treatAsSpace() characters and the
// fall-through return are missing from this excerpt — check the full
// definition before relying on them.
80 static inline UChar normalizeSpaces(UChar character)
82 if (treatAsSpace(character))
85 if (treatAsZeroWidthSpace(character))
86 return zeroWidthSpace;
// Returns true for U+0009 and for every character in U+000A..U+000D
// (inclusive). U+0020 itself is not matched by this predicate.
91 static inline bool isNormalizedCanvasSpaceCharacter(UChar c)
93 // According to the specification, all space characters should be replaced with the U+0020 space character.
94 // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-canvas-element.html#text-preparation-algorithm
95 // The space characters according to the specification are: U+0020, U+0009, U+000A, U+000C, and U+000D.
96 // http://www.whatwg.org/specs/web-apps/current-work/multipage/common-microsyntaxes.html#space-character
97 // This function also returns true for U+000B, which the specification does not list, to stay backward compatible.
98 // Otherwise, the test LayoutTests/canvas/philip/tests/2d.text.draw.space.collapse.space.html will fail.
99 return c == 0x0009 || (c >= 0x000A && c <= 0x000D);
// Buffer overloads for 8-bit (LChar) and 16-bit (UChar) input of `length`
// characters; each returns a String. Only the declarations are visible here.
// NOTE(review): presumably these apply the per-character space normalization
// across the whole buffer — confirm against the definitions.
102 static String normalizeSpaces(const LChar*, unsigned length);
103 static String normalizeSpaces(const UChar*, unsigned length);