2 * Copyright (c) 2006, 2007, 2008, 2009, Google Inc. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above
11 * copyright notice, this list of conditions and the following disclaimer
12 * in the documentation and/or other materials provided with the
14 * * Neither the name of Google Inc. nor the names of its
15 * contributors may be used to endorse or promote products derived from
16 * this software without specific prior written permission.
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 // A wrapper around Uniscribe that provides a reasonable API.
33 #ifndef UniscribeHelper_h
34 #define UniscribeHelper_h
40 #include <unicode/uchar.h>
41 #include <wtf/Vector.h>
43 class UniscribeTest_TooBig_Test; // A gunit test for UniscribeHelper.
47 class FontFeatureSettings;
48 class GraphicsContext;
// Sizes used as the second template argument of the Vector<> members of
// UniscribeHelper (presumably WTF::Vector's inline capacity, i.e. storage
// reserved inside the object before any heap allocation - confirm in Vector.h).
// Per-run vectors: m_runs, m_shapes, m_scriptTags and m_screenOrder.
50 const unsigned cUniscribeHelperStackRuns = 8;
// Per-glyph / per-character vectors: m_glyphs, m_logs, m_visualAttributes,
// m_advance, m_offsets and m_justify.
51 const unsigned cUniscribeHelperStackChars = 32;
// OpenType feature records held in m_featureRecords.
52 const unsigned cUniscribeHelperFeatures = 4;
54 // This object should be safe to create & destroy frequently, as long as the
55 // caller preserves the script_cache when possible (this data may be slow to
58 // This object is "kind of large" (~1K) because it reserves a lot of working
59 // space to avoid expensive heap operations. Therefore, not only should
60 // you not worry about creating and destroying it, you should try to not keep
62 class UniscribeHelper {
64 // Initializes this Uniscribe run with the text pointed to by |run| with
65 // |length|. The input is NOT null terminated.
67 // The is_rtl flag should be set if the input script is RTL. It is assumed
68 // that the caller has already divided up the input text (using ICU, for
69 // example) into runs of the same direction of script. This avoids
70 // disagreements between the caller and Uniscribe later (see FillItems).
72 // A script cache should be provided by the caller that is initialized to
73 // NULL. When the caller is done with the cache (it may be stored between
74 // runs as long as it is used consistently with the same HFONT), it should
75 // call ScriptFreeCache().
76 UniscribeHelper(const UChar* input,
81 SCRIPT_FONTPROPERTIES*,
84 virtual ~UniscribeHelper();
86 // Sets Uniscribe's directional override flag. False by default.
// Getter: returns the current value of m_directionalOverride.
87 bool directionalOverride() const
89 return m_directionalOverride;
// Setter: stores |override| in m_directionalOverride; nothing else happens
// here.
91 void setDirectionalOverride(bool override)
93 m_directionalOverride = override;
96 // Sets Uniscribe's no-ligate override flag. False by default.
// Getter: returns the current value of m_inhibitLigate.
97 bool inhibitLigate() const
99 return m_inhibitLigate;
// Setter: stores |inhibit| in m_inhibitLigate; nothing else happens here.
101 void setInhibitLigate(bool inhibit)
103 m_inhibitLigate = inhibit;
106 // Set letter spacing. We will try to insert this much space between
107 // graphemes (one or more glyphs perceived as a single unit by ordinary
108 // users of a script). Positive values increase letter spacing, negative
109 // values decrease it. 0 by default.
// Getter: returns the stored letter spacing (m_letterSpacing).
110 int letterSpacing() const
112 return m_letterSpacing;
// Setter: stores |letterSpacing| in m_letterSpacing; the setter itself does
// not recompute any glyph positions.
114 void setLetterSpacing(int letterSpacing)
116 m_letterSpacing = letterSpacing;
119 // Set the width of a standard space character. We use this to normalize
120 // space widths. Windows will make spaces after Hindi characters larger than
121 // other spaces. A space_width of 0 means to use the default space width.
123 // Must be set before Init() is called.
124 int spaceWidth() const
128 void setSpaceWidth(int spaceWidth)
130 m_spaceWidth = spaceWidth;
133 // Set word spacing. We will try to insert this much extra space between
134 // each word in the input (beyond whatever whitespace character separates
135 // words). Positive values lead to increased word spacing, negative values
136 // decrease it. 0 by default.
138 // Must be set before Init() is called.
// Getter: returns the stored word spacing (m_wordSpacing).
139 int wordSpacing() const
141 return m_wordSpacing;
// Setter: stores |wordSpacing| in m_wordSpacing; the setter itself does not
// recompute any glyph positions.
143 void setWordSpacing(int wordSpacing)
145 m_wordSpacing = wordSpacing;
148 void setAscent(int ascent)
153 // When set to true, this class is used only to look up glyph
154 // indices for a range of Unicode characters without glyph placement.
155 // By default, it's false. This should be set to true when this
156 // class is used for glyph index look-up for non-BMP characters
157 // in GlyphPageNodeChromiumWin.cpp.
// |disableFontFallback| is stored exactly as given, so passing false leaves
// the flag cleared rather than unconditionally forcing it to true.
158 void setDisableFontFallback(bool disableFontFallback)
160 m_disableFontFallback = disableFontFallback;
163 // Set TEXTRANGE_PROPERTIES structure which contains
164 // OpenType feature records generated from FontFeatureSettings.
165 void setRangeProperties(const FontFeatureSettings*);
167 // You must call this after setting any options but before doing any
168 // other calls like asking for widths or drawing.
171 initWithOptionalLengthProtection(true);
174 // Returns the total width in pixels of the text run.
177 // Call to justify the text, with the amount of space that should be ADDED
178 // to get the desired width that the column should be justified to.
179 // Normally, spaces are inserted, but for Arabic there will be kashidas
180 // (extra strokes) inserted instead.
182 // This function MUST be called AFTER Init().
183 void justify(int additionalSpace);
185 // Computes the given character offset into a pixel offset of the beginning
186 // of that character.
187 int characterToX(int offset) const;
189 // Converts the given pixel X position into a logical character offset into
190 // the run. For positions appearing before the first character, this will
192 int xToCharacter(int x) const;
194 // Draws the given characters to (x, y) in the given DC. The font will be
195 // handled by this function, but the font color and other attributes should
198 // The y position is the upper left corner, NOT the baseline.
199 void draw(GraphicsContext* graphicsContext, HDC dc, int x, int y, int from,
202 // Returns the first glyph assigned to the character at the given offset.
203 // This function is used to retrieve glyph information when Uniscribe is
204 // being used to generate glyphs for non-complex, non-BMP (above U+FFFF)
205 // characters. These characters are not otherwise special and have no
206 // complex shaping rules, so we don't otherwise need Uniscribe, except
207 // Uniscribe is the only way to get glyphs for non-BMP characters.
209 // Returns 0 if there is no glyph for the given character.
210 WORD firstGlyphForCharacter(int charOffset) const;
213 // Backend for init. The flag allows the unit test to specify whether we
214 // should fail early for very long strings like normal, or try to pass the
215 // long string to Uniscribe. The latter provides a way to force failure of
217 void initWithOptionalLengthProtection(bool lengthProtection);
219 // Tries to preload the font when it is not accessible.
220 // This is the default implementation and it does not do anything.
// Declared virtual so a subclass can supply a real implementation; the HFONT
// parameter is unnamed because this empty body never uses it.
221 virtual void tryToPreloadFont(HFONT) {}
224 friend class UniscribeTest_TooBig_Test;
226 // An array corresponding to each item in runs_ containing information
227 // on each of the glyphs that were generated. Like runs_, this is in
228 // reading order. However, for rtl text, the characters within each
229 // item will be reversed.
234 , m_scriptCache(NULL)
243 // Returns the number of glyphs (which will be drawn to the screen)
// for this item: the size of m_glyphs, converted to int.
245 int glyphLength() const
247 return static_cast<int>(m_glyphs.size());
250 // Returns the number of characters (that we started with) in this run.
// Computed as the size of m_logs, which holds one entry per input character,
// converted to int.
251 int charLength() const
253 return static_cast<int>(m_logs.size());
256 // Returns the advance array that should be used when measuring glyphs.
257 // The returned pointer will indicate an array with glyph_length()
258 // elements and the advance that should be used for each one. This is
259 // either the real advance, or the justified advances if there is one,
260 // and is the array we want to use for measurement.
261 const int* effectiveAdvances() const
263 if (m_advance.size() == 0)
265 if (m_justify.size() == 0)
266 return &m_advance[0];
267 return &m_justify[0];
270 // This is the advance amount of space that we have added to the
271 // beginning of the run. It is like the ABC's |A| advance but one that
272 // we create and must handle internally whenever computing with pixel
276 // Glyph indices in the font used to display this item. These indices
277 // are in screen order.
278 Vector<WORD, cUniscribeHelperStackChars> m_glyphs;
280 // For each input character, this tells us the first glyph index it
281 // generated. This is the only array with size of the input chars.
283 // All offsets are from the beginning of this run. Multiple characters
284 // can generate one glyph, in which case there will be adjacent
285 // duplicates in this list. One character can also generate multiple
286 // glyphs, in which case there will be skipped indices in this list.
287 Vector<WORD, cUniscribeHelperStackChars> m_logs;
289 // Flags and such for each glyph.
290 Vector<SCRIPT_VISATTR, cUniscribeHelperStackChars> m_visualAttributes;
292 // Horizontal advances for each glyph listed above, this is basically
293 // how wide each glyph is.
294 Vector<int, cUniscribeHelperStackChars> m_advance;
296 // This contains glyph offsets, from the nominal position of a glyph.
297 // It is used to adjust the positions of multiple combining characters
298 // around/above/below base characters in a context-sensitive manner so
299 // that they don't bump against each other and the base character.
300 Vector<GOFFSET, cUniscribeHelperStackChars> m_offsets;
302 // Filled by a call to Justify, this is empty for nonjustified text.
303 // If nonempty, this contains the array of justify characters for each
304 // character as returned by ScriptJustify.
306 // This is the same as the advance array, but with extra space added
307 // for some characters. The difference between a glyph's |justify|
308 // width and its |advance| width is the extra space added.
309 Vector<int, cUniscribeHelperStackChars> m_justify;
311 // Sizing information for this run. This treats the entire run as a
312 // character with a preceding advance, width, and ending advance. The
313 // B width is the sum of the |advance| array, and the A and C widths
314 // are any extra spacing applied to each end.
316 // It is unclear from the documentation what this actually means. From
317 // experimentation, it seems that the sum of the character advances is
318 // always the sum of the ABC values, and I'm not sure what you're
319 // supposed to do with the ABC values.
322 // Pointers to windows font data used to render this run.
324 SCRIPT_CACHE* m_scriptCache;
326 // Ascent offset between the ascent of the primary font
327 // and that of the fallback font. The offset needs to be applied,
328 // when drawing a string, to align multiple runs rendered with
335 // Computes the runs_ array from the text run.
338 // Computes the shapes_ array given a runs_ array already filled in.
341 // Fills in the screen_order_ array (see below).
342 void fillScreenOrder();
344 // Called to update the glyph positions based on the current spacing
345 // options that are set.
348 // Normalizes all advances for spaces to the same width. This keeps windows
349 // from making spaces after Hindi characters larger, which is then
350 // inconsistent with our measure of the width since WebKit doesn't include
351 // spaces in text-runs sent to uniscribe unless white-space:pre.
352 void adjustSpaceAdvances();
354 // Returns the total width of a single item.
355 int advanceForItem(int) const;
357 bool containsMissingGlyphs(const Shaping&,
359 const SCRIPT_FONTPROPERTIES*) const;
361 // Shapes a run (pointed to by |input|) using |hfont| first.
362 // Tries a series of fonts retrieved with NextWinFontData
363 // and finally a font covering characters in |*input|. A string pointed
364 // by |input| comes from ScriptItemize and is supposed to contain
365 // characters belonging to a single script aside from characters common to
366 // all scripts (e.g. space).
367 bool shape(const UChar* input, int itemLength, int numGlyphs, SCRIPT_ITEM& run, OPENTYPE_TAG, Shaping&);
369 // Gets Windows font data for the next best font to try in the list
370 // of fonts. When there's no more font available, returns false
371 // without touching any of the out params. Need to call ResetFontIndex
372 // to start scanning of the font list from the beginning.
373 virtual bool nextWinFontData(HFONT*, SCRIPT_CACHE**, SCRIPT_FONTPROPERTIES**, int* ascent)
378 // Resets the font index to the first in the list of fonts to try after the
379 // primaryFont turns out not to work. With fontIndex reset,
380 // NextWinFontData scans fallback fonts from the beginning.
// This base-class version has an empty body; it is virtual so that a
// subclass that keeps a font index can override it.
381 virtual void resetFontIndex() {}
383 // If m_cachedDC is 0, creates one that is compatible with the screen DC.
384 void EnsureCachedDCCreated();
386 // The input data for this run of Uniscribe. See the constructor.
387 const UChar* m_input;
388 const int m_inputLength;
391 // Windows font data for the primary font. In a sense, m_logfont and m_style
392 // are redundant because m_hfont contains all the information. However,
393 // invoking GetObject every time we need the height and the style is rather
394 // expensive, so we cache them. Would it be better to add getter and
395 // (virtual) setter for the height and the style of the primary font,
396 // instead of m_logfont? Then, a derived class ctor can set m_ascent,
397 // m_height and m_style if they're known. Getters for them would have to
398 // 'infer' their values from m_hfont ONLY when they're not set.
400 // We cache the DC to use with ScriptShape/ScriptPlace.
401 static HDC m_cachedDC;
402 SCRIPT_CACHE* m_scriptCache;
403 SCRIPT_FONTPROPERTIES* m_fontProperties;
409 // Options, see the getters/setters above.
410 bool m_directionalOverride;
411 bool m_inhibitLigate;
415 bool m_disableFontFallback;
417 // Uniscribe breaks the text into Runs. These are one length of text that is
418 // in one script and one direction. This array is in reading order.
419 Vector<SCRIPT_ITEM, cUniscribeHelperStackRuns> m_runs;
421 Vector<Shaping, cUniscribeHelperStackRuns> m_shapes;
422 Vector<OPENTYPE_TAG, cUniscribeHelperStackRuns> m_scriptTags;
424 // This is a mapping between reading order and screen order for the items.
425 // Uniscribe's items array are in reading order. For right-to-left text,
426 // or mixed (although WebKit's |TextRun| should really be only one
427 // direction), this makes it very difficult to compute character offsets
428 // and positions. This list is in screen order from left to right, and
429 // gives the index into the |m_runs| and |m_shapes| arrays of each
431 Vector<int, cUniscribeHelperStackRuns> m_screenOrder;
433 // This contains Uniscribe's OpenType feature settings. This structure
434 // is filled by using WebKit's |FontFeatureSettings|.
435 TEXTRANGE_PROPERTIES m_rangeProperties;
436 Vector<OPENTYPE_FEATURE_RECORD, cUniscribeHelperFeatures> m_featureRecords;
439 } // namespace WebCore
441 #endif // UniscribeHelper_h