Source/WebCore/platform/graphics/chromium/UniscribeHelper.h

   1 /*
   2  * Copyright (c) 2006, 2007, 2008, 2009, Google Inc. All rights reserved.
   3  *
   4  * Redistribution and use in source and binary forms, with or without
   5  * modification, are permitted provided that the following conditions are
   6  * met:
   7  *
   8  *     * Redistributions of source code must retain the above copyright
   9  * notice, this list of conditions and the following disclaimer.
  10  *     * Redistributions in binary form must reproduce the above
  11  * copyright notice, this list of conditions and the following disclaimer
  12  * in the documentation and/or other materials provided with the
  13  * distribution.
  14  *     * Neither the name of Google Inc. nor the names of its
  15  * contributors may be used to endorse or promote products derived from
  16  * this software without specific prior written permission.
  17  *
  18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29  */
  30
  31 // A wrapper around Uniscribe that provides a reasonable API.
  32
  33 #ifndef UniscribeHelper_h
  34 #define UniscribeHelper_h
  35
  36 #include <windows.h>
  37 #include <usp10.h>
  38 #include <map>
  39
  40 #include <unicode/uchar.h>
  41 #include <wtf/Vector.h>
  42
  43 class UniscribeTest_TooBig_Test; // A gunit test for UniscribeHelper.
  44
  45 namespace WebCore {
  46
  47 class FontFeatureSettings;
  48 class GraphicsContext;
  49
  50 const unsigned cUniscribeHelperStackRuns = 8;
  51 const unsigned cUniscribeHelperStackChars = 32;
  52 const unsigned cUniscribeHelperFeatures = 4;
  53
  54 // This object should be safe to create & destroy frequently, as long as the
  55 // caller preserves the script_cache when possible (this data may be slow to
  56 // compute).
  57 //
  58 // This object is "kind of large" (~1K) because it reserves a lot of space for
  59 // working with to avoid expensive heap operations. Therefore, not only should
  60 // you not worry about creating and destroying it, you should try to not keep
  61 // them around.
  62 class UniscribeHelper {
  63 public:
  64     // Initializes this Uniscribe run with the text pointed to by |run| with
  65     // |length|. The input is NOT null terminated.
  66     //
  67     // The is_rtl flag should be set if the input script is RTL. It is assumed
  68     // that the caller has already divided up the input text (using ICU, for
  69     // example) into runs of the same direction of script. This avoids
  70     // disagreements between the caller and Uniscribe later (see FillItems).
  71     //
  72     // A script cache should be provided by the caller that is initialized to
  73     // NULL. When the caller is done with the cache (it may be stored between
  74     // runs as long as it is used consistently with the same HFONT), it should
  75     // call ScriptFreeCache().
  76     UniscribeHelper(const UChar* input,
  77                     int inputLength,
  78                     bool isRtl,
  79                     HFONT,
  80                     SCRIPT_CACHE*,
  81                     SCRIPT_FONTPROPERTIES*,
  82                     WORD);
  83
  84     virtual ~UniscribeHelper();
  85
  86     // Sets Uniscribe's directional override flag. False by default.
  87     bool directionalOverride() const
  88     {
  89         return m_directionalOverride;
  90     }
  91     void setDirectionalOverride(bool override)
  92     {
  93         m_directionalOverride = override;
  94     }
  95
  96     // Set's Uniscribe's no-ligate override flag. False by default.
  97     bool inhibitLigate() const
  98     {
  99         return m_inhibitLigate;
 100     }
 101     void setInhibitLigate(bool inhibit)
 102     {
 103         m_inhibitLigate = inhibit;
 104     }
 105
 106     // Set letter spacing. We will try to insert this much space between
 107     // graphemes (one or more glyphs perceived as a single unit by ordinary
 108     // users of a script). Positive values increase letter spacing, negative
 109     // values decrease it. 0 by default.
 110     int letterSpacing() const
 111     {
 112         return m_letterSpacing;
 113     }
 114     void setLetterSpacing(int letterSpacing)
 115     {
 116         m_letterSpacing = letterSpacing;
 117     }
 118
 119     // Set the width of a standard space character. We use this to normalize
 120     // space widths. Windows will make spaces after Hindi characters larger than
 121     // other spaces. A space_width of 0 means to use the default space width.
 122     //
 123     // Must be set before Init() is called.
 124     int spaceWidth() const
 125     {
 126         return m_spaceWidth;
 127     }
 128     void setSpaceWidth(int spaceWidth)
 129     {
 130         m_spaceWidth = spaceWidth;
 131     }
 132
 133     // Set word spacing. We will try to insert this much extra space between
 134     // each word in the input (beyond whatever whitespace character separates
 135     // words). Positive values lead to increased letter spacing, negative values
 136     // decrease it. 0 by default.
 137     //
 138     // Must be set before Init() is called.
 139     int wordSpacing() const
 140     {
 141         return m_wordSpacing;
 142     }
 143     void setWordSpacing(int wordSpacing)
 144     {
 145         m_wordSpacing = wordSpacing;
 146     }
 147
 148     void setAscent(int ascent)
 149     {
 150         m_ascent = ascent;
 151     }
 152
 153     // When set to true, this class is used only to look up glyph
 154     // indices for a range of Unicode characters without glyph placement.
 155     // By default, it's false. This should be set to true when this
 156     // class is used for glyph index look-up for non-BMP characters
 157     // in GlyphPageNodeChromiumWin.cpp.
 158     void setDisableFontFallback(bool disableFontFallback)
 159     {
 160         m_disableFontFallback = true;
 161     }
 162
 163     // Set TEXTRANGE_PROPERTIES structure which contains
 164     // OpenType feature records generated from FontFeatureSettings.
 165     void setRangeProperties(const FontFeatureSettings*);
 166
 167     // You must call this after setting any options but before doing any
 168     // other calls like asking for widths or drawing.
 169     void init()
 170     {
 171         initWithOptionalLengthProtection(true);
 172     }
 173
 174     // Returns the total width in pixels of the text run.
 175     int width() const;
 176
 177     // Call to justify the text, with the amount of space that should be ADDED
 178     // to get the desired width that the column should be justified to.
 179     // Normally, spaces are inserted, but for Arabic there will be kashidas
 180     // (extra strokes) inserted instead.
 181     //
 182     // This function MUST be called AFTER Init().
 183     void justify(int additionalSpace);
 184
 185     // Computes the given character offset into a pixel offset of the beginning
 186     // of that character.
 187     int characterToX(int offset) const;
 188
 189     // Converts the given pixel X position into a logical character offset into
 190     // the run. For positions appearing before the first character, this will
 191     // return -1.
 192     int xToCharacter(int x) const;
 193
 194     // Draws the given characters to (x, y) in the given DC. The font will be
 195     // handled by this function, but the font color and other attributes should
 196     // be pre-set.
 197     //
 198     // The y position is the upper left corner, NOT the baseline.
 199     void draw(GraphicsContext* graphicsContext, HDC dc, int x, int y, int from,
 200               int to);
 201
 202     // Returns the first glyph assigned to the character at the given offset.
 203     // This function is used to retrieve glyph information when Uniscribe is
 204     // being used to generate glyphs for non-complex, non-BMP (above U+FFFF)
 205     // characters. These characters are not otherwise special and have no
 206     // complex shaping rules, so we don't otherwise need Uniscribe, except
 207     // Uniscribe is the only way to get glyphs for non-BMP characters.
 208     //
 209     // Returns 0 if there is no glyph for the given character.
 210     WORD firstGlyphForCharacter(int charOffset) const;
 211
 212 protected:
 213     // Backend for init. The flag allows the unit test to specify whether we
 214     // should fail early for very long strings like normal, or try to pass the
 215     // long string to Uniscribe. The latter provides a way to force failure of
 216     // shaping.
 217     void initWithOptionalLengthProtection(bool lengthProtection);
 218
 219     // Tries to preload the font when the it is not accessible.
 220     // This is the default implementation and it does not do anything.
 221     virtual void tryToPreloadFont(HFONT) {}
 222
 223 private:
 224     friend class UniscribeTest_TooBig_Test;
 225
 226     // An array corresponding to each item in runs_ containing information
 227     // on each of the glyphs that were generated. Like runs_, this is in
 228     // reading order. However, for rtl text, the characters within each
 229     // item will be reversed.
 230     struct Shaping {
 231         Shaping()
 232             : m_prePadding(0)
 233             , m_hfont(NULL)
 234             , m_scriptCache(NULL)
 235             , m_ascentOffset(0)
 236             , m_spaceGlyph(0)
 237         {
 238             m_abc.abcA = 0;
 239             m_abc.abcB = 0;
 240             m_abc.abcC = 0;
 241         }
 242
 243         // Returns the number of glyphs (which will be drawn to the screen)
 244         // in this run.
 245         int glyphLength() const
 246         {
 247             return static_cast<int>(m_glyphs.size());
 248         }
 249
 250         // Returns the number of characters (that we started with) in this run.
 251         int charLength() const
 252         {
 253             return static_cast<int>(m_logs.size());
 254         }
 255
 256         // Returns the advance array that should be used when measuring glyphs.
 257         // The returned pointer will indicate an array with glyph_length()
 258         // elements and the advance that should be used for each one. This is
 259         // either the real advance, or the justified advances if there is one,
 260         // and is the array we want to use for measurement.
 261         const int* effectiveAdvances() const
 262         {
 263             if (m_advance.size() == 0)
 264                 return 0;
 265             if (m_justify.size() == 0)
 266                 return &m_advance[0];
 267             return &m_justify[0];
 268         }
 269
 270         // This is the advance amount of space that we have added to the
 271         // beginning of the run. It is like the ABC's |A| advance but one that
 272         // we create and must handle internally whenever computing with pixel
 273         // offsets.
 274         int m_prePadding;
 275
 276         // Glyph indices in the font used to display this item. These indices
 277         // are in screen order.
 278         Vector<WORD, cUniscribeHelperStackChars> m_glyphs;
 279
 280         // For each input character, this tells us the first glyph index it
 281         // generated. This is the only array with size of the input chars.
 282         //
 283         // All offsets are from the beginning of this run. Multiple characters
 284         // can generate one glyph, in which case there will be adjacent
 285         // duplicates in this list. One character can also generate multiple
 286         // glyphs, in which case there will be skipped indices in this list.
 287         Vector<WORD, cUniscribeHelperStackChars> m_logs;
 288
 289         // Flags and such for each glyph.
 290         Vector<SCRIPT_VISATTR, cUniscribeHelperStackChars> m_visualAttributes;
 291
 292         // Horizontal advances for each glyph listed above, this is basically
 293         // how wide each glyph is.
 294         Vector<int, cUniscribeHelperStackChars> m_advance;
 295
 296         // This contains glyph offsets, from the nominal position of a glyph.
 297         // It is used to adjust the positions of multiple combining characters
 298         // around/above/below base characters in a context-sensitive manner so
 299         // that they don't bump against each other and the base character.
 300         Vector<GOFFSET, cUniscribeHelperStackChars> m_offsets;
 301
 302         // Filled by a call to Justify, this is empty for nonjustified text.
 303         // If nonempty, this contains the array of justify characters for each
 304         // character as returned by ScriptJustify.
 305         //
 306         // This is the same as the advance array, but with extra space added
 307         // for some characters. The difference between a glyph's |justify|
 308         // width and it's |advance| width is the extra space added.
 309         Vector<int, cUniscribeHelperStackChars> m_justify;
 310
 311         // Sizing information for this run. This treats the entire run as a
 312         // character with a preceeding advance, width, and ending advance.  The
 313         // B width is the sum of the |advance| array, and the A and C widths
 314         // are any extra spacing applied to each end.
 315         //
 316         // It is unclear from the documentation what this actually means. From
 317         // experimentation, it seems that the sum of the character advances is
 318         // always the sum of the ABC values, and I'm not sure what you're
 319         // supposed to do with the ABC values.
 320         ABC m_abc;
 321
 322         // Pointers to windows font data used to render this run.
 323         HFONT m_hfont;
 324         SCRIPT_CACHE* m_scriptCache;
 325
 326         // Ascent offset between the ascent of the primary font
 327         // and that of the fallback font. The offset needs to be applied,
 328         // when drawing a string, to align multiple runs rendered with
 329         // different fonts.
 330         int m_ascentOffset;
 331
 332         WORD m_spaceGlyph;
 333     };
 334
 335     // Computes the runs_ array from the text run.
 336     void fillRuns();
 337
 338     // Computes the shapes_ array given an runs_ array already filled in.
 339     void fillShapes();
 340
 341     // Fills in the screen_order_ array (see below).
 342     void fillScreenOrder();
 343
 344     // Called to update the glyph positions based on the current spacing
 345     // options that are set.
 346     void applySpacing();
 347
 348     // Normalizes all advances for spaces to the same width. This keeps windows
 349     // from making spaces after Hindi characters larger, which is then
 350     // inconsistent with our meaure of the width since WebKit doesn't include
 351     // spaces in text-runs sent to uniscribe unless white-space:pre.
 352     void adjustSpaceAdvances();
 353
 354     // Returns the total width of a single item.
 355     int advanceForItem(int) const;
 356
 357     bool containsMissingGlyphs(const Shaping&,
 358                                const SCRIPT_ITEM&,
 359                                const SCRIPT_FONTPROPERTIES*) const;
 360
 361     // Shapes a run (pointed to by |input|) using |hfont| first.
 362     // Tries a series of fonts specified retrieved with NextWinFontData
 363     // and finally a font covering characters in |*input|. A string pointed
 364     // by |input| comes from ScriptItemize and is supposed to contain
 365     // characters belonging to a single script aside from characters common to
 366     // all scripts (e.g. space).
 367     bool shape(const UChar* input, int itemLength, int numGlyphs, SCRIPT_ITEM& run, OPENTYPE_TAG, Shaping&);
 368
 369     // Gets Windows font data for the next best font to try in the list
 370     // of fonts. When there's no more font available, returns false
 371     // without touching any of out params. Need to call ResetFontIndex
 372     // to start scanning of the font list from the beginning.
 373     virtual bool nextWinFontData(HFONT*, SCRIPT_CACHE**, SCRIPT_FONTPROPERTIES**, int* ascent)
 374     {
 375         return false;
 376     }
 377
 378     // Resets the font index to the first in the list of fonts to try after the
 379     // primaryFont turns out not to work. With fontIndex reset,
 380     // NextWinFontData scans fallback fonts from the beginning.
 381     virtual void resetFontIndex() {}
 382
 383     // If m_cachedDC is 0, creates one that is compatible with the screen DC.
 384     void EnsureCachedDCCreated();
 385
 386     // The input data for this run of Uniscribe. See the constructor.
 387     const UChar* m_input;
 388     const int m_inputLength;
 389     const bool m_isRtl;
 390
 391     // Windows font data for the primary font. In a sense, m_logfont and m_style
 392     // are redundant because m_hfont contains all the information. However,
 393     // invoking GetObject, everytime we need the height and the style, is rather
 394     // expensive so that we cache them. Would it be better to add getter and
 395     // (virtual) setter for the height and the style of the primary font,
 396     // instead of m_logfont? Then, a derived class ctor can set m_ascent,
 397     // m_height and m_style if they're known. Getters for them would have to
 398     // 'infer' their values from m_hfont ONLY when they're not set.
 399     HFONT m_hfont;
 400     // We cache the DC to use with ScriptShape/ScriptPlace.
 401     static HDC m_cachedDC;
 402     SCRIPT_CACHE* m_scriptCache;
 403     SCRIPT_FONTPROPERTIES* m_fontProperties;
 404     int m_ascent;
 405     LOGFONT m_logfont;
 406     int m_style;
 407     WORD m_spaceGlyph;
 408
 409     // Options, see the getters/setters above.
 410     bool m_directionalOverride;
 411     bool m_inhibitLigate;
 412     int m_letterSpacing;
 413     int m_spaceWidth;
 414     int m_wordSpacing;
 415     bool m_disableFontFallback;
 416
 417     // Uniscribe breaks the text into Runs. These are one length of text that is
 418     // in one script and one direction. This array is in reading order.
 419     Vector<SCRIPT_ITEM, cUniscribeHelperStackRuns> m_runs;
 420
 421     Vector<Shaping, cUniscribeHelperStackRuns> m_shapes;
 422     Vector<OPENTYPE_TAG, cUniscribeHelperStackRuns> m_scriptTags;
 423
 424     // This is a mapping between reading order and screen order for the items.
 425     // Uniscribe's items array are in reading order. For right-to-left text,
 426     // or mixed (although WebKit's |TextRun| should really be only one
 427     // direction), this makes it very difficult to compute character offsets
 428     // and positions. This list is in screen order from left to right, and
 429     // gives the index into the |m_runs| and |m_shapes| arrays of each
 430     // subsequent item.
 431     Vector<int, cUniscribeHelperStackRuns> m_screenOrder;
 432
 433     // This contains Uniscribe's OpenType feature settings. This structure
 434     // is filled by using WebKit's |FontFeatureSettings|.
 435     TEXTRANGE_PROPERTIES m_rangeProperties;
 436     Vector<OPENTYPE_FEATURE_RECORD, cUniscribeHelperFeatures> m_featureRecords;
 437 };
 438
 439 }  // namespace WebCore
 440
 441 #endif  // UniscribeHelper_h