Convert xid ranges classes into compile time lookup tables
author Matt Kuruc <mkuruc@nvidia.com>
Fri, 8 Dec 2023 21:10:08 +0000 (13:10 -0800)
committer Edward Slavin <eslavin@nvidia.com>
Tue, 12 Dec 2023 15:02:30 +0000 (10:02 -0500)
(cherry picked from commit 278d3cb62f96a1dfc283280c73de359b6452fe11)

pxr/base/tf/unicode/tfGenCharacterClasses.py
pxr/base/tf/unicode/unicodeCharacterClasses.template.cpp
pxr/base/tf/unicodeCharacterClasses.cpp
pxr/base/tf/unicodeCharacterClasses.h

index 1726536721c56221506384cc4699e87e6f2ec758..5e7b340a29f683819fb939108e5e7d3384aec5da 100644 (file)
@@ -68,25 +68,25 @@ def _write_cpp_file(source_template_path : str, destination_directory : str):
     generated_cpp_file_name = os.path.join(destination_directory,
                                            CPP_FILE_NAME)
     with open(generated_cpp_file_name, 'w') as generated_cpp_file:
-        # we need to replace two markers, {xid_start_ranges}
-        # and {xid_continue_ranges} with the content we derived
-        # from DerivedCoreProperties.txt
-        xid_start_range_expression = "ranges = {\n"
-        for x in xid_start_range_pairs:
-            range_expression = "{" + str(x[0]) + ", " + str(x[1]) + "}"
-            xid_start_range_expression += f"        {range_expression},\n"
-        xid_start_range_expression += "    };"
-
-        xid_continue_range_expression = "ranges = {\n"
-        for x in xid_continue_range_pairs:
-            range_expression = "{" + str(x[0]) + ", " + str(x[1]) + "}"
-            xid_continue_range_expression += f"        {range_expression},\n"
-        xid_continue_range_expression += "    };"
+        # we need to replace markers {xid_start_ranges} and
+        # {xid_continue_ranges} (along with their sizes) with the content we
+        # derived from DerivedCoreProperties.txt
+        xid_start_range_expression = "\n".join(
+            "        {{{}, {}}},".format(str(x[0]), str(x[1]))
+            for x in xid_start_range_pairs)
+
+        xid_continue_range_expression = "\n".join(
+            "        {{{}, {}}},".format(str(x[0]), str(x[1]))
+            for x in xid_continue_range_pairs)
 
         destination_template_content = source_template_content.replace(
             r"{xid_start_ranges}", xid_start_range_expression)
         destination_template_content = destination_template_content.replace(
             r"{xid_continue_ranges}", xid_continue_range_expression)
+        destination_template_content = destination_template_content.replace(
+            r"{xid_start_ranges_size}", str(len(xid_start_range_pairs)))
+        destination_template_content = destination_template_content.replace(
+            r"{xid_continue_ranges_size}", str(len(xid_continue_range_pairs)))
 
         generated_cpp_file.write(destination_template_content)
 
index e16e7e454d62106c8ce687a4431052b25c292261..fed07c5d6e1dec5570c9413cb0d35a4d18e23a19 100644 (file)
 #include "pxr/pxr.h"
 #include "pxr/base/tf/unicodeCharacterClasses.h"
 
-#include <vector>
+#include <array>
 
 PXR_NAMESPACE_OPEN_SCOPE
 
-/// @brief 
-/// Provides static initialization of the character class data
-/// contained within the XID_Start set of Unicode character classes.
-///
-struct Tf_UnicodeXidStartRangeData
-{
-public:
-
-    Tf_UnicodeXidStartRangeData();
-
-    std::vector<std::pair<uint32_t, uint32_t>> ranges;
-};
-
-/// @brief 
-/// Provides static initialization of the character class data
-/// contained within the XID_Continue set of Unicode character classes.
-///
-struct Tf_UnicodeXidContinueRangeData
-{
-public:
+static constexpr
+std::array<std::pair<uint32_t, uint32_t>, {xid_start_ranges_size}>
+_xidStartRanges = {{
+{xid_start_ranges}
+}};
 
-    Tf_UnicodeXidContinueRangeData();
-
-    std::vector<std::pair<uint32_t, uint32_t>> ranges;
-};
-
-// holds the compacted ranges of XID_Start and XID_Continue
-// character classes
-static TfStaticData<Tf_UnicodeXidStartRangeData> _xidStartRangeData;
-static TfStaticData<Tf_UnicodeXidContinueRangeData> _xidContinueRangeData;
-
-Tf_UnicodeXidStartRangeData::Tf_UnicodeXidStartRangeData()
-{
-    {xid_start_ranges}
-}
-
-Tf_UnicodeXidContinueRangeData::Tf_UnicodeXidContinueRangeData()
-{
-    {xid_continue_ranges}
-}
+static constexpr
+std::array<std::pair<uint32_t, uint32_t>, {xid_continue_ranges_size}>
+_xidContinueRanges = {{
+{xid_continue_ranges}
+}};
 
 TfUnicodeXidStartFlagData::TfUnicodeXidStartFlagData()
 {
     // set all of the bits corresponding to the code points in the range
-    for (const auto& pair : _xidStartRangeData->ranges)
+    for (const auto& pair : _xidStartRanges)
     {
         for (uint32_t i = pair.first; i <= pair.second; i++)
         {
@@ -82,16 +53,10 @@ TfUnicodeXidStartFlagData::TfUnicodeXidStartFlagData()
     }
 }
 
-bool
-TfUnicodeXidStartFlagData::IsXidStartCodePoint(uint32_t codePoint) const
-{
-    return (codePoint < TF_MAX_CODE_POINT) ? _flags[codePoint] : false;
-}
-
 TfUnicodeXidContinueFlagData::TfUnicodeXidContinueFlagData()
 {
     // set all of the bits corresponding to the code points in the range
-    for (const auto& pair : _xidContinueRangeData->ranges)
+    for (const auto& pair : _xidContinueRanges)
     {
         for (uint32_t i = pair.first; i <= pair.second; i++)
         {
@@ -100,25 +65,19 @@ TfUnicodeXidContinueFlagData::TfUnicodeXidContinueFlagData()
     }
 }
 
-bool
-TfUnicodeXidContinueFlagData::IsXidContinueCodePoint(uint32_t codePoint) const
-{
-    return (codePoint < TF_MAX_CODE_POINT) ? _flags[codePoint] : false;
-}
-
-static TfStaticData<TfUnicodeXidStartFlagData> xidStartFlagData;
-static TfStaticData<TfUnicodeXidContinueFlagData> xidContinueFlagData;
+static TfStaticData<TfUnicodeXidStartFlagData> _xidStartFlagData;
+static TfStaticData<TfUnicodeXidContinueFlagData> _xidContinueFlagData;
 
 const TfUnicodeXidStartFlagData&
 TfUnicodeGetXidStartFlagData()
 {
-    return *xidStartFlagData;
+    return *_xidStartFlagData;
 }
 
 const TfUnicodeXidContinueFlagData&
 TfUnicodeGetXidContinueFlagData()
 {
-    return *xidContinueFlagData;
+    return *_xidContinueFlagData;
 }
 
 PXR_NAMESPACE_CLOSE_SCOPE
\ No newline at end of file
index 45f46f54b069ffcdcd1d0936c9cb7e42f3647cb6..5569548731cb33908d450907a706eb9674f572a1 100644 (file)
 #include "pxr/pxr.h"
 #include "pxr/base/tf/unicodeCharacterClasses.h"
 
-#include <vector>
+#include <array>
 
 PXR_NAMESPACE_OPEN_SCOPE
 
-/// @brief 
-/// Provides static initialization of the character class data
-/// contained within the XID_Start set of Unicode character classes.
-///
-struct Tf_UnicodeXidStartRangeData
-{
-public:
-
-    Tf_UnicodeXidStartRangeData();
-
-    std::vector<std::pair<uint32_t, uint32_t>> ranges;
-};
-
-/// @brief 
-/// Provides static initialization of the character class data
-/// contained within the XID_Continue set of Unicode character classes.
-///
-struct Tf_UnicodeXidContinueRangeData
-{
-public:
-
-    Tf_UnicodeXidContinueRangeData();
-
-    std::vector<std::pair<uint32_t, uint32_t>> ranges;
-};
-
-// holds the compacted ranges of XID_Start and XID_Continue
-// character classes
-static TfStaticData<Tf_UnicodeXidStartRangeData> _xidStartRangeData;
-static TfStaticData<Tf_UnicodeXidContinueRangeData> _xidContinueRangeData;
-
-Tf_UnicodeXidStartRangeData::Tf_UnicodeXidStartRangeData()
-{
-    ranges = {
+static constexpr
+std::array<std::pair<uint32_t, uint32_t>, 743>
+_xidStartRanges = {{
         {65, 90},
         {97, 122},
         {170, 170},
@@ -806,12 +775,11 @@ Tf_UnicodeXidStartRangeData::Tf_UnicodeXidStartRangeData()
         {194560, 195101},
         {196608, 201546},
         {201552, 205743},
-    };
-}
+}};
 
-Tf_UnicodeXidContinueRangeData::Tf_UnicodeXidContinueRangeData()
-{
-    ranges = {
+static constexpr
+std::array<std::pair<uint32_t, uint32_t>, 1348>
+_xidContinueRanges = {{
         {48, 57},
         {65, 90},
         {95, 95},
@@ -2160,13 +2128,12 @@ Tf_UnicodeXidContinueRangeData::Tf_UnicodeXidContinueRangeData()
         {196608, 201546},
         {201552, 205743},
         {917760, 917999},
-    };
-}
+}};
 
 TfUnicodeXidStartFlagData::TfUnicodeXidStartFlagData()
 {
     // set all of the bits corresponding to the code points in the range
-    for (const auto& pair : _xidStartRangeData->ranges)
+    for (const auto& pair : _xidStartRanges)
     {
         for (uint32_t i = pair.first; i <= pair.second; i++)
         {
@@ -2175,16 +2142,10 @@ TfUnicodeXidStartFlagData::TfUnicodeXidStartFlagData()
     }
 }
 
-bool
-TfUnicodeXidStartFlagData::IsXidStartCodePoint(uint32_t codePoint) const
-{
-    return (codePoint < TF_MAX_CODE_POINT) ? _flags[codePoint] : false;
-}
-
 TfUnicodeXidContinueFlagData::TfUnicodeXidContinueFlagData()
 {
     // set all of the bits corresponding to the code points in the range
-    for (const auto& pair : _xidContinueRangeData->ranges)
+    for (const auto& pair : _xidContinueRanges)
     {
         for (uint32_t i = pair.first; i <= pair.second; i++)
         {
@@ -2193,25 +2154,19 @@ TfUnicodeXidContinueFlagData::TfUnicodeXidContinueFlagData()
     }
 }
 
-bool
-TfUnicodeXidContinueFlagData::IsXidContinueCodePoint(uint32_t codePoint) const
-{
-    return (codePoint < TF_MAX_CODE_POINT) ? _flags[codePoint] : false;
-}
-
-static TfStaticData<TfUnicodeXidStartFlagData> xidStartFlagData;
-static TfStaticData<TfUnicodeXidContinueFlagData> xidContinueFlagData;
+static TfStaticData<TfUnicodeXidStartFlagData> _xidStartFlagData;
+static TfStaticData<TfUnicodeXidContinueFlagData> _xidContinueFlagData;
 
 const TfUnicodeXidStartFlagData&
 TfUnicodeGetXidStartFlagData()
 {
-    return *xidStartFlagData;
+    return *_xidStartFlagData;
 }
 
 const TfUnicodeXidContinueFlagData&
 TfUnicodeGetXidContinueFlagData()
 {
-    return *xidContinueFlagData;
+    return *_xidContinueFlagData;
 }
 
 PXR_NAMESPACE_CLOSE_SCOPE
\ No newline at end of file
index 3b637caf230eb3c77b0b0225c3c05473071c181c..15d3f005ea085d2c7c853468e5c105642dba6669 100644 (file)
@@ -52,7 +52,9 @@ public:
     /// @param codePoint The Unicode code point to determine inclusion for.
     /// @return true if the given codePoint is in the XID_Start character
     /// class, false otherwise.
-    bool IsXidStartCodePoint(uint32_t codePoint) const;
+    inline bool IsXidStartCodePoint(uint32_t codePoint) const {
+        return (codePoint < TF_MAX_CODE_POINT) ? _flags[codePoint] : false;
+    }
 
 private:
 
@@ -75,7 +77,9 @@ public:
     /// @param codePoint The Unicode code point to determine inclusion for.
     /// @return true if the given codePoint is in the XID_Continue 
     /// character class, false otherwise.
-    bool IsXidContinueCodePoint(uint32_t codePoint) const;
+    inline bool IsXidContinueCodePoint(uint32_t codePoint) const {
+        return (codePoint < TF_MAX_CODE_POINT) ? _flags[codePoint] : false;
+    }
 
 private: