Accept long UTF-8 Literal Strings

author David Neto <dneto@google.com>

Thu, 8 Oct 2015 19:20:25 +0000 (15:20 -0400)

committer David Neto <dneto@google.com>

Mon, 26 Oct 2015 16:55:33 +0000 (12:55 -0400)
author David Neto <dneto@google.com>
Thu, 8 Oct 2015 19:20:25 +0000 (15:20 -0400)
committer David Neto <dneto@google.com>
Mon, 26 Oct 2015 16:55:33 +0000 (12:55 -0400)
diff --git a/include/libspirv/libspirv.h b/include/libspirv/libspirv.h

index 26534de..16f7ccb 100644 (file)
--- a/include/libspirv/libspirv.h
+++ b/include/libspirv/libspirv.h
@@ -58,13 +58,17 @@ extern "C" {
  
  // SPIR-V 1.0 limits
  #define SPV_LIMIT_INSTRUCTION_WORD_COUNT_MAX 0xffff
+#define SPV_LIMIT_LITERAL_STRING_UTF8_CHARS_MAX 0xffff
+
+// A single Unicode character in UTF-8 encoding can take
+// up to 4 bytes.
+#define SPV_LIMIT_LITERAL_STRING_BYTES_MAX \
+  (SPV_LIMIT_LITERAL_STRING_UTF8_CHARS_MAX * 4)
  
  // NOTE: These are set to the minimum maximum values
  // TODO(dneto): Check these.
  
  // libspirv limits.
-#define SPV_LIMIT_LITERAL_NAME_MAX 0x00000400
-#define SPV_LIMIT_LITERAL_STRING_MAX 0x00010000
  #define SPV_LIMIT_RESULT_ID_BOUND 0x00400000
  #define SPV_LIMIT_CONTROL_FLOW_NEST_DEPTH 0x00000400
  #define SPV_LIMIT_GLOBAL_VARIABLES_MAX 0x00010000
diff --git a/source/text.h b/source/text.h

index ebb8d8c..94e5b18 100644 (file)
--- a/source/text.h
+++ b/source/text.h
@@ -55,20 +55,23 @@ typedef struct spv_literal_t {
      float f;
      double d;
      // Allow room for the null terminator, and two surrounding quotes.
-    char str[SPV_LIMIT_LITERAL_STRING_MAX + 3];
+    // TODO(dneto): This is a very large array.  We should use a
+    // different kind of container.
+    char str[SPV_LIMIT_LITERAL_STRING_BYTES_MAX + 3];
    } value;
  } spv_literal_t;
  
  
  // Functions
  
-/// @brief Convert the input text to one of the number types.
+/// @brief Convert the input text to one of the number types, or to
+/// a string.
  ///
  /// String literals must be surrounded by double-quotes ("), which are
  /// then stripped.
  ///
  /// @param[in] textValue input text to parse
-/// @param[out] pLiteral the returned literal number
+/// @param[out] pLiteral the returned literal
  ///
  /// @return result code
  spv_result_t spvTextToLiteral(const char *textValue, spv_literal_t *pLiteral);
diff --git a/source/text_handler.cpp b/source/text_handler.cpp

index 847df69..a0e2512 100644 (file)
--- a/source/text_handler.cpp
+++ b/source/text_handler.cpp
@@ -342,8 +342,8 @@ spv_result_t AssemblyContext::binaryEncodeString(
  
    // TODO(dneto): We can just defer this check until later.
    if (newWordCount > SPV_LIMIT_INSTRUCTION_WORD_COUNT_MAX) {
-    diagnostic() << "Instruction word count '"
-             << SPV_LIMIT_INSTRUCTION_WORD_COUNT_MAX << "'exceeded.";
+    diagnostic() << "Instruction too long: more than "
+             << SPV_LIMIT_INSTRUCTION_WORD_COUNT_MAX << " words.";
      return SPV_ERROR_INVALID_TEXT;
    }
  
diff --git a/test/TextLiteral.cpp b/test/TextLiteral.cpp

index 4f125a6..7b277b2 100644 (file)
--- a/test/TextLiteral.cpp
+++ b/test/TextLiteral.cpp
@@ -129,18 +129,33 @@ TEST(TextLiteral, GoodString) {
  TEST(TextLiteral, StringTooLong) {
    spv_literal_t l;
    std::string too_long = std::string("\"") +
-                         std::string(SPV_LIMIT_LITERAL_STRING_MAX + 1, 'a') +
+                         std::string(SPV_LIMIT_LITERAL_STRING_BYTES_MAX + 1, 'a') +
                           "\"";
    EXPECT_EQ(SPV_ERROR_OUT_OF_MEMORY, spvTextToLiteral(too_long.data(), &l));
  }
  
  TEST(TextLiteral, GoodLongString) {
    spv_literal_t l;
-  std::string unquoted(SPV_LIMIT_LITERAL_STRING_MAX, 'a');
+  // The universal limit of 65535 Unicode characters might make this
+  // fail validation, since SPV_LIMIT_LITERAL_STRING_BYTES_MAX is 4*65535.
+  // However, as an implementation detail, we'll allow the assembler
+  // to parse it.  Otherwise we'd have to scan the string for valid UTF-8
+  // characters.
+  std::string unquoted(SPV_LIMIT_LITERAL_STRING_BYTES_MAX, 'a');
    std::string good_long = std::string("\"") + unquoted + "\"";
    EXPECT_EQ(SPV_SUCCESS, spvTextToLiteral(good_long.data(), &l));
    EXPECT_EQ(SPV_LITERAL_TYPE_STRING, l.type);
    EXPECT_STREQ(unquoted.data(), l.value.str);
  }
  
+TEST(TextLiteral, GoodUTF8String) {
+  const std::string unquoted =
+      spvtest::MakeLongUTF8String(SPV_LIMIT_LITERAL_STRING_UTF8_CHARS_MAX);
+  const std::string good_long = std::string("\"") + unquoted + "\"";
+  spv_literal_t l;
+  EXPECT_EQ(SPV_SUCCESS, spvTextToLiteral(good_long.data(), &l));
+  EXPECT_EQ(SPV_LITERAL_TYPE_STRING, l.type);
+  EXPECT_STREQ(unquoted.data(), l.value.str);
+}
+
  }  // anonymous namespace
diff --git a/test/TextToBinary.Literal.cpp b/test/TextToBinary.Literal.cpp

index 1af57ec..596b645 100644 (file)
--- a/test/TextToBinary.Literal.cpp
+++ b/test/TextToBinary.Literal.cpp
@@ -50,11 +50,56 @@ TEST_F(TextToBinaryTest, LiteralNumberInPlaceOfLiteralString) {
        CompileFailure(R"(OpSourceExtension 1000)"));
  }
  
-TEST_F(TextToBinaryTest, LiteralStringTooLong) {
+TEST_F(TextToBinaryTest, LiteralStringASCIILong) {
    // SPIR-V allows strings up to 65535 characters.
+  // Test the simple case of UTF-8 code points corresponding
+  // to ASCII characters.
+  EXPECT_EQ(65535, SPV_LIMIT_LITERAL_STRING_UTF8_CHARS_MAX);
    const std::string code =
-      "OpSourceExtension \"" + std::string(65535, 'o') + "\"\n";
+      "OpSourceExtension \"" +
+      std::string(SPV_LIMIT_LITERAL_STRING_UTF8_CHARS_MAX, 'o') + "\"\n";
    EXPECT_EQ(code, EncodeAndDecodeSuccessfully(code));
  }
  
+TEST_F(TextToBinaryTest, LiteralStringUTF8LongEncodings) {
+  // SPIR-V allows strings up to 65535 characters.
+  // Test the case of many Unicode characters, each of which has
+  // a 4-byte UTF-8 encoding.
+
+  // An instruction is at most 65535 words long. The first word
+  // contains the wordcount and opcode.  So the worst case number of
+  // 4-byte UTF-8 characters is 65533, since we also need to
+  // store a terminating null character.
+
+  // This string fits exactly into 65534 words.
+  const std::string good_string =
+      spvtest::MakeLongUTF8String(65533)
+      // The following single character has a 3 byte encoding,
+      // which fits snugly against the terminating null.
+      + "\u8000";
+
+  // These strings will overflow any instruction with 0 or 1 other
+  // arguments, respectively.
+  const std::string bad_0_arg_string = spvtest::MakeLongUTF8String(65534);
+  const std::string bad_1_arg_string = spvtest::MakeLongUTF8String(65533);
+
+  const std::string good_code = "OpSourceExtension \"" + good_string + "\"\n";
+  EXPECT_EQ(good_code, EncodeAndDecodeSuccessfully(good_code));
+
+  // Prove that it works on more than one instruction.
+  const std::string good_code_2 = "OpSourceContinued \"" + good_string + "\"\n";
+  EXPECT_EQ(good_code_2, EncodeAndDecodeSuccessfully(good_code_2));
+
+  // Failure cases.
+  EXPECT_EQ(
+      R"(Instruction too long: more than 65535 words.)",
+      CompileFailure("OpSourceExtension \"" + bad_0_arg_string + "\"\n"));
+  EXPECT_EQ(
+      R"(Instruction too long: more than 65535 words.)",
+      CompileFailure("OpSourceContinued \"" + bad_0_arg_string + "\"\n"));
+  EXPECT_EQ(
+      R"(Instruction too long: more than 65535 words.)",
+      CompileFailure("OpName %target \"" + bad_1_arg_string + "\"\n"));
+}
+
  }  // anonymous namespace
diff --git a/test/UnitSPIRV.h b/test/UnitSPIRV.h

index ed39081..acd4fd2 100644 (file)
--- a/test/UnitSPIRV.h
+++ b/test/UnitSPIRV.h
@@ -191,6 +191,21 @@ class EnumCase {
    std::vector<uint32_t> operands_;
  };
  
+// Returns a string with num_4_byte_chars Unicode characters,
+// each of which has a 4-byte UTF-8 encoding.
+inline std::string MakeLongUTF8String(size_t num_4_byte_chars) {
+  // An example of a longest valid UTF-8 character.
+  const std::string earth_africa("\U0001F30D");
+  EXPECT_EQ(4u, earth_africa.size());
+  std::string result;
+  result.reserve(num_4_byte_chars * 4);
+  for (size_t i = 0; i < num_4_byte_chars; i++) {
+    result += earth_africa;
+  }
+  EXPECT_EQ(4 * num_4_byte_chars, result.size());
+  return result;
+}
+
  }  // namespace spvtest
  
  #endif
author	David Neto <dneto@google.com>
	Thu, 8 Oct 2015 19:20:25 +0000 (15:20 -0400)
committer	David Neto <dneto@google.com>
	Mon, 26 Oct 2015 16:55:33 +0000 (12:55 -0400)
include/libspirv/libspirv.h		patch \| blob \| history
source/text.h		patch \| blob \| history
source/text_handler.cpp		patch \| blob \| history
test/TextLiteral.cpp		patch \| blob \| history
test/TextToBinary.Literal.cpp		patch \| blob \| history
test/UnitSPIRV.h		patch \| blob \| history