Add 8-bit/16-bit transform feedback support for future use

author Rex Xu <rex.xu@amd.com>

Mon, 21 Jan 2019 08:50:17 +0000 (16:50 +0800)

committer Rex Xu <rex.xu@amd.com>

Sun, 3 Feb 2019 15:31:32 +0000 (23:31 +0800)
author Rex Xu <rex.xu@amd.com>
Mon, 21 Jan 2019 08:50:17 +0000 (16:50 +0800)
committer Rex Xu <rex.xu@amd.com>
Sun, 3 Feb 2019 15:31:32 +0000 (23:31 +0800)
diff --git a/Test/baseResults/440.vert.out b/Test/baseResults/440.vert.out

index 09c6af3..5af757a 100644 (file)
--- a/Test/baseResults/440.vert.out
+++ b/Test/baseResults/440.vert.out
@@ -39,9 +39,9 @@ ERROR: 0:131: 'xfb_stride' : all stride settings must match for xfb buffer 3
  ERROR: 0:152: 'xfb_offset' : overlapping offsets at offset 64 in buffer 0
  ERROR: 0:157: 'xfb_buffer' : buffer is too large: gl_MaxTransformFeedbackBuffers is 4
  ERROR: 0:158: 'xfb_offset' : must be a multiple of size of first component 
-ERROR: 0:159: 'xfb_offset' : type contains double; xfb_offset must be a multiple of 8 
+ERROR: 0:159: 'xfb_offset' : type contains double or 64-bit integer; xfb_offset must be a multiple of 8 
  ERROR: 0:161: 'xfb_offset' : must be a multiple of size of first component 
-ERROR: 0:162: 'xfb_offset' : type contains double; xfb_offset must be a multiple of 8 
+ERROR: 0:162: 'xfb_offset' : type contains double or 64-bit integer; xfb_offset must be a multiple of 8 
  ERROR: 0:166: 'xfb_buffer' : buffer is too large: gl_MaxTransformFeedbackBuffers is 4
  ERROR: 0:169: 'xfb_buffer' : buffer is too large: gl_MaxTransformFeedbackBuffers is 4
  ERROR: 0:169: 'xfb_stride' : 1/4 stride is too large: gl_MaxTransformFeedbackInterleavedComponents is 64
diff --git a/Test/baseResults/spv.16bitxfb.vert.out b/Test/baseResults/spv.16bitxfb.vert.out

new file mode 100644 (file)

index 0000000..7d989c5
--- /dev/null
+++ b/Test/baseResults/spv.16bitxfb.vert.out
@@ -0,0 +1,120 @@
+spv.16bitxfb.vert
+// Module Version 10000
+// Generated by (magic number): 80007
+// Id's are bound by 59
+
+                              Capability Shader
+                              Capability Float16
+                              Capability Int16
+                              Capability TransformFeedback
+                              Capability StorageInputOutput16
+                              Extension  "SPV_KHR_16bit_storage"
+               1:             ExtInstImport  "GLSL.std.450"
+                              MemoryModel Logical GLSL450
+                              EntryPoint Vertex 4  "main" 9 12 18 36 39 46 49
+                              ExecutionMode 4 Xfb
+                              Source GLSL 450
+                              SourceExtension  "GL_AMD_gpu_shader_half_float"
+                              SourceExtension  "GL_AMD_gpu_shader_int16"
+                              Name 4  "main"
+                              Name 9  "of16v3"
+                              Name 12  "if16v4"
+                              Name 16  "F16Out"
+                              MemberName 16(F16Out) 0  "of16"
+                              MemberName 16(F16Out) 1  "of16v2"
+                              Name 18  ""
+                              Name 36  "oi16v3"
+                              Name 39  "ii16v4"
+                              Name 44  "I16Out"
+                              MemberName 44(I16Out) 0  "ou16"
+                              MemberName 44(I16Out) 1  "ou16v2"
+                              Name 46  ""
+                              Name 49  "iu16v4"
+                              Decorate 9(of16v3) Location 0
+                              Decorate 9(of16v3) XfbBuffer 0
+                              Decorate 9(of16v3) XfbStride 6
+                              Decorate 9(of16v3) Offset 0
+                              Decorate 12(if16v4) Location 0
+                              MemberDecorate 16(F16Out) 0 Offset 0
+                              MemberDecorate 16(F16Out) 1 Offset 2
+                              Decorate 16(F16Out) Block
+                              Decorate 18 Location 1
+                              Decorate 18 XfbBuffer 1
+                              Decorate 18 XfbStride 6
+                              Decorate 36(oi16v3) Location 5
+                              Decorate 36(oi16v3) XfbBuffer 2
+                              Decorate 36(oi16v3) XfbStride 6
+                              Decorate 36(oi16v3) Offset 0
+                              Decorate 39(ii16v4) Location 1
+                              MemberDecorate 44(I16Out) 0 Offset 0
+                              MemberDecorate 44(I16Out) 1 Offset 2
+                              Decorate 44(I16Out) Block
+                              Decorate 46 Location 6
+                              Decorate 46 XfbBuffer 3
+                              Decorate 46 XfbStride 6
+                              Decorate 49(iu16v4) Location 2
+               2:             TypeVoid
+               3:             TypeFunction 2
+               6:             TypeFloat 16
+               7:             TypeVector 6(float16_t) 3
+               8:             TypePointer Output 7(f16vec3)
+       9(of16v3):      8(ptr) Variable Output
+              10:             TypeVector 6(float16_t) 4
+              11:             TypePointer Input 10(f16vec4)
+      12(if16v4):     11(ptr) Variable Input
+              15:             TypeVector 6(float16_t) 2
+      16(F16Out):             TypeStruct 6(float16_t) 15(f16vec2)
+              17:             TypePointer Output 16(F16Out)
+              18:     17(ptr) Variable Output
+              19:             TypeInt 32 1
+              20:     19(int) Constant 0
+              21:             TypeInt 32 0
+              22:     21(int) Constant 0
+              23:             TypePointer Input 6(float16_t)
+              26:             TypePointer Output 6(float16_t)
+              28:     19(int) Constant 1
+              31:             TypePointer Output 15(f16vec2)
+              33:             TypeInt 16 1
+              34:             TypeVector 33(int16_t) 3
+              35:             TypePointer Output 34(i16vec3)
+      36(oi16v3):     35(ptr) Variable Output
+              37:             TypeVector 33(int16_t) 4
+              38:             TypePointer Input 37(i16vec4)
+      39(ii16v4):     38(ptr) Variable Input
+              42:             TypeInt 16 0
+              43:             TypeVector 42(int16_t) 2
+      44(I16Out):             TypeStruct 42(int16_t) 43(i16vec2)
+              45:             TypePointer Output 44(I16Out)
+              46:     45(ptr) Variable Output
+              47:             TypeVector 42(int16_t) 4
+              48:             TypePointer Input 47(i16vec4)
+      49(iu16v4):     48(ptr) Variable Input
+              50:             TypePointer Input 42(int16_t)
+              53:             TypePointer Output 42(int16_t)
+              57:             TypePointer Output 43(i16vec2)
+         4(main):           2 Function None 3
+               5:             Label
+              13: 10(f16vec4) Load 12(if16v4)
+              14:  7(f16vec3) VectorShuffle 13 13 0 1 2
+                              Store 9(of16v3) 14
+              24:     23(ptr) AccessChain 12(if16v4) 22
+              25:6(float16_t) Load 24
+              27:     26(ptr) AccessChain 18 20
+                              Store 27 25
+              29: 10(f16vec4) Load 12(if16v4)
+              30: 15(f16vec2) VectorShuffle 29 29 0 1
+              32:     31(ptr) AccessChain 18 28
+                              Store 32 30
+              40: 37(i16vec4) Load 39(ii16v4)
+              41: 34(i16vec3) VectorShuffle 40 40 0 1 2
+                              Store 36(oi16v3) 41
+              51:     50(ptr) AccessChain 49(iu16v4) 22
+              52: 42(int16_t) Load 51
+              54:     53(ptr) AccessChain 46 20
+                              Store 54 52
+              55: 47(i16vec4) Load 49(iu16v4)
+              56: 43(i16vec2) VectorShuffle 55 55 0 1
+              58:     57(ptr) AccessChain 46 28
+                              Store 58 56
+                              Return
+                              FunctionEnd
diff --git a/Test/spv.16bitxfb.vert b/Test/spv.16bitxfb.vert

new file mode 100644 (file)

index 0000000..f971943
--- /dev/null
+++ b/Test/spv.16bitxfb.vert
@@ -0,0 +1,33 @@
+#version 450 core\r
+\r
+#extension GL_AMD_gpu_shader_half_float: enable\r
+#extension GL_AMD_gpu_shader_int16: enable\r
+\r
+layout(location = 0) in f16vec4 if16v4;\r
+layout(location = 1) in i16vec4 ii16v4;\r
+layout(location = 2) in u16vec4 iu16v4;\r
+\r
+layout(location = 0, xfb_buffer = 0, xfb_stride = 6, xfb_offset = 0) out f16vec3 of16v3;\r
+layout(location = 1, xfb_buffer = 1, xfb_stride = 6, xfb_offset = 0) out F16Out\r
+{\r
+    float16_t of16;\r
+    f16vec2   of16v2;\r
+};\r
+\r
+layout(location = 5, xfb_buffer = 2, xfb_stride = 6, xfb_offset = 0) out i16vec3 oi16v3;\r
+layout(location = 6, xfb_buffer = 3, xfb_stride = 6, xfb_offset = 0) out I16Out\r
+{\r
+    uint16_t ou16;\r
+    u16vec2  ou16v2;\r
+};\r
+\r
+void main()\r
+{\r
+    of16v3 = if16v4.xyz;\r
+    of16   = if16v4.x;\r
+    of16v2 = if16v4.xy;\r
+\r
+    oi16v3 = ii16v4.xyz;\r
+    ou16   = iu16v4.x;\r
+    ou16v2 = iu16v4.xy;\r
+}
+\ No newline at end of file
diff --git a/glslang/MachineIndependent/ParseHelper.cpp b/glslang/MachineIndependent/ParseHelper.cpp

index eac7b50..bef6956 100755 (executable)
--- a/glslang/MachineIndependent/ParseHelper.cpp
+++ b/glslang/MachineIndependent/ParseHelper.cpp
@@ -5463,14 +5463,23 @@ void TParseContext::layoutTypeCheck(const TSourceLoc& loc, const TType& type)
  
          // "The offset must be a multiple of the size of the first component of the first
          // qualified variable or block member, or a compile-time error results. Further, if applied to an aggregate
-        // containing a double, the offset must also be a multiple of 8..."
-        if (type.containsBasicType(EbtDouble) && ! IsMultipleOfPow2(qualifier.layoutXfbOffset, 8))
-            error(loc, "type contains double; xfb_offset must be a multiple of 8", "xfb_offset", "");
-        // ..., if applied to an aggregate containing a float16_t, the offset must also be a multiple of 2..."
-        else if (type.containsBasicType(EbtFloat16) && !IsMultipleOfPow2(qualifier.layoutXfbOffset, 2))
-            error(loc, "type contains half float; xfb_offset must be a multiple of 2", "xfb_offset", "");
+        // containing a double or 64-bit integer, the offset must also be a multiple of 8..."
+        if ((type.containsBasicType(EbtDouble) || type.containsBasicType(EbtInt64) || type.containsBasicType(EbtUint64)) &&
+            ! IsMultipleOfPow2(qualifier.layoutXfbOffset, 8))
+            error(loc, "type contains double or 64-bit integer; xfb_offset must be a multiple of 8", "xfb_offset", "");
+#ifdef AMD_EXTENSIONS
+        else if ((type.containsBasicType(EbtBool) || type.containsBasicType(EbtFloat) ||
+                  type.containsBasicType(EbtInt) || type.containsBasicType(EbtUint)) &&
+                 ! IsMultipleOfPow2(qualifier.layoutXfbOffset, 4))
+            error(loc, "must be a multiple of size of first component", "xfb_offset", "");
+        // ..., if applied to an aggregate containing a half float or 16-bit integer, the offset must also be a multiple of 2..."
+        else if ((type.containsBasicType(EbtFloat16) || type.containsBasicType(EbtInt16) || type.containsBasicType(EbtUint16)) &&
+                 !IsMultipleOfPow2(qualifier.layoutXfbOffset, 2))
+            error(loc, "type contains half float or 16-bit integer; xfb_offset must be a multiple of 2", "xfb_offset", "");
+#else
          else if (! IsMultipleOfPow2(qualifier.layoutXfbOffset, 4))
              error(loc, "must be a multiple of size of first component", "xfb_offset", "");
+#endif
      }
  
      if (qualifier.hasXfbStride() && qualifier.hasXfbBuffer()) {
@@ -7287,12 +7296,24 @@ void TParseContext::fixXfbOffsets(TQualifier& qualifier, TTypeList& typeList)
      for (unsigned int member = 0; member < typeList.size(); ++member) {
          TQualifier& memberQualifier = typeList[member].type->getQualifier();
          bool contains64BitType = false;
+#ifdef AMD_EXTENSIONS
+        bool contains32BitType = false;
+        bool contains16BitType = false;
+        int memberSize = intermediate.computeTypeXfbSize(*typeList[member].type, contains64BitType, contains32BitType, contains16BitType);
+#else
          int memberSize = intermediate.computeTypeXfbSize(*typeList[member].type, contains64BitType);
+#endif
          // see if we need to auto-assign an offset to this member
          if (! memberQualifier.hasXfbOffset()) {
              // "if applied to an aggregate containing a double or 64-bit integer, the offset must also be a multiple of 8"
              if (contains64BitType)
                  RoundToPow2(nextOffset, 8);
+#ifdef AMD_EXTENSIONS
+            else if (contains32BitType)
+                RoundToPow2(nextOffset, 4);
+            else if (contains16BitType)
+                RoundToPow2(nextOffset, 2);
+#endif
              memberQualifier.layoutXfbOffset = nextOffset;
          } else
              nextOffset = memberQualifier.layoutXfbOffset;
diff --git a/glslang/MachineIndependent/linkValidate.cpp b/glslang/MachineIndependent/linkValidate.cpp

index b7d8545..0cf2d36 100755 (executable)
--- a/glslang/MachineIndependent/linkValidate.cpp
+++ b/glslang/MachineIndependent/linkValidate.cpp
@@ -224,6 +224,12 @@ void TIntermediate::mergeModes(TInfoSink& infoSink, TIntermediate& unit)
          xfbBuffers[b].implicitStride = std::max(xfbBuffers[b].implicitStride, unit.xfbBuffers[b].implicitStride);
          if (unit.xfbBuffers[b].contains64BitType)
              xfbBuffers[b].contains64BitType = true;
+#ifdef AMD_EXTENSIONS
+        if (unit.xfbBuffers[b].contains32BitType)
+            xfbBuffers[b].contains32BitType = true;
+        if (unit.xfbBuffers[b].contains16BitType)
+            xfbBuffers[b].contains16BitType = true;
+#endif
          // TODO: 4.4 link: enhanced layouts: compare ranges
      }
  
@@ -636,6 +642,12 @@ void TIntermediate::finalCheck(TInfoSink& infoSink, bool keepUncalled)
      for (size_t b = 0; b < xfbBuffers.size(); ++b) {
          if (xfbBuffers[b].contains64BitType)
              RoundToPow2(xfbBuffers[b].implicitStride, 8);
+#ifdef AMD_EXTENSIONS
+        else if (xfbBuffers[b].contains32BitType)
+            RoundToPow2(xfbBuffers[b].implicitStride, 4);
+        else if (xfbBuffers[b].contains16BitType)
+            RoundToPow2(xfbBuffers[b].implicitStride, 2);
+#endif
  
          // "It is a compile-time or link-time error to have
          // any xfb_offset that overflows xfb_stride, whether stated on declarations before or after the xfb_stride, or
@@ -656,12 +668,25 @@ void TIntermediate::finalCheck(TInfoSink& infoSink, bool keepUncalled)
              error(infoSink, "xfb_stride must be multiple of 8 for buffer holding a double or 64-bit integer:");
              infoSink.info.prefix(EPrefixError);
              infoSink.info << "    xfb_buffer " << (unsigned int)b << ", xfb_stride " << xfbBuffers[b].stride << "\n";
+#ifdef AMD_EXTENSIONS
+        } else if (xfbBuffers[b].contains32BitType && ! IsMultipleOfPow2(xfbBuffers[b].stride, 4)) {
+#else
          } else if (! IsMultipleOfPow2(xfbBuffers[b].stride, 4)) {
+#endif
              error(infoSink, "xfb_stride must be multiple of 4:");
              infoSink.info.prefix(EPrefixError);
              infoSink.info << "    xfb_buffer " << (unsigned int)b << ", xfb_stride " << xfbBuffers[b].stride << "\n";
          }
+#ifdef AMD_EXTENSIONS
+        // "If the buffer is capturing any
+        // outputs with half-precision or 16-bit integer components, the stride must be a multiple of 2"
+        else if (xfbBuffers[b].contains16BitType && ! IsMultipleOfPow2(xfbBuffers[b].stride, 2)) {
+            error(infoSink, "xfb_stride must be multiple of 2 for buffer holding a half float or 16-bit integer:");
+            infoSink.info.prefix(EPrefixError);
+            infoSink.info << "    xfb_buffer " << (unsigned int)b << ", xfb_stride " << xfbBuffers[b].stride << "\n";
+        }
  
+#endif
          // "The resulting stride (implicit or explicit), when divided by 4, must be less than or equal to the
          // implementation-dependent constant gl_MaxTransformFeedbackInterleavedComponents."
          if (xfbBuffers[b].stride > (unsigned int)(4 * resources.maxTransformFeedbackInterleavedComponents)) {
@@ -1260,7 +1285,11 @@ int TIntermediate::addXfbBufferOffset(const TType& type)
      TXfbBuffer& buffer = xfbBuffers[qualifier.layoutXfbBuffer];
  
      // compute the range
+#ifdef AMD_EXTENSIONS
+    unsigned int size = computeTypeXfbSize(type, buffer.contains64BitType, buffer.contains32BitType, buffer.contains16BitType);
+#else
      unsigned int size = computeTypeXfbSize(type, buffer.contains64BitType);
+#endif
      buffer.implicitStride = std::max(buffer.implicitStride, qualifier.layoutXfbOffset + size);
      TRange range(qualifier.layoutXfbOffset, qualifier.layoutXfbOffset + size - 1);
  
@@ -1279,9 +1308,16 @@ int TIntermediate::addXfbBufferOffset(const TType& type)
  
  // Recursively figure out how many bytes of xfb buffer are used by the given type.
  // Return the size of type, in bytes.
-// Sets contains64BitType to true if the type contains a double.
+// Sets contains64BitType to true if the type contains a 64-bit data type.
+#ifdef AMD_EXTENSIONS
+// Sets contains32BitType to true if the type contains a 32-bit data type.
+// Sets contains16BitType to true if the type contains a 16-bit data type.
+// N.B. Caller must set contains64BitType, contains32BitType, and contains16BitType to false before calling.
+unsigned int TIntermediate::computeTypeXfbSize(const TType& type, bool& contains64BitType, bool& contains32BitType, bool& contains16BitType) const
+#else
  // N.B. Caller must set contains64BitType to false before calling.
  unsigned int TIntermediate::computeTypeXfbSize(const TType& type, bool& contains64BitType) const
+#endif
  {
      // "...if applied to an aggregate containing a double or 64-bit integer, the offset must also be a multiple of 8,
      // and the space taken in the buffer will be a multiple of 8.
@@ -1294,22 +1330,44 @@ unsigned int TIntermediate::computeTypeXfbSize(const TType& type, bool& contains
          // TODO: perf: this can be flattened by using getCumulativeArraySize(), and a deref that discards all arrayness
          assert(type.isSizedArray());
          TType elementType(type, 0);
+#ifdef AMD_EXTENSIONS
+        return type.getOuterArraySize() * computeTypeXfbSize(elementType, contains64BitType, contains32BitType, contains16BitType); // forward each width flag to its own parameter
+#else
          return type.getOuterArraySize() * computeTypeXfbSize(elementType, contains64BitType);
+#endif
      }
  
      if (type.isStruct()) {
          unsigned int size = 0;
          bool structContains64BitType = false;
+#ifdef AMD_EXTENSIONS
+        bool structContains32BitType = false;
+        bool structContains16BitType = false;
+#endif
          for (int member = 0; member < (int)type.getStruct()->size(); ++member) {
              TType memberType(type, member);
              // "... if applied to
              // an aggregate containing a double or 64-bit integer, the offset must also be a multiple of 8,
              // and the space taken in the buffer will be a multiple of 8."
              bool memberContains64BitType = false;
+#ifdef AMD_EXTENSIONS
+            bool memberContains32BitType = false;
+            bool memberContains16BitType = false;
+            int memberSize = computeTypeXfbSize(memberType, memberContains64BitType, memberContains32BitType, memberContains16BitType);
+#else
              int memberSize = computeTypeXfbSize(memberType, memberContains64BitType);
+#endif
              if (memberContains64BitType) {
                  structContains64BitType = true;
                  RoundToPow2(size, 8);
+#ifdef AMD_EXTENSIONS
+            } else if (memberContains32BitType) {
+                structContains32BitType = true;
+                RoundToPow2(size, 4);
+            } else if (memberContains16BitType) {
+                structContains16BitType = true;
+                RoundToPow2(size, 2);
+#endif
              }
              size += memberSize;
          }
@@ -1317,6 +1375,14 @@ unsigned int TIntermediate::computeTypeXfbSize(const TType& type, bool& contains
          if (structContains64BitType) {
              contains64BitType = true;
              RoundToPow2(size, 8);
+#ifdef AMD_EXTENSIONS
+        } else if (structContains32BitType) {
+            contains32BitType = true;
+            RoundToPow2(size, 4);
+        } else if (structContains16BitType) {
+            contains16BitType = true;
+            RoundToPow2(size, 2);
+#endif
          }
          return size;
      }
@@ -1336,8 +1402,20 @@ unsigned int TIntermediate::computeTypeXfbSize(const TType& type, bool& contains
      if (type.getBasicType() == EbtDouble || type.getBasicType() == EbtInt64 || type.getBasicType() == EbtUint64) {
          contains64BitType = true;
          return 8 * numComponents;
+#ifdef AMD_EXTENSIONS
+    } else if (type.getBasicType() == EbtFloat16 || type.getBasicType() == EbtInt16 || type.getBasicType() == EbtUint16) {
+        contains16BitType = true;
+        return 2 * numComponents;
+    } else if (type.getBasicType() == EbtInt8 || type.getBasicType() == EbtUint8)
+        return numComponents;
+    else {
+        contains32BitType = true;
+        return 4 * numComponents;
+    }
+#else
      } else
          return 4 * numComponents;
+#endif
  }
  
  const int baseAlignmentVec4Std140 = 16;
diff --git a/glslang/MachineIndependent/localintermediate.h b/glslang/MachineIndependent/localintermediate.h

index 2a0ecf9..aecbc6b 100755 (executable)
--- a/glslang/MachineIndependent/localintermediate.h
+++ b/glslang/MachineIndependent/localintermediate.h
@@ -149,11 +149,20 @@ struct TOffsetRange {
  
  // Things that need to be tracked per xfb buffer.
  struct TXfbBuffer {
+#ifdef AMD_EXTENSIONS
+    TXfbBuffer() : stride(TQualifier::layoutXfbStrideEnd), implicitStride(0), contains64BitType(false),
+                   contains32BitType(false), contains16BitType(false) { }
+#else
      TXfbBuffer() : stride(TQualifier::layoutXfbStrideEnd), implicitStride(0), contains64BitType(false) { }
+#endif
      std::vector<TRange> ranges;  // byte offsets that have already been assigned
      unsigned int stride;
      unsigned int implicitStride;
      bool contains64BitType;
+#ifdef AMD_EXTENSIONS
+    bool contains32BitType;  // set via computeTypeXfbSize(); finalCheck() aligns stride to 4 when no 64-bit type is present
+    bool contains16BitType;  // set via computeTypeXfbSize(); finalCheck() aligns stride to 2 when no 64-/32-bit type is present
+#endif
  };
  
  // Track a set of strings describing how the module was processed.
@@ -669,7 +678,11 @@ public:
      }
      unsigned getXfbStride(int buffer) const { return xfbBuffers[buffer].stride; }
      int addXfbBufferOffset(const TType&);
+#ifdef AMD_EXTENSIONS
+    unsigned int computeTypeXfbSize(const TType&, bool& contains64BitType, bool& contains32BitType, bool& contains16BitType) const;
+#else
      unsigned int computeTypeXfbSize(const TType&, bool& contains64BitType) const;
+#endif
      static int getBaseAlignmentScalar(const TType&, int& size);
      static int getBaseAlignment(const TType&, int& size, int& stride, TLayoutPacking layoutPacking, bool rowMajor);
      static int getScalarAlignment(const TType&, int& size, int& stride, bool rowMajor);
diff --git a/gtests/Spv.FromFile.cpp b/gtests/Spv.FromFile.cpp

index 170f985..1a14407 100644 (file)
--- a/gtests/Spv.FromFile.cpp
+++ b/gtests/Spv.FromFile.cpp
@@ -528,6 +528,7 @@ INSTANTIATE_TEST_CASE_P(
  INSTANTIATE_TEST_CASE_P(
      Glsl, CompileVulkanToSpirvTestAMD,
      ::testing::ValuesIn(std::vector<std::string>({
+        "spv.16bitxfb.vert",
          "spv.float16.frag",
          "spv.float16Fetch.frag",
          "spv.imageLoadStoreLod.frag",
diff --git a/hlsl/hlslParseHelper.cpp b/hlsl/hlslParseHelper.cpp

index ed21e02..2634f91 100644 (file)
--- a/hlsl/hlslParseHelper.cpp
+++ b/hlsl/hlslParseHelper.cpp
@@ -8695,12 +8695,25 @@ void HlslParseContext::fixXfbOffsets(TQualifier& qualifier, TTypeList& typeList)
      for (unsigned int member = 0; member < typeList.size(); ++member) {
          TQualifier& memberQualifier = typeList[member].type->getQualifier();
          bool contains64BitType = false;
+#ifdef AMD_EXTENSIONS
+        bool contains32BitType = false;
+        bool contains16BitType = false;
+        int memberSize = intermediate.computeTypeXfbSize(*typeList[member].type, contains64BitType, contains32BitType, contains16BitType);
+#else
          int memberSize = intermediate.computeTypeXfbSize(*typeList[member].type, contains64BitType);
+#endif
          // see if we need to auto-assign an offset to this member
          if (! memberQualifier.hasXfbOffset()) {
              // "if applied to an aggregate containing a double or 64-bit integer, the offset must also be a multiple of 8"
              if (contains64BitType)
                  RoundToPow2(nextOffset, 8);
+#ifdef AMD_EXTENSIONS
+            else if (contains32BitType)
+                RoundToPow2(nextOffset, 4);
+            // "if applied to an aggregate containing a half float or 16-bit integer, the offset must also be a multiple of 2"
+            else if (contains16BitType)
+                RoundToPow2(nextOffset, 2);
+#endif
              memberQualifier.layoutXfbOffset = nextOffset;
          } else
              nextOffset = memberQualifier.layoutXfbOffset;
author	Rex Xu <rex.xu@amd.com>
	Mon, 21 Jan 2019 08:50:17 +0000 (16:50 +0800)
committer	Rex Xu <rex.xu@amd.com>
	Sun, 3 Feb 2019 15:31:32 +0000 (23:31 +0800)
Test/baseResults/440.vert.out		patch \| blob \| history
Test/baseResults/spv.16bitxfb.vert.out	[new file with mode: 0644]	patch \| blob
Test/spv.16bitxfb.vert	[new file with mode: 0644]	patch \| blob
glslang/MachineIndependent/ParseHelper.cpp		patch \| blob \| history
glslang/MachineIndependent/linkValidate.cpp		patch \| blob \| history
glslang/MachineIndependent/localintermediate.h		patch \| blob \| history
gtests/Spv.FromFile.cpp		patch \| blob \| history
hlsl/hlslParseHelper.cpp		patch \| blob \| history