Add fma(), and tie the pack/unpack 2x16 built-ins to the correct versions.

author John Kessenich <cepheus@frii.com>

Thu, 14 Aug 2014 20:05:57 +0000 (20:05 +0000)

committer John Kessenich <cepheus@frii.com>

Thu, 14 Aug 2014 20:05:57 +0000 (20:05 +0000)
author John Kessenich <cepheus@frii.com>
Thu, 14 Aug 2014 20:05:57 +0000 (20:05 +0000)
committer John Kessenich <cepheus@frii.com>
Thu, 14 Aug 2014 20:05:57 +0000 (20:05 +0000)
diff --git a/Test/400.tesc b/Test/400.tesc

index d167d5d..0671d3b 100644 (file)
--- a/Test/400.tesc
+++ b/Test/400.tesc
@@ -58,3 +58,14 @@ layout(location = 4) in vec4 ivlc[];  // ERROR
  layout(location = 3) out vec4 ovla[];\r
  layout(location = 4) out vec4 ovlb[];\r
  layout(location = 4) out vec4 ovlc[];  // ERROR\r
+\r
+precise vec3 pv3;\r
+\r
+void foop()\r
+{\r
+    precise double d;\r
+\r
+    pv3 *= pv3;\r
+    pv3 = fma(pv3, pv3, pv3);\r
+    d = fma(d, d, d);\r
+}\r
diff --git a/Test/baseResults/150.tesc.out b/Test/baseResults/150.tesc.out

index 5db3e49..944a082 100644 (file)
--- a/Test/baseResults/150.tesc.out
+++ b/Test/baseResults/150.tesc.out
@@ -341,6 +341,24 @@ ERROR: node is still EOpNull!
  0:44            4 (const int)\r
  0:44        Constant:\r
  0:44          1 (const int)\r
+0:64  Function Definition: foop( (void)\r
+0:64    Function Parameters: \r
+0:?     Sequence\r
+0:68      multiply second child into first child (3-component vector of float)\r
+0:68        'pv3' (3-component vector of float)\r
+0:68        'pv3' (3-component vector of float)\r
+0:69      move second child to first child (3-component vector of float)\r
+0:69        'pv3' (3-component vector of float)\r
+0:69        Function Call: fma(vf3;vf3;vf3; (3-component vector of float)\r
+0:69          'pv3' (3-component vector of float)\r
+0:69          'pv3' (3-component vector of float)\r
+0:69          'pv3' (3-component vector of float)\r
+0:70      move second child to first child (double)\r
+0:70        'd' (double)\r
+0:70        Function Call: fma(d1;d1;d1; (double)\r
+0:70          'd' (double)\r
+0:70          'd' (double)\r
+0:70          'd' (double)\r
  0:?   Linker Objects\r
  0:?     'gl_out' (out 4-element array of block{out 4-component vector of float gl_Position, out float gl_PointSize, out implicitly-sized array of float gl_ClipDistance})\r
  0:?     'outa' (4-element array of int)\r
@@ -356,6 +374,7 @@ ERROR: node is still EOpNull!
  0:?     'ovla' (layout(location=3 ) out 4-element array of 4-component vector of float)\r
  0:?     'ovlb' (layout(location=4 ) out 4-element array of 4-component vector of float)\r
  0:?     'ovlc' (layout(location=4 ) out 4-element array of 4-component vector of float)\r
+0:?     'pv3' (3-component vector of float)\r
  \r
  400.tese\r
  Warning, version 400 is not yet complete; most version-specific features are present, but some are missing.\r
@@ -1020,6 +1039,24 @@ vertices = 4
  0:44            4 (const int)\r
  0:44        Constant:\r
  0:44          1 (const int)\r
+0:64  Function Definition: foop( (void)\r
+0:64    Function Parameters: \r
+0:?     Sequence\r
+0:68      multiply second child into first child (3-component vector of float)\r
+0:68        'pv3' (3-component vector of float)\r
+0:68        'pv3' (3-component vector of float)\r
+0:69      move second child to first child (3-component vector of float)\r
+0:69        'pv3' (3-component vector of float)\r
+0:69        Function Call: fma(vf3;vf3;vf3; (3-component vector of float)\r
+0:69          'pv3' (3-component vector of float)\r
+0:69          'pv3' (3-component vector of float)\r
+0:69          'pv3' (3-component vector of float)\r
+0:70      move second child to first child (double)\r
+0:70        'd' (double)\r
+0:70        Function Call: fma(d1;d1;d1; (double)\r
+0:70          'd' (double)\r
+0:70          'd' (double)\r
+0:70          'd' (double)\r
  0:8  Function Definition: main( (void)\r
  0:8    Function Parameters: \r
  0:15  Function Definition: main( (void)\r
@@ -1098,6 +1135,7 @@ vertices = 4
  0:?     'ovla' (layout(location=3 ) out 4-element array of 4-component vector of float)\r
  0:?     'ovlb' (layout(location=4 ) out 4-element array of 4-component vector of float)\r
  0:?     'ovlc' (layout(location=4 ) out 4-element array of 4-component vector of float)\r
+0:?     'pv3' (3-component vector of float)\r
  0:?     'a' (out 3-element array of int)\r
  0:?     'outb' (out 5-element array of int)\r
  0:?     'outc' (out 4-element array of int)\r
diff --git a/Test/baseResults/400.tesc.out b/Test/baseResults/400.tesc.out

index 79baf2e..9beb63d 100644 (file)
--- a/Test/baseResults/400.tesc.out
+++ b/Test/baseResults/400.tesc.out
@@ -126,6 +126,24 @@ ERROR: node is still EOpNull!
  0:44            4 (const int)\r
  0:44        Constant:\r
  0:44          1 (const int)\r
+0:64  Function Definition: foop( (void)\r
+0:64    Function Parameters: \r
+0:?     Sequence\r
+0:68      multiply second child into first child (3-component vector of float)\r
+0:68        'pv3' (3-component vector of float)\r
+0:68        'pv3' (3-component vector of float)\r
+0:69      move second child to first child (3-component vector of float)\r
+0:69        'pv3' (3-component vector of float)\r
+0:69        Function Call: fma(vf3;vf3;vf3; (3-component vector of float)\r
+0:69          'pv3' (3-component vector of float)\r
+0:69          'pv3' (3-component vector of float)\r
+0:69          'pv3' (3-component vector of float)\r
+0:70      move second child to first child (double)\r
+0:70        'd' (double)\r
+0:70        Function Call: fma(d1;d1;d1; (double)\r
+0:70          'd' (double)\r
+0:70          'd' (double)\r
+0:70          'd' (double)\r
  0:?   Linker Objects\r
  0:?     'gl_out' (out 4-element array of block{out 4-component vector of float gl_Position, out float gl_PointSize, out implicitly-sized array of float gl_ClipDistance})\r
  0:?     'outa' (4-element array of int)\r
@@ -141,6 +159,7 @@ ERROR: node is still EOpNull!
  0:?     'ovla' (layout(location=3 ) out 4-element array of 4-component vector of float)\r
  0:?     'ovlb' (layout(location=4 ) out 4-element array of 4-component vector of float)\r
  0:?     'ovlc' (layout(location=4 ) out 4-element array of 4-component vector of float)\r
+0:?     'pv3' (3-component vector of float)\r
  \r
  \r
  Linked tessellation control stage:\r
@@ -258,6 +277,24 @@ ERROR: node is still EOpNull!
  0:44            4 (const int)\r
  0:44        Constant:\r
  0:44          1 (const int)\r
+0:64  Function Definition: foop( (void)\r
+0:64    Function Parameters: \r
+0:?     Sequence\r
+0:68      multiply second child into first child (3-component vector of float)\r
+0:68        'pv3' (3-component vector of float)\r
+0:68        'pv3' (3-component vector of float)\r
+0:69      move second child to first child (3-component vector of float)\r
+0:69        'pv3' (3-component vector of float)\r
+0:69        Function Call: fma(vf3;vf3;vf3; (3-component vector of float)\r
+0:69          'pv3' (3-component vector of float)\r
+0:69          'pv3' (3-component vector of float)\r
+0:69          'pv3' (3-component vector of float)\r
+0:70      move second child to first child (double)\r
+0:70        'd' (double)\r
+0:70        Function Call: fma(d1;d1;d1; (double)\r
+0:70          'd' (double)\r
+0:70          'd' (double)\r
+0:70          'd' (double)\r
  0:?   Linker Objects\r
  0:?     'gl_out' (out 4-element array of block{out 4-component vector of float gl_Position, out float gl_PointSize, out 1-element array of float gl_ClipDistance})\r
  0:?     'outa' (4-element array of int)\r
@@ -273,4 +310,5 @@ ERROR: node is still EOpNull!
  0:?     'ovla' (layout(location=3 ) out 4-element array of 4-component vector of float)\r
  0:?     'ovlb' (layout(location=4 ) out 4-element array of 4-component vector of float)\r
  0:?     'ovlc' (layout(location=4 ) out 4-element array of 4-component vector of float)\r
+0:?     'pv3' (3-component vector of float)\r
  \r
diff --git a/Todo.txt b/Todo.txt

index 9848a64..f3912ad 100644 (file)
--- a/Todo.txt
+++ b/Todo.txt
@@ -147,7 +147,7 @@ Shader Functionality to Implement/Finish
        + Sampler arrays can take a variable index now, as long as its value is uniform for all uses.
        - Per-sample shading. Including sample input mask gl_SampleMaskIn[] and per-sample interpolation, with explicit interpolation built-ins interpolateAtCentroid(), interpolateAtSample(), and interpolateAtOffset().
        - New precise qualifier to disallow optimizations that re-order operations or treat different instances of the same operator with different precision.
-      - Add a fused multiply and add built-in, fma(), in relation to the new precise qualifier. (Because “a * b + c” will require two operations under new rules for precise.)
+      + Add a fused multiply and add built-in, fma(), in relation to the new precise qualifier. (Because “a * b + c” will require two operations under new rules for precise.)
        + Added new built-in floating-point functions 
           + frexp() and ldexp()
           + packUnorm2x16(), unpackUnorm2x16(),
@@ -203,9 +203,9 @@ Shader Functionality to Implement/Finish
        + Add a new atomic_uint type to support atomic counters. Also, add built-in functions for manipulating atomic counters.
          + atomicCounterIncrement, atomicCounterDecrement, and atomicCounter
        - Add layout qualifier identifiers binding and offset to bind units to sampler and image variable declarations, atomic counters, and uniform blocks.
-      - Add built-in functions to pack/unpack 16 bit floating-point numbers (ARB_shading_language_pack2f).
-        - packHalf2x16 and unpackHalf2x16
-        - packSnorm2x16 and unpackSnorm2x16
+      + Add built-in functions to pack/unpack 16 bit floating-point numbers (ARB_shading_language_pack2f).
+        + packHalf2x16 and unpackHalf2x16
+        + packSnorm2x16 and unpackSnorm2x16
        - Add gl_FragDepth layout qualifiers to communicate what kind of changes will be made to gl_FragDepth (GL_AMD_conservative depth).
        + Add C-style curly brace initializer lists syntax for initializers. Full initialization of aggregates is required when these are used.
        + Allow .length() to be applied to vectors and matrices, returning the number of components or columns.
diff --git a/glslang/MachineIndependent/Initialize.cpp b/glslang/MachineIndependent/Initialize.cpp

index 4a53a42..a0a7deb 100644 (file)
--- a/glslang/MachineIndependent/Initialize.cpp
+++ b/glslang/MachineIndependent/Initialize.cpp
@@ -492,6 +492,21 @@ void TBuiltIns::initialize(int version, EProfile profile)
              "\n");
      }
  
+    if (profile != EEsProfile && version >= 400) {
+        commonBuiltins.append(
+            "float  fma(float,  float,  float );"
+            "vec2   fma(vec2,   vec2,   vec2  );"
+            "vec3   fma(vec3,   vec3,   vec3  );"
+            "vec4   fma(vec4,   vec4,   vec4  );"
+
+            "double fma(double, double, double);"
+            "dvec2  fma(dvec2,  dvec2,  dvec2 );"
+            "dvec3  fma(dvec3,  dvec3,  dvec3 );"
+            "dvec4  fma(dvec4,  dvec4,  dvec4 );"
+            
+            "\n");
+    }
+
      if ((profile == EEsProfile && version >= 310) ||
          (profile != EEsProfile && version >= 400)) {
          commonBuiltins.append(
@@ -529,10 +544,16 @@ void TBuiltIns::initialize(int version, EProfile profile)
      if ((profile == EEsProfile && version >= 300) ||
          (profile != EEsProfile && version >= 400)) {
          commonBuiltins.append(
-            "highp uint packSnorm2x16(vec2);"
-            "highp vec2 unpackSnorm2x16(highp uint);"
              "highp uint packUnorm2x16(vec2);"
              "highp vec2 unpackUnorm2x16(highp uint);"
+            "\n");
+    }
+
+    if ((profile == EEsProfile && version >= 300) ||
+        (profile != EEsProfile && version >= 420)) {
+        commonBuiltins.append(
+            "highp uint packSnorm2x16(vec2);"
+            "highp vec2 unpackSnorm2x16(highp uint);"
              "highp uint packHalf2x16(mediump vec2);"
              "mediump vec2 unpackHalf2x16(highp uint);"
              "\n");
author	John Kessenich <cepheus@frii.com>
	Thu, 14 Aug 2014 20:05:57 +0000 (20:05 +0000)
committer	John Kessenich <cepheus@frii.com>
	Thu, 14 Aug 2014 20:05:57 +0000 (20:05 +0000)
Test/400.tesc		patch \| blob \| history
Test/baseResults/150.tesc.out		patch \| blob \| history
Test/baseResults/400.tesc.out		patch \| blob \| history
Todo.txt		patch \| blob \| history
glslang/MachineIndependent/Initialize.cpp		patch \| blob \| history