glslang/MachineIndependent/linkValidate.cpp

   1 //\r
   2 //Copyright (C) 2013 LunarG, Inc.\r
   3 //\r
   4 //All rights reserved.\r
   5 //\r
   6 //Redistribution and use in source and binary forms, with or without\r
   7 //modification, are permitted provided that the following conditions\r
   8 //are met:\r
   9 //\r
  10 //    Redistributions of source code must retain the above copyright\r
  11 //    notice, this list of conditions and the following disclaimer.\r
  12 //\r
  13 //    Redistributions in binary form must reproduce the above\r
  14 //    copyright notice, this list of conditions and the following\r
  15 //    disclaimer in the documentation and/or other materials provided\r
  16 //    with the distribution.\r
  17 //\r
  18 //    Neither the name of 3Dlabs Inc. Ltd. nor the names of its\r
  19 //    contributors may be used to endorse or promote products derived\r
  20 //    from this software without specific prior written permission.\r
  21 //\r
  22 //THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\r
  23 //"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\r
  24 //LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS\r
  25 //FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE\r
  26 //COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,\r
  27 //INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,\r
  28 //BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;\r
  29 //LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER\r
  30 //CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT\r
  31 //LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN\r
  32 //ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE\r
  33 //POSSIBILITY OF SUCH DAMAGE.\r
  34 //\r
  35 \r
  36 //\r
  37 // Do link-time merging and validation of intermediate representations.\r
  38 //\r
  39 // Basic model is that during compilation, each compilation unit (shader) is\r
  40 // compiled into one TIntermediate instance.  Then, at link time, multiple\r
  41 // units for the same stage can be merged together, which can generate errors.\r
  42 // Then, after all merging, a single instance of TIntermediate represents\r
  43 // the whole stage.  A final error check can be done on the resulting stage,\r
  44 // even if no merging was done (i.e., the stage was only one compilation unit).\r
  45 //\r
  46 \r
  47 #include "localintermediate.h"\r
  48 #include "../Include/InfoSink.h"\r
  49 \r
  50 namespace glslang {\r
  51     \r
  52 //\r
  53 // Link-time error emitter.\r
  54 //\r
  55 void TIntermediate::error(TInfoSink& infoSink, const char* message)\r
  56 {\r
  57     infoSink.info.prefix(EPrefixError);\r
  58     infoSink.info << "Linking " << StageName(language) << " stage: " << message << "\n";\r
  59 \r
  60     ++numErrors;\r
  61 }\r
  62 \r
  63 // TODO: 4.4 offset/align:  "Two blocks linked together in the same program with the same block \r
  64 // name must have the exact same set of members qualified with offset and their integral-constant \r
  65 // expression values must be the same, or a link-time error results."\r
  66 \r
  67 //\r
  68 // Merge the information from 'unit' into 'this'\r
  69 //\r
  70 void TIntermediate::merge(TInfoSink& infoSink, TIntermediate& unit)\r
  71 {\r
  72     numMains += unit.numMains;\r
  73     numErrors += unit.numErrors;\r
  74     callGraph.insert(callGraph.end(), unit.callGraph.begin(), unit.callGraph.end());\r
  75 \r
  76     if ((profile != EEsProfile && unit.profile == EEsProfile) ||\r
  77         (profile == EEsProfile && unit.profile != EEsProfile))\r
  78         error(infoSink, "Cannot mix ES profile with non-ES profile shaders\n");\r
  79 \r
  80     if (originUpperLeft != unit.originUpperLeft || pixelCenterInteger != unit.pixelCenterInteger)\r
  81         error(infoSink, "gl_FragCoord redeclarations must match across shaders\n");\r
  82 \r
  83     if (! earlyFragmentTests)\r
  84         earlyFragmentTests = unit.earlyFragmentTests;\r
  85 \r
  86     if (depthLayout == EldNone)\r
  87         depthLayout = unit.depthLayout;\r
  88     else if (depthLayout != unit.depthLayout)\r
  89         error(infoSink, "Contradictory depth layouts");\r
  90 \r
  91     if (inputPrimitive == ElgNone)\r
  92         inputPrimitive = unit.inputPrimitive;\r
  93     else if (inputPrimitive != unit.inputPrimitive)\r
  94         error(infoSink, "Contradictory input layout primitives");\r
  95     \r
  96     if (outputPrimitive == ElgNone)\r
  97         outputPrimitive = unit.outputPrimitive;\r
  98     else if (outputPrimitive != unit.outputPrimitive)\r
  99         error(infoSink, "Contradictory output layout primitives");\r
 100     \r
 101     if (vertices == 0)\r
 102         vertices = unit.vertices;\r
 103     else if (vertices != unit.vertices) {\r
 104         if (language == EShLangGeometry)\r
 105             error(infoSink, "Contradictory layout max_vertices values");\r
 106         else if (language == EShLangTessControl)\r
 107             error(infoSink, "Contradictory layout vertices values");\r
 108         else\r
 109             assert(0);\r
 110     }\r
 111 \r
 112     if (vertexSpacing == EvsNone)\r
 113         vertexSpacing = unit.vertexSpacing;\r
 114     else if (vertexSpacing != unit.vertexSpacing)\r
 115         error(infoSink, "Contradictory input vertex spacing");\r
 116 \r
 117     if (vertexOrder == EvoNone)\r
 118         vertexOrder = unit.vertexOrder;\r
 119     else if (vertexOrder != unit.vertexOrder)\r
 120         error(infoSink, "Contradictory triangle ordering");\r
 121 \r
 122     if (unit.pointMode)\r
 123         pointMode = true;\r
 124 \r
 125     for (int i = 0; i < 3; ++i) {\r
 126         if (localSize[i] > 1)\r
 127             localSize[i] = unit.localSize[i];\r
 128         else if (localSize[i] != unit.localSize[i])\r
 129             error(infoSink, "Contradictory local size");\r
 130     }\r
 131 \r
 132     if (unit.xfbMode)\r
 133         xfbMode = true;\r
 134     for (size_t b = 0; b < xfbBuffers.size(); ++b) {\r
 135         if (xfbBuffers[b].stride == TQualifier::layoutXfbStrideEnd)\r
 136             xfbBuffers[b].stride = unit.xfbBuffers[b].stride;\r
 137         else if (xfbBuffers[b].stride != unit.xfbBuffers[b].stride)\r
 138             error(infoSink, "Contradictory xfb_stride");\r
 139         xfbBuffers[b].implicitStride = std::max(xfbBuffers[b].implicitStride, unit.xfbBuffers[b].implicitStride);\r
 140         if (unit.xfbBuffers[b].containsDouble)\r
 141             xfbBuffers[b].containsDouble = true;\r
 142         // TODO: 4.4 link: enhanced layouts: compare ranges\r
 143     }\r
 144 \r
 145     if (unit.treeRoot == 0)\r
 146         return;\r
 147 \r
 148     if (treeRoot == 0) {\r
 149         treeRoot = unit.treeRoot;\r
 150         version = unit.version;\r
 151         requestedExtensions = unit.requestedExtensions;\r
 152         return;\r
 153     }\r
 154 \r
 155     // Getting this far means we have two existing trees to merge...\r
 156     \r
 157     version = std::max(version, unit.version);\r
 158     requestedExtensions.insert(unit.requestedExtensions.begin(), unit.requestedExtensions.end());\r
 159 \r
 160     // Get the top-level globals of each unit\r
 161     TIntermSequence& globals = treeRoot->getAsAggregate()->getSequence();\r
 162     TIntermSequence& unitGlobals = unit.treeRoot->getAsAggregate()->getSequence();\r
 163 \r
 164     // Get the linker-object lists\r
 165     TIntermSequence& linkerObjects = findLinkerObjects();\r
 166     TIntermSequence& unitLinkerObjects = unit.findLinkerObjects();\r
 167 \r
 168     mergeBodies(infoSink, globals, unitGlobals);\r
 169     mergeLinkerObjects(infoSink, linkerObjects, unitLinkerObjects);\r
 170 \r
 171     ioAccessed.insert(unit.ioAccessed.begin(), unit.ioAccessed.end());\r
 172 }\r
 173 \r
 174 //\r
 175 // Merge the function bodies and global-level initializers from unitGlobals into globals.\r
 176 // Will error check duplication of function bodies for the same signature.\r
 177 //\r
 178 void TIntermediate::mergeBodies(TInfoSink& infoSink, TIntermSequence& globals, const TIntermSequence& unitGlobals)\r
 179 {\r
 180     // TODO: link-time performance: Processing in alphabetical order will be faster\r
 181 \r
 182     // Error check the global objects, not including the linker objects\r
 183     for (unsigned int child = 0; child < globals.size() - 1; ++child) {\r
 184         for (unsigned int unitChild = 0; unitChild < unitGlobals.size() - 1; ++unitChild) {\r
 185             TIntermAggregate* body = globals[child]->getAsAggregate();\r
 186             TIntermAggregate* unitBody = unitGlobals[unitChild]->getAsAggregate();\r
 187             if (body && unitBody && body->getOp() == EOpFunction && unitBody->getOp() == EOpFunction && body->getName() == unitBody->getName()) {\r
 188                 error(infoSink, "Multiple function bodies in multiple compilation units for the same signature in the same stage:");\r
 189                 infoSink.info << "    " << globals[child]->getAsAggregate()->getName() << "\n";\r
 190             }\r
 191         }\r
 192     }\r
 193 \r
 194     // Merge the global objects, just in front of the linker objects\r
 195     globals.insert(globals.end() - 1, unitGlobals.begin(), unitGlobals.end() - 1);\r
 196 }\r
 197 \r
 198 //\r
 199 // Merge the linker objects from unitLinkerObjects into linkerObjects.\r
 200 // Duplication is expected and filtered out, but contradictions are an error.\r
 201 //\r
 202 void TIntermediate::mergeLinkerObjects(TInfoSink& infoSink, TIntermSequence& linkerObjects, const TIntermSequence& unitLinkerObjects)\r
 203 {\r
 204     // Error check and merge the linker objects (duplicates should not be created)\r
 205     std::size_t initialNumLinkerObjects = linkerObjects.size();\r
 206     for (unsigned int unitLinkObj = 0; unitLinkObj < unitLinkerObjects.size(); ++unitLinkObj) {\r
 207         bool merge = true;\r
 208         for (std::size_t linkObj = 0; linkObj < initialNumLinkerObjects; ++linkObj) {\r
 209             TIntermSymbol* symbol = linkerObjects[linkObj]->getAsSymbolNode();\r
 210             TIntermSymbol* unitSymbol = unitLinkerObjects[unitLinkObj]->getAsSymbolNode();\r
 211             assert(symbol && unitSymbol);\r
 212             if (symbol->getName() == unitSymbol->getName()) {\r
 213                 // filter out copy\r
 214                 merge = false;\r
 215 \r
 216                 // but if one has an initializer and the other does not, update\r
 217                 // the initializer\r
 218                 if (symbol->getConstArray().empty() && ! unitSymbol->getConstArray().empty())\r
 219                     symbol->setConstArray(unitSymbol->getConstArray());\r
 220 \r
 221                 // Similarly for binding\r
 222                 if (! symbol->getQualifier().hasBinding() && unitSymbol->getQualifier().hasBinding())\r
 223                     symbol->getQualifier().layoutBinding = unitSymbol->getQualifier().layoutBinding;\r
 224 \r
 225                 // Update implicit array sizes\r
 226                 mergeImplicitArraySizes(symbol->getWritableType(), unitSymbol->getType());\r
 227 \r
 228                 // Check for consistent types/qualification/initializers etc.\r
 229                 mergeErrorCheck(infoSink, *symbol, *unitSymbol, false);\r
 230             }\r
 231         }\r
 232         if (merge)\r
 233             linkerObjects.push_back(unitLinkerObjects[unitLinkObj]);\r
 234     }\r
 235 }\r
 236 \r
 237 // TODO 4.5 link functionality: cull distance array size checking\r
 238 \r
 239 // Recursively merge the implicit array sizes through the objects' respective type trees.\r
 240 void TIntermediate::mergeImplicitArraySizes(TType& type, const TType& unitType)\r
 241 {\r
 242     if (type.isImplicitlySizedArray() && unitType.isArray()) {\r
 243         int newImplicitArraySize = unitType.getArraySize();\r
 244         if (newImplicitArraySize == 0)\r
 245             newImplicitArraySize = unitType.getImplicitArraySize();\r
 246         if (newImplicitArraySize > type.getImplicitArraySize ())\r
 247             type.setImplicitArraySize(newImplicitArraySize);\r
 248     }\r
 249 \r
 250     // Type mismatches are caught and reported after this, just be careful for now.\r
 251     if (! type.isStruct() || ! unitType.isStruct() || type.getStruct()->size() != unitType.getStruct()->size())\r
 252         return;\r
 253 \r
 254     for (int i = 0; i < (int)type.getStruct()->size(); ++i)\r
 255         mergeImplicitArraySizes(*(*type.getStruct())[i].type, *(*unitType.getStruct())[i].type);\r
 256 }\r
 257 \r
 258 //\r
 259 // Compare two global objects from two compilation units and see if they match\r
 260 // well enough.  Rules can be different for intra- vs. cross-stage matching.\r
 261 //\r
 262 // This function only does one of intra- or cross-stage matching per call.\r
 263 //\r
 264 void TIntermediate::mergeErrorCheck(TInfoSink& infoSink, const TIntermSymbol& symbol, const TIntermSymbol& unitSymbol, bool crossStage)\r
 265 {\r
 266     bool writeTypeComparison = false;\r
 267 \r
 268     // Types have to match\r
 269     if (symbol.getType() != unitSymbol.getType()) {\r
 270         error(infoSink, "Types must match:");\r
 271         writeTypeComparison = true;\r
 272     }\r
 273 \r
 274     // Qualifiers have to (almost) match\r
 275 \r
 276     // Storage...\r
 277     if (symbol.getQualifier().storage != unitSymbol.getQualifier().storage) {\r
 278         error(infoSink, "Storage qualifiers must match:");\r
 279         writeTypeComparison = true;\r
 280     }\r
 281 \r
 282     // Precision...\r
 283     if (symbol.getQualifier().precision != unitSymbol.getQualifier().precision) {\r
 284         error(infoSink, "Precision qualifiers must match:");\r
 285         writeTypeComparison = true;\r
 286     }\r
 287 \r
 288     // Invariance...\r
 289     if (! crossStage && symbol.getQualifier().invariant != unitSymbol.getQualifier().invariant) {\r
 290         error(infoSink, "Presence of invariant qualifier must match:");\r
 291         writeTypeComparison = true;\r
 292     }\r
 293 \r
 294     // Auxiliary and interpolation...\r
 295     if (symbol.getQualifier().centroid  != unitSymbol.getQualifier().centroid ||\r
 296         symbol.getQualifier().smooth    != unitSymbol.getQualifier().smooth ||\r
 297         symbol.getQualifier().flat      != unitSymbol.getQualifier().flat ||\r
 298         symbol.getQualifier().sample    != unitSymbol.getQualifier().sample ||\r
 299         symbol.getQualifier().patch     != unitSymbol.getQualifier().patch ||\r
 300         symbol.getQualifier().nopersp   != unitSymbol.getQualifier().nopersp) {\r
 301         error(infoSink, "Interpolation and auxiliary storage qualifiers must match:");\r
 302         writeTypeComparison = true;\r
 303     }\r
 304 \r
 305     // Memory...\r
 306     if (symbol.getQualifier().coherent  != unitSymbol.getQualifier().coherent ||\r
 307         symbol.getQualifier().volatil   != unitSymbol.getQualifier().volatil ||\r
 308         symbol.getQualifier().restrict  != unitSymbol.getQualifier().restrict ||\r
 309         symbol.getQualifier().readonly  != unitSymbol.getQualifier().readonly ||\r
 310         symbol.getQualifier().writeonly != unitSymbol.getQualifier().writeonly) {\r
 311         error(infoSink, "Memory qualifiers must match:");\r
 312         writeTypeComparison = true;\r
 313     }\r
 314 \r
 315     // Layouts... \r
 316     // TODO: 4.4 enhanced layouts: Generalize to include offset/align: current spec \r
 317     //       requires separate user-supplied offset from actual computed offset, but \r
 318     //       current implementation only has one offset.\r
 319     if (symbol.getQualifier().layoutMatrix    != unitSymbol.getQualifier().layoutMatrix ||\r
 320         symbol.getQualifier().layoutPacking   != unitSymbol.getQualifier().layoutPacking ||\r
 321         symbol.getQualifier().layoutLocation  != unitSymbol.getQualifier().layoutLocation ||\r
 322         symbol.getQualifier().layoutComponent != unitSymbol.getQualifier().layoutComponent ||\r
 323         symbol.getQualifier().layoutIndex     != unitSymbol.getQualifier().layoutIndex ||\r
 324         symbol.getQualifier().layoutBinding   != unitSymbol.getQualifier().layoutBinding ||\r
 325         (symbol.getQualifier().hasBinding() && (symbol.getQualifier().layoutOffset != unitSymbol.getQualifier().layoutOffset))) {\r
 326         error(infoSink, "Layout qualification must match:");\r
 327         writeTypeComparison = true;\r
 328     }\r
 329 \r
 330     // Initializers have to match, if both are present, and if we don't already know the types don't match\r
 331     if (! writeTypeComparison) {\r
 332         if (! symbol.getConstArray().empty() && ! unitSymbol.getConstArray().empty()) {\r
 333             if (symbol.getConstArray() != unitSymbol.getConstArray()) {\r
 334                 error(infoSink, "Initializers must match:");\r
 335                 infoSink.info << "    " << symbol.getName() << "\n";\r
 336             }\r
 337         }\r
 338     }\r
 339 \r
 340     if (writeTypeComparison)\r
 341         infoSink.info << "    " << symbol.getName() << ": \"" << symbol.getType().getCompleteString() << "\" versus \"" <<\r
 342                                                              unitSymbol.getType().getCompleteString() << "\"\n";\r
 343 }\r
 344 \r
 345 //\r
 346 // Do final link-time error checking of a complete (merged) intermediate representation.\r
 347 // (Much error checking was done during merging).\r
 348 //\r
 349 // Also, lock in defaults of things not set, including array sizes.\r
 350 //\r
 351 void TIntermediate::finalCheck(TInfoSink& infoSink)\r
 352 {   \r
 353     if (numMains < 1)\r
 354         error(infoSink, "Missing entry point: Each stage requires one \"void main()\" entry point");\r
 355 \r
 356     // recursion checking\r
 357     checkCallGraphCycles(infoSink);\r
 358 \r
 359     // overlap/alias/missing I/O, etc.\r
 360     inOutLocationCheck(infoSink);\r
 361 \r
 362     if (inIoAccessed("gl_ClipDistance") && inIoAccessed("gl_ClipVertex"))\r
 363         error(infoSink, "Can only use one of gl_ClipDistance or gl_ClipVertex (gl_ClipDistance is preferred)");\r
 364 \r
 365     if (userOutputUsed() && (inIoAccessed("gl_FragColor") || inIoAccessed("gl_FragData")))\r
 366         error(infoSink, "Cannot use gl_FragColor or gl_FragData when using user-defined outputs");\r
 367     if (inIoAccessed("gl_FragColor") && inIoAccessed("gl_FragData"))\r
 368         error(infoSink, "Cannot use both gl_FragColor and gl_FragData");\r
 369 \r
 370     for (size_t b = 0; b < xfbBuffers.size(); ++b) {\r
 371         if (xfbBuffers[b].containsDouble)\r
 372             RoundToPow2(xfbBuffers[b].implicitStride, 8);\r
 373 \r
 374         // "It is a compile-time or link-time error to have \r
 375         // any xfb_offset that overflows xfb_stride, whether stated on declarations before or after the xfb_stride, or\r
 376         // in different compilation units. While xfb_stride can be declared multiple times for the same buffer, it is a\r
 377         // compile-time or link-time error to have different values specified for the stride for the same buffer."\r
 378         if (xfbBuffers[b].stride != TQualifier::layoutXfbStrideEnd && xfbBuffers[b].implicitStride > xfbBuffers[b].stride) {\r
 379             error(infoSink, "xfb_stride is too small to hold all buffer entries:");\r
 380             infoSink.info.prefix(EPrefixError);\r
 381             infoSink.info << "    xfb_buffer " << (unsigned int)b << ", xfb_stride " << xfbBuffers[b].stride << ", minimum stride needed: " << xfbBuffers[b].implicitStride << "\n";\r
 382         }\r
 383         if (xfbBuffers[b].stride == TQualifier::layoutXfbStrideEnd)\r
 384             xfbBuffers[b].stride = xfbBuffers[b].implicitStride;\r
 385 \r
 386         // "If the buffer is capturing any \r
 387         // outputs with double-precision components, the stride must be a multiple of 8, otherwise it must be a \r
 388         // multiple of 4, or a compile-time or link-time error results."\r
 389         if (xfbBuffers[b].containsDouble && ! IsMultipleOfPow2(xfbBuffers[b].stride, 8)) {\r
 390             error(infoSink, "xfb_stride must be multiple of 8 for buffer holding a double:");\r
 391             infoSink.info.prefix(EPrefixError);\r
 392             infoSink.info << "    xfb_buffer " << (unsigned int)b << ", xfb_stride " << xfbBuffers[b].stride << "\n";\r
 393         } else if (! IsMultipleOfPow2(xfbBuffers[b].stride, 4)) {\r
 394             error(infoSink, "xfb_stride must be multiple of 4:");\r
 395             infoSink.info.prefix(EPrefixError);\r
 396             infoSink.info << "    xfb_buffer " << (unsigned int)b << ", xfb_stride " << xfbBuffers[b].stride << "\n";\r
 397         }\r
 398 \r
 399         // "The resulting stride (implicit or explicit), when divided by 4, must be less than or equal to the \r
 400         // implementation-dependent constant gl_MaxTransformFeedbackInterleavedComponents."\r
 401         if (xfbBuffers[b].stride > (unsigned int)(4 * resources.maxTransformFeedbackInterleavedComponents)) {\r
 402             error(infoSink, "xfb_stride is too large:");\r
 403             infoSink.info.prefix(EPrefixError);\r
 404             infoSink.info << "    xfb_buffer " << (unsigned int)b << ", components (1/4 stride) needed are " << xfbBuffers[b].stride/4 << ", gl_MaxTransformFeedbackInterleavedComponents is " << resources.maxTransformFeedbackInterleavedComponents << "\n";\r
 405         }\r
 406     }\r
 407 \r
 408     switch (language) {\r
 409     case EShLangVertex:\r
 410         break;\r
 411     case EShLangTessControl:\r
 412         if (vertices == 0)\r
 413             error(infoSink, "At least one shader must specify an output layout(vertices=...)");\r
 414         break;\r
 415     case EShLangTessEvaluation:\r
 416         if (inputPrimitive == ElgNone)\r
 417             error(infoSink, "At least one shader must specify an input layout primitive");\r
 418         if (vertexSpacing == EvsNone)\r
 419             vertexSpacing = EvsEqual;\r
 420         if (vertexOrder == EvoNone)\r
 421             vertexOrder = EvoCcw;\r
 422         break;\r
 423     case EShLangGeometry:\r
 424         if (inputPrimitive == ElgNone)\r
 425             error(infoSink, "At least one shader must specify an input layout primitive");\r
 426         if (outputPrimitive == ElgNone)\r
 427             error(infoSink, "At least one shader must specify an output layout primitive");\r
 428         if (vertices == 0)\r
 429             error(infoSink, "At least one shader must specify a layout(max_vertices = value)");\r
 430         break;\r
 431     case EShLangFragment:\r
 432         break;\r
 433     case EShLangCompute:\r
 434         break;\r
 435     }\r
 436 \r
 437     // Process the tree for any node-specific work.\r
 438     class TFinalLinkTraverser : public TIntermTraverser {\r
 439     public:\r
 440         TFinalLinkTraverser() { }\r
 441         virtual ~TFinalLinkTraverser() { }\r
 442 \r
 443         virtual void visitSymbol(TIntermSymbol* symbol)\r
 444         {\r
 445             // Implicitly size arrays.\r
 446             symbol->getWritableType().adoptImplicitArraySizes();\r
 447         }\r
 448     } finalLinkTraverser;\r
 449 \r
 450     treeRoot->traverse(&finalLinkTraverser);\r
 451 }\r
 452 \r
 453 //\r
 454 // See if the call graph contains any static recursion, which is disallowed\r
 455 // by the specification.\r
 456 //\r
 457 void TIntermediate::checkCallGraphCycles(TInfoSink& infoSink)\r
 458 {\r
 459     // Reset everything, once.\r
 460     for (TGraph::iterator call = callGraph.begin(); call != callGraph.end(); ++call) {\r
 461         call->visited = false;\r
 462         call->currentPath = false;\r
 463         call->errorGiven = false;\r
 464     }\r
 465 \r
 466     //\r
 467     // Loop, looking for a new connected subgraph.  One subgraph is handled per loop iteration.\r
 468     //\r
 469 \r
 470     TCall* newRoot;\r
 471     do {\r
 472         // See if we have unvisited parts of the graph.\r
 473         newRoot = 0;\r
 474         for (TGraph::iterator call = callGraph.begin(); call != callGraph.end(); ++call) {\r
 475             if (! call->visited) {\r
 476                 newRoot = &(*call);\r
 477                 break;\r
 478             }\r
 479         }\r
 480 \r
 481         // If not, we are done.\r
 482         if (! newRoot)\r
 483             break;\r
 484 \r
 485         // Otherwise, we found a new subgraph, process it:\r
 486         // See what all can be reached by this new root, and if any of \r
 487         // that is recursive.  This is done by depth-first traversals, seeing\r
 488         // if a new call is found that was already in the currentPath (a back edge),\r
 489         // thereby detecting recursion.\r
 490         std::list<TCall*> stack;\r
 491         newRoot->currentPath = true; // currentPath will be true iff it is on the stack\r
 492         stack.push_back(newRoot);\r
 493         while (! stack.empty()) {\r
 494             // get a caller\r
 495             TCall* call = stack.back();\r
 496 \r
 497             // Add to the stack just one callee.\r
 498             // This algorithm always terminates, because only !visited and !currentPath causes a push\r
 499             // and all pushes change currentPath to true, and all pops change visited to true.\r
 500             TGraph::iterator child = callGraph.begin();\r
 501             for (; child != callGraph.end(); ++child) {\r
 502 \r
 503                 // If we already visited this node, its whole subgraph has already been processed, so skip it.\r
 504                 if (child->visited)\r
 505                     continue;\r
 506 \r
 507                 if (call->callee == child->caller) {\r
 508                     if (child->currentPath) {\r
 509                         // Then, we found a back edge\r
 510                         if (! child->errorGiven) {\r
 511                             error(infoSink, "Recursion detected:");\r
 512                             infoSink.info << "    " << call->callee << " calling " << child->callee << "\n";\r
 513                             child->errorGiven = true;\r
 514                             recursive = true;\r
 515                         }\r
 516                     } else {\r
 517                         child->currentPath = true;\r
 518                         stack.push_back(&(*child));\r
 519                         break;\r
 520                     }\r
 521                 }\r
 522             }\r
 523             if (child == callGraph.end()) {\r
 524                 // no more callees, we bottomed out, never look at this node again\r
 525                 stack.back()->currentPath = false;\r
 526                 stack.back()->visited = true;\r
 527                 stack.pop_back();\r
 528             }\r
 529         }  // end while, meaning nothing left to process in this subtree\r
 530 \r
 531     } while (newRoot);  // redundant loop check; should always exit via the 'break' above\r
 532 }\r
 533 \r
 534 //\r
 535 // Satisfy rules for location qualifiers on inputs and outputs\r
 536 //\r
 537 void TIntermediate::inOutLocationCheck(TInfoSink& infoSink)\r
 538 {\r
 539     // ES 3.0 requires all outputs to have location qualifiers if there is more than one output\r
 540     bool fragOutHasLocation = false;\r
 541     bool fragOutWithNoLocation = false;\r
 542     int numFragOut = 0;\r
 543 \r
 544     // TODO: linker functionality: location collision checking\r
 545 \r
 546     TIntermSequence& linkObjects = findLinkerObjects();\r
 547     for (size_t i = 0; i < linkObjects.size(); ++i) {\r
 548         const TType& type = linkObjects[i]->getAsTyped()->getType();\r
 549         const TQualifier& qualifier = type.getQualifier();\r
 550         if (language == EShLangFragment) {\r
 551             if (qualifier.storage == EvqVaryingOut) {\r
 552                 ++numFragOut;\r
 553                 if (qualifier.hasAnyLocation())\r
 554                     fragOutHasLocation = true;\r
 555                 else\r
 556                     fragOutWithNoLocation = true;\r
 557             }\r
 558         }\r
 559     }\r
 560 \r
 561     if (profile == EEsProfile) {\r
 562         if (numFragOut > 1 && fragOutWithNoLocation)\r
 563             error(infoSink, "when more than one fragment shader output, all must have location qualifiers");\r
 564     }        \r
 565 }\r
 566 \r
 567 TIntermSequence& TIntermediate::findLinkerObjects() const\r
 568 {\r
 569     // Get the top-level globals\r
 570     TIntermSequence& globals = treeRoot->getAsAggregate()->getSequence();\r
 571 \r
 572     // Get the last member of the sequences, expected to be the linker-object lists\r
 573     assert(globals.back()->getAsAggregate()->getOp() == EOpLinkerObjects);\r
 574 \r
 575     return globals.back()->getAsAggregate()->getSequence();\r
 576 }\r
 577 \r
 578 // See if a variable was both a user-declared output and used.\r
 579 // Note: the spec discusses writing to one, but this looks at read or write, which \r
 580 // is more useful, and perhaps the spec should be changed to reflect that.\r
 581 bool TIntermediate::userOutputUsed() const\r
 582 {\r
 583     const TIntermSequence& linkerObjects = findLinkerObjects();\r
 584 \r
 585     bool found = false;\r
 586     for (size_t i = 0; i < linkerObjects.size(); ++i) {\r
 587         const TIntermSymbol& symbolNode = *linkerObjects[i]->getAsSymbolNode();\r
 588         if (symbolNode.getQualifier().storage == EvqVaryingOut &&\r
 589             symbolNode.getName().compare(0, 3, "gl_") != 0 &&\r
 590             inIoAccessed(symbolNode.getName())) {            \r
 591             found = true;\r
 592             break;\r
 593         }\r
 594     }\r
 595 \r
 596     return found;\r
 597 }\r
 598 \r
 599 // Accumulate locations used for inputs, outputs, and uniforms, and check for collisions\r
 600 // as the accumulation is done.\r
 601 //\r
 602 // Returns < 0 if no collision, >= 0 if collision and the value returned is a colliding value.\r
 603 //\r
 604 // typeCollision is set to true if there is no direct collision, but the types in the same location\r
 605 // are different.\r
 606 //\r
 607 int TIntermediate::addUsedLocation(const TQualifier& qualifier, const TType& type, bool& typeCollision)\r
 608 {\r
 609     typeCollision = false;\r
 610 \r
 611     int set;\r
 612     if (qualifier.isPipeInput())\r
 613         set = 0;\r
 614     else if (qualifier.isPipeOutput())\r
 615         set = 1;\r
 616     else if (qualifier.storage == EvqUniform)\r
 617         set = 2;\r
 618     else if (qualifier.storage == EvqBuffer)\r
 619         set = 3;\r
 620     else\r
 621         return -1;\r
 622 \r
 623     int size;\r
 624     if (qualifier.isUniformOrBuffer()) {\r
 625         if (type.isArray())\r
 626             size = type.getArraySize();\r
 627         else\r
 628             size = 1;\r
 629     } else {\r
 630         // Strip off the outer array dimension for those having an extra one.\r
 631         if (type.isArray() && qualifier.isArrayedIo(language)) {\r
 632             TType elementType(type, 0);\r
 633             size = computeTypeLocationSize(elementType);\r
 634         } else\r
 635             size = computeTypeLocationSize(type);\r
 636     }\r
 637 \r
 638     TRange locationRange(qualifier.layoutLocation, qualifier.layoutLocation + size - 1);\r
 639     TRange componentRange(0, 3);\r
 640     if (qualifier.hasComponent()) {\r
 641         componentRange.start = qualifier.layoutComponent;\r
 642         componentRange.last = componentRange.start + type.getVectorSize() - 1;\r
 643     }\r
 644     TIoRange range(locationRange, componentRange, type.getBasicType(), qualifier.hasIndex() ? qualifier.layoutIndex : 0);\r
 645 \r
 646     // check for collisions, except for vertex inputs on desktop\r
 647     if (! (profile != EEsProfile && language == EShLangVertex && qualifier.isPipeInput())) {\r
 648         for (size_t r = 0; r < usedIo[set].size(); ++r) {\r
 649             if (range.overlap(usedIo[set][r])) {\r
 650                 // there is a collision; pick one\r
 651                 return std::max(locationRange.start, usedIo[set][r].location.start);\r
 652             } else if (locationRange.overlap(usedIo[set][r].location) && type.getBasicType() != usedIo[set][r].basicType) {\r
 653                 // aliased-type mismatch\r
 654                 typeCollision = true;\r
 655                 return std::max(locationRange.start, usedIo[set][r].location.start);\r
 656             }\r
 657         }\r
 658     }\r
 659 \r
 660     usedIo[set].push_back(range);\r
 661 \r
 662     return -1; // no collision\r
 663 }\r
 664 \r
 665 // Accumulate locations used for inputs, outputs, and uniforms, and check for collisions\r
 666 // as the accumulation is done.\r
 667 //\r
 668 // Returns < 0 if no collision, >= 0 if collision and the value returned is a colliding value.\r
 669 //\r
 670 int TIntermediate::addUsedOffsets(int binding, int offset, int numOffsets)\r
 671 {\r
 672     TRange bindingRange(binding, binding);\r
 673     TRange offsetRange(offset, offset + numOffsets - 1);\r
 674     TOffsetRange range(bindingRange, offsetRange);\r
 675 \r
 676     // check for collisions, except for vertex inputs on desktop\r
 677     for (size_t r = 0; r < usedAtomics.size(); ++r) {\r
 678         if (range.overlap(usedAtomics[r])) {\r
 679             // there is a collision; pick one\r
 680             return std::max(offset, usedAtomics[r].offset.start);\r
 681         }\r
 682     }\r
 683 \r
 684     usedAtomics.push_back(range);\r
 685 \r
 686     return -1; // no collision\r
 687 }\r
 688 \r
 689 // Recursively figure out how many locations are used up by an input or output type.\r
 690 // Return the size of type, as measured by "locations".\r
 691 int TIntermediate::computeTypeLocationSize(const TType& type) const\r
 692 {\r
 693     // "If the declared input is an array of size n and each element takes m locations, it will be assigned m * n \r
 694     // consecutive locations..."\r
 695     if (type.isArray()) {\r
 696         TType elementType(type, 0);\r
 697         if (type.isImplicitlySizedArray()) {\r
 698             // TODO: are there valid cases of having an implicitly-sized array with a location?  If so, running this code too early.\r
 699             return computeTypeLocationSize(elementType);\r
 700         } else\r
 701             return type.getArraySize() * computeTypeLocationSize(elementType);\r
 702     }\r
 703 \r
 704     // "The locations consumed by block and structure members are determined by applying the rules above \r
 705     // recursively..."    \r
 706     if (type.isStruct()) {\r
 707         int size = 0;\r
 708         for (int member = 0; member < (int)type.getStruct()->size(); ++member) {\r
 709             TType memberType(type, member);\r
 710             size += computeTypeLocationSize(memberType);\r
 711         }\r
 712         return size;\r
 713     }\r
 714 \r
 715     // "If a vertex shader input is any scalar or vector type, it will consume a single location. If a non-vertex \r
 716     // shader input is a scalar or vector type other than dvec3 or dvec4, it will consume a single location, while \r
 717     // types dvec3 or dvec4 will consume two consecutive locations. Inputs of type double and dvec2 will \r
 718     // consume only a single location, in all stages."\r
 719     if (type.isScalar())\r
 720         return 1;\r
 721     if (type.isVector()) {\r
 722         if (language == EShLangVertex && type.getQualifier().isPipeInput())\r
 723             return 1;\r
 724         if (type.getBasicType() == EbtDouble && type.getVectorSize() > 2)\r
 725             return 2;\r
 726         else\r
 727             return 1;\r
 728     }\r
 729 \r
 730     // "If the declared input is an n x m single- or double-precision matrix, ...\r
 731     // The number of locations assigned for each matrix will be the same as \r
 732     // for an n-element array of m-component vectors..."\r
 733     if (type.isMatrix()) {\r
 734         TType columnType(type, 0);\r
 735         return type.getMatrixCols() * computeTypeLocationSize(columnType);\r
 736     }\r
 737 \r
 738     assert(0);\r
 739     return 1;\r
 740 }\r
 741 \r
 742 // Accumulate xfb buffer ranges and check for collisions as the accumulation is done.\r
 743 //\r
 744 // Returns < 0 if no collision, >= 0 if collision and the value returned is a colliding value.\r
 745 //\r
 746 int TIntermediate::addXfbBufferOffset(const TType& type)\r
 747 {\r
 748     const TQualifier& qualifier = type.getQualifier();\r
 749 \r
 750     assert(qualifier.hasXfbOffset() && qualifier.hasXfbBuffer());\r
 751     TXfbBuffer& buffer = xfbBuffers[qualifier.layoutXfbBuffer];\r
 752 \r
 753     // compute the range\r
 754     unsigned int size = computeTypeXfbSize(type, buffer.containsDouble);\r
 755     buffer.implicitStride = std::max(buffer.implicitStride, qualifier.layoutXfbOffset + size);\r
 756     TRange range(qualifier.layoutXfbOffset, qualifier.layoutXfbOffset + size - 1);\r
 757 \r
 758     // check for collisions\r
 759     for (size_t r = 0; r < buffer.ranges.size(); ++r) {\r
 760         if (range.overlap(buffer.ranges[r])) {\r
 761             // there is a collision; pick an example to return\r
 762             return std::max(range.start, buffer.ranges[r].start);\r
 763         }\r
 764     }\r
 765 \r
 766     buffer.ranges.push_back(range);\r
 767 \r
 768     return -1;  // no collision\r
 769 }\r
 770 \r
 771 // Recursively figure out how many bytes of xfb buffer are used by the given type.\r
 772 // Return the size of type, in bytes.\r
 773 // Sets containsDouble to true if the type contains a double.\r
 774 // N.B. Caller must set containsDouble to false before calling.\r
 775 unsigned int TIntermediate::computeTypeXfbSize(const TType& type, bool& containsDouble) const\r
 776 {\r
 777     // "...if applied to an aggregate containing a double, the offset must also be a multiple of 8, \r
 778     // and the space taken in the buffer will be a multiple of 8.\r
 779     // ...within the qualified entity, subsequent components are each \r
 780     // assigned, in order, to the next available offset aligned to a multiple of\r
 781     // that component's size.  Aggregate types are flattened down to the component\r
 782     // level to get this sequence of components."\r
 783 \r
 784     if (type.isArray()) {\r
 785         assert(type.isExplicitlySizedArray());\r
 786         TType elementType(type, 0);\r
 787         return type.getArraySize() * computeTypeXfbSize(elementType, containsDouble);\r
 788     }\r
 789 \r
 790     if (type.isStruct()) {\r
 791         unsigned int size = 0;\r
 792         bool structContainsDouble = false;\r
 793         for (int member = 0; member < (int)type.getStruct()->size(); ++member) {\r
 794             TType memberType(type, member);\r
 795             // "... if applied to \r
 796             // an aggregate containing a double, the offset must also be a multiple of 8, \r
 797             // and the space taken in the buffer will be a multiple of 8."\r
 798             bool memberContainsDouble = false;\r
 799             int memberSize = computeTypeXfbSize(memberType, memberContainsDouble);\r
 800             if (memberContainsDouble) {\r
 801                 structContainsDouble = true;\r
 802                 RoundToPow2(size, 8);\r
 803             }\r
 804             size += memberSize;\r
 805         }\r
 806 \r
 807         if (structContainsDouble) {\r
 808             containsDouble = true;\r
 809             RoundToPow2(size, 8);\r
 810         }\r
 811         return size;\r
 812     }\r
 813 \r
 814     int numComponents;\r
 815     if (type.isScalar())\r
 816         numComponents = 1;\r
 817     else if (type.isVector())\r
 818         numComponents = type.getVectorSize();\r
 819     else if (type.isMatrix())\r
 820         numComponents = type.getMatrixCols() * type.getMatrixRows();\r
 821     else {\r
 822         assert(0);\r
 823         numComponents = 1;\r
 824     }\r
 825 \r
 826     if (type.getBasicType() == EbtDouble) {\r
 827         containsDouble = true;\r
 828         return 8 * numComponents;\r
 829     } else\r
 830         return 4 * numComponents;\r
 831 }\r
 832 \r
 833 const int baseAlignmentVec4Std140 = 16;\r
 834 \r
 835 // Return the size and alignment of a scalar.\r
 836 // The size is returned in the 'size' parameter\r
 837 // Return value is the alignment of the type.\r
 838 int TIntermediate::getBaseAlignmentScalar(const TType& type, int& size) const\r
 839 {\r
 840     switch (type.getBasicType()) {\r
 841     case EbtDouble:  size = 8; return 8;\r
 842     default:         size = 4; return 4;\r
 843     }\r
 844 }\r
 845 \r
 846 // Implement base-alignment and size rules from section 7.6.2.2 Standard Uniform Block Layout\r
 847 // Operates recursively.\r
 848 //\r
 849 // If std140 is true, it does the rounding up to vec4 size required by std140, \r
 850 // otherwise it does not, yielding std430 rules.\r
 851 //\r
 852 // The size is returned in the 'size' parameter\r
 853 // Return value is the alignment of the type.\r
 854 int TIntermediate::getBaseAlignment(const TType& type, int& size, bool std140) const\r
 855 {\r
 856     int alignment;\r
 857 \r
 858     // When using the std140 storage layout, structures will be laid out in buffer\r
 859     // storage with its members stored in monotonically increasing order based on their\r
 860     // location in the declaration. A structure and each structure member have a base\r
 861     // offset and a base alignment, from which an aligned offset is computed by rounding\r
 862     // the base offset up to a multiple of the base alignment. The base offset of the first\r
 863     // member of a structure is taken from the aligned offset of the structure itself. The\r
 864     // base offset of all other structure members is derived by taking the offset of the\r
 865     // last basic machine unit consumed by the previous member and adding one. Each\r
 866     // structure member is stored in memory at its aligned offset. The members of a top-\r
 867     // level uniform block are laid out in buffer storage by treating the uniform block as\r
 868     // a structure with a base offset of zero.\r
 869     //\r
 870     //   1. If the member is a scalar consuming N basic machine units, the base alignment is N.\r
 871     //\r
 872     //   2. If the member is a two- or four-component vector with components consuming N basic \r
 873     //      machine units, the base alignment is 2N or 4N, respectively.\r
 874     //\r
 875     //   3. If the member is a three-component vector with components consuming N\r
 876     //      basic machine units, the base alignment is 4N.\r
 877     //\r
 878     //   4. If the member is an array of scalars or vectors, the base alignment and array\r
 879     //      stride are set to match the base alignment of a single array element, according\r
 880     //      to rules (1), (2), and (3), and rounded up to the base alignment of a vec4. The\r
 881     //      array may have padding at the end; the base offset of the member following\r
 882     //      the array is rounded up to the next multiple of the base alignment.\r
 883     //\r
 884     //   5. If the member is a column-major matrix with C columns and R rows, the\r
 885     //      matrix is stored identically to an array of C column vectors with R \r
 886     //      components each, according to rule (4).\r
 887     //\r
 888     //   6. If the member is an array of S column-major matrices with C columns and\r
 889     //      R rows, the matrix is stored identically to a row of S \ 2 C column vectors\r
 890     //      with R components each, according to rule (4).\r
 891     //\r
 892     //   7. If the member is a row-major matrix with C columns and R rows, the matrix\r
 893     //      is stored identically to an array of R row vectors with C components each,\r
 894     //      according to rule (4).\r
 895     //\r
 896     //   8. If the member is an array of S row-major matrices with C columns and R\r
 897     //      rows, the matrix is stored identically to a row of S \ 2 R row vectors with C\r
 898     //      components each, according to rule (4).\r
 899     //\r
 900     //   9. If the member is a structure, the base alignment of the structure is N , where\r
 901     //      N is the largest base alignment value of any    of its members, and rounded\r
 902     //      up to the base alignment of a vec4. The individual members of this substructure \r
 903     //      are then assigned offsets by applying this set of rules recursively,\r
 904     //      where the base offset of the first member of the sub-structure is equal to the\r
 905     //      aligned offset of the structure. The structure may have padding at the end;\r
 906     //      the base offset of the member following the sub-structure is rounded up to\r
 907     //      the next multiple of the base alignment of the structure.\r
 908     //\r
 909     //   10. If the member is an array of S structures, the S elements of the array are laid\r
 910     //       out in order, according to rule (9).\r
 911 \r
 912     // rules 4, 6, and 8\r
 913     if (type.isArray()) {\r
 914         TType derefType(type, 0);\r
 915         alignment = getBaseAlignment(derefType, size, std140);\r
 916         if (std140)\r
 917             alignment = std::max(baseAlignmentVec4Std140, alignment);\r
 918         RoundToPow2(size, alignment);\r
 919         size *= type.getArraySize();\r
 920         return alignment;\r
 921     }\r
 922 \r
 923     // rule 9\r
 924     if (type.getBasicType() == EbtStruct) {\r
 925         const TTypeList& memberList = *type.getStruct();\r
 926 \r
 927         size = 0;\r
 928         int maxAlignment = std140 ? baseAlignmentVec4Std140 : 0;\r
 929         for (size_t m = 0; m < memberList.size(); ++m) {\r
 930             int memberSize;\r
 931             int memberAlignment = getBaseAlignment(*memberList[m].type, memberSize, std140);\r
 932             maxAlignment = std::max(maxAlignment, memberAlignment);\r
 933             RoundToPow2(size, memberAlignment);         \r
 934             size += memberSize;\r
 935         }\r
 936 \r
 937         return maxAlignment;\r
 938     }\r
 939 \r
 940     // rule 1\r
 941     if (type.isScalar())\r
 942         return getBaseAlignmentScalar(type, size);\r
 943 \r
 944     // rules 2 and 3\r
 945     if (type.isVector()) {\r
 946         int scalarAlign = getBaseAlignmentScalar(type, size);\r
 947         switch (type.getVectorSize()) {\r
 948         case 2:\r
 949             size *= 2;\r
 950             return 2 * scalarAlign;\r
 951         default: \r
 952             size *= type.getVectorSize();\r
 953             return 4 * scalarAlign;\r
 954         }\r
 955     }\r
 956 \r
 957     // rules 5 and 7\r
 958     if (type.isMatrix()) {\r
 959         // rule 5: deref to row, not to column, meaning the size of vector is num columns instead of num rows\r
 960         TType derefType(type, 0, type.getQualifier().layoutMatrix == ElmRowMajor);\r
 961             \r
 962         alignment = getBaseAlignment(derefType, size, std140);\r
 963         if (std140)\r
 964             alignment = std::max(baseAlignmentVec4Std140, alignment);\r
 965         RoundToPow2(size, alignment);\r
 966         if (type.getQualifier().layoutMatrix == ElmRowMajor)\r
 967             size *= type.getMatrixRows();\r
 968         else\r
 969             size *= type.getMatrixCols();\r
 970 \r
 971         return alignment;\r
 972     }\r
 973 \r
 974     assert(0);  // all cases should be covered above\r
 975     size = baseAlignmentVec4Std140;\r
 976     return baseAlignmentVec4Std140;\r
 977 }\r
 978 \r
 979 } // end namespace glslang\r