1 /*-------------------------------------------------------------------------
2 * drawElements Quality Program OpenGL ES 3.0 Module
3 * -------------------------------------------------
5 * Copyright 2014 The Android Open Source Project
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
21 * \brief Optimized vs unoptimized shader performance tests.
22 *//*--------------------------------------------------------------------*/
24 #include "es3pShaderOptimizationTests.hpp"
25 #include "glsShaderPerformanceMeasurer.hpp"
26 #include "gluRenderContext.hpp"
27 #include "gluShaderProgram.hpp"
28 #include "tcuTestLog.hpp"
29 #include "tcuVector.hpp"
30 #include "tcuStringTemplate.hpp"
31 #include "deSharedPtr.hpp"
32 #include "deStringUtil.hpp"
35 #include "glwFunctions.hpp"
41 using glu::ShaderProgram;
53 using gls::ShaderPerformanceMeasurer;
//! Helper that produces a std::map<string, string> from one key and one value.
// \note Only the declaration of the local map `res` is visible in this view.
//       NOTE(review): presumably the (key, value) pair is stored in `res` and `res` is returned -- confirm against the full body.
60 static inline std::map<string, string> singleMap (const string& key, const string& value)
62 std::map<string, string> res;
//! Builds a string out of repeated copies of str, with delim (default: empty) placed in front of each appended copy.
// \note The loop below starts at i = 1, so it appends (numRepeats - 1) copies of `delim + str`.
//       NOTE(review): the initial value of `result` and the return statement are not visible here; presumably
//       `result` starts out as a single copy of str -- confirm.
67 static inline string repeat (const string& str, int numRepeats, const string& delim = "")
70 for (int i = 1; i < numRepeats; i++)
71 result += delim + str;
//! Specializes the string template strTempl numRepeats times and concatenates the specializations.
//! \param strTempl   Template text; it is specialized with the parameters NDX and PREV_NDX.
//! \param numRepeats Number of specializations to append.
//! \param delim      Separator inserted before every specialization except the first.
//! \param ndxStart   Offset added to the loop counter to form NDX (PREV_NDX is always NDX - 1).
75 static inline string repeatIndexedTemplate (const string& strTempl, int numRepeats, const string& delim = "", int ndxStart = 0)
// Parse the template once; it is re-specialized on every iteration.
77 const tcu::StringTemplate templ(strTempl);
79 std::map<string, string> params;
81 for (int i = 0; i < numRepeats; i++)
// With the default ndxStart of 0, PREV_NDX is -1 on the first iteration.
83 params["PREV_NDX"] = toString(i + ndxStart - 1);
84 params["NDX"] = toString(i + ndxStart);
// No delimiter in front of the very first specialization.
86 result += (i > 0 ? delim : "") + templ.specialize(params);
// Enumerators naming the shader stage a case targets. VERTEX is explicitly 0 and FRAGMENT follows it.
// NOTE(review): the enum's opening line and any terminating enumerator are not visible in this view.
97 CASESHADERTYPE_VERTEX = 0,
98 CASESHADERTYPE_FRAGMENT,
//! Returns the GLSL precision qualifier used for values in the given shader stage.
// \note Both visible cases return "highp", i.e. vertex and fragment cases use the same precision.
103 static inline string getShaderPrecision (CaseShaderType shaderType)
107 case CASESHADERTYPE_VERTEX: return "highp";
108 case CASESHADERTYPE_FRAGMENT: return "highp";
// Members and constructors of ProgramData: the shader sources of one program together with the
// additional vertex attributes that program needs. (The declaration's opening line is not visible in this view.)
117 glu::ProgramSources sources;
118 vector<gls::AttribSpec> attributes; //!< \note Shouldn't contain a_position; that one is set by gls::ShaderPerformanceMeasurer.
// Default constructor: leaves both members default-constructed.
120 ProgramData (void) {}
// Sources plus an optional list of attributes (defaults to an empty list).
121 ProgramData (const glu::ProgramSources& sources_, const vector<gls::AttribSpec>& attributes_ = vector<gls::AttribSpec>()) : sources(sources_), attributes(attributes_) {}
// Convenience overload: sources plus exactly one attribute, stored as a one-element vector.
122 ProgramData (const glu::ProgramSources& sources_, const gls::AttribSpec& attribute) : sources(sources_), attributes(1, attribute) {}
125 //! Shader boilerplate helper; most cases have similar basic shader structure.
//! Builds a "#version 300 es" vertex/fragment shader pair plus a single "a_value" attribute spec.
//! \param shaderType     Stage that receives funcDefs and mainStatements; the other stage gets empty strings in their place.
//! \param funcDefs       GLSL text inserted after that stage's in/out declarations.
//! \param mainStatements GLSL statements inserted after the local `value` is declared and before it is written to the stage output.
//! \return ProgramData holding both shader sources and the a_value attribute.
126 static inline ProgramData defaultProgramData (CaseShaderType shaderType, const string& funcDefs, const string& mainStatements)
128 const bool isVertexCase = shaderType == CASESHADERTYPE_VERTEX;
129 const bool isFragmentCase = shaderType == CASESHADERTYPE_FRAGMENT;
// Precisions are queried per stage, independently of which stage is the one under test.
130 const string vtxPrec = getShaderPrecision(CASESHADERTYPE_VERTEX);
131 const string fragPrec = getShaderPrecision(CASESHADERTYPE_FRAGMENT);
// Vertex shader: copies a_position to gl_Position and forwards a_value (optionally modified) as v_value.
133 return ProgramData(glu::ProgramSources() << glu::VertexSource( "#version 300 es\n"
134 "in " + vtxPrec + " vec4 a_position;\n"
135 "in " + vtxPrec + " vec4 a_value;\n"
136 "out " + fragPrec + " vec4 v_value;\n"
137 + (isVertexCase ? funcDefs : "") +
140 " gl_Position = a_position;\n"
141 " " + vtxPrec + " vec4 value = a_value;\n"
142 + (isVertexCase ? mainStatements : "") +
143 " v_value = value;\n"
// Fragment shader: reads v_value into `value` (optionally modified) and writes it to o_color.
146 << glu::FragmentSource( "#version 300 es\n"
147 "layout (location = 0) out " + fragPrec + " vec4 o_color;\n"
148 "in " + fragPrec + " vec4 v_value;\n"
149 + (isFragmentCase ? funcDefs : "") +
152 " " + fragPrec + " vec4 value = v_value;\n"
153 + (isFragmentCase ? mainStatements : "") +
154 " o_color = value;\n"
// The a_value attribute is specified with four unit vectors.
// NOTE(review): presumably one value per corner of the rendered quad -- confirm against gls::AttribSpec.
156 gls::AttribSpec("a_value",
157 Vec4(1.0f, 0.0f, 0.0f, 0.0f),
158 Vec4(0.0f, 1.0f, 0.0f, 0.0f),
159 Vec4(0.0f, 0.0f, 1.0f, 0.0f),
160 Vec4(0.0f, 0.0f, 0.0f, 1.0f)));
//! Overload for cases that need no extra function definitions: forwards to the three-argument version with an empty funcDefs string.
163 static inline ProgramData defaultProgramData (CaseShaderType shaderType, const string& mainStatements)
165 return defaultProgramData(shaderType, "", mainStatements);
//! Base class for cases that measure an "unoptimized" and a "hand-optimized" variant of the same shader.
//! Subclasses supply the two variants through generateProgramData().
168 class ShaderOptimizationCase : public TestCase
// Registers the case as a performance node and configures the measurer for the stage under test.
171 ShaderOptimizationCase (Context& context, const char* name, const char* description, CaseShaderType caseShaderType)
172 : TestCase (context, tcu::NODETYPE_PERFORMANCE, name, description)
173 , m_caseShaderType (caseShaderType)
// STATE_LAST acts as the "not initialized" value; init() moves the state to STATE_INIT_UNOPTIMIZED.
174 , m_state (STATE_LAST)
// Map the local shader-type enum to the measurer's case type; anything unexpected maps to gls::CASETYPE_LAST.
175 , m_measurer (context.getRenderContext(), caseShaderType == CASESHADERTYPE_VERTEX ? gls::CASETYPE_VERTEX
176 : caseShaderType == CASESHADERTYPE_FRAGMENT ? gls::CASETYPE_FRAGMENT
177 : gls::CASETYPE_LAST)
// Both results are constructed from (-1.0f, -1.0f) until a measurement overwrites them.
178 , m_unoptimizedResult (-1.0f, -1.0f)
179 , m_optimizedResult (-1.0f, -1.0f)
183 virtual ~ShaderOptimizationCase (void) {}
186 IterateResult iterate (void);
//! Returns the program (sources + attributes) for the requested variant; optimized == true selects the hand-optimized one.
189 virtual ProgramData generateProgramData (bool optimized) const = 0;
191 const CaseShaderType m_caseShaderType;
// States stepped through by iterate(): set up and measure the unoptimized program, then the optimized one.
196 STATE_INIT_UNOPTIMIZED = 0,
197 STATE_MEASURE_UNOPTIMIZED,
198 STATE_INIT_OPTIMIZED,
199 STATE_MEASURE_OPTIMIZED,
// Accessors selecting the optimized or unoptimized member by flag, so init()/iterate() can share code for both variants.
205 ProgramData& programData (bool optimized) { return optimized ? m_optimizedData : m_unoptimizedData; }
206 SharedPtr<const ShaderProgram>& program (bool optimized) { return optimized ? m_optimizedProgram : m_unoptimizedProgram; }
207 ShaderPerformanceMeasurer::Result& result (bool optimized) { return optimized ? m_optimizedResult : m_unoptimizedResult; }
// A single measurer instance is used for both variants.
210 ShaderPerformanceMeasurer m_measurer;
212 ProgramData m_unoptimizedData;
213 ProgramData m_optimizedData;
214 SharedPtr<const ShaderProgram> m_unoptimizedProgram;
215 SharedPtr<const ShaderProgram> m_optimizedProgram;
216 ShaderPerformanceMeasurer::Result m_unoptimizedResult;
217 ShaderPerformanceMeasurer::Result m_optimizedResult;
//! Generates, compiles and logs both program variants, then arms the state machine used by iterate().
//! Fails the test (TCU_FAIL) if either program is not OK after construction.
220 void ShaderOptimizationCase::init (void)
222 const glu::RenderContext& renderCtx = m_context.getRenderContext();
223 TestLog& log = m_testCtx.getLog();
225 m_measurer.logParameters(log);
// ndx 0 = unoptimized variant, ndx 1 = optimized variant.
227 for (int ndx = 0; ndx < 2; ndx++)
229 const bool optimized = ndx == 1;
231 programData(optimized) = generateProgramData(optimized);
// Debug-only sanity check on the attributes returned by the subclass.
233 for (int i = 0; i < (int)programData(optimized).attributes.size(); i++)
234 DE_ASSERT(programData(optimized).attributes[i].name != "a_position"); // \note Position attribute is set by m_measurer.
236 program(optimized) = SharedPtr<const ShaderProgram>(new ShaderProgram(renderCtx, programData(optimized).sources));
// Log the program inside its own named section; this happens before the isOk() check below.
239 const tcu::ScopedLogSection section(log, optimized ? "OptimizedProgram" : "UnoptimizedProgram",
240 optimized ? "Hand-optimized program" : "Unoptimized program");
241 log << *program(optimized);
244 if (!program(optimized)->isOk())
245 TCU_FAIL("Shader compilation failed");
// First call to iterate() will initialize the measurer for the unoptimized program.
248 m_state = STATE_INIT_UNOPTIMIZED;
//! Advances the measurement state machine by one step.
//! Order of states: INIT_UNOPTIMIZED -> MEASURE_UNOPTIMIZED -> INIT_OPTIMIZED -> MEASURE_OPTIMIZED -> FINISHED.
//! In the FINISHED state the two results are compared, logged and reported as the test result.
// \note The return statements of this function are not visible in this view.
251 ShaderOptimizationCase::IterateResult ShaderOptimizationCase::iterate (void)
253 TestLog& log = m_testCtx.getLog();
// INIT_* states: hand the matching program and its attributes to the measurer, then switch to the matching MEASURE_* state.
255 if (m_state == STATE_INIT_UNOPTIMIZED || m_state == STATE_INIT_OPTIMIZED)
257 const bool optimized = m_state == STATE_INIT_OPTIMIZED;
258 m_measurer.init(program(optimized)->getProgram(), programData(optimized).attributes, 1);
259 m_state = optimized ? STATE_MEASURE_OPTIMIZED : STATE_MEASURE_UNOPTIMIZED;
// MEASURE_* states: run one measurer iteration; once the measurer reports it is finished, store and log the result.
263 else if (m_state == STATE_MEASURE_UNOPTIMIZED || m_state == STATE_MEASURE_OPTIMIZED)
265 m_measurer.iterate();
267 if (m_measurer.isFinished())
269 const bool optimized = m_state == STATE_MEASURE_OPTIMIZED;
270 const tcu::ScopedLogSection section (log, optimized ? "OptimizedResult" : "UnoptimizedResult",
271 optimized ? "Measurement results for hand-optimized program" : "Measurement result for unoptimized program");
272 m_measurer.logMeasurementInfo(log);
273 result(optimized) = m_measurer.getResult();
// After the unoptimized measurement continue with the optimized program; after the optimized one we are done.
275 m_state = optimized ? STATE_FINISHED : STATE_INIT_OPTIMIZED;
282 DE_ASSERT(m_state == STATE_FINISHED);
// Pick the result field that matches the stage under test (megaVertPerSec for vertex cases, megaFragPerSec otherwise).
284 const float unoptimizedRelevantResult = m_caseShaderType == CASESHADERTYPE_VERTEX ? m_unoptimizedResult.megaVertPerSec : m_unoptimizedResult.megaFragPerSec;
285 const float optimizedRelevantResult = m_caseShaderType == CASESHADERTYPE_VERTEX ? m_optimizedResult.megaVertPerSec : m_optimizedResult.megaFragPerSec;
286 const char* const relevantResultName = m_caseShaderType == CASESHADERTYPE_VERTEX ? "vertex" : "fragment";
// NOTE(review): no guard against optimizedRelevantResult being zero is visible here -- confirm whether one is needed.
287 const float ratio = unoptimizedRelevantResult / optimizedRelevantResult;
// 100/ratio is the optimized result as a percentage of the unoptimized one; subtracting 100 gives the gain (negative = degradation).
288 const int handOptimizationGain = (int)deFloatRound(100.0f/ratio) - 100;
290 log << TestLog::Message << "Unoptimized / optimized " << relevantResultName << " performance ratio: " << ratio << TestLog::EndMessage;
292 if (handOptimizationGain >= 0)
293 log << TestLog::Message << "Note: " << handOptimizationGain << "% performance gain was achieved with hand-optimized version" << TestLog::EndMessage;
// Negative gain is logged as a positive degradation percentage.
295 log << TestLog::Message << "Note: hand-optimization degraded performance by " << -handOptimizationGain << "%" << TestLog::EndMessage;
// The case always passes; the ratio (formatted via de::floatToString(ratio, 2)) is used as the result description.
297 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(ratio, 2).c_str());
//! Compares a GLSL for-loop (unoptimized) against the same statement written out numRepetitions times (optimized).
303 class LoopUnrollCase : public ShaderOptimizationCase
// Case types; each one has its own expression in expressionTemplate().
308 CASETYPE_INDEPENDENT = 0,
314 LoopUnrollCase (Context& context, const char* name, const char* description, CaseShaderType caseShaderType, CaseType caseType, int numRepetitions)
315 : ShaderOptimizationCase (context, name, description, caseShaderType)
316 , m_numRepetitions (numRepetitions)
317 , m_caseType (caseType)
// Optimized: the expression template is specialized once per repetition (NDX = 0..numRepetitions-1), one statement per line.
// Unoptimized: the expression is wrapped in a for-loop with numRepetitions iterations.
322 ProgramData generateProgramData (bool optimized) const
324 const string repetition = optimized ? repeatIndexedTemplate("\t" + expressionTemplate(m_caseType) + ";\n", m_numRepetitions)
325 : loop(m_numRepetitions, expressionTemplate(m_caseType));
// `valueOrig` keeps the incoming value so the expression can refer to it while `value` is being modified.
327 return defaultProgramData(m_caseShaderType, "\t" + getShaderPrecision(m_caseShaderType) + " vec4 valueOrig = value;\n" + repetition);
331 const int m_numRepetitions;
332 const CaseType m_caseType;
//! Returns the per-iteration GLSL expression; ${NDX} is replaced by either a literal index or the loop variable.
334 static inline string expressionTemplate (CaseType caseType)
// INDEPENDENT: each term depends only on the index and valueOrig.
338 case CASETYPE_INDEPENDENT: return "value += sin(float(${NDX}+1)*valueOrig)";
// DEPENDENT: each step uses the value produced by the previous step.
339 case CASETYPE_DEPENDENT: return "value = sin(value)";
//! Wraps innerExpr in a GLSL for-loop running `iterations` times, with ${NDX} specialized to the loop variable "i".
346 static inline string loop (int iterations, const string& innerExpr)
348 return "\tfor (int i = 0; i < " + toString(iterations) + "; i++)\n\t\t" + tcu::StringTemplate(innerExpr).specialize(singleMap("NDX", "i")) + ";\n";
//! Compares a loop that recomputes a loop-invariant expression on every iteration (unoptimized)
//! against one where that expression is computed once before the loop (optimized).
352 class LoopInvariantCodeMotionCase : public ShaderOptimizationCase
355 LoopInvariantCodeMotionCase (Context& context, const char* name, const char* description, CaseShaderType caseShaderType, int numLoopIterations)
356 : ShaderOptimizationCase (context, name, description, caseShaderType)
357 , m_numLoopIterations (numLoopIterations)
362 ProgramData generateProgramData (bool optimized) const
// Host-side computation of a scale factor: the reciprocal of the accumulated 3.2*i + 4.6 terms, which mirror
// the shader's per-iteration `x`. NOTE(review): the declaration/initial value of `scale` is not visible here.
365 for (int i = 0; i < m_numLoopIterations; i++)
366 scale += 3.2f*(float)i + 4.6f;
367 scale = 1.0f / scale;
369 const string precision = getShaderPrecision(m_caseShaderType);
// Optimized: `y = sin(cos(sin(valueOrig)))` is declared once, ahead of the loop.
370 const string statements = optimized ? " " + precision + " vec4 valueOrig = value;\n"
371 " " + precision + " vec4 y = sin(cos(sin(valueOrig)));\n"
372 " for (int i = 0; i < " + toString(m_numLoopIterations) + "; i++)\n"
374 " " + precision + " float x = 3.2*float(i) + 4.6;\n"
377 " value *= " + toString(scale) + ";\n"
// Unoptimized: the same `y` expression is declared inside the loop body, next to `x`.
379 : " " + precision + " vec4 valueOrig = value;\n"
380 " for (int i = 0; i < " + toString(m_numLoopIterations) + "; i++)\n"
382 " " + precision + " float x = 3.2*float(i) + 4.6;\n"
383 " " + precision + " vec4 y = sin(cos(sin(valueOrig)));\n"
386 " value *= " + toString(scale) + ";\n";
388 return defaultProgramData(m_caseShaderType, statements);
392 const int m_numLoopIterations;
//! Compares an expression evaluated through a chain of nested function calls (unoptimized)
//! against the same expression written directly in main (optimized).
395 class FunctionInliningCase : public ShaderOptimizationCase
398 FunctionInliningCase (Context& context, const char* name, const char* description, CaseShaderType caseShaderType, int callNestingDepth)
399 : ShaderOptimizationCase (context, name, description, caseShaderType)
400 , m_callNestingDepth (callNestingDepth)
405 ProgramData generateProgramData (bool optimized) const
407 const string precision = getShaderPrecision(m_caseShaderType);
408 const string expression = "value*vec4(0.8, 0.7, 0.6, 0.9)";
// The optimized variant needs no helper functions at all.
409 const string maybeFuncDefs = optimized ? "" : funcDefinitions(m_callNestingDepth, precision, expression);
// Unoptimized: call the outermost function, func<depth-1>, which is the last one funcDefinitions() emits.
410 const string mainValueStatement = (optimized ? "\tvalue = " + expression : "\tvalue = func" + toString(m_callNestingDepth-1) + "(value)") + ";\n";
412 return defaultProgramData(m_caseShaderType, maybeFuncDefs, mainValueStatement);
416 const int m_callNestingDepth;
//! Emits GLSL functions func0 .. func<callNestingDepth-1>: func0 evaluates `expression`, every other funcN just calls func<N-1>.
418 static inline string funcDefinitions (int callNestingDepth, const string& precision, const string& expression)
// func0's parameter is named `value` because `expression` refers to `value`.
420 string result = precision + " vec4 func0 (" + precision + " vec4 value) { return " + expression + "; }\n";
422 for (int i = 1; i < callNestingDepth; i++)
423 result += precision + " vec4 func" + toString(i) + " (" + precision + " vec4 v) { return func" + toString(i-1) + "(v); }\n";
//! Compares shaders that compute a factor from compile-time-known inputs (unoptimized)
//! against shaders that simply multiply by a literal vec4 (optimized).
//! Variants: built-in function calls, array elements, struct members; each with or without `const` qualifiers.
429 class ConstantPropagationCase : public ShaderOptimizationCase
434 CASETYPE_BUILT_IN_FUNCTIONS = 0,
441 ConstantPropagationCase (Context& context, const char* name, const char* description, CaseShaderType caseShaderType, CaseType caseType, bool useConstantExpressionsOnly)
442 : ShaderOptimizationCase (context, name, description, caseShaderType)
443 , m_caseType (caseType)
444 , m_useConstantExpressionsOnly (useConstantExpressionsOnly)
// Dispatches on m_caseType to one of the statement generators below.
449 ProgramData generateProgramData (bool optimized) const
// Vertex cases pass `true` as the generators' useHeavierWorkload argument.
451 const bool isVertexCase = m_caseShaderType == CASESHADERTYPE_VERTEX;
452 const string precision = getShaderPrecision(m_caseShaderType);
453 const string statements = m_caseType == CASETYPE_BUILT_IN_FUNCTIONS ? builtinFunctionsCaseStatements (optimized, m_useConstantExpressionsOnly, precision, isVertexCase)
454 : m_caseType == CASETYPE_ARRAY ? arrayCaseStatements (optimized, m_useConstantExpressionsOnly, precision, isVertexCase)
455 : m_caseType == CASETYPE_STRUCT ? structCaseStatements (optimized, m_useConstantExpressionsOnly, precision, isVertexCase)
458 return defaultProgramData(m_caseShaderType, statements);
462 const CaseType m_caseType;
463 const bool m_useConstantExpressionsOnly;
//! Statements whose factor is derived from literal arguments through built-in functions (sin, cos, fract, exp, mix, ...).
//! \param constantExpressionsOnly When true, every intermediate variable is declared `const`.
//! \param useHeavierWorkload      Selects 12 instead of 1 rows of nested sin() calls.
465 static inline string builtinFunctionsCaseStatements (bool optimized, bool constantExpressionsOnly, const string& precision, bool useHeavierWorkload)
467 const string constMaybe = constantExpressionsOnly ? "const " : "";
468 const int numSinRows = useHeavierWorkload ? 12 : 1;
470 return optimized ? " value = vec4(0.4, 0.5, 0.6, 0.7) * value; // NOTE: factor doesn't necessarily match the one in unoptimized shader, but shouldn't make a difference performance-wise\n"
472 : " " + constMaybe + precision + " vec4 a = vec4(sin(0.7), cos(0.2), sin(0.9), abs(-0.5));\n"
473 " " + constMaybe + precision + " vec4 b = cos(a) + fract(3.0*a.xzzw);\n"
474 " " + constMaybe + "bvec4 c = bvec4(true, false, true, true);\n"
475 " " + constMaybe + precision + " vec4 d = exp(b + vec4(c));\n"
476 " " + constMaybe + precision + " vec4 e0 = inversesqrt(mix(d+a, d+b, a));\n"
// ndxStart = 1: emits e1 .. e<numSinRows>, each computed from the previous e<N-1> (starting from e0 above).
477 + repeatIndexedTemplate(" " + constMaybe + precision + " vec4 e${NDX} = sin(sin(sin(sin(e${PREV_NDX}))));\n", numSinRows, "", 1) +
// The final factor uses the last variable of the chain.
478 " " + constMaybe + precision + " vec4 f = abs(e" + toString(numSinRows) + ");\n" +
479 " value = f*value;\n";
//! Statements whose factor is derived from the elements of a four-element vec4 array.
//! \param constantExpressionsOnly When true the array is a `const` array with an initializer; otherwise it is filled element by element.
//! \param useHeavierWorkload      Selects 12 instead of 1 rows of nested sin() calls.
482 static inline string arrayCaseStatements (bool optimized, bool constantExpressionsOnly, const string& precision, bool useHeavierWorkload)
484 const string constMaybe = constantExpressionsOnly ? "const " : "";
485 const int numSinRows = useHeavierWorkload ? 12 : 1;
487 return optimized ? " value = vec4(0.4, 0.5, 0.6, 0.7) * value; // NOTE: factor doesn't necessarily match the one in unoptimized shader, but shouldn't make a difference performance-wise\n"
489 : " const int arrLen = 4;\n"
490 + (constantExpressionsOnly ?
491 " const " + precision + " vec4 arr[arrLen] =\n"
492 " vec4[](vec4(0.1, 0.5, 0.9, 1.3),\n"
493 " vec4(0.2, 0.6, 1.0, 1.4),\n"
494 " vec4(0.3, 0.7, 1.1, 1.5),\n"
495 " vec4(0.4, 0.8, 1.2, 1.6));\n"
497 : " " + precision + " vec4 arr[arrLen];\n"
498 " arr[0] = vec4(0.1, 0.5, 0.9, 1.3);\n"
499 " arr[1] = vec4(0.2, 0.6, 1.0, 1.4);\n"
500 " arr[2] = vec4(0.3, 0.7, 1.1, 1.5);\n"
501 " arr[3] = vec4(0.4, 0.8, 1.2, 1.6);\n"
// Average of the four elements; the divisor comes from arr.length() rather than a literal.
503 " " + constMaybe + precision + " vec4 a = (arr[0] + arr[1] + arr[2] + arr[3]) * (1.0 / float(arr.length()));\n"
504 " " + constMaybe + precision + " vec4 b0 = cos(sin(a));\n"
// Emits b1 .. b<numSinRows>, each computed from the previous one.
505 + repeatIndexedTemplate(" " + constMaybe + precision + " vec4 b${NDX} = sin(sin(sin(sin(b${PREV_NDX}))));\n", numSinRows, "", 1) +
506 " " + constMaybe + precision + " vec4 c = abs(b" + toString(numSinRows) + ");\n" +
507 " value = c*value;\n";
//! Statements whose factor is derived from the members of a struct value `s` of type S.
//! \param constantExpressionsOnly When true, `s` and the intermediate variables are declared `const`.
//! \param useHeavierWorkload      Selects 12 instead of 1 rows of nested sin() calls.
510 static inline string structCaseStatements (bool optimized, bool constantExpressionsOnly, const string& precision, bool useHeavierWorkload)
512 const string constMaybe = constantExpressionsOnly ? "const " : "";
513 const int numSinRows = useHeavierWorkload ? 12 : 1;
515 return optimized ? " value = vec4(0.4, 0.5, 0.6, 0.7) * value; // NOTE: factor doesn't necessarily match the one in unoptimized shader, but shouldn't make a difference performance-wise\n"
// Four vec4 members a, b, c, d. NOTE(review): the lines opening/closing the struct declaration are not visible here;
// presumably these are the members of S -- confirm.
519 " " + precision + " vec4 a;\n"
520 " " + precision + " vec4 b;\n"
521 " " + precision + " vec4 c;\n"
522 " " + precision + " vec4 d;\n"
525 " " + constMaybe + "S s =\n"
526 " S(vec4(0.1, 0.5, 0.9, 1.3),\n"
527 " vec4(0.2, 0.6, 1.0, 1.4),\n"
528 " vec4(0.3, 0.7, 1.1, 1.5),\n"
529 " vec4(0.4, 0.8, 1.2, 1.6));\n"
// Average of the four members, here with the literal factor 0.25.
530 " " + constMaybe + precision + " vec4 a = (s.a + s.b + s.c + s.d) * 0.25;\n"
531 " " + constMaybe + precision + " vec4 b0 = cos(sin(a));\n"
532 + repeatIndexedTemplate(" " + constMaybe + precision + " vec4 b${NDX} = sin(sin(sin(sin(b${PREV_NDX}))));\n", numSinRows, "", 1) +
533 " " + constMaybe + precision + " vec4 c = abs(b" + toString(numSinRows) + ");\n" +
534 " value = c*value;\n";
//! Compares shaders that spell out the same subexpressions several times (unoptimized)
//! against shaders that compute them once and reuse the result (optimized).
538 class CommonSubexpressionCase : public ShaderOptimizationCase
543 CASETYPE_SINGLE_STATEMENT = 0,
544 CASETYPE_MULTIPLE_STATEMENTS,
545 CASETYPE_STATIC_BRANCH,
551 CommonSubexpressionCase (Context& context, const char* name, const char* description, CaseShaderType caseShaderType, CaseType caseType)
552 : ShaderOptimizationCase (context, name, description, caseShaderType)
553 , m_caseType (caseType)
// Dispatches on m_caseType to one of the statement generators below.
558 ProgramData generateProgramData (bool optimized) const
// Vertex cases pass `true` as the generators' useHeavierWorkload argument.
560 const bool isVertexCase = m_caseShaderType == CASESHADERTYPE_VERTEX;
561 const string precision = getShaderPrecision(m_caseShaderType);
562 const string statements = m_caseType == CASETYPE_SINGLE_STATEMENT ? singleStatementCaseStatements (optimized, precision, isVertexCase)
563 : m_caseType == CASETYPE_MULTIPLE_STATEMENTS ? multipleStatementsCaseStatements (optimized, precision, isVertexCase)
564 : m_caseType == CASETYPE_STATIC_BRANCH ? staticBranchCaseStatements (optimized, precision, isVertexCase)
565 : m_caseType == CASETYPE_LOOP ? loopCaseStatements (optimized, precision, isVertexCase)
568 return defaultProgramData(m_caseShaderType, statements);
572 const CaseType m_caseType;
//! Common subexpressions inside one assignment statement.
//! The whole right-hand side is repeated 4 times with the heavier workload, otherwise once.
574 static inline string singleStatementCaseStatements (bool optimized, const string& precision, bool useHeavierWorkload)
576 const int numTopLevelRepeats = useHeavierWorkload ? 4 : 1;
// Optimized: sin(value) and cos(sin(value)) are named once (s, cs) and the combined term d is summed.
578 return optimized ? " " + precision + " vec4 s = sin(value);\n"
579 " " + precision + " vec4 cs = cos(s);\n"
580 " " + precision + " vec4 d = fract(s + cs) + sqrt(s + exp(cs));\n"
581 " value = " + repeat("d", numTopLevelRepeats, "+") + ";\n"
// Unoptimized: the full expression, with its repeated sin(value)/cos(sin(value)) terms, is written out for each repeat.
583 : " value = " + repeat("fract(sin(value) + cos(sin(value))) + sqrt(sin(value) + exp(cos(sin(value))))", numTopLevelRepeats, "\n\t + ") + ";\n";
//! Common subexpressions spread over several statements.
586 static inline string multipleStatementsCaseStatements (bool optimized, const string& precision, bool useHeavierWorkload)
588 const int numTopLevelRepeats = useHeavierWorkload ? 4 : 2;
589 DE_ASSERT(numTopLevelRepeats >= 2);
// Optimized: a and b are computed once and `value += a*b` is repeated.
591 return optimized ? " " + precision + " vec4 a = sin(value) + cos(exp(value));\n"
592 " " + precision + " vec4 b = cos(cos(a));\n"
593 " a = fract(exp(sqrt(b)));\n"
595 + repeat("\tvalue += a*b;\n", numTopLevelRepeats)
// Unoptimized: every repeat gets its own a<N>/b<N> computed with identical expressions ...
597 : repeatIndexedTemplate( " " + precision + " vec4 a${NDX} = sin(value) + cos(exp(value));\n"
598 " " + precision + " vec4 b${NDX} = cos(cos(a${NDX}));\n"
599 " a${NDX} = fract(exp(sqrt(b${NDX})));\n"
601 numTopLevelRepeats) +
// ... and all the accumulations follow after all the a<N>/b<N> computations.
603 repeatIndexedTemplate( " value += a${NDX}*b${NDX};\n", numTopLevelRepeats);
//! Common subexpressions where the repeated computations contain branches with literal-only conditions.
// \note Several control-flow lines of this function (e.g. the test that chooses between the two variants and the
//       conditions that pick among the three branch forms below) are not visible in this view.
606 static inline string staticBranchCaseStatements (bool optimized, const string& precision, bool useHeavierWorkload)
608 const int numTopLevelRepeats = useHeavierWorkload ? 4 : 2;
609 DE_ASSERT(numTopLevelRepeats >= 2);
// Variant with a single a/b pair that is reused for every accumulation.
613 return " " + precision + " vec4 a = sin(value) + cos(exp(value));\n"
614 " " + precision + " vec4 b = cos(a);\n"
616 " a = fract(exp(sqrt(b)));\n"
618 + repeat(" value += a*b;\n", numTopLevelRepeats);
// Variant with a separate a<i>/b<i> pair per repeat.
624 for (int i = 0; i < numTopLevelRepeats; i++)
626 result += " " + precision + " vec4 a" + toString(i) + " = sin(value) + cos(exp(value));\n"
627 " " + precision + " vec4 b" + toString(i) + " = cos(a" + toString(i) + ");\n";
// Form 1: assignment guarded by the literal condition (1 < 2).
630 result += " if (1 < 2)\n"
631 " b" + toString(i) + " = cos(b" + toString(i) + ");\n";
// Form 2: plain assignment without a branch.
633 result += " b" + toString(i) + " = cos(b" + toString(i) + ");\n";
// Form 3: `if (2 < 1);` is an if with an empty statement.
// NOTE(review): presumably followed by an `else` line that is not visible here -- confirm.
635 result += " if (2 < 1);\n"
637 " b" + toString(i) + " = cos(b" + toString(i) + ");\n";
641 result += " a" + toString(i) + " = fract(exp(sqrt(b" + toString(i) + ")));\n\n";
644 result += repeatIndexedTemplate(" value += a${NDX}*b${NDX};\n", numTopLevelRepeats);
//! Common subexpressions in the form of whole loops.
//! Loop length is 32 iterations with the heavier workload, otherwise 4.
650 static inline string loopCaseStatements (bool optimized, const string& precision, bool useHeavierWorkload)
652 const int numLoopIterations = useHeavierWorkload ? 32 : 4;
// Optimized: a single accumulator loop (the loop body line is not visible here).
654 return optimized ? " " + precision + " vec4 acc = value;\n"
655 " for (int i = 0; i < " + toString(numLoopIterations) + "; i++)\n"
// Unoptimized: two loops with identical bodies, working on acc0 and acc1 that both start from `value`.
661 : " " + precision + " vec4 acc0 = value;\n"
662 " for (int i = 0; i < " + toString(numLoopIterations) + "; i++)\n"
663 " acc0 = sin(acc0);\n"
665 " " + precision + " vec4 acc1 = value;\n"
666 " for (int i = 0; i < " + toString(numLoopIterations) + "; i++)\n"
667 " acc1 = sin(acc1);\n"
//! Compares shaders that contain extra code in addition to the useful computation (unoptimized)
//! against shaders that contain only the useful computation (optimized).
//! Case types come in two families: DEAD_BRANCH_* and UNUSED_VALUE_*.
// \note Many shader-string lines (branch conditions, braces, final assignments) are not visible in this view;
//       the comments below describe only what the visible lines contain.
674 class DeadCodeEliminationCase : public ShaderOptimizationCase
679 CASETYPE_DEAD_BRANCH_SIMPLE = 0,
680 CASETYPE_DEAD_BRANCH_COMPLEX,
681 CASETYPE_DEAD_BRANCH_COMPLEX_NO_CONST,
682 CASETYPE_DEAD_BRANCH_FUNC_CALL,
683 CASETYPE_UNUSED_VALUE_BASIC,
684 CASETYPE_UNUSED_VALUE_LOOP,
685 CASETYPE_UNUSED_VALUE_DEAD_BRANCH,
686 CASETYPE_UNUSED_VALUE_AFTER_RETURN,
687 CASETYPE_UNUSED_VALUE_MUL_ZERO,
692 DeadCodeEliminationCase (Context& context, const char* name, const char* description, CaseShaderType caseShaderType, CaseType caseType)
693 : ShaderOptimizationCase (context, name, description, caseShaderType)
694 , m_caseType (caseType)
// Picks the function definitions and the main statements matching m_caseType.
699 ProgramData generateProgramData (bool optimized) const
// Vertex cases pass `true` as the generators' useHeavierWorkload argument.
701 const bool isVertexCase = m_caseShaderType == CASESHADERTYPE_VERTEX;
702 const string precision = getShaderPrecision(m_caseShaderType);
// The visible branches show function definitions for the FUNC_CALL and AFTER_RETURN case types.
703 const string funcDefs = m_caseType == CASETYPE_DEAD_BRANCH_FUNC_CALL ? deadBranchFuncCallCaseFuncDefs (optimized, precision)
704 : m_caseType == CASETYPE_UNUSED_VALUE_AFTER_RETURN ? unusedValueAfterReturnCaseFuncDefs (optimized, precision, isVertexCase)
707 const string statements = m_caseType == CASETYPE_DEAD_BRANCH_SIMPLE ? deadBranchSimpleCaseStatements (optimized, isVertexCase)
// COMPLEX and COMPLEX_NO_CONST share one generator; they differ only in the useConst argument (true / false).
708 : m_caseType == CASETYPE_DEAD_BRANCH_COMPLEX ? deadBranchComplexCaseStatements (optimized, precision, true, isVertexCase)
709 : m_caseType == CASETYPE_DEAD_BRANCH_COMPLEX_NO_CONST ? deadBranchComplexCaseStatements (optimized, precision, false, isVertexCase)
710 : m_caseType == CASETYPE_DEAD_BRANCH_FUNC_CALL ? deadBranchFuncCallCaseStatements (optimized, isVertexCase)
711 : m_caseType == CASETYPE_UNUSED_VALUE_BASIC ? unusedValueBasicCaseStatements (optimized, precision, isVertexCase)
712 : m_caseType == CASETYPE_UNUSED_VALUE_LOOP ? unusedValueLoopCaseStatements (optimized, precision, isVertexCase)
713 : m_caseType == CASETYPE_UNUSED_VALUE_DEAD_BRANCH ? unusedValueDeadBranchCaseStatements (optimized, precision, isVertexCase)
// AFTER_RETURN: the main statement takes no `optimized` argument; the variants differ in the function definition.
714 : m_caseType == CASETYPE_UNUSED_VALUE_AFTER_RETURN ? unusedValueAfterReturnCaseStatements ()
715 : m_caseType == CASETYPE_UNUSED_VALUE_MUL_ZERO ? unusedValueMulZeroCaseStatements (optimized, precision, isVertexCase)
718 return defaultProgramData(m_caseShaderType, funcDefs, statements);
722 const CaseType m_caseType;
//! DEAD_BRANCH_SIMPLE: the unoptimized variant adds a transcendental-function statement and a sin() loop
//! (16 iterations with the heavier workload, otherwise 4) after the useful multiply.
// NOTE(review): the condition guarding the extra code is on a line not visible here.
724 static inline string deadBranchSimpleCaseStatements (bool optimized, bool useHeavierWorkload)
726 const int numLoopIterations = useHeavierWorkload ? 16 : 4;
728 return optimized ? " value = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
730 : " value = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
733 " value = cos(exp(sin(value))*log(sqrt(value)));\n"
734 " for (int i = 0; i < " + toString(numLoopIterations) + "; i++)\n"
735 " value = sin(value);\n"
//! DEAD_BRANCH_COMPLEX[_NO_CONST]: like the simple case, but the unoptimized variant first computes
//! a chain of values a..e from literal inputs.
//! \param useConst When true the intermediate variables a..e are declared `const`.
// NOTE(review): the condition guarding the extra code is on a line not visible here; presumably it involves `e`.
739 static inline string deadBranchComplexCaseStatements (bool optimized, const string& precision, bool useConst, bool useHeavierWorkload)
741 const string constMaybe = useConst ? "const " : "";
742 const int numLoopIterations = useHeavierWorkload ? 16 : 4;
744 return optimized ? " value = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
746 : " value = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
747 " " + constMaybe + precision + " vec4 a = vec4(sin(0.7), cos(0.2), sin(0.9), abs(-0.5));\n"
748 " " + constMaybe + precision + " vec4 b = cos(a) + fract(3.0*a.xzzw);\n"
749 " " + constMaybe + "bvec4 c = bvec4(true, false, true, true);\n"
750 " " + constMaybe + precision + " vec4 d = exp(b + vec4(c));\n"
751 " " + constMaybe + precision + " vec4 e = 1.8*abs(sin(sin(inversesqrt(mix(d+a, d+b, a)))));\n"
754 " value = cos(exp(sin(value))*log(sqrt(value)));\n"
755 " for (int i = 0; i < " + toString(numLoopIterations) + "; i++)\n"
756 " value = sin(value);\n"
//! DEAD_BRANCH_FUNC_CALL helper: only the unoptimized variant defines `func`, which returns 2.0*x.
760 static inline string deadBranchFuncCallCaseFuncDefs (bool optimized, const string& precision)
762 return optimized ? "" : precision + " float func (" + precision + " float x) { return 2.0*x; }\n";
//! DEAD_BRANCH_FUNC_CALL: the extra code is guarded by `func(0.3) > 1.0`; with func(x) = 2.0*x that compares 0.6 against 1.0.
765 static inline string deadBranchFuncCallCaseStatements (bool optimized, bool useHeavierWorkload)
767 const int numLoopIterations = useHeavierWorkload ? 16 : 4;
769 return optimized ? " value = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
771 : " value = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
772 " if (func(0.3) > 1.0)\n"
774 " value = cos(exp(sin(value))*log(sqrt(value)));\n"
775 " for (int i = 0; i < " + toString(numLoopIterations) + "; i++)\n"
776 " value = sin(value);\n"
//! UNUSED_VALUE_BASIC: the unoptimized variant additionally computes `unused` through rows of nested sin() calls
//! (12 rows with the heavier workload, otherwise 1).
// NOTE(review): the line(s) assigning the final `value` are not visible here.
780 static inline string unusedValueBasicCaseStatements (bool optimized, const string& precision, bool useHeavierWorkload)
782 const int numSinRows = useHeavierWorkload ? 12 : 1;
784 return optimized ? " " + precision + " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
787 : " " + precision + " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
// Here `unused` also reads `used`.
788 " " + precision + " vec4 unused = cos(exp(sin(value))*log(sqrt(value))) + used;\n"
789 + repeat(" unused = sin(sin(sin(sin(unused))));\n", numSinRows) +
//! UNUSED_VALUE_LOOP: the unoptimized variant updates `unused` inside a loop
//! (16 iterations with the heavier workload, otherwise 4).
793 static inline string unusedValueLoopCaseStatements (bool optimized, const string& precision, bool useHeavierWorkload)
795 const int numLoopIterations = useHeavierWorkload ? 16 : 4;
797 return optimized ? " " + precision + " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
800 : " " + precision + " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
801 " " + precision + " vec4 unused = cos(exp(sin(value))*log(sqrt(value)));\n"
802 " for (int i = 0; i < " + toString(numLoopIterations) + "; i++)\n"
803 " unused = sin(unused + used);\n"
//! UNUSED_VALUE_AFTER_RETURN helper: defines `func` for both variants; the unoptimized one additionally
//! contains the `unused` computation and a `used = used*unused` statement.
// NOTE(review): the `return` lines inside the generated function are not visible here.
807 static inline string unusedValueAfterReturnCaseFuncDefs (bool optimized, const string& precision, bool useHeavierWorkload)
809 const int numSinRows = useHeavierWorkload ? 12 : 1;
811 return optimized ? precision + " vec4 func (" + precision + " vec4 v)\n"
813 " " + precision + " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * v;\n"
817 : precision + " vec4 func (" + precision + " vec4 v)\n"
819 " " + precision + " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * v;\n"
820 " " + precision + " vec4 unused = cos(exp(sin(v))*log(sqrt(v)));\n"
821 + repeat(" unused = sin(sin(sin(sin(unused))));\n", numSinRows) +
// NOTE(review): unlike the neighbouring literals this one has no trailing "\n", so the following shader text
// continues on the same line -- confirm this is harmless/intended.
823 " used = used*unused;"
//! UNUSED_VALUE_AFTER_RETURN: main simply calls func; identical for both variants.
828 static inline string unusedValueAfterReturnCaseStatements (void)
830 return " value = func(value);\n";
//! UNUSED_VALUE_DEAD_BRANCH: the unoptimized variant computes `unused` and contains `used = used*unused`.
// NOTE(review): the condition guarding `used = used*unused` is on a line not visible here.
833 static inline string unusedValueDeadBranchCaseStatements (bool optimized, const string& precision, bool useHeavierWorkload)
835 const int numSinRows = useHeavierWorkload ? 12 : 1;
837 return optimized ? " " + precision + " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
840 : " " + precision + " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
841 " " + precision + " vec4 unused = cos(exp(sin(value))*log(sqrt(value)));\n"
842 + repeat(" unused = sin(sin(sin(sin(unused))));\n", numSinRows) +
844 " used = used*unused;\n"
//! UNUSED_VALUE_MUL_ZERO: the unoptimized variant adds `unused*float(1-1)` to the result, i.e. `unused` multiplied by zero.
848 static inline string unusedValueMulZeroCaseStatements (bool optimized, const string& precision, bool useHeavierWorkload)
850 const int numSinRows = useHeavierWorkload ? 12 : 1;
852 return optimized ? " " + precision + " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
855 : " " + precision + " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
856 " " + precision + " vec4 unused = cos(exp(sin(value))*log(sqrt(value)));\n"
857 + repeat(" unused = sin(sin(sin(sin(unused))));\n", numSinRows) +
858 " value = used + unused*float(1-1);\n";
//! Creates the top-level "optimization" test group; child groups and cases are added in init().
864 ShaderOptimizationTests::ShaderOptimizationTests (Context& context)
865 : TestCaseGroup(context, "optimization", "Shader Optimization Performance Tests")
//! Destructor. (Its body is not visible in this view.)
869 ShaderOptimizationTests::~ShaderOptimizationTests (void)
873 void ShaderOptimizationTests::init (void)
875 TestCaseGroup* const unrollGroup = new TestCaseGroup(m_context, "loop_unrolling", "Loop Unrolling Cases");
876 TestCaseGroup* const loopInvariantCodeMotionGroup = new TestCaseGroup(m_context, "loop_invariant_code_motion", "Loop-Invariant Code Motion Cases");
877 TestCaseGroup* const inlineGroup = new TestCaseGroup(m_context, "function_inlining", "Function Inlining Cases");
878 TestCaseGroup* const constantPropagationGroup = new TestCaseGroup(m_context, "constant_propagation", "Constant Propagation Cases");
879 TestCaseGroup* const commonSubexpressionGroup = new TestCaseGroup(m_context, "common_subexpression_elimination", "Common Subexpression Elimination Cases");
880 TestCaseGroup* const deadCodeEliminationGroup = new TestCaseGroup(m_context, "dead_code_elimination", "Dead Code Elimination Cases");
881 addChild(unrollGroup);
882 addChild(loopInvariantCodeMotionGroup);
883 addChild(inlineGroup);
884 addChild(constantPropagationGroup);
885 addChild(commonSubexpressionGroup);
886 addChild(deadCodeEliminationGroup);
888 for (int caseShaderTypeI = 0; caseShaderTypeI < CASESHADERTYPE_LAST; caseShaderTypeI++)
890 const CaseShaderType caseShaderType = (CaseShaderType)caseShaderTypeI;
891 const char* const caseShaderTypeSuffix = caseShaderType == CASESHADERTYPE_VERTEX ? "_vertex"
892 : caseShaderType == CASESHADERTYPE_FRAGMENT ? "_fragment"
895 // Loop unrolling cases.
898 static const int loopIterationCounts[] = { 4, 8, 32 };
900 for (int caseTypeI = 0; caseTypeI < LoopUnrollCase::CASETYPE_LAST; caseTypeI++)
902 const LoopUnrollCase::CaseType caseType = (LoopUnrollCase::CaseType)caseTypeI;
903 const string caseTypeName = caseType == LoopUnrollCase::CASETYPE_INDEPENDENT ? "independent_iterations"
904 : caseType == LoopUnrollCase::CASETYPE_DEPENDENT ? "dependent_iterations"
906 const string caseTypeDesc = caseType == LoopUnrollCase::CASETYPE_INDEPENDENT ? "loop iterations don't depend on each other"
907 : caseType == LoopUnrollCase::CASETYPE_DEPENDENT ? "loop iterations depend on each other"
910 for (int loopIterNdx = 0; loopIterNdx < DE_LENGTH_OF_ARRAY(loopIterationCounts); loopIterNdx++)
912 const int loopIterations = loopIterationCounts[loopIterNdx];
913 const string name = caseTypeName + "_" + toString(loopIterations) + caseShaderTypeSuffix;
914 const string description = toString(loopIterations) + " iterations; " + caseTypeDesc;
916 unrollGroup->addChild(new LoopUnrollCase(m_context, name.c_str(), description.c_str(), caseShaderType, caseType, loopIterations));
921 // Loop-invariant code motion cases.
924 static const int loopIterationCounts[] = { 4, 8, 32 };
926 for (int loopIterNdx = 0; loopIterNdx < DE_LENGTH_OF_ARRAY(loopIterationCounts); loopIterNdx++)
928 const int loopIterations = loopIterationCounts[loopIterNdx];
929 const string name = toString(loopIterations) + "_iterations" + caseShaderTypeSuffix;
931 loopInvariantCodeMotionGroup->addChild(new LoopInvariantCodeMotionCase(m_context, name.c_str(), "", caseShaderType, loopIterations));
935 // Function inlining cases.
938 static const int callNestingDepths[] = { 4, 8, 32 };
940 for (int nestDepthNdx = 0; nestDepthNdx < DE_LENGTH_OF_ARRAY(callNestingDepths); nestDepthNdx++)
942 const int nestingDepth = callNestingDepths[nestDepthNdx];
943 const string name = toString(nestingDepth) + "_nested" + caseShaderTypeSuffix;
945 inlineGroup->addChild(new FunctionInliningCase(m_context, name.c_str(), "", caseShaderType, nestingDepth));
949 // Constant propagation cases.
951 for (int caseTypeI = 0; caseTypeI < ConstantPropagationCase::CASETYPE_LAST; caseTypeI++)
953 const ConstantPropagationCase::CaseType caseType = (ConstantPropagationCase::CaseType)caseTypeI;
954 const string caseTypeName = caseType == ConstantPropagationCase::CASETYPE_BUILT_IN_FUNCTIONS ? "built_in_functions"
955 : caseType == ConstantPropagationCase::CASETYPE_ARRAY ? "array"
956 : caseType == ConstantPropagationCase::CASETYPE_STRUCT ? "struct"
959 for (int constantExpressionsOnlyI = 0; constantExpressionsOnlyI <= 1; constantExpressionsOnlyI++)
961 const bool constantExpressionsOnly = constantExpressionsOnlyI != 0;
962 const string name = caseTypeName + (constantExpressionsOnly ? "" : "_no_const") + caseShaderTypeSuffix;
964 constantPropagationGroup->addChild(new ConstantPropagationCase(m_context, name.c_str(), "", caseShaderType, caseType, constantExpressionsOnly));
968 // Common subexpression elimination cases.
970 for (int caseTypeI = 0; caseTypeI < CommonSubexpressionCase::CASETYPE_LAST; caseTypeI++)
972 const CommonSubexpressionCase::CaseType caseType = (CommonSubexpressionCase::CaseType)caseTypeI;
974 const string caseTypeName = caseType == CommonSubexpressionCase::CASETYPE_SINGLE_STATEMENT ? "single_statement"
975 : caseType == CommonSubexpressionCase::CASETYPE_MULTIPLE_STATEMENTS ? "multiple_statements"
976 : caseType == CommonSubexpressionCase::CASETYPE_STATIC_BRANCH ? "static_branch"
977 : caseType == CommonSubexpressionCase::CASETYPE_LOOP ? "loop"
980 const string description = caseType == CommonSubexpressionCase::CASETYPE_SINGLE_STATEMENT ? "A single statement containing multiple uses of same subexpression"
981 : caseType == CommonSubexpressionCase::CASETYPE_MULTIPLE_STATEMENTS ? "Multiple statements performing same computations"
982 : caseType == CommonSubexpressionCase::CASETYPE_STATIC_BRANCH ? "Multiple statements including a static conditional"
983 : caseType == CommonSubexpressionCase::CASETYPE_LOOP ? "Multiple loops performing the same computations"
986 commonSubexpressionGroup->addChild(new CommonSubexpressionCase(m_context, (caseTypeName + caseShaderTypeSuffix).c_str(), description.c_str(), caseShaderType, caseType));
989 // Dead code elimination cases.
991 for (int caseTypeI = 0; caseTypeI < DeadCodeEliminationCase::CASETYPE_LAST; caseTypeI++)
993 const DeadCodeEliminationCase::CaseType caseType = (DeadCodeEliminationCase::CaseType)caseTypeI;
994 const char* const caseTypeName = caseType == DeadCodeEliminationCase::CASETYPE_DEAD_BRANCH_SIMPLE ? "dead_branch_simple"
995 : caseType == DeadCodeEliminationCase::CASETYPE_DEAD_BRANCH_COMPLEX ? "dead_branch_complex"
996 : caseType == DeadCodeEliminationCase::CASETYPE_DEAD_BRANCH_COMPLEX_NO_CONST ? "dead_branch_complex_no_const"
997 : caseType == DeadCodeEliminationCase::CASETYPE_DEAD_BRANCH_FUNC_CALL ? "dead_branch_func_call"
998 : caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_BASIC ? "unused_value_basic"
999 : caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_LOOP ? "unused_value_loop"
1000 : caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_DEAD_BRANCH ? "unused_value_dead_branch"
1001 : caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_AFTER_RETURN ? "unused_value_after_return"
1002 : caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_MUL_ZERO ? "unused_value_mul_zero"
1005 const char* const caseTypeDescription = caseType == DeadCodeEliminationCase::CASETYPE_DEAD_BRANCH_SIMPLE ? "Do computation inside a branch that is never taken (condition is simple false constant expression)"
1006 : caseType == DeadCodeEliminationCase::CASETYPE_DEAD_BRANCH_COMPLEX ? "Do computation inside a branch that is never taken (condition is complex false constant expression)"
1007 : caseType == DeadCodeEliminationCase::CASETYPE_DEAD_BRANCH_COMPLEX_NO_CONST ? "Do computation inside a branch that is never taken (condition is complex false expression, not constant expression but still compile-time computable)"
1008 : caseType == DeadCodeEliminationCase::CASETYPE_DEAD_BRANCH_FUNC_CALL ? "Do computation inside a branch that is never taken (condition is compile-time computable false expression containing function call to a simple inlineable function)"
1009 : caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_BASIC ? "Compute a value that is never used even statically"
1010 : caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_LOOP ? "Compute a value, using a loop, that is never used even statically"
1011 : caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_DEAD_BRANCH ? "Compute a value that is used only inside a statically dead branch"
1012 : caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_AFTER_RETURN ? "Compute a value that is used only after a return statement"
1013 : caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_MUL_ZERO ? "Compute a value that is used but multiplied by a zero constant expression"
1016 deadCodeEliminationGroup->addChild(new DeadCodeEliminationCase(m_context, (string() + caseTypeName + caseShaderTypeSuffix).c_str(), caseTypeDescription, caseShaderType, caseType));