1 // Copyright (c) 2016 Google Inc.
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
15 #include <spirv_validator_options.h>
25 #include "opt/set_spec_constant_default_value_pass.h"
26 #include "spirv-tools/optimizer.hpp"
31 using namespace spvtools;
35 // Status and actions to perform after parsing command-line arguments.
// OPT_CONTINUE is what ParseFlags returns (paired with 0) once every flag has
// been handled without error. OPT_STOP is returned after --version / --help
// (paired with 0) and after a flag error (paired with 1); main() tests for
// OPT_STOP right after parsing.
36 enum OptActions { OPT_CONTINUE, OPT_STOP };
// Formats the pass names reported by |optimizer|.GetPassNames() as a list.
// Each name is streamed into |ss| preceded by a newline and two tabs, so every
// pass ends up on its own indented line.
// NOTE(review): the declaration of |ss| and the return statement are not among
// the lines visible here; presumably the accumulated stream contents are
// returned - confirm.
43 std::string GetListOfPassesAsString(const spvtools::Optimizer& optimizer) {
45 for (const auto& name : optimizer.GetPassNames()) {
46 ss << "\n\t\t" << name;
// SPIR-V target environment used for every Optimizer constructed in this file:
// the pass-listing helpers below and main(), which also uses it to create the
// validation context.
51 const auto kDefaultEnvironment = SPV_ENV_UNIVERSAL_1_3;
// Builds a throwaway Optimizer for kDefaultEnvironment, calls
// RegisterLegalizationPasses() on it, and returns the resulting pass names
// formatted by GetListOfPassesAsString(). PrintUsage() inserts this string
// into the help text.
53 std::string GetLegalizationPasses() {
54 spvtools::Optimizer optimizer(kDefaultEnvironment);
55 optimizer.RegisterLegalizationPasses();
56 return GetListOfPassesAsString(optimizer);
// Builds a throwaway Optimizer for kDefaultEnvironment, calls
// RegisterPerformancePasses() on it (the same call the -O flag makes in
// ParseFlags), and returns the resulting pass names formatted by
// GetListOfPassesAsString(). PrintUsage() inserts this string into the help
// text.
59 std::string GetOptimizationPasses() {
60 spvtools::Optimizer optimizer(kDefaultEnvironment);
61 optimizer.RegisterPerformancePasses();
62 return GetListOfPassesAsString(optimizer);
// Builds a throwaway Optimizer for kDefaultEnvironment, calls
// RegisterSizePasses() on it (the same call the -Os flag makes in ParseFlags),
// and returns the resulting pass names formatted by GetListOfPassesAsString().
// PrintUsage() inserts this string into the help text.
65 std::string GetSizePasses() {
66 spvtools::Optimizer optimizer(kDefaultEnvironment);
67 optimizer.RegisterSizePasses();
68 return GetListOfPassesAsString(optimizer);
// Emits the help text for the tool. The text is a single format string whose
// arguments are, in order: |program| (twice), then the pass lists returned by
// GetLegalizationPasses(), GetOptimizationPasses() and GetSizePasses().
// Because the text goes through a printf-style formatter, a literal '%' in it
// has to be written as "%%" (as in "%%1" under --compact-ids).
//
// NOTE(review): discrepancies visible in the help text below:
//  - "--local-redundancy-elimination" is documented twice, and the second
//    entry breaks the lexicographical ordering requested below.
//  - The text documents "--eliminate-dead-insert", but ParseFlags only accepts
//    "--eliminate-dead-inserts"; one of the two spellings should be changed.
//  - Typos: "brranch", "referenceed" (twice), "simplfy", "generated by and
//    HLSL front-end", "This options should be used".
71 void PrintUsage(const char* program) {
72 // NOTE: Please maintain flags in lexicographical order.
74 R"(%s - Optimize a SPIR-V binary file.
76 USAGE: %s [options] [<input>] -o <output>
78 The SPIR-V binary is read from <input>. If no file is specified,
79 or if <input> is "-", then the binary is read from standard input.
80 if <output> is "-", then the optimized output is written to
83 NOTE: The optimizer is a work in progress.
85 Options (in lexicographical order):
87 Apply the conditional constant propagation transform. This will
88 propagate constant values throughout the program, and simplify
89 expressions and conditional jumps with known predicate
90 values. Performed on entry point call tree functions and
93 Cleanup the control flow graph. This will remove any unnecessary
94 code from the CFG like unreachable code. Performed on entry
95 point call tree functions and exported functions.
97 Remap result ids to a compact range starting from %%1 and without
99 --convert-local-access-chains
100 Convert constant index access chain loads/stores into
101 equivalent load/stores with inserts and extracts. Performed
102 on function scope variables referenced only with load, store,
103 and constant index access chains in entry point call tree
105 --eliminate-common-uniform
106 Perform load/load elimination for duplicate uniform values.
107 Converts any constant index access chain uniform loads into
108 its equivalent load and extract. Some loads will be moved
109 to facilitate sharing. Performed only on entry point
111 --eliminate-dead-branches
112 Convert conditional branches with constant condition to the
113 indicated unconditional brranch. Delete all resulting dead
114 code. Performed only on entry point call tree functions.
115 --eliminate-dead-code-aggressive
116 Delete instructions which do not contribute to a function's
117 output. Performed only on entry point call tree functions.
118 --eliminate-dead-const
119 Eliminate dead constants.
120 --eliminate-dead-functions
121 Deletes functions that cannot be reached from entry points or
123 --eliminate-dead-insert
124 Deletes unreferenced inserts into composites, most notably
125 unused stores to vector components, that are not removed by
126 aggressive dead code elimination.
127 --eliminate-dead-variables
128 Deletes module scope variables that are not referenced.
129 --eliminate-insert-extract
130 Replace extract from a sequence of inserts with the
131 corresponding value. Performed only on entry point call tree
133 --eliminate-local-multi-store
134 Replace stores and loads of function scope variables that are
135 stored multiple times. Performed on variables referenceed only
136 with loads and stores. Performed only on entry point call tree
138 --eliminate-local-single-block
139 Perform single-block store/load and load/load elimination.
140 Performed only on function scope variables in entry point
142 --eliminate-local-single-store
143 Replace stores and loads of function scope variables that are
144 only stored once. Performed on variables referenceed only with
145 loads and stores. Performed only on entry point call tree
147 --flatten-decorations
148 Replace decoration groups with repeated OpDecorate and
149 OpMemberDecorate instructions.
150 --fold-spec-const-op-composite
151 Fold the spec constants defined by OpSpecConstantOp or
152 OpSpecConstantComposite instructions to front-end constants
155 Freeze the values of specialization constants to their default
158 Convert if-then-else like assignments into OpSelect.
159 --inline-entry-points-exhaustive
160 Exhaustively inline all function calls in entry point call tree
161 functions. Currently does not inline calls to functions with
162 early return in a loop.
164 Runs a series of optimizations that attempts to take SPIR-V
165 generated by and HLSL front-end and generate legal Vulkan SPIR-V.
166 The optimizations are:
169 Note this does not guarantee legal code. This option implies
171 --local-redundancy-elimination
172 Looks for instructions in the same basic block that compute the
173 same value, and deletes the redundant ones.
175 Fully unrolls loops marked with the Unroll flag
176 --loop-unroll-partial
177 Partially unrolls loops marked with the Unroll flag. Takes an
178 additional non-0 integer argument to set the unroll factor, or
179 how many times a loop body should be duplicated
181 Join two blocks into a single block if the second has the
182 first as its only predecessor. Performed only on entry point
185 Replace all return instructions with unconditional branches to
186 a new basic block containing an unified return.
187 This pass does not currently support structured control flow. It
188 makes no changes if the shader capability is detected.
189 --local-redundancy-elimination
190 Looks for instructions in the same basic block that compute the
191 same value, and deletes the redundant ones.
193 Hoists loop-invariant conditionals out of loops by duplicating
194 the loop on each branch of the conditional and adjusting each
197 Optimize for performance. Apply a sequence of transformations
198 in an attempt to improve the performance of the generated
199 code. For this version of the optimizer, this flag is equivalent
200 to specifying the following optimization code names:
203 Optimize for size. Apply a sequence of transformations in an
204 attempt to minimize the size of the generated code. For this
205 version of the optimizer, this flag is equivalent to specifying
206 the following optimization code names:
209 NOTE: The specific transformations done by -O and -Os change
210 from release to release.
212 Apply the sequence of transformations indicated in <file>.
213 This file contains a sequence of strings separated by whitespace
214 (tabs, newlines or blanks). Each string is one of the flags
215 accepted by spirv-opt. Optimizations will be applied in the
216 sequence they appear in the file. This is equivalent to
217 specifying all the flags on the command line. For example,
218 given the file opts.cfg with the content:
220 --inline-entry-points-exhaustive
221 --eliminate-dead-code-aggressive
223 The following two invocations to spirv-opt are equivalent:
225 $ spirv-opt -Oconfig=opts.cfg program.spv
227 $ spirv-opt --inline-entry-points-exhaustive \
228 --eliminate-dead-code-aggressive program.spv
230 Lines starting with the character '#' in the configuration
231 file indicate a comment and will be ignored.
233 The -O, -Os, and -Oconfig flags act as macros. Using one of them
234 is equivalent to explicitly inserting the underlying flags at
235 that position in the command line. For example, the invocation
236 'spirv-opt --merge-blocks -O ...' applies the transformation
237 --merge-blocks followed by all the transformations implied by
240 Print SPIR-V assembly to standard error output before each pass
241 and after the last pass.
243 Change the scope of private variables that are used in a single
244 function to that function.
246 Removes duplicate types, decorations, capabilities and extension
248 --redundancy-elimination
249 Looks for instructions in the same function that compute the
250 same value, and deletes the redundant ones.
252 Allow store from one struct type to a different type with
253 compatible layout and members. This option is forwarded to the
255 --replace-invalid-opcode
256 Replaces instructions whose opcode is valid for shader modules,
257 but not for the current shader stage. To have an effect, all
258 entry points must have the same execution model.
260 Replace aggregate function scope variables that are only accessed
261 via their elements with new function variables representing each
263 --set-spec-const-default-value "<spec id>:<default value> ..."
264 Set the default values of the specialization constants with
265 <spec id>:<default value> pairs specified in a double-quoted
266 string. <spec id>:<default value> pairs must be separated by
267 blank spaces, and in each pair, spec id and default value must
268 be separated with colon ':' without any blank spaces in between.
269 e.g.: --set-spec-const-default-value "1:100 2:400"
270 --simplify-instructions
271 Will simplfy all instructions in the function as much as
274 Will not validate the SPIR-V before optimizing. If the SPIR-V
275 is invalid, the optimizer may fail or generate incorrect code.
276 This options should be used rarely, and with caution.
278 Replaces instructions with equivalent and less expensive ones.
280 Remove all debug instructions.
282 Rewrites instructions for which there are known driver bugs to
283 avoid triggering those bugs.
284 Current workarounds: Avoid OpUnreachable in loops.
286 Remove the duplicated constants.
290 Display optimizer version information.
292 program, program, GetLegalizationPasses().c_str(),
293 GetOptimizationPasses().c_str(), GetSizePasses().c_str());
296 // Reads command-line flags from the file specified in |oconfig_flag|. This string
297 // is assumed to have the form "-Oconfig=FILENAME". This function parses the
298 // string and extracts the file name after the '=' sign.
300 // Flags found in |FILENAME| are pushed at the end of the vector |file_flags|.
302 // This function returns true on success, false on failure.
// Errors (no '=' in the flag, file cannot be opened) are reported on stderr.
303 bool ReadFlagsFromFile(const char* oconfig_flag,
304 std::vector<std::string>* file_flags) {
// strchr yields a pointer to the first '=' in the flag, or nullptr when the
// flag contains none.
305 const char* fname = strchr(oconfig_flag, '=');
// NOTE(review): when |fname| is non-null it already points at '=', so the
// second half of this condition can never be true.
306 if (fname == nullptr || fname[0] != '=') {
307 fprintf(stderr, "error: Invalid -Oconfig flag %s\n", oconfig_flag);
312 std::ifstream input_file;
313 input_file.open(fname);
314 if (input_file.fail()) {
315 fprintf(stderr, "error: Could not open file '%s'\n", fname);
// NOTE(review): the statement that reads |flag| is not among the lines visible
// here. If it is a formatted `>>` extraction, testing eof() before the read
// lets the final failed extraction through with an empty string (filtered by
// the length check below), and only the first word of a '#' comment line would
// be skipped - confirm against the documented "lines starting with '#'" rule.
319 while (!input_file.eof()) {
// Keep only non-empty entries whose first character is not '#'.
322 if (flag.length() > 0 && flag[0] != '#') {
323 file_flags->push_back(flag);
// Forward declaration: ParseOconfigFlag() below calls ParseFlags(), which is
// defined after it.
330 OptStatus ParseFlags(int argc, const char** argv, Optimizer* optimizer,
331 const char** in_file, const char** out_file,
332 spv_validator_options options, bool* skip_validator);
334 // Parses and handles the -Oconfig flag. |prog_name| contains the name of
335 // the spirv-opt binary (used to build a new argv vector for the recursive
336 // invocation to ParseFlags). |opt_flag| contains the -Oconfig=FILENAME flag.
337 // |optimizer|, |in_file| and |out_file| are as in ParseFlags.
339 // This returns the same OptStatus instance returned by ParseFlags.
// It returns {OPT_STOP, 1} without calling ParseFlags when the configuration
// file cannot be read or when one of the collected flags contains "-Oconfig=".
//
// NOTE(review): the recursive call passes nullptr for the validator options and
// a local |skip_validator|. A configuration file containing
// --relax-struct-store therefore reaches `options->relax_struct_store` in
// ParseFlags with a null |options|, and --skip-validation / --legalize-hlsl
// inside a configuration file do not affect the caller's skip-validation state.
340 OptStatus ParseOconfigFlag(const char* prog_name, const char* opt_flag,
341 Optimizer* optimizer, const char** in_file,
342 const char** out_file) {
// |flags| becomes the synthetic argv: the program name first, then every flag
// read from the configuration file.
343 std::vector<std::string> flags;
344 flags.push_back(prog_name);
346 std::vector<std::string> file_flags;
347 if (!ReadFlagsFromFile(opt_flag, &file_flags)) {
349 "error: Could not read optimizer flags from configuration file\n");
350 return {OPT_STOP, 1};
352 flags.insert(flags.end(), file_flags.begin(), file_flags.end());
// The pointers stored in |new_argv| refer to the strings owned by |flags|, so
// they are valid only while |flags| is alive and unmodified.
// NOTE(review): no delete[] for |new_argv| is visible on either return path
// below; this looks like a leak per -Oconfig flag - confirm.
354 const char** new_argv = new const char*[flags.size()];
355 for (size_t i = 0; i < flags.size(); i++) {
// Substring search, applied to every element including the program name at
// index 0.
356 if (flags[i].find("-Oconfig=") != std::string::npos) {
358 "error: Flag -Oconfig= may not be used inside the configuration "
360 return {OPT_STOP, 1};
362 new_argv[i] = flags[i].c_str();
365 bool skip_validator = false;
366 return ParseFlags(static_cast<int>(flags.size()), new_argv, optimizer,
367 in_file, out_file, nullptr, &skip_validator);
// Handles the argument that follows --loop-unroll-partial. |argi| is the index
// in |argv| of the unroll factor (ParseFlags passes ++argi). On success the
// pass created by CreateLoopUnrollPass(false, factor) is registered on
// |optimizer| and {OPT_CONTINUE, 0} is returned; otherwise an error is printed
// and {OPT_STOP, 1} is returned.
// NOTE(review): the conditions guarding the two outcomes are not among the
// lines visible here; presumably they check argi < argc and factor != 0 -
// confirm.
370 OptStatus ParseLoopUnrollPartialArg(int argc, const char** argv, int argi,
371 Optimizer* optimizer) {
// atoi yields 0 when no conversion can be performed, so a non-numeric argument
// is indistinguishable from an explicit "0" here.
373 int factor = atoi(argv[argi]);
375 optimizer->RegisterPass(CreateLoopUnrollPass(false, factor));
376 return {OPT_CONTINUE, 0};
380 "error: --loop-unroll-partial must be followed by a non-0 "
382 return {OPT_STOP, 1};
385 // Parses command-line flags. |argc| contains the number of command-line flags.
386 // |argv| points to an array of strings holding the flags. |optimizer| is the
387 // Optimizer instance used to optimize the program.
// argv[0] is skipped. Each pass flag registers its pass on |optimizer| as soon
// as it is encountered; -O, -Os, --legalize-hlsl and -Oconfig=FILE register
// whole groups of passes at their position on the command line.
//
// |options| receives validator settings: --relax-struct-store sets
// options->relax_struct_store. |skip_validator| is set to true by
// --skip-validation and by --legalize-hlsl.
//
// NOTE(review): |options| is dereferenced without a null check, yet
// ParseOconfigFlag calls this function with options == nullptr.
// NOTE(review): the accepted spelling is "--eliminate-dead-inserts", while the
// usage text documents "--eliminate-dead-insert".
389 // On return, this function stores the name of the input program in |in_file|
390 // and the name of the output file in |out_file|. The return value indicates whether
391 // optimization should continue and a status code indicating an error or
393 OptStatus ParseFlags(int argc, const char** argv, Optimizer* optimizer,
394 const char** in_file, const char** out_file,
395 spv_validator_options options, bool* skip_validator) {
396 for (int argi = 1; argi < argc; ++argi) {
397 const char* cur_arg = argv[argi];
398 if ('-' == cur_arg[0]) {
399 if (0 == strcmp(cur_arg, "--version")) {
400 printf("%s\n", spvSoftwareVersionDetailsString());
401 return {OPT_STOP, 0};
402 } else if (0 == strcmp(cur_arg, "--help") || 0 == strcmp(cur_arg, "-h")) {
404 return {OPT_STOP, 0};
405 } else if (0 == strcmp(cur_arg, "-o")) {
406 if (!*out_file && argi + 1 < argc) {
407 *out_file = argv[++argi];
410 return {OPT_STOP, 1};
412 } else if (0 == strcmp(cur_arg, "--strip-debug")) {
413 optimizer->RegisterPass(CreateStripDebugInfoPass());
414 } else if (0 == strcmp(cur_arg, "--set-spec-const-default-value")) {
417 opt::SetSpecConstantDefaultValuePass::ParseDefaultValuesString(
419 if (!spec_ids_vals) {
421 "error: Invalid argument for "
422 "--set-spec-const-default-value: %s\n",
424 return {OPT_STOP, 1};
426 optimizer->RegisterPass(
427 CreateSetSpecConstantDefaultValuePass(std::move(*spec_ids_vals)));
431 "error: Expected a string of <spec id>:<default value> pairs.");
432 return {OPT_STOP, 1};
434 } else if (0 == strcmp(cur_arg, "--if-conversion")) {
435 optimizer->RegisterPass(CreateIfConversionPass());
436 } else if (0 == strcmp(cur_arg, "--freeze-spec-const")) {
437 optimizer->RegisterPass(CreateFreezeSpecConstantValuePass());
438 } else if (0 == strcmp(cur_arg, "--inline-entry-points-exhaustive")) {
439 optimizer->RegisterPass(CreateInlineExhaustivePass());
440 } else if (0 == strcmp(cur_arg, "--inline-entry-points-opaque")) {
441 optimizer->RegisterPass(CreateInlineOpaquePass());
442 } else if (0 == strcmp(cur_arg, "--convert-local-access-chains")) {
443 optimizer->RegisterPass(CreateLocalAccessChainConvertPass());
444 } else if (0 == strcmp(cur_arg, "--eliminate-dead-code-aggressive")) {
445 optimizer->RegisterPass(CreateAggressiveDCEPass());
446 } else if (0 == strcmp(cur_arg, "--eliminate-insert-extract")) {
447 optimizer->RegisterPass(CreateInsertExtractElimPass());
448 } else if (0 == strcmp(cur_arg, "--eliminate-local-single-block")) {
449 optimizer->RegisterPass(CreateLocalSingleBlockLoadStoreElimPass());
450 } else if (0 == strcmp(cur_arg, "--eliminate-local-single-store")) {
451 optimizer->RegisterPass(CreateLocalSingleStoreElimPass());
452 } else if (0 == strcmp(cur_arg, "--merge-blocks")) {
453 optimizer->RegisterPass(CreateBlockMergePass());
454 } else if (0 == strcmp(cur_arg, "--merge-return")) {
455 optimizer->RegisterPass(CreateMergeReturnPass());
456 } else if (0 == strcmp(cur_arg, "--eliminate-dead-branches")) {
457 optimizer->RegisterPass(CreateDeadBranchElimPass());
458 } else if (0 == strcmp(cur_arg, "--eliminate-dead-functions")) {
459 optimizer->RegisterPass(CreateEliminateDeadFunctionsPass());
460 } else if (0 == strcmp(cur_arg, "--eliminate-local-multi-store")) {
461 optimizer->RegisterPass(CreateLocalMultiStoreElimPass());
462 } else if (0 == strcmp(cur_arg, "--eliminate-common-uniform")) {
463 optimizer->RegisterPass(CreateCommonUniformElimPass());
464 } else if (0 == strcmp(cur_arg, "--eliminate-dead-const")) {
465 optimizer->RegisterPass(CreateEliminateDeadConstantPass());
466 } else if (0 == strcmp(cur_arg, "--eliminate-dead-inserts")) {
467 optimizer->RegisterPass(CreateDeadInsertElimPass());
468 } else if (0 == strcmp(cur_arg, "--eliminate-dead-variables")) {
469 optimizer->RegisterPass(CreateDeadVariableEliminationPass());
470 } else if (0 == strcmp(cur_arg, "--fold-spec-const-op-composite")) {
471 optimizer->RegisterPass(CreateFoldSpecConstantOpAndCompositePass());
472 } else if (0 == strcmp(cur_arg, "--loop-unswitch")) {
473 optimizer->RegisterPass(CreateLoopUnswitchPass());
474 } else if (0 == strcmp(cur_arg, "--scalar-replacement")) {
475 optimizer->RegisterPass(CreateScalarReplacementPass());
476 } else if (0 == strcmp(cur_arg, "--strength-reduction")) {
477 optimizer->RegisterPass(CreateStrengthReductionPass());
478 } else if (0 == strcmp(cur_arg, "--unify-const")) {
479 optimizer->RegisterPass(CreateUnifyConstantPass());
480 } else if (0 == strcmp(cur_arg, "--flatten-decorations")) {
481 optimizer->RegisterPass(CreateFlattenDecorationPass());
482 } else if (0 == strcmp(cur_arg, "--compact-ids")) {
483 optimizer->RegisterPass(CreateCompactIdsPass());
484 } else if (0 == strcmp(cur_arg, "--cfg-cleanup")) {
485 optimizer->RegisterPass(CreateCFGCleanupPass());
486 } else if (0 == strcmp(cur_arg, "--local-redundancy-elimination")) {
487 optimizer->RegisterPass(CreateLocalRedundancyEliminationPass());
488 } else if (0 == strcmp(cur_arg, "--loop-invariant-code-motion")) {
489 optimizer->RegisterPass(CreateLoopInvariantCodeMotionPass());
490 } else if (0 == strcmp(cur_arg, "--redundancy-elimination")) {
491 optimizer->RegisterPass(CreateRedundancyEliminationPass());
492 } else if (0 == strcmp(cur_arg, "--private-to-local")) {
493 optimizer->RegisterPass(CreatePrivateToLocalPass());
494 } else if (0 == strcmp(cur_arg, "--remove-duplicates")) {
495 optimizer->RegisterPass(CreateRemoveDuplicatesPass());
496 } else if (0 == strcmp(cur_arg, "--workaround-1209")) {
497 optimizer->RegisterPass(CreateWorkaround1209Pass());
498 } else if (0 == strcmp(cur_arg, "--relax-struct-store")) {
499 options->relax_struct_store = true;
500 } else if (0 == strcmp(cur_arg, "--replace-invalid-opcode")) {
501 optimizer->RegisterPass(CreateReplaceInvalidOpcodePass());
502 } else if (0 == strcmp(cur_arg, "--simplify-instructions")) {
503 optimizer->RegisterPass(CreateSimplificationPass());
504 } else if (0 == strcmp(cur_arg, "--loop-unroll")) {
505 optimizer->RegisterPass(CreateLoopUnrollPass(true));
506 } else if (0 == strcmp(cur_arg, "--loop-unroll-partial")) {
508 ParseLoopUnrollPartialArg(argc, argv, ++argi, optimizer);
509 if (status.action != OPT_CONTINUE) {
512 } else if (0 == strcmp(cur_arg, "--skip-validation")) {
513 *skip_validator = true;
514 } else if (0 == strcmp(cur_arg, "-O")) {
515 optimizer->RegisterPerformancePasses();
516 } else if (0 == strcmp(cur_arg, "-Os")) {
517 optimizer->RegisterSizePasses();
518 } else if (0 == strcmp(cur_arg, "--legalize-hlsl")) {
519 *skip_validator = true;
520 optimizer->RegisterLegalizationPasses();
521 } else if (0 == strncmp(cur_arg, "-Oconfig=", sizeof("-Oconfig=") - 1)) {
523 ParseOconfigFlag(argv[0], cur_arg, optimizer, in_file, out_file);
524 if (status.action != OPT_CONTINUE) {
527 } else if (0 == strcmp(cur_arg, "--ccp")) {
528 optimizer->RegisterPass(CreateCCPPass());
529 } else if (0 == strcmp(cur_arg, "--print-all")) {
530 optimizer->SetPrintAll(&std::cerr);
531 } else if ('\0' == cur_arg[1]) {
532 // Setting a filename of "-" to indicate stdin.
536 fprintf(stderr, "error: More than one input file specified\n");
537 return {OPT_STOP, 1};
542 "error: Unknown flag '%s'. Use --help for a list of valid flags\n",
544 return {OPT_STOP, 1};
550 fprintf(stderr, "error: More than one input file specified\n");
551 return {OPT_STOP, 1};
556 return {OPT_CONTINUE, 0};
// Entry point. Parses the command line, reads the input SPIR-V binary,
// validates it unless validation was skipped, runs the registered passes and
// writes the result to the output file.
561 int main(int argc, const char** argv) {
562 const char* in_file = nullptr;
563 const char* out_file = nullptr;
564 bool skip_validator = false;
566 spv_target_env target_env = kDefaultEnvironment;
// NOTE(review): the only calls to spvValidatorOptionsDestroy(options) visible
// below sit in the validation section. When |skip_validator| is true, or on
// the earlier exits (OPT_STOP, missing -o, unreadable input), no destroy call
// is visible - confirm whether |options| leaks on those paths.
567 spv_validator_options options = spvValidatorOptionsCreate();
569 spvtools::Optimizer optimizer(target_env);
// Route optimizer diagnostics to standard error.
570 optimizer.SetMessageConsumer([](spv_message_level_t level, const char* source,
571 const spv_position_t& position,
572 const char* message) {
573 std::cerr << StringifyMessage(level, source, position, message)
// ParseFlags registers the requested passes on |optimizer| and fills in the
// file names, validator options and skip-validation flag.
577 OptStatus status = ParseFlags(argc, argv, &optimizer, &in_file, &out_file,
578 options, &skip_validator);
580 if (status.action == OPT_STOP) {
// An output file is mandatory.
584 if (out_file == nullptr) {
585 fprintf(stderr, "error: -o required\n");
// The module is read as a sequence of 32-bit words.
589 std::vector<uint32_t> binary;
590 if (!ReadFile<uint32_t>(in_file, "rb", &binary)) {
594 if (!skip_validator) {
595 // Let's do validation first.
596 spv_context context = spvContextCreate(target_env);
597 spv_diagnostic diagnostic = nullptr;
598 spv_const_binary_t binary_struct = {binary.data(), binary.size()};
600 spvValidateWithOptions(context, options, &binary_struct, &diagnostic);
// Two clean-up sequences follow: the first also prints the diagnostic
// (presumably the validation-failure path - the branch condition is not among
// the visible lines), the second releases the same resources without printing.
602 spvDiagnosticPrint(diagnostic);
603 spvDiagnosticDestroy(diagnostic);
604 spvValidatorOptionsDestroy(options);
605 spvContextDestroy(context);
608 spvDiagnosticDestroy(diagnostic);
609 spvValidatorOptionsDestroy(options);
610 spvContextDestroy(context);
613 // By using the same vector as input and output, we save time in the case
614 // that there was no change.
615 bool ok = optimizer.Run(binary.data(), binary.size(), &binary);
// The (possibly rewritten) words in |binary| are written to the output file.
617 if (!WriteFile<uint32_t>(out_file, "wb", binary.data(), binary.size())) {