Added C++ code generation to spirv-stats
authorAndrey Tuganov <andreyt@google.com>
Mon, 31 Jul 2017 17:08:38 +0000 (13:08 -0400)
committerDavid Neto <dneto@google.com>
Tue, 1 Aug 2017 19:41:42 +0000 (15:41 -0400)
The tool can now generate C++ code returning some of the histograms and
Huffman codecs generated from those histograms.

source/spirv_stats.cpp
source/spirv_stats.h
tools/stats/stats.cpp
tools/stats/stats_analyzer.cpp
tools/stats/stats_analyzer.h

index 2186e0d..0c628d5 100644 (file)
@@ -74,10 +74,74 @@ class StatsAggregator {
     ProcessCapability();
     ProcessExtension();
     ProcessConstant();
+    ProcessEnums();
+    ProcessLiteralStrings();
+    ProcessNonIdWords();
 
     return SPV_SUCCESS;
   }
 
+  // Collects statistics of enum words for operands of specific types.
+  void ProcessEnums() {
+    const Instruction& inst = GetCurrentInstruction();
+    for (const auto& operand : inst.operands()) {
+      switch (operand.type) {
+        case SPV_OPERAND_TYPE_SOURCE_LANGUAGE:
+        case SPV_OPERAND_TYPE_EXECUTION_MODEL:
+        case SPV_OPERAND_TYPE_ADDRESSING_MODEL:
+        case SPV_OPERAND_TYPE_MEMORY_MODEL:
+        case SPV_OPERAND_TYPE_EXECUTION_MODE:
+        case SPV_OPERAND_TYPE_STORAGE_CLASS:
+        case SPV_OPERAND_TYPE_DIMENSIONALITY:
+        case SPV_OPERAND_TYPE_SAMPLER_ADDRESSING_MODE:
+        case SPV_OPERAND_TYPE_SAMPLER_FILTER_MODE:
+        case SPV_OPERAND_TYPE_SAMPLER_IMAGE_FORMAT:
+        case SPV_OPERAND_TYPE_IMAGE_CHANNEL_ORDER:
+        case SPV_OPERAND_TYPE_IMAGE_CHANNEL_DATA_TYPE:
+        case SPV_OPERAND_TYPE_FP_ROUNDING_MODE:
+        case SPV_OPERAND_TYPE_LINKAGE_TYPE:
+        case SPV_OPERAND_TYPE_ACCESS_QUALIFIER:
+        case SPV_OPERAND_TYPE_FUNCTION_PARAMETER_ATTRIBUTE:
+        case SPV_OPERAND_TYPE_DECORATION:
+        case SPV_OPERAND_TYPE_BUILT_IN:
+        case SPV_OPERAND_TYPE_GROUP_OPERATION:
+        case SPV_OPERAND_TYPE_KERNEL_ENQ_FLAGS:
+        case SPV_OPERAND_TYPE_KERNEL_PROFILING_INFO:
+        case SPV_OPERAND_TYPE_CAPABILITY: {
+          ++stats_->enum_hist[operand.type][inst.word(operand.offset)];
+          break;
+        }
+        default:
+          break;
+      }
+    }
+  }
+
+  // Counts, per opcode, how many times each literal string operand value
+  // occurs in the current instruction.
+  void ProcessLiteralStrings() {
+    const Instruction& inst = GetCurrentInstruction();
+    for (const auto& operand : inst.operands()) {
+      if (operand.type != SPV_OPERAND_TYPE_LITERAL_STRING) continue;
+
+      // The characters are read in place from the instruction words, starting
+      // at the operand's offset and stopping at the first NUL byte.
+      const char* const chars =
+          reinterpret_cast<const char*>(&inst.words()[operand.offset]);
+      const std::string text(chars);
+      ++stats_->literal_strings_hist[inst.opcode()][text];
+    }
+  }
+
+  // Collects statistics of all single word non-id operand slots.
+  void ProcessNonIdWords() {
+    const Instruction& inst = GetCurrentInstruction();
+    uint32_t index = 0;
+    for (const auto& operand : inst.operands()) {
+      if (operand.num_words == 1 && !spvIsIdType(operand.type)) {
+          ++stats_->non_id_words_hist[std::pair<uint32_t, uint32_t>(
+              inst.opcode(), index)][inst.word(operand.offset)];
+      }
+      ++index;
+    }
+  }
+
   // Collects OpCapability statistics.
   void ProcessCapability() {
     const Instruction& inst = GetCurrentInstruction();
@@ -100,7 +164,18 @@ class StatsAggregator {
     const SpvOp opcode = inst_it->opcode();
     ++stats_->opcode_hist[opcode];
 
+    const uint32_t opcode_and_num_operands =
+        (uint32_t(inst_it->operands().size()) << 16) | uint32_t(opcode);
+    ++stats_->opcode_and_num_operands_hist[opcode_and_num_operands];
+
     ++inst_it;
+
+    if (inst_it != vstate_->ordered_instructions().rend()) {
+      const SpvOp prev_opcode = inst_it->opcode();
+      ++stats_->opcode_and_num_operands_markov_hist[prev_opcode][
+          opcode_and_num_operands];
+    }
+
     auto step_it = stats_->opcode_markov_hist.begin();
     for (; inst_it != vstate_->ordered_instructions().rend() &&
          step_it != stats_->opcode_markov_hist.end(); ++inst_it, ++step_it) {
index 9c7a41a..4b16951 100644 (file)
@@ -15,6 +15,7 @@
 #ifndef LIBSPIRV_SPIRV_STATS_H_
 #define LIBSPIRV_SPIRV_STATS_H_
 
+#include <map>
 #include <string>
 #include <unordered_map>
 #include <vector>
@@ -39,6 +40,10 @@ struct SpirvStats {
   // Opcode histogram, SpvOpXXX -> count.
   std::unordered_map<uint32_t, uint32_t> opcode_hist;
 
+  // Histogram of words combining opcode and number of operands,
+  // opcode | (num_operands << 16) -> count.
+  std::unordered_map<uint32_t, uint32_t> opcode_and_num_operands_hist;
+
   // OpConstant u16 histogram, value -> count.
   std::unordered_map<uint16_t, uint32_t> u16_constant_hist;
 
@@ -63,6 +68,29 @@ struct SpirvStats {
   // OpConstant f64 histogram, value -> count.
   std::unordered_map<double, uint32_t> f64_constant_hist;
 
+  // Enum histogram, operand type -> operand value -> count.
+  std::unordered_map<uint32_t,
+      std::unordered_map<uint32_t, uint32_t>> enum_hist;
+
+  // Histogram of all non-id single words.
+  // pair<opcode, operand index> -> value -> count.
+  // This is a generalization of enum_hist, also includes literal integers and
+  // masks.
+  std::map<std::pair<uint32_t, uint32_t>,
+      std::map<uint32_t, uint32_t>> non_id_words_hist;
+
+  // Histogram of literal strings, sharded by opcodes, opcode -> string -> count.
+  // This is suboptimal if an opcode has multiple literal string operands,
+  // as it wouldn't differentiate between operands.
+  std::unordered_map<uint32_t, std::unordered_map<std::string, uint32_t>>
+      literal_strings_hist;
+
+  // Markov chain histograms:
+  // opcode -> next(opcode | (num_operands << 16)) -> count.
+  // See also opcode_and_num_operands_hist, which collects global statistics.
+  std::unordered_map<uint32_t, std::unordered_map<uint32_t, uint32_t>>
+      opcode_and_num_operands_markov_hist;
+
   // Used to collect statistics on opcodes triggering other opcodes.
   // Container scheme: gap between instructions -> cue opcode -> later opcode
   // -> count.
index 51d6183..6e6878d 100644 (file)
@@ -14,6 +14,7 @@
 
 #include <cassert>
 #include <cstring>
+#include <fstream>
 #include <iostream>
 #include <unordered_map>
 
@@ -43,8 +44,35 @@ TIP: In order to collect statistics from all .spv files under current dir use
 find . -name "*.spv" -print0 | xargs -0 -s 2000000 %s
 
 Options:
-  -h, --help                       Print this help.
-  -v, --verbose                    Print additional info to stderr.
+  -h, --help
+                   Print this help.
+
+  -v, --verbose
+                   Print additional info to stderr.
+
+  --codegen_opcode_hist
+                   Output generated C++ code for opcode histogram.
+                   This flag disables non-C++ output.
+
+  --codegen_opcode_and_num_operands_hist
+                   Output generated C++ code for opcode_and_num_operands
+                   histogram.
+                   This flag disables non-C++ output.
+
+  --codegen_opcode_and_num_operands_markov_huffman_codecs
+                   Output generated C++ code for Huffman codecs of
+                   opcode_and_num_operands Markov chain.
+                   This flag disables non-C++ output.
+
+  --codegen_literal_string_huffman_codecs
+                   Output generated C++ code for Huffman codecs for
+                   literal strings.
+                   This flag disables non-C++ output.
+
+  --codegen_non_id_word_huffman_codecs
+                   Output generated C++ code for Huffman codecs for
+                   single-word non-id slots.
+                   This flag disables non-C++ output.
 )",
       argv0, argv0, argv0);
 }
@@ -77,9 +105,17 @@ int main(int argc, char** argv) {
   bool continue_processing = true;
   int return_code = 0;
 
+  bool expect_output_path = false;
   bool verbose = false;
+  bool export_text = true;
+  bool codegen_opcode_hist = false;
+  bool codegen_opcode_and_num_operands_hist = false;
+  bool codegen_opcode_and_num_operands_markov_huffman_codecs = false;
+  bool codegen_literal_string_huffman_codecs = false;
+  bool codegen_non_id_word_huffman_codecs = false;
 
   std::vector<const char*> paths;
+  const char* output_path = nullptr;
 
   for (int argi = 1; continue_processing && argi < argc; ++argi) {
     const char* cur_arg = argv[argi];
@@ -88,15 +124,44 @@ int main(int argc, char** argv) {
         PrintUsage(argv[0]);
         continue_processing = false;
         return_code = 0;
-      } else if (0 == strcmp(cur_arg, "--verbose") || 0 == strcmp(cur_arg, "-v")) {
+      } else if (0 == strcmp(cur_arg, "--codegen_opcode_hist")) {
+        codegen_opcode_hist = true;
+        export_text = false;
+      } else if (0 == strcmp(cur_arg,
+                            "--codegen_opcode_and_num_operands_hist")) {
+        codegen_opcode_and_num_operands_hist = true;
+        export_text = false;
+      } else if (strcmp(
+         "--codegen_opcode_and_num_operands_markov_huffman_codecs",
+         cur_arg) == 0) {
+        codegen_opcode_and_num_operands_markov_huffman_codecs = true;
+        export_text = false;
+      } else if (0 == strcmp(cur_arg,
+                            "--codegen_literal_string_huffman_codecs")) {
+        codegen_literal_string_huffman_codecs = true;
+        export_text = false;
+      } else if (0 == strcmp(cur_arg,
+                            "--codegen_non_id_word_huffman_codecs")) {
+        codegen_non_id_word_huffman_codecs = true;
+        export_text = false;
+      } else if (0 == strcmp(cur_arg, "--verbose") ||
+                0 == strcmp(cur_arg, "-v")) {
         verbose = true;
+      } else if (0 == strcmp(cur_arg, "--output") ||
+                0 == strcmp(cur_arg, "-o")) {
+        expect_output_path = true;
       } else {
         PrintUsage(argv[0]);
         continue_processing = false;
         return_code = 1;
       }
     } else {
-      paths.push_back(cur_arg);
+      if (expect_output_path) {
+       output_path = cur_arg;
+       expect_output_path = false;
+      } else {
+       paths.push_back(cur_arg);
+      }
     }
   }
 
@@ -133,26 +198,62 @@ int main(int argc, char** argv) {
 
   StatsAnalyzer analyzer(stats);
 
-  std::ostream& out = std::cout;
+  std::ofstream fout;
+  if (output_path) {
+    fout.open(output_path);
+    if (!fout.is_open()) {
+      std::cerr << "error: Failed to open " << output_path << std::endl;
+      return 1;
+    }
+  }
+
+  std::ostream& out = fout.is_open() ? fout : std::cout;
 
-  out << std::endl;
-  analyzer.WriteVersion(out);
-  analyzer.WriteGenerator(out);
+  if (export_text) {
+    out << std::endl;
+    analyzer.WriteVersion(out);
+    analyzer.WriteGenerator(out);
 
-  out << std::endl;
-  analyzer.WriteCapability(out);
+    out << std::endl;
+    analyzer.WriteCapability(out);
 
-  out << std::endl;
-  analyzer.WriteExtension(out);
+    out << std::endl;
+    analyzer.WriteExtension(out);
 
-  out << std::endl;
-  analyzer.WriteOpcode(out);
+    out << std::endl;
+    analyzer.WriteOpcode(out);
 
-  out << std::endl;
-  analyzer.WriteOpcodeMarkov(out);
+    out << std::endl;
+    analyzer.WriteOpcodeMarkov(out);
 
-  out << std::endl;
-  analyzer.WriteConstantLiterals(out);
+    out << std::endl;
+    analyzer.WriteConstantLiterals(out);
+  }
+
+  if (codegen_opcode_hist) {
+    out << std::endl;
+    analyzer.WriteCodegenOpcodeHist(out);
+  }
+
+  if (codegen_opcode_and_num_operands_hist) {
+    out << std::endl;
+    analyzer.WriteCodegenOpcodeAndNumOperandsHist(out);
+  }
+
+  if (codegen_opcode_and_num_operands_markov_huffman_codecs) {
+    out << std::endl;
+    analyzer.WriteCodegenOpcodeAndNumOperandsMarkovHuffmanCodecs(out);
+  }
+
+  if (codegen_literal_string_huffman_codecs) {
+    out << std::endl;
+    analyzer.WriteCodegenLiteralStringHuffmanCodecs(out);
+  }
+
+  if (codegen_non_id_word_huffman_codecs) {
+    out << std::endl;
+    analyzer.WriteCodegenNonIdWordHuffmanCodecs(out);
+  }
 
   return 0;
 }
index 9e248a4..e196e49 100644 (file)
 #include "stats_analyzer.h"
 
 #include <algorithm>
+#include <cassert>
+#include <cstring>
 #include <iostream>
 #include <sstream>
 #include <vector>
 
+#include "spirv/1.2/spirv.h"
 #include "source/enum_string_mapping.h"
 #include "source/opcode.h"
+#include "source/operand.h"
 #include "source/spirv_constant.h"
-#include "spirv/1.1/spirv.h"
 
 using libspirv::SpirvStats;
 
 namespace {
 
+// Returns all SPIR-V v1.2 opcodes, including the KHR subgroup extension ops.
+std::vector<uint32_t> GetAllOpcodes() {
+  return std::vector<uint32_t>({
+    SpvOpNop,
+    SpvOpUndef,
+    SpvOpSourceContinued,
+    SpvOpSource,
+    SpvOpSourceExtension,
+    SpvOpName,
+    SpvOpMemberName,
+    SpvOpString,
+    SpvOpLine,
+    SpvOpExtension,
+    SpvOpExtInstImport,
+    SpvOpExtInst,
+    SpvOpMemoryModel,
+    SpvOpEntryPoint,
+    SpvOpExecutionMode,
+    SpvOpCapability,
+    SpvOpTypeVoid,
+    SpvOpTypeBool,
+    SpvOpTypeInt,
+    SpvOpTypeFloat,
+    SpvOpTypeVector,
+    SpvOpTypeMatrix,
+    SpvOpTypeImage,
+    SpvOpTypeSampler,
+    SpvOpTypeSampledImage,
+    SpvOpTypeArray,
+    SpvOpTypeRuntimeArray,
+    SpvOpTypeStruct,
+    SpvOpTypeOpaque,
+    SpvOpTypePointer,
+    SpvOpTypeFunction,
+    SpvOpTypeEvent,
+    SpvOpTypeDeviceEvent,
+    SpvOpTypeReserveId,
+    SpvOpTypeQueue,
+    SpvOpTypePipe,
+    SpvOpTypeForwardPointer,
+    SpvOpConstantTrue,
+    SpvOpConstantFalse,
+    SpvOpConstant,
+    SpvOpConstantComposite,
+    SpvOpConstantSampler,
+    SpvOpConstantNull,
+    SpvOpSpecConstantTrue,
+    SpvOpSpecConstantFalse,
+    SpvOpSpecConstant,
+    SpvOpSpecConstantComposite,
+    SpvOpSpecConstantOp,
+    SpvOpFunction,
+    SpvOpFunctionParameter,
+    SpvOpFunctionEnd,
+    SpvOpFunctionCall,
+    SpvOpVariable,
+    SpvOpImageTexelPointer,
+    SpvOpLoad,
+    SpvOpStore,
+    SpvOpCopyMemory,
+    SpvOpCopyMemorySized,
+    SpvOpAccessChain,
+    SpvOpInBoundsAccessChain,
+    SpvOpPtrAccessChain,
+    SpvOpArrayLength,
+    SpvOpGenericPtrMemSemantics,
+    SpvOpInBoundsPtrAccessChain,
+    SpvOpDecorate,
+    SpvOpMemberDecorate,
+    SpvOpDecorationGroup,
+    SpvOpGroupDecorate,
+    SpvOpGroupMemberDecorate,
+    SpvOpVectorExtractDynamic,
+    SpvOpVectorInsertDynamic,
+    SpvOpVectorShuffle,
+    SpvOpCompositeConstruct,
+    SpvOpCompositeExtract,
+    SpvOpCompositeInsert,
+    SpvOpCopyObject,
+    SpvOpTranspose,
+    SpvOpSampledImage,
+    SpvOpImageSampleImplicitLod,
+    SpvOpImageSampleExplicitLod,
+    SpvOpImageSampleDrefImplicitLod,
+    SpvOpImageSampleDrefExplicitLod,
+    SpvOpImageSampleProjImplicitLod,
+    SpvOpImageSampleProjExplicitLod,
+    SpvOpImageSampleProjDrefImplicitLod,
+    SpvOpImageSampleProjDrefExplicitLod,
+    SpvOpImageFetch,
+    SpvOpImageGather,
+    SpvOpImageDrefGather,
+    SpvOpImageRead,
+    SpvOpImageWrite,
+    SpvOpImage,
+    SpvOpImageQueryFormat,
+    SpvOpImageQueryOrder,
+    SpvOpImageQuerySizeLod,
+    SpvOpImageQuerySize,
+    SpvOpImageQueryLod,
+    SpvOpImageQueryLevels,
+    SpvOpImageQuerySamples,
+    SpvOpConvertFToU,
+    SpvOpConvertFToS,
+    SpvOpConvertSToF,
+    SpvOpConvertUToF,
+    SpvOpUConvert,
+    SpvOpSConvert,
+    SpvOpFConvert,
+    SpvOpQuantizeToF16,
+    SpvOpConvertPtrToU,
+    SpvOpSatConvertSToU,
+    SpvOpSatConvertUToS,
+    SpvOpConvertUToPtr,
+    SpvOpPtrCastToGeneric,
+    SpvOpGenericCastToPtr,
+    SpvOpGenericCastToPtrExplicit,
+    SpvOpBitcast,
+    SpvOpSNegate,
+    SpvOpFNegate,
+    SpvOpIAdd,
+    SpvOpFAdd,
+    SpvOpISub,
+    SpvOpFSub,
+    SpvOpIMul,
+    SpvOpFMul,
+    SpvOpUDiv,
+    SpvOpSDiv,
+    SpvOpFDiv,
+    SpvOpUMod,
+    SpvOpSRem,
+    SpvOpSMod,
+    SpvOpFRem,
+    SpvOpFMod,
+    SpvOpVectorTimesScalar,
+    SpvOpMatrixTimesScalar,
+    SpvOpVectorTimesMatrix,
+    SpvOpMatrixTimesVector,
+    SpvOpMatrixTimesMatrix,
+    SpvOpOuterProduct,
+    SpvOpDot,
+    SpvOpIAddCarry,
+    SpvOpISubBorrow,
+    SpvOpUMulExtended,
+    SpvOpSMulExtended,
+    SpvOpAny,
+    SpvOpAll,
+    SpvOpIsNan,
+    SpvOpIsInf,
+    SpvOpIsFinite,
+    SpvOpIsNormal,
+    SpvOpSignBitSet,
+    SpvOpLessOrGreater,
+    SpvOpOrdered,
+    SpvOpUnordered,
+    SpvOpLogicalEqual,
+    SpvOpLogicalNotEqual,
+    SpvOpLogicalOr,
+    SpvOpLogicalAnd,
+    SpvOpLogicalNot,
+    SpvOpSelect,
+    SpvOpIEqual,
+    SpvOpINotEqual,
+    SpvOpUGreaterThan,
+    SpvOpSGreaterThan,
+    SpvOpUGreaterThanEqual,
+    SpvOpSGreaterThanEqual,
+    SpvOpULessThan,
+    SpvOpSLessThan,
+    SpvOpULessThanEqual,
+    SpvOpSLessThanEqual,
+    SpvOpFOrdEqual,
+    SpvOpFUnordEqual,
+    SpvOpFOrdNotEqual,
+    SpvOpFUnordNotEqual,
+    SpvOpFOrdLessThan,
+    SpvOpFUnordLessThan,
+    SpvOpFOrdGreaterThan,
+    SpvOpFUnordGreaterThan,
+    SpvOpFOrdLessThanEqual,
+    SpvOpFUnordLessThanEqual,
+    SpvOpFOrdGreaterThanEqual,
+    SpvOpFUnordGreaterThanEqual,
+    SpvOpShiftRightLogical,
+    SpvOpShiftRightArithmetic,
+    SpvOpShiftLeftLogical,
+    SpvOpBitwiseOr,
+    SpvOpBitwiseXor,
+    SpvOpBitwiseAnd,
+    SpvOpNot,
+    SpvOpBitFieldInsert,
+    SpvOpBitFieldSExtract,
+    SpvOpBitFieldUExtract,
+    SpvOpBitReverse,
+    SpvOpBitCount,
+    SpvOpDPdx,
+    SpvOpDPdy,
+    SpvOpFwidth,
+    SpvOpDPdxFine,
+    SpvOpDPdyFine,
+    SpvOpFwidthFine,
+    SpvOpDPdxCoarse,
+    SpvOpDPdyCoarse,
+    SpvOpFwidthCoarse,
+    SpvOpEmitVertex,
+    SpvOpEndPrimitive,
+    SpvOpEmitStreamVertex,
+    SpvOpEndStreamPrimitive,
+    SpvOpControlBarrier,
+    SpvOpMemoryBarrier,
+    SpvOpAtomicLoad,
+    SpvOpAtomicStore,
+    SpvOpAtomicExchange,
+    SpvOpAtomicCompareExchange,
+    SpvOpAtomicCompareExchangeWeak,
+    SpvOpAtomicIIncrement,
+    SpvOpAtomicIDecrement,
+    SpvOpAtomicIAdd,
+    SpvOpAtomicISub,
+    SpvOpAtomicSMin,
+    SpvOpAtomicUMin,
+    SpvOpAtomicSMax,
+    SpvOpAtomicUMax,
+    SpvOpAtomicAnd,
+    SpvOpAtomicOr,
+    SpvOpAtomicXor,
+    SpvOpPhi,
+    SpvOpLoopMerge,
+    SpvOpSelectionMerge,
+    SpvOpLabel,
+    SpvOpBranch,
+    SpvOpBranchConditional,
+    SpvOpSwitch,
+    SpvOpKill,
+    SpvOpReturn,
+    SpvOpReturnValue,
+    SpvOpUnreachable,
+    SpvOpLifetimeStart,
+    SpvOpLifetimeStop,
+    SpvOpGroupAsyncCopy,
+    SpvOpGroupWaitEvents,
+    SpvOpGroupAll,
+    SpvOpGroupAny,
+    SpvOpGroupBroadcast,
+    SpvOpGroupIAdd,
+    SpvOpGroupFAdd,
+    SpvOpGroupFMin,
+    SpvOpGroupUMin,
+    SpvOpGroupSMin,
+    SpvOpGroupFMax,
+    SpvOpGroupUMax,
+    SpvOpGroupSMax,
+    SpvOpReadPipe,
+    SpvOpWritePipe,
+    SpvOpReservedReadPipe,
+    SpvOpReservedWritePipe,
+    SpvOpReserveReadPipePackets,
+    SpvOpReserveWritePipePackets,
+    SpvOpCommitReadPipe,
+    SpvOpCommitWritePipe,
+    SpvOpIsValidReserveId,
+    SpvOpGetNumPipePackets,
+    SpvOpGetMaxPipePackets,
+    SpvOpGroupReserveReadPipePackets,
+    SpvOpGroupReserveWritePipePackets,
+    SpvOpGroupCommitReadPipe,
+    SpvOpGroupCommitWritePipe,
+    SpvOpEnqueueMarker,
+    SpvOpEnqueueKernel,
+    SpvOpGetKernelNDrangeSubGroupCount,
+    SpvOpGetKernelNDrangeMaxSubGroupSize,
+    SpvOpGetKernelWorkGroupSize,
+    SpvOpGetKernelPreferredWorkGroupSizeMultiple,
+    SpvOpRetainEvent,
+    SpvOpReleaseEvent,
+    SpvOpCreateUserEvent,
+    SpvOpIsValidEvent,
+    SpvOpSetUserEventStatus,
+    SpvOpCaptureEventProfilingInfo,
+    SpvOpGetDefaultQueue,
+    SpvOpBuildNDRange,
+    SpvOpImageSparseSampleImplicitLod,
+    SpvOpImageSparseSampleExplicitLod,
+    SpvOpImageSparseSampleDrefImplicitLod,
+    SpvOpImageSparseSampleDrefExplicitLod,
+    SpvOpImageSparseSampleProjImplicitLod,
+    SpvOpImageSparseSampleProjExplicitLod,
+    SpvOpImageSparseSampleProjDrefImplicitLod,
+    SpvOpImageSparseSampleProjDrefExplicitLod,
+    SpvOpImageSparseFetch,
+    SpvOpImageSparseGather,
+    SpvOpImageSparseDrefGather,
+    SpvOpImageSparseTexelsResident,
+    SpvOpNoLine,
+    SpvOpAtomicFlagTestAndSet,
+    SpvOpAtomicFlagClear,
+    SpvOpImageSparseRead,
+    SpvOpSizeOf,
+    SpvOpTypePipeStorage,
+    SpvOpConstantPipeStorage,
+    SpvOpCreatePipeFromPipeStorage,
+    SpvOpGetKernelLocalSizeForSubgroupCount,
+    SpvOpGetKernelMaxNumSubgroups,
+    SpvOpTypeNamedBarrier,
+    SpvOpNamedBarrierInitialize,
+    SpvOpMemoryNamedBarrier,
+    SpvOpModuleProcessed,
+    SpvOpExecutionModeId,
+    SpvOpDecorateId,
+    SpvOpSubgroupBallotKHR,
+    SpvOpSubgroupFirstInvocationKHR,
+    SpvOpSubgroupAllKHR,
+    SpvOpSubgroupAnyKHR,
+    SpvOpSubgroupAllEqualKHR,
+    SpvOpSubgroupReadInvocationKHR,
+  });
+}
+
 std::string GetVersionString(uint32_t word) {
   std::stringstream ss;
   ss << "Version " << SPV_SPIRV_VERSION_MAJOR_PART(word)
@@ -239,3 +560,207 @@ void StatsAnalyzer::WriteOpcodeMarkov(std::ostream& out) {
     }
   }
 }
+
+void StatsAnalyzer::WriteCodegenOpcodeHist(std::ostream& out) {
+  auto all_opcodes = GetAllOpcodes();
+
+  // uint64_t is used because kMarkvNoneOfTheAbove is outside of uint32_t range.
+  out << "std::map<uint64_t, uint32_t> GetOpcodeHist() {\n"
+      << "  return std::map<uint64_t, uint32_t>({\n";
+
+  uint32_t total = 0;
+  for (const auto& kv : stats_.opcode_hist) {
+    total += kv.second;
+  }
+
+  for (uint32_t opcode : all_opcodes) {
+    const auto it = stats_.opcode_hist.find(opcode);
+    const uint32_t count = it == stats_.opcode_hist.end() ? 0 : it->second;
+    const double kMaxValue = 1000.0;
+    uint32_t value = uint32_t(kMaxValue * double(count) / double(total));
+    if (value == 0)
+      value = 1;
+    out << "    { SpvOp" << GetOpcodeString(opcode)
+        << ", " << value << " },\n";
+  }
+
+  // Add kMarkvNoneOfTheAbove as a signal for unknown opcode.
+  out << "    { kMarkvNoneOfTheAbove, " << 10 << " },\n";
+  out << "  });\n}\n";
+}
+
+void StatsAnalyzer::WriteCodegenOpcodeAndNumOperandsHist(std::ostream& out) {
+  out << "std::map<uint64_t, uint32_t> GetOpcodeAndNumOperandsHist() {\n"
+      << "  return std::map<uint64_t, uint32_t>({\n";
+
+
+  uint32_t total = 0;
+  for (const auto& kv : stats_.opcode_and_num_operands_hist) {
+    total += kv.second;
+  }
+
+  for (const auto& kv : stats_.opcode_and_num_operands_hist) {
+    const uint32_t count = kv.second;
+    const double kFrequentEnoughToAnalyze = 0.001;
+    if (double(count) / double(total) < kFrequentEnoughToAnalyze) continue;
+    const uint32_t opcode_and_num_operands = kv.first;
+    const uint32_t opcode = opcode_and_num_operands & 0xFFFF;
+    const uint32_t num_operands = opcode_and_num_operands >> 16;
+
+    if (opcode == SpvOpTypeStruct)
+      continue;
+
+    out << "    { CombineOpcodeAndNumOperands(SpvOp"
+        << spvOpcodeString(SpvOp(opcode))
+        << ", " << num_operands << "), " << count << " },\n";
+  }
+
+  out << "    { kMarkvNoneOfTheAbove, " << 1 + int(total * 0.05) << " },\n";
+  out << "  });\n}\n";
+}
+
+void StatsAnalyzer::WriteCodegenOpcodeAndNumOperandsMarkovHuffmanCodecs(
+    std::ostream& out) {
+  out << "std::map<uint32_t, std::unique_ptr<HuffmanCodec<uint64_t>>>\n"
+      << "GetOpcodeAndNumOperandsMarkovHuffmanCodecs() {\n"
+      << "  std::map<uint32_t, std::unique_ptr<HuffmanCodec<uint64_t>>> "
+      << "codecs;\n";
+
+  for (const auto& kv : stats_.opcode_and_num_operands_markov_hist) {
+    const uint32_t prev_opcode = kv.first;
+    const double kFrequentEnoughToAnalyze = 0.001;
+    if (opcode_freq_[prev_opcode] < kFrequentEnoughToAnalyze) continue;
+
+    const std::unordered_map<uint32_t, uint32_t>& hist = kv.second;
+
+    uint32_t total = 0;
+    for (const auto& pair : hist) {
+      total += pair.second;
+    }
+
+    out << "  {\n";
+    out << "    std::unique_ptr<HuffmanCodec<uint64_t>> "
+        << "codec(new HuffmanCodec<uint64_t>({\n";
+
+    for (const auto& pair : hist) {
+      const uint32_t opcode_and_num_operands = pair.first;
+      const uint32_t opcode = opcode_and_num_operands & 0xFFFF;
+
+      if (opcode == SpvOpTypeStruct)
+        continue;
+
+      const uint32_t num_operands = opcode_and_num_operands >> 16;
+      const uint32_t count = pair.second;
+      const double posterior_freq = double(count) / double(total);
+
+      if (opcode_freq_[opcode] < kFrequentEnoughToAnalyze &&
+          posterior_freq < kFrequentEnoughToAnalyze) continue;
+
+      total += count;
+      out << "      { CombineOpcodeAndNumOperands(SpvOp"
+          << spvOpcodeString(SpvOp(opcode))
+          << ", " << num_operands << "), " << count << " },\n";
+    }
+
+    out << "      { kMarkvNoneOfTheAbove, " << 1 + int(total * 0.05) << " },\n";
+
+    out << "    }));\n" << std::endl;
+    out << "    codecs.emplace(SpvOp" << GetOpcodeString(prev_opcode)
+        << ", std::move(codec));\n";
+    out << "  }\n\n";
+  }
+
+  out << "  return codecs;\n}\n";
+}
+
+void StatsAnalyzer::WriteCodegenLiteralStringHuffmanCodecs(std::ostream& out) {
+  out << "std::map<uint32_t, std::unique_ptr<HuffmanCodec<std::string>>>\n"
+      << "GetLiteralStringHuffmanCodecs() {\n"
+      << "  std::map<uint32_t, std::unique_ptr<HuffmanCodec<std::string>>> "
+      << "codecs;\n";
+
+  for (const auto& kv : stats_.literal_strings_hist) {
+    const uint32_t opcode = kv.first;
+
+    if (opcode == SpvOpName || opcode == SpvOpMemberName)
+      continue;
+
+    const double kOpcodeFrequentEnoughToAnalyze = 0.001;
+    if (opcode_freq_[opcode] < kOpcodeFrequentEnoughToAnalyze) continue;
+
+    const std::unordered_map<std::string, uint32_t>& hist = kv.second;
+
+    uint32_t total = 0;
+    for (const auto& pair : hist) {
+      total += pair.second;
+    }
+
+    out << "  {\n";
+    out << "    std::unique_ptr<HuffmanCodec<std::string>> "
+        << "codec(new HuffmanCodec<std::string>({\n";
+    for (const auto& pair : hist) {
+      const uint32_t count = pair.second;
+      const double freq = double(count) / double(total);
+      const double kStringFrequentEnoughToAnalyze = 0.001;
+      if (freq < kStringFrequentEnoughToAnalyze) continue;
+      out << "      { std::string(\"" << pair.first << "\"), " << count
+          << " },\n";
+    }
+
+    out << "      { std::string(\"kMarkvNoneOfTheAbove\"), "
+        << 1 + int(total * 0.05) << " },\n";
+
+    out << "    }));\n" << std::endl;
+    out << "    codecs.emplace(SpvOp" << spvOpcodeString(SpvOp(opcode))
+        << ", std::move(codec));\n";
+    out << "  }\n\n";
+  }
+
+  out << "  return codecs;\n}\n";
+}
+
+void StatsAnalyzer::WriteCodegenNonIdWordHuffmanCodecs(std::ostream& out) {
+  out << "std::map<std::pair<uint32_t, uint32_t>, "
+      << "std::unique_ptr<HuffmanCodec<uint64_t>>>\n"
+      << "GetNonIdWordHuffmanCodecs() {\n"
+      << "  std::map<std::pair<uint32_t, uint32_t>, "
+      << "std::unique_ptr<HuffmanCodec<uint64_t>>> codecs;\n";
+
+  for (const auto& kv : stats_.non_id_words_hist) {
+    const auto& opcode_and_index = kv.first;
+    const uint32_t opcode = opcode_and_index.first;
+    const uint32_t index = opcode_and_index.second;
+
+    const double kOpcodeFrequentEnoughToAnalyze = 0.001;
+    if (opcode_freq_[opcode] < kOpcodeFrequentEnoughToAnalyze) continue;
+
+    const std::map<uint32_t, uint32_t>& hist = kv.second;
+
+    uint32_t total = 0;
+    for (const auto& pair : hist) {
+      total += pair.second;
+    }
+
+    out << "  {\n";
+    out << "    std::unique_ptr<HuffmanCodec<uint64_t>> "
+        << "codec(new HuffmanCodec<uint64_t>({\n";
+    for (const auto& pair : hist) {
+      const uint32_t word = pair.first;
+      const uint32_t count = pair.second;
+      const double freq = double(count) / double(total);
+      const double kWordFrequentEnoughToAnalyze = 0.001;
+      if (freq < kWordFrequentEnoughToAnalyze) continue;
+      out << "      { " << word << ", " << count << " },\n";
+    }
+
+    out << "      { kMarkvNoneOfTheAbove, " << 1 + int(total * 0.05) << " },\n";
+
+    out << "    }));\n" << std::endl;
+    out << "    codecs.emplace(std::pair<uint32_t, uint32_t>(SpvOp"
+        << spvOpcodeString(SpvOp(opcode))
+        << ", " << index << "), std::move(codec));\n";
+    out << "  }\n\n";
+  }
+
+  out << "  return codecs;\n}\n";
+}
index c1ff187..54e2c3d 100644 (file)
@@ -36,6 +36,30 @@ class StatsAnalyzer {
   // level.
   void WriteOpcodeMarkov(std::ostream& out);
 
+  // Writes C++ code containing a function returning opcode histogram.
+  void WriteCodegenOpcodeHist(std::ostream& out);
+
+  // Writes C++ code containing a function returning opcode_and_num_operands
+  // histogram.
+  void WriteCodegenOpcodeAndNumOperandsHist(std::ostream& out);
+
+  // Writes C++ code containing a function returning a map of Huffman codecs
+  // for opcode_and_num_operands. Each Huffman codec is created for a specific
+  // previous opcode.
+  // TODO(atgoo@github.com) Write code which would contain pregenerated Huffman
+  // codecs, instead of code which would generate them every time.
+  void WriteCodegenOpcodeAndNumOperandsMarkovHuffmanCodecs(std::ostream& out);
+
+  // Writes C++ code containing a function returning a map of Huffman codecs
+  // for literal strings. Each Huffman codec is created for a specific opcode.
+  // I.e. OpExtension and OpExtInstImport would use different codecs.
+  void WriteCodegenLiteralStringHuffmanCodecs(std::ostream& out);
+
+  // Writes C++ code containing a function returning a map of Huffman codecs
+  // for single-word non-id operands. Each Huffman codec is created for a
+  // specific operand slot (opcode and operand number).
+  void WriteCodegenNonIdWordHuffmanCodecs(std::ostream& out);
+
  private:
   const libspirv::SpirvStats& stats_;