Speed up GUI analysis significantly by adding a location index cache
author Milian Wolff <mail@milianw.de>
Sun, 22 Jan 2017 11:37:34 +0000 (12:37 +0100)
committer Milian Wolff <mail@milianw.de>
Sun, 22 Jan 2017 11:47:55 +0000 (12:47 +0100)
commit c370b7beb1bf47a6ed4731235bf65c309ba2da0f
tree 4d35a980e40fdd56c70c93a9eb7583ebab39a782
parent c2efbffcb1ff072c1b590d17f0ae2e683f799d9f
Speed up GUI analysis significantly by adding a location index cache

Perf showed that quite a lot of time is spent finding the interned
location for a given InstructionPointer. Adding a cache in front of
this lookup, keyed by the IpIndex, speeds up the parse step
significantly, as shown by the following perf stat runs, which were
measured with an `exit(0)` added after buildSizeHistogram:

Before:

 Performance counter stats for 'heaptrack_gui ./heaptrack.kwrite.27247.gz' (5 runs):

       1384.265399      task-clock (msec)         #    1.019 CPUs utilized            ( +-  0.91% )
               898      context-switches          #    0.649 K/sec                    ( +-  1.44% )
                77      cpu-migrations            #    0.055 K/sec                    ( +- 23.67% )
            27,747      page-faults               #    0.020 M/sec                    ( +-  0.41% )
     4,287,486,004      cycles                    #    3.097 GHz                      ( +-  0.90% )
     5,965,906,048      instructions              #    1.39  insn per cycle           ( +-  0.07% )
     1,298,988,220      branches                  #  938.395 M/sec                    ( +-  0.03% )
        17,667,331      branch-misses             #    1.36% of all branches          ( +-  0.04% )

       1.358443950 seconds time elapsed                                          ( +-  1.02% )

After:

 Performance counter stats for 'heaptrack_gui ./heaptrack.kwrite.27247.gz' (5 runs):

        969.782341      task-clock (msec)         #    1.023 CPUs utilized            ( +-  1.27% )
               844      context-switches          #    0.870 K/sec                    ( +-  6.82% )
                98      cpu-migrations            #    0.101 K/sec                    ( +-  5.30% )
            27,590      page-faults               #    0.028 M/sec                    ( +-  0.46% )
     3,000,707,304      cycles                    #    3.094 GHz                      ( +-  1.22% )
     4,374,580,309      instructions              #    1.46  insn per cycle           ( +-  0.03% )
       911,989,553      branches                  #  940.406 M/sec                    ( +-  0.03% )
        14,788,489      branch-misses             #    1.62% of all branches          ( +-  0.17% )

       0.947570109 seconds time elapsed                                          ( +-  1.34% )
src/analyze/gui/parser.cpp