From 03794400ec59d85ab7378e756518229c3d4348ef Mon Sep 17 00:00:00 2001
From: Hyeongsik Min <hyeongsik.min@samsung.com>
Date: Wed, 2 Mar 2016 19:19:27 +0900
Subject: [PATCH] Imported Upstream version 1.0.0

---
 Makefile.am                                   |    4 +-
 Makefile.in                                   |   19 +-
 README                                        |   33 +-
 TODO                                          |   90 +-
 aclocal.m4                                    |   14 -
 agents/Makefile.in                            |   13 +-
 agents/jvmpi/Makefile.in                      |   13 +-
 agents/jvmpi/jvmpi_oprofile.cpp               |    4 +-
 agents/jvmti/Makefile.in                      |   13 +-
 agents/jvmti/libjvmti_oprofile.c              |    4 +-
 config.guess                                  |  405 ++-
 config.h.in                                   |    9 +-
 configure                                     | 1653 ++++-----
 configure.ac                                  |  253 +-
 daemon/Makefile.am                            |   60 -
 daemon/Makefile.in                            |  785 -----
 daemon/init.c                                 |  372 ---
 daemon/liblegacy/Makefile.am                  |   29 -
 daemon/liblegacy/init.c                       |  387 ---
 daemon/liblegacy/opd_24_stats.c               |   62 -
 daemon/liblegacy/opd_24_stats.h               |   39 -
 daemon/liblegacy/opd_image.c                  |  264 --
 daemon/liblegacy/opd_image.h                  |  111 -
 daemon/liblegacy/opd_kernel.c                 |  464 ---
 daemon/liblegacy/opd_kernel.h                 |   67 -
 daemon/liblegacy/opd_mapping.c                |  170 -
 daemon/liblegacy/opd_mapping.h                |  111 -
 daemon/liblegacy/opd_parse_proc.c             |  223 --
 daemon/liblegacy/opd_parse_proc.h             |   23 -
 daemon/liblegacy/opd_proc.c                   |  432 ---
 daemon/liblegacy/opd_proc.h                   |  151 -
 daemon/liblegacy/opd_sample_files.c           |  179 -
 daemon/liblegacy/opd_sample_files.h           |   63 -
 daemon/liblegacy/p_module.h                   |  207 --
 daemon/opd_anon.c                             |  228 --
 daemon/opd_anon.h                             |   54 -
 daemon/opd_cookie.c                           |  210 --
 daemon/opd_cookie.h                           |   39 -
 daemon/opd_events.c                           |  189 --
 daemon/opd_events.h                           |   47 -
 daemon/opd_extended.c                         |  195 --
 daemon/opd_extended.h                         |   94 -
 daemon/opd_ibs.c                              |  832 -----
 daemon/opd_ibs.h                              |  133 -
 daemon/opd_ibs_macro.h                        |  397 ---
 daemon/opd_ibs_trans.c                        |  634 ----
 daemon/opd_ibs_trans.h                        |   35 -
 daemon/opd_interface.h                        |   48 -
 daemon/opd_kernel.c                           |  229 --
 daemon/opd_kernel.h                           |   43 -
 daemon/opd_mangling.c                         |  205 --
 daemon/opd_mangling.h                         |   33 -
 daemon/opd_perfmon.c                          |  522 ---
 daemon/opd_perfmon.h                          |  106 -
 daemon/opd_pipe.c                             |   99 -
 daemon/opd_pipe.h                             |   45 -
 daemon/opd_printf.h                           |   37 -
 daemon/opd_sfile.c                            |  750 -----
 daemon/opd_sfile.h                            |  119 -
 daemon/opd_spu.c                              |  176 -
 daemon/opd_stats.c                            |   92 -
 daemon/opd_stats.h                            |   32 -
 daemon/opd_trans.c                            |  354 --
 daemon/opd_trans.h                            |   86 -
 daemon/oprofiled.c                            |  532 ---
 daemon/oprofiled.h                            |   69 -
 doc/Makefile.am                               |    7 +-
 doc/Makefile.in                               |   45 +-
 doc/internals.html                            |   12 +-
 doc/ocount.1.in                               |  274 ++
 doc/op-check-perfevents.1.in                  |   36 +
 doc/opannotate.1.in                           |   47 +-
 doc/oparchive.1.in                            |   17 +-
 doc/opcontrol.1.in                            |  195 --
 doc/operf.1.in                                |  138 +-
 doc/opgprof.1.in                              |   13 +-
 doc/ophelp.1.in                               |   25 +-
 doc/ophelp.xsd                                |   58 +
 doc/opimport.1.in                             |    4 +-
 doc/opreport.1.in                             |   34 +-
 doc/opreport.xsd                              |   17 +-
 doc/oprofile.1                                |   36 +-
 doc/oprofile.1.in                             |   32 +-
 doc/oprofile.html                             | 2945 +++++++----------
 doc/oprofile.xml                              | 1647 ++++-----
 events/Makefile.am                            |   28 +-
 events/Makefile.in                            |   41 +-
 events/alpha/ev4/events                       |   18 -
 events/alpha/ev4/unit_masks                   |    4 -
 events/alpha/ev5/events                       |   49 -
 events/alpha/ev5/unit_masks                   |    4 -
 events/alpha/ev6/events                       |   11 -
 events/alpha/ev6/unit_masks                   |    4 -
 events/alpha/ev67/events                      |   29 +-
 events/alpha/pca56/events                     |    2 -
 events/alpha/pca56/unit_masks                 |    3 -
 events/arm/armv7-common/events                |    2 +-
 events/arm/armv7-krait/events                 |    3 +
 events/{avr32 => arm/armv7-krait}/unit_masks  |    2 +-
 events/arm/armv8-ca53/events                  |   38 +
 events/arm/armv8-ca53/unit_masks              |    3 +
 events/arm/armv8-ca57/events                  |   67 +
 events/arm/armv8-ca57/unit_masks              |    3 +
 events/arm/armv8-pmuv3-common/events          |   38 +
 events/arm/armv8-pmuv3-common/unit_masks      |    4 +
 events/arm/armv8-xgene/events                 |    7 +
 events/arm/armv8-xgene/unit_masks             |    3 +
 events/avr32/events                           |   27 -
 events/i386/atom/unit_masks                   |  154 +-
 events/i386/broadwell/events                  |   65 +
 events/i386/broadwell/unit_masks              |  347 ++
 events/i386/core_2/unit_masks                 |   54 +-
 events/i386/haswell/events                    |   64 +
 events/i386/haswell/unit_masks                |  355 ++
 events/i386/ivybridge/unit_masks              |  352 +-
 events/i386/nehalem/unit_masks                |  552 +--
 events/i386/sandybridge/unit_masks            |  394 +--
 events/i386/silvermont/events                 |   24 +
 events/i386/silvermont/unit_masks             |   89 +
 events/i386/westmere/unit_masks               |  480 +--
 events/ia64/ia64/events                       |    3 -
 events/ia64/ia64/unit_masks                   |    4 -
 events/ia64/itanium/events                    |    5 -
 events/ia64/itanium/unit_masks                |    4 -
 events/ia64/itanium2/events                   |  267 --
 events/ia64/itanium2/unit_masks               |  465 ---
 events/ppc/e500mc/events                      |  120 +
 events/{rtc => ppc/e500mc}/unit_masks         |    2 +-
 events/ppc/e6500/events                       |  266 ++
 events/ppc/e6500/unit_masks                   |    4 +
 events/ppc64/architected_events_v1/events     |   62 +
 .../unit_masks                                |    2 +-
 events/ppc64/cell-be/events                   |  517 ---
 events/ppc64/cell-be/unit_masks               |  137 -
 events/ppc64/ibm-compat-v1/event_mappings     |   82 -
 events/ppc64/ibm-compat-v1/events             |   91 -
 events/ppc64/pa6t/event_mappings              |   48 -
 events/ppc64/pa6t/events                      |   52 -
 events/ppc64/pa6t/unit_masks                  |    4 -
 events/ppc64/power5++/event_mappings          |    3 -
 events/ppc64/power5++/events                  |   10 +-
 events/ppc64/power5+/event_mappings           |    4 -
 events/ppc64/power5+/events                   |   10 +-
 events/ppc64/power5/event_mappings            |    4 -
 events/ppc64/power5/events                    |   10 +-
 events/ppc64/power6/event_mappings            |    3 -
 events/ppc64/power6/events                    |   10 +-
 events/ppc64/power7/event_mappings            |   49 +-
 events/ppc64/power7/events                    |   55 +-
 events/ppc64/power8/events                    | 1020 ++++++
 events/ppc64/power8/unit_masks                |    9 +
 events/rtc/events                             |    3 -
 events/s390/z10/events                        |    5 +-
 events/s390/z196/events                       |    2 +-
 events/s390/zEC12/events                      |    8 +
 events/s390/zEC12/unit_masks                  |    7 +
 events/x86-64/family10/events                 |   78 +-
 events/x86-64/family10/unit_masks             |    9 +-
 events/x86-64/family12h/events                |   69 +-
 events/x86-64/family12h/unit_masks            |   14 +-
 events/x86-64/family14h/events                |   69 +-
 events/x86-64/family14h/unit_masks            |   14 +-
 events/x86-64/family15h/events                |   69 +-
 events/x86-64/family15h/unit_masks            |   15 +-
 events/x86-64/generic/events                  |   40 +
 events/x86-64/generic/unit_masks              |   26 +
 gui/Makefile.am                               |   43 -
 gui/Makefile.in                               |  767 -----
 gui/oprof_start.cpp                           | 1087 ------
 gui/oprof_start.h                             |  170 -
 gui/oprof_start_config.cpp                    |  112 -
 gui/oprof_start_config.h                      |   56 -
 gui/oprof_start_main.cpp                      |   27 -
 gui/oprof_start_util.cpp                      |  331 --
 gui/oprof_start_util.h                        |   39 -
 gui/ui/Makefile.am                            |   24 -
 gui/ui/oprof_start.base.ui                    | 1190 -------
 libabi/Makefile.in                            |   13 +-
 libabi/opimport.cpp                           |   19 +-
 libabi/tests/Makefile.in                      |   13 +-
 libdb/Makefile.in                             |   13 +-
 libdb/db_stat.c                               |    3 +-
 libdb/tests/Makefile.in                       |   13 +-
 libop/Makefile.am                             |    7 +-
 libop/Makefile.in                             |   28 +-
 libop/op_alloc_counter.c                      |   61 +-
 libop/op_config.c                             |   36 +-
 libop/op_config.h                             |   36 +-
 libop/op_config_24.h                          |   79 -
 libop/op_cpu_type.c                           |  360 +-
 libop/op_cpu_type.h                           |   49 +-
 libop/op_events.c                             |  418 ++-
 libop/op_events.h                             |   19 +-
 libop/op_get_interface.c                      |   32 -
 libop/op_hw_config.h                          |   12 +-
 libop/op_hw_specific.h                        |   13 +
 libop/op_interface.h                          |   87 -
 libop/op_mangle.c                             |    9 +-
 libop/op_netburst.c                           | 1597 +++++++++
 libop/op_netburst.h                           |  256 ++
 libop/op_parse_event.c                        |   28 +-
 libop/op_parse_event.h                        |    2 +-
 libop/op_sample_file.h                        |    4 -
 libop/op_xml_events.c                         |   25 +-
 libop/op_xml_out.c                            |    2 +-
 libop/op_xml_out.h                            |    2 +-
 libop/tests/Makefile.in                       |   13 +-
 libop/tests/alloc_counter_tests.c             |   17 +-
 libop/tests/cpu_type_tests.c                  |    8 -
 libop/tests/parse_event_tests.c               |    2 +-
 libopagent/Makefile.am                        |    8 +-
 libopagent/Makefile.in                        |   21 +-
 libopagent/opagent.c                          |  235 +-
 libopagent/opagent.h                          |    4 +-
 libopt++/Makefile.in                          |   13 +-
 libopt++/popt_options.cpp                     |    2 +-
 libpe_utils/Makefile.am                       |   20 +
 {gui/ui => libpe_utils}/Makefile.in           |  117 +-
 libpe_utils/op_pe_utils.cpp                   | 1023 ++++++
 libpe_utils/op_pe_utils.h                     |   51 +
 libperf_events/Makefile.am                    |    3 +
 libperf_events/Makefile.in                    |   15 +-
 libperf_events/operf_counter.cpp              |  689 +++-
 libperf_events/operf_counter.h                |   57 +-
 libperf_events/operf_event.h                  |   16 +-
 libperf_events/operf_kernel.cpp               |   13 +-
 libperf_events/operf_mangling.cpp             |   36 +-
 libperf_events/operf_process_info.cpp         |  359 +-
 libperf_events/operf_process_info.h           |   70 +-
 libperf_events/operf_sfile.cpp                |   22 +-
 libperf_events/operf_sfile.h                  |    5 +-
 libperf_events/operf_stats.cpp                |  118 +-
 libperf_events/operf_stats.h                  |   26 +-
 libperf_events/operf_utils.cpp                |  795 ++---
 libperf_events/operf_utils.h                  |   42 +-
 libpp/Makefile.am                             |    4 +-
 libpp/Makefile.in                             |   20 +-
 libpp/arrange_profiles.cpp                    |    9 +-
 libpp/callgraph_container.cpp                 |   42 +-
 libpp/filename_spec.cpp                       |    2 +-
 libpp/format_output.cpp                       |   31 +-
 libpp/format_output.h                         |    3 +-
 libpp/image_errors.cpp                        |    5 +-
 libpp/op_header.cpp                           |   34 +-
 libpp/parse_filename.h                        |    6 +
 libpp/populate.cpp                            |   27 +-
 libpp/populate_for_spu.cpp                    |  166 -
 libpp/populate_for_spu.h                      |   42 -
 libpp/profile.cpp                             |   20 +-
 libpp/profile.h                               |   12 +-
 libpp/profile_container.cpp                   |   49 +-
 libpp/profile_spec.cpp                        |   72 +-
 libpp/symbol.h                                |    4 +-
 libpp/xml_utils.cpp                           |   20 +-
 libregex/Makefile.in                          |   13 +-
 libregex/op_regex.cpp                         |    5 +-
 libregex/tests/Makefile.in                    |   13 +-
 libutil++/Makefile.am                         |    4 +-
 libutil++/Makefile.in                         |   22 +-
 libutil++/bfd_spu_support.cpp                 |  116 -
 libutil++/bfd_support.cpp                     |  167 +-
 libutil++/bfd_support.h                       |   10 +-
 libutil++/cached_value.h                      |    2 +-
 libutil++/child_reader.cpp                    |    2 +-
 libutil++/op_bfd.cpp                          |  137 +-
 libutil++/op_bfd.h                            |   29 +-
 libutil++/op_spu_bfd.cpp                      |  185 --
 libutil++/tests/Makefile.in                   |   13 +-
 libutil++/utility.h                           |    2 +
 libutil/Makefile.in                           |   13 +-
 libutil/op_cpufreq.c                          |    5 +-
 libutil/op_fileio.c                           |   30 +
 libutil/op_fileio.h                           |   11 +
 libutil/tests/Makefile.in                     |   13 +-
 m4/Makefile.am                                |    1 -
 m4/Makefile.in                                |   14 +-
 m4/binutils.m4                                |   52 +-
 m4/cellspubfdsupport.m4                       |   52 -
 m4/kernelversion.m4                           |    2 +-
 m4/qt.m4                                      |  225 --
 opjitconv/Makefile.am                         |    1 -
 opjitconv/Makefile.in                         |   14 +-
 opjitconv/conversion.c                        |    6 +-
 opjitconv/create_bfd.c                        |   12 +-
 opjitconv/debug_line.c                        |    2 +-
 opjitconv/jitsymbol.c                         |   14 +-
 opjitconv/opjitconv.c                         |  191 +-
 opjitconv/opjitconv.h                         |   10 +-
 opjitconv/parse_dump.c                        |    1 -
 pe_counting/Makefile.am                       |   28 +
 {daemon/liblegacy => pe_counting}/Makefile.in |  237 +-
 pe_counting/ocount.cpp                        |  929 ++++++
 pe_counting/ocount_counter.cpp                |  795 +++++
 pe_counting/ocount_counter.h                  |  133 +
 pe_profiling/Makefile.am                      |    4 +-
 pe_profiling/Makefile.in                      |   16 +-
 pe_profiling/operf.cpp                        |  965 +++---
 pp/Makefile.in                                |   13 +-
 pp/common_option.cpp                          |    2 +-
 pp/opannotate.cpp                             |    8 +-
 pp/opannotate_options.cpp                     |    6 +
 pp/oparchive.cpp                              |   78 +-
 pp/oparchive_options.cpp                      |    4 +-
 pp/opreport.cpp                               |    7 +-
 utils/Makefile.am                             |    1 -
 utils/Makefile.in                             |  103 +-
 utils/op_perf_events_checker.c                |    9 +-
 utils/opcontrol                               | 2283 -------------
 utils/ophelp.c                                |  185 +-
 309 files changed, 15979 insertions(+), 28708 deletions(-)
 delete mode 100644 daemon/Makefile.am
 delete mode 100644 daemon/Makefile.in
 delete mode 100644 daemon/init.c
 delete mode 100644 daemon/liblegacy/Makefile.am
 delete mode 100644 daemon/liblegacy/init.c
 delete mode 100644 daemon/liblegacy/opd_24_stats.c
 delete mode 100644 daemon/liblegacy/opd_24_stats.h
 delete mode 100644 daemon/liblegacy/opd_image.c
 delete mode 100644 daemon/liblegacy/opd_image.h
 delete mode 100644 daemon/liblegacy/opd_kernel.c
 delete mode 100644 daemon/liblegacy/opd_kernel.h
 delete mode 100644 daemon/liblegacy/opd_mapping.c
 delete mode 100644 daemon/liblegacy/opd_mapping.h
 delete mode 100644 daemon/liblegacy/opd_parse_proc.c
 delete mode 100644 daemon/liblegacy/opd_parse_proc.h
 delete mode 100644 daemon/liblegacy/opd_proc.c
 delete mode 100644 daemon/liblegacy/opd_proc.h
 delete mode 100644 daemon/liblegacy/opd_sample_files.c
 delete mode 100644 daemon/liblegacy/opd_sample_files.h
 delete mode 100644 daemon/liblegacy/p_module.h
 delete mode 100644 daemon/opd_anon.c
 delete mode 100644 daemon/opd_anon.h
 delete mode 100644 daemon/opd_cookie.c
 delete mode 100644 daemon/opd_cookie.h
 delete mode 100644 daemon/opd_events.c
 delete mode 100644 daemon/opd_events.h
 delete mode 100644 daemon/opd_extended.c
 delete mode 100644 daemon/opd_extended.h
 delete mode 100644 daemon/opd_ibs.c
 delete mode 100644 daemon/opd_ibs.h
 delete mode 100644 daemon/opd_ibs_macro.h
 delete mode 100644 daemon/opd_ibs_trans.c
 delete mode 100644 daemon/opd_ibs_trans.h
 delete mode 100644 daemon/opd_interface.h
 delete mode 100644 daemon/opd_kernel.c
 delete mode 100644 daemon/opd_kernel.h
 delete mode 100644 daemon/opd_mangling.c
 delete mode 100644 daemon/opd_mangling.h
 delete mode 100644 daemon/opd_perfmon.c
 delete mode 100644 daemon/opd_perfmon.h
 delete mode 100644 daemon/opd_pipe.c
 delete mode 100644 daemon/opd_pipe.h
 delete mode 100644 daemon/opd_printf.h
 delete mode 100644 daemon/opd_sfile.c
 delete mode 100644 daemon/opd_sfile.h
 delete mode 100644 daemon/opd_spu.c
 delete mode 100644 daemon/opd_stats.c
 delete mode 100644 daemon/opd_stats.h
 delete mode 100644 daemon/opd_trans.c
 delete mode 100644 daemon/opd_trans.h
 delete mode 100644 daemon/oprofiled.c
 delete mode 100644 daemon/oprofiled.h
 create mode 100644 doc/ocount.1.in
 create mode 100644 doc/op-check-perfevents.1.in
 delete mode 100644 doc/opcontrol.1.in
 create mode 100644 doc/ophelp.xsd
 delete mode 100644 events/alpha/ev4/events
 delete mode 100644 events/alpha/ev4/unit_masks
 delete mode 100644 events/alpha/ev5/events
 delete mode 100644 events/alpha/ev5/unit_masks
 delete mode 100644 events/alpha/ev6/events
 delete mode 100644 events/alpha/ev6/unit_masks
 delete mode 100644 events/alpha/pca56/events
 delete mode 100644 events/alpha/pca56/unit_masks
 create mode 100644 events/arm/armv7-krait/events
 rename events/{avr32 => arm/armv7-krait}/unit_masks (54%)
 create mode 100644 events/arm/armv8-ca53/events
 create mode 100644 events/arm/armv8-ca53/unit_masks
 create mode 100644 events/arm/armv8-ca57/events
 create mode 100644 events/arm/armv8-ca57/unit_masks
 create mode 100644 events/arm/armv8-pmuv3-common/events
 create mode 100644 events/arm/armv8-pmuv3-common/unit_masks
 create mode 100644 events/arm/armv8-xgene/events
 create mode 100644 events/arm/armv8-xgene/unit_masks
 delete mode 100644 events/avr32/events
 create mode 100644 events/i386/broadwell/events
 create mode 100644 events/i386/broadwell/unit_masks
 create mode 100644 events/i386/haswell/events
 create mode 100644 events/i386/haswell/unit_masks
 create mode 100644 events/i386/silvermont/events
 create mode 100644 events/i386/silvermont/unit_masks
 delete mode 100644 events/ia64/ia64/events
 delete mode 100644 events/ia64/ia64/unit_masks
 delete mode 100644 events/ia64/itanium/events
 delete mode 100644 events/ia64/itanium/unit_masks
 delete mode 100644 events/ia64/itanium2/events
 delete mode 100644 events/ia64/itanium2/unit_masks
 create mode 100644 events/ppc/e500mc/events
 rename events/{rtc => ppc/e500mc}/unit_masks (67%)
 create mode 100644 events/ppc/e6500/events
 create mode 100644 events/ppc/e6500/unit_masks
 create mode 100644 events/ppc64/architected_events_v1/events
 rename events/ppc64/{ibm-compat-v1 => architected_events_v1}/unit_masks (78%)
 delete mode 100644 events/ppc64/cell-be/events
 delete mode 100644 events/ppc64/cell-be/unit_masks
 delete mode 100644 events/ppc64/ibm-compat-v1/event_mappings
 delete mode 100644 events/ppc64/ibm-compat-v1/events
 delete mode 100644 events/ppc64/pa6t/event_mappings
 delete mode 100644 events/ppc64/pa6t/events
 delete mode 100644 events/ppc64/pa6t/unit_masks
 create mode 100644 events/ppc64/power8/events
 create mode 100644 events/ppc64/power8/unit_masks
 delete mode 100644 events/rtc/events
 create mode 100644 events/s390/zEC12/events
 create mode 100644 events/s390/zEC12/unit_masks
 create mode 100644 events/x86-64/generic/events
 create mode 100644 events/x86-64/generic/unit_masks
 delete mode 100644 gui/Makefile.am
 delete mode 100644 gui/Makefile.in
 delete mode 100644 gui/oprof_start.cpp
 delete mode 100644 gui/oprof_start.h
 delete mode 100644 gui/oprof_start_config.cpp
 delete mode 100644 gui/oprof_start_config.h
 delete mode 100644 gui/oprof_start_main.cpp
 delete mode 100644 gui/oprof_start_util.cpp
 delete mode 100644 gui/oprof_start_util.h
 delete mode 100644 gui/ui/Makefile.am
 delete mode 100644 gui/ui/oprof_start.base.ui
 delete mode 100644 libop/op_config_24.h
 delete mode 100644 libop/op_get_interface.c
 delete mode 100644 libop/op_interface.h
 create mode 100644 libop/op_netburst.c
 create mode 100644 libop/op_netburst.h
 create mode 100644 libpe_utils/Makefile.am
 rename {gui/ui => libpe_utils}/Makefile.in (83%)
 create mode 100644 libpe_utils/op_pe_utils.cpp
 create mode 100644 libpe_utils/op_pe_utils.h
 delete mode 100644 libpp/populate_for_spu.cpp
 delete mode 100644 libpp/populate_for_spu.h
 delete mode 100644 libutil++/bfd_spu_support.cpp
 delete mode 100644 libutil++/op_spu_bfd.cpp
 delete mode 100644 m4/cellspubfdsupport.m4
 delete mode 100644 m4/qt.m4
 create mode 100644 pe_counting/Makefile.am
 rename {daemon/liblegacy => pe_counting}/Makefile.in (66%)
 create mode 100644 pe_counting/ocount.cpp
 create mode 100644 pe_counting/ocount_counter.cpp
 create mode 100644 pe_counting/ocount_counter.h
 delete mode 100644 utils/opcontrol

diff --git a/Makefile.am b/Makefile.am
index b16c381..e52d3f0 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -10,7 +10,6 @@ SUBDIRS = \
 	libutil++ \
 	libopt++ \
 	libabi \
-	daemon \
 	utils \
 	libregex \
 	libpp \
@@ -18,9 +17,10 @@ SUBDIRS = \
 	pp \
 	events \
 	doc \
-	gui \
+	libpe_utils \
 	libperf_events \
 	pe_profiling \
+	pe_counting \
 	agents
 #### ATTENTION ####
 #    The agents directory must be kept as the last subdir
diff --git a/Makefile.in b/Makefile.in
index 16364d2..422005a 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -42,7 +42,6 @@ DIST_COMMON = README $(am__configure_deps) $(srcdir)/Makefile.am \
 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
 am__aclocal_m4_deps = $(top_srcdir)/m4/binutils.m4 \
 	$(top_srcdir)/m4/builtinexpect.m4 \
-	$(top_srcdir)/m4/cellspubfdsupport.m4 \
 	$(top_srcdir)/m4/compileroption.m4 \
 	$(top_srcdir)/m4/copyifchange.m4 $(top_srcdir)/m4/docbook.m4 \
 	$(top_srcdir)/m4/extradirs.m4 \
@@ -51,7 +50,7 @@ am__aclocal_m4_deps = $(top_srcdir)/m4/binutils.m4 \
 	$(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
 	$(top_srcdir)/m4/mallocattribute.m4 \
 	$(top_srcdir)/m4/poptconst.m4 \
-	$(top_srcdir)/m4/precompiledheader.m4 $(top_srcdir)/m4/qt.m4 \
+	$(top_srcdir)/m4/precompiledheader.m4 \
 	$(top_srcdir)/m4/sstream.m4 $(top_srcdir)/m4/typedef.m4 \
 	$(top_srcdir)/configure.ac
 am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
@@ -166,7 +165,6 @@ LN_S = @LN_S@
 LTLIBOBJS = @LTLIBOBJS@
 MAKEINFO = @MAKEINFO@
 MKDIR_P = @MKDIR_P@
-MOC = @MOC@
 NM = @NM@
 NMEDIT = @NMEDIT@
 OBJDUMP = @OBJDUMP@
@@ -190,20 +188,13 @@ PFM_LIB = @PFM_LIB@
 PKG_CONFIG = @PKG_CONFIG@
 POPT_LIBS = @POPT_LIBS@
 PTRDIFF_T_TYPE = @PTRDIFF_T_TYPE@
-QT_CFLAGS = @QT_CFLAGS@
-QT_INCLUDES = @QT_INCLUDES@
-QT_LDFLAGS = @QT_LDFLAGS@
-QT_LIB = @QT_LIB@
-QT_LIBS = @QT_LIBS@
-QT_VERSION = @QT_VERSION@
 RANLIB = @RANLIB@
+RT_LIB = @RT_LIB@
 SED = @SED@
 SET_MAKE = @SET_MAKE@
 SHELL = @SHELL@
 SIZE_T_TYPE = @SIZE_T_TYPE@
 STRIP = @STRIP@
-UIC = @UIC@
-UIChelp = @UIChelp@
 VERSION = @VERSION@
 XMKMF = @XMKMF@
 XML_CATALOG = @XML_CATALOG@
@@ -280,7 +271,6 @@ SUBDIRS = \
 	libutil++ \
 	libopt++ \
 	libabi \
-	daemon \
 	utils \
 	libregex \
 	libpp \
@@ -288,9 +278,10 @@ SUBDIRS = \
 	pp \
 	events \
 	doc \
-	gui \
+	libpe_utils \
 	libperf_events \
 	pe_profiling \
+	pe_counting \
 	agents
 
 #### ATTENTION ####
@@ -636,7 +627,7 @@ distcheck: dist
 	*.zip*) \
 	  unzip $(distdir).zip ;;\
 	esac
-	chmod -R a-w $(distdir); chmod a+w $(distdir)
+	chmod -R a-w $(distdir); chmod u+w $(distdir)
 	mkdir $(distdir)/_build
 	mkdir $(distdir)/_inst
 	chmod a-w $(distdir)
diff --git a/README b/README
index 74b0a12..d4a8651 100644
--- a/README
+++ b/README
@@ -1,17 +1,21 @@
-This is an alpha release version of oprofile, a transparent
-low-overhead system-wide profiler.
+OProfile provides a low-overhead profiler (operf) capable of both
+single-application profiling and system-wide profiling.  There is
+also a simple event counting tool (ocount).
 
 You can find some documentation in the doc/ directory.
 
 Please visit the oprofile website at : http://oprofile.sf.net/
 
-oprofile was written by John Levon <levon@movementarian.org>
-and Philippe Elie <phil.el@wanadoo.fr>.
-
-Maynard Johnson <maynardj@us.ibm.com> is the current maintainer.
+oprofile was originally written by John Levon <levon@movementarian.org>
+and Philippe Elie <phil.el@wanadoo.fr>.  The operf and ocount
+tools were developed by Maynard Johnson <maynardj@us.ibm.com>, who
+is the current maintainer.
 
 Dave Jones <davej@suse.de> provided bug fixes and support for
-the AMD Athlon, and AMD Hammer families of CPUs.
+the AMD Athlon, and AMD Hammer families of CPUs. Suravee Suthikulpanit
+<Suravee.Suthikulpanit@amd.com> contributed various AMD-related patches,
+including Instruction-Based-Sampling support (available only in
+pre-1.0 releases).
 
 Bob Montgomery <bobm@fc.hp.com> provided bug fixes, the initial RTC
 driver and the initial ia64 driver.
@@ -19,17 +23,21 @@ driver and the initial ia64 driver.
 Will Cohen <wcohen@redhat.com> integrated the ia64 driver into the
 oprofile release, and contributed bug fixes and several cleanups.
 
+Will Deacon <will.deacon@arm.com> has contributed patches as well as
+his time to support the ARM architecture.
+
 Graydon Hoare <graydon@redhat.com> provided P4 port, bug fixes and cleanups.
 
 Ralf Baechle <ralf@linux-mips.org> provided the MIPS port.
 
-Other contributors are listed in the ChangeLog.
+Other contributors can be seen via 'git log'.
 
 Building
 --------
 
 Please read the installation instructions in doc/oprofile.html or
 http://oprofile.sourceforge.net/doc/install.html.
+Only 2.6 kernels (or later) are supported.
 
 Quick start :
 
@@ -37,10 +45,7 @@ Quick start :
 can specify a different version, e.g.
 ACLOCAL=aclocal-1.5 AUTOMAKE=automake-1.5 AUTOCONF=autoconf-2.13 AUTOHEADER=autoheader-2.13 ./autogen.sh)
 
-2.4 kernels
-
-	./configure --with-linux=/path/to/kernel/source
-
-2.6 kernels
+Then run the following commands:
+	./configure [options]  (use './configure --help' to see options)
+	make
 
-	./configure --with-kernel-support
diff --git a/TODO b/TODO
index fcae2bd..310cdb9 100644
--- a/TODO
+++ b/TODO
@@ -1,4 +1,4 @@
-This is an (incomplete) list of some of the stuff we want to look at doing.
+This is a list (not exhaustive) of some of the stuff to clean up/fix.
 
 If you're interested in hacking on any of these, please contact the list first
 for some pointers and/or read HACKING and doc/CodingStyle.
@@ -7,92 +7,29 @@ for some pointers and/or read HACKING and doc/CodingStyle.
 -----------
 
 (this is a minimal selection of stuff I think we need)
-
- o default to a vmlinux location: need agreement from kernel developers
- o default to --separate=library (with anon, =none, makes not much sense)
- o prettify image name for .jo files and allow lib-image: to specify it
- o gisle's fixes
- o opreport tgid:<tgid> doesn't work even if .jo files with that pid
- o Fix:
-
-warning: [vdso] (tgid:9236 range:0x7fff98ffd000-0x7fff98fff000) could not be found.
-warning: /no-vmlinux could not be found.
-warning: /usr/lib64/libpanel-applet-2.so.0.2.27.#prelink#.sXCUK1 (deleted) could not be found.
-
- o amd64 32 bit build needs a sys32_lookup_dcookie() translator in the
-   kernel
- o decide on -m tgid semantics for anon regions
- o if ev67 is not fixed, back it out
- o lapic : module should says "didn't find apic" if needed, FAQ and doc should
-  speak a bit about lapic kernel option on x86 and recent kernel
- o see the big comment in db_insert.c, it's possible to allow unlimited
-   amount of samples with a very minor change in libdb.
- o if oprofile doesn't recognize the processor selected by the kernel
-   opcontrol could setup the module in timer mode (remove/reload prolly), and
-   warn the user it must upgrade oprofile to get all the feature from its
-   hardware.
+ o Remove opcontrol and all daemon-related stuff (as it's been deprecated
+   since 0.9.8)
+ o the various open bugs
 
 Later
 -----
-
- o remove 2.95/2.2 support so we can use boost multi index container in
-   symbol/sample container
- o consider if we can improve anon mapping growing support
-
-<movement> [moz@lambent pp]$ ./opreport -lf lib-image:/lib/tls/libc-2.3.2.so /bin/bash | grep vfprintf
-<movement> 14        0.1301  6         0.0102  /lib/tls/libc-2.3.2.so   vfprintf
-<movement> [moz@lambent pp]$ ./opreport -lf lib-image:/lib/tls/libc-2.3.2.so /usr/bin/vim | grep vfprintf
-<movement> 176       2.0927  349       1.2552  /lib/tls/libc-2.3.2.so   vfprintf
-<movement> [moz@lambent pp]$ ./opreport -lf lib-image:/lib/tls/libc-2.3.2.so { image:/bin/bash } { image:/usr/bin/vim } | grep vfprintf
-<movement> 176      10.9657  +++       349       7.8888  +++       vfprintf
-<movement> 14       ---      ---       6        ---      ---       vfprintf
-<movement> it seems them as two separate symbols
-<movement> but can we remove the app_name from rough_less and still be able to walk the two lists?
-<movement> even if we could, it would still go wrong when we're profiling multiple apps
-
- o Java stuff??
- o with opreport -c I can get "warning: /no-vmlinux could not be found.".
-   Should be smarter ?
- o opreport -c gives weird output for an image with no symbols:
-
-    samples  %        symbol name
-  15965    100.000  (no symbols)
-253      100.000  (no symbols)
-  15965    98.4400  (no symbols)
-  253       1.5600  (no symbols) [self]
+(Thoughts from John Levon. Some of these may no longer be valid.
+And for opcontrol-related issues, we don't care, since oprofile 1.0
+will no longer support opcontrol.)
 
  o consider tagging opreport -c entries with a number like gprof
  o --details for opreport -c, or diff??
  o should [self] entries be ommitted if 0 ??
- o stress test opreport -c: compile a Big Application w/o frame pointer and look
-   how driver and opreport -c react.
  o oparchive could fix up {kern} paths with -p (what about diff between
    archive and current though?)
- o can say more in opcontrol --status
  o consider a sort option for diff %
  o opannotate is silent about symbols missing debug info
- o oprofiled.log now contains various statistics about lost sample etc. from
-  the driver. Post profile tools must parse that and warn eventually, warning
-  must include a proposed work around. User need this: if nothing seems wrong
-  people are unlikely to get a look in oprofiled.log (I ran oprofile on 2.6.1
-  2 weeks before noticing at 30000 I lost a lot of samples, the profile seemed
-  ok du to the randomization of lost samples). As developper we need that too,
-  actually we have no clear idea of the behavior on different arch, NUMA etc.
-  Not perfect because if the profiler is running the oprofiled.log will show
-  those warning only after the first  alarm signal, I think we must dump the
-  statistics information after each opcontrol --dump to avoid that.
  o odb_insert() can fail on ftruncate or mremap() in db_manage.c but we don't
   try to recover gracefully.
  o output column shortname headers for opreport -l
  o is relative_to_absolute_path guaranteeing a trailing '/' documented ?
  o move oprofiled.log to OP_SAMPLE_DIR/current ?
  o pp tools must handle samples count overflow (marked as (unsigned)-1)
- o the way we show kernel modules in 2.5 is not very obvious - "/oprofile"
- o oparchive will be more usefull with a --root= options to allow profiling
-  on a small box, nfs mount / to another box and transfer sample file and
-  binary on a bigger box for analysis. There is also a problem in oparchive
-  you can use session: to get the right path to samples files but oprofiled.log
-  and abi files path are hardcoded to /var/lib/oprofile.
  o callgraph patch: better way to skip ignored backtrace ?
  o lib-image: and image: behavior depend on --separate=, if --separate=library
   opreport "lib-image:*libc*" --merge=lib works but not
@@ -110,8 +47,6 @@ Later
   extension
  o do we need an opreport like opreport -c (showing caller/callee at binary
   boundary not symbols) ?
- o we should notice an opcontrol config change (--separate etc.) and
-   auto-restart the daemon if necessary (Run)
  o we can add lots more unit tests yet
  o Itanium event constraints are not implemented
  o GUI still has a physical-counter interface, should have a general one
@@ -130,16 +65,6 @@ vma      samples  cum. samples  %           cum. %     symbol name             i
  o i18n. We need a good formatter, and also remember format_percent()
  o opannotate --source --output-dir=~moz/op/ /usr/bin/oprofiled
    will fail because the ~ is not expanded (no space around it) (popt bug I say)
- o cpu names instead of numbers in 2.4 module/ ?
- o remove 1 and 2 magic numbers for oprof_ready
- o adapt Anton's patch for handling non-symbolled libraries ? (nowaday C++
-  anon namespace symbol are static, 3.4 iirc, so with recent distro we are
-  more likely to get problems with a "fallback to dynamic symbols" approch)
- o use standard C integer type <stdint.h> int32_t int16_t etc.
- o event multiplexing for real
- o randomizing of reset value
- o XML output
- o profile the NMI handler code
  o opannotate : I added this to the doc about difference between nr samples
   credited to a source function and total number of samples for this function:
    "The missing samples are not lost, they will be credited to another source
@@ -254,6 +179,7 @@ General checks to make
 ----------------------
  
  o rgrep FIXME
+ o run Coverity
  o valgrind (--show-reachable=yes --leak-check=yes)
  o audit to track unnecessary include <>
  o gcc 3.0/3.x compile
diff --git a/aclocal.m4 b/aclocal.m4
index c6ad4e0..601b166 100644
--- a/aclocal.m4
+++ b/aclocal.m4
@@ -562,18 +562,6 @@ AC_DEFUN([AM_OUTPUT_DEPENDENCY_COMMANDS],
      [AMDEP_TRUE="$AMDEP_TRUE" ac_aux_dir="$ac_aux_dir"])
 ])
 
-# Copyright (C) 1996, 1997, 2000, 2001, 2003, 2005
-# Free Software Foundation, Inc.
-#
-# This file is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# serial 8
-
-# AM_CONFIG_HEADER is obsolete.  It has been replaced by AC_CONFIG_HEADERS.
-AU_DEFUN([AM_CONFIG_HEADER], [AC_CONFIG_HEADERS($@)])
-
 # Do all the work for Automake.                             -*- Autoconf -*-
 
 # Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
@@ -1119,7 +1107,6 @@ AC_SUBST([am__untar])
 
 m4_include([m4/binutils.m4])
 m4_include([m4/builtinexpect.m4])
-m4_include([m4/cellspubfdsupport.m4])
 m4_include([m4/compileroption.m4])
 m4_include([m4/copyifchange.m4])
 m4_include([m4/docbook.m4])
@@ -1133,6 +1120,5 @@ m4_include([m4/lt~obsolete.m4])
 m4_include([m4/mallocattribute.m4])
 m4_include([m4/poptconst.m4])
 m4_include([m4/precompiledheader.m4])
-m4_include([m4/qt.m4])
 m4_include([m4/sstream.m4])
 m4_include([m4/typedef.m4])
diff --git a/agents/Makefile.in b/agents/Makefile.in
index 7c6f8c7..43309ed 100644
--- a/agents/Makefile.in
+++ b/agents/Makefile.in
@@ -41,7 +41,6 @@ DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
 am__aclocal_m4_deps = $(top_srcdir)/m4/binutils.m4 \
 	$(top_srcdir)/m4/builtinexpect.m4 \
-	$(top_srcdir)/m4/cellspubfdsupport.m4 \
 	$(top_srcdir)/m4/compileroption.m4 \
 	$(top_srcdir)/m4/copyifchange.m4 $(top_srcdir)/m4/docbook.m4 \
 	$(top_srcdir)/m4/extradirs.m4 \
@@ -50,7 +49,7 @@ am__aclocal_m4_deps = $(top_srcdir)/m4/binutils.m4 \
 	$(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
 	$(top_srcdir)/m4/mallocattribute.m4 \
 	$(top_srcdir)/m4/poptconst.m4 \
-	$(top_srcdir)/m4/precompiledheader.m4 $(top_srcdir)/m4/qt.m4 \
+	$(top_srcdir)/m4/precompiledheader.m4 \
 	$(top_srcdir)/m4/sstream.m4 $(top_srcdir)/m4/typedef.m4 \
 	$(top_srcdir)/configure.ac
 am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
@@ -153,7 +152,6 @@ LN_S = @LN_S@
 LTLIBOBJS = @LTLIBOBJS@
 MAKEINFO = @MAKEINFO@
 MKDIR_P = @MKDIR_P@
-MOC = @MOC@
 NM = @NM@
 NMEDIT = @NMEDIT@
 OBJDUMP = @OBJDUMP@
@@ -177,20 +175,13 @@ PFM_LIB = @PFM_LIB@
 PKG_CONFIG = @PKG_CONFIG@
 POPT_LIBS = @POPT_LIBS@
 PTRDIFF_T_TYPE = @PTRDIFF_T_TYPE@
-QT_CFLAGS = @QT_CFLAGS@
-QT_INCLUDES = @QT_INCLUDES@
-QT_LDFLAGS = @QT_LDFLAGS@
-QT_LIB = @QT_LIB@
-QT_LIBS = @QT_LIBS@
-QT_VERSION = @QT_VERSION@
 RANLIB = @RANLIB@
+RT_LIB = @RT_LIB@
 SED = @SED@
 SET_MAKE = @SET_MAKE@
 SHELL = @SHELL@
 SIZE_T_TYPE = @SIZE_T_TYPE@
 STRIP = @STRIP@
-UIC = @UIC@
-UIChelp = @UIChelp@
 VERSION = @VERSION@
 XMKMF = @XMKMF@
 XML_CATALOG = @XML_CATALOG@
diff --git a/agents/jvmpi/Makefile.in b/agents/jvmpi/Makefile.in
index 884bfbf..65faa71 100644
--- a/agents/jvmpi/Makefile.in
+++ b/agents/jvmpi/Makefile.in
@@ -39,7 +39,6 @@ DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
 am__aclocal_m4_deps = $(top_srcdir)/m4/binutils.m4 \
 	$(top_srcdir)/m4/builtinexpect.m4 \
-	$(top_srcdir)/m4/cellspubfdsupport.m4 \
 	$(top_srcdir)/m4/compileroption.m4 \
 	$(top_srcdir)/m4/copyifchange.m4 $(top_srcdir)/m4/docbook.m4 \
 	$(top_srcdir)/m4/extradirs.m4 \
@@ -48,7 +47,7 @@ am__aclocal_m4_deps = $(top_srcdir)/m4/binutils.m4 \
 	$(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
 	$(top_srcdir)/m4/mallocattribute.m4 \
 	$(top_srcdir)/m4/poptconst.m4 \
-	$(top_srcdir)/m4/precompiledheader.m4 $(top_srcdir)/m4/qt.m4 \
+	$(top_srcdir)/m4/precompiledheader.m4 \
 	$(top_srcdir)/m4/sstream.m4 $(top_srcdir)/m4/typedef.m4 \
 	$(top_srcdir)/configure.ac
 am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
@@ -157,7 +156,6 @@ LN_S = @LN_S@
 LTLIBOBJS = @LTLIBOBJS@
 MAKEINFO = @MAKEINFO@
 MKDIR_P = @MKDIR_P@
-MOC = @MOC@
 NM = @NM@
 NMEDIT = @NMEDIT@
 OBJDUMP = @OBJDUMP@
@@ -181,20 +179,13 @@ PFM_LIB = @PFM_LIB@
 PKG_CONFIG = @PKG_CONFIG@
 POPT_LIBS = @POPT_LIBS@
 PTRDIFF_T_TYPE = @PTRDIFF_T_TYPE@
-QT_CFLAGS = @QT_CFLAGS@
-QT_INCLUDES = @QT_INCLUDES@
-QT_LDFLAGS = @QT_LDFLAGS@
-QT_LIB = @QT_LIB@
-QT_LIBS = @QT_LIBS@
-QT_VERSION = @QT_VERSION@
 RANLIB = @RANLIB@
+RT_LIB = @RT_LIB@
 SED = @SED@
 SET_MAKE = @SET_MAKE@
 SHELL = @SHELL@
 SIZE_T_TYPE = @SIZE_T_TYPE@
 STRIP = @STRIP@
-UIC = @UIC@
-UIChelp = @UIChelp@
 VERSION = @VERSION@
 XMKMF = @XMKMF@
 XML_CATALOG = @XML_CATALOG@
diff --git a/agents/jvmpi/jvmpi_oprofile.cpp b/agents/jvmpi/jvmpi_oprofile.cpp
index 800667f..9ae5cda 100644
--- a/agents/jvmpi/jvmpi_oprofile.cpp
+++ b/agents/jvmpi/jvmpi_oprofile.cpp
@@ -106,13 +106,13 @@ void compiled_method_load(JVMPI_Event * event)
 		throw runtime_error("Error: Cannot find method name for "
 				    "compiled method\n");
 	}
-	char const * method_name = ((string)method_it->second).c_str();
+	char const * method_name = method_it->second.c_str();
 	method_it = cls_info.method_signatures.find(method);
 	if (method_it == cls_info.method_signatures.end()) {
 		throw runtime_error("Error: Cannot find method signature "
 				    "for compiled method\n");
 	}
-	char const * method_signature = ((string)method_it->second).c_str();
+	char const * method_signature = method_it->second.c_str();
 
 	string const class_signature = "L" + cls_info.name + ";";
 	pthread_mutex_unlock(&class_map_mutex);
diff --git a/agents/jvmti/Makefile.in b/agents/jvmti/Makefile.in
index 8b8b2e1..6a525b3 100644
--- a/agents/jvmti/Makefile.in
+++ b/agents/jvmti/Makefile.in
@@ -39,7 +39,6 @@ DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
 am__aclocal_m4_deps = $(top_srcdir)/m4/binutils.m4 \
 	$(top_srcdir)/m4/builtinexpect.m4 \
-	$(top_srcdir)/m4/cellspubfdsupport.m4 \
 	$(top_srcdir)/m4/compileroption.m4 \
 	$(top_srcdir)/m4/copyifchange.m4 $(top_srcdir)/m4/docbook.m4 \
 	$(top_srcdir)/m4/extradirs.m4 \
@@ -48,7 +47,7 @@ am__aclocal_m4_deps = $(top_srcdir)/m4/binutils.m4 \
 	$(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
 	$(top_srcdir)/m4/mallocattribute.m4 \
 	$(top_srcdir)/m4/poptconst.m4 \
-	$(top_srcdir)/m4/precompiledheader.m4 $(top_srcdir)/m4/qt.m4 \
+	$(top_srcdir)/m4/precompiledheader.m4 \
 	$(top_srcdir)/m4/sstream.m4 $(top_srcdir)/m4/typedef.m4 \
 	$(top_srcdir)/configure.ac
 am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
@@ -157,7 +156,6 @@ LN_S = @LN_S@
 LTLIBOBJS = @LTLIBOBJS@
 MAKEINFO = @MAKEINFO@
 MKDIR_P = @MKDIR_P@
-MOC = @MOC@
 NM = @NM@
 NMEDIT = @NMEDIT@
 OBJDUMP = @OBJDUMP@
@@ -181,20 +179,13 @@ PFM_LIB = @PFM_LIB@
 PKG_CONFIG = @PKG_CONFIG@
 POPT_LIBS = @POPT_LIBS@
 PTRDIFF_T_TYPE = @PTRDIFF_T_TYPE@
-QT_CFLAGS = @QT_CFLAGS@
-QT_INCLUDES = @QT_INCLUDES@
-QT_LDFLAGS = @QT_LDFLAGS@
-QT_LIB = @QT_LIB@
-QT_LIBS = @QT_LIBS@
-QT_VERSION = @QT_VERSION@
 RANLIB = @RANLIB@
+RT_LIB = @RT_LIB@
 SED = @SED@
 SET_MAKE = @SET_MAKE@
 SHELL = @SHELL@
 SIZE_T_TYPE = @SIZE_T_TYPE@
 STRIP = @STRIP@
-UIC = @UIC@
-UIChelp = @UIChelp@
 VERSION = @VERSION@
 XMKMF = @XMKMF@
 XML_CATALOG = @XML_CATALOG@
diff --git a/agents/jvmti/libjvmti_oprofile.c b/agents/jvmti/libjvmti_oprofile.c
index 7157d44..40f9979 100644
--- a/agents/jvmti/libjvmti_oprofile.c
+++ b/agents/jvmti/libjvmti_oprofile.c
@@ -147,11 +147,11 @@ static void JNICALL cb_compiled_method_load(jvmtiEnv * jvmti,
 						entry_count, table_ptr,
 						source_filename);
 			} else if (err != JVMTI_ERROR_ABSENT_INFORMATION) {
-				handle_error(err, "GetSourceFileName()", 1);
+				(void)handle_error(err, "GetSourceFileName()", 1);
 			}
 		} else if (err != JVMTI_ERROR_NATIVE_METHOD &&
 			   err != JVMTI_ERROR_ABSENT_INFORMATION) {
-			handle_error(err, "GetLineNumberTable()", 1);
+			(void)handle_error(err, "GetLineNumberTable()", 1);
 		}
 	}
 
diff --git a/config.guess b/config.guess
index dc84c68..b79252d 100755
--- a/config.guess
+++ b/config.guess
@@ -1,14 +1,12 @@
 #! /bin/sh
 # Attempt to guess a canonical system name.
-#   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
-#   2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
-#   Free Software Foundation, Inc.
+#   Copyright 1992-2013 Free Software Foundation, Inc.
 
-timestamp='2009-11-20'
+timestamp='2013-06-10'
 
 # This file is free software; you can redistribute it and/or modify it
 # under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
+# the Free Software Foundation; either version 3 of the License, or
 # (at your option) any later version.
 #
 # This program is distributed in the hope that it will be useful, but
@@ -17,26 +15,22 @@ timestamp='2009-11-20'
 # General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
-# 02110-1301, USA.
+# along with this program; if not, see <http://www.gnu.org/licenses/>.
 #
 # As a special exception to the GNU General Public License, if you
 # distribute this file as part of a program that contains a
 # configuration script generated by Autoconf, you may include it under
-# the same distribution terms that you use for the rest of that program.
-
-
-# Originally written by Per Bothner.  Please send patches (context
-# diff format) to <config-patches@gnu.org> and include a ChangeLog
-# entry.
+# the same distribution terms that you use for the rest of that
+# program.  This Exception is an additional permission under section 7
+# of the GNU General Public License, version 3 ("GPLv3").
 #
-# This script attempts to guess a canonical system name similar to
-# config.sub.  If it succeeds, it prints the system name on stdout, and
-# exits with 0.  Otherwise, it exits with 1.
+# Originally written by Per Bothner.
 #
 # You can get the latest version of this script from:
 # http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD
+#
+# Please send patches with a ChangeLog entry to config-patches@gnu.org.
+
 
 me=`echo "$0" | sed -e 's,.*/,,'`
 
@@ -56,8 +50,7 @@ version="\
 GNU config.guess ($timestamp)
 
 Originally written by Per Bothner.
-Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
-2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
+Copyright 1992-2013 Free Software Foundation, Inc.
 
 This is free software; see the source for copying conditions.  There is NO
 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
@@ -139,12 +132,33 @@ UNAME_RELEASE=`(uname -r) 2>/dev/null` || UNAME_RELEASE=unknown
 UNAME_SYSTEM=`(uname -s) 2>/dev/null`  || UNAME_SYSTEM=unknown
 UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown
 
+case "${UNAME_SYSTEM}" in
+Linux|GNU|GNU/*)
+	# If the system lacks a compiler, then just pick glibc.
+	# We could probably try harder.
+	LIBC=gnu
+
+	eval $set_cc_for_build
+	cat <<-EOF > $dummy.c
+	#include <features.h>
+	#if defined(__UCLIBC__)
+	LIBC=uclibc
+	#elif defined(__dietlibc__)
+	LIBC=dietlibc
+	#else
+	LIBC=gnu
+	#endif
+	EOF
+	eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC'`
+	;;
+esac
+
 # Note: order is significant - the case branches are not exclusive.
 
 case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
     *:NetBSD:*:*)
 	# NetBSD (nbsd) targets should (where applicable) match one or
-	# more of the tupples: *-*-netbsdelf*, *-*-netbsdaout*,
+	# more of the tuples: *-*-netbsdelf*, *-*-netbsdaout*,
 	# *-*-netbsdecoff* and *-*-netbsd*.  For targets that recently
 	# switched to ELF, *-*-netbsd* would select the old
 	# object file format.  This provides both forward
@@ -180,7 +194,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
 		fi
 		;;
 	    *)
-	        os=netbsd
+		os=netbsd
 		;;
 	esac
 	# The OS release
@@ -201,6 +215,10 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
 	# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used.
 	echo "${machine}-${os}${release}"
 	exit ;;
+    *:Bitrig:*:*)
+	UNAME_MACHINE_ARCH=`arch | sed 's/Bitrig.//'`
+	echo ${UNAME_MACHINE_ARCH}-unknown-bitrig${UNAME_RELEASE}
+	exit ;;
     *:OpenBSD:*:*)
 	UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'`
 	echo ${UNAME_MACHINE_ARCH}-unknown-openbsd${UNAME_RELEASE}
@@ -223,7 +241,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
 		UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'`
 		;;
 	*5.*)
-	        UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'`
+		UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'`
 		;;
 	esac
 	# According to Compaq, /usr/sbin/psrinfo has been available on
@@ -269,7 +287,10 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
 	# A Xn.n version is an unreleased experimental baselevel.
 	# 1.2 uses "1.2" for uname -r.
 	echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'`
-	exit ;;
+	# Reset EXIT trap before exiting to avoid spurious non-zero exit code.
+	exitcode=$?
+	trap '' 0
+	exit $exitcode ;;
     Alpha\ *:Windows_NT*:*)
 	# How do we know it's Interix rather than the generic POSIX subsystem?
 	# Should we change UNAME_MACHINE based on the output of uname instead
@@ -295,12 +316,12 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
 	echo s390-ibm-zvmoe
 	exit ;;
     *:OS400:*:*)
-        echo powerpc-ibm-os400
+	echo powerpc-ibm-os400
 	exit ;;
     arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*)
 	echo arm-acorn-riscix${UNAME_RELEASE}
 	exit ;;
-    arm:riscos:*:*|arm:RISCOS:*:*)
+    arm*:riscos:*:*|arm*:RISCOS:*:*)
 	echo arm-unknown-riscos
 	exit ;;
     SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*)
@@ -394,23 +415,23 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
     # MiNT.  But MiNT is downward compatible to TOS, so this should
     # be no problem.
     atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*)
-        echo m68k-atari-mint${UNAME_RELEASE}
+	echo m68k-atari-mint${UNAME_RELEASE}
 	exit ;;
     atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*)
 	echo m68k-atari-mint${UNAME_RELEASE}
-        exit ;;
+	exit ;;
     *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*)
-        echo m68k-atari-mint${UNAME_RELEASE}
+	echo m68k-atari-mint${UNAME_RELEASE}
 	exit ;;
     milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*)
-        echo m68k-milan-mint${UNAME_RELEASE}
-        exit ;;
+	echo m68k-milan-mint${UNAME_RELEASE}
+	exit ;;
     hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*)
-        echo m68k-hades-mint${UNAME_RELEASE}
-        exit ;;
+	echo m68k-hades-mint${UNAME_RELEASE}
+	exit ;;
     *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*)
-        echo m68k-unknown-mint${UNAME_RELEASE}
-        exit ;;
+	echo m68k-unknown-mint${UNAME_RELEASE}
+	exit ;;
     m68k:machten:*:*)
 	echo m68k-apple-machten${UNAME_RELEASE}
 	exit ;;
@@ -480,8 +501,8 @@ EOF
 	echo m88k-motorola-sysv3
 	exit ;;
     AViiON:dgux:*:*)
-        # DG/UX returns AViiON for all architectures
-        UNAME_PROCESSOR=`/usr/bin/uname -p`
+	# DG/UX returns AViiON for all architectures
+	UNAME_PROCESSOR=`/usr/bin/uname -p`
 	if [ $UNAME_PROCESSOR = mc88100 ] || [ $UNAME_PROCESSOR = mc88110 ]
 	then
 	    if [ ${TARGET_BINARY_INTERFACE}x = m88kdguxelfx ] || \
@@ -494,7 +515,7 @@ EOF
 	else
 	    echo i586-dg-dgux${UNAME_RELEASE}
 	fi
- 	exit ;;
+	exit ;;
     M88*:DolphinOS:*:*)	# DolphinOS (SVR3)
 	echo m88k-dolphin-sysv3
 	exit ;;
@@ -551,7 +572,7 @@ EOF
 		echo rs6000-ibm-aix3.2
 	fi
 	exit ;;
-    *:AIX:*:[456])
+    *:AIX:*:[4567])
 	IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'`
 	if /usr/sbin/lsattr -El ${IBM_CPU_ID} | grep ' POWER' >/dev/null 2>&1; then
 		IBM_ARCH=rs6000
@@ -594,52 +615,52 @@ EOF
 	    9000/[678][0-9][0-9])
 		if [ -x /usr/bin/getconf ]; then
 		    sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null`
-                    sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null`
-                    case "${sc_cpu_version}" in
-                      523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0
-                      528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1
-                      532)                      # CPU_PA_RISC2_0
-                        case "${sc_kernel_bits}" in
-                          32) HP_ARCH="hppa2.0n" ;;
-                          64) HP_ARCH="hppa2.0w" ;;
+		    sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null`
+		    case "${sc_cpu_version}" in
+		      523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0
+		      528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1
+		      532)                      # CPU_PA_RISC2_0
+			case "${sc_kernel_bits}" in
+			  32) HP_ARCH="hppa2.0n" ;;
+			  64) HP_ARCH="hppa2.0w" ;;
 			  '') HP_ARCH="hppa2.0" ;;   # HP-UX 10.20
-                        esac ;;
-                    esac
+			esac ;;
+		    esac
 		fi
 		if [ "${HP_ARCH}" = "" ]; then
 		    eval $set_cc_for_build
-		    sed 's/^              //' << EOF >$dummy.c
+		    sed 's/^		//' << EOF >$dummy.c
 
-              #define _HPUX_SOURCE
-              #include <stdlib.h>
-              #include <unistd.h>
+		#define _HPUX_SOURCE
+		#include <stdlib.h>
+		#include <unistd.h>
 
-              int main ()
-              {
-              #if defined(_SC_KERNEL_BITS)
-                  long bits = sysconf(_SC_KERNEL_BITS);
-              #endif
-                  long cpu  = sysconf (_SC_CPU_VERSION);
+		int main ()
+		{
+		#if defined(_SC_KERNEL_BITS)
+		    long bits = sysconf(_SC_KERNEL_BITS);
+		#endif
+		    long cpu  = sysconf (_SC_CPU_VERSION);
 
-                  switch (cpu)
-              	{
-              	case CPU_PA_RISC1_0: puts ("hppa1.0"); break;
-              	case CPU_PA_RISC1_1: puts ("hppa1.1"); break;
-              	case CPU_PA_RISC2_0:
-              #if defined(_SC_KERNEL_BITS)
-              	    switch (bits)
-              		{
-              		case 64: puts ("hppa2.0w"); break;
-              		case 32: puts ("hppa2.0n"); break;
-              		default: puts ("hppa2.0"); break;
-              		} break;
-              #else  /* !defined(_SC_KERNEL_BITS) */
-              	    puts ("hppa2.0"); break;
-              #endif
-              	default: puts ("hppa1.0"); break;
-              	}
-                  exit (0);
-              }
+		    switch (cpu)
+			{
+			case CPU_PA_RISC1_0: puts ("hppa1.0"); break;
+			case CPU_PA_RISC1_1: puts ("hppa1.1"); break;
+			case CPU_PA_RISC2_0:
+		#if defined(_SC_KERNEL_BITS)
+			    switch (bits)
+				{
+				case 64: puts ("hppa2.0w"); break;
+				case 32: puts ("hppa2.0n"); break;
+				default: puts ("hppa2.0"); break;
+				} break;
+		#else  /* !defined(_SC_KERNEL_BITS) */
+			    puts ("hppa2.0"); break;
+		#endif
+			default: puts ("hppa1.0"); break;
+			}
+		    exit (0);
+		}
 EOF
 		    (CCOPTS= $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy`
 		    test -z "$HP_ARCH" && HP_ARCH=hppa
@@ -730,22 +751,22 @@ EOF
 	exit ;;
     C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*)
 	echo c1-convex-bsd
-        exit ;;
+	exit ;;
     C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*)
 	if getsysinfo -f scalar_acc
 	then echo c32-convex-bsd
 	else echo c2-convex-bsd
 	fi
-        exit ;;
+	exit ;;
     C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*)
 	echo c34-convex-bsd
-        exit ;;
+	exit ;;
     C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*)
 	echo c38-convex-bsd
-        exit ;;
+	exit ;;
     C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*)
 	echo c4-convex-bsd
-        exit ;;
+	exit ;;
     CRAY*Y-MP:*:*:*)
 	echo ymp-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
 	exit ;;
@@ -769,14 +790,14 @@ EOF
 	exit ;;
     F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*)
 	FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'`
-        FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
-        FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'`
-        echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
-        exit ;;
+	FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
+	FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'`
+	echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
+	exit ;;
     5000:UNIX_System_V:4.*:*)
-        FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
-        FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'`
-        echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
+	FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
+	FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'`
+	echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
 	exit ;;
     i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*)
 	echo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE}
@@ -788,30 +809,35 @@ EOF
 	echo ${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE}
 	exit ;;
     *:FreeBSD:*:*)
-	case ${UNAME_MACHINE} in
-	    pc98)
-		echo i386-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
+	UNAME_PROCESSOR=`/usr/bin/uname -p`
+	case ${UNAME_PROCESSOR} in
 	    amd64)
 		echo x86_64-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
 	    *)
-		echo ${UNAME_MACHINE}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
+		echo ${UNAME_PROCESSOR}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
 	esac
 	exit ;;
     i*:CYGWIN*:*)
 	echo ${UNAME_MACHINE}-pc-cygwin
 	exit ;;
+    *:MINGW64*:*)
+	echo ${UNAME_MACHINE}-pc-mingw64
+	exit ;;
     *:MINGW*:*)
 	echo ${UNAME_MACHINE}-pc-mingw32
 	exit ;;
+    i*:MSYS*:*)
+	echo ${UNAME_MACHINE}-pc-msys
+	exit ;;
     i*:windows32*:*)
-    	# uname -m includes "-pc" on this system.
-    	echo ${UNAME_MACHINE}-mingw32
+	# uname -m includes "-pc" on this system.
+	echo ${UNAME_MACHINE}-mingw32
 	exit ;;
     i*:PW*:*)
 	echo ${UNAME_MACHINE}-pc-pw32
 	exit ;;
     *:Interix*:*)
-    	case ${UNAME_MACHINE} in
+	case ${UNAME_MACHINE} in
 	    x86)
 		echo i586-pc-interix${UNAME_RELEASE}
 		exit ;;
@@ -848,15 +874,22 @@ EOF
 	exit ;;
     *:GNU:*:*)
 	# the GNU system
-	echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-gnu`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'`
+	echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-${LIBC}`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'`
 	exit ;;
     *:GNU/*:*:*)
 	# other systems with GNU libc and userland
-	echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-gnu
+	echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-${LIBC}
 	exit ;;
     i*86:Minix:*:*)
 	echo ${UNAME_MACHINE}-pc-minix
 	exit ;;
+    aarch64:Linux:*:*)
+	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+	exit ;;
+    aarch64_be:Linux:*:*)
+	UNAME_MACHINE=aarch64_be
+	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+	exit ;;
     alpha:Linux:*:*)
 	case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in
 	  EV5)   UNAME_MACHINE=alphaev5 ;;
@@ -866,52 +899,56 @@ EOF
 	  EV6)   UNAME_MACHINE=alphaev6 ;;
 	  EV67)  UNAME_MACHINE=alphaev67 ;;
 	  EV68*) UNAME_MACHINE=alphaev68 ;;
-        esac
+	esac
 	objdump --private-headers /bin/sh | grep -q ld.so.1
-	if test "$?" = 0 ; then LIBC="libc1" ; else LIBC="" ; fi
-	echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC}
+	if test "$?" = 0 ; then LIBC="gnulibc1" ; fi
+	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+	exit ;;
+    arc:Linux:*:* | arceb:Linux:*:*)
+	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
 	exit ;;
     arm*:Linux:*:*)
 	eval $set_cc_for_build
 	if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \
 	    | grep -q __ARM_EABI__
 	then
-	    echo ${UNAME_MACHINE}-unknown-linux-gnu
+	    echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
 	else
-	    echo ${UNAME_MACHINE}-unknown-linux-gnueabi
+	    if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \
+		| grep -q __ARM_PCS_VFP
+	    then
+		echo ${UNAME_MACHINE}-unknown-linux-${LIBC}eabi
+	    else
+		echo ${UNAME_MACHINE}-unknown-linux-${LIBC}eabihf
+	    fi
 	fi
 	exit ;;
     avr32*:Linux:*:*)
-	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
 	exit ;;
     cris:Linux:*:*)
-	echo cris-axis-linux-gnu
+	echo ${UNAME_MACHINE}-axis-linux-${LIBC}
 	exit ;;
     crisv32:Linux:*:*)
-	echo crisv32-axis-linux-gnu
+	echo ${UNAME_MACHINE}-axis-linux-${LIBC}
 	exit ;;
     frv:Linux:*:*)
-    	echo frv-unknown-linux-gnu
+	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+	exit ;;
+    hexagon:Linux:*:*)
+	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
 	exit ;;
     i*86:Linux:*:*)
-	LIBC=gnu
-	eval $set_cc_for_build
-	sed 's/^	//' << EOF >$dummy.c
-	#ifdef __dietlibc__
-	LIBC=dietlibc
-	#endif
-EOF
-	eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC'`
-	echo "${UNAME_MACHINE}-pc-linux-${LIBC}"
+	echo ${UNAME_MACHINE}-pc-linux-${LIBC}
 	exit ;;
     ia64:Linux:*:*)
-	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
 	exit ;;
     m32r*:Linux:*:*)
-	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
 	exit ;;
     m68*:Linux:*:*)
-	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
 	exit ;;
     mips:Linux:*:* | mips64:Linux:*:*)
 	eval $set_cc_for_build
@@ -930,51 +967,63 @@ EOF
 	#endif
 EOF
 	eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^CPU'`
-	test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; }
+	test x"${CPU}" != x && { echo "${CPU}-unknown-linux-${LIBC}"; exit; }
 	;;
+    or1k:Linux:*:*)
+	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+	exit ;;
     or32:Linux:*:*)
-	echo or32-unknown-linux-gnu
+	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
 	exit ;;
     padre:Linux:*:*)
-	echo sparc-unknown-linux-gnu
+	echo sparc-unknown-linux-${LIBC}
 	exit ;;
     parisc64:Linux:*:* | hppa64:Linux:*:*)
-	echo hppa64-unknown-linux-gnu
+	echo hppa64-unknown-linux-${LIBC}
 	exit ;;
     parisc:Linux:*:* | hppa:Linux:*:*)
 	# Look for CPU level
 	case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in
-	  PA7*) echo hppa1.1-unknown-linux-gnu ;;
-	  PA8*) echo hppa2.0-unknown-linux-gnu ;;
-	  *)    echo hppa-unknown-linux-gnu ;;
+	  PA7*) echo hppa1.1-unknown-linux-${LIBC} ;;
+	  PA8*) echo hppa2.0-unknown-linux-${LIBC} ;;
+	  *)    echo hppa-unknown-linux-${LIBC} ;;
 	esac
 	exit ;;
     ppc64:Linux:*:*)
-	echo powerpc64-unknown-linux-gnu
+	echo powerpc64-unknown-linux-${LIBC}
 	exit ;;
     ppc:Linux:*:*)
-	echo powerpc-unknown-linux-gnu
+	echo powerpc-unknown-linux-${LIBC}
+	exit ;;
+    ppc64le:Linux:*:*)
+	echo powerpc64le-unknown-linux-${LIBC}
+	exit ;;
+    ppcle:Linux:*:*)
+	echo powerpcle-unknown-linux-${LIBC}
 	exit ;;
     s390:Linux:*:* | s390x:Linux:*:*)
-	echo ${UNAME_MACHINE}-ibm-linux
+	echo ${UNAME_MACHINE}-ibm-linux-${LIBC}
 	exit ;;
     sh64*:Linux:*:*)
-    	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
 	exit ;;
     sh*:Linux:*:*)
-	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
 	exit ;;
     sparc:Linux:*:* | sparc64:Linux:*:*)
-	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+	exit ;;
+    tile*:Linux:*:*)
+	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
 	exit ;;
     vax:Linux:*:*)
-	echo ${UNAME_MACHINE}-dec-linux-gnu
+	echo ${UNAME_MACHINE}-dec-linux-${LIBC}
 	exit ;;
     x86_64:Linux:*:*)
-	echo x86_64-unknown-linux-gnu
+	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
 	exit ;;
     xtensa*:Linux:*:*)
-    	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
 	exit ;;
     i*86:DYNIX/ptx:4*:*)
 	# ptx 4.0 does uname -s correctly, with DYNIX/ptx in there.
@@ -983,11 +1032,11 @@ EOF
 	echo i386-sequent-sysv4
 	exit ;;
     i*86:UNIX_SV:4.2MP:2.*)
-        # Unixware is an offshoot of SVR4, but it has its own version
-        # number series starting with 2...
-        # I am not positive that other SVR4 systems won't match this,
+	# Unixware is an offshoot of SVR4, but it has its own version
+	# number series starting with 2...
+	# I am not positive that other SVR4 systems won't match this,
 	# I just have to hope.  -- rms.
-        # Use sysv4.2uw... so that sysv4* matches it.
+	# Use sysv4.2uw... so that sysv4* matches it.
 	echo ${UNAME_MACHINE}-pc-sysv4.2uw${UNAME_VERSION}
 	exit ;;
     i*86:OS/2:*:*)
@@ -1019,7 +1068,7 @@ EOF
 	fi
 	exit ;;
     i*86:*:5:[678]*)
-    	# UnixWare 7.x, OpenUNIX and OpenServer 6.
+	# UnixWare 7.x, OpenUNIX and OpenServer 6.
 	case `/bin/uname -X | grep "^Machine"` in
 	    *486*)	     UNAME_MACHINE=i486 ;;
 	    *Pentium)	     UNAME_MACHINE=i586 ;;
@@ -1047,13 +1096,13 @@ EOF
 	exit ;;
     pc:*:*:*)
 	# Left here for compatibility:
-        # uname -m prints for DJGPP always 'pc', but it prints nothing about
-        # the processor, so we play safe by assuming i586.
+	# uname -m prints for DJGPP always 'pc', but it prints nothing about
+	# the processor, so we play safe by assuming i586.
 	# Note: whatever this is, it MUST be the same as what config.sub
 	# prints for the "djgpp" host, or else GDB configury will decide that
 	# this is a cross-build.
 	echo i586-pc-msdosdjgpp
-        exit ;;
+	exit ;;
     Intel:Mach:3*:*)
 	echo i386-pc-mach3
 	exit ;;
@@ -1088,8 +1137,8 @@ EOF
 	/bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \
 	  && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;;
     3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*)
-        /bin/uname -p 2>/dev/null | grep 86 >/dev/null \
-          && { echo i486-ncr-sysv4; exit; } ;;
+	/bin/uname -p 2>/dev/null | grep 86 >/dev/null \
+	  && { echo i486-ncr-sysv4; exit; } ;;
     NCR*:*:4.2:* | MPRAS*:*:4.2:*)
 	OS_REL='.3'
 	test -r /etc/.relid \
@@ -1132,10 +1181,10 @@ EOF
 		echo ns32k-sni-sysv
 	fi
 	exit ;;
-    PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort
-                      # says <Richard.M.Bartel@ccMail.Census.GOV>
-        echo i586-unisys-sysv4
-        exit ;;
+    PENTIUM:*:4.0*:*)	# Unisys `ClearPath HMP IX 4000' SVR4/MP effort
+			# says <Richard.M.Bartel@ccMail.Census.GOV>
+	echo i586-unisys-sysv4
+	exit ;;
     *:UNIX_System_V:4*:FTX*)
 	# From Gerald Hewes <hewes@openmarket.com>.
 	# How about differentiating between stratus architectures? -djm
@@ -1161,11 +1210,11 @@ EOF
 	exit ;;
     R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*)
 	if [ -d /usr/nec ]; then
-	        echo mips-nec-sysv${UNAME_RELEASE}
+		echo mips-nec-sysv${UNAME_RELEASE}
 	else
-	        echo mips-unknown-sysv${UNAME_RELEASE}
+		echo mips-unknown-sysv${UNAME_RELEASE}
 	fi
-        exit ;;
+	exit ;;
     BeBox:BeOS:*:*)	# BeOS running on hardware made by Be, PPC only.
 	echo powerpc-be-beos
 	exit ;;
@@ -1178,6 +1227,9 @@ EOF
     BePC:Haiku:*:*)	# Haiku running on Intel PC compatible.
 	echo i586-pc-haiku
 	exit ;;
+    x86_64:Haiku:*:*)
+	echo x86_64-unknown-haiku
+	exit ;;
     SX-4:SUPER-UX:*:*)
 	echo sx4-nec-superux${UNAME_RELEASE}
 	exit ;;
@@ -1204,19 +1256,21 @@ EOF
 	exit ;;
     *:Darwin:*:*)
 	UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown
-	case $UNAME_PROCESSOR in
-	    i386)
-		eval $set_cc_for_build
-		if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then
-		  if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \
-		      (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \
-		      grep IS_64BIT_ARCH >/dev/null
-		  then
-		      UNAME_PROCESSOR="x86_64"
-		  fi
-		fi ;;
-	    unknown) UNAME_PROCESSOR=powerpc ;;
-	esac
+	eval $set_cc_for_build
+	if test "$UNAME_PROCESSOR" = unknown ; then
+	    UNAME_PROCESSOR=powerpc
+	fi
+	if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then
+	    if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \
+		(CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \
+		grep IS_64BIT_ARCH >/dev/null
+	    then
+		case $UNAME_PROCESSOR in
+		    i386) UNAME_PROCESSOR=x86_64 ;;
+		    powerpc) UNAME_PROCESSOR=powerpc64 ;;
+		esac
+	    fi
+	fi
 	echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE}
 	exit ;;
     *:procnto*:*:* | *:QNX:[0123456789]*:*)
@@ -1230,7 +1284,10 @@ EOF
     *:QNX:*:4*)
 	echo i386-pc-qnx
 	exit ;;
-    NSE-?:NONSTOP_KERNEL:*:*)
+    NEO-?:NONSTOP_KERNEL:*:*)
+	echo neo-tandem-nsk${UNAME_RELEASE}
+	exit ;;
+    NSE-*:NONSTOP_KERNEL:*:*)
 	echo nse-tandem-nsk${UNAME_RELEASE}
 	exit ;;
     NSR-?:NONSTOP_KERNEL:*:*)
@@ -1275,13 +1332,13 @@ EOF
 	echo pdp10-unknown-its
 	exit ;;
     SEI:*:*:SEIUX)
-        echo mips-sei-seiux${UNAME_RELEASE}
+	echo mips-sei-seiux${UNAME_RELEASE}
 	exit ;;
     *:DragonFly:*:*)
 	echo ${UNAME_MACHINE}-unknown-dragonfly`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`
 	exit ;;
     *:*VMS:*:*)
-    	UNAME_MACHINE=`(uname -p) 2>/dev/null`
+	UNAME_MACHINE=`(uname -p) 2>/dev/null`
 	case "${UNAME_MACHINE}" in
 	    A*) echo alpha-dec-vms ; exit ;;
 	    I*) echo ia64-dec-vms ; exit ;;
@@ -1299,11 +1356,11 @@ EOF
     i*86:AROS:*:*)
 	echo ${UNAME_MACHINE}-pc-aros
 	exit ;;
+    x86_64:VMkernel:*:*)
+	echo ${UNAME_MACHINE}-unknown-esx
+	exit ;;
 esac
 
-#echo '(No uname command or uname output not recognized.)' 1>&2
-#echo "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" 1>&2
-
 eval $set_cc_for_build
 cat >$dummy.c <<EOF
 #ifdef _SEQUENT_
@@ -1321,11 +1378,11 @@ main ()
 #include <sys/param.h>
   printf ("m68k-sony-newsos%s\n",
 #ifdef NEWSOS4
-          "4"
+	"4"
 #else
-	  ""
+	""
 #endif
-         ); exit (0);
+	); exit (0);
 #endif
 #endif
 
diff --git a/config.h.in b/config.h.in
index 36d65e1..2463ed9 100644
--- a/config.h.in
+++ b/config.h.in
@@ -3,9 +3,6 @@
 /* whether popt prototype takes a const char ** */
 #undef CONST_POPT
 
-/* Defined if you have the version of bfd_openr_iovec with 7 parameters */
-#undef HAVE_BFD_OPENR_IOVEC_WITH_7PARMS
-
 /* Define to 1 if you have the declaration of `basename', and to 0 if you
    don't. */
 #undef HAVE_DECL_BASENAME
@@ -40,6 +37,12 @@
 /* Kernel support for perf_events exists */
 #undef HAVE_PERF_EVENTS
 
+/* PERF_RECORD_MISC_GUEST_KERNEL is defined in perf_event.h */
+#undef HAVE_PERF_GUEST_MACROS
+
+/* precise_ip is defined in perf_event.h */
+#undef HAVE_PERF_PRECISE_IP
+
 /* Define to 1 if you have the `sched_setaffinity' function. */
 #undef HAVE_SCHED_SETAFFINITY
 
diff --git a/configure b/configure
index 662aa97..fc482b2 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.63.
+# Generated by GNU Autoconf 2.63 for OProfile 1.0.0.
 #
 # Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
 # 2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
@@ -741,11 +741,11 @@ MAKEFLAGS=
 SHELL=${CONFIG_SHELL-/bin/sh}
 
 # Identity of this package.
-PACKAGE_NAME=
-PACKAGE_TARNAME=
-PACKAGE_VERSION=
-PACKAGE_STRING=
-PACKAGE_BUGREPORT=
+PACKAGE_NAME='OProfile'
+PACKAGE_TARNAME='oprofile'
+PACKAGE_VERSION='1.0.0'
+PACKAGE_STRING='OProfile 1.0.0'
+PACKAGE_BUGREPORT=''
 
 ac_unique_file="libop/op_config.h"
 # Factoring default headers for most tests.
@@ -789,6 +789,7 @@ am__EXEEXT_TRUE
 LTLIBOBJS
 LIBOBJS
 OP_DOCDIR
+RT_LIB
 OP_CXXFLAGS
 OP_CFLAGS
 CHECK_ACCOUNT_FALSE
@@ -804,17 +805,6 @@ XML_CATALOG
 have_xsltproc_FALSE
 have_xsltproc_TRUE
 XSLTPROC
-have_qt_FALSE
-have_qt_TRUE
-UIChelp
-QT_LIBS
-QT_CFLAGS
-QT_VERSION
-QT_LIB
-UIC
-MOC
-QT_LDFLAGS
-QT_INCLUDES
 X_EXTRA_LIBS
 X_LIBS
 X_PRE_LIBS
@@ -832,9 +822,9 @@ BUILD_JVMTI_AGENT_FALSE
 BUILD_JVMTI_AGENT_TRUE
 JAVA_HOMEDIR
 PFM_LIB
-PERF_EVENT_FLAGS
 BUILD_FOR_PERF_EVENT_FALSE
 BUILD_FOR_PERF_EVENT_TRUE
+PERF_EVENT_FLAGS
 CXXCPP
 am__fastdepCXX_FALSE
 am__fastdepCXX_TRUE
@@ -964,12 +954,7 @@ with_kernel
 with_java
 with_extra_includes
 with_extra_libs
-with_target
 with_x
-enable_gui
-with_qt_dir
-with_qt_includes
-with_qt_libraries
 enable_pch
 enable_gcov
 enable_werror
@@ -990,9 +975,7 @@ CXX
 CXXFLAGS
 CCC
 CXXCPP
-XMKMF
-QT_CFLAGS
-QT_LIBS'
+XMKMF'
 
 
 # Initialize some variables set by options.
@@ -1033,7 +1016,7 @@ sharedstatedir='${prefix}/com'
 localstatedir='${prefix}/var'
 includedir='${prefix}/include'
 oldincludedir='/usr/include'
-docdir='${datarootdir}/doc/${PACKAGE}'
+docdir='${datarootdir}/doc/${PACKAGE_TARNAME}'
 infodir='${datarootdir}/info'
 htmldir='${docdir}'
 dvidir='${docdir}'
@@ -1545,7 +1528,7 @@ if test "$ac_init_help" = "long"; then
   # Omit some internal or obsolete options to make the list less imposing.
   # This message is too long to be a string in the A/UX 3.1 sh.
   cat <<_ACEOF
-\`configure' configures this package to adapt to many kinds of systems.
+\`configure' configures OProfile 1.0.0 to adapt to many kinds of systems.
 
 Usage: $0 [OPTION]... [VAR=VALUE]...
 
@@ -1593,7 +1576,7 @@ Fine tuning of the installation directories:
   --infodir=DIR           info documentation [DATAROOTDIR/info]
   --localedir=DIR         locale-dependent data [DATAROOTDIR/locale]
   --mandir=DIR            man documentation [DATAROOTDIR/man]
-  --docdir=DIR            documentation root [DATAROOTDIR/doc/PACKAGE]
+  --docdir=DIR            documentation root [DATAROOTDIR/doc/oprofile]
   --htmldir=DIR           html documentation [DOCDIR]
   --dvidir=DIR            dvi documentation [DOCDIR]
   --pdfdir=DIR            pdf documentation [DOCDIR]
@@ -1618,7 +1601,9 @@ _ACEOF
 fi
 
 if test -n "$ac_init_help"; then
-
+  case $ac_init_help in
+     short | recursive ) echo "Configuration of OProfile 1.0.0:";;
+   esac
   cat <<\_ACEOF
 
 Optional Features:
@@ -1632,8 +1617,6 @@ Optional Features:
   --enable-fast-install[=PKGS]
                           optimize for fast installation [default=yes]
   --disable-libtool-lock  avoid locking (might break parallel builds)
-  --enable-gui  compile with gui component (qt3|qt4|yes|no),
-		     if not given or set to yes, gui defaults to qt3
   --enable-pch                 enable precompiled header (default is disabled)
   --enable-gcov                enable option for gcov coverage testing (default is disabled)
   --disable-werror            disable -Werror flag (default is enabled for non-release)
@@ -1648,26 +1631,26 @@ Optional Packages:
   --with-gnu-ld           assume the C compiler uses GNU ld [default=no]
   --with-binutils=dir		Path to binutils installation to use
   --with-binutils-libname       Lib dir name under binutils installation; [lib]]
-  --with-kernel=dir       Path to kernel include directory
-                          (...include/linux/perf_event.h) to use. If this
-                          option is not specified, configure will look for
-                          kernel header files in the usual installation
-                          location for a kernel-headers package --
-                          /usr/include. Use this option in cross-compile
-                          enviroments or in situations where the host system
-                          does not support perf_events but you wish to build
-                          binaries for a target system that does support
-                          perf_events. Because of OProfile's use of syscalls,
-                          be sure that the kernel headers used match the
-                          architecture of the intended target system.
+  --with-kernel=dir       Path to kernel include directory (e.g.
+                          /tmp/linux-xyz) to use. If this option is not
+                          specified, configure will look for kernel header
+                          files in the usual installation location for a
+                          kernel-headers package -- /usr. Use this option in
+                          cross-compile environments or in situations where the
+                          host system does not support perf_events but you
+                          wish to build binaries for a target system that does
+                          support perf_events. Because of OProfile's use of
+                          syscalls, kernel headers used during build must
+                          match the architecture of the intended target
+                          system. NOTE: Run the command 'make headers_install
+                          INSTALL_HDR_PATH=<kernel-hdrs-install-dir>' from the
+                          root directory of your kernel source tree, and use
+                          <kernel-hdrs-install-dir> for oprofile's
+                          '--with-kernel' configure option.
   --with-java=java-home        Path to Java home directory (default is "no"; "yes" will use /usr as Java home)
   --with-extra-includes=DIR    add extra include paths
   --with-extra-libs=DIR        add extra library paths
-  --with-target=cell-be   Check BFD support for Cell Broadband Engine SPU profiling
   --with-x                use the X Window System
-  --with-qt-dir           where the root of Qt is installed
-  --with-qt-includes      where the Qt includes are.
-  --with-qt-libraries     where the Qt library is installed.
 
 Some influential environment variables:
   CC          C compiler command
@@ -1683,8 +1666,6 @@ Some influential environment variables:
   CXXFLAGS    C++ compiler flags
   CXXCPP      C++ preprocessor
   XMKMF       Path to xmkmf, Makefile generator for X Window System
-  QT_CFLAGS   C compiler flags for QT, overriding pkg-config
-  QT_LIBS     linker flags for QT, overriding pkg-config
 
 Use these variables to override the choices made by `configure' or to help
 it to find libraries and programs with nonstandard names/locations.
@@ -1751,7 +1732,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
-configure
+OProfile configure 1.0.0
 generated by GNU Autoconf 2.63
 
 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
@@ -1765,7 +1746,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
-It was created by $as_me, which was
+It was created by OProfile $as_me 1.0.0, which was
 generated by GNU Autoconf 2.63.  Invocation command line was
 
   $ $0 $@
@@ -2113,6 +2094,14 @@ fi
 
 
 
+
+
+
+
+
+
+
+
 
 
 
@@ -2125,6 +2114,7 @@ ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $
 ac_compiler_gnu=$ac_cv_c_compiler_gnu
 
 
+
 am__api_version='1.11'
 
 ac_aux_dir=
@@ -2605,8 +2595,8 @@ fi
 
 
 # Define the identity of the package.
- PACKAGE=oprofile
- VERSION=0.9.8
+ PACKAGE='oprofile'
+ VERSION='1.0.0'
 
 
 cat >>confdefs.h <<_ACEOF
@@ -4636,13 +4626,13 @@ if test "${lt_cv_nm_interface+set}" = set; then
 else
   lt_cv_nm_interface="BSD nm"
   echo "int some_variable = 0;" > conftest.$ac_ext
-  (eval echo "\"\$as_me:4639: $ac_compile\"" >&5)
+  (eval echo "\"\$as_me:4629: $ac_compile\"" >&5)
   (eval "$ac_compile" 2>conftest.err)
   cat conftest.err >&5
-  (eval echo "\"\$as_me:4642: $NM \\\"conftest.$ac_objext\\\"\"" >&5)
+  (eval echo "\"\$as_me:4632: $NM \\\"conftest.$ac_objext\\\"\"" >&5)
   (eval "$NM \"conftest.$ac_objext\"" 2>conftest.err > conftest.out)
   cat conftest.err >&5
-  (eval echo "\"\$as_me:4645: output\"" >&5)
+  (eval echo "\"\$as_me:4635: output\"" >&5)
   cat conftest.out >&5
   if $GREP 'External.*some_variable' conftest.out > /dev/null; then
     lt_cv_nm_interface="MS dumpbin"
@@ -5847,7 +5837,7 @@ ia64-*-hpux*)
   ;;
 *-*-irix6*)
   # Find out which ABI we are using.
-  echo '#line 5850 "configure"' > conftest.$ac_ext
+  echo '#line 5840 "configure"' > conftest.$ac_ext
   if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
   (eval $ac_compile) 2>&5
   ac_status=$?
@@ -7704,11 +7694,11 @@ else
    -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
    -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
    -e 's:$: $lt_compiler_flag:'`
-   (eval echo "\"\$as_me:7707: $lt_compile\"" >&5)
+   (eval echo "\"\$as_me:7697: $lt_compile\"" >&5)
    (eval "$lt_compile" 2>conftest.err)
    ac_status=$?
    cat conftest.err >&5
-   echo "$as_me:7711: \$? = $ac_status" >&5
+   echo "$as_me:7701: \$? = $ac_status" >&5
    if (exit $ac_status) && test -s "$ac_outfile"; then
      # The compiler can only warn and ignore the option if not recognized
      # So say no if there are warnings other than the usual output.
@@ -8043,11 +8033,11 @@ else
    -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
    -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
    -e 's:$: $lt_compiler_flag:'`
-   (eval echo "\"\$as_me:8046: $lt_compile\"" >&5)
+   (eval echo "\"\$as_me:8036: $lt_compile\"" >&5)
    (eval "$lt_compile" 2>conftest.err)
    ac_status=$?
    cat conftest.err >&5
-   echo "$as_me:8050: \$? = $ac_status" >&5
+   echo "$as_me:8040: \$? = $ac_status" >&5
    if (exit $ac_status) && test -s "$ac_outfile"; then
      # The compiler can only warn and ignore the option if not recognized
      # So say no if there are warnings other than the usual output.
@@ -8148,11 +8138,11 @@ else
    -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
    -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
    -e 's:$: $lt_compiler_flag:'`
-   (eval echo "\"\$as_me:8151: $lt_compile\"" >&5)
+   (eval echo "\"\$as_me:8141: $lt_compile\"" >&5)
    (eval "$lt_compile" 2>out/conftest.err)
    ac_status=$?
    cat out/conftest.err >&5
-   echo "$as_me:8155: \$? = $ac_status" >&5
+   echo "$as_me:8145: \$? = $ac_status" >&5
    if (exit $ac_status) && test -s out/conftest2.$ac_objext
    then
      # The compiler can only warn and ignore the option if not recognized
@@ -8203,11 +8193,11 @@ else
    -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
    -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
    -e 's:$: $lt_compiler_flag:'`
-   (eval echo "\"\$as_me:8206: $lt_compile\"" >&5)
+   (eval echo "\"\$as_me:8196: $lt_compile\"" >&5)
    (eval "$lt_compile" 2>out/conftest.err)
    ac_status=$?
    cat out/conftest.err >&5
-   echo "$as_me:8210: \$? = $ac_status" >&5
+   echo "$as_me:8200: \$? = $ac_status" >&5
    if (exit $ac_status) && test -s out/conftest2.$ac_objext
    then
      # The compiler can only warn and ignore the option if not recognized
@@ -11006,7 +10996,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 11009 "configure"
+#line 10999 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -11102,7 +11092,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 11105 "configure"
+#line 11095 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -15579,11 +15569,11 @@ else
    -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
    -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
    -e 's:$: $lt_compiler_flag:'`
-   (eval echo "\"\$as_me:15582: $lt_compile\"" >&5)
+   (eval echo "\"\$as_me:15572: $lt_compile\"" >&5)
    (eval "$lt_compile" 2>conftest.err)
    ac_status=$?
    cat conftest.err >&5
-   echo "$as_me:15586: \$? = $ac_status" >&5
+   echo "$as_me:15576: \$? = $ac_status" >&5
    if (exit $ac_status) && test -s "$ac_outfile"; then
      # The compiler can only warn and ignore the option if not recognized
      # So say no if there are warnings other than the usual output.
@@ -15678,11 +15668,11 @@ else
    -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
    -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
    -e 's:$: $lt_compiler_flag:'`
-   (eval echo "\"\$as_me:15681: $lt_compile\"" >&5)
+   (eval echo "\"\$as_me:15671: $lt_compile\"" >&5)
    (eval "$lt_compile" 2>out/conftest.err)
    ac_status=$?
    cat out/conftest.err >&5
-   echo "$as_me:15685: \$? = $ac_status" >&5
+   echo "$as_me:15675: \$? = $ac_status" >&5
    if (exit $ac_status) && test -s out/conftest2.$ac_objext
    then
      # The compiler can only warn and ignore the option if not recognized
@@ -15730,11 +15720,11 @@ else
    -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
    -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
    -e 's:$: $lt_compiler_flag:'`
-   (eval echo "\"\$as_me:15733: $lt_compile\"" >&5)
+   (eval echo "\"\$as_me:15723: $lt_compile\"" >&5)
    (eval "$lt_compile" 2>out/conftest.err)
    ac_status=$?
    cat out/conftest.err >&5
-   echo "$as_me:15737: \$? = $ac_status" >&5
+   echo "$as_me:15727: \$? = $ac_status" >&5
    if (exit $ac_status) && test -s out/conftest2.$ac_objext
    then
      # The compiler can only warn and ignore the option if not recognized
@@ -16748,14 +16738,24 @@ fi
 
 
 
-{ $as_echo "$as_me:$LINENO: checking kernel version supports perf_events" >&5
-$as_echo_n "checking kernel version supports perf_events... " >&6; }
 if test "$KERNELDIR" != ""; then
-	KINC="$KERNELDIR/include"
+	if test -d $KERNELDIR; then
+		KINC="$KERNELDIR/include"
+		PERF_EVENT_FLAGS=" -I$KERNELDIR/include"
+
+		PERF_EVENT_H="$KERNELDIR/include/linux/perf_event.h"
+	else
+		echo "$KERNELDIR does not exist."
+	fi
+else
+	PERF_EVENT_H="/usr/include/linux/perf_event.h"
 fi
 
+PERF_EVENT_H_EXISTS="no"
+kernel_may_have_perf_events_support="no"
+
 SAVE_CFLAGS=$CFLAGS
-CFLAGS="-I$KINC -D__KERNEL__ -Werror"
+CFLAGS="-I$KINC -Werror"
 cat >conftest.$ac_ext <<_ACEOF
 /* confdefs.h.  */
 _ACEOF
@@ -16795,7 +16795,7 @@ $as_echo "$ac_try_echo") >&5
 	 test -z "$ac_c_werror_flag" ||
 	 test ! -s conftest.err
        } && test -s conftest.$ac_objext; then
-  kernel_has_perf_events_support="yes"
+  kernel_may_have_perf_events_support="yes"
 else
   $as_echo "$as_me: failed program was:" >&5
 sed 's/^/| /' conftest.$ac_ext >&5
@@ -16807,20 +16807,9 @@ rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
 CFLAGS=$SAVE_CFLAGS
 
 
-if test "$kernel_has_perf_events_support" = "no"; then
-	{ $as_echo "$as_me:$LINENO: result: This kernel does not have perf_events support; falling back to legacy oprofile" >&5
-$as_echo "This kernel does not have perf_events support; falling back to legacy oprofile" >&6; }
-else
-	{ $as_echo "$as_me:$LINENO: result: This kernel has perf_events support" >&5
-$as_echo "This kernel has perf_events support" >&6; }
-fi
 
-if test "$KERNELDIR" == ""; then
-	PERF_EVENT_H="/usr/include/linux/perf_event.h"
-else
-	PERF_EVENT_H="$KERNELDIR/include/linux/perf_event.h"
-fi
-as_ac_Header=`$as_echo "ac_cv_header_$PERF_EVENT_H" | $as_tr_sh`
+if test "$kernel_may_have_perf_events_support" = "yes"; then
+	as_ac_Header=`$as_echo "ac_cv_header_$PERF_EVENT_H" | $as_tr_sh`
 if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
   { $as_echo "$as_me:$LINENO: checking for $PERF_EVENT_H" >&5
 $as_echo_n "checking for $PERF_EVENT_H... " >&6; }
@@ -16959,7 +16948,59 @@ as_val=`eval 'as_val=${'$as_ac_Header'}
 fi
 
 
- if test -n "$PERF_EVENT_H_EXISTS"; then
+	{ $as_echo "$as_me:$LINENO: checking kernel supports perf_events" >&5
+$as_echo_n "checking kernel supports perf_events... " >&6; }
+	if test "$PERF_EVENT_H_EXISTS" = "yes"; then
+		rm -f test-for-PERF_EVENT_OPEN
+		cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+#include <linux/perf_event.h>
+				#include <asm/unistd.h>
+				#include <sys/types.h>
+				#include <string.h>
+				#include <unistd.h>	/* declares getpid() and syscall() used by the probe below */
+int
+main ()
+{
+struct perf_event_attr attr;
+				pid_t pid;
+				memset(&attr, 0, sizeof(attr));
+				attr.size = sizeof(attr);
+				attr.sample_type = PERF_SAMPLE_IP;
+				pid = getpid();
+				syscall(__NR_perf_event_open, &attr, pid, 0, -1, 0);
+
+  ;
+  return 0;
+}
+
+_ACEOF
+		$CC conftest.$ac_ext $CFLAGS $LDFLAGS $LIBS $PERF_EVENT_FLAGS -o test-for-PERF_EVENT_OPEN  > /dev/null 2>&1
+		if test -f test-for-PERF_EVENT_OPEN; then
+			kernel_has_perf_events_support="yes"
+			{ $as_echo "$as_me:$LINENO: result: yes" >&5
+$as_echo "yes" >&6; }
+		else
+			{ $as_echo "$as_me:$LINENO: result: no" >&5
+$as_echo "no" >&6; }
+			kernel_has_perf_events_support="no"
+		fi
+		rm -f test-for-PERF_EVENT_OPEN
+	else
+		{ $as_echo "$as_me:$LINENO: result: unknown -- perf_event.h not found" >&5
+$as_echo "unknown -- perf_event.h not found" >&6; }
+	fi
+else
+	{ $as_echo "$as_me:$LINENO: result: kernel supports perf_events... no" >&5
+$as_echo "kernel supports perf_events... no" >&6; }
+	kernel_has_perf_events_support="no"
+fi
+
+ if test "$kernel_has_perf_events_support" = "yes"; then
   BUILD_FOR_PERF_EVENT_TRUE=
   BUILD_FOR_PERF_EVENT_FALSE='#'
 else
@@ -16967,25 +17008,130 @@ else
   BUILD_FOR_PERF_EVENT_FALSE=
 fi
 
-if test "$PERF_EVENT_H_EXISTS" = "yes"; then
+
+if test "$kernel_has_perf_events_support" = "yes"; then
 	HAVE_PERF_EVENTS='1'
-	if test "$KERNELDIR" != ""; then
-		PERF_EVENT_FLAGS=" -I$KERNELDIR/include"
+	{ $as_echo "$as_me:$LINENO: checking whether PERF_RECORD_MISC_GUEST_KERNEL is defined in perf_event.h" >&5
+$as_echo_n "checking whether PERF_RECORD_MISC_GUEST_KERNEL is defined in perf_event.h... " >&6; }
+	rm -f test-for-PERF_GUEST
+	cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+#include <linux/perf_event.h>
+int
+main ()
+{
+unsigned int pr_guest_kern = PERF_RECORD_MISC_GUEST_KERNEL;
+			unsigned int pr_guest_user = PERF_RECORD_MISC_GUEST_USER;
+  ;
+  return 0;
+}
+
+_ACEOF
+	$CC conftest.$ac_ext $CFLAGS $LDFLAGS $LIBS $PERF_EVENT_FLAGS -o test-for-PERF_GUEST  > /dev/null 2>&1
+	if test -f test-for-PERF_GUEST; then
+		echo "yes"
+		HAVE_PERF_GUEST_MACROS='1'
+	else
+		echo "no"
+		HAVE_PERF_GUEST_MACROS='0'
+	fi
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_PERF_GUEST_MACROS $HAVE_PERF_GUEST_MACROS
+_ACEOF
+
+	rm -f test-for-PERF_GUEST*
+
+	{ $as_echo "$as_me:$LINENO: checking whether precise_ip is defined in perf_event.h" >&5
+$as_echo_n "checking whether precise_ip is defined in perf_event.h... " >&6; }
+	rm -f test-for-precise-ip
+	cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+#include <linux/perf_event.h>
+int
+main ()
+{
+struct perf_event_attr attr;
+			attr.precise_ip = 2;
+  ;
+  return 0;
+}
 
+_ACEOF
+	$CC conftest.$ac_ext $CFLAGS $LDFLAGS $LIBS $PERF_EVENT_FLAGS -o test-for-precise-ip  > /dev/null 2>&1
+	if test -f test-for-precise-ip; then
+		echo "yes"
+		HAVE_PERF_PRECISE_IP='1'
+	else
+		echo "no"
+		HAVE_PERF_PRECISE_IP='0'
 	fi
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_PERF_PRECISE_IP $HAVE_PERF_PRECISE_IP
+_ACEOF
+
+	rm -f test-for-precise-ip*
+
 else
 	HAVE_PERF_EVENTS='0'
+	{ $as_echo "$as_me:$LINENO: result: No perf_events support available; falling back to legacy oprofile" >&5
+$as_echo "No perf_events support available; falling back to legacy oprofile" >&6; }
 fi
 
+
 cat >>confdefs.h <<_ACEOF
 #define HAVE_PERF_EVENTS $HAVE_PERF_EVENTS
 _ACEOF
 
+{ $as_echo "$as_me:$LINENO: checking host system type" >&5
+$as_echo_n "checking host system type... " >&6; }
+if test "${ac_cv_host+set}" = set; then
+  $as_echo_n "(cached) " >&6
+else
+  if test "x$host_alias" = x; then
+  ac_cv_host=$ac_cv_build
+else
+  ac_cv_host=`$SHELL "$ac_aux_dir/config.sub" $host_alias` ||
+    { { $as_echo "$as_me:$LINENO: error: $SHELL $ac_aux_dir/config.sub $host_alias failed" >&5
+$as_echo "$as_me: error: $SHELL $ac_aux_dir/config.sub $host_alias failed" >&2;}
+   { (exit 1); exit 1; }; }
+fi
+
+fi
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_host" >&5
+$as_echo "$ac_cv_host" >&6; }
+case $ac_cv_host in
+*-*-*) ;;
+*) { { $as_echo "$as_me:$LINENO: error: invalid value of canonical host" >&5
+$as_echo "$as_me: error: invalid value of canonical host" >&2;}
+   { (exit 1); exit 1; }; };;
+esac
+host=$ac_cv_host
+ac_save_IFS=$IFS; IFS='-'
+set x $ac_cv_host
+shift
+host_cpu=$1
+host_vendor=$2
+shift; shift
+# Remember, the first character of IFS is used to create $*,
+# except with old shells:
+host_os=$*
+IFS=$ac_save_IFS
+case $host_os in *\ *) host_os=`echo "$host_os" | sed 's/ /-/g'`;; esac
+
 
 if test "$HAVE_PERF_EVENTS" = "1"; then
 	PFM_LIB=
-	arch="`uname -m`"
-	if test "$arch" = "ppc64"; then
+	if test "$host_cpu" = "powerpc64le" -o "$host_cpu" = "powerpc64"; then
 		if test "${ac_cv_header_perfmon_pfmlib_h+set}" = set; then
   { $as_echo "$as_me:$LINENO: checking for perfmon/pfmlib.h" >&5
 $as_echo_n "checking for perfmon/pfmlib.h... " >&6; }
@@ -17116,15 +17262,15 @@ fi
 if test "x$ac_cv_header_perfmon_pfmlib_h" = x""yes; then
   :
 else
-  { { $as_echo "$as_me:$LINENO: error: pfmlib.h not found; usually provided in papi devel package" >&5
-$as_echo "$as_me: error: pfmlib.h not found; usually provided in papi devel package" >&2;}
+  { { $as_echo "$as_me:$LINENO: error: pfmlib.h not found; may be provided by libpfm devel or papi devel package" >&5
+$as_echo "$as_me: error: pfmlib.h not found; may be provided by libpfm devel or papi devel package" >&2;}
    { (exit 1); exit 1; }; }
 fi
 
 
-		{ $as_echo "$as_me:$LINENO: checking for pfm_get_event_name in -lpfm" >&5
-$as_echo_n "checking for pfm_get_event_name in -lpfm... " >&6; }
-if test "${ac_cv_lib_pfm_pfm_get_event_name+set}" = set; then
+		{ $as_echo "$as_me:$LINENO: checking for pfm_get_os_event_encoding in -lpfm" >&5
+$as_echo_n "checking for pfm_get_os_event_encoding in -lpfm... " >&6; }
+if test "${ac_cv_lib_pfm_pfm_get_os_event_encoding+set}" = set; then
   $as_echo_n "(cached) " >&6
 else
   ac_check_lib_save_LIBS=$LIBS
@@ -17142,11 +17288,11 @@ cat >>conftest.$ac_ext <<_ACEOF
 #ifdef __cplusplus
 extern "C"
 #endif
-char pfm_get_event_name ();
+char pfm_get_os_event_encoding ();
 int
 main ()
 {
-return pfm_get_event_name ();
+return pfm_get_os_event_encoding ();
   ;
   return 0;
 }
@@ -17172,12 +17318,12 @@ $as_echo "$ac_try_echo") >&5
 	 test "$cross_compiling" = yes ||
 	 $as_test_x conftest$ac_exeext
        }; then
-  ac_cv_lib_pfm_pfm_get_event_name=yes
+  ac_cv_lib_pfm_pfm_get_os_event_encoding=yes
 else
   $as_echo "$as_me: failed program was:" >&5
 sed 's/^/| /' conftest.$ac_ext >&5
 
-	ac_cv_lib_pfm_pfm_get_event_name=no
+	ac_cv_lib_pfm_pfm_get_os_event_encoding=no
 fi
 
 rm -rf conftest.dSYM
@@ -17185,15 +17331,15 @@ rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
       conftest$ac_exeext conftest.$ac_ext
 LIBS=$ac_check_lib_save_LIBS
 fi
-{ $as_echo "$as_me:$LINENO: result: $ac_cv_lib_pfm_pfm_get_event_name" >&5
-$as_echo "$ac_cv_lib_pfm_pfm_get_event_name" >&6; }
-if test "x$ac_cv_lib_pfm_pfm_get_event_name" = x""yes; then
-  HAVE_LIBPFM3='1'; HAVE_LIBPFM='1'
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_lib_pfm_pfm_get_os_event_encoding" >&5
+$as_echo "$ac_cv_lib_pfm_pfm_get_os_event_encoding" >&6; }
+if test "x$ac_cv_lib_pfm_pfm_get_os_event_encoding" = x""yes; then
+  HAVE_LIBPFM3='0'; HAVE_LIBPFM='1'
 else
 
-			{ $as_echo "$as_me:$LINENO: checking for pfm_get_os_event_encoding in -lpfm" >&5
-$as_echo_n "checking for pfm_get_os_event_encoding in -lpfm... " >&6; }
-if test "${ac_cv_lib_pfm_pfm_get_os_event_encoding+set}" = set; then
+			{ $as_echo "$as_me:$LINENO: checking for pfm_get_event_name in -lpfm" >&5
+$as_echo_n "checking for pfm_get_event_name in -lpfm... " >&6; }
+if test "${ac_cv_lib_pfm_pfm_get_event_name+set}" = set; then
   $as_echo_n "(cached) " >&6
 else
   ac_check_lib_save_LIBS=$LIBS
@@ -17211,11 +17357,11 @@ cat >>conftest.$ac_ext <<_ACEOF
 #ifdef __cplusplus
 extern "C"
 #endif
-char pfm_get_os_event_encoding ();
+char pfm_get_event_name ();
 int
 main ()
 {
-return pfm_get_os_event_encoding ();
+return pfm_get_event_name ();
   ;
   return 0;
 }
@@ -17241,12 +17387,12 @@ $as_echo "$ac_try_echo") >&5
 	 test "$cross_compiling" = yes ||
 	 $as_test_x conftest$ac_exeext
        }; then
-  ac_cv_lib_pfm_pfm_get_os_event_encoding=yes
+  ac_cv_lib_pfm_pfm_get_event_name=yes
 else
   $as_echo "$as_me: failed program was:" >&5
 sed 's/^/| /' conftest.$ac_ext >&5
 
-	ac_cv_lib_pfm_pfm_get_os_event_encoding=no
+	ac_cv_lib_pfm_pfm_get_event_name=no
 fi
 
 rm -rf conftest.dSYM
@@ -17254,13 +17400,13 @@ rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
       conftest$ac_exeext conftest.$ac_ext
 LIBS=$ac_check_lib_save_LIBS
 fi
-{ $as_echo "$as_me:$LINENO: result: $ac_cv_lib_pfm_pfm_get_os_event_encoding" >&5
-$as_echo "$ac_cv_lib_pfm_pfm_get_os_event_encoding" >&6; }
-if test "x$ac_cv_lib_pfm_pfm_get_os_event_encoding" = x""yes; then
-  HAVE_LIBPFM3='0'; HAVE_LIBPFM='1'
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_lib_pfm_pfm_get_event_name" >&5
+$as_echo "$ac_cv_lib_pfm_pfm_get_event_name" >&6; }
+if test "x$ac_cv_lib_pfm_pfm_get_event_name" = x""yes; then
+  HAVE_LIBPFM3='1'; HAVE_LIBPFM='1'
 else
-  { { $as_echo "$as_me:$LINENO: error: libpfm not found; usually provided in papi devel package" >&5
-$as_echo "$as_me: error: libpfm not found; usually provided in papi devel package" >&2;}
+  { { $as_echo "$as_me:$LINENO: error: libpfm not found; may be provided by libpfm devel or papi devel package" >&5
+$as_echo "$as_me: error: libpfm not found; may be provided by libpfm devel or papi devel package" >&2;}
    { (exit 1); exit 1; }; }
 fi
 
@@ -18512,106 +18658,26 @@ ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $
 ac_compiler_gnu=$ac_cv_c_compiler_gnu
 
 # Determine if bfd_get_synthetic_symtab macro is available
-OS="`uname`"
-if test "$OS" = "Linux"; then
-	{ $as_echo "$as_me:$LINENO: checking whether bfd_get_synthetic_symtab() exists in BFD library" >&5
+{ $as_echo "$as_me:$LINENO: checking whether bfd_get_synthetic_symtab() exists in BFD library" >&5
 $as_echo_n "checking whether bfd_get_synthetic_symtab() exists in BFD library... " >&6; }
-	rm -f test-for-synth
-	cat >conftest.$ac_ext <<_ACEOF
+cat >conftest.$ac_ext <<_ACEOF
 /* confdefs.h.  */
 _ACEOF
 cat confdefs.h >>conftest.$ac_ext
 cat >>conftest.$ac_ext <<_ACEOF
 /* end confdefs.h.  */
 #include <bfd.h>
+	#include <stdio.h>
+	static bfd _ibfd;
 int
 main ()
 {
-asymbol * synthsyms;	bfd * ibfd = 0;
-			long synth_count = bfd_get_synthetic_symtab(ibfd, 0, 0, 0, 0, &synthsyms);
-			extern const bfd_target bfd_elf64_powerpc_vec;
-			extern const bfd_target bfd_elf64_powerpcle_vec;
-			char * ppc_name = bfd_elf64_powerpc_vec.name;
-			char * ppcle_name = bfd_elf64_powerpcle_vec.name;
-			printf("%s %s\n", ppc_name, ppcle_name);
-  ;
-  return 0;
-}
-
-_ACEOF
-	$CC conftest.$ac_ext $CFLAGS $LDFLAGS $LIBS -o  test-for-synth > /dev/null 2>&1
-	if test -f test-for-synth; then
-		echo "yes"
-		SYNTHESIZE_SYMBOLS='1'
-	else
-		echo "no"
-		SYNTHESIZE_SYMBOLS='0'
-	fi
-
-cat >>confdefs.h <<_ACEOF
-#define SYNTHESIZE_SYMBOLS $SYNTHESIZE_SYMBOLS
-_ACEOF
-
-	rm -f test-for-synth*
-
-fi
-ac_ext=c
-ac_cpp='$CPP $CPPFLAGS'
-ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
-ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
-ac_compiler_gnu=$ac_cv_c_compiler_gnu
-
-
-
-# Now we can restore original flag values, and may as well do the
-# AC_SUBST, too.
-CPPFLAGS="$CPPFLAGS_SAVE"
-LDFLAGS="$LDFLAGS_SAVE"
-
-
-
-
-# On Cell BE architecture, OProfile uses bfd_openr_iovec when processing some
-# SPU profiles.  To parse embedded SPU ELF on Cell BE, OProfile requires a
-# version of bfd_openr_iovec that supports the elf32-spu target.
-# This version of the function also has a 7th parameter that's been added.
-# First, we check for existence of the base bfd_openr_iovec.  If it exists,
-# we then use a temporary test program below that passes 7 arguments to
-# bfd_openr_iovec; if it compiles OK, we assume we have the right BFD
-# library to support Cell BE SPU profiling.
-
-ac_ext=c
-ac_cpp='$CPP $CPPFLAGS'
-ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
-ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
-ac_compiler_gnu=$ac_cv_c_compiler_gnu
-
-
-{ $as_echo "$as_me:$LINENO: checking for bfd_openr_iovec in -lbfd" >&5
-$as_echo_n "checking for bfd_openr_iovec in -lbfd... " >&6; }
-if test "${ac_cv_lib_bfd_bfd_openr_iovec+set}" = set; then
-  $as_echo_n "(cached) " >&6
-else
-  ac_check_lib_save_LIBS=$LIBS
-LIBS="-lbfd  $LIBS"
-cat >conftest.$ac_ext <<_ACEOF
-/* confdefs.h.  */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h.  */
+asymbol * synthsyms;	bfd * ibfd = &_ibfd;
+	long synth_count = bfd_get_synthetic_symtab(ibfd, 0, 0, 0, 0, &synthsyms);
+	extern const bfd_target powerpc_elf64_vec;
+	char *ppc_name = powerpc_elf64_vec.name;
+	printf("%s\n", ppc_name);
 
-/* Override any GCC internal prototype to avoid an error.
-   Use char because int might match the return type of a GCC
-   builtin and then its argument prototype would still apply.  */
-#ifdef __cplusplus
-extern "C"
-#endif
-char bfd_openr_iovec ();
-int
-main ()
-{
-return bfd_openr_iovec ();
   ;
   return 0;
 }
@@ -18637,32 +18703,13 @@ $as_echo "$ac_try_echo") >&5
 	 test "$cross_compiling" = yes ||
 	 $as_test_x conftest$ac_exeext
        }; then
-  ac_cv_lib_bfd_bfd_openr_iovec=yes
+  { $as_echo "$as_me:$LINENO: result: yes" >&5
+$as_echo "yes" >&6; }
+	SYNTHESIZE_SYMBOLS=2
 else
   $as_echo "$as_me: failed program was:" >&5
 sed 's/^/| /' conftest.$ac_ext >&5
 
-	ac_cv_lib_bfd_bfd_openr_iovec=no
-fi
-
-rm -rf conftest.dSYM
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
-      conftest$ac_exeext conftest.$ac_ext
-LIBS=$ac_check_lib_save_LIBS
-fi
-{ $as_echo "$as_me:$LINENO: result: $ac_cv_lib_bfd_bfd_openr_iovec" >&5
-$as_echo "$ac_cv_lib_bfd_bfd_openr_iovec" >&6; }
-if test "x$ac_cv_lib_bfd_bfd_openr_iovec" = x""yes; then
-  bfd_openr_iovec_exists="yes"
-else
-  bfd_openr_iovec_exists="no"
-
-fi
-
-
-if test "$bfd_openr_iovec_exists" = "yes"; then
-	{ $as_echo "$as_me:$LINENO: checking whether bfd_openr_iovec has seven parameters" >&5
-$as_echo_n "checking whether bfd_openr_iovec has seven parameters... " >&6; }
 	cat >conftest.$ac_ext <<_ACEOF
 /* confdefs.h.  */
 _ACEOF
@@ -18670,28 +18717,30 @@ cat confdefs.h >>conftest.$ac_ext
 cat >>conftest.$ac_ext <<_ACEOF
 /* end confdefs.h.  */
 #include <bfd.h>
-	  #include <stdlib.h>
-
+		#include <stdio.h>
+		static bfd _ibfd;
 int
 main ()
 {
-struct bfd *nbfd = bfd_openr_iovec("some-file", "elf32-spu",
-			NULL, NULL, NULL, NULL, NULL);
-	  return 0;
+asymbol * synthsyms;	bfd * ibfd = &_ibfd;
+		long synth_count = bfd_get_synthetic_symtab(ibfd, 0, 0, 0, 0, &synthsyms);
+		extern const bfd_target bfd_elf64_powerpc_vec;
+		char *ppc_name = bfd_elf64_powerpc_vec.name;
+		printf("%s\n", ppc_name);
 
   ;
   return 0;
 }
 _ACEOF
-rm -f conftest.$ac_objext
-if { (ac_try="$ac_compile"
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
 case "(($ac_try" in
   *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
   *) ac_try_echo=$ac_try;;
 esac
 eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
 $as_echo "$ac_try_echo") >&5
-  (eval "$ac_compile") 2>conftest.er1
+  (eval "$ac_link") 2>conftest.er1
   ac_status=$?
   grep -v '^ *+' conftest.er1 >conftest.err
   rm -f conftest.er1
@@ -18700,27 +18749,37 @@ $as_echo "$ac_try_echo") >&5
   (exit $ac_status); } && {
 	 test -z "$ac_c_werror_flag" ||
 	 test ! -s conftest.err
-       } && test -s conftest.$ac_objext; then
-
-cat >>confdefs.h <<\_ACEOF
-#define HAVE_BFD_OPENR_IOVEC_WITH_7PARMS /**/
-_ACEOF
-
-	bfd_open_iovec_7="yes"
-	{ $as_echo "$as_me:$LINENO: result: yes" >&5
+       } && test -s conftest$ac_exeext && {
+	 test "$cross_compiling" = yes ||
+	 $as_test_x conftest$ac_exeext
+       }; then
+  { $as_echo "$as_me:$LINENO: result: yes" >&5
 $as_echo "yes" >&6; }
+		SYNTHESIZE_SYMBOLS=1
 else
   $as_echo "$as_me: failed program was:" >&5
 sed 's/^/| /' conftest.$ac_ext >&5
 
 	{ $as_echo "$as_me:$LINENO: result: no" >&5
 $as_echo "no" >&6; }
-
+		SYNTHESIZE_SYMBOLS=0
 fi
 
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+rm -rf conftest.dSYM
+rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+      conftest$ac_exeext conftest.$ac_ext
+
 fi
 
+rm -rf conftest.dSYM
+rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+      conftest$ac_exeext conftest.$ac_ext
+
+cat >>confdefs.h <<_ACEOF
+#define SYNTHESIZE_SYMBOLS $SYNTHESIZE_SYMBOLS
+_ACEOF
+
+
 ac_ext=c
 ac_cpp='$CPP $CPPFLAGS'
 ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
@@ -18728,24 +18787,11 @@ ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $
 ac_compiler_gnu=$ac_cv_c_compiler_gnu
 
 
-arch="unknown"
-
-# Check whether --with-target was given.
-if test "${with_target+set}" = set; then
-  withval=$with_target; arch=$withval
-fi
-
 
-if test "$arch" = "cell-be"; then
-        if test "$bfd_open_iovec_7" = "yes"; then
-	        { $as_echo "$as_me:$LINENO: BFD library has support for Cell Broadband Engine SPU profiling" >&5
-$as_echo "$as_me: BFD library has support for Cell Broadband Engine SPU profiling" >&6;}
-	else
-		{ { $as_echo "$as_me:$LINENO: error: BFD library does not support elf32-spu target; SPU profiling is unsupported" >&5
-$as_echo "$as_me: error: BFD library does not support elf32-spu target; SPU profiling is unsupported" >&2;}
-   { (exit 1); exit 1; }; }
-	fi
-fi
+# Now we can restore original flag values, and may as well do the
+# AC_SUBST, too.
+CPPFLAGS="$CPPFLAGS_SAVE"
+LDFLAGS="$LDFLAGS_SAVE"
 
 
 
@@ -20384,790 +20430,98 @@ rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
       conftest$ac_exeext conftest.$ac_ext
 LIBS=$ac_check_lib_save_LIBS
 fi
-{ $as_echo "$as_me:$LINENO: result: $ac_cv_lib_ipc_shmat" >&5
-$as_echo "$ac_cv_lib_ipc_shmat" >&6; }
-if test "x$ac_cv_lib_ipc_shmat" = x""yes; then
-  X_EXTRA_LIBS="$X_EXTRA_LIBS -lipc"
-fi
-
-    fi
-  fi
-
-  # Check for libraries that X11R6 Xt/Xaw programs need.
-  ac_save_LDFLAGS=$LDFLAGS
-  test -n "$x_libraries" && LDFLAGS="$LDFLAGS -L$x_libraries"
-  # SM needs ICE to (dynamically) link under SunOS 4.x (so we have to
-  # check for ICE first), but we must link in the order -lSM -lICE or
-  # we get undefined symbols.  So assume we have SM if we have ICE.
-  # These have to be linked with before -lX11, unlike the other
-  # libraries we check for below, so use a different variable.
-  # John Interrante, Karl Berry
-  { $as_echo "$as_me:$LINENO: checking for IceConnectionNumber in -lICE" >&5
-$as_echo_n "checking for IceConnectionNumber in -lICE... " >&6; }
-if test "${ac_cv_lib_ICE_IceConnectionNumber+set}" = set; then
-  $as_echo_n "(cached) " >&6
-else
-  ac_check_lib_save_LIBS=$LIBS
-LIBS="-lICE $X_EXTRA_LIBS $LIBS"
-cat >conftest.$ac_ext <<_ACEOF
-/* confdefs.h.  */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h.  */
-
-/* Override any GCC internal prototype to avoid an error.
-   Use char because int might match the return type of a GCC
-   builtin and then its argument prototype would still apply.  */
-#ifdef __cplusplus
-extern "C"
-#endif
-char IceConnectionNumber ();
-int
-main ()
-{
-return IceConnectionNumber ();
-  ;
-  return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext conftest$ac_exeext
-if { (ac_try="$ac_link"
-case "(($ac_try" in
-  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
-  *) ac_try_echo=$ac_try;;
-esac
-eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
-$as_echo "$ac_try_echo") >&5
-  (eval "$ac_link") 2>conftest.er1
-  ac_status=$?
-  grep -v '^ *+' conftest.er1 >conftest.err
-  rm -f conftest.er1
-  cat conftest.err >&5
-  $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
-  (exit $ac_status); } && {
-	 test -z "$ac_cxx_werror_flag" ||
-	 test ! -s conftest.err
-       } && test -s conftest$ac_exeext && {
-	 test "$cross_compiling" = yes ||
-	 $as_test_x conftest$ac_exeext
-       }; then
-  ac_cv_lib_ICE_IceConnectionNumber=yes
-else
-  $as_echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-	ac_cv_lib_ICE_IceConnectionNumber=no
-fi
-
-rm -rf conftest.dSYM
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
-      conftest$ac_exeext conftest.$ac_ext
-LIBS=$ac_check_lib_save_LIBS
-fi
-{ $as_echo "$as_me:$LINENO: result: $ac_cv_lib_ICE_IceConnectionNumber" >&5
-$as_echo "$ac_cv_lib_ICE_IceConnectionNumber" >&6; }
-if test "x$ac_cv_lib_ICE_IceConnectionNumber" = x""yes; then
-  X_PRE_LIBS="$X_PRE_LIBS -lSM -lICE"
-fi
-
-  LDFLAGS=$ac_save_LDFLAGS
-
-fi
-
-ORIG_X_SAVE_LIBS="$LIBS"
-LIBS="$X_PRE_LIBS $LIBS $X_LIBS -lX11 $X_EXTRA_LIBS"
-X_LIBS="$LIBS"
-
-
-# Check whether --enable-gui was given.
-if test "${enable_gui+set}" = set; then
-  enableval=$enable_gui;
-else
-  enable_gui=qt3
-fi
-
-
-if test "x$enable_gui"  = "xqt3"  || test "x$enable_gui"  = "xyes"; then
-	QT_VERSION=3
-
-			FATAL=0
-
-
-# Check whether --with-qt-dir was given.
-if test "${with_qt_dir+set}" = set; then
-  withval=$with_qt_dir;  qt_cv_dir=`eval echo "$withval"/`
-fi
-
-
-
-# Check whether --with-qt-includes was given.
-if test "${with_qt_includes+set}" = set; then
-  withval=$with_qt_includes;  qt_cv_includes=`eval echo "$withval"`
-fi
-
-
-
-# Check whether --with-qt-libraries was given.
-if test "${with_qt_libraries+set}" = set; then
-  withval=$with_qt_libraries;   qt_cv_libraries=`eval echo "$withval"`
-fi
-
-
-		if test -z "$qt_cv_dir"; then
-		qt_cv_dir=$QTDIR
-	fi
-
-		if test -n "$qt_cv_dir"; then
-		if test -z "$qt_cv_includes"; then
-			qt_cv_includes=$qt_cv_dir/include
-		fi
-		if test -z "$qt_cv_libraries"; then
-			qt_cv_libraries=$qt_cv_dir/lib
-		fi
-	fi
-
-		QT_INCLUDES=
-	QT_LDFLAGS=
-	if test -n "$qt_cv_includes"; then
-		QT_INCLUDES="-isystem $qt_cv_includes"
-	fi
-	if test -n "$qt_cv_libraries"; then
-		QT_LDFLAGS="-L$qt_cv_libraries"
-	fi
-
-
-
-
-
-	{ $as_echo "$as_me:$LINENO: checking for moc2" >&5
-$as_echo_n "checking for moc2... " >&6; }
-	if test "${qt_cv_path_moc2+set}" = set; then
-  $as_echo_n "(cached) " >&6
-else
-
-		qt_cv_path_moc2="NONE"
-		if test -n "$ac_moc2"; then
-			qt_cv_path_moc2="$ac_moc2";
-		else
-			dirs="$qt_cv_dir/bin"
-			qt_save_IFS=$IFS
-			IFS=':'
-			for dir in $PATH; do
-				dirs="$dirs $dir"
-			done
-			IFS=$qt_save_IFS
-
-			for dir in $dirs; do
-				if test -x "$dir/moc2"; then
-					if test -n ""; then
-						evalstr="$dir/moc2  2>&1 "
-						if eval $evalstr; then
-							qt_cv_path_moc2="$dir/moc2"
-							break
-						fi
-					else
-						qt_cv_path_moc2="$dir/moc2"
-						break
-					fi
-				fi
-			done
-		fi
-
-fi
-
-
-	if test -z "$qt_cv_path_moc2" || test "$qt_cv_path_moc2" = "NONE"; then
-		{ $as_echo "$as_me:$LINENO: result: not found" >&5
-$as_echo "not found" >&6; }
-
-	else
-		{ $as_echo "$as_me:$LINENO: result: $qt_cv_path_moc2" >&5
-$as_echo "$qt_cv_path_moc2" >&6; }
-		ac_moc2=$qt_cv_path_moc2
-	fi
-
-
-	{ $as_echo "$as_me:$LINENO: checking for moc" >&5
-$as_echo_n "checking for moc... " >&6; }
-	if test "${qt_cv_path_moc+set}" = set; then
-  $as_echo_n "(cached) " >&6
-else
-
-		qt_cv_path_moc="NONE"
-		if test -n "$ac_moc1"; then
-			qt_cv_path_moc="$ac_moc1";
-		else
-			dirs="$qt_cv_dir/bin"
-			qt_save_IFS=$IFS
-			IFS=':'
-			for dir in $PATH; do
-				dirs="$dirs $dir"
-			done
-			IFS=$qt_save_IFS
-
-			for dir in $dirs; do
-				if test -x "$dir/moc"; then
-					if test -n ""; then
-						evalstr="$dir/moc  2>&1 "
-						if eval $evalstr; then
-							qt_cv_path_moc="$dir/moc"
-							break
-						fi
-					else
-						qt_cv_path_moc="$dir/moc"
-						break
-					fi
-				fi
-			done
-		fi
-
-fi
-
-
-	if test -z "$qt_cv_path_moc" || test "$qt_cv_path_moc" = "NONE"; then
-		{ $as_echo "$as_me:$LINENO: result: not found" >&5
-$as_echo "not found" >&6; }
-
-	else
-		{ $as_echo "$as_me:$LINENO: result: $qt_cv_path_moc" >&5
-$as_echo "$qt_cv_path_moc" >&6; }
-		ac_moc1=$qt_cv_path_moc
-	fi
-
-
-	if test -n "$ac_moc1" -a -n "$ac_moc2"; then
-				$ac_moc1 -v 2>&1 | grep "Qt 3" >/dev/null
-		if test "$?" = 0; then
-			ac_moc=$ac_moc1;
-		else
-			ac_moc=$ac_moc2;
-		fi
-	else
-		if test -n "$ac_moc1"; then
-			ac_moc=$ac_moc1;
-		else
-			ac_moc=$ac_moc2;
-		fi
-	fi
-
-	if test -z "$ac_moc"  -a "$FATAL" = 1; then
-		{ { $as_echo "$as_me:$LINENO: error: moc binary not found in \$PATH or $qt_cv_dir/bin !" >&5
-$as_echo "$as_me: error: moc binary not found in \$PATH or $qt_cv_dir/bin !" >&2;}
-   { (exit 1); exit 1; }; }
-	fi
-
-	MOC=$ac_moc
-
-
-
-	{ $as_echo "$as_me:$LINENO: checking for uic" >&5
-$as_echo_n "checking for uic... " >&6; }
-	if test "${qt_cv_path_uic+set}" = set; then
-  $as_echo_n "(cached) " >&6
-else
-
-		qt_cv_path_uic="NONE"
-		if test -n "$ac_uic"; then
-			qt_cv_path_uic="$ac_uic";
-		else
-			dirs="$qt_cv_dir/bin"
-			qt_save_IFS=$IFS
-			IFS=':'
-			for dir in $PATH; do
-				dirs="$dirs $dir"
-			done
-			IFS=$qt_save_IFS
-
-			for dir in $dirs; do
-				if test -x "$dir/uic"; then
-					if test -n ""; then
-						evalstr="$dir/uic  2>&1 "
-						if eval $evalstr; then
-							qt_cv_path_uic="$dir/uic"
-							break
-						fi
-					else
-						qt_cv_path_uic="$dir/uic"
-						break
-					fi
-				fi
-			done
-		fi
-
-fi
-
-
-	if test -z "$qt_cv_path_uic" || test "$qt_cv_path_uic" = "NONE"; then
-		{ $as_echo "$as_me:$LINENO: result: not found" >&5
-$as_echo "not found" >&6; }
-
-	else
-		{ $as_echo "$as_me:$LINENO: result: $qt_cv_path_uic" >&5
-$as_echo "$qt_cv_path_uic" >&6; }
-		ac_uic=$qt_cv_path_uic
-	fi
-
-	if test -z "$ac_uic" -a "$FATAL" = 1; then
-		{ { $as_echo "$as_me:$LINENO: error: uic binary not found in \$PATH or $qt_cv_dir/bin !" >&5
-$as_echo "$as_me: error: uic binary not found in \$PATH or $qt_cv_dir/bin !" >&2;}
-   { (exit 1); exit 1; }; }
-	fi
-
-	UIC=$ac_uic
-
-
-
-	{ $as_echo "$as_me:$LINENO: checking in lib for Qt library name" >&5
-$as_echo_n "checking in lib for Qt library name... " >&6; }
-
-	if test "${qt_cv_libname+set}" = set; then
-  $as_echo_n "(cached) " >&6
-else
-
-		ac_ext=cpp
-ac_cpp='$CXXCPP $CPPFLAGS'
-ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
-ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
-ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
-
-		SAVE_CXXFLAGS=$CXXFLAGS
-		CXXFLAGS="$CXXFLAGS $QT_INCLUDES $QT_LDFLAGS"
-
-		for libname in -lqt-mt -lqt3 -lqt2 -lqt;
-		do
-
-	SAVE_LIBS="$LIBS"
-	LIBS="$LIBS $libname"
-	cat >conftest.$ac_ext <<_ACEOF
-/* confdefs.h.  */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h.  */
-
-	#include <qglobal.h>
-	#include <qstring.h>
-
-int
-main ()
-{
-
-	QString s("mangle_failure");
-	#if (QT_VERSION < 221)
-	break_me_(\\\);
-	#endif
-
-  ;
-  return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext conftest$ac_exeext
-if { (ac_try="$ac_link"
-case "(($ac_try" in
-  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
-  *) ac_try_echo=$ac_try;;
-esac
-eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
-$as_echo "$ac_try_echo") >&5
-  (eval "$ac_link") 2>conftest.er1
-  ac_status=$?
-  grep -v '^ *+' conftest.er1 >conftest.err
-  rm -f conftest.er1
-  cat conftest.err >&5
-  $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
-  (exit $ac_status); } && {
-	 test -z "$ac_cxx_werror_flag" ||
-	 test ! -s conftest.err
-       } && test -s conftest$ac_exeext && {
-	 test "$cross_compiling" = yes ||
-	 $as_test_x conftest$ac_exeext
-       }; then
-  qt_cv_libname=$libname
-else
-  $as_echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-
-fi
-
-rm -rf conftest.dSYM
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
-      conftest$ac_exeext conftest.$ac_ext
-	LIBS="$SAVE_LIBS"
-
-			if test -n "$qt_cv_libname"; then
-				break;
-			fi
-		done
-
-		CXXFLAGS=$SAVE_CXXFLAGS
-
-fi
-
-
-	if test -z "$qt_cv_libname"; then
-		{ $as_echo "$as_me:$LINENO: result: failed" >&5
-$as_echo "failed" >&6; }
-		if test "$FATAL" = 1 ; then
-			{ { $as_echo "$as_me:$LINENO: error: Cannot compile a simple Qt executable. Check you have the right \$QTDIR !" >&5
-$as_echo "$as_me: error: Cannot compile a simple Qt executable. Check you have the right \$QTDIR !" >&2;}
-   { (exit 1); exit 1; }; }
-		fi
-	else
-		{ $as_echo "$as_me:$LINENO: result: $qt_cv_libname" >&5
-$as_echo "$qt_cv_libname" >&6; }
-	fi
-
-	if test -z "$qt_cv_libname"; then
-		if test -n "$qt_cv_dir"; then
-					qt_cv_libraries=$qt_cv_dir/lib64
-			QT_LDFLAGS="-L$qt_cv_libraries"
-
-	{ $as_echo "$as_me:$LINENO: checking in lib64 for Qt library name" >&5
-$as_echo_n "checking in lib64 for Qt library name... " >&6; }
-
-	if test "${qt_cv_libname+set}" = set; then
-  $as_echo_n "(cached) " >&6
-else
-
-		ac_ext=cpp
-ac_cpp='$CXXCPP $CPPFLAGS'
-ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
-ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
-ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
-
-		SAVE_CXXFLAGS=$CXXFLAGS
-		CXXFLAGS="$CXXFLAGS $QT_INCLUDES $QT_LDFLAGS"
-
-		for libname in -lqt-mt -lqt3 -lqt2 -lqt;
-		do
-
-	SAVE_LIBS="$LIBS"
-	LIBS="$LIBS $libname"
-	cat >conftest.$ac_ext <<_ACEOF
-/* confdefs.h.  */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h.  */
-
-	#include <qglobal.h>
-	#include <qstring.h>
-
-int
-main ()
-{
-
-	QString s("mangle_failure");
-	#if (QT_VERSION < 221)
-	break_me_(\\\);
-	#endif
-
-  ;
-  return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext conftest$ac_exeext
-if { (ac_try="$ac_link"
-case "(($ac_try" in
-  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
-  *) ac_try_echo=$ac_try;;
-esac
-eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
-$as_echo "$ac_try_echo") >&5
-  (eval "$ac_link") 2>conftest.er1
-  ac_status=$?
-  grep -v '^ *+' conftest.er1 >conftest.err
-  rm -f conftest.er1
-  cat conftest.err >&5
-  $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
-  (exit $ac_status); } && {
-	 test -z "$ac_cxx_werror_flag" ||
-	 test ! -s conftest.err
-       } && test -s conftest$ac_exeext && {
-	 test "$cross_compiling" = yes ||
-	 $as_test_x conftest$ac_exeext
-       }; then
-  qt_cv_libname=$libname
-else
-  $as_echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-
-fi
-
-rm -rf conftest.dSYM
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
-      conftest$ac_exeext conftest.$ac_ext
-	LIBS="$SAVE_LIBS"
-
-			if test -n "$qt_cv_libname"; then
-				break;
-			fi
-		done
-
-		CXXFLAGS=$SAVE_CXXFLAGS
-
-fi
-
-
-	if test -z "$qt_cv_libname"; then
-		{ $as_echo "$as_me:$LINENO: result: failed" >&5
-$as_echo "failed" >&6; }
-		if test "$FATAL" = 1 ; then
-			{ { $as_echo "$as_me:$LINENO: error: Cannot compile a simple Qt executable. Check you have the right \$QTDIR !" >&5
-$as_echo "$as_me: error: Cannot compile a simple Qt executable. Check you have the right \$QTDIR !" >&2;}
-   { (exit 1); exit 1; }; }
-		fi
-	else
-		{ $as_echo "$as_me:$LINENO: result: $qt_cv_libname" >&5
-$as_echo "$qt_cv_libname" >&6; }
-	fi
-
-		fi
-	fi
-
-	QT_LIB=$qt_cv_libname;
-
-
-	if test -n "$qt_cv_libname"; then
-
-	{ $as_echo "$as_me:$LINENO: checking Qt version" >&5
-$as_echo_n "checking Qt version... " >&6; }
-if test "${lyx_cv_qtversion+set}" = set; then
-  $as_echo_n "(cached) " >&6
-else
-
-		ac_ext=cpp
-ac_cpp='$CXXCPP $CPPFLAGS'
-ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
-ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
-ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
-
-		SAVE_CPPFLAGS=$CPPFLAGS
-		CPPFLAGS="$CPPFLAGS $QT_INCLUDES"
-
-		cat > conftest.$ac_ext <<EOF
-#line 20952 "configure"
-#include "confdefs.h"
-#include <qglobal.h>
-"%%%"QT_VERSION_STR"%%%"
-EOF
-		lyx_cv_qtversion=`(eval "$ac_cpp conftest.$ac_ext") 2>&5 | \
-			grep '^"%%%"'  2>/dev/null | \
-			sed -e 's/"%%%"//g' -e 's/"//g'`
-		rm -f conftest.$ac_ext
-		CPPFLAGS=$SAVE_CPPFLAGS
-
-fi
-{ $as_echo "$as_me:$LINENO: result: $lyx_cv_qtversion" >&5
-$as_echo "$lyx_cv_qtversion" >&6; }
-
-	QT_VERSION=$lyx_cv_qtversion
-
-
-	fi
-
-	if test -n "$QT_LIB"; then
-		QT_LIBS="$QT_LIB $QT_LDFLAGS"
-	fi
-	QT_CFLAGS="$QT_INCLUDES"
-fi
-
-if test "x$enable_gui"  = "xqt4"; then
-	QT_VERSION=4
-
-pkg_failed=no
-{ $as_echo "$as_me:$LINENO: checking for QT" >&5
-$as_echo_n "checking for QT... " >&6; }
-
-if test -n "$QT_CFLAGS"; then
-    pkg_cv_QT_CFLAGS="$QT_CFLAGS"
- elif test -n "$PKG_CONFIG"; then
-    if test -n "$PKG_CONFIG" && \
-    { ($as_echo "$as_me:$LINENO: \$PKG_CONFIG --exists --print-errors \"Qt3Support QtGui QtCore \"") >&5
-  ($PKG_CONFIG --exists --print-errors "Qt3Support QtGui QtCore ") 2>&5
-  ac_status=$?
-  $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
-  (exit $ac_status); }; then
-  pkg_cv_QT_CFLAGS=`$PKG_CONFIG --cflags "Qt3Support QtGui QtCore " 2>/dev/null`
-else
-  pkg_failed=yes
-fi
- else
-    pkg_failed=untried
-fi
-if test -n "$QT_LIBS"; then
-    pkg_cv_QT_LIBS="$QT_LIBS"
- elif test -n "$PKG_CONFIG"; then
-    if test -n "$PKG_CONFIG" && \
-    { ($as_echo "$as_me:$LINENO: \$PKG_CONFIG --exists --print-errors \"Qt3Support QtGui QtCore \"") >&5
-  ($PKG_CONFIG --exists --print-errors "Qt3Support QtGui QtCore ") 2>&5
-  ac_status=$?
-  $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
-  (exit $ac_status); }; then
-  pkg_cv_QT_LIBS=`$PKG_CONFIG --libs "Qt3Support QtGui QtCore " 2>/dev/null`
-else
-  pkg_failed=yes
-fi
- else
-    pkg_failed=untried
-fi
-
-
-
-if test $pkg_failed = yes; then
-
-if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then
-        _pkg_short_errors_supported=yes
-else
-        _pkg_short_errors_supported=no
-fi
-        if test $_pkg_short_errors_supported = yes; then
-	        QT_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors "Qt3Support QtGui QtCore " 2>&1`
-        else
-	        QT_PKG_ERRORS=`$PKG_CONFIG --print-errors "Qt3Support QtGui QtCore " 2>&1`
-        fi
-	# Put the nasty error message in config.log where it belongs
-	echo "$QT_PKG_ERRORS" >&5
-
-	{ $as_echo "$as_me:$LINENO: result: no" >&5
-$as_echo "no" >&6; }
-
-	echo "You requested QT4 but its build files are not available. Exiting now."
-	exit
-
-elif test $pkg_failed = untried; then
-
-	echo "You requested QT4 but its build files are not available. Exiting now."
-	exit
-
-else
-	QT_CFLAGS=$pkg_cv_QT_CFLAGS
-	QT_LIBS=$pkg_cv_QT_LIBS
-        { $as_echo "$as_me:$LINENO: result: yes" >&5
-$as_echo "yes" >&6; }
-	:
-fi
-	MOC=$(pkg-config --variable=moc_location QtCore)
-	UIC=$(dirname $(pkg-config --variable=uic_location QtCore))/uic3
-	QT_VERSION=$(pkg-config --modversion QtCore)
-
-	if test "x$MOC" = "x"; then
-		echo "WARNING: Your QtCore.pc file is buggy, it doesn't provide the variable 'moc_location'"
-		echo "WARNING: I will try to find it in your PATH ..."
-		# Extract the first word of "moc", so it can be a program name with args.
-set dummy moc; ac_word=$2
-{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5
-$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_MOC+set}" = set; then
-  $as_echo_n "(cached) " >&6
-else
-  if test -n "$MOC"; then
-  ac_cv_prog_MOC="$MOC" # Let the user override the test.
-else
-as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
-  IFS=$as_save_IFS
-  test -z "$as_dir" && as_dir=.
-  for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
-    ac_cv_prog_MOC="moc"
-    $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
-    break 2
-  fi
-done
-done
-IFS=$as_save_IFS
-
-fi
-fi
-MOC=$ac_cv_prog_MOC
-if test -n "$MOC"; then
-  { $as_echo "$as_me:$LINENO: result: $MOC" >&5
-$as_echo "$MOC" >&6; }
-else
-  { $as_echo "$as_me:$LINENO: result: no" >&5
-$as_echo "no" >&6; }
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_lib_ipc_shmat" >&5
+$as_echo "$ac_cv_lib_ipc_shmat" >&6; }
+if test "x$ac_cv_lib_ipc_shmat" = x""yes; then
+  X_EXTRA_LIBS="$X_EXTRA_LIBS -lipc"
 fi
 
+    fi
+  fi
 
-		if test "x$MOC" = "x"; then
-			echo "WARNING: You can fix this by adding the location of moc to your path."
-			echo "WARNING: Exiting now."
-			exit
-		fi
-	fi
-
-	if test "x$UIC" = "x3"; then
-		echo "WARNING: Your QtCore.pc file is buggy, it doesn't provide the variable 'uic_location'"
-		echo "WARNING: I will try to find it in your PATH ..."
-		# Extract the first word of "uic3", so it can be a program name with args.
-set dummy uic3; ac_word=$2
-{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5
-$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_UIChelp+set}" = set; then
+  # Check for libraries that X11R6 Xt/Xaw programs need.
+  ac_save_LDFLAGS=$LDFLAGS
+  test -n "$x_libraries" && LDFLAGS="$LDFLAGS -L$x_libraries"
+  # SM needs ICE to (dynamically) link under SunOS 4.x (so we have to
+  # check for ICE first), but we must link in the order -lSM -lICE or
+  # we get undefined symbols.  So assume we have SM if we have ICE.
+  # These have to be linked with before -lX11, unlike the other
+  # libraries we check for below, so use a different variable.
+  # John Interrante, Karl Berry
+  { $as_echo "$as_me:$LINENO: checking for IceConnectionNumber in -lICE" >&5
+$as_echo_n "checking for IceConnectionNumber in -lICE... " >&6; }
+if test "${ac_cv_lib_ICE_IceConnectionNumber+set}" = set; then
   $as_echo_n "(cached) " >&6
 else
-  if test -n "$UIChelp"; then
-  ac_cv_prog_UIChelp="$UIChelp" # Let the user override the test.
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-lICE $X_EXTRA_LIBS $LIBS"
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char IceConnectionNumber ();
+int
+main ()
+{
+return IceConnectionNumber ();
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+  (eval "$ac_link") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } && {
+	 test -z "$ac_cxx_werror_flag" ||
+	 test ! -s conftest.err
+       } && test -s conftest$ac_exeext && {
+	 test "$cross_compiling" = yes ||
+	 $as_test_x conftest$ac_exeext
+       }; then
+  ac_cv_lib_ICE_IceConnectionNumber=yes
 else
-as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
-  IFS=$as_save_IFS
-  test -z "$as_dir" && as_dir=.
-  for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
-    ac_cv_prog_UIChelp="uic3"
-    $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
-    break 2
-  fi
-done
-done
-IFS=$as_save_IFS
+  $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
 
+	ac_cv_lib_ICE_IceConnectionNumber=no
 fi
+
+rm -rf conftest.dSYM
+rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+      conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
 fi
-UIChelp=$ac_cv_prog_UIChelp
-if test -n "$UIChelp"; then
-  { $as_echo "$as_me:$LINENO: result: $UIChelp" >&5
-$as_echo "$UIChelp" >&6; }
-else
-  { $as_echo "$as_me:$LINENO: result: no" >&5
-$as_echo "no" >&6; }
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_lib_ICE_IceConnectionNumber" >&5
+$as_echo "$ac_cv_lib_ICE_IceConnectionNumber" >&6; }
+if test "x$ac_cv_lib_ICE_IceConnectionNumber" = x""yes; then
+  X_PRE_LIBS="$X_PRE_LIBS -lSM -lICE"
 fi
 
+  LDFLAGS=$ac_save_LDFLAGS
 
-		if test "x$UIChelp" = "x"; then
-			echo "WARNING: You can fix this by adding the location of uic3 to your path."
-			echo "WARNING: Exiting now."
-			exit
-		else
-			UIC="$UIChelp"
-		fi
-	fi
-
-	flags_has_qt3support=$(echo $QT_CFLAGS | grep QT3_SUPPORT)
-	if test "x$flags_has_qt3support" = "x" ; then
-		echo "WARNING: Your Qt3Support package is buggy; it dosn't include the 'QT3_SUPPORT' flag"
-		echo "WARNING: adding it manually"
-		QT_CFLAGS="$QT_CFLAGS -DQT3_SUPPORT"
-	fi
-fi
- if test -n "$QT_LIBS"; then
-  have_qt_TRUE=
-  have_qt_FALSE='#'
-else
-  have_qt_TRUE='#'
-  have_qt_FALSE=
 fi
 
 
-LIBS="$ORIG_X_SAVE_LIBS"
-
 # Check whether --enable-pch was given.
 if test "${enable_pch+set}" = set; then
   enableval=$enable_pch; enable_pch=$enableval
@@ -22303,6 +21657,173 @@ fi
 
 
 
+{ $as_echo "$as_me:$LINENO: checking for clock_gettime" >&5
+$as_echo_n "checking for clock_gettime... " >&6; }
+if test "${ac_cv_func_clock_gettime+set}" = set; then
+  $as_echo_n "(cached) " >&6
+else
+  cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+/* Define clock_gettime to an innocuous variant, in case <limits.h> declares clock_gettime.
+   For example, HP-UX 11i <limits.h> declares gettimeofday.  */
+#define clock_gettime innocuous_clock_gettime
+
+/* System header to define __stub macros and hopefully few prototypes,
+    which can conflict with char clock_gettime (); below.
+    Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+    <limits.h> exists even on freestanding compilers.  */
+
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+
+#undef clock_gettime
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char clock_gettime ();
+/* The GNU C library defines this for functions which it implements
+    to always fail with ENOSYS.  Some functions are actually named
+    something starting with __ and the normal name is an alias.  */
+#if defined __stub_clock_gettime || defined __stub___clock_gettime
+choke me
+#endif
+
+int
+main ()
+{
+return clock_gettime ();
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+  (eval "$ac_link") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } && {
+	 test -z "$ac_cxx_werror_flag" ||
+	 test ! -s conftest.err
+       } && test -s conftest$ac_exeext && {
+	 test "$cross_compiling" = yes ||
+	 $as_test_x conftest$ac_exeext
+       }; then
+  ac_cv_func_clock_gettime=yes
+else
+  $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+	ac_cv_func_clock_gettime=no
+fi
+
+rm -rf conftest.dSYM
+rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+      conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_func_clock_gettime" >&5
+$as_echo "$ac_cv_func_clock_gettime" >&6; }
+if test "x$ac_cv_func_clock_gettime" = x""yes; then
+  :
+else
+
+	{ $as_echo "$as_me:$LINENO: checking for clock_gettime in -lrt" >&5
+$as_echo_n "checking for clock_gettime in -lrt... " >&6; }
+if test "${ac_cv_lib_rt_clock_gettime+set}" = set; then
+  $as_echo_n "(cached) " >&6
+else
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-lrt  $LIBS"
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char clock_gettime ();
+int
+main ()
+{
+return clock_gettime ();
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+  (eval "$ac_link") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } && {
+	 test -z "$ac_cxx_werror_flag" ||
+	 test ! -s conftest.err
+       } && test -s conftest$ac_exeext && {
+	 test "$cross_compiling" = yes ||
+	 $as_test_x conftest$ac_exeext
+       }; then
+  ac_cv_lib_rt_clock_gettime=yes
+else
+  $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+	ac_cv_lib_rt_clock_gettime=no
+fi
+
+rm -rf conftest.dSYM
+rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+      conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_lib_rt_clock_gettime" >&5
+$as_echo "$ac_cv_lib_rt_clock_gettime" >&6; }
+if test "x$ac_cv_lib_rt_clock_gettime" = x""yes; then
+  RT_LIB="-lrt"
+else
+  { { $as_echo "$as_me:$LINENO: error: Unable to find clock_gettime function; required by ocount" >&5
+$as_echo "$as_me: error: Unable to find clock_gettime function; required by ocount" >&2;}
+   { (exit 1); exit 1; }; }
+fi
+
+fi
+
+
+
+
 # fixups for config.h
 if test "$prefix" = "NONE"; then
 	my_op_prefix="$ac_default_prefix"
@@ -22333,7 +21854,7 @@ _ACEOF
 OP_DOCDIR=`eval echo "${my_op_prefix}/share/doc/$PACKAGE/"`
 
 
-ac_config_files="$ac_config_files Makefile pe_profiling/Makefile libperf_events/Makefile m4/Makefile libutil/Makefile libutil/tests/Makefile libutil++/Makefile libutil++/tests/Makefile libop/Makefile libop/tests/Makefile libopagent/Makefile libopt++/Makefile libdb/Makefile libdb/tests/Makefile libabi/Makefile libabi/tests/Makefile libregex/Makefile libregex/tests/Makefile libregex/stl.pat libregex/tests/mangled-name daemon/Makefile daemon/liblegacy/Makefile events/Makefile utils/Makefile doc/Makefile doc/xsl/catalog-1.xml doc/oprofile.1 doc/opcontrol.1 doc/ophelp.1 doc/opreport.1 doc/opannotate.1 doc/opgprof.1 doc/oparchive.1 doc/opimport.1 doc/operf.1 doc/srcdoc/Doxyfile libpp/Makefile opjitconv/Makefile pp/Makefile gui/Makefile gui/ui/Makefile agents/Makefile agents/jvmti/Makefile agents/jvmpi/Makefile"
+ac_config_files="$ac_config_files Makefile pe_counting/Makefile libpe_utils/Makefile pe_profiling/Makefile libperf_events/Makefile m4/Makefile libutil/Makefile libutil/tests/Makefile libutil++/Makefile libutil++/tests/Makefile libop/Makefile libop/tests/Makefile libopagent/Makefile libopt++/Makefile libdb/Makefile libdb/tests/Makefile libabi/Makefile libabi/tests/Makefile libregex/Makefile libregex/tests/Makefile libregex/stl.pat libregex/tests/mangled-name events/Makefile utils/Makefile doc/Makefile doc/xsl/catalog-1.xml doc/oprofile.1 doc/ophelp.1 doc/op-check-perfevents.1 doc/opreport.1 doc/opannotate.1 doc/opgprof.1 doc/oparchive.1 doc/opimport.1 doc/operf.1 doc/ocount.1 doc/srcdoc/Doxyfile libpp/Makefile opjitconv/Makefile pp/Makefile agents/Makefile agents/jvmti/Makefile agents/jvmpi/Makefile"
 
 cat >confcache <<\_ACEOF
 # This file is a shell script that caches the results of configure
@@ -22496,13 +22017,6 @@ $as_echo "$as_me: error: conditional \"BUILD_JVMPI_AGENT\" was never defined.
 Usually this means the macro was only invoked conditionally." >&2;}
    { (exit 1); exit 1; }; }
 fi
-if test -z "${have_qt_TRUE}" && test -z "${have_qt_FALSE}"; then
-  { { $as_echo "$as_me:$LINENO: error: conditional \"have_qt\" was never defined.
-Usually this means the macro was only invoked conditionally." >&5
-$as_echo "$as_me: error: conditional \"have_qt\" was never defined.
-Usually this means the macro was only invoked conditionally." >&2;}
-   { (exit 1); exit 1; }; }
-fi
 if test -z "${have_xsltproc_TRUE}" && test -z "${have_xsltproc_FALSE}"; then
   { { $as_echo "$as_me:$LINENO: error: conditional \"have_xsltproc\" was never defined.
 Usually this means the macro was only invoked conditionally." >&5
@@ -22839,7 +22353,7 @@ exec 6>&1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by $as_me, which was
+This file was extended by OProfile $as_me 1.0.0, which was
 generated by GNU Autoconf 2.63.  Invocation command line was
 
   CONFIG_FILES    = $CONFIG_FILES
@@ -22902,7 +22416,7 @@ Report bugs to <bug-autoconf@gnu.org>."
 _ACEOF
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_version="\\
-config.status
+OProfile config.status 1.0.0
 configured by $0, generated by GNU Autoconf 2.63,
   with options \\"`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\"
 
@@ -23380,6 +22894,8 @@ do
     "depfiles") CONFIG_COMMANDS="$CONFIG_COMMANDS depfiles" ;;
     "libtool") CONFIG_COMMANDS="$CONFIG_COMMANDS libtool" ;;
     "Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;;
+    "pe_counting/Makefile") CONFIG_FILES="$CONFIG_FILES pe_counting/Makefile" ;;
+    "libpe_utils/Makefile") CONFIG_FILES="$CONFIG_FILES libpe_utils/Makefile" ;;
     "pe_profiling/Makefile") CONFIG_FILES="$CONFIG_FILES pe_profiling/Makefile" ;;
     "libperf_events/Makefile") CONFIG_FILES="$CONFIG_FILES libperf_events/Makefile" ;;
     "m4/Makefile") CONFIG_FILES="$CONFIG_FILES m4/Makefile" ;;
@@ -23399,27 +22915,24 @@ do
     "libregex/tests/Makefile") CONFIG_FILES="$CONFIG_FILES libregex/tests/Makefile" ;;
     "libregex/stl.pat") CONFIG_FILES="$CONFIG_FILES libregex/stl.pat" ;;
     "libregex/tests/mangled-name") CONFIG_FILES="$CONFIG_FILES libregex/tests/mangled-name" ;;
-    "daemon/Makefile") CONFIG_FILES="$CONFIG_FILES daemon/Makefile" ;;
-    "daemon/liblegacy/Makefile") CONFIG_FILES="$CONFIG_FILES daemon/liblegacy/Makefile" ;;
     "events/Makefile") CONFIG_FILES="$CONFIG_FILES events/Makefile" ;;
     "utils/Makefile") CONFIG_FILES="$CONFIG_FILES utils/Makefile" ;;
     "doc/Makefile") CONFIG_FILES="$CONFIG_FILES doc/Makefile" ;;
     "doc/xsl/catalog-1.xml") CONFIG_FILES="$CONFIG_FILES doc/xsl/catalog-1.xml" ;;
     "doc/oprofile.1") CONFIG_FILES="$CONFIG_FILES doc/oprofile.1" ;;
-    "doc/opcontrol.1") CONFIG_FILES="$CONFIG_FILES doc/opcontrol.1" ;;
     "doc/ophelp.1") CONFIG_FILES="$CONFIG_FILES doc/ophelp.1" ;;
+    "doc/op-check-perfevents.1") CONFIG_FILES="$CONFIG_FILES doc/op-check-perfevents.1" ;;
     "doc/opreport.1") CONFIG_FILES="$CONFIG_FILES doc/opreport.1" ;;
     "doc/opannotate.1") CONFIG_FILES="$CONFIG_FILES doc/opannotate.1" ;;
     "doc/opgprof.1") CONFIG_FILES="$CONFIG_FILES doc/opgprof.1" ;;
     "doc/oparchive.1") CONFIG_FILES="$CONFIG_FILES doc/oparchive.1" ;;
     "doc/opimport.1") CONFIG_FILES="$CONFIG_FILES doc/opimport.1" ;;
     "doc/operf.1") CONFIG_FILES="$CONFIG_FILES doc/operf.1" ;;
+    "doc/ocount.1") CONFIG_FILES="$CONFIG_FILES doc/ocount.1" ;;
     "doc/srcdoc/Doxyfile") CONFIG_FILES="$CONFIG_FILES doc/srcdoc/Doxyfile" ;;
     "libpp/Makefile") CONFIG_FILES="$CONFIG_FILES libpp/Makefile" ;;
     "opjitconv/Makefile") CONFIG_FILES="$CONFIG_FILES opjitconv/Makefile" ;;
     "pp/Makefile") CONFIG_FILES="$CONFIG_FILES pp/Makefile" ;;
-    "gui/Makefile") CONFIG_FILES="$CONFIG_FILES gui/Makefile" ;;
-    "gui/ui/Makefile") CONFIG_FILES="$CONFIG_FILES gui/ui/Makefile" ;;
     "agents/Makefile") CONFIG_FILES="$CONFIG_FILES agents/Makefile" ;;
     "agents/jvmti/Makefile") CONFIG_FILES="$CONFIG_FILES agents/jvmti/Makefile" ;;
     "agents/jvmpi/Makefile") CONFIG_FILES="$CONFIG_FILES agents/jvmpi/Makefile" ;;
@@ -25068,16 +24581,6 @@ else
 fi
 
 
-if test "x$enable_gui" = "xno" ; then
-	echo "No GUI will be built as it was explicitly disabled."
-else
-	if test -z "$QT_LIBS"; then
-		echo "Warning: QT version $QT_VERSION was requested but not found. No GUI will be built."
-	else
-		echo "Building GUI with QT $QT_VERSION"
-	 fi
-fi
-
 if ! test "x$enable_account_check" = "xyes"; then
 	:
 elif test "`getent passwd oprofile 2>/dev/null`" == "" || \
@@ -25099,8 +24602,24 @@ elif test "`getent passwd oprofile 2>/dev/null`" == "" || \
 	fi
 fi
 
-if  test "$PERF_EVENT_H_EXISTS" != "yes" && test "$kernel_has_perf_events_support" = "yes"; then
-	echo "Warning: perf_event.h not found.  Please install the kernel headers package if you"
-	echo "         want non-root support built into OProfile."
+if  test "$PERF_EVENT_H_EXISTS" != "yes" && test "$kernel_may_have_perf_events_support" = "yes"; then
+	echo "Warning: perf_event.h not found.  Either install the kernel headers package or"
+	echo "use the --with-kernel option if you want the non-root, single application"
+	echo "profiling support provided by operf."
+	echo ""
+	echo "If you run 'make' now, only the legacy ocontrol-based profiler will be built."
 fi
 
+if test "$KERNELDIR" != "" && test "$kernel_has_perf_events_support" != "yes"; then
+	if ! test -d $KERNELDIR; then
+		echo "WARNING: You passed '--with-kernel=$KERNELDIR', but $KERNELDIR"
+		echo "does not exist."
+	else
+		echo "Warning: You requested to build with the '--with-kernel' option, but your kernel"
+		echo "headers were not accessible at the given location. Be sure you have run the following"
+		echo "command from within your kernel source tree:"
+		echo "     make headers_install INSTALL_HDR_PATH=<kernel-hdrs-install-dir>"
+		echo "Then pass <kernel-hdrs-install-dir> to oprofile's '--with-kernel' configure option."
+	fi
+	echo ""
+fi
diff --git a/configure.ac b/configure.ac
index cc19339..c8a94cd 100644
--- a/configure.ac
+++ b/configure.ac
@@ -11,9 +11,10 @@
 
 AC_PREREQ(2.13)
 
-AC_INIT(libop/op_config.h)
-AM_INIT_AUTOMAKE(oprofile, 0.9.8)
-AM_CONFIG_HEADER(config.h)
+AC_INIT([OProfile], [1.0.0])
+AC_CONFIG_SRCDIR([libop/op_config.h])
+AM_INIT_AUTOMAKE
+AC_CONFIG_HEADERS(config.h)
 
 AC_CHECK_DECLS([basename], [], [], [[#include <libgen.h>]])
 AC_PROG_RANLIB
@@ -60,55 +61,134 @@ test "$LD" || AC_ERROR(ld not found)
 
 # --with-kernel for cross compilation
 AC_ARG_WITH(kernel,
-[AS_HELP_STRING([--with-kernel=dir], [Path to kernel include directory (...include/linux/perf_event.h) to use.
+[AS_HELP_STRING([--with-kernel=dir], [Path to kernel include directory (e.g. /tmp/linux-xyz) to use.
 If this option is not specified, configure will look for kernel header files in the usual installation location
-for a kernel-headers package -- /usr/include. Use this option in cross-compile enviroments
+for a kernel-headers package -- /usr. Use this option in cross-compile enviroments
 or in situations where the host system does not support perf_events but you wish to build binaries
-for a target system that does support perf_events. Because of OProfile's use of syscalls, be sure that the
-kernel headers used match the architecture of the intended target system.])],
+for a target system that does support perf_events. Because of OProfile's use of syscalls,
+kernel headers used during build must match the architecture of the intended target system.
+NOTE:  Run the command 'make headers_install INSTALL_HDR_PATH=<kernel-hdrs-install-dir>'
+from the root directory of your kernel source tree, and use <kernel-hdrs-install-dir> for
+oprofile's '--with-kernel' configure option.])],
 KERNELDIR=$withval)
 
 
 dnl Check kernel version for perf_events supported
-AC_MSG_CHECKING([kernel version supports perf_events])
 if test "$KERNELDIR" != ""; then
-	KINC="$KERNELDIR/include"
+	if test -d $KERNELDIR; then
+		KINC="$KERNELDIR/include"
+		PERF_EVENT_FLAGS=" -I$KERNELDIR/include"
+		AC_SUBST(PERF_EVENT_FLAGS)
+		PERF_EVENT_H="$KERNELDIR/include/linux/perf_event.h"
+	else
+		echo "$KERNELDIR does not exist."
+	fi
+else
+	PERF_EVENT_H="/usr/include/linux/perf_event.h"
 fi
-AX_KERNEL_VERSION(2, 6, 31, <=, kernel_has_perf_events_support="yes",
+
+PERF_EVENT_H_EXISTS="no"
+kernel_may_have_perf_events_support="no"
+AX_KERNEL_VERSION(2, 6, 31, <=, kernel_may_have_perf_events_support="yes",
 kernel_has_perf_events_support="no")
 
-if test "$kernel_has_perf_events_support" = "no"; then
-	AC_MSG_RESULT([This kernel does not have perf_events support; falling back to legacy oprofile])
+dnl The AX_KERNEL_VERSION macro may return kernel_may_have_perf_events_support="yes",
+dnl indicating a partial answer.  Some architectures do not implement the Performance
+dnl Events Kernel Subsystem even with kernel versions > 2.6.31 -- i.e., not even
+dnl implementing the perf_event_open syscall to return ENOSYS.  So the check below
+dnl will identify and handle such situations.
+
+if test "$kernel_may_have_perf_events_support" = "yes"; then
+	AC_CHECK_HEADER($PERF_EVENT_H,PERF_EVENT_H_EXISTS="yes")
+	AC_MSG_CHECKING([kernel supports perf_events])
+	if test "$PERF_EVENT_H_EXISTS" = "yes"; then
+		rm -f test-for-PERF_EVENT_OPEN
+		AC_LANG_CONFTEST(
+			[AC_LANG_PROGRAM([[#include <linux/perf_event.h>
+				#include <asm/unistd.h>
+				#include <sys/types.h>
+				#include <string.h>
+			]],
+			[[struct perf_event_attr attr;
+				pid_t pid;
+				memset(&attr, 0, sizeof(attr));
+				attr.size = sizeof(attr);
+				attr.sample_type = PERF_SAMPLE_IP;
+				pid = getpid();
+				syscall(__NR_perf_event_open, &attr, pid, 0, -1, 0);
+				]])
+		])
+		$CC conftest.$ac_ext $CFLAGS $LDFLAGS $LIBS $PERF_EVENT_FLAGS -o test-for-PERF_EVENT_OPEN  > /dev/null 2>&1
+		if test -f test-for-PERF_EVENT_OPEN; then
+			kernel_has_perf_events_support="yes"
+			AC_MSG_RESULT(yes)
+		else
+			AC_MSG_RESULT(no)
+			kernel_has_perf_events_support="no"
+		fi
+		rm -f test-for-PERF_EVENT_OPEN
+	else
+		AC_MSG_RESULT(unknown -- perf_event.h not found)
+	fi
 else
-	AC_MSG_RESULT([This kernel has perf_events support])
+	AC_MSG_RESULT(kernel supports perf_events... no)
+	kernel_has_perf_events_support="no"
 fi
 
-if test "$KERNELDIR" == ""; then
-	PERF_EVENT_H="/usr/include/linux/perf_event.h"
-else
-	PERF_EVENT_H="$KERNELDIR/include/linux/perf_event.h"
-fi
-AC_CHECK_HEADER($PERF_EVENT_H,PERF_EVENT_H_EXISTS="yes")
-AM_CONDITIONAL(BUILD_FOR_PERF_EVENT, test -n "$PERF_EVENT_H_EXISTS")
-if test "$PERF_EVENT_H_EXISTS" = "yes"; then
+AM_CONDITIONAL(BUILD_FOR_PERF_EVENT, test "$kernel_has_perf_events_support" = "yes")
+
+if test "$kernel_has_perf_events_support" = "yes"; then
 	HAVE_PERF_EVENTS='1'
-	if test "$KERNELDIR" != ""; then
-		PERF_EVENT_FLAGS=" -I$KERNELDIR/include"
-		AC_SUBST(PERF_EVENT_FLAGS)
+	AC_MSG_CHECKING([whether PERF_RECORD_MISC_GUEST_KERNEL is defined in perf_event.h])
+	rm -f test-for-PERF_GUEST
+	AC_LANG_CONFTEST(
+		[AC_LANG_PROGRAM([[#include <linux/perf_event.h>]],
+			[[unsigned int pr_guest_kern = PERF_RECORD_MISC_GUEST_KERNEL;
+			unsigned int pr_guest_user = PERF_RECORD_MISC_GUEST_USER;]])
+		])
+	$CC conftest.$ac_ext $CFLAGS $LDFLAGS $LIBS $PERF_EVENT_FLAGS -o test-for-PERF_GUEST  > /dev/null 2>&1
+	if test -f test-for-PERF_GUEST; then
+		echo "yes"
+		HAVE_PERF_GUEST_MACROS='1'
+	else
+		echo "no"
+		HAVE_PERF_GUEST_MACROS='0'
 	fi
+	AC_DEFINE_UNQUOTED(HAVE_PERF_GUEST_MACROS, $HAVE_PERF_GUEST_MACROS, [PERF_RECORD_MISC_GUEST_KERNEL is defined in perf_event.h])
+	rm -f test-for-PERF_GUEST*
+
+	AC_MSG_CHECKING([whether precise_ip is defined in perf_event.h])
+	rm -f test-for-precise-ip
+	AC_LANG_CONFTEST(
+		[AC_LANG_PROGRAM([[#include <linux/perf_event.h>]],
+			[[struct perf_event_attr attr;
+			attr.precise_ip = 2;]])
+		])
+	$CC conftest.$ac_ext $CFLAGS $LDFLAGS $LIBS $PERF_EVENT_FLAGS -o test-for-precise-ip  > /dev/null 2>&1
+	if test -f test-for-precise-ip; then
+		echo "yes"
+		HAVE_PERF_PRECISE_IP='1'
+	else
+		echo "no"
+		HAVE_PERF_PRECISE_IP='0'
+	fi
+	AC_DEFINE_UNQUOTED(HAVE_PERF_PRECISE_IP, $HAVE_PERF_PRECISE_IP, [precise_ip is defined in perf_event.h])
+	rm -f test-for-precise-ip*
+
 else
 	HAVE_PERF_EVENTS='0'
+	AC_MSG_RESULT([No perf_events support available; falling back to legacy oprofile])
 fi
-AC_DEFINE_UNQUOTED(HAVE_PERF_EVENTS, $HAVE_PERF_EVENTS, [Kernel support for perf_events exists])
 
+AC_DEFINE_UNQUOTED(HAVE_PERF_EVENTS, $HAVE_PERF_EVENTS, [Kernel support for perf_events exists])
+AC_CANONICAL_HOST
 if test "$HAVE_PERF_EVENTS" = "1"; then
 	PFM_LIB=
-	arch="`uname -m`"
-	if test "$arch" = "ppc64"; then
-		AC_CHECK_HEADER(perfmon/pfmlib.h,,[AC_MSG_ERROR([pfmlib.h not found; usually provided in papi devel package])])
-		AC_CHECK_LIB(pfm,pfm_get_event_name, HAVE_LIBPFM3='1'; HAVE_LIBPFM='1', [
-			AC_CHECK_LIB(pfm,pfm_get_os_event_encoding, HAVE_LIBPFM3='0'; HAVE_LIBPFM='1',
-			[AC_MSG_ERROR([libpfm not found; usually provided in papi devel package])])])
+	if test "$host_cpu" = "powerpc64le" -o "$host_cpu" = "powerpc64"; then
+		AC_CHECK_HEADER(perfmon/pfmlib.h,,[AC_MSG_ERROR([pfmlib.h not found; may be provided by libpfm devel or papi devel package])])
+		AC_CHECK_LIB(pfm,pfm_get_os_event_encoding, HAVE_LIBPFM3='0'; HAVE_LIBPFM='1', [
+			AC_CHECK_LIB(pfm, pfm_get_event_name, HAVE_LIBPFM3='1'; HAVE_LIBPFM='1',
+			[AC_MSG_ERROR([libpfm not found; may be provided by libpfm devel or papi devel package])])])
 		PFM_LIB="-lpfm"
 		AC_DEFINE_UNQUOTED(HAVE_LIBPFM3, $HAVE_LIBPFM3, [Define to 1 if using libpfm3; 0 if using newer libpfm])
 		AC_DEFINE_UNQUOTED(HAVE_LIBPFM, $HAVE_LIBPFM, [Define to 1 if libpfm is available])
@@ -160,8 +240,6 @@ LDFLAGS="$LDFLAGS_SAVE"
 AC_SUBST(OP_CPPFLAGS)
 AC_SUBST(OP_LDFLAGS)
 
-AX_CELL_SPU
-
 # C++ tests
 
 AC_LANG_CPLUSPLUS
@@ -182,68 +260,6 @@ AX_TYPEDEFED_NAME(ptrdiff_t, "int" "long", PTRDIFF_T_TYPE)
 AC_SUBST(PTRDIFF_T_TYPE)
 
 AC_PATH_XTRA
-ORIG_X_SAVE_LIBS="$LIBS"
-LIBS="$X_PRE_LIBS $LIBS $X_LIBS -lX11 $X_EXTRA_LIBS"
-X_LIBS="$LIBS"
-AC_SUBST(X_LIBS)
-
-AC_ARG_ENABLE(gui,[  --enable-gui  compile with gui component (qt3|qt4|yes|no),
-		     if not given or set to yes, gui defaults to qt3],, enable_gui=qt3)
-
-if test "x$enable_gui"  = "xqt3"  || test "x$enable_gui"  = "xyes"; then
-	QT_VERSION=3
-	QT_DO_IT_ALL
-	if test -n "$QT_LIB"; then
-		QT_LIBS="$QT_LIB $QT_LDFLAGS"
-	fi
-	QT_CFLAGS="$QT_INCLUDES"
-fi
-
-if test "x$enable_gui"  = "xqt4"; then
-	QT_VERSION=4
-	PKG_CHECK_MODULES(QT, Qt3Support QtGui QtCore ,,[
-	echo "You requested QT4 but its build files are not available. Exiting now."
-	exit
-	])
-	MOC=$(pkg-config --variable=moc_location QtCore)
-	UIC=$(dirname $(pkg-config --variable=uic_location QtCore))/uic3
-	QT_VERSION=$(pkg-config --modversion QtCore)
-
-dnl following are some sanity tests and workarounds for buggy QtCore.pc files
-	if test "x$MOC" = "x"; then
-		echo "WARNING: Your QtCore.pc file is buggy, it doesn't provide the variable 'moc_location'"
-		echo "WARNING: I will try to find it in your PATH ..."
-		AC_CHECK_PROG(MOC, moc, moc)
-		if test "x$MOC" = "x"; then
-			echo "WARNING: You can fix this by adding the location of moc to your path."
-			echo "WARNING: Exiting now."
-			exit
-		fi
-	fi
-
-	if test "x$UIC" = "x3"; then
-		echo "WARNING: Your QtCore.pc file is buggy, it doesn't provide the variable 'uic_location'"
-		echo "WARNING: I will try to find it in your PATH ..."
-		AC_CHECK_PROG(UIChelp, uic3, uic3)
-		if test "x$UIChelp" = "x"; then
-			echo "WARNING: You can fix this by adding the location of uic3 to your path."
-			echo "WARNING: Exiting now."
-			exit
-		else
-			UIC="$UIChelp"
-		fi
-	fi
-
-	flags_has_qt3support=$(echo $QT_CFLAGS | grep QT3_SUPPORT)
-	if test "x$flags_has_qt3support" = "x" ; then
-		echo "WARNING: Your Qt3Support package is buggy; it dosn't include the 'QT3_SUPPORT' flag"
-		echo "WARNING: adding it manually"
-		QT_CFLAGS="$QT_CFLAGS -DQT3_SUPPORT"
-	fi
-fi
-AM_CONDITIONAL(have_qt, test -n "$QT_LIBS")
-
-LIBS="$ORIG_X_SAVE_LIBS"
 
 dnl enable pch for c++
 AC_ARG_ENABLE(pch,
@@ -315,6 +331,12 @@ AM_CONDITIONAL(CHECK_ACCOUNT, test "x$enable_account_check" = "xyes")
 AC_SUBST(OP_CFLAGS)
 AC_SUBST(OP_CXXFLAGS)
 
+AC_CHECK_FUNC(clock_gettime, [], [
+	AC_CHECK_LIB(rt, clock_gettime, RT_LIB="-lrt",
+		AC_MSG_ERROR(Unable to find clock_gettime function; required by ocount))])
+AC_SUBST(RT_LIB)
+
+
 # fixups for config.h
 if test "$prefix" = "NONE"; then
 	my_op_prefix="$ac_default_prefix"
@@ -338,6 +360,8 @@ OP_DOCDIR=`eval echo "${my_op_prefix}/share/doc/$PACKAGE/"`
 AC_SUBST(OP_DOCDIR)
 
 AC_OUTPUT(Makefile \
+	pe_counting/Makefile \
+	libpe_utils/Makefile \
 	pe_profiling/Makefile \
 	libperf_events/Makefile \
 	m4/Makefile \
@@ -357,43 +381,30 @@ AC_OUTPUT(Makefile \
 	libregex/tests/Makefile \
 	libregex/stl.pat \
 	libregex/tests/mangled-name \
-	daemon/Makefile \
-	daemon/liblegacy/Makefile \
 	events/Makefile \
 	utils/Makefile \
 	doc/Makefile \
 	doc/xsl/catalog-1.xml \
 	doc/oprofile.1 \
-	doc/opcontrol.1 \
 	doc/ophelp.1 \
+	doc/op-check-perfevents.1 \
 	doc/opreport.1 \
 	doc/opannotate.1 \
 	doc/opgprof.1 \
 	doc/oparchive.1 \
 	doc/opimport.1 \
 	doc/operf.1 \
+	doc/ocount.1 \
 	doc/srcdoc/Doxyfile \
 	libpp/Makefile \
 	opjitconv/Makefile \
 	pp/Makefile \
-	gui/Makefile \
-	gui/ui/Makefile \
 	agents/Makefile \
 	agents/jvmti/Makefile \
 	agents/jvmpi/Makefile)
 
 AX_COPY_IF_CHANGE(doc/xsl/catalog-1.xml, doc/xsl/catalog.xml)
 
-if test "x$enable_gui" = "xno" ; then
-	echo "No GUI will be built as it was explicitly disabled."
-else
-	if test -z "$QT_LIBS"; then
-		echo "Warning: QT version $QT_VERSION was requested but not found. No GUI will be built."
-	else
-		echo "Building GUI with QT $QT_VERSION"
-	 fi
-fi
-
 if ! test "x$enable_account_check" = "xyes"; then
 	:
 elif test "`getent passwd oprofile 2>/dev/null`" == "" || \
@@ -415,8 +426,24 @@ elif test "`getent passwd oprofile 2>/dev/null`" == "" || \
 	fi
 fi
 
-if  test "$PERF_EVENT_H_EXISTS" != "yes" && test "$kernel_has_perf_events_support" = "yes"; then
-	echo "Warning: perf_event.h not found.  Please install the kernel headers package if you"
-	echo "         want non-root support built into OProfile."
+if  test "$PERF_EVENT_H_EXISTS" != "yes" && test "$kernel_may_have_perf_events_support" = "yes"; then
+	echo "Warning: perf_event.h not found.  Either install the kernel headers package or"
+	echo "use the --with-kernel option if you want the non-root, single application"
+	echo "profiling support provided by operf."
+	echo ""
+	echo "If you run 'make' now, only the legacy ocontrol-based profiler will be built."
 fi
 
+if test "$KERNELDIR" != "" && test "$kernel_has_perf_events_support" != "yes"; then
+	if ! test -d $KERNELDIR; then
+		echo "WARNING: You passed '--with-kernel=$KERNELDIR', but $KERNELDIR"
+		echo "does not exist."
+	else
+		echo "Warning: You requested to build with the '--with-kernel' option, but your kernel"
+		echo "headers were not accessible at the given location. Be sure you have run the following"
+		echo "command from within your kernel source tree:"
+		echo "     make headers_install INSTALL_HDR_PATH=<kernel-hdrs-install-dir>"
+		echo "Then pass <kernel-hdrs-install-dir> to oprofile's '--with-kernel' configure option."
+	fi
+	echo ""
+fi
diff --git a/daemon/Makefile.am b/daemon/Makefile.am
deleted file mode 100644
index ce6f5e2..0000000
--- a/daemon/Makefile.am
+++ /dev/null
@@ -1,60 +0,0 @@
-SUBDIRS = liblegacy .
-
-oprofiled_SOURCES = \
-	init.c \
-	oprofiled.c \
-	oprofiled.h \
-	opd_stats.c \
-	opd_pipe.c \
-	opd_pipe.h \
-	opd_sfile.c \
-	opd_sfile.h \
-	opd_kernel.c \
-	opd_kernel.h \
-	opd_trans.c \
-	opd_trans.h \
-	opd_printf.h \
-	opd_stats.h \
-	opd_cookie.c \
-	opd_cookie.h \
-	opd_events.c \
-	opd_events.h \
-	opd_interface.h \
-	opd_mangling.c \
-	opd_mangling.h \
-	opd_perfmon.h \
-	opd_perfmon.c \
-	opd_anon.h \
-	opd_anon.c \
-	opd_spu.c \
-	opd_extended.h \
-	opd_extended.c \
-	opd_ibs.h \
-	opd_ibs.c \
-	opd_ibs_macro.h \
-	opd_ibs_trans.h \
-	opd_ibs_trans.c
-
-LIBS=@POPT_LIBS@ @LIBERTY_LIBS@
-
-AM_CPPFLAGS = \
-	-I ${top_srcdir}/libabi \
-	-I ${top_srcdir}/libutil \
-	-I ${top_srcdir}/libop \
-	-I ${top_srcdir}/libdb \
-	@OP_CPPFLAGS@
-
-# -fno-omit-frame-pointer needed for daemon build: see ChangeLog-2004 02-23
-AM_CFLAGS = @OP_CFLAGS@ -fno-omit-frame-pointer
-AM_LDFLAGS = @OP_LDFLAGS@
-
-bin_PROGRAMS = oprofiled
-
-oprofiled_LDADD = \
-	liblegacy/liblegacy.a \
-	../libabi/libabi.a \
-	../libdb/libodb.a \
-	../libop/libop.a \
-	../libutil/libutil.a
-
-oprofiled_LINK = $(CC) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
diff --git a/daemon/Makefile.in b/daemon/Makefile.in
deleted file mode 100644
index a80da87..0000000
--- a/daemon/Makefile.in
+++ /dev/null
@@ -1,785 +0,0 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
-# @configure_input@
-
-# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009  Free Software Foundation,
-# Inc.
-# This Makefile.in is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
-# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
-# PARTICULAR PURPOSE.
-
-@SET_MAKE@
-
-VPATH = @srcdir@
-pkgdatadir = $(datadir)/@PACKAGE@
-pkgincludedir = $(includedir)/@PACKAGE@
-pkglibdir = $(libdir)/@PACKAGE@
-pkglibexecdir = $(libexecdir)/@PACKAGE@
-am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
-install_sh_DATA = $(install_sh) -c -m 644
-install_sh_PROGRAM = $(install_sh) -c
-install_sh_SCRIPT = $(install_sh) -c
-INSTALL_HEADER = $(INSTALL_DATA)
-transform = $(program_transform_name)
-NORMAL_INSTALL = :
-PRE_INSTALL = :
-POST_INSTALL = :
-NORMAL_UNINSTALL = :
-PRE_UNINSTALL = :
-POST_UNINSTALL = :
-build_triplet = @build@
-host_triplet = @host@
-bin_PROGRAMS = oprofiled$(EXEEXT)
-subdir = daemon
-DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
-ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/m4/binutils.m4 \
-	$(top_srcdir)/m4/builtinexpect.m4 \
-	$(top_srcdir)/m4/cellspubfdsupport.m4 \
-	$(top_srcdir)/m4/compileroption.m4 \
-	$(top_srcdir)/m4/copyifchange.m4 $(top_srcdir)/m4/docbook.m4 \
-	$(top_srcdir)/m4/extradirs.m4 \
-	$(top_srcdir)/m4/kernelversion.m4 $(top_srcdir)/m4/libtool.m4 \
-	$(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \
-	$(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
-	$(top_srcdir)/m4/mallocattribute.m4 \
-	$(top_srcdir)/m4/poptconst.m4 \
-	$(top_srcdir)/m4/precompiledheader.m4 $(top_srcdir)/m4/qt.m4 \
-	$(top_srcdir)/m4/sstream.m4 $(top_srcdir)/m4/typedef.m4 \
-	$(top_srcdir)/configure.ac
-am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
-	$(ACLOCAL_M4)
-mkinstalldirs = $(install_sh) -d
-CONFIG_HEADER = $(top_builddir)/config.h
-CONFIG_CLEAN_FILES =
-CONFIG_CLEAN_VPATH_FILES =
-am__installdirs = "$(DESTDIR)$(bindir)"
-PROGRAMS = $(bin_PROGRAMS)
-am_oprofiled_OBJECTS = init.$(OBJEXT) oprofiled.$(OBJEXT) \
-	opd_stats.$(OBJEXT) opd_pipe.$(OBJEXT) opd_sfile.$(OBJEXT) \
-	opd_kernel.$(OBJEXT) opd_trans.$(OBJEXT) opd_cookie.$(OBJEXT) \
-	opd_events.$(OBJEXT) opd_mangling.$(OBJEXT) \
-	opd_perfmon.$(OBJEXT) opd_anon.$(OBJEXT) opd_spu.$(OBJEXT) \
-	opd_extended.$(OBJEXT) opd_ibs.$(OBJEXT) \
-	opd_ibs_trans.$(OBJEXT)
-oprofiled_OBJECTS = $(am_oprofiled_OBJECTS)
-oprofiled_DEPENDENCIES = liblegacy/liblegacy.a ../libabi/libabi.a \
-	../libdb/libodb.a ../libop/libop.a ../libutil/libutil.a
-DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
-depcomp = $(SHELL) $(top_srcdir)/depcomp
-am__depfiles_maybe = depfiles
-am__mv = mv -f
-COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
-	$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
-LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
-	--mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
-	$(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
-CCLD = $(CC)
-LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
-	--mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
-	$(LDFLAGS) -o $@
-SOURCES = $(oprofiled_SOURCES)
-DIST_SOURCES = $(oprofiled_SOURCES)
-RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \
-	html-recursive info-recursive install-data-recursive \
-	install-dvi-recursive install-exec-recursive \
-	install-html-recursive install-info-recursive \
-	install-pdf-recursive install-ps-recursive install-recursive \
-	installcheck-recursive installdirs-recursive pdf-recursive \
-	ps-recursive uninstall-recursive
-RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive	\
-  distclean-recursive maintainer-clean-recursive
-AM_RECURSIVE_TARGETS = $(RECURSIVE_TARGETS:-recursive=) \
-	$(RECURSIVE_CLEAN_TARGETS:-recursive=) tags TAGS ctags CTAGS \
-	distdir
-ETAGS = etags
-CTAGS = ctags
-DIST_SUBDIRS = $(SUBDIRS)
-DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
-am__relativize = \
-  dir0=`pwd`; \
-  sed_first='s,^\([^/]*\)/.*$$,\1,'; \
-  sed_rest='s,^[^/]*/*,,'; \
-  sed_last='s,^.*/\([^/]*\)$$,\1,'; \
-  sed_butlast='s,/*[^/]*$$,,'; \
-  while test -n "$$dir1"; do \
-    first=`echo "$$dir1" | sed -e "$$sed_first"`; \
-    if test "$$first" != "."; then \
-      if test "$$first" = ".."; then \
-        dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \
-        dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \
-      else \
-        first2=`echo "$$dir2" | sed -e "$$sed_first"`; \
-        if test "$$first2" = "$$first"; then \
-          dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \
-        else \
-          dir2="../$$dir2"; \
-        fi; \
-        dir0="$$dir0"/"$$first"; \
-      fi; \
-    fi; \
-    dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \
-  done; \
-  reldir="$$dir2"
-ACLOCAL = @ACLOCAL@
-AMTAR = @AMTAR@
-AR = @AR@
-AUTOCONF = @AUTOCONF@
-AUTOHEADER = @AUTOHEADER@
-AUTOMAKE = @AUTOMAKE@
-AWK = @AWK@
-BFD_LIBS = @BFD_LIBS@
-CAT_ENTRY_END = @CAT_ENTRY_END@
-CAT_ENTRY_START = @CAT_ENTRY_START@
-CC = @CC@
-CCDEPMODE = @CCDEPMODE@
-CFLAGS = @CFLAGS@
-CPP = @CPP@
-CPPFLAGS = @CPPFLAGS@
-CXX = @CXX@
-CXXCPP = @CXXCPP@
-CXXDEPMODE = @CXXDEPMODE@
-CXXFLAGS = @CXXFLAGS@
-CYGPATH_W = @CYGPATH_W@
-DATE = @DATE@
-DEFS = @DEFS@
-DEPDIR = @DEPDIR@
-DOCBOOK_ROOT = @DOCBOOK_ROOT@
-DSYMUTIL = @DSYMUTIL@
-DUMPBIN = @DUMPBIN@
-ECHO_C = @ECHO_C@
-ECHO_N = @ECHO_N@
-ECHO_T = @ECHO_T@
-EGREP = @EGREP@
-EXEEXT = @EXEEXT@
-EXTRA_CFLAGS_MODULE = @EXTRA_CFLAGS_MODULE@
-FGREP = @FGREP@
-GREP = @GREP@
-INSTALL = @INSTALL@
-INSTALL_DATA = @INSTALL_DATA@
-INSTALL_PROGRAM = @INSTALL_PROGRAM@
-INSTALL_SCRIPT = @INSTALL_SCRIPT@
-INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
-JAVA_HOMEDIR = @JAVA_HOMEDIR@
-LD = @LD@
-LDFLAGS = @LDFLAGS@
-LIBERTY_LIBS = @LIBERTY_LIBS@
-LIBOBJS = @LIBOBJS@
-LIBS = @POPT_LIBS@ @LIBERTY_LIBS@
-LIBTOOL = @LIBTOOL@
-LIPO = @LIPO@
-LN_S = @LN_S@
-LTLIBOBJS = @LTLIBOBJS@
-MAKEINFO = @MAKEINFO@
-MKDIR_P = @MKDIR_P@
-MOC = @MOC@
-NM = @NM@
-NMEDIT = @NMEDIT@
-OBJDUMP = @OBJDUMP@
-OBJEXT = @OBJEXT@
-OP_CFLAGS = @OP_CFLAGS@
-OP_CPPFLAGS = @OP_CPPFLAGS@
-OP_CXXFLAGS = @OP_CXXFLAGS@
-OP_DOCDIR = @OP_DOCDIR@
-OP_LDFLAGS = @OP_LDFLAGS@
-OTOOL = @OTOOL@
-OTOOL64 = @OTOOL64@
-PACKAGE = @PACKAGE@
-PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
-PACKAGE_NAME = @PACKAGE_NAME@
-PACKAGE_STRING = @PACKAGE_STRING@
-PACKAGE_TARNAME = @PACKAGE_TARNAME@
-PACKAGE_VERSION = @PACKAGE_VERSION@
-PATH_SEPARATOR = @PATH_SEPARATOR@
-PERF_EVENT_FLAGS = @PERF_EVENT_FLAGS@
-PFM_LIB = @PFM_LIB@
-PKG_CONFIG = @PKG_CONFIG@
-POPT_LIBS = @POPT_LIBS@
-PTRDIFF_T_TYPE = @PTRDIFF_T_TYPE@
-QT_CFLAGS = @QT_CFLAGS@
-QT_INCLUDES = @QT_INCLUDES@
-QT_LDFLAGS = @QT_LDFLAGS@
-QT_LIB = @QT_LIB@
-QT_LIBS = @QT_LIBS@
-QT_VERSION = @QT_VERSION@
-RANLIB = @RANLIB@
-SED = @SED@
-SET_MAKE = @SET_MAKE@
-SHELL = @SHELL@
-SIZE_T_TYPE = @SIZE_T_TYPE@
-STRIP = @STRIP@
-UIC = @UIC@
-UIChelp = @UIChelp@
-VERSION = @VERSION@
-XMKMF = @XMKMF@
-XML_CATALOG = @XML_CATALOG@
-XSLTPROC = @XSLTPROC@
-XSLTPROC_FLAGS = @XSLTPROC_FLAGS@
-X_CFLAGS = @X_CFLAGS@
-X_EXTRA_LIBS = @X_EXTRA_LIBS@
-X_LIBS = @X_LIBS@
-X_PRE_LIBS = @X_PRE_LIBS@
-abs_builddir = @abs_builddir@
-abs_srcdir = @abs_srcdir@
-abs_top_builddir = @abs_top_builddir@
-abs_top_srcdir = @abs_top_srcdir@
-ac_ct_CC = @ac_ct_CC@
-ac_ct_CXX = @ac_ct_CXX@
-ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
-am__include = @am__include@
-am__leading_dot = @am__leading_dot@
-am__quote = @am__quote@
-am__tar = @am__tar@
-am__untar = @am__untar@
-bindir = @bindir@
-build = @build@
-build_alias = @build_alias@
-build_cpu = @build_cpu@
-build_os = @build_os@
-build_vendor = @build_vendor@
-builddir = @builddir@
-datadir = @datadir@
-datarootdir = @datarootdir@
-docdir = @docdir@
-dvidir = @dvidir@
-exec_prefix = @exec_prefix@
-host = @host@
-host_alias = @host_alias@
-host_cpu = @host_cpu@
-host_os = @host_os@
-host_vendor = @host_vendor@
-htmldir = @htmldir@
-includedir = @includedir@
-infodir = @infodir@
-install_sh = @install_sh@
-libdir = @libdir@
-libexecdir = @libexecdir@
-localedir = @localedir@
-localstatedir = @localstatedir@
-lt_ECHO = @lt_ECHO@
-mandir = @mandir@
-mkdir_p = @mkdir_p@
-oldincludedir = @oldincludedir@
-pdfdir = @pdfdir@
-prefix = @prefix@
-program_transform_name = @program_transform_name@
-psdir = @psdir@
-sbindir = @sbindir@
-sharedstatedir = @sharedstatedir@
-srcdir = @srcdir@
-sysconfdir = @sysconfdir@
-target_alias = @target_alias@
-top_build_prefix = @top_build_prefix@
-top_builddir = @top_builddir@
-top_srcdir = @top_srcdir@
-topdir = @topdir@
-SUBDIRS = liblegacy .
-oprofiled_SOURCES = \
-	init.c \
-	oprofiled.c \
-	oprofiled.h \
-	opd_stats.c \
-	opd_pipe.c \
-	opd_pipe.h \
-	opd_sfile.c \
-	opd_sfile.h \
-	opd_kernel.c \
-	opd_kernel.h \
-	opd_trans.c \
-	opd_trans.h \
-	opd_printf.h \
-	opd_stats.h \
-	opd_cookie.c \
-	opd_cookie.h \
-	opd_events.c \
-	opd_events.h \
-	opd_interface.h \
-	opd_mangling.c \
-	opd_mangling.h \
-	opd_perfmon.h \
-	opd_perfmon.c \
-	opd_anon.h \
-	opd_anon.c \
-	opd_spu.c \
-	opd_extended.h \
-	opd_extended.c \
-	opd_ibs.h \
-	opd_ibs.c \
-	opd_ibs_macro.h \
-	opd_ibs_trans.h \
-	opd_ibs_trans.c
-
-AM_CPPFLAGS = \
-	-I ${top_srcdir}/libabi \
-	-I ${top_srcdir}/libutil \
-	-I ${top_srcdir}/libop \
-	-I ${top_srcdir}/libdb \
-	@OP_CPPFLAGS@
-
-
-# -fno-omit-frame-pointer needed for daemon build: see ChangeLog-2004 02-23
-AM_CFLAGS = @OP_CFLAGS@ -fno-omit-frame-pointer
-AM_LDFLAGS = @OP_LDFLAGS@
-oprofiled_LDADD = \
-	liblegacy/liblegacy.a \
-	../libabi/libabi.a \
-	../libdb/libodb.a \
-	../libop/libop.a \
-	../libutil/libutil.a
-
-oprofiled_LINK = $(CC) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
-all: all-recursive
-
-.SUFFIXES:
-.SUFFIXES: .c .lo .o .obj
-$(srcdir)/Makefile.in:  $(srcdir)/Makefile.am  $(am__configure_deps)
-	@for dep in $?; do \
-	  case '$(am__configure_deps)' in \
-	    *$$dep*) \
-	      ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
-	        && { if test -f $@; then exit 0; else break; fi; }; \
-	      exit 1;; \
-	  esac; \
-	done; \
-	echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign daemon/Makefile'; \
-	$(am__cd) $(top_srcdir) && \
-	  $(AUTOMAKE) --foreign daemon/Makefile
-.PRECIOUS: Makefile
-Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
-	@case '$?' in \
-	  *config.status*) \
-	    cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
-	  *) \
-	    echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
-	    cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
-	esac;
-
-$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
-	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-
-$(top_srcdir)/configure:  $(am__configure_deps)
-	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(ACLOCAL_M4):  $(am__aclocal_m4_deps)
-	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(am__aclocal_m4_deps):
-install-binPROGRAMS: $(bin_PROGRAMS)
-	@$(NORMAL_INSTALL)
-	test -z "$(bindir)" || $(MKDIR_P) "$(DESTDIR)$(bindir)"
-	@list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \
-	for p in $$list; do echo "$$p $$p"; done | \
-	sed 's/$(EXEEXT)$$//' | \
-	while read p p1; do if test -f $$p || test -f $$p1; \
-	  then echo "$$p"; echo "$$p"; else :; fi; \
-	done | \
-	sed -e 'p;s,.*/,,;n;h' -e 's|.*|.|' \
-	    -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \
-	sed 'N;N;N;s,\n, ,g' | \
-	$(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \
-	  { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \
-	    if ($$2 == $$4) files[d] = files[d] " " $$1; \
-	    else { print "f", $$3 "/" $$4, $$1; } } \
-	  END { for (d in files) print "f", d, files[d] }' | \
-	while read type dir files; do \
-	    if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \
-	    test -z "$$files" || { \
-	    echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(bindir)$$dir'"; \
-	    $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(bindir)$$dir" || exit $$?; \
-	    } \
-	; done
-
-uninstall-binPROGRAMS:
-	@$(NORMAL_UNINSTALL)
-	@list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \
-	files=`for p in $$list; do echo "$$p"; done | \
-	  sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \
-	      -e 's/$$/$(EXEEXT)/' `; \
-	test -n "$$list" || exit 0; \
-	echo " ( cd '$(DESTDIR)$(bindir)' && rm -f" $$files ")"; \
-	cd "$(DESTDIR)$(bindir)" && rm -f $$files
-
-clean-binPROGRAMS:
-	@list='$(bin_PROGRAMS)'; test -n "$$list" || exit 0; \
-	echo " rm -f" $$list; \
-	rm -f $$list || exit $$?; \
-	test -n "$(EXEEXT)" || exit 0; \
-	list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
-	echo " rm -f" $$list; \
-	rm -f $$list
-oprofiled$(EXEEXT): $(oprofiled_OBJECTS) $(oprofiled_DEPENDENCIES) 
-	@rm -f oprofiled$(EXEEXT)
-	$(oprofiled_LINK) $(oprofiled_OBJECTS) $(oprofiled_LDADD) $(LIBS)
-
-mostlyclean-compile:
-	-rm -f *.$(OBJEXT)
-
-distclean-compile:
-	-rm -f *.tab.c
-
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/init.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/opd_anon.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/opd_cookie.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/opd_events.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/opd_extended.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/opd_ibs.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/opd_ibs_trans.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/opd_kernel.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/opd_mangling.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/opd_perfmon.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/opd_pipe.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/opd_sfile.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/opd_spu.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/opd_stats.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/opd_trans.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oprofiled.Po@am__quote@
-
-.c.o:
-@am__fastdepCC_TRUE@	$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
-@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
-@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@	$(COMPILE) -c $<
-
-.c.obj:
-@am__fastdepCC_TRUE@	$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
-@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
-@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@	$(COMPILE) -c `$(CYGPATH_W) '$<'`
-
-.c.lo:
-@am__fastdepCC_TRUE@	$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
-@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
-@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@	$(LTCOMPILE) -c -o $@ $<
-
-mostlyclean-libtool:
-	-rm -f *.lo
-
-clean-libtool:
-	-rm -rf .libs _libs
-
-# This directory's subdirectories are mostly independent; you can cd
-# into them and run `make' without going through this Makefile.
-# To change the values of `make' variables: instead of editing Makefiles,
-# (1) if the variable is set in `config.status', edit `config.status'
-#     (which will cause the Makefiles to be regenerated when you run `make');
-# (2) otherwise, pass the desired values on the `make' command line.
-$(RECURSIVE_TARGETS):
-	@fail= failcom='exit 1'; \
-	for f in x $$MAKEFLAGS; do \
-	  case $$f in \
-	    *=* | --[!k]*);; \
-	    *k*) failcom='fail=yes';; \
-	  esac; \
-	done; \
-	dot_seen=no; \
-	target=`echo $@ | sed s/-recursive//`; \
-	list='$(SUBDIRS)'; for subdir in $$list; do \
-	  echo "Making $$target in $$subdir"; \
-	  if test "$$subdir" = "."; then \
-	    dot_seen=yes; \
-	    local_target="$$target-am"; \
-	  else \
-	    local_target="$$target"; \
-	  fi; \
-	  ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
-	  || eval $$failcom; \
-	done; \
-	if test "$$dot_seen" = "no"; then \
-	  $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \
-	fi; test -z "$$fail"
-
-$(RECURSIVE_CLEAN_TARGETS):
-	@fail= failcom='exit 1'; \
-	for f in x $$MAKEFLAGS; do \
-	  case $$f in \
-	    *=* | --[!k]*);; \
-	    *k*) failcom='fail=yes';; \
-	  esac; \
-	done; \
-	dot_seen=no; \
-	case "$@" in \
-	  distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \
-	  *) list='$(SUBDIRS)' ;; \
-	esac; \
-	rev=''; for subdir in $$list; do \
-	  if test "$$subdir" = "."; then :; else \
-	    rev="$$subdir $$rev"; \
-	  fi; \
-	done; \
-	rev="$$rev ."; \
-	target=`echo $@ | sed s/-recursive//`; \
-	for subdir in $$rev; do \
-	  echo "Making $$target in $$subdir"; \
-	  if test "$$subdir" = "."; then \
-	    local_target="$$target-am"; \
-	  else \
-	    local_target="$$target"; \
-	  fi; \
-	  ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
-	  || eval $$failcom; \
-	done && test -z "$$fail"
-tags-recursive:
-	list='$(SUBDIRS)'; for subdir in $$list; do \
-	  test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \
-	done
-ctags-recursive:
-	list='$(SUBDIRS)'; for subdir in $$list; do \
-	  test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) ctags); \
-	done
-
-ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
-	list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
-	unique=`for i in $$list; do \
-	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
-	  done | \
-	  $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
-	      END { if (nonempty) { for (i in files) print i; }; }'`; \
-	mkid -fID $$unique
-tags: TAGS
-
-TAGS: tags-recursive $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
-		$(TAGS_FILES) $(LISP)
-	set x; \
-	here=`pwd`; \
-	if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \
-	  include_option=--etags-include; \
-	  empty_fix=.; \
-	else \
-	  include_option=--include; \
-	  empty_fix=; \
-	fi; \
-	list='$(SUBDIRS)'; for subdir in $$list; do \
-	  if test "$$subdir" = .; then :; else \
-	    test ! -f $$subdir/TAGS || \
-	      set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \
-	  fi; \
-	done; \
-	list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
-	unique=`for i in $$list; do \
-	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
-	  done | \
-	  $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
-	      END { if (nonempty) { for (i in files) print i; }; }'`; \
-	shift; \
-	if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
-	  test -n "$$unique" || unique=$$empty_fix; \
-	  if test $$# -gt 0; then \
-	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
-	      "$$@" $$unique; \
-	  else \
-	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
-	      $$unique; \
-	  fi; \
-	fi
-ctags: CTAGS
-CTAGS: ctags-recursive $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
-		$(TAGS_FILES) $(LISP)
-	list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
-	unique=`for i in $$list; do \
-	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
-	  done | \
-	  $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
-	      END { if (nonempty) { for (i in files) print i; }; }'`; \
-	test -z "$(CTAGS_ARGS)$$unique" \
-	  || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
-	     $$unique
-
-GTAGS:
-	here=`$(am__cd) $(top_builddir) && pwd` \
-	  && $(am__cd) $(top_srcdir) \
-	  && gtags -i $(GTAGS_ARGS) "$$here"
-
-distclean-tags:
-	-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
-
-distdir: $(DISTFILES)
-	@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
-	topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
-	list='$(DISTFILES)'; \
-	  dist_files=`for file in $$list; do echo $$file; done | \
-	  sed -e "s|^$$srcdirstrip/||;t" \
-	      -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
-	case $$dist_files in \
-	  */*) $(MKDIR_P) `echo "$$dist_files" | \
-			   sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
-			   sort -u` ;; \
-	esac; \
-	for file in $$dist_files; do \
-	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
-	  if test -d $$d/$$file; then \
-	    dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
-	    if test -d "$(distdir)/$$file"; then \
-	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
-	    fi; \
-	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
-	      cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
-	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
-	    fi; \
-	    cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
-	  else \
-	    test -f "$(distdir)/$$file" \
-	    || cp -p $$d/$$file "$(distdir)/$$file" \
-	    || exit 1; \
-	  fi; \
-	done
-	@list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
-	  if test "$$subdir" = .; then :; else \
-	    test -d "$(distdir)/$$subdir" \
-	    || $(MKDIR_P) "$(distdir)/$$subdir" \
-	    || exit 1; \
-	  fi; \
-	done
-	@list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
-	  if test "$$subdir" = .; then :; else \
-	    dir1=$$subdir; dir2="$(distdir)/$$subdir"; \
-	    $(am__relativize); \
-	    new_distdir=$$reldir; \
-	    dir1=$$subdir; dir2="$(top_distdir)"; \
-	    $(am__relativize); \
-	    new_top_distdir=$$reldir; \
-	    echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \
-	    echo "     am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \
-	    ($(am__cd) $$subdir && \
-	      $(MAKE) $(AM_MAKEFLAGS) \
-	        top_distdir="$$new_top_distdir" \
-	        distdir="$$new_distdir" \
-		am__remove_distdir=: \
-		am__skip_length_check=: \
-		am__skip_mode_fix=: \
-	        distdir) \
-	      || exit 1; \
-	  fi; \
-	done
-check-am: all-am
-check: check-recursive
-all-am: Makefile $(PROGRAMS)
-installdirs: installdirs-recursive
-installdirs-am:
-	for dir in "$(DESTDIR)$(bindir)"; do \
-	  test -z "$$dir" || $(MKDIR_P) "$$dir"; \
-	done
-install: install-recursive
-install-exec: install-exec-recursive
-install-data: install-data-recursive
-uninstall: uninstall-recursive
-
-install-am: all-am
-	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
-
-installcheck: installcheck-recursive
-install-strip:
-	$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
-	  install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
-	  `test -z '$(STRIP)' || \
-	    echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
-mostlyclean-generic:
-
-clean-generic:
-
-distclean-generic:
-	-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
-	-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
-
-maintainer-clean-generic:
-	@echo "This command is intended for maintainers to use"
-	@echo "it deletes files that may require special tools to rebuild."
-clean: clean-recursive
-
-clean-am: clean-binPROGRAMS clean-generic clean-libtool mostlyclean-am
-
-distclean: distclean-recursive
-	-rm -rf ./$(DEPDIR)
-	-rm -f Makefile
-distclean-am: clean-am distclean-compile distclean-generic \
-	distclean-tags
-
-dvi: dvi-recursive
-
-dvi-am:
-
-html: html-recursive
-
-html-am:
-
-info: info-recursive
-
-info-am:
-
-install-data-am:
-
-install-dvi: install-dvi-recursive
-
-install-dvi-am:
-
-install-exec-am: install-binPROGRAMS
-
-install-html: install-html-recursive
-
-install-html-am:
-
-install-info: install-info-recursive
-
-install-info-am:
-
-install-man:
-
-install-pdf: install-pdf-recursive
-
-install-pdf-am:
-
-install-ps: install-ps-recursive
-
-install-ps-am:
-
-installcheck-am:
-
-maintainer-clean: maintainer-clean-recursive
-	-rm -rf ./$(DEPDIR)
-	-rm -f Makefile
-maintainer-clean-am: distclean-am maintainer-clean-generic
-
-mostlyclean: mostlyclean-recursive
-
-mostlyclean-am: mostlyclean-compile mostlyclean-generic \
-	mostlyclean-libtool
-
-pdf: pdf-recursive
-
-pdf-am:
-
-ps: ps-recursive
-
-ps-am:
-
-uninstall-am: uninstall-binPROGRAMS
-
-.MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) ctags-recursive \
-	install-am install-strip tags-recursive
-
-.PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \
-	all all-am check check-am clean clean-binPROGRAMS \
-	clean-generic clean-libtool ctags ctags-recursive distclean \
-	distclean-compile distclean-generic distclean-libtool \
-	distclean-tags distdir dvi dvi-am html html-am info info-am \
-	install install-am install-binPROGRAMS install-data \
-	install-data-am install-dvi install-dvi-am install-exec \
-	install-exec-am install-html install-html-am install-info \
-	install-info-am install-man install-pdf install-pdf-am \
-	install-ps install-ps-am install-strip installcheck \
-	installcheck-am installdirs installdirs-am maintainer-clean \
-	maintainer-clean-generic mostlyclean mostlyclean-compile \
-	mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
-	tags tags-recursive uninstall uninstall-am \
-	uninstall-binPROGRAMS
-
-
-# Tell versions [3.59,3.63) of GNU make to not export all variables.
-# Otherwise a system limit (for SysV at least) may be exceeded.
-.NOEXPORT:
diff --git a/daemon/init.c b/daemon/init.c
deleted file mode 100644
index a79e36e..0000000
--- a/daemon/init.c
+++ /dev/null
@@ -1,372 +0,0 @@
-/**
- * @file daemon/init.c
- * Daemon set up and main loop for 2.6
- *
- * @remark Copyright 2002 OProfile authors
- * @remark Read the file COPYING
- *
- * @author John Levon
- * @author Philippe Elie
- * @Modifications Daniel Hansel
- * Modified by Aravind Menon for Xen
- * These modifications are:
- * Copyright (C) 2005 Hewlett-Packard Co.
- */
-
-#include "config.h"
- 
-#include "oprofiled.h"
-#include "opd_stats.h"
-#include "opd_sfile.h"
-#include "opd_pipe.h"
-#include "opd_kernel.h"
-#include "opd_trans.h"
-#include "opd_anon.h"
-#include "opd_perfmon.h"
-#include "opd_printf.h"
-#include "opd_extended.h"
-
-#include "op_version.h"
-#include "op_config.h"
-#include "op_deviceio.h"
-#include "op_get_time.h"
-#include "op_libiberty.h"
-#include "op_fileio.h"
-
-#include <fcntl.h>
-#include <stdio.h>
-#include <errno.h>
-#include <limits.h>
-#include <stdlib.h>
-#include <sys/time.h>
-#include <wait.h>
-#include <string.h>
-
-size_t kernel_pointer_size;
-
-static fd_t devfd;
-static char * sbuf;
-static size_t s_buf_bytesize;
-extern char * session_dir;
-static char start_time_str[32];
-static int jit_conversion_running;
-
-static void opd_sighup(void);
-static void opd_alarm(void);
-static void opd_sigterm(void);
-static void opd_sigchild(void);
-static void opd_do_jitdumps(void);
-
-/**
- * opd_open_files - open necessary files
- *
- * Open the device files and the log file,
- * and mmap() the hash map.
- */
-static void opd_open_files(void)
-{
-	devfd = op_open_device("/dev/oprofile/buffer");
-	if (devfd == -1) {
-		if (errno == EINVAL)
-			fprintf(stderr, "Failed to open device. Possibly you have passed incorrect\n"
-				"parameters. Check /var/log/messages.");
-		else
-			perror("Failed to open profile device");
-		exit(EXIT_FAILURE);
-	}
-
-	/* give output before re-opening stdout as the logfile */
-	printf("Using log file %s\n", op_log_file);
-
-	/* set up logfile */
-	close(0);
-	close(1);
-
-	if (open("/dev/null", O_RDONLY) == -1) {
-		perror("oprofiled: couldn't re-open stdin as /dev/null: ");
-		exit(EXIT_FAILURE);
-	}
-
-	opd_open_logfile();
-	opd_create_pipe();
-
-	printf("oprofiled started %s", op_get_time());
-	printf("kernel pointer size: %lu\n",
-		(unsigned long)kernel_pointer_size);
-	fflush(stdout);
-}
- 
-
-/** Done writing out the samples, indicate with complete_dump file */
-static void complete_dump(void)
-{
-	FILE * status_file;
-
-retry:
-	status_file = fopen(op_dump_status, "w");
-
-	if (!status_file && errno == EMFILE) {
-		if (sfile_lru_clear()) {
-			printf("LRU cleared but file open fails for %s.\n",
-			       op_dump_status);
-			abort();
-		}
-		goto retry;
-	}
-
-	if (!status_file) {
-		perror("warning: couldn't set complete_dump: ");
-		return;
-	}
-
-	fprintf(status_file, "1\n");
-	fclose(status_file);
-}
-
- 
-/**
- * opd_do_samples - process a sample buffer
- * @param opd_buf  buffer to process
- *
- * Process a buffer of samples.
- *
- * If the sample could be processed correctly, it is written
- * to the relevant sample file.
- */
-static void opd_do_samples(char const * opd_buf, ssize_t count)
-{
-	size_t num = count / kernel_pointer_size;
- 
-	opd_stats[OPD_DUMP_COUNT]++;
-
-	verbprintf(vmisc, "Read buffer of %d entries.\n", (unsigned int)num);
- 
-	opd_process_samples(opd_buf, num);
-
-	complete_dump();
-}
- 
-static void opd_do_jitdumps(void)
-{ 
-	pid_t childpid;
-	int arg_num;
-	unsigned long long end_time = 0ULL;
-	struct timeval tv;
-	char end_time_str[32];
-	char opjitconv_path[PATH_MAX + 1];
-	char * exec_args[6];
-
-	if (jit_conversion_running)
-		return;
-	jit_conversion_running = 1;
-
-	childpid = fork();
-	switch (childpid) {
-		case -1:
-			perror("Error forking JIT dump process!");
-			break;
-		case 0:
-			gettimeofday(&tv, NULL);
-			end_time = tv.tv_sec;
-			sprintf(end_time_str, "%llu", end_time);
-			sprintf(opjitconv_path, "%s/%s", OP_BINDIR, "opjitconv");
-			arg_num = 0;
-			exec_args[arg_num++] = "opjitconv";
-			if (vmisc)
-				exec_args[arg_num++] = "-d";
-			exec_args[arg_num++] = session_dir;
-			exec_args[arg_num++] = start_time_str;
-			exec_args[arg_num++] = end_time_str;
-			exec_args[arg_num] = (char *) NULL;
-			execvp(opjitconv_path, exec_args);
-			fprintf(stderr, "Failed to exec %s: %s\n",
-			        exec_args[0], strerror(errno));
-			/* We don't want any cleanup in the child */
-			_exit(EXIT_FAILURE);
-		default:
-			break;
-	} 
-
-} 
-
-/**
- * opd_do_read - enter processing loop
- * @param buf  buffer to read into
- * @param size  size of buffer
- *
- * Read some of a buffer from the device and process
- * the contents.
- */
-static void opd_do_read(char * buf, size_t size)
-{
-	opd_open_pipe();
-
-	while (1) {
-		ssize_t count = -1;
-
-		/* loop to handle EINTR */
-		while (count < 0) {
-			count = op_read_device(devfd, buf, size);
-
-			/* we can lose an alarm or a hup but
-			 * we don't care.
-			 */
-			if (signal_alarm) {
-				signal_alarm = 0;
-				opd_alarm();
-			}
-
-			if (signal_hup) {
-				signal_hup = 0;
-				opd_sighup();
-			}
-
-			if (signal_term)
-				opd_sigterm();
-
-			if (signal_child)
-				opd_sigchild();
-
-			if (signal_usr1) {
-				signal_usr1 = 0;
-				perfmon_start();
-			}
-
-			if (signal_usr2) {
-				signal_usr2 = 0;
-				perfmon_stop();
-			}
-
-			if (is_jitconv_requested()) {
-				verbprintf(vmisc, "Start opjitconv was triggered\n");
-				opd_do_jitdumps();
-			}
-		}
-
-		opd_do_samples(buf, count);
-	}
-	
-	opd_close_pipe();
-}
-
-
-/** opd_alarm - sync files and report stats */
-static void opd_alarm(void)
-{
-	sfile_sync_files();
-	opd_print_stats();
-	alarm(60 * 10);
-}
- 
-
-/** re-open files for logrotate/opcontrol --reset */
-static void opd_sighup(void)
-{
-	printf("Received SIGHUP.\n");
-	/* We just close them, and re-open them lazily as usual. */
-	sfile_close_files();
-	close(1);
-	close(2);
-	opd_open_logfile();
-}
-
-
-static void clean_exit(void)
-{
-	perfmon_exit();
-	unlink(op_lock_file);
-}
-
-
-static void opd_sigterm(void)
-{
-	opd_do_jitdumps();
-	opd_print_stats();
-	printf("oprofiled stopped %s", op_get_time());
-	opd_ext_deinitialize();
-
-	exit(EXIT_FAILURE);
-}
-
-/* SIGCHLD received from JIT dump child process. */
-static void opd_sigchild(void)
-{
-	int child_status;
-	wait(&child_status);
-	jit_conversion_running = 0;
-	if (WIFEXITED(child_status) && (!WEXITSTATUS(child_status))) {
-		verbprintf(vmisc, "JIT dump processing complete.\n");
-	} else {
-		printf("JIT dump processing exited abnormally: %d\n",
-		       WEXITSTATUS(child_status));
-	}
-
-}
- 
-static void opd_26_init(void)
-{
-	size_t i;
-	size_t opd_buf_size;
-	unsigned long long start_time = 0ULL;
-	struct timeval tv;
-
-	opd_create_vmlinux(vmlinux, kernel_range);
-	opd_create_xen(xenimage, xen_range);
-
-	opd_buf_size = opd_read_fs_int("/dev/oprofile/", "buffer_size", 1);
-	kernel_pointer_size = opd_read_fs_int("/dev/oprofile/", "pointer_size", 1);
-
-	s_buf_bytesize = opd_buf_size * kernel_pointer_size;
-
-	sbuf = xmalloc(s_buf_bytesize);
-
-	opd_reread_module_info();
-
-	for (i = 0; i < OPD_MAX_STATS; i++)
-		opd_stats[i] = 0;
-
-	perfmon_init();
-
-	cookie_init();
-	sfile_init();
-	anon_init();
-
-	/* must be /after/ perfmon_init() at least */
-	if (atexit(clean_exit)) {
-		perfmon_exit();
-		perror("oprofiled: couldn't set exit cleanup: ");
-		exit(EXIT_FAILURE);
-	}
-
-	/* trigger kernel module setup before returning control to opcontrol */
-	opd_open_files();
-	gettimeofday(&tv, NULL);
-	start_time = 0ULL;
-	start_time = tv.tv_sec;
-	sprintf(start_time_str, "%llu", start_time);
-		  
-}
-
-
-static void opd_26_start(void)
-{
-	/* simple sleep-then-process loop */
-	opd_do_read(sbuf, s_buf_bytesize);
-}
-
-
-static void opd_26_exit(void)
-{
-	opd_print_stats();
-	printf("oprofiled stopped %s", op_get_time());
-
-	free(sbuf);
-	free(vmlinux);
-	/* FIXME: free kernel images, sfiles etc. */
-}
-
-struct oprofiled_ops opd_26_ops = {
-	.init = opd_26_init,
-	.start = opd_26_start,
-	.exit = opd_26_exit,
-};
diff --git a/daemon/liblegacy/Makefile.am b/daemon/liblegacy/Makefile.am
deleted file mode 100644
index 5c71dd8..0000000
--- a/daemon/liblegacy/Makefile.am
+++ /dev/null
@@ -1,29 +0,0 @@
-noinst_LIBRARIES = liblegacy.a
-
-# -fno-omit-frame-pointer needed for daemon build: see ChangeLog-2004 02-23
-AM_CFLAGS = @OP_CFLAGS@ -fno-omit-frame-pointer
-
-AM_CPPFLAGS = \
-	-I ${top_srcdir}/libutil \
-	-I ${top_srcdir}/libop \
-	-I ${top_srcdir}/libdb \
-	-I ${top_srcdir}/daemon \
-	@OP_CPPFLAGS@
-
-liblegacy_a_SOURCES = \
-	opd_24_stats.c \
-	opd_24_stats.h \
-	opd_kernel.c \
-	opd_proc.c \
-	opd_image.c \
-	opd_mapping.c \
-	opd_parse_proc.c \
-	opd_image.h \
-	opd_mapping.h \
-	p_module.h \
-	opd_kernel.h \
-	opd_parse_proc.h \
-	opd_proc.h \
-	opd_sample_files.c \
-	opd_sample_files.h \
-	init.c
diff --git a/daemon/liblegacy/init.c b/daemon/liblegacy/init.c
deleted file mode 100644
index b2cd013..0000000
--- a/daemon/liblegacy/init.c
+++ /dev/null
@@ -1,387 +0,0 @@
-/**
- * @file daemon/liblegacy/init.c
- * Daemon set up and main loop for 2.4
- *
- * @remark Copyright 2002 OProfile authors
- * @remark Read the file COPYING
- *
- * @author John Levon
- * @author Philippe Elie
- */
-
-#include "config.h"
- 
-#include "opd_proc.h"
-#include "opd_mapping.h"
-#include "opd_24_stats.h"
-#include "opd_sample_files.h"
-#include "opd_image.h"
-#include "opd_parse_proc.h"
-#include "opd_kernel.h"
-#include "opd_printf.h"
-#include "oprofiled.h"
-
-#include "op_sample_file.h"
-#include "op_config_24.h"
-#include "op_interface.h"
-#include "op_libiberty.h"
-#include "op_deviceio.h"
-#include "op_events.h"
-#include "op_get_time.h"
-#include "op_fileio.h"
-
-#include <stdio.h>
-#include <errno.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include <stdlib.h>
-
-fd_t hashmapdevfd;
-
-int cpu_number;
-
-static fd_t devfd;
-static fd_t notedevfd;
-static struct op_buffer_head * sbuf;
-static size_t s_buf_bytesize;
-static struct op_note * nbuf;
-static size_t n_buf_bytesize;
-
-static void opd_sighup(void);
-static void opd_alarm(void);
-static void opd_sigterm(void);
-
-
-/**
- * op_open_files - open necessary files
- *
- * Open the device files and the log file,
- * and mmap() the hash map.
- */
-static void op_open_files(void)
-{
-	hashmapdevfd = op_open_device(op_hash_device);
-	if (hashmapdevfd == -1) {
-		perror("Failed to open hash map device");
-		exit(EXIT_FAILURE);
-	}
-
-	notedevfd = op_open_device(op_note_device);
-	if (notedevfd == -1) {
-		if (errno == EINVAL)
-			fprintf(stderr, "Failed to open note device. Possibly you have passed incorrect\n"
-				"parameters. Check /var/log/messages.");
-		else
-			perror("Failed to open note device");
-		exit(EXIT_FAILURE);
-	}
-
-	devfd = op_open_device(op_device);
-	if (devfd == -1) {
-		if (errno == EINVAL)
-			fprintf(stderr, "Failed to open device. Possibly you have passed incorrect\n"
-				"parameters. Check /var/log/messages.");
-		else
-			perror("Failed to open profile device");
-		exit(EXIT_FAILURE);
-	}
-
-	opd_init_hash_map();
-
-	/* give output before re-opening stdout as the logfile */
-	printf("Using log file %s\n", op_log_file);
-
-	/* set up logfile */
-	close(0);
-	close(1);
-
-	if (open("/dev/null", O_RDONLY) == -1) {
-		perror("oprofiled: couldn't re-open stdin as /dev/null: ");
-		exit(EXIT_FAILURE);
-	}
-
-	opd_open_logfile();
-
-	printf("oprofiled started %s", op_get_time());
-	fflush(stdout);
-}
- 
-
-static void opd_do_samples(struct op_buffer_head const * buf);
-static void opd_do_notes(struct op_note const * opd_buf, size_t count);
-
-/**
- * do_shutdown - shutdown cleanly, reading as much remaining data as possible.
- * @param buf  sample buffer area
- * @param size  size of sample buffer
- * @param nbuf  note buffer area
- * @param nsize  size of note buffer
- */
-static void opd_shutdown(struct op_buffer_head * buf, size_t size, struct op_note * nbuf, size_t nsize)
-{
-	ssize_t count = -1;
-	ssize_t ncount = -1;
-
-	/* the dump may have added no samples, so we must set
-	 * non-blocking */
-	if (fcntl(devfd, F_SETFL, fcntl(devfd, F_GETFL) | O_NONBLOCK) < 0) {
-		perror("Failed to set non-blocking read for device: ");
-		exit(EXIT_FAILURE);
-	}
-
-	/* it's always OK to read the note device */
-	while (ncount < 0)
-		ncount = op_read_device(notedevfd, nbuf, nsize);
-
-	if (ncount > 0)
-		opd_do_notes(nbuf, ncount);
-
-	/* read as much as we can until we have exhausted the data
-	 * (EAGAIN is returned).
-	 *
-	 * This will not livelock as the profiler has been partially
-	 * shut down by now.
-	 */
-	while (1) {
-		count = op_read_device(devfd, buf, size);
-		if (count < 0 && errno == EAGAIN)
-			break;
-		verbprintf(vmisc, "Shutting down, state %d\n", buf->state);
-		opd_do_samples(buf);
-	}
-}
- 
-
-/**
- * opd_do_read - enter processing loop
- * @param buf  buffer to read into
- * @param size  size of buffer
- * @param nbuf  note buffer
- * @param nsize  size of note buffer
- *
- * Read some of a buffer from the device and process
- * the contents.
- */
-static void opd_do_read(struct op_buffer_head * buf, size_t size, struct op_note * nbuf, size_t nsize)
-{
-	while (1) {
-		ssize_t count = -1;
-		ssize_t ncount = -1;
-
-		/* loop to handle EINTR */
-		while (count < 0)
-			count = op_read_device(devfd, buf, size);
-
-		while (ncount < 0)
-			ncount = op_read_device(notedevfd, nbuf, nsize);
-
-		opd_do_notes(nbuf, ncount);
-		opd_do_samples(buf);
-
-		// we can lost a signal alarm or a signal hup but we don't
-		// take care.
-		if (signal_alarm) {
-			signal_alarm = 0;
-			opd_alarm();
-		}
-
-		if (signal_hup) {
-			signal_hup = 0;
-			opd_sighup();
-		}
-
-		if (signal_term)
-			opd_sigterm();
- 
-		/* request to stop arrived */
-		if (buf->state == STOPPING) {
-			verbprintf(vmisc, "Shutting down by request.\n");
-			opd_shutdown(buf, size, nbuf, nsize);
-			return;
-		}
-	}
-}
-
-/**
- * opd_do_notes - process a notes buffer
- * @param opd_buf  buffer to process
- * @param count  number of bytes in buffer
- *
- * Process a buffer of notes.
- */
-static void opd_do_notes(struct op_note const * opd_buf, size_t count)
-{
-	uint i;
-	struct op_note const * note;
-
-	for (i = 0; i < count/sizeof(struct op_note); i++) {
-		note = &opd_buf[i];
-
-		opd_24_stats[OPD_NOTIFICATIONS]++;
-
-		switch (note->type) {
-			case OP_MAP:
-			case OP_EXEC:
-				if (note->type == OP_EXEC)
-					opd_handle_exec(note->pid, note->tgid);
-				opd_handle_mapping(note);
-				break;
-
-			case OP_FORK:
-				opd_handle_fork(note);
-				break;
-
-			case OP_DROP_MODULES:
-				opd_clear_module_info();
-				break;
-
-			case OP_EXIT:
-				opd_handle_exit(note);
-				break;
-
-			default:
-				fprintf(stderr, "Received unknown notification type %u\n", note->type);
-				abort();
-				break;
-		}
-	}
-}
-
-/**
- * opd_do_samples - process a sample buffer
- * @param opd_buf  buffer to process
- *
- * Process a buffer of samples.
- * The signals specified by the global variable maskset are
- * masked.
- *
- * If the sample could be processed correctly, it is written
- * to the relevant sample file. Additionally mapping and
- * process notifications are handled here.
- */
-static void opd_do_samples(struct op_buffer_head const * opd_buf)
-{
-	uint i;
-	struct op_sample const * buffer = opd_buf->buffer; 
-
-	opd_24_stats[OPD_DUMP_COUNT]++;
-
-	verbprintf(vmisc, "Read buffer of %d entries for cpu %d.\n",
-		   (unsigned int)opd_buf->count, opd_buf->cpu_nr);
- 
-	if (separate_cpu)
-		cpu_number = opd_buf->cpu_nr;
-	for (i = 0; i < opd_buf->count; i++) {
-		verbprintf(vsamples, "%.6u: EIP: 0x%.8lx pid: %.6d\n",
-			i, buffer[i].eip, buffer[i].pid);
-		opd_put_sample(&buffer[i]);
-	}
-}
-
-
-/**
- * opd_alarm - clean up old procs, msync, and report stats
- */
-static void opd_alarm(void)
-{
-	opd_sync_samples_files();
-
-	opd_age_procs();
-
-	opd_print_24_stats();
-
-	alarm(60 * 10);
-}
- 
-
-/* re-open logfile for logrotate */
-static void opd_sighup(void)
-{
-	printf("Received SIGHUP.\n");
-	close(1);
-	close(2);
-	opd_open_logfile();
-	/* We just close them, and re-open them lazily as usual. */
-	opd_for_each_image(opd_close_image_samples_files);
-}
-
-
-static void clean_exit(void)
-{
-	opd_cleanup_hash_name();
-	op_free_events();
-	unlink(op_lock_file);
-}
-
-
-static void opd_sigterm(void)
-{
-	opd_print_24_stats();
-	printf("oprofiled stopped %s", op_get_time());
-	exit(EXIT_FAILURE);
-}
- 
-
-static void opd_24_init(void)
-{
-	size_t i;
-	int opd_buf_size = OP_DEFAULT_BUF_SIZE;
-	int opd_note_buf_size = OP_DEFAULT_NOTE_SIZE;
-
-	if (!no_vmlinux)
-		opd_parse_kernel_range(kernel_range);
-	opd_buf_size = opd_read_fs_int(OP_MOUNT, "bufsize", 1);
-	opd_note_buf_size = opd_read_fs_int(OP_MOUNT, "notesize", 1);
-
-	s_buf_bytesize = sizeof(struct op_buffer_head) + opd_buf_size * sizeof(struct op_sample);
-
-	sbuf = xmalloc(s_buf_bytesize);
-
-	n_buf_bytesize = opd_note_buf_size * sizeof(struct op_note);
-	nbuf = xmalloc(n_buf_bytesize);
-
-	opd_init_images();
-	opd_init_procs();
-	opd_init_kernel_image();
-
-	for (i = 0; i < OPD_MAX_STATS; i++)
-		opd_24_stats[i] = 0;
-
-	if (atexit(clean_exit)) {
-		perror("oprofiled: couldn't set exit cleanup: ");
-		exit(EXIT_FAILURE);
-	}
-}
-
-
-static void opd_24_start(void)
-{
-	op_open_files();
-
-	/* yes, this is racey. */
-	opd_get_ascii_procs();
-
-	/* simple sleep-then-process loop */
-	opd_do_read(sbuf, s_buf_bytesize, nbuf, n_buf_bytesize);
-}
-
-
-static void opd_24_exit(void)
-{
-	opd_print_24_stats();
-	printf("oprofiled stopped %s", op_get_time());
-
-	free(sbuf);
-	free(nbuf);
-	opd_clear_module_info();
-	opd_proc_cleanup();
-	/* kernel/module image are not owned by a proc, we must cleanup them */
-	opd_for_each_image(opd_delete_image);
-}
-
-
-struct oprofiled_ops opd_24_ops = {
-	.init = opd_24_init,
-	.start = opd_24_start,
-	.exit = opd_24_exit
-};
diff --git a/daemon/liblegacy/opd_24_stats.c b/daemon/liblegacy/opd_24_stats.c
deleted file mode 100644
index 52425c4..0000000
--- a/daemon/liblegacy/opd_24_stats.c
+++ /dev/null
@@ -1,62 +0,0 @@
-/**
- * @file opd_24_stats.c
- * Management of daemon statistics
- *
- * @remark Copyright 2002 OProfile authors
- * @remark Read the file COPYING
- *
- * @author John Levon
- * @author Philippe Elie
- */
-
-#include "opd_24_stats.h"
-#include "opd_proc.h"
-#include "opd_image.h"
-#include "oprofiled.h"
-
-#include "op_get_time.h"
-
-#include <stdlib.h>
-#include <stdio.h>
-
-unsigned long opd_24_stats[OPD_MAX_STATS];
-
-void opd_print_24_stats(void)
-{
-	printf("\n%s\n", op_get_time());
-	printf("Nr. proc struct: %d\n", opd_get_nr_procs());
-	printf("Nr. image struct: %d\n", opd_get_nr_images());
-	printf("Nr. kernel samples: %lu\n", opd_24_stats[OPD_KERNEL]);
-	printf("Nr. modules samples: %lu\n", opd_24_stats[OPD_MODULE]);
-	printf("Nr. modules samples lost: %lu\n", opd_24_stats[OPD_LOST_MODULE]);
-	printf("Nr. samples lost due to no process information: %lu\n",
-		opd_24_stats[OPD_LOST_PROCESS]);
-	printf("Nr. samples lost due to sample file open failure: %lu\n",
-		opd_24_stats[OPD_LOST_SAMPLEFILE]);
-	printf("Nr. process samples in user-space: %lu\n", opd_24_stats[OPD_PROCESS]);
-	printf("Nr. samples lost due to no map information: %lu\n",
-		opd_24_stats[OPD_LOST_MAP_PROCESS]);
-	if (opd_24_stats[OPD_PROC_QUEUE_ACCESS]) {
-		printf("Average depth of search of proc queue: %f\n",
-			(double)opd_24_stats[OPD_PROC_QUEUE_DEPTH]
-			/ (double)opd_24_stats[OPD_PROC_QUEUE_ACCESS]);
-	}
-	if (opd_24_stats[OPD_MAP_ARRAY_ACCESS]) {
-		printf("Average depth of iteration through mapping array: %f\n",
-			(double)opd_24_stats[OPD_MAP_ARRAY_DEPTH]
-			/ (double)opd_24_stats[OPD_MAP_ARRAY_ACCESS]);
-	}
-	if (opd_24_stats[OPD_IMAGE_HASH_ACCESS]) {
-		printf("Average depth of iteration through image hash array: %f\n",
-			(double)opd_24_stats[OPD_IMAGE_HASH_DEPTH]
-			/ (double)opd_24_stats[OPD_IMAGE_HASH_ACCESS]);
-	}
-	printf("Nr. sample dumps: %lu\n", opd_24_stats[OPD_DUMP_COUNT]);
-	printf("Nr. samples total: %lu\n", opd_24_stats[OPD_SAMPLES]);
-	printf("Nr. notifications: %lu\n", opd_24_stats[OPD_NOTIFICATIONS]);
-	printf("Nr. kernel note buffer overflow: %u\n",
-	       opd_read_fs_int(OP_MOUNT, "note_buffer_overflow", 0));
-	printf("Nr. kernel samples buffer overflow: %u\n",
-	       opd_read_fs_int(OP_MOUNT, "buffer_overflow", 0));
-	fflush(stdout);
-}
diff --git a/daemon/liblegacy/opd_24_stats.h b/daemon/liblegacy/opd_24_stats.h
deleted file mode 100644
index 4acd860..0000000
--- a/daemon/liblegacy/opd_24_stats.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/**
- * @file opd_24_stats.h
- * Management of daemon statistics
- *
- * @remark Copyright 2002 OProfile authors
- * @remark Read the file COPYING
- *
- * @author John Levon
- * @author Philippe Elie
- */
-
-#ifndef OPD_24_STATS_H
-#define OPD_24_STATS_H
-
-extern unsigned long opd_24_stats[];
-
-enum {  OPD_KERNEL, /**< nr kernel samples */
-	OPD_MODULE, /**< nr module samples */
-	OPD_LOST_MODULE, /**< nr samples in module for which modules can not be located */
-	OPD_LOST_PROCESS, /**< nr samples for which process info couldn't be accessed */
-	OPD_PROCESS, /**< nr userspace samples */
-	OPD_LOST_MAP_PROCESS, /**< nr samples for which map info couldn't be accessed */
-	OPD_LOST_SAMPLEFILE, /**< nr samples for which sample file can't be opened */
-	OPD_PROC_QUEUE_ACCESS, /**< nr accesses of proc queue */
-	OPD_PROC_QUEUE_DEPTH, /**< cumulative depth of proc queue accesses */
-	OPD_DUMP_COUNT, /**< nr of times buffer is read */
-	OPD_MAP_ARRAY_ACCESS, /**< nr accesses of map array */
-	OPD_MAP_ARRAY_DEPTH, /**< cumulative depth of map array accesses */
-	OPD_IMAGE_HASH_ACCESS,  /**< nr opd_find_image() */
-	OPD_IMAGE_HASH_DEPTH,  /**< cumulative depth of image search */
-	OPD_SAMPLES, /**< nr distinct samples */
-	OPD_NOTIFICATIONS, /**< nr notifications */
-	OPD_MAX_STATS /**< end of stats */
-	};
-
-/** opd_print_24_stats - print out latest statistics */
-void opd_print_24_stats(void);
-
-#endif /* OPD_24_STATS_H */
diff --git a/daemon/liblegacy/opd_image.c b/daemon/liblegacy/opd_image.c
deleted file mode 100644
index ac6b5a2..0000000
--- a/daemon/liblegacy/opd_image.c
+++ /dev/null
@@ -1,264 +0,0 @@
-/**
- * @file opd_image.c
- * Management of binary images
- *
- * @remark Copyright 2002 OProfile authors
- * @remark Read the file COPYING
- *
- * @author John Levon
- * @author Philippe Elie
- */
-
-#include "opd_image.h"
-#include "opd_printf.h"
-#include "opd_sample_files.h"
-#include "opd_24_stats.h"
-#include "oprofiled.h"
-
-#include "op_file.h"
-#include "op_config_24.h"
-#include "op_libiberty.h"
-#include "op_string.h"
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-/* maintained for statistics purpose only */
-static int nr_images;
-
-/* list of images */
-#define OPD_IMAGE_HASH_SIZE 2048
-static struct list_head opd_images[OPD_IMAGE_HASH_SIZE];
-
-
-void opd_init_images(void)
-{
-	int i;
-	for (i = 0; i < OPD_IMAGE_HASH_SIZE; ++i)
-		list_init(&opd_images[i]);
-}
-
-
-int opd_get_nr_images(void)
-{
-	return nr_images;
-}
-
-
-void opd_delete_image(struct opd_image * image)
-{
-	verbprintf(vmisc, "Deleting image: name %s app_name %s, kernel %d, "
-	           "tid %d, tgid %d ref count %u\n",
-	           image->name, image->app_name, image->kernel,
-	           image->tid, image->tgid, (int)image->ref_count);
-
-	if (image->ref_count <= 0) {
-		printf("image->ref_count < 0 for image: name %s app_name %s, "
-		       "kernel %d, tid %d, tgid %d ref count %u\n",
-		       image->name, image->app_name, image->kernel,
-		       image->tid, image->tgid, image->ref_count);
-		abort();
-	}
-
-	if (--image->ref_count != 0)
-		return;
-
-	if (image->name)
-		free(image->name);
-	if (image->app_name)
-		free(image->app_name);
-	list_del(&image->hash_next);
-	opd_close_image_samples_files(image);
-	free(image);
-
-	nr_images--;
-}
-
-
-void opd_for_each_image(opd_image_cb image_cb)
-{
-	struct list_head * pos;
-	struct list_head * pos2;
-	int i;
-
-	for (i = 0; i < OPD_IMAGE_HASH_SIZE; ++i) {
-		list_for_each_safe(pos, pos2, &opd_images[i]) {
-			struct opd_image * image =
-				list_entry(pos, struct opd_image, hash_next);
-			image_cb(image);
-		}
-	}
-}
- 
-
-/**
- * opd_hash_image - hash an image
- * @param hash  hash of image name
- * @param tid  thread id
- * @param tgid  thread group id
- *
- * return the hash code for the passed parameters
- */
-static size_t opd_hash_image(char const * name, pid_t tid, pid_t tgid)
-{
-	size_t hash = op_hash_string(name);
-	if (separate_thread)
-		hash += tid + tgid;
-	return  hash % OPD_IMAGE_HASH_SIZE;
-}
-
-
-/**
- * opd_new_image - create an image sample file
- * @param app_name  the application name where belongs this image
- * @param name  name of the image to add
- * @param kernel  is the image a kernel/module image
- * @param tid  thread id
- * @param tgid  thread group id
- *
- * image at funtion entry is uninitialised
- * name is copied i.e. should be GC'd separately from the
- * image structure if appropriate.
- *
- * Initialise an opd_image struct for the image image
- * without opening the associated samples files. At return
- * the image is fully initialized.
- */
-static struct opd_image *
-opd_new_image(char const * name, char const * app_name, int kernel,
-              pid_t tid, pid_t tgid)
-{
-	size_t hash_image;
-	struct opd_image * image;
-
-	verbprintf(vmisc, "Creating image: %s %s, kernel %d, tid %d, "
-	           "tgid %d\n", name, app_name, kernel, tid, tgid);
-
-	image = xmalloc(sizeof(struct opd_image));
-
-	list_init(&image->hash_next);
-	image->name = xstrdup(name);
-	image->kernel = kernel;
-	image->tid = tid;
-	image->tgid = tgid;
-	image->ref_count = 0;
-	image->app_name = app_name ? xstrdup(app_name) : NULL;
-	image->mtime = op_get_mtime(image->name);
-
-	image->ignored = 1;
-	if (separate_lib && app_name)
-		image->ignored = is_image_ignored(app_name);
-	if (image->ignored)
-		image->ignored = is_image_ignored(name);
-
-	memset(image->sfiles, '\0', NR_CPUS * sizeof(struct opd_24_sfile **));
-
-	hash_image = opd_hash_image(name, tid, tgid);
-	list_add(&image->hash_next, &opd_images[hash_image]);
-
-	nr_images++;
-
-	return image;
-}
-
-
-/**
- * is_same_image - check for identical image
- * @param image  image to compare
- * @param name  name of image
- * @param app_name image must belong to this application name
- * @param tid  thread id
- * @param tgid  thread group id
- *
- * on entry caller have checked than strcmp(image->name, name) == 0
- * return 0 if the couple (name, app_name) refers to same image
- */
-static int is_same_image(struct opd_image const * image, char const * app_name,
-                         pid_t tid, pid_t tgid)
-{
-	/* correctness is really important here, if we fail to recognize
-	 * identical image we will open/mmap multiple time the same samples
-	 * files which is not supported by the kernel, strange assertion
-	 * failure in libfd is a typical symptom of that */
-
-	if (separate_thread) {
-		if (image->tid != tid || image->tgid != tgid)
-			return 1;
-	}
-
-	/* if !separate_lib, the comparison made by caller is enough */
-	if (!separate_lib)
-		return 0;
-
-	if (image->app_name == NULL && app_name == NULL)
-		return 0;
-
-	if (image->app_name != NULL && app_name != NULL &&
-	    !strcmp(image->app_name, app_name))
-		return 0;
-
-	/* /proc parsed image come with a non null app_name but notification
-	 * for application itself come with a null app_name, in this case
-	 * the test above fail so check for this case. */
-	if (image->app_name && !app_name && !strcmp(image->app_name, image->name))
-		return 0;
-
-	return 1;
-}
-
-
-/**
- * opd_find_image - find an image
- * @param name  name of image to find
- * @param hash  hash of image to find
- * @param app_name  the application name where belongs this image
- * @param tid  thread id
- * @param tgid  thread group id
- *
- * Returns the image pointer for the file specified by name, or %NULL.
- */
-static struct opd_image * opd_find_image(char const * name, 
-                                char const * app_name, pid_t tid, pid_t tgid)
-{
-	/* suppress uninitialized use warning */
-	struct opd_image * image = 0;
-	struct list_head * pos;
-	size_t bucket;
-
-	opd_24_stats[OPD_IMAGE_HASH_ACCESS]++;
-	bucket = opd_hash_image(name, tid, tgid);
-	list_for_each(pos, &opd_images[bucket]) {
-		opd_24_stats[OPD_IMAGE_HASH_DEPTH]++;
-		image = list_entry(pos, struct opd_image, hash_next);
-
-		if (!strcmp(image->name, name)) {
-			if (!is_same_image(image, app_name, tid, tgid))
-				break;
-		}
-	}
-
-	if (pos == &opd_images[bucket])
-		return NULL;
-
-	/* The app_name field is always valid */
-	return image;
-}
-
- 
-struct opd_image * opd_get_image(char const * name, char const * app_name,
-                                 int kernel, pid_t tid, pid_t tgid)
-{
-	struct opd_image * image;
-	if ((image = opd_find_image(name, app_name, tid, tgid)) == NULL)
-		image = opd_new_image(name, app_name, kernel, tid, tgid);
-
-	return image;
-}
-
-
-struct opd_image * opd_get_kernel_image(char const * name,
-                               char const * app_name, pid_t tid, pid_t tgid)
-{
-	return opd_get_image(name, app_name, 1, tid, tgid);
-}
diff --git a/daemon/liblegacy/opd_image.h b/daemon/liblegacy/opd_image.h
deleted file mode 100644
index 5d55270..0000000
--- a/daemon/liblegacy/opd_image.h
+++ /dev/null
@@ -1,111 +0,0 @@
-/**
- * @file opd_image.h
- * Management of binary images
- *
- * @remark Copyright 2002 OProfile authors
- * @remark Read the file COPYING
- *
- * @author John Levon
- * @author Philippe Elie
- */
-
-#ifndef OPD_IMAGE_H
-#define OPD_IMAGE_H
-
-#include "op_list.h"
-#include "op_config_24.h"
-#include "op_types.h"
-
-#include <time.h>
-
-struct opd_24_sfile;
-
-/**
- * A binary (library, application, kernel or module)
- * is represented by a struct opd_image.
- */
-struct opd_image {
-	/** used by container of opd_images */
-	struct list_head hash_next;
-	/** how many time this opd_image is referenced */
-	int ref_count;
-	/** all samples files belonging to this image */
-	struct opd_24_sfile ** sfiles[NR_CPUS];
-	/** name of this image */
-	char * name;
-	/** the application name where belongs this image, NULL if image has
-	 * no owner (such as vmlinux or module) */
-	char * app_name;
-	/** thread id, on 2.2 kernel always == to tgid */
-	pid_t tid;
-	/** thread group id  */
-	pid_t tgid;
-	/** time of last modification */
-	time_t mtime;
-	/** kernel image or not */
-	int kernel;
-	/** non zero if this image must be profiled */
-	int ignored;
-};
-
-/** callback function passed to opd_for_each_image() */
-typedef void (*opd_image_cb)(struct opd_image *);
-
-/**
- * @param imagecb callback to apply onto each existing image struct
- *
- * the callback receive a struct opd_image * (not a const struct) and is
- * allowed to freeze the image struct itself.
- */
-void opd_for_each_image(opd_image_cb imagecb);
-
-/**
- * initialize opd_image container
- */
-void opd_init_images(void);
-
-/**
- * @param image  the image pointer
- *
- * Decrement reference count of image, if reference count is zero flush and
- * close the samples files then freeze all memory belonging to this image.
- */
-void opd_delete_image(struct opd_image * image);
-
-/**
- * opd_get_kernel_image - get a kernel image
- * @param name of image
- * @param app_name application owner of this kernel image. non-null only
- *  when separate_kernel_sample != 0
- * @param tid  thread id
- * @param tgid  thread group id
- *
- * Create and initialise an image adding it to the image lists and to image
- * hash list. Note than at creation reference count is zero, it's caller
- * responsabilities to incr this count.
- */
-struct opd_image * opd_get_kernel_image(char const * name,
-     char const * app_name, pid_t tid, pid_t tgid);
-
-/**
- * opd_get_image - get an image from the image structure
- * @param name  name of image
- * @param app_name  the application name where belongs this image
- * @param kernel  is the image a kernel/module image
- * @param tid  thread id
- * @param tgid  thread group id
- *
- * Get the image specified by the file name name from the
- * image structure. If it is not present, the image is
- * added to the structure. In either case, the image number
- * is returned.
- */
-struct opd_image * opd_get_image(char const * name, char const * app_name,
-                                 int kernel, pid_t tid, pid_t tgid);
-
-/**
- * opd_get_nr_images - return number of images
- */
-int opd_get_nr_images(void);
-
-#endif /* OPD_IMAGE_H */
diff --git a/daemon/liblegacy/opd_kernel.c b/daemon/liblegacy/opd_kernel.c
deleted file mode 100644
index 1131aa7..0000000
--- a/daemon/liblegacy/opd_kernel.c
+++ /dev/null
@@ -1,464 +0,0 @@
-/**
- * @file daemon/liblegacy/opd_kernel.c
- * Dealing with the kernel and kernel module samples
- *
- * @remark Copyright 2002 OProfile authors
- * @remark Read the file COPYING
- *
- * @author John Levon
- * @author Philippe Elie
- */
-
-#include "opd_kernel.h"
-#include "opd_proc.h"
-#include "opd_image.h"
-#include "opd_mapping.h"
-#include "opd_printf.h"
-#include "opd_24_stats.h"
-#include "oprofiled.h"
-
-#include "op_fileio.h"
-#include "op_config_24.h"
-#include "op_libiberty.h"
-
-#include "p_module.h"
-#include <string.h>
-#include <stdlib.h>
-#include <errno.h>
-
-/* kernel module */
-struct opd_module {
-	char * name;
-	struct opd_image * image;
-	unsigned long start;
-	unsigned long end;
-	struct list_head module_list;
-};
-
-static struct opd_image * kernel_image;
-
-/* kernel and module support */
-static unsigned long kernel_start;
-static unsigned long kernel_end;
-static struct list_head opd_modules = { &opd_modules, &opd_modules };
-static unsigned int nr_modules=0;
-
-void opd_init_kernel_image(void)
-{
-	/* for no vmlinux */
-	if (!vmlinux)
-		vmlinux = "no-vmlinux";
-	kernel_image = opd_get_kernel_image(vmlinux, NULL, 0, 0);
-	kernel_image->ref_count++;
-}
-
-
-void opd_parse_kernel_range(char const * arg)
-{
-	sscanf(arg, "%lx,%lx", &kernel_start, &kernel_end);
-
-	verbprintf(vmisc, "OPD_PARSE_KERNEL_RANGE: kernel_start = %lx, kernel_end = %lx\n",
-		   kernel_start, kernel_end);
-
-	if (!kernel_start && !kernel_end) {
-		fprintf(stderr,
-			"Warning: mis-parsed kernel range: %lx-%lx\n",
-			kernel_start, kernel_end);
-		fprintf(stderr, "kernel profiles will be wrong.\n");
-	}
-}
-
-
-/**
- * opd_create_module - allocate and initialise a module description
- * @param name module name
- * @param start start address
- * @param end end address
- */
-static struct opd_module *
-opd_create_module(char * name, unsigned long start, unsigned long end)
-{
-	struct opd_module * module = xmalloc(sizeof(struct opd_module));
-
-	module->name = xstrdup(name);
-	module->image = NULL;
-	module->start = start;
-	module->end = end;
-	list_add(&module->module_list, &opd_modules);
-
-	return module;
-}
-
-
-/**
- * opd_find_module_by_name - find a module by name, ccreating a new once if
- * search fail
- * @param name module name
- */
-static struct opd_module * opd_find_module_by_name(char * name)
-{
-	struct list_head * pos;
-	struct opd_module * module;
-
-	list_for_each(pos, &opd_modules) {
-		module = list_entry(pos, struct opd_module, module_list);
-		if (!strcmp(name, module->name))
-			return module;
-	}
-
-	return opd_create_module(name, 0, 0);
-}
-
-
-void opd_clear_module_info(void)
-{
-	struct list_head * pos;
-	struct list_head * pos2;
-	struct opd_module * module;
-
-	verbprintf(vmodule, "Removing module list\n");
-	list_for_each_safe(pos, pos2, &opd_modules) {
-		module = list_entry(pos, struct opd_module, module_list);
-		free(module->name);
-		free(module);
-	}
-
-	list_init(&opd_modules);
-
-	opd_clear_kernel_mapping();
-}
-
-
-/**
- * opd_get_module_info - parse mapping information for kernel modules
- *
- * Parse the file /proc/ksyms to read in mapping information for
- * all kernel modules. The modutils package adds special symbols
- * to this file which allows determination of the module image
- * and mapping addresses of the form :
- *
- * __insmod_modulename_Oobjectfile_Mmtime_Vversion
- * __insmod_modulename_Ssectionname_Llength
- *
- * Currently the image file "objectfile" is stored, and details of
- * ".text" sections.
- *
- * There is no query_module API that allow to get directly the pathname
- * of a module so we need to parse all the /proc/ksyms.
- */
-static void opd_get_module_info(void)
-{
-	char * line;
-	char * cp, * cp2, * cp3;
-	FILE * fp;
-	struct opd_module * mod;
-	char * modname;
-	char * filename;
-
-	nr_modules=0;
-
-	fp = op_try_open_file("/proc/ksyms", "r");
-
-	if (!fp) {
-		printf("oprofiled: /proc/ksyms not readable, can't process module samples.\n");
-		return;
-	}
-
-	verbprintf(vmodule, "Read module info.\n");
-
-	while (1) {
-		line = op_get_line(fp);
-
-		if (!line)
-			break;
-
-		if (!strcmp("", line)) {
-			free(line);
-			continue;
-		}
-
-		if (strlen(line) < 9) {
-			printf("oprofiled: corrupt /proc/ksyms line \"%s\"\n", line);
-			break;
-		}
-
-		if (strncmp("__insmod_", line + 9, 9)) {
-			free(line);
-			continue;
-		}
-
-		cp = line + 18;
-		cp2 = cp;
-		while ((*cp2) && !!strncmp("_S", cp2+1, 2) && !!strncmp("_O", cp2+1, 2))
-			cp2++;
-
-		if (!*cp2) {
-			printf("oprofiled: corrupt /proc/ksyms line \"%s\"\n", line);
-			break;
-		}
-
-		cp2++;
-
-		modname = xmalloc((size_t)((cp2-cp) + 1));
-		strncpy(modname, cp, (size_t)((cp2-cp)));
-		modname[cp2-cp] = '\0';
-
-		mod = opd_find_module_by_name(modname);
-
-		free(modname);
-
-		switch (*(++cp2)) {
-			case 'O':
-				/* get filename */
-				cp2++;
-				cp3 = cp2;
-
-				while ((*cp3) && !!strncmp("_M", cp3+1, 2))
-					cp3++;
-
-				if (!*cp3) {
-					free(line);
-					continue;
-				}
-
-				cp3++;
-				filename = xmalloc((size_t)(cp3 - cp2 + 1));
-				strncpy(filename, cp2, (size_t)(cp3 - cp2));
-				filename[cp3-cp2] = '\0';
-
-				mod->image = opd_get_kernel_image(filename, NULL, 0, 0);
-				mod->image->ref_count++;
-				free(filename);
-				break;
-
-			case 'S':
-				/* get extent of .text section */
-				cp2++;
-				if (strncmp(".text_L", cp2, 7)) {
-					free(line);
-					continue;
-				}
-
-				cp2 += 7;
-				sscanf(line, "%lx", &mod->start);
-				sscanf(cp2, "%lu", &mod->end);
-				mod->end += mod->start;
-				break;
-		}
-
-		free(line);
-	}
-
-	if (line)
-		free(line);
-	op_close_file(fp);
-}
- 
-
-/**
- * opd_drop_module_sample - drop a module sample efficiently
- * @param eip  eip of sample
- *
- * This function is called to recover from failing to put a samples even
- * after re-reading /proc/ksyms. It's either a rogue sample, or from a module
- * that didn't create symbols (like in some initrd setups). So we check with
- * query_module() if we can place it in a symbol-less module, and if so create
- * a negative entry for it, to quickly ignore future samples.
- *
- * Problem uncovered by Bob Montgomery <bobm@fc.hp.com>
- *
- */
-static void opd_drop_module_sample(unsigned long eip)
-{
-	char * module_names;
-	char * name;
-	size_t size = 1024;
-	size_t ret;
-	uint nr_mods;
-	uint mod = 0;
-
-	opd_24_stats[OPD_LOST_MODULE]++;
-
-	module_names = xmalloc(size);
-	while (query_module(NULL, QM_MODULES, module_names, size, &ret)) {
-		if (errno != ENOSPC) {
-			verbprintf(vmodule, "query_module failed: %s\n", strerror(errno));
-			return;
-		}
-		size = ret;
-		module_names = xrealloc(module_names, size);
-	}
-
-	nr_mods = ret;
-	name = module_names;
-
-	while (mod < nr_mods) {
-		struct module_info info;
-		if (!query_module(name, QM_INFO, &info, sizeof(info), &ret)) {
-			if (eip >= info.addr && eip < info.addr + info.size) {
-				verbprintf(vmodule, "Sample from unprofilable module %s\n", name);
-				opd_create_module(name, info.addr, info.addr + info.size);
-				break;
-			}
-		}
-		mod++;
-		name += strlen(name) + 1;
-	}
-
-	if (module_names)
-		free(module_names);
-}
-
-
-/**
- * opd_find_module_by_eip - find a module by its eip
- * @param eip  EIP value
- *
- * find in the modules container the module which
- * contain this eip return %NULL if not found.
- * caller must check than the module image is valid
- */
-static struct opd_module * opd_find_module_by_eip(unsigned long eip)
-{
-	struct list_head * pos;
-	struct opd_module * module;
-
-	list_for_each(pos, &opd_modules) {
-		module = list_entry(pos, struct opd_module, module_list);
-		if (module->start <= eip && module->end > eip)
-			return module;
-	}
-
-	return NULL;
-}
-
-
-/**
- * opd_handle_module_sample - process a module sample
- * @param eip  EIP value
- * @param counter  counter number
- *
- * Process a sample in module address space. The sample eip
- * is matched against module information. If the search was
- * successful, the sample is output to the relevant file.
- *
- * Note that for modules and the kernel, the offset will be
- * wrong in the file, as it is not a file offset, but the offset
- * from the text section. This is fixed up in pp.
- *
- * If the sample could not be located in a module, it is treated
- * as a kernel sample.
- */
-static void opd_handle_module_sample(unsigned long eip, u32 counter)
-{
-	struct opd_module * module;
-
-	module = opd_find_module_by_eip(eip);
-	if (!module) {
-		/* not found in known modules, re-read our info and retry */
-		opd_clear_module_info();
-		opd_get_module_info();
-
-		module = opd_find_module_by_eip(eip);
-	}
-
-	if (module) {
-		if (module->image != NULL) {
-			opd_24_stats[OPD_MODULE]++;
-			opd_put_image_sample(module->image,
-					     eip - module->start, counter);
-		} else {
-			opd_24_stats[OPD_LOST_MODULE]++;
-			verbprintf(vmodule, "No image for sampled module %s\n",
-				   module->name);
-		}
-	} else {
-		opd_drop_module_sample(eip);
-	}
-}
-
-
-void opd_handle_kernel_sample(unsigned long eip, u32 counter)
-{
-	if (no_vmlinux || eip < kernel_end) {
-		opd_24_stats[OPD_KERNEL]++;
-		opd_put_image_sample(kernel_image, eip - kernel_start, counter);
-		return;
-	}
-
-	/* in a module */
-	opd_handle_module_sample(eip, counter);
-}
- 
-
-int opd_eip_is_kernel(unsigned long eip)
-{
-#ifdef __i386
-#define KERNEL_OFFSET 0xC0000000
-	/*
-	 * kernel_start == 0 when using --no-vmlinux.
-	 * This is wrong, wrong, wrong, wrong, but we don't have much
-	 * choice. It obviously breaks for IA64.
-	 */
-	if (!kernel_start)
-		return eip >= KERNEL_OFFSET;
-#endif
-
-	return eip >= kernel_start;
-}
-
-
-void opd_add_kernel_map(struct opd_proc * proc, unsigned long eip)
-{
-	struct opd_module * module;
-	struct opd_image * image;
-	char const * app_name;
-
-	app_name = proc->name;
-	if (!app_name) {
-		verbprintf(vmisc, "un-named proc for tid %d\n", proc->tid);
-		return;
-	}
-
-
-	if (eip < kernel_end) {
-		image = opd_get_kernel_image(vmlinux, app_name, proc->tid, proc->tgid);
-		if (!image) {
-			verbprintf(vmisc, "Can't create image for %s %s\n", vmlinux, app_name);
-			return;
-		}
-
-		opd_add_mapping(proc, image, kernel_start, 0, kernel_end);
-		return;
-	}
-
-	module = opd_find_module_by_eip(eip);
-	if (!module) {
-		/* not found in known modules, re-read our info and retry */
-		opd_clear_module_info();
-		opd_get_module_info();
-
-		module = opd_find_module_by_eip(eip);
-	}
-
-	if (module) {
-		/* module->name is only the module name not the full path */
-		char const * module_name = 0;
-		if (module->image)
-			module_name = module->image->name;
-		if (!module_name) {
-			verbprintf(vmodule, "unable to get path name for module %s\n",
-			       module->name);
-			module_name = module->name;
-		}
-		image = opd_get_kernel_image(module_name, app_name, proc->tid, proc->tgid);
-		if (!image) {
-			verbprintf(vmodule, "Can't create image for %s %s\n",
-			       module->name, app_name);
-			return;
-		}
-		opd_add_mapping(proc, image, module->start, 0, module->end);
-	} else {
-		opd_drop_module_sample(eip);
-	}
-}
diff --git a/daemon/liblegacy/opd_kernel.h b/daemon/liblegacy/opd_kernel.h
deleted file mode 100644
index f854ae0..0000000
--- a/daemon/liblegacy/opd_kernel.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/**
- * @file daemon/liblegacy/opd_kernel.h
- * Dealing with the kernel and kernel module samples
- *
- * @remark Copyright 2002 OProfile authors
- * @remark Read the file COPYING
- *
- * @author John Levon
- * @author Philippe Elie
- */
-
-#ifndef OPD_KERNEL_H
-#define OPD_KERNEL_H
-
-#include "op_types.h"
-
-struct opd_proc;
-
-/**
- * opd_init_kernel_image - initialise the kernel image
- */
-void opd_init_kernel_image(void);
-
-/**
- * opd_parse_kernel_range - parse the kernel range values
- */
-void opd_parse_kernel_range(char const * arg);
-
-/**
- * opd_clear_module_info - clear kernel module information
- *
- * Clear and free all kernel module information and reset
- * values.
- */
-void opd_clear_module_info(void);
-
-/**
- * opd_handle_kernel_sample - process a kernel sample
- * @param eip  EIP value of sample
- * @param counter  counter number
- *
- * Handle a sample in kernel address space or in a module. The sample is
- * output to the relevant image file.
- */
-void opd_handle_kernel_sample(unsigned long eip, u32 counter);
-
-/**
- * opd_eip_is_kernel - is the sample from kernel/module space
- * @param eip  EIP value
- *
- * Returns %1 if eip is in the address space starting at
- * kernel_start, %0 otherwise.
- */
-int opd_eip_is_kernel(unsigned long eip);
-
-/**
- * opd_add_kernel_map - add a module or kernel maps to a proc struct
- *
- * @param proc owning proc of the new mapping
- * @param eip eip inside the new mapping
- *
- * We assume than eip >= kernel_start
- *
- */
-void opd_add_kernel_map(struct opd_proc * proc, unsigned long eip);
-
-#endif /* OPD_KERNEL_H */
diff --git a/daemon/liblegacy/opd_mapping.c b/daemon/liblegacy/opd_mapping.c
deleted file mode 100644
index 699c475..0000000
--- a/daemon/liblegacy/opd_mapping.c
+++ /dev/null
@@ -1,170 +0,0 @@
-/**
- * @file opd_mapping.c
- * Management of process mappings
- *
- * @remark Copyright 2002 OProfile authors
- * @remark Read the file COPYING
- *
- * @author John Levon
- * @author Philippe Elie
- */
-
-#include "opd_mapping.h"
-#include "opd_proc.h"
-#include "opd_image.h"
-#include "opd_printf.h"
-
-#include "op_interface.h"
-#include "op_config_24.h"
-#include "op_libiberty.h"
-
-#include <sys/mman.h>
-#include <limits.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-
-/* hash map device mmap */
-static struct op_hash_index * hashmap;
-/* already seen mapping name */
-static char const * hash_name[OP_HASH_MAP_NR];
-
-
-void opd_cleanup_hash_name(void)
-{
-	int i;
-	for (i = 0; i < OP_HASH_MAP_NR; ++i)
-		free((char *)hash_name[i]);
-	
-}
-
-
-void opd_init_hash_map(void)
-{
-	extern fd_t hashmapdevfd;
-
-	hashmap = mmap(0, OP_HASH_MAP_SIZE, PROT_READ, MAP_SHARED, hashmapdevfd, 0);
-	if ((long)hashmap == -1) {
-		perror("oprofiled: couldn't mmap hash map");
-		exit(EXIT_FAILURE);
-	}
-
-}
-
-
-void opd_kill_maps(struct opd_proc * proc)
-{
-	struct list_head * pos, * pos2;
-
-	list_for_each_safe(pos, pos2, &proc->maps) {
-		struct opd_map * map = list_entry(pos, struct opd_map, next);
-		list_del(pos);
-		opd_delete_image(map->image);
-		free(map);
-	}
-}
-
-
-void opd_add_mapping(struct opd_proc * proc, struct opd_image * image,
-		unsigned long start, unsigned long offset, unsigned long end)
-{
-	struct opd_map * map;
-
-	verbprintf(vmisc, "Adding mapping for process %d: 0x%.8lx-0x%.8lx, off 0x%.8lx, \"%s\"\n",
-		proc->tid, start, end, offset, image->name);
-
-	map = malloc(sizeof(struct opd_map));
-
-	/* first map is the primary image */
-	if (list_empty(&proc->maps)) {
-		if (proc->name)
-			free((char *)proc->name);
-		proc->name = xstrdup(image->name);
-	}
-
-	image->ref_count++;
-
-	map->image = image;
-	map->start = start;
-	map->offset = offset;
-	map->end = end;
-	list_add_tail(&map->next, &proc->maps);
-}
-
-
-/**
- * get_from_pool - retrieve string from hash map pool
- * @param ind index into pool
- */
-inline static char * get_from_pool(uint ind)
-{
-	return ((char *)(hashmap + OP_HASH_MAP_NR) + ind);
-}
-
-
-/**
- * opg_get_hash_name - find a mapping name from a hash
- * @param hash hash value for this name
- */
-static char const * opd_get_hash_name(int hash)
-{
-	char file[PATH_MAX];
-	char * c = &file[PATH_MAX-1];
-	int orighash = hash;
-
-	if (hash_name[hash])
-		return hash_name[hash];
-
-	*c = '\0';
-	while (hash) {
-		char * name = get_from_pool(hashmap[hash].name);
-
-		if (strlen(name) + 1 + strlen(c) >= PATH_MAX) {
-			fprintf(stderr, "String \"%s\" too large.\n", c);
-			exit(EXIT_FAILURE);
-		}
-
-		c -= strlen(name) + 1;
-		*c = '/';
-		strncpy(c + 1, name, strlen(name));
-
-		/* move onto parent */
-		hash = hashmap[hash].parent;
-	}
-
-	return hash_name[orighash] = xstrdup(c);
-}
-
-
-void opd_handle_mapping(struct op_note const * note)
-{
-	struct opd_proc * proc;
-	struct opd_image * image;
-	int hash;
-	char const * name;
-
-	proc = opd_get_proc(note->pid, note->tgid);
-
-	if (!proc) {
-		verbprintf(vmisc, "Told about mapping for non-existent process %u.\n", note->pid);
-		proc = opd_new_proc(note->pid, note->tgid);
-	}
-
-	hash = note->hash;
-
-	if (hash == -1) {
-		/* possibly deleted file */
-		return;
-	}
-
-	if (hash < 0 || hash >= OP_HASH_MAP_NR) {
-		fprintf(stderr, "hash value %u out of range.\n", hash);
-		return;
-	}
-
-	name = opd_get_hash_name(hash);
-	image = opd_get_image(name, proc->name, 0, note->pid, note->tgid);
-
-	opd_add_mapping(proc, image, note->addr, note->offset,
-	                note->addr + note->len);
-}
diff --git a/daemon/liblegacy/opd_mapping.h b/daemon/liblegacy/opd_mapping.h
deleted file mode 100644
index f10b0c8..0000000
--- a/daemon/liblegacy/opd_mapping.h
+++ /dev/null
@@ -1,111 +0,0 @@
-/**
- * @file opd_mapping.h
- * Management of process mappings
- *
- * @remark Copyright 2002 OProfile authors
- * @remark Read the file COPYING
- *
- * @author John Levon
- * @author Philippe Elie
- */
-
-#ifndef OPD_MAPPING_H
-#define OPD_MAPPING_H
-
-#include "op_list.h"
-
-struct opd_image;
-struct opd_proc;
-struct op_note;
-
-/**
- * represent a mmap'ed area, we create such area only for vma area with exec
- * access right
- */
-struct opd_map {
-	/** next mapping for this image */
-	struct list_head next;
-	/** owning image */
-	struct opd_image * image;
-	/** mapping start vma */
-	unsigned long start;
-	/** mapping offset */
-	unsigned long offset;
-	/** mapping end vma */
-	unsigned long end;
-};
-
-/**
- * opd_init_hash_map - initialise the hashmap
- */
-void opd_init_hash_map(void);
-
-/**
- * op_cleanup_hash_name
- *
- * release resource owned by hash_name array
- */
-void opd_cleanup_hash_name(void);
-
-/**
- * opd_handle_mapping - deal with mapping notification
- * @param note  mapping notification
- *
- * Deal with one notification that a process has mapped
- * in a new executable file. The mapping information is
- * added to the process structure.
- */
-void opd_handle_mapping(struct op_note const * note);
-
-/**
- * opd_put_mapping - add a mapping to a process
- * @param proc  process to add map to
- * @param image  mapped image pointer
- * @param start  start of mapping
- * @param offset  file offset of mapping
- * @param end  end of mapping
- *
- * Add the mapping specified to the process proc growing the maps array
- * if necessary.
- */
-void opd_add_mapping(struct opd_proc * proc, struct opd_image * image,
-		unsigned long start, unsigned long offset, unsigned long end);
-
-/**
- * opd_kill_maps - delete mapping information for a process
- * @param proc  process to work on
- *
- * Frees structures holding mapping information
- */
-void opd_kill_maps(struct opd_proc * proc);
-
-/**
- * opd_is_in_map - check whether an EIP is within a mapping
- * @param map  map to check
- * @param eip  EIP value
- *
- * Return %1 if the EIP value @eip is within the boundaries
- * of the map @map, %0 otherwise.
- */
-inline static int opd_is_in_map(struct opd_map * map, unsigned long eip)
-{
-	return (eip >= map->start && eip < map->end);
-}
-
-
-/*
- * opd_map_offset - return offset of sample against map
- * @param map  map to use
- * @param eip  EIP value to use
- *
- * Returns the offset of the EIP value @eip into
- * the map @map, which is the same as the file offset
- * for the relevant binary image.
- */
-inline static unsigned long opd_map_offset(struct opd_map * map,
-					   unsigned long eip)
-{
-	return (eip - map->start) + map->offset;
-}
-
-#endif /* OPD_MAPPING_H */
diff --git a/daemon/liblegacy/opd_parse_proc.c b/daemon/liblegacy/opd_parse_proc.c
deleted file mode 100644
index 8a2d4e8..0000000
--- a/daemon/liblegacy/opd_parse_proc.c
+++ /dev/null
@@ -1,223 +0,0 @@
-/**
- * @file opd_parse_proc.c
- * Parsing of /proc/#pid
- *
- * @remark Copyright 2002 OProfile authors
- * @remark Read the file COPYING
- *
- * @author John Levon
- * @author Philippe Elie
- */
-
-#include "op_libiberty.h"
-
-#include "opd_parse_proc.h"
-#include "opd_proc.h"
-#include "opd_mapping.h"
-#include "opd_image.h"
-#include "opd_printf.h"
-
-#include "op_file.h"
-#include "op_fileio.h"
-
-#include <dirent.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-/**
- * opd_add_ascii_map - parse an ASCII map string for a process
- * @param proc  process to add map to
- * @param line  0-terminated ASCII string
- * @param image_name the binary application name
- *
- * Attempt to parse the string @line for map information
- * and add the info to the process @proc. Returns %1
- * on success, %0 otherwise.
- *
- * The parsing is based on Linux 2.4 format, which looks like this :
- *
- * 4001e000-400fc000 r-xp 00000000 03:04 31011      /lib/libc-2.1.2.so
- */
-/* FIXME: handle (deleted) */
-static int opd_add_ascii_map(struct opd_proc * proc, char const * line,
-			     char * const image_name)
-{
-	unsigned long offset, start, end;
-	struct opd_image * image;
-	char const * cp = line;
-
-	/* skip to protection field */
-	while (*cp && *cp != ' ')
-		cp++;
-
-	/* handle rwx */
-	if (!*cp || (!*(++cp)) || (!*(++cp)) || (*(++cp) != 'x'))
-		return 0;
-
-	/* get start and end from "40000000-4001f000" */
-	if (sscanf(line, "%lx-%lx", &start, &end) != 2)
-		return 0;
-
-	/* "p " */
-	cp += 2;
-
-	/* read offset */
-	if (sscanf(cp, "%lx", &offset) != 1)
-		return 0;
-
-	while (*cp && *cp != '/')
-		cp++;
-
-	if (!*cp)
-		return 0;
-
-	image = opd_get_image(cp, image_name, 0, proc->tid, proc->tgid);
-	if (!image)
-		return 0;
-
-	opd_add_mapping(proc, image, start, offset, end);
-
-	return 1;
-}
-
-
-/**
- * opd_get_ascii_maps - read all maps for a process
- * @param proc  process to work on
- *
- * Read the /proc/<pid>/maps file and add all
- * mapping information found to the process @proc.
- */
-static void opd_get_ascii_maps(struct opd_proc * proc)
-{
-	FILE * fp;
-	char mapsfile[20] = "/proc/";
-	char * line;
-	char exe_name[20];
-	char * image_name;
-	struct list_head * pos;
-
-	snprintf(mapsfile + 6, 6, "%hu", proc->tid);
-
-	strcpy(exe_name, mapsfile);
-
-	strcat(mapsfile, "/maps");
-
-	fp = op_try_open_file(mapsfile, "r");
-	if (!fp)
-		return;
-
-	strcat(exe_name, "/exe");
-	image_name = xmalloc(PATH_MAX);
-	if (!realpath(exe_name, image_name))
-		/* kernel thread are invalid symlink */
-		strcpy(image_name, exe_name);
-
-	verbprintf(vmisc, "image name %s for pid %u %u\n", image_name, proc->tid, proc->tgid);
-
-	while (1) {
-		line = op_get_line(fp);
-		if (!line)
-			break;
-
-		opd_add_ascii_map(proc, line, image_name);
-		free(line);
-	}
-
-	/* dae assume than the first map added is the primary image name, this
-	 * is always true at exec time but not for /proc/pid so restore
-	 * the primary image name
-	 */
-	list_for_each(pos, &proc->maps) {
-		struct opd_map * map = list_entry(pos, struct opd_map, next);
-		if (!strcmp(map->image->name, image_name)) {
-			if (pos != proc->maps.next) {
-				fprintf(stderr, "swap map for image %s from %s to %s\n", image_name, proc->name, map->image->name);
-				free((char *)proc->name);
-				proc->name = xstrdup(map->image->name);
-			}
-			break;
-		}
-	}
-
-	if (list_empty(&proc->maps)) {
-		/* we always need a valid proc->maps[0], we artificially give
-		 * a map of length zero so on no samples will never go to this
-		 * map. This is used only with --separate-samples and kernel
-		 * thread when adding vmlinux and module maps to proc->maps[]
-		 */
-		/* FIXME: use the first field of /proc/pid/status as proc name
-		 * for now we use /proc/%pid/exe as name */
-		struct opd_image * image = opd_get_image(image_name,
-                                       image_name, 0, proc->tid, proc->tgid);
-		if (image)
-			opd_add_mapping(proc, image, 0, 0, 0);
-	}
-
-	if (image_name)
-		free(image_name);
-
-	op_close_file(fp);
-}
-
-
-static u32 read_tgid(u32 tid)
-{
-	char status_file[30] = "/proc/";
-	char * line;
-	FILE * fp;
-	u32 tgid;
-
-	snprintf(status_file + 6, 6, "%hu", tid);
-
-	strcat(status_file, "/status");
-
-	fp = op_try_open_file(status_file, "r");
-	if (!fp)
-		return 0;
-
-	while (1) {
-		line = op_get_line(fp);
-		if (!line)
-			break;
-
-		if (sscanf(line, "Tgid: %u", &tgid) == 1) {
-			free(line);
-			op_close_file(fp);
-			return tgid;
-		}
-		free(line);
-	}
-
-	op_close_file(fp);
-
-	return 0;
-}
-
-
-void opd_get_ascii_procs(void)
-{
-	DIR * dir;
-	struct dirent * dirent;
-	struct opd_proc * proc;
-	u32 pid;
-
-	if (!(dir = opendir("/proc"))) {
-		perror("oprofiled: /proc directory could not be opened. ");
-		exit(EXIT_FAILURE);
-	}
-
-	while ((dirent = readdir(dir))) {
-		if (sscanf(dirent->d_name, "%u", &pid) == 1) {
-			u32 tgid = read_tgid(pid);
-			verbprintf(vmisc, "ASCII added %u %u\n", pid, tgid);
-			proc = opd_get_proc(pid, tgid);
-			if (!proc)
-				proc = opd_new_proc(pid, tgid);
-			opd_get_ascii_maps(proc);
-		}
-	}
-
-	closedir(dir);
-}
diff --git a/daemon/liblegacy/opd_parse_proc.h b/daemon/liblegacy/opd_parse_proc.h
deleted file mode 100644
index a224be4..0000000
--- a/daemon/liblegacy/opd_parse_proc.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/**
- * @file opd_parse_proc.h
- * Parsing of /proc/#pid
- *
- * @remark Copyright 2002 OProfile authors
- * @remark Read the file COPYING
- *
- * @author John Levon
- * @author Philippe Elie
- */
-
-#ifndef OPD_PARSE_PROC_H
-#define OPD_PARSE_PROC_H
-
-/**
- * opd_get_ascii_procs - read process and mapping information from /proc
- *
- * Read information on each process and its mappings from the /proc
- * filesystem.
- */
-void opd_get_ascii_procs(void);
-
-#endif /* OPD_PARSE_PROC_H */
diff --git a/daemon/liblegacy/opd_proc.c b/daemon/liblegacy/opd_proc.c
deleted file mode 100644
index 2f9b38c..0000000
--- a/daemon/liblegacy/opd_proc.c
+++ /dev/null
@@ -1,432 +0,0 @@
-/**
- * @file opd_proc.c
- * Management of processes
- *
- * @remark Copyright 2002 OProfile authors
- * @remark Read the file COPYING
- *
- * @author John Levon
- * @author Philippe Elie
- */
-
-#include "op_hw_config.h"
-#include "opd_proc.h"
-#include "opd_image.h"
-#include "opd_mapping.h"
-#include "opd_sample_files.h"
-#include "opd_kernel.h"
-#include "opd_24_stats.h"
-#include "opd_printf.h"
-#include "oprofiled.h"
-
-#include "op_interface.h"
-#include "op_libiberty.h"
-
-#include <sys/types.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-/* size of process hash table */
-#define OPD_MAX_PROC_HASH 1024
-
-extern int cpu_number;
-
-/* hash of process lists */
-static struct list_head opd_procs[OPD_MAX_PROC_HASH];
-
-/* statistics purpose */
-static int nr_procs;
-
-
-void opd_init_procs(void)
-{
-	int i;
-	for (i = 0; i < OPD_MAX_PROC_HASH; i++)
-		list_init(&opd_procs[i]);
-}
-
-
-int opd_get_nr_procs(void)
-{
-	return nr_procs;
-}
-
-
-/**
- * proc_hash - hash pid value
- * @param tid  pid value to hash
- *
- */
-inline static uint proc_hash(pid_t tid)
-{
-	/* FIXME: hash tgid too! */
-	return ((tid >> 4) ^ (tid)) % OPD_MAX_PROC_HASH;
-}
-
-
-struct opd_proc * opd_new_proc(pid_t tid, pid_t tgid)
-{
-	struct opd_proc * proc;
-
-	nr_procs++;
-	proc = xmalloc(sizeof(struct opd_proc));
-	list_init(&proc->maps);
-	proc->name = NULL;
-	proc->tid = tid;
-	proc->tgid = tgid;
-	proc->dead = 0;
-	proc->accessed = 0;
-	list_add(&proc->next, &opd_procs[proc_hash(tid)]);
-	return proc;
-}
-
-
-struct opd_proc * opd_get_proc(pid_t tid, pid_t tgid)
-{
-	struct opd_proc * proc;
-	uint hash = proc_hash(tid);
-	struct list_head * pos, *pos2;
-
-	opd_24_stats[OPD_PROC_QUEUE_ACCESS]++;
-	list_for_each_safe(pos, pos2, &opd_procs[hash]) {
-		opd_24_stats[OPD_PROC_QUEUE_DEPTH]++;
-		proc = list_entry(pos, struct opd_proc, next);
-		if (tid == proc->tid && tgid == proc->tgid) {
-			/* LRU to head */
-			list_del(&proc->next);
-			list_add(&proc->next, &opd_procs[hash]);
-			return proc;
-		}
-	}
-
-	return NULL;
-}
-
-
-/**
- * verb_show_sample - print the sample out to the log
- * @param offset  the offset value
- * @param map  map to print
- */
-inline static void
-verb_show_sample(unsigned long offset, struct opd_map * map)
-{
-	verbprintf(vsamples, "DO_PUT_SAMPLE : calc offset 0x%.8lx, "
-		"map start 0x%.8lx, end 0x%.8lx, offset 0x%.8lx, name \"%s\"\n",
-		offset, map->start, map->end, map->offset, 
-		map->image->name);
-}
-
-
-void opd_put_image_sample(struct opd_image * image, unsigned long offset,
-                          u32 counter)
-{
-	struct opd_24_sfile * sfile;
-	int err;
-
-	if (image->ignored)
-		return;
-
-	if (!image->sfiles[cpu_number]) {
-		image->sfiles[cpu_number] =
-			xcalloc(OP_MAX_COUNTERS, sizeof(struct op_24_sfile *));
-	}
-	sfile = image->sfiles[cpu_number][counter];
-
-	if (!sfile || !odb_open_count(&sfile->sample_file)) {
-		if (opd_open_24_sample_file(image, counter, cpu_number)) {
-			/* opd_open_24_sample_file output an error message */
-			opd_24_stats[OPD_LOST_SAMPLEFILE]++;
-			return;
-		}
-		sfile = image->sfiles[cpu_number][counter];
-	}
-
-	err = odb_update_node(&sfile->sample_file, offset);
-	if (err) {
-		fprintf(stderr, "%s\n", strerror(err));
-		abort();
-	}
-
-	opd_24_sfile_lru(sfile);
-}
-
-
-/**
- * opd_lookup_maps - lookup a proc mappings for a sample
- * @param proc proc to lookup
- * @param sample sample to lookup
- *
- * iterate through the proc maps searching the mapping which owns sample
- * if sucessful sample count will be updated and we return non-zero
- */
-static int opd_lookup_maps(struct opd_proc * proc,
-			struct op_sample const * sample)
-{
-	struct list_head * pos;
-
-	proc->accessed = 1;
-
-	opd_24_stats[OPD_MAP_ARRAY_ACCESS]++;
-	list_for_each(pos, &proc->maps) {
-		struct opd_map * map = list_entry(pos, struct opd_map, next);
-		if (opd_is_in_map(map, sample->eip)) {
-			unsigned long offset = opd_map_offset(map, sample->eip);
-			if (map->image != NULL) {
-				verb_show_sample(offset, map);
-				opd_put_image_sample(map->image, offset, sample->counter);
-			}
-			opd_24_stats[OPD_PROCESS]++;
-			return 1;
-		}
-		opd_24_stats[OPD_MAP_ARRAY_DEPTH]++;
-	}
-
-	return 0;
-}
-
-
-void opd_put_sample(struct op_sample const * sample)
-{
-	struct opd_proc * proc;
-	int in_kernel_eip = opd_eip_is_kernel(sample->eip);
-
-	opd_24_stats[OPD_SAMPLES]++;
-
-	verbprintf(vsamples, "DO_PUT_SAMPLE: c%d, EIP 0x%.8lx, tgid %.6d pid %.6d\n",
-		sample->counter, sample->eip, sample->tgid, sample->pid);
-
-	if (!separate_kernel && in_kernel_eip) {
-		opd_handle_kernel_sample(sample->eip, sample->counter);
-		return;
-	}
-
-	if (!(proc = opd_get_proc(sample->pid, sample->tgid))) {
-		if (in_kernel_eip || no_vmlinux) {
-			/* idle task get a 0 pid and is hidden we can never get
-			 * a proc so on we fall back to put sample in vmlinux
-			 * or module samples files. Here we will catch also
-			 * sample for newly created kernel thread, currently 
-			 * we can handle properly only kenel thread created
-			 * at daemon startup time */
-			opd_handle_kernel_sample(sample->eip, sample->counter);
-		} else {
-			verbprintf(vmisc, "No proc info for tgid %.6d pid %.6d.\n",
-                                   sample->tgid, sample->pid);
-			opd_24_stats[OPD_LOST_PROCESS]++;
-		}
-		return;
-	}
-
-	if (opd_lookup_maps(proc, sample))
-		return;
-
-	if (in_kernel_eip) {
-		opd_add_kernel_map(proc, sample->eip);
-		if (opd_lookup_maps(proc, sample))
-			return;
-	}
-
-	/* couldn't locate it */
-	verbprintf(vsamples, "Couldn't find map for pid %.6d, EIP 0x%.8lx.\n",
-		   sample->pid, sample->eip);
-	opd_24_stats[OPD_LOST_MAP_PROCESS]++;
-}
-
-
-void opd_handle_fork(struct op_note const * note)
-{
-	struct opd_proc * old;
-	struct opd_proc * proc;
-	struct list_head * pos;
-
-	verbprintf(vmisc, "DO_FORK: from %d, %d to %ld, %ld\n", note->pid, note->tgid,
-	           note->addr, note->len);
-
-	old = opd_get_proc(note->pid, note->tgid);
-
-	/* we can quite easily get a fork() after the execve() because the
-	 * notifications are racy. In particular, the fork notification is
-	 * done on parent return (so we know the pid), but this will often be
-	 * after the execve is done by the child.
-	 *
-	 * So we only create a new setup if it doesn't exist already, allowing
-	 * both the clone() and the execve() cases to work.
-	 */
-	if (opd_get_proc(note->addr, note->len))
-		return;
-
-	/* eip/len is actually tid/tgid of new process */
-	proc = opd_new_proc(note->addr, note->len);
-
-	if (!old)
-		return;
-
-	/* copy the maps */
-	list_for_each(pos, &old->maps) {
-		struct opd_map * map = list_entry(pos, struct opd_map, next);
-		if (!separate_thread) {
-			opd_add_mapping(proc, map->image, map->start,
-			                map->offset, map->end);
-		} else {
-			/* when separating thread we can't create blindly a new
-			 * image e.g. pid re-use, multiple mapping with the
-			 * same mapping name etc. */
-			struct opd_image * image = 
-				opd_get_image(map->image->name, old->name,
-				map->image->kernel, note->addr, note->len);
-			opd_add_mapping(proc, image, map->start, map->offset,
-			                map->end);
-		}
-	}
-}
-
-
-void opd_handle_exec(pid_t tid, pid_t tgid)
-{
-	struct opd_proc * proc;
-
-	verbprintf(vmisc, "DO_EXEC: pid %u %u\n", tid, tgid);
-
-	/* There is a race for samples received between fork/exec sequence.
-	 * These samples belong to the old mapping but we can not say if
-	 * samples has been received before the exec or after. This explains
-	 * the message "Couldn't find map for ..." in verbose mode.
-	 *
-	 * Unhappily, it is difficult to get an estimation of these misplaced
-	 * samples, the error message can count only out of mapping samples but
-	 * not samples between the race and inside the mapping of the exec'ed
-	 * process :/.
-	 *
-	 * Trying to save old mapping is not correct due the above reason. The
-	 * only manner to handle this is to flush the module samples hash table
-	 * after each fork which is unacceptable for performance reasons */
-	proc = opd_get_proc(tid, tgid);
-	if (proc) {
-		opd_kill_maps(proc);
-		/* proc->name will be set when the next mapping occurs */
-		free((char *)proc->name);
-		proc->name = NULL;
-	} else {
-		opd_new_proc(tid, tgid);
-	}
-}
-
-
-void opd_handle_exit(struct op_note const * note)
-{
-	struct opd_proc * proc;
-
-	verbprintf(vmisc, "DO_EXIT: process %d\n", note->pid);
-
-	proc = opd_get_proc(note->pid, note->tgid);
-	if (proc) {
-		proc->dead = 1;
-		proc->accessed = 1;
-	} else {
-		verbprintf(vmisc, "unknown proc %u just exited.\n", note->pid);
-	}
-}
-
-
-typedef void (*opd_proc_cb)(struct opd_proc *);
-
-/**
- * @param proc_cb callback to apply onto each existing proc struct
- *
- * the callback receive a struct opd_proc * (not a const struct) and is
- * allowed to freeze the proc struct itself.
- */
-static void opd_for_each_proc(opd_proc_cb proc_cb)
-{
-	struct list_head * pos;
-	struct list_head * pos2;
-	int i;
-
-	for (i = 0; i < OPD_MAX_PROC_HASH; ++i) {
-		list_for_each_safe(pos, pos2, &opd_procs[i]) {
-			struct opd_proc * proc =
-				list_entry(pos, struct opd_proc, next);
-			proc_cb(proc);
-		}
-	}
-}
-
-
-/**
- * opd_delete_proc - delete a process
- * @param proc  process to delete
- *
- * Remove the process proc from the process list and free
- * the associated structures.
- */
-static void opd_delete_proc(struct opd_proc * proc)
-{
-	--nr_procs;
-	list_del(&proc->next);
-	opd_kill_maps(proc);
-	if (proc->name)
-		free((char *)proc->name);
-	free(proc);
-}
-
-
-void opd_proc_cleanup(void)
-{
-	opd_for_each_proc(opd_delete_proc);
-}
-
-
-/**
- * opd_age_proc - age a struct opd_proc
- * @param  proc proc to age
- *
- * age dead proc in such way if a proc doesn't receive any samples
- * between two age_proc the opd_proc struct is deleted
- */
-static void opd_age_proc(struct opd_proc * proc)
-{
-	// delay death whilst its still being accessed
-	if (proc->dead) {
-		proc->dead += proc->accessed;
-		proc->accessed = 0;
-		if (--proc->dead == 0)
-			opd_delete_proc(proc);
-	}
-}
-
-
-void opd_age_procs(void)
-{
-	opd_for_each_proc(opd_age_proc);
-}
-
-
-/**
- * opd_remove_kernel_mapping - remove all kernel mapping for an opd_proc
- * @param proc  proc where mappings must be updated.
- *
- * invalidate (by removing them) all kernel mapping. This function do nothing
- * when separate_kernel == 0 because we don't add mapping for kernel
- * sample in proc struct.
- */
-static void opd_remove_kernel_mapping(struct opd_proc * proc)
-{
-	struct list_head * pos, * pos2;
-
-	list_for_each_safe(pos, pos2, &proc->maps) {
-		struct opd_map * map = list_entry(pos, struct opd_map, next);
-		if (opd_eip_is_kernel(map->start + map->offset)) {
-			list_del(pos);
-			opd_delete_image(map->image);
-			free(map);
-		}
-	}
-}
-
-
-void opd_clear_kernel_mapping(void)
-{
-	opd_for_each_proc(opd_remove_kernel_mapping);
-}
diff --git a/daemon/liblegacy/opd_proc.h b/daemon/liblegacy/opd_proc.h
deleted file mode 100644
index 8140a46..0000000
--- a/daemon/liblegacy/opd_proc.h
+++ /dev/null
@@ -1,151 +0,0 @@
-/**
- * @file opd_proc.h
- * Management of processes
- *
- * @remark Copyright 2002 OProfile authors
- * @remark Read the file COPYING
- *
- * @author John Levon
- * @author Philippe Elie
- */
-
-#ifndef OPD_PROC_H
-#define OPD_PROC_H
-
-#include "op_types.h"
-#include "op_list.h"
-
-struct opd_map;
-struct opd_image;
-struct op_note;
-struct op_sample;
-
-/**
- * track process, created either by a fork or an exec notification.
- */
-struct opd_proc {
-	/** maps are always added to the end of head, so search will be done
-	 * from the newest map to the oldest which mean we don't care about
-	 * munmap. First added map must be the primary image */
-	struct list_head maps;
-	/** process name */
-	char const * name;
-	/** thread id for this process, always equal to tgid for 2.2 kernel */
-	pid_t tid;
-	/** thread group id for this process */
-	pid_t tgid;
-	/** non-zero if this process receive any samples, this field
-	 * is used with dead field to defer opd_proc deletion */
-	int accessed;
-	/** Set to non-zero when an exit notification occur for this process */
-	int dead;
-	/** used by container of opd_proc */
-	struct list_head next;
-};
-
-/**
- * initialize opd_proc container
- */
-void opd_init_procs(void);
-
-/**
- * opd_put_sample - process a sample
- * @param sample  sample to process
- *
- * Write out the sample to the appropriate sample file. This
- * routine handles kernel and module samples as well as ordinary ones.
- */
-void opd_put_sample(struct op_sample const * sample);
-
-/**
- * opd_put_image_sample - write sample to file
- * @param image  image for sample
- * @param offset  (file) offset to write to
- * @param counter  counter number
- *
- * Add to the count stored at position offset in the
- * image file. Overflow pins the count at the maximum
- * value.
- */
-void opd_put_image_sample(struct opd_image * image, unsigned long offset, u32 counter);
-
-/**
- * opd_handle_fork - deal with fork notification
- * @param note  note to handle
- *
- * Deal with a fork() notification by creating a new process
- * structure, and copying mapping information from the old process.
- *
- * sample->pid contains the process id of the old process.
- * sample->eip contains the process id of the new process.
- */
-void opd_handle_fork(struct op_note const * note);
-
-/**
- * opd_handle_exec - deal with notification of execve()
- * @param tid  tid for this process
- * @param tgid  tgid for this process
- *
- * Drop all mapping information for the process.
- */
-void opd_handle_exec(pid_t tid, pid_t tgid);
-
-/**
- * opd_handle_exit - deal with exit notification
- * @param note  note to handle
- *
- * Deal with an exit() notification by setting the flag "dead"
- * on a process. These will be later cleaned up by the %SIGALRM
- * handler.
- *
- * sample->pid contains the process id of the exited process.
- */
-void opd_handle_exit(struct op_note const * note);
-
-/**
- * opd_get_proc - get process from process list
- * @param tid  tid for this process
- * @param tgid  tgid for this process
- *
- * A process with pid tid is searched on the process list,
- * maintaining LRU order. If it is not found, %NULL is returned,
- * otherwise the process structure is returned.
- */
-struct opd_proc * opd_get_proc(pid_t tid, pid_t tgid);
-
-/**
- * opd_new_proc - create a new process structure
- * @param tid  tid for this process
- * @param tgid  tgid for this process
- *
- * Allocate and initialise a process structure and insert
- * it into the procs hash table.
- */
-struct opd_proc * opd_new_proc(pid_t tid, pid_t tgid);
-
-/**
- * opd_get_nr_procs - return number of processes tracked
- */
-int opd_get_nr_procs(void);
-
-/**
- * opd_age_procs - age all dead process preparing them for a deletion
- */
-void opd_age_procs(void);
-
-/**
- * freeze all resource used by opd_procs managment
- */
-void opd_proc_cleanup(void);
-
-/**
- * opd_clear_kernel_mapping - remove all kernel mapping for all opd_proc
- *
- * invalidate (by removing them) all kernel mapping. This function do nothing
- * when separate_kernel == 0 because we don't add mapping for kernel
- * sample in proc struct. As side effect decrease reference count of
- * associated with these mapping which eventually close this image
- */
-void opd_clear_kernel_mapping(void);
-
-#endif /* OPD_PROC_H */
diff --git a/daemon/liblegacy/opd_sample_files.c b/daemon/liblegacy/opd_sample_files.c
deleted file mode 100644
index 126ba99..0000000
--- a/daemon/liblegacy/opd_sample_files.c
+++ /dev/null
@@ -1,179 +0,0 @@
-/**
- * @file opd_sample_files.c
- * Management of sample files
- *
- * @remark Copyright 2002 OProfile authors
- * @remark Read the file COPYING
- *
- * @author John Levon
- * @author Philippe Elie
- */
-
-#include <sys/types.h>
- 
-#include "opd_sample_files.h"
-#include "opd_image.h"
-#include "opd_printf.h"
-#include "opd_events.h"
-#include "oprofiled.h"
-
-#include "op_sample_file.h"
-#include "op_file.h"
-#include "op_config.h"
-#include "op_mangle.h"
-#include "op_events.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <errno.h>
-
-/** All sfiles are on this list. */
-static LIST_HEAD(lru_list);
-
-/* this value probably doesn't matter too much */
-#define LRU_AMOUNT 1000
-static int opd_24_sfile_lru_clear(void)
-{
-	struct list_head * pos;
-	struct list_head * pos2;
-	struct opd_24_sfile * sfile;
-	int amount = LRU_AMOUNT;
-
-	verbprintf(vsfile, "image lru clear\n");
-
-	if (list_empty(&lru_list))
-		return 1;
-
-	list_for_each_safe(pos, pos2, &lru_list) {
-		if (!--amount)
-			break;
-		sfile = list_entry(pos, struct opd_24_sfile, lru_next);
-		odb_close(&sfile->sample_file);
-		list_del_init(&sfile->lru_next);
-	}
-
-	return 0;
-}
-
-
-void opd_24_sfile_lru(struct opd_24_sfile * sfile)
-{
-	list_del(&sfile->lru_next);
-	list_add_tail(&sfile->lru_next, &lru_list);
-}
-
-
-static char * opd_mangle_filename(struct opd_image const * image, int counter,
-                                  int cpu_nr)
-{
-	char * mangled;
-	struct mangle_values values;
-	struct opd_event * event = find_counter_event(counter);
-
-	values.flags = 0;
-	if (image->kernel)
-		values.flags |= MANGLE_KERNEL;
-
-	if (separate_thread) {
-		values.flags |= MANGLE_TGID | MANGLE_TID;
-		values.tid = image->tid;
-		values.tgid = image->tgid;
-	}
-
-	if (separate_cpu) {
-		values.flags |= MANGLE_CPU;
-		values.cpu = cpu_nr;
-	}
-
-	values.event_name = event->name;
-	values.count = event->count;
-	values.unit_mask = event->um;
-
-	values.image_name = image->name;
-	values.dep_name = separate_lib && image->app_name
-		? image->app_name : image->name;
-
-	mangled = op_mangle_filename(&values);
-
-	return mangled;
-}
-
-
-int opd_open_24_sample_file(struct opd_image * image, int counter, int cpu_nr)
-{
-	char * mangled;
-	struct opd_24_sfile * sfile;
-	int err;
-
-	mangled = opd_mangle_filename(image, counter, cpu_nr);
-
-	verbprintf(vsfile, "Opening \"%s\"\n", mangled);
-
-	create_path(mangled);
-
-	sfile = image->sfiles[cpu_nr][counter];
-	if (!sfile) {
-		sfile = malloc(sizeof(struct opd_24_sfile));
-		list_init(&sfile->lru_next);
-		odb_init(&sfile->sample_file);
-		image->sfiles[cpu_nr][counter] = sfile;
-	}
-
-	list_del(&sfile->lru_next);
-	list_add_tail(&sfile->lru_next, &lru_list);
-
-retry:
-	err = odb_open(&sfile->sample_file, mangled, ODB_RDWR,
-                       sizeof(struct opd_header));
-
-	/* This can naturally happen when racing against opcontrol --reset. */
-	if (err) {
-		if (err == EMFILE) {
-			if (opd_24_sfile_lru_clear()) {
-				printf("LRU cleared but odb_open() fails for %s.\n", mangled);
-				abort();
-			}
-			goto retry;
-		}
-
-		fprintf(stderr, "oprofiled: open of %s failed: %s\n",
-		        mangled, strerror(err));
-		goto out;
-	}
-
-	fill_header(odb_get_data(&sfile->sample_file), counter, 0, 0,
-		    image->kernel, 0, 0, 0, image->mtime);
-
-out:
-	free(mangled);
-	return err;
-}
-
-
-void opd_sync_samples_files(void)
-{
-	struct list_head * pos;
-	struct opd_24_sfile * sfile;
-
-	list_for_each(pos, &lru_list) {
-		sfile = list_entry(pos, struct opd_24_sfile, lru_next);
-		odb_sync(&sfile->sample_file);
-	}
-}
-
-
-void opd_close_image_samples_files(struct opd_image * image)
-{
-	uint i, j;
-	for (i = 0 ; i < op_nr_counters ; ++i) {
-		for (j = 0; j < NR_CPUS; ++j) {
-			if (image->sfiles[j] && image->sfiles[j][i]) {
-				odb_close(&image->sfiles[j][i]->sample_file);
-				list_del(&image->sfiles[j][i]->lru_next);
-				free(image->sfiles[j][i]);
-				image->sfiles[j][i] = 0;
-			}
-		}
-	}
-}
diff --git a/daemon/liblegacy/opd_sample_files.h b/daemon/liblegacy/opd_sample_files.h
deleted file mode 100644
index 20555a9..0000000
--- a/daemon/liblegacy/opd_sample_files.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/**
- * @file opd_sample_files.h
- * Management of sample files
- *
- * @remark Copyright 2002 OProfile authors
- * @remark Read the file COPYING
- *
- * @author John Levon
- * @author Philippe Elie
- */
-
-#ifndef OPD_SAMPLE_FILES_H
-#define OPD_SAMPLE_FILES_H
-
-#include "op_list.h"
-#include "odb.h"
-
-struct opd_image;
-
-/** one samples file when profiling on a 2.2/2.4 kernel */
-struct opd_24_sfile {
-	/** lru list of sample file */
-	struct list_head lru_next;
-	/** the sample file itself */
-	odb_t sample_file;
-};
-
-/**
- * sync all samples files
- */
-void opd_sync_samples_files(void);
-
-/**
- * @param image  the image pointer to work on
- *
- * close all samples files belonging to this image
- */
-void opd_close_image_samples_files(struct opd_image * image);
-
-/**
- * opd_open_24_sample_file - open an image sample file
- * @param image  image to open file for
- * @param counter  counter number
- * @param cpu_nr  cpu number
- *
- * Open image sample file for the image, counter
- * counter and set up memory mappings for it.
- * image->kernel and image->name must have meaningful
- * values.
- *
- * Returns 0 on success.
- */
-int opd_open_24_sample_file(struct opd_image * image, int counter, int cpu_nr);
-
-/**
- * @param sfile  sample file to act on
- *
- * put sfile at the head of samples files lru list
- */
-void opd_24_sfile_lru(struct opd_24_sfile * sfile);
-
-
-#endif /* OPD_SAMPLE_FILES_H */
diff --git a/daemon/liblegacy/p_module.h b/daemon/liblegacy/p_module.h
deleted file mode 100644
index 9367508..0000000
--- a/daemon/liblegacy/p_module.h
+++ /dev/null
@@ -1,207 +0,0 @@
-/* Definitions for the Linux module syscall interface.
-   Copyright 1996, 1997 Linux International.
-
-   Contributed by Richard Henderson <rth@tamu.edu>
-
-   This file is part of the Linux modutils.
-
-   This program is free software; you can redistribute it and/or modify it
-   under the terms of the GNU General Public License as published by the
-   Free Software Foundation; either version 2 of the License, or (at your
-   option) any later version.
-
-   This program is distributed in the hope that it will be useful, but
-   WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software Foundation,
-   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
-
-#ifndef MODUTILS_MODULE_H
-#define MODUTILS_MODULE_H 1
-
-/* This file contains the structures used by the 2.0 and 2.1 kernels.
-   We do not use the kernel headers directly because we do not wish
-   to be dependant on a particular kernel version to compile insmod.  */
-
-
-/*======================================================================*/
-/* The structures used by Linux 2.0.  */
-
-/* The symbol format used by get_kernel_syms(2).  */
-struct old_kernel_sym
-{
-  unsigned long value;
-  char name[60];
-};
-
-struct old_module_ref
-{
-  unsigned long module;		/* kernel addresses */
-  unsigned long next;
-};
-
-struct old_module_symbol
-{
-  unsigned long addr;
-  unsigned long name;
-};
-
-struct old_symbol_table
-{
-  int size;			/* total, including string table!!! */
-  int n_symbols;
-  int n_refs;
-  struct old_module_symbol symbol[0]; /* actual size defined by n_symbols */
-  struct old_module_ref ref[0];	/* actual size defined by n_refs */
-};
-
-struct old_mod_routines
-{
-  unsigned long init;
-  unsigned long cleanup;
-};
-
-struct old_module
-{
-  unsigned long next;
-  unsigned long ref;		/* the list of modules that refer to me */
-  unsigned long symtab;
-  unsigned long name;
-  int size;			/* size of module in pages */
-  unsigned long addr;		/* address of module */
-  int state;
-  unsigned long cleanup;	/* cleanup routine */
-};
-
-/* Sent to init_module(2) or'ed into the code size parameter.  */
-#define OLD_MOD_AUTOCLEAN 0x40000000 /* big enough, but no sign problems... */
-
-int get_kernel_syms(struct old_kernel_sym *);
-int old_sys_init_module(char const * name, char *code, unsigned codesize,
-			struct old_mod_routines *, struct old_symbol_table *);
-
-/*======================================================================*/
-/* For sizeof() which are related to the module platform and not to the
-   environment isnmod is running in, use sizeof_xx instead of sizeof(xx).  */
-
-#define tgt_sizeof_char		sizeof(char)
-#define tgt_sizeof_short	sizeof(short)
-#define tgt_sizeof_int		sizeof(int)
-#define tgt_sizeof_long		sizeof(long)
-#define tgt_sizeof_char_p	sizeof(char *)
-#define tgt_sizeof_void_p	sizeof(void *)
-#define tgt_long		long
-#define tgt_long_fmt		"l"
-
-/* This assumes that long long on a 32 bit system is equivalent to long on the
- * equivalent 64 bit system.  Also that void and char pointers are 8 bytes on
- * all 64 bit systems.  Add per system tweaks if it ever becomes necessary.
- */
-#if defined(COMMON_3264) && defined(ONLY_64)
-#undef tgt_long
-#undef tgt_long_fmt
-#undef tgt_sizeof_long
-#undef tgt_sizeof_char_p
-#undef tgt_sizeof_void_p
-#define tgt_long                long long
-#define tgt_long_fmt		"ll"
-#define tgt_sizeof_long         8
-#define tgt_sizeof_char_p       8
-#define tgt_sizeof_void_p       8
-#endif
-
-/*======================================================================*/
-/* The structures used in Linux 2.1 onwards.  */
-
-/* Note: module_symbol does not use tgt_long intentionally */
-struct module_symbol
-{
-  unsigned long value;
-  unsigned long name;
-};
-
-struct module_ref
-{
-  unsigned tgt_long dep;		/* kernel addresses */
-  unsigned tgt_long ref;
-  unsigned tgt_long next_ref;
-};
-
-struct module
-{
-  unsigned tgt_long size_of_struct;	/* == sizeof(module) */
-  unsigned tgt_long next;
-  unsigned tgt_long name;
-  unsigned tgt_long size;
-
-  tgt_long usecount;
-  unsigned tgt_long flags;		/* AUTOCLEAN et al */
-
-  unsigned nsyms;
-  unsigned ndeps;
-
-  unsigned tgt_long syms;
-  unsigned tgt_long deps;
-  unsigned tgt_long refs;
-  unsigned tgt_long init;
-  unsigned tgt_long cleanup;
-  unsigned tgt_long ex_table_start;
-  unsigned tgt_long ex_table_end;
-#ifdef __alpha__
-  unsigned tgt_long gp;
-#endif
-  /* Everything after here is extension.  */
-  unsigned tgt_long read_start;		/* Read data from existing module */
-  unsigned tgt_long read_end;
-  unsigned tgt_long can_unload;
-  unsigned tgt_long runsize;
-  unsigned tgt_long kallsyms_start;
-  unsigned tgt_long kallsyms_end;
-  unsigned tgt_long archdata_start;
-  unsigned tgt_long archdata_end;
-  unsigned tgt_long kernel_data;
-};
-
-struct module_info
-{
-  unsigned long addr;
-  unsigned long size;
-  unsigned long flags;
-	   long usecount;
-};
-
-/* Bits of module.flags.  */
-#define NEW_MOD_RUNNING		1
-#define NEW_MOD_DELETED		2
-#define NEW_MOD_AUTOCLEAN	4
-#define NEW_MOD_VISITED		8
-#define NEW_MOD_USED_ONCE	16
-#define NEW_MOD_INITIALIZING	64
-
-int sys_init_module(char const * name, const struct module *);
-int query_module(char const * name, int which, void *buf, size_t bufsize,
-		 size_t *ret);
-
-/* Values for query_module's which.  */
-
-#define QM_MODULES	1
-#define QM_DEPS		2
-#define QM_REFS		3
-#define QM_SYMBOLS	4
-#define QM_INFO		5
-
-/*======================================================================*/
-/* The system calls unchanged between 2.0 and 2.1.  */
-
-unsigned long create_module(const char *, size_t);
-int delete_module(const char *);
-
-/* In safe mode the last parameter is forced to be a module name and meta
- * expansion is not allowed on that name.
- */
-extern unsigned int safemode;
-
-#endif /* module.h */
diff --git a/daemon/opd_anon.c b/daemon/opd_anon.c
deleted file mode 100644
index 9caea3d..0000000
--- a/daemon/opd_anon.c
+++ /dev/null
@@ -1,228 +0,0 @@
-/**
- * @file opd_anon.c
- * Anonymous region handling.
- *
- * Our caching of maps has some problems: if we get tgid reuse,
- * and it's the same application, we might end up with wrong
- * maps. The same happens in an unmap-remap case. There's not much
- * we can do about this, we just hope it's not too common...
- *
- * What is relatively common is expanding anon maps, which leaves us
- * with lots of separate sample files.
- *
- * @remark Copyright 2005 OProfile authors
- * @remark Read the file COPYING
- *
- * @author John Levon
- * @Modifications Gisle Dankel
- */
-
-#include "opd_anon.h"
-#include "opd_trans.h"
-#include "opd_sfile.h"
-#include "opd_printf.h"
-#include "op_libiberty.h"
-
-#include <limits.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-
-#define HASH_SIZE 1024
-#define HASH_BITS (HASH_SIZE - 1)
-
-/*
- * Note that this value is tempered by the fact that when we miss in the
- * anon cache, we'll tear down all the mappings for that tgid. Thus, LRU
- * of a mapping can potentially clear out a much larger number of
- * mappings.
- */
-#define LRU_SIZE 8192
-#define LRU_AMOUNT (LRU_SIZE/8)
-
-static struct list_head hashes[HASH_SIZE];
-static struct list_head lru;
-static size_t nr_lru;
-
-static void do_lru(struct transient * trans)
-{
-	size_t nr_to_kill = LRU_AMOUNT;
-	struct list_head * pos;
-	struct list_head * pos2;
-	struct anon_mapping * entry;
-
-	list_for_each_safe(pos, pos2, &lru) {
-		entry = list_entry(pos, struct anon_mapping, lru_list);
-		if (trans->anon == entry)
-			clear_trans_current(trans);
-		if (trans->last_anon == entry)
-			clear_trans_last(trans);
-		sfile_clear_anon(entry);
-		list_del(&entry->list);
-		list_del(&entry->lru_list);
-		--nr_lru;
-		free(entry);
-		if (nr_to_kill-- == 0)
-			break;
-	}
-}
-
-
-static unsigned long hash_anon(pid_t tgid, cookie_t app)
-{
-	return ((app >> DCOOKIE_SHIFT) ^ (tgid >> 2)) & (HASH_SIZE - 1);
-}
- 
-
-static void clear_anon_maps(struct transient * trans)
-{
-	unsigned long hash = hash_anon(trans->tgid, trans->app_cookie);
-	pid_t tgid = trans->tgid;
-	cookie_t app = trans->app_cookie;
-	struct list_head * pos;
-	struct list_head * pos2;
-	struct anon_mapping * entry;
-
-	clear_trans_current(trans);
-
-	list_for_each_safe(pos, pos2, &hashes[hash]) {
-		entry = list_entry(pos, struct anon_mapping, list);
-		if (entry->tgid == tgid && entry->app_cookie == app) {
-			if (trans->last_anon == entry)
-				clear_trans_last(trans);
-			sfile_clear_anon(entry);
-			list_del(&entry->list);
-			list_del(&entry->lru_list);
-			--nr_lru;
-			free(entry);
-		}
-	}
-
-	if (vmisc) {
-		char const * name = verbose_cookie(app);
-		printf("Cleared anon maps for tgid %u (%s).\n", tgid, name);
-	}
-}
-
-
-static void
-add_anon_mapping(struct transient * trans, vma_t start, vma_t end, char * name)
-{
-	unsigned long hash = hash_anon(trans->tgid, trans->app_cookie);
-	struct anon_mapping * m = xmalloc(sizeof(struct anon_mapping));
-	m->tgid = trans->tgid;
-	m->app_cookie = trans->app_cookie;
-	m->start = start;
-	m->end = end;
-	strncpy(m->name, name, MAX_IMAGE_NAME_SIZE + 1);
-	list_add_tail(&m->list, &hashes[hash]);
-	list_add_tail(&m->lru_list, &lru);
-	if (++nr_lru == LRU_SIZE)
-		do_lru(trans);
-	if (vmisc) {
-		char const * name = verbose_cookie(m->app_cookie);
-		printf("Added anon map 0x%llx-0x%llx for tgid %u (%s).\n",
-		       start, end, m->tgid, name);
-	}
-}
-
-
-/* 42000000-4212f000 r-xp 00000000 16:03 424334 /lib/tls/libc-2.3.2.so */
-static void get_anon_maps(struct transient * trans)
-{
-	FILE * fp = NULL;
-	char buf[PATH_MAX];
-	vma_t start, end;
-	int ret;
-
-	snprintf(buf, PATH_MAX, "/proc/%d/maps", trans->tgid);
-	fp = fopen(buf, "r");
-	if (!fp)
-		return;
-
-	while (fgets(buf, PATH_MAX, fp) != NULL) {
-		char tmp[MAX_IMAGE_NAME_SIZE + 1];
-		char name[MAX_IMAGE_NAME_SIZE + 1];
-		/* Some anon maps have labels like
-		 * [heap], [stack], [vdso], [vsyscall] ...
-		 * Keep track of these labels. If a map has no name, call it "anon".
-		 * Ignore all mappings starting with "/" (file or shared memory object)
-		 */
-		strcpy(name, "anon");
-		ret = sscanf(buf, "%llx-%llx %20s %20s %20s %20s %20s",
-		             &start, &end, tmp, tmp, tmp, tmp, name);
-		if (ret < 6 || name[0] == '/')
-			continue;
-
-		add_anon_mapping(trans, start, end, name);
-	}
-
-	fclose(fp);
-}
-
-
-static int
-anon_match(struct transient const * trans, struct anon_mapping const * anon)
-{
-	if (!anon)
-		return 0;
-	if (trans->tgid != anon->tgid)
-		return 0;
-	if (trans->app_cookie != anon->app_cookie)
-		return 0;
-	if (trans->pc < anon->start)
-		return 0;
-	return (trans->pc < anon->end);
-}
-
-
-struct anon_mapping * find_anon_mapping(struct transient * trans)
-{
-	unsigned long hash = hash_anon(trans->tgid, trans->app_cookie);
-	struct list_head * pos;
-	struct anon_mapping * entry;
-	int tried = 0;
-
-	if (anon_match(trans, trans->anon))
-		return (trans->anon);
-
-retry:
-	list_for_each(pos, &hashes[hash]) {
-		entry = list_entry(pos, struct anon_mapping, list);
-		if (anon_match(trans, entry))
-			goto success;
-	}
-
-	if (!tried) {
-		clear_anon_maps(trans);
-		get_anon_maps(trans);
-		tried = 1;
-		goto retry;
-	}
-
-	return NULL;
-
-success:
-	/*
-	 * Typically, there's one big mapping that matches. Let's go
-	 * faster.
-	 */
-	list_del(&entry->list);
-	list_add(&entry->list, &hashes[hash]);
-
-	verbprintf(vmisc, "Found range 0x%llx-0x%llx for tgid %u, pc %llx.\n",
-	           entry->start, entry->end, (unsigned int)entry->tgid,
-		   trans->pc);
-	return entry;
-}
-
-
-void anon_init(void)
-{
-	size_t i;
-
-	for (i = 0; i < HASH_SIZE; ++i)
-		list_init(&hashes[i]);
-
-	list_init(&lru);
-}
diff --git a/daemon/opd_anon.h b/daemon/opd_anon.h
deleted file mode 100644
index 3f66b55..0000000
--- a/daemon/opd_anon.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/**
- * @file opd_anon.h
- * Anonymous region handling.
- *
- * @remark Copyright 2005 OProfile authors
- * @remark Read the file COPYING
- *
- * @author John Levon
- */
-
-#ifndef OPD_ANON_H
-#define OPD_ANON_H
-
-#include "op_types.h"
-#include "op_list.h"
-
-#include "opd_cookie.h"
-
-#include <sys/types.h>
-
-struct transient;
-
-/**
- * Shift useful bits into play for VMA hashing.
- */
-#define VMA_SHIFT 13 
-
-/* Maximum size of the image name considered */
-#define MAX_IMAGE_NAME_SIZE 20
-
-struct anon_mapping {
-	/** start of the mapping */
-	vma_t start;
-	/** end of the mapping */
-	vma_t end;
-	/** tgid of the app */
-	pid_t tgid;
-	/** cookie of the app */
-	cookie_t app_cookie;
-	/** hash list */
-	struct list_head list;
-	/** lru list */
-	struct list_head lru_list;
-	char name[MAX_IMAGE_NAME_SIZE+1];
-};
-
-/**
- * Try to find an anonymous mapping for the given pc/tgid pair.
- */
-struct anon_mapping * find_anon_mapping(struct transient *);
-
-void anon_init(void);
-
-#endif /* OPD_ANON_H */
diff --git a/daemon/opd_cookie.c b/daemon/opd_cookie.c
deleted file mode 100644
index ec3ff2d..0000000
--- a/daemon/opd_cookie.c
+++ /dev/null
@@ -1,210 +0,0 @@
-/**
- * @file opd_cookie.c
- * cookie -> name cache
- *
- * @remark Copyright 2002, 2005 OProfile authors
- * @remark Read the file COPYING
- *
- * @author John Levon
- */
-
-#include "opd_cookie.h"
-#include "oprofiled.h"
-#include "op_list.h"
-#include "op_libiberty.h"
-
-#include <sys/syscall.h>
-#include <unistd.h>
-#include <limits.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <errno.h>
-
-#ifndef __NR_lookup_dcookie
-#if defined(__i386__)
-#define __NR_lookup_dcookie 253
-#elif defined(__x86_64__)
-#define __NR_lookup_dcookie 212
-#elif defined(__powerpc__)
-#define __NR_lookup_dcookie 235
-#elif defined(__alpha__)
-#define __NR_lookup_dcookie 406
-#elif defined(__hppa__)
-#define __NR_lookup_dcookie 223
-#elif defined(__ia64__)
-#define __NR_lookup_dcookie 1237
-#elif defined(__sparc__)
-/* untested */
-#define __NR_lookup_dcookie 208
-#elif defined(__s390__) || defined (__s390x__)
-#define __NR_lookup_dcookie 110
-#elif defined(__arm__)
-#define __NR_lookup_dcookie (__NR_SYSCALL_BASE+249)
-#elif defined(__mips__)
-#include <sgidefs.h>
-/* O32 */
-#if _MIPS_SIM == _MIPS_SIM_ABI32
-#define __NR_lookup_dcookie 4247
-/* N64 */
-#elif _MIPS_SIM == _MIPS_SIM_ABI64
-#define __NR_lookup_dcookie 5206
-/* N32 */
-#elif _MIPS_SIM == _MIPS_SIM_NABI32
-#define __NR_lookup_dcookie 6206
-#else
-#error Unknown MIPS ABI: Dunno __NR_lookup_dcookie
-#endif
-#else
-#error Please define __NR_lookup_dcookie for your architecture
-#endif
-#endif /* __NR_lookup_dcookie */
-
-#if (defined(__powerpc__) && !defined(__powerpc64__)) || defined(__hppa__)\
-	|| (defined(__s390__) && !defined(__s390x__)) \
-	|| (defined(__mips__) && (_MIPS_SIM == _MIPS_SIM_ABI32) \
-	    && defined(__MIPSEB__)) \
-        || (defined(__arm__) && defined(__ARM_EABI__) \
-            && defined(__ARMEB__))
-static inline int lookup_dcookie(cookie_t cookie, char * buf, size_t size)
-{
-	return syscall(__NR_lookup_dcookie, (unsigned long)(cookie >> 32),
-		       (unsigned long)(cookie & 0xffffffff), buf, size);
-}
-#elif (defined(__mips__) && (_MIPS_SIM == _MIPS_SIM_ABI32)) \
-	|| (defined(__arm__) && defined(__ARM_EABI__)) \
-	|| (defined(__tile__) && !defined(__LP64__))
-static inline int lookup_dcookie(cookie_t cookie, char * buf, size_t size)
-{
-	return syscall(__NR_lookup_dcookie,
-		       (unsigned long)(cookie & 0xffffffff),
-		       (unsigned long)(cookie >> 32), buf, size);
-}
-#else
-static inline int lookup_dcookie(cookie_t cookie, char * buf, size_t size)
-{
-	return syscall(__NR_lookup_dcookie, cookie, buf, size);
-}
-#endif
-
-
-struct cookie_entry {
-	cookie_t value;
-	char * name;
-	int ignored;
-	struct list_head list;
-};
-
-
-#define HASH_SIZE 512
-#define HASH_BITS (HASH_SIZE - 1)
-
-static struct list_head hashes[HASH_SIZE];
-
-static struct cookie_entry * create_cookie(cookie_t cookie)
-{
-	int err;
-	struct cookie_entry * entry = xmalloc(sizeof(struct cookie_entry));
-
-	entry->value = cookie;
-	entry->name = xmalloc(PATH_MAX + 1);
-
-	err = lookup_dcookie(cookie, entry->name, PATH_MAX);
-
-	if (err < 0) {
-		fprintf(stderr, "Lookup of cookie %llx failed, errno=%d\n",
-		       cookie, errno); 
-		free(entry->name);
-		entry->name = NULL;
-		entry->ignored = 0;
-	} else {
-		entry->ignored = is_image_ignored(entry->name);
-	}
-
-	return entry;
-}
-
-
-/* Cookie monster want cookie! */
-static unsigned long hash_cookie(cookie_t cookie)
-{
-	return (cookie >> DCOOKIE_SHIFT) & (HASH_SIZE - 1);
-}
- 
-
-char const * find_cookie(cookie_t cookie)
-{
-	unsigned long hash = hash_cookie(cookie);
-	struct list_head * pos;
-	struct cookie_entry * entry;
-
-	if (cookie == INVALID_COOKIE || cookie == NO_COOKIE)
-		return NULL;
-
-	list_for_each(pos, &hashes[hash]) {
-		entry = list_entry(pos, struct cookie_entry, list);
-		if (entry->value == cookie)
-			goto out;
-	}
-
-	/* not sure this can ever happen due to is_cookie_ignored */
-	entry = create_cookie(cookie);
-	list_add(&entry->list, &hashes[hash]);
-out:
-	return entry->name;
-}
-
-
-int is_cookie_ignored(cookie_t cookie)
-{
-	unsigned long hash = hash_cookie(cookie);
-	struct list_head * pos;
-	struct cookie_entry * entry;
-
-	if (cookie == INVALID_COOKIE || cookie == NO_COOKIE)
-		return 1;
-
-	list_for_each(pos, &hashes[hash]) {
-		entry = list_entry(pos, struct cookie_entry, list);
-		if (entry->value == cookie)
-			goto out;
-	}
-
-	entry = create_cookie(cookie);
-	list_add(&entry->list, &hashes[hash]);
-out:
-	return entry->ignored;
-}
-
-
-char const * verbose_cookie(cookie_t cookie)
-{
-	unsigned long hash = hash_cookie(cookie);
-	struct list_head * pos;
-	struct cookie_entry * entry;
-
-	if (cookie == INVALID_COOKIE)
-		return "invalid";
-
-	if (cookie == NO_COOKIE)
-		return "anonymous";
-
-	list_for_each(pos, &hashes[hash]) {
-		entry = list_entry(pos, struct cookie_entry, list);
-		if (entry->value == cookie) {
-			if (!entry->name)
-				return "failed lookup";
-			return entry->name;
-		}
-	}
-
-	return "not hashed";
-}
-
-
-void cookie_init(void)
-{
-	size_t i;
-
-	for (i = 0; i < HASH_SIZE; ++i)
-		list_init(&hashes[i]);
-}
diff --git a/daemon/opd_cookie.h b/daemon/opd_cookie.h
deleted file mode 100644
index a9f13b1..0000000
--- a/daemon/opd_cookie.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/**
- * @file opd_cookie.h
- * cookie -> name cache
- *
- * @remark Copyright 2002, 2005 OProfile authors
- * @remark Read the file COPYING
- *
- * @author John Levon
- */
-
-#ifndef OPD_COOKIE_H
-#define OPD_COOKIE_H
-
-typedef unsigned long long cookie_t;
-
-#define INVALID_COOKIE ~0LLU
-#define NO_COOKIE 0LLU
-
-/**
- * Shift value to remove trailing zero on a dcookie value, 7 is sufficient
- * for most architecture
- */
-#define DCOOKIE_SHIFT 7
-
-/**
- * Return the name of the given dcookie. May return
- * NULL on failure.
- */
-char const * find_cookie(cookie_t cookie);
-
-/** return true if this cookie should be ignored */
-int is_cookie_ignored(cookie_t cookie);
-
-/** give a textual description of the cookie */
-char const * verbose_cookie(cookie_t cookie);
-
-void cookie_init(void);
-
-#endif /* OPD_COOKIE_H */
diff --git a/daemon/opd_events.c b/daemon/opd_events.c
deleted file mode 100644
index 40d6979..0000000
--- a/daemon/opd_events.c
+++ /dev/null
@@ -1,189 +0,0 @@
-/**
- * @file daemon/opd_events.c
- * Event details for each counter
- *
- * @remark Copyright 2002, 2003 OProfile authors
- * @remark Read the file COPYING
- *
- * @author John Levon
- * @author Philippe Elie
- */
-
-#include "config.h"
- 
-#include "opd_events.h"
-#include "opd_printf.h"
-#include "opd_extended.h"
-#include "oprofiled.h"
-
-#include "op_string.h"
-#include "op_config.h"
-#include "op_cpufreq.h"
-#include "op_cpu_type.h"
-#include "op_libiberty.h"
-#include "op_hw_config.h"
-#include "op_sample_file.h"
-#include "op_events.h"
-
-#include <stdlib.h>
-#include <stdio.h>
-
-extern op_cpu cpu_type;
-
-struct opd_event opd_events[OP_MAX_COUNTERS];
-
-static double cpu_speed;
-
-static void malformed_events(void)
-{
-	fprintf(stderr, "oprofiled: malformed events passed "
-		"on the command line\n");
-	exit(EXIT_FAILURE);
-}
-
-
-static char * copy_token(char ** c, char delim)
-{
-	char * tmp = *c;
-	char * tmp2 = *c;
-	char * str;
-
-	if (!**c)
-		return NULL;
-
-	while (*tmp2 && *tmp2 != delim)
-		++tmp2;
-
-	if (tmp2 == tmp)
-		return NULL;
-
-	str = op_xstrndup(tmp, tmp2 - tmp);
-	*c = tmp2;
-	if (**c)
-		++*c;
-	return str;
-}
-
-
-static unsigned long copy_ulong(char ** c, char delim)
-{
-	unsigned long val = 0;
-	char * str = copy_token(c, delim);
-	if (!str)
-		malformed_events();
-	val = strtoul(str, NULL, 0);
-	free(str);
-	return val;
-}
-
-
-void opd_parse_events(char const * events)
-{
-	char * ev = xstrdup(events);
-	char * c;
-	size_t cur = 0;
-
-	cpu_speed = op_cpu_frequency();
-
-	if (cpu_type == CPU_TIMER_INT) {
-		struct opd_event * event = &opd_events[0];
-		event->name = xstrdup("TIMER");
-		event->value = event->counter
-			= event->count = event->um = 0;
-		event->kernel = 1;
-		event->user = 1;
-		return;
-	}
-
-	if (!ev || !strlen(ev)) {
-		fprintf(stderr, "oprofiled: no events passed.\n");
-		exit(EXIT_FAILURE);
-	}
-
-	verbprintf(vmisc, "Events: %s\n", ev);
-
-	c = ev;
-
-	while (*c && cur < op_nr_counters) {
-		struct opd_event * event = &opd_events[cur];
-
-		if (!(event->name = copy_token(&c, ':')))
-			malformed_events();
-		event->value = copy_ulong(&c, ':');
-		event->counter = copy_ulong(&c, ':');
-		event->count = copy_ulong(&c, ':');
-		event->um = copy_ulong(&c, ':');
-		event->kernel = copy_ulong(&c, ':');
-		event->user = copy_ulong(&c, ',');
-		++cur;
-	}
-
-	if (*c) {
-		fprintf(stderr, "oprofiled: too many events passed.\n");
-		exit(EXIT_FAILURE);
-	}
-
-	free(ev);
-}
-
-
-struct opd_event * find_counter_event(unsigned long counter)
-{
-	size_t i;
-	struct opd_event * ret = NULL;
-
-	if (counter >= OP_MAX_COUNTERS) {
-		if((ret = opd_ext_find_counter_event(counter)) != NULL)
-			return ret;
-	}
-
-	/*
-	 *  The kernel modules will put a 0 as counter number into the
-	 *  samples when timer based sampling is used.  So this
-	 *  theoretically might get confused here with the first
-	 *  hardware counter also having number zero.  However, TIMER
-	 *  is never allowed to be used together with other events.
-	 *  So it is safe to map this to the timer event without
-	 *  actually doing the lookup.
-	 */
-	if (op_cpu_has_timer_fs()
-	    && strcmp(opd_events[0].name, TIMER_EVENT_NAME) == 0
-	    && !opd_events[1].name) {
-		return &opd_events[0];
-
-	}
-
-	for (i = 0; i < op_nr_counters && opd_events[i].name; ++i) {
-		if (counter == opd_events[i].counter)
-			return &opd_events[i];
-	}
-
-	fprintf(stderr, "Unknown event for counter %lu\n", counter);
-	abort();
-	return NULL;
-}
-
-
-void fill_header(struct opd_header * header, unsigned long counter,
-		 vma_t anon_start, vma_t cg_to_anon_start,
-		 int is_kernel, int cg_to_is_kernel,
-		 int spu_samples, uint64_t embed_offset, time_t mtime)
-{
-	struct opd_event * event = find_counter_event(counter);
-
-	memset(header, '\0', sizeof(struct opd_header));
-	header->version = OPD_VERSION;
-	memcpy(header->magic, OPD_MAGIC, sizeof(header->magic));
-	header->cpu_type = cpu_type;
-	header->ctr_event = event->value;
-	header->ctr_count = event->count;
-	header->ctr_um = event->um;
-	header->is_kernel = is_kernel;
-	header->cg_to_is_kernel = cg_to_is_kernel;
-	header->cpu_speed = cpu_speed;
-	header->mtime = mtime;
-	header->anon_start = anon_start;
-	header->spu_profile = spu_samples;
-	header->embedded_offset = embed_offset;
-	header->cg_to_anon_start = cg_to_anon_start;
-}
diff --git a/daemon/opd_events.h b/daemon/opd_events.h
deleted file mode 100644
index 1e8b801..0000000
--- a/daemon/opd_events.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/**
- * @file daemon/opd_events.h
- * Event details for each counter
- *
- * @remark Copyright 2002, 2003 OProfile authors
- * @remark Read the file COPYING
- *
- * @author John Levon
- * @author Philippe Elie
- */
-
-#ifndef OPD_EVENTS_H
-
-#include "op_types.h"
-
-#include <stdint.h>
-#include <time.h>
-
-/** event description for setup (perfmon) and mangling */
-struct opd_event {
-	char * name;
-	unsigned long value;
-	unsigned long counter;
-	unsigned long count;
-	unsigned long um;
-	unsigned long kernel;
-	unsigned long user;
-};
-
-/* needed for opd_perfmon.c */
-extern struct opd_event opd_events[];
-
-/** parse the events into the opd_events array */
-void opd_parse_events(char const * events);
-
-/** Find the event for the given counter */
-struct opd_event * find_counter_event(unsigned long counter);
-
-struct opd_header;
-
-/** fill the sample file header with event info etc. */
-void fill_header(struct opd_header * header, unsigned long counter,
-		 vma_t anon_start, vma_t anon_end,
-		 int is_kernel, int cg_to_is_kernel,
-                 int spu_samples, uint64_t embed_offset, time_t mtime);
-
-#endif /* OPD_EVENTS_H */
diff --git a/daemon/opd_extended.c b/daemon/opd_extended.c
deleted file mode 100644
index d4c8872..0000000
--- a/daemon/opd_extended.c
+++ /dev/null
@@ -1,195 +0,0 @@
-/**
- * @file opd_extended.c
- * OProfile Extended Feature
- *
- * @remark Copyright 2007-2009 OProfile authors
- * @remark Read the file COPYING
- *
- * @author Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
- * Copyright (c) 2009 Advanced Micro Devices, Inc.
- */
-
-#include "opd_extended.h"
-#include "op_string.h"
-
-#include <string.h>
-#include <stdio.h>
-
-/* This global variable is >= 0
- * if extended feature is enabled */
-static int opd_ext_feat_index;
-
-extern struct opd_ext_handlers ibs_handlers;
-
-/**
- * OProfile Extended Feature Table
- *
- * This table contains a list of extended features.
- */
-static struct opd_ext_feature ext_feature_table[] = {
-	{"ibs", &ibs_handlers },
-	{ NULL, NULL }
-};
-
-
-static int get_index_for_feature(char const * name)
-{
-	int ret = -1;
-	unsigned int i;
-
-	if(!name)
-		return ret;
-
-	for (i = 0 ; ext_feature_table[i].feature != NULL ; i++ ) {
-		if(!strncmp(name, ext_feature_table[i].feature,
-			strlen(ext_feature_table[i].feature))) {
-			ret = i;
-			break;
-		}
-	}
-
-	return ret;
-}
-
-
-static inline int is_ext_enabled()
-{
-	if (opd_ext_feat_index >= 0
-	&& ext_feature_table[opd_ext_feat_index].handlers != NULL)
-		return 1;
-	else
-		return 0;
-}
-
-
-static inline int is_ext_sfile_enabled()
-{
-	if (opd_ext_feat_index >= 0
-	&& ext_feature_table[opd_ext_feat_index].handlers != NULL
-	&& ext_feature_table[opd_ext_feat_index].handlers->ext_sfile != NULL)
-		return 1;
-	else
-		return 0;
-}
-
-
-/**
- * Param "value" is the input from CML option with the format:
- *
- * <feature name>:<param1>:<param2>:<param3>:.....
- *
- * where param1,2.3,..n are optional.
- */
-int opd_ext_initialize(char const * value)
-{
-	int ret = EXIT_FAILURE;
-	char * tmp = NULL, * name = NULL, * args = NULL;
-
-	if(!value) {
-		opd_ext_feat_index = -1;
-		return 0;
-	}
-
-	tmp = op_xstrndup(value, strlen(value));
-
-	/* Parse feature name*/
-	if((name = strtok_r(tmp, ":", &args)) == NULL)
-		goto err_out;
-
-	if((opd_ext_feat_index = get_index_for_feature(name)) < 0)
-		goto err_out;
-
-	ret = ext_feature_table[opd_ext_feat_index].handlers->ext_init(args);
-
-	return ret;
-
-err_out:
-	fprintf(stderr,"opd_ext_initialize: Invalid extended feature option: %s\n", value);
-	return ret;
-}
-
-
-int opd_ext_deinitialize()
-{
-	int ret = EXIT_FAILURE;
-
-	if(opd_ext_feat_index == -1) {
-		return 0;
-	}
-
-	ret = ext_feature_table[opd_ext_feat_index].handlers->ext_deinit();
-
-	return ret;
-}
-
-
-void opd_ext_print_stats()
-{
-	if (is_ext_enabled()
-	&& ext_feature_table[opd_ext_feat_index].handlers->ext_print_stats != NULL) {
-		printf("\n-- OProfile Extended-Feature Statistics --\n");
-		ext_feature_table[opd_ext_feat_index].handlers->ext_print_stats();
-	}
-}
-
-
-/**
- * opd_sfile extended APIs
- */
-void opd_ext_sfile_create(struct sfile * sf)
-{
-	/* Creating ext sfile only if extended feature is enable*/
-	if (is_ext_sfile_enabled()
-	&& ext_feature_table[opd_ext_feat_index].handlers->ext_sfile->create != NULL)
-		ext_feature_table[opd_ext_feat_index].handlers->ext_sfile->create(sf);
-}
-
-
-void opd_ext_sfile_dup (struct sfile * to, struct sfile * from)
-{
-	/* Duplicate ext sfile only if extended feature is enable*/
-	if (is_ext_sfile_enabled()
-	&& ext_feature_table[opd_ext_feat_index].handlers->ext_sfile->dup != NULL)
-		ext_feature_table[opd_ext_feat_index].handlers->ext_sfile->dup(to, from);
-}
-
-
-void opd_ext_sfile_close (struct sfile * sf)
-{
-	/* Close ext sfile only if extended feature is enable*/
-	if (is_ext_sfile_enabled()
-	&& ext_feature_table[opd_ext_feat_index].handlers->ext_sfile->close != NULL)
-		ext_feature_table[opd_ext_feat_index].handlers->ext_sfile->close(sf);
-}
-
-
-void opd_ext_sfile_sync(struct sfile * sf)
-{
-	/* Sync ext sfile only if extended feature is enable*/
-	if (is_ext_sfile_enabled()
-	&& ext_feature_table[opd_ext_feat_index].handlers->ext_sfile->sync != NULL)
-		ext_feature_table[opd_ext_feat_index].handlers->ext_sfile->sync(sf);
-}
-
-
-odb_t * opd_ext_sfile_get(struct transient const * trans, int is_cg)
-{
-	/* Get ext sfile only if extended feature is enable*/
-	if (is_ext_sfile_enabled()
-	&& ext_feature_table[opd_ext_feat_index].handlers->ext_sfile->get != NULL)
-		return	ext_feature_table[opd_ext_feat_index].handlers->ext_sfile->get(trans, is_cg);
-
-	return NULL;
-}
-
-
-struct opd_event * opd_ext_find_counter_event(unsigned long counter)
-{
-	/* Only if extended feature is enable*/
-	if (is_ext_sfile_enabled()
-	&& ext_feature_table[opd_ext_feat_index].handlers->ext_sfile->find_counter_event != NULL)
-		return	ext_feature_table[opd_ext_feat_index].handlers->ext_sfile->find_counter_event(counter);
-
-	return NULL;
-}
-
diff --git a/daemon/opd_extended.h b/daemon/opd_extended.h
deleted file mode 100644
index d7682a4..0000000
--- a/daemon/opd_extended.h
+++ /dev/null
@@ -1,94 +0,0 @@
-/**
- * @file opd_extended.h
- * OProfile Extended Feature
- *
- * @remark Copyright 2007-2009 OProfile authors
- * @remark Read the file COPYING
- *
- * @author Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
- * Copyright (c) 2009 Advanced Micro Devices, Inc.
- */
-
-#ifndef OPD_EXTENDED_H
-#define OPD_EXTENDED_H
-
-#include "opd_trans.h"
-#include "odb.h"
-
-#include <stdlib.h>
-#include <stdint.h>
-
-
-/**
- * OProfile Extended Feature Table Entry
- */
-struct opd_ext_feature {
-	// Feature name
-	const char* feature;
-	// Feature handlers
-	struct opd_ext_handlers * handlers;
-};
-
-/**
- * OProfile Extended handlers
- */
-struct opd_ext_handlers {
-	// Extended init
-	int (*ext_init)(char const *);
-	// Extended deinit 
-	int (*ext_deinit)();
-	// Extended statistics
-	int (*ext_print_stats)();
-	// Extended sfile handlers
-	struct opd_ext_sfile_handlers * ext_sfile;
-};
-
-/**
- * OProfile Extended sub-handlers (sfile)
- */
-struct opd_ext_sfile_handlers {
-	int (*create)(struct sfile *);
-	int (*dup)(struct sfile *, struct sfile *);
-	int (*close)(struct sfile *);
-	int (*sync)(struct sfile *);
-	odb_t * (*get)(struct transient const *, int);
-	struct opd_event * (*find_counter_event)(unsigned long);
-};
-
-/**
- * @param value: commandline input option string
- *
- * Parse the specified extended feature
- */
-extern int opd_ext_initialize(char const * value);
-
-/**
- * @param value: commandline input option string
- *
- * Deinitialize
- */
-extern int opd_ext_deinitialize();
-
-/**
- * Print out extended feature statistics in oprofiled.log file
- */
-extern void opd_ext_print_stats();
-
-/**
- * opd_sfile extended sfile handling functions
- */
-extern void opd_ext_sfile_create(struct sfile * sf);
-extern void opd_ext_sfile_dup (struct sfile * to, struct sfile * from);
-extern void opd_ext_sfile_close(struct sfile * sf);
-extern void opd_ext_sfile_sync(struct sfile * sf);
-extern odb_t * opd_ext_sfile_get(struct transient const * trans, int is_cg);
-
-/**
- * @param counter: counter index
- *
- * Get event struct opd_event from the counter index value.
- */
-extern struct opd_event * opd_ext_find_counter_event(unsigned long counter);
-
-
-#endif
diff --git a/daemon/opd_ibs.c b/daemon/opd_ibs.c
deleted file mode 100644
index 6a886bb..0000000
--- a/daemon/opd_ibs.c
+++ /dev/null
@@ -1,832 +0,0 @@
-/**
- * @file daemon/opd_ibs.c
- * AMD Family10h Instruction Based Sampling (IBS) handling.
- *
- * @remark Copyright 2007-2010 OProfile authors
- * @remark Read the file COPYING
- *
- * @author Jason Yeh <jason.yeh@amd.com>
- * @author Paul Drongowski <paul.drongowski@amd.com>
- * @author Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
- * Copyright (c) 2008 Advanced Micro Devices, Inc.
- */
-
-#include "op_hw_config.h"
-#include "op_events.h"
-#include "op_string.h"
-#include "op_hw_specific.h"
-#include "op_libiberty.h"
-#include "opd_printf.h"
-#include "opd_trans.h"
-#include "opd_events.h"
-#include "opd_kernel.h"
-#include "opd_anon.h"
-#include "opd_sfile.h"
-#include "opd_interface.h"
-#include "opd_mangling.h"
-#include "opd_extended.h"
-#include "opd_ibs.h"
-#include "opd_ibs_trans.h"
-#include "opd_ibs_macro.h"
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <errno.h>
-#include <string.h>
-#include <limits.h>
-
-extern op_cpu cpu_type;
-extern int no_event_ok;
-extern int sfile_equal(struct sfile const * sf, struct sfile const * sf2);
-extern void sfile_dup(struct sfile * to, struct sfile * from);
-extern char * session_dir;
-
-/* IBS Select Counters */
-static unsigned int ibs_selected_size;
-
-/* These flags store the IBS-derived events selection. */
-static unsigned int ibs_fetch_selected_flag;
-static unsigned int ibs_op_selected_flag;
-static unsigned int ibs_op_ls_selected_flag;
-static unsigned int ibs_op_nb_selected_flag;
-
-/* IBS Statistics */
-static unsigned long ibs_fetch_sample_stats;
-static unsigned long ibs_fetch_incomplete_stats;
-static unsigned long ibs_op_sample_stats;
-static unsigned long ibs_op_incomplete_stats;
-static unsigned long ibs_derived_event_stats;
-
-/*
- * IBS Virtual Counter
- */
-struct opd_event ibs_vc[OP_MAX_IBS_COUNTERS];
-
-/* IBS Virtual Counter Index(VCI) Map*/
-unsigned int ibs_vci_map[OP_MAX_IBS_COUNTERS];
-
-/* CPUID information */
-unsigned int ibs_family;
-unsigned int ibs_model;
-unsigned int ibs_stepping;
-
-/* IBS Extended MSRs */
-static unsigned long ibs_bta_enabled;
-
-/* IBS log files */
-FILE * memaccess_log;
-FILE * bta_log;
-
-/**
- * This function converts IBS fetch event flags and values into
- * derived events. If the tagged (sampled) fetched caused a derived
- * event, the derived event is tallied.
- */
-static void opd_log_ibs_fetch(struct transient * trans)
-{
-	struct ibs_fetch_sample * trans_fetch = ((struct ibs_sample*)(trans->ext))->fetch;
-	if (!trans_fetch)
-		return;
-
-	trans_ibs_fetch(trans, ibs_fetch_selected_flag);
-}
-
-
-/**
- * This function translates the IBS op event flags and values into
- * IBS op derived events. If an op derived event occured, it's tallied.
- */
-static void opd_log_ibs_op(struct transient * trans)
-{
-	struct ibs_op_sample * trans_op = ((struct ibs_sample*)(trans->ext))->op;
-	if (!trans_op)
-		return;
-
-	trans_ibs_op_mask_reserved(ibs_family, trans);
-
-	if (trans_ibs_op_rip_invalid(trans) != 0)
-		return;
-
-	trans_ibs_op(trans, ibs_op_selected_flag);
-	trans_ibs_op_ls(trans, ibs_op_ls_selected_flag);
-	trans_ibs_op_nb(trans, ibs_op_nb_selected_flag);
-	trans_ibs_op_ls_memaccess(trans);
-	trans_ibs_op_bta(trans);
-}
-
-
-static void opd_put_ibs_sample(struct transient * trans)
-{
-	unsigned long long event = 0;
-	struct kernel_image * k_image = NULL;
-	struct ibs_fetch_sample * trans_fetch = ((struct ibs_sample*)(trans->ext))->fetch;
-
-	if (!enough_remaining(trans, 1)) {
-		trans->remaining = 0;
-		return;
-	}
-
-	/* IBS can generate samples with invalid dcookie and
-	 * in kernel address range. Map such samples to vmlinux
-	 * only if the user either specifies a range, or vmlinux.
-	 */
-	if (trans->cookie == INVALID_COOKIE
-	    && (k_image = find_kernel_image(trans)) != NULL
-	    && (k_image->start != 0 && k_image->end != 0)
-	    && trans->in_kernel == 0)
-		trans->in_kernel = 1;
-
-	if (trans->tracing != TRACING_ON)
-		trans->event = event;
-
-	/* sfile can change at each sample for kernel */
-	if (trans->in_kernel != 0)
-		clear_trans_current(trans);
-
-	if (!trans->in_kernel && trans->cookie == NO_COOKIE)
-		trans->anon = find_anon_mapping(trans);
-
-	/* get the current sfile if needed */
-	if (!trans->current)
-		trans->current = sfile_find(trans);
-
-	/*
-	 * can happen if kernel sample falls through the cracks, or if
-	 * it's a sample from an anon region we couldn't find
-	 */
-	if (!trans->current)
-		goto out;
-
-	if (trans_fetch)
-		opd_log_ibs_fetch(trans);
-	else
-		opd_log_ibs_op(trans);
-out:
-	/* switch to trace mode */
-	if (trans->tracing == TRACING_START)
-		trans->tracing = TRACING_ON;
-
-	update_trans_last(trans);
-}
-
-
-static void get_ibs_bta_status()
-{
-	FILE * fp = NULL;
-	char buf[PATH_MAX];
-
-	/* Default to disable */
-	ibs_bta_enabled = 0;
-
-	snprintf(buf, PATH_MAX, "/dev/oprofile/ibs_op/branch_target");
-	fp = fopen(buf, "r");
-	if (!fp)
-		return;
-
-	while (fgets(buf, PATH_MAX, fp) != NULL)
-		ibs_bta_enabled = strtoul(buf, NULL, 10);	
-
-	fclose(fp);
-}
-
-
-void code_ibs_fetch_sample(struct transient * trans)
-{
-	struct ibs_fetch_sample * trans_fetch = NULL;
-
-	if (!enough_remaining(trans, 7)) {
-		verbprintf(vext, "not enough remaining\n");
-		trans->remaining = 0;
-		ibs_fetch_incomplete_stats++;
-		return;
-	}
-
-	ibs_fetch_sample_stats++;
-
-	trans->ext = xmalloc(sizeof(struct ibs_sample));
-	((struct ibs_sample*)(trans->ext))->fetch = xmalloc(sizeof(struct ibs_fetch_sample));
-	trans_fetch = ((struct ibs_sample*)(trans->ext))->fetch;
-
-	trans_fetch->rip = pop_buffer_value(trans);
-
-	trans_fetch->ibs_fetch_lin_addr_low   = pop_buffer_value(trans);
-	trans_fetch->ibs_fetch_lin_addr_high  = pop_buffer_value(trans);
-
-	trans_fetch->ibs_fetch_ctl_low        = pop_buffer_value(trans);
-	trans_fetch->ibs_fetch_ctl_high       = pop_buffer_value(trans);
-	trans_fetch->ibs_fetch_phys_addr_low  = pop_buffer_value(trans);
-	trans_fetch->ibs_fetch_phys_addr_high = pop_buffer_value(trans);
-
-	verbprintf(vsamples,
-		"FETCH_X CPU:%ld PID:%ld RIP:%lx CTL_H:%x LAT:%d P_HI:%x P_LO:%x L_HI:%x L_LO:%x\n",
-		trans->cpu,
-		(long)trans->tgid,
-		trans_fetch->rip,
-		(trans_fetch->ibs_fetch_ctl_high >> 16) & 0x3ff,
-		(trans_fetch->ibs_fetch_ctl_high) & 0xffff,
-		trans_fetch->ibs_fetch_phys_addr_high,
-		trans_fetch->ibs_fetch_phys_addr_low,
-		trans_fetch->ibs_fetch_lin_addr_high,
-		trans_fetch->ibs_fetch_lin_addr_low) ;
-
-	/* Overwrite the trans->pc with the more accurate trans_fetch->rip */
-	trans->pc = trans_fetch->rip;
-
-	opd_put_ibs_sample(trans);
-
-	free(trans_fetch);
-	free(trans->ext);
-	trans->ext = NULL;
-}
-
-
-static void get_ibs_op_bta_sample(struct transient * trans,
-				    struct ibs_op_sample * trans_op)
-{
-	// Check remaining
-	if (!enough_remaining(trans, 2)) {
-		verbprintf(vext, "not enough remaining\n");
-		trans->remaining = 0;
-		ibs_op_incomplete_stats++;
-		return;
-	}
-
-	if (ibs_bta_enabled == 1) {
-		trans_op->ibs_op_brtgt_addr = pop_buffer_value(trans);
-	
-		// Check if branch target address is valid (MSRC001_1035[37] == 1]
-		if ((trans_op->ibs_op_data1_high & (0x00000001 << 5)) == 0) {
-			trans_op->ibs_op_brtgt_addr = 0;
-		}
-	} else {
-		trans_op->ibs_op_brtgt_addr = 0;
-	}
-}
-
-
-void code_ibs_op_sample(struct transient * trans)
-{
-	struct ibs_op_sample * trans_op= NULL;
-
-	if (!enough_remaining(trans, 13)) {
-		verbprintf(vext, "not enough remaining\n");
-		trans->remaining = 0;
-		ibs_op_incomplete_stats++;
-		return;
-	}
-
-	ibs_op_sample_stats++;
-
-	trans->ext = xmalloc(sizeof(struct ibs_sample));
-	((struct ibs_sample*)(trans->ext))->op = xmalloc(sizeof(struct ibs_op_sample));
-	trans_op = ((struct ibs_sample*)(trans->ext))->op;
-
-	trans_op->rip = pop_buffer_value(trans);
-
-	trans_op->ibs_op_lin_addr_low = pop_buffer_value(trans);
-	trans_op->ibs_op_lin_addr_high = pop_buffer_value(trans);
-
-	trans_op->ibs_op_data1_low         = pop_buffer_value(trans);
-	trans_op->ibs_op_data1_high        = pop_buffer_value(trans);
-	trans_op->ibs_op_data2_low         = pop_buffer_value(trans);
-	trans_op->ibs_op_data2_high        = pop_buffer_value(trans);
-	trans_op->ibs_op_data3_low         = pop_buffer_value(trans);
-	trans_op->ibs_op_data3_high        = pop_buffer_value(trans);
-	trans_op->ibs_op_ldst_linaddr_low  = pop_buffer_value(trans);
-	trans_op->ibs_op_ldst_linaddr_high = pop_buffer_value(trans);
-	trans_op->ibs_op_phys_addr_low     = pop_buffer_value(trans);
-	trans_op->ibs_op_phys_addr_high    = pop_buffer_value(trans);
-
-	get_ibs_op_bta_sample(trans, trans_op);
-
-	verbprintf(vsamples,
-	   "IBS_OP_X CPU:%ld PID:%d RIP:%lx D1HI:%x D1LO:%x D2LO:%x D3HI:%x D3LO:%x L_LO:%x P_LO:%x\n",
-		   trans->cpu,
-		   trans->tgid,
-		   trans_op->rip,
-		   trans_op->ibs_op_data1_high,
-		   trans_op->ibs_op_data1_low,
-		   trans_op->ibs_op_data2_low,
-		   trans_op->ibs_op_data3_high,
-		   trans_op->ibs_op_data3_low,
-		   trans_op->ibs_op_ldst_linaddr_low,
-		   trans_op->ibs_op_phys_addr_low);
-
-	/* Overwrite the trans->pc with the more accurate trans_op->rip */
-	trans->pc = trans_op->rip;
-
-	opd_put_ibs_sample(trans);
-
-	free(trans_op);
-	free(trans->ext);
-	trans->ext = NULL;
-}
-
-
-/** Convert IBS event to value used for data structure indexing */
-static unsigned long ibs_event_to_counter(unsigned long x)
-{
-	unsigned long ret = ~0UL;
-
-	if (IS_IBS_FETCH(x))
-		ret = (x - IBS_FETCH_BASE);
-	else if (IS_IBS_OP(x))
-		ret = (x - IBS_OP_BASE + IBS_FETCH_MAX);
-	else if (IS_IBS_OP_LS(x))
-		ret = (x - IBS_OP_LS_BASE + IBS_OP_MAX + IBS_FETCH_MAX);
-	else if (IS_IBS_OP_NB(x))
-		ret = (x - IBS_OP_NB_BASE + IBS_OP_LS_MAX + IBS_OP_MAX + IBS_FETCH_MAX);
-
-	return (ret != ~0UL) ? ret + OP_MAX_COUNTERS : ret;
-}
-
-
-void opd_log_ibs_event(unsigned int event,
-	struct transient * trans)
-{
-	ibs_derived_event_stats++;
-	trans->event = event;
-	sfile_log_sample_count(trans, 1);
-}
-
-
-void opd_log_ibs_count(unsigned int event,
-			struct transient * trans,
-			unsigned int count)
-{
-	ibs_derived_event_stats++;
-	trans->event = event;
-	sfile_log_sample_count(trans, count);
-}
-
-
-static unsigned long get_ibs_vci_key(unsigned int event)
-{
-	unsigned long key = ibs_event_to_counter(event);
-	if (key == ~0UL || key < OP_MAX_COUNTERS)
-		return ~0UL;
-
-	key = key - OP_MAX_COUNTERS;
-
-	return key;
-}
-
-
-static int ibs_parse_and_set_events(char * str)
-{
-	char * tmp, * ptr, * tok1, * tok2 = NULL;
-	int is_done = 0;
-	struct op_event * event = NULL;
-	op_cpu cpu_type = CPU_NO_GOOD;
-	unsigned long key;
-
-	if (!str)
-		return -1;
-
-	cpu_type = op_get_cpu_type();
-	op_events(cpu_type);
-
-	tmp = op_xstrndup(str, strlen(str));
-	ptr = tmp;
-
-	while (is_done != 1
-		&& (tok1 = strtok_r(ptr, ",", &tok2)) != NULL) {
-
-		if ((ptr = strstr(tok1, ":")) != NULL) {
-			*ptr = '\0';
-			is_done = 1;
-		}
-
-		// Resove event number
-		event = find_event_by_name(tok1, 0, 0);
-		if (!event)
-			return -1;
-
-		// Grouping
-		if (IS_IBS_FETCH(event->val)) {
-			ibs_fetch_selected_flag |= 1 << IBS_FETCH_OFFSET(event->val);
-		} else if (IS_IBS_OP(event->val)) {
-			ibs_op_selected_flag |= 1 << IBS_OP_OFFSET(event->val);
-		} else if (IS_IBS_OP_LS(event->val)) {
-			ibs_op_ls_selected_flag |= 1 << IBS_OP_LS_OFFSET(event->val);
-		} else if (IS_IBS_OP_NB(event->val)) {
-			ibs_op_nb_selected_flag |= 1 << IBS_OP_NB_OFFSET(event->val);
-		} else {
-			return -1;
-		}
-
-		key = get_ibs_vci_key(event->val);
-		if (key == ~0UL)
-			return -1;
-
-		ibs_vci_map[key] = ibs_selected_size;
-
-		/* Initialize part of ibs_vc */
-		ibs_vc[ibs_selected_size].name    = tok1;
-		ibs_vc[ibs_selected_size].value   = event->val;
-		ibs_vc[ibs_selected_size].counter = ibs_selected_size + OP_MAX_COUNTERS;
-		ibs_vc[ibs_selected_size].kernel  = 1;
-		ibs_vc[ibs_selected_size].user    = 1;
-
-		ibs_selected_size++;
-
-		ptr = NULL;
-	}
-
-	return 0;
-}
-
-
-static int ibs_parse_counts(char * str, unsigned long int * count)
-{
-	char * tmp, * tok1, * tok2 = NULL, *end = NULL;
-	if (!str)
-		return -1;
-
-	tmp = op_xstrndup(str, strlen(str));
-	tok1 = strtok_r(tmp, ":", &tok2);
-	*count = strtoul(tok1, &end, 10);
-	if ((end && *end) || *count == 0
-	    || errno == EINVAL || errno == ERANGE) {
-		fprintf(stderr,"Invalid count (%s)\n", str);
-		return -1;
-	}
-
-	return 0;
-}
-
-
-static int ibs_parse_and_set_um_fetch(char const * str)
-{
-	if (!str)
-		return -1;
-	return 0;
-}
-
-
-static int ibs_parse_and_set_um_op(char const * str, unsigned long int * ibs_op_um)
-{
-	char * end = NULL;
-	if (!str)
-		return -1;
-
-	*ibs_op_um = strtoul(str, &end, 16);
-	if ((end && *end) || errno == EINVAL || errno == ERANGE) {
-		fprintf(stderr,"Invalid unitmaks (%s)\n", str);
-		return -1;
-	}
-	return 0;
-}
-
-
-static void check_cpuid_family_model_stepping()
-{
-#if defined(__i386__) || defined(__x86_64__) 
-	unsigned eax = cpuid_signature();
-
-	ibs_family   = cpu_family(eax);
-	ibs_model    = cpu_model(eax);
-	ibs_stepping = cpu_stepping(eax);
-#else
-	ibs_family   = 0;
-	ibs_model    = 0;
-	ibs_stepping = 0;
-#endif
-}
-
-
-static int ibs_init(char const * argv)
-{
-	char * tmp, * ptr, * tok1, * tok2 = NULL;
-	unsigned int i = 0;
-	unsigned long int ibs_fetch_count = 0;
-	unsigned long int ibs_op_count = 0;
-	unsigned long int ibs_op_um = 0;
-
-	if (!argv)
-		return -1;
-
-	if (empty_line(argv) != 0)
-		return -1;
-
-	tmp = op_xstrndup(argv, strlen(argv));
-	ptr = (char *) skip_ws(tmp);
-
-	// "fetch:event1,event2,....:count:um|op:event1,event2,.....:count:um"
-	tok1 = strtok_r(ptr, "|", &tok2);
-
-	while (tok1 != NULL) {
-
-		if (!strncmp("fetch:", tok1, strlen("fetch:"))) {
-			// Get to event section
-			tok1 = tok1 + strlen("fetch:");
-			if (ibs_parse_and_set_events(tok1) == -1)
-				return -1;
-
-			// Get to count section
-			while (tok1) {
-				if (*tok1 == '\0')
-					return -1;
-				if (*tok1 != ':') {
-					tok1++;
-				} else {
-					tok1++;
-					break;
-				}
-			}
-
-			if (ibs_parse_counts(tok1, &ibs_fetch_count) == -1)
-				return -1;
-
-			// Get to um section
-			while (tok1) {
-				if (*tok1 == '\0')
-					return -1;
-				if (*tok1 != ':') {
-					tok1++;
-				} else {
-					tok1++;
-					break;
-				}
-			}
-
-			if (ibs_parse_and_set_um_fetch(tok1) == -1)
-				return -1;
-
-		} else if (!strncmp("op:", tok1, strlen("op:"))) {
-			// Get to event section
-			tok1 = tok1 + strlen("op:");
-			if (ibs_parse_and_set_events(tok1) == -1)
-				return -1;
-
-			// Get to count section
-			while (tok1) {
-				if (*tok1 == '\0')
-					return -1;
-				if (*tok1 != ':') {
-					tok1++;
-				} else {
-					tok1++;
-					break;
-				}
-			}
-
-			if (ibs_parse_counts(tok1, &ibs_op_count) == -1)
-				return -1;
-
-			// Get to um section
-			while (tok1) {
-				if (*tok1 == '\0')
-					return -1;
-				if (*tok1 != ':') {
-					tok1++;
-				} else {
-					tok1++;
-					break;
-				}
-			}
-
-			if (ibs_parse_and_set_um_op(tok1, &ibs_op_um))
-				return -1;
-
-		} else
-			return -1;
-
-		tok1 = strtok_r(NULL, "|", &tok2);
-	}
-
-	/* Initialize ibs_vc */
-	for (i = 0 ; i < ibs_selected_size ; i++)
-	{
-		if (IS_IBS_FETCH(ibs_vc[i].value)) {
-			ibs_vc[i].count   = ibs_fetch_count;
-			ibs_vc[i].um      = 0;
-		} else {
-			ibs_vc[i].count   = ibs_op_count;
-			ibs_vc[i].um      = ibs_op_um;
-		}
-	}
-
-	// Allow no event
-	no_event_ok = 1;
-
-	check_cpuid_family_model_stepping();
-
-	get_ibs_bta_status();
-
-	/* Create IBS memory access log */
-	memaccess_log = NULL;
-	if (ibs_op_um & 0x2) {
-		char filename[PATH_MAX];
-		char * log_file = "/samples/ibs_memaccess.log";
-		size_t path_len = strlen(session_dir) + strlen(log_file);
-		if (path_len < PATH_MAX) {
-			strcpy(filename, session_dir);
-			strcat(filename, log_file);
-			memaccess_log = fopen(filename, "w");
-		}
-		if ( memaccess_log == NULL) {
-			verbprintf(vext, "Warning: Cannot create ibs_memaccess.log\n");
-			
-		} else {
-			fprintf (memaccess_log, "# IBS Memory Access Log\n\n");
-			fprintf (memaccess_log, "# Format: app_cookie,cookie,cpu,tgid,tid,pc,branch-target-address,\n");
-			fprintf (memaccess_log, "#         phy-hi:phy-low,lin-hi:lin-low,accese-type,latency\n\n");
-		}
-	}
-
-	// Create IBS Branch Target Address (BTA) log	
-	bta_log = NULL;
-	if (ibs_bta_enabled) {
-		char filename[PATH_MAX];
-		char * log_file = "/samples/ibs_bta.log";
-		size_t path_len = strlen(session_dir) + strlen(log_file);
-		if (path_len < PATH_MAX) {
-			strcpy(filename, session_dir);
-			strcat(filename, log_file);
-			bta_log = fopen(filename, "w");
-		}
-		if ( bta_log == NULL) {
-			verbprintf(vext, "Warning: Cannot create ibs_bta.log\n");
-		} else {
-			fprintf (bta_log, "# IBS Memory Access Log\n\n");
-			fprintf (bta_log, "# Format: app_cookie,cookie,cpu,tgid,tid,pc,branch-target-address\n\n");
-		}
-	}
-
-	return 0;
-}
-
-
-static int ibs_deinit()
-{
-	if (memaccess_log) {
-		fclose (memaccess_log);
-		memaccess_log = NULL;
-	}
-	
-	if (bta_log) {
-		fclose (bta_log);
-		bta_log = NULL;
-	}
-	return 0;
-}
-
-
-static int ibs_print_stats()
-{
-	printf("Nr. IBS Fetch samples     : %lu (%lu entries)\n", 
-		ibs_fetch_sample_stats, (ibs_fetch_sample_stats * 7));
-	printf("Nr. IBS Fetch incompletes : %lu\n", ibs_fetch_incomplete_stats);
-	printf("Nr. IBS Op samples        : %lu (%lu entries)\n", 
-		ibs_op_sample_stats, (ibs_op_sample_stats * 13));
-	printf("Nr. IBS Op incompletes    : %lu\n", ibs_op_incomplete_stats);
-	printf("Nr. IBS derived events    : %lu\n", ibs_derived_event_stats);
-	return 0;
-}
-
-
-static int ibs_sfile_create(struct sfile * sf)
-{
-	unsigned int i;
-	sf->ext_files = xmalloc(ibs_selected_size * sizeof(odb_t));
-	for (i = 0 ; i < ibs_selected_size ; ++i)
-		odb_init(&sf->ext_files[i]);
-
-	return 0;
-}
-
-
-static int ibs_sfile_dup (struct sfile * to, struct sfile * from)
-{
-	unsigned int i;
-	if (from->ext_files != NULL) {
-		to->ext_files = xmalloc(ibs_selected_size * sizeof(odb_t));
-		for (i = 0 ; i < ibs_selected_size ; ++i)
-			odb_init(&to->ext_files[i]);
-	} else {
-		to->ext_files = NULL;
-	}
-	return 0;
-}
-
-static int ibs_sfile_close(struct sfile * sf)
-{
-	unsigned int i;
-	if (sf->ext_files != NULL) {
-		for (i = 0; i < ibs_selected_size ; ++i)
-			odb_close(&sf->ext_files[i]);
-
-		free(sf->ext_files);
-		sf->ext_files= NULL;
-	}
-	return 0;
-}
-
-static int ibs_sfile_sync(struct sfile * sf)
-{
-	unsigned int i;
-	if (sf->ext_files != NULL) {
-		for (i = 0; i < ibs_selected_size ; ++i)
-			odb_sync(&sf->ext_files[i]);
-	}
-	return 0;
-}
-
-static odb_t * ibs_sfile_get(struct transient const * trans, int is_cg)
-{
-	struct sfile * sf = trans->current;
-	struct sfile * last = trans->last;
-	struct cg_entry * cg;
-	struct list_head * pos;
-	unsigned long hash;
-	odb_t * file;
-	unsigned long counter, ibs_vci, key;
-
-	/* Note: "trans->event" for IBS is not the same as traditional
- 	 * events.  Here, it has the actual event (0xfxxx), while the
- 	 * traditional event has the event index.
- 	 */
-	key = get_ibs_vci_key(trans->event);
-	if (key == ~0UL) {
-		fprintf(stderr, "%s: Invalid IBS event %lu\n", __func__, trans->event);
-		abort();
-	}
-	ibs_vci = ibs_vci_map[key];
-	counter = ibs_vci + OP_MAX_COUNTERS;
-
-	/* Creating IBS sfile if it not already exists */
-	if (sf->ext_files == NULL)
-		ibs_sfile_create(sf);
-
-	file = &(sf->ext_files[ibs_vci]);
-	if (!is_cg)
-		goto open;
-
-	hash = last->hashval & (CG_HASH_SIZE - 1);
-
-	/* Need to look for the right 'to'. Since we're looking for
-	 * 'last', we use its hash.
-	 */
-	list_for_each(pos, &sf->cg_hash[hash]) {
-		cg = list_entry(pos, struct cg_entry, hash);
-		if (sfile_equal(last, &cg->to)) {
-			file = &(cg->to.ext_files[ibs_vci]);
-			goto open;
-		}
-	}
-
-	cg = xmalloc(sizeof(struct cg_entry));
-	sfile_dup(&cg->to, last);
-	list_add(&cg->hash, &sf->cg_hash[hash]);
-	file = &(cg->to.ext_files[ibs_vci]);
-
-open:
-	if (!odb_open_count(file))
-		opd_open_sample_file(file, last, sf, counter, is_cg);
-
-	/* Error is logged by opd_open_sample_file */
-	if (!odb_open_count(file))
-		return NULL;
-
-	return file;
-}
-
-
-/** Filled opd_event structure with IBS derived event information
- *  from the given counter value.
- */
-static struct opd_event * ibs_sfile_find_counter_event(unsigned long counter)
-{
-	unsigned long ibs_vci;
-
-	if (counter >= OP_MAX_COUNTERS + OP_MAX_IBS_COUNTERS
-	    || counter < OP_MAX_COUNTERS) {
-		fprintf(stderr,"Error: find_ibs_counter_event : "
-				"invalid counter value %lu.\n", counter);
-		abort();
-	}
-
-	ibs_vci = counter - OP_MAX_COUNTERS;
-	return &ibs_vc[ibs_vci];
-}
-
-
-struct opd_ext_sfile_handlers ibs_sfile_handlers =
-{
-	.create = &ibs_sfile_create,
-	.dup    = &ibs_sfile_dup,
-	.close  = &ibs_sfile_close,
-	.sync   = &ibs_sfile_sync,
-	.get    = &ibs_sfile_get,
-	.find_counter_event = &ibs_sfile_find_counter_event
-};
-
-
-struct opd_ext_handlers ibs_handlers =
-{
-	.ext_init        = &ibs_init,
-	.ext_deinit      = &ibs_deinit,
-	.ext_print_stats = &ibs_print_stats,
-	.ext_sfile       = &ibs_sfile_handlers
-};
diff --git a/daemon/opd_ibs.h b/daemon/opd_ibs.h
deleted file mode 100644
index 6f0fd64..0000000
--- a/daemon/opd_ibs.h
+++ /dev/null
@@ -1,133 +0,0 @@
-/**
- * @file daemon/opd_ibs.h
- * AMD Family10h Instruction Based Sampling (IBS) handling.
- *
- * @remark Copyright 2008-2010 OProfile authors
- * @remark Read the file COPYING
- *
- * @author Jason Yeh <jason.yeh@amd.com>
- * @author Paul Drongowski <paul.drongowski@amd.com>
- * @author Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
- * Copyright (c) 2008 Advanced Micro Devices, Inc.
- */
-
-#ifndef OPD_IBS_H
-#define OPD_IBS_H
-
-#include <stdint.h>
-
-#include "opd_ibs_macro.h"
-
-struct transient;
-struct opd_event;
-
-/**
- * IBS information is processed in two steps. The first step decodes
- * hardware-level IBS information and saves it in decoded form. The
- * second step translates the decoded IBS information into IBS derived
- * events. IBS information is tallied and is reported as derived events.
- */
-
-struct ibs_sample {
-	struct ibs_fetch_sample * fetch;
-	struct ibs_op_sample * op;
-};
-
-/**
- * This struct represents the hardware-level IBS fetch information.
- * Each field corresponds to a model-specific register (MSR.) See the
- * BIOS and Kernel Developer's Guide for AMD Model Family 10h Processors
- * for further details.
- */
-struct ibs_fetch_sample {
-	unsigned long int rip;
-	/* MSRC001_1030 IBS Fetch Control Register */
-	unsigned int ibs_fetch_ctl_low;
-	unsigned int ibs_fetch_ctl_high;
-	/* MSRC001_1031 IBS Fetch Linear Address Register */
-	unsigned int ibs_fetch_lin_addr_low;
-	unsigned int ibs_fetch_lin_addr_high;
-	/* MSRC001_1032 IBS Fetch Physical Address Register */
-	unsigned int ibs_fetch_phys_addr_low;
-	unsigned int ibs_fetch_phys_addr_high;
-	unsigned int dummy_event;
-};
-
-
-
-/** This struct represents the hardware-level IBS op information. */
-struct ibs_op_sample {
-	unsigned long int rip;
-	/* MSRC001_1034 IBS Op Logical Address Register */
-	unsigned int ibs_op_lin_addr_low;
-	unsigned int ibs_op_lin_addr_high;
-	/* MSRC001_1035 IBS Op Data Register */
-	unsigned int ibs_op_data1_low;
-	unsigned int ibs_op_data1_high;
-	/* MSRC001_1036 IBS Op Data 2 Register */
-	unsigned int ibs_op_data2_low;
-	unsigned int ibs_op_data2_high;
-	/* MSRC001_1037 IBS Op Data 3 Register */
-	unsigned int ibs_op_data3_low;
-	unsigned int ibs_op_data3_high;
-	/* MSRC001_1038 IBS DC Linear Address */
-	unsigned int ibs_op_ldst_linaddr_low;
-	unsigned int ibs_op_ldst_linaddr_high;
-	/* MSRC001_1039 IBS DC Physical Address */
-	unsigned int ibs_op_phys_addr_low;
-	unsigned int ibs_op_phys_addr_high;
-	/* MSRC001_103B IBS Branch Target Address */
-	unsigned long ibs_op_brtgt_addr;
-};
-
-
-/**
- * Handle an IBS fetch sample escape code sequence. An IBS fetch sample
- * is represented as an escape code sequence. (See the comment for the
- * function code_ibs_op_sample() for the sequence of entries in the event
- * buffer.) When this function is called, the ESCAPE_CODE and IBS_FETCH_CODE
- * have already been removed from the event buffer. Thus, 7 more event buffer
- * entries are needed in order to process a complete IBS fetch sample.
- */
-extern void code_ibs_fetch_sample(struct transient * trans);
-
-/**
- * Handle an IBS op sample escape code sequence. An IBS op sample
- * is represented as an escape code sequence:
- *
- *    IBS fetch              IBS op
- *    ---------------        ----------------
- *    ESCAPE_CODE            ESCAPE_CODE
- *    IBS_FETCH_CODE         IBS_OP_CODE
- *    Offset                 Offset
- *    IbsFetchLinAd low      IbsOpRip low        <-- Logical (virtual) RIP
- *    IbsFetchLinAd high     IbsOpRip high       <-- Logical (virtual) RIP
- *    IbsFetchCtl low        IbsOpData low
- *    IbsFetchCtl high       IbsOpData high
- *    IbsFetchPhysAd low     IbsOpData2 low
- *    IbsFetchPhysAd high    IbsOpData2 high
- *                           IbsOpData3 low
- *                           IbsOpData3 high
- *                           IbsDcLinAd low
- *                           IbsDcLinAd high
- *                           IbsDcPhysAd low
- *                           IbsDcPhysAd high
- *
- * When this function is called, the ESCAPE_CODE and IBS_OP_CODE have
- * already been removed from the event buffer. Thus, 13 more event buffer
- * entries are needed to process a complete IBS op sample.
- *
- * The IbsFetchLinAd and IbsOpRip are the linear (virtual) addresses
- * that were generated by the IBS hardware. These addresses are mapped
- * into the offset.
- */
-extern void code_ibs_op_sample(struct transient * trans);
-
-/** Log the specified IBS derived event. */
-extern void opd_log_ibs_event(unsigned int event, struct transient * trans);
-
-/** Log the specified IBS cycle count. */
-extern void opd_log_ibs_count(unsigned int event, struct transient * trans, unsigned int count);
-
-
-#endif /*OPD_IBS_H*/
diff --git a/daemon/opd_ibs_macro.h b/daemon/opd_ibs_macro.h
deleted file mode 100644
index 0bfcf17..0000000
--- a/daemon/opd_ibs_macro.h
+++ /dev/null
@@ -1,397 +0,0 @@
-/**
- * @file daemon/opd_ibs_macro.h
- * AMD Instruction Based Sampling (IBS) related macro.
- *
- * @remark Copyright 2008-2010 OProfile authors
- * @remark Read the file COPYING
- *
- * @author Jason Yeh <jason.yeh@amd.com>
- * @author Paul Drongowski <paul.drongowski@amd.com>
- * @author Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
- * Copyright (c) 2008 Advanced Micro Devices, Inc.
- */
-
-#ifndef OPD_IBS_MACRO_H
-#define OPD_IBS_MACRO_H
-
-/**
- * The following defines are bit masks that are used to select
- * IBS fetch event flags and values at the
- * MSRC001_1030 IBS Fetch Control Register (IbsFetchCtl)
- */
-#define FETCH_MASK_LATENCY  0x0000ffff
-#define FETCH_MASK_COMPLETE 0x00040000
-#define FETCH_MASK_IC_MISS  0x00080000
-#define FETCH_MASK_PHY_ADDR 0x00100000
-#define FETCH_MASK_PG_SIZE  0x00600000
-#define FETCH_MASK_L1_MISS  0x00800000
-#define FETCH_MASK_L2_MISS  0x01000000
-#define FETCH_MASK_KILLED   \
-		(FETCH_MASK_L1_MISS|FETCH_MASK_L2_MISS|FETCH_MASK_PHY_ADDR|\
-		FETCH_MASK_COMPLETE|FETCH_MASK_IC_MISS)
-
-
-/**
- * The following defines are bit masks that are used to select
- * IBS op event flags and values at the MSR level.
- */
-
-/* MSRC001_1035 IBS Op Data Register (IbsOpData) */
-#define BR_MASK_RETIRE           0x0000ffff
-#define MASK_RIP_INVALID         0x00000040
-#define BR_MASK_BRN_RET          0x00000020
-#define BR_MASK_BRN_MISP         0x00000010
-#define BR_MASK_BRN_TAKEN        0x00000008
-#define BR_MASK_RETURN           0x00000004
-#define BR_MASK_MISP_RETURN      0x00000002
-#define BR_MASK_BRN_RESYNC       0x00000001
-
-/* MSRC001_1036 IBS Op Data Register (IbsOpData2) */
-#define NB_MASK_L3_STATE         0x00000020
-#define NB_MASK_REQ_DST_PROC     0x00000010
-#define NB_MASK_REQ_DATA_SRC     0x00000007
-
-/* MSRC001_1037 IBS Op Data Register (IbsOpData3) */
-#define DC_MASK_L2_HIT_1G        0x00080000
-#define DC_MASK_PHY_ADDR_VALID   0x00040000
-#define DC_MASK_LIN_ADDR_VALID   0x00020000
-#define DC_MASK_MAB_HIT          0x00010000
-#define DC_MASK_LOCKED_OP        0x00008000
-#define DC_MASK_UC_MEM_ACCESS    0x00004000
-#define DC_MASK_WC_MEM_ACCESS    0x00002000
-#define DC_MASK_ST_TO_LD_CANCEL  0x00001000
-#define DC_MASK_ST_TO_LD_FOR     0x00000800
-#define DC_MASK_ST_BANK_CONFLICT 0x00000400
-#define DC_MASK_LD_BANK_CONFLICT 0x00000200
-#define DC_MASK_MISALIGN_ACCESS  0x00000100
-#define DC_MASK_DC_MISS          0x00000080
-#define DC_MASK_L2_HIT_2M        0x00000040
-#define DC_MASK_L1_HIT_1G        0x00000020
-#define DC_MASK_L1_HIT_2M        0x00000010
-#define DC_MASK_L2_TLB_MISS      0x00000008
-#define DC_MASK_L1_TLB_MISS      0x00000004
-#define DC_MASK_STORE_OP         0x00000002
-#define DC_MASK_LOAD_OP          0x00000001
-
-
-/**
- * IBS derived events:
- *
- * IBS derived events are identified by event select values which are
- * similar to the event select values that identify performance monitoring
- * counter (PMC) events. Event select values for IBS derived events begin
- * at 0xf000.
- *
- * The definitions in this file *must* match definitions
- * of IBS derived events. More information
- * about IBS derived events is given in the Software Oprimization
- * Guide.
- */
-
-/**
- * The following defines associate a 16-bit select value with an IBS
- * derived fetch event.
- */
-#define DE_IBS_FETCH_ALL         0xf000
-#define DE_IBS_FETCH_KILLED      0xf001
-#define DE_IBS_FETCH_ATTEMPTED   0xf002
-#define DE_IBS_FETCH_COMPLETED   0xf003
-#define DE_IBS_FETCH_ABORTED     0xf004
-#define DE_IBS_L1_ITLB_HIT       0xf005
-#define DE_IBS_ITLB_L1M_L2H      0xf006
-#define DE_IBS_ITLB_L1M_L2M      0xf007
-#define DE_IBS_IC_MISS           0xf008
-#define DE_IBS_IC_HIT            0xf009
-#define DE_IBS_FETCH_4K_PAGE     0xf00a
-#define DE_IBS_FETCH_2M_PAGE     0xf00b
-#define DE_IBS_FETCH_1G_PAGE     0xf00c
-#define DE_IBS_FETCH_XX_PAGE     0xf00d
-#define DE_IBS_FETCH_LATENCY     0xf00e
-
-#define IBS_FETCH_BASE           0xf000
-#define IBS_FETCH_END            0xf00e
-#define IBS_FETCH_MAX            (IBS_FETCH_END - IBS_FETCH_BASE + 1)
-#define IS_IBS_FETCH(x)          (IBS_FETCH_BASE <= x && x <= IBS_FETCH_END)
-#define IBS_FETCH_OFFSET(x)      (x - IBS_FETCH_BASE)
-#define CHECK_FETCH_SELECTED_FLAG(x)	if ( selected_flag & (1 << IBS_FETCH_OFFSET(x)))
-
-
-/**
- * The following defines associate a 16-bit select value with an IBS
- * derived branch/return macro-op event.
- */
-#define DE_IBS_OP_ALL             0xf100
-#define DE_IBS_OP_TAG_TO_RETIRE   0xf101
-#define DE_IBS_OP_COMP_TO_RETIRE  0xf102
-#define DE_IBS_BRANCH_RETIRED     0xf103
-#define DE_IBS_BRANCH_MISP        0xf104
-#define DE_IBS_BRANCH_TAKEN       0xf105
-#define DE_IBS_BRANCH_MISP_TAKEN  0xf106
-#define DE_IBS_RETURN             0xf107
-#define DE_IBS_RETURN_MISP        0xf108
-#define DE_IBS_RESYNC             0xf109
-
-#define IBS_OP_BASE               0xf100
-#define IBS_OP_END                0xf109
-#define IBS_OP_MAX                (IBS_OP_END - IBS_OP_BASE + 1)
-#define IS_IBS_OP(x)              (IBS_OP_BASE <= x && x <= IBS_OP_END)
-#define IBS_OP_OFFSET(x)          (x - IBS_OP_BASE)
-#define CHECK_OP_SELECTED_FLAG(x)	if ( selected_flag & (1 << IBS_OP_OFFSET(x)))
-
-
-/**
- * The following defines associate a 16-bit select value with an IBS
- * derived load/store event.
- */
-#define DE_IBS_LS_ALL_OP         0xf200
-#define DE_IBS_LS_LOAD_OP        0xf201
-#define DE_IBS_LS_STORE_OP       0xf202
-#define DE_IBS_LS_DTLB_L1H       0xf203
-#define DE_IBS_LS_DTLB_L1M_L2H   0xf204
-#define DE_IBS_LS_DTLB_L1M_L2M   0xf205
-#define DE_IBS_LS_DC_MISS        0xf206
-#define DE_IBS_LS_DC_HIT         0xf207
-#define DE_IBS_LS_MISALIGNED     0xf208
-#define DE_IBS_LS_BNK_CONF_LOAD  0xf209
-#define DE_IBS_LS_BNK_CONF_STORE 0xf20a
-#define DE_IBS_LS_STL_FORWARDED  0xf20b
-#define DE_IBS_LS_STL_CANCELLED  0xf20c
-#define DE_IBS_LS_UC_MEM_ACCESS  0xf20d
-#define DE_IBS_LS_WC_MEM_ACCESS  0xf20e
-#define DE_IBS_LS_LOCKED_OP      0xf20f
-#define DE_IBS_LS_MAB_HIT        0xf210
-#define DE_IBS_LS_L1_DTLB_4K     0xf211
-#define DE_IBS_LS_L1_DTLB_2M     0xf212
-#define DE_IBS_LS_L1_DTLB_1G     0xf213
-#define DE_IBS_LS_L1_DTLB_RES    0xf214
-#define DE_IBS_LS_L2_DTLB_4K     0xf215
-#define DE_IBS_LS_L2_DTLB_2M     0xf216
-#define DE_IBS_LS_L2_DTLB_1G     0xf217
-#define DE_IBS_LS_L2_DTLB_RES2   0xf218
-#define DE_IBS_LS_DC_LOAD_LAT    0xf219
-
-#define IBS_OP_LS_BASE           0xf200
-#define IBS_OP_LS_END            0xf219
-#define IBS_OP_LS_MAX            (IBS_OP_LS_END - IBS_OP_LS_BASE + 1)
-#define IS_IBS_OP_LS(x)          (IBS_OP_LS_BASE <= x && x <= IBS_OP_LS_END)
-#define IBS_OP_LS_OFFSET(x)      (x - IBS_OP_LS_BASE)
-#define CHECK_OP_LS_SELECTED_FLAG(x)	if ( selected_flag & (1 << IBS_OP_LS_OFFSET(x)))
-
-
-/**
- * The following defines associate a 16-bit select value with an IBS
- * derived Northbridge (NB) event.
- */
-#define DE_IBS_NB_LOCAL          0xf240
-#define DE_IBS_NB_REMOTE         0xf241
-#define DE_IBS_NB_LOCAL_L3       0xf242
-#define DE_IBS_NB_LOCAL_CACHE    0xf243
-#define DE_IBS_NB_REMOTE_CACHE   0xf244
-#define DE_IBS_NB_LOCAL_DRAM     0xf245
-#define DE_IBS_NB_REMOTE_DRAM    0xf246
-#define DE_IBS_NB_LOCAL_OTHER    0xf247
-#define DE_IBS_NB_REMOTE_OTHER   0xf248
-#define DE_IBS_NB_CACHE_STATE_M  0xf249
-#define DE_IBS_NB_CACHE_STATE_O  0xf24a
-#define DE_IBS_NB_LOCAL_LATENCY  0xf24b
-#define DE_IBS_NB_REMOTE_LATENCY 0xf24c
-
-#define IBS_OP_NB_BASE           0xf240
-#define IBS_OP_NB_END            0xf24c
-#define IBS_OP_NB_MAX            (IBS_OP_NB_END - IBS_OP_NB_BASE + 1)
-#define IS_IBS_OP_NB(x)          (IBS_OP_NB_BASE <= x && x <= IBS_OP_NB_END)
-#define IBS_OP_NB_OFFSET(x)      (x - IBS_OP_NB_BASE)
-#define CHECK_OP_NB_SELECTED_FLAG(x)	if ( selected_flag & (1 << IBS_OP_NB_OFFSET(x)))
-
-
-#define OP_MAX_IBS_COUNTERS      (IBS_FETCH_MAX + IBS_OP_MAX + IBS_OP_LS_MAX + IBS_OP_NB_MAX)
-
-
-/**
- * These macro decodes IBS hardware-level event flags and fields.
- * Translation results are either zero (false) or non-zero (true), except
- * the fetch latency, which is a 16-bit cycle count, and the fetch page size
- * field, which is a 2-bit unsigned integer.
- */
-
-/** Bits 47:32 IbsFetchLat: instruction fetch latency */
-#define IBS_FETCH_FETCH_LATENCY(x)              ((unsigned short)(x->ibs_fetch_ctl_high & FETCH_MASK_LATENCY))
-
-/** Bit 50 IbsFetchComp: instruction fetch complete. */
-#define IBS_FETCH_FETCH_COMPLETION(x)           ((x->ibs_fetch_ctl_high & FETCH_MASK_COMPLETE) != 0)
-
-/** Bit 51 IbsIcMiss: instruction cache miss. */
-#define IBS_FETCH_INST_CACHE_MISS(x)            ((x->ibs_fetch_ctl_high & FETCH_MASK_IC_MISS) != 0)
-
-/** Bit 52 IbsPhyAddrValid: instruction fetch physical address valid. */
-#define IBS_FETCH_PHYS_ADDR_VALID(x)            ((x->ibs_fetch_ctl_high & FETCH_MASK_PHY_ADDR) != 0)
-
-enum IBSL1PAGESIZE {
-	L1TLB4K = 0,
-	L1TLB2M,
-	L1TLB1G,
-	L1TLB_INVALID
-};
-
-/** Bits 54:53 IbsL1TlbPgSz: instruction cache L1TLB page size. */
-#define IBS_FETCH_TLB_PAGE_SIZE(x)              ((unsigned short)((x->ibs_fetch_ctl_high >> 21) & 0x3))
-#define IBS_FETCH_TLB_PAGE_SIZE_4K(x)           (IBS_FETCH_TLB_PAGE_SIZE(x) == L1TLB4K)
-#define IBS_FETCH_TLB_PAGE_SIZE_2M(x)           (IBS_FETCH_TLB_PAGE_SIZE(x) == L1TLB2M)
-#define IBS_FETCH_TLB_PAGE_SIZE_1G(x)           (IBS_FETCH_TLB_PAGE_SIZE(x) == L1TLB1G)
-
-/** Bit 55 IbsL1TlbMiss: instruction cache L1TLB miss. */
-#define IBS_FETCH_M_L1_TLB_MISS(x)              ((x->ibs_fetch_ctl_high & FETCH_MASK_L1_MISS) != 0)
-
-/** Bit 56 IbsL2TlbMiss: instruction cache L2TLB miss. */
-#define IBS_FETCH_L2_TLB_MISS(x)                ((x->ibs_fetch_ctl_high & FETCH_MASK_L2_MISS) != 0)
-
-/** A fetch is a killed fetch if all the masked bits are clear */
-#define IBS_FETCH_KILLED(x)                     ((x->ibs_fetch_ctl_high & FETCH_MASK_KILLED) == 0)
-
-#define IBS_FETCH_INST_CACHE_HIT(x)             (IBS_FETCH_FETCH_COMPLETION(x) && !IBS_FETCH_INST_CACHE_MISS(x))
-
-#define IBS_FETCH_L1_TLB_HIT(x)                 (!IBS_FETCH_M_L1_TLB_MISS(x) && IBS_FETCH_PHYS_ADDR_VALID(x))
-
-#define IBS_FETCH_ITLB_L1M_L2H(x)               (IBS_FETCH_M_L1_TLB_MISS(x) && !IBS_FETCH_L2_TLB_MISS(x))
-
-#define IBS_FETCH_ITLB_L1M_L2M(x)               (IBS_FETCH_M_L1_TLB_MISS(x) && IBS_FETCH_L2_TLB_MISS(x))
-
-
-/**
- * These macros translates IBS op event data from its hardware-level
- * representation .It hides the MSR layout of IBS op data.
- */
-
-/**
- * MSRC001_1035 IBS OP Data Register (IbsOpData)
- *
- * 15:0 IbsCompToRetCtr: macro-op completion to retire count
- */
-#define IBS_OP_COM_TO_RETIRE_CYCLES(x)          ((unsigned short)(x->ibs_op_data1_low & BR_MASK_RETIRE))
-
-/** 31:16 tag_to_retire_cycles : macro-op tag to retire count. */
-#define IBS_OP_TAG_TO_RETIRE_CYCLES(x)          ((unsigned short)((x->ibs_op_data1_low >> 16) & BR_MASK_RETIRE))
-
-/** 32 op_branch_resync : resync macro-op. */
-#define IBS_OP_BRANCH_RESYNC(x)                 ((x->ibs_op_data1_high & BR_MASK_BRN_RESYNC) != 0)
-
-/** 33 op_mispredict_return : mispredicted return macro-op. */
-#define IBS_OP_MISPREDICT_RETURN(x)             ((x->ibs_op_data1_high & BR_MASK_MISP_RETURN) != 0)
-
-/** 34 IbsOpReturn: return macro-op. */
-#define IBS_OP_RETURN(x)                        ((x->ibs_op_data1_high & BR_MASK_RETURN) != 0)
-
-/** 35 IbsOpBrnTaken: taken branch macro-op. */
-#define IBS_OP_BRANCH_TAKEN(x)                  ((x->ibs_op_data1_high & BR_MASK_BRN_TAKEN) != 0)
-
-/** 36 IbsOpBrnMisp: mispredicted branch macro-op.  */
-#define IBS_OP_BRANCH_MISPREDICT(x)             ((x->ibs_op_data1_high & BR_MASK_BRN_MISP) != 0)
-
-/** 37 IbsOpBrnRet: branch macro-op retired. */
-#define IBS_OP_BRANCH_RETIRED(x)                ((x->ibs_op_data1_high & BR_MASK_BRN_RET) != 0)
-
-/** 38 IbsRipInvalid: RIP invalid. */
-#define IBS_OP_RIP_INVALID(x)                   ((x->ibs_op_data1_high & MASK_RIP_INVALID) != 0)
-
-/**
- * MSRC001_1036 IBS Op Data 2 Register (IbsOpData2)
- *
- * 5 NbIbsReqCacheHitSt: IBS L3 cache state
- */
-#define IBS_OP_NB_IBS_CACHE_HIT_ST(x)           ((x->ibs_op_data2_low & NB_MASK_L3_STATE) != 0)
-
-/** 4 NbIbsReqDstProc: IBS request destination processor */
-#define IBS_OP_NB_IBS_REQ_DST_PROC(x)           ((x->ibs_op_data2_low & NB_MASK_REQ_DST_PROC) != 0)
-
-/** 2:0 NbIbsReqSrc: Northbridge IBS request data source */
-#define IBS_OP_NB_IBS_REQ_SRC(x)                ((unsigned char)(x->ibs_op_data2_low & NB_MASK_REQ_DATA_SRC))
-
-#define IBS_OP_NB_IBS_REQ_SRC_01(x)             (IBS_OP_NB_IBS_REQ_SRC(x) == 0x01)
-
-#define IBS_OP_NB_IBS_REQ_SRC_02(x)             (IBS_OP_NB_IBS_REQ_SRC(x) == 0x02)
-
-#define IBS_OP_NB_IBS_REQ_SRC_03(x)             (IBS_OP_NB_IBS_REQ_SRC(x) == 0x03)
-
-#define IBS_OP_NB_IBS_REQ_SRC_07(x)             (IBS_OP_NB_IBS_REQ_SRC(x) == 0x07)
-
-/**
- * MSRC001_1037 IBS Op Data3 Register
- *
- * Bits 47:32   IbsDcMissLat
- */
-#define IBS_OP_DC_MISS_LATENCY(x)               ((unsigned short)(x->ibs_op_data3_high & 0xffff))
-
-/** 0 IbsLdOp: Load op */
-#define IBS_OP_IBS_LD_OP(x)                     ((x->ibs_op_data3_low & DC_MASK_LOAD_OP) != 0)
-
-/** 1 IbsStOp: Store op */
-#define IBS_OP_IBS_ST_OP(x)                     ((x->ibs_op_data3_low & DC_MASK_STORE_OP) != 0)
-
-/** 2 ibs_dc_l1_tlb_miss: Data cache L1TLB miss */
-#define IBS_OP_IBS_DC_L1_TLB_MISS(x)            ((x->ibs_op_data3_low & DC_MASK_L1_TLB_MISS) != 0)
-
-/** 3 ibs_dc_l2_tlb_miss: Data cache L2TLB miss */
-#define IBS_OP_IBS_DC_L2_TLB_MISS(x)            ((x->ibs_op_data3_low & DC_MASK_L2_TLB_MISS) != 0)
-
-/** 4 IbsDcL1tlbHit2M: Data cache L1TLB hit in 2M page */
-#define IBS_OP_IBS_DC_L1_TLB_HIT_2MB(x)         ((x->ibs_op_data3_low & DC_MASK_L1_HIT_2M) != 0)
-
-/** 5 ibs_dc_l1_tlb_hit_1gb: Data cache L1TLB hit in 1G page */
-#define IBS_OP_IBS_DC_L1_TLB_HIT_1GB(x)         ((x->ibs_op_data3_low & DC_MASK_L1_HIT_1G) != 0)
-
-/** 6 ibs_dc_l2_tlb_hit_2mb: Data cache L2TLB hit in 2M page */
-#define IBS_OP_IBS_DC_L2_TLB_HIT_2MB(x)         ((x->ibs_op_data3_low & DC_MASK_L2_HIT_2M) != 0)
-
-/** 7 ibs_dc_miss: Data cache miss */
-#define IBS_OP_IBS_DC_MISS(x)                   ((x->ibs_op_data3_low & DC_MASK_DC_MISS) != 0)
-
-/** 8 ibs_dc_miss_acc: Misaligned access */
-#define IBS_OP_IBS_DC_MISS_ACC(x)               ((x->ibs_op_data3_low & DC_MASK_MISALIGN_ACCESS) != 0)
-
-/** 9 ibs_dc_ld_bnk_con: Bank conflict on load operation */
-#define IBS_OP_IBS_DC_LD_BNK_CON(x)             ((x->ibs_op_data3_low & DC_MASK_LD_BANK_CONFLICT) != 0)
-
-/** 10 ibs_dc_st_bnk_con: Bank conflict on store operation */
-#define IBS_OP_IBS_DC_ST_BNK_CON(x)             ((x->ibs_op_data3_low & DC_MASK_ST_BANK_CONFLICT) != 0)
-
-/** 11 ibs_dc_st_to_ld_fwd : Data forwarded from store to load operation */
-#define IBS_OP_IBS_DC_ST_TO_LD_FWD(x)           ((x->ibs_op_data3_low & DC_MASK_ST_TO_LD_FOR) != 0)
-
-/** 12 ibs_dc_st_to_ld_can: Data forwarding from store to load operation cancelled */
-#define IBS_OP_IBS_DC_ST_TO_LD_CAN(x)           ((x->ibs_op_data3_low & DC_MASK_ST_TO_LD_CANCEL) != 0)
-
-/** 13 ibs_dc_wc_mem_acc : WC memory access */
-#define IBS_OP_IBS_DC_WC_MEM_ACC(x)             ((x->ibs_op_data3_low & DC_MASK_WC_MEM_ACCESS) != 0)
-
-/** 14 ibs_dc_uc_mem_acc : UC memory access */
-#define IBS_OP_IBS_DC_UC_MEM_ACC(x)             ((x->ibs_op_data3_low & DC_MASK_UC_MEM_ACCESS) != 0)
-
-/** 15 ibs_locked_op: Locked operation */
-#define IBS_OP_IBS_LOCKED_OP(x)                 ((x->ibs_op_data3_low & DC_MASK_LOCKED_OP) != 0)
-
-/** 16 ibs_dc_mab_hit : MAB hit */
-#define IBS_OP_IBS_DC_MAB_HIT(x)                ((x->ibs_op_data3_low & DC_MASK_MAB_HIT) != 0)
-
-/** 17 IbsDcLinAddrValid: Data cache linear address valid */
-#define IBS_OP_IBS_DC_LIN_ADDR_VALID(x)         ((x->ibs_op_data3_low & DC_MASK_LIN_ADDR_VALID) != 0)
-
-/** 18 ibs_dc_phy_addr_valid: Data cache physical address valid */
-#define IBS_OP_IBS_DC_PHY_ADDR_VALID(x)         ((x->ibs_op_data3_low & DC_MASK_PHY_ADDR_VALID) != 0)
-
-/** 19 ibs_dc_l2_tlb_hit_1gb: Data cache L2TLB hit in 1G page */
-#define IBS_OP_IBS_DC_L2_TLB_HIT_1GB(x)         ((x->ibs_op_data3_low & DC_MASK_L2_HIT_1G) != 0)
-
-
-/**
- * Aggregate the IBS derived event. Increase the
- * derived event count by one.
- */
-#define AGG_IBS_EVENT(EV)               opd_log_ibs_event(EV, trans)
-
-/**
- * Aggregate the IBS latency/cycle counts. Increase the
- * derived event count by the specified count value.
- */
-#define AGG_IBS_COUNT(EV, COUNT)        opd_log_ibs_count(EV, trans, COUNT)
-
-#endif /*OPD_IBS_MACRO_H*/
diff --git a/daemon/opd_ibs_trans.c b/daemon/opd_ibs_trans.c
deleted file mode 100644
index 8bb3181..0000000
--- a/daemon/opd_ibs_trans.c
+++ /dev/null
@@ -1,634 +0,0 @@
-/**
- * @file daemon/opd_ibs_trans.c
- * AMD Instruction Based Sampling (IBS) translation.
- *
- * @remark Copyright 2008 - 2010 OProfile authors
- * @remark Read the file COPYING
- *
- * @author Jason Yeh <jason.yeh@amd.com>
- * @author Paul Drongowski <paul.drongowski@amd.com>
- * @author Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
- * Copyright (c) 2008 Advanced Micro Devices, Inc.
- */
-
-#include "opd_ibs.h"
-#include "opd_ibs_macro.h"
-#include "opd_ibs_trans.h"
-#include "opd_trans.h"
-#include "opd_printf.h"
-
-#include <stdlib.h>
-#include <stdio.h>
-
-extern FILE * bta_log;
-extern FILE * memaccess_log;
-
-/*
- * --------------------- FETCH DERIVED FUNCTION
- */
-void trans_ibs_fetch (struct transient * trans, unsigned int selected_flag)
-{
-	struct ibs_fetch_sample * trans_fetch = ((struct ibs_sample*)(trans->ext))->fetch;
-
-	if ((selected_flag) == 0)
-		return;
-
-	CHECK_FETCH_SELECTED_FLAG(DE_IBS_FETCH_ALL) {
-		/* IBS all fetch samples (kills + attempts) */
-		AGG_IBS_EVENT(DE_IBS_FETCH_ALL);
-	}		
-
-	CHECK_FETCH_SELECTED_FLAG(DE_IBS_FETCH_KILLED) {
-		/* IBS killed fetches ("case 0") -- All interesting event
-		 * flags are clear */
-		if (IBS_FETCH_KILLED(trans_fetch))
-			AGG_IBS_EVENT(DE_IBS_FETCH_KILLED);
-	}
-
-	CHECK_FETCH_SELECTED_FLAG(DE_IBS_FETCH_ATTEMPTED) {
-		/* Any non-killed fetch is an attempted fetch */
-		AGG_IBS_EVENT(DE_IBS_FETCH_ATTEMPTED);
-	}
-
-	CHECK_FETCH_SELECTED_FLAG(DE_IBS_FETCH_COMPLETED) {
-		if (IBS_FETCH_FETCH_COMPLETION(trans_fetch))
-			/* IBS Fetch Completed */
-			AGG_IBS_EVENT(DE_IBS_FETCH_COMPLETED);
-	}
-
-	CHECK_FETCH_SELECTED_FLAG(DE_IBS_FETCH_ABORTED) {
-		if (!IBS_FETCH_FETCH_COMPLETION(trans_fetch))
-			/* IBS Fetch Aborted */
-			AGG_IBS_EVENT(DE_IBS_FETCH_ABORTED);
-	}
-
-	CHECK_FETCH_SELECTED_FLAG(DE_IBS_L1_ITLB_HIT) {
-		/* IBS L1 ITLB hit */
-		if (IBS_FETCH_L1_TLB_HIT(trans_fetch))
-			AGG_IBS_EVENT(DE_IBS_L1_ITLB_HIT);
-	}
-
-	CHECK_FETCH_SELECTED_FLAG(DE_IBS_ITLB_L1M_L2H) {
-		/* IBS L1 ITLB miss and L2 ITLB hit */
-		if (IBS_FETCH_ITLB_L1M_L2H(trans_fetch))
-			AGG_IBS_EVENT(DE_IBS_ITLB_L1M_L2H);
-	}
-
-	CHECK_FETCH_SELECTED_FLAG(DE_IBS_ITLB_L1M_L2M) {
-		/* IBS L1 & L2 ITLB miss; complete ITLB miss */
-		if (IBS_FETCH_ITLB_L1M_L2M(trans_fetch))
-			AGG_IBS_EVENT(DE_IBS_ITLB_L1M_L2M);
-	}
-
-	CHECK_FETCH_SELECTED_FLAG(DE_IBS_IC_MISS) {
-		/* IBS instruction cache miss */
-		if (IBS_FETCH_INST_CACHE_MISS(trans_fetch))
-			AGG_IBS_EVENT(DE_IBS_IC_MISS);
-	}
-
-	CHECK_FETCH_SELECTED_FLAG(DE_IBS_IC_HIT) {
-		/* IBS instruction cache hit */
-		if (IBS_FETCH_INST_CACHE_HIT(trans_fetch))
-			AGG_IBS_EVENT(DE_IBS_IC_HIT);
-	}
-
-	CHECK_FETCH_SELECTED_FLAG(DE_IBS_FETCH_4K_PAGE) {
-		if (IBS_FETCH_PHYS_ADDR_VALID(trans_fetch)
-		    && IBS_FETCH_TLB_PAGE_SIZE_4K(trans_fetch))
-			AGG_IBS_EVENT(DE_IBS_FETCH_4K_PAGE);
-	}
-
-	CHECK_FETCH_SELECTED_FLAG(DE_IBS_FETCH_2M_PAGE) {
-		if (IBS_FETCH_PHYS_ADDR_VALID(trans_fetch)
-		    && IBS_FETCH_TLB_PAGE_SIZE_2M(trans_fetch))
-			AGG_IBS_EVENT(DE_IBS_FETCH_2M_PAGE);
-	}
-
-	CHECK_FETCH_SELECTED_FLAG(DE_IBS_FETCH_1G_PAGE) {
-		if (IBS_FETCH_PHYS_ADDR_VALID(trans_fetch)
-		    && IBS_FETCH_TLB_PAGE_SIZE_1G(trans_fetch))
-			AGG_IBS_EVENT(DE_IBS_FETCH_1G_PAGE);
-	}
-
-	CHECK_FETCH_SELECTED_FLAG(DE_IBS_FETCH_XX_PAGE) {
-	}
-
-	CHECK_FETCH_SELECTED_FLAG(DE_IBS_FETCH_LATENCY) {
-		if (IBS_FETCH_FETCH_LATENCY(trans_fetch))
-			AGG_IBS_COUNT(DE_IBS_FETCH_LATENCY,
-				      IBS_FETCH_FETCH_LATENCY(trans_fetch));
-	}
-}
-
-
-/*
- * --------------------- OP DERIVED FUNCTION
- */
-void trans_ibs_op (struct transient * trans, unsigned int selected_flag)
-{
-	struct ibs_op_sample * trans_op = ((struct ibs_sample*)(trans->ext))->op;
-
-	if ((selected_flag) == 0)
-		return;
-
-	CHECK_OP_SELECTED_FLAG(DE_IBS_OP_ALL) {
-		/* All IBS op samples */
-		AGG_IBS_EVENT(DE_IBS_OP_ALL);
-	}
-
-	CHECK_OP_SELECTED_FLAG(DE_IBS_OP_TAG_TO_RETIRE) {
-		/* Tally retire cycle counts for all sampled macro-ops
-		 * IBS tag to retire cycles */
-		if (IBS_OP_TAG_TO_RETIRE_CYCLES(trans_op))
-			AGG_IBS_COUNT(DE_IBS_OP_TAG_TO_RETIRE,
-				IBS_OP_TAG_TO_RETIRE_CYCLES(trans_op));
-	}
-
-	CHECK_OP_SELECTED_FLAG(DE_IBS_OP_COMP_TO_RETIRE) {
-		/* IBS completion to retire cycles */
-		if (IBS_OP_COM_TO_RETIRE_CYCLES(trans_op))
-			AGG_IBS_COUNT(DE_IBS_OP_COMP_TO_RETIRE,
-				IBS_OP_COM_TO_RETIRE_CYCLES(trans_op));
-	}
-
-	CHECK_OP_SELECTED_FLAG(DE_IBS_BRANCH_RETIRED) {
-		if (IBS_OP_BRANCH_RETIRED(trans_op))
-			/* IBS Branch retired op */
-			AGG_IBS_EVENT(DE_IBS_BRANCH_RETIRED) ;
-	}
-
-	CHECK_OP_SELECTED_FLAG(DE_IBS_BRANCH_MISP) {
-		if (IBS_OP_BRANCH_RETIRED(trans_op)
-		    /* Test branch-specific event flags */
-		    /* IBS mispredicted Branch op */
-		    && IBS_OP_BRANCH_MISPREDICT(trans_op))
-			AGG_IBS_EVENT(DE_IBS_BRANCH_MISP) ;
-	}
-
-	CHECK_OP_SELECTED_FLAG(DE_IBS_BRANCH_TAKEN) {
-		if (IBS_OP_BRANCH_RETIRED(trans_op)
-		    /* IBS taken Branch op */
-		    && IBS_OP_BRANCH_TAKEN(trans_op))
-			AGG_IBS_EVENT(DE_IBS_BRANCH_TAKEN);
-	}
-
-	CHECK_OP_SELECTED_FLAG(DE_IBS_BRANCH_MISP_TAKEN) {
-		if (IBS_OP_BRANCH_RETIRED(trans_op)
-		    /* IBS mispredicted taken branch op */
-		    && IBS_OP_BRANCH_TAKEN(trans_op)
-		    && IBS_OP_BRANCH_MISPREDICT(trans_op))
-			AGG_IBS_EVENT(DE_IBS_BRANCH_MISP_TAKEN);
-	}
-
-	CHECK_OP_SELECTED_FLAG(DE_IBS_RETURN) {
-		if (IBS_OP_BRANCH_RETIRED(trans_op)
-		    /* IBS return op */
-		    && IBS_OP_RETURN(trans_op))
-			AGG_IBS_EVENT(DE_IBS_RETURN);
-	}
-
-	CHECK_OP_SELECTED_FLAG(DE_IBS_RETURN_MISP) {
-		if (IBS_OP_BRANCH_RETIRED(trans_op)
-		    /* IBS mispredicted return op */
-		    && IBS_OP_RETURN(trans_op)
-		    && IBS_OP_BRANCH_MISPREDICT(trans_op))
-			AGG_IBS_EVENT(DE_IBS_RETURN_MISP);
-	}
-
-	CHECK_OP_SELECTED_FLAG(DE_IBS_RESYNC) {
-		/* Test for a resync macro-op */
-		if (IBS_OP_BRANCH_RESYNC(trans_op))
-			AGG_IBS_EVENT(DE_IBS_RESYNC);
-	}
-}
-
-
-/*
- * --------------------- OP LS DERIVED FUNCTION
- */
-void trans_ibs_op_ls (struct transient * trans, unsigned int selected_flag)
-{
-	struct ibs_op_sample * trans_op = ((struct ibs_sample*)(trans->ext))->op;
-
-	/* Preliminary check */
-	if (!IBS_OP_IBS_LD_OP(trans_op) && !IBS_OP_IBS_ST_OP(trans_op))
-		return;
-
-
-	if ((selected_flag) == 0)
-		return;
-
-	CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_ALL_OP) {
-		/* Count the number of LS op samples */
-		AGG_IBS_EVENT(DE_IBS_LS_ALL_OP) ;
-	}
-
-	CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_LOAD_OP) {
-		if (IBS_OP_IBS_LD_OP(trans_op))
-			/* TALLy an IBS load derived event */
-			AGG_IBS_EVENT(DE_IBS_LS_LOAD_OP) ;
-	}
-
-	CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_STORE_OP) {
-		if (IBS_OP_IBS_ST_OP(trans_op))
-			/* Count and handle store operations */
-			AGG_IBS_EVENT(DE_IBS_LS_STORE_OP);
-	}
-
-	CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_DTLB_L1H) {
-		if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op)
-		    && !IBS_OP_IBS_DC_L1_TLB_MISS(trans_op))
-			/* L1 DTLB hit -- This is the most frequent case */
-			AGG_IBS_EVENT(DE_IBS_LS_DTLB_L1H);
-	}
-
-	CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_DTLB_L1M_L2H) {
-		/* l2_translation_size = 1 */
-		if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op)
-		    && IBS_OP_IBS_DC_L1_TLB_MISS(trans_op)
-		    && !IBS_OP_IBS_DC_L2_TLB_MISS(trans_op))
-			/* L1 DTLB miss, L2 DTLB hit */
-			AGG_IBS_EVENT(DE_IBS_LS_DTLB_L1M_L2H);
-	}
-
-	CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_DTLB_L1M_L2M) {
-		if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op)
-		    && IBS_OP_IBS_DC_L1_TLB_MISS(trans_op)
-		    && IBS_OP_IBS_DC_L2_TLB_MISS(trans_op))
-			/* L1 DTLB miss, L2 DTLB miss */
-			AGG_IBS_EVENT(DE_IBS_LS_DTLB_L1M_L2M);
-	}
-
-	CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_DC_MISS) {
-		if (IBS_OP_IBS_DC_MISS(trans_op))
-			AGG_IBS_EVENT(DE_IBS_LS_DC_MISS);
-	}
-
-	CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_DC_HIT) {
-		if (!IBS_OP_IBS_DC_MISS(trans_op))
-			AGG_IBS_EVENT(DE_IBS_LS_DC_HIT);
-	}
-
-	CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_MISALIGNED) {
-		if (IBS_OP_IBS_DC_MISS_ACC(trans_op))
-			AGG_IBS_EVENT(DE_IBS_LS_MISALIGNED);
-	}
-
-	CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_BNK_CONF_LOAD) {
-		if (IBS_OP_IBS_DC_LD_BNK_CON(trans_op))
-			AGG_IBS_EVENT(DE_IBS_LS_BNK_CONF_LOAD);
-	}
-
-	CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_BNK_CONF_STORE) {
-		if (IBS_OP_IBS_DC_ST_BNK_CON(trans_op))
-			AGG_IBS_EVENT(DE_IBS_LS_BNK_CONF_STORE);
-	}
-
-	CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_STL_FORWARDED) {
-		if (IBS_OP_IBS_LD_OP(trans_op)
-		    /* Data forwarding info are valid only for load ops */
-		    && IBS_OP_IBS_DC_ST_TO_LD_FWD(trans_op))
-			AGG_IBS_EVENT(DE_IBS_LS_STL_FORWARDED) ;
-	}
-
-	CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_STL_CANCELLED) {
-		if (IBS_OP_IBS_LD_OP(trans_op))
-		if (IBS_OP_IBS_DC_ST_TO_LD_CAN(trans_op))
-			AGG_IBS_EVENT(DE_IBS_LS_STL_CANCELLED) ;
-	}
-
-	CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_UC_MEM_ACCESS) {
-		if (IBS_OP_IBS_DC_UC_MEM_ACC(trans_op))
-			AGG_IBS_EVENT(DE_IBS_LS_UC_MEM_ACCESS);
-	}
-
-	CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_WC_MEM_ACCESS) {
-		if (IBS_OP_IBS_DC_WC_MEM_ACC(trans_op))
-			AGG_IBS_EVENT(DE_IBS_LS_WC_MEM_ACCESS);
-	}
-
-	CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_LOCKED_OP) {
-		if (IBS_OP_IBS_LOCKED_OP(trans_op))
-			AGG_IBS_EVENT(DE_IBS_LS_LOCKED_OP);
-	}
-
-	CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_MAB_HIT) {
-		if (IBS_OP_IBS_DC_MAB_HIT(trans_op))
-			AGG_IBS_EVENT(DE_IBS_LS_MAB_HIT);
-	}
-
-	CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_L1_DTLB_4K) {
-		/* l1_translation */
-		if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op)
-		    && !IBS_OP_IBS_DC_L1_TLB_MISS(trans_op)
-
-		    && !IBS_OP_IBS_DC_L1_TLB_HIT_2MB(trans_op)
-		    && !IBS_OP_IBS_DC_L1_TLB_HIT_1GB(trans_op))
-			/* This is the most common case, unfortunately */
-			AGG_IBS_EVENT(DE_IBS_LS_L1_DTLB_4K) ;
-	}
-
-	CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_L1_DTLB_2M) {
-		/* l1_translation */
-		if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op)
-		    && !IBS_OP_IBS_DC_L1_TLB_MISS(trans_op)
-
-		    && IBS_OP_IBS_DC_L1_TLB_HIT_2MB(trans_op))
-			/* 2M L1 DTLB page translation */
-			AGG_IBS_EVENT(DE_IBS_LS_L1_DTLB_2M);
-	}
-
-	CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_L1_DTLB_1G) {
-		/* l1_translation */
-		if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op)
-		    && !IBS_OP_IBS_DC_L1_TLB_MISS(trans_op)
-
-		    && !IBS_OP_IBS_DC_L1_TLB_HIT_2MB(trans_op)
-		    && IBS_OP_IBS_DC_L1_TLB_HIT_1GB(trans_op))
-			/* 1G L1 DTLB page translation */
-			AGG_IBS_EVENT(DE_IBS_LS_L1_DTLB_1G);
-	}
-
-	CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_L1_DTLB_RES) {
-	}
-
-	CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_L2_DTLB_4K) {
-		/* l2_translation_size = 1 */
-		if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op)
-		    && IBS_OP_IBS_DC_L1_TLB_MISS(trans_op)
-		    && !IBS_OP_IBS_DC_L2_TLB_MISS(trans_op)
-
-		    /* L2 DTLB page translation */
-		    && !IBS_OP_IBS_DC_L2_TLB_HIT_2MB(trans_op)
-		    && !IBS_OP_IBS_DC_L2_TLB_HIT_1GB(trans_op))
-			/* 4K L2 DTLB page translation */
-			AGG_IBS_EVENT(DE_IBS_LS_L2_DTLB_4K);
-	}
-
-	CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_L2_DTLB_2M) {
-		/* l2_translation_size = 1 */
-		if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op)
-		    && IBS_OP_IBS_DC_L1_TLB_MISS(trans_op)
-		    && !IBS_OP_IBS_DC_L2_TLB_MISS(trans_op)
-
-		    /* L2 DTLB page translation */
-		    && IBS_OP_IBS_DC_L2_TLB_HIT_2MB(trans_op)
-		    && !IBS_OP_IBS_DC_L2_TLB_HIT_1GB(trans_op))
-			/* 2M L2 DTLB page translation */
-			AGG_IBS_EVENT(DE_IBS_LS_L2_DTLB_2M);
-	}
-
-	CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_L2_DTLB_1G) {
-		/* l2_translation_size = 1 */
-		if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op)
-		    && IBS_OP_IBS_DC_L1_TLB_MISS(trans_op)
-		    && !IBS_OP_IBS_DC_L2_TLB_MISS(trans_op)
-
-		    /* L2 DTLB page translation */
-		    && !IBS_OP_IBS_DC_L2_TLB_HIT_2MB(trans_op)
-		    && IBS_OP_IBS_DC_L2_TLB_HIT_1GB(trans_op))
-			/* 2M L2 DTLB page translation */
-			AGG_IBS_EVENT(DE_IBS_LS_L2_DTLB_1G);
-	}
-
-	CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_L2_DTLB_RES2) {
-	}
-
-	CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_DC_LOAD_LAT) {
-		if (IBS_OP_IBS_LD_OP(trans_op)
-		    /* If the load missed in DC, tally the DC load miss latency */
-		    && IBS_OP_IBS_DC_MISS(trans_op))
-			/* DC load miss latency is only reliable for load ops */
-			AGG_IBS_COUNT(DE_IBS_LS_DC_LOAD_LAT,
-				      IBS_OP_DC_MISS_LATENCY(trans_op)) ;
-	}
-}
-
-/*
- * --------------------- OP NB DERIVED FUNCTION
- *
- * NB data is only guaranteed reliable for load operations
- * that miss in L1 and L2 cache. NB data arrives too late
- * to be reliable for store operations
- */
-void trans_ibs_op_nb (struct transient * trans, unsigned int selected_flag)
-{
-	struct ibs_op_sample * trans_op = ((struct ibs_sample*)(trans->ext))->op;
-
-	/* Preliminary check */
-	if ((selected_flag) == 0)
-		return;
-
-	if (!IBS_OP_IBS_LD_OP(trans_op))
-		return;
-
-	if (!IBS_OP_IBS_DC_MISS(trans_op))
-		return;
-
-	if (IBS_OP_NB_IBS_REQ_SRC(trans_op) == 0)
-		return;
-
-	CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_LOCAL) {
-		if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op))
-			/* Request was serviced by local processor */
-			AGG_IBS_EVENT(DE_IBS_NB_LOCAL) ;
-	}
-
-	CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_REMOTE) {
-		if (IBS_OP_NB_IBS_REQ_DST_PROC(trans_op))
-			/* Request was serviced by remote processor */
-			AGG_IBS_EVENT(DE_IBS_NB_REMOTE) ;
-	}
-
-	CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_LOCAL_L3) {
-		if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op)
-		    &&  IBS_OP_NB_IBS_REQ_SRC_01(trans_op))
-			AGG_IBS_EVENT(DE_IBS_NB_LOCAL_L3);
-	}
-
-	CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_LOCAL_CACHE) {
-		if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op)
-		    &&  IBS_OP_NB_IBS_REQ_SRC_02(trans_op))
-			AGG_IBS_EVENT(DE_IBS_NB_LOCAL_CACHE);
-	}
-
-	CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_REMOTE_CACHE) {
-		if (IBS_OP_NB_IBS_REQ_DST_PROC(trans_op)
-		    &&  IBS_OP_NB_IBS_REQ_SRC_02(trans_op))
-			AGG_IBS_EVENT(DE_IBS_NB_REMOTE_CACHE) ;
-	}
-
-	CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_LOCAL_DRAM) {
-		if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op)
-		    &&  IBS_OP_NB_IBS_REQ_SRC_03(trans_op))
-			AGG_IBS_EVENT(DE_IBS_NB_LOCAL_DRAM);
-	}
-
-	CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_REMOTE_DRAM) {
-		if (IBS_OP_NB_IBS_REQ_DST_PROC(trans_op)
-		    &&  IBS_OP_NB_IBS_REQ_SRC_03(trans_op))
-			AGG_IBS_EVENT(DE_IBS_NB_REMOTE_DRAM) ;
-	}
-
-	CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_LOCAL_OTHER) {
-		if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op)
-		    &&  IBS_OP_NB_IBS_REQ_SRC_07(trans_op))
-			AGG_IBS_EVENT(DE_IBS_NB_LOCAL_OTHER);
-	}
-
-	CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_REMOTE_OTHER) {
-		if (IBS_OP_NB_IBS_REQ_DST_PROC(trans_op)
-		    &&  IBS_OP_NB_IBS_REQ_SRC_07(trans_op))
-			AGG_IBS_EVENT(DE_IBS_NB_REMOTE_OTHER) ;
-	}
-
-	CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_CACHE_STATE_M) {
-		if (IBS_OP_NB_IBS_REQ_SRC_02(trans_op)
-		    && !IBS_OP_NB_IBS_CACHE_HIT_ST(trans_op))
-			AGG_IBS_EVENT(DE_IBS_NB_CACHE_STATE_M) ;
-	}
-
-	CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_CACHE_STATE_O) {
-		if (IBS_OP_NB_IBS_REQ_SRC_02(trans_op)
-		    && IBS_OP_NB_IBS_CACHE_HIT_ST(trans_op))
-			AGG_IBS_EVENT(DE_IBS_NB_CACHE_STATE_O) ;
-	}
-
-	CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_LOCAL_LATENCY) {
-		if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op))
-			/* Request was serviced by local processor */
-			AGG_IBS_COUNT(DE_IBS_NB_LOCAL_LATENCY,
-				      IBS_OP_DC_MISS_LATENCY(trans_op));
-	}
-
-	CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_REMOTE_LATENCY) {
-		if (IBS_OP_NB_IBS_REQ_DST_PROC(trans_op))
-			/* Request was serviced by remote processor */
-			AGG_IBS_COUNT(DE_IBS_NB_REMOTE_LATENCY,
-				      IBS_OP_DC_MISS_LATENCY(trans_op));
-	}
-}
-
-
-int trans_ibs_op_rip_invalid (struct transient * trans)
-{
-	struct ibs_op_sample * trans_op = ((struct ibs_sample*)(trans->ext))->op;
-
-	if (IBS_OP_RIP_INVALID(trans_op))
-		return 1;	
-
-	return 0;
-}
-
-
-void trans_ibs_op_mask_reserved (unsigned int family, struct transient * trans)
-{
-	struct ibs_op_sample * trans_op    = ((struct ibs_sample*)(trans->ext))->op;
-
-	switch (family) {
-	case 0x10:
-		/* Reserved IbsRipInvalid (MSRC001_1035[38])*/
-		trans_op->ibs_op_data1_high &= ~MASK_RIP_INVALID;
-		break;
-	case 0x12:
-		/* Reserved NbIbsReqDstProc (MSRCC001_1036[4]) */
-		trans_op->ibs_op_data2_low &= ~NB_MASK_REQ_DST_PROC;
-		/* Reserved NbIbsReqCacheHitSt (MSRCC001_1036[5]) */
-		trans_op->ibs_op_data2_low &= ~NB_MASK_L3_STATE;
-		break;
-	case 0x14:
-		/* Reserved NbIbsReqDstProc (MSRCC001_1036[4]) */
-		trans_op->ibs_op_data2_low &= ~NB_MASK_REQ_DST_PROC;
-		/* Reserved NbIbsReqCacheHitSt (MSRCC001_1036[5]) */
-		trans_op->ibs_op_data2_low &= ~NB_MASK_L3_STATE;
-		/* Reserved IbsDcL1tlbHit1G (MSRC001_1037[5]) */
-		trans_op->ibs_op_data3_low &= ~DC_MASK_L1_HIT_1G;
-		/* Reserved IbsDcLdBnkCon (MSRC001_1037[9]) */
-		trans_op->ibs_op_data3_low &= ~DC_MASK_LD_BANK_CONFLICT;
-		/* Reserved IbsDcStBnkCon (MSRC001_1037[10]) */
-		trans_op->ibs_op_data3_low &= ~DC_MASK_ST_BANK_CONFLICT;
-		/* Reserved IbsDcStToLdCan (MSRC001_1037[12]) */
-		trans_op->ibs_op_data3_low &= ~DC_MASK_ST_TO_LD_CANCEL;
-		/* Reserved IbsDcL2tlbHit1G (MSRC001_1037[19]) */
-		trans_op->ibs_op_data3_low &= ~DC_MASK_L2_HIT_1G;
-		
-		break;
-	case 0x15:
-	default:
-		break;
-	
-	}
-}
-
-
-void trans_ibs_op_bta(struct transient * trans)
-{
-	static cookie_t old_cookie     = NO_COOKIE;
-	static cookie_t old_app_cookie = NO_COOKIE;
-	struct ibs_op_sample * trans_op = ((struct ibs_sample*)(trans->ext))->op;
-
-	if (!bta_log)
-		return;
-
-	if (!trans_op->ibs_op_brtgt_addr)
-		return;
-
-	if( old_app_cookie == INVALID_COOKIE 
-	||  old_app_cookie == NO_COOKIE 
-	||  old_app_cookie != trans->app_cookie) {
-		old_app_cookie = trans->cookie;
-	}
-
-	if (trans->in_kernel == 1) {
-		old_cookie = NO_COOKIE;
-	} else {
-		if( old_cookie == INVALID_COOKIE 
-		||  old_cookie == NO_COOKIE 
-		||  old_cookie != trans->cookie) {
-			old_cookie = trans->cookie;
-		}
-	}
-
-	fprintf(bta_log, "0x%016llx,0x%016llx,%02lu %08u,%08u,0x%08x,0x%08lx\n",
-                        trans->app_cookie, trans->cookie, trans->cpu, trans->tgid, trans->tid, (unsigned int)trans->pc,
-			trans_op->ibs_op_brtgt_addr);
-}
-
-
-void trans_ibs_op_ls_memaccess(struct transient * trans)
-{
-	static cookie_t old_cookie     = NO_COOKIE;
-	static cookie_t old_app_cookie = NO_COOKIE;
-	struct ibs_op_sample * trans_op = ((struct ibs_sample*)(trans->ext))->op;
-
-	if (!memaccess_log)
-		return;
-
-	if( old_app_cookie == INVALID_COOKIE 
-	||  old_app_cookie == NO_COOKIE 
-	||  old_app_cookie != trans->app_cookie) {
-		old_app_cookie = trans->cookie;
-	}
-
-	if (trans->in_kernel == 1) {
-		old_cookie = NO_COOKIE;
-	} else {
-		if( old_cookie == INVALID_COOKIE 
-		||  old_cookie == NO_COOKIE 
-		||  old_cookie != trans->cookie) {
-			old_cookie = trans->cookie;
-		}
-	}
-
-	fprintf(memaccess_log, "0x%016llx,0x%016llx,%02lu,%08u,%08u,0x%08x,0x%08u:%08x,0x%08x:%08x,%s,%08u\n",
-                        trans->app_cookie, 
-trans->cookie, 
-trans->cpu, 
-trans->tgid, 
-trans->tid, 
-(unsigned int)trans->pc, 
-			trans_op->ibs_op_phys_addr_high, trans_op->ibs_op_phys_addr_low, 
-			trans_op->ibs_op_ldst_linaddr_high, trans_op->ibs_op_ldst_linaddr_low, 
-			(IBS_OP_IBS_LD_OP(trans_op))? "LD": "ST", 
-			(unsigned int) IBS_OP_DC_MISS_LATENCY(trans_op));
-}
diff --git a/daemon/opd_ibs_trans.h b/daemon/opd_ibs_trans.h
deleted file mode 100644
index bf34dfa..0000000
--- a/daemon/opd_ibs_trans.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/**
- * @file daemon/opd_ibs_trans.h
- * AMD Family10h Instruction Based Sampling (IBS) translation.
- *
- * @remark Copyright 2008 OProfile authors
- * @remark Read the file COPYING
- *
- * @author Jason Yeh <jason.yeh@amd.com>
- * @author Paul Drongowski <paul.drongowski@amd.com>
- * @author Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
- * Copyright (c) 2008 Advanced Micro Devices, Inc.
- */
-
-#ifndef OPD_IBS_TRANS_H
-#define OPD_IBS_TRANS_H
-
-struct ibs_fetch_sample;
-struct ibs_op_sample;
-struct transient;
-
-struct ibs_translation_table {
-	unsigned int event;
-	void (*translator)(struct transient *);
-};
-
-
-extern void trans_ibs_fetch (struct transient * trans, unsigned int selected_flag);
-extern void trans_ibs_op (struct transient * trans, unsigned int selected_flag);
-extern void trans_ibs_op_ls (struct transient * trans, unsigned int selected_flag);
-extern void trans_ibs_op_nb (struct transient * trans, unsigned int selected_flag);
-extern int  trans_ibs_op_rip_invalid (struct transient * trans);
-extern void trans_ibs_op_mask_reserved (unsigned int family, struct transient * trans);
-extern void trans_ibs_op_ls_memaccess(struct transient * trans);
-extern void trans_ibs_op_bta (struct transient * trans);
-#endif // OPD_IBS_TRANS_H
diff --git a/daemon/opd_interface.h b/daemon/opd_interface.h
deleted file mode 100644
index ef3b02c..0000000
--- a/daemon/opd_interface.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/**
- * @file opd_interface.h
- *
- * Module / user space interface for 2.6 kernels and above
- *
- * @remark Copyright 2002 OProfile authors
- * @remark Read the file COPYING
- *
- * @author John Levon
- * @author Philippe Elie
- * Modified by Aravind Menon for Xen
- * These modifications are:
- * Copyright (C) 2005 Hewlett-Packard Co.
- */
-
-#ifndef OPD_INTERFACE_H
-#define OPD_INTERFACE_H
-
-#define CTX_SWITCH_CODE			1
-#define CPU_SWITCH_CODE			2
-#define COOKIE_SWITCH_CODE		3
-#define KERNEL_ENTER_SWITCH_CODE	4
-#define USER_ENTER_SWITCH_CODE		5
-#define MODULE_LOADED_CODE		6
-#define CTX_TGID_CODE			7
-#define TRACE_BEGIN_CODE		8
-/* Code 9 used to be TRACE_END_CODE which is not used anymore  */
-/* Code 9 is now considered an unknown escape code             */
-#define XEN_ENTER_SWITCH_CODE		10
-/*
- * Ugly work-around for the unfortunate collision between Xenoprof's
- * DOMAIN_SWITCH_CODE (in use on x86) and Cell's SPU_PROFILING_CODE
- * (in use with Power):
- */
-#if defined(__powerpc__)
-#define SPU_PROFILING_CODE		11
-#define SPU_CTX_SWITCH_CODE		12
-#else
-#define DOMAIN_SWITCH_CODE		11
-/* Code 12 is now considered an unknown escape code */
-#endif
-
-/* AMD's Instruction-Based Sampling (IBS) escape code */
-#define IBS_FETCH_SAMPLE		13
-#define IBS_OP_SAMPLE			14
-#define LAST_CODE			15
- 
-#endif /* OPD_INTERFACE_H */
diff --git a/daemon/opd_kernel.c b/daemon/opd_kernel.c
deleted file mode 100644
index 5ebc210..0000000
--- a/daemon/opd_kernel.c
+++ /dev/null
@@ -1,229 +0,0 @@
-/**
- * @file daemon/opd_kernel.c
- * Dealing with the kernel and kernel module samples
- *
- * @remark Copyright 2002 OProfile authors
- * @remark Read the file COPYING
- *
- * @author John Levon
- * @author Philippe Elie
- * Modified by Aravind Menon for Xen
- * These modifications are:
- * Copyright (C) 2005 Hewlett-Packard Co.
- */
-
-#include "opd_kernel.h"
-#include "opd_sfile.h"
-#include "opd_trans.h"
-#include "opd_printf.h"
-#include "opd_stats.h"
-#include "oprofiled.h"
-
-#include "op_fileio.h"
-#include "op_config.h"
-#include "op_libiberty.h"
-
-#include <string.h>
-#include <stdlib.h>
-#include <errno.h>
-#include <assert.h>
-
-static LIST_HEAD(modules);
-
-static struct kernel_image vmlinux_image;
-
-static struct kernel_image xen_image;
-
-void opd_create_vmlinux(char const * name, char const * arg)
-{
-	/* vmlinux is *not* on the list of modules */
-	list_init(&vmlinux_image.list);
-
-	/* for no vmlinux */
-	if (no_vmlinux) {
-		vmlinux_image.name = "no-vmlinux";
-		return;
-	}
-	
-	vmlinux_image.name = xstrdup(name);
-
-	sscanf(arg, "%llx,%llx", &vmlinux_image.start, &vmlinux_image.end);
-
-	verbprintf(vmisc, "kernel_start = %llx, kernel_end = %llx\n",
-	           vmlinux_image.start, vmlinux_image.end);
-
-	if (!vmlinux_image.start && !vmlinux_image.end) {
-		fprintf(stderr, "error: mis-parsed kernel range: %llx-%llx\n",
-		        vmlinux_image.start, vmlinux_image.end);
-		exit(EXIT_FAILURE);
-	}
-}
-
-void opd_create_xen(char const * name, char const * arg)
-{
-	/* xen is *not* on the list of modules */
-	list_init(&xen_image.list);
-
-	/* for no xen */
-	if (no_xen) {
-		xen_image.name = "no-xen";
-		return;
-	}
-
-	xen_image.name = xstrdup(name);
-
-	sscanf(arg, "%llx,%llx", &xen_image.start, &xen_image.end);
-
-	verbprintf(vmisc, "xen_start = %llx, xen_end = %llx\n",
-	           xen_image.start, xen_image.end);
-
-	if (!xen_image.start && !xen_image.end) {
-		fprintf(stderr, "error: mis-parsed xen range: %llx-%llx\n",
-		        xen_image.start, xen_image.end);
-		exit(EXIT_FAILURE);
-	}
-}
-
-
-/**
- * Allocate and initialise a kernel image description
- * @param name image name
- * @param start start address
- * @param end end address
- */
-static struct kernel_image *
-opd_create_module(char const * name, vma_t start, vma_t end)
-{
-	struct kernel_image * image = xmalloc(sizeof(struct kernel_image));
-
-	image->name = xstrdup(name);
-	image->start = start;
-	image->end = end;
-	list_add(&image->list, &modules);
-
-	return image;
-}
-
-
-/**
- * Clear and free all kernel image information and reset
- * values.
- */
-static void opd_clear_modules(void)
-{
-	struct list_head * pos;
-	struct list_head * pos2;
-	struct kernel_image * image;
-
-	list_for_each_safe(pos, pos2, &modules) {
-		image = list_entry(pos, struct kernel_image, list);
-		if (image->name)
-			free(image->name);
-		free(image);
-	}
-
-	list_init(&modules);
-
-	/* clear out lingering references */
-	sfile_clear_kernel();
-}
-
-
-/*
- * each line is in the format:
- *
- * module_name 16480 1 dependencies Live 0xe091e000
- *
- * without any blank space in each field
- */
-void opd_reread_module_info(void)
-{
-	FILE * fp;
-	char * line;
-	struct kernel_image * image;
-	int module_size;
-	char ref_count[32+1];
-	int ret;
-	char module_name[256+1];
-	char live_info[32+1];
-	char dependencies[4096+1];
-	unsigned long long start_address;
-
-	if (no_vmlinux)
-		return;
-
-	opd_clear_modules();
-
-	printf("Reading module info.\n");
-
-	fp = op_try_open_file("/proc/modules", "r");
-
-	if (!fp) {
-		printf("oprofiled: /proc/modules not readable, "
-			"can't process module samples.\n");
-		return;
-	}
-
-	while (1) {
-		line = op_get_line(fp);
-
-		if (!line)
-			break;
-
-		if (line[0] == '\0') {
-			free(line);
-			continue;
-		}
-
-		ret = sscanf(line, "%256s %u %32s %4096s %32s %llx",
-			     module_name, &module_size, ref_count,
-			     dependencies, live_info, &start_address);
-		if (ret != 6) {
-			printf("bad /proc/modules entry: %s\n", line);
-			free(line);
-			continue;
-		}
-
-		image = opd_create_module(module_name, start_address,
-		                          start_address + module_size);
-
-		verbprintf(vmodule, "module %s start %llx end %llx\n",
-			   image->name, image->start, image->end);
-
-		free(line);
-	}
-
-	op_close_file(fp);
-}
-
-
-/**
- * find a kernel image by PC value
- * @param trans holds PC value to look up
- *
- * find the kernel image which contains this PC.
- *
- * Return %NULL if not found.
- */
-struct kernel_image * find_kernel_image(struct transient const * trans)
-{
-	struct list_head * pos;
-	struct kernel_image * image = &vmlinux_image;
-
-	if (no_vmlinux)
-		return image;
-
-	if (image->start <= trans->pc && image->end > trans->pc)
-		return image;
-
-	list_for_each(pos, &modules) {
-		image = list_entry(pos, struct kernel_image, list);
-		if (image->start <= trans->pc && image->end > trans->pc)
-			return image;
-	}
-
-	if (xen_image.start <= trans->pc && xen_image.end > trans->pc)
-		return &xen_image;
-
-	return NULL;
-}
diff --git a/daemon/opd_kernel.h b/daemon/opd_kernel.h
deleted file mode 100644
index cb71a30..0000000
--- a/daemon/opd_kernel.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/**
- * @file daemon/opd_kernel.h
- * Dealing with the kernel and kernel module images
- *
- * @remark Copyright 2002 OProfile authors
- * @remark Read the file COPYING
- *
- * @author John Levon
- * @author Philippe Elie
- * Modified by Aravind Menon for Xen
- * These modifications are:
- * Copyright (C) 2005 Hewlett-Packard Co.
- */
-
-#ifndef OPD_KERNEL_H
-#define OPD_KERNEL_H
-
-#include "op_types.h"
-#include "op_list.h"
-
-struct transient;
-
-/** create the kernel image */
-void opd_create_vmlinux(char const * name, char const * arg);
-
-void opd_create_xen(char const * name, char const * arg);
-
-/** opd_reread_module_info - parse /proc/modules for kernel modules */
-void opd_reread_module_info(void);
-
-/** Describes a kernel module or vmlinux itself */
-struct kernel_image {
-	char * name;
-	vma_t start;
-	vma_t end;
-	struct list_head list;
-};
-
-/** Find a kernel_image based upon the given parameters in trans. */
-struct kernel_image *
-find_kernel_image(struct transient const * trans);
-
-#endif /* OPD_KERNEL_H */
diff --git a/daemon/opd_mangling.c b/daemon/opd_mangling.c
deleted file mode 100644
index b4768a6..0000000
--- a/daemon/opd_mangling.c
+++ /dev/null
@@ -1,205 +0,0 @@
-/**
- * @file daemon/opd_mangling.c
- * Mangling and opening of sample files
- *
- * @remark Copyright 2002 OProfile authors
- * @remark Read the file COPYING
- *
- * @author John Levon
- * @author Philippe Elie
- */
-
-#include <sys/types.h>
- 
-#include "opd_mangling.h"
-#include "opd_kernel.h"
-#include "opd_cookie.h"
-#include "opd_sfile.h"
-#include "opd_anon.h"
-#include "opd_printf.h"
-#include "opd_events.h"
-#include "oprofiled.h"
-
-#include "op_file.h"
-#include "op_sample_file.h"
-#include "op_config.h"
-#include "op_mangle.h"
-#include "op_events.h"
-#include "op_libiberty.h"
-
-#include <limits.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <errno.h>
-
-
-static char const * get_dep_name(struct sfile const * sf)
-{
-	if (sf->anon)
-		return find_cookie(sf->app_cookie);
-
-	/* avoid to call find_cookie(), caller can recover using image_name */
-	if (sf->cookie == sf->app_cookie)
-		return NULL;
-
-	if (!separate_kernel && !(separate_lib && !sf->kernel))
-		return NULL;
-
-	/* this will fail if e.g. kernel thread */
-	if (sf->app_cookie == 0)
-		return NULL;
-
-	return find_cookie(sf->app_cookie);
-}
-
-
-static char * mangle_anon(struct anon_mapping const * anon)
-{
-	char * name = xmalloc(PATH_MAX);
-
-	snprintf(name, 1024, "%u.0x%llx.0x%llx", (unsigned int)anon->tgid,
-	       anon->start, anon->end);
-
-	return name;
-}
-
-
-static char *
-mangle_filename(struct sfile * last, struct sfile const * sf, int counter, int cg)
-{
-	char * mangled;
-	struct mangle_values values;
-	struct opd_event * event = find_counter_event(counter);
-
-	values.flags = 0;
-
-	if (sf->kernel) {
-		values.image_name = sf->kernel->name;
-		values.flags |= MANGLE_KERNEL;
-	} else if (sf->anon) {
-		values.flags |= MANGLE_ANON;
-		values.image_name = mangle_anon(sf->anon);
-		values.anon_name = sf->anon->name;
-	} else {
-		values.image_name = find_cookie(sf->cookie);
-	}
-
-	values.dep_name = get_dep_name(sf);
-	if (!values.dep_name)
-		values.dep_name = values.image_name;
- 
-	/* FIXME: log */
-	if (!values.image_name || !values.dep_name)
-		return NULL;
-
-	if (separate_thread) {
-		values.flags |= MANGLE_TGID | MANGLE_TID;
-		values.tid = sf->tid;
-		values.tgid = sf->tgid;
-	}
- 
-	if (separate_cpu) {
-		values.flags |= MANGLE_CPU;
-		values.cpu = sf->cpu;
-	}
-
-	if (cg) {
-		values.flags |= MANGLE_CALLGRAPH;
-		if (last->kernel) {
-			values.cg_image_name = last->kernel->name;
-		} else if (last->anon) {
-			values.flags |= MANGLE_CG_ANON;
-			values.cg_image_name = mangle_anon(last->anon);
-			values.anon_name = last->anon->name;
-		} else {
-			values.cg_image_name = find_cookie(last->cookie);
-		}
-
-		/* FIXME: log */
-		if (!values.cg_image_name) {
-			if (values.flags & MANGLE_ANON)
-				free((char *)values.image_name);
-			return NULL;
-		}
-	}
-
-	values.event_name = event->name;
-	values.count = event->count;
-	values.unit_mask = event->um;
-
-	mangled = op_mangle_filename(&values);
-
-	if (values.flags & MANGLE_ANON)
-		free((char *)values.image_name);
-	if (values.flags & MANGLE_CG_ANON)
-		free((char *)values.cg_image_name);
-	return mangled;
-}
-
-
-int opd_open_sample_file(odb_t *file, struct sfile *last,
-                         struct sfile * sf, int counter, int cg)
-{
-	char * mangled;
-	char const * binary;
-	int spu_profile = 0;
-	vma_t last_start = 0;
-	int err;
-
-	mangled = mangle_filename(last, sf, counter, cg);
-
-	if (!mangled)
-		return EINVAL;
-
-	verbprintf(vsfile, "Opening \"%s\"\n", mangled);
-
-	create_path(mangled);
-
-	/* locking sf will lock associated cg files too */
-	sfile_get(sf);
-	if (sf != last)
-		sfile_get(last);
-
-retry:
-	err = odb_open(file, mangled, ODB_RDWR, sizeof(struct opd_header));
-
-	/* This can naturally happen when racing against opcontrol --reset. */
-	if (err) {
-		if (err == EMFILE) {
-			if (sfile_lru_clear()) {
-				printf("LRU cleared but odb_open() fails for %s.\n", mangled);
-				abort();
-			}
-			goto retry;
-		}
-
-		fprintf(stderr, "oprofiled: open of %s failed: %s\n",
-		        mangled, strerror(err));
-		goto out;
-	}
-
-	if (!sf->kernel)
-		binary = find_cookie(sf->cookie);
-	else
-		binary = sf->kernel->name;
-
-	if (last && last->anon)
-		last_start = last->anon->start;
-
-	if (sf->embedded_offset != UNUSED_EMBEDDED_OFFSET)
-		spu_profile = 1;
-
-	fill_header(odb_get_data(file), counter,
-		    sf->anon ? sf->anon->start : 0, last_start,
-		    !!sf->kernel, last ? !!last->kernel : 0,
-		    spu_profile, sf->embedded_offset,
-		    binary ? op_get_mtime(binary) : 0);
-
-out:
-	sfile_put(sf);
-	if (sf != last)
-		sfile_put(last);
-	free(mangled);
-	return err;
-}
diff --git a/daemon/opd_mangling.h b/daemon/opd_mangling.h
deleted file mode 100644
index d1b2a78..0000000
--- a/daemon/opd_mangling.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/**
- * @file daemon/opd_mangling.h
- * Mangling and opening of sample files
- *
- * @remark Copyright 2002 OProfile authors
- * @remark Read the file COPYING
- *
- * @author John Levon
- * @author Philippe Elie
- */
-
-#ifndef OPD_MANGLING_H
-#define OPD_MANGLING_H
-
-#include "odb.h"
-
-struct sfile;
-
-/*
- * opd_open_sample_file - open a sample file
- * @param sf  sfile to open sample file for
- * @param counter  counter number
- * @param cg if this is a callgraph file
- *
- * Open image sample file for the sfile, counter
- * counter and set up memory mappings for it.
- *
- * Returns 0 on success.
- */
-int opd_open_sample_file(odb_t *file, struct sfile *last,
-                         struct sfile * sf, int counter, int cg);
-
-#endif /* OPD_MANGLING_H */
diff --git a/daemon/opd_perfmon.c b/daemon/opd_perfmon.c
deleted file mode 100644
index 97f307c..0000000
--- a/daemon/opd_perfmon.c
+++ /dev/null
@@ -1,522 +0,0 @@
-/**
- * @file opd_perfmon.c
- * perfmonctl() handling
- *
- * @remark Copyright 2003 OProfile authors
- * @remark Read the file COPYING
- *
- * @author John Levon
- */
-
-#ifdef __ia64__
-
-/* need this for sched_setaffinity() in <sched.h> */
-#define _GNU_SOURCE
-
-#include "oprofiled.h"
-#include "opd_perfmon.h"
-#include "opd_events.h"
-
-#include "op_cpu_type.h"
-#include "op_libiberty.h"
-#include "op_hw_config.h"
-
-#include <sys/syscall.h>
-#include <sys/wait.h>
-#include <unistd.h>
-#include <limits.h>
-#include <signal.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <errno.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#ifdef HAVE_SCHED_SETAFFINITY
-#include <sched.h>
-#endif
-
-extern op_cpu cpu_type;
-
-#ifndef HAVE_SCHED_SETAFFINITY
-
-/* many glibc's are not yet up to date */
-#ifndef __NR_sched_setaffinity
-#define __NR_sched_setaffinity 1231
-#endif
-
-/* Copied from glibc's <sched.h> and <bits/sched.h> and munged */
-#define CPU_SETSIZE	1024
-#define __NCPUBITS	(8 * sizeof (unsigned long))
-typedef struct
-{
-	unsigned long __bits[CPU_SETSIZE / __NCPUBITS];
-} cpu_set_t;
-
-#define CPU_SET(cpu, cpusetp) \
-	((cpusetp)->__bits[(cpu)/__NCPUBITS] |= (1UL << ((cpu) % __NCPUBITS)))
-#define CPU_ZERO(cpusetp) \
-	memset((cpusetp), 0, sizeof(cpu_set_t))
-
-static int
-sched_setaffinity(pid_t pid, size_t len, cpu_set_t const * cpusetp)
-{
-	return syscall(__NR_sched_setaffinity, pid, len, cpusetp);
-}
-#endif
-
-
-#ifndef HAVE_PERFMONCTL
-#ifndef __NR_perfmonctl
-#define __NR_perfmonctl 1175
-#endif
-
-static int perfmonctl(int fd, int cmd, void * arg, int narg)
-{
-	return syscall(__NR_perfmonctl, fd, cmd, arg, narg);
-}
-#endif
-
-
-static unsigned char uuid[16] = {
-	0x77, 0x7a, 0x6e, 0x61, 0x20, 0x65, 0x73, 0x69,
-	0x74, 0x6e, 0x72, 0x20, 0x61, 0x65, 0x0a, 0x6c
-};
-
-
-static size_t nr_cpus;
-
-struct child {
-	pid_t pid;
-	int up_pipe[2];
-	int ctx_fd;
-	sig_atomic_t sigusr1;
-	sig_atomic_t sigusr2;
-	sig_atomic_t sigterm;
-};
-
-static struct child * children;
-
-static void perfmon_start_child(int ctx_fd)
-{
-	if (perfmonctl(ctx_fd, PFM_START, 0, 0) == -1) {
-		exit(EXIT_FAILURE);
-	}
-}
-
-
-static void perfmon_stop_child(int ctx_fd)
-{
-	if (perfmonctl(ctx_fd, PFM_STOP, 0, 0) == -1) {
-		exit(EXIT_FAILURE);
-	}
-}
-
-
-static void child_sigusr1(int val __attribute__((unused)))
-{
-	size_t i;
-
-	for (i = 0; i < nr_cpus; ++i) {
-		if (children[i].pid == getpid()) {
-			children[i].sigusr1 = 1;
-			return;
-		}
-	}
-}
-
-
-static void child_sigusr2(int val __attribute__((unused)))
-{
-	size_t i;
-
-	for (i = 0; i < nr_cpus; ++i) {
-		if (children[i].pid == getpid()) {
-			children[i].sigusr2 = 1;
-			return;
-		}
-	}
-}
-
-
-static void child_sigterm(int val __attribute__((unused)))
-{
-	kill(getppid(), SIGTERM);
-}
-
-
-static void set_affinity(size_t cpu)
-{
-	cpu_set_t set;
-	int err;
-
-	CPU_ZERO(&set);
-	CPU_SET(cpu, &set);
-
-	err = sched_setaffinity(getpid(), sizeof(set), &set);
-
-	if (err == -1) {
-		perror("Failed to set affinity");
-		exit(EXIT_FAILURE);
-	}
-}
-
-
-static void setup_signals(void)
-{
-	struct sigaction act;
-	sigset_t mask;
-
-	sigemptyset(&mask);
-	sigaddset(&mask, SIGUSR1);
-	sigaddset(&mask, SIGUSR2);
-	sigprocmask(SIG_BLOCK, &mask, NULL);
-
-	act.sa_handler = child_sigusr1;
-	act.sa_flags = 0;
-	sigemptyset(&act.sa_mask);
-
-	if (sigaction(SIGUSR1, &act, NULL)) {
-		perror("oprofiled: install of SIGUSR1 handler failed");
-		exit(EXIT_FAILURE);
-	}
-
-	act.sa_handler = child_sigusr2;
-	act.sa_flags = 0;
-	sigemptyset(&act.sa_mask);
-
-	if (sigaction(SIGUSR2, &act, NULL)) {
-		perror("oprofiled: install of SIGUSR2 handler failed");
-		exit(EXIT_FAILURE);
-	}
-
-	act.sa_handler = child_sigterm;
-	act.sa_flags = 0;
-	sigemptyset(&act.sa_mask);
-
-	if (sigaction(SIGTERM, &act, NULL)) {
-		perror("oprofiled: install of SIGTERM handler failed");
-		exit(EXIT_FAILURE);
-	}
-}
-
-
-/** create the per-cpu context */
-static void create_context(struct child * self)
-{
-	pfarg_context_t ctx;
-	int err;
-
-	memset(&ctx, 0, sizeof(pfarg_context_t));
-	memcpy(&ctx.ctx_smpl_buf_id, &uuid, 16);
-	ctx.ctx_flags = PFM_FL_SYSTEM_WIDE;
-
-	err = perfmonctl(0, PFM_CREATE_CONTEXT, &ctx, 1);
-	if (err == -1) {
-		perror("CREATE_CONTEXT failed");
-		exit(EXIT_FAILURE);
-	}
-
-	self->ctx_fd = ctx.ctx_fd;
-}
-
-
-/** program the perfmon counters */
-static void write_pmu(struct child * self)
-{
-	pfarg_reg_t pc[OP_MAX_COUNTERS];
-	pfarg_reg_t pd[OP_MAX_COUNTERS];
-	int err;
-	size_t i;
-
-	memset(pc, 0, sizeof(pc));
-	memset(pd, 0, sizeof(pd));
-
-#define PMC_GEN_INTERRUPT (1UL << 5)
-#define PMC_PRIV_MONITOR (1UL << 6)
-/* McKinley requires pmc4 to have bit 23 set (enable PMU).
- * It is supposedly ignored in other pmc registers.
- */
-#define PMC_MANDATORY (1UL << 23)
-#define PMC_USER (1UL << 3)
-#define PMC_KERNEL (1UL << 0)
-	for (i = 0; i < op_nr_counters && opd_events[i].name; ++i) {
-		struct opd_event * event = &opd_events[i];
-		pc[i].reg_num = event->counter + 4;
-		pc[i].reg_value = PMC_GEN_INTERRUPT;
-		pc[i].reg_value |= PMC_PRIV_MONITOR;
-		pc[i].reg_value |= PMC_MANDATORY;
-		(event->user) ? (pc[i].reg_value |= PMC_USER)
-		              : (pc[i].reg_value &= ~PMC_USER);
-		(event->kernel) ? (pc[i].reg_value |= PMC_KERNEL)
-		                : (pc[i].reg_value &= ~PMC_KERNEL);
-		pc[i].reg_value &= ~(0xff << 8);
-		pc[i].reg_value |= ((event->value & 0xff) << 8);
-		pc[i].reg_value &= ~(0xf << 16);
-		pc[i].reg_value |= ((event->um & 0xf) << 16);
-		pc[i].reg_smpl_eventid = event->counter;
-	}
-
-	for (i = 0; i < op_nr_counters && opd_events[i].name; ++i) {
-		struct opd_event * event = &opd_events[i];
-		pd[i].reg_value = ~0UL - event->count + 1;
-		pd[i].reg_short_reset = ~0UL - event->count + 1;
-		pd[i].reg_num = event->counter + 4;
-	}
-
-	err = perfmonctl(self->ctx_fd, PFM_WRITE_PMCS, pc, i);
-	if (err == -1) {
-		perror("Couldn't write PMCs");
-		exit(EXIT_FAILURE);
-	}
-
-	err = perfmonctl(self->ctx_fd, PFM_WRITE_PMDS, pd, i);
-	if (err == -1) {
-		perror("Couldn't write PMDs");
-		exit(EXIT_FAILURE);
-	}
-}
-
-
-static void load_context(struct child * self)
-{
-	pfarg_load_t load_args;
-	int err;
-
-	memset(&load_args, 0, sizeof(load_args));
-	load_args.load_pid = self->pid;
-
-	err = perfmonctl(self->ctx_fd, PFM_LOAD_CONTEXT, &load_args, 1);
-	if (err == -1) {
-		perror("Couldn't load context");
-		exit(EXIT_FAILURE);
-	}
-}
-
-
-static void notify_parent(struct child * self, size_t cpu)
-{
-	for (;;) {
-		ssize_t ret;
-		ret = write(self->up_pipe[1], &cpu, sizeof(size_t));
-		if (ret == sizeof(size_t))
-			break;
-		if (ret < 0 && errno != EINTR) {
-			perror("Failed to write child pipe:");
-			exit(EXIT_FAILURE);
-		}
-	}
-}
-
-static struct child * inner_child;
-void close_pipe(void)
-{
-	close(inner_child->up_pipe[1]);
-}
-
-static void run_child(size_t cpu)
-{
-	struct child * self = &children[cpu];
-
-	self->pid = getpid();
-	self->sigusr1 = 0;
-	self->sigusr2 = 0;
-	self->sigterm = 0;
-
-	inner_child = self;
-	if (atexit(close_pipe)){
-		close_pipe();
-		exit(EXIT_FAILURE);
-	}
-
-	umask(0);
-	/* Change directory to allow directory to be removed */
-	if (chdir("/") < 0) {
-		perror("Unable to chdir to \"/\"");
-		exit(EXIT_FAILURE);
-	}
-
-	setup_signals();
-
-	set_affinity(cpu);
-
-	create_context(self);
-
-	write_pmu(self);
-
-	load_context(self);
-
-	notify_parent(self, cpu);
-
-	/* Redirect standard files to /dev/null */
-	freopen( "/dev/null", "r", stdin);
-	freopen( "/dev/null", "w", stdout);
-	freopen( "/dev/null", "w", stderr);
-
-	for (;;) {
-		sigset_t sigmask;
-		sigfillset(&sigmask);
-		sigdelset(&sigmask, SIGUSR1);
-		sigdelset(&sigmask, SIGUSR2);
-		sigdelset(&sigmask, SIGTERM);
-
-		if (self->sigusr1) {
-			perfmon_start_child(self->ctx_fd);
-			self->sigusr1 = 0;
-		}
-
-		if (self->sigusr2) {
-			perfmon_stop_child(self->ctx_fd);
-			self->sigusr2 = 0;
-		}
-
-		sigsuspend(&sigmask);
-	}
-}
-
-
-static void wait_for_child(struct child * child)
-{
-	size_t tmp;
-	for (;;) {
-		ssize_t ret;
-		ret = read(child->up_pipe[0], &tmp, sizeof(size_t));
-		if (ret == sizeof(size_t))
-			break;
-		if ((ret < 0 && errno != EINTR) || ret == 0 ) {
-			perror("Failed to read child pipe");
-			exit(EXIT_FAILURE);
-		}
-	}
-	printf("Perfmon child up on CPU%d\n", (int)tmp);
-	fflush(stdout);
-
-	close(child->up_pipe[0]);
-}
-
-static struct child* xen_ctx;
-
-void perfmon_init(void)
-{
-	size_t i;
-	long nr;
-
-	if (cpu_type == CPU_TIMER_INT)
-		return;
-
-	if (!no_xen) {
-		xen_ctx = xmalloc(sizeof(struct child));
-		xen_ctx->pid = getpid();
-		xen_ctx->up_pipe[0] = -1;
-		xen_ctx->up_pipe[1] = -1;
-		xen_ctx->sigusr1 = 0;
-		xen_ctx->sigusr2 = 0;
-		xen_ctx->sigterm = 0;
-
-		create_context(xen_ctx);
-
-		write_pmu(xen_ctx);
-		
-		load_context(xen_ctx);
-		return;
-	}
-	
-
-	nr = sysconf(_SC_NPROCESSORS_ONLN);
-	if (nr == -1) {
-		fprintf(stderr, "Couldn't determine number of CPUs.\n");
-		exit(EXIT_FAILURE);
-	}
-
-	nr_cpus = nr;
-
-	children = xmalloc(sizeof(struct child) * nr_cpus);
-	bzero(children, sizeof(struct child) * nr_cpus);
-
-	for (i = 0; i < nr_cpus; ++i) {
-		int ret;
-
-		if (pipe(children[i].up_pipe)) {
-			perror("Couldn't create child pipe");
-			exit(EXIT_FAILURE);
-		}
-
-		ret = fork();
-		if (ret == -1) {
-			perror("Couldn't fork perfmon child");
-			exit(EXIT_FAILURE);
-		} else if (ret == 0) {
-			close(children[i].up_pipe[0]);
-			run_child(i);
-		} else {
-			children[i].pid = ret;
-			close(children[i].up_pipe[1]);
-			printf("Waiting on CPU%d\n", (int)i);
-			wait_for_child(&children[i]);
-		}
-	}
-}
-
-
-void perfmon_exit(void)
-{
-	size_t i;
-
-	if (cpu_type == CPU_TIMER_INT)
-		return;
-
-	if (!no_xen)
-		return;
-
-	for (i = 0; i < nr_cpus; ++i) {
-		if (children[i].pid) {
-			int c_pid = children[i].pid;
-			children[i].pid = 0;
-			if (kill(c_pid, SIGKILL)==0)
-				waitpid(c_pid, NULL, 0);
-		}
-	}
-}
-
-
-void perfmon_start(void)
-{
-	size_t i;
-
-	if (cpu_type == CPU_TIMER_INT)
-		return;
-
-	if (!no_xen) {
-		perfmon_start_child(xen_ctx->ctx_fd);
-		return;
-	}
-
-	for (i = 0; i < nr_cpus; ++i) {
-		if (kill(children[i].pid, SIGUSR1)) {
-			perror("Unable to start perfmon");
-			exit(EXIT_FAILURE);
-		}
-	}
-}
-
-
-void perfmon_stop(void)
-{
-	size_t i;
-
-	if (cpu_type == CPU_TIMER_INT)
-		return;
-
-	if (!no_xen) {
-		perfmon_stop_child(xen_ctx->ctx_fd);
-		return;
-	}
-	
-	for (i = 0; i < nr_cpus; ++i)
-		if (kill(children[i].pid, SIGUSR2)) {
-			perror("Unable to stop perfmon");
-			exit(EXIT_FAILURE);
-		}
-}
-
-#endif /* __ia64__ */
diff --git a/daemon/opd_perfmon.h b/daemon/opd_perfmon.h
deleted file mode 100644
index 9b4267f..0000000
--- a/daemon/opd_perfmon.h
+++ /dev/null
@@ -1,106 +0,0 @@
-/**
- * @file opd_perfmon.h
- * perfmonctl() handling
- *
- * @remark Copyright 2003 OProfile authors
- * @remark Read the file COPYING
- *
- * @author John Levon
- */
-
-#ifndef OPD_PERFMON_H
-#define OPD_PERFMON_H
-
-#ifdef __ia64__
-
-#include <stdlib.h>
-
-void perfmon_init(void);
-void perfmon_exit(void);
-void perfmon_start(void);
-void perfmon_stop(void);
-
-/* The following is from asm/perfmon.h. When it's installed on
- * enough boxes, we can remove this and include the platform
- * perfmon.h
- */
-
-typedef unsigned char pfm_uuid_t[16];	/* custom sampling buffer identifier type */
-
-/*
- * Request structure used to define a context
- */
-typedef struct {
-	pfm_uuid_t     ctx_smpl_buf_id;	 /* which buffer format to use (if needed) */
-	unsigned long  ctx_flags;	 /* noblock/block */
-	unsigned short ctx_nextra_sets;	 /* number of extra event sets (you always get 1) */
-	unsigned short ctx_reserved1;	 /* for future use */
-	int	       ctx_fd;		 /* return arg: unique identification for context */
-	void	       *ctx_smpl_vaddr;	 /* return arg: virtual address of sampling buffer, is used */
-	unsigned long  ctx_reserved2[11];/* for future use */
-} pfarg_context_t;
-
-/*
- * Request structure used to write/read a PMC or PMD
- */
-typedef struct {
-	unsigned int	reg_num;	   /* which register */
-	unsigned short	reg_set;	   /* event set for this register */
-	unsigned short	reg_reserved1;	   /* for future use */
-
-	unsigned long	reg_value;	   /* initial pmc/pmd value */
-	unsigned long	reg_flags;	   /* input: pmc/pmd flags, return: reg error */
-
-	unsigned long	reg_long_reset;	   /* reset after buffer overflow notification */
-	unsigned long	reg_short_reset;   /* reset after counter overflow */
-
-	unsigned long	reg_reset_pmds[4]; /* which other counters to reset on overflow */
-	unsigned long	reg_random_seed;   /* seed value when randomization is used */
-	unsigned long	reg_random_mask;   /* bitmask used to limit random value */
-	unsigned long   reg_last_reset_val;/* return: PMD last reset value */
-
-	unsigned long	reg_smpl_pmds[4];  /* which pmds are accessed when PMC overflows */
-	unsigned long	reg_smpl_eventid;  /* opaque sampling event identifier */
-
-	unsigned long   reg_reserved2[3];   /* for future use */
-} pfarg_reg_t;
-
-typedef struct {
-	pid_t		load_pid;	   /* process to load the context into */
-	unsigned short	load_set;	   /* first event set to load */
-	unsigned short	load_reserved1;	   /* for future use */
-	unsigned long	load_reserved2[3]; /* for future use */
-} pfarg_load_t;
-
-#define PFM_WRITE_PMCS      0x01
-#define PFM_WRITE_PMDS      0x02
-#define PFM_STOP            0x04
-#define PFM_START           0x05
-#define PFM_CREATE_CONTEXT  0x08
-#define PFM_LOAD_CONTEXT    0x10
-#define PFM_FL_SYSTEM_WIDE  0x02
-
-#else
-
-void perfmon_init(void)
-{
-}
-
-
-void perfmon_exit(void)
-{
-}
-
-
-void perfmon_start(void)
-{
-}
-
-
-void perfmon_stop(void)
-{
-}
-
-#endif /* __ia64__ */
-
-#endif /* OPD_PERFMON_H */
diff --git a/daemon/opd_pipe.c b/daemon/opd_pipe.c
deleted file mode 100644
index 3c81979..0000000
--- a/daemon/opd_pipe.c
+++ /dev/null
@@ -1,99 +0,0 @@
-/**
- * @file daemon/opd_pipe.c
- * Functions handling the $SESSIONDIR/opd_pipe FIFO special file.
- * NOTE: This code is dealing with potentially insecure input.
- *
- * @remark Copyright 2008 OProfile authors
- * @remark Read the file COPYING
- *
- * @author Daniel Hansel
- */
-
-#include "opd_pipe.h"
-#include "opd_printf.h"
-#include "op_config.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include <errno.h>
-#include <sys/stat.h>
-
-static int fifo;
-static FILE * fifo_fd = NULL;
-
-void opd_create_pipe(void)
-{
-	mode_t orig_umask = umask(0111);
-	if (mkfifo(op_pipe_file, 0666) == -1) {
-		if (errno != EEXIST) {
-			perror("oprofiled: couldn't create pipe: ");
-			exit(EXIT_FAILURE);
-		}
-	}
-	umask(orig_umask);
-}
-
-
-void opd_open_pipe(void)
-{
-	fifo = open(op_pipe_file, O_RDONLY | O_NONBLOCK);
-	if (fifo == -1) {
-		perror("oprofiled: couldn't open pipe: ");
-		exit(EXIT_FAILURE);
-	}
-}
-
-
-void opd_close_pipe(void)
-{
-	if (fifo_fd)
-		fclose(fifo_fd);
-	close(fifo);
-}
-
-
-int is_jitconv_requested(void)
-{
-	/* number of dropped (unknown) requests */
-	static long nr_drops = 0;
-	/* modulus to output only a few warnings to avoid flooding oprofiled.log */
-	static int mod_cnt_drops = 1;
-	char line[256];
-	int i, ret = 0;
-
-	/* get a file descriptor to the pipe */
-	if (!fifo_fd)
-		fifo_fd = fdopen(fifo, "r");
-
-	if (fifo_fd == NULL) {
-		perror("oprofiled: couldn't create file descriptor: ");
-		exit(EXIT_FAILURE);
-	}
-
-	/* read up to 99 lines to check for 'do_jitconv' */
-	for (i = 0; i < 99; i++) {
-		/* just break if no new line is found */
-		if (fgets(line, 256, fifo_fd) == NULL)
-			break;
-		line[strlen(line) - 1] = '\0';
-
-		if (strstr(line, "do_jitconv") != NULL) {
-			ret = 1;
-		} else {
-			nr_drops++;
-
-			if (nr_drops % mod_cnt_drops == 0) {
-				printf(
-				       "Warning: invalid pipe request received (dropped request(s): %ld)\n",
-				       nr_drops);
-				/* increase modulus to avoid flooding log file */
-				mod_cnt_drops *= 5;
-			}
-		}
-	}
-
-	return ret;
-}
diff --git a/daemon/opd_pipe.h b/daemon/opd_pipe.h
deleted file mode 100644
index 7f96b07..0000000
--- a/daemon/opd_pipe.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/**
- * @file daemon/opd_pipe.h
- * Functions handling the $SESSIONDIR/opd_pipe FIFO special file.
- * NOTE: This code is dealing with potencially insecure input.
- *
- * @remark Copyright 2008 OProfile authors
- * @remark Read the file COPYING
- *
- * @author Daniel Hansel
- */
-
-#ifndef OPD_PIPE_H_
-#define OPD_PIPE_H_
- 
-/**
- * opd_create_pipe - creates the oprofiled fifo file
- *
- * Creates the Oprofile daemon fifo pipe to enable communication between
- * the daemon and the 'opcontrol --dump' command. Failure to create the pipe
- * is a fatal error.
- */
-void opd_create_pipe(void);
-
-/**
- * opd_open_pipe - opens the oprofiled fifo file
- */
-void opd_open_pipe(void);
-
-/**
- * opd_close_pipe - closes the oprofiled fifo file
- *
- * Closes the Oprofile daemon fifo pipe.
- */
-void opd_close_pipe(void);
-
-/**
- * is_jitconv_requested - check for request to jit conversion
- *
- * Checks the Oprofile daemon fifo pipe for do_jitconv request.
- * If jit conversion is requested ('do_jitconv' is sent) the check returns 1.
- * Otherwise it returns 0.
- */
-int is_jitconv_requested(void);
-
-#endif /*OPD_PIPE_H_*/
diff --git a/daemon/opd_printf.h b/daemon/opd_printf.h
deleted file mode 100644
index 09df07f..0000000
--- a/daemon/opd_printf.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/**
- * @file daemon/opd_printf.h
- * Output routines
- *
- * @remark Copyright 2002 OProfile authors
- * @remark Read the file COPYING
- *
- * @author John Levon
- * @author Philippe Elie
- */
-
-#ifndef OPD_PRINTF_H
-#define OPD_PRINTF_H
-
-/// log all sample file name manipulation; sample files open, close,
-/// sfile LRU etc. voluminous. FIXME need to be splitted (filename manip, files
-/// handling) ?
-extern int vsfile;
-/// log samples, voluminous.
-extern int vsamples;
-/// log arc, very voluminous.
-extern int varcs;
-/// kernel module handling
-extern int vmodule;
-/// extended feature
-extern int vext;
-/// all others not fitting in above category, not voluminous.
-extern int vmisc;
-
-#define verbprintf(x, args...) \
-	do { \
-		/* look like fragile but we must catch verbrintf("%s", "") */ \
-		if (x == 1) \
-			printf(args); \
-	} while (0)
-
-#endif /* OPD_PRINTF_H */
diff --git a/daemon/opd_sfile.c b/daemon/opd_sfile.c
deleted file mode 100644
index 16b50ad..0000000
--- a/daemon/opd_sfile.c
+++ /dev/null
@@ -1,750 +0,0 @@
-/**
- * @file daemon/opd_sfile.c
- * Management of sample files
- *
- * @remark Copyright 2002, 2005 OProfile authors
- * @remark Read the file COPYING
- *
- * @author John Levon
- * @author Philippe Elie
- */
-
-#include "opd_sfile.h"
-
-#include "opd_trans.h"
-#include "opd_kernel.h"
-#include "opd_mangling.h"
-#include "opd_anon.h"
-#include "opd_printf.h"
-#include "opd_stats.h"
-#include "opd_extended.h"
-#include "oprofiled.h"
-
-#include "op_libiberty.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-
-#define HASH_SIZE 2048
-#define HASH_BITS (HASH_SIZE - 1)
-
-/** All sfiles are hashed into these lists */
-static struct list_head hashes[HASH_SIZE];
-
-/* This data structure is used to help us determine when we should
- * discard user context kernel samples for which we no longer have
- * an app name to which we can attribute them.  This can happen (especially
- * on a busy system) in the following scenario:
- *  - A user context switch occurs.
- *  - User context kernel samples are recorded for this process.
- *  - The user process ends.
- *  - The above-mentioned sample information is first recorded into the per-CPU
- *  buffer and later transferred to the main event buffer.  Since the process
- *  for which this context switch was recorded ended before the transfer
- *  occurs, the app cookie that is recorded into the event buffer along with the
- *  CTX_SWITCH_CODE will be set to NO_COOKIE. When the oprofile userspace daemon
- *  processes the CTX_SWITCH_CODE, it sets trans->app_cookie to NO_COOKIE and then
- *  continues to process the kernel samples. But having no appname in order to
- *  locate the appropriate sample file, it creates a new sample file of the form:
- *  <session_dir>/current/{kern}/<some_kernel_image>/{dep}/{kern}/<some_kernel_image>/<event_spec>.<tgid>.<tid>.<cpu>
- *
- *  This is not really an invalid form for sample files, since it is certainly valid for
- *  oprofile to collect samples for kernel threads that are not running in any process context.
- *  Such samples would be stored in sample files like this, and opreport would show those
- *  samples as having an appname of "<some_kernel_image>".  But if the tgid/tid info for
- *  the sample is from a defunct user process, we should discard these samples.  Not doing so
- *  can lead to strange results when generating reports by tgid/tid (i.e., appname of
- *  "<some_kernel_image>" instead of the real app name associated with the given tgid/tid.
- *  The following paragraph describes the technique for identifying and discarding such samples.
- *
- * When processing a kernel sample for which trans->app_cookie==NO_COOKIE, we inspect the
- * /proc/<pid>/cmdline file.  Housekeeping types of kernel threads (e.g., kswapd, watchdog)
- * won't have a command line since they exist and operate outside of a process context.
- * However, other kernel "tasks" do operate within a process context (e.g., some kernel
- * driver functions, kernel functions invoked via a syscall, etc.).  When we get samples
- * for the latter type of task but no longer have app name info for the process for which
- * the kernel task is performing work, we cannot correctly attribute those kernel samples
- * to a user application, so they should be discarded.  We classify the two different types
- * of kernel "tasks" based on whether or not the /proc/<pid>/cmdline is empty.  We cache
- * the results in kernel_cmdlines for fast lookup when processing samples.
- */
-static struct list_head kernel_cmdlines[HASH_SIZE];
-struct kern_cmdline {
-	pid_t kern_pid;
-	struct list_head hash;
-	unsigned int has_cmdline;
-};
-
-/** All sfiles are on this list. */
-static LIST_HEAD(lru_list);
-
-
-/* FIXME: can undoubtedly improve this hashing */
-/** Hash the transient parameters for lookup. */
-static unsigned long
-sfile_hash(struct transient const * trans, struct kernel_image * ki)
-{
-	unsigned long val = 0;
-	
-	if (separate_thread) {
-		val ^= trans->tid << 2;
-		val ^= trans->tgid << 2;
-	}
-
-	if (separate_kernel || ((trans->anon || separate_lib) && !ki))
-		val ^= trans->app_cookie >> (DCOOKIE_SHIFT + 3);
-
-	if (separate_cpu)
-		val ^= trans->cpu;
-
-	/* cookie meaningless for kernel, shouldn't hash */
-	if (trans->in_kernel) {
-		val ^= ki->start >> 14;
-		val ^= ki->end >> 7;
-		return val & HASH_BITS;
-	}
-
-	if (trans->cookie != NO_COOKIE) {
-		val ^= trans->cookie >> DCOOKIE_SHIFT;
-		return val & HASH_BITS;
-	}
-
-	if (!separate_thread)
-		val ^= trans->tgid << 2;
-
-	if (trans->anon) {
-		val ^= trans->anon->start >> VMA_SHIFT;
-		val ^= trans->anon->end >> (VMA_SHIFT + 1);
-	}
-
-	return val & HASH_BITS;
-}
-
-
-static int
-do_match(struct sfile const * sf, cookie_t cookie, cookie_t app_cookie,
-         struct kernel_image const * ki, struct anon_mapping const * anon,
-         pid_t tgid, pid_t tid, unsigned int cpu)
-{
-	/* this is a simplified check for "is a kernel image" AND
-	 * "is the right kernel image". Also handles no-vmlinux
-	 * correctly.
-	 */
-	if (sf->kernel != ki)
-		return 0;
-
-	if (separate_thread) {
-		if (sf->tid != tid || sf->tgid != tgid)
-			return 0;
-	}
-
-	if (separate_cpu) {
-		if (sf->cpu != cpu)
-			return 0;
-	}
-
-	if (separate_kernel || ((anon || separate_lib) && !ki)) {
-		if (sf->app_cookie != app_cookie)
-			return 0;
-	}
-
-	/* ignore the cached trans->cookie for kernel images,
-	 * it's meaningless and we checked all others already
-	 */
-	if (ki)
-		return 1;
-
-	if (sf->anon != anon)
-		return 0;
-
-	return sf->cookie == cookie;
-}
-
-
-static int
-trans_match(struct transient const * trans, struct sfile const * sfile,
-            struct kernel_image const * ki)
-{
-	return do_match(sfile, trans->cookie, trans->app_cookie, ki,
-	                trans->anon, trans->tgid, trans->tid, trans->cpu);
-}
-
-
-int
-sfile_equal(struct sfile const * sf, struct sfile const * sf2)
-{
-	return do_match(sf, sf2->cookie, sf2->app_cookie, sf2->kernel,
-	                sf2->anon, sf2->tgid, sf2->tid, sf2->cpu);
-}
-
-
-static int
-is_sf_ignored(struct sfile const * sf)
-{
-	if (sf->kernel) {
-		if (!is_image_ignored(sf->kernel->name))
-			return 0;
-
-		/* Let a dependent kernel image redeem the sf if we're
-		 * executing on behalf of an application.
-		 */
-		return is_cookie_ignored(sf->app_cookie);
-	}
-
-	/* Anon regions are always dependent on the application.
- 	 * Otherwise, let a dependent image redeem the sf.
-	 */
-	if (sf->anon || is_cookie_ignored(sf->cookie))
-		return is_cookie_ignored(sf->app_cookie);
-
-	return 0;
-}
-
-
-/** create a new sfile matching the current transient parameters */
-static struct sfile *
-create_sfile(unsigned long hash, struct transient const * trans,
-             struct kernel_image * ki)
-{
-	size_t i;
-	struct sfile * sf;
-
-	sf = xmalloc(sizeof(struct sfile));
-
-	sf->hashval = hash;
-
-	/* The logic here: if we're in the kernel, the cached cookie is
-	 * meaningless (though not the app_cookie if separate_kernel)
-	 */
-	sf->cookie = trans->in_kernel ? INVALID_COOKIE : trans->cookie;
-	sf->app_cookie = INVALID_COOKIE;
-	sf->tid = (pid_t)-1;
-	sf->tgid = (pid_t)-1;
-	sf->cpu = 0;
-	sf->kernel = ki;
-	sf->anon = trans->anon;
-
-	for (i = 0 ; i < op_nr_counters ; ++i)
-		odb_init(&sf->files[i]);
-
-	if (trans->ext)
-		opd_ext_sfile_create(sf);
-	else
-		sf->ext_files = NULL;
-
-	for (i = 0; i < CG_HASH_SIZE; ++i)
-		list_init(&sf->cg_hash[i]);
-
-	if (separate_thread)
-		sf->tid = trans->tid;
-	if (separate_thread || trans->cookie == NO_COOKIE)
-		sf->tgid = trans->tgid;
-
-	if (separate_cpu)
-		sf->cpu = trans->cpu;
-
-	if (separate_kernel || ((trans->anon || separate_lib) && !ki))
-		sf->app_cookie = trans->app_cookie;
-
-	sf->ignored = is_sf_ignored(sf);
-
-	sf->embedded_offset = trans->embedded_offset;
-
-	/* If embedded_offset is a valid value, it means we're
-	 * processing a Cell BE SPU profile; in which case, we
-	 * want sf->app_cookie to hold trans->app_cookie.
-	 */
-	if (trans->embedded_offset != UNUSED_EMBEDDED_OFFSET)
-		sf->app_cookie = trans->app_cookie;
-	return sf;
-}
-
-
-struct sfile * sfile_find(struct transient const * trans)
-{
-	struct sfile * sf;
-	struct list_head * pos;
-	struct kernel_image * ki = NULL;
-	unsigned long hash;
-
-	/* There is a small race where this *can* happen, see
-	 * caller of cpu_buffer_reset() in the kernel
-	 */
-	if (trans->in_kernel == -1) {
-		verbprintf(vsamples, "Losing sample at 0x%llx of unknown provenance.\n",
-		           trans->pc);
-		opd_stats[OPD_NO_CTX]++;
-		return NULL;
-	}
-
-	/* we might need a kernel image start/end to hash on */
-	if (trans->in_kernel) {
-		ki = find_kernel_image(trans);
-		if (!ki) {
-			verbprintf(vsamples, "Lost kernel sample %llx\n", trans->pc);
-			opd_stats[OPD_LOST_KERNEL]++;
-			return NULL;
-		}
-		// We *know* that PID 0, 1, and 2 are pure kernel context tasks, so
-		// we always want to keep these samples.
-		if ((trans->tgid == 0) || (trans->tgid == 1) || (trans->tgid == 2))
-			goto find_sfile;
-
-		// Decide whether or not this kernel sample should be discarded.
-		// See detailed description above where the kernel_cmdlines hash
-		// table is defined.
-		if (trans->app_cookie == NO_COOKIE) {
-			int found = 0;
-			struct kern_cmdline * kcmd;
-			hash = (trans->tgid << 2) & HASH_BITS;
-			list_for_each(pos, &kernel_cmdlines[hash]) {
-				kcmd = list_entry(pos, struct kern_cmdline, hash);
-				if (kcmd->kern_pid == trans->tgid) {
-					found = 1;
-					if (kcmd->has_cmdline) {
-						verbprintf(vsamples,
-						           "Dropping user context kernel sample 0x%llx "
-						           "for process %u due to no app cookie available.\n",
-						           (unsigned long long)trans->pc, trans->tgid);
-						opd_stats[OPD_NO_APP_KERNEL_SAMPLE]++;
-						return NULL;
-					}
-					break;
-				}
-			}
-			if (!found) {
-				char name[32], dst[8];
-				int fd, dropped = 0;
-				kcmd = (struct kern_cmdline *)xmalloc(sizeof(*kcmd));
-				kcmd->kern_pid = trans->tgid;
-				snprintf(name, sizeof name, "/proc/%u/cmdline", trans->tgid);
-				fd = open(name, O_RDONLY);
-				if(fd==-1) {
-					// Most likely due to process ending, so we'll assume it used to have a cmdline
-					kcmd->has_cmdline = 1;
-					verbprintf(vsamples,
-					           "Open of /proc/%u/cmdline failed, so dropping "
-					           "kernel sameple 0x%llx\n",
-					           trans->tgid, (unsigned long long)trans->pc);
-					opd_stats[OPD_NO_APP_KERNEL_SAMPLE]++;
-					dropped = 1;
-				} else {
-					if((read(fd, dst, 8) < 1)) {
-						verbprintf(vsamples, "No cmdline for PID %u\n", trans->tgid);
-						kcmd->has_cmdline = 0;
-					} else {
-						// This *really* shouldn't happen.  If it does, then why don't
-						// we have an app_cookie?
-						dst[7] = '\0';
-						verbprintf(vsamples, "Start of cmdline for PID %u is %s\n", trans->tgid, dst);
-						kcmd->has_cmdline = 1;
-						opd_stats[OPD_NO_APP_KERNEL_SAMPLE]++;
-						dropped = 1;
-					}
-				}
-				list_add(&kcmd->hash, &kernel_cmdlines[hash]);
-				if (dropped)
-					return NULL;
-			}
-		}
-	} else if (trans->cookie == NO_COOKIE && !trans->anon) {
-		if (vsamples) {
-			char const * app = verbose_cookie(trans->app_cookie);
-			printf("No anon map for pc %llx, app %s.\n",
-			       trans->pc, app);
-		}
-		opd_stats[OPD_LOST_NO_MAPPING]++;
-		return NULL;
-	}
-
-find_sfile:
-	hash = sfile_hash(trans, ki);
-	list_for_each(pos, &hashes[hash]) {
-		sf = list_entry(pos, struct sfile, hash);
-		if (trans_match(trans, sf, ki)) {
-			sfile_get(sf);
-			goto lru;
-		}
-	}
-
-	sf = create_sfile(hash, trans, ki);
-	list_add(&sf->hash, &hashes[hash]);
-
-lru:
-	sfile_put(sf);
-	return sf;
-}
-
-
-void sfile_dup(struct sfile * to, struct sfile * from)
-{
-	size_t i;
-
-	memcpy(to, from, sizeof (struct sfile));
-
-	for (i = 0 ; i < op_nr_counters ; ++i)
-		odb_init(&to->files[i]);
-
-	opd_ext_sfile_dup(to, from);
-
-	for (i = 0; i < CG_HASH_SIZE; ++i)
-		list_init(&to->cg_hash[i]);
-
-	list_init(&to->hash);
-	list_init(&to->lru);
-}
-
-
-static odb_t * get_file(struct transient const * trans, int is_cg)
-{
-	struct sfile * sf = trans->current;
-	struct sfile * last = trans->last;
-	struct cg_entry * cg;
-	struct list_head * pos;
-	unsigned long hash;
-	odb_t * file;
-
-	if ((trans->ext) != NULL)
-		return opd_ext_sfile_get(trans, is_cg);
-
-	if (trans->event >= op_nr_counters) {
-		fprintf(stderr, "%s: Invalid counter %lu\n", __FUNCTION__,
-			trans->event);
-		abort();
-	}
-
-	file = &sf->files[trans->event];
-
-	if (!is_cg)
-		goto open;
-
-	hash = last->hashval & (CG_HASH_SIZE - 1);
-
-	/* Need to look for the right 'to'. Since we're looking for
-	 * 'last', we use its hash.
-	 */
-	list_for_each(pos, &sf->cg_hash[hash]) {
-		cg = list_entry(pos, struct cg_entry, hash);
-		if (sfile_equal(last, &cg->to)) {
-			file = &cg->to.files[trans->event];
-			goto open;
-		}
-	}
-
-	cg = xmalloc(sizeof(struct cg_entry));
-	sfile_dup(&cg->to, last);
-	list_add(&cg->hash, &sf->cg_hash[hash]);
-	file = &cg->to.files[trans->event];
-
-open:
-	if (!odb_open_count(file))
-		opd_open_sample_file(file, last, sf, trans->event, is_cg);
-
-	/* Error is logged by opd_open_sample_file */
-	if (!odb_open_count(file))
-		return NULL;
-
-	return file;
-}
-
-
-static void verbose_print_sample(struct sfile * sf, vma_t pc, uint counter)
-{
-	char const * app = verbose_cookie(sf->app_cookie);
-	printf("0x%llx(%u): ", pc, counter);
-	if (sf->anon) {
-		printf("anon (tgid %u, 0x%llx-0x%llx), ",
-		       (unsigned int)sf->anon->tgid,
-		       sf->anon->start, sf->anon->end);
-	} else if (sf->kernel) {
-		printf("kern (name %s, 0x%llx-0x%llx), ", sf->kernel->name,
-		       sf->kernel->start, sf->kernel->end);
-	} else {
-		printf("%s(%llx), ", verbose_cookie(sf->cookie),  sf->cookie);
-	}
-	printf("app %s(%llx)", app, sf->app_cookie);
-}
-
-
-static void verbose_sample(struct transient const * trans, vma_t pc)
-{
-	printf("Sample ");
-	verbose_print_sample(trans->current, pc, trans->event);
-	printf("\n");
-}
-
-
-static void
-verbose_arc(struct transient const * trans, vma_t from, vma_t to)
-{
-	printf("Arc ");
-	verbose_print_sample(trans->current, from, trans->event);
-	printf(" -> 0x%llx", to);
-	printf("\n");
-}
-
-
-static void sfile_log_arc(struct transient const * trans)
-{
-	int err;
-	vma_t from = trans->pc;
-	vma_t to = trans->last_pc;
-	uint64_t key;
-	odb_t * file;
-
-	file = get_file(trans, 1);
-
-	/* absolute value -> offset */
-	if (trans->current->kernel)
-		from -= trans->current->kernel->start;
-
-	if (trans->last->kernel)
-		to -= trans->last->kernel->start;
-
-	if (trans->current->anon)
-		from -= trans->current->anon->start;
-
-	if (trans->last->anon)
-		to -= trans->last->anon->start;
-
-	if (varcs)
-		verbose_arc(trans, from, to);
-
-	if (!file) {
-		opd_stats[OPD_LOST_SAMPLEFILE]++;
-		return;
-	}
-
-	/* Possible narrowings to 32-bit value only. */
-	key = to & (0xffffffff);
-	key |= ((uint64_t)from) << 32;
-
-	err = odb_update_node(file, key);
-	if (err) {
-		fprintf(stderr, "%s: %s\n", __FUNCTION__, strerror(err));
-		abort();
-	}
-}
-
-
-void sfile_log_sample(struct transient const * trans)
-{
-	sfile_log_sample_count(trans, 1);
-}
-
-
-void sfile_log_sample_count(struct transient const * trans,
-                            unsigned long int count)
-{
-	int err;
-	vma_t pc = trans->pc;
-	odb_t * file;
-
-	if (trans->tracing == TRACING_ON) {
-		/* can happen if kernel sample falls through the cracks,
-		 * see opd_put_sample() */
-		if (trans->last)
-			sfile_log_arc(trans);
-		return;
-	}
-
-	file = get_file(trans, 0);
-
-	/* absolute value -> offset */
-	if (trans->current->kernel)
-		pc -= trans->current->kernel->start;
-
-	if (trans->current->anon)
-		pc -= trans->current->anon->start;
-
-	if (vsamples)
-		verbose_sample(trans, pc);
-
-	if (!file) {
-		opd_stats[OPD_LOST_SAMPLEFILE]++;
-		return;
-	}
-
-	err = odb_update_node_with_offset(file,
-					  (odb_key_t)pc,
-					  count);
-	if (err) {
-		fprintf(stderr, "%s: %s\n", __FUNCTION__, strerror(err));
-		abort();
-	}
-}
-
-
-static int close_sfile(struct sfile * sf, void * data __attribute__((unused)))
-{
-	size_t i;
-
-	/* it's OK to close a non-open odb file */
-	for (i = 0; i < op_nr_counters; ++i)
-		odb_close(&sf->files[i]);
-
-	opd_ext_sfile_close(sf);
-
-	return 0;
-}
-
-
-static void kill_sfile(struct sfile * sf)
-{
-	close_sfile(sf, NULL);
-	list_del(&sf->hash);
-	list_del(&sf->lru);
-}
-
-
-static int sync_sfile(struct sfile * sf, void * data __attribute__((unused)))
-{
-	size_t i;
-
-	for (i = 0; i < op_nr_counters; ++i)
-		odb_sync(&sf->files[i]);
-
-	opd_ext_sfile_sync(sf);
-
-	return 0;
-}
-
-
-static int is_sfile_kernel(struct sfile * sf, void * data __attribute__((unused)))
-{
-	return !!sf->kernel;
-}
-
-
-static int is_sfile_anon(struct sfile * sf, void * data)
-{
-	return sf->anon == data;
-}
-
-
-typedef int (*sfile_func)(struct sfile *, void *);
-
-static void
-for_one_sfile(struct sfile * sf, sfile_func func, void * data)
-{
-	size_t i;
-	int free_sf = func(sf, data);
-
-	for (i = 0; i < CG_HASH_SIZE; ++i) {
-		struct list_head * pos;
-		struct list_head * pos2;
-		list_for_each_safe(pos, pos2, &sf->cg_hash[i]) {
-			struct cg_entry * cg =
-				list_entry(pos, struct cg_entry, hash);
-			if (free_sf || func(&cg->to, data)) {
-				kill_sfile(&cg->to);
-				list_del(&cg->hash);
-				free(cg);
-			}
-		}
-	}
-
-	if (free_sf) {
-		kill_sfile(sf);
-		free(sf);
-	}
-}
-
-
-static void for_each_sfile(sfile_func func, void * data)
-{
-	struct list_head * pos;
-	struct list_head * pos2;
-
-	list_for_each_safe(pos, pos2, &lru_list) {
-		struct sfile * sf = list_entry(pos, struct sfile, lru);
-		for_one_sfile(sf, func, data);
-	}
-}
-
-
-void sfile_clear_kernel(void)
-{
-	for_each_sfile(is_sfile_kernel, NULL);
-}
-
-
-void sfile_clear_anon(struct anon_mapping * anon)
-{
-	for_each_sfile(is_sfile_anon, anon);
-}
-
-
-void sfile_sync_files(void)
-{
-	for_each_sfile(sync_sfile, NULL);
-}
-
-
-void sfile_close_files(void)
-{
-	for_each_sfile(close_sfile, NULL);
-}
-
-
-static int always_true(void)
-{
-	return 1;
-}
-
-
-#define LRU_AMOUNT 256
-
-/*
- * Clear out older sfiles. Note the current sfiles we're using
- * will not be present in this list, due to sfile_get/put() pairs
- * around the caller of this.
- */
-int sfile_lru_clear(void)
-{
-	struct list_head * pos;
-	struct list_head * pos2;
-	int amount = LRU_AMOUNT;
-
-	if (list_empty(&lru_list))
-		return 1;
-
-	list_for_each_safe(pos, pos2, &lru_list) {
-		struct sfile * sf;
-		if (!--amount)
-			break;
-		sf = list_entry(pos, struct sfile, lru);
-		for_one_sfile(sf, (sfile_func)always_true, NULL);
-	}
-
-	return 0;
-}
-
-
-void sfile_get(struct sfile * sf)
-{
-	if (sf)
-		list_del(&sf->lru);
-}
-
-
-void sfile_put(struct sfile * sf)
-{
-	if (sf)
-		list_add_tail(&sf->lru, &lru_list);
-}
-
-
-void sfile_init(void)
-{
-	size_t i = 0;
-
-	for (; i < HASH_SIZE; ++i) {
-		list_init(&hashes[i]);
-		list_init(&kernel_cmdlines[i]);
-	}
-}
diff --git a/daemon/opd_sfile.h b/daemon/opd_sfile.h
deleted file mode 100644
index 76e5e63..0000000
--- a/daemon/opd_sfile.h
+++ /dev/null
@@ -1,119 +0,0 @@
-/**
- * @file daemon/opd_sfile.h
- * Management of sample files
- *
- * @remark Copyright 2002 OProfile authors
- * @remark Read the file COPYING
- *
- * @author John Levon
- * @author Philippe Elie
- */
-
-#ifndef OPD_SFILE_H
-#define OPD_SFILE_H
-
-#include "opd_cookie.h"
-
-#include "odb.h"
-#include "op_hw_config.h"
-#include "op_types.h"
-#include "op_list.h"
-
-#include <sys/types.h>
-
-struct kernel_image;
-struct transient;
-
-#define CG_HASH_SIZE 16
-#define UNUSED_EMBEDDED_OFFSET ~0LLU
-
-/**
- * Each set of sample files (where a set is over the physical counter
- * types) will have one of these for it. We match against the
- * descriptions here to find which sample DB file we need to modify.
- *
- * cg files are stored in the hash.
- */
-struct sfile {
-	/** hash value for this sfile */
-	unsigned long hashval;
-	/** cookie value for the binary profiled */
-	cookie_t cookie;
-	/** cookie value for the application owner, INVALID_COOKIE if not set */
-	cookie_t app_cookie;
-	/** thread ID, -1 if not set */
-	pid_t tid;
-	/** thread group ID, -1 if not set */
-	pid_t tgid;
-	/** CPU number */
-	unsigned int cpu;
-	/** kernel image if applicable */
-	struct kernel_image * kernel;
-	/** anonymous mapping */
-	struct anon_mapping * anon;
-	/** embedded offset for Cell BE SPU */
-	uint64_t embedded_offset;
-
-	/** hash table link */
-	struct list_head hash;
-	/** lru list */
-	struct list_head lru;
-	/** true if this file should be ignored in profiles */
-	int ignored;
-	/** opened sample files */
-	odb_t files[OP_MAX_COUNTERS];
-	/** extended sample files */
-	odb_t * ext_files;
-	/** hash table of opened cg sample files */
-	struct list_head cg_hash[CG_HASH_SIZE];
-};
-
-/** a call-graph entry */
-struct cg_entry {
-	/** where arc is to */
-	struct sfile to;
-	/** next in the hash slot */
-	struct list_head hash;
-};
-
-/** clear any sfiles that are for the kernel */
-void sfile_clear_kernel(void);
-
-struct anon_mapping;
-
-/** clear any sfiles for the given anon mapping */
-void sfile_clear_anon(struct anon_mapping *);
-
-/** sync sample files */
-void sfile_sync_files(void);
-
-/** close sample files */
-void sfile_close_files(void);
-
-/** clear out a certain amount of LRU entries
- * return non-zero if the lru is already empty */
-int sfile_lru_clear(void);
-
-/** remove a sfile from the lru list, protecting it from sfile_lru_clear() */
-void sfile_get(struct sfile * sf);
-
-/** add this sfile to lru list */
-void sfile_put(struct sfile * sf);
-
-/**
- * Find the sfile for the current parameters. Note that is required
- * that the PC value be set appropriately (needed for kernel images)
- */
-struct sfile * sfile_find(struct transient const * trans);
-
-/** Log the sample in a previously located sfile. */
-void sfile_log_sample(struct transient const * trans);
-
-/** Log the event/cycle count in a previously located sfile */
-void sfile_log_sample_count(struct transient const * trans,
-                            unsigned long int count);
-
-/** initialise hashes */
-void sfile_init(void);
-
-#endif /* OPD_SFILE_H */
diff --git a/daemon/opd_spu.c b/daemon/opd_spu.c
deleted file mode 100644
index 62a2c2b..0000000
--- a/daemon/opd_spu.c
+++ /dev/null
@@ -1,176 +0,0 @@
-/**
- * @file daemon/opd_spu.c
- * Processing the sample buffer for Cell BE SPU profile
- *
- * @remark Copyright 2007 OProfile authors
- * @remark Read the file COPYING
- *
- * @author Maynard Johnson
- * (C) Copyright IBM Corporation 2007
- */
-
-#include "opd_interface.h"
-#include "opd_printf.h"
-#include "opd_sfile.h"
-#include "opd_stats.h"
-#include "opd_trans.h"
-#include "op_libiberty.h"
-
-#include <stdlib.h>
-#include <stdio.h>
-
-struct spu_context_info {
-	pid_t tid;
-	pid_t tgid;
-	cookie_t app_cookie;
-	uint64_t embedded_offset;
-	cookie_t spu_cookie;
-};
-
-static struct spu_context_info * spu_context_cache;
-
-/* Forward declaration */
-static void process_spu_samples(struct transient * trans);
-
-void (*special_processor)(struct transient *);
-
-/*
- * This function is called when the first value found in the
- * buffer (after the beginning ESCAPE_CODE) is SPU_PROFILING_CODE.
- * Once we get here, the rest of the processing of the buffer is
- * Cell-SPU-specific, so we do not need to return until the
- * trans.buffer is empty.
- */
-void code_spu_profiling(struct transient * trans)
-{
-	/* Next value in buffer is the number of SPUs. */
-	unsigned long long num_spus = pop_buffer_value(trans);
-	/* Free the cache from previous run */
-	free(spu_context_cache);
-	spu_context_cache = xmalloc(sizeof(struct spu_context_info) * num_spus);
-	special_processor = process_spu_samples;
-	process_spu_samples(trans);
-}
-
-void code_spu_ctx_switch(struct transient * trans)
-{
-	clear_trans_current(trans);
-
-	if (!enough_remaining(trans, 6)) {
-		trans->remaining = 0;
-		return;
-	}
-
-	/* First value in the buffer for an SPU context switch is
-	 * the SPU number.  For SPU profiling, 'cpu' = 'spu'.
-	 */
-	trans->cpu = pop_buffer_value(trans);
-	trans->tid = pop_buffer_value(trans);
-	trans->tgid = pop_buffer_value(trans);
-	trans->app_cookie = pop_buffer_value(trans);
-
-	if (vmisc) {
-		char const * app = find_cookie(trans->app_cookie);
-		printf("SPU_CTX_SWITCH to tid %lu, tgid %lu, cookie %llx(%s)\n",
-		       (unsigned long)trans->tid, (unsigned long)trans->tgid,
-		       trans->app_cookie, app ? app : "none");
-	}
-
-	/* The trans->cookie will point to the binary file where the SPU ELF
-	 * can be found.  If the SPU ELF is embedded, it may be embedded in
-	 * either the executable application binary or a shared lib.  If shared
-	 * library, then trans->cookie will differ from the previously obtained
-	 * trans->app_cookie.  For the non-embedded case, trans->cookie always
-	 * points to a separate binary file.
-	 */
-	trans->cookie = pop_buffer_value(trans);
-	trans->embedded_offset = pop_buffer_value(trans);
-}
-
-
-static void cache_spu_context_info(struct transient * trans)
-{
-	int i = trans->cpu;
-	spu_context_cache[i].tid = trans->tid;
-	spu_context_cache[i].tgid = trans->tgid;
-	spu_context_cache[i].app_cookie = trans->app_cookie;
-	spu_context_cache[i].embedded_offset = trans->embedded_offset;
-	spu_context_cache[i].spu_cookie = trans->cookie;
-}
-
-static void update_trans_for_spu(struct transient * trans)
-{
-	int i = trans->cpu;
-	trans->tid = spu_context_cache[i].tid;
-	trans->tgid = spu_context_cache[i].tgid;
-	trans->app_cookie = spu_context_cache[i].app_cookie;
-	trans->embedded_offset = spu_context_cache[i].embedded_offset;
-	trans->cookie = spu_context_cache[i].spu_cookie;
-}
-#define SPU_NUM_MASK 0xFFFFFFFF00000000ULL
-#define SPU_CYCLES_COUNTER 0
-
-static void opd_put_spu_sample
-(struct transient * trans, unsigned long long pc)
-{
-	unsigned long spu_number = (pc & SPU_NUM_MASK) >> 32;
-	if (trans->cpu != spu_number) {
-		trans->cpu = spu_number;
-	        clear_trans_current(trans);
-		update_trans_for_spu(trans);
-	}
-	/* get the current sfile if needed */
-	if (!trans->current)
-		trans->current = sfile_find(trans);
-
-	if (trans->tracing != TRACING_ON)
-		trans->event = SPU_CYCLES_COUNTER;
-
-	trans->pc = (pc & ~SPU_NUM_MASK);
-	/* log the sample or arc */
-	sfile_log_sample(trans);
-
-	/* switch to trace mode */
-	if (trans->tracing == TRACING_START)
-		trans->tracing = TRACING_ON;
-
-	update_trans_last(trans);
-}
-
-/*
- * This function processes SPU context switches and
- * SPU program counter samples.  After processing a
- * context switch (via handlers[code)), we cache the
- * SPU context information that has been temporarily
- * stored in trans.
- */
-static void process_spu_samples(struct transient * trans)
-{
-	unsigned long long code;
-	trans->in_kernel = 0;
-	while (trans->remaining) {
-		code = pop_buffer_value(trans);
-
-		if (!is_escape_code(code)) {
-			opd_put_spu_sample(trans, code);
-			continue;
-		}
-
-		if (!trans->remaining) {
-			verbprintf(vmisc, "Dangling ESCAPE_CODE.\n");
-			opd_stats[OPD_DANGLING_CODE]++;
-			break;
-		}
-
-		/* started with ESCAPE_CODE, next is type */
-		code = pop_buffer_value(trans);
-
-		if (code >= LAST_CODE) {
-			fprintf(stderr, "Unknown code %llu\n", code);
-			abort();
-		}
-
-		handlers[code](trans);
-		cache_spu_context_info(trans);
-	}
-}
diff --git a/daemon/opd_stats.c b/daemon/opd_stats.c
deleted file mode 100644
index 8817a14..0000000
--- a/daemon/opd_stats.c
+++ /dev/null
@@ -1,92 +0,0 @@
-/**
- * @file daemon/opd_stats.c
- * Management of daemon statistics
- *
- * @remark Copyright 2002 OProfile authors
- * @remark Read the file COPYING
- *
- * @author John Levon
- * @author Philippe Elie
- */
-
-#include "opd_stats.h"
-#include "opd_extended.h"
-#include "oprofiled.h"
-
-#include "op_get_time.h"
-
-#include <dirent.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-unsigned long opd_stats[OPD_MAX_STATS];
-
-/**
- * print_if - print an integer value read from file filename,
- * do nothing if the value read == -1 except if force is non-zero
- */
-static void print_if(char const * fmt, char const * path, char const * filename, int force)
-{
-	int value = opd_read_fs_int(path, filename, 0);
-	if (value != -1 || force)
-		printf(fmt, value);
-}
-
-/**
- * opd_print_stats - print out latest statistics
- */
-void opd_print_stats(void)
-{
-	DIR * dir;
-	struct dirent * dirent;
-
-	printf("\n%s\n", op_get_time());
-	printf("\n-- OProfile Statistics --\n");
-	printf("Nr. sample dumps: %lu\n", opd_stats[OPD_DUMP_COUNT]);
-	printf("Nr. non-backtrace samples: %lu\n", opd_stats[OPD_SAMPLES]);
-	printf("Nr. kernel samples: %lu\n", opd_stats[OPD_KERNEL]);
-	printf("Nr. lost samples (no kernel/user): %lu\n", opd_stats[OPD_NO_CTX]);
-	printf("Nr. lost kernel samples: %lu\n", opd_stats[OPD_LOST_KERNEL]);
-	printf("Nr. incomplete code structs: %lu\n", opd_stats[OPD_DANGLING_CODE]);
-	printf("Nr. samples lost due to sample file open failure: %lu\n",
-		opd_stats[OPD_LOST_SAMPLEFILE]);
-	printf("Nr. samples lost due to no permanent mapping: %lu\n",
-		opd_stats[OPD_LOST_NO_MAPPING]);
-	printf("Nr. user context kernel samples lost due to no app info available: %lu\n",
-	       opd_stats[OPD_NO_APP_KERNEL_SAMPLE]);
-	print_if("Nr. samples lost due to buffer overflow: %u\n",
-	       "/dev/oprofile/stats", "event_lost_overflow", 1);
-	print_if("Nr. samples lost due to no mapping: %u\n",
-	       "/dev/oprofile/stats", "sample_lost_no_mapping", 1);
-	print_if("Nr. backtraces skipped due to no file mapping: %u\n",
-	       "/dev/oprofile/stats", "bt_lost_no_mapping", 0);
-	print_if("Nr. samples lost due to no mm: %u\n",
-	       "/dev/oprofile/stats", "sample_lost_no_mm", 1);
-
-	opd_ext_print_stats();
-
-	if (!(dir = opendir("/dev/oprofile/stats/")))
-		goto out;
-	while ((dirent = readdir(dir))) {
-		int cpu_nr;
-		char path[256];
-		if (sscanf(dirent->d_name, "cpu%d", &cpu_nr) != 1)
-			continue;
-		snprintf(path, 256, "/dev/oprofile/stats/%s", dirent->d_name);
-
-		printf("\n---- Statistics for cpu : %d\n", cpu_nr);
-		print_if("Nr. samples lost cpu buffer overflow: %u\n",
-		     path, "sample_lost_overflow", 1);
-		print_if("Nr. samples lost task exit: %u\n",
-		     path, "sample_lost_task_exit", 0);
-		print_if("Nr. samples received: %u\n",
-		     path, "sample_received", 1);
-		print_if("Nr. backtrace aborted: %u\n", 
-		     path, "backtrace_aborted", 0);
-		print_if("Nr. samples lost invalid pc: %u\n", 
-		     path, "sample_invalid_eip", 0);
-	}
-	closedir(dir);
-out:
-	fflush(stdout);
-}
diff --git a/daemon/opd_stats.h b/daemon/opd_stats.h
deleted file mode 100644
index 5b703d6..0000000
--- a/daemon/opd_stats.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/**
- * @file daemon/opd_stats.h
- * Management of daemon statistics
- *
- * @remark Copyright 2002 OProfile authors
- * @remark Read the file COPYING
- *
- * @author John Levon
- * @author Philippe Elie
- */
-
-#ifndef OPD_STATS_H
-#define OPD_STATS_H
-
-extern unsigned long opd_stats[];
-
-enum {	OPD_SAMPLES, /**< nr. samples */
-	OPD_KERNEL, /**< nr. kernel samples */
-	OPD_PROCESS, /**< nr. userspace samples */
-	OPD_NO_CTX, /**< nr. samples lost due to not knowing if in the kernel or not */
-	OPD_LOST_KERNEL,  /**< nr. kernel samples lost */
-	OPD_LOST_SAMPLEFILE, /**< nr samples for which sample file can't be opened */
-	OPD_LOST_NO_MAPPING, /**< nr samples lost due to no mapping */
-	OPD_DUMP_COUNT, /**< nr. of times buffer is read */
-	OPD_DANGLING_CODE, /**< nr. partial code notifications (buffer overflow */
-	OPD_NO_APP_KERNEL_SAMPLE, /**<nr. user ctx kernel samples dropped due to no app cookie available */
-	OPD_MAX_STATS /**< end of stats */
-};
-
-void opd_print_stats(void);
-
-#endif /* OPD_STATS_H */
diff --git a/daemon/opd_trans.c b/daemon/opd_trans.c
deleted file mode 100644
index b279089..0000000
--- a/daemon/opd_trans.c
+++ /dev/null
@@ -1,354 +0,0 @@
-/**
- * @file daemon/opd_trans.c
- * Processing the sample buffer
- *
- * @remark Copyright 2002 OProfile authors
- * @remark Read the file COPYING
- *
- * @author John Levon
- * @author Philippe Elie
- * Modified by Aravind Menon for Xen
- * These modifications are:
- * Copyright (C) 2005 Hewlett-Packard Co.
- *
- * Modified by Maynard Johnson <maynardj@us.ibm.com>
- * These modifications are:
- * (C) Copyright IBM Corporation 2007
- */
-
-#include "opd_trans.h"
-#include "opd_kernel.h"
-#include "opd_sfile.h"
-#include "opd_anon.h"
-#include "opd_stats.h"
-#include "opd_printf.h"
-#include "opd_interface.h"
- 
-#include <limits.h>
-#include <string.h>
-#include <stdlib.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <errno.h>
-
-extern size_t kernel_pointer_size;
-
-
-void clear_trans_last(struct transient * trans)
-{
-	trans->last = NULL;
-	trans->last_anon = NULL;
-}
-
-
-void clear_trans_current(struct transient * trans)
-{
-	trans->current = NULL;
-	trans->anon = NULL;
-}
-
-
-uint64_t pop_buffer_value(struct transient * trans)
-{
-	uint64_t val;
-
-	if (!trans->remaining) {
-		fprintf(stderr, "BUG: popping empty buffer !\n");
-		abort();
-	}
-
-	if (kernel_pointer_size == 4) {
-		uint32_t const * lbuf = (void const *)trans->buffer;
-		val = *lbuf;
-	} else {
-		uint64_t const * lbuf = (void const *)trans->buffer;
-		val = *lbuf;
-	}
-
-	trans->remaining--;
-	trans->buffer += kernel_pointer_size;
-	return val;
-}
-
-
-int enough_remaining(struct transient * trans, size_t size)
-{
-	if (trans->remaining >= size)
-		return 1;
-
-	verbprintf(vmisc, "Dangling ESCAPE_CODE.\n");
-	opd_stats[OPD_DANGLING_CODE]++;
-	return 0;
-}
-
-
-static void opd_put_sample(struct transient * trans, unsigned long long pc)
-{
-	unsigned long long event;
-
-	if (!enough_remaining(trans, 1)) {
-		trans->remaining = 0;
-		return;
-	}
-
-	event = pop_buffer_value(trans);
-
-	if (trans->tracing != TRACING_ON)
-		trans->event = event;
-
-	trans->pc = pc;
-
-	/* sfile can change at each sample for kernel */
-	if (trans->in_kernel != 0)
-		clear_trans_current(trans);
-
-	if (!trans->in_kernel && trans->cookie == NO_COOKIE)
-		trans->anon = find_anon_mapping(trans);
-
-	/* get the current sfile if needed */
-	if (!trans->current)
-		trans->current = sfile_find(trans);
-
-	/*
-	 * can happen if kernel sample falls through the cracks, or if
-	 * it's a sample from an anon region we couldn't find
-	 */
-	if (!trans->current)
-		goto out;
-
-	if (trans->tracing != TRACING_ON) {
-		opd_stats[OPD_SAMPLES]++;
-		opd_stats[trans->in_kernel == 1 ? OPD_KERNEL : OPD_PROCESS]++;
-	}
-
-
-	/* FIXME: this logic is perhaps too harsh? */
-	if (trans->current->ignored || (trans->last && trans->last->ignored))
-		goto out;
-
-	/* log the sample or arc */
-	sfile_log_sample(trans);
-
-out:
-	/* switch to trace mode */
-	if (trans->tracing == TRACING_START)
-		trans->tracing = TRACING_ON;
-
-	update_trans_last(trans);
-}
-
-
-static void code_unknown(struct transient * trans __attribute__((unused)))
-{
-	fprintf(stderr, "Unknown code !\n");
-	abort();
-}
-
-
-static void code_ctx_switch(struct transient * trans)
-{
-	clear_trans_current(trans);
-
-	if (!enough_remaining(trans, 5)) {
-		trans->remaining = 0;
-		return;
-	}
-
-	trans->tid = pop_buffer_value(trans);
-	trans->app_cookie = pop_buffer_value(trans);
-	/* must be ESCAPE_CODE, CTX_TGID_CODE, tgid. Like this
-	 * because tgid was added later in a compatible manner.
-	 */
-	pop_buffer_value(trans);
-	pop_buffer_value(trans);
-	trans->tgid = pop_buffer_value(trans);
-
-	if (vmisc) {
-		char const * app = find_cookie(trans->app_cookie);
-		printf("CTX_SWITCH to tid %lu, tgid %lu, cookie %llx(%s)\n",
-		       (unsigned long)trans->tid, (unsigned long)trans->tgid,
-		       trans->app_cookie, app ? app : "none");
-	}
-}
-
-
-static void code_cpu_switch(struct transient * trans)
-{
-	clear_trans_current(trans);
-
-	if (!enough_remaining(trans, 1)) {
-		trans->remaining = 0;
-		return;
-	}
-
-	trans->cpu = pop_buffer_value(trans);
-	verbprintf(vmisc, "CPU_SWITCH to %lu\n", trans->cpu);
-}
-
-
-static void code_cookie_switch(struct transient * trans)
-{
-	clear_trans_current(trans);
-
-	if (!enough_remaining(trans, 1)) {
-		trans->remaining = 0;
-		return;
-	}
-
-	trans->cookie = pop_buffer_value(trans);
-
-	if (vmisc) {
-		char const * name = verbose_cookie(trans->cookie);
-		verbprintf(vmisc, "COOKIE_SWITCH to cookie %s(%llx)\n",
-			   name, trans->cookie);
-	}
-}
-
-
-static void code_kernel_enter(struct transient * trans)
-{
-	verbprintf(vmisc, "KERNEL_ENTER_SWITCH to kernel\n");
-	trans->in_kernel = 1;
-	clear_trans_current(trans);
-	/* subtlety: we must keep trans->cookie cached,
-	 * even though it's meaningless for the kernel -
-	 * we won't necessarily get a cookie switch on
-	 * kernel exit. See comments in opd_sfile.c
-	 */
-}
-
-
-static void code_user_enter(struct transient * trans)
-{
-	verbprintf(vmisc, "USER_ENTER_SWITCH to user-space\n");
-	trans->in_kernel = 0;
-	clear_trans_current(trans);
-	clear_trans_last(trans);
-}
-
-
-static void code_module_loaded(struct transient * trans __attribute__((unused)))
-{
-	verbprintf(vmodule, "MODULE_LOADED_CODE\n");
-	opd_reread_module_info();
-	clear_trans_current(trans);
-	clear_trans_last(trans);
-}
-
-
-/*
- * This also implicitly signals the end of the previous
- * trace, so we never explicitly set TRACING_OFF when
- * processing a buffer.
- */
-static void code_trace_begin(struct transient * trans)
-{
-	verbprintf(varcs, "TRACE_BEGIN\n");
-	trans->tracing = TRACING_START;
-}
-
-static void code_xen_enter(struct transient * trans)
-{
-	verbprintf(vmisc, "XEN_ENTER_SWITCH to xen\n");
-	trans->in_kernel = 1;
-	trans->current = NULL;
-	/* subtlety: we must keep trans->cookie cached, even though it's
-	 * meaningless for Xen - we won't necessarily get a cookie switch
-	 * on Xen exit. See comments in opd_sfile.c. It seems that we can
-	 * get away with in_kernel = 1 as long as we supply the correct
-	 * Xen image, and its address range in startup find_kernel_image
-	 * is modified to look in the Xen image also
-	 */
-}
-
-extern void code_spu_profiling(struct transient * trans);
-extern void code_spu_ctx_switch(struct transient * trans);
-
-extern void code_ibs_fetch_sample(struct transient * trans);
-extern void code_ibs_op_sample(struct transient * trans);
-
-handler_t handlers[LAST_CODE + 1] = {
-	&code_unknown,
-	&code_ctx_switch,
-	&code_cpu_switch,
-	&code_cookie_switch,
-	&code_kernel_enter,
-	&code_user_enter,
-	&code_module_loaded,
-	/* tgid handled differently */
-	&code_unknown,
-	&code_trace_begin,
-	&code_unknown,
-	&code_xen_enter,
-#if defined(__powerpc__)
-	&code_spu_profiling,
-	&code_spu_ctx_switch,
-#else
-	&code_unknown,
-	&code_unknown,
-#endif
-	&code_ibs_fetch_sample,
-	&code_ibs_op_sample,
-};
-
-extern void (*special_processor)(struct transient *);
-
-void opd_process_samples(char const * buffer, size_t count)
-{
-	struct transient trans = {
-		.buffer = buffer,
-		.remaining = count,
-		.tracing = TRACING_OFF,
-		.current = NULL,
-		.last = NULL,
-		.cookie = INVALID_COOKIE,
-		.app_cookie = INVALID_COOKIE,
-		.anon = NULL,
-		.last_anon = NULL,
-		.pc = 0,
-		.last_pc = 0,
-		.event = 0,
-		.in_kernel = -1,
-		.cpu = -1,
-		.tid = -1,
-		.embedded_offset = UNUSED_EMBEDDED_OFFSET,
-		.tgid = -1,
-		.ext = NULL
-	};
-
-	/* FIXME: was uint64_t but it can't compile on alpha where uint64_t
-	 * is an unsigned long and below the printf("..." %llu\n", code)
-	 * generate a warning, this look like a stopper to use c98 types :/
-	 */
-	unsigned long long code;
-
-	if (special_processor) {
-		special_processor(&trans);
-		return;
-	}
-
-	while (trans.remaining) {
-		code = pop_buffer_value(&trans);
-
-		if (!is_escape_code(code)) {
-			opd_put_sample(&trans, code);
-			continue;
-		}
-
-		if (!trans.remaining) {
-			verbprintf(vmisc, "Dangling ESCAPE_CODE.\n");
-			opd_stats[OPD_DANGLING_CODE]++;
-			break;
-		}
-
-		// started with ESCAPE_CODE, next is type
-		code = pop_buffer_value(&trans);
-	
-		if (code >= LAST_CODE) {
-			fprintf(stderr, "Unknown code %llu\n", code);
-			abort();
-		}
-
-		handlers[code](&trans);
-	}
-}
diff --git a/daemon/opd_trans.h b/daemon/opd_trans.h
deleted file mode 100644
index c0a868b..0000000
--- a/daemon/opd_trans.h
+++ /dev/null
@@ -1,86 +0,0 @@
-/**
- * @file daemon/opd_trans.h
- * Processing the sample buffer
- *
- * @remark Copyright 2002 OProfile authors
- * @remark Read the file COPYING
- *
- * @author John Levon
- * @author Philippe Elie
- *
- * Modified by Maynard Johnson <maynardj@us.ibm.com>
- * These modifications are:
- * (C) Copyright IBM Corporation 2007
- */
-
-#ifndef OPD_TRANS_H
-#define OPD_TRANS_H
-
-#include "opd_cookie.h"
-#include "op_types.h"
-
-#include <stdint.h>
-
-struct sfile;
-struct anon_mapping;
-
-enum tracing_type {
-	TRACING_OFF,
-	TRACING_START,
-	TRACING_ON
-};
-
-/**
- * Transient values used for parsing the event buffer.
- * Note that these are reset for each buffer read, but
- * that should be ok as in the kernel, cpu_buffer_reset()
- * ensures that a correct context starts off the buffer.
- */
-struct transient {
-	char const * buffer;
-	size_t remaining;
-	enum tracing_type tracing;
-	struct sfile * current;
-	struct sfile * last;
-	struct anon_mapping * anon;
-	struct anon_mapping * last_anon;
-	cookie_t cookie;
-	cookie_t app_cookie;
-	vma_t pc;
-	vma_t last_pc;
-	unsigned long event;
-	int in_kernel;
-	unsigned long cpu;
-	pid_t tid;
-	pid_t tgid;
-	uint64_t embedded_offset;
-	void * ext;
-};
-
-typedef void (*handler_t)(struct transient *);
-extern handler_t handlers[];
-
-uint64_t pop_buffer_value(struct transient * trans);
-int enough_remaining(struct transient * trans, size_t size);
-static inline void update_trans_last(struct transient * trans)
-{
-	trans->last = trans->current;
-	trans->last_anon = trans->anon;
-	trans->last_pc = trans->pc;
-}
-
-extern size_t kernel_pointer_size;
-static inline int is_escape_code(uint64_t code)
-{
-	return kernel_pointer_size == 4 ? code == ~0LU : code == ~0LLU;
-}
-
-void opd_process_samples(char const * buffer, size_t count);
-
-/** used when we need to clear data that's been freed */
-void clear_trans_last(struct transient * trans);
-
-/** used when we need to clear data that's been freed */
-void clear_trans_current(struct transient * trans);
-
-#endif /* OPD_TRANS_H */
diff --git a/daemon/oprofiled.c b/daemon/oprofiled.c
deleted file mode 100644
index fbe63c6..0000000
--- a/daemon/oprofiled.c
+++ /dev/null
@@ -1,532 +0,0 @@
-/**
- * @file daemon/oprofiled.c
- * Initialisation and setup
- *
- * @remark Copyright 2002, 2003 OProfile authors
- * @remark Read the file COPYING
- *
- * @author John Levon
- * @author Philippe Elie
- * Modified by Aravind Menon for Xen
- * These modifications are:
- * Copyright (C) 2005 Hewlett-Packard Co.
- */
-
-#include "config.h"
- 
-#include "oprofiled.h"
-#include "opd_printf.h"
-#include "opd_events.h"
-#include "opd_extended.h"
-
-#include "op_config.h"
-#include "op_version.h"
-#include "op_hw_config.h"
-#include "op_libiberty.h"
-#include "op_file.h"
-#include "op_abi.h"
-#include "op_string.h"
-#include "op_cpu_type.h"
-#include "op_popt.h"
-#include "op_lockfile.h"
-#include "op_list.h"
-#include "op_fileio.h"
-
-#include <sys/types.h>
-#include <sys/resource.h>
-#include <stdlib.h>
-#include <fcntl.h>
-#include <stdio.h>
-#include <string.h>
-#include <unistd.h>
-#include <errno.h>
-#include <assert.h>
-#include <dirent.h>
-#include <limits.h>
-
-sig_atomic_t signal_alarm;
-sig_atomic_t signal_hup;
-sig_atomic_t signal_term;
-sig_atomic_t signal_child;
-sig_atomic_t signal_usr1;
-sig_atomic_t signal_usr2;
-
-uint op_nr_counters;
-op_cpu cpu_type;
-int no_event_ok;
-int vsfile;
-int vsamples;
-int varcs;
-int vmodule;
-int vmisc;
-int vext;
-int separate_lib;
-int separate_kernel;
-int separate_thread;
-int separate_cpu;
-int no_vmlinux;
-char * vmlinux;
-char * kernel_range;
-char * session_dir;
-int no_xen;
-char * xenimage;
-char * xen_range;
-static char * verbose;
-static char * binary_name_filter;
-static char * events;
-static char * ext_feature;
-static int showvers;
-static struct oprofiled_ops * opd_ops;
-extern struct oprofiled_ops opd_24_ops;
-extern struct oprofiled_ops opd_26_ops;
-
-#define OPD_IMAGE_FILTER_HASH_SIZE 32
-static struct list_head images_filter[OPD_IMAGE_FILTER_HASH_SIZE];
-
-static struct poptOption options[] = {
-	{ "session-dir", 0, POPT_ARG_STRING, &session_dir, 0, "place sample database in dir instead of default location", "/var/lib/oprofile", },
-	{ "kernel-range", 'r', POPT_ARG_STRING, &kernel_range, 0, "Kernel VMA range", "start-end", },
-	{ "vmlinux", 'k', POPT_ARG_STRING, &vmlinux, 0, "vmlinux kernel image", "file", },
-	{ "no-vmlinux", 0, POPT_ARG_NONE, &no_vmlinux, 0, "vmlinux kernel image file not available", NULL, },
-	{ "xen-range", 0, POPT_ARG_STRING, &xen_range, 0, "Xen VMA range", "start-end", },
-	{ "xen-image", 0, POPT_ARG_STRING, &xenimage, 0, "Xen image", "file", },
-	{ "image", 0, POPT_ARG_STRING, &binary_name_filter, 0, "image name filter", "profile these comma separated image" },
-	{ "separate-lib", 0, POPT_ARG_INT, &separate_lib, 0, "separate library samples for each distinct application", "[0|1]", },
-	{ "separate-kernel", 0, POPT_ARG_INT, &separate_kernel, 0, "separate kernel samples for each distinct application", "[0|1]", },
-	{ "separate-thread", 0, POPT_ARG_INT, &separate_thread, 0, "thread-profiling mode", "[0|1]" },
-	{ "separate-cpu", 0, POPT_ARG_INT, &separate_cpu, 0, "separate samples for each CPU", "[0|1]" },
-	{ "events", 'e', POPT_ARG_STRING, &events, 0, "events list", "[events]" },
-	{ "version", 'v', POPT_ARG_NONE, &showvers, 0, "show version", NULL, },
-	{ "verbose", 'V', POPT_ARG_STRING, &verbose, 0, "be verbose in log file", "all,sfile,arcs,samples,module,misc", },
-	{ "ext-feature", 'x', POPT_ARG_STRING, &ext_feature, 1, "enable extended feature", "<extended-feature-name>:[args]", },
-	POPT_AUTOHELP
-	{ NULL, 0, 0, NULL, 0, NULL, NULL, },
-};
- 
-
-void opd_open_logfile(void)
-{
-	if (open(op_log_file, O_WRONLY|O_CREAT|O_NOCTTY|O_APPEND, 0644) == -1) {
-		perror("oprofiled: couldn't re-open stdout: ");
-		exit(EXIT_FAILURE);
-	}
-
-	if (dup2(1, 2) == -1) {
-		perror("oprofiled: couldn't dup stdout to stderr: ");
-		exit(EXIT_FAILURE);
-	}
-}
- 
-
-/**
- * opd_fork - fork and return as child
- *
- * fork() and exit the parent with _exit().
- * Failure is fatal.
- */
-static void opd_fork(void)
-{
-	switch (fork()) {
-		case -1:
-			perror("oprofiled: fork() failed: ");
-			exit(EXIT_FAILURE);
-			break;
-		case 0:
-			break;
-		default:
-			/* parent */
-			_exit(EXIT_SUCCESS);
-			break;
-	}
-}
-
- 
-static void opd_go_daemon(void)
-{
-	opd_fork();
-
-	if (chdir(op_session_dir)) {
-		fprintf(stderr, "oprofiled: opd_go_daemon: couldn't chdir to %s: %s",
-			op_session_dir, strerror(errno));
-		exit(EXIT_FAILURE);
-	}
-
-	if (setsid() < 0) {
-		perror("oprofiled: opd_go_daemon: couldn't setsid: ");
-		exit(EXIT_FAILURE);
-	}
-
-	opd_fork();
-}
-
-
-static void opd_write_abi(void)
-{
-	char * cbuf;
- 
-	cbuf = xmalloc(strlen(op_session_dir) + 5);
-	strcpy(cbuf, op_session_dir);
-	strcat(cbuf, "/abi");
-	op_write_abi_to_file(cbuf);
-	free(cbuf);
-}
-
-
-/**
- * opd_alarm - sync files and report stats
- */
-static void opd_alarm(int val __attribute__((unused)))
-{
-	signal_alarm = 1;
-}
- 
-
-/* re-open logfile for logrotate */
-static void opd_sighup(int val __attribute__((unused)))
-{
-	signal_hup = 1;
-}
-
-
-static void opd_sigterm(int val __attribute__((unused)))
-{
-	signal_term = 1;
-}
-
-static void opd_sigchild(int val __attribute__((unused)))
-{
-	signal_child = 1;
-}
- 
-
-static void opd_sigusr1(int val __attribute__((unused)))
-{
-	signal_usr1 = 1;
-}
-
- 
-static void opd_sigusr2(int val __attribute__((unused)))
-{
-	signal_usr2 = 1;
-}
-
-
-static void opd_setup_signals(void)
-{
-	struct sigaction act;
- 
-	act.sa_handler = opd_alarm;
-	act.sa_flags = 0;
-	sigemptyset(&act.sa_mask);
-
-	if (sigaction(SIGALRM, &act, NULL)) {
-		perror("oprofiled: install of SIGALRM handler failed: ");
-		exit(EXIT_FAILURE);
-	}
-
-	act.sa_handler = opd_sighup;
-	act.sa_flags = 0;
-	sigemptyset(&act.sa_mask);
-	sigaddset(&act.sa_mask, SIGALRM);
-
-	if (sigaction(SIGHUP, &act, NULL)) {
-		perror("oprofiled: install of SIGHUP handler failed: ");
-		exit(EXIT_FAILURE);
-	}
-
-	act.sa_handler = opd_sigterm;
-	act.sa_flags = 0;
-	sigemptyset(&act.sa_mask);
-	sigaddset(&act.sa_mask, SIGTERM);
-
-	if (sigaction(SIGTERM, &act, NULL)) {
-		perror("oprofiled: install of SIGTERM handler failed: ");
-		exit(EXIT_FAILURE);
-	}
-
-	act.sa_handler = opd_sigchild;
-	act.sa_flags = 0;
-	sigemptyset(&act.sa_mask);
-	sigaddset(&act.sa_mask, SIGCHLD);
-
-	if (sigaction(SIGCHLD, &act, NULL)) {
-		perror("oprofiled: install of SIGCHLD handler failed: ");
-		exit(EXIT_FAILURE);
-	}
-
-	act.sa_handler = opd_sigusr1;
-	act.sa_flags = 0;
-	sigemptyset(&act.sa_mask);
-	sigaddset(&act.sa_mask, SIGTERM);
-
-	if (sigaction(SIGUSR1, &act, NULL)) {
-		perror("oprofiled: install of SIGUSR1 handler failed: ");
-		exit(EXIT_FAILURE);
-	}
-
-	act.sa_handler = opd_sigusr2;
-	act.sa_flags = 0;
-	sigemptyset(&act.sa_mask);
-	sigaddset(&act.sa_mask, SIGTERM);
-
-	if (sigaction(SIGUSR2, &act, NULL)) {
-		perror("oprofiled: install of SIGUSR2 handler failed: ");
-		exit(EXIT_FAILURE);
-	}
-}
-
-
-struct opd_hashed_name {
-	char * name;
-	struct list_head next;
-};
-
-
-static void add_image_filter(char const * name)
-{
-	size_t hash;
-	struct opd_hashed_name * elt = xmalloc(sizeof(struct opd_hashed_name));
-	elt->name = xmalloc(PATH_MAX);
-	if (!realpath(name, elt->name)) {
-		free(elt->name);
-		free(elt);
-		return;
-	}
-	hash = op_hash_string(elt->name);
-	verbprintf(vmisc, "Adding to image filter: \"%s\"\n", elt->name);
-	list_add(&elt->next, &images_filter[hash % OPD_IMAGE_FILTER_HASH_SIZE]);
-}
-
-
-static void opd_parse_image_filter(void)
-{
-	size_t i;
-	char const * last = binary_name_filter;
-	char const * cur = binary_name_filter;
-
-	if (!binary_name_filter)
-		return;
-
-	for (i = 0; i < OPD_IMAGE_FILTER_HASH_SIZE; ++i)
-		list_init(&images_filter[i]);
-
-	while ((cur = strchr(last, ',')) != NULL) {
-		char * tmp = op_xstrndup(last, cur - last);
-		add_image_filter(tmp);
-		free(tmp);
-		last = cur + 1;
-	}
-	add_image_filter(last);
-}
-
-
-int is_image_ignored(char const * name)
-{
-	size_t hash;
-	struct list_head * pos;
-
-	if (!binary_name_filter)
-		return 0;
-	
-	hash = op_hash_string(name);
-
-	list_for_each(pos, &images_filter[hash % OPD_IMAGE_FILTER_HASH_SIZE]) {
-		struct opd_hashed_name * hashed_name =
-			list_entry(pos, struct opd_hashed_name, next);
-		if (!strcmp(hashed_name->name, name))
-			return 0;
-	}
-
-	return 1;
-}
-
-
-/** return the int in the given oprofilefs file */
-int opd_read_fs_int(char const * path, char const * name, int fatal)
-{
-	char filename[PATH_MAX + 1];
-	snprintf(filename, PATH_MAX, "%s/%s", path, name);
-	return op_read_int_from_file(filename, fatal);
-}
-
-
-static void opd_handle_verbose_option(char const * name)
-{
-	if (!strcmp(name, "all")) {
-		vsfile = 1;
-		vsamples = 1;
-		varcs = 1;
-		vmodule = 1;
-		vmisc = 1;
-		vext= 1;
-	} else if (!strcmp(name, "sfile")) {
-		vsfile = 1;
-	} else if (!strcmp(name, "arcs")) {
-		varcs = 1;
-	} else if (!strcmp(name, "samples")) {
-		vsamples = 1;
-	} else if (!strcmp(name, "module")) {
-		vmodule = 1;
-	} else if (!strcmp(name, "misc")) {
-		vmisc = 1;
-	} else if (!strcmp(name, "ext")) {
-		vext= 1;
-	} else {
-		fprintf(stderr, "unknown verbose options\n");
-		exit(EXIT_FAILURE);
-	}
-}
-
-static void opd_parse_verbose(void)
-{
-	char const * last = verbose;
-	char const * cur = verbose;
-
-	if (!verbose)
-		return;
-
-	while ((cur = strchr(last, ',')) != NULL) {
-		char * tmp = op_xstrndup(last, cur - last);
-		opd_handle_verbose_option(tmp);
-		free(tmp);
-		last = cur + 1;
-	}
-	opd_handle_verbose_option(last);
-}
-
-
-static void opd_options(int argc, char const * argv[])
-{
-	poptContext optcon;
-	char * tmp;
-
-	optcon = op_poptGetContext(NULL, argc, argv, options, 0);
-
-	if (showvers)
-		show_version(argv[0]);
-
-	opd_parse_verbose();
-
-	if (separate_kernel)
-		separate_lib = 1;
-
-	cpu_type = op_get_cpu_type();
-	op_nr_counters = op_get_nr_counters(cpu_type);
-
-	if (!no_vmlinux) {
-		if (!vmlinux || !strcmp("", vmlinux)) {
-			fprintf(stderr, "oprofiled: no vmlinux specified.\n");
-			poptPrintHelp(optcon, stderr, 0);
-			exit(EXIT_FAILURE);
-		}
-
-		/* canonicalise vmlinux filename. fix #637805 */
-		tmp = xmalloc(PATH_MAX);
-		if (realpath(vmlinux, tmp))
-			vmlinux = tmp;
-		else
-			free(tmp);
-
-		if (!kernel_range || !strcmp("", kernel_range)) {
-			fprintf(stderr, "oprofiled: no kernel VMA range specified.\n");
-			poptPrintHelp(optcon, stderr, 0);
-			exit(EXIT_FAILURE);
-		}
-	}
-
-	if(opd_ext_initialize(ext_feature) != EXIT_SUCCESS)
-		exit(EXIT_FAILURE);
-
-	if (events == NULL && no_event_ok == 0) {
-		fprintf(stderr, "oprofiled: no events specified.\n");
-		poptPrintHelp(optcon, stderr, 0);
-		exit(EXIT_FAILURE);
-	}
-
-	if (!xenimage || !strcmp("", xenimage)) {
-		no_xen = 1;
-	} else {
-		no_xen = 0;
-
-		/* canonicalise xen image filename. */
-		tmp = xmalloc(PATH_MAX);
-		if (realpath(xenimage, tmp))
-			xenimage = tmp;
-		else
-			free(tmp);
-
-		if (!xen_range || !strcmp("", xen_range)) {
-			fprintf(stderr, "oprofiled: no Xen VMA range specified.\n");
-			poptPrintHelp(optcon, stderr, 0);
-			exit(EXIT_FAILURE);
-		}
-	}
-
-	if (events != NULL)
-		opd_parse_events(events);
-
-	opd_parse_image_filter();
-
-	poptFreeContext(optcon);
-}
-
-
-/* determine what kernel we're running and which daemon
- * to use
- */
-static struct oprofiled_ops * get_ops(void)
-{
-	switch (op_get_interface()) {
-		case OP_INTERFACE_24:
-			printf("Using 2.4 OProfile kernel interface.\n");
-			return &opd_24_ops;
-		case OP_INTERFACE_26:
-			printf("Using 2.6+ OProfile kernel interface.\n");
-			return &opd_26_ops;
-		default:
-			break;
-	}
-
-	fprintf(stderr, "Couldn't determine kernel version.\n");
-	exit(EXIT_FAILURE);
-	return NULL;
-}
-
-
-int main(int argc, char const * argv[])
-{
-	int err;
-	struct rlimit rlim = { 2048, 2048 };
-
-	opd_options(argc, argv);
-	init_op_config_dirs(session_dir);
-
-	opd_setup_signals();
-
-	err = setrlimit(RLIMIT_NOFILE, &rlim);
-	if (err)
-		perror("warning: could not set RLIMIT_NOFILE to 2048: ");
-
-	opd_write_abi();
-
-	opd_ops = get_ops();
-
-	opd_ops->init();
-
-	opd_go_daemon();
-
-	/* clean up every 10 minutes */
-	alarm(60 * 10);
-
-	if (op_write_lock_file(op_lock_file)) {
-		fprintf(stderr, "oprofiled: could not create lock file %s\n",
-			op_lock_file);
-		exit(EXIT_FAILURE);
-	}
-
-	opd_ops->start();
-
-	opd_ops->exit();
-
-	return 0;
-}
diff --git a/daemon/oprofiled.h b/daemon/oprofiled.h
deleted file mode 100644
index b319df1..0000000
--- a/daemon/oprofiled.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/**
- * @file daemon/oprofiled.h
- * Initialisation and setup
- *
- * @remark Copyright 2002, 2003 OProfile authors
- * @remark Read the file COPYING
- *
- * @author John Levon
- * @author Philippe Elie
- * Modified by Aravind Menon for Xen
- * These modifications are:
- * Copyright (C) 2005 Hewlett-Packard Co.
- */
-
-#ifndef OPROFILED_H
-
-#include <signal.h>
-
-struct oprofiled_ops {
-	void (*init)(void);
-	void (*start)(void);
-	void (*exit)(void);
-};
-
-
-/**
- * opd_open_logfile - open the log file
- *
- * Open the logfile on stdout and stderr. This function
- * assumes that 1 and 2 are the lowest close()d file
- * descriptors. Failure to open on either descriptor is
- * a fatal error.
- */
-void opd_open_logfile(void);
-
- 
-/**
- * is_image_ignored - check if we must ignore this image
- * @param name the name to check
- *
- * Return true if the image should not be profiled
- */
-int is_image_ignored(char const * name);
-
-/** return the int in the given oprofilefs file, error is fatal if !is_fatal */
-int opd_read_fs_int(char const * path, char const * name, int is_fatal);
-
-
-/** global variable positioned by signal handler */
-extern sig_atomic_t signal_alarm;
-extern sig_atomic_t signal_hup;
-extern sig_atomic_t signal_term;
-extern sig_atomic_t signal_child;
-extern sig_atomic_t signal_usr1;
-extern sig_atomic_t signal_usr2;
-
-extern unsigned int op_nr_counters;
-extern int separate_lib;
-extern int separate_kernel;
-extern int separate_thread;
-extern int separate_cpu;
-extern int no_vmlinux;
-extern char * vmlinux;
-extern char * kernel_range;
-extern int no_xen;
-extern char * xenimage;
-extern char * xen_range;
-
-#endif /* OPROFILED_H */
diff --git a/doc/Makefile.am b/doc/Makefile.am
index 68d472b..43a7781 100644
--- a/doc/Makefile.am
+++ b/doc/Makefile.am
@@ -10,20 +10,21 @@ STYLESHEETS=$(CHUNK_XHTML_STYLESHEET) $(srcdir)/xsl/xhtml-common.xsl
 
 man_MANS = \
 	oprofile.1 \
-	opcontrol.1 \
 	opreport.1 \
 	opannotate.1 \
 	opgprof.1 \
 	ophelp.1 \
+	op-check-perfevents.1 \
 	oparchive.1 \
 	opimport.1
 
 if BUILD_FOR_PERF_EVENT
-man_MANS += operf.1
+man_MANS += operf.1 \
+			ocount.1
 endif
 
 htmldir = $(prefix)/share/doc/oprofile
-dist_html_DATA = oprofile.html internals.html opreport.xsd op-jit-devel.html
+dist_html_DATA = oprofile.html internals.html opreport.xsd ophelp.xsd op-jit-devel.html
 
 if have_xsltproc
 
diff --git a/doc/Makefile.in b/doc/Makefile.in
index c122fc0..b3d4b5f 100644
--- a/doc/Makefile.in
+++ b/doc/Makefile.in
@@ -34,18 +34,20 @@ PRE_UNINSTALL = :
 POST_UNINSTALL = :
 build_triplet = @build@
 host_triplet = @host@
-@BUILD_FOR_PERF_EVENT_TRUE@am__append_1 = operf.1
+@BUILD_FOR_PERF_EVENT_TRUE@am__append_1 = operf.1 \
+@BUILD_FOR_PERF_EVENT_TRUE@			ocount.1
+
 subdir = doc
 DIST_COMMON = $(dist_html_DATA) $(srcdir)/Makefile.am \
-	$(srcdir)/Makefile.in $(srcdir)/opannotate.1.in \
-	$(srcdir)/oparchive.1.in $(srcdir)/opcontrol.1.in \
-	$(srcdir)/operf.1.in $(srcdir)/opgprof.1.in \
-	$(srcdir)/ophelp.1.in $(srcdir)/opimport.1.in \
-	$(srcdir)/opreport.1.in $(srcdir)/oprofile.1.in
+	$(srcdir)/Makefile.in $(srcdir)/ocount.1.in \
+	$(srcdir)/op-check-perfevents.1.in $(srcdir)/opannotate.1.in \
+	$(srcdir)/oparchive.1.in $(srcdir)/operf.1.in \
+	$(srcdir)/opgprof.1.in $(srcdir)/ophelp.1.in \
+	$(srcdir)/opimport.1.in $(srcdir)/opreport.1.in \
+	$(srcdir)/oprofile.1.in
 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
 am__aclocal_m4_deps = $(top_srcdir)/m4/binutils.m4 \
 	$(top_srcdir)/m4/builtinexpect.m4 \
-	$(top_srcdir)/m4/cellspubfdsupport.m4 \
 	$(top_srcdir)/m4/compileroption.m4 \
 	$(top_srcdir)/m4/copyifchange.m4 $(top_srcdir)/m4/docbook.m4 \
 	$(top_srcdir)/m4/extradirs.m4 \
@@ -54,15 +56,16 @@ am__aclocal_m4_deps = $(top_srcdir)/m4/binutils.m4 \
 	$(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
 	$(top_srcdir)/m4/mallocattribute.m4 \
 	$(top_srcdir)/m4/poptconst.m4 \
-	$(top_srcdir)/m4/precompiledheader.m4 $(top_srcdir)/m4/qt.m4 \
+	$(top_srcdir)/m4/precompiledheader.m4 \
 	$(top_srcdir)/m4/sstream.m4 $(top_srcdir)/m4/typedef.m4 \
 	$(top_srcdir)/configure.ac
 am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
 	$(ACLOCAL_M4)
 mkinstalldirs = $(install_sh) -d
 CONFIG_HEADER = $(top_builddir)/config.h
-CONFIG_CLEAN_FILES = oprofile.1 opcontrol.1 ophelp.1 opreport.1 \
-	opannotate.1 opgprof.1 oparchive.1 opimport.1 operf.1
+CONFIG_CLEAN_FILES = oprofile.1 ophelp.1 op-check-perfevents.1 \
+	opreport.1 opannotate.1 opgprof.1 oparchive.1 opimport.1 \
+	operf.1 ocount.1
 CONFIG_CLEAN_VPATH_FILES =
 SOURCES =
 DIST_SOURCES =
@@ -144,7 +147,6 @@ LN_S = @LN_S@
 LTLIBOBJS = @LTLIBOBJS@
 MAKEINFO = @MAKEINFO@
 MKDIR_P = @MKDIR_P@
-MOC = @MOC@
 NM = @NM@
 NMEDIT = @NMEDIT@
 OBJDUMP = @OBJDUMP@
@@ -168,20 +170,13 @@ PFM_LIB = @PFM_LIB@
 PKG_CONFIG = @PKG_CONFIG@
 POPT_LIBS = @POPT_LIBS@
 PTRDIFF_T_TYPE = @PTRDIFF_T_TYPE@
-QT_CFLAGS = @QT_CFLAGS@
-QT_INCLUDES = @QT_INCLUDES@
-QT_LDFLAGS = @QT_LDFLAGS@
-QT_LIB = @QT_LIB@
-QT_LIBS = @QT_LIBS@
-QT_VERSION = @QT_VERSION@
 RANLIB = @RANLIB@
+RT_LIB = @RT_LIB@
 SED = @SED@
 SET_MAKE = @SET_MAKE@
 SHELL = @SHELL@
 SIZE_T_TYPE = @SIZE_T_TYPE@
 STRIP = @STRIP@
-UIC = @UIC@
-UIChelp = @UIChelp@
 VERSION = @VERSION@
 XMKMF = @XMKMF@
 XML_CATALOG = @XML_CATALOG@
@@ -251,9 +246,9 @@ XHTML_STYLESHEET = $(srcdir)/xsl/xhtml.xsl
 CHUNK_XHTML_STYLESHEET = $(srcdir)/xsl/xhtml-chunk.xsl
 XML_CATALOG_FILES = xsl/catalog.xml
 STYLESHEETS = $(CHUNK_XHTML_STYLESHEET) $(srcdir)/xsl/xhtml-common.xsl
-man_MANS = oprofile.1 opcontrol.1 opreport.1 opannotate.1 opgprof.1 \
-	ophelp.1 oparchive.1 opimport.1 $(am__append_1)
-dist_html_DATA = oprofile.html internals.html opreport.xsd op-jit-devel.html
+man_MANS = oprofile.1 opreport.1 opannotate.1 opgprof.1 ophelp.1 \
+	op-check-perfevents.1 oparchive.1 opimport.1 $(am__append_1)
+dist_html_DATA = oprofile.html internals.html opreport.xsd ophelp.xsd op-jit-devel.html
 EXTRA_DIST = \
 	oprofile.1 \
 	oprofile.1.in \
@@ -303,10 +298,10 @@ $(ACLOCAL_M4):  $(am__aclocal_m4_deps)
 $(am__aclocal_m4_deps):
 oprofile.1: $(top_builddir)/config.status $(srcdir)/oprofile.1.in
 	cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@
-opcontrol.1: $(top_builddir)/config.status $(srcdir)/opcontrol.1.in
-	cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@
 ophelp.1: $(top_builddir)/config.status $(srcdir)/ophelp.1.in
 	cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@
+op-check-perfevents.1: $(top_builddir)/config.status $(srcdir)/op-check-perfevents.1.in
+	cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@
 opreport.1: $(top_builddir)/config.status $(srcdir)/opreport.1.in
 	cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@
 opannotate.1: $(top_builddir)/config.status $(srcdir)/opannotate.1.in
@@ -319,6 +314,8 @@ opimport.1: $(top_builddir)/config.status $(srcdir)/opimport.1.in
 	cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@
 operf.1: $(top_builddir)/config.status $(srcdir)/operf.1.in
 	cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@
+ocount.1: $(top_builddir)/config.status $(srcdir)/ocount.1.in
+	cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@
 
 mostlyclean-libtool:
 	-rm -f *.lo
diff --git a/doc/internals.html b/doc/internals.html
index 39b1f2a..f8e6c06 100644
--- a/doc/internals.html
+++ b/doc/internals.html
@@ -112,7 +112,7 @@
                   </dt>
                   <dt>
                     <span class="sect2">
-                      <a href="#id569589">2.2. IA64 and perfmon</a>
+                      <a href="#idp4832768">2.2. IA64 and perfmon</a>
                     </span>
                   </dt>
                 </dl>
@@ -327,7 +327,7 @@
           <b>List of Figures</b>
         </p>
         <dl>
-          <dt>3.1. <a href="#id569716">The OProfile buffers</a></dt>
+          <dt>3.1. <a href="#idp4848096">The OProfile buffers</a></dt>
         </dl>
       </div>
       <div class="chapter" title="Chapter 1. Introduction">
@@ -385,7 +385,7 @@
           </dl>
         </div>
         <p>
-This document is current for OProfile version 0.9.8.
+This document is current for OProfile version 1.0.0.
 This document provides some details on the internal workings of OProfile for the
 interested hacker. This document assumes strong C, working C++, plus some knowledge of
 kernel internals and CPU hardware.
@@ -606,7 +606,7 @@ information.
                 </dt>
                 <dt>
                   <span class="sect2">
-                    <a href="#id569589">2.2. IA64 and perfmon</a>
+                    <a href="#idp4832768">2.2. IA64 and perfmon</a>
                   </span>
                 </dt>
               </dl>
@@ -750,7 +750,7 @@ or enable on a per-counter basis, unlike the PPro models).
             <div class="titlepage">
               <div>
                 <div>
-                  <h3 class="title"><a id="id569589"></a>2.2. IA64 and perfmon</h3>
+                  <h3 class="title"><a id="idp4832768"></a>2.2. IA64 and perfmon</h3>
                 </div>
               </div>
             </div>
@@ -904,7 +904,7 @@ iterator. This provides an entirely lock-free method for extracting data
 from the CPU buffers. This process is described in detail later in this chapter.
 </p>
           <div class="figure">
-            <a id="id569716"></a>
+            <a id="idp4848096"></a>
             <p class="title">
               <b>Figure 3.1. The OProfile buffers</b>
             </p>
diff --git a/doc/ocount.1.in b/doc/ocount.1.in
new file mode 100644
index 0000000..790356b
--- /dev/null
+++ b/doc/ocount.1.in
@@ -0,0 +1,274 @@
+.\" Man page for ocount
+.\" Author: Maynard Johnson <maynardj@us.ibm.com>
+.TH ocount 1 "@DATE@" "oprofile @VERSION@"
+.SH NAME
+ocount \- Event counting tool for Linux
+
+.SH SYNOPSIS
+.B ocount
+[
+.I options
+]
+[ --system-wide | --process-list <pids> | --thread-list <tids> | --cpu-list <cpus> | [ command [ args ] ] ]
+
+.SH DESCRIPTION
+.BI ocount
+is an OProfile tool that can be used to count native hardware events occurring
+in either a given application, a set of processes or threads, a subset of active
+system processors, or the entire system. The data collected during
+a counting session is displayed to stdout by default or, optionally,
+to a file.
+.P
+When counting multiple events, the kernel may not be able to count all events
+simultaneously and, thus, may need to multiplex the counting of the events.
+If this happens, the "Percent time enabled" column in the
+.B ocount
+output will be less than 100, but counts are scaled up to a 100% estimated value.
+.br
+
+.SH RUN MODES
+One (and only one) of the following
+.SB run modes
+must be specified.  If you run
+.BI ocount
+using a run mode other than
+.BI "command " [args]
+, press Ctrl-c to stop
+.BI ocount
+when finished counting (e.g., when the monitored process ends).
+If you background
+.BI ocount
+(i.e., with '&') while using one of these run modes, you
+.B must
+stop it in a controlled manner so that the data collection process can
+be shut down cleanly and final results can be displayed. Use
+.BI kill
+.BI -SIGINT
+.BI <ocount-PID>
+for this purpose.
+.TP
+.BI "command " [args]
+The
+.I command
+is the application for which to count events.
+.I args
+are the input arguments required by the application.
+The
+.I command
+and its arguments
+.B must
+be positioned at the
+end of the command line, after all ocount options.
+.br
+.TP
+.BI "--process-list / -p " pids
+Use this option to count events for one or more already-running applications, specified
+via a comma-separated list (
+.I pids
+). Event counts will be collected for all children of the passed process(es)
+as well. You must have privileges for the user ID under which the specified process(es)
+are running; e.g., for a non-root user, the user ID of the process(es) is the same as
+that used for running ocount. A lack of privileges will result in the following
+failure message:
+.br
+        perf_event_open failed with Permission denied
+.br
+
+.TP
+.BI "--thread-list / -r " tids
+Use this option to count events for one or more already-running threads, specified
+via a comma-separated list (
+.I tids
+). Event counts will
+.B not
+be collected for any children of the passed thread(s). See the description of
+.I --process-list
+concerning required privileges.
+.br
+
+.TP
+.BI "--system-wide / -s"
+This option is for counting events for all processes running on your system.  You must
+have root authority to run ocount in this mode.
+.br
+
+.TP
+.BI "--cpu-list / -C " cpus
+This option is for counting events on a subset of processors on your system. You must
+have root authority to run ocount in this mode. This is a comma-separated list, where each
+element in the list may be either a single processor number or a range of processor numbers;
+for example: '-C 2,3,4-11,15'.
+.br
+
+.SH OTHER OPTIONS
+.TP
+.BI "--events / -e " event1[,event2[,...]]
+This option is for passing a comma-separated list of event specifications
+for counting. Each event spec is of the form:
+.br
+.I "   name[:unitmask[:kernel[:user]]]"
+.br
+.B Note:
+Do
+.B not
+include a
+.I count
+value in the event spec, as that parameter is only needed when profiling.
+.P
+.RS
+You can specify
+.I unitmask
+values using either a numerical value (hex values
+.I must
+begin with "0x") or a symbolic name (if the
+.I name=<um_name>
+field is shown in the
+.B ophelp
+output). For some named unit masks, the hex value is not unique; thus, OProfile
+tools require such unit mask values to be specified by name.
+If no unit mask is specified, the default unit mask value for the event is used.
+.P
+The
+.I kernel
+and
+.I user
+parts of the event specification are binary values ('1' or '0') indicating
+whether or not to count events in kernel space and user space.
+.br
+.B Note:
+In order to specify the
+.I kernel/user
+bits, you must also specify a
+.I unitmask
+value, even if the running processor type does not use unit masks \(em
+in which case, use the value '0' to signify a null unit mask; for example:
+.br
+   -e INST_RETIRED_ANY_P:0:1:0
+.br
+                         ^ ^ ^
+                         | | |--- '0': do not count user space events
+                         | |-- '1': count kernel space events
+                         |-- '0': the null unit mask
+.P
+Event names for certain processor types include a
+.I "_GRP<n>"
+suffix.  For such cases, the
+.I --events
+option may be specified with or without the
+.I "_GRP<n>"
+suffix.
+.P
+When no event specification is given, the default event for the running
+processor type will be used for counting.
+Use
+.BI ophelp
+to list the available events for your processor type.
+.RE
+.br
+
+.TP
+.BI "--separate-thread / -t"
+This option can be used in conjunction with either the
+.I --process-list
+or
+.I --thread-list
+option to display event counts on a per-thread (per-process) basis.  Without this option, all counts
+are aggregated.
+.P
+.RS
+.BI NOTE:
+If new threads are started by the process(es) being monitored after counting begins,
+the counts for those threads are aggregated with their parent's counts.
+.RE
+
+.br
+.TP
+.BI "--separate-cpu / -c"
+This option can be used in conjunction with either the
+.I --system-wide
+or
+.I --cpu-list
+option
+to display event counts on a per-cpu basis.  Without this option, all counts are aggregated.
+.br
+
+.TP
+.BI "--time-interval / -i " interval_length[:num_intervals]
+
+.B Note:
+The
+.I "interval_length"
+is given in milliseconds. However, the current implementation only supports
+100 ms granularity, so the given
+.I "interval_length"
+will be rounded to the nearest 100 ms.
+Results collected for each time interval are printed immediately
+instead of the default of one dump of cumulative event counts at the end of the run.
+Counters are reset to zero at the start of each interval.
+.P
+.RS
+If
+.I num_intervals
+is specified,
+.BI ocount
+exits after the specified number of intervals occur.
+.RE
+
+.TP
+.BI "--brief-format / -b"
+Use this option to print results in the following brief format:
+.br
+    [cpu or thread,]<event_name>[:umask[:K:U]],<count>,<percent_time_enabled>
+.br
+    [    <u32>    ,]<  string  >[< u32>[<bb>]],< u64 >,<       double       >
+
+The umask,
+.BR K ernel
+and
+.BR U ser
+modes are only printed if the values were specified as part of the event.
+The 'K' and 'U' fields are binary fields separated by colons, where the value for each binary
+field may be either '0' or '1'.
+.P
+.RS
+If
+.I --time-interval
+is specified, a separate line formatted as
+.br
+    timestamp,<num_seconds_since_epoch>[.n]
+.br
+is printed ahead of each dump of event counts. If the time interval specified is
+less than one second, the timestamp will have 1/10 second precision.
+.RE
+
+.TP
+.BI "--output-file / -f " outfile_name
+Results are written to
+.I outfile_name
+instead of interactively to the terminal.
+.br
+.TP
+.BI "--verbose / -V"
+Use this option to increase the verbosity of the output.
+.br
+.TP
+.BI "--version / -v"
+Show ocount version.
+.br
+.TP
+.BI "--help / -h"
+Display brief usage message.
+.br
+.TP
+.BI "--usage / -u"
+Display brief usage message.
+.br
+
+.SH EXAMPLE
+$ ocount make
+
+.SH VERSION
+This man page is current for @PACKAGE@-@VERSION@.
+
+.SH SEE ALSO
+operf(1).
diff --git a/doc/op-check-perfevents.1.in b/doc/op-check-perfevents.1.in
new file mode 100644
index 0000000..fc98683
--- /dev/null
+++ b/doc/op-check-perfevents.1.in
@@ -0,0 +1,36 @@
+.TH OP-CHECK-PERFEVENTS 1 "@DATE@" "oprofile @VERSION@"
+.UC 4
+.SH NAME
+op-check-perfevents \- checks for kernel perf pmu support
+.SH SYNOPSIS
+.br
+.B op-check-perfevents
+[
+.I options
+]
+.SH DESCRIPTION
+
+The small helper program
+.B op-check-perfevents
+determines whether the kernel supports the perf interface
+and returns a zero exit status if the perf pmu support is available.
+.SH OPTIONS
+.TP
+.BI "--help / -h"
+Show usage help message.
+.br
+.TP
+.BI "--verbose / -v"
+Print a string describing the error number returned by the perf_event_open syscall.
+.br
+
+.SH ENVIRONMENT
+No special environment variables are recognised by op-check-perfevents.
+
+.SH VERSION
+.TP
+This man page is current for @PACKAGE@-@VERSION@.
+
+.SH SEE ALSO
+.BR @OP_DOCDIR@,
+.BR oprofile(1)
diff --git a/doc/opannotate.1.in b/doc/opannotate.1.in
index ba57a38..98eda51 100644
--- a/doc/opannotate.1.in
+++ b/doc/opannotate.1.in
@@ -40,7 +40,21 @@ used --separate.
 .br
 .TP
 .BI "--exclude-file [files]"
-Exclude all files in the given comma-separated list of glob patterns.
+Exclude all files in the given comma-separated list of glob patterns. This option
+is supported solely with the
+.I --source
+option. It can be used to filter out source files in the output using the
+following types of specifications:
+.RS
+.IP \(bu 2
+filenames (basename -- i.e., no path)
+.IP \(bu 2
+filename glob specifications (all files whose base filename matches the given pattern)
+.IP \(bu 2
+directory segments (all source files located in the specified directory; e.g. "libio")
+.IP \(bu 2
+directory segment glob specifications (e.g., "libi*")
+.RE
 .br
 .TP
 .BI "--exclude-symbols / -e [symbols]"
@@ -62,6 +76,13 @@ A path to a filesystem to search for additional binaries.
 .TP
 .BI "--include-file [files]"
 Only include files in the given comma-separated list of glob patterns.
+The same rules apply for this option as for the
+.I --exclude-file
+option.
+.br
+.TP
+.BI "--merge / -m [lib,cpu,tid,tgid,unitmask,all]"
+Merge any profiles separated in a --separate session.
 .br
 .TP
 .BI "--include-symbols / -i [symbols]"
@@ -104,12 +125,11 @@ looking for them in --search-dirs.
 .BI "--session-dir="dir_path
 Use sample database from the specified directory
 .I dir_path
-instead of the default locations. If
+instead of the default location. If
 .I --session-dir
 is not specified, then
 .B opannotate
-will search for samples in
-.I <current_dir>/oprofile_data
+will search for samples in <current_dir>/oprofile_data
 first. If that directory does not exist, the standard session-dir of /var/lib/oprofile is used.
 .br
 .TP
@@ -119,8 +139,13 @@ for the binaries.
 .br
 .TP
 .BI "--threshold / -t [percentage]"
-Only output data for symbols that have more than the given percentage
-of total samples.
+For annotated assembly, only output data for symbols that have more than the given percentage
+of total samples. For profiles using multiple events, if the threshold is reached
+for any event, then all sample data for the symbol is shown.
+
+For annotated source, only output data for source files that have more than the given percentage
+of total samples. For profiles using multiple events, if the threshold is reached
+for any event, then all sample data for the source file is shown.
 .br
 .TP
 .BI "--verbose / -V [options]"
@@ -134,15 +159,9 @@ Show version.
 No special environment variables are recognised by opannotate.
 
 .SH FILES
-.I <current_dir>/oprofile_data/samples
-.RS 7
-Or
-.RE
-.I /var/lib/oprofile/samples/
-.LP
-.RS 7
+.TP
+.I <session_dir>/samples
 The location of the generated sample files.
-.RE
 
 .SH VERSION
 .TP
diff --git a/doc/oparchive.1.in b/doc/oparchive.1.in
index 0dba32d..753d6d5 100644
--- a/doc/oparchive.1.in
+++ b/doc/oparchive.1.in
@@ -36,12 +36,11 @@ Give verbose debugging output.
 .BI "--session-dir="dir_path
 Use sample database from the specified directory
 .I dir_path
-instead of the default locations. If
+instead of the default location. If
 .I --session-dir
 is not specified, then
 .B oparchive
-will search for samples in
-.I <current_dir>/oprofile_data
+will search for samples in <current_dir>/oprofile_data
 first. If that directory does not exist, the standard session-dir of /var/lib/oprofile is used.
 .br
 .TP
@@ -68,18 +67,12 @@ used --separate.
 Only list the files that would be archived, don't copy them.
 
 .SH ENVIRONMENT
-No special environment variables are recognised by oparchive.
+No special environment variables are recognized by oparchive.
 
 .SH FILES
-.I <current_dir>/oprofile_data/samples
-.RS 7
-Or
-.RE
-.I /var/lib/oprofile/samples/
-.LP
-.RS 7
+.TP
+.I <session_dir>/samples
 The location of the generated sample files.
-.RE
 
 .SH VERSION
 .TP
diff --git a/doc/opcontrol.1.in b/doc/opcontrol.1.in
deleted file mode 100644
index 0b595f3..0000000
--- a/doc/opcontrol.1.in
+++ /dev/null
@@ -1,195 +0,0 @@
-.TH OPCONTROL 1 "@DATE@" "oprofile @VERSION@"
-.UC 4
-.SH NAME
-opcontrol \- control OProfile profiling
-.SH SYNOPSIS
-.br
-.B opcontrol
-[
-.I options
-]
-.SH DESCRIPTION
-.B opcontrol
-can be used to start profiling, end a profiling session,
-dump profile data, and set up the profiling parameters.
-
-.SH OPTIONS
-.TP
-.BI "--help"
-Show help message.
-.br
-.TP
-.BI "--version"
-Show version.
-.br
-.TP
-.BI "--list-events"
-Shows the monitorable events.
-.br
-.TP
-.BI "--init"
-Load the OProfile module if required and make the OProfile driver
-interface available.
-.br
-.TP
-.BI "--setup"
-Followed by list options for profiling setup. Store setup 
-in ~root/.oprofile/daemonrc. Optional.
-.br
-.TP
-.BI "--status"
-Show configuration information.
-.br
-.TP
-.BI "--start-daemon"
-Start the oprofile daemon without starting profiling.
-.br
-.TP
-.BI "--start"
-Start data collection with either arguments provided by --setup
-or with information saved in ~root/.oprofile/daemonrc.
-.br
-.TP
-.BI "--dump"
-Force a flush of the collected profiling data to the daemon.
-.br
-.TP
-.BI "--stop"
-Stop data collection.
-.br
-.TP
-.BI "--shutdown"
-Stop data collection and kill the daemon.
-.br
-.TP
-.BI "--reset"
-Clear out data from current session, but leaves saved sessions.
-.br
-.TP
-.BI "--save="sessionname
-Save data from current session to sessionname.
-.br
-.TP
-.BI "--deinit"
-Shut down daemon. Unload the oprofile module and oprofilefs.
-.br
-.TP
-.BI "--session-dir="dir_path
-Use sample database out of directory dir_path instead of the default location (/var/lib/oprofile).
-.br
-.TP
-.BI "--buffer-size="num
-Set kernel buffer to num samples. The buffer watershed needs
-to be tweaked when changing this value.
-Rules:  A non-zero value goes into effect after a '--shutdown/start' sequence.
-A value of zero sets this parameter back to default value, but does not go into
-effect until after '--deinit/init' sequence.
-.br
-.TP
-.BI "--buffer-watershed="num
-Set kernel buffer watershed to num samples. When
-buffer-size - buffer-watershed free entries remain in the kernel buffer, data will be
-flushed to the daemon.  Most useful values are in the range [0.25 - 0.5] * buffer-size.
-Same rules as defined for buffer-size.
-.br
-.TP
-.BI "--cpu-buffer-size="num
-Set kernel per-cpu buffer to num samples. If you profile at high
-rate it can help to increase this if the log file show excessive count of
-sample lost cpu buffer overflow. Same rules as defined for buffer-size.
-.br
-.TP
-.BI "--event="[event|"default"]
-Specify an event to measure for the hardware performance counters,
-or "default" for the default event. The event is of the form
-"CPU_CLK_UNHALTED:30000:0:1:1" where the numeric values are
-count, unit mask, kernel-space counting, user-space counting,
-respectively.  Note that this over-rides all previous events selected;
-if you want two or more counters used simultaneously, you must specify
-them on the same opcontrol invocation. The numerical unit mask
-can also be a string which matches the first word in the unit mask
-description, but only for events with "extra:" parameters shown.
-Unit masks with "extra:" parameters
-.I must
-be specified by first word.
-.br
-.TP
-.BI "--separate="[none,lib,kernel,thread,cpu,all]
-Separate samples based on the given separator. 'lib' separates
-dynamically linked library samples per application. 'kernel' separates
-kernel and kernel module samples per application; 'kernel'
-implies 'library'. 'thread' gives separation for each thread and
-task.  'cpu' separates for each CPU. 'all' implies all of the above
-options and 'none' turns off separation.
-.br
-.TP
-.BI "--callgraph=#depth"
-Enable callgraph sample collection with a maximum depth. Use 0 to disable
-callgraph profiling. This option is available on x86 using a
-2.6+ kernel with callgraph support enabled.  It is also available on PowerPC using a 2.6.17+ kernel.
-.br
-.TP
-.BI "--image="[name,name...|"all"]
-Only profile the given absolute paths to binaries, or "all" to profile
-everything (the default).
-.br
-.TP
-.BI "--vmlinux="file
-vmlinux kernel image.
-.br
-.TP
-.BI "--no-vmlinux"
-Use this when you don't have a kernel vmlinux file, and you don't want to
-profile the kernel.
-.br
-.TP
-.BI "--verbose"
-Be verbose in the daemon log. This has a high overhead.
-.br
-.TP
-.BI "--kernel-range="start,end
-Set kernel range vma address in hexadecimal.
-
-.SH OPTIONS (specific to Xen)
-.TP
-.BI "--xen="file
-Xen image
-.br
-.TP
-.BI "--active-domains="<list>
-List of domain ids participating in a multi-domain profiling session. If 
-more than one domain is specified in <list> they should be separated using 
-commas. This option can only be used in domain 0 which is the only domain 
-that can coordinate a multi-domain profiling session. Including domain 0 in 
-the list of active domains is optional. (e.g. --active-domains=2,5,6 and 
---active-domains=0,2,5,6 are equivalent)
-.br
-.SH OPTIONS (specific to System z)
-.TP
-.BI "--s390hwsampbufsize="num
-Number of 2MB areas used per CPU for storing sample data.  The best
-size for the sample memory depends on the particular system and the
-workload to be measured.  Providing the sampler with too little memory
-results in lost samples. Reserving too much system memory for the
-sampler impacts the overall performance and, hence, also the workload
-to be measured.
-.br
-
-.SH ENVIRONMENT
-No special environment variables are recognised by opcontrol.
-
-.SH FILES
-.TP
-.I /root/.oprofile/daemonrc
-Configuration file for opcontrol
-.TP
-.I /var/lib/oprofile/samples/
-The location of the generated sample files.
-
-.SH VERSION
-.TP
-This man page is current for @PACKAGE@-@VERSION@.
-
-.SH SEE ALSO
-.BR @OP_DOCDIR@,
-.BR oprofile(1)
diff --git a/doc/operf.1.in b/doc/operf.1.in
index b109324..efaceb9 100644
--- a/doc/operf.1.in
+++ b/doc/operf.1.in
@@ -13,43 +13,41 @@ operf \- Performance profiler tool for Linux
 [ --system-wide | --pid <pid> | [ command [ args ] ] ]
 
 .SH DESCRIPTION
-Operf is an OProfile tool that can be used in place of opcontrol for profiling. Operf
-uses the Linux Performance Events Subsystem, and hence, does not require the use of
-the opcontrol daemon -- in fact, operf and opcontrol usage are mutually exclusive.
+Operf is the profiler tool provided with OProfile. Operf
+uses the Linux Performance Events Subsystem and, thus, does not require the
+obsolete oprofile kernel driver.
 .P
 By default, operf uses <current_dir>/oprofile_data as the session-dir and stores profiling data there.
 You can change this by way of the
 .I --session-dir
-option.
-.P
-The usual post-profiling analysis tools such as
+option. The usual post-profiling analysis tools such as
 .BI opreport(1)
 and
 .BI opannotate(1)
-can be used to generate profile reports.  The post-processing analysis tools will search for samples in
-.I <current_dir>/oprofile_data
-first. If that directory does not exist, the post-processing tools use the standard session-dir of /var/lib/oprofile.
+can be used to generate profile reports. Unless a
+.I session-dir
+is specified, the post-processing analysis tools will search for samples in
+<current_dir>/oprofile_data first. If that directory does not exist, the
+post-processing tools use the standard session-dir of /var/lib/oprofile.
 .P
 Statistics, such as total samples received
 and lost samples, are written to the operf.log file that can be found in the
 <session_dir>/samples directory.
 .br
 
-.SH OPTIONS
+.SH RUN MODES
+One (and only one) of the following
+.SB run modes
+must be specified:
 .TP
 .BI command [args]
 The command or application to be profiled.
 .I args
-are the input arguments that the command or application requires.  One (and only one) of either
-.I command
-,
-.I --pid
-or
-.I --system-wide
-is required.
+are the input arguments that the command or application requires.
 .br
 .TP
 .BI "--pid / -p " PID
+.RS
 This option enables operf to profile a running application.
 .I PID
 should be the process ID of the process you wish to profile.  When
@@ -65,7 +63,13 @@ data it has collected.  Use
 .BI -SIGINT
 .BI <operf-PID>
 for this purpose.
-.br
+.P
+.B Limitation:
+When using this option to profile a multi-threaded application that also forks
+new processes, be aware that samples for processes that are forked before profiling
+is started may not be recorded (depending on timing of thread creation and when
+operf is started).
+.RE
 .TP
 .BI "--system-wide / -s"
 This option is for performing a system-wide profile.  You must
@@ -86,30 +90,85 @@ that when running operf with this option, the user's current working
 directory should be /root or a subdirectory of /root to avoid
 storing sample data files in locations accessible by regular users.
 .br
+.SH OTHER OPTIONS
 .TP
-.BI "--vmlinux / k " vmlinux_path
+.BI "--vmlinux / -k " vmlinux_path
+.RS
 A vmlinux file that matches the running kernel that has symbol and/or debuginfo.
 Kernel samples will be attributed to this binary, allowing post-processing tools
 (like opreport) to attribute samples to the appropriate kernel symbols.
+.P
+The kernel symbol information may be obtained from /proc/kallsyms if
+the user does not specify a vmlinux file.  The symbol addresses are given
+in /proc/kallsyms if permitted by the setting of /proc/sys/kernel/kptr_restrict.
+.P
+If the
+.I --vmlinux
+option is not used and kernel symbols cannot be obtained from /proc/kallsyms,
+then all kernel samples are attributed to "no-vmlinux", which is simply
+a bucket to hold the samples and not an actual file.
+.RE
 .TP
 .BI "--events / -e " event1[,event2[,...]]
 This option is for passing a comma-separated list of event specifications
 for profiling. Each event spec is of the form:
 .br
 .I "   name:count[:unitmask[:kernel[:user]]]"
-.br
-When specifying a unit mask value, it may be either a hexadecimal value (which
-.I must
-begin with "0x") or a string (i.e, symbolic name) which matches the first word in
-the unit mask description. Specifying a symbolic name for the unit mask is valid only
-for unit masks having "extra:" parameters, as shown by the output of
-.B ophelp.
-Unit masks with "extra:" parameters
-.I must
-be specified using the symbolic name.  If no unit mask is specified, 0x0 will be
-used as the default.
 .P
 .RS
+The
+.I count
+value is used to control the sampling rate for profiling; it is the number
+of events to occur between samples. The rate is lowered by specifying a higher
+.I count
+value \(em i.e., a higher number of events to occur between samples.
+.P
+You can specify
+.I unitmask
+values using either a numerical value (hex values
+.I must
+begin with "0x") or a symbolic name (if the
+.I name=<um_name>
+field is shown in the
+.B ophelp
+output). For some named unit masks, the hex value is not unique; thus, OProfile
+tools require such unit mask values to be specified by name.
+If no unit mask is specified, the default unit mask value for the event is used.
+.P
+The
+.I kernel
+and
+.I user
+parts of the event specification are binary values ('1' or '0') indicating
+whether or not to collect samples for kernel space and user space.
+.br
+.B Note:
+In order to specify the
+.I kernel/user
+bits, you must also specify a
+.I unitmask
+value, even if the processor type (or the specified event) does not use unit masks \(em
+in which case, use the value '0' to signify a null unit mask; for example:
+.br
+   -e INST_RETIRED_ANY_P:100000:0:1:0
+.br
+                         ^      ^ ^ ^
+                         |      | | |--- '0': do not record user space samples
+                         |      | |-- '1': record kernel space samples
+                         |      |-- '0': the null unit mask
+                         |--count value
+.P
+Event names for some IBM PowerPC systems include a
+.I _GRP<n>
+(group number) suffix. You can pass either the full event name or the base event name
+(i.e., without the suffix) to
+.B operf.
+If the base event name is passed,
+.B operf
+will automatically choose an appropriate group number suffix
+for the event; thus, OProfile post-processing tools will always show real event
+names that include the group number suffix.
+.P
 When no event specification is given, the default event for the running
 processor type will be used for profiling.
 Use
@@ -126,7 +185,7 @@ full callchain is recorded, so there is no depth limit.
 .BI "--separate-thread / -t"
 This option categorizes samples by thread group ID (tgid) and thread ID (tid).
 The '--separate-thread' option is useful for seeing per-thread samples in
-multi-threaded applications.  When used in conjuction with the '--system-wide'
+multi-threaded applications.  When used in conjunction with the '--system-wide'
 option, the '--separate-thread' option is also useful for seeing per-process
 (i.e., per-thread group) samples for the case where multiple processes are
 executing the same program during a profiling run.
@@ -144,6 +203,7 @@ directory on the current path.
 .br
 .TP
 .BI "--lazy-conversion / -l"
+.RS
 Use this option to reduce the overhead of
 .BI operf
 during profiling. Normally, profile data received from the kernel is converted
@@ -156,7 +216,16 @@ particularly on busy multi-processor systems. The
 option directs
 .BI operf
 to wait until profiling is completed to do the conversion of profile data.
-.br
+.P
+.B Note:
+This option is
+.B not
+recommended to be used in conjunction with the
+.I --pid
+option for profiling multi-threaded processes. Depending
+on the order of thread creation (or forking of new processes),
+you may not get any samples for the new threads/processes.
+.RE
 .TP
 .BI "--append / -a"
 By default,
@@ -184,7 +253,7 @@ Show operf version.
 .br
 .TP
 .BI "--help / -h"
-Show a help message.
+Display brief usage message.
 .br
 .TP
 .BI "--usage / -u"
@@ -199,6 +268,3 @@ This man page is current for @PACKAGE@-@VERSION@.
 
 .SH SEE ALSO
 opreport(1), opannotate(1).
-
-.SH BUGS
-Some parameters are still under development.
diff --git a/doc/opgprof.1.in b/doc/opgprof.1.in
index 3e61ba9..5679289 100644
--- a/doc/opgprof.1.in
+++ b/doc/opgprof.1.in
@@ -30,7 +30,14 @@ Give verbose debugging output.
 .br
 .TP
 .BI "--session-dir="dir_path
-Use sample database out of directory dir_path instead of the default location (/var/lib/oprofile).
+Use sample database from the specified directory
+.I dir_path
+instead of the default location. If
+.I --session-dir
+is not specified, then
+.B opgprof
+will search for samples in <current_dir>/oprofile_data
+first. If that directory does not exist, the standard session-dir of /var/lib/oprofile is used.
 .br
 .TP
 .BI "--image-path / -p [paths]"
@@ -51,11 +58,11 @@ of total samples.
 Output to the given file instead of the default, gmon.out
 
 .SH ENVIRONMENT
-No special environment variables are recognised by opgprof.
+No special environment variables are recognized by opgprof.
 
 .SH FILES
 .TP
-.I /var/lib/oprofile/samples/
+.I <session_dir>/samples
 The location of the generated sample files.
 
 .SH VERSION
diff --git a/doc/ophelp.1.in b/doc/ophelp.1.in
index 3548d74..3d4a7de 100644
--- a/doc/ophelp.1.in
+++ b/doc/ophelp.1.in
@@ -27,6 +27,28 @@ Show the events for the given numerical CPU type.
 Show the symbolic CPU name.
 .br
 .TP
+.BI "--get-default-event / -d"
+.br
+Show the default event for the specified CPU type.
+.TP
+.BI "--check-events / -e [events]"
+Check the given space-separated event descriptions for validity.
+If the events are valid, show which pmu counter each event would be assigned to.
+.br
+.TP
+.BI "--callgraph [callgraph_depth]"
+Use the callgraph depth to compute the higher minimum sampling intervals
+for the events.
+.br
+.TP
+.BI "--unit-mask / -u [event]"
+Show the default unit mask for the given event.
+.br
+.TP
+.BI "--extra-mask / -E [event]"
+Show the extra unit mask for given event.
+.br
+.TP
 .BI "--xml / -X"
 List events in XML format.
 .br
@@ -45,9 +67,6 @@ No special environment variables are recognised by ophelp.
 .TP
 .I $prefix/share/oprofile/
 Event description files used by OProfile.
-.TP
-.I /var/lib/oprofile/samples/
-The location of the generated sample files.
 
 .SH VERSION
 .TP
diff --git a/doc/ophelp.xsd b/doc/ophelp.xsd
new file mode 100644
index 0000000..1270121
--- /dev/null
+++ b/doc/ophelp.xsd
@@ -0,0 +1,58 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<xs:schema
+ xmlns:xs="http://www.w3.org/2001/XMLSchema">
+  <xs:element name="help_events">
+    <xs:complexType>
+      <xs:sequence>
+		<xs:element minOccurs="1" maxOccurs="1" ref="header"/>
+		<xs:element minOccurs="1" maxOccurs="unbounded" ref="event"/>
+      </xs:sequence>
+      <!-- integer part of schemaversion should change for major schema changes -->
+      <xs:attribute name="schemaversion" use="required">
+        <xs:simpleType>
+          <xs:restriction base = "xs:string">
+            <xs:pattern value = "2\.[0-9]+"/>
+          </xs:restriction>
+        </xs:simpleType>       </xs:attribute>
+    </xs:complexType>
+  </xs:element>
+
+  <xs:element name="header">
+    <xs:complexType>
+      <xs:attribute name="title" type="xs:string" use="required"/>
+      <xs:attribute name="doc" type="xs:string" use="optional"/>
+    </xs:complexType>
+  </xs:element>
+  
+  <xs:element name="event">
+    <xs:complexType>
+      <xs:sequence>
+	  <xs:element minOccurs="0" maxOccurs="1" ref="unit_masks"/>
+      </xs:sequence>
+      <xs:attribute name="event_name" type="xs:string" use="required"/>
+      <xs:attribute name="group" type="xs:nonNegativeInteger" use="optional"/>
+      <xs:attribute name="counter_mask" type="xs:nonNegativeInteger" use="required"/>
+      <xs:attribute name="min_count" type="xs:nonNegativeInteger" use="required"/>
+      <xs:attribute name="desc" type="xs:string" use="required"/>
+    </xs:complexType>
+  </xs:element>
+  
+  <xs:element name="unit_masks">
+    <xs:complexType>
+      <xs:sequence>
+		<xs:element minOccurs="1" maxOccurs="unbounded" ref="unit_mask"/>
+      </xs:sequence>
+      <xs:attribute name="default" type="xs:nonNegativeInteger" use="required"/>
+      <xs:attribute name="category" type="xs:string" use="required"/>
+    </xs:complexType>
+  </xs:element>
+
+  <xs:element name="unit_mask">
+    <xs:complexType>
+      <xs:attribute name="name" type="xs:string" use="optional"/>
+      <xs:attribute name="mask" type="xs:nonNegativeInteger" use="required"/>
+      <xs:attribute name="desc" type="xs:string" use="required"/>
+    </xs:complexType>
+  </xs:element>
+</xs:schema>
+
diff --git a/doc/opimport.1.in b/doc/opimport.1.in
index ef8ef5c..5bb59b8 100644
--- a/doc/opimport.1.in
+++ b/doc/opimport.1.in
@@ -43,11 +43,11 @@ Give verbose debugging output.
 Show version.
 
 .SH ENVIRONMENT
-No special environment variables are recognised by opimport
+No special environment variables are recognized by opimport
 
 .SH FILES
 .TP
-.I /var/lib/oprofile/abi
+.I <session_dir>/abi
 The abi file description of the sample database files
 
 .SH VERSION
diff --git a/doc/opreport.1.in b/doc/opreport.1.in
index 1742886..0627aa9 100644
--- a/doc/opreport.1.in
+++ b/doc/opreport.1.in
@@ -77,7 +77,7 @@ Output full paths instead of basenames.
 Merge any profiles separated in a --separate session.
 .br
 .TP
-.BI "--no-header"
+.BI "--no-header / -n"
 Don't output a header detailing profiling parameters.
 .br
 .TP
@@ -92,12 +92,11 @@ Reverse the sort from the default.
 .BI "--session-dir="dir_path
 Use sample database from the specified directory
 .I dir_path
-instead of the default locations. If
+instead of the default location. If
 .I --session-dir
 is not specified, then
 .B opreport
-will search for samples in
-.I <current_dir>/oprofile_data
+will search for samples in <current_dir>/oprofile_data
 first. If that directory does not exist, the standard session-dir of /var/lib/oprofile is used.
 .br
 .TP
@@ -114,10 +113,25 @@ binary image filename.
 .BI "--symbols / -l"
 List per-symbol information instead of a binary image summary.
 .br
+Usually, the total of all per-symbol samples for a given binary image
+equals the summary count for the binary image (shown by running
+.B opreport
+with no options).
+However, it's possible for some sample addresses to fall outside the range of
+any symbols for a given binary image.  In such cases, the total number of
+per-symbol samples for the binary image may be less than the summary count
+for the image. Running
+.B opreport
+with the
+.I --verbose=debug
+option will display an informational message when this difference is detected.
+This difference is typically very small and can be ignored.
+.br
 .TP
 .BI "--threshold / -t [percentage]"
 Only output data for symbols that have more than the given percentage
-of total samples.
+of total samples. For profiles using multiple events, if the threshold is reached
+for any event, then all sample data for the symbol is shown.
 .br
 .TP
 .BI "--verbose / -V [options]"
@@ -135,15 +149,9 @@ Generate XML output.
 No special environment variables are recognized by opreport.
 
 .SH FILES
-.I <current_dir>/oprofile_data/samples
-.RS 7
-Or
-.RE
-.I /var/lib/oprofile/samples/
-.LP
-.RS 7
+.TP
+.I <session_dir>/samples
 The location of the generated sample files.
-.RE
 
 .SH VERSION
 .TP
diff --git a/doc/opreport.xsd b/doc/opreport.xsd
index 682a0bf..28e3128 100644
--- a/doc/opreport.xsd
+++ b/doc/opreport.xsd
@@ -110,7 +110,7 @@
   <xs:element name="process">
     <xs:complexType>
       <xs:sequence>
-	<xs:element minOccurs="1" maxOccurs="1" ref="count"/>
+        <xs:element minOccurs="1" maxOccurs="unbounded" ref="count"/>
         <xs:element minOccurs="1" maxOccurs="unbounded" ref="thread"/>
       </xs:sequence>
       <xs:attribute name="pid" type="xs:integer" use="required"/>
@@ -121,7 +121,7 @@
   <xs:element name="thread">
     <xs:complexType>
       <xs:sequence>
-		<xs:element minOccurs="1" maxOccurs="1" ref="count"/>
+        <xs:element minOccurs="1" maxOccurs="unbounded" ref="count"/>
         <xs:element minOccurs="1" maxOccurs="unbounded" ref="module"/>
       </xs:sequence>
       <xs:attribute name="tid" type="xs:integer" use="required"/>
@@ -131,10 +131,13 @@
   <xs:element name="binary">
     <xs:complexType>
       <xs:sequence>
-		<xs:element minOccurs="1" maxOccurs="1" ref="count"/>
+        <xs:element minOccurs="0" maxOccurs="unbounded" ref="count"/>
+        <!-- A binary element implicitly includes the binary file
+             as a module.  The count value here is the number of samples
+             just for the binary file module, which may be zero.  -->
         <xs:element minOccurs="0" maxOccurs="unbounded" ref="symbol"/>
-        <!-- When the separate=lib option is used an binary
-		     can contain a list of library Modules.  -->
+        <!-- When using operf or the separate=lib option of opcontrol, a binary
+             can contain a list of library Modules.  -->
         <xs:element minOccurs="0" maxOccurs="unbounded" ref="module"/>
       </xs:sequence>
       <xs:attribute name="name" type="xs:string" use="required"/>
@@ -144,7 +147,7 @@
   <xs:element name="module">
     <xs:complexType>
       <xs:sequence>
-		<xs:element minOccurs="0" maxOccurs="1" ref="count"/>
+        <xs:element minOccurs="0" maxOccurs="unbounded" ref="count"/>
         <xs:element minOccurs="1" maxOccurs="unbounded" ref="symbol"/>
       </xs:sequence>
       <xs:attribute name="name" type="xs:string" use="required"/>
@@ -203,7 +206,7 @@
       <xs:sequence>
         <xs:element minOccurs="0" maxOccurs="1" ref="callers"/>
         <xs:element minOccurs="0" maxOccurs="1" ref="callees"/>
-        <xs:element minOccurs="0" maxOccurs="1" ref="count"/>
+        <xs:element minOccurs="0" maxOccurs="unbounded" ref="count"/>
       </xs:sequence>
 	  <!-- idref is an index into symboltable table-->
       <xs:attribute name="idref" type="xs:nonNegativeInteger" use="required"/>
diff --git a/doc/oprofile.1 b/doc/oprofile.1
index fc4fcd8..efac053 100644
--- a/doc/oprofile.1
+++ b/doc/oprofile.1
@@ -1,14 +1,9 @@
-.TH OPROFILE 1 "Mon 27 August 2012" "oprofile 0.9.8"
+.TH OPROFILE 1 "Fri 12 September 2014" "oprofile 1.0.0"
 .UC 4
 .SH NAME
 oprofile \- a system-wide profiler
 .SH SYNOPSIS
 .br
-.B opcontrol
-[
-.I options
-]
-.br
 .B opreport
 [
 .I options
@@ -45,9 +40,6 @@ For a gentle guide to using OProfile, please read the HTML documentation
 listed in SEE ALSO.
 .br
 .SH OPCONTROL
-.B opcontrol
-is used for starting and stopping the OProfile daemon, and providing set-up
-parameters.
 .SH OPREPORT
 .B opreport
 gives image and symbol-based profile summaries for the whole system or
@@ -143,41 +135,39 @@ tgid: to restrict the results to particular threads within a process.
 This is only useful when using per-process profile separation.
 
 .SH ENVIRONMENT
-No special environment variables are recognised by oprofile.
+No special environment variables are recognized by oprofile.
 
 .SH FILES
 .TP
-.I $HOME/.oprofile/
-Configuration files
+.I /usr/local/share/doc/oprofile/oprofile.html
+OProfile user guide.
+.TP
+.I /usr/local/share/doc/oprofile/opreport.xsd
+Schema file for opreport XML output.
 .TP
-.I /root/.oprofile/daemonrc
-Configuration file for opcontrol
+.I /usr/local/share/doc/oprofile/ophelp.xsd
+Schema file for ophelp XML output.
 .TP
 .I /usr/local/share/oprofile/
 Event description files used by OProfile.
 .TP
-.I /var/lib/oprofile/samples/oprofiled.log
-The user-space daemon logfile.
-.TP
-.I /dev/oprofile
-The device filesystem for communication with the Linux kernel module. 
+.I <session-dir>/samples/operf.log
+The profiler log file.
 .TP
-.I /var/lib/oprofile/samples/
+.I <session-dir>/samples/current
 The location of the generated sample files.
 
 .SH VERSION
 .TP
-This man page is current for oprofile-0.9.8.
+This man page is current for oprofile-1.0.0.
 
 .SH SEE ALSO
 .BR /usr/local/share/doc/oprofile/,
-.BR opcontrol(1),
 .BR opreport(1),
 .BR opannotate(1),
 .BR oparchive(1),
 .BR opgprof(1),
 .BR gprof(1),
-.BR readprofile(1),
 .BR "CPU vendor architecture manuals"
 
 .SH COPYRIGHT
diff --git a/doc/oprofile.1.in b/doc/oprofile.1.in
index 0f2cb83..550954a 100644
--- a/doc/oprofile.1.in
+++ b/doc/oprofile.1.in
@@ -4,11 +4,6 @@
 oprofile \- a system-wide profiler
 .SH SYNOPSIS
 .br
-.B opcontrol
-[
-.I options
-]
-.br
 .B opreport
 [
 .I options
@@ -45,9 +40,6 @@ For a gentle guide to using OProfile, please read the HTML documentation
 listed in SEE ALSO.
 .br
 .SH OPCONTROL
-.B opcontrol
-is used for starting and stopping the OProfile daemon, and providing set-up
-parameters.
 .SH OPREPORT
 .B opreport
 gives image and symbol-based profile summaries for the whole system or
@@ -143,26 +135,26 @@ tgid: to restrict the results to particular threads within a process.
 This is only useful when using per-process profile separation.
 
 .SH ENVIRONMENT
-No special environment variables are recognised by oprofile.
+No special environment variables are recognized by oprofile.
 
 .SH FILES
 .TP
-.I $HOME/.oprofile/
-Configuration files
+.I @prefix@/share/doc/oprofile/oprofile.html
+OProfile user guide.
+.TP
+.I @prefix@/share/doc/oprofile/opreport.xsd
+Schema file for opreport XML output.
 .TP
-.I /root/.oprofile/daemonrc
-Configuration file for opcontrol
+.I @prefix@/share/doc/oprofile/ophelp.xsd
+Schema file for ophelp XML output.
 .TP
 .I @prefix@/share/oprofile/
 Event description files used by OProfile.
 .TP
-.I /var/lib/oprofile/samples/oprofiled.log
-The user-space daemon logfile.
-.TP
-.I /dev/oprofile
-The device filesystem for communication with the Linux kernel module. 
+.I <session-dir>/samples/operf.log
+The profiler log file.
 .TP
-.I /var/lib/oprofile/samples/
+.I <session-dir>/samples/current
 The location of the generated sample files.
 
 .SH VERSION
@@ -171,13 +163,11 @@ This man page is current for @PACKAGE@-@VERSION@.
 
 .SH SEE ALSO
 .BR @OP_DOCDIR@,
-.BR opcontrol(1),
 .BR opreport(1),
 .BR opannotate(1),
 .BR oparchive(1),
 .BR opgprof(1),
 .BR gprof(1),
-.BR readprofile(1),
 .BR "CPU vendor architecture manuals"
 
 .SH COPYRIGHT
diff --git a/doc/oprofile.html b/doc/oprofile.html
index 4e1a04e..3b00224 100644
--- a/doc/oprofile.html
+++ b/doc/oprofile.html
@@ -47,51 +47,56 @@
             <dl>
               <dt>
                 <span class="sect1">
-                  <a href="#legacy_mode">1. OProfile legacy mode</a>
+                  <a href="#legacy_mode">1. OProfile legacy profiling mode</a>
                 </span>
               </dt>
               <dt>
                 <span class="sect1">
-                  <a href="#perf_events">2. OProfile perf_events mode</a>
+                  <a href="#perf_events">2. OProfile perf_events profiling mode</a>
                 </span>
               </dt>
               <dt>
                 <span class="sect1">
-                  <a href="#applications">3. Applications of OProfile</a>
+                  <a href="#event_counting">3. OProfile event counting mode</a>
+                </span>
+              </dt>
+              <dt>
+                <span class="sect1">
+                  <a href="#applications">4. Applications of OProfile</a>
                 </span>
               </dt>
               <dd>
                 <dl>
                   <dt>
                     <span class="sect2">
-                      <a href="#jitsupport">3.1. Support for dynamically compiled (JIT) code</a>
+                      <a href="#jitsupport">4.1. Support for dynamically compiled (JIT) code</a>
                     </span>
                   </dt>
                   <dt>
                     <span class="sect2">
-                      <a href="#guestsupport">3.2. No support for virtual machine guests</a>
+                      <a href="#guestsupport">4.2. No support for virtual machine guests</a>
                     </span>
                   </dt>
                 </dl>
               </dd>
               <dt>
                 <span class="sect1">
-                  <a href="#requirements">4. System requirements</a>
+                  <a href="#requirements">5. System requirements</a>
                 </span>
               </dt>
               <dt>
                 <span class="sect1">
-                  <a href="#resources">5. Internet resources</a>
+                  <a href="#resources">6. Internet resources</a>
                 </span>
               </dt>
               <dt>
                 <span class="sect1">
-                  <a href="#install">6. Installation</a>
+                  <a href="#install">7. Installation</a>
                 </span>
               </dt>
               <dt>
                 <span class="sect1">
-                  <a href="#uninstall">7. Uninstalling OProfile</a>
+                  <a href="#uninstall">8. Uninstalling OProfile</a>
                 </span>
               </dt>
             </dl>
@@ -110,19 +115,24 @@
               </dt>
               <dt>
                 <span class="sect1">
-                  <a href="#getting-started-with-legacy">2. Getting started with OProfile using legacy mode</a>
+                  <a href="#getting-started-with-ocount">2. Getting started with OProfile using <span class="command"><strong>ocount</strong></span></a>
+                </span>
+              </dt>
+              <dt>
+                <span class="sect1">
+                  <a href="#eventspec">3. Specifying performance counter events</a>
                 </span>
               </dt>
               <dt>
                 <span class="sect1">
-                  <a href="#tools-overview">3. Tools summary</a>
+                  <a href="#tools-overview">4. Tools summary</a>
                 </span>
               </dt>
             </dl>
           </dd>
           <dt>
             <span class="chapter">
-              <a href="#controlling">3. Controlling the profiler</a>
+              <a href="#controlling-profiler">3. Controlling the profiler</a>
             </span>
           </dt>
           <dd>
@@ -134,92 +144,38 @@
               </dt>
               <dt>
                 <span class="sect1">
-                  <a href="#controlling-daemon">2. Using <span class="command"><strong>opcontrol</strong></span></a>
-                </span>
-              </dt>
-              <dd>
-                <dl>
-                  <dt>
-                    <span class="sect2">
-                      <a href="#opcontrolexamples">2.1. Examples</a>
-                    </span>
-                  </dt>
-                </dl>
-              </dd>
-              <dt>
-                <span class="sect1">
-                  <a href="#eventspec">3. Specifying performance counter events</a>
-                </span>
-              </dt>
-              <dt>
-                <span class="sect1">
-                  <a href="#setup-jit">4. Setting up the JIT profiling feature</a>
+                  <a href="#setup-jit">2. Setting up the JIT profiling feature</a>
                 </span>
               </dt>
               <dd>
                 <dl>
                   <dt>
                     <span class="sect2">
-                      <a href="#setup-jit-jvm">4.1. JVM instrumentation</a>
+                      <a href="#setup-jit-jvm">2.1. JVM instrumentation</a>
                     </span>
                   </dt>
                 </dl>
               </dd>
               <dt>
                 <span class="sect1">
-                  <a href="#oprofile-gui">5. Using <span class="command"><strong>oprof_start</strong></span></a>
-                </span>
-              </dt>
-              <dt>
-                <span class="sect1">
-                  <a href="#detailed-parameters">6. Configuration details</a>
+                  <a href="#detailed-parameters">3. Configuration details</a>
                 </span>
               </dt>
               <dd>
                 <dl>
                   <dt>
                     <span class="sect2">
-                      <a href="#hardware-counters">6.1. Hardware performance counters</a>
-                    </span>
-                  </dt>
-                  <dt>
-                    <span class="sect2">
-                      <a href="#timer">6.2. OProfile in timer interrupt mode</a>
-                    </span>
-                  </dt>
-                  <dt>
-                    <span class="sect2">
-                      <a href="#p4">6.3. Pentium 4 support</a>
-                    </span>
-                  </dt>
-                  <dt>
-                    <span class="sect2">
-                      <a href="#ia64">6.4. Intel Itanium 2 support</a>
-                    </span>
-                  </dt>
-                  <dt>
-                    <span class="sect2">
-                      <a href="#ppc64">6.5. PowerPC64 support</a>
-                    </span>
-                  </dt>
-                  <dt>
-                    <span class="sect2">
-                      <a href="#cell-be">6.6. Cell Broadband Engine support</a>
-                    </span>
-                  </dt>
-                  <dt>
-                    <span class="sect2">
-                      <a href="#amd-ibs-support">6.7. AMD64 (x86_64) Instruction-Based Sampling (IBS) support</a>
+                      <a href="#hardware-counters">3.1. Hardware performance counters</a>
                     </span>
                   </dt>
                   <dt>
                     <span class="sect2">
-                      <a href="#systemz">6.8. IBM System z hardware sampling support</a>
+                      <a href="#timer">3.2. OProfile timer interrupt mode</a>
                     </span>
                   </dt>
                   <dt>
                     <span class="sect2">
-                      <a href="#misuse">6.9. Dangerous counter settings</a>
+                      <a href="#special-notes">3.3. Architecture-specific configuration notes</a>
                     </span>
                   </dt>
                 </dl>
@@ -228,7 +184,7 @@
           </dd>
           <dt>
             <span class="chapter">
-              <a href="#results">4. Obtaining results</a>
+              <a href="#results">4. Obtaining profiling results</a>
             </span>
           </dt>
           <dd>
@@ -346,7 +302,7 @@
               </dd>
               <dt>
                 <span class="sect1">
-                  <a href="#oparchive">6. Archiving measurements (<span class="command"><strong>oparchive</strong></span>)</a>
+                  <a href="#oparchive">6. Analyzing profile data on another system (<span class="command"><strong>oparchive</strong></span>)</a>
                 </span>
               </dt>
               <dd>
@@ -468,7 +424,21 @@
           </dd>
           <dt>
             <span class="chapter">
-              <a href="#ack">6. Acknowledgments</a>
+              <a href="#controlling-counter">6. Controlling the event counter</a>
+            </span>
+          </dt>
+          <dd>
+            <dl>
+              <dt>
+                <span class="sect1">
+                  <a href="#controlling-ocount">1. Using <span class="command"><strong>ocount</strong></span></a>
+                </span>
+              </dt>
+            </dl>
+          </dd>
+          <dt>
+            <span class="chapter">
+              <a href="#ack">7. Acknowledgments</a>
             </span>
           </dt>
         </dl>
@@ -488,105 +458,138 @@
           <dl>
             <dt>
               <span class="sect1">
-                <a href="#legacy_mode">1. OProfile legacy mode</a>
+                <a href="#legacy_mode">1. OProfile legacy profiling mode</a>
+              </span>
+            </dt>
+            <dt>
+              <span class="sect1">
+                <a href="#perf_events">2. OProfile perf_events profiling mode</a>
               </span>
             </dt>
             <dt>
               <span class="sect1">
-                <a href="#perf_events">2. OProfile perf_events mode</a>
+                <a href="#event_counting">3. OProfile event counting mode</a>
               </span>
             </dt>
             <dt>
               <span class="sect1">
-                <a href="#applications">3. Applications of OProfile</a>
+                <a href="#applications">4. Applications of OProfile</a>
               </span>
             </dt>
             <dd>
               <dl>
                 <dt>
                   <span class="sect2">
-                    <a href="#jitsupport">3.1. Support for dynamically compiled (JIT) code</a>
+                    <a href="#jitsupport">4.1. Support for dynamically compiled (JIT) code</a>
                   </span>
                 </dt>
                 <dt>
                   <span class="sect2">
-                    <a href="#guestsupport">3.2. No support for virtual machine guests</a>
+                    <a href="#guestsupport">4.2. No support for virtual machine guests</a>
                   </span>
                 </dt>
               </dl>
             </dd>
             <dt>
               <span class="sect1">
-                <a href="#requirements">4. System requirements</a>
+                <a href="#requirements">5. System requirements</a>
               </span>
             </dt>
             <dt>
               <span class="sect1">
-                <a href="#resources">5. Internet resources</a>
+                <a href="#resources">6. Internet resources</a>
               </span>
             </dt>
             <dt>
               <span class="sect1">
-                <a href="#install">6. Installation</a>
+                <a href="#install">7. Installation</a>
               </span>
             </dt>
             <dt>
               <span class="sect1">
-                <a href="#uninstall">7. Uninstalling OProfile</a>
+                <a href="#uninstall">8. Uninstalling OProfile</a>
               </span>
             </dt>
           </dl>
         </div>
         <p>
-This manual applies to OProfile version 0.9.8.
-OProfile is a profiling system for Linux 2.6 and higher systems on a number of architectures. It is capable of profiling
-all parts of a running system, from the kernel (including modules and interrupt handlers) to shared libraries
-to binaries. OProfile can profile the whole system in the background, collecting information at a low overhead. These
-features make it ideal for profiling entire systems to determine bottle necks in real-world systems.
+This manual applies to OProfile version 1.0.0.
+OProfile is a set of performance monitoring tools for Linux 2.6 and higher systems, available on a number of architectures.
+OProfile provides the following features:
+</p>
+        <div class="itemizedlist">
+          <ul class="itemizedlist" type="disc">
+            <li class="listitem">Profiler</li>
+            <li class="listitem">Post-processing tools for analyzing profile data</li>
+            <li class="listitem">Event counter</li>
+          </ul>
+        </div>
+        <p>
+</p>
+        <p>
+OProfile is capable of monitoring native hardware events occurring in all parts of a running system, from the kernel
+(including modules and interrupt handlers) to shared libraries
+to binaries. OProfile can collect event information for the whole system in the background with very little overhead. These
+features make it ideal for monitoring entire systems to determine bottlenecks in real-world systems.
 </p>
         <p>
 Many CPUs provide "performance counters", hardware registers that can count "events"; for example,
-cache misses, or CPU cycles. OProfile provides profiles of code based on the number of these occurring events:
+cache misses, or CPU cycles. OProfile can collect profiles of code based on the number of these occurring events:
 repeatedly, every time a certain (configurable) number of events has occurred, the PC value is recorded.
-This information is aggregated into profiles for each binary image.</p>
-        <p>
-Some hardware setups do not allow OProfile to use performance counters: in these cases, no
-events are available so OProfile operates in timer mode, as described in later chapters. Timer
-mode is only available in "legacy mode" (see <a class="xref" href="#legacy_mode" title="1. OProfile legacy mode">Section 1, &#8220;OProfile legacy mode&#8221;</a>).
-</p>
-        <div class="sect1" title="1. OProfile legacy mode"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a id="legacy_mode"></a>1. OProfile legacy mode</h2></div></div></div>
-"Legacy" OProfile consists of the <span class="command"><strong>opcontrol</strong></span> shell script, the <span class="command"><strong>oprofiled</strong></span> daemon, and several post-processing tools (e.g.,
-<span class="command"><strong>opreport</strong></span>). The <span class="command"><strong>opcontrol</strong></span> script is used for configuring, starting, and stopping a profiling session. An OProfile
-kernel driver (usually built as a kernel module) is used for collecting samples, which are then recorded into sample files by
-<span class="command"><strong>oprofiled</strong></span>. Using OProfile in "legacy mode" requires root user authority since the profiling is done on a system-wide basis, which may
-(if misused) cause adverse effects to the system.
-<div class="note" title="Note" style="margin-left: 0.5in; margin-right: 0.5in;"><h3 class="title">Note</h3>
-Profiling setup parameters that you specify using <span class="command"><strong>opcontrol</strong></span> are cached in <code class="filename">/root/.oprofile/daemonrc</code>.
-Subsequent runs of <code class="code">opcontrol --start</code> will continue to use these cached values until you
-override them with new values.
-</div></div>
-        <div class="sect1" title="2. OProfile perf_events mode"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a id="perf_events"></a>2. OProfile perf_events mode</h2></div></div></div>
-As of release 0.9.8, OProfile now includes the ability to profile a single process versus the system-wide technique
-of legacy OProfile. With this new technique, the <span class="command"><strong>operf</strong></span> program is used to control profiling instead of the
-<span class="command"><strong>opcontrol</strong></span> script and <span class="command"><strong>oprofiled</strong></span> daemon of leagacy mode. Also, <span class="command"><strong>operf</strong></span> does not require the
-special OProfile kernel driver that legacy mode does; instead, it interfaces with the kernel to collect samples via the Linux Kernel
-Performance Events Subsystem (hereafter referred to as "perf_events"). Using <span class="command"><strong>operf</strong></span> to profile a single
-process can be done as a normal user; however, root authority <span class="emphasis"><em>is</em></span> required to run <span class="command"><strong>operf</strong></span> in system-wide
-profiling mode.
-<div class="note" title="Note 1" style="margin-left: 0.5in; margin-right: 0.5in;"><h3 class="title">Note 1</h3>
-The same OProfile post-processing tools are used whether you collect your profile with <span class="command"><strong>operf</strong></span> or <span class="command"><strong>opcontrol</strong></span>.
-</div><div class="note" title="Note 2" style="margin-left: 0.5in; margin-right: 0.5in;"><h3 class="title">Note 2</h3>
+This information is aggregated into profiles for each binary image.  Alternatively, OProfile's event counting
+tool can collect simple raw event counts.</p>
+        <div class="sect1" title="1. OProfile legacy profiling mode"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a id="legacy_mode"></a>1. OProfile legacy profiling mode</h2></div></div></div>
+Prior to release 1.0, OProfile included a profiling tool consisting of the <span class="command"><strong>opcontrol</strong></span> shell script, the <span class="command"><strong>oprofiled</strong></span> daemon,
+and the attendant oprofile kernel driver. This "legacy profiler" was deprecated in release 0.9.8 with the introduction of
+the <span class="command"><strong>operf</strong></span> profiling tool (see <a class="xref" href="#perf_events" title="2. OProfile perf_events profiling mode">Section 2, &#8220;OProfile perf_events profiling mode&#8221;</a>). Some older architectures/platforms
+do not support the use of <span class="command"><strong>operf</strong></span>. For those cases, oprofile users should install release 0.9.9, which is the
+last release to include the legacy profiler.
+</div>
+        <div class="sect1" title="2. OProfile perf_events profiling mode">
+          <div class="titlepage">
+            <div>
+              <div>
+                <h2 class="title" style="clear: both"><a id="perf_events"></a>2. OProfile perf_events profiling mode</h2>
+              </div>
+            </div>
+          </div>
+          <p>
+OProfile has the ability to profile a single process or every currently running process (i.e., system-wide)
+via the <span class="command"><strong>operf</strong></span> program. <span class="command"><strong>operf</strong></span> interfaces with the
+kernel to collect samples via the Linux Kernel Performance Events Subsystem (hereafter
+referred to as "perf_events").  OProfile can co-exist with other tools on your system that
+may also be using the perf_events kernel subsystem.
+</p>
+          <p>
+Using <span class="command"><strong>operf</strong></span> to profile a single
+process can be done as a normal user; however, root authority <span class="emphasis"><em>is</em></span> required to run
+<span class="command"><strong>operf</strong></span> in system-wide profiling mode.
+</p>
+          <div class="note" title="Note" style="margin-left: 0.5in; margin-right: 0.5in;"><h3 class="title">Note</h3>
 Some older processor models are not supported by the underlying perf_events kernel and, thus, are not supported by <span class="command"><strong>operf</strong></span>.
 If you receive the message
 <table xmlns="" border="0" style="background: #E0E0E0;" width="90%"><tr><td><pre class="screen">  Your kernel's Performance Events Subsystem does not support your processor type</pre></td></tr></table>
-when attempting to use <span class="command"><strong>operf</strong></span>, try profiling with <span class="command"><strong>opcontrol</strong></span>
+when attempting to use <span class="command"><strong>operf</strong></span>, install OProfile 0.9.9 and try profiling with <span class="command"><strong>opcontrol</strong></span>
 to see if your processor type may be supported by OProfile's legacy mode.
-</div></div>
-        <div class="sect1" title="3. Applications of OProfile">
+</div>
+          <p>
+</p>
+        </div>
+        <div class="sect1" title="3. OProfile event counting mode"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a id="event_counting"></a>3. OProfile event counting mode</h2></div></div></div>
+OProfile provides the <span class="command"><strong>ocount</strong></span> tool for
+collecting raw event counts on a per-application, per-process, per-cpu, or system-wide basis.  Unlike the
+profiling tools, post-processing of the data collected is not necessary -- the data is displayed in the
+output of <span class="command"><strong>ocount</strong></span>.  A common use case for event counting tools is when performance analysts
+want to determine the CPI (cycles per instruction) for an application. High CPI implies possible stalls,
+and many architectures provide events that give detailed information about the different types of stalls.
+The events provided are architecture-specific, so we refer the reader to the hardware manuals available for
+the processor type being used.
+</div>
+        <div class="sect1" title="4. Applications of OProfile">
           <div class="titlepage">
             <div>
               <div>
-                <h2 class="title" style="clear: both"><a id="applications"></a>3. Applications of OProfile</h2>
+                <h2 class="title" style="clear: both"><a id="applications"></a>4. Applications of OProfile</h2>
               </div>
             </div>
           </div>
@@ -608,7 +611,7 @@ OProfile is useful in a number of situations. You might want to use OProfile whe
                 <p>need to profile an application and its shared libraries</p>
               </li>
               <li class="listitem">
-                <p>need to profile dynamically compiled code of supported virtual machines (see <a class="xref" href="#jitsupport" title="3.1. Support for dynamically compiled (JIT) code">Section 3.1, &#8220;Support for dynamically compiled (JIT) code&#8221;</a>)</p>
+                <p>need to profile dynamically compiled code of supported virtual machines (see <a class="xref" href="#jitsupport" title="4.1. Support for dynamically compiled (JIT) code">Section 4.1, &#8220;Support for dynamically compiled (JIT) code&#8221;</a>)</p>
               </li>
               <li class="listitem">
                 <p>need to capture the performance behaviour of entire system</p>
@@ -649,66 +652,41 @@ OProfile is not a panacea. OProfile might not be a complete solution when you :
               </li>
             </ul>
           </div>
-          <div class="sect2" title="3.1. Support for dynamically compiled (JIT) code">
-            <div class="titlepage">
-              <div>
-                <div>
-                  <h3 class="title"><a id="jitsupport"></a>3.1. Support for dynamically compiled (JIT) code</h3>
-                </div>
-              </div>
-            </div>
-            <p>
-Older versions of OProfile were not capable of attributing samples to symbols from dynamically
-compiled code, i.e. "just-in-time (JIT) code". Typical JIT compilers load the JIT code into
-anonymous memory regions. OProfile reported the samples from such code, but the attribution
-provided was simply:
-</p>
-            <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
-              <tr>
-                <td>
-                  <pre class="screen">     anon: &lt;tgid&gt;&lt;address range&gt;</pre>
-                </td>
-              </tr>
-            </table>
-            <p>
-Due to this limitation, it wasn't possible to profile applications executed by virtual machines (VMs)
-like the Java Virtual Machine. OProfile now contains an infrastructure to support JITed code.
+          <div class="sect2" title="4.1. Support for dynamically compiled (JIT) code"><div class="titlepage"><div><div><h3 class="title"><a id="jitsupport"></a>4.1. Support for dynamically compiled (JIT) code</h3></div></div></div><p>
+OProfile provides a framework to support JITed code ("just-in-time (JIT) compiled code").
 A development library is provided to allow developers
-to add support for any VM that produces dynamically compiled code (see the <span class="emphasis"><em>OProfile JIT agent
+to add support for any VM (virtual machine) that produces dynamically compiled code (see the <span class="emphasis"><em>OProfile JIT agent
 developer guide</em></span>).
-In addition, built-in support is included for the following:</p>
-            <div class="itemizedlist">
-              <ul class="itemizedlist" type="disc">
-                <li class="listitem">JVMTI agent library for Java (1.5 and higher)</li>
-                <li class="listitem">JVMPI agent library for Java (1.5 and lower)</li>
-              </ul>
-            </div>
-            <p>
-For information on how to use OProfile's JIT support, see <a class="xref" href="#setup-jit" title="4. Setting up the JIT profiling feature">Section 4, &#8220;Setting up the JIT profiling feature&#8221;</a>.
-</p>
-          </div>
-          <div class="sect2" title="3.2. No support for virtual machine guests">
+In addition, built-in support is included for the following:</p><div class="itemizedlist"><ul class="itemizedlist" type="disc"><li class="listitem">JVMTI agent library for Java (1.5 and higher)</li><li class="listitem">JVMPI agent library for Java (1.5 and lower)</li></ul></div>
+These libraries make it possible for OProfile to attribute profile samples
+to Java methods. Without a VM-specific agent library, OProfile will typically report
+samples from JITed code similar to the following example:
+<table xmlns="" border="0" style="background: #E0E0E0;" width="90%"><tr><td><pre class="screen">     anon: &lt;tgid&gt;&lt;address range&gt;</pre></td></tr></table>
+For information on how to use OProfile's JIT support, see <a class="xref" href="#setup-jit" title="2. Setting up the JIT profiling feature">Section 2, &#8220;Setting up the JIT profiling feature&#8221;</a>.
+</div>
+          <div class="sect2" title="4.2. No support for virtual machine guests">
             <div class="titlepage">
               <div>
                 <div>
-                  <h3 class="title"><a id="guestsupport"></a>3.2. No support for virtual machine guests</h3>
+                  <h3 class="title"><a id="guestsupport"></a>4.2. No support for virtual machine guests</h3>
                 </div>
               </div>
             </div>
             <p>
 OProfile currently does not support event-based profiling (i.e, using hardware events like cache misses,
-branch mispredicts) on virtual machine guests running under systems such as VMware.  The list of
-supported events displayed by ophelp or 'opcontrol --list-events' is based on CPU type and does
+branch mispredicts) on virtual machine guests running under systems such as VMware.
+(Note: KVM guests <span class="emphasis"><em>are</em></span> supported.)  The list of
+supported events displayed by ophelp is based on CPU type and does
 not take into account whether the running system is a guest system or real system.  To use
-OProfile on such guest systems, you can use timer mode (see <a class="xref" href="#timer" title="6.2. OProfile in timer interrupt mode">Section 6.2, &#8220;OProfile in timer interrupt mode&#8221;</a>).
+OProfile on such guest systems, you must use the legacy profiler's timer mode (see <a class="xref" href="#timer" title="3.2. OProfile timer interrupt mode">Section 3.2, &#8220;OProfile timer interrupt mode&#8221;</a>).
 </p>
           </div>
         </div>
-        <div class="sect1" title="4. System requirements">
+        <div class="sect1" title="5. System requirements">
           <div class="titlepage">
             <div>
               <div>
-                <h2 class="title" style="clear: both"><a id="requirements"></a>4. System requirements</h2>
+                <h2 class="title" style="clear: both"><a id="requirements"></a>5. System requirements</h2>
               </div>
             </div>
           </div>
@@ -719,50 +697,15 @@ OProfile on such guest systems, you can use timer mode (see <a class="xref" href
               </dt>
               <dd>
                 <p>
-			To use OProfile's JIT support, a kernel version 2.6.13 or later is required.
-			In earlier kernel versions, the anonymous memory regions are not reported to OProfile and results
-			in profiling reports without any samples in these regions.
-			</p>
-                <p>
-                       Profiling the Cell Broadband Engine PowerPC Processing Element (PPE) requires a kernel version
-                       of 2.6.18 or more recent.
-                       Profiling the Cell Broadband Engine Synergistic Processing Element (SPE) requires a kernel version
-                       of 2.6.22 or more recent.  Additionally, full support of SPE profiling requires a BFD library
-                       from binutils code dated January 2007 or later.  To ensure the proper BFD support exists, run
-                       the <code class="code">configure</code> utility with <code class="code">--with-target=cell-be</code>.
-
-		       Profiling the Cell Broadband Engine using SPU events requires a kernel version of 2.6.29-rc1
-		       or  more recent.
-
-                       </p>
-                <div class="note" title="Note" style="margin-left: 0.5in; margin-right: 0.5in;"><h3 class="title">Note</h3>Attempting to profile SPEs with kernel versions older than 2.6.22 may cause the
-                       system to crash.</div>
-                <p>
-                       </p>
-                <p>
-			Instruction-Based Sampling (IBS) profile on AMD family10h processors requires 
-			kernel version 2.6.28-rc2 or later.
-			</p>
-              </dd>
-              <dt>
-                <span class="term">Supported architecture</span>
-              </dt>
-              <dd>
-                <p>
-			For Intel IA32, processors as old as P6 generation or Pentium 4 core are
-			supported.  The AMD Athlon, Opteron, Phenom, and Turion CPUs are also supported.
-			Older IA32 CPU types can be used with the timer mode of OProfile; please
-			see later in this manual for details.  OProfile also supports most processor
-			types of the following architectures:  Alpha, MIPS, ARM, x86-64, sparc64, PowerPC,
-			AVR32, and, in timer mode, PA-RISC and s390.
+			Release 2.6.31 or higher
 		</p>
               </dd>
               <dt>
-                <span class="term">Uniprocessor or SMP</span>
+                <span class="term">Supported architectures</span>
               </dt>
               <dd>
                 <p>
-			SMP machines are fully supported.
+			AMD, ARM, Intel, PowerPC, Tile, MIPS
 		</p>
               </dd>
               <dt>
@@ -780,8 +723,7 @@ OProfile on such guest systems, you can use timer mode (see <a class="xref" href
               </dt>
               <dd>
                 <p>
-			In order to build the perf_events-enabled <span class="command"><strong>operf</strong></span> program, you need to either
-			install the kernel-headers package for your system or use the <code class="code">--with-kernel</code>
+			Either install the kernel-headers package or use the <code class="code">--with-kernel</code>
 			configure option.
 		</p>
               </dd>
@@ -800,15 +742,6 @@ OProfile on such guest systems, you can use timer mode (see <a class="xref" href
 			account cannot be found.
 		</p>
               </dd>
-              <dt>
-                <span class="term">OProfile GUI</span>
-              </dt>
-              <dd>
-                <p>
-			The use of the GUI to start the profiler requires the <code class="filename">Qt</code> library.
-			Either <code class="filename">Qt 3</code> or <code class="filename">Qt 4</code> should work.
-		</p>
-              </dd>
               <dt>
                 <span class="term">
                   <acronym class="acronym">ELF</acronym>
@@ -830,11 +763,11 @@ OProfile on such guest systems, you can use timer mode (see <a class="xref" href
             </dl>
           </div>
         </div>
-        <div class="sect1" title="5. Internet resources">
+        <div class="sect1" title="6. Internet resources">
           <div class="titlepage">
             <div>
               <div>
-                <h2 class="title" style="clear: both"><a id="resources"></a>5. Internet resources</h2>
+                <h2 class="title" style="clear: both"><a id="resources"></a>6. Internet resources</h2>
               </div>
             </div>
           </div>
@@ -874,7 +807,7 @@ OProfile on such guest systems, you can use timer mode (see <a class="xref" href
               <dd>
                 <p>
 			There is a bug tracker for OProfile at SourceForge,
-			<a class="ulink" href="http://sf.net/tracker/?group_id=16191&amp;atid=116191">http://sf.net/tracker/?group_id=16191&amp;atid=116191</a>.
+			<a class="ulink" href="http://sourceforge.net/p/oprofile/bugs/">http://sourceforge.net/p/oprofile/bugs/</a>.
 		</p>
               </dd>
               <dt>
@@ -889,11 +822,11 @@ OProfile on such guest systems, you can use timer mode (see <a class="xref" href
             </dl>
           </div>
         </div>
-        <div class="sect1" title="6. Installation">
+        <div class="sect1" title="7. Installation">
           <div class="titlepage">
             <div>
               <div>
-                <h2 class="title" style="clear: both"><a id="install"></a>6. Installation</h2>
+                <h2 class="title" style="clear: both"><a id="install"></a>7. Installation</h2>
               </div>
             </div>
           </div>
@@ -911,7 +844,7 @@ is often all you need, but note these arguments to <span class="command"><strong
               <dd>
                 <p>
 			Use this option if you need to profile Java applications.  Also, see
-			<a class="xref" href="#requirements" title="4. System requirements">Section 4, &#8220;System requirements&#8221;</a>, "Required user account".  This option
+			<a class="xref" href="#requirements" title="5. System requirements">Section 5, &#8220;System requirements&#8221;</a>, "Required user account".  This option
 			is used to specify the location of the Java Development Kit (JDK)
 			source tree you wish to use. This is necessary to get the interface description
 			of the JVMPI (or JVMTI) interface to compile the JIT support code successfully.
@@ -1022,18 +955,13 @@ it's not sufficient to enable the local APIC -- you must also turn it on explici
 time by providing the "lapic" option to the kernel.
 If you use the NMI watchdog, be aware that the watchdog is disabled when profiling starts
 and not re-enabled until the profiling is stopped.
-</p>
-          <p>
-Please note that you must save or have available the <code class="filename">vmlinux</code> file
-generated during a kernel compile, as OProfile needs it (you can use
-<code class="option">--no-vmlinux</code>, but this will prevent kernel profiling).
 </p>
         </div>
-        <div class="sect1" title="7. Uninstalling OProfile">
+        <div class="sect1" title="8. Uninstalling OProfile">
           <div class="titlepage">
             <div>
               <div>
-                <h2 class="title" style="clear: both"><a id="uninstall"></a>7. Uninstalling OProfile</h2>
+                <h2 class="title" style="clear: both"><a id="uninstall"></a>8. Uninstalling OProfile</h2>
               </div>
             </div>
           </div>
@@ -1063,12 +991,17 @@ remove all installed files except your configuration file in the directory <code
             </dt>
             <dt>
               <span class="sect1">
-                <a href="#getting-started-with-legacy">2. Getting started with OProfile using legacy mode</a>
+                <a href="#getting-started-with-ocount">2. Getting started with OProfile using <span class="command"><strong>ocount</strong></span></a>
+              </span>
+            </dt>
+            <dt>
+              <span class="sect1">
+                <a href="#eventspec">3. Specifying performance counter events</a>
               </span>
             </dt>
             <dt>
               <span class="sect1">
-                <a href="#tools-overview">3. Tools summary</a>
+                <a href="#tools-overview">4. Tools summary</a>
               </span>
             </dt>
           </dl>
@@ -1082,13 +1015,8 @@ remove all installed files except your configuration file in the directory <code
             </div>
           </div>
           <p>
-Profiling with <span class="command"><strong>operf</strong></span> is the recommended profiling mode with OProfile. Using
-this mode not only allows you to target your profiling more precisely (i.e., single process
-or system-wide), it also allows OProfile to co-exist better with other tools on your system that
-may also be using the perf_events kernel subsystem.
-</p>
-          <p>
-With <span class="command"><strong>operf</strong></span>, there is no initial setup needed -- simply invoke <span class="command"><strong>operf</strong></span> with
+Profiling with <span class="command"><strong>operf</strong></span> allows you to precisely target your profiling (i.e., single process
+or system-wide). With <span class="command"><strong>operf</strong></span>, there is no initial setup needed -- simply invoke <span class="command"><strong>operf</strong></span> with
 the options you need; then run the OProfile post-processing tool(s). The <span class="command"><strong>operf</strong></span> syntax
 is as follows:
 </p>
@@ -1117,205 +1045,347 @@ and <span class="command"><strong>opreport</strong></span> and other post-proces
 unless you pass the <code class="code">--session-dir</code> option.
 </p>
         </div>
-        <div class="sect1" title="2. Getting started with OProfile using legacy mode">
+        <div class="sect1" title="2. Getting started with OProfile using ocount">
           <div class="titlepage">
             <div>
               <div>
-                <h2 class="title" style="clear: both"><a id="getting-started-with-legacy"></a>2. Getting started with OProfile using legacy mode</h2>
+                <h2 class="title" style="clear: both"><a id="getting-started-with-ocount"></a>2. Getting started with OProfile using <span class="command"><strong>ocount</strong></span></h2>
               </div>
             </div>
           </div>
           <p>
-Before you can use OProfile's legacy mode, you must set it up. The minimum setup required for this
-is to tell OProfile where the <code class="filename">vmlinux</code> file corresponding to the
-running kernel is, for example :
+<span class="command"><strong>ocount</strong></span> is an OProfile tool that can be used to count native hardware events occurring in either
+a specific application, a set of processes or threads, a set of active system processors, or the
+entire system. The data collected during a counting session is displayed to stdout by default, but may
+also be saved to a file.  The <span class="command"><strong>ocount</strong></span> syntax is as follows:
 </p>
-          <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
-            <tr>
-              <td>
-                <pre class="screen">opcontrol --vmlinux=/boot/vmlinux-`uname -r`</pre>
-              </td>
-            </tr>
-          </table>
           <p>
-If you don't want to profile the kernel itself,
-you can tell OProfile you don't have a <code class="filename">vmlinux</code> file :
 </p>
           <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
             <tr>
               <td>
-                <pre class="screen">opcontrol --no-vmlinux</pre>
+                <pre class="screen">ocount [ options ] [ --system-wide | --process-list &lt;pids&gt; | --thread-list &lt;tids&gt; | --cpu-list &lt;cpus&gt; [ command [ args ] ] ]
+</pre>
               </td>
             </tr>
           </table>
           <p>
-Now we are ready to start the daemon (<span class="command"><strong>oprofiled</strong></span>) which collects
-the profile data :
 </p>
-          <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
-            <tr>
-              <td>
-                <pre class="screen">opcontrol --start</pre>
-              </td>
-            </tr>
-          </table>
           <p>
-When you want to stop profiling, you can do so with :
+A typical usage might look like this:
+</p>
+          <p>
 </p>
           <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
             <tr>
               <td>
-                <pre class="screen">opcontrol --shutdown</pre>
+                <pre class="screen">ocount --events=CPU_CLK_UNHALTED,INST_RETIRED /home/user1/my_test_program my_arg</pre>
               </td>
             </tr>
           </table>
           <p>
-Note that unlike <span class="command"><strong>gprof</strong></span>, no instrumentation (<code class="option">-pg</code>
-and <code class="option">-a</code> options to <span class="command"><strong>gcc</strong></span>)
-is necessary.
 </p>
           <p>
-Periodically (or on <span class="command"><strong>opcontrol --shutdown</strong></span> or <span class="command"><strong>opcontrol --dump</strong></span>)
-the profile data is written out into the $SESSION_DIR/samples directory (by default at <code class="filename">/var/lib/oprofile/samples</code>).
-These profile files cover shared libraries, applications, the kernel (vmlinux), and kernel modules.
-You can clear the profile data (at any time) with <span class="command"><strong>opcontrol --reset</strong></span>.
+When <code class="filename">my_test_program</code> completes (or when you press Ctrl-C), counting
+stops and the results are displayed to the screen (as shown below).
 </p>
           <p>
-To place these sample database files in a specific directory instead of the default location
-(<code class="filename">/var/lib/oprofile</code>) use the <code class="option">--session-dir=dir</code> option.
-You must also specify the <code class="option">--session-dir</code> to tell the tools to continue using this directory.
-</p>
-          <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
-            <tr>
-              <td>
-                <pre class="screen">opcontrol --no-vmlinux --session-dir=/home/me/tmpsession</pre>
-              </td>
-            </tr>
-          </table>
-          <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
-            <tr>
-              <td>
-                <pre class="screen">opcontrol --start --session-dir=/home/me/tmpsession</pre>
-              </td>
-            </tr>
-          </table>
-          <p>
-You can get summaries of this data in a number of ways at any time. To get a summary of
-data across the entire system for all of these profiles, you can do :
 </p>
           <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
             <tr>
               <td>
-                <pre class="screen">opreport [--session-dir=dir]</pre>
+                <pre class="screen">
+Events were actively counted for 2.8 seconds.
+Event counts (actual) for /home/user1/my_test_program:
+	Event                   Count                    % time counted
+	CPU_CLK_UNHALTED        9,408,018,070            100.00
+	INST_RETIRED            16,719,918,108           100.00
+</pre>
               </td>
             </tr>
           </table>
           <p>
-Or to get a more detailed summary, for a particular image, you can do something like :
 </p>
-          <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
-            <tr>
-              <td>
-                <pre class="screen">opreport -l /boot/vmlinux-`uname -r`</pre>
-              </td>
-            </tr>
-          </table>
           <p>
-There are also a number of other ways of presenting the data, as described later in this manual.
-Note that OProfile will choose a default profiling setup for you. However, there are a number
-of options you can pass to <span class="command"><strong>opcontrol</strong></span> if you need to change something,
-also detailed later.
 </p>
         </div>
-        <div class="sect1" title="3. Tools summary">
+        <div class="sect1" title="3. Specifying performance counter events">
           <div class="titlepage">
             <div>
               <div>
-                <h2 class="title" style="clear: both"><a id="tools-overview"></a>3. Tools summary</h2>
+                <h2 class="title" style="clear: both"><a id="eventspec"></a>3. Specifying performance counter events</h2>
               </div>
             </div>
           </div>
           <p>
-This section gives a brief description of the available OProfile utilities and their purpose.
+Whether profiling with <span class="command"><strong>operf</strong></span> or doing simple event counting with <span class="command"><strong>ocount</strong></span>,
+you can collect information about one or more native hardware events using the <code class="code">--events</code>
+option -- a comma-separated list of event specifications. The event specification is the means to provide details
+of how each hardware performance counter should be set up.
+For profiling, the event specification is a colon-separated string of the form
+<code class="option"><span class="emphasis"><em>name</em></span>:<span class="emphasis"><em>count</em></span>:<span class="emphasis"><em>unitmask</em></span>:<span class="emphasis"><em>kernel</em></span>:<span class="emphasis"><em>user</em></span></code>
+as described in the table below. For <span class="command"><strong>ocount</strong></span>, the specification is of the form
+<code class="option"><span class="emphasis"><em>name</em></span>:<span class="emphasis"><em>unitmask</em></span>:<span class="emphasis"><em>kernel</em></span>:<span class="emphasis"><em>user</em></span></code>.
+Note the presence of the <span class="emphasis"><em>count</em></span> field for profiling.  The <span class="emphasis"><em>count</em></span> field tells the profiler
+how many events should occur between successive profile snapshots (usually referred to as "samples").  Since
+<span class="command"><strong>ocount</strong></span> does not do sampling, the <span class="emphasis"><em>count</em></span> field is not needed.
 </p>
-          <div class="variablelist">
-            <dl>
-              <dt>
-                <span class="term">
-                  <code class="filename">ophelp</code>
-                </span>
-              </dt>
-              <dd>
-                <p>
-		This utility lists the available events and short descriptions.
-	</p>
-              </dd>
-              <dt>
-                <span class="term">
-                  <code class="filename">operf</code>
-                </span>
-              </dt>
-              <dd>
-                <p>
-		This is the recommended program for collecting profile data.
-	</p>
-              </dd>
-              <dt>
-                <span class="term">
-                  <code class="filename">opcontrol</code>
-                </span>
-              </dt>
-              <dd>
-                <p>
-		Used for controlling OProfile data collection in legacy mode, discussed in <a class="xref" href="#controlling" title="Chapter 3. Controlling the profiler">Chapter 3, <i>Controlling the profiler</i></a>.
-	</p>
-              </dd>
-              <dt>
-                <span class="term">
-                  <code class="filename">agent libraries</code>
-                </span>
-              </dt>
-              <dd>
-                <p>
-			Used by virtual machines (like the Java VM) to record information about JITed code being profiled. See <a class="xref" href="#setup-jit" title="4. Setting up the JIT profiling feature">Section 4, &#8220;Setting up the JIT profiling feature&#8221;</a>.
-		</p>
-              </dd>
-              <dt>
-                <span class="term">
-                  <code class="filename">opreport</code>
-                </span>
-              </dt>
-              <dd>
-                <p>
-		This is the main tool for retrieving useful profile data, described in
-		<a class="xref" href="#opreport" title="2. Image summaries and symbol summaries (opreport)">Section 2, &#8220;Image summaries and symbol summaries (<span class="command"><strong>opreport</strong></span>)&#8221;</a>.
-	</p>
-              </dd>
-              <dt>
-                <span class="term">
-                  <code class="filename">opannotate</code>
-                </span>
-              </dt>
-              <dd>
-                <p>
-		This utility can be used to produce annotated source, assembly or mixed source/assembly.
-		Source level annotation is available only if the application was compiled with 
-		debugging symbols. See <a class="xref" href="#opannotate" title="3. Outputting annotated source (opannotate)">Section 3, &#8220;Outputting annotated source (<span class="command"><strong>opannotate</strong></span>)&#8221;</a>.
-	</p>
-              </dd>
-              <dt>
-                <span class="term">
-                  <code class="filename">opgprof</code>
-                </span>
-              </dt>
-              <dd>
-                <p>
-		This utility can output gprof-style data files for a binary, for use with
-		<span class="command"><strong>gprof -p</strong></span>. See <a class="xref" href="#opgprof" title="5. gprof-compatible output (opgprof)">Section 5, &#8220;<span class="command"><strong>gprof</strong></span>-compatible output (<span class="command"><strong>opgprof</strong></span>)&#8221;</a>.
-	</p>
-              </dd>
-              <dt>
-                <span class="term">
+          <p>
+If no event specs are passed to <span class="command"><strong>operf</strong></span> or <span class="command"><strong>ocount</strong></span>,
+the default event will be used.
+</p>
+          <p>
+</p>
+          <div class="note" title="Note" style="margin-left: 0.5in; margin-right: 0.5in;"><h3 class="title">Note</h3>The perf_events kernel subsystem allocates hardware counters as necessary, but some processor
+types have restrictions as to what hardware events may be counted simultaneously.
+The kernel employs a multiplexing technique when such
+hardware restrictions are encountered, such that events are monitored on a rotating basis.
+</div>
+          <p>
+</p>
+          <div class="informaltable">
+            <table border="1">
+              <colgroup>
+                <col />
+                <col />
+              </colgroup>
+              <tbody>
+                <tr>
+                  <td>
+                    <code class="option">name</code>
+                  </td>
+                  <td>The symbolic event name, e.g. <code class="constant">CPU_CLK_UNHALTED</code></td>
+                </tr>
+                <tr>
+                  <td>
+                    <code class="option">count</code>
+                  </td>
+                  <td>The counter reset value, e.g. 100000; use only for profiling</td>
+                </tr>
+                <tr>
+                  <td>
+                    <code class="option">unitmask</code>
+                  </td>
+                  <td>The unit mask, as given in the events list: e.g. 0x0f; or a symbolic name
+if a <code class="constant">name=&lt;um_name&gt;</code> field is present</td>
+                </tr>
+                <tr>
+                  <td>
+                    <code class="option">kernel</code>
+                  </td>
+                  <td>Enable profiling of kernel code</td>
+                </tr>
+                <tr>
+                  <td>
+                    <code class="option">user</code>
+                  </td>
+                  <td>Enable profiling of userspace code</td>
+                </tr>
+              </tbody>
+            </table>
+          </div>
+          <p>
+The last three values are optional; if you omit them (e.g. <code class="option">operf --events=DATA_MEM_REFS:30000</code>),
+they will be set to the default values (i.e., the default unit mask value for the given event, and profiling (or counting)
+both kernel and userspace code will be enabled). Note that on some architectures, some events may
+require a unit mask be specified.
+</p>
+          <p>
+You can specify unit mask values using either a numerical value (hex values
+<span class="emphasis"><em>must</em></span> begin with "0x") or a symbolic name (if the <code class="constant">name=&lt;um_name&gt;</code>
+field is shown in the <span class="command"><strong>ophelp</strong></span> output). For some named unit masks, the hex value is not unique; thus, OProfile
+tools enforce specifying such unit mask values by name.
+</p>
+          <p>
+The table below lists the default profiling event for various processor types. The same events
+can be used for <span class="command"><strong>ocount</strong></span>, minus the <span class="emphasis"><em>count</em></span> field.
+</p>
+          <div class="informaltable">
+            <table border="1">
+              <colgroup>
+                <col />
+                <col />
+                <col />
+              </colgroup>
+              <tbody>
+                <tr>
+                  <td>Processor</td>
+                  <td>cpu_type</td>
+                  <td>Default event</td>
+                </tr>
+                <tr>
+                  <td>Alpha EV67</td>
+                  <td>alpha/ev67</td>
+                  <td>CYCLES:100000:0:1:1</td>
+                </tr>
+                <tr>
+                  <td>ARM/XScale PMU1</td>
+                  <td>arm/xscale1</td>
+                  <td>CPU_CYCLES:100000:0:1:1</td>
+                </tr>
+                <tr>
+                  <td>ARM/XScale PMU2</td>
+                  <td>arm/xscale2</td>
+                  <td>CPU_CYCLES:100000:0:1:1</td>
+                </tr>
+                <tr>
+                  <td>ARM/MPCore</td>
+                  <td>arm/mpcore</td>
+                  <td>CPU_CYCLES:100000:0:1:1</td>
+                </tr>
+                <tr>
+                  <td>Athlon</td>
+                  <td>i386/athlon</td>
+                  <td>CPU_CLK_UNHALTED:100000:0:1:1</td>
+                </tr>
+                <tr>
+                  <td>Pentium Pro</td>
+                  <td>i386/ppro</td>
+                  <td>CPU_CLK_UNHALTED:100000:0:1:1</td>
+                </tr>
+                <tr>
+                  <td>Pentium II</td>
+                  <td>i386/pii</td>
+                  <td>CPU_CLK_UNHALTED:100000:0:1:1</td>
+                </tr>
+                <tr>
+                  <td>Pentium III</td>
+                  <td>i386/piii</td>
+                  <td>CPU_CLK_UNHALTED:100000:0:1:1</td>
+                </tr>
+                <tr>
+                  <td>Pentium M (P6 core)</td>
+                  <td>i386/p6_mobile</td>
+                  <td>CPU_CLK_UNHALTED:100000:0:1:1</td>
+                </tr>
+                <tr>
+                  <td>Pentium 4 (non-HT)</td>
+                  <td>i386/p4</td>
+                  <td>GLOBAL_POWER_EVENTS:100000:1:1:1</td>
+                </tr>
+                <tr>
+                  <td>Pentium 4 (HT)</td>
+                  <td>i386/p4-ht</td>
+                  <td>GLOBAL_POWER_EVENTS:100000:1:1:1</td>
+                </tr>
+                <tr>
+                  <td>Hammer</td>
+                  <td>x86-64/hammer</td>
+                  <td>CPU_CLK_UNHALTED:100000:0:1:1</td>
+                </tr>
+                <tr>
+                  <td>Family10h</td>
+                  <td>x86-64/family10</td>
+                  <td>CPU_CLK_UNHALTED:100000:0:1:1</td>
+                </tr>
+                <tr>
+                  <td>Family11h</td>
+                  <td>x86-64/family11h</td>
+                  <td>CPU_CLK_UNHALTED:100000:0:1:1</td>
+                </tr>
+                <tr>
+                  <td>IBM pseries</td>
+                  <td>ppc64/power{ 4|5|6|7|8|970 }</td>
+                  <td>CYCLES:100000:0:1:1</td>
+                </tr>
+                <tr>
+                  <td>IBM s390</td>
+                  <td>s390/{ z10|z196|zEC12 }</td>
+                  <td>HWSAMPLING:4127518:0:1:1</td>
+                </tr>
+              </tbody>
+            </table>
+          </div>
+        </div>
+        <div class="sect1" title="4. Tools summary">
+          <div class="titlepage">
+            <div>
+              <div>
+                <h2 class="title" style="clear: both"><a id="tools-overview"></a>4. Tools summary</h2>
+              </div>
+            </div>
+          </div>
+          <p>
+This section gives a brief description of the available OProfile utilities and their purpose.
+</p>
+          <div class="variablelist">
+            <dl>
+              <dt>
+                <span class="term">
+                  <code class="filename">ophelp</code>
+                </span>
+              </dt>
+              <dd>
+                <p>
+		This utility lists the available events and short descriptions.
+	</p>
+              </dd>
+              <dt>
+                <span class="term">
+                  <code class="filename">operf</code>
+                </span>
+              </dt>
+              <dd>
+                <p>
+		This is the program for collecting profile data, discussed in <a class="xref" href="#controlling-operf" title="1. Using operf">Section 1, &#8220;Using <span class="command"><strong>operf</strong></span>&#8221;</a>.
+	</p>
+              </dd>
+              <dt>
+                <span class="term">
+                  <code class="filename">ocount</code>
+                </span>
+              </dt>
+              <dd>
+                <p>
+		This tool is used for simple event counting, as described in <a class="xref" href="#controlling-ocount" title="1. Using ocount">Section 1, &#8220;Using <span class="command"><strong>ocount</strong></span>&#8221;</a>.
+	</p>
+              </dd>
+              <dt>
+                <span class="term">
+                  <code class="filename">agent libraries</code>
+                </span>
+              </dt>
+              <dd>
+                <p>
+			Used by virtual machines (like the Java VM) to record information about JITed code being profiled. See <a class="xref" href="#setup-jit" title="2. Setting up the JIT profiling feature">Section 2, &#8220;Setting up the JIT profiling feature&#8221;</a>.
+		</p>
+              </dd>
+              <dt>
+                <span class="term">
+                  <code class="filename">opreport</code>
+                </span>
+              </dt>
+              <dd>
+                <p>
+		This is the main tool for retrieving useful profile data, described in
+		<a class="xref" href="#opreport" title="2. Image summaries and symbol summaries (opreport)">Section 2, &#8220;Image summaries and symbol summaries (<span class="command"><strong>opreport</strong></span>)&#8221;</a>.
+	</p>
+              </dd>
+              <dt>
+                <span class="term">
+                  <code class="filename">opannotate</code>
+                </span>
+              </dt>
+              <dd>
+                <p>
+		This utility can be used to produce annotated source, assembly or mixed source/assembly.
+		Source level annotation is available only if the application was compiled with 
+		debugging symbols. See <a class="xref" href="#opannotate" title="3. Outputting annotated source (opannotate)">Section 3, &#8220;Outputting annotated source (<span class="command"><strong>opannotate</strong></span>)&#8221;</a>.
+	</p>
+              </dd>
+              <dt>
+                <span class="term">
+                  <code class="filename">opgprof</code>
+                </span>
+              </dt>
+              <dd>
+                <p>
+		This utility can output gprof-style data files for a binary, for use with
+		<span class="command"><strong>gprof -p</strong></span>. See <a class="xref" href="#opgprof" title="5. gprof-compatible output (opgprof)">Section 5, &#8220;<span class="command"><strong>gprof</strong></span>-compatible output (<span class="command"><strong>opgprof</strong></span>)&#8221;</a>.
+	</p>
+              </dd>
+              <dt>
+                <span class="term">
                   <code class="filename">oparchive</code>
                 </span>
               </dt>
@@ -1325,7 +1395,7 @@ This section gives a brief description of the available OProfile utilities and t
 		and sample files and copy the files into an archive.
 		The archive is self-contained and can be moved to another
 		machine for further analysis.
-		See <a class="xref" href="#oparchive" title="6. Archiving measurements (oparchive)">Section 6, &#8220;Archiving measurements (<span class="command"><strong>oparchive</strong></span>)&#8221;</a>.
+		See <a class="xref" href="#oparchive" title="6. Analyzing profile data on another system (oparchive)">Section 6, &#8220;Analyzing profile data on another system (<span class="command"><strong>oparchive</strong></span>)&#8221;</a>.
 	</p>
               </dd>
               <dt>
@@ -1336,7 +1406,7 @@ This section gives a brief description of the available OProfile utilities and t
               <dd>
                 <p>
 		This utility converts sample database files from a foreign binary format (abi) to
-		the native format. This is useful only when moving sample files between hosts,
+		the native format. This is useful only when moving sample files between hosts
 		for analysis on platforms other than the one used for collection.
 		See <a class="xref" href="#opimport" title="7. Converting sample database files (opimport)">Section 7, &#8220;Converting sample database files (<span class="command"><strong>opimport</strong></span>)&#8221;</a>.
 	</p>
@@ -1349,7 +1419,7 @@ This section gives a brief description of the available OProfile utilities and t
         <div class="titlepage">
           <div>
             <div>
-              <h2 class="title"><a id="controlling"></a>Chapter 3. Controlling the profiler</h2>
+              <h2 class="title"><a id="controlling-profiler"></a>Chapter 3. Controlling the profiler</h2>
             </div>
           </div>
         </div>
@@ -1365,92 +1435,38 @@ This section gives a brief description of the available OProfile utilities and t
             </dt>
             <dt>
               <span class="sect1">
-                <a href="#controlling-daemon">2. Using <span class="command"><strong>opcontrol</strong></span></a>
-              </span>
-            </dt>
-            <dd>
-              <dl>
-                <dt>
-                  <span class="sect2">
-                    <a href="#opcontrolexamples">2.1. Examples</a>
-                  </span>
-                </dt>
-              </dl>
-            </dd>
-            <dt>
-              <span class="sect1">
-                <a href="#eventspec">3. Specifying performance counter events</a>
-              </span>
-            </dt>
-            <dt>
-              <span class="sect1">
-                <a href="#setup-jit">4. Setting up the JIT profiling feature</a>
+                <a href="#setup-jit">2. Setting up the JIT profiling feature</a>
               </span>
             </dt>
             <dd>
               <dl>
                 <dt>
                   <span class="sect2">
-                    <a href="#setup-jit-jvm">4.1. JVM instrumentation</a>
+                    <a href="#setup-jit-jvm">2.1. JVM instrumentation</a>
                   </span>
                 </dt>
               </dl>
             </dd>
             <dt>
               <span class="sect1">
-                <a href="#oprofile-gui">5. Using <span class="command"><strong>oprof_start</strong></span></a>
-              </span>
-            </dt>
-            <dt>
-              <span class="sect1">
-                <a href="#detailed-parameters">6. Configuration details</a>
+                <a href="#detailed-parameters">3. Configuration details</a>
               </span>
             </dt>
             <dd>
               <dl>
                 <dt>
                   <span class="sect2">
-                    <a href="#hardware-counters">6.1. Hardware performance counters</a>
-                  </span>
-                </dt>
-                <dt>
-                  <span class="sect2">
-                    <a href="#timer">6.2. OProfile in timer interrupt mode</a>
-                  </span>
-                </dt>
-                <dt>
-                  <span class="sect2">
-                    <a href="#p4">6.3. Pentium 4 support</a>
+                    <a href="#hardware-counters">3.1. Hardware performance counters</a>
                   </span>
                 </dt>
                 <dt>
                   <span class="sect2">
-                    <a href="#ia64">6.4. Intel Itanium 2 support</a>
+                    <a href="#timer">3.2. OProfile timer interrupt mode</a>
                   </span>
                 </dt>
                 <dt>
                   <span class="sect2">
-                    <a href="#ppc64">6.5. PowerPC64 support</a>
-                  </span>
-                </dt>
-                <dt>
-                  <span class="sect2">
-                    <a href="#cell-be">6.6. Cell Broadband Engine support</a>
-                  </span>
-                </dt>
-                <dt>
-                  <span class="sect2">
-                    <a href="#amd-ibs-support">6.7. AMD64 (x86_64) Instruction-Based Sampling (IBS) support</a>
-                  </span>
-                </dt>
-                <dt>
-                  <span class="sect2">
-                    <a href="#systemz">6.8. IBM System z hardware sampling support</a>
-                  </span>
-                </dt>
-                <dt>
-                  <span class="sect2">
-                    <a href="#misuse">6.9. Dangerous counter settings</a>
+                    <a href="#special-notes">3.3. Architecture-specific configuration notes</a>
                   </span>
                 </dt>
               </dl>
@@ -1483,7 +1499,7 @@ Additionally, each counter is programmed with a "count" value, which corresponds
 detailed the profile is. The lower the value, the more frequently profile
 samples are taken. You can choose to sample only kernel code, user-space code,
 or both (both is the default). Finally, some events have a "unit mask"
--- this is a value that further restricts the types of event that are counted.
+-- this is a value that further restricts the type of event being counted.
 You can see the event types and unit masks for your CPU using <span class="command"><strong>ophelp</strong></span>.
 More information on event specification can be found at <a class="xref" href="#eventspec" title="3. Specifying performance counter events">Section 3, &#8220;Specifying performance counter events&#8221;</a>.
 </p>
@@ -1510,12 +1526,12 @@ Following is a description of the <span class="command"><strong>operf</strong></
             <dl>
               <dt>
                 <span class="term">
-                  <code class="option">command</code>
+                  <code class="option">command [args]</code>
                 </span>
               </dt>
               <dd>
                 <p>
-		The command or application to be profiled. <span class="command"><strong>args</strong></span> are the input arguments
+		The command or application to be profiled. The <span class="emphasis"><em>[args]</em></span> are the input arguments
         that the command or application requires. Either <code class="code">command</code>, <code class="code">--pid</code> or
         <code class="code">--system-wide</code> is required, but cannot be used simultaneously.
 		</p>
@@ -1561,8 +1577,11 @@ Following is a description of the <span class="command"><strong>operf</strong></
 		A vmlinux file that matches the running kernel that has symbol and/or debuginfo.
 		Kernel samples will be attributed to this binary, allowing post-processing tools
 		(like <span class="command"><strong>opreport</strong></span>) to attribute samples to the appropriate kernel symbols.
-		If this option is not specified, all kernel samples will be attributed to a pseudo
-		binary named "no-vmlinux".
+		If this option is not specified, the file /proc/kallsyms is used to obtain
+		kernel symbol addresses corresponding to sample addresses.  However, the setting of
+		/proc/sys/kernel/kptr_restrict may restrict a non-root user's access to
+		/proc/kallsyms, in which case,
+		all kernel samples are attributed to a pseudo binary named "no-vmlinux".
 		</p>
               </dd>
               <dt>
@@ -1638,809 +1657,87 @@ Following is a description of the <span class="command"><strong>operf</strong></
 		The <code class="code">--separate-thread</code> option is useful for seeing per-thread samples in
 		multi-threaded applications.  When used in conjuction with the <code class="code">--system-wide</code>
 		option, <code class="code">--separate-thread</code> is also useful for seeing per-process
-		(i.e., per-thread group) samples for the case where multiple processes are
-		executing the same program during a profiling run.
-		</p>
-              </dd>
-              <dt>
-                <span class="term">
-                  <code class="option">--separate-cpu / -c</code>
-                </span>
-              </dt>
-              <dd>
-                <p>
-		This option categorizes samples by cpu.
-		</p>
-              </dd>
-              <dt>
-                <span class="term">
-                  <code class="option">--session-dir / -d [path]</code>
-                </span>
-              </dt>
-              <dd>
-                <p>
-		This option specifies the session directory to hold the sample data. If not specified,
-		the data is saved in the <code class="filename">oprofile_data</code> directory on the current path.
-		</p>
-              </dd>
-              <dt>
-                <span class="term">
-                  <code class="option">---lazy-conversion / -l</code>
-                </span>
-              </dt>
-              <dd>
-                <p>
-		Use this option to reduce the overhead of <span class="command"><strong>operf</strong></span> during profiling.
-		Normally, profile data received from the kernel is converted to OProfile format
-		during profiling time. This is typically not an issue when profiling a single
-		application. But when using the <code class="code">--system-wide</code> option, this on-the-fly
-		conversion process can cause noticeable overhead, particularly on busy
-		multi-processor systems. The <code class="code">--lazy-conversion</code> option directs
-		<span class="command"><strong>operf</strong></span> to wait until profiling is completed to do the conversion
-		of profile data.
-		</p>
-              </dd>
-              <dt>
-                <span class="term">
-                  <code class="option">--verbose / -V [level]</code>
-                </span>
-              </dt>
-              <dd>
-                <p>
-		A comma-separated list of debugging control values used to increase the verbosity of the
-		output. Valid values are: debug, record, convert, misc, sfile, arcs, and the special value, 'all'.
-		</p>
-              </dd>
-              <dt>
-                <span class="term">
-                  <code class="option">--version -v </code>
-                </span>
-              </dt>
-              <dd>
-                <p>
-		Show <span class="command"><strong>operf</strong></span> version.
-		</p>
-              </dd>
-              <dt>
-                <span class="term">
-                  <code class="option">--help / -h</code>
-                </span>
-              </dt>
-              <dd>
-                <p>
-		Show a help message.
-		</p>
-              </dd>
-            </dl>
-          </div>
-        </div>
-        <div class="sect1" title="2. Using opcontrol">
-          <div class="titlepage">
-            <div>
-              <div>
-                <h2 class="title" style="clear: both"><a id="controlling-daemon"></a>2. Using <span class="command"><strong>opcontrol</strong></span></h2>
-              </div>
-            </div>
-          </div>
-          <p>
-In this section we describe the configuration and control of the profiling system
-with opcontrol in more depth. See <a class="xref" href="#controlling-operf" title="1. Using operf">Section 1, &#8220;Using <span class="command"><strong>operf</strong></span>&#8221;</a> for a description
-of the preferred profiling method.
-</p>
-          <p>
-The <span class="command"><strong>opcontrol</strong></span> script has a default setup, but you
-can alter this with the options given below. In particular, you can select
-specific hardware events on which to base your profile. See <a class="xref" href="#controlling-operf" title="1. Using operf">Section 1, &#8220;Using <span class="command"><strong>operf</strong></span>&#8221;</a> for an
-introduction to hardware events and performance counter configuration.
-The event types and unit masks for your CPU are listed by <span class="command"><strong>opcontrol
---list-events</strong></span> or <span class="command"><strong>ophelp</strong></span>.
-</p>
-          <p>
-The <span class="command"><strong>opcontrol</strong></span> script provides the following actions :
-</p>
-          <div class="variablelist">
-            <dl>
-              <dt>
-                <span class="term">
-                  <code class="option">--init</code>
-                </span>
-              </dt>
-              <dd>
-                <p>
-		Loads the OProfile module if required and makes the OProfile driver
-		interface available.
-		</p>
-              </dd>
-              <dt>
-                <span class="term">
-                  <code class="option">--setup</code>
-                </span>
-              </dt>
-              <dd>
-                <p>
-		    Followed by list arguments for profiling set up. List of arguments
-		    saved in <code class="filename">/root/.oprofile/daemonrc</code>.
-		    Giving this option is not necessary; you can just directly pass one
-		    of the setup options, e.g. <span class="command"><strong>opcontrol --no-vmlinux</strong></span>.
-		  </p>
-              </dd>
-              <dt>
-                <span class="term">
-                  <code class="option">--status</code>
-                </span>
-              </dt>
-              <dd>
-                <p>
-		Show configuration information.
-		</p>
-              </dd>
-              <dt>
-                <span class="term">
-                  <code class="option">--start-daemon</code>
-                </span>
-              </dt>
-              <dd>
-                <p>
-		    Start the oprofile daemon without starting actual profiling. The profiling
-		can then be started using <code class="option">--start</code>. This is useful for avoiding
-		measuring the cost of daemon startup, as <code class="option">--start</code> is a simple
-		write to a file in oprofilefs.
-		</p>
-              </dd>
-              <dt>
-                <span class="term">
-                  <code class="option">--start</code>
-                </span>
-              </dt>
-              <dd>
-                <p>
-		    Start data collection with either arguments provided by <code class="option">--setup</code>
-		or information saved in <code class="filename">/root/.oprofile/daemonrc</code>. Specifying
-		the addition <code class="option">--verbose</code> makes the daemon generate lots of debug data
-		whilst it is running.
-		</p>
-              </dd>
-              <dt>
-                <span class="term">
-                  <code class="option">--dump</code>
-                </span>
-              </dt>
-              <dd>
-                <p>
-		    Force a flush of the collected profiling data to the daemon.
-		</p>
-              </dd>
-              <dt>
-                <span class="term">
-                  <code class="option">--stop</code>
-                </span>
-              </dt>
-              <dd>
-                <p>
-		    Stop data collection.
-		</p>
-              </dd>
-              <dt>
-                <span class="term">
-                  <code class="option">--shutdown</code>
-                </span>
-              </dt>
-              <dd>
-                <p>
-		    Stop data collection and kill the daemon.
-		</p>
-              </dd>
-              <dt>
-                <span class="term">
-                  <code class="option">--reset</code>
-                </span>
-              </dt>
-              <dd>
-                <p>
-		    Clears out data from current session, but leaves saved sessions.
-		</p>
-              </dd>
-              <dt>
-                <span class="term"><code class="option">--save=</code>session_name</span>
-              </dt>
-              <dd>
-                <p>
-		    Save data from current session to session_name.
-		</p>
-              </dd>
-              <dt>
-                <span class="term">
-                  <code class="option">--deinit</code>
-                </span>
-              </dt>
-              <dd>
-                <p>
-                Shuts down daemon. Unload the OProfile module and oprofilefs.
-		</p>
-              </dd>
-              <dt>
-                <span class="term">
-                  <code class="option">--list-events</code>
-                </span>
-              </dt>
-              <dd>
-                <p>
-		    List event types and unit masks.
-		</p>
-              </dd>
-              <dt>
-                <span class="term">
-                  <code class="option">--help</code>
-                </span>
-              </dt>
-              <dd>
-                <p>
-		    Generate usage messages.
-		</p>
-              </dd>
-            </dl>
-          </div>
-          <p>
-There are a number of possible settings, of which, only
-<code class="option">--vmlinux</code> (or <code class="option">--no-vmlinux</code>)
-is required. These settings are stored in <code class="filename">~/.oprofile/daemonrc</code>.
-</p>
-          <div class="variablelist">
-            <dl>
-              <dt>
-                <span class="term"><code class="option">--buffer-size=</code>num</span>
-              </dt>
-              <dd>
-                <p>
-		Number of samples in kernel buffer. 
-		Buffer watershed needs to be tweaked when changing this value.
-		</p>
-              </dd>
-              <dt>
-                <span class="term"><code class="option">--buffer-watershed=</code>num</span>
-              </dt>
-              <dd>
-                <p>
-		Set kernel buffer watershed to num samples. When remain only
-		buffer-size - buffer-watershed free entries remain in the kernel buffer, data will be
-		flushed to the daemon.  Most useful values are in the range [0.25 - 0.5] * buffer-size.
-		</p>
-              </dd>
-              <dt>
-                <span class="term"><code class="option">--cpu-buffer-size=</code>num</span>
-              </dt>
-              <dd>
-                <p>
-		Number of samples in kernel per-cpu buffer. If you
-		profile at high rate, it can help to increase this if the log
-		file show excessive count of samples lost due to cpu buffer overflow. 
-		</p>
-              </dd>
-              <dt>
-                <span class="term"><code class="option">--event=</code>[eventspec]</span>
-              </dt>
-              <dd>
-                <p>
-		Use the given performance counter event to profile.
-		See <a class="xref" href="#eventspec" title="3. Specifying performance counter events">Section 3, &#8220;Specifying performance counter events&#8221;</a> below.
-		</p>
-              </dd>
-              <dt>
-                <span class="term"><code class="option">--session-dir=</code>dir_path</span>
-              </dt>
-              <dd>
-                <p>
-		    Create/use sample database out of directory <code class="filename">dir_path</code> instead of
-		the default location (/var/lib/oprofile).
-		</p>
-              </dd>
-              <dt>
-                <span class="term"><code class="option">--separate=</code>[none,lib,kernel,thread,cpu,all]</span>
-              </dt>
-              <dd>
-                <p>
-		By default, every profile is stored in a single file. Thus, for example,
-		samples in the C library are all accredited to the <code class="filename">/lib/libc.o</code>
-		profile. However, you choose to create separate sample files by specifying
-		one of the below options.
-		</p>
-                <div class="informaltable">
-                  <table border="1">
-                    <colgroup>
-                      <col />
-                      <col />
-                    </colgroup>
-                    <tbody>
-                      <tr>
-                        <td>
-                          <code class="option">none</code>
-                        </td>
-                        <td>No profile separation (default)</td>
-                      </tr>
-                      <tr>
-                        <td>
-                          <code class="option">lib</code>
-                        </td>
-                        <td>Create per-application profiles for libraries</td>
-                      </tr>
-                      <tr>
-                        <td>
-                          <code class="option">kernel</code>
-                        </td>
-                        <td>Create per-application profiles for the kernel and kernel modules</td>
-                      </tr>
-                      <tr>
-                        <td>
-                          <code class="option">thread</code>
-                        </td>
-                        <td>Create profiles for each thread and each task</td>
-                      </tr>
-                      <tr>
-                        <td>
-                          <code class="option">cpu</code>
-                        </td>
-                        <td>Create profiles for each CPU</td>
-                      </tr>
-                      <tr>
-                        <td>
-                          <code class="option">all</code>
-                        </td>
-                        <td>All of the above options</td>
-                      </tr>
-                    </tbody>
-                  </table>
-                </div>
-                <p>
-		Note  that <code class="option">--separate=kernel</code> also turns on <code class="option">--separate=lib</code>.
-		
-		When using <code class="option">--separate=kernel</code>, samples in hardware interrupts, soft-irqs, or other
-		asynchronous kernel contexts are credited to the task currently running. This means you will see
-		seemingly nonsense profiles such as <code class="filename">/bin/bash</code> showing samples for the PPP modules,
-		etc.
-		</p>
-                <p>
-		Using <code class="option">--separate=thread</code> creates a lot
-		of sample files if you leave OProfile running for a while; it's most
-		useful when used for short sessions, or when using image filtering.
-		</p>
-              </dd>
-              <dt>
-                <span class="term"><code class="option">--callgraph=</code>#depth</span>
-              </dt>
-              <dd>
-                <p>
-		Enable call-graph sample collection with a maximum depth. Use 0 to disable
-		callgraph profiling.  NOTE: Callgraph support is available on a limited
-		number of platforms at this time; for example:
-		</p>
-                <p>
-		</p>
-                <div class="itemizedlist">
-                  <ul class="itemizedlist" type="disc">
-                    <li class="listitem">
-                      <p>x86 with 2.6 or higher kernel</p>
-                    </li>
-                    <li class="listitem">
-                      <p>ARM with 2.6 or higher kernel</p>
-                    </li>
-                    <li class="listitem">
-                      <p>PowerPC with 2.6.17 or higher kernel</p>
-                    </li>
-                  </ul>
-                </div>
-                <p>
-		</p>
-                <p>
-		</p>
-              </dd>
-              <dt>
-                <span class="term"><code class="option">--image=</code>image,[images]|"all"</span>
-              </dt>
-              <dd>
-                <p>
-		Image filtering. If you specify one or more absolute
-		paths to binaries, OProfile will only produce profile results for those
-		binary images. This is useful for restricting the sometimes voluminous
-		output you may get otherwise, especially with
-		<code class="option">--separate=thread</code>. Note that if you are using
-		<code class="option">--separate=lib</code> or
-		<code class="option">--separate=kernel</code>, then if you specification an
-		application binary, the shared libraries and kernel code
-		<span class="emphasis"><em>are</em></span> included. Specify the value
-		"all" to profile everything (the default).
-		</p>
-              </dd>
-              <dt>
-                <span class="term"><code class="option">--vmlinux=</code>file</span>
-              </dt>
-              <dd>
-                <p>
-		vmlinux kernel image.
-		</p>
-              </dd>
-              <dt>
-                <span class="term">
-                  <code class="option">--no-vmlinux</code>
-                </span>
-              </dt>
-              <dd>
-                <p>
-		Use this when you don't have a kernel vmlinux file, and you don't want
-		to profile the kernel. This still counts the total number of kernel samples,
-		but can't give symbol-based results for the kernel or any modules.
-		</p>
-              </dd>
-            </dl>
-          </div>
-          <div class="sect2" title="2.1. Examples">
-            <div class="titlepage">
-              <div>
-                <div>
-                  <h3 class="title"><a id="opcontrolexamples"></a>2.1. Examples</h3>
-                </div>
-              </div>
-            </div>
-            <div class="sect3" title="2.1.1. Intel performance counter setup">
-              <div class="titlepage">
-                <div>
-                  <div>
-                    <h4 class="title"><a id="examplesperfctr"></a>2.1.1. Intel performance counter setup</h4>
-                  </div>
-                </div>
-              </div>
-              <p>
-Here, we have a Pentium III running at 800MHz, and we want to look at where data memory
-references are happening most, and also get results for CPU time.
-</p>
-              <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
-                <tr>
-                  <td>
-                    <pre class="screen">
-# opcontrol --event=CPU_CLK_UNHALTED:400000 --event=DATA_MEM_REFS:10000
-# opcontrol --vmlinux=/boot/2.6.0/vmlinux
-# opcontrol --start
-</pre>
-                  </td>
-                </tr>
-              </table>
-            </div>
-            <div class="sect3" title="2.1.2. Starting the daemon separately">
-              <div class="titlepage">
-                <div>
-                  <div>
-                    <h4 class="title"><a id="examplesstartdaemon"></a>2.1.2. Starting the daemon separately</h4>
-                  </div>
-                </div>
-              </div>
-              <p>
-Use <code class="option">--start-daemon</code> to avoid
-the profiler startup affecting results.
-</p>
-              <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
-                <tr>
-                  <td>
-                    <pre class="screen">
-# opcontrol --vmlinux=/boot/2.6.0/vmlinux
-# opcontrol --start-daemon
-# my_favourite_benchmark --init
-# opcontrol --start ; my_favourite_benchmark --run ; opcontrol --stop
-</pre>
-                  </td>
-                </tr>
-              </table>
-            </div>
-            <div class="sect3" title="2.1.3. Separate profiles for libraries and the kernel">
-              <div class="titlepage">
-                <div>
-                  <div>
-                    <h4 class="title"><a id="exampleseparate"></a>2.1.3. Separate profiles for libraries and the kernel</h4>
-                  </div>
-                </div>
-              </div>
-              <p>
-Here, we want to see a profile of the OProfile daemon itself, including when
-it was running inside the kernel driver, and its use of shared libraries.
-</p>
-              <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
-                <tr>
-                  <td>
-                    <pre class="screen">
-# opcontrol --separate=kernel --vmlinux=/boot/2.6.0/vmlinux
-# opcontrol --start
-# my_favourite_stress_test --run
-# opreport -l -p /lib/modules/2.6.0/kernel /usr/local/bin/oprofiled
-</pre>
-                  </td>
-                </tr>
-              </table>
-            </div>
-            <div class="sect3" title="2.1.4. Profiling sessions">
-              <div class="titlepage">
-                <div>
-                  <div>
-                    <h4 class="title"><a id="examplessessions"></a>2.1.4. Profiling sessions</h4>
-                  </div>
-                </div>
-              </div>
-              <p>
-It can often be useful to split up profiling data into several different
-time periods. For example, you may want to collect data on an application's
-startup separately from the normal runtime data. You can use the simple
-command <span class="command"><strong>opcontrol --save</strong></span> to do this. For example :
-</p>
-              <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
-                <tr>
-                  <td>
-                    <pre class="screen">
-# opcontrol --save=blah
-</pre>
-                  </td>
-                </tr>
-              </table>
-              <p>
-will create a sub-directory in <code class="filename">$SESSION_DIR/samples</code> containing the samples
-up to that point (the current session's sample files are moved into this
-directory). You can then pass this session name as a parameter to the post-profiling
-analysis tools, to only get data up to the point you named the
-session. If you do not want to save a session, you can do
-<span class="command"><strong>rm -rf $SESSION_DIR/samples/sessionname</strong></span> or, for the
-current session, <span class="command"><strong>opcontrol --reset</strong></span>.
-</p>
-            </div>
-          </div>
-        </div>
-        <div class="sect1" title="3. Specifying performance counter events">
-          <div class="titlepage">
-            <div>
-              <div>
-                <h2 class="title" style="clear: both"><a id="eventspec"></a>3. Specifying performance counter events</h2>
-              </div>
-            </div>
-          </div>
-          <p>
-Both methods of profiling (<span class="command"><strong>operf</strong></span> and <span class="command"><strong>opcontrol</strong></span>)
-allow you to give one or more event specifications to provide details of how each
-hardware performance counter should be setup. With <span class="command"><strong>operf</strong></span>, you
-can provide a comma-separated list of event specfications using the <code class="code">--events</code>
-option.  With <span class="command"><strong>opcontrol</strong></span>, you use the <code class="code">--event</code> option
-for each desired event specification.
-The event specification is a colon-separated string of the form
-<code class="option"><span class="emphasis"><em>name</em></span>:<span class="emphasis"><em>count</em></span>:<span class="emphasis"><em>unitmask</em></span>:<span class="emphasis"><em>kernel</em></span>:<span class="emphasis"><em>user</em></span></code>
-as described in the table below.
-</p>
-          <p>
-If no event specs are passed to <span class="command"><strong>operf</strong></span> or <span class="command"><strong>opcontrol</strong></span>,
-the default event will be used for profiling. With <span class="command"><strong>opcontrol</strong></span>, if you have
-previously specified some non-default event but want to revert to the default event, use
-<code class="option">--event=default</code>. Use of this option overrides all previous event selections
-that have been cached.
-</p>
-          <p>
-</p>
-          <div class="note" title="Note" style="margin-left: 0.5in; margin-right: 0.5in;"><h3 class="title">Note</h3>OProfile will allocate hardware counters as necessary, but some processor
-types have restrictions as to what hardware events may be counted simultaneously.
-The <span class="command"><strong>operf</strong></span> program uses a multiplexing technique when such
-hardware restrictions are encountered, but <span class="command"><strong>opcontrol</strong></span> does
-not have this capability; instead, <span class="command"><strong>opcontrol</strong></span> will display an
-error message if you select an incompatible set of events.
-</div>
-          <p>
-</p>
-          <div class="informaltable">
-            <table border="1">
-              <colgroup>
-                <col />
-                <col />
-              </colgroup>
-              <tbody>
-                <tr>
-                  <td>
-                    <code class="option">name</code>
-                  </td>
-                  <td>The symbolic event name, e.g. <code class="constant">CPU_CLK_UNHALTED</code></td>
-                </tr>
-                <tr>
-                  <td>
-                    <code class="option">count</code>
-                  </td>
-                  <td>The counter reset value, e.g. 100000</td>
-                </tr>
-                <tr>
-                  <td>
-                    <code class="option">unitmask</code>
-                  </td>
-                  <td>The unit mask, as given in the events list: e.g. 0x0f; or a symbolic name as
-given by the first word of the description (only valid for unit masks having an "extra:" parameter)</td>
-                </tr>
-                <tr>
-                  <td>
-                    <code class="option">kernel</code>
-                  </td>
-                  <td>Whether to profile kernel code</td>
-                </tr>
-                <tr>
-                  <td>
-                    <code class="option">user</code>
-                  </td>
-                  <td>Whether to profile userspace code</td>
-                </tr>
-              </tbody>
-            </table>
-          </div>
-          <p>
-The last three values are optional, if you omit them (e.g. <code class="option">--event=DATA_MEM_REFS:30000</code>),
-they will be set to the default values (a unit mask of 0, and profiling both kernel and
-userspace code). Note that some events require a unit mask.
-</p>
-          <p>
-When specifying a unit mask value, it may be either a hexadecimal value (which
-<span class="emphasis"><em>must</em></span> begin with "0x") or a string (i.e, symbolic name) which matches
-the first word in the unit mask description. Specifying a symbolic name for
-the unit mask is valid only for unit masks having "extra:" parameters, as
-shown by the output of <span class="command"><strong>ophelp</strong></span>.  Unit masks with "extra:" parameters must be
-specified using the symbolic name.
-</p>
-          <div class="note" title="Note" style="margin-left: 0.5in; margin-right: 0.5in;">
-            <h3 class="title">Note</h3>
-            <p>
-When using legacy mode <span class="command"><strong>opcontrol</strong></span> on PowerPC platforms, all events specified must be in the same group;
-i.e., the group number appended to the event name (e.g. <code class="constant">&lt;<span class="emphasis"><em>some-event-name</em></span>&gt;_GRP9
-</code>) must be the same.
-</p>
-          </div>
-          <p>
-If OProfile is using timer-interrupt mode, there is no event configuration possible.
-</p>
-          <p>
-The table below lists the default event for various processor types:
-</p>
-          <div class="informaltable">
-            <table border="1">
-              <colgroup>
-                <col />
-                <col />
-                <col />
-              </colgroup>
-              <tbody>
-                <tr>
-                  <td>Processor</td>
-                  <td>cpu_type</td>
-                  <td>Default event</td>
-                </tr>
-                <tr>
-                  <td>Alpha EV4</td>
-                  <td>alpha/ev4</td>
-                  <td>CYCLES:100000:0:1:1</td>
-                </tr>
-                <tr>
-                  <td>Alpha EV5</td>
-                  <td>alpha/ev5</td>
-                  <td>CYCLES:100000:0:1:1</td>
-                </tr>
-                <tr>
-                  <td>Alpha PCA56</td>
-                  <td>alpha/pca56</td>
-                  <td>CYCLES:100000:0:1:1</td>
-                </tr>
-                <tr>
-                  <td>Alpha EV6</td>
-                  <td>alpha/ev6</td>
-                  <td>CYCLES:100000:0:1:1</td>
-                </tr>
-                <tr>
-                  <td>Alpha EV67</td>
-                  <td>alpha/ev67</td>
-                  <td>CYCLES:100000:0:1:1</td>
-                </tr>
-                <tr>
-                  <td>ARM/XScale PMU1</td>
-                  <td>arm/xscale1</td>
-                  <td>CPU_CYCLES:100000:0:1:1</td>
-                </tr>
-                <tr>
-                  <td>ARM/XScale PMU2</td>
-                  <td>arm/xscale2</td>
-                  <td>CPU_CYCLES:100000:0:1:1</td>
-                </tr>
-                <tr>
-                  <td>ARM/MPCore</td>
-                  <td>arm/mpcore</td>
-                  <td>CPU_CYCLES:100000:0:1:1</td>
-                </tr>
-                <tr>
-                  <td>AVR32</td>
-                  <td>avr32</td>
-                  <td>CPU_CYCLES:100000:0:1:1</td>
-                </tr>
-                <tr>
-                  <td>Athlon</td>
-                  <td>i386/athlon</td>
-                  <td>CPU_CLK_UNHALTED:100000:0:1:1</td>
-                </tr>
-                <tr>
-                  <td>Pentium Pro</td>
-                  <td>i386/ppro</td>
-                  <td>CPU_CLK_UNHALTED:100000:0:1:1</td>
-                </tr>
-                <tr>
-                  <td>Pentium II</td>
-                  <td>i386/pii</td>
-                  <td>CPU_CLK_UNHALTED:100000:0:1:1</td>
-                </tr>
-                <tr>
-                  <td>Pentium III</td>
-                  <td>i386/piii</td>
-                  <td>CPU_CLK_UNHALTED:100000:0:1:1</td>
-                </tr>
-                <tr>
-                  <td>Pentium M (P6 core)</td>
-                  <td>i386/p6_mobile</td>
-                  <td>CPU_CLK_UNHALTED:100000:0:1:1</td>
-                </tr>
-                <tr>
-                  <td>Pentium 4 (non-HT)</td>
-                  <td>i386/p4</td>
-                  <td>GLOBAL_POWER_EVENTS:100000:1:1:1</td>
-                </tr>
-                <tr>
-                  <td>Pentium 4 (HT)</td>
-                  <td>i386/p4-ht</td>
-                  <td>GLOBAL_POWER_EVENTS:100000:1:1:1</td>
-                </tr>
-                <tr>
-                  <td>Hammer</td>
-                  <td>x86-64/hammer</td>
-                  <td>CPU_CLK_UNHALTED:100000:0:1:1</td>
-                </tr>
-                <tr>
-                  <td>Family10h</td>
-                  <td>x86-64/family10</td>
-                  <td>CPU_CLK_UNHALTED:100000:0:1:1</td>
-                </tr>
-                <tr>
-                  <td>Family11h</td>
-                  <td>x86-64/family11h</td>
-                  <td>CPU_CLK_UNHALTED:100000:0:1:1</td>
-                </tr>
-                <tr>
-                  <td>Itanium</td>
-                  <td>ia64/itanium</td>
-                  <td>CPU_CYCLES:100000:0:1:1</td>
-                </tr>
-                <tr>
-                  <td>Itanium 2</td>
-                  <td>ia64/itanium2</td>
-                  <td>CPU_CYCLES:100000:0:1:1</td>
-                </tr>
-                <tr>
-                  <td>TIMER_INT</td>
-                  <td>timer</td>
-                  <td>None selectable</td>
-                </tr>
-                <tr>
-                  <td>IBM pseries</td>
-                  <td>PowerPC 4/5/6/7/970/Cell</td>
-                  <td>CYCLES:100000:0:1:1</td>
-                </tr>
-                <tr>
-                  <td>IBM s390</td>
-                  <td>timer</td>
-                  <td>None selectable</td>
-                </tr>
-                <tr>
-                  <td>IBM s390x</td>
-                  <td>timer</td>
-                  <td>None selectable</td>
-                </tr>
-              </tbody>
-            </table>
+		(i.e., per-thread group) samples for the case where multiple processes are
+		executing the same program during a profiling run.
+		</p>
+              </dd>
+              <dt>
+                <span class="term">
+                  <code class="option">--separate-cpu / -c</code>
+                </span>
+              </dt>
+              <dd>
+                <p>
+		This option categorizes samples by cpu.
+		</p>
+              </dd>
+              <dt>
+                <span class="term">
+                  <code class="option">--session-dir / -d [path]</code>
+                </span>
+              </dt>
+              <dd>
+                <p>
+		This option specifies the session directory to hold the sample data. If not specified,
+		the data is saved in the <code class="filename">oprofile_data</code> directory on the current path.
+		</p>
+              </dd>
+              <dt>
+                <span class="term">
+                  <code class="option">--lazy-conversion / -l</code>
+                </span>
+              </dt>
+              <dd>
+                <p>
+		Use this option to reduce the overhead of <span class="command"><strong>operf</strong></span> during profiling.
+		Normally, profile data received from the kernel is converted to OProfile format
+		during profiling time. This is typically not an issue when profiling a single
+		application. But when using the <code class="code">--system-wide</code> option, this on-the-fly
+		conversion process can cause noticeable overhead, particularly on busy
+		multi-processor systems. The <code class="code">--lazy-conversion</code> option directs
+		<span class="command"><strong>operf</strong></span> to wait until profiling is completed to do the conversion
+		of profile data.
+		</p>
+              </dd>
+              <dt>
+                <span class="term">
+                  <code class="option">--verbose / -V [level]</code>
+                </span>
+              </dt>
+              <dd>
+                <p>
+		A comma-separated list of debugging control values used to increase the verbosity of the
+		output. Valid values are: debug, record, convert, misc, sfile, arcs, and the special value, 'all'.
+		</p>
+              </dd>
+              <dt>
+                <span class="term">
+                  <code class="option">--version / -v</code>
+                </span>
+              </dt>
+              <dd>
+                <p>
+		Show <span class="command"><strong>operf</strong></span> version.
+		</p>
+              </dd>
+              <dt>
+                <span class="term">
+                  <code class="option">--help / -h</code>
+                </span>
+              </dt>
+              <dd>
+                <p>
+		Show a help message.
+		</p>
+              </dd>
+            </dl>
           </div>
         </div>
-        <div class="sect1" title="4. Setting up the JIT profiling feature">
+        <div class="sect1" title="2. Setting up the JIT profiling feature">
           <div class="titlepage">
             <div>
               <div>
-                <h2 class="title" style="clear: both"><a id="setup-jit"></a>4. Setting up the JIT profiling feature</h2>
+                <h2 class="title" style="clear: both"><a id="setup-jit"></a>2. Setting up the JIT profiling feature</h2>
               </div>
             </div>
           </div>
@@ -2449,14 +1746,14 @@ The table below lists the default event for various processor types:
 		it needs to be instrumented with an agent library. We use the
 		agent libraries for Java in the following example. To use the
 		Java profiling feature, you must build OProfile with the "--with-java" option
-                (<a class="xref" href="#install" title="6. Installation">Section 6, &#8220;Installation&#8221;</a>).
+                (<a class="xref" href="#install" title="7. Installation">Section 7, &#8220;Installation&#8221;</a>).
 
 	</p>
-          <div class="sect2" title="4.1. JVM instrumentation">
+          <div class="sect2" title="2.1. JVM instrumentation">
             <div class="titlepage">
               <div>
                 <div>
-                  <h3 class="title"><a id="setup-jit-jvm"></a>4.1. JVM instrumentation</h3>
+                  <h3 class="title"><a id="setup-jit-jvm"></a>2.1. JVM instrumentation</h3>
                 </div>
               </div>
             </div>
@@ -2514,54 +1811,19 @@ The table below lists the default event for various processor types:
                 </p>
           </div>
         </div>
-        <div class="sect1" title="5. Using oprof_start">
-          <div class="titlepage">
-            <div>
-              <div>
-                <h2 class="title" style="clear: both"><a id="oprofile-gui"></a>5. Using <span class="command"><strong>oprof_start</strong></span></h2>
-              </div>
-            </div>
-          </div>
-          <p>
-The <span class="command"><strong>oprof_start</strong></span> application provides a convenient way to start the profiler.
-Note that <span class="command"><strong>oprof_start</strong></span> is just a wrapper around the <span class="command"><strong>opcontrol</strong></span> script,
-so it does not provide more services than the script itself.
-</p>
-          <p>
-After <span class="command"><strong>oprof_start</strong></span> is started you can select the event type for each counter;
-the sampling rate and other related parameters are explained in <a class="xref" href="#controlling-daemon" title="2. Using opcontrol">Section 2, &#8220;Using <span class="command"><strong>opcontrol</strong></span>&#8221;</a>.
-The "Configuration" section allows you to set general parameters such as the buffer size, kernel filename
-etc. The counter setup interface should be self-explanatory; <a class="xref" href="#hardware-counters" title="6.1. Hardware performance counters">Section 6.1, &#8220;Hardware performance counters&#8221;</a> and related 
-links contain information on using unit masks.
-</p>
-          <p>
-A status line shows the current status of the profiler: how long it has been running, and the average
-number of interrupts received per second and the total, over all processors.
-Note that quitting <span class="command"><strong>oprof_start</strong></span> does not stop the profiler.
-</p>
-          <p>
-Your configuration is saved in the same file as <span class="command"><strong>opcontrol</strong></span> uses; that is,
-<code class="filename">~/.oprofile/daemonrc</code>.
-</p>
-          <p>
-</p>
-          <div class="note" title="Note" style="margin-left: 0.5in; margin-right: 0.5in;"><h3 class="title">Note</h3><span class="command"><strong>oprof_start</strong></span> does not currently support <span class="command"><strong>operf</strong></span>.</div>
-          <p>
-</p>
-        </div>
-        <div class="sect1" title="6. Configuration details">
+        <div class="sect1" title="3. Configuration details">
           <div class="titlepage">
             <div>
               <div>
-                <h2 class="title" style="clear: both"><a id="detailed-parameters"></a>6. Configuration details</h2>
+                <h2 class="title" style="clear: both"><a id="detailed-parameters"></a>3. Configuration details</h2>
               </div>
             </div>
           </div>
-          <div class="sect2" title="6.1. Hardware performance counters">
+          <div class="sect2" title="3.1. Hardware performance counters">
             <div class="titlepage">
               <div>
                 <div>
-                  <h3 class="title"><a id="hardware-counters"></a>6.1. Hardware performance counters</h3>
+                  <h3 class="title"><a id="hardware-counters"></a>3.1. Hardware performance counters</h3>
                 </div>
               </div>
             </div>
@@ -2574,7 +1836,8 @@ events other than the default event chosen by OProfile.
               <h3 class="title">Note</h3>
               <p>
 Your CPU type may not include the requisite support for hardware performance counters, in which case
-you must use OProfile in timer mode (see <a class="xref" href="#timer" title="6.2. OProfile in timer interrupt mode">Section 6.2, &#8220;OProfile in timer interrupt mode&#8221;</a>). 
+you must use OProfile in timer mode (see <a class="xref" href="#timer" title="3.2. OProfile timer interrupt mode">Section 3.2, &#8220;OProfile timer interrupt mode&#8221;</a>), which is only available in
+OProfile releases prior to 1.0.
 </p>
             </div>
             <p>
@@ -2588,82 +1851,92 @@ https://www.power.org/events/Power7</a> contains specific information on the per
 monitor unit for the IBM POWER7.
 </p>
             <p>
-These processors are capable of delivering an interrupt when a counter overflows.
+A physical performance monitor counter (PMC) is configured by a profiling tool to count a particular
+type of event. When the counter overflows, an interrupt is delivered to the processor.
 This is the basic mechanism on which OProfile is based. The delivery mode is <acronym class="acronym">NMI</acronym>,
 so blocking interrupts in the kernel does not prevent profiling. When the interrupt handler is called,
-the current <acronym class="acronym">PC</acronym> value and the current task are recorded into the profiling structure.
-This allows the overflow event to be attached to a specific assembly instruction in a binary image.
-OProfile receives this data from the kernel and writes it to the sample files.
+the current <acronym class="acronym">PC</acronym> (program counter) value and the current task are recorded into the profiling structure.
+This allows the overflow event to be attributed to a specific assembly instruction in a specific binary image.
+OProfile receives this data (commonly referred to as a "sample") from the kernel and writes it to the sample files.
 </p>
             <p>
 If we use an event such as <code class="constant">CPU_CLK_UNHALTED</code> or <code class="constant">INST_RETIRED</code>
 (<code class="constant">GLOBAL_POWER_EVENTS</code> or <code class="constant">INSTR_RETIRED</code>, respectively, on the Pentium 4), we can
-use the overflow counts as an estimate of actual time spent in each part of code. Alternatively we can profile interesting
+use the overflow counts (samples) as an estimate of actual time spent in each part of code. Alternatively we can profile interesting
 data such as the cache behaviour of routines with the other available counters.
 </p>
             <p>
 However there are several caveats. First, there are those issues listed in the Intel manual. There is a delay
 between the counter overflow and the interrupt delivery that can skew results on a small scale - this means
 you cannot rely on the profiles at the instruction level as being perfectly accurate.
-If you are using an "event-mode" counter such as the cache counters, a count registered against it doesn't mean
-that it is responsible for that event. However, it implies that the counter overflowed in the dynamic
-vicinity of that instruction, to within a few instructions. Further details on this problem can be found in 
+For example, if you are profiling an application with an event that counts L1 cache misses, a sample attributed
+to a particular instruction in the application doesn't necessarily mean that exact instruction is responsible
+for that event; instead, it means the sample was taken in the dynamic vicinity of that instruction,
+usually with a margin of error of a few instructions. Further details on this problem can be found in
 <a class="xref" href="#interpreting" title="Chapter 5. Interpreting profiling results">Chapter 5, <i>Interpreting profiling results</i></a> and also in the Digital paper "ProfileMe: A Hardware Performance Counter".
 </p>
             <p>
-Each counter has several configuration parameters.
-First, there is the unit mask: this simply further specifies what to count.
-Second, there is the counter value, discussed below. Third, there is a parameter whether to increment counts
+Each counter has several configuration parameters besides the type of event to count.
+First, there is the unit mask, which is used to further qualify exactly what to count.
+Second, there is the <code class="constant">count</code> field, discussed below. Third, there are parameters
+to specify whether to increment counts
 whilst in kernel or user space. You can configure these separately for each counter.
 </p>
             <p>
-After each overflow event, the counter will be re-initialized
-such that another overflow will occur after this many events have been counted. Thus, higher
-values mean less-detailed profiling, and lower values mean more detail, but higher overhead.
-Picking a good value for this
-parameter is, unfortunately, somewhat of a black art. It is of course dependent on the event
-you have chosen.
+When the profiler is initially set up, a performance monitor counter is chosen for counting the
+event, and it is initialized using the <code class="constant">count</code> value.
+Once profiling begins, the counter increments with each event detected, and the counter
+<span class="emphasis"><em>overflows</em></span> when the <code class="constant">count</code> value is reached.
+As described above, the counter overflow generates an interrupt, and the sample is recorded.
+After each overflow event, the counter is re-initialized using the <code class="constant">count</code> value,
+and counting begins anew for the next sample. Higher values for <code class="constant">count</code>
+result in samples being taken less frequently, and therefore less-detailed (and, potentially,
+less accurate) profiling. Lower values mean more detail, but higher overhead.
+Picking a good value for this parameter is, unfortunately, somewhat of a black art. It is
+of course dependent on the event you have chosen.
 Specifying too large a value will mean not enough interrupts are generated
-to give a realistic profile (though this problem can be ameliorated by profiling for <span class="emphasis"><em>longer</em></span>).
-Specifying too small a value can lead to higher performance overhead.
+to give a realistic profile (though this problem can be ameliorated by profiling for
+longer time periods). Specifying too small a value can lead to higher performance overhead.
 </p>
           </div>
-          <div class="sect2" title="6.2. OProfile in timer interrupt mode">
+          <div class="sect2" title="3.2. OProfile timer interrupt mode">
             <div class="titlepage">
               <div>
                 <div>
-                  <h3 class="title"><a id="timer"></a>6.2. OProfile in timer interrupt mode</h3>
+                  <h3 class="title"><a id="timer"></a>3.2. OProfile timer interrupt mode</h3>
                 </div>
               </div>
             </div>
             <p>
-Some CPU types do not provide the needed hardware support to use the hardware performance counters. This includes
-some laptops, classic Pentiums, and other CPU types not yet supported by OProfile (such as Cyrix).
-On these machines, OProfile falls back to using the timer interrupt for profiling,
-back to using the real-time clock interrupt to collect samples.  In timer mode, OProfile
-is not able to profile code that has interrupts disabled.
-</p>
-            <p>
-You can force use of the timer interrupt by using the <code class="option">timer=1</code> module
-parameter (or <code class="option">oprofile.timer=1</code> on the boot command line if OProfile is
-built-in).  If OProfile was built as a kernel module, then you must pass the 'timer=1'
-parameter with the modprobe command.  Do this before executing 'opcontrol --init' or
-edit the opcontrol command's invocation of modprobe to pass the 'timer=1' parameter.
-
-</p>
-            <div class="note" title="Note" style="margin-left: 0.5in; margin-right: 0.5in;"><h3 class="title">Note</h3>Timer mode is only available using the legacy <span class="command"><strong>opcontrol</strong></span> command.</div>
+Some CPU types do not provide the needed hardware support for hardware performance counters.
+Additionally, some older architectures are not supported by the perf_events kernel subsystem.
+On such machines, the <span class="command"><strong>operf</strong></span> and <span class="command"><strong>ocount</strong></span> commands will exit with a message indicating the
+processor type is not supported. However, you can install OProfile 0.9.9 and use the legacy
+opcontrol-based profiler, which will fall back to using timer interrupts for profiling.
+Note that in timer mode, OProfile is not able to profile code that has interrupts disabled.
+</p>
+            <div class="note" title="Note" style="margin-left: 0.5in; margin-right: 0.5in;"><h3 class="title">Note</h3>Timer mode is only available using the legacy <span class="command"><strong>opcontrol</strong></span> command,
+available in releases prior to 1.0.</div>
             <p>
 </p>
           </div>
-          <div class="sect2" title="6.3. Pentium 4 support">
+          <div class="sect2" title="3.3. Architecture-specific configuration notes">
             <div class="titlepage">
               <div>
                 <div>
-                  <h3 class="title"><a id="p4"></a>6.3. Pentium 4 support</h3>
+                  <h3 class="title"><a id="special-notes"></a>3.3. Architecture-specific configuration notes</h3>
                 </div>
               </div>
             </div>
-            <p>
+            <div class="sect3" title="3.3.1. Pentium 4 support">
+              <div class="titlepage">
+                <div>
+                  <div>
+                    <h4 class="title"><a id="p4"></a>3.3.1. Pentium 4 support</h4>
+                  </div>
+                </div>
+              </div>
+              <p>
 The Pentium 4 / Xeon performance counters are organized around 3 types of model specific registers (MSRs): 45 event
 selection control registers (ESCRs), 18 counter configuration control registers (CCCRs) and 18 counters. ESCRs describe a
 particular set of events which are to be recorded, and CCCRs bind ESCRs to counters and configure their
@@ -2672,370 +1945,34 @@ another at any time. There is, however, a subset of 8 counters, 8 ESCRs, and 8 C
 one another, so OProfile only accesses those registers, treating them as a bank of 8 "normal" counters, similar
 to those in the P6 or Athlon/Opteron/Phenom/Turion families of CPU.
 </p>
-            <p>
+              <p>
 There is currently no support for Precision Event-Based Sampling (PEBS), nor any advanced uses of the Debug Store
 (DS). Current support is limited to the conservative extension of OProfile's existing interrupt-based model described
 above.
-</p>
-          </div>
-          <div class="sect2" title="6.4. Intel Itanium 2 support">
-            <div class="titlepage">
-              <div>
-                <div>
-                  <h3 class="title"><a id="ia64"></a>6.4. Intel Itanium 2 support</h3>
-                </div>
-              </div>
-            </div>
-            <p>
-The Itanium 2 performance monitoring unit (PMU) organizes the counters as four
-pairs of performance event monitoring registers. Each pair is composed of a
-Performance Monitoring Configuration (PMC) register and Performance Monitoring
-Data (PMD) register.  The PMC selects the performance event being monitored and
-the PMD determines the sampling interval. The IA64 Performance Monitoring Unit
-(PMU) triggers sampling with maskable interrupts. Thus, samples will not occur
-in sections of the IA64 kernel where interrupts are disabled.
-</p>
-            <p>
-None of the advance features of the Itanium 2 performance monitoring unit
-such as opcode matching, address range matching, or precise event sampling are
-supported by this version of OProfile.  The Itanium 2 support only maps OProfile's
-existing interrupt-based model to the PMU hardware.
-</p>
-          </div>
-          <div class="sect2" title="6.5. PowerPC64 support">
-            <div class="titlepage">
-              <div>
-                <div>
-                  <h3 class="title"><a id="ppc64"></a>6.5. PowerPC64 support</h3>
-                </div>
-              </div>
-            </div>
-            <p>
-The performance monitoring unit (PMU) for the IBM PowerPC 64-bit processors 
-consists of between 4 and 8 counters (depending on the model), plus three
-special purpose registers used for programming the counters -- MMCR0, MMCR1,
-and MMCRA.  Advanced features such as instruction matching and thresholding are
-not supported by this version of OProfile.
-</p>
-            <div class="note" title="Note" style="margin-left: 0.5in; margin-right: 0.5in;"><h3 class="title">Note</h3>Later versions of the IBM POWER5+ processor (beginning with revision 3.0)
-run the performance monitor unit in POWER6 mode, effectively removing OProfile's
-access to counters 5 and 6.  These two counters are dedicated to counting
-instructions completed and cycles, respectively.  In POWER6 mode, however, the
-counters do not generate an interrupt on overflow and so are unusable by
-OProfile.  Kernel versions 2.6.23 and higher will recognize this mode
-and export "ppc64/power5++" as the cpu_type to the oprofilefs pseudo filesystem.
-OProfile userspace responds to this cpu_type by removing these counters from
-the list of potential events to count.  Without this kernel support, attempts
-to profile using an event from one of these counters will yield incorrect
-results -- typically, zero (or near zero) samples in the generated report.
-</div>
-            <p>
-</p>
-          </div>
-          <div class="sect2" title="6.6. Cell Broadband Engine support">
-            <div class="titlepage">
-              <div>
-                <div>
-                  <h3 class="title"><a id="cell-be"></a>6.6. Cell Broadband Engine support</h3>
-                </div>
-              </div>
-            </div>
-            <p>
-The Cell Broadband Engine (CBE) processor core consists of a PowerPC Processing
-Element (PPE) and 8 Synergistic Processing Elements (SPE).  PPEs and SPEs each
-consist of a processing unit (PPU and SPU, respectively) and other hardware
-components, such as memory controllers.
-</p>
-            <p>
-A PPU has two hardware threads (aka "virtual CPUs").  The performance monitor
-unit of the CBE collects event information on one hardware thread at a time.
-Therefore, when profiling PPE events,
-OProfile collects the profile based on the selected events by time slicing the
-performance counter hardware between the two threads.   The user must ensure the
-collection interval is long enough so that the time spent collecting data for
-each PPU is sufficient to obtain a good profile.
-</p>
-            <p>
-To profile an SPU application, the user should specify the SPU_CYCLES event.
-When starting OProfile with SPU_CYCLES, the opcontrol script enforces certain
-separation parameters (separate=cpu,lib) to ensure that sufficient information
-is collected in the sample data in order to generate a complete report.  The
---merge=cpu option can be used to obtain a more readable report if analyzing
-the performance of each separate SPU is not necessary.
-</p>
-            <p>
-Profiling with an SPU event (events 4100 through 4163) is not compatible with any other
-event.  Further more, only one SPU event can be specified at a time.  The hardware only
-supports profiling on one SPU per node at a time.  The OProfile kernel code time slices
-between the eight SPUs to collect data on all SPUs.
-</p>
-            <p>
-SPU profile reports have some unique characteristics compared to reports for
-standard architectures:
-</p>
-            <div class="itemizedlist">
-              <ul class="itemizedlist" type="disc">
-                <li class="listitem">Typically no "app name" column.  This is really standard OProfile behavior
-when the report contains samples for just a single application, which is
-commonly the case when profiling SPUs.</li>
-                <li class="listitem">"CPU" equates to "SPU"</li>
-                <li class="listitem">Specifying '--long-filenames' on the opreport command does not always result
-in long filenames.  This happens when the SPU application code is embedded in
-the PPE executable or shared library.  The embedded SPU ELF data contains only the
-short filename (i.e., no path information) for the SPU binary file that was used as
-the source for embedding.   The reason that just the short filename is used is because
-the original SPU binary file may not exist or be accessible at runtime.  The performance
-analyst must have sufficient knowledge of the application to be able to correlate the
-SPU binary image names found in the  report to the application's source files.
-<div class="note" title="Note" style="margin-left: 0.5in; margin-right: 0.5in;"><h3 class="title">Note</h3>
-Compile the application with -g and generate the OProfile report
-with -g to facilitate finding the right source file(s) on which to focus.
-</div></li>
-              </ul>
-            </div>
-          </div>
-          <div class="sect2" title="6.7. AMD64 (x86_64) Instruction-Based Sampling (IBS) support">
-            <div class="titlepage">
-              <div>
-                <div>
-                  <h3 class="title"><a id="amd-ibs-support"></a>6.7. AMD64 (x86_64) Instruction-Based Sampling (IBS) support</h3>
-                </div>
-              </div>
-            </div>
-            <p>
-Instruction-Based Sampling (IBS) is a new performance measurement technique
-available on AMD Family 10h processors. Traditional performance counter
-sampling is not precise enough to isolate performance issues to individual
-instructions. IBS, however, precisely identifies instructions which are not
-making the best use of the processor pipeline and memory hierarchy.
-For more information, please refer to the "Instruction-Based Sampling:
-A New Performance Analysis Technique for AMD Family 10h Processors" (
-<a class="ulink" href="http://developer.amd.com/assets/AMD_IBS_paper_EN.pdf">
-http://developer.amd.com/assets/AMD_IBS_paper_EN.pdf</a>).
-There are two types of IBS profile types, described in the following sections.
-</p>
-            <div class="note" title="Note" style="margin-left: 0.5in; margin-right: 0.5in;"><h3 class="title">Note</h3>Profiling on IBS events is only supported with legacy mode profiling
-(i.e., with <span class="command"><strong>opcontrol</strong></span>).</div>
-            <p>
-</p>
-            <div class="sect3" title="6.7.1. IBS Fetch">
-              <div class="titlepage">
-                <div>
-                  <div>
-                    <h4 class="title"><a id="ibs-fetch"></a>6.7.1. IBS Fetch</h4>
-                  </div>
-                </div>
-              </div>
-              <p>
-IBS fetch sampling is a statistical sampling method which counts completed
-fetch operations. When the number of completed fetch operations reaches the
-maximum fetch count (the sampling period), IBS tags the fetch operation and
-monitors that operation until it either completes or aborts. When a tagged
-fetch completes or aborts, a sampling interrupt is generated and an IBS fetch
-sample is taken. An IBS fetch sample contains a timestamp, the identifier of
-the interrupted process, the virtual fetch address, and several event flags
-and values that describe what happened during the fetch operation. 
 </p>
             </div>
-            <div class="sect3" title="6.7.2. IBS Op">
+            <div class="sect3" title="3.3.2. PowerPC64 support">
               <div class="titlepage">
                 <div>
                   <div>
-                    <h4 class="title"><a id="ibs-op"></a>6.7.2. IBS Op</h4>
+                    <h4 class="title"><a id="ppc64"></a>3.3.2. PowerPC64 support</h4>
                   </div>
                 </div>
               </div>
               <p>
-IBS op sampling selects, tags, and monitors macro-ops as issued from AMD64
-instructions. Two options are available for selecting ops for sampling:
-</p>
-              <div class="itemizedlist">
-                <ul class="itemizedlist" type="disc">
-                  <li class="listitem">
-Cycles-based selection counts CPU clock cycles. The op is tagged and monitored
-when the count reaches a threshold (the sampling period) and a valid op is
-available. 
-</li>
-                  <li class="listitem">
-Dispatched op-based selection counts dispatched macro-ops.
-When the count reaches a threshold, the next valid op is tagged and monitored. 
-</li>
-                </ul>
-              </div>
-              <p>
-In both cases, an IBS sample is generated only if the tagged op retires.
-Thus, IBS op event information does not measure speculative execution activity.
-The execution stages of the pipeline monitor the tagged macro-op. When the
-tagged macro-op retires, a sampling interrupt is generated and an IBS op
-sample is taken. An IBS op sample contains a timestamp, the identifier of
-the interrupted process, the virtual address of the AMD64 instruction from
-which the op was issued, and several event flags and values that describe
-what happened when the macro-op executed.
-</p>
-            </div>
-            <p>
-Enabling IBS profiling is done simply by specifying IBS performance events
-through the "--event=" options. These events are listed in the
-<code class="function">opcontrol --list-events</code>.
-</p>
-            <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
-              <tr>
-                <td>
-                  <pre class="screen">
-opcontrol --event=IBS_FETCH_XXX:&lt;count&gt;:&lt;um&gt;:&lt;kernel&gt;:&lt;user&gt;
-opcontrol --event=IBS_OP_XXX:&lt;count&gt;:&lt;um&gt;:&lt;kernel&gt;:&lt;user&gt;
-
-Note: * All IBS fetch event must have the same event count and unitmask,
-        as do those for IBS op.
-</pre>
-                </td>
-              </tr>
-            </table>
-          </div>
-          <div class="sect2" title="6.8. IBM System z hardware sampling support">
-            <div class="titlepage">
-              <div>
-                <div>
-                  <h3 class="title"><a id="systemz"></a>6.8. IBM System z hardware sampling support</h3>
-                </div>
-              </div>
-            </div>
-            <p>
-IBM System z provides a facility which does instruction sampling as
-part of the CPU.  This has great advantages over the timer based
-sampling approach like better sampling resolution with less overhead
-and the possibility to get samples within code sections where
-interrupts are disabled (useful especially for Linux kernel code).
-</p>
-            <div class="note" title="Note" style="margin-left: 0.5in; margin-right: 0.5in;"><h3 class="title">Note</h3>Profiling with the instruction sampling facility is currently only supported
-with legacy mode profiling (i.e., with <span class="command"><strong>opcontrol</strong></span>).</div>
-            <p>
-A public description of the System z CPU-Measurement Facilities can be
-found here:
-<a class="ulink" href="http://www-01.ibm.com/support/docview.wss?uid=isg26fcd1cc32246f4c8852574ce0044734a">The Load-Program-Parameter and CPU-Measurement Facilities</a>
-</p>
-            <p>
-System z hardware sampling can be used for Linux instances in LPAR
-mode. The hardware sampling support used by OProfile was introduced
-for System z10 in October 2008.
-</p>
-            <p>
-To enable hardware sampling for an LPAR you must activate the LPAR
-with authorization for basic sampling control. See the "Support
-Element Operations Guide" for your mainframe system for more
-information.
-</p>
-            <p>
-The hardware sampling facility can be enabled and disabled using the
-event interface.  A `virtual' counter 0 has been defined that only supports
-a single event, HWSAMPLING. By default the HWSAMPLING event is
-enabled on machines providing the facility.  For both events only the
-`count', `kernel' and `user' options are evaluated by the kernel
-module.
-</p>
-            <p>
-The `count' value is the sampling rate as it is passed to the CPU
-measurement facility.  A sample will be taken by the hardware every
-`count' cycles. Using low values here will quickly fill up the
-sampling buffers and will generate CPU load on the OProfile daemon and
-the kernel module being busy flushing the hardware buffers.  This
-might considerably impact the workload to be profiled.
-</p>
-            <p>
-The unit mask `um' is required to be zero.
-</p>
-            <p>
-The opcontrol tool provides a new option specific to System z
-hardware sampling:
-</p>
-            <div class="itemizedlist">
-              <ul class="itemizedlist" type="disc">
-                <li class="listitem">--s390hwsampbufsize="num": Number of 2MB areas
-used per CPU for storing sample data.  The best
-size for the sample memory depends on the particular system and the
-workload to be measured.  Providing the sampler with too little memory
-results in lost samples. Reserving too much system memory for the
-sampler impacts the overall performance and, hence, also the workload
-to be measured.</li>
-              </ul>
-            </div>
-            <p>
-A special counter <code class="filename">/dev/oprofile/timer</code> is provided
-by the kernel module allowing to switch back to timer mode sampling
-dynamically.  The TIMER event is limited to be used only with this
-counter.  The TIMER event can be specified using the
-<code class="option">--event=</code> as with every other event.
-</p>
-            <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
-              <tr>
-                <td>
-                  <pre class="screen">opcontrol --event=TIMER:1</pre>
-                </td>
-              </tr>
-            </table>
-            <p>
-On z10 or later machines the default event is set to TIMER in case the
-hardware sampling facility is not available.
-</p>
-            <p>
-Although required, the 'count' parameter of the TIMER event is
-ignored.  The value may eventually be used for timer based sampling
-with a configurable sampling frequency, but this is currently not
-supported.
-</p>
-          </div>
-          <div class="sect2" title="6.9. Dangerous counter settings">
-            <div class="titlepage">
-              <div>
-                <div>
-                  <h3 class="title"><a id="misuse"></a>6.9. Dangerous counter settings</h3>
-                </div>
-              </div>
-            </div>
-            <p>
-OProfile is a low-level profiler which allows continuous profiling with a low-overhead cost.
-When using OProfile legacy mode profiling, it may be possible to configure such a low a counter reset value
-(i.e., high sampling rate) that the system can become overloaded with counter interrupts and your
-system's responsiveness may be severely impacted. Whilst some validation is done on the <code class="code">count</code>
-values you pass to <span class="command"><strong>opcontrol</strong></span> with your event specification, it is not foolproof.
-</p>
-            <div class="note" title="Note" style="margin-left: 0.5in; margin-right: 0.5in;">
-              <h3 class="title">Note</h3>
-              <p>
-This can happen as follows: When the profiler count
-reaches zero, an NMI handler is called which stores the sample values in an internal buffer, then resets the counter
-to its original value. If the reset count you specified is very low, a pending NMI can be sent before the NMI handler has
-completed. Due to the priority of the NMI, the pending interrupt is delivered immediately after
-completion of the previous interrupt handler, and control never returns to other parts of the system.
-If all processors are stuck in this mode, the system will appear to be frozen.
-</p>
-            </div>
-            <p>If this happens, it will be impossible to bring the system back to a workable state.
-There is no way to provide real security against this happening, other than making sure to use a reasonable value
-for the counter reset. For example, setting <code class="constant">CPU_CLK_UNHALTED</code> event type with a ridiculously low reset count (e.g. 500)
-is likely to freeze the system.
+The performance monitoring unit (PMU) for the IBM PowerPC 64-bit processors 
+consists of between 4 and 8 counters (depending on the model).  Advanced features
+such as instruction matching and thresholding are not supported by OProfile.
 </p>
-            <p>
-In short : <span class="command"><strong>Don't try a foolish sample count value</strong></span>. Unfortunately the definition of a foolish value
-is really dependent on the event type. If ever in doubt, post a message to </p>
-            <div class="address">
-              <p><code class="email">&lt;<a class="email" href="mailto:oprofile-list@lists.sf.net">oprofile-list@lists.sf.net</a>&gt;</code>.</p>
             </div>
-            <p>
-</p>
-            <div class="note" title="Note" style="margin-left: 0.5in; margin-right: 0.5in;"><h3 class="title">Note</h3>
-The scenario described above cannot occur if you use <span class="command"><strong>operf</strong></span> for profiling instead of
-<span class="command"><strong>opcontrol</strong></span>, because the perf_events kernel subsystem automatically detects when performance monitor
-interrupts are arriving at a dangerous level and will throttle back the sampling rate.
-</div>
           </div>
         </div>
       </div>
-      <div class="chapter" title="Chapter 4. Obtaining results">
+      <div class="chapter" title="Chapter 4. Obtaining profiling results">
         <div class="titlepage">
           <div>
             <div>
-              <h2 class="title"><a id="results"></a>Chapter 4. Obtaining results</h2>
+              <h2 class="title"><a id="results"></a>Chapter 4. Obtaining profiling results</h2>
             </div>
           </div>
         </div>
@@ -3157,7 +2094,7 @@ interrupts are arriving at a dangerous level and will throttle back the sampling
             </dd>
             <dt>
               <span class="sect1">
-                <a href="#oparchive">6. Archiving measurements (<span class="command"><strong>oparchive</strong></span>)</a>
+                <a href="#oparchive">6. Analyzing profile data on another system (<span class="command"><strong>oparchive</strong></span>)</a>
               </span>
             </dt>
             <dd>
@@ -3186,23 +2123,13 @@ interrupts are arriving at a dangerous level and will throttle back the sampling
           </dl>
         </div>
         <p>
-OK, so the profiler has been running, but it's not much use unless we can get some data out. Sometimes,
-OProfile does a little <span class="emphasis"><em>too</em></span> good a job of keeping overhead low, and no data reaches
-the profiler. This can happen on lightly-loaded machines. If you're using OPorifle legacy mode, you can
-force a dump at any time with :
-</p>
-        <p>
-          <span class="command">
-            <strong>opcontrol --dump</strong>
-          </span>
-        </p>
-        <p>This ensures that any profile data collected by the <span class="command"><strong>oprofiled</strong></span> daemon has been flusehd
-to disk.  Remember to do a <code class="code">dump</code>, <code class="code">stop</code>, <code class="code">shutdown</code>, or <code class="code">deinit</code>
-before complaining there is no profiling data!
-</p>
-        <p>
-Now that we've got some data, it has to be processed. That's the job of <span class="command"><strong>opreport</strong></span>,
-<span class="command"><strong>opannotate</strong></span>, or <span class="command"><strong>opgprof</strong></span>.
+After collecting profile data, the raw data must undergo special processing in order for you to
+perform your analysis. The analysis tools that perform this special processing are
+<span class="command"><strong>opreport</strong></span>, <span class="command"><strong>opannotate</strong></span>, and <span class="command"><strong>opgprof</strong></span>.
+Additionally, the <span class="command"><strong>oparchive</strong></span> command is used to gather together profile
+data, sampled binary files, etc. for the purpose of off-line analysis.  While
+not really an analysis tool, <span class="command"><strong>oparchive</strong></span> is put in that category
+for convenience since it takes many of the same options as the other analysis tools.
 </p>
         <div class="sect1" title="1. Profile specifications">
           <div class="titlepage">
@@ -3213,11 +2140,11 @@ Now that we've got some data, it has to be processed. That's the job of <span cl
             </div>
           </div>
           <p>
-All of the analysis tools take a <span class="emphasis"><em>profile specification</em></span>.
-This is a set of definitions that describe which actual profiles should be
+All of the analysis tools take a <span class="emphasis"><em>profile specification</em></span>
+as an input argument.
+This is a set of definitions that describes the specific profile data that should be
 examined. The simplest profile specification is empty: this will match all
-the available profile files for the current session (this is what happens
-when you do <span class="command"><strong>opreport</strong></span>).
+the available profile files for the current session.
 </p>
           <p>
 Specification parameters are of the form <code class="option">name:value[,value]</code>.
@@ -3225,10 +2152,11 @@ For example, if I wanted to get a combined symbol summary for
 <code class="filename">/bin/myprog</code> and <code class="filename">/bin/myprog2</code>,
 I could do <span class="command"><strong>opreport -l image:/bin/myprog,/bin/myprog2</strong></span>.
 As a special case, you don't actually need to specify the <code class="option">image:</code>
-part here: anything left on the command line is assumed to be an
+part of the specification. Anything left on the command line after all other
+<span class="command"><strong>opreport</strong></span> options have been processed is assumed to be an
 <code class="option">image:</code> name. Similarly, if no <code class="option">session:</code>
 is specified, then <code class="option">session:current</code> is assumed ("current"
-is a special name of the current / last profiling session).
+is a special name of the current (i.e., most recent) profiling session).
 </p>
           <p>
 In addition to the comma-separated list shown above, some of the 
@@ -3414,10 +2342,7 @@ Differential profile of an archived binary with the current session :
                 <dd>
                   <p>
 		Same as <code class="option">image:</code>, but only for images that are for
-		a particular primary binary image (namely, an application). This only
-		makes sense to use if you're using <code class="option">--separate</code>.
-		This includes kernel modules and the kernel when using
-		<code class="option">--separate=kernel</code>.
+		a particular primary binary image (namely, an application).
 		</p>
                 </dd>
                 <dt>
@@ -3446,7 +2371,6 @@ Differential profile of an archived binary with the current session :
                   <p>
 		The symbolic event name to match on, e.g. <code class="option">event:DATA_MEM_REFS</code>.
 		You can pass a list of events for side-by-side comparison with <span class="command"><strong>opreport</strong></span>.
-		When using the timer interrupt, the event is always "TIMER".
 		</p>
                 </dd>
                 <dt>
@@ -3461,11 +2385,10 @@ Differential profile of an archived binary with the current session :
                   <p>
 		The event count to match on, e.g. <code class="option">event:DATA_MEM_REFS count:30000</code>.
 		Note that this value refers to the count value in the event spec you passed
-		to <span class="command"><strong>opcontrol</strong></span> or <span class="command"><strong>operf</strong></span> when setting up to do a
+		to <span class="command"><strong>operf</strong></span> when setting up to do a
 		profile run.  It has nothing to do with the sample counts in the profile data
 		itself.
 		You can pass a list of events for side-by-side comparison with <span class="command"><strong>opreport</strong></span>.
-		When using the timer interrupt, the count is always 0 (indicating it cannot be set).
 		</p>
                 </dd>
                 <dt>
@@ -3543,9 +2466,8 @@ Differential profile of an archived binary with the current session :
               </div>
             </div>
             <p>
-Each session's sample files can be found in the $SESSION_DIR/samples/ directory (default when
-using legacy mode: <code class="filename">/var/lib/oprofile/samples/</code>; default when using
-<span class="command"><strong>operf</strong></span>:  <code class="filename">&lt;cur_dir&gt;/oprofile_data/samples/</code>).
+Each session's sample files can be found in the $SESSION_DIR/samples/ directory (default
+for <span class="command"><strong>operf</strong></span> is <code class="filename">&lt;cur_dir&gt;/oprofile_data/samples/</code>).
 These are used, along with the binary image files, to produce human-readable data.
 In some circumstances (e.g., kernel modules), OProfile
 will not be able to find the binary images. All the tools have an <code class="option">--image-path</code>
@@ -3622,12 +2544,7 @@ taken per second.
                   <p>
 Similarly, if the application spends little time in the main binary image
 itself, with most of it spent in shared libraries it uses, you might
-not see any samples for the binary image (i.e., executable) itself. If you're
-using OProfile legacy mode profiling, then we recommend using
-<span class="command"><strong>opcontrol --separate=lib</strong></span> before the
-profiling session so that <span class="command"><strong>opreport</strong></span> and friends show
-the library profiles on a per-application basis.  This is done automatically
-when profiling with <span class="command"><strong>operf</strong></span>, so no special setup is necessary.
+not see any samples for the binary image (i.e., executable) itself.
 </p>
                 </dd>
                 <dt>
@@ -3644,7 +2561,7 @@ but no task with that group ID ever ran the code.
                 </dt>
                 <dd>
                   <p>
-If you're using a particular event counter, for example counting MMX
+If you're profiling a particular event, for example counting MMX
 operations, the code might simply have not generated any events in the
 first place. Verify the code you're profiling does what you expect it
 to.
@@ -3678,7 +2595,7 @@ The <span class="command"><strong>opreport</strong></span> utility is the primar
 getting formatted data out of OProfile. It produces two types of data: image summaries
 and symbol summaries. An image summary lists the number of samples for individual
 binary images such as libraries or applications. Symbol summaries provide per-symbol
-profile data. In the following example, we're getting an image summary for the whole
+profile data. In the following truncated example, we see an image summary for the whole
 system:
 </p>
           <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
@@ -3686,19 +2603,36 @@ system:
               <td>
                 <pre class="screen">
 $ opreport --long-filenames
-CPU: PIII, speed 863.195 MHz (estimated)
-Counted CPU_CLK_UNHALTED events (clocks processor is not halted) with a unit mask of 0x00 (No unit mask) count 23150
-   905898 59.7415 /usr/lib/gcc-lib/i386-redhat-linux/3.2/cc1plus
-   214320 14.1338 /boot/2.6.0/vmlinux
-   103450  6.8222 /lib/i686/libc-2.3.2.so
-    60160  3.9674 /usr/local/bin/madplay
-    31769  2.0951 /usr/local/oprofile-pp/bin/oprofiled
-    26550  1.7509 /usr/lib/libartsflow.so.1.0.0
-    23906  1.5765 /usr/bin/as
-    18770  1.2378 /oprofile
-    15528  1.0240 /usr/lib/qt-3.0.5/lib/libqt-mt.so.3.0.5
-    11979  0.7900 /usr/X11R6/bin/XFree86
-    11328  0.7471 /bin/bash
+CPU: Intel Sandy Bridge microarchitecture, speed 2401 MHz (estimated)
+Counted CPU_CLK_UNHALTED events (Clock cycles when not halted) with a unit mask of 0x00 (No unit mask) count 100000
+CPU_CLK_UNHALT...|
+  samples|      %|
+------------------
+    22577 28.9011 /usr/bin/Xorg
+        CPU_CLK_UNHALT...|
+          samples|      %|
+        ------------------
+            16846 74.6158 /proc/kallsyms
+             2126  9.4167 /usr/bin/Xorg
+              763  3.3795 /usr/lib64/libpixman-1.so.0.26.2
+              ...
+    17402 22.2766 /usr/lib/jvm/java-1.7.0-openjdk-1.7.0.55.x86_64/jre/bin/java
+        CPU_CLK_UNHALT...|
+          samples|      %|
+        ------------------
+             5666 32.5595 anon (tgid:29664 range:0x7f3475000000-0x7f347616ffff)
+             2312 13.2858 /usr/lib/jvm/java-1.7.0-openjdk-1.7.0.55.x86_64/jre/lib/amd64/server/libjvm.so
+             ...
+    11554 14.7904 /home/user1/oprof-install/bin/operf
+        CPU_CLK_UNHALT...|
+          samples|      %|
+        ------------------
+             7467 64.6270 /proc/kallsyms
+             1691 14.6356 /usr/bin/operf
+             1324 11.4592 /lib64/libc-2.12.so
+              455  3.9380 /usr/lib64/libstdc++.so.6.0.13
+              315  2.7263 /ext4
+              ...
     ...
 </pre>
               </td>
@@ -3708,31 +2642,28 @@ Counted CPU_CLK_UNHALTED events (clocks processor is not halted) with a unit mas
 If we had specified <code class="option">--symbols</code> in the previous command, we would have
 gotten a symbol summary of all the images across the entire system. We can restrict this to only
 part of the system profile; for example,
-below is a symbol summary of the OProfile daemon. Note that as we used
-<span class="command"><strong>opcontrol --separate=lib,kernel</strong></span>, symbols from images that <span class="command"><strong>oprofiled</strong></span>
-has used are also shown.
+below is a symbol summary for the <span class="command"><strong>operf</strong></span> program used to collect the profile.
 </p>
           <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
             <tr>
               <td>
                 <pre class="screen">
-$ opreport -l -p /lib/modules/`uname -r` `which oprofiled` 2&gt;/dev/null | more
-CPU: Core 2, speed 2.534e+06 MHz (estimated)
-Counted CPU_CLK_UNHALTED events (Clock cycles when not halted) with a unit mask of 0x00 (Unhalted core cycles) count 100000
+$ opreport -l -p /lib/modules/`uname -r` `which operf` 2&gt;/dev/null | more
+CPU: Intel Sandy Bridge microarchitecture, speed 2401 MHz (estimated)
+Counted CPU_CLK_UNHALTED events (Clock cycles when not halted) with a unit mask of 0x00 (No unit mask) count 100000
 samples  %        image name               symbol name
-1353     24.9447  vmlinux                  sidtab_context_to_sid
-500       9.2183  vmlinux                  avtab_hash_eval
-154       2.8392  vmlinux                  __link_path_walk
-152       2.8024  vmlinux                  d_prune_aliases
-120       2.2124  vmlinux                  avtab_search_node
-104       1.9174  vmlinux                  find_next_bit
-85        1.5671  vmlinux                  selinux_file_fcntl
-82        1.5118  vmlinux                  avtab_write
-81        1.4934  oprofiled                odb_update_node_with_offset
-73        1.3459  oprofiled                opd_process_samples
-72        1.3274  vmlinux                  avc_has_perm_noaudit
-61        1.1246  libc-2.12.so             _IO_vfscanf
-59        1.0878  ext4.ko                  ext4_mark_iloc_dirty
+860       7.4607  kallsyms                 avtab_search_node
+474       4.1121  operf                    OP_perf_utils::op_write_event(event_union*, unsigned long long)
+461       3.9993  kallsyms                 avc_has_perm_noaudit
+455       3.9473  libstdc++.so.6.0.13      /usr/lib64/libstdc++.so.6.0.13
+412       3.5742  libc-2.12.so             _IO_vfscanf
+369       3.2012  kallsyms                 __d_lookup
+350       3.0363  kallsyms                 sidtab_context_to_sid
+274       2.3770  operf                    OP_perf_utils::op_record_process_exec_mmaps(int, int, int, operf_record*)
+232       2.0127  operf                    operf_process_info::find_mapping_for_sample(unsigned long long, bool)
+222       1.9259  kallsyms                 __link_path_walk
+191       1.6570  kallsyms                 pipe_read
+34        0.2950  ext4.ko                  ext4_mark_iloc_dirty
 ...
 </pre>
               </td>
@@ -3748,8 +2679,8 @@ If you have used one of the <code class="option">--separate[*]</code> options
 whilst profiling, there can be several separate profiles for
 a single binary image within a session. Normally the output
 will keep these images separated. So, for example, if you profiled
-with separation on a per-cpu basis (<code class="code">opcontrol --separate=cpu</code> or
-<code class="code">operf --separate-cpu</code>), you would see separate columns in
+with separation on a per-cpu basis (<code class="code">operf --separate-cpu</code>),
+you would see separate columns in
 the output of <span class="command"><strong>opreport</strong></span> for each CPU where samples
 were recorded. But it can be useful to merge these results back together
 to make the report more readable. The <code class="option">--merge</code> option allows
@@ -3880,11 +2811,11 @@ as calling <code class="function">strfry()</code>, but it's clear from the sourc
 that this doesn't actually happen. See <a class="xref" href="#interpreting-callgraph" title="3. Interpreting call-graph profiles">Section 3, &#8220;Interpreting call-graph profiles&#8221;</a> for an explanation.
 </p>
             </div>
-            <div class="sect3" title="2.3.2. Callgraph and JIT support">
+            <div class="sect3" title="2.3.2. Callgraph is not supported with JIT samples">
               <div class="titlepage">
                 <div>
                   <div>
-                    <h4 class="title"><a id="cg-with-jitsupport"></a>2.3.2. Callgraph and JIT support</h4>
+                    <h4 class="title"><a id="cg-with-jitsupport"></a>2.3.2. Callgraph is not supported with JIT samples</h4>
                   </div>
                 </div>
               </div>
@@ -3969,13 +2900,12 @@ A typical way to use this feature is with archives created with
               <tr>
                 <td>
                   <pre class="screen">
-$ ./a
+$ operf ./a
 $ oparchive -o orig ./a
-$ opcontrol --reset
   # edit and recompile a
-$ ./a
+$ operf ./a
   # now compare the current profile of a with the archived profile
-$ opreport -xl ./a { archive:./orig } { }
+$ opreport --session-dir=`pwd`/oprofile_data/ -xl ./a { archive:./orig } { }
 CPU: PIII, speed 863.233 MHz (estimated)
 Counted CPU_CLK_UNHALTED events (clocks processor is not halted) with a
 unit mask of 0x00 (No unit mask) count 100000
@@ -4059,8 +2989,7 @@ samples  %        image name    		                symbol name
 </p>
             <p>
 Note that, since such mappings are dependent upon individual invocations of
-a binary, these mappings are always listed as a dependent image,
-even when using the legacy mode <code class="option">opcontrol --separate=none</code> command.
+a binary, these mappings are always listed as a dependent image.
 Equally, the results are not affected by the <code class="option">--merge</code>
 option.
 </p>
@@ -4071,7 +3000,7 @@ Enhanced support for JITed code is now available for some virtual machines;
 e.g., the Java Virtual Machine.  For details about OProfile output for
 JITed code, see <a class="xref" href="#getting-jit-reports" title="4. OProfile results with JIT samples">Section 4, &#8220;OProfile results with JIT samples&#8221;</a>.
 </p>
-            <p>For more information about JIT support in OProfile, see <a class="xref" href="#jitsupport" title="3.1. Support for dynamically compiled (JIT) code">Section 3.1, &#8220;Support for dynamically compiled (JIT) code&#8221;</a>.
+            <p>For more information about JIT support in OProfile, see <a class="xref" href="#jitsupport" title="4.1. Support for dynamically compiled (JIT) code">Section 4.1, &#8220;Support for dynamically compiled (JIT) code&#8221;</a>.
 </p>
           </div>
           <div class="sect2" title="2.6. XML formatted output">
@@ -4172,8 +3101,7 @@ offsets for the image binary.
                 <dd>
                   <p>
 Do not include application-specific images for libraries, kernel modules
-and the kernel. This option only makes sense if the profile session
-used --separate.
+and the kernel.
 </p>
                 </dd>
                 <dt>
@@ -4288,12 +3216,18 @@ Reverse the sort from the default.
 </p>
                 </dd>
                 <dt>
-                  <span class="term"><code class="option">--session-dir=</code>dir_path</span>
+                  <span class="term">
+                    <code class="option">--session-dir=dir_path</code>
+                  </span>
                 </dt>
                 <dd>
                   <p>
-Use sample database out of directory <code class="filename">dir_path</code> 
-instead of the default location (/var/lib/oprofile).
+Use sample database from the specified directory <code class="filename">dir_path</code> instead
+of the default location. If this option is not specified, then opreport will search for
+samples in <code class="filename">&lt;cur_dir&gt;/oprofile_data</code>
+first. If that directory does not exist, the standard session-dir of
+<code class="filename">/var/lib/oprofile</code> is used
+as the session directory.
 </p>
                 </dd>
                 <dt>
@@ -4336,7 +3270,8 @@ List per-symbol information instead of a binary image summary.
                 <dd>
                   <p>
 Only output data for symbols that have more than the given percentage
-of total samples.
+of total samples. For profiles using multiple events, if the threshold is reached
+for any event, then all sample data for the symbol is shown.
 </p>
                 </dd>
                 <dt>
@@ -4392,7 +3327,7 @@ use <span class="command"><strong>opannotate <code class="option">--assembly</co
 as long as the binary has (at least) symbol information.
 </p>
           <p>
-Note that for the reason explained in <a class="xref" href="#hardware-counters" title="6.1. Hardware performance counters">Section 6.1, &#8220;Hardware performance counters&#8221;</a> the results can be
+Note that for the reason explained in <a class="xref" href="#hardware-counters" title="3.1. Hardware performance counters">Section 3.1, &#8220;Hardware performance counters&#8221;</a> the results can be
 inaccurate. The debug information itself can add other problems; for example, the line number for a symbol can be
 incorrect. Assembly instructions can be re-ordered and moved by the compiler, and this can lead to
 crediting source lines with samples not really "owned" by this line. Also see
@@ -4545,8 +3480,7 @@ pattern-matching to make C++ symbol demangling more readable.
                 <dd>
                   <p>
 Do not include application-specific images for libraries, kernel modules
-and the kernel. This option only makes sense if the profile session
-used --separate.
+and the kernel.
 </p>
                 </dd>
                 <dt>
@@ -4557,6 +3491,19 @@ used --separate.
                 <dd>
                   <p>
 Exclude all files in the given comma-separated list of glob patterns.
+This option is supported solely with the <code class="code">--source</code>
+option. It can be used to filter out source files in the output using the
+following types of specifications:
+</p>
+                  <div class="itemizedlist">
+                    <ul class="itemizedlist" type="disc">
+                      <li class="listitem">filenames (basename -- i.e., no path)</li>
+                      <li class="listitem">filename glob specifications (all files whose base filename matches the given pattern)</li>
+                      <li class="listitem">directory segments (all source files located in the specified directory; e.g. "libio")</li>
+                      <li class="listitem">directory segment glob specifications (e.g., "libi*")</li>
+                    </ul>
+                  </div>
+                  <p>
 </p>
                 </dd>
                 <dt>
@@ -4608,6 +3555,7 @@ A path to a filesystem to search for additional binaries.
                 <dd>
                   <p>
 Only include files in the given comma-separated list of glob patterns.
+The same rules apply for this option as for the <code class="code">--exclude-file</code> option.
 </p>
                 </dd>
                 <dt>
@@ -4694,8 +3642,23 @@ source files when the debug information only contains relative paths.
                 </dt>
                 <dd>
                   <p>
-Output annotated source. This requires debugging information to be available
-for the binaries.
+Output annotated source. This requires debugging information to be available
+for the binaries.
+</p>
+                </dd>
+                <dt>
+                  <span class="term">
+                    <code class="option">--session-dir=dir_path</code>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+Use sample database from the specified directory <code class="filename">dir_path</code> instead
+of the default location. If this option is not specified, then opannotate will search for
+samples in <code class="filename">&lt;cur_dir&gt;/oprofile_data</code>
+first. If that directory does not exist, the standard session-dir of
+<code class="filename">/var/lib/oprofile</code> is used
+as the session directory.
 </p>
                 </dd>
                 <dt>
@@ -4705,8 +3668,14 @@ for the binaries.
                 </dt>
                 <dd>
                   <p>
-Only output data for symbols that have more than the given percentage
-of total samples.
+For annotated assembly, only output data for symbols that have more than the given percentage
+of total samples. For profiles using multiple events, if the threshold is reached
+for any event, then all sample data for the symbol is shown.
+</p>
+                  <p>
+For annotated source, only output data for source files that have more than the given percentage
+of total samples. For profiles using multiple events, if the threshold is reached
+for any event, then all sample data for the source file is shown.
 </p>
                 </dd>
                 <dt>
@@ -4912,6 +3881,14 @@ of total samples.
                 <dd>
                   <p>
 Give verbose debugging output.
+</p></dd>
+<dt><span class="term"><code class="option">--session-dir=dir_path</code></span></dt><dd><p>
+Use sample database from the specified directory <code class="filename">dir_path</code> instead
+of the default location. If this option is not specified, then opgprof will search for
+samples in <code class="filename">&lt;cur_dir&gt;/oprofile_data</code>
+first. If that directory does not exist, the standard session-dir of
+<code class="filename">/var/lib/oprofile</code> is used
+as the session directory.
 </p>
                 </dd>
                 <dt>
@@ -4928,24 +3905,24 @@ Show version.
             </div>
           </div>
         </div>
-        <div class="sect1" title="6. Archiving measurements (oparchive)">
+        <div class="sect1" title="6. Analyzing profile data on another system (oparchive)">
           <div class="titlepage">
             <div>
               <div>
-                <h2 class="title" style="clear: both"><a id="oparchive"></a>6. Archiving measurements (<span class="command"><strong>oparchive</strong></span>)</h2>
+                <h2 class="title" style="clear: both"><a id="oparchive"></a>6. Analyzing profile data on another system (<span class="command"><strong>oparchive</strong></span>)</h2>
               </div>
             </div>
           </div>
           <p>
 	The <span class="command"><strong>oparchive</strong></span> utility generates a directory populated
 	with executable, debug, and oprofile sample files. This directory can be
-	moved to another machine via <span class="command"><strong>tar</strong></span> and analyzed without
-	further use of the data collection machine.
+	copied to another (host) machine and analyzed offline, with no further need to
+	access the data collection machine (target).
 </p>
           <p>
-	The following command would collect the sample files, the executables
-	associated with the sample files, and the debuginfo files associated
-	with the executables and copy them into
+	The following command, executed on the target system, will collect the
+	sample files, the executables associated with the sample files, and the
+	debuginfo files associated with the executables and copy them into
 	<code class="filename">/tmp/current_data</code>:
 </p>
           <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
@@ -4957,6 +3934,66 @@ Show version.
               </td>
             </tr>
           </table>
+          <p>
+	When transferring archived profile data to a host machine for offline analysis,
+	you need to determine if the oprofile ABI format is compatible between the
+	target system and the host system; if it isn't, you must run the <span class="command"><strong>opimport</strong></span>
+	command to convert the target's sample data files to the format of your host system.
+	See <a class="xref" href="#opimport" title="7. Converting sample database files (opimport)">Section 7, &#8220;Converting sample database files (<span class="command"><strong>opimport</strong></span>)&#8221;</a> for more details.
+</p>
+          <p>
+	After your profile data is transferred to the host system and (if necessary)
+	you have run the <span class="command"><strong>opimport</strong></span> command to convert the file
+	format, you can now run the <span class="command"><strong>opreport</strong></span> and
+	<span class="command"><strong>opannotate</strong></span> commands.  However, you must provide an
+	"archive specification" to let these post-processing tools know where to find
+	the profile data (sample files, executables, etc.); for example:
+</p>
+          <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
+            <tr>
+              <td>
+                <pre class="screen">
+# opreport archive:/home/user1/my_oprofile_archive --symbols
+</pre>
+              </td>
+            </tr>
+          </table>
+          <p>
+	Furthermore, if your profile was collected on your target system into a session-dir
+	other than <code class="filename">/var/lib/oprofile</code>, the <span class="command"><strong>oparchive</strong></span>
+	command will display a message similar to the following:
+</p>
+          <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
+            <tr>
+              <td>
+                <pre class="screen">
+# NOTE: The sample data in this archive is located at /home/user1/test-stuff/oprofile_data
+instead of the standard location of /var/lib/oprofile.  Hence, when using opreport
+and other post-processing tools on this archive, you must pass the following option:
+        --session-dir=/home/user1/test-stuff/oprofile_data
+</pre>
+              </td>
+            </tr>
+          </table>
+          <p>
+	Then the above <span class="command"><strong>opreport</strong></span> example would have to include that
+	<code class="option">--session-dir</code> option.
+</p>
+          <p>
+</p>
+          <div class="note" title="Note" style="margin-left: 0.5in; margin-right: 0.5in;"><h3 class="title">Note</h3>
+	 In some host/target development environments, all target executables, libraries, and
+	 debuginfo files are stored in a root directory on the host to facilitate offline
+	 analysis.  In such cases, the <span class="command"><strong>oparchive</strong></span> command collects more data
+	 than is necessary; so, when copying the resulting output of <span class="command"><strong>oparchive</strong></span>,
+	 you can skip all of the executables, etc., and just archive the <code class="filename">$SESSION_DIR</code>
+	 tree located within the output directory you specified in your <span class="command"><strong>oparchive</strong></span>
+	 command. Then, when running the <span class="command"><strong>opreport</strong></span> or	<span class="command"><strong>opannotate</strong></span>
+	 commands on your host system, pass the <code class="option">--root</code> option to point to the
+	 location of your target's executables, etc.
+</div>
+          <p>
+</p>
           <div class="sect2" title="6.1. Usage of oparchive">
             <div class="titlepage">
               <div>
@@ -4985,8 +4022,7 @@ Show help message.
                 <dd>
                   <p>
 Do not include application-specific images for libraries, kernel modules
-and the kernel. This option only makes sense if the profile session
-used --separate.
+and the kernel.
 </p>
                 </dd>
                 <dt>
@@ -5038,6 +4074,21 @@ Only list the files that would be archived, don't copy them.
                 <dd>
                   <p>
 Give verbose debugging output.
+</p>
+                </dd>
+                <dt>
+                  <span class="term">
+                    <code class="option">--session-dir=dir_path</code>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+Use sample database from the specified directory <code class="filename">dir_path</code> instead
+of the default location. If this option is not specified, then oparchive will search for
+samples in <code class="filename">&lt;cur_dir&gt;/oprofile_data</code>
+first. If that directory does not exist, the standard session-dir of
+<code class="filename">/var/lib/oprofile</code> is used
+as the session directory.
 </p>
                 </dd>
                 <dt>
@@ -5064,15 +4115,20 @@ Show version.
           </div>
           <p>
 	This utility converts sample database files from a foreign binary format (abi) to
-	the native format. This is useful only when moving sample files between systems
-	for analysis on platforms other than the one used for collection. The <span class="command"><strong>
-	oparchive</strong></span> should be used on the machine where the profile was taken (target)
-	in order to collect sample files and all other necessary information. The archive
-	directory that is the output from <span class="command"><strong>oparchive</strong></span> should be copied
-	to the system where you wish to perform your performance analysis (host). If the
-	When the architecture of your target and host systems differ, then you'll need to
-	use the <span class="command"><strong>opimport</strong></span> command. The abi format of the sample files
-	to be imported is described in a text file located in <code class="filename">$SESSION_DIR/abi</code>.
+	the native format. This is required when moving sample files to a (host) system
+	other than the one used for collection (target system), if the host and target systems have different
+	architectures. The abi format of the sample files to be imported is described in a
+	text file located in <code class="filename">$SESSION_DIR/abi</code>.  If you are unsure if
+	your target and host systems have compatible architectures (in regard to the OProfile
+	ABI), simply diff a <code class="filename">$SESSION_DIR/abi</code> file from the target system
+	with one from the host system.  If any differences show up at all, you must run the
+	<span class="command"><strong>opimport</strong></span> command.
+</p>
+          <p>
+	The <span class="command"><strong>oparchive</strong></span> command should be used on the machine where
+	the profile was taken (target) in order to collect sample files and all other necessary
+	information. The archive directory that is the output from <span class="command"><strong>oparchive</strong></span>
+	should be copied to the system where you wish to perform your performance analysis (host).
 </p>
           <p>
 	The following command converts an input sample file to the specified
@@ -5426,10 +4482,7 @@ problem and OProfile can do nothing about it.
 OProfile uses non-maskable interrupts (NMI) on the P6 generation, Pentium 4,
 Athlon, Opteron, Phenom, and Turion processors. These interrupts can occur even in sections of the
 kernel where interrupts are disabled, allowing collection of samples in virtually
-all executable code.  The timer interrupt mode and Itanium 2 collection mechanisms
-use maskable interrupts; therefore, these profiling mechanisms have "sample
-shadows", or blind spots: regions where no samples will be collected. Typically, the samples
-will be attributed to the code immediately after the interrupts are re-enabled.
+all executable code.
 </p>
           </div>
           <div class="sect2" title="2.2. Idle time">
@@ -5461,7 +4514,7 @@ will appear as <code class="function">poll_idle()</code> in your kernel profile.
 OProfile profiles kernel modules by default. However, there are a couple of problems
 you may have when trying to get results. First, you may have booted via an initrd;
 this means that the actual path for the module binaries cannot be determined automatically.
-To get around this, you can use the <code class="option">-p</code> option to the profiling tools
+To get around this, you can use the <code class="option">-p</code> option to the analysis tools
 to specify where to look for the kernel modules.
 </p>
             <p>
@@ -5491,7 +4544,7 @@ information for OProfile to get this information.
             </div>
           </div>
           <p>
-Sometimes the results from call-graph profiles may be different to what
+Sometimes the results from call-graph profiles may be different from what
 you expect to see. The first thing to check is whether the target
 binaries where compiled with frame pointers enabled (if the binary was
 compiled using <span class="command"><strong>gcc</strong></span>'s
@@ -5954,11 +5007,301 @@ and <a class="ulink" href="http://developer.amd.com/devguides.jsp/">http://devel
 </p>
         </div>
       </div>
-      <div class="chapter" title="Chapter 6. Acknowledgments">
+      <div class="chapter" title="Chapter 6. Controlling the event counter">
+        <div class="titlepage">
+          <div>
+            <div>
+              <h2 class="title"><a id="controlling-counter"></a>Chapter 6. Controlling the event counter</h2>
+            </div>
+          </div>
+        </div>
+        <div class="toc">
+          <p>
+            <b>Table of Contents</b>
+          </p>
+          <dl>
+            <dt>
+              <span class="sect1">
+                <a href="#controlling-ocount">1. Using <span class="command"><strong>ocount</strong></span></a>
+              </span>
+            </dt>
+          </dl>
+        </div>
+        <div class="sect1" title="1. Using ocount">
+          <div class="titlepage">
+            <div>
+              <div>
+                <h2 class="title" style="clear: both"><a id="controlling-ocount"></a>1. Using <span class="command"><strong>ocount</strong></span></h2>
+              </div>
+            </div>
+          </div>
+          <p>
+This section describes in detail how <span class="command"><strong>ocount</strong></span> is used.
+Unless the <code class="option">--events</code> option is specified, <span class="command"><strong>ocount</strong></span> will use
+the default event for your system. For most systems, the default event is some
+cycles-based event, assuming your processor type supports hardware performance
+counters. The event specification used for <span class="command"><strong>ocount</strong></span> is slightly
+different from that required for profiling -- a <span class="emphasis"><em>count</em></span> value
+is not needed. You can see the event information for your CPU using <span class="command"><strong>ophelp</strong></span>.
+More information on event specification can be found at <a class="xref" href="#eventspec" title="3. Specifying performance counter events">Section 3, &#8220;Specifying performance counter events&#8221;</a>.
+</p>
+          <p>
+The <span class="command"><strong>ocount</strong></span> command syntax is:
+</p>
+          <p>
+</p>
+          <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
+            <tr>
+              <td>
+                <pre class="screen">ocount [ options ] [ --system-wide | --process-list &lt;pids&gt; | --thread-list &lt;tids&gt; | --cpu-list &lt;cpus&gt; [ command [ args ] ] ]
+</pre>
+              </td>
+            </tr>
+          </table>
+          <p>
+</p>
+          <p>
+<span class="command"><strong>ocount</strong></span> has 5 run modes:
+</p>
+          <p>
+</p>
+          <div class="itemizedlist">
+            <ul class="itemizedlist" type="disc">
+              <li class="listitem">system-wide</li>
+              <li class="listitem">process-list</li>
+              <li class="listitem">thread-list</li>
+              <li class="listitem">cpu-list</li>
+              <li class="listitem">command</li>
+            </ul>
+          </div>
+          <p>
+</p>
+          <p>
+One and only one of these 5 run modes must be specified when you run <span class="command"><strong>ocount</strong></span>.
+If you run <span class="command"><strong>ocount</strong></span> using a run mode other than <code class="code">command [args]</code>, press Ctrl-c
+to stop it when finished counting (e.g., when the monitored process ends). If you background <span class="command"><strong>ocount</strong></span>
+(i.e., with &#8217;&amp;&#8217;) while using one of these run modes, you must stop it in a controlled manner so that
+the data collection process can be shut down cleanly and final results can be displayed.
+Use <code class="code">kill -SIGINT &lt;ocount-PID&gt;</code> for this purpose.
+</p>
+          <p>
+Following is a description of the <span class="command"><strong>ocount</strong></span> options.
+</p>
+          <div class="variablelist">
+            <dl>
+              <dt>
+                <span class="term">
+                  <code class="option">command [args]</code>
+                </span>
+              </dt>
+              <dd>
+                <p>
+		The command or application to be profiled. The <span class="emphasis"><em>[args]</em></span> are the input arguments
+        that the command or application requires. The command and its arguments must be positioned at the
+        end of the command line, after all other <span class="command"><strong>ocount</strong></span> options.
+        </p>
+              </dd>
+              <dt>
+                <span class="term">
+                  <code class="option">--process-list / -p [PIDs]</code>
+                </span>
+              </dt>
+              <dd>
+                <p>
+		Use this option to count events for one or more already-running applications, specified via
+        a comma-separated list (PIDs). Event counts will be collected for all children of the
+        passed process(es) as well.
+		</p>
+              </dd>
+              <dt>
+                <span class="term">
+                  <code class="option">--thread-list / -r [TIDs]</code>
+                </span>
+              </dt>
+              <dd>
+                <p>
+		Use this option to count events for one or more already-running threads, specified via
+        a comma-separated list (TIDs). Event counts will <span class="emphasis"><em>not</em></span> be collected
+        for any children of the passed thread(s).
+		</p>
+              </dd>
+              <dt>
+                <span class="term">
+                  <code class="option">--system-wide / -s</code>
+                </span>
+              </dt>
+              <dd>
+                <p>
+		This option is for counting events for all processes running on your system. You must have
+        root authority to run <span class="command"><strong>ocount</strong></span> in this mode.
+        </p>
+              </dd>
+              <dt>
+                <span class="term">
+                  <code class="option">--cpu-list / -C [CPUs]</code>
+                </span>
+              </dt>
+              <dd>
+                <p>
+		This option is for counting events on a subset of processors on your system. You must have
+        root authority to run <span class="command"><strong>ocount</strong></span> in this mode. This is a comma-separated list,
+        where each element in the list may be either a single processor number or a range of processor
+        numbers; for example: &#8217;-C 2,3,4-11,15&#8217;.
+		</p>
+              </dd>
+              <dt>
+                <span class="term">
+                  <code class="option">--events / -e  [event1[,event2[,...]]]</code>
+                </span>
+              </dt>
+              <dd>
+                <p>
+		This option is for passing a comma-separated list of event specifications
+		for counting. Each event spec is of the form:
+		</p>
+                <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
+                  <tr>
+                    <td>
+                      <pre class="screen">name[:unitmask[:kernel[:user]]]</pre>
+                    </td>
+                  </tr>
+                </table>
+                <p>
+		When no event specification is given, the default event for the running
+		processor type will be used for counting. Use <span class="command"><strong>ophelp</strong></span>
+		to list the available events for your processor type.
+		</p>
+              </dd>
+              <dt>
+                <span class="term">
+                  <code class="option">--separate-thread / -t</code>
+                </span>
+              </dt>
+              <dd>
+                <p>
+        This option can be used in conjunction with either the <code class="code">--process-list</code> or
+        <code class="code">--thread-list</code> option to display event counts on a per-thread (per-process) basis.
+        Without this option, all counts are aggregated.
+		</p>
+              </dd>
+              <dt>
+                <span class="term">
+                  <code class="option">--separate-cpu / -c</code>
+                </span>
+              </dt>
+              <dd>
+                <p>
+		This option can be used in conjunction with either the <code class="code">--system-wide</code> or
+		<code class="code">--cpu-list</code> option to display event counts on a per-cpu basis. Without this option,
+		all counts are aggregated.
+		</p>
+              </dd>
+              <dt>
+                <span class="term">
+                  <code class="option">--time-interval / -i interval_length[:num_intervals]</code>
+                </span>
+              </dt>
+              <dd>
+                <p>
+		<span class="command"><strong>Note: </strong></span>The <code class="code">interval_length</code> is given in milliseconds.
+              However, the current implementation only supports 100 ms
+              granularity, so the given <code class="code">interval_length</code> will be rounded
+              to the nearest 100 ms.  Results collected for each time
+              interval are printed immediately instead of the default
+              of one dump of cumulative event counts at the end of the
+              run.  Counters are reset to zero at the start of each
+              interval.
+		</p>
+                <p>
+              If <code class="code">num_intervals</code> is specified, ocount exits after the
+              specified number of intervals occur.
+		</p>
+              </dd>
+              <dt>
+                <span class="term">
+                  <code class="option">--brief-format / -b</code>
+                </span>
+              </dt>
+              <dd>
+                <p>
+		Use this option to print results in the following brief format:
+		</p>
+                <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
+                  <tr>
+                    <td>
+                      <pre class="screen">
+                  [optional cpu or thread,]&lt;event_name&gt;,&lt;count&gt;,&lt;percent_time_enabled&gt;
+                  [        &lt;int&gt;         ,]&lt;  string  &gt;,&lt; u64 &gt;,&lt;     double         &gt;
+        </pre>
+                    </td>
+                  </tr>
+                </table>
+                <p>
+        If <code class="code">--time-interval</code> is specified, a separate line formatted as
+        </p>
+                <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
+                  <tr>
+                    <td>
+                      <pre class="screen">
+                  timestamp,&lt;num_seconds_since_epoch&gt;[.n]
+        </pre>
+                    </td>
+                  </tr>
+                </table>
+                <p>
+        is printed ahead of each dump of event counts. If the time interval specified is
+        less than one second, the timestamp will have 1/10 second precision.
+		</p>
+              </dd>
+              <dt>
+                <span class="term">
+                  <code class="option">--output-file / -f outfile_name</code>
+                </span>
+              </dt>
+              <dd>
+                <p>
+		Results are written to outfile_name instead of interactively to the terminal.
+		</p>
+              </dd>
+              <dt>
+                <span class="term">
+                  <code class="option">--verbose / -V</code>
+                </span>
+              </dt>
+              <dd>
+                <p>
+		Use this option to increase the verbosity of the output.
+		</p>
+              </dd>
+              <dt>
+                <span class="term">
+                  <code class="option">--version / -v</code>
+                </span>
+              </dt>
+              <dd>
+                <p>
+		Show <span class="command"><strong>ocount</strong></span> version.
+		</p>
+              </dd>
+              <dt>
+                <span class="term">
+                  <code class="option">--help / -h</code>
+                </span>
+              </dt>
+              <dd>
+                <p>
+		Show a help message.
+		</p>
+              </dd>
+            </dl>
+          </div>
+        </div>
+      </div>
+      <div class="chapter" title="Chapter 7. Acknowledgments">
         <div class="titlepage">
           <div>
             <div>
-              <h2 class="title"><a id="ack"></a>Chapter 6. Acknowledgments</h2>
+              <h2 class="title"><a id="ack"></a>Chapter 7. Acknowledgments</h2>
             </div>
           </div>
         </div>
diff --git a/doc/oprofile.xml b/doc/oprofile.xml
index 6bbab72..01cd309 100644
--- a/doc/oprofile.xml
+++ b/doc/oprofile.xml
@@ -3,7 +3,7 @@
 <book id="oprofile-guide">
 <bookinfo>
 	<title>OProfile manual</title>
- 
+
 	<authorgroup>
 		<author>
 			<firstname>John</firstname>
@@ -27,56 +27,72 @@
 
 <para>
 This manual applies to OProfile version <oprofileversion />.
-OProfile is a profiling system for Linux 2.6 and higher systems on a number of architectures. It is capable of profiling
-all parts of a running system, from the kernel (including modules and interrupt handlers) to shared libraries
-to binaries. OProfile can profile the whole system in the background, collecting information at a low overhead. These
-features make it ideal for profiling entire systems to determine bottle necks in real-world systems.
+OProfile is a set of performance monitoring tools for Linux 2.6 and higher systems, available on a number of architectures.
+OProfile provides the following features:
+<itemizedlist>
+<listitem>Profiler</listitem>
+<listitem>Post-processing tools for analyzing profile data</listitem>
+<listitem>Event counter</listitem>
+</itemizedlist>
 </para>
+<para>
+OProfile is capable of monitoring native hardware events occurring in all parts of a running system, from the kernel
+(including modules and interrupt handlers) to shared libraries
+to binaries. OProfile can collect event information for the whole system in the background with very little overhead. These
+features make it ideal for monitoring entire systems to determine bottlenecks in real-world systems.
+</para>
+
 <para>
 Many CPUs provide "performance counters", hardware registers that can count "events"; for example,
-cache misses, or CPU cycles. OProfile provides profiles of code based on the number of these occurring events:
+cache misses, or CPU cycles. OProfile can collect profiles of code based on the number of these occurring events:
 repeatedly, every time a certain (configurable) number of events has occurred, the PC value is recorded.
-This information is aggregated into profiles for each binary image.</para>
-<para>
-Some hardware setups do not allow OProfile to use performance counters: in these cases, no
-events are available so OProfile operates in timer mode, as described in later chapters. Timer
-mode is only available in "legacy mode" (see <xref linkend="legacy_mode"/>).
-</para>
+This information is aggregated into profiles for each binary image.  Alternatively, OProfile's event counting
+tool can collect simple raw event counts.</para>
 <sect1 id="legacy_mode">
-<title>OProfile legacy mode</title>
-"Legacy" OProfile consists of the <command>opcontrol</command> shell script, the <command>oprofiled</command> daemon, and several post-processing tools (e.g.,
-<command>opreport</command>). The <command>opcontrol</command> script is used for configuring, starting, and stopping a profiling session. An OProfile
-kernel driver (usually built as a kernel module) is used for collecting samples, which are then recorded into sample files by
-<command>oprofiled</command>. Using OProfile in "legacy mode" requires root user authority since the profiling is done on a system-wide basis, which may
-(if misused) cause adverse effects to the system.
-<note>
-Profiling setup parameters that you specify using <command>opcontrol</command> are cached in <filename>/root/.oprofile/daemonrc</filename>.
-Subsequent runs of <code>opcontrol --start</code> will continue to use these cached values until you
-override them with new values.
-</note>
+<title>OProfile legacy profiling mode</title>
+Prior to release 1.0, OProfile included a profiling tool consisting of the <command>opcontrol</command> shell script, the <command>oprofiled</command> daemon,
+and the attendant oprofile kernel driver. This "legacy profiler" was deprecated in release 0.9.8 with the introduction of
+the <command>operf</command> profiling tool (see <xref linkend="perf_events"/>). Some older architectures/platforms
+do not support the use of <command>operf</command>. For those cases, oprofile users should install release 0.9.9, which is the
+last release to include the legacy profiler.
 </sect1>
 <sect1 id="perf_events">
-<title>OProfile perf_events mode</title>
-As of release 0.9.8, OProfile now includes the ability to profile a single process versus the system-wide technique
-of legacy OProfile. With this new technique, the <command>operf</command> program is used to control profiling instead of the
-<command>opcontrol</command> script and <command>oprofiled</command> daemon of leagacy mode. Also, <command>operf</command> does not require the
-special OProfile kernel driver that legacy mode does; instead, it interfaces with the kernel to collect samples via the Linux Kernel
-Performance Events Subsystem (hereafter referred to as "perf_events"). Using <command>operf</command> to profile a single
-process can be done as a normal user; however, root authority <emphasis>is</emphasis> required to run <command>operf</command> in system-wide
-profiling mode.
-<note>
-<title>Note 1</title>
-The same OProfile post-processing tools are used whether you collect your profile with <command>operf</command> or <command>opcontrol</command>.
-</note>
+<title>OProfile perf_events profiling mode</title>
+<para>
+OProfile has the ability to profile a single process or every currently running process (i.e., system-wide)
+via the <command>operf</command> program. <command>operf</command> interfaces with the
+kernel to collect samples via the Linux Kernel Performance Events Subsystem (hereafter
+referred to as "perf_events").  OProfile can co-exist with other tools on your system that
+may also be using the perf_events kernel subsystem.
+</para>
+<para>
+Using <command>operf</command> to profile a single
+process can be done as a normal user; however, root authority <emphasis>is</emphasis> required to run
+<command>operf</command> in system-wide profiling mode.
 <note>
-<title>Note 2</title>
+<title>Note</title>
 Some older processor models are not supported by the underlying perf_events kernel and, thus, are not supported by <command>operf</command>.
 If you receive the message
 <screen>  Your kernel's Performance Events Subsystem does not support your processor type</screen>
-when attempting to use <command>operf</command>, try profiling with <command>opcontrol</command>
+when attempting to use <command>operf</command>, install OProfile 0.9.9 and try profiling with <command>opcontrol</command>
 to see if your processor type may be supported by OProfile's legacy mode.
 </note>
+</para>
+</sect1>
+
+<sect1 id="event_counting">
+<title>OProfile event counting mode</title>
+OProfile provides the <command>ocount</command> tool for
+collecting raw event counts on a per-application, per-process, per-cpu, or system-wide basis.  Unlike the
+profiling tools, post-processing of the data collected is not necessary -- the data is displayed in the
+output of <command>ocount</command>.  A common use case for event counting tools is when performance analysts
+want to determine the CPI (cycles per instruction) for an application. High CPI implies possible stalls,
+and many architectures provide events that give detailed information about the different types of stalls.
+The events provided are architecture-specific, so we refer the reader to the hardware manuals available for
+the processor type being used.
 </sect1>
+
+
 <sect1 id="applications">
 <title>Applications of OProfile</title>
 <para>
@@ -107,33 +123,30 @@ OProfile is not a panacea. OProfile might not be a complete solution when you :
 <sect2 id="jitsupport">
 <title>Support for dynamically compiled (JIT) code</title>
 <para>
-Older versions of OProfile were not capable of attributing samples to symbols from dynamically
-compiled code, i.e. "just-in-time (JIT) code". Typical JIT compilers load the JIT code into
-anonymous memory regions. OProfile reported the samples from such code, but the attribution
-provided was simply:
-<screen>     anon: &lt;tgid&gt;&lt;address range&gt;</screen>
-Due to this limitation, it wasn't possible to profile applications executed by virtual machines (VMs)
-like the Java Virtual Machine. OProfile now contains an infrastructure to support JITed code.
+OProfile provides a framework to support JITed code ("just-in-time (JIT) compiled code").
 A development library is provided to allow developers
-to add support for any VM that produces dynamically compiled code (see the <emphasis>OProfile JIT agent
+to add support for any VM (virtual machine) that produces dynamically compiled code (see the <emphasis>OProfile JIT agent
 developer guide</emphasis>).
 In addition, built-in support is included for the following:</para>
 <itemizedlist><listitem>JVMTI agent library for Java (1.5 and higher)</listitem>
 <listitem>JVMPI agent library for Java (1.5 and lower)</listitem>
 </itemizedlist>
-<para>
+These libraries make it possible for OProfile to attribute profile samples
+to Java methods. Without a VM-specific agent library, OProfile will typically report
+samples from JITed code similar to the following example:
+<screen>     anon: &lt;tgid&gt;&lt;address range&gt;</screen>
 For information on how to use OProfile's JIT support, see <xref linkend="setup-jit"/>.
-</para>
 </sect2>
 
 <sect2 id="guestsupport">
 <title>No support for virtual machine guests</title>
 <para>
 OProfile currently does not support event-based profiling (i.e, using hardware events like cache misses,
-branch mispredicts) on virtual machine guests running under systems such as VMware.  The list of
-supported events displayed by ophelp or 'opcontrol --list-events' is based on CPU type and does
+branch mispredicts) on virtual machine guests running under systems such as VMware.
+(Note: KVM guests <emphasis>are</emphasis> supported.)  The list of
+supported events displayed by ophelp is based on CPU type and does
 not take into account whether the running system is a guest system or real system.  To use
-OProfile on such guest systems, you can use timer mode (see <xref linkend="timer" />).
+OProfile on such guest systems, you must use the legacy profiler's timer mode (see <xref linkend="timer" />).
 </para>
 </sect2>
 
@@ -147,47 +160,13 @@ OProfile on such guest systems, you can use timer mode (see <xref linkend="timer
 	<varlistentry>
 		<term>Linux kernel</term>
 		<listitem><para>
-			To use OProfile's JIT support, a kernel version 2.6.13 or later is required.
-			In earlier kernel versions, the anonymous memory regions are not reported to OProfile and results
-			in profiling reports without any samples in these regions.
-			</para>
-
-                       <para>
-                       Profiling the Cell Broadband Engine PowerPC Processing Element (PPE) requires a kernel version
-                       of 2.6.18 or more recent.
-                       Profiling the Cell Broadband Engine Synergistic Processing Element (SPE) requires a kernel version
-                       of 2.6.22 or more recent.  Additionally, full support of SPE profiling requires a BFD library
-                       from binutils code dated January 2007 or later.  To ensure the proper BFD support exists, run
-                       the <code>configure</code> utility with <code>--with-target=cell-be</code>.
-
-		       Profiling the Cell Broadband Engine using SPU events requires a kernel version of 2.6.29-rc1
-		       or  more recent.
-
-                       <note>Attempting to profile SPEs with kernel versions older than 2.6.22 may cause the
-                       system to crash.</note>
-                       </para>
-		
-			<para>
-			Instruction-Based Sampling (IBS) profile on AMD family10h processors requires 
-			kernel version 2.6.28-rc2 or later.
-			</para>
-		</listitem>
-	</varlistentry>
-	<varlistentry>
-		<term>Supported architecture</term>
-		<listitem><para>
-			For Intel IA32, processors as old as P6 generation or Pentium 4 core are
-			supported.  The AMD Athlon, Opteron, Phenom, and Turion CPUs are also supported.
-			Older IA32 CPU types can be used with the timer mode of OProfile; please
-			see later in this manual for details.  OProfile also supports most processor
-			types of the following architectures:  Alpha, MIPS, ARM, x86-64, sparc64, PowerPC,
-			AVR32, and, in timer mode, PA-RISC and s390.
+			Release 2.6.31 or higher
 		</para></listitem>
 	</varlistentry>
 	<varlistentry>
-		<term>Uniprocessor or SMP</term>
+		<term>Supported architectures</term>
 		<listitem><para>
-			SMP machines are fully supported.
+			AMD, ARM, Intel, PowerPC, Tile, MIPS
 		</para></listitem>
 	</varlistentry>
 	<varlistentry>
@@ -201,8 +180,7 @@ OProfile on such guest systems, you can use timer mode (see <xref linkend="timer
 	<varlistentry>
 		<term>Required kernel headers</term>
 		<listitem><para>
-			In order to build the perf_events-enabled <command>operf</command> program, you need to either
-			install the kernel-headers package for your system or use the <code>--with-kernel</code>
+			Either install the kernel-headers package for your system or use the <code>--with-kernel</code>
 			configure option.
 		</para></listitem>
 	</varlistentry>
@@ -219,13 +197,6 @@ OProfile on such guest systems, you can use timer mode (see <xref linkend="timer
 			account cannot be found.
 		</para></listitem>
 	</varlistentry>
-	<varlistentry>
-		<term>OProfile GUI</term>
-		<listitem><para>
-			The use of the GUI to start the profiler requires the <filename>Qt</filename> library.
-			Either <filename>Qt 3</filename> or <filename>Qt 4</filename> should work.
-		</para></listitem>
-	</varlistentry>
 	<varlistentry>
  		<term><acronym>ELF</acronym></term>
 		<listitem><para>
@@ -273,7 +244,7 @@ OProfile on such guest systems, you can use timer mode (see <xref linkend="timer
 		<term>Bug tracker</term>
 		<listitem><para>
 			There is a bug tracker for OProfile at SourceForge,
-			<ulink url="http://sf.net/tracker/?group_id=16191&amp;atid=116191">http://sf.net/tracker/?group_id=16191&amp;atid=116191</ulink>.
+			<ulink url="http://sourceforge.net/p/oprofile/bugs/">http://sourceforge.net/p/oprofile/bugs/</ulink>.
 		</para></listitem>
 	</varlistentry>
 	<varlistentry>
@@ -383,11 +354,6 @@ time by providing the "lapic" option to the kernel.
 If you use the NMI watchdog, be aware that the watchdog is disabled when profiling starts
 and not re-enabled until the profiling is stopped.
 </para>
-<para>
-Please note that you must save or have available the <filename>vmlinux</filename> file
-generated during a kernel compile, as OProfile needs it (you can use
-<option>--no-vmlinux</option>, but this will prevent kernel profiling).
-</para>
 
 </sect1>
 
@@ -406,13 +372,8 @@ remove all installed files except your configuration file in the directory <file
 <sect1 id="getting-started-with-operf">
 <title>Getting started with OProfile using <command>operf</command></title>
 <para>
-Profiling with <command>operf</command> is the recommended profiling mode with OProfile. Using
-this mode not only allows you to target your profiling more precisely (i.e., single process
-or system-wide), it also allows OProfile to co-exist better with other tools on your system that
-may also be using the perf_events kernel subsystem.
-</para>
-<para>
-With <command>operf</command>, there is no initial setup needed -- simply invoke <command>operf</command> with
+Profiling with <command>operf</command> allows you to precisely target your profiling (i.e., single process
+or system-wide). With <command>operf</command>, there is no initial setup needed -- simply invoke <command>operf</command> with
 the options you need; then run the OProfile post-processing tool(s). The <command>operf</command> syntax
 is as follows:
 </para>
@@ -430,61 +391,113 @@ unless you pass the <code>--session-dir</code> option.
 </para>
 </sect1>
 
-<sect1 id="getting-started-with-legacy">
-<title>Getting started with OProfile using legacy mode</title>
+
+<sect1 id="getting-started-with-ocount">
+<title>Getting started with OProfile using <command>ocount</command></title>
 <para>
-Before you can use OProfile's legacy mode, you must set it up. The minimum setup required for this
-is to tell OProfile where the <filename>vmlinux</filename> file corresponding to the
-running kernel is, for example :
-</para>
-<screen>opcontrol --vmlinux=/boot/vmlinux-`uname -r`</screen>
+<command>ocount</command> is an OProfile tool that can be used to count native hardware events occurring in either
+a specific application, a set of processes or threads, a set of active system processors, or the
+entire system. The data collected during a counting session is displayed to stdout by default, but may
+also be saved to a file.  The <command>ocount</command> syntax is as follows:
 <para>
-If you don't want to profile the kernel itself,
-you can tell OProfile you don't have a <filename>vmlinux</filename> file :
+<screen>ocount [ options ] [ --system-wide | --process-list &lt;pids&gt; | --thread-list &lt;tids&gt; | --cpu-list &lt;cpus&gt; [ command [ args ] ] ]
+</screen>
 </para>
-<screen>opcontrol --no-vmlinux</screen>
+A typical usage might look like this:
 <para>
-Now we are ready to start the daemon (<command>oprofiled</command>) which collects
-the profile data :
+<screen>ocount --events=CPU_CLK_UNHALTED,INST_RETIRED /home/user1/my_test_program my_arg</screen>
 </para>
-<screen>opcontrol --start</screen>
+When <filename>my_test_program</filename> completes (or when you press Ctrl-C), counting
+stops and the results are displayed to the screen (as shown below).
 <para>
-When you want to stop profiling, you can do so with :
+<screen>
+Events were actively counted for 2.8 seconds.
+Event counts (actual) for /home/user1/my_test_program:
+	Event                   Count                    % time counted
+	CPU_CLK_UNHALTED        9,408,018,070            100.00
+	INST_RETIRED            16,719,918,108           100.00
+</screen>
 </para>
-<screen>opcontrol --shutdown</screen>
+</para>
+</sect1>
+
+<sect1 id="eventspec">
+<title>Specifying performance counter events</title>
 <para>
-Note that unlike <command>gprof</command>, no instrumentation (<option>-pg</option>
-and <option>-a</option> options to <command>gcc</command>)
-is necessary.
+Whether profiling with <command>operf</command> or doing simple event counting with <command>ocount</command>,
+you can collect information about one or more native hardware events using the <code>--events</code>
+option -- a comma-separated list of event specifications. The event specification is the means to provide details
+of how each hardware performance counter should be set up.
+For profiling, the event specification is a colon-separated string of the form
+<option><emphasis>name</emphasis>:<emphasis>count</emphasis>:<emphasis>unitmask</emphasis>:<emphasis>kernel</emphasis>:<emphasis>user</emphasis></option>
+as described in the table below. For <command>ocount</command>, the specification is of the form
+<option><emphasis>name</emphasis>:<emphasis>unitmask</emphasis>:<emphasis>kernel</emphasis>:<emphasis>user</emphasis></option>.
+Note the presence of the <emphasis>count</emphasis> field for profiling.  The <emphasis>count</emphasis> field tells the profiler
+how many events should occur between a profile snapshot (usually referred to as a "sample").  Since
+<command>ocount</command> does not do sampling, the <emphasis>count</emphasis> field is not needed.
 </para>
 <para>
-Periodically (or on <command>opcontrol --shutdown</command> or <command>opcontrol --dump</command>)
-the profile data is written out into the $SESSION_DIR/samples directory (by default at <filename>/var/lib/oprofile/samples</filename>).
-These profile files cover shared libraries, applications, the kernel (vmlinux), and kernel modules.
-You can clear the profile data (at any time) with <command>opcontrol --reset</command>.
+If no event specs are passed to <command>operf</command> or <command>ocount</command>,
+the default event will be used.
 </para>
 <para>
-To place these sample database files in a specific directory instead of the default location
-(<filename>/var/lib/oprofile</filename>) use the <option>--session-dir=dir</option> option.
-You must also specify the <option>--session-dir</option> to tell the tools to continue using this directory.
+<note>The perf_events kernel subsystem allocates hardware counters as necessary, but some processor
+types have restrictions as to what hardware events may be counted simultaneously.
+The kernel employs a multiplexing technique when such
+hardware restrictions are encountered, such that events are monitored on a rotating basis.
+</note>
 </para>
-<screen>opcontrol --no-vmlinux --session-dir=/home/me/tmpsession</screen>
-<screen>opcontrol --start --session-dir=/home/me/tmpsession</screen>
+<informaltable frame="all">
+<tgroup cols='2'>
+<tbody>
+<row><entry><option>name</option></entry><entry>The symbolic event name, e.g. <constant>CPU_CLK_UNHALTED</constant></entry></row>
+<row><entry><option>count</option></entry><entry>The counter reset value, e.g. 100000; use only for profiling</entry></row>
+<row><entry><option>unitmask</option></entry><entry>The unit mask, as given in the events list: e.g. 0x0f; or a symbolic name
+if a <constant>name=&lt;um_name&gt;</constant> field is present</entry></row>
+<row><entry><option>kernel</option></entry><entry>Enable profiling of kernel code</entry></row>
+<row><entry><option>user</option></entry><entry>Enable profiling of userspace code</entry></row>
+</tbody>
+</tgroup>
+</informaltable>
 <para>
-You can get summaries of this data in a number of ways at any time. To get a summary of
-data across the entire system for all of these profiles, you can do :
+The last three values are optional; if you omit them (e.g. <option>operf --events=DATA_MEM_REFS:30000</option>),
+they will be set to the default values (i.e., the default unit mask value for the given event, and profiling (or counting)
+both kernel and userspace code will be enabled). Note that on some architectures, some events may
+require a unit mask be specified.
 </para>
-<screen>opreport [--session-dir=dir]</screen>
 <para>
-Or to get a more detailed summary, for a particular image, you can do something like :
+You can specify unit mask values using either a numerical value (hex values
+<emphasis>must</emphasis> begin with "0x") or a symbolic name (if the <constant>name=&lt;um_name&gt;</constant>
+field is shown in the <command>ophelp</command> output). For some named unit masks, the hex value is not unique; thus, OProfile
+tools enforce specifying such unit mask values by name.
 </para>
-<screen>opreport -l /boot/vmlinux-`uname -r`</screen>
 <para>
-There are also a number of other ways of presenting the data, as described later in this manual.
-Note that OProfile will choose a default profiling setup for you. However, there are a number
-of options you can pass to <command>opcontrol</command> if you need to change something,
-also detailed later.
+The table below lists the default profiling event for various processor types. The same events
+can be used for <command>ocount</command>, minus the <emphasis>count</emphasis> field.
 </para>
+<informaltable frame="all">
+<tgroup cols='3'>
+<tbody>
+<row><entry>Processor</entry><entry>cpu_type</entry><entry>Default event</entry></row>
+<row><entry>Alpha EV67</entry><entry>alpha/ev67</entry><entry>CYCLES:100000:0:1:1</entry></row>
+<row><entry>ARM/XScale PMU1</entry><entry>arm/xscale1</entry><entry>CPU_CYCLES:100000:0:1:1</entry></row>
+<row><entry>ARM/XScale PMU2</entry><entry>arm/xscale2</entry><entry>CPU_CYCLES:100000:0:1:1</entry></row>
+<row><entry>ARM/MPCore</entry><entry>arm/mpcore</entry><entry>CPU_CYCLES:100000:0:1:1</entry></row>
+<row><entry>Athlon</entry><entry>i386/athlon</entry><entry>CPU_CLK_UNHALTED:100000:0:1:1</entry></row>
+<row><entry>Pentium Pro</entry><entry>i386/ppro</entry><entry>CPU_CLK_UNHALTED:100000:0:1:1</entry></row>
+<row><entry>Pentium II</entry><entry>i386/pii</entry><entry>CPU_CLK_UNHALTED:100000:0:1:1</entry></row>
+<row><entry>Pentium III</entry><entry>i386/piii</entry><entry>CPU_CLK_UNHALTED:100000:0:1:1</entry></row>
+<row><entry>Pentium M (P6 core)</entry><entry>i386/p6_mobile</entry><entry>CPU_CLK_UNHALTED:100000:0:1:1</entry></row>
+<row><entry>Pentium 4 (non-HT)</entry><entry>i386/p4</entry><entry>GLOBAL_POWER_EVENTS:100000:1:1:1</entry></row>
+<row><entry>Pentium 4 (HT)</entry><entry>i386/p4-ht</entry><entry>GLOBAL_POWER_EVENTS:100000:1:1:1</entry></row>
+<row><entry>Hammer</entry><entry>x86-64/hammer</entry><entry>CPU_CLK_UNHALTED:100000:0:1:1</entry></row>
+<row><entry>Family10h</entry><entry>x86-64/family10</entry><entry>CPU_CLK_UNHALTED:100000:0:1:1</entry></row>
+<row><entry>Family11h</entry><entry>x86-64/family11h</entry><entry>CPU_CLK_UNHALTED:100000:0:1:1</entry></row>
+<row><entry>IBM pseries</entry><entry>ppc64/power{ 4|5|6|7|8|970 }</entry><entry>CYCLES:100000:0:1:1</entry></row>
+<row><entry>IBM s390</entry><entry>s390/{ z10|z196|zEC12 }</entry><entry>HWSAMPLING:4127518:0:1:1</entry></row>
+</tbody>
+</tgroup>
+</informaltable>
 
 </sect1>
 
@@ -504,14 +517,14 @@ This section gives a brief description of the available OProfile utilities and t
 <varlistentry>
 	<term><filename>operf</filename></term>
 	<listitem><para>
-		This is the recommended program for collecting profile data.
+		This is the program for collecting profile data, discussed in <xref linkend="controlling-operf" />.
 	</para></listitem>
 </varlistentry>
 	
 <varlistentry>
-	<term><filename>opcontrol</filename></term>
+	<term><filename>ocount</filename></term>
 	<listitem><para>
-		Used for controlling OProfile data collection in legacy mode, discussed in <xref linkend="controlling" />.
+		This tool is used for simple event counting, as described in <xref linkend="controlling-ocount" />.
 	</para></listitem>
 </varlistentry>
 
@@ -562,7 +575,7 @@ This section gives a brief description of the available OProfile utilities and t
 	<term><filename>opimport</filename></term>
 	<listitem><para>
 		This utility converts sample database files from a foreign binary format (abi) to
-		the native format. This is useful only when moving sample files between hosts,
+		the native format. This is useful only when moving sample files between hosts
 		for analysis on platforms other than the one used for collection.
 		See <xref linkend="opimport" />.
 	</para></listitem>
@@ -572,8 +585,8 @@ This section gives a brief description of the available OProfile utilities and t
 </sect1>
 	
 </chapter>
- 
-<chapter id="controlling">
+
+<chapter id="controlling-profiler">
 <title>Controlling the profiler</title>
 
 <sect1 id="controlling-operf">
@@ -596,7 +609,7 @@ Additionally, each counter is programmed with a "count" value, which corresponds
 detailed the profile is. The lower the value, the more frequently profile
 samples are taken. You can choose to sample only kernel code, user-space code,
 or both (both is the default). Finally, some events have a "unit mask"
--- this is a value that further restricts the types of event that are counted.
+-- this is a value that further restricts the type of event being counted.
 You can see the event types and unit masks for your CPU using <command>ophelp</command>.
 More information on event specification can be found at <xref linkend="eventspec"/>.
 </para>
@@ -615,9 +628,9 @@ Following is a description of the <command>operf</command> options.
 </para>
 <variablelist>
 	<varlistentry>
-		<term><option>command</option></term>
+		<term><option>command [args]</option></term>
 		<listitem><para>
-		The command or application to be profiled. <command>args</command> are the input arguments
+		The command or application to be profiled. The <emphasis>[args]</emphasis> are the input arguments
         that the command or application requires. Either <code>command</code>, <code>--pid</code> or
         <code>--system-wide</code> is required, but cannot be used simultaneously.
 		</para></listitem>
@@ -651,8 +664,11 @@ Following is a description of the <command>operf</command> options.
 		A vmlinux file that matches the running kernel that has symbol and/or debuginfo.
 		Kernel samples will be attributed to this binary, allowing post-processing tools
 		(like <command>opreport</command>) to attribute samples to the appropriate kernel symbols.
-		If this option is not specified, all kernel samples will be attributed to a pseudo
-		binary named "no-vmlinux".
+		If this option is not specified, the file /proc/kallsyms is used to obtain
+		kernel symbol addresses corresponding to sample addresses.  However, the setting of
+		/proc/sys/kernel/kptr_restrict may restrict a non-root user's access to
+		/proc/kallsyms, in which case,
+		all kernel samples are attributed to a pseudo binary named "no-vmlinux".
 		</para></listitem>
 	</varlistentry>
 	<varlistentry>
@@ -738,419 +754,19 @@ Following is a description of the <command>operf</command> options.
 	<varlistentry>
 		<term><option>--version -v </option></term>
 		<listitem><para>
-		Show <command>operf</command> version.
-		</para></listitem>
-	</varlistentry>
-	<varlistentry>
-		<term><option>--help / -h</option></term>
-		<listitem><para>
-		Show a help message.
-		</para></listitem>
-	</varlistentry>
-</variablelist>
-</sect1>
-
-<sect1 id="controlling-daemon">
-<title>Using <command>opcontrol</command></title>
-<para>
-In this section we describe the configuration and control of the profiling system
-with opcontrol in more depth. See <xref linkend="controlling-operf"/> for a description
-of the preferred profiling method.
-</para>
-<para>
-The <command>opcontrol</command> script has a default setup, but you
-can alter this with the options given below. In particular, you can select
-specific hardware events on which to base your profile. See <xref linkend="controlling-operf"/> for an
-introduction to hardware events and performance counter configuration.
-The event types and unit masks for your CPU are listed by <command>opcontrol
---list-events</command> or <command>ophelp</command>.
-</para>
-<para>
-The <command>opcontrol</command> script provides the following actions :
-</para>
-<variablelist>
-	<varlistentry>
-		<term><option>--init</option></term>
-		<listitem><para>
-		Loads the OProfile module if required and makes the OProfile driver
-		interface available.
-		</para></listitem>
-	</varlistentry>
-	<varlistentry>
-		<term><option>--setup</option></term>
-		<listitem><para>
-		    Followed by list arguments for profiling set up. List of arguments
-		    saved in <filename>/root/.oprofile/daemonrc</filename>.
-		    Giving this option is not necessary; you can just directly pass one
-		    of the setup options, e.g. <command>opcontrol --no-vmlinux</command>.
-		  </para></listitem>
-	</varlistentry>
-	<varlistentry>
-		<term><option>--status</option></term>
-		<listitem><para>
-		Show configuration information.
-		</para></listitem>
-	</varlistentry>
-	<varlistentry>
-		<term><option>--start-daemon</option></term>
-		<listitem><para>
-		    Start the oprofile daemon without starting actual profiling. The profiling
-		can then be started using <option>--start</option>. This is useful for avoiding
-		measuring the cost of daemon startup, as <option>--start</option> is a simple
-		write to a file in oprofilefs.
-		</para></listitem>
-	</varlistentry>
-	<varlistentry>
-		<term><option>--start</option></term>
-		<listitem><para>
-		    Start data collection with either arguments provided by <option>--setup</option>
-		or information saved in <filename>/root/.oprofile/daemonrc</filename>. Specifying
-		the addition <option>--verbose</option> makes the daemon generate lots of debug data
-		whilst it is running.
-		</para></listitem>
-	</varlistentry>
-	<varlistentry>
-		<term><option>--dump</option></term>
-		<listitem><para>
-		    Force a flush of the collected profiling data to the daemon.
-		</para></listitem>
-	</varlistentry>
-	<varlistentry>
-		<term><option>--stop</option></term>
-		<listitem><para>
-		    Stop data collection.
-		</para></listitem>
-	</varlistentry>
-	<varlistentry>
-		<term><option>--shutdown</option></term>
-		<listitem><para>
-		    Stop data collection and kill the daemon.
-		</para></listitem>
-	</varlistentry>
-	<varlistentry>
-		<term><option>--reset</option></term>
-		<listitem><para>
-		    Clears out data from current session, but leaves saved sessions.
-		</para></listitem>
-	</varlistentry>
-	<varlistentry>
-		<term><option>--save=</option>session_name</term>
-		<listitem><para>
-		    Save data from current session to session_name.
-		</para></listitem>
-	</varlistentry>
-	<varlistentry>
-		<term><option>--deinit</option></term>
-		<listitem><para>
-                Shuts down daemon. Unload the OProfile module and oprofilefs.
-		</para></listitem>
-	</varlistentry>
-	<varlistentry>
-		<term><option>--list-events</option></term>
-		<listitem><para>
-		    List event types and unit masks.
-		</para></listitem>
-	</varlistentry>
-	<varlistentry>
-		<term><option>--help</option></term>
-		<listitem><para>
-		    Generate usage messages.
-		</para></listitem>
-	</varlistentry>
-</variablelist>
-
-<para>
-There are a number of possible settings, of which, only
-<option>--vmlinux</option> (or <option>--no-vmlinux</option>)
-is required. These settings are stored in <filename>~/.oprofile/daemonrc</filename>.
-</para>
-<variablelist>
-	<varlistentry>
-		<term><option>--buffer-size=</option>num</term>
-		<listitem><para>
-		Number of samples in kernel buffer. 
-		Buffer watershed needs to be tweaked when changing this value.
-		</para></listitem>
-	</varlistentry>
-	<varlistentry>
-		<term><option>--buffer-watershed=</option>num</term>
-		<listitem><para>
-		Set kernel buffer watershed to num samples. When remain only
-		buffer-size - buffer-watershed free entries remain in the kernel buffer, data will be
-		flushed to the daemon.  Most useful values are in the range [0.25 - 0.5] * buffer-size.
-		</para></listitem>
-	</varlistentry>
-	<varlistentry>
-		<term><option>--cpu-buffer-size=</option>num</term>
-		<listitem><para>
-		Number of samples in kernel per-cpu buffer. If you
-		profile at high rate, it can help to increase this if the log
-		file show excessive count of samples lost due to cpu buffer overflow. 
-		</para></listitem>
-	</varlistentry>
-	<varlistentry>
-		<term><option>--event=</option>[eventspec]</term>
-		<listitem><para>
-		Use the given performance counter event to profile.
-		See <xref linkend="eventspec" /> below.
-		</para></listitem>
-	</varlistentry>
-	<varlistentry>
-		<term><option>--session-dir=</option>dir_path</term>
-		<listitem><para>
-		    Create/use sample database out of directory <filename>dir_path</filename> instead of
-		the default location (/var/lib/oprofile).
-		</para></listitem>
-	</varlistentry>
-	<varlistentry>
-		<term><option>--separate=</option>[none,lib,kernel,thread,cpu,all]</term>
-		<listitem><para>
-		By default, every profile is stored in a single file. Thus, for example,
-		samples in the C library are all accredited to the <filename>/lib/libc.o</filename>
-		profile. However, you choose to create separate sample files by specifying
-		one of the below options.
-		</para>
-		<informaltable frame="all">
-		<tgroup cols='2'> 
-		<tbody>
-		<row><entry><option>none</option></entry><entry>No profile separation (default)</entry></row>
-		<row><entry><option>lib</option></entry><entry>Create per-application profiles for libraries</entry></row>
-		<row><entry><option>kernel</option></entry><entry>Create per-application profiles for the kernel and kernel modules</entry></row>
-		<row><entry><option>thread</option></entry><entry>Create profiles for each thread and each task</entry></row>
-		<row><entry><option>cpu</option></entry><entry>Create profiles for each CPU</entry></row>
-		<row><entry><option>all</option></entry><entry>All of the above options</entry></row>
-		</tbody>
-		</tgroup>
-		</informaltable>
-		<para>
-		Note  that <option>--separate=kernel</option> also turns on <option>--separate=lib</option>.
-		<!-- FIXME: update if this change -->
-		When using <option>--separate=kernel</option>, samples in hardware interrupts, soft-irqs, or other
-		asynchronous kernel contexts are credited to the task currently running. This means you will see
-		seemingly nonsense profiles such as <filename>/bin/bash</filename> showing samples for the PPP modules,
-		etc.
-		</para>
-		<para>
-		Using <option>--separate=thread</option> creates a lot
-		of sample files if you leave OProfile running for a while; it's most
-		useful when used for short sessions, or when using image filtering.
-		</para>
-		</listitem>
-	</varlistentry>
-	<varlistentry>
-		<term><option>--callgraph=</option>#depth</term>
-		<listitem><para>
-		Enable call-graph sample collection with a maximum depth. Use 0 to disable
-		callgraph profiling.  NOTE: Callgraph support is available on a limited
-		number of platforms at this time; for example:
-		<para>
-		<itemizedlist>
-		<listitem><para>x86 with 2.6 or higher kernel</para></listitem>
-		<listitem><para>ARM with 2.6 or higher kernel</para></listitem>
-		<listitem><para>PowerPC with 2.6.17 or higher kernel</para></listitem>
-		</itemizedlist>
-		</para>
-		</para></listitem>
-	</varlistentry>
-	<varlistentry>
-		<term><option>--image=</option>image,[images]|"all"</term>
-		<listitem><para>
-		Image filtering. If you specify one or more absolute
-		paths to binaries, OProfile will only produce profile results for those
-		binary images. This is useful for restricting the sometimes voluminous
-		output you may get otherwise, especially with
-		<option>--separate=thread</option>. Note that if you are using
-		<option>--separate=lib</option> or
-		<option>--separate=kernel</option>, then if you specification an
-		application binary, the shared libraries and kernel code
-		<emphasis>are</emphasis> included. Specify the value
-		"all" to profile everything (the default).
-		</para></listitem>
-	</varlistentry>
-	<varlistentry>
-		<term><option>--vmlinux=</option>file</term>
-		<listitem><para>
-		vmlinux kernel image.
-		</para></listitem>
-	</varlistentry>
-	<varlistentry>
-		<term><option>--no-vmlinux</option></term>
-		<listitem><para>
-		Use this when you don't have a kernel vmlinux file, and you don't want
-		to profile the kernel. This still counts the total number of kernel samples,
-		but can't give symbol-based results for the kernel or any modules.
-		</para></listitem>
-	</varlistentry>
-</variablelist>
-
-<sect2 id="opcontrolexamples">
-<title>Examples</title>
-
-<sect3 id="examplesperfctr">
-<title>Intel performance counter setup</title>
-<para>
-Here, we have a Pentium III running at 800MHz, and we want to look at where data memory
-references are happening most, and also get results for CPU time.
-</para>
-<screen>
-# opcontrol --event=CPU_CLK_UNHALTED:400000 --event=DATA_MEM_REFS:10000
-# opcontrol --vmlinux=/boot/2.6.0/vmlinux
-# opcontrol --start
-</screen>
-</sect3>
-
-<sect3 id="examplesstartdaemon">
-<title>Starting the daemon separately</title>
-<para>
-Use <option>--start-daemon</option> to avoid
-the profiler startup affecting results.
-</para>
-<screen>
-# opcontrol --vmlinux=/boot/2.6.0/vmlinux
-# opcontrol --start-daemon
-# my_favourite_benchmark --init
-# opcontrol --start ; my_favourite_benchmark --run ; opcontrol --stop
-</screen>
-</sect3>
-
-<sect3 id="exampleseparate">
-<title>Separate profiles for libraries and the kernel</title>
-<para>
-Here, we want to see a profile of the OProfile daemon itself, including when
-it was running inside the kernel driver, and its use of shared libraries.
-</para>
-<screen>
-# opcontrol --separate=kernel --vmlinux=/boot/2.6.0/vmlinux
-# opcontrol --start
-# my_favourite_stress_test --run
-# opreport -l -p /lib/modules/2.6.0/kernel /usr/local/bin/oprofiled
-</screen>
-</sect3>
-
-<sect3 id="examplessessions">
-<title>Profiling sessions</title>
-<para>
-It can often be useful to split up profiling data into several different
-time periods. For example, you may want to collect data on an application's
-startup separately from the normal runtime data. You can use the simple
-command <command>opcontrol --save</command> to do this. For example :
-</para>
-<screen>
-# opcontrol --save=blah
-</screen>
-<para>
-will create a sub-directory in <filename>$SESSION_DIR/samples</filename> containing the samples
-up to that point (the current session's sample files are moved into this
-directory). You can then pass this session name as a parameter to the post-profiling
-analysis tools, to only get data up to the point you named the
-session. If you do not want to save a session, you can do
-<command>rm -rf $SESSION_DIR/samples/sessionname</command> or, for the
-current session, <command>opcontrol --reset</command>.
-</para>
-</sect3>
-</sect2> 
-</sect1>
-
-<sect1 id="eventspec">
-<title>Specifying performance counter events</title>
-<para>
-Both methods of profiling (<command>operf</command> and <command>opcontrol</command>)
-allow you to give one or more event specifications to provide details of how each
-hardware performance counter should be setup. With <command>operf</command>, you
-can provide a comma-separated list of event specfications using the <code>--events</code>
-option.  With <command>opcontrol</command>, you use the <code>--event</code> option
-for each desired event specification.
-The event specification is a colon-separated string of the form
-<option><emphasis>name</emphasis>:<emphasis>count</emphasis>:<emphasis>unitmask</emphasis>:<emphasis>kernel</emphasis>:<emphasis>user</emphasis></option>
-as described in the table below.
-</para>
-<para>
-If no event specs are passed to <command>operf</command> or <command>opcontrol</command>,
-the default event will be used for profiling. With <command>opcontrol</command>, if you have
-previously specified some non-default event but want to revert to the default event, use
-<option>--event=default</option>. Use of this option overrides all previous event selections
-that have been cached.
-</para>
-<para>
-<note>OProfile will allocate hardware counters as necessary, but some processor
-types have restrictions as to what hardware events may be counted simultaneously.
-The <command>operf</command> program uses a multiplexing technique when such
-hardware restrictions are encountered, but <command>opcontrol</command> does
-not have this capability; instead, <command>opcontrol</command> will display an
-error message if you select an incompatible set of events.
-</note>
-</para>
-<informaltable frame="all">
-<tgroup cols='2'> 
-<tbody>
-<row><entry><option>name</option></entry><entry>The symbolic event name, e.g. <constant>CPU_CLK_UNHALTED</constant></entry></row>
-<row><entry><option>count</option></entry><entry>The counter reset value, e.g. 100000</entry></row>
-<row><entry><option>unitmask</option></entry><entry>The unit mask, as given in the events list: e.g. 0x0f; or a symbolic name as
-given by the first word of the description (only valid for unit masks having an "extra:" parameter)</entry></row>
-<row><entry><option>kernel</option></entry><entry>Whether to profile kernel code</entry></row>
-<row><entry><option>user</option></entry><entry>Whether to profile userspace code</entry></row>
-</tbody>
-</tgroup>
-</informaltable>
-<para>
-The last three values are optional, if you omit them (e.g. <option>--event=DATA_MEM_REFS:30000</option>),
-they will be set to the default values (a unit mask of 0, and profiling both kernel and
-userspace code). Note that some events require a unit mask.
-</para>
-<para>
-When specifying a unit mask value, it may be either a hexadecimal value (which
-<emphasis>must</emphasis> begin with "0x") or a string (i.e, symbolic name) which matches
-the first word in the unit mask description. Specifying a symbolic name for
-the unit mask is valid only for unit masks having "extra:" parameters, as
-shown by the output of <command>ophelp</command>.  Unit masks with "extra:" parameters must be
-specified using the symbolic name.
-</para>
-<note><para>
-When using legacy mode <command>opcontrol</command> on PowerPC platforms, all events specified must be in the same group;
-i.e., the group number appended to the event name (e.g. <constant>&lt;<emphasis>some-event-name</emphasis>&gt;_GRP9
-</constant>) must be the same.
-</para></note>
-<para>
-If OProfile is using timer-interrupt mode, there is no event configuration possible.
-</para>
-<para>
-The table below lists the default event for various processor types:
-</para>
-<informaltable frame="all">
-<tgroup cols='3'> 
-<tbody>
-<row><entry>Processor</entry><entry>cpu_type</entry><entry>Default event</entry></row>
-<row><entry>Alpha EV4</entry><entry>alpha/ev4</entry><entry>CYCLES:100000:0:1:1</entry></row>
-<row><entry>Alpha EV5</entry><entry>alpha/ev5</entry><entry>CYCLES:100000:0:1:1</entry></row>
-<row><entry>Alpha PCA56</entry><entry>alpha/pca56</entry><entry>CYCLES:100000:0:1:1</entry></row>
-<row><entry>Alpha EV6</entry><entry>alpha/ev6</entry><entry>CYCLES:100000:0:1:1</entry></row>
-<row><entry>Alpha EV67</entry><entry>alpha/ev67</entry><entry>CYCLES:100000:0:1:1</entry></row>
-<row><entry>ARM/XScale PMU1</entry><entry>arm/xscale1</entry><entry>CPU_CYCLES:100000:0:1:1</entry></row>
-<row><entry>ARM/XScale PMU2</entry><entry>arm/xscale2</entry><entry>CPU_CYCLES:100000:0:1:1</entry></row>
-<row><entry>ARM/MPCore</entry><entry>arm/mpcore</entry><entry>CPU_CYCLES:100000:0:1:1</entry></row>
-<row><entry>AVR32</entry><entry>avr32</entry><entry>CPU_CYCLES:100000:0:1:1</entry></row>
-<row><entry>Athlon</entry><entry>i386/athlon</entry><entry>CPU_CLK_UNHALTED:100000:0:1:1</entry></row>
-<row><entry>Pentium Pro</entry><entry>i386/ppro</entry><entry>CPU_CLK_UNHALTED:100000:0:1:1</entry></row>
-<row><entry>Pentium II</entry><entry>i386/pii</entry><entry>CPU_CLK_UNHALTED:100000:0:1:1</entry></row>
-<row><entry>Pentium III</entry><entry>i386/piii</entry><entry>CPU_CLK_UNHALTED:100000:0:1:1</entry></row>
-<row><entry>Pentium M (P6 core)</entry><entry>i386/p6_mobile</entry><entry>CPU_CLK_UNHALTED:100000:0:1:1</entry></row>
-<row><entry>Pentium 4 (non-HT)</entry><entry>i386/p4</entry><entry>GLOBAL_POWER_EVENTS:100000:1:1:1</entry></row>
-<row><entry>Pentium 4 (HT)</entry><entry>i386/p4-ht</entry><entry>GLOBAL_POWER_EVENTS:100000:1:1:1</entry></row>
-<row><entry>Hammer</entry><entry>x86-64/hammer</entry><entry>CPU_CLK_UNHALTED:100000:0:1:1</entry></row>
-<row><entry>Family10h</entry><entry>x86-64/family10</entry><entry>CPU_CLK_UNHALTED:100000:0:1:1</entry></row>
-<row><entry>Family11h</entry><entry>x86-64/family11h</entry><entry>CPU_CLK_UNHALTED:100000:0:1:1</entry></row>
-<row><entry>Itanium</entry><entry>ia64/itanium</entry><entry>CPU_CYCLES:100000:0:1:1</entry></row>
-<row><entry>Itanium 2</entry><entry>ia64/itanium2</entry><entry>CPU_CYCLES:100000:0:1:1</entry></row>
-<row><entry>TIMER_INT</entry><entry>timer</entry><entry>None selectable</entry></row>
-<row><entry>IBM pseries</entry><entry>PowerPC 4/5/6/7/970/Cell</entry><entry>CYCLES:100000:0:1:1</entry></row>
-<row><entry>IBM s390</entry><entry>timer</entry><entry>None selectable</entry></row>
-<row><entry>IBM s390x</entry><entry>timer</entry><entry>None selectable</entry></row>
-</tbody>
-</tgroup>
-</informaltable>
-
+		Show <command>operf</command> version.
+		</para></listitem>
+	</varlistentry>
+	<varlistentry>
+		<term><option>--help / -h</option></term>
+		<listitem><para>
+		Show a help message.
+		</para></listitem>
+	</varlistentry>
+</variablelist>
 </sect1>
- 
+
+
 <sect1 id="setup-jit">
 	<title>Setting up the JIT profiling feature</title>
 	<para>
@@ -1196,33 +812,6 @@ The table below lists the default event for various processor types:
 	</sect2>
 </sect1>
 
-<sect1 id="oprofile-gui">
-<title>Using <command>oprof_start</command></title>
-<para>
-The <command>oprof_start</command> application provides a convenient way to start the profiler.
-Note that <command>oprof_start</command> is just a wrapper around the <command>opcontrol</command> script,
-so it does not provide more services than the script itself.
-</para>
-<para>
-After <command>oprof_start</command> is started you can select the event type for each counter;
-the sampling rate and other related parameters are explained in <xref linkend="controlling-daemon" />.
-The "Configuration" section allows you to set general parameters such as the buffer size, kernel filename
-etc. The counter setup interface should be self-explanatory; <xref linkend="hardware-counters" /> and related 
-links contain information on using unit masks.
-</para>
-<para>
-A status line shows the current status of the profiler: how long it has been running, and the average
-number of interrupts received per second and the total, over all processors.
-Note that quitting <command>oprof_start</command> does not stop the profiler.
-</para>
-<para>
-Your configuration is saved in the same file as <command>opcontrol</command> uses; that is,
-<filename>~/.oprofile/daemonrc</filename>.
-</para>
-<para>
-<note><command>oprof_start</command> does not currently support <command>operf</command>.</note>
-</para>
-</sect1>
 
 <sect1 id="detailed-parameters">
 <title>Configuration details</title>
@@ -1237,7 +826,8 @@ events other than the default event chosen by OProfile.
 <note>
 <para>
 Your CPU type may not include the requisite support for hardware performance counters, in which case
-you must use OProfile in timer mode (see <xref linkend="timer" />). 
+you must use OProfile in timer mode (see <xref linkend="timer" />), which is only available in
+OProfile releases prior to 1.0.
 </para>
 </note>
 <para>
@@ -1252,69 +842,73 @@ https://www.power.org/events/Power7</ulink> contains specific information on the
 monitor unit for the IBM POWER7.
 </para>
 <para>
-These processors are capable of delivering an interrupt when a counter overflows.
+A physical performance monitor counter (PMC) is configured by a profiling tool to count a particular
+type of event. When the counter overflows, an interrupt is delivered to the processor.
 This is the basic mechanism on which OProfile is based. The delivery mode is <acronym>NMI</acronym>,
 so blocking interrupts in the kernel does not prevent profiling. When the interrupt handler is called,
-the current <acronym>PC</acronym> value and the current task are recorded into the profiling structure.
-This allows the overflow event to be attached to a specific assembly instruction in a binary image.
-OProfile receives this data from the kernel and writes it to the sample files.
+the current <acronym>PC</acronym> (program counter) value and the current task are recorded into the profiling structure.
+This allows the overflow event to be attributed to a specific assembly instruction in a specific binary image.
+OProfile receives this data (commonly referred to as a "sample") from the kernel and writes it to the sample files.
 </para>
 <para>
 If we use an event such as <constant>CPU_CLK_UNHALTED</constant> or <constant>INST_RETIRED</constant>
 (<constant>GLOBAL_POWER_EVENTS</constant> or <constant>INSTR_RETIRED</constant>, respectively, on the Pentium 4), we can
-use the overflow counts as an estimate of actual time spent in each part of code. Alternatively we can profile interesting
+use the overflow counts (samples) as an estimate of actual time spent in each part of code. Alternatively we can profile interesting
 data such as the cache behaviour of routines with the other available counters.
 </para>
 <para>
 However there are several caveats. First, there are those issues listed in the Intel manual. There is a delay
 between the counter overflow and the interrupt delivery that can skew results on a small scale - this means
 you cannot rely on the profiles at the instruction level as being perfectly accurate.
-If you are using an "event-mode" counter such as the cache counters, a count registered against it doesn't mean
-that it is responsible for that event. However, it implies that the counter overflowed in the dynamic
-vicinity of that instruction, to within a few instructions. Further details on this problem can be found in 
+For example, if you are profiling an application with an event that counts L1 cache misses, a sample attributed
+to a particular instruction in the application doesn't necessarily mean that exact instruction is responsible
+for that event; instead, it means the sample was taken in the dynamic vicinity of that instruction,
+usually with a margin of error of a few instructions. Further details on this problem can be found in
 <xref linkend="interpreting" /> and also in the Digital paper "ProfileMe: A Hardware Performance Counter".
 </para>
 <para>
-Each counter has several configuration parameters.
-First, there is the unit mask: this simply further specifies what to count.
-Second, there is the counter value, discussed below. Third, there is a parameter whether to increment counts
+Each counter has several configuration parameters besides the type of event to count.
+First, there is the unit mask, which is used to further qualify exactly what to count.
+Second, there is the <constant>count</constant> field, discussed below. Third, there are parameters
+to specify whether to increment counts
 whilst in kernel or user space. You can configure these separately for each counter.
 </para>
 <para>
-After each overflow event, the counter will be re-initialized
-such that another overflow will occur after this many events have been counted. Thus, higher
-values mean less-detailed profiling, and lower values mean more detail, but higher overhead.
-Picking a good value for this
-parameter is, unfortunately, somewhat of a black art. It is of course dependent on the event
-you have chosen.
+When the profiler is initially set up, a performance monitor counter is chosen for counting the
+event, and it is initialized using the <constant>count</constant> value.
+Once profiling begins, the counter increments with each event detected, and the counter
+<emphasis>overflows</emphasis> when the <constant>count</constant> value is reached.
+As described above, the counter overflow generates an interrupt, and the sample is recorded.
+After each overflow event, the counter is re-initialized using the <constant>count</constant> value,
+and counting begins anew for the next sample. Higher values for <constant>count</constant>
+result in samples being taken less frequently, and therefore less-detailed (and, potentially,
+less accurate) profiling. Lower values mean more detail, but higher overhead.
+Picking a good value for this parameter is, unfortunately, somewhat of a black art. It is
+of course dependent on the event you have chosen.
 Specifying too large a value will mean not enough interrupts are generated
-to give a realistic profile (though this problem can be ameliorated by profiling for <emphasis>longer</emphasis>).
-Specifying too small a value can lead to higher performance overhead.
+to give a realistic profile (though this problem can be ameliorated by profiling for
+longer time periods). Specifying too small a value can lead to higher performance overhead.
 </para>
 
 </sect2>
 
 <sect2 id="timer">
-<title>OProfile in timer interrupt mode</title>
-<para>
-Some CPU types do not provide the needed hardware support to use the hardware performance counters. This includes
-some laptops, classic Pentiums, and other CPU types not yet supported by OProfile (such as Cyrix).
-On these machines, OProfile falls back to using the timer interrupt for profiling,
-back to using the real-time clock interrupt to collect samples.  In timer mode, OProfile
-is not able to profile code that has interrupts disabled.
-</para>
+<title>OProfile timer interrupt mode</title>
 <para>
-You can force use of the timer interrupt by using the <option>timer=1</option> module
-parameter (or <option>oprofile.timer=1</option> on the boot command line if OProfile is
-built-in).  If OProfile was built as a kernel module, then you must pass the 'timer=1'
-parameter with the modprobe command.  Do this before executing 'opcontrol --init' or
-edit the opcontrol command's invocation of modprobe to pass the 'timer=1' parameter.
-
-<note>Timer mode is only available using the legacy <command>opcontrol</command> command.</note>
+Some CPU types do not provide the needed hardware support for hardware performance counters.
+Additionally, some older architectures are not supported by the perf_events kernel subsystem.
+On such machines, the <command>operf</command> and <command>ocount</command> commands will exit with a message indicating the
+processor type is not supported. However, you can install OProfile 0.9.9 and use the legacy
+opcontrol-based profiler, which will fall back to using timer interrupts for profiling.
+Note that in timer mode, OProfile is not able to profile code that has interrupts disabled.
+<note>Timer mode is only available using the legacy <command>opcontrol</command> command,
+available in releases prior to 1.0.</note>
 </para>
 </sect2>
 
-<sect2 id="p4">
+<sect2 id="special-notes">
+<title>Architecture-specific configuration notes</title>
+<sect3 id="p4">
 <title>Pentium 4 support</title>
 <para>
 The Pentium 4 / Xeon performance counters are organized around 3 types of model specific registers (MSRs): 45 event
@@ -1330,338 +924,46 @@ There is currently no support for Precision Event-Based Sampling (PEBS), nor any
 (DS). Current support is limited to the conservative extension of OProfile's existing interrupt-based model described
 above.
 </para>
-</sect2>
-
-<sect2 id="ia64">
-<title>Intel Itanium 2 support</title>
-<para>
-The Itanium 2 performance monitoring unit (PMU) organizes the counters as four
-pairs of performance event monitoring registers. Each pair is composed of a
-Performance Monitoring Configuration (PMC) register and Performance Monitoring
-Data (PMD) register.  The PMC selects the performance event being monitored and
-the PMD determines the sampling interval. The IA64 Performance Monitoring Unit
-(PMU) triggers sampling with maskable interrupts. Thus, samples will not occur
-in sections of the IA64 kernel where interrupts are disabled.
-</para>
-<para>
-None of the advance features of the Itanium 2 performance monitoring unit
-such as opcode matching, address range matching, or precise event sampling are
-supported by this version of OProfile.  The Itanium 2 support only maps OProfile's
-existing interrupt-based model to the PMU hardware.
-</para>
-</sect2>
+</sect3>
 
-<sect2 id="ppc64">
+<sect3 id="ppc64">
 <title>PowerPC64 support</title>
 <para>
 The performance monitoring unit (PMU) for the IBM PowerPC 64-bit processors 
-consists of between 4 and 8 counters (depending on the model), plus three
-special purpose registers used for programming the counters -- MMCR0, MMCR1,
-and MMCRA.  Advanced features such as instruction matching and thresholding are
-not supported by this version of OProfile.
-<note>Later versions of the IBM POWER5+ processor (beginning with revision 3.0)
-run the performance monitor unit in POWER6 mode, effectively removing OProfile's
-access to counters 5 and 6.  These two counters are dedicated to counting
-instructions completed and cycles, respectively.  In POWER6 mode, however, the
-counters do not generate an interrupt on overflow and so are unusable by
-OProfile.  Kernel versions 2.6.23 and higher will recognize this mode
-and export "ppc64/power5++" as the cpu_type to the oprofilefs pseudo filesystem.
-OProfile userspace responds to this cpu_type by removing these counters from
-the list of potential events to count.  Without this kernel support, attempts
-to profile using an event from one of these counters will yield incorrect
-results -- typically, zero (or near zero) samples in the generated report.
-</note>
-</para>
-
-</sect2>
-
-<sect2 id="cell-be">
-<title>Cell Broadband Engine support</title>
-<para>
-The Cell Broadband Engine (CBE) processor core consists of a PowerPC Processing
-Element (PPE) and 8 Synergistic Processing Elements (SPE).  PPEs and SPEs each
-consist of a processing unit (PPU and SPU, respectively) and other hardware
-components, such as memory controllers.
-</para>
-<para>
-A PPU has two hardware threads (aka "virtual CPUs").  The performance monitor
-unit of the CBE collects event information on one hardware thread at a time.
-Therefore, when profiling PPE events,
-OProfile collects the profile based on the selected events by time slicing the
-performance counter hardware between the two threads.   The user must ensure the
-collection interval is long enough so that the time spent collecting data for
-each PPU is sufficient to obtain a good profile.
-</para>
-<para>
-To profile an SPU application, the user should specify the SPU_CYCLES event.
-When starting OProfile with SPU_CYCLES, the opcontrol script enforces certain
-separation parameters (separate=cpu,lib) to ensure that sufficient information
-is collected in the sample data in order to generate a complete report.  The
---merge=cpu option can be used to obtain a more readable report if analyzing
-the performance of each separate SPU is not necessary.
-</para>
-<para>
-Profiling with an SPU event (events 4100 through 4163) is not compatible with any other
-event.  Further more, only one SPU event can be specified at a time.  The hardware only
-supports profiling on one SPU per node at a time.  The OProfile kernel code time slices
-between the eight SPUs to collect data on all SPUs.
-</para>
-<para>
-SPU profile reports have some unique characteristics compared to reports for
-standard architectures:
-</para>
-<itemizedlist>
-<listitem>Typically no "app name" column.  This is really standard OProfile behavior
-when the report contains samples for just a single application, which is
-commonly the case when profiling SPUs.</listitem>
-<listitem>"CPU" equates to "SPU"</listitem>
-<listitem>Specifying '--long-filenames' on the opreport command does not always result
-in long filenames.  This happens when the SPU application code is embedded in
-the PPE executable or shared library.  The embedded SPU ELF data contains only the
-short filename (i.e., no path information) for the SPU binary file that was used as
-the source for embedding.   The reason that just the short filename is used is because
-the original SPU binary file may not exist or be accessible at runtime.  The performance
-analyst must have sufficient knowledge of the application to be able to correlate the
-SPU binary image names found in the  report to the application's source files.
-<note>
-Compile the application with -g and generate the OProfile report
-with -g to facilitate finding the right source file(s) on which to focus.
-</note>
-</listitem>
-</itemizedlist>
-
-</sect2>
-
-<sect2 id="amd-ibs-support">
-<title>AMD64 (x86_64) Instruction-Based Sampling (IBS) support</title>
-
-<para>
-Instruction-Based Sampling (IBS) is a new performance measurement technique
-available on AMD Family 10h processors. Traditional performance counter
-sampling is not precise enough to isolate performance issues to individual
-instructions. IBS, however, precisely identifies instructions which are not
-making the best use of the processor pipeline and memory hierarchy.
-For more information, please refer to the "Instruction-Based Sampling:
-A New Performance Analysis Technique for AMD Family 10h Processors" (
-<ulink url="http://developer.amd.com/assets/AMD_IBS_paper_EN.pdf">
-http://developer.amd.com/assets/AMD_IBS_paper_EN.pdf</ulink>).
-There are two types of IBS profile types, described in the following sections.
-<note>Profiling on IBS events is only supported with legacy mode profiling
-(i.e., with <command>opcontrol</command>).</note>
-</para>
-
-<sect3 id="ibs-fetch">
-<title>IBS Fetch</title>
-
-<para>
-IBS fetch sampling is a statistical sampling method which counts completed
-fetch operations. When the number of completed fetch operations reaches the
-maximum fetch count (the sampling period), IBS tags the fetch operation and
-monitors that operation until it either completes or aborts. When a tagged
-fetch completes or aborts, a sampling interrupt is generated and an IBS fetch
-sample is taken. An IBS fetch sample contains a timestamp, the identifier of
-the interrupted process, the virtual fetch address, and several event flags
-and values that describe what happened during the fetch operation. 
-</para>
-
-</sect3>
-
-<sect3 id="ibs-op">
-<title>IBS Op</title>
-
-<para>
-IBS op sampling selects, tags, and monitors macro-ops as issued from AMD64
-instructions. Two options are available for selecting ops for sampling:
-</para>
-
-<itemizedlist>
-<listitem>
-Cycles-based selection counts CPU clock cycles. The op is tagged and monitored
-when the count reaches a threshold (the sampling period) and a valid op is
-available. 
-</listitem>
-
-<listitem>
-Dispatched op-based selection counts dispatched macro-ops.
-When the count reaches a threshold, the next valid op is tagged and monitored. 
-</listitem>
-</itemizedlist>
-
-<para>
-In both cases, an IBS sample is generated only if the tagged op retires.
-Thus, IBS op event information does not measure speculative execution activity.
-The execution stages of the pipeline monitor the tagged macro-op. When the
-tagged macro-op retires, a sampling interrupt is generated and an IBS op
-sample is taken. An IBS op sample contains a timestamp, the identifier of
-the interrupted process, the virtual address of the AMD64 instruction from
-which the op was issued, and several event flags and values that describe
-what happened when the macro-op executed.
+consists of between 4 and 8 counters (depending on the model).  Advanced features
+such as instruction matching and thresholding are not supported by OProfile.
 </para>
 
 </sect3>
-
-<para>
-Enabling IBS profiling is done simply by specifying IBS performance events
-through the "--event=" options. These events are listed in the
-<function>opcontrol --list-events</function>.
-</para>
-
-<screen>
-opcontrol --event=IBS_FETCH_XXX:&lt;count&gt;:&lt;um&gt;:&lt;kernel&gt;:&lt;user&gt;
-opcontrol --event=IBS_OP_XXX:&lt;count&gt;:&lt;um&gt;:&lt;kernel&gt;:&lt;user&gt;
-
-Note: * All IBS fetch event must have the same event count and unitmask,
-        as do those for IBS op.
-</screen>
-
 </sect2>
 
-<sect2 id="systemz">
-<title>IBM System z hardware sampling support</title>
-<para>
-IBM System z provides a facility which does instruction sampling as
-part of the CPU.  This has great advantages over the timer based
-sampling approach like better sampling resolution with less overhead
-and the possibility to get samples within code sections where
-interrupts are disabled (useful especially for Linux kernel code).
-</para>
-<note>Profiling with the instruction sampling facility is currently only supported
-with legacy mode profiling (i.e., with <command>opcontrol</command>).</note>
-<para>
-A public description of the System z CPU-Measurement Facilities can be
-found here:
-<ulink url="http://www-01.ibm.com/support/docview.wss?uid=isg26fcd1cc32246f4c8852574ce0044734a">The Load-Program-Parameter and CPU-Measurement Facilities</ulink>
-</para>
-<para>
-System z hardware sampling can be used for Linux instances in LPAR
-mode. The hardware sampling support used by OProfile was introduced
-for System z10 in October 2008.
-</para>
-<para>
-To enable hardware sampling for an LPAR you must activate the LPAR
-with authorization for basic sampling control. See the "Support
-Element Operations Guide" for your mainframe system for more
-information.
-</para>
-<para>
-The hardware sampling facility can be enabled and disabled using the
-event interface.  A `virtual' counter 0 has been defined that only supports
-a single event, HWSAMPLING. By default the HWSAMPLING event is
-enabled on machines providing the facility.  For both events only the
-`count', `kernel' and `user' options are evaluated by the kernel
-module.
-</para>
-<para>
-The `count' value is the sampling rate as it is passed to the CPU
-measurement facility.  A sample will be taken by the hardware every
-`count' cycles. Using low values here will quickly fill up the
-sampling buffers and will generate CPU load on the OProfile daemon and
-the kernel module being busy flushing the hardware buffers.  This
-might considerably impact the workload to be profiled.
-</para>
-<para>
-The unit mask `um' is required to be zero.
-</para>
-<para>
-The opcontrol tool provides a new option specific to System z
-hardware sampling:
-</para>
-
-<itemizedlist>
-<listitem>--s390hwsampbufsize="num": Number of 2MB areas
-used per CPU for storing sample data.  The best
-size for the sample memory depends on the particular system and the
-workload to be measured.  Providing the sampler with too little memory
-results in lost samples. Reserving too much system memory for the
-sampler impacts the overall performance and, hence, also the workload
-to be measured.</listitem>
-</itemizedlist>
-
-<para>
-A special counter <filename>/dev/oprofile/timer</filename> is provided
-by the kernel module allowing to switch back to timer mode sampling
-dynamically.  The TIMER event is limited to be used only with this
-counter.  The TIMER event can be specified using the
-<option>--event=</option> as with every other event.
-</para>
-<screen>opcontrol --event=TIMER:1</screen>
-<para>
-On z10 or later machines the default event is set to TIMER in case the
-hardware sampling facility is not available.
-</para>
-<para>
-Although required, the 'count' parameter of the TIMER event is
-ignored.  The value may eventually be used for timer based sampling
-with a configurable sampling frequency, but this is currently not
-supported.
-</para>
-
-</sect2>
 
-<sect2 id="misuse">
-<title>Dangerous counter settings</title>
-<para>
-OProfile is a low-level profiler which allows continuous profiling with a low-overhead cost.
-When using OProfile legacy mode profiling, it may be possible to configure such a low a counter reset value
-(i.e., high sampling rate) that the system can become overloaded with counter interrupts and your
-system's responsiveness may be severely impacted. Whilst some validation is done on the <code>count</code>
-values you pass to <command>opcontrol</command> with your event specification, it is not foolproof.
-</para>
-<note><para>
-This can happen as follows: When the profiler count
-reaches zero, an NMI handler is called which stores the sample values in an internal buffer, then resets the counter
-to its original value. If the reset count you specified is very low, a pending NMI can be sent before the NMI handler has
-completed. Due to the priority of the NMI, the pending interrupt is delivered immediately after
-completion of the previous interrupt handler, and control never returns to other parts of the system.
-If all processors are stuck in this mode, the system will appear to be frozen.
-</para></note>
-<para>If this happens, it will be impossible to bring the system back to a workable state.
-There is no way to provide real security against this happening, other than making sure to use a reasonable value
-for the counter reset. For example, setting <constant>CPU_CLK_UNHALTED</constant> event type with a ridiculously low reset count (e.g. 500)
-is likely to freeze the system.
-</para>
-<para>
-In short : <command>Don't try a foolish sample count value</command>. Unfortunately the definition of a foolish value
-is really dependent on the event type. If ever in doubt, post a message to <address><email>oprofile-list@lists.sf.net</email>.</address>
-</para>
-<note>
-The scenario described above cannot occur if you use <command>operf</command> for profiling instead of
-<command>opcontrol</command>, because the perf_events kernel subsystem automatically detects when performance monitor
-interrupts are arriving at a dangerous level and will throttle back the sampling rate.
-</note>
-</sect2>
 
 </sect1>
- 
+
 </chapter>
 
 <chapter id="results">
-<title>Obtaining results</title>
-<para>
-OK, so the profiler has been running, but it's not much use unless we can get some data out. Sometimes,
-OProfile does a little <emphasis>too</emphasis> good a job of keeping overhead low, and no data reaches
-the profiler. This can happen on lightly-loaded machines. If you're using OPorifle legacy mode, you can
-force a dump at any time with :
-</para>
-<para><command>opcontrol --dump</command></para>
-<para>This ensures that any profile data collected by the <command>oprofiled</command> daemon has been flusehd
-to disk.  Remember to do a <code>dump</code>, <code>stop</code>, <code>shutdown</code>, or <code>deinit</code>
-before complaining there is no profiling data!
-</para>
+<title>Obtaining profiling results</title>
 <para>
-Now that we've got some data, it has to be processed. That's the job of <command>opreport</command>,
-<command>opannotate</command>, or <command>opgprof</command>.
+After collecting profile data, the raw data must undergo special processing in order for you to
+perform your analysis. The analysis tools that perform this special processing are
+<command>opreport</command>, <command>opannotate</command>, and <command>opgprof</command>.
+Additionally, the <command>oparchive</command> command is used to gather together profile
+data, sampled binary files, etc. for the purpose of off-line analysis.  While
+not really an analysis tool, <command>oparchive</command> is put in that category
+for convenience since it takes many of the same options as the other analysis tools.
 </para>
 
 <sect1 id="profile-spec">
 <title>Profile specifications</title>
 
 <para>
-All of the analysis tools take a <emphasis>profile specification</emphasis>.
-This is a set of definitions that describe which actual profiles should be
+All of the analysis tools take a <emphasis>profile specification</emphasis>
+as an input argument.
+This is a set of definitions that describes the specific profile data that should be
 examined. The simplest profile specification is empty: this will match all
-the available profile files for the current session (this is what happens
-when you do <command>opreport</command>).
+the available profile files for the current session.
 </para>
 <para>
 Specification parameters are of the form <option>name:value[,value]</option>.
@@ -1669,10 +971,11 @@ For example, if I wanted to get a combined symbol summary for
 <filename>/bin/myprog</filename> and <filename>/bin/myprog2</filename>,
 I could do <command>opreport -l image:/bin/myprog,/bin/myprog2</command>.
 As a special case, you don't actually need to specify the <option>image:</option>
-part here: anything left on the command line is assumed to be an
+part of the specification. Anything left on the command line after all other
+<command>opreport</command> options have been processed is assumed to be an
 <option>image:</option> name. Similarly, if no <option>session:</option>
 is specified, then <option>session:current</option> is assumed ("current"
-is a special name of the current / last profiling session).
+is a special name of the current (i.e., most recent) profiling session).
 </para>
 <para>
 In addition to the comma-separated list shown above, some of the 
@@ -1779,10 +1082,7 @@ Differential profile of an archived binary with the current session :
 		<term><option>lib-image:</option><emphasis>imagelist</emphasis></term>
 		<listitem><para>
 		Same as <option>image:</option>, but only for images that are for
-		a particular primary binary image (namely, an application). This only
-		makes sense to use if you're using <option>--separate</option>.
-		This includes kernel modules and the kernel when using
-		<option>--separate=kernel</option>.
+		a particular primary binary image (namely, an application).
 		</para></listitem>
 	</varlistentry>
 
@@ -1799,7 +1099,6 @@ Differential profile of an archived binary with the current session :
 		<listitem><para>
 		The symbolic event name to match on, e.g. <option>event:DATA_MEM_REFS</option>.
 		You can pass a list of events for side-by-side comparison with <command>opreport</command>.
-		When using the timer interrupt, the event is always "TIMER".
 		</para></listitem>
 	</varlistentry>
 
@@ -1808,11 +1107,10 @@ Differential profile of an archived binary with the current session :
 		<listitem><para>
 		The event count to match on, e.g. <option>event:DATA_MEM_REFS count:30000</option>.
 		Note that this value refers to the count value in the event spec you passed
-		to <command>opcontrol</command> or <command>operf</command> when setting up to do a
+		to <command>operf</command> when setting up to do a
 		profile run.  It has nothing to do with the sample counts in the profile data
 		itself.
 		You can pass a list of events for side-by-side comparison with <command>opreport</command>.
-		When using the timer interrupt, the count is always 0 (indicating it cannot be set).
 		</para></listitem>
 	</varlistentry>
 
@@ -1861,9 +1159,8 @@ Differential profile of an archived binary with the current session :
 <sect2 id="locating-and-managing-binary-images">
 <title>Locating and managing binary images</title>
 <para>
-Each session's sample files can be found in the $SESSION_DIR/samples/ directory (default when
-using legacy mode: <filename>/var/lib/oprofile/samples/</filename>; default when using
-<command>operf</command>:  <filename>&lt;cur_dir&gt;/oprofile_data/samples/</filename>).
+Each session's sample files can be found in the $SESSION_DIR/samples/ directory (default
+for <command>operf</command> is <filename>&lt;cur_dir&gt;/oprofile_data/samples/</filename>).
 These are used, along with the binary image files, to produce human-readable data.
 In some circumstances (e.g., kernel modules), OProfile
 will not be able to find the binary images. All the tools have an <option>--image-path</option>
@@ -1910,19 +1207,14 @@ taken per second.
 <varlistentry><term>application spent most of its time in libraries</term><listitem><para>
 Similarly, if the application spends little time in the main binary image
 itself, with most of it spent in shared libraries it uses, you might
-not see any samples for the binary image (i.e., executable) itself. If you're
-using OProfile legacy mode profiling, then we recommend using
-<command>opcontrol --separate=lib</command> before the
-profiling session so that <command>opreport</command> and friends show
-the library profiles on a per-application basis.  This is done automatically
-when profiling with <command>operf</command>, so no special setup is necessary.
+not see any samples for the binary image (i.e., executable) itself.
 </para></listitem></varlistentry>
 <varlistentry><term>specification was really too strict</term><listitem><para>
 For example, you specified something like <option>tgid:3433</option>,
 but no task with that group ID ever ran the code.
 </para></listitem></varlistentry>
 <varlistentry><term>application didn't generate any events</term><listitem><para>
-If you're using a particular event counter, for example counting MMX
+If you're profiling a particular event, for example counting MMX
 operations, the code might simply have not generated any events in the
 first place. Verify the code you're profiling does what you expect it
 to.
@@ -1946,52 +1238,66 @@ The <command>opreport</command> utility is the primary utility you will use for
 getting formatted data out of OProfile. It produces two types of data: image summaries
 and symbol summaries. An image summary lists the number of samples for individual
 binary images such as libraries or applications. Symbol summaries provide per-symbol
-profile data. In the following example, we're getting an image summary for the whole
+profile data. In the following truncated example, we see an image summary for the whole
 system:
 </para>
 <screen>
 $ opreport --long-filenames
-CPU: PIII, speed 863.195 MHz (estimated)
-Counted CPU_CLK_UNHALTED events (clocks processor is not halted) with a unit mask of 0x00 (No unit mask) count 23150
-   905898 59.7415 /usr/lib/gcc-lib/i386-redhat-linux/3.2/cc1plus
-   214320 14.1338 /boot/2.6.0/vmlinux
-   103450  6.8222 /lib/i686/libc-2.3.2.so
-    60160  3.9674 /usr/local/bin/madplay
-    31769  2.0951 /usr/local/oprofile-pp/bin/oprofiled
-    26550  1.7509 /usr/lib/libartsflow.so.1.0.0
-    23906  1.5765 /usr/bin/as
-    18770  1.2378 /oprofile
-    15528  1.0240 /usr/lib/qt-3.0.5/lib/libqt-mt.so.3.0.5
-    11979  0.7900 /usr/X11R6/bin/XFree86
-    11328  0.7471 /bin/bash
+CPU: Intel Sandy Bridge microarchitecture, speed 2401 MHz (estimated)
+Counted CPU_CLK_UNHALTED events (Clock cycles when not halted) with a unit mask of 0x00 (No unit mask) count 100000
+CPU_CLK_UNHALT...|
+  samples|      %|
+------------------
+    22577 28.9011 /usr/bin/Xorg
+        CPU_CLK_UNHALT...|
+          samples|      %|
+        ------------------
+            16846 74.6158 /proc/kallsyms
+             2126  9.4167 /usr/bin/Xorg
+              763  3.3795 /usr/lib64/libpixman-1.so.0.26.2
+              ...
+    17402 22.2766 /usr/lib/jvm/java-1.7.0-openjdk-1.7.0.55.x86_64/jre/bin/java
+        CPU_CLK_UNHALT...|
+          samples|      %|
+        ------------------
+             5666 32.5595 anon (tgid:29664 range:0x7f3475000000-0x7f347616ffff)
+             2312 13.2858 /usr/lib/jvm/java-1.7.0-openjdk-1.7.0.55.x86_64/jre/lib/amd64/server/libjvm.so
+             ...
+    11554 14.7904 /home/user1/oprof-install/bin/operf
+        CPU_CLK_UNHALT...|
+          samples|      %|
+        ------------------
+             7467 64.6270 /proc/kallsyms
+             1691 14.6356 /usr/bin/operf
+             1324 11.4592 /lib64/libc-2.12.so
+              455  3.9380 /usr/lib64/libstdc++.so.6.0.13
+              315  2.7263 /ext4
+              ...
     ...
 </screen>
 <para>
 If we had specified <option>--symbols</option> in the previous command, we would have
 gotten a symbol summary of all the images across the entire system. We can restrict this to only
 part of the system profile; for example,
-below is a symbol summary of the OProfile daemon. Note that as we used
-<command>opcontrol --separate=lib,kernel</command>, symbols from images that <command>oprofiled</command>
-has used are also shown.
+below is a symbol summary for the <command>operf</command> program used to collect the profile.
 </para>
 <screen>
-$ opreport -l -p /lib/modules/`uname -r` `which oprofiled` 2>/dev/null | more
-CPU: Core 2, speed 2.534e+06 MHz (estimated)
-Counted CPU_CLK_UNHALTED events (Clock cycles when not halted) with a unit mask of 0x00 (Unhalted core cycles) count 100000
+$ opreport -l -p /lib/modules/`uname -r` `which operf` 2>/dev/null | more
+CPU: Intel Sandy Bridge microarchitecture, speed 2401 MHz (estimated)
+Counted CPU_CLK_UNHALTED events (Clock cycles when not halted) with a unit mask of 0x00 (No unit mask) count 100000
 samples  %        image name               symbol name
-1353     24.9447  vmlinux                  sidtab_context_to_sid
-500       9.2183  vmlinux                  avtab_hash_eval
-154       2.8392  vmlinux                  __link_path_walk
-152       2.8024  vmlinux                  d_prune_aliases
-120       2.2124  vmlinux                  avtab_search_node
-104       1.9174  vmlinux                  find_next_bit
-85        1.5671  vmlinux                  selinux_file_fcntl
-82        1.5118  vmlinux                  avtab_write
-81        1.4934  oprofiled                odb_update_node_with_offset
-73        1.3459  oprofiled                opd_process_samples
-72        1.3274  vmlinux                  avc_has_perm_noaudit
-61        1.1246  libc-2.12.so             _IO_vfscanf
-59        1.0878  ext4.ko                  ext4_mark_iloc_dirty
+860       7.4607  kallsyms                 avtab_search_node
+474       4.1121  operf                    OP_perf_utils::op_write_event(event_union*, unsigned long long)
+461       3.9993  kallsyms                 avc_has_perm_noaudit
+455       3.9473  libstdc++.so.6.0.13      /usr/lib64/libstdc++.so.6.0.13
+412       3.5742  libc-2.12.so             _IO_vfscanf
+369       3.2012  kallsyms                 __d_lookup
+350       3.0363  kallsyms                 sidtab_context_to_sid
+274       2.3770  operf                    OP_perf_utils::op_record_process_exec_mmaps(int, int, int, operf_record*)
+232       2.0127  operf                    operf_process_info::find_mapping_for_sample(unsigned long long, bool)
+222       1.9259  kallsyms                 __link_path_walk
+191       1.6570  kallsyms                 pipe_read
+34        0.2950  ext4.ko                  ext4_mark_iloc_dirty
 ...
 </screen>
 
@@ -2007,8 +1313,8 @@ If you have used one of the <option>--separate[*]</option> options
 whilst profiling, there can be several separate profiles for
 a single binary image within a session. Normally the output
 will keep these images separated. So, for example, if you profiled
-with separation on a per-cpu basis (<code>opcontrol --separate=cpu</code> or
-<code>operf --separate-cpu</code>), you would see separate columns in
+with separation on a per-cpu basis (<code>operf --separate-cpu</code>),
+you would see separate columns in
 the output of <command>opreport</command> for each CPU where samples
 were recorded. But it can be useful to merge these results back together
 to make the report more readable. The <option>--merge</option> option allows
@@ -2120,7 +1426,7 @@ linkend="interpreting-callgraph" /> for an explanation.
 </para>
 </sect3>
 <sect3 id="cg-with-jitsupport">
-<title>Callgraph and JIT support</title>
+<title>Callgraph is not supported with JIT samples</title>
 <para>
 Callgraph output where anonymously mapped code is in the callstack can sometimes be misleading.
 For all such code, the samples for the anonymously mapped code are stored in a samples subdirectory
@@ -2182,13 +1488,12 @@ A typical way to use this feature is with archives created with
 <command>oparchive</command>. Let's look at an example:
 </para>
 <screen>
-$ ./a
+$ operf ./a
 $ oparchive -o orig ./a
-$ opcontrol --reset
   # edit and recompile a
-$ ./a
+$ operf ./a
   # now compare the current profile of a with the archived profile
-$ opreport -xl ./a { archive:./orig } { }
+$ opreport --session-dir=`pwd`/oprofile_data/ -xl ./a { archive:./orig } { }
 CPU: PIII, speed 863.233 MHz (estimated)
 Counted CPU_CLK_UNHALTED events (clocks processor is not halted) with a
 unit mask of 0x00 (No unit mask) count 100000
@@ -2257,8 +1562,7 @@ samples  %        image name    		                symbol name
 </para>
 <para>
 Note that, since such mappings are dependent upon individual invocations of
-a binary, these mappings are always listed as a dependent image,
-even when using the legacy mode <option>opcontrol --separate=none</option> command.
+a binary, these mappings are always listed as a dependent image.
 Equally, the results are not affected by the <option>--merge</option>
 option.
 </para>
@@ -2319,8 +1623,7 @@ offsets for the image binary.
 </para></listitem></varlistentry>
 <varlistentry><term><option>--exclude-dependent / -x</option></term><listitem><para>
 Do not include application-specific images for libraries, kernel modules
-and the kernel. This option only makes sense if the profile session
-used --separate.
+and the kernel..
 </para></listitem></varlistentry>
 <varlistentry><term><option>--exclude-symbols / -e [symbols]</option></term><listitem><para>
 Exclude all the symbols in the given comma-separated list.
@@ -2356,9 +1659,13 @@ Output to the given file instead of stdout.
 <varlistentry><term><option>--reverse-sort / -r</option></term><listitem><para>
 Reverse the sort from the default.
 </para></listitem></varlistentry>
-<varlistentry><term><option>--session-dir=</option>dir_path</term><listitem><para>
-Use sample database out of directory <filename>dir_path</filename> 
-instead of the default location (/var/lib/oprofile).
+<varlistentry><term><option>--session-dir=dir_path</option></term><listitem><para>
+Use sample database from the specified directory <filename>dir_path</filename> instead
+of the default location. If this option is not specified, then opreport will search for
+samples in <filename>&lt;cur_dir&gt;/oprofile_data</filename>
+first. If that directory does not exist, the standard session-dir of
+<filename>/var/lib/oprofile</filename> is used
+as the session directory.
 </para></listitem></varlistentry>
 <varlistentry><term><option>--show-address / -w</option></term><listitem><para>
 Show the VMA address of each symbol (off by default).
@@ -2373,7 +1680,8 @@ List per-symbol information instead of a binary image summary.
 </para></listitem></varlistentry>
 <varlistentry><term><option>--threshold / -t [percentage]</option></term><listitem><para>
 Only output data for symbols that have more than the given percentage
-of total samples.
+of total samples. For profiles using multiple events, if the threshold is reached
+for any event, then all sample data for the symbol is shown.
 </para></listitem></varlistentry>
 <varlistentry><term><option>--verbose / -V [options]</option></term><listitem><para>
 Give verbose debugging output.
@@ -2502,11 +1810,19 @@ pattern-matching to make C++ symbol demangling more readable.
 </para></listitem></varlistentry>
 <varlistentry><term><option>--exclude-dependent / -x</option></term><listitem><para>
 Do not include application-specific images for libraries, kernel modules
-and the kernel. This option only makes sense if the profile session
-used --separate.
+and the kernel.
 </para></listitem></varlistentry>
 <varlistentry><term><option>--exclude-file [files]</option></term><listitem><para>
 Exclude all files in the given comma-separated list of glob patterns.
+This option is supported solely with the <code>--source</code>
+option. It can be used to filter out source files in the output using the
+following types of specifications:
+<itemizedlist>
+<listitem><para>filenames (basename -- i.e., no path)</para></listitem>
+<listitem><para>filename glob specifications (all files whose base filename matches the given pattern)</para></listitem>
+<listitem><para>directory segments (all source files located in the specified directory; e.g. "libio")</para></listitem>
+<listitem><para>directory segment glob specifications (e.g., "libi*")</para></listitem>
+</itemizedlist>
 </para></listitem></varlistentry>
 <varlistentry><term><option>--exclude-symbols / -e [symbols]</option></term><listitem><para>
 Exclude all the symbols in the given comma-separated list.
@@ -2523,6 +1839,7 @@ A path to a filesystem to search for additional binaries.
 </para></listitem></varlistentry>
 <varlistentry><term><option>--include-file [files]</option></term><listitem><para>
 Only include files in the given comma-separated list of glob patterns.
+The same rules apply for this option as for the <code>--exclude-file</code> option.
 </para></listitem></varlistentry>
 <varlistentry><term><option>--include-symbols / -i [symbols]</option></term><listitem><para>
 Only include symbols in the given comma-separated list.
@@ -2561,9 +1878,23 @@ source files when the debug information only contains relative paths.
 Output annotated source. This requires debugging information to be available
 for the binaries.
 </para></listitem></varlistentry>
+<varlistentry><term><option>--session-dir=dir_path</option></term><listitem><para>
+Use sample database from the specified directory <filename>dir_path</filename> instead
+of the default location. If this option is not specified, then opannotate will search for
+samples in <filename>&lt;cur_dir&gt;/oprofile_data</filename>
+first. If that directory does not exist, the standard session-dir of
+<filename>/var/lib/oprofile</filename> is used
+as the session directory.
+</para></listitem></varlistentry>
 <varlistentry><term><option>--threshold / -t [percentage]</option></term><listitem><para>
-Only output data for symbols that have more than the given percentage
-of total samples.
+For annotated assembly, only output data for symbols that have more than the given percentage
+of total samples. For profiles using multiple events, if the threshold is reached
+for any event, then all sample data for the symbol is shown.
+</para>
+<para>
+For annotated source, only output data for source files that have more than the given percentage
+of total samples. For profiles using multiple events, if the threshold is reached
+for any event, then all sample data for the source file is shown.
 </para></listitem></varlistentry>
 <varlistentry><term><option>--verbose / -V [options]</option></term><listitem><para>
 Give verbose debugging output.
@@ -2683,6 +2014,14 @@ of total samples.
 </para></listitem></varlistentry>
 <varlistentry><term><option>--verbose / -V [options]</option></term><listitem><para>
 Give verbose debugging output.
+</para></listitem></varlistentry>
+<varlistentry><term><option>--session-dir=dir_path</option></term><listitem><para>
+Use sample database from the specified directory <filename>dir_path</filename> instead
+of the default location. If this option is not specified, then opgprof will search for
+samples in <filename>&lt;cur_dir&gt;/oprofile_data</filename>
+first. If that directory does not exist, the standard session-dir of
+<filename>/var/lib/oprofile</filename> is used
+as the session directory.
 </para></listitem></varlistentry>
 <varlistentry><term><option>--version / -v</option></term><listitem><para>
 Show version.
@@ -2694,18 +2033,18 @@ Show version.
 </sect1> <!-- opgprof -->
 
 <sect1 id="oparchive">
-<title>Archiving measurements (<command>oparchive</command>)</title>
+<title>Analyzing profile data on another system (<command>oparchive</command>)</title>
 <para>
 	The <command>oparchive</command> utility generates a directory populated
 	with executable, debug, and oprofile sample files. This directory can be
-	moved to another machine via <command>tar</command> and analyzed without
-	further use of the data collection machine.
+	copied to another (host) machine and analyzed offline, with no further need to
+	access the data collection machine (target).
 </para>
 
 <para>
-	The following command would collect the sample files, the executables
-	associated with the sample files, and the debuginfo files associated
-	with the executables and copy them into
+	The following command, executed on the target system, will collect the
+	sample files, the executables associated with the sample files, and the
+	debuginfo files associated with the executables and copy them into
 	<filename>/tmp/current_data</filename>:
 </para>
 
@@ -2713,6 +2052,59 @@ Show version.
 # oparchive -o /tmp/current_data
 </screen>
 
+<para>
+	When transferring archived profile data to a host machine for offline analysis,
+	you need to determine if the oprofile ABI format is compatible between the
+	target system and the host system; if it isn't, you must run the <command>opimport</command>
+	command to convert the target's sample data files to the format of your host system.
+	See <xref linkend="opimport"/> for more details.
+</para>
+
+<para>
+	After your profile data is transferred to the host system and (if necessary)
+	you have run the <command>opimport</command> command to convert the file
+	format, you can now run the <command>opreport</command> and
+	<command>opannotate</command> commands.  However, you must provide an
+	"archive specification" to let these post-processing tools know where to find
+	all of the profile data (sample files, executables, etc.); for example:
+</para>
+
+<screen>
+# opreport archive:/home/user1/my_oprofile_archive --symbols
+</screen>
+
+<para>
+	Furthermore, if your profile was collected on your target system into a session-dir
+	other than <filename>/var/lib/oprofile</filename>, the <command>oparchive</command>
+	command will display a message similar to the following:
+</para>
+
+<screen>
+# NOTE: The sample data in this archive is located at /home/user1/test-stuff/oprofile_data
+instead of the standard location of /var/lib/oprofile.  Hence, when using opreport
+and other post-processing tools on this archive, you must pass the following option:
+        --session-dir=/home/user1/test-stuff/oprofile_data
+</screen>
+
+<para>
+	Then the above <command>opreport</command> example would have to include that
+	<option>--session-dir</option> option.
+</para>
+
+<para>
+<note>
+	 In some host/target development environments, all target executables, libraries, and
+	 debuginfo files are stored in a root directory on the host to facilitate offline
+	 analysis.  In such cases, the <command>oparchive</command> command collects more data
+	 than is necessary; so, when copying the resulting output of <command>oparchive</command>,
+	 you can skip all of the executables, etc., and just archive the <filename>$SESSION_DIR</filename>
+	 tree located within the output directory you specified in your <command>oparchive</command>
+	 command. Then, when running the <command>opreport</command> or <command>opannotate</command>
+	 commands on your host system, pass the <option>--root</option> option to point to the
+	 location of your target's executables, etc.
+</note>
+</para>
+
 <sect2 id="oparchive-details">
 <title>Usage of <command>oparchive</command></title>
 
@@ -2722,8 +2114,7 @@ Show help message.
 </para></listitem></varlistentry>
 <varlistentry><term><option>--exclude-dependent / -x</option></term><listitem><para>
 Do not include application-specific images for libraries, kernel modules
-and the kernel. This option only makes sense if the profile session
-used --separate.
+and the kernel.
 </para></listitem></varlistentry>
 <varlistentry><term><option>--image-path / -p [paths]</option></term><listitem><para>
 Comma-separated list of additional paths to search for binaries.
@@ -2741,6 +2132,14 @@ Only list the files that would be archived, don't copy them.
 <varlistentry><term><option>--verbose / -V [options]</option></term><listitem><para>
 Give verbose debugging output.
 </para></listitem></varlistentry>
+<varlistentry><term><option>--session-dir=dir_path</option></term><listitem><para>
+Use sample database from the specified directory <filename>dir_path</filename> instead
+of the default location. If this option is not specified, then oparchive will search for
+samples in <filename>&lt;cur_dir&gt;/oprofile_data</filename>
+first. If that directory does not exist, the standard session-dir of
+<filename>/var/lib/oprofile</filename> is used
+as the session directory.
+</para></listitem></varlistentry>
 <varlistentry><term><option>--version / -v</option></term><listitem><para>
 Show version.
 </para></listitem></varlistentry>
@@ -2754,15 +2153,21 @@ Show version.
 <title>Converting sample database files (<command>opimport</command>)</title>
 <para>
 	This utility converts sample database files from a foreign binary format (abi) to
-	the native format. This is useful only when moving sample files between systems
-	for analysis on platforms other than the one used for collection. The <command>
-	oparchive</command> should be used on the machine where the profile was taken (target)
-	in order to collect sample files and all other necessary information. The archive
-	directory that is the output from <command>oparchive</command> should be copied
-	to the system where you wish to perform your performance analysis (host). If the
-	When the architecture of your target and host systems differ, then you'll need to
-	use the <command>opimport</command> command. The abi format of the sample files
-	to be imported is described in a text file located in <filename>$SESSION_DIR/abi</filename>.
+	the native format. This is required when moving sample files to a (host) system
+	other than the one used for collection (target system), when the host and target systems have
+	different architectures. The abi format of the sample files to be imported is described in a
+	text file located in <filename>$SESSION_DIR/abi</filename>.  If you are unsure if
+	your target and host systems have compatible architectures (in regard to the OProfile
+	ABI), simply diff a <filename>$SESSION_DIR/abi</filename> file from the target system
+	with one from the host system.  If any differences show up at all, you must run the
+	<command>opimport</command> command.
+</para>
+
+<para>
+	The <command>oparchive</command> command should be used on the machine where
+	the profile was taken (target) in order to collect sample files and all other necessary
+	information. The archive directory that is the output from <command>oparchive</command>
+	should be copied to the system where you wish to perform your performance analysis (host).
 </para>
 
 <para>
@@ -2919,10 +2324,7 @@ problem and OProfile can do nothing about it.
 OProfile uses non-maskable interrupts (NMI) on the P6 generation, Pentium 4,
 Athlon, Opteron, Phenom, and Turion processors. These interrupts can occur even in sections of the
 kernel where interrupts are disabled, allowing collection of samples in virtually
-all executable code.  The timer interrupt mode and Itanium 2 collection mechanisms
-use maskable interrupts; therefore, these profiling mechanisms have "sample
-shadows", or blind spots: regions where no samples will be collected. Typically, the samples
-will be attributed to the code immediately after the interrupts are re-enabled.
+all executable code.
 </para>
 </sect2>
 <sect2 id="idle">
@@ -2942,7 +2344,7 @@ will appear as <function>poll_idle()</function> in your kernel profile.
 OProfile profiles kernel modules by default. However, there are a couple of problems
 you may have when trying to get results. First, you may have booted via an initrd;
 this means that the actual path for the module binaries cannot be determined automatically.
-To get around this, you can use the <option>-p</option> option to the profiling tools
+To get around this, you can use the <option>-p</option> option to the analysis tools
 to specify where to look for the kernel modules.
 </para>
 <para>
@@ -2967,7 +2369,7 @@ information for OProfile to get this information.
 <sect1 id="interpreting-callgraph">
 <title>Interpreting call-graph profiles</title>
 <para>
-Sometimes the results from call-graph profiles may be different to what
+Sometimes the results from call-graph profiles may be different from what
 you expect to see. The first thing to check is whether the target
 binaries where compiled with frame pointers enabled (if the binary was
 compiled using <command>gcc</command>'s
@@ -3332,6 +2734,183 @@ and <ulink url="http://developer.amd.com/devguides.jsp/">http://developer.amd.co
 </para>
 </sect1>
 </chapter>
+
+<chapter id="controlling-counter">
+<title>Controlling the event counter</title>
+<sect1 id="controlling-ocount">
+<title>Using <command>ocount</command></title>
+<para>
+This section describes in detail how <command>ocount</command> is used.
+Unless the <option>--events</option> option is specified, <command>ocount</command> will use
+the default event for your system. For most systems, the default event is some
+cycles-based event, assuming your processor type supports hardware performance
+counters. The event specification used for <command>ocount</command> is slightly
+different from that required for profiling -- a <emphasis>count</emphasis> value
+is not needed. You can see the event information for your CPU using <command>ophelp</command>.
+More information on event specification can be found at <xref linkend="eventspec"/>.
+</para>
+<para>
+The <command>ocount</command> command syntax is:
+</para>
+<screen>ocount [ options ] [ --system-wide | --process-list &lt;pids&gt; | --thread-list &lt;tids&gt; | --cpu-list &lt;cpus&gt; [ command [ args ] ] ]
+</screen>
+
+<para>
+<command>ocount</command> has 5 run modes:
+</para>
+<itemizedlist>
+<listitem><para>system-wide</para></listitem>
+<listitem><para>process-list</para></listitem>
+<listitem><para>thread-list</para></listitem>
+<listitem><para>cpu-list</para></listitem>
+<listitem><para>command</para></listitem>
+</itemizedlist>
+
+<para>
+One and only one of these 5 run modes must be specified when you run <command>ocount</command>.
+If you run <command>ocount</command> using a run mode other than <code>command [args]</code>, press Ctrl-c
+to stop it when finished counting (e.g., when the monitored process ends). If you background <command>ocount</command>
+(i.e., with '&amp;') while using one of these run modes, you must stop it in a controlled manner so that
+the data collection process can be shut down cleanly and final results can be displayed.
+Use <code>kill -SIGINT &lt;ocount-PID&gt;</code> for this purpose.
+</para>
+<para>
+Following is a description of the <command>ocount</command> options.
+</para>
+<variablelist>
+	<varlistentry>
+		<term><option>command [args]</option></term>
+		<listitem><para>
+		The command or application for which events are to be counted. The <emphasis>[args]</emphasis> are the input arguments
+        that the command or application requires. The command and its arguments must be positioned at the
+        end of the command line, after all other <command>ocount</command> options.
+        </para></listitem>
+	</varlistentry>
+	<varlistentry>
+		<term><option>--process-list / -p [PIDs]</option></term>
+		<listitem><para>
+		Use this option to count events for one or more already-running applications, specified via
+        a comma-separated list (PIDs). Event counts will be collected for all children of the
+        passed process(es) as well.
+		</para></listitem>
+	</varlistentry>
+	<varlistentry>
+		<term><option>--thread-list / -r [TIDs]</option></term>
+		<listitem><para>
+		Use this option to count events for one or more already-running threads, specified via
+        a comma-separated list (TIDs). Event counts will <emphasis>not</emphasis> be collected
+        for any children of the passed thread(s).
+		</para></listitem>
+	</varlistentry>
+	<varlistentry>
+		<term><option>--system-wide / -s</option></term>
+		<listitem><para>
+		This option is for counting events for all processes running on your system. You must have
+        root authority to run <command>ocount</command> in this mode.
+        </para></listitem>
+	</varlistentry>
+	<varlistentry>
+		<term><option>--cpu-list / -C [CPUs]</option></term>
+		<listitem><para>
+		This option is for counting events on a subset of processors on your system. You must have
+        root authority to run <command>ocount</command> in this mode. This is a comma-separated list,
+        where each element in the list may be either a single processor number or a range of processor
+        numbers; for example: <code>-C 2,3,4-11,15</code>.
+		</para></listitem>
+	</varlistentry>
+	<varlistentry>
+		<term><option>--events / -e  [event1[,event2[,...]]]</option></term>
+		<listitem><para>
+		This option is for passing a comma-separated list of event specifications
+		for counting. Each event spec is of the form:
+		</para>
+		<screen>name[:unitmask[:kernel[:user]]]</screen>
+		<para>
+		When no event specification is given, the default event for the running
+		processor type will be used for counting. Use <command>ophelp</command>
+		to list the available events for your processor type.
+		</para></listitem>
+	</varlistentry>
+	<varlistentry>
+		<term><option>--separate-thread / -t</option></term>
+		<listitem><para>
+        This option can be used in conjunction with either the <code>--process-list</code> or
+        <code>--thread-list</code> option to display event counts on a per-thread (per-process) basis.
+        Without this option, all counts are aggregated.
+		</para></listitem>
+	</varlistentry>
+	<varlistentry>
+		<term><option>--separate-cpu / -c</option></term>
+		<listitem><para>
+		This option can be used in conjunction with either the <code>--system-wide</code> or
+		<code>--cpu-list</code> option to display event counts on a per-cpu basis. Without this option,
+		all counts are aggregated.
+		</para></listitem>
+	</varlistentry>
+	<varlistentry>
+		<term><option>--time-interval / -i interval_length[:num_intervals]</option></term>
+		<listitem><para>
+		<emphasis>Note:</emphasis> The <code>interval_length</code> is given in milliseconds.
+              However, the current implementation only supports 100 ms
+              granularity, so the given <code>interval_length</code> will be rounded
+              to the nearest 100 ms.  Results collected for each time
+              interval are printed immediately instead of the default
+              of one dump of cumulative event counts at the end of the
+              run.  Counters are reset to zero at the start of each
+              interval.
+		</para>
+		<para>
+              If <code>num_intervals</code> is specified, ocount exits after the
+              specified number of intervals occur.
+		</para></listitem>
+	</varlistentry>
+	<varlistentry>
+	   <term><option>--brief-format / -b</option></term>
+		<listitem><para>
+		Use this option to print results in the following brief format:
+		<screen>
+                  [optional cpu or thread,]&lt;event_name&gt;,&lt;count&gt;,&lt;percent_time_enabled&gt;
+                  [        &lt;int&gt;         ,]&lt;  string  &gt;,&lt; u64 &gt;,&lt;     double         &gt;
+        </screen>
+        If <code>--time-interval</code> is specified, a separate line formatted as
+        <screen>
+                  timestamp,&lt;num_seconds_since_epoch&gt;[.n]
+        </screen>
+        is printed ahead of each dump of event counts. If the time interval specified is
+        less than one second, the timestamp will have 1/10 second precision.
+		</para></listitem>
+	</varlistentry>
+
+	<varlistentry>
+		<term><option>--output-file / -f outfile_name</option></term>
+		<listitem><para>
+		Results are written to outfile_name instead of interactively to the terminal.
+		</para></listitem>
+	</varlistentry>
+	<varlistentry>
+		<term><option>--verbose / -V</option></term>
+		<listitem><para>
+		Use this option to increase the verbosity of the output.
+		</para></listitem>
+	</varlistentry>
+	<varlistentry>
+		<term><option>--version / -v</option></term>
+		<listitem><para>
+		Show <command>ocount</command> version.
+		</para></listitem>
+	</varlistentry>
+	<varlistentry>
+		<term><option>--help / -h</option></term>
+		<listitem><para>
+		Show a help message.
+		</para></listitem>
+	</varlistentry>
+</variablelist>
+
+</sect1>
+</chapter>
+
+
 <chapter id="ack">
 <title>Acknowledgments</title>
 <para>
diff --git a/events/Makefile.am b/events/Makefile.am
index 7c14713..d68f0e8 100644
--- a/events/Makefile.am
+++ b/events/Makefile.am
@@ -1,9 +1,5 @@
 event_files = \
-	alpha/ev4/events alpha/ev4/unit_masks \
-	alpha/ev5/events alpha/ev5/unit_masks \
 	alpha/ev67/events alpha/ev67/unit_masks \
-	alpha/ev6/events alpha/ev6/unit_masks \
-	alpha/pca56/events alpha/pca56/unit_masks \
 	i386/athlon/events i386/athlon/unit_masks \
 	i386/core_2/events i386/core_2/unit_masks \
 	i386/p4/events i386/p4-ht/events \
@@ -20,27 +16,26 @@ event_files = \
 	i386/westmere/events i386/westmere/unit_masks \
 	i386/sandybridge/events i386/sandybridge/unit_masks \
 	i386/ivybridge/events i386/ivybridge/unit_masks \
-	ia64/ia64/events ia64/ia64/unit_masks \
-	ia64/itanium2/events ia64/itanium2/unit_masks \
-	ia64/itanium/events ia64/itanium/unit_masks \
+	i386/haswell/events i386/haswell/unit_masks \
+	i386/broadwell/events i386/broadwell/unit_masks \
+	i386/silvermont/events i386/silvermont/unit_masks \
+	ppc64/architected_events_v1/events ppc64/architected_events_v1/unit_masks \
 	ppc64/power4/events ppc64/power4/event_mappings ppc64/power4/unit_masks \
 	ppc64/power5/events ppc64/power5/event_mappings ppc64/power5/unit_masks \
 	ppc64/power5+/events ppc64/power5+/event_mappings ppc64/power5+/unit_masks \
 	ppc64/power5++/events ppc64/power5++/event_mappings ppc64/power5++/unit_masks \
 	ppc64/power6/events ppc64/power6/event_mappings ppc64/power6/unit_masks \
 	ppc64/power7/events ppc64/power7/event_mappings ppc64/power7/unit_masks \
+	ppc64/power8/events ppc64/power8/unit_masks \
 	ppc64/970/events ppc64/970/event_mappings ppc64/970/unit_masks \
 	ppc64/970MP/events ppc64/970MP/event_mappings ppc64/970MP/unit_masks \
-	ppc64/ibm-compat-v1/events ppc64/ibm-compat-v1/event_mappings ppc64/ibm-compat-v1/unit_masks \
-	ppc64/pa6t/events ppc64/pa6t/event_mappings ppc64/pa6t/unit_masks \
-	ppc64/cell-be/events ppc64/cell-be/unit_masks \
-	rtc/events rtc/unit_masks \
 	x86-64/hammer/events x86-64/hammer/unit_masks \
 	x86-64/family10/events x86-64/family10/unit_masks \
 	x86-64/family11h/events x86-64/family11h/unit_masks \
 	x86-64/family12h/events x86-64/family12h/unit_masks \
 	x86-64/family14h/events x86-64/family14h/unit_masks \
 	x86-64/family15h/events x86-64/family15h/unit_masks \
+	x86-64/generic/events x86-64/generic/unit_masks \
 	arm/xscale1/events arm/xscale1/unit_masks \
 	arm/xscale2/events arm/xscale2/unit_masks \
 	arm/armv6/events arm/armv6/unit_masks \
@@ -48,12 +43,16 @@ event_files = \
 	arm/armv7/events arm/armv7/unit_masks \
 	arm/armv7-scorpion/events arm/armv7-scorpion/unit_masks \
 	arm/armv7-scorpionmp/events arm/armv7-scorpionmp/unit_masks \
+	arm/armv7-krait/events arm/armv7-krait/unit_masks \
 	arm/armv7-ca9/events arm/armv7-ca9/unit_masks \
 	arm/armv7-ca5/events arm/armv7-ca5/unit_masks \
 	arm/armv7-ca7/events arm/armv7-ca7/unit_masks \
 	arm/armv7-ca15/events arm/armv7-ca15/unit_masks \
 	arm/mpcore/events arm/mpcore/unit_masks \
-	avr32/events avr32/unit_masks \
+	arm/armv8-pmuv3-common/events arm/armv8-pmuv3-common/unit_masks \
+	arm/armv8-xgene/events arm/armv8-xgene/unit_masks \
+	arm/armv8-ca57/events arm/armv8-ca57/unit_masks \
+	arm/armv8-ca53/events arm/armv8-ca53/unit_masks \
 	mips/20K/events mips/20K/unit_masks \
 	mips/24K/events mips/24K/unit_masks \
 	mips/25K/events mips/25K/unit_masks \
@@ -72,12 +71,15 @@ event_files = \
 	ppc/7450/events ppc/7450/unit_masks \
 	ppc/e500/events ppc/e500/unit_masks \
 	ppc/e500v2/events ppc/e500v2/unit_masks \
+	ppc/e500mc/events ppc/e500mc/unit_masks \
+	ppc/e6500/events ppc/e6500/unit_masks \
 	ppc/e300/events ppc/e300/unit_masks \
 	tile/tile64/events tile/tile64/unit_masks \
 	tile/tilepro/events tile/tilepro/unit_masks \
 	tile/tilegx/events tile/tilegx/unit_masks \
 	s390/z10/events s390/z10/unit_masks \
-	s390/z196/events s390/z196/unit_masks
+	s390/z196/events s390/z196/unit_masks \
+	s390/zEC12/events s390/zEC12/unit_masks
 
 install-data-local:
 	for i in ${event_files} ; do \
diff --git a/events/Makefile.in b/events/Makefile.in
index 7fac3d0..3108c6e 100644
--- a/events/Makefile.in
+++ b/events/Makefile.in
@@ -38,7 +38,6 @@ DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
 am__aclocal_m4_deps = $(top_srcdir)/m4/binutils.m4 \
 	$(top_srcdir)/m4/builtinexpect.m4 \
-	$(top_srcdir)/m4/cellspubfdsupport.m4 \
 	$(top_srcdir)/m4/compileroption.m4 \
 	$(top_srcdir)/m4/copyifchange.m4 $(top_srcdir)/m4/docbook.m4 \
 	$(top_srcdir)/m4/extradirs.m4 \
@@ -47,7 +46,7 @@ am__aclocal_m4_deps = $(top_srcdir)/m4/binutils.m4 \
 	$(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
 	$(top_srcdir)/m4/mallocattribute.m4 \
 	$(top_srcdir)/m4/poptconst.m4 \
-	$(top_srcdir)/m4/precompiledheader.m4 $(top_srcdir)/m4/qt.m4 \
+	$(top_srcdir)/m4/precompiledheader.m4 \
 	$(top_srcdir)/m4/sstream.m4 $(top_srcdir)/m4/typedef.m4 \
 	$(top_srcdir)/configure.ac
 am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
@@ -110,7 +109,6 @@ LN_S = @LN_S@
 LTLIBOBJS = @LTLIBOBJS@
 MAKEINFO = @MAKEINFO@
 MKDIR_P = @MKDIR_P@
-MOC = @MOC@
 NM = @NM@
 NMEDIT = @NMEDIT@
 OBJDUMP = @OBJDUMP@
@@ -134,20 +132,13 @@ PFM_LIB = @PFM_LIB@
 PKG_CONFIG = @PKG_CONFIG@
 POPT_LIBS = @POPT_LIBS@
 PTRDIFF_T_TYPE = @PTRDIFF_T_TYPE@
-QT_CFLAGS = @QT_CFLAGS@
-QT_INCLUDES = @QT_INCLUDES@
-QT_LDFLAGS = @QT_LDFLAGS@
-QT_LIB = @QT_LIB@
-QT_LIBS = @QT_LIBS@
-QT_VERSION = @QT_VERSION@
 RANLIB = @RANLIB@
+RT_LIB = @RT_LIB@
 SED = @SED@
 SET_MAKE = @SET_MAKE@
 SHELL = @SHELL@
 SIZE_T_TYPE = @SIZE_T_TYPE@
 STRIP = @STRIP@
-UIC = @UIC@
-UIChelp = @UIChelp@
 VERSION = @VERSION@
 XMKMF = @XMKMF@
 XML_CATALOG = @XML_CATALOG@
@@ -212,11 +203,7 @@ top_builddir = @top_builddir@
 top_srcdir = @top_srcdir@
 topdir = @topdir@
 event_files = \
-	alpha/ev4/events alpha/ev4/unit_masks \
-	alpha/ev5/events alpha/ev5/unit_masks \
 	alpha/ev67/events alpha/ev67/unit_masks \
-	alpha/ev6/events alpha/ev6/unit_masks \
-	alpha/pca56/events alpha/pca56/unit_masks \
 	i386/athlon/events i386/athlon/unit_masks \
 	i386/core_2/events i386/core_2/unit_masks \
 	i386/p4/events i386/p4-ht/events \
@@ -233,27 +220,26 @@ event_files = \
 	i386/westmere/events i386/westmere/unit_masks \
 	i386/sandybridge/events i386/sandybridge/unit_masks \
 	i386/ivybridge/events i386/ivybridge/unit_masks \
-	ia64/ia64/events ia64/ia64/unit_masks \
-	ia64/itanium2/events ia64/itanium2/unit_masks \
-	ia64/itanium/events ia64/itanium/unit_masks \
+	i386/haswell/events i386/haswell/unit_masks \
+	i386/broadwell/events i386/broadwell/unit_masks \
+	i386/silvermont/events i386/silvermont/unit_masks \
+	ppc64/architected_events_v1/events ppc64/architected_events_v1/unit_masks \
 	ppc64/power4/events ppc64/power4/event_mappings ppc64/power4/unit_masks \
 	ppc64/power5/events ppc64/power5/event_mappings ppc64/power5/unit_masks \
 	ppc64/power5+/events ppc64/power5+/event_mappings ppc64/power5+/unit_masks \
 	ppc64/power5++/events ppc64/power5++/event_mappings ppc64/power5++/unit_masks \
 	ppc64/power6/events ppc64/power6/event_mappings ppc64/power6/unit_masks \
 	ppc64/power7/events ppc64/power7/event_mappings ppc64/power7/unit_masks \
+	ppc64/power8/events ppc64/power8/unit_masks \
 	ppc64/970/events ppc64/970/event_mappings ppc64/970/unit_masks \
 	ppc64/970MP/events ppc64/970MP/event_mappings ppc64/970MP/unit_masks \
-	ppc64/ibm-compat-v1/events ppc64/ibm-compat-v1/event_mappings ppc64/ibm-compat-v1/unit_masks \
-	ppc64/pa6t/events ppc64/pa6t/event_mappings ppc64/pa6t/unit_masks \
-	ppc64/cell-be/events ppc64/cell-be/unit_masks \
-	rtc/events rtc/unit_masks \
 	x86-64/hammer/events x86-64/hammer/unit_masks \
 	x86-64/family10/events x86-64/family10/unit_masks \
 	x86-64/family11h/events x86-64/family11h/unit_masks \
 	x86-64/family12h/events x86-64/family12h/unit_masks \
 	x86-64/family14h/events x86-64/family14h/unit_masks \
 	x86-64/family15h/events x86-64/family15h/unit_masks \
+	x86-64/generic/events x86-64/generic/unit_masks \
 	arm/xscale1/events arm/xscale1/unit_masks \
 	arm/xscale2/events arm/xscale2/unit_masks \
 	arm/armv6/events arm/armv6/unit_masks \
@@ -261,12 +247,16 @@ event_files = \
 	arm/armv7/events arm/armv7/unit_masks \
 	arm/armv7-scorpion/events arm/armv7-scorpion/unit_masks \
 	arm/armv7-scorpionmp/events arm/armv7-scorpionmp/unit_masks \
+	arm/armv7-krait/events arm/armv7-krait/unit_masks \
 	arm/armv7-ca9/events arm/armv7-ca9/unit_masks \
 	arm/armv7-ca5/events arm/armv7-ca5/unit_masks \
 	arm/armv7-ca7/events arm/armv7-ca7/unit_masks \
 	arm/armv7-ca15/events arm/armv7-ca15/unit_masks \
 	arm/mpcore/events arm/mpcore/unit_masks \
-	avr32/events avr32/unit_masks \
+	arm/armv8-pmuv3-common/events arm/armv8-pmuv3-common/unit_masks \
+	arm/armv8-xgene/events arm/armv8-xgene/unit_masks \
+	arm/armv8-ca57/events arm/armv8-ca57/unit_masks \
+	arm/armv8-ca53/events arm/armv8-ca53/unit_masks \
 	mips/20K/events mips/20K/unit_masks \
 	mips/24K/events mips/24K/unit_masks \
 	mips/25K/events mips/25K/unit_masks \
@@ -285,12 +275,15 @@ event_files = \
 	ppc/7450/events ppc/7450/unit_masks \
 	ppc/e500/events ppc/e500/unit_masks \
 	ppc/e500v2/events ppc/e500v2/unit_masks \
+	ppc/e500mc/events ppc/e500mc/unit_masks \
+	ppc/e6500/events ppc/e6500/unit_masks \
 	ppc/e300/events ppc/e300/unit_masks \
 	tile/tile64/events tile/tile64/unit_masks \
 	tile/tilepro/events tile/tilepro/unit_masks \
 	tile/tilegx/events tile/tilegx/unit_masks \
 	s390/z10/events s390/z10/unit_masks \
-	s390/z196/events s390/z196/unit_masks
+	s390/z196/events s390/z196/unit_masks \
+	s390/zEC12/events s390/zEC12/unit_masks
 
 EXTRA_DIST = $(event_files)
 all: all-am
diff --git a/events/alpha/ev4/events b/events/alpha/ev4/events
deleted file mode 100644
index 8b193d1..0000000
--- a/events/alpha/ev4/events
+++ /dev/null
@@ -1,18 +0,0 @@
-# Alpha EV4 events.
-#
-event:0x00 counters:0 um:zero minimum:4096 name:ISSUES : Total issues divided by 2
-event:0x02 counters:0 um:zero minimum:4096 name:PIPELINE_DRY : Nothing issued, no valid I-stream data
-event:0x04 counters:0 um:zero minimum:4096 name:LOAD_INSNS : All load instructions
-event:0x06 counters:0 um:zero minimum:4096 name:PIPELINE_FROZEN : Nothing issued, resource conflict
-event:0x08 counters:0 um:zero minimum:4096 name:BRANCH_INSNS : All branches (conditional, unconditional, jsr, hw_rei)
-event:0x0a counters:0 um:zero minimum:4096 name:CYCLES : Total cycles
-event:0x0b counters:0 um:zero minimum:4096 name:PAL_MODE : Cycles while in PALcode environment
-event:0x0c counters:0 um:zero minimum:4096 name:NON_ISSUES : Total nonissues divided by 2
-event:0x10 counters:0 um:zero minimum:256 name:DCACHE_MISSES : Total D-cache misses
-event:0x11 counters:0 um:zero minimum:256 name:ICACHE_MISSES : Total I-cache misses
-event:0x12 counters:0 um:zero minimum:256 name:DUAL_ISSUE_CYCLES : Cycles of dual issue
-event:0x13 counters:0 um:zero minimum:256 name:BRANCH_MISPREDICTS : Branch mispredicts (conditional, jsr, hw_rei)
-event:0x14 counters:0 um:zero minimum:256 name:FP_INSNS : FP operate instructions (not br, load, store)
-event:0x15 counters:0 um:zero minimum:256 name:INTEGER_OPERATE : Integer operate instructions
-event:0x16 counters:0 um:zero minimum:256 name:STORE_INSNS : Store instructions
-# There's also EXTERNAL, by which we could monitor the 21066/21068 bus controller.
diff --git a/events/alpha/ev4/unit_masks b/events/alpha/ev4/unit_masks
deleted file mode 100644
index bc77cc8..0000000
--- a/events/alpha/ev4/unit_masks
+++ /dev/null
@@ -1,4 +0,0 @@
-# Alpha EV4 possible unit masks
-#
-name:zero type:mandatory default:0x0
-	0x0 No unit mask
diff --git a/events/alpha/ev5/events b/events/alpha/ev5/events
deleted file mode 100644
index 709e06a..0000000
--- a/events/alpha/ev5/events
+++ /dev/null
@@ -1,49 +0,0 @@
-# Alpha EV5 events
-#
-event:0x00 counters:0,2 um:zero minimum:256 name:CYCLES : Total cycles
-event:0x01 counters:0 um:zero minimum:256 name:ISSUES : Total issues
-event:0x02 counters:1 um:zero minimum:256 name:NON_ISSUE_CYCLES : Nothing issued, pipeline frozen
-event:0x03 counters:1 um:zero minimum:256 name:SPLIT_ISSUE_CYCLES : Some but not all issuable instructions issued
-event:0x04 counters:1 um:zero minimum:256 name:PIPELINE_DRY : Nothing issued, pipeline dry
-event:0x05 counters:1 um:zero minimum:256 name:REPLAY_TRAP : Replay traps (ldu, wb/maf, litmus test)
-event:0x06 counters:1 um:zero minimum:256 name:SINGLE_ISSUE_CYCLES : Single issue cycles
-event:0x07 counters:1 um:zero minimum:256 name:DUAL_ISSUE_CYCLES : Dual issue cycles
-event:0x08 counters:1 um:zero minimum:256 name:TRIPLE_ISSUE_CYCLES : Triple issue cycles
-event:0x09 counters:1 um:zero minimum:256 name:QUAD_ISSUE_CYCLES : Quad issue cycles
-event:0x0a counters:1 um:zero minimum:256 name:FLOW_CHANGE : Flow change (meaning depends on counter 2)
-# ??? This one's dependent on the value in PCSEL2: If measuring PC_MISPR,
-# this is jsr-ret instructions, if measuring BRANCH_MISPREDICTS, this is
-# conditional branches, otherwise this is all branch insns, including hw_rei.
-event:0x0b counters:1 um:zero minimum:256 name:INTEGER_OPERATE : Integer operate instructions
-event:0x0c counters:1 um:zero minimum:256 name:FP_INSNS : FP operate instructions (not br, load, store)
-# FIXME: Bug carried over
-event:0x0c counters:1 um:zero minimum:256 name:LOAD_INSNS : Load instructions
-event:0x0d counters:1 um:zero minimum:256 name:STORE_INSNS : Store instructions
-event:0x0e counters:1 um:zero minimum:256 name:ICACHE_ACCESS : Instruction cache access
-event:0x0f um:zero minimum:256 name:DCACHE_ACCESS : Data cache access
-event:0x10 counters:2 um:zero minimum:256 name:LONG_STALLS : Stalls longer than 15 cycles
-event:0x11 counters:2 um:zero minimum:256 name:PC_MISPR : PC mispredicts
-event:0x12 counters:2 um:zero minimum:256 name:BRANCH_MISPREDICTS : Branch mispredicts
-event:0x13 counters:2 um:zero minimum:256 name:ICACHE_MISSES : Instruction cache misses
-event:0x14 counters:2 um:zero minimum:256 name:ITB_MISS : Instruction TLB miss
-event:0x15 counters:2 um:zero minimum:256 name:DCACHE_MISSES : Data cache misses
-event:0x16 counters:2 um:zero minimum:256 name:DTB_MISS : Data TLB miss
-event:0x17 counters:2 um:zero minimum:256 name:LOADS_MERGED : Loads merged in MAF
-event:0x18 counters:2 um:zero minimum:256 name:LDU_REPLAYS : LDU replay traps
-event:0x19 counters:2 um:zero minimum:256 name:WB_MAF_FULL_REPLAYS : WB/MAF full replay traps
-event:0x1a counters:2 um:zero minimum:256 name:MEM_BARRIER : Memory barrier instructions
-event:0x1b counters:2 um:zero minimum:256 name:LOAD_LOCKED : LDx/L instructions
-event:0x1c counters:1 um:zero minimum:256 name:SCACHE_ACCESS : S-cache access
-event:0x1d counters:1 um:zero minimum:256 name:SCACHE_READ : S-cache read
-event:0x1e counters:1,2 um:zero minimum:256 name:SCACHE_WRITE : S-cache write
-event:0x1f counters:1 um:zero minimum:256 name:SCACHE_VICTIM : S-cache victim
-event:0x20 counters:2 um:zero minimum:256 name:SCACHE_MISS : S-cache miss
-event:0x21 counters:2 um:zero minimum:256 name:SCACHE_READ_MISS : S-cache read miss
-event:0x22 counters:2 um:zero minimum:256 name:SCACHE_WRITE_MISS : S-cache write miss
-event:0x23 counters:2 um:zero minimum:256 name:SCACHE_SH_WRITE : S-cache shared writes
-event:0x24 counters:1 um:zero minimum:256 name:BCACHE_HIT : B-cache hit
-event:0x25 counters:1 um:zero minimum:256 name:BCACHE_VICTIM : B-cache victim
-event:0x26 counters:2 um:zero minimum:256 name:BCACHE_MISS : B-cache miss
-event:0x27 counters:1 um:zero minimum:256 name:SYS_REQ : System requests
-event:0x28 counters:2 um:zero minimum:256 name:SYS_INV : System invalidates
-event:0x29 counters:2 um:zero minimum:256 name:SYS_READ_REQ : System read requests
diff --git a/events/alpha/ev5/unit_masks b/events/alpha/ev5/unit_masks
deleted file mode 100644
index 4f24fa9..0000000
--- a/events/alpha/ev5/unit_masks
+++ /dev/null
@@ -1,4 +0,0 @@
-# Alpha EV-5 possible unit masks
-#
-name:zero type:mandatory default:0x0
-	0x0 No unit mask
diff --git a/events/alpha/ev6/events b/events/alpha/ev6/events
deleted file mode 100644
index 2039cef..0000000
--- a/events/alpha/ev6/events
+++ /dev/null
@@ -1,11 +0,0 @@
-# Alpha EV6 events
-#
-event:0x00 counters:0,1 um:zero minimum:500 name:CYCLES : Total cycles
-event:0x01 counters:1 um:zero minimum:500 name:RETIRED : Retired instructions
-event:0x02 counters:1 um:zero minimum:500 name:COND_BRANCHES : Retired conditional branches
-event:0x03 counters:1 um:zero minimum:500 name:BRANCH_MISPREDICTS : Retired branch mispredicts
-event:0x04 counters:1 um:zero minimum:500 name:DTB_MISS : Retired DTB single misses * 2
-event:0x05 counters:1 um:zero minimum:500 name:DTB_DD_MISS : Retired DTB double double misses
-event:0x06 counters:1 um:zero minimum:500 name:ITB_MISS : Retired ITB misses
-event:0x07 counters:1 um:zero minimum:500 name:UNALIGNED_TRAP : Retired unaligned traps
-event:0x08 counters:1 um:zero minimum:500 name:REPLAY_TRAP : Replay traps
diff --git a/events/alpha/ev6/unit_masks b/events/alpha/ev6/unit_masks
deleted file mode 100644
index bbe38c6..0000000
--- a/events/alpha/ev6/unit_masks
+++ /dev/null
@@ -1,4 +0,0 @@
-# Alpha EV-6 possible unit masks
-#
-name:zero type:mandatory default:0x0
-	0x0 No unit mask
diff --git a/events/alpha/ev67/events b/events/alpha/ev67/events
index b603871..6e62383 100644
--- a/events/alpha/ev67/events
+++ b/events/alpha/ev67/events
@@ -1,27 +1,6 @@
 # Alpha EV-67 Events
 #
-event:0x00 counters:0 um:zero minimum:500 name:CYCLES : Total cycles
-event:0x01 counters:1 um:zero minimum:500 name:DELAYED_CYCLES : Cycles of delayed retire pointer advance
-# FIXME: bug carried over
-event:0x00 counters:0,1 um:zero minimum:500 name:RETIRED : Retired instructions
-event:0x02 counters:1 um:zero minimum:500 name:BCACHE_MISS : Bcache misses/long probe latency
-event:0x03 counters:1 um:zero minimum:500 name:MBOX_REPLAY : Mbox replay traps
-# FIXME: all the below used PM_CTR
-event:0x04 counters:0 um:zero minimum:500 name:STALLED_0 : PCTR0 triggered; stalled between fetch and map stages
-event:0x05 counters:0 um:zero minimum:500 name:TAKEN_0 : PCTR0 triggered; branch was not mispredicted and taken
-event:0x06 counters:0 um:zero minimum:500 name:MISPREDICT_0 : PCTR0 triggered; branch was mispredicted
-event:0x07 counters:0 um:zero minimum:500 name:ITB_MISS_0 : PCTR0 triggered; ITB miss
-event:0x08 counters:0 um:zero minimum:500 name:DTB_MISS_0 : PCTR0 triggered; DTB miss
-event:0x09 counters:0 um:zero minimum:500 name:REPLAY_0 : PCTR0 triggered; replay trap
-event:0x0a counters:0 um:zero minimum:500 name:LOAD_STORE_0 : PCTR0 triggered; load-store order replay trap
-event:0x0b counters:0 um:zero minimum:500 name:ICACHE_MISS_0 : PCTR0 triggered; Icache miss
-event:0x0c counters:0 um:zero minimum:500 name:UNALIGNED_0 : PCTR0 triggered; unaligned load/store trap
-event:0x0d counters:0 um:zero minimum:500 name:STALLED_1 : PCTR1 triggered; stalled between fetch and map stages
-event:0x0e counters:0 um:zero minimum:500 name:TAKEN_1 : PCTR1 triggered; branch was not mispredicted and taken
-event:0x0f counters:0 um:zero minimum:500 name:MISPREDICT_1 : PCTR1 triggered; branch was mispredicted
-event:0x10 counters:0 um:zero minimum:500 name:ITB_MISS_1 : PCTR1 triggered; ITB miss
-event:0x11 counters:0 um:zero minimum:500 name:DTB_MISS_1 : PCTR1 triggered; DTB miss
-event:0x12 counters:0 um:zero minimum:500 name:REPLAY_1 : PCTR1 triggered; replay trap
-event:0x13 counters:0 um:zero minimum:500 name:LOAD_STORE_1 : PCTR1 triggered; load-store order replay trap
-event:0x14 counters:0 um:zero minimum:500 name:ICACHE_MISS_1 : PCTR1 triggered; Icache miss
-event:0x15 counters:0 um:zero minimum:500 name:UNALIGNED_1 : PCTR1 triggered; unaligned load/store trap
+event:0x01 counters:0,1 um:zero minimum:500 name:CYCLES : Total cycles
+event:0x02 counters:0 um:zero minimum:500 name:INSTRUCTIONS : Retired instructions
+event:0x03 counters:1 um:zero minimum:500 name:BCACHE_MISS : Bcache misses/long probe latency
+event:0x04 counters:1 um:zero minimum:500 name:MBOX_REPLAY : Mbox replay traps
diff --git a/events/alpha/pca56/events b/events/alpha/pca56/events
deleted file mode 100644
index 334babe..0000000
--- a/events/alpha/pca56/events
+++ /dev/null
@@ -1,2 +0,0 @@
-# PCA-56
-# FIXME: no events ? What's going on here Falk ?
diff --git a/events/alpha/pca56/unit_masks b/events/alpha/pca56/unit_masks
deleted file mode 100644
index 2b807b7..0000000
--- a/events/alpha/pca56/unit_masks
+++ /dev/null
@@ -1,3 +0,0 @@
-# Alpha PCA-56 possible unit masks
-#
-# FIXME: any events ...?
diff --git a/events/arm/armv7-common/events b/events/arm/armv7-common/events
index 0b6ed45..c83b2b7 100644
--- a/events/arm/armv7-common/events
+++ b/events/arm/armv7-common/events
@@ -33,4 +33,4 @@ event:0x1B counters:1,2,3,4,5,6 um:zero minimum:500 name:INST_SPEC : Instruction
 event:0x1C counters:1,2,3,4,5,6 um:zero minimum:500 name:TTBR_WRITE_RETIRED : Write to TTBR architecturally executed, condition code pass
 event:0x1D counters:1,2,3,4,5,6 um:zero minimum:500 name:BUS_CYCLES : Bus cycle
 
-event:0xFF counters:0 um:zero minimum:500 name:CPU_CYCLES : CPU cycle
+event:0xFF counters:0 um:zero minimum:100000 name:CPU_CYCLES : CPU cycle
diff --git a/events/arm/armv7-krait/events b/events/arm/armv7-krait/events
new file mode 100644
index 0000000..ec838c7
--- /dev/null
+++ b/events/arm/armv7-krait/events
@@ -0,0 +1,3 @@
+# ARM V7 events
+# WARNING: just re-uses common ARM PMU codes as Stephen Boyd advised
+include:arm/armv7-common
diff --git a/events/avr32/unit_masks b/events/arm/armv7-krait/unit_masks
similarity index 54%
rename from events/avr32/unit_masks
rename to events/arm/armv7-krait/unit_masks
index 37d9839..4027469 100644
--- a/events/avr32/unit_masks
+++ b/events/arm/armv7-krait/unit_masks
@@ -1,4 +1,4 @@
-# AVR32 performance counters possible unit masks
+# ARM V7 PMNC possible unit masks
 #
 name:zero type:mandatory default:0x00
 	0x00 No unit mask
diff --git a/events/arm/armv8-ca53/events b/events/arm/armv8-ca53/events
new file mode 100644
index 0000000..5e1b4d8
--- /dev/null
+++ b/events/arm/armv8-ca53/events
@@ -0,0 +1,38 @@
+#
+# Copyright (c) Red Hat, 2014.
+# Contributed by William Cohen <wcohen@redhat.com>
+#
+# ARM Cortex A53 events
+# From Cortex A53 TRM
+#
+include:arm/armv8-pmuv3-common
+event:0x60 um:zero minimum:10007 name:BUS_ACCESS_LD : Bus access - Read
+event:0x61 um:zero minimum:10007 name:BUS_ACCESS_ST : Bus access - Write
+event:0x7A um:zero minimum:10007 name:BR_INDIRECT_SPEC : Branch speculatively executed - Indirect branch
+event:0x86 um:zero minimum:10007 name:EXC_IRQ : Exception taken, IRQ
+event:0x87 um:zero minimum:10007 name:EXC_FIQ : Exception taken, FIQ
+event:0xC0 um:zero minimum:10007 name:EXT_MEM_REQ : External memory request
+event:0xC1 um:zero minimum:10007 name:EXT_MEM_REQ_NC : Non-cacheable external memory request
+event:0xC2 um:zero minimum:10007 name:PREFETCH_LINEFILL : Linefill because of prefetch
+event:0xC3 um:zero minimum:10007 name:PREFETCH_LINEFILL_DROP : Instruction Cache Throttle occurred
+event:0xC4 um:zero minimum:10007 name:READ_ALLOC_ENTER : Entering read allocate mode
+event:0xC5 um:zero minimum:10007 name:READ_ALLOC : Read allocate mode
+event:0xC6 um:zero minimum:10007 name:PRE_DECODE_ERR : Pre-decode error
+event:0xC7 um:zero minimum:10007 name:STALL_SB_FULL : Data Write operation that stalls the pipeline because the store buffer is full
+event:0xC8 um:zero minimum:10007 name:EXT_SNOOP : SCU Snooped data from another CPU for this CPU
+event:0xC9 um:zero minimum:10007 name:BR_COND : Conditional branch executed
+event:0xCA um:zero minimum:10007 name:BR_INDIRECT_MISPRED : Indirect branch mispredicted
+event:0xCB um:zero minimum:10007 name:BR_INDIRECT_MISPRED_ADDR : Indirect branch mispredicted because of address miscompare
+event:0xCC um:zero minimum:10007 name:BR_COND_MISPRED : Conditional branch mispredicted
+event:0xD0 um:zero minimum:10007 name:L1I_CACHE_ERR : L1 Instruction Cache (data or tag) memory error
+event:0xD1 um:zero minimum:10007 name:L1D_CACHE_ERR : L1 Data Cache (data, tag or dirty) memory error, correctable or non-correctable
+event:0xD2 um:zero minimum:10007 name:TLB_ERR : TLB memory error
+event:0xE0 um:zero minimum:10007 name:OTHER_IQ_DEP_STALL : Cycles that the DPU IQ is empty and that is not because of a recent micro-TLB miss, instruction cache miss or pre-decode error
+event:0xE1 um:zero minimum:10007 name:IC_DEP_STALL : Cycles the DPU IQ is empty and there is an instruction cache miss being processed
+event:0xE2 um:zero minimum:10007 name:IUTLB_DEP_STALL : Cycles the DPU IQ is empty and there is an instruction micro-TLB miss being processed
+event:0xE3 um:zero minimum:10007 name:DECODE_DEP_STALL : Cycles the DPU IQ is empty and there is a pre-decode error being processed
+event:0xE4 um:zero minimum:10007 name:OTHER_INTERLOCK_STALL : Cycles there is an interlock other than Advanced SIMD/Floating-point instructions or load/store instruction
+event:0xE5 um:zero minimum:10007 name:AGU_DEP_STALL : Cycles there is an interlock for a load/store instruction waiting for data to calculate the address in the AGU
+event:0xE6 um:zero minimum:10007 name:SIMD_DEP_STALL : Cycles there is an interlock for an Advanced SIMD/Floating-point operation.
+event:0xE7 um:zero minimum:10007 name:LD_DEP_STALL : Cycles there is a stall in the Wr stage because of a load miss
+event:0xE8 um:zero minimum:10007 name:ST_DEP_STALL : Cycles there is a stall in the Wr stage because of a store
diff --git a/events/arm/armv8-ca53/unit_masks b/events/arm/armv8-ca53/unit_masks
new file mode 100644
index 0000000..42b12b4
--- /dev/null
+++ b/events/arm/armv8-ca53/unit_masks
@@ -0,0 +1,3 @@
+# ARMv8 Cortex A53 unit masks
+#
+include:arm/armv8-pmuv3-common
diff --git a/events/arm/armv8-ca57/events b/events/arm/armv8-ca57/events
new file mode 100644
index 0000000..62974c1
--- /dev/null
+++ b/events/arm/armv8-ca57/events
@@ -0,0 +1,67 @@
+#
+# Copyright (c) Red Hat, 2014.
+# Contributed by William Cohen <wcohen@redhat.com>
+#
+# ARM Cortex A57 events
+# From Cortex A57 TRM
+#
+include:arm/armv8-pmuv3-common
+event:0x40 um:zero minimum:10007 name:L1D_CACHE_LD : Level 1 data cache access - Read
+event:0x41 um:zero minimum:10007 name:L1D_CACHE_ST : Level 1 data cache access - Write
+event:0x42 um:zero minimum:10007 name:L1D_CACHE_REFILL_LD : Level 1 data cache refill - Read
+event:0x43 um:zero minimum:10007 name:L1D_CACHE_REFILL_ST : Level 1 data cache refill - Write
+event:0x46 um:zero minimum:10007 name:L1D_CACHE_WB_VICTIM : Level 1 data cache Write-back - Victim
+event:0x47 um:zero minimum:10007 name:L1D_CACHE_WB_CLEAN : Level 1 data cache Write-back - Cleaning and coherency
+event:0x48 um:zero minimum:10007 name:L1D_CACHE_INVAL : Level 1 data cache invalidate
+event:0x4C um:zero minimum:10007 name:L1D_TLB_REFILL_LD : Level 1 data TLB refill - Read
+event:0x4D um:zero minimum:10007 name:L1D_TLB_REFILL_ST : Level 1 data TLB refill - Write
+event:0x50 um:zero minimum:10007 name:L2D_CACHE_LD : Level 2 data cache access - Read
+event:0x51 um:zero minimum:10007 name:L2D_CACHE_ST : Level 2 data cache access - Write
+event:0x52 um:zero minimum:10007 name:L2D_CACHE_REFILL_LD : Level 2 data cache refill - Read
+event:0x53 um:zero minimum:10007 name:L2D_CACHE_REFILL_ST : Level 2 data cache refill - Write
+event:0x56 um:zero minimum:10007 name:L2D_CACHE_WB_VICTIM : Level 2 data cache Write-back - Victim
+event:0x57 um:zero minimum:10007 name:L2D_CACHE_WB_CLEAN : Level 2 data cache Write-back - Cleaning and coherency
+event:0x58 um:zero minimum:10007 name:L2D_CACHE_INVAL : Level 2 data cache invalidate
+event:0x60 um:zero minimum:10007 name:BUS_ACCESS_LD : Bus access - Read
+event:0x61 um:zero minimum:10007 name:BUS_ACCESS_ST : Bus access - Write
+event:0x62 um:zero minimum:10007 name:BUS_ACCESS_SHARED : Bus access - Normal
+event:0x63 um:zero minimum:10007 name:BUS_ACCESS_NOT_SHARED : Bus access - Not normal
+event:0x64 um:zero minimum:10007 name:BUS_ACCESS_NORMAL : Bus access - Normal
+event:0x65 um:zero minimum:10007 name:BUS_ACCESS_PERIPH : Bus access - Peripheral
+event:0x66 um:zero minimum:10007 name:MEM_ACCESS_LD : Data memory access - Read
+event:0x67 um:zero minimum:10007 name:MEM_ACCESS_ST : Data memory access - Write
+event:0x68 um:zero minimum:10007 name:UNALIGNED_LD_SPEC : Unaligned access - Read
+event:0x69 um:zero minimum:10007 name:UNALIGNED_ST_SPEC : Unaligned access - Write
+event:0x6A um:zero minimum:10007 name:UNALIGNED_LDST_SPEC : Unaligned access
+event:0x6C um:zero minimum:10007 name:LDREX_SPEC : Exclusive operation speculatively executed - LDREX
+event:0x6D um:zero minimum:10007 name:STREX_PASS_SPEC : Exclusive instruction speculatively executed - STREX pass
+event:0x6E um:zero minimum:10007 name:STREX_FAIL_SPEC : Exclusive operation speculatively executed - STREX fail
+event:0x70 um:zero minimum:10007 name:LD_SPEC : Operation speculatively executed - Load
+event:0x71 um:zero minimum:10007 name:ST_SPEC : Operation speculatively executed - Store
+event:0x72 um:zero minimum:10007 name:LDST_SPEC : Operation speculatively executed - Load or store
+event:0x73 um:zero minimum:10007 name:DP_SPEC : Operation speculatively executed - Integer data processing
+event:0x74 um:zero minimum:10007 name:ASE_SPEC : Operation speculatively executed - Advanced SIMD
+event:0x75 um:zero minimum:10007 name:VFP_SPEC : Operation speculatively executed - VFP
+event:0x76 um:zero minimum:10007 name:PC_WRITE_SPEC : Operation speculatively executed - Software change of the PC
+event:0x77 um:zero minimum:10007 name:CRYPTO_SPEC : Operation speculatively executed, crypto data processing
+event:0x78 um:zero minimum:10007 name:BR_IMMED_SPEC : Branch speculatively executed - Immediate branch
+event:0x79 um:zero minimum:10007 name:BR_RETURN_SPEC : Branch speculatively executed - Procedure return
+event:0x7A um:zero minimum:10007 name:BR_INDIRECT_SPEC : Branch speculatively executed - Indirect branch
+event:0x7C um:zero minimum:10007 name:ISB_SPEC : Barrier speculatively executed - ISB
+event:0x7D um:zero minimum:10007 name:DSB_SPEC : Barrier speculatively executed - DSB
+event:0x7E um:zero minimum:10007 name:DMB_SPEC : Barrier speculatively executed - DMB
+event:0x81 um:zero minimum:10007 name:EXC_UNDEF : Exception taken, other synchronous
+event:0x82 um:zero minimum:10007 name:EXC_SVC : Exception taken, Supervisor Call
+event:0x83 um:zero minimum:10007 name:EXC_PABORT : Exception taken, Instruction Abort
+event:0x84 um:zero minimum:10007 name:EXC_DABORT : Exception taken, Data Abort or SError
+event:0x86 um:zero minimum:10007 name:EXC_IRQ : Exception taken, IRQ
+event:0x87 um:zero minimum:10007 name:EXC_FIQ : Exception taken, FIQ
+event:0x88 um:zero minimum:10007 name:EXC_SMC : Exception taken, Secure Monitor Call
+event:0x8A um:zero minimum:10007 name:EXC_HVC : Exception taken, Hypervisor Call
+event:0x8B um:zero minimum:10007 name:EXC_TRAP_PABORT : Exception taken, Instruction Abort not taken locally
+event:0x8C um:zero minimum:10007 name:EXC_TRAP_DABORT : Exception taken, Data Abort, or SError not taken locally
+event:0x8D um:zero minimum:10007 name:EXC_TRAP_OTHER : Exception taken, Other traps not taken locally
+event:0x8E um:zero minimum:10007 name:EXC_TRAP_IRQ : Exception taken, IRQ not taken locally
+event:0x8F um:zero minimum:10007 name:EXC_TRAP_FIQ : Exception taken, FIQ not taken locally
+event:0x90 um:zero minimum:10007 name:RC_LD_SPEC : Release consistency instruction speculatively executed - Load-Acquire
+event:0x91 um:zero minimum:10007 name:RC_ST_SPEC : Release consistency instruction speculatively executed - Store-Release
diff --git a/events/arm/armv8-ca57/unit_masks b/events/arm/armv8-ca57/unit_masks
new file mode 100644
index 0000000..5d69263
--- /dev/null
+++ b/events/arm/armv8-ca57/unit_masks
@@ -0,0 +1,3 @@
+# ARMv8 Cortex A57 unit masks
+#
+include:arm/armv8-pmuv3-common
diff --git a/events/arm/armv8-pmuv3-common/events b/events/arm/armv8-pmuv3-common/events
new file mode 100644
index 0000000..3cdff03
--- /dev/null
+++ b/events/arm/armv8-pmuv3-common/events
@@ -0,0 +1,38 @@
+#
+# Copyright (c) Red Hat, 2014.
+# Contributed by William Cohen <wcohen@redhat.com>
+#
+# ARMv8 pmu v3 architected events
+
+event:0x00 um:zero minimum:500 name:SW_INCR : Instruction architecturally executed, condition code check pass, software increment
+event:0x01 um:zero minimum:5000 name:L1I_CACHE_REFILL : Level 1 instruction cache refill
+event:0x02 um:zero minimum:5000 name:L1I_TLB_REFILL : Level 1 instruction TLB refill
+event:0x03 um:zero minimum:5000 name:L1D_CACHE_REFILL : Level 1 data cache refill
+event:0x04 um:zero minimum:5000 name:L1D_CACHE : Level 1 data cache access
+event:0x05 um:zero minimum:5000 name:L1D_TLB_REFILL : Level 1 data TLB refill
+event:0x06 um:zero minimum:100000 name:LD_RETIRED : Instruction architecturally executed, condition code check pass, load
+event:0x07 um:zero minimum:100000 name:ST_RETIRED : Instruction architecturally executed, condition code check pass, store
+event:0x08 um:zero minimum:100000 name:INST_RETIRED : Instruction architecturally executed
+event:0x09 um:zero minimum:500 name:EXC_TAKEN : Exception taken
+event:0x0A um:zero minimum:500 name:EXC_RETURN : Instruction architecturally executed, condition code check pass, exception return
+event:0x0B um:zero minimum:500 name:CID_WRITE_RETIRED : Instruction architecturally executed, condition code check pass, write to CONTEXTIDR
+event:0x0C um:zero minimum:5000 name:PC_WRITE_RETIRED : Instruction architecturally executed, condition code check pass, software change of the PC
+event:0x0D um:zero minimum:5000 name:BR_IMMED_RETIRED : Instruction architecturally executed, immediate branch
+event:0x0E um:zero minimum:5000 name:BR_RETURN_RETIRED : Instruction architecturally executed, condition code check pass, procedure return
+event:0x0F um:zero minimum:500 name:UNALIGNED_LDST_RETIRED : Instruction architecturally executed, condition code check pass, unaligned load or store
+event:0x10 um:zero minimum:5000 name:BR_MIS_PRED : Mispredicted or not predicted branch speculatively executed
+event:0x11 um:zero minimum:100000 name:CPU_CYCLES : Cycle
+event:0x12 um:zero minimum:5000 name:BR_PRED : Predictable branch speculatively executed
+event:0x13 um:zero minimum:100000 name:MEM_ACCESS : Data memory access
+event:0x14 um:zero minimum:5000 name:L1I_CACHE : Level 1 instruction cache access
+event:0x15 um:zero minimum:5000 name:L1D_CACHE_WB : Level 1 data cache write-back
+event:0x16 um:zero minimum:5000 name:L2D_CACHE : Level 2 data cache access
+event:0x17 um:zero minimum:5000 name:L2D_CACHE_REFILL : Level 2 data cache refill
+event:0x18 um:zero minimum:5000 name:L2D_CACHE_WB : Level 2 data cache write-back
+event:0x19 um:zero minimum:5000 name:BUS_ACCESS : Bus access
+event:0x1A um:zero minimum:500 name:MEMORY_ERROR : Local memory error
+event:0x1B um:zero minimum:100000 name:INST_SPEC : Operation speculatively executed
+event:0x1C um:zero minimum:5000 name:TTBR_WRITE_RETIRED : Instruction architecturally executed, condition code check pass, write to TTBR
+event:0x1D um:zero minimum:5000 name:BUS_CYCLES : Bus cycle
+event:0x1F um:zero minimum:5000 name:L1D_CACHE_ALLOCATE : Level 1 data cache allocation without refill
+event:0x20 um:zero minimum:5000 name:L2D_CACHE_ALLOCATE : Level 2 data cache allocation without refill
diff --git a/events/arm/armv8-pmuv3-common/unit_masks b/events/arm/armv8-pmuv3-common/unit_masks
new file mode 100644
index 0000000..7666c35
--- /dev/null
+++ b/events/arm/armv8-pmuv3-common/unit_masks
@@ -0,0 +1,4 @@
+# ARMv8 architected events unit masks
+#
+name:zero type:mandatory default:0x00
+	0x00 No unit mask
diff --git a/events/arm/armv8-xgene/events b/events/arm/armv8-xgene/events
new file mode 100644
index 0000000..3e28463
--- /dev/null
+++ b/events/arm/armv8-xgene/events
@@ -0,0 +1,7 @@
+#
+# Copyright (c) Red Hat, 2014.
+# Contributed by William Cohen <wcohen@redhat.com>
+#
+# Basic ARM V8 events
+#
+include:arm/armv8-pmuv3-common
diff --git a/events/arm/armv8-xgene/unit_masks b/events/arm/armv8-xgene/unit_masks
new file mode 100644
index 0000000..9ace2eb
--- /dev/null
+++ b/events/arm/armv8-xgene/unit_masks
@@ -0,0 +1,3 @@
+# ARMv8 architected events unit masks
+#
+include:arm/armv8-pmuv3-common
diff --git a/events/avr32/events b/events/avr32/events
deleted file mode 100644
index 489d914..0000000
--- a/events/avr32/events
+++ /dev/null
@@ -1,27 +0,0 @@
-# AVR32 events
-#
-event:0x00 counters:1,2 um:zero minimum:500 name:IFU_IFETCH_MISS : number of instruction fetch misses
-event:0x01 counters:1,2 um:zero minimum:500 name:CYCLES_IFU_MEM_STALL : cycles instruction fetch pipe is stalled
-event:0x02 counters:1,2 um:zero minimum:500 name:CYCLES_DATA_STALL : cycles stall due to data dependency
-event:0x03 counters:1,2 um:zero minimum:500 name:ITLB_MISS : number of Instruction TLB misses
-event:0x04 counters:1,2 um:zero minimum:500 name:DTLB_MISS : number of Data TLB misses
-event:0x05 counters:1,2 um:zero minimum:500 name:BR_INST_EXECUTED : branch instruction executed w/ or w/o program flow change
-event:0x06 counters:1,2 um:zero minimum:500 name:BR_INST_MISS_PRED : branch mispredicted
-event:0x07 counters:1,2 um:zero minimum:500 name:INSN_EXECUTED : instructions executed
-event:0x08 counters:1,2 um:zero minimum:500 name:DCACHE_WBUF_FULL : data cache write buffers full
-event:0x09 counters:1,2 um:zero minimum:500 name:CYCLES_DCACHE_WBUF_FULL : cycles stalled due to data cache write buffers full
-event:0x0a counters:1,2 um:zero minimum:500 name:DCACHE_READ_MISS : data cache read miss
-event:0x0b counters:1,2 um:zero minimum:500 name:CYCLES_DCACHE_READ_MISS : cycles stalled due to data cache read miss
-event:0x0c counters:1,2 um:zero minimum:500 name:WRITE_ACCESS : write access
-event:0x0d counters:1,2 um:zero minimum:500 name:CYCLES_WRITE_ACCESS : cycles when write access is ongoing
-event:0x0e counters:1,2 um:zero minimum:500 name:READ_ACCESS : read access
-event:0x0f counters:1,2 um:zero minimum:500 name:CYCLES_READ_ACCESS : cycles when read access is ongoing
-event:0x10 counters:1,2 um:zero minimum:500 name:CACHE_STALL : read or write access that stalled
-event:0x11 counters:1,2 um:zero minimum:500 name:CYCLES_CACHE_STALL : cycles stalled doing read or write access
-event:0x12 counters:1,2 um:zero minimum:500 name:DCACHE_ACCESS : data cache access
-event:0x13 counters:1,2 um:zero minimum:500 name:CYCLES_DCACHE_ACCESS : cycles when data cache access is ongoing
-event:0x14 counters:1,2 um:zero minimum:500 name:DCACHE_WB : data cache line writeback
-event:0x15 counters:1,2 um:zero minimum:500 name:ACCUMULATOR_HIT : accumulator cache hit
-event:0x16 counters:1,2 um:zero minimum:500 name:ACCUMULATOR_MISS : accumulator cache miss
-event:0x17 counters:1,2 um:zero minimum:500 name:BTB_HIT : branch target buffer hit
-event:0xff counters:0 um:zero minimum:500 name:CPU_CYCLES : clock cycles counter
diff --git a/events/i386/atom/unit_masks b/events/i386/atom/unit_masks
index acaec23..4802ddb 100644
--- a/events/i386/atom/unit_masks
+++ b/events/i386/atom/unit_masks
@@ -3,118 +3,118 @@
 #
 include:i386/arch_perfmon
 name:store_forwards type:mandatory default:0x81
-	0x81 good Good store forwards
+	0x81 extra: good Good store forwards
 name:segment_reg_loads type:mandatory default:0x00
-	0x00 any Number of segment register loads
+	0x00 extra: any Number of segment register loads
 name:simd_prefetch type:bitmask default:0x01
-	0x01 prefetcht0 Streaming SIMD Extensions (SSE) PrefetchT0 instructions executed
-	0x06 sw_l2 Streaming SIMD Extensions (SSE) PrefetchT1 and PrefetchT2 instructions executed
-	0x08 prefetchnta Streaming SIMD Extensions (SSE) Prefetch NTA instructions executed
+	0x01 extra: prefetcht0 Streaming SIMD Extensions (SSE) PrefetchT0 instructions executed
+	0x06 extra: sw_l2 Streaming SIMD Extensions (SSE) PrefetchT1 and PrefetchT2 instructions executed
+	0x08 extra: prefetchnta Streaming SIMD Extensions (SSE) Prefetch NTA instructions executed
 name:data_tlb_misses type:bitmask default:0x07
-	0x07 dtlb_miss Memory accesses that missed the DTLB
-	0x05 dtlb_miss_ld DTLB misses due to load operations
-	0x09 l0_dtlb_miss_ld L0_DTLB misses due to load operations
-	0x06 dtlb_miss_st DTLB misses due to store operations
+	0x07 extra: dtlb_miss Memory accesses that missed the DTLB
+	0x05 extra: dtlb_miss_ld DTLB misses due to load operations
+	0x09 extra: l0_dtlb_miss_ld L0_DTLB misses due to load operations
+	0x06 extra: dtlb_miss_st DTLB misses due to store operations
 name:page_walks type:bitmask default:0x03
-	0x03 walks Number of page-walks executed
-	0x03 cycles Duration of page-walks in core cycles
+	0x03 extra: walks Number of page-walks executed
+	0x03 extra: cycles Duration of page-walks in core cycles
 name:x87_comp_ops_exe type:bitmask default:0x81
-	0x01 s Floating point computational micro-ops executed
-	0x81 ar Floating point computational micro-ops retired
+	0x01 extra: s Floating point computational micro-ops executed
+	0x81 extra: ar Floating point computational micro-ops retired
 name:fp_assist type:mandatory default:0x81
-	0x81 ar Floating point assists
+	0x81 extra: ar Floating point assists
 name:mul type:bitmask default:0x01
-	0x01 s Multiply operations executed
-	0x81 ar Multiply operations retired
+	0x01 extra: s Multiply operations executed
+	0x81 extra: ar Multiply operations retired
 name:div type:bitmask default:0x01
-	0x01 s Divide operations executed
-	0x81 ar Divide operations retired
+	0x01 extra: s Divide operations executed
+	0x81 extra: ar Divide operations retired
 name:l2_rqsts type:bitmask default:0x41
-	0x41 i_state L2 cache demand requests from this core that missed the L2
+	0x41 extra: i_state L2 cache demand requests from this core that missed the L2
 	0x4F mesi L2 cache demand requests from this core
 name:cpu_clk_unhalted type:bitmask default:0x00
-	0x00 core_p Core cycles when core is not halted
-	0x01 bus Bus cycles when core is not halted
-	0x02 no_other Bus cycles when core is active and the other is halted
+	0x00 extra: core_p Core cycles when core is not halted
+	0x01 extra: bus Bus cycles when core is not halted
+	0x02 extra: no_other Bus cycles when core is active and the other is halted
 name:l1d_cache type:bitmask default:0x21
-	0x21 ld L1 Cacheable Data Reads
-	0x22 st L1 Cacheable Data Writes
+	0x21 extra: ld L1 Cacheable Data Reads
+	0x22 extra: st L1 Cacheable Data Writes
 name:icache type:bitmask default:0x03
-	0x03 accesses Instruction fetches
-	0x02 misses Icache miss
+	0x03 extra: accesses Instruction fetches
+	0x02 extra: misses Icache miss
 name:itlb type:bitmask default:0x04
-	0x04 flush ITLB flushes
-	0x02 misses ITLB misses
+	0x04 extra: flush ITLB flushes
+	0x02 extra: misses ITLB misses
 name:macro_insts type:exclusive default:0x03
-	0x02 cisc_decoded CISC macro instructions decoded
-	0x03 all_decoded All Instructions decoded
+	0x02 extra: cisc_decoded CISC macro instructions decoded
+	0x03 extra: all_decoded All Instructions decoded
 name:simd_uops_exec type:exclusive default:0x80
-	0x00 s SIMD micro-ops executed (excluding stores)
-	0x80 ar SIMD micro-ops retired (excluding stores)
+	0x00 extra: s SIMD micro-ops executed (excluding stores)
+	0x80 extra: ar SIMD micro-ops retired (excluding stores)
 name:simd_sat_uop_exec type:bitmask default:0x00
-	0x00 s SIMD saturated arithmetic micro-ops executed
-	0x80 ar SIMD saturated arithmetic micro-ops retired
+	0x00 extra: s SIMD saturated arithmetic micro-ops executed
+	0x80 extra: ar SIMD saturated arithmetic micro-ops retired
 name:simd_uop_type_exec type:bitmask default:0x01
-	0x01 s SIMD packed multiply microops executed
-	0x81 ar SIMD packed multiply microops retired
-	0x02 s SIMD packed shift micro-ops executed
-	0x82 ar SIMD packed shift micro-ops retired
-	0x04 s SIMD pack micro-ops executed
-	0x84 ar SIMD pack micro-ops retired
-	0x08 s SIMD unpack micro-ops executed
-	0x88 ar SIMD unpack micro-ops retired
-	0x10 s SIMD packed logical microops executed
-	0x90 ar SIMD packed logical microops retired
-	0x20 s SIMD packed arithmetic micro-ops executed
+	0x01 extra: s SIMD packed multiply microops executed
+	0x81 extra: ar SIMD packed multiply microops retired
+	0x02 extra: s SIMD packed shift micro-ops executed
+	0x82 extra: ar SIMD packed shift micro-ops retired
+	0x04 extra: s SIMD pack micro-ops executed
+	0x84 extra: ar SIMD pack micro-ops retired
+	0x08 extra: s SIMD unpack micro-ops executed
+	0x88 extra: ar SIMD unpack micro-ops retired
+	0x10 extra: s SIMD packed logical microops executed
+	0x90 extra: ar SIMD packed logical microops retired
+	0x20 extra: s SIMD packed arithmetic micro-ops executed
 	0xA0 ar SIMD packed arithmetic micro-ops retired
 name:uops_retired type:mandatory default:0x10
-	0x10 any Micro-ops retired
+	0x10 extra: any Micro-ops retired
 name:br_inst_retired type:bitmask default:0x00
-	0x00 any Retired branch instructions
-	0x01 pred_not_taken Retired branch instructions that were predicted not-taken
-	0x02 mispred_not_taken Retired branch instructions that were mispredicted not-taken
-	0x04 pred_taken Retired branch instructions that were predicted taken
-	0x08 mispred_taken Retired branch instructions that were mispredicted taken
+	0x00 extra: any Retired branch instructions
+	0x01 extra: pred_not_taken Retired branch instructions that were predicted not-taken
+	0x02 extra: mispred_not_taken Retired branch instructions that were mispredicted not-taken
+	0x04 extra: pred_taken Retired branch instructions that were predicted taken
+	0x08 extra: mispred_taken Retired branch instructions that were mispredicted taken
 	0x0A mispred Retired mispredicted branch instructions (precise event)
 	0x0C taken Retired taken branch instructions
 	0x0F any1 Retired branch instructions
 name:cycles_int_masked type:bitmask default:0x01
-	0x01 cycles_int_masked Cycles during which interrupts are disabled
-	0x02 cycles_int_pending_and_masked Cycles during which interrupts are pending and disabled
+	0x01 extra: cycles_int_masked Cycles during which interrupts are disabled
+	0x02 extra: cycles_int_pending_and_masked Cycles during which interrupts are pending and disabled
 name:simd_inst_retired type:bitmask default:0x01
-	0x01 packed_single Retired Streaming SIMD Extensions (SSE) packed-single instructions
-	0x02 scalar_single Retired Streaming SIMD Extensions (SSE) scalar-single instructions
-	0x04 packed_double Retired Streaming SIMD Extensions 2 (SSE2) packed-double instructions
-	0x08 scalar_double Retired Streaming SIMD Extensions 2 (SSE2) scalar-double instructions
-	0x10 vector Retired Streaming SIMD Extensions 2 (SSE2) vector instructions
+	0x01 extra: packed_single Retired Streaming SIMD Extensions (SSE) packed-single instructions
+	0x02 extra: scalar_single Retired Streaming SIMD Extensions (SSE) scalar-single instructions
+	0x04 extra: packed_double Retired Streaming SIMD Extensions 2 (SSE2) packed-double instructions
+	0x08 extra: scalar_double Retired Streaming SIMD Extensions 2 (SSE2) scalar-double instructions
+	0x10 extra: vector Retired Streaming SIMD Extensions 2 (SSE2) vector instructions
 	0x1F any Retired Streaming SIMD instructions
 name:simd_comp_inst_retired type:bitmask default:0x01
-	0x01 packed_single Retired computational Streaming SIMD Extensions (SSE) packed-single instructions
-	0x02 scalar_single Retired computational Streaming SIMD Extensions (SSE) scalar-single instructions
-	0x04 packed_double Retired computational Streaming SIMD Extensions 2 (SSE2) packed-double instructions
-	0x08 scalar_double Retired computational Streaming SIMD Extensions 2 (SSE2) scalar-double instructions
+	0x01 extra: packed_single Retired computational Streaming SIMD Extensions (SSE) packed-single instructions
+	0x02 extra: scalar_single Retired computational Streaming SIMD Extensions (SSE) scalar-single instructions
+	0x04 extra: packed_double Retired computational Streaming SIMD Extensions 2 (SSE2) packed-double instructions
+	0x08 extra: scalar_double Retired computational Streaming SIMD Extensions 2 (SSE2) scalar-double instructions
 name:mem_load_retired type:bitmask default:0x01
-	0x01 l2_hit Retired loads that hit the L2 cache (precise event)
-	0x02 l2_miss Retired loads that miss the L2 cache (precise event)
-	0x04 dtlb_miss Retired loads that miss the DTLB (precise event)
+	0x01 extra: l2_hit Retired loads that hit the L2 cache (precise event)
+	0x02 extra: l2_miss Retired loads that miss the L2 cache (precise event)
+	0x04 extra: dtlb_miss Retired loads that miss the DTLB (precise event)
 name:thermal_trip type:mandatory default:0xc0
-	0xc0 thermal_trip Number of thermal trips.
+	0xc0 extra: thermal_trip Number of thermal trips.
 # 18-11
 name:core type:bitmask default:0x180
-	0x180 all All cores.
-	0x080 this This Core.
+	0x180 extra: all All cores.
+	0x080 extra: this This Core.
 # 18-12
 name:agent type:bitmask default:0x00
-	0x00 this This agent
-	0x40 any Include any agents
+	0x00 extra: this This agent
+	0x40 extra: any Include any agents
 # 18-13
 name:prefetch type:bitmask default:0x60
-	0x60 all All inclusive
-	0x20 hw Hardware prefetch only
-	0x00 exclude_hw Exclude hardware prefetch
+	0x60 extra: all All inclusive
+	0x20 extra: hw Hardware prefetch only
+	0x00 extra: exclude_hw Exclude hardware prefetch
 # 18-14
 name:mesi type:bitmask default:0x0f
-	0x08 modified Counts modified state
-	0x04 exclusive Counts exclusive state
-	0x02 shared Counts shared state
-	0x01 invalid Counts invalid state
+	0x08 extra: modified Counts modified state
+	0x04 extra: exclusive Counts exclusive state
+	0x02 extra: shared Counts shared state
+	0x01 extra: invalid Counts invalid state
diff --git a/events/i386/broadwell/events b/events/i386/broadwell/events
new file mode 100644
index 0000000..ec55836
--- /dev/null
+++ b/events/i386/broadwell/events
@@ -0,0 +1,65 @@
+#
+# Intel "Broadwell" microarchitecture core events.
+#
+# See http://ark.intel.com/ for help in identifying Broadwell based CPUs
+#
+# Note: the minimum counts were not determined experimentally and could likely
+# be lowered in many cases without ill effect.
+#
+include:i386/arch_perfmon
+event:0x03 counters:cpuid um:ld_blocks minimum:100003 name:ld_blocks :
+event:0x05 counters:cpuid um:misalign_mem_ref minimum:2000003 name:misalign_mem_ref :
+event:0x07 counters:cpuid um:one minimum:100003 name:ld_blocks_partial_address_alias :
+event:0x08 counters:cpuid um:dtlb_load_misses minimum:2000003 name:dtlb_load_misses :
+event:0x0d counters:cpuid um:x03 minimum:2000003 name:int_misc_recovery_cycles :
+event:0x0e counters:cpuid um:uops_issued minimum:2000003 name:uops_issued :
+event:0x14 counters:cpuid um:one minimum:2000003 name:arith_fpu_div_active :
+event:0x24 counters:cpuid um:l2_rqsts minimum:200003 name:l2_rqsts :
+event:0x27 counters:cpuid um:x50 minimum:200003 name:l2_demand_rqsts_wb_hit :
+event:0x48 counters:2 um:l1d_pend_miss minimum:2000003 name:l1d_pend_miss :
+event:0x49 counters:cpuid um:dtlb_store_misses minimum:100003 name:dtlb_store_misses :
+event:0x4c counters:cpuid um:x02 minimum:100003 name:load_hit_pre_hw_pf :
+event:0x4f counters:cpuid um:x10 minimum:2000003 name:ept_walk_cycles :
+event:0x51 counters:cpuid um:one minimum:2000003 name:l1d_replacement :
+event:0x54 counters:cpuid um:tx_mem minimum:2000003 name:tx_mem :
+event:0x58 counters:cpuid um:move_elimination minimum:1000003 name:move_elimination :
+event:0x5c counters:cpuid um:cpl_cycles minimum:2000003 name:cpl_cycles :
+event:0x5d counters:cpuid um:tx_exec minimum:2000003 name:tx_exec :
+event:0x5e counters:cpuid um:rs_events minimum:2000003 name:rs_events :
+event:0x60 counters:cpuid um:offcore_requests_outstanding minimum:2000003 name:offcore_requests_outstanding :
+event:0x63 counters:cpuid um:lock_cycles minimum:2000003 name:lock_cycles :
+event:0x79 counters:0,1,2,3 um:idq minimum:2000003 name:idq :
+event:0x80 counters:cpuid um:x02 minimum:200003 name:icache_misses :
+event:0x85 counters:cpuid um:itlb_misses minimum:100003 name:itlb_misses :
+event:0x87 counters:cpuid um:one minimum:2000003 name:ild_stall_lcp :
+event:0x88 counters:cpuid um:br_inst_exec minimum:200003 name:br_inst_exec :
+event:0x89 counters:cpuid um:br_misp_exec minimum:200003 name:br_misp_exec :
+event:0x9c counters:0,1,2,3 um:idq_uops_not_delivered minimum:2000003 name:idq_uops_not_delivered :
+event:0xa1 counters:cpuid um:uops_executed_port minimum:2000003 name:uops_executed_port :
+event:0xa1 counters:cpuid um:uops_dispatched_port minimum:2000003 name:uops_dispatched_port :
+event:0xa2 counters:cpuid um:resource_stalls minimum:2000003 name:resource_stalls :
+event:0xa3 counters:2 um:cycle_activity minimum:2000003 name:cycle_activity :
+event:0xa8 counters:cpuid um:lsd minimum:2000003 name:lsd :
+event:0xab counters:cpuid um:x02 minimum:2000003 name:dsb2mite_switches_penalty_cycles :
+event:0xae counters:cpuid um:one minimum:100007 name:itlb_itlb_flush :
+event:0xb0 counters:cpuid um:offcore_requests minimum:100003 name:offcore_requests :
+event:0xb1 counters:cpuid um:uops_executed minimum:2000003 name:uops_executed :
+event:0xbc counters:0,1,2,3 um:page_walker_loads minimum:2000003 name:page_walker_loads :
+event:0xc0 counters:1 um:inst_retired minimum:2000003 name:inst_retired :
+event:0xc1 counters:cpuid um:other_assists minimum:100003 name:other_assists :
+event:0xc2 counters:cpuid um:uops_retired minimum:2000003 name:uops_retired :
+event:0xc3 counters:cpuid um:machine_clears minimum:2000003 name:machine_clears :
+event:0xc4 counters:cpuid um:br_inst_retired minimum:400009 name:br_inst_retired :
+event:0xc5 counters:cpuid um:br_misp_retired minimum:400009 name:br_misp_retired :
+event:0xc8 counters:cpuid um:hle_retired minimum:2000003 name:hle_retired :
+event:0xc9 counters:0,1,2,3 um:rtm_retired minimum:2000003 name:rtm_retired :
+event:0xca counters:cpuid um:fp_assist minimum:100003 name:fp_assist :
+event:0xcc counters:cpuid um:x20 minimum:2000003 name:rob_misc_events_lbr_inserts :
+event:0xd0 counters:0,1,2,3 um:mem_uops_retired minimum:2000003 name:mem_uops_retired :
+event:0xd1 counters:0,1,2,3 um:mem_load_uops_retired minimum:2000003 name:mem_load_uops_retired :
+event:0xd2 counters:0,1,2,3 um:mem_load_uops_l3_hit_retired minimum:100003 name:mem_load_uops_l3_hit_retired :
+event:0xd3 counters:0,1,2,3 um:mem_load_uops_l3_miss_retired minimum:100007 name:mem_load_uops_l3_miss_retired :
+event:0xe6 counters:cpuid um:x1f minimum:100003 name:baclears_any :
+event:0xf0 counters:cpuid um:l2_trans minimum:200003 name:l2_trans :
+event:0xf1 counters:cpuid um:l2_lines_in minimum:100003 name:l2_lines_in :
+event:0xf2 counters:cpuid um:x05 minimum:100003 name:l2_lines_out_demand_clean :
diff --git a/events/i386/broadwell/unit_masks b/events/i386/broadwell/unit_masks
new file mode 100644
index 0000000..0d6ccd5
--- /dev/null
+++ b/events/i386/broadwell/unit_masks
@@ -0,0 +1,347 @@
+#
+# Unit masks for the Intel "Broadwell" micro architecture
+#
+# See http://ark.intel.com/ for help in identifying Broadwell based CPUs
+#
+include:i386/arch_perfmon
+name:x02 type:mandatory default:0x2
+	0x2 No unit mask
+name:x03 type:mandatory default:0x3
+	0x3 No unit mask
+name:x05 type:mandatory default:0x5
+	0x5 No unit mask
+name:x10 type:mandatory default:0x10
+	0x10 No unit mask
+name:x1f type:mandatory default:0x1f
+	0x1f No unit mask
+name:x20 type:mandatory default:0x20
+	0x20 No unit mask
+name:x50 type:mandatory default:0x50
+	0x50 No unit mask
+name:ld_blocks type:exclusive default:0x2
+	0x2 extra: store_forward This event counts how many times the load operation got the true Block-on-Store blocking code preventing store forwarding. This includes cases when:  - preceding store conflicts with the load (incomplete overlap);  - store forwarding is impossible due to u-arch limitations;  - preceding lock RMW operations are not forwarded;  - store has the no-forward bit set (uncacheable/page-split/masked stores);  - all-blocking stores are used (mostly, fences and port I/O); and others. The most common case is a load blocked due to its address range overlapping with a preceding smaller uncompleted store. Note: This event does not take into account cases of out-of-SW-control (for example, SbTailHit), unknown physical STA, and cases of blocking loads on store due to being non-WB memory type or a lock. These cases are covered by other events. See the table of not supported store forwards in the Optimization Guide.
+	0x8 extra: no_sr This event counts the number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.
+name:misalign_mem_ref type:exclusive default:0x1
+	0x1 extra: loads This event counts speculative cache-line split load uops dispatched to the L1 cache.
+	0x2 extra: stores This event counts speculative cache line split store-address (STA) uops dispatched to the L1 cache.
+name:dtlb_load_misses type:exclusive default:0x1
+	0x1 extra: miss_causes_a_walk This event counts load misses in all DTLB levels that cause page walks of any page size (4K/2M/4M/1G).
+	0x2 extra: walk_completed_4k This event counts load misses in all DTLB levels that cause a completed page walk (4K page size). The page walk can end with or without a fault.
+	0x10 extra: walk_duration This event counts the number of cycles while PMH is busy with the page walk.
+	0x20 extra: stlb_hit_4k Load misses that miss the  DTLB and hit the STLB (4K)
+	0xe extra: walk_completed Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes of any page size.
+	0x60 extra: stlb_hit Load operations that miss the first DTLB level but hit the second and do not cause page walks
+name:uops_issued type:exclusive default:0x1
+	0x1 extra: any This event counts the number of Uops issued by the Resource Allocation Table (RAT) to the reservation station (RS).
+	0x10 extra: flags_merge Number of flags-merge uops being allocated. Such uops considered perf sensitive; added by GSR u-arch.
+	0x20 extra: slow_lea Number of slow LEA uops being allocated. A uop is generally considered SlowLea if it has 3 sources (e.g. 2 sources + immediate) regardless if as a result of LEA instruction or not.
+	0x40 extra: single_mul Number of Multiply packed/scalar single precision uops allocated
+	0x1 extra:cmask=1,inv stall_cycles This event counts cycles during which the Resource Allocation Table (RAT) does not issue any Uops to the reservation station (RS) for the current thread.
+name:l2_rqsts type:exclusive default:0x21
+	0x21 extra: demand_data_rd_miss This event counts the number of demand Data Read requests that miss L2 cache. Only not rejected loads are counted.
+	0x41 extra: demand_data_rd_hit This event counts the number of demand Data Read requests that hit L2 cache. Only not rejected loads are counted.
+	0x30 extra: l2_pf_miss This event counts the number of requests from the L2 hardware prefetchers that miss L2 cache.
+	0x50 extra: l2_pf_hit This event counts the number of requests from the L2 hardware prefetchers that hit L2 cache. L3 prefetch new types
+	0xe1 extra: all_demand_data_rd This event counts the number of demand Data Read requests (including requests from L1D hardware prefetchers). These loads may hit or miss L2 cache. Only non rejected loads are counted.
+	0xe2 extra: all_rfo This event counts the total number of RFO (read for ownership) requests to L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches.
+	0xe4 extra: all_code_rd This event counts the total number of L2 code requests.
+	0xf8 extra: all_pf This event counts the total number of requests from the L2 hardware prefetchers.
+	0x42 extra: rfo_hit RFO requests that hit L2 cache
+	0x22 extra: rfo_miss RFO requests that miss L2 cache
+	0x44 extra: code_rd_hit L2 cache hits when fetching instructions, code reads.
+	0x24 extra: code_rd_miss L2 cache misses when fetching instructions
+	0x27 extra: all_demand_miss Demand requests that miss L2 cache
+	0xe7 extra: all_demand_references Demand requests to L2 cache
+	0x3f extra: miss All requests that miss L2 cache
+	0xff extra: references All L2 requests
+name:l1d_pend_miss type:exclusive default:0x1
+	0x1 extra: pending This event counts duration of L1D miss outstanding, that is each cycle number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand; from the demand Hit FB, if it is allocated by hardware or software prefetch. Note: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type.
+	0x1 extra:cmask=1 pending_cycles This event counts duration of L1D miss outstanding in cycles.
+name:dtlb_store_misses type:exclusive default:0x1
+	0x1 extra: miss_causes_a_walk This event counts store misses in all DTLB levels that cause page walks of any page size (4K/2M/4M/1G).
+	0x2 extra: walk_completed_4k This event counts store misses in all DTLB levels that cause a completed page walk (4K page size). The page walk can end with or without a fault.
+	0x10 extra: walk_duration This event counts the number of cycles while PMH is busy with the page walk.
+	0x20 extra: stlb_hit_4k Store misses that miss the  DTLB and hit the STLB (4K)
+	0xe extra: walk_completed Store misses in all DTLB levels that cause completed page walks
+	0x60 extra: stlb_hit Store operations that miss the first TLB level but hit the second and do not cause page walks
+name:tx_mem type:exclusive default:0x1
+	0x1 extra: abort_conflict Number of times a TSX line had a cache conflict
+	0x2 extra: abort_capacity_write Number of times a TSX Abort was triggered due to an evicted line caused by a transaction overflow
+	0x4 extra: abort_hle_store_to_elided_lock Number of times a TSX Abort was triggered due to a non-release/commit store to lock
+	0x8 extra: abort_hle_elision_buffer_not_empty Number of times a TSX Abort was triggered due to commit but Lock Buffer not empty
+	0x10 extra: abort_hle_elision_buffer_mismatch Number of times a TSX Abort was triggered due to release/commit but data and address mismatch
+	0x20 extra: abort_hle_elision_buffer_unsupported_alignment Number of times a TSX Abort was triggered due to attempting an unsupported alignment from Lock Buffer
+	0x40 extra: hle_elision_buffer_full Number of times we could not allocate Lock Buffer
+name:move_elimination type:exclusive default:0x1
+	0x1 extra: int_eliminated Number of integer Move Elimination candidate uops that were eliminated.
+	0x2 extra: simd_eliminated Number of SIMD Move Elimination candidate uops that were eliminated.
+	0x4 extra: int_not_eliminated Number of integer Move Elimination candidate uops that were not eliminated.
+	0x8 extra: simd_not_eliminated Number of SIMD Move Elimination candidate uops that were not eliminated.
+name:cpl_cycles type:exclusive default:0x1
+	0x1 extra: ring0 This event counts the unhalted core cycles during which the thread is in the ring 0 privileged mode.
+	0x2 extra: ring123 This event counts unhalted core cycles during which the thread is in rings 1, 2, or 3.
+	0x1 extra:cmask=1,edge ring0_trans This event counts when there is a transition from ring 1,2 or 3 to ring0.
+name:tx_exec type:exclusive default:0x1
+	0x1 extra: misc1 Unfriendly TSX abort triggered by  a flowmarker
+	0x2 extra: misc2 Unfriendly TSX abort triggered by  a vzeroupper instruction
+	0x4 extra: misc3 Unfriendly TSX abort triggered by a nest count that is too deep
+	0x8 extra: misc4 RTM region detected inside HLE
+	0x10 extra: misc5 # HLE inside HLE+
+name:rs_events type:exclusive default:0x1
+	0x1 extra: empty_cycles This event counts cycles during which the reservation station (RS) is empty for the thread. Note: In ST-mode, not active thread should drive 0. This is usually caused by severely costly branch mispredictions, or allocator/FE issues.
+	0x1 extra:cmask=1,inv,edge empty_end Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.
+name:offcore_requests_outstanding type:exclusive default:0x1
+	0x1 extra: demand_data_rd This event counts the number of offcore outstanding Demand Data Read transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor. See the corresponding Umask under OFFCORE_REQUESTS. Note: A prefetch promoted to Demand is counted from the promotion point.
+	0x2 extra: demand_code_rd This event counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The "Offcore outstanding" state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.
+	0x4 extra: demand_rfo This event counts the number of offcore outstanding RFO (store) transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.
+	0x8 extra: all_data_rd This event counts the number of offcore outstanding cacheable Core Data Read transactions in the super queue every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.
+	0x1 extra:cmask=1 cycles_with_demand_data_rd This event counts cycles when offcore outstanding Demand Data Read transactions are present in the super queue (SQ). A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation).
+	0x8 extra:cmask=1 cycles_with_data_rd This event counts cycles when offcore outstanding cacheable Core Data Read transactions are present in the super queue. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.
+name:lock_cycles type:exclusive default:0x1
+	0x1 extra: split_lock_uc_lock_duration This event counts cycles in which the L1 and L2 are locked due to a UC lock or split lock. A lock is asserted in case of locked memory access, due to noncacheable memory, locked operation that spans two cache lines, or a page walk from the noncacheable page table. L1D and L2 locks have a very high performance penalty and it is highly recommended to avoid such access.
+	0x2 extra: cache_lock_duration This event counts the number of cycles when the L1D is locked. It is a superset of the 0x1 mask (BUS_LOCK_CLOCKS.BUS_LOCK_DURATION).
+name:idq type:exclusive default:0x2
+	0x2 extra: empty This counts the number of cycles that the instruction decoder queue is empty and can indicate that the application may be bound in the front end.  It does not determine whether there are uops being delivered to the Alloc stage since uops can be delivered by bypass skipping the Instruction Decode Queue (IDQ) when it is empty.
+	0x4 extra: mite_uops This event counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may "bypass" the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).
+	0x8 extra: dsb_uops This event counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may "bypass" the IDQ.
+	0x10 extra: ms_dsb_uops This event counts the number of uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may "bypass" the IDQ.
+	0x20 extra: ms_mite_uops This event counts the number of uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may "bypass" the IDQ.
+	0x30 extra: ms_uops This event counts the total number of uops delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may "bypass" the IDQ. Uops may be initiated by Decode Stream Buffer (DSB) or MITE.
+	0x30 extra:cmask=1 ms_cycles This event counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may "bypass" the IDQ. Uops may be initiated by Decode Stream Buffer (DSB) or MITE.
+	0x4 extra:cmask=1 mite_cycles This event counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may "bypass" the IDQ.
+	0x8 extra:cmask=1 dsb_cycles This event counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may "bypass" the IDQ.
+	0x10 extra:cmask=1 ms_dsb_cycles This event counts cycles during which uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may "bypass" the IDQ.
+	0x10 extra:cmask=1,edge ms_dsb_occur This event counts the number of deliveries to Instruction Decode Queue (IDQ) initiated by Decode Stream Buffer (DSB) while the Microcode Sequencer (MS) is busy. Counting includes uops that may "bypass" the IDQ.
+	0x18 extra:cmask=4 all_dsb_cycles_4_uops This event counts the number of cycles 4  uops were  delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may "bypass" the IDQ.
+	0x18 extra:cmask=1 all_dsb_cycles_any_uops This event counts the number of cycles  uops were  delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may "bypass" the IDQ.
+	0x24 extra:cmask=4 all_mite_cycles_4_uops This event counts the number of cycles 4  uops were  delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may "bypass" the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).
+	0x24 extra:cmask=1 all_mite_cycles_any_uops This event counts the number of cycles  uops were delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may "bypass" the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).
+	0x3c extra: mite_all_uops This event counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may "bypass" the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).
+	0x30 extra:cmask=1,edge ms_switches Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer
+name:itlb_misses type:exclusive default:0x1
+	0x1 extra: miss_causes_a_walk This event counts misses in all ITLB levels that cause page walks of any page size (4K/2M/4M/1G).
+	0x2 extra: walk_completed_4k This event counts misses in all ITLB levels that cause a completed page walk (4K page size). The page walk can end with or without a fault.
+	0x10 extra: walk_duration This event counts the number of cycles while PMH is busy with the page walk.
+	0x20 extra: stlb_hit_4k Core misses that miss the  DTLB and hit the STLB (4K)
+	0xe extra: walk_completed Misses in all ITLB levels that cause completed page walks
+	0x60 extra: stlb_hit Operations that miss the first ITLB level but hit the second and do not cause any page walks
+name:br_inst_exec type:exclusive default:0xff
+	0xff extra: all_branches This event counts both taken and not taken speculative and retired branch instructions.
+	0x41 extra: nontaken_conditional This event counts not taken macro-conditional branch instructions.
+	0x81 extra: taken_conditional This event counts taken speculative and retired macro-conditional branch instructions.
+	0x82 extra: taken_direct_jump This event counts taken speculative and retired macro-unconditional branch instructions excluding calls and indirect branches.
+	0x84 extra: taken_indirect_jump_non_call_ret This event counts taken speculative and retired indirect branches excluding calls and return branches.
+	0x88 extra: taken_indirect_near_return This event counts taken speculative and retired indirect branches that have a return mnemonic.
+	0x90 extra: taken_direct_near_call This event counts taken speculative and retired direct near calls.
+	0xa0 extra: taken_indirect_near_call This event counts taken speculative and retired indirect calls including both register and memory indirect.
+	0xc1 extra: all_conditional This event counts both taken and not taken speculative and retired macro-conditional branch instructions.
+	0xc2 extra: all_direct_jmp This event counts both taken and not taken speculative and retired macro-unconditional branch instructions, excluding calls and indirects.
+	0xc4 extra: all_indirect_jump_non_call_ret This event counts both taken and not taken speculative and retired indirect branches excluding calls and return branches.
+	0xc8 extra: all_indirect_near_return This event counts both taken and not taken speculative and retired indirect branches that have a return mnemonic.
+	0xd0 extra: all_direct_near_call This event counts both taken and not taken speculative and retired direct near calls.
+name:br_misp_exec type:exclusive default:0xff
+	0xff extra: all_branches This event counts both taken and not taken speculative and retired mispredicted branch instructions.
+	0x41 extra: nontaken_conditional This event counts not taken speculative and retired mispredicted macro conditional branch instructions.
+	0x81 extra: taken_conditional This event counts taken speculative and retired mispredicted macro conditional branch instructions.
+	0x84 extra: taken_indirect_jump_non_call_ret This event counts taken speculative and retired mispredicted indirect branches excluding calls and returns.
+	0xc1 extra: all_conditional This event counts both taken and not taken speculative and retired mispredicted macro conditional branch instructions.
+	0xc4 extra: all_indirect_jump_non_call_ret This event counts both taken and not taken mispredicted indirect branches excluding calls and returns.
+	0xa0 extra: taken_indirect_near_call Taken speculative and retired mispredicted indirect calls
+name:idq_uops_not_delivered type:exclusive default:0x1
+	0x1 extra: core This event counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding "4 - x" when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). Counting does not cover cases when:  a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread;  b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions);   c. Instruction Decode Queue (IDQ) delivers four uops.
+	0x1 extra:cmask=4 cycles_0_uops_deliv_core This event counts, on the per-thread basis, cycles when no uops are delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core =4.
+	0x1 extra:cmask=3 cycles_le_1_uop_deliv_core This event counts, on the per-thread basis, cycles when at most 1 uop is delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core >=3.
+	0x1 extra:cmask=2 cycles_le_2_uop_deliv_core Cycles with at most 2 uops delivered by the front end
+	0x1 extra:cmask=1 cycles_le_3_uop_deliv_core Cycles with at most 3 uops delivered by the front end
+	0x1 extra:cmask=1,inv cycles_fe_was_ok Counts cycles FE delivered 4 uops or Resource Allocation Table (RAT) was stalling FE.
+name:uops_executed_port type:exclusive default:0x1
+	0x1 extra:any port_0_core Cycles per core when uops are executed in port 0
+	0x2 extra:any port_1_core Cycles per core when uops are executed in port 1
+	0x4 extra:any port_2_core Cycles per core when uops are dispatched to port 2
+	0x8 extra:any port_3_core Cycles per core when uops are dispatched to port 3
+	0x10 extra:any port_4_core Cycles per core when uops are executed in port 4
+	0x20 extra:any port_5_core Cycles per core when uops are executed in port 5
+	0x40 extra:any port_6_core Cycles per core when uops are executed in port 6
+	0x80 extra:any port_7_core Cycles per core when uops are dispatched to port 7
+	0x1 extra: port_0 This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 0.
+	0x2 extra: port_1 This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 1.
+	0x4 extra: port_2 This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 2.
+	0x8 extra: port_3 This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 3.
+	0x10 extra: port_4 This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 4.
+	0x20 extra: port_5 This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 5.
+	0x40 extra: port_6 This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 6.
+	0x80 extra: port_7 This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 7.
+name:uops_dispatched_port type:exclusive default:0x1
+	0x1 extra: port_0 This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 0.
+	0x2 extra: port_1 This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 1.
+	0x4 extra: port_2 This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 2.
+	0x8 extra: port_3 This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 3.
+	0x10 extra: port_4 This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 4.
+	0x20 extra: port_5 This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 5.
+	0x40 extra: port_6 This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 6.
+	0x80 extra: port_7 This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 7.
+name:resource_stalls type:exclusive default:0x1
+	0x1 extra: any This event counts resource-related stall cycles. Reasons for stalls can be as follows:  - *any* u-arch structure got full (LB, SB, RS, ROB, BOB, LM, Physical Register Reclaim Table (PRRT), or Physical History Table (PHT) slots)  - *any* u-arch structure got empty (like INT/SIMD FreeLists)  - FPU control word (FPCW), MXCSR and others. This counts cycles that the pipeline backend blocked uop delivery from the front end.
+	0x4 extra: rs This event counts stall cycles caused by absence of eligible entries in the reservation station (RS). This may result from RS overflow, or from RS deallocation because of the RS array Write Port allocation scheme (each RS entry has two write ports instead of four. As a result, empty entries could not be used, although RS is not really full). This counts cycles that the pipeline backend blocked uop delivery from the front end.
+	0x8 extra: sb This event counts stall cycles caused by the store buffer (SB) overflow (excluding draining from synch). This counts cycles that the pipeline backend blocked uop delivery from the front end.
+	0x10 extra: rob This event counts ROB full stall cycles. This counts cycles that the pipeline backend blocked uop delivery from the front end.
+name:cycle_activity type:exclusive default:0x1
+	0x1 extra:cmask=1 cycles_l2_pending Counts number of cycles the CPU has at least one pending  demand* load request missing the L2 cache.
+	0x8 extra:cmask=8 cycles_l1d_pending Counts number of cycles the CPU has at least one pending  demand load request missing the L1 data cache.
+	0x2 extra:cmask=2 cycles_ldm_pending Counts number of cycles the CPU has at least one pending  demand load request (that is cycles with non-completed load waiting for its data from memory subsystem)
+	0x4 extra:cmask=4 cycles_no_execute Counts number of cycles nothing is executed on any execution port.
+	0x5 extra:cmask=5 stalls_l2_pending Counts number of cycles nothing is executed on any execution port, while there was at least one pending demand* load request missing the L2 cache.    (as a footprint) * includes also L1 HW prefetch requests that may or may not be required by demands
+	0x6 extra:cmask=6 stalls_ldm_pending Counts number of cycles nothing is executed on any execution port, while there was at least one pending demand load request.
+	0xc extra:cmask=c stalls_l1d_pending Counts number of cycles nothing is executed on any execution port, while there was at least one pending demand load request missing the L1 data cache.
+	0x8 extra:cmask=8 cycles_l1d_miss Cycles while L1 cache miss demand load is outstanding.
+	0x1 extra:cmask=1 cycles_l2_miss Cycles while L2 cache miss demand load is outstanding.
+	0x2 extra:cmask=2 cycles_mem_any Cycles while memory subsystem has an outstanding load.
+	0x4 extra:cmask=4 stalls_total Total execution stalls.
+	0xc extra:cmask=c stalls_l1d_miss Execution stalls while L1 cache miss demand load is outstanding.
+	0x5 extra:cmask=5 stalls_l2_miss Execution stalls while L2 cache miss demand load is outstanding.
+	0x6 extra:cmask=6 stalls_mem_any Execution stalls while memory subsystem has an outstanding load.
+name:lsd type:exclusive default:0x1
+	0x1 extra: uops Number of Uops delivered by the LSD. Read more on LSD under LSD_REPLAY.REPLAY
+	0x1 extra:cmask=4 cycles_4_uops Cycles 4 Uops delivered by the LSD, but didn't come from the decoder
+	0x1 extra:cmask=1 cycles_active Cycles Uops delivered by the LSD, but didn't come from the decoder
+name:offcore_requests type:exclusive default:0x1
+	0x1 extra: demand_data_rd This event counts the Demand Data Read requests sent to uncore. Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore.
+	0x2 extra: demand_code_rd This event counts both cacheable and noncacheable code read requests.
+	0x4 extra: demand_rfo This event counts the demand RFO (read for ownership) requests including regular RFOs, locks, ItoM.
+	0x8 extra: all_data_rd This event counts the demand and prefetch data reads. All Core Data Reads include cacheable "Demands" and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.
+name:uops_executed type:exclusive default:0x1
+	0x1 extra: thread Number of uops to be executed per-thread each cycle.
+	0x2 extra: core Number of uops executed from any thread
+	0x1 extra:cmask=1,inv stall_cycles This event counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.
+	0x1 extra:cmask=1 cycles_ge_1_uop_exec Cycles where at least 1 uop was executed per-thread
+	0x1 extra:cmask=2 cycles_ge_2_uops_exec Cycles where at least 2 uops were executed per-thread
+	0x1 extra:cmask=3 cycles_ge_3_uops_exec Cycles where at least 3 uops were executed per-thread
+	0x1 extra:cmask=4 cycles_ge_4_uops_exec Cycles where at least 4 uops were executed per-thread
+name:page_walker_loads type:exclusive default:0x11
+	0x11 extra: dtlb_l1 Number of DTLB page walker hits in the L1+FB
+	0x21 extra: itlb_l1 Number of ITLB page walker hits in the L1+FB
+	0x12 extra: dtlb_l2 Number of DTLB page walker hits in the L2
+	0x22 extra: itlb_l2 Number of ITLB page walker hits in the L2
+	0x14 extra: dtlb_l3 Number of DTLB page walker hits in the L3 + XSNP
+	0x24 extra: itlb_l3 Number of ITLB page walker hits in the L3 + XSNP
+	0x18 extra: dtlb_memory Number of DTLB page walker hits in Memory
+name:inst_retired type:exclusive default:0x2
+	0x2 extra: x87 This is a non-precise version (that is, does not use PEBS) of the event that counts FP operations retired. For X87 FP operations that have no exceptions counting also includes flows that have several X87, or flows that use X87 uops in the exception handling.
+	0x1 extra:pebs prec_dist This is a precise version (that is, uses PEBS) of the event that counts instructions retired.
+name:other_assists type:exclusive default:0x8
+	0x8 extra: avx_to_sse This is a non-precise version (that is, does not use PEBS) of the event that counts the number of transitions from AVX-256 to legacy SSE when penalty is applicable.
+	0x10 extra: sse_to_avx This is a non-precise version (that is, does not use PEBS) of the event that counts the number of transitions from legacy SSE to AVX-256 when penalty is applicable.
+	0x40 extra: any_wb_assist Number of times any microcode assist is invoked by HW upon uop writeback.
+name:uops_retired type:exclusive default:0x1
+	0x1 extra: all This is a non-precise version (that is, does not use PEBS) of the event that counts all actually retired uops. Counting increments by two for micro-fused uops, and by one for macro-fused and other uops. Maximal increment value for one cycle is eight.
+	0x1 extra: all_pebs Counts all actually retired uops. Counting increments by two for micro-fused uops, and by one for macro-fused and other uops. Maximal increment value for one cycle is eight.
+	0x2 extra: retire_slots This is a non-precise version (that is, does not use PEBS) of the event that counts the number of retirement slots used.
+	0x2 extra: retire_slots_pebs Counts the number of retirement slots used.
+	0x1 extra:cmask=1,inv stall_cycles This is a non-precise version (that is, does not use PEBS) of the event that counts cycles without actually retired uops.
+	0x1 extra:cmask=a,inv total_cycles Number of cycles using always true condition (uops_ret < 16) applied to non PEBS uops retired event.
+name:machine_clears type:exclusive default:0x1
+	0x1 extra: cycles This event counts both thread-specific (TS) and all-thread (AT) nukes.
+	0x2 extra: memory_ordering This event counts the number of memory ordering Machine Clears detected. Memory Ordering Machine Clears can result from one of the following: 1. memory disambiguation, 2. external snoop, or 3. cross SMT-HW-thread snoop (stores) hitting load buffer.
+	0x4 extra: smc This event counts self-modifying code (SMC) detected, which causes a machine clear.
+	0x20 extra: maskmov Maskmov false fault - counts the number of times ucode passes through Maskmov flow due to instruction's mask being 0 while the flow was completed without raising a fault.
+	0x1 extra:cmask=1,edge count Number of machine clears (nukes) of any type.
+name:br_inst_retired type:exclusive default:0x1
+	0x1 extra: conditional This is a non-precise version (that is, does not use PEBS) of the event that counts conditional branch instructions retired.
+	0x1 extra: conditional_pebs Counts conditional branch instructions retired.
+	0x2 extra: near_call This is a non-precise version (that is, does not use PEBS) of the event that counts both direct and indirect near call instructions retired.
+	0x2 extra: near_call_pebs Counts both direct and indirect near call instructions retired.
+	0x8 extra: near_return This is a non-precise version (that is, does not use PEBS) of the event that counts return instructions retired.
+	0x8 extra: near_return_pebs Counts return instructions retired.
+	0x10 extra: not_taken This is a non-precise version (that is, does not use PEBS) of the event that counts not taken branch instructions retired.
+	0x20 extra: near_taken This is a non-precise version (that is, does not use PEBS) of the event that counts taken branch instructions retired.
+	0x20 extra: near_taken_pebs Counts taken branch instructions retired.
+	0x40 extra: far_branch This is a non-precise version (that is, does not use PEBS) of the event that counts far branch instructions retired.
+	0x4 extra:pebs all_branches_pebs This is a precise version of BR_INST_RETIRED.ALL_BRANCHES that counts all (macro) branch instructions retired.
+name:br_misp_retired type:exclusive default:0x1
+	0x1 extra: conditional This is a non-precise version (that is, does not use PEBS) of the event that counts mispredicted conditional branch instructions retired.
+	0x1 extra: conditional_pebs Counts mispredicted conditional branch instructions retired.
+	0x4 extra:pebs all_branches_pebs This is a precise version of BR_MISP_RETIRED.ALL_BRANCHES that counts all mispredicted macro branch instructions retired.
+	0x20 extra: near_taken number of near branch instructions retired that were mispredicted and taken.
+	0x20 extra: near_taken_pebs number of near branch instructions retired that were mispredicted and taken.
+name:hle_retired type:exclusive default:0x1
+	0x1 extra: start Number of times we entered an HLE region; does not count nested transactions
+	0x2 extra: commit Number of times HLE commit succeeded
+	0x4 extra: aborted Number of times HLE abort was triggered
+	0x4 extra: aborted_pebs Number of times HLE abort was triggered
+	0x8 extra: aborted_misc1 Number of times an HLE abort was attributed to a Memory condition (See TSX_Memory event for additional details)
+	0x10 extra: aborted_misc2 Number of times the TSX watchdog signaled an HLE abort
+	0x20 extra: aborted_misc3 Number of times a disallowed operation caused an HLE abort
+	0x40 extra: aborted_misc4 Number of times HLE caused a fault
+	0x80 extra: aborted_misc5 Number of times HLE aborted and was not due to the abort conditions in subevents 3-6
+name:rtm_retired type:exclusive default:0x1
+	0x1 extra: start Number of times we entered an RTM region; does not count nested transactions
+	0x2 extra: commit Number of times RTM commit succeeded
+	0x4 extra: aborted Number of times RTM abort was triggered
+	0x4 extra: aborted_pebs Number of times RTM abort was triggered
+	0x8 extra: aborted_misc1 Number of times an RTM abort was attributed to a Memory condition (See TSX_Memory event for additional details)
+	0x10 extra: aborted_misc2 Number of times the TSX watchdog signaled an RTM abort
+	0x20 extra: aborted_misc3 Number of times a disallowed operation caused an RTM abort
+	0x40 extra: aborted_misc4 Number of times RTM caused a fault
+	0x80 extra: aborted_misc5 Number of times RTM aborted and was not due to the abort conditions in subevents 3-6
+name:fp_assist type:exclusive default:0x1e
+	0x1e extra:cmask=1 any This event counts cycles with any input and output SSE or x87 FP assist. If an input and output assist are detected on the same cycle the event increments by 1.
+	0x2 extra: x87_output This is a non-precise version (that is, does not use PEBS) of the event that counts the number of x87 floating point (FP) micro-code assist (numeric overflow/underflow, inexact result) when the output value (destination register) is invalid.
+	0x4 extra: x87_input This is a non-precise version (that is, does not use PEBS) of the event that counts x87 floating point (FP) micro-code assist (invalid operation, denormal operand, SNaN operand) when the input value (one of the source operands to an FP instruction) is invalid.
+	0x8 extra: simd_output This is a non-precise version (that is, does not use PEBS) of the event that counts the number of SSE* floating point (FP) micro-code assist (numeric overflow/underflow) when the output value (destination register) is invalid. Counting covers only cases involving penalties that require micro-code assist intervention.
+	0x10 extra: simd_input This is a non-precise version (that is, does not use PEBS) of the event that counts any input SSE* FP assist - invalid operation, denormal operand, dividing by zero, SNaN operand. Counting includes only cases involving penalties that required micro-code assist intervention.
+name:mem_uops_retired type:exclusive default:0x11
+	0x11 extra: stlb_miss_loads This is a non-precise version (that is, does not use PEBS) of the event that counts load uops with true STLB miss retired to the architected path. True STLB miss is an uop triggering page walk that gets completed without blocks, and later gets retired. This page walk can end up with or without a fault.
+	0x11 extra: stlb_miss_loads_pebs Counts load uops with true STLB miss retired to the architected path. True STLB miss is an uop triggering page walk that gets completed without blocks, and later gets retired. This page walk can end up with or without a fault.
+	0x12 extra: stlb_miss_stores This is a non-precise version (that is, does not use PEBS) of the event that counts store uops with true STLB miss retired to the architected path. True STLB miss is an uop triggering page walk that gets completed without blocks, and later gets retired. This page walk can end up with or without a fault.
+	0x12 extra: stlb_miss_stores_pebs Counts store uops with true STLB miss retired to the architected path. True STLB miss is an uop triggering page walk that gets completed without blocks, and later gets retired. This page walk can end up with or without a fault.
+	0x21 extra: lock_loads This is a non-precise version (that is, does not use PEBS) of the event that counts load uops with locked access retired to the architected path.
+	0x21 extra: lock_loads_pebs Counts load uops with locked access retired to the architected path.
+	0x41 extra: split_loads This is a non-precise version (that is, does not use PEBS) of the event that counts line-splitted load uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).
+	0x41 extra: split_loads_pebs Counts line-splitted load uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).
+	0x42 extra: split_stores This is a non-precise version (that is, does not use PEBS) of the event that counts line-splitted store uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).
+	0x42 extra: split_stores_pebs Counts line-splitted store uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).
+	0x81 extra: all_loads This is a non-precise version (that is, does not use PEBS) of the event that counts load uops retired to the architected path with a filter on bits 0 and 1 applied. Note: This event counts AVX-256bit load/store double-pump memory uops as a single uop at retirement. This event also counts SW prefetches.
+	0x81 extra: all_loads_pebs Counts load uops retired to the architected path with a filter on bits 0 and 1 applied. Note: This event counts AVX-256bit load/store double-pump memory uops as a single uop at retirement. This event also counts SW prefetches.
+	0x82 extra: all_stores This is a non-precise version (that is, does not use PEBS) of the event that counts store uops retired to the architected path with a filter on bits 0 and 1 applied. Note: This event counts AVX-256bit load/store double-pump memory uops as a single uop at retirement.
+	0x82 extra: all_stores_pebs Counts store uops retired to the architected path with a filter on bits 0 and 1 applied. Note: This event counts AVX-256bit load/store double-pump memory uops as a single uop at retirement.
+name:mem_load_uops_retired type:exclusive default:0x1
+	0x1 extra: l1_hit This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were hits in the nearest-level (L1) cache. Note: Only two data-sources of L1/FB are applicable for AVX-256bit  even though the corresponding AVX load could be serviced by a deeper level in the memory hierarchy. Data source is reported for the Low-half load. This event also counts SW prefetches independent of the actual data source
+	0x1 extra: l1_hit_pebs Counts retired load uops which data sources were hits in the nearest-level (L1) cache. Note: Only two data-sources of L1/FB are applicable for AVX-256bit  even though the corresponding AVX load could be serviced by a deeper level in the memory hierarchy. Data source is reported for the Low-half load. This event also counts SW prefetches independent of the actual data source
+	0x2 extra: l2_hit This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were hits in the mid-level (L2) cache.
+	0x2 extra: l2_hit_pebs Counts retired load uops which data sources were hits in the mid-level (L2) cache.
+	0x4 extra: l3_hit This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were data hits in the last-level (L3) cache without snoops required.
+	0x4 extra: l3_hit_pebs Counts retired load uops which data sources were data hits in the last-level (L3) cache without snoops required.
+	0x8 extra: l1_miss This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were misses in the nearest-level (L1) cache. Counting excludes unknown and UC data source.
+	0x8 extra: l1_miss_pebs Counts retired load uops which data sources were misses in the nearest-level (L1) cache. Counting excludes unknown and UC data source.
+	0x10 extra: l2_miss This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were misses in the mid-level (L2) cache. Counting excludes unknown and UC data source.
+	0x10 extra: l2_miss_pebs Counts retired load uops which data sources were misses in the mid-level (L2) cache. Counting excludes unknown and UC data source.
+	0x20 extra: l3_miss Miss in last-level (L3) cache. Excludes Unknown data-source.
+	0x20 extra: l3_miss_pebs Miss in last-level (L3) cache. Excludes Unknown data-source.
+	0x40 extra: hit_lfb This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were load uops missed L1 but hit a fill buffer due to a preceding miss to the same cache line with the data not ready. Note: Only two data-sources of L1/FB are applicable for AVX-256bit  even though the corresponding AVX load could be serviced by a deeper level in the memory hierarchy. Data source is reported for the Low-half load.
+	0x40 extra: hit_lfb_pebs Counts retired load uops which data sources were load uops missed L1 but hit a fill buffer due to a preceding miss to the same cache line with the data not ready. Note: Only two data-sources of L1/FB are applicable for AVX-256bit  even though the corresponding AVX load could be serviced by a deeper level in the memory hierarchy. Data source is reported for the Low-half load.
+name:mem_load_uops_l3_hit_retired type:exclusive default:0x1
+	0x1 extra: xsnp_miss This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were L3 Hit and a cross-core snoop missed in the on-pkg core cache.
+	0x1 extra: xsnp_miss_pebs Counts retired load uops which data sources were L3 Hit and a cross-core snoop missed in the on-pkg core cache.
+	0x2 extra: xsnp_hit This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were L3 hit and a cross-core snoop hit in the on-pkg core cache.
+	0x2 extra: xsnp_hit_pebs Counts retired load uops which data sources were L3 hit and a cross-core snoop hit in the on-pkg core cache.
+	0x4 extra: xsnp_hitm This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were HitM responses from a core on same socket (shared L3).
+	0x4 extra: xsnp_hitm_pebs Counts retired load uops which data sources were HitM responses from a core on same socket (shared L3).
+	0x8 extra: xsnp_none This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were hits in the last-level (L3) cache without snoops required.
+	0x8 extra: xsnp_none_pebs Counts retired load uops which data sources were hits in the last-level (L3) cache without snoops required.
+name:mem_load_uops_l3_miss_retired type:exclusive default:0x1
+	0x1 extra: local_dram Retired load uop whose Data Source was: local DRAM either Snoop not needed or Snoop Miss (RspI)
+	0x1 extra: local_dram_pebs Retired load uop whose Data Source was: local DRAM either Snoop not needed or Snoop Miss (RspI)
+name:l2_trans type:exclusive default:0x80
+	0x80 extra: all_requests This event counts transactions that access the L2 pipe including snoops, pagewalks, and so on.
+	0x1 extra: demand_data_rd This event counts Demand Data Read requests that access L2 cache, including rejects.
+	0x2 extra: rfo This event counts Read for Ownership (RFO) requests that access L2 cache.
+	0x4 extra: code_rd This event counts the number of L2 cache accesses when fetching instructions.
+	0x8 extra: all_pf This event counts L2 or L3 HW prefetches that access L2 cache including rejects.
+	0x10 extra: l1d_wb This event counts L1D writebacks that access L2 cache.
+	0x20 extra: l2_fill This event counts L2 fill requests that access L2 cache.
+	0x40 extra: l2_wb This event counts L2 writebacks that access L2 cache.
+name:l2_lines_in type:exclusive default:0x7
+	0x7 extra: all This event counts the number of L2 cache lines filling the L2. Counting does not cover rejects.
+	0x1 extra: i This event counts the number of L2 cache lines in the Invalidate state filling the L2. Counting does not cover rejects.
+	0x2 extra: s This event counts the number of L2 cache lines in the Shared state filling the L2. Counting does not cover rejects.
+	0x4 extra: e This event counts the number of L2 cache lines in the Exclusive state filling the L2. Counting does not cover rejects.
diff --git a/events/i386/core_2/unit_masks b/events/i386/core_2/unit_masks
index d528f17..6bc0960 100644
--- a/events/i386/core_2/unit_masks
+++ b/events/i386/core_2/unit_masks
@@ -50,30 +50,30 @@ name:sse_miss type:exclusive default:0x0
 	0x01 PREFETCHT0
 	0x02 PREFETCHT1/PREFETCHT2
 name:load_block type:bitmask default:0x3e
-	0x02 STA  Loads blocked by a preceding store with unknown address.
-	0x04 STD  Loads blocked by a preceding store with unknown data.
-	0x08 OVERLAP_STORE  Loads that partially overlap an earlier store, or 4K aliased with a previous store.
-	0x10 UNTIL_RETIRE  Loads blocked until retirement.
-	0x20 L1D  Loads blocked by the L1 data cache.
+	0x02 extra: STA  Loads blocked by a preceding store with unknown address.
+	0x04 extra: STD  Loads blocked by a preceding store with unknown data.
+	0x08 extra: OVERLAP_STORE  Loads that partially overlap an earlier store, or 4K aliased with a previous store.
+	0x10 extra: UNTIL_RETIRE  Loads blocked until retirement.
+	0x20 extra: L1D  Loads blocked by the L1 data cache.
 name:store_block type:bitmask default:0x0b
-	0x01 SB_DRAIN_CYCLES	Cycles while stores are blocked due to store buffer drain.
-	0x02 ORDER	Cycles while store is waiting for a preceding store to be globally observed.
-	0x08 NOOP	A store is blocked due to a conflict with an external or internal snoop.
+	0x01 extra: SB_DRAIN_CYCLES	Cycles while stores are blocked due to store buffer drain.
+	0x02 extra: ORDER	Cycles while store is waiting for a preceding store to be globally observed.
+	0x08 extra: NOOP	A store is blocked due to a conflict with an external or internal snoop.
 name:dtlb_miss type:bitmask default:0x0f
-	0x01 ANY	Memory accesses that missed the DTLB.
-	0x02 MISS_LD	DTLB misses due to load operations.
-	0x04 L0_MISS_LD L0 DTLB misses due to load operations.
-	0x08 MISS_ST	TLB misses due to store operations.
+	0x01 extra: ANY	Memory accesses that missed the DTLB.
+	0x02 extra: MISS_LD	DTLB misses due to load operations.
+	0x04 extra: L0_MISS_LD L0 DTLB misses due to load operations.
+	0x08 extra: MISS_ST	TLB misses due to store operations.
 name:memory_dis type:exclusive default:0x01
-	0x01 RESET	Memory disambiguation reset cycles.
-	0x02 SUCCESS	Number of loads that were successfully disambiguated.
+	0x01 extra: RESET	Memory disambiguation reset cycles.
+	0x02 extra: SUCCESS	Number of loads that were successfully disambiguated.
 name:page_walks type:exclusive default:0x02
-	0x01 COUNT	Number of page-walks executed.
-	0x02 CYCLES	Duration of page-walks in core cycles.
+	0x01 extra: COUNT	Number of page-walks executed.
+	0x02 extra: CYCLES	Duration of page-walks in core cycles.
 name:delayed_bypass type:exclusive default:0x00
-	0x00 FP		Delayed bypass to FP operation.
-	0x01 SIMD	Delayed bypass to SIMD operation.
-	0x02 LOAD	Delayed bypass to load operation.
+	0x00 extra: FP		Delayed bypass to FP operation.
+	0x01 extra: SIMD	Delayed bypass to SIMD operation.
+	0x02 extra: LOAD	Delayed bypass to load operation.
 name:core type:exclusive default:0x40
 	0xc0	All cores
 	0x40	This core
@@ -133,10 +133,10 @@ name:esp type:bitmask default:0x01
 	0x01	ESP register content synchronizations
 	0x02	ESP register automatic additions
 name:inst_retired type:bitmask default:0x00
-	0x00	Any
-	0x01	Loads
-	0x02	Stores
-	0x04	Other
+	0x00	extra: Any
+	0x01	extra: Loads
+	0x02	extra: Stores
+	0x04	extra: Other
 name:x87_ops_retired type:exclusive default:0xfe
 	0x01	FXCH instructions retired
 	0xfe	Retired floating-point computational operations (precise)
@@ -183,10 +183,10 @@ name:rat_stalls type:bitmask default:0xf
 	0x08	FPU status word
 	0x0f	All RAT
 name:seg_regs type:bitmask default:0x0f
-	0x01	ES
-	0x02	DS
-	0x04	FS
-	0x08	GS
+	0x01	extra: ES
+	0x02	extra: DS
+	0x04	extra: FS
+	0x08	extra: GS
 name:resource_stalls type:bitmask default:0x0f
 	0x01	when the ROB is full
 	0x02	during which the RS is full
diff --git a/events/i386/haswell/events b/events/i386/haswell/events
new file mode 100644
index 0000000..5aa5eb5
--- /dev/null
+++ b/events/i386/haswell/events
@@ -0,0 +1,64 @@
+#
+# Intel "Haswell" microarchitecture core events.
+#
+# See http://ark.intel.com/ for help in identifying Haswell based CPUs
+#
+# Note: the minimum counts were not determined experimentally and could likely
+# be lowered in many cases without ill effect.
+#
+include:i386/arch_perfmon
+event:0x03 counters:cpuid um:ld_blocks minimum:100003 name:ld_blocks :
+event:0x05 counters:cpuid um:misalign_mem_ref minimum:2000003 name:misalign_mem_ref :
+event:0x07 counters:cpuid um:one minimum:100003 name:ld_blocks_partial_address_alias :
+event:0x08 counters:cpuid um:dtlb_load_misses minimum:2000003 name:dtlb_load_misses :
+event:0x0d counters:cpuid um:x03 minimum:2000003 name:int_misc_recovery_cycles :
+event:0x0e counters:cpuid um:uops_issued minimum:2000003 name:uops_issued :
+event:0x24 counters:cpuid um:l2_rqsts minimum:200003 name:l2_rqsts :
+event:0x27 counters:cpuid um:x50 minimum:200003 name:l2_demand_rqsts_wb_hit :
+event:0x48 counters:2 um:l1d_pend_miss minimum:2000003 name:l1d_pend_miss :
+event:0x49 counters:cpuid um:dtlb_store_misses minimum:100003 name:dtlb_store_misses :
+event:0x4c counters:cpuid um:load_hit_pre minimum:100003 name:load_hit_pre :
+event:0x4f counters:cpuid um:x10 minimum:2000003 name:ept_walk_cycles :
+event:0x51 counters:cpuid um:one minimum:2000003 name:l1d_replacement :
+event:0x54 counters:cpuid um:tx_mem minimum:2000003 name:tx_mem :
+event:0x58 counters:cpuid um:move_elimination minimum:1000003 name:move_elimination :
+event:0x5c counters:cpuid um:cpl_cycles minimum:2000003 name:cpl_cycles :
+event:0x5d counters:cpuid um:tx_exec minimum:2000003 name:tx_exec :
+event:0x5e counters:cpuid um:rs_events minimum:2000003 name:rs_events :
+event:0x60 counters:cpuid um:offcore_requests_outstanding minimum:2000003 name:offcore_requests_outstanding :
+event:0x63 counters:cpuid um:lock_cycles minimum:2000003 name:lock_cycles :
+event:0x79 counters:0,1,2,3 um:idq minimum:2000003 name:idq :
+event:0x80 counters:cpuid um:icache minimum:2000003 name:icache :
+event:0x85 counters:cpuid um:itlb_misses minimum:100003 name:itlb_misses :
+event:0x87 counters:cpuid um:ild_stall minimum:2000003 name:ild_stall :
+event:0x88 counters:cpuid um:br_inst_exec minimum:200003 name:br_inst_exec :
+event:0x89 counters:cpuid um:br_misp_exec minimum:200003 name:br_misp_exec :
+event:0x9c counters:0,1,2,3 um:idq_uops_not_delivered minimum:2000003 name:idq_uops_not_delivered :
+event:0xa1 counters:cpuid um:uops_executed_port minimum:2000003 name:uops_executed_port :
+event:0xa2 counters:cpuid um:resource_stalls minimum:2000003 name:resource_stalls :
+event:0xa3 counters:2 um:cycle_activity minimum:2000003 name:cycle_activity :
+event:0xa8 counters:cpuid um:one minimum:2000003 name:lsd_uops :
+event:0xab counters:cpuid um:x02 minimum:2000003 name:dsb2mite_switches_penalty_cycles :
+event:0xae counters:cpuid um:one minimum:100007 name:itlb_itlb_flush :
+event:0xb0 counters:cpuid um:offcore_requests minimum:100003 name:offcore_requests :
+event:0xb1 counters:cpuid um:uops_executed minimum:2000003 name:uops_executed :
+event:0xbc counters:0,1,2,3 um:page_walker_loads minimum:2000003 name:page_walker_loads :
+event:0xbd counters:cpuid um:tlb_flush minimum:100007 name:tlb_flush :
+event:0xc0 counters:1 um:one minimum:2000003 name:inst_retired_prec_dist :
+event:0xc1 counters:cpuid um:other_assists minimum:100003 name:other_assists :
+event:0xc2 counters:cpuid um:uops_retired minimum:2000003 name:uops_retired :
+event:0xc3 counters:cpuid um:machine_clears minimum:2000003 name:machine_clears :
+event:0xc4 counters:cpuid um:br_inst_retired minimum:400009 name:br_inst_retired :
+event:0xc5 counters:cpuid um:br_misp_retired minimum:400009 name:br_misp_retired :
+event:0xc8 counters:cpuid um:hle_retired minimum:2000003 name:hle_retired :
+event:0xc9 counters:0,1,2,3 um:rtm_retired minimum:2000003 name:rtm_retired :
+event:0xca counters:cpuid um:fp_assist minimum:100003 name:fp_assist :
+event:0xcc counters:cpuid um:x20 minimum:2000003 name:rob_misc_events_lbr_inserts :
+event:0xd0 counters:0,1,2,3 um:mem_uops_retired minimum:2000003 name:mem_uops_retired :
+event:0xd1 counters:0,1,2,3 um:mem_load_uops_retired minimum:2000003 name:mem_load_uops_retired :
+event:0xd2 counters:0,1,2,3 um:mem_load_uops_l3_hit_retired minimum:100003 name:mem_load_uops_l3_hit_retired :
+event:0xd3 counters:0,1,2,3 um:mem_load_uops_l3_miss_retired minimum:100007 name:mem_load_uops_l3_miss_retired :
+event:0xe6 counters:cpuid um:x1f minimum:100003 name:baclears_any :
+event:0xf0 counters:cpuid um:l2_trans minimum:200003 name:l2_trans :
+event:0xf1 counters:cpuid um:l2_lines_in minimum:100003 name:l2_lines_in :
+event:0xf2 counters:cpuid um:l2_lines_out minimum:100003 name:l2_lines_out :
diff --git a/events/i386/haswell/unit_masks b/events/i386/haswell/unit_masks
new file mode 100644
index 0000000..60c2a61
--- /dev/null
+++ b/events/i386/haswell/unit_masks
@@ -0,0 +1,355 @@
+#
+# Unit masks for the Intel "Haswell" micro architecture
+#
+# See http://ark.intel.com/ for help in identifying Haswell based CPUs
+#
+include:i386/arch_perfmon
+name:x02 type:mandatory default:0x2
+	0x2 No unit mask
+name:x03 type:mandatory default:0x3
+	0x3 No unit mask
+name:x10 type:mandatory default:0x10
+	0x10 No unit mask
+name:x1f type:mandatory default:0x1f
+	0x1f No unit mask
+name:x20 type:mandatory default:0x20
+	0x20 No unit mask
+name:x50 type:mandatory default:0x50
+	0x50 No unit mask
+name:ld_blocks type:exclusive default:0x2
+	0x2 extra: store_forward This event counts loads that followed a store to the same address, where the data could not be forwarded inside the pipeline from the store to the load.  The most common reason why store forwarding would be blocked is when a load's address range overlaps with a preceding smaller uncompleted store. The penalty for blocked store forwarding is that the load must wait for the store to write its value to the cache before it can be issued.
+	0x8 extra: no_sr The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use
+name:misalign_mem_ref type:exclusive default:0x1
+	0x1 extra: loads Speculative cache line split load uops dispatched to L1 cache
+	0x2 extra: stores Speculative cache line split STA uops dispatched to L1 cache
+name:dtlb_load_misses type:exclusive default:0x1
+	0x1 extra: miss_causes_a_walk Load misses in all DTLB levels that cause page walks
+	0x2 extra: walk_completed_4k Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes (4K).
+	0x4 extra: walk_completed_2m_4m Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes (2M/4M).
+	0x10 extra: walk_duration This event counts cycles when the  page miss handler (PMH) is servicing page walks caused by DTLB load misses.
+	0x20 extra: stlb_hit_4k This event counts load operations from a 4K page that miss the first DTLB level but hit the second and do not cause page walks.
+	0x40 extra: stlb_hit_2m This event counts load operations from a 2M page that miss the first DTLB level but hit the second and do not cause page walks.
+	0x80 extra: pde_cache_miss DTLB demand load misses with low part of linear-to-physical address translation missed
+	0xe extra: walk_completed Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes of any page size.
+	0x60 extra: stlb_hit Load operations that miss the first DTLB level but hit the second and do not cause page walks
+name:uops_issued type:exclusive default:0x1
+	0x1 extra: any This event counts the number of uops issued by the Front-end of the pipeline to the Back-end. This event is counted at the allocation stage and will count both retired and non-retired uops.
+	0x10 extra: flags_merge Number of flags-merge uops being allocated. Such uops considered perf sensitive; added by GSR u-arch.
+	0x20 extra: slow_lea Number of slow LEA uops being allocated. A uop is generally considered SlowLea if it has 3 sources (e.g. 2 sources + immediate) regardless if as a result of LEA instruction or not.
+	0x40 extra: single_mul Number of Multiply packed/scalar single precision uops allocated
+	0x1 extra:cmask=1,inv stall_cycles Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread
+	0x1 extra:cmask=1,inv,any core_stall_cycles Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for all threads
+name:l2_rqsts type:exclusive default:0x21
+	0x21 extra: demand_data_rd_miss Demand Data Read miss L2, no rejects
+	0x41 extra: demand_data_rd_hit Demand Data Read requests that hit L2 cache
+	0x30 extra: l2_pf_miss L2 prefetch requests that miss L2 cache
+	0x50 extra: l2_pf_hit L2 prefetch requests that hit L2 cache
+	0xe1 extra: all_demand_data_rd Demand Data Read requests
+	0xe2 extra: all_rfo RFO requests to L2 cache
+	0xe4 extra: all_code_rd L2 code requests
+	0xf8 extra: all_pf Requests from L2 hardware prefetchers
+	0x42 extra: rfo_hit RFO requests that hit L2 cache
+	0x22 extra: rfo_miss RFO requests that miss L2 cache
+	0x44 extra: code_rd_hit L2 cache hits when fetching instructions, code reads.
+	0x24 extra: code_rd_miss L2 cache misses when fetching instructions
+	0x27 extra: all_demand_miss Demand requests that miss L2 cache
+	0xe7 extra: all_demand_references Demand requests to L2 cache
+	0x3f extra: miss All requests that miss L2 cache
+	0xff extra: references All L2 requests
+name:l1d_pend_miss type:exclusive default:0x1
+	0x1 extra: pending L1D miss outstanding duration in cycles
+	0x1 extra:cmask=1 pending_cycles Cycles with L1D load Misses outstanding.
+name:dtlb_store_misses type:exclusive default:0x1
+	0x1 extra: miss_causes_a_walk Store misses in all DTLB levels that cause page walks
+	0x2 extra: walk_completed_4k Store miss in all TLB levels causes a page walk that completes. (4K)
+	0x4 extra: walk_completed_2m_4m Store misses in all DTLB levels that cause completed page walks (2M/4M)
+	0x10 extra: walk_duration This event counts cycles when the  page miss handler (PMH) is servicing page walks caused by DTLB store misses.
+	0x20 extra: stlb_hit_4k This event counts store operations from a 4K page that miss the first DTLB level but hit the second and do not cause page walks.
+	0x40 extra: stlb_hit_2m This event counts store operations from a 2M page that miss the first DTLB level but hit the second and do not cause page walks.
+	0x80 extra: pde_cache_miss DTLB store misses with low part of linear-to-physical address translation missed
+	0xe extra: walk_completed Store misses in all DTLB levels that cause completed page walks
+	0x60 extra: stlb_hit Store operations that miss the first TLB level but hit the second and do not cause page walks
+name:load_hit_pre type:exclusive default:0x1
+	0x1 extra: sw_pf Not software-prefetch load dispatches that hit FB allocated for software prefetch
+	0x2 extra: hw_pf Not software-prefetch load dispatches that hit FB allocated for hardware prefetch
+name:tx_mem type:exclusive default:0x1
+	0x1 extra: abort_conflict Number of times a transactional abort was signaled due to a data conflict on a transactionally accessed address
+	0x2 extra: abort_capacity_write Number of times a transactional abort was signaled due to a data capacity limitation for transactional writes.
+	0x4 extra: abort_hle_store_to_elided_lock Number of times a HLE transactional region aborted due to a non XRELEASE prefixed instruction writing to an elided lock in the elision buffer
+	0x8 extra: abort_hle_elision_buffer_not_empty Number of times an HLE transactional execution aborted due to NoAllocatedElisionBuffer being non-zero.
+	0x10 extra: abort_hle_elision_buffer_mismatch Number of times an HLE transactional execution aborted due to XRELEASE lock not satisfying the address and value requirements in the elision buffer
+	0x20 extra: abort_hle_elision_buffer_unsupported_alignment Number of times an HLE transactional execution aborted due to an unsupported read alignment from the elision buffer.
+	0x40 extra: hle_elision_buffer_full Number of times HLE lock could not be elided due to ElisionBufferAvailable being zero.
+name:move_elimination type:exclusive default:0x1
+	0x1 extra: int_eliminated Number of integer Move Elimination candidate uops that were eliminated.
+	0x2 extra: simd_eliminated Number of SIMD Move Elimination candidate uops that were eliminated.
+	0x4 extra: int_not_eliminated Number of integer Move Elimination candidate uops that were not eliminated.
+	0x8 extra: simd_not_eliminated Number of SIMD Move Elimination candidate uops that were not eliminated.
+name:cpl_cycles type:exclusive default:0x1
+	0x1 extra: ring0 Unhalted core cycles when the thread is in ring 0
+	0x2 extra: ring123 Unhalted core cycles when thread is in rings 1, 2, or 3
+	0x1 extra:cmask=1,edge ring0_trans Number of intervals between processor halts while thread is in ring 0
+name:tx_exec type:exclusive default:0x1
+	0x1 extra: misc1 Counts the number of times a class of instructions that may cause a transactional abort was executed. Since this is the count of execution, it may not always cause a transactional abort.
+	0x2 extra: misc2 Counts the number of times a class of instructions (e.g., vzeroupper) that may cause a transactional abort was executed inside a transactional region
+	0x4 extra: misc3 Counts the number of times an instruction execution caused the transactional nest count supported to be exceeded
+	0x8 extra: misc4 Counts the number of times a XBEGIN instruction was executed inside an HLE transactional region.
+	0x10 extra: misc5 Counts the number of times an HLE XACQUIRE instruction was executed inside an RTM transactional region
+name:rs_events type:exclusive default:0x1
+	0x1 extra: empty_cycles This event counts cycles when the Reservation Station ( RS ) is empty for the thread. The RS is a structure that buffers allocated micro-ops from the Front-end. If there are many cycles when the RS is empty, it may represent an underflow of instructions delivered from the Front-end.
+	0x1 extra:cmask=1,inv,edge empty_end Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.
+name:offcore_requests_outstanding type:exclusive default:0x1
+	0x1 extra: demand_data_rd Offcore outstanding Demand Data Read transactions in uncore queue.
+	0x2 extra: demand_code_rd Offcore outstanding code reads transactions in SuperQueue (SQ), queue to uncore, every cycle
+	0x4 extra: demand_rfo Offcore outstanding RFO store transactions in SuperQueue (SQ), queue to uncore
+	0x8 extra: all_data_rd Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore
+	0x1 extra:cmask=1 cycles_with_demand_data_rd Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore
+	0x8 extra:cmask=1 cycles_with_data_rd Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore
+name:lock_cycles type:exclusive default:0x1
+	0x1 extra: split_lock_uc_lock_duration Cycles when L1 and L2 are locked due to UC or split lock
+	0x2 extra: cache_lock_duration Cycles when L1D is locked
+name:idq type:exclusive default:0x2
+	0x2 extra: empty Instruction Decode Queue (IDQ) empty cycles
+	0x4 extra: mite_uops Uops delivered to Instruction Decode Queue (IDQ) from MITE path
+	0x8 extra: dsb_uops Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path
+	0x10 extra: ms_dsb_uops Uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy
+	0x20 extra: ms_mite_uops Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy
+	0x30 extra: ms_uops This event counts uops delivered by the Front-end with the assistance of the microcode sequencer.  Microcode assists are used for complex instructions or scenarios that can't be handled by the standard decoder.  Using other instructions, if possible, will usually improve performance.
+	0x30 extra:cmask=1 ms_cycles This event counts cycles during which the microcode sequencer assisted the Front-end in delivering uops.  Microcode assists are used for complex instructions or scenarios that can't be handled by the standard decoder.  Using other instructions, if possible, will usually improve performance.
+	0x4 extra:cmask=1 mite_cycles Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path
+	0x8 extra:cmask=1 dsb_cycles Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path
+	0x10 extra:cmask=1 ms_dsb_cycles Cycles when uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy
+	0x10 extra:cmask=1,edge ms_dsb_occur Deliveries to Instruction Decode Queue (IDQ) initiated by Decode Stream Buffer (DSB) while Microcode Sequencer (MS) is busy
+	0x18 extra:cmask=4 all_dsb_cycles_4_uops Cycles Decode Stream Buffer (DSB) is delivering 4 Uops
+	0x18 extra:cmask=1 all_dsb_cycles_any_uops Cycles Decode Stream Buffer (DSB) is delivering any Uop
+	0x24 extra:cmask=4 all_mite_cycles_4_uops Cycles MITE is delivering 4 Uops
+	0x24 extra:cmask=1 all_mite_cycles_any_uops Cycles MITE is delivering any Uop
+	0x3c extra: mite_all_uops Uops delivered to Instruction Decode Queue (IDQ) from MITE path
+	0x30 extra:cmask=1,edge ms_switches Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer
+name:icache type:exclusive default:0x2
+	0x2 extra: misses This event counts Instruction Cache (ICACHE) misses.
+	0x4 extra: ifetch_stall Cycles where a code-fetch stalled due to L1 instruction-cache miss or an iTLB miss
+name:itlb_misses type:exclusive default:0x1
+	0x1 extra: miss_causes_a_walk Misses at all ITLB levels that cause page walks
+	0x2 extra: walk_completed_4k Code miss in all TLB levels causes a page walk that completes. (4K)
+	0x4 extra: walk_completed_2m_4m Code miss in all TLB levels causes a page walk that completes. (2M/4M)
+	0x10 extra: walk_duration This event counts cycles when the  page miss handler (PMH) is servicing page walks caused by ITLB misses.
+	0x20 extra: stlb_hit_4k Code misses that miss the  DTLB and hit the STLB (4K)
+	0x40 extra: stlb_hit_2m Code misses that miss the  DTLB and hit the STLB (2M)
+	0xe extra: walk_completed Misses in all ITLB levels that cause completed page walks
+	0x60 extra: stlb_hit Operations that miss the first ITLB level but hit the second and do not cause any page walks
+name:ild_stall type:exclusive default:0x1
+	0x1 extra: lcp This event counts cycles where the decoder is stalled on an instruction with a length changing prefix (LCP).
+	0x4 extra: iq_full Stall cycles because IQ is full
+name:br_inst_exec type:exclusive default:0xff
+	0xff extra: all_branches Speculative and retired  branches
+	0x41 extra: nontaken_conditional Not taken macro-conditional branches
+	0x81 extra: taken_conditional Taken speculative and retired macro-conditional branches
+	0x82 extra: taken_direct_jump Taken speculative and retired macro-conditional branch instructions excluding calls and indirects
+	0x84 extra: taken_indirect_jump_non_call_ret Taken speculative and retired indirect branches excluding calls and returns
+	0x88 extra: taken_indirect_near_return Taken speculative and retired indirect branches with return mnemonic
+	0x90 extra: taken_direct_near_call Taken speculative and retired direct near calls
+	0xa0 extra: taken_indirect_near_call Taken speculative and retired indirect calls
+	0xc1 extra: all_conditional Speculative and retired macro-conditional branches
+	0xc2 extra: all_direct_jmp Speculative and retired macro-unconditional branches excluding calls and indirects
+	0xc4 extra: all_indirect_jump_non_call_ret Speculative and retired indirect branches excluding calls and returns
+	0xc8 extra: all_indirect_near_return Speculative and retired indirect return branches.
+	0xd0 extra: all_direct_near_call Speculative and retired direct near calls
+name:br_misp_exec type:exclusive default:0xff
+	0xff extra: all_branches Speculative and retired mispredicted macro conditional branches
+	0x41 extra: nontaken_conditional Not taken speculative and retired mispredicted macro conditional branches
+	0x81 extra: taken_conditional Taken speculative and retired mispredicted macro conditional branches
+	0x84 extra: taken_indirect_jump_non_call_ret Taken speculative and retired mispredicted indirect branches excluding calls and returns
+	0x88 extra: taken_return_near Taken speculative and retired mispredicted indirect branches with return mnemonic
+	0xc1 extra: all_conditional Speculative and retired mispredicted macro conditional branches
+	0xc4 extra: all_indirect_jump_non_call_ret Mispredicted indirect branches excluding calls and returns
+	0xa0 extra: taken_indirect_near_call Taken speculative and retired mispredicted indirect calls
+name:idq_uops_not_delivered type:exclusive default:0x1
+	0x1 extra: core This event counts the number of undelivered (unallocated) uops from the Front-end to the Resource Allocation Table (RAT) while the Back-end of the processor is not stalled. The Front-end can allocate up to 4 uops per cycle so this event can increment 0-4 times per cycle depending on the number of unallocated uops. This event is counted on a per-core basis.
+	0x1 extra:cmask=4 cycles_0_uops_deliv_core This event counts the number of cycles during which the Front-end allocated exactly zero uops to the Resource Allocation Table (RAT) while the Back-end of the processor is not stalled.  This event is counted on a per-core basis.
+	0x1 extra:cmask=3 cycles_le_1_uop_deliv_core Cycles per thread when 3 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled
+	0x1 extra:cmask=2 cycles_le_2_uop_deliv_core Cycles with less than 2 uops delivered by the front end.
+	0x1 extra:cmask=1 cycles_le_3_uop_deliv_core Cycles with less than 3 uops delivered by the front end.
+	0x1 extra:cmask=1,inv cycles_fe_was_ok Counts cycles FE delivered 4 uops or Resource Allocation Table (RAT) was stalling FE.
+name:uops_executed_port type:exclusive default:0x1
+	0x1 extra: port_0 Cycles per thread when uops are executed in port 0
+	0x2 extra: port_1 Cycles per thread when uops are executed in port 1
+	0x4 extra: port_2 Cycles per thread when uops are executed in port 2
+	0x8 extra: port_3 Cycles per thread when uops are executed in port 3
+	0x10 extra: port_4 Cycles per thread when uops are executed in port 4
+	0x20 extra: port_5 Cycles per thread when uops are executed in port 5
+	0x40 extra: port_6 Cycles per thread when uops are executed in port 6
+	0x80 extra: port_7 Cycles per thread when uops are executed in port 7
+	0x1 extra:any port_0_core Cycles per core when uops are executed in port 0
+	0x2 extra:any port_1_core Cycles per core when uops are executed in port 1
+	0x4 extra:any port_2_core Cycles per core when uops are dispatched to port 2
+	0x8 extra:any port_3_core Cycles per core when uops are dispatched to port 3
+	0x10 extra:any port_4_core Cycles per core when uops are executed in port 4
+	0x20 extra:any port_5_core Cycles per core when uops are executed in port 5
+	0x40 extra:any port_6_core Cycles per core when uops are executed in port 6
+	0x80 extra:any port_7_core Cycles per core when uops are dispatched to port 7
+name:resource_stalls type:exclusive default:0x1
+	0x1 extra: any Resource-related stall cycles
+	0x4 extra: rs Cycles stalled due to no eligible RS entry available.
+	0x8 extra: sb This event counts cycles during which no instructions were allocated because no Store Buffers (SB) were available.
+	0x10 extra: rob Cycles stalled due to re-order buffer full.
+name:cycle_activity type:exclusive default:0x1
+	0x1 extra:cmask=1 cycles_l2_pending Cycles with pending L2 cache miss loads.
+	0x8 extra:cmask=8 cycles_l1d_pending Cycles with pending L1 cache miss loads.
+	0x2 extra:cmask=2 cycles_ldm_pending Cycles with pending memory loads.
+	0x4 extra:cmask=4 cycles_no_execute This event counts cycles during which no instructions were executed in the execution stage of the pipeline.
+	0x5 extra:cmask=5 stalls_l2_pending Execution stalls due to L2 cache misses.
+	0x6 extra:cmask=6 stalls_ldm_pending This event counts cycles during which no instructions were executed in the execution stage of the pipeline and there were memory instructions pending (waiting for data).
+	0xc extra:cmask=c stalls_l1d_pending Execution stalls due to L1 data cache misses
+name:offcore_requests type:exclusive default:0x1
+	0x1 extra: demand_data_rd Demand Data Read requests sent to uncore
+	0x2 extra: demand_code_rd Cacheable and noncacheable code read requests
+	0x4 extra: demand_rfo Demand RFO requests including regular RFOs, locks, ItoM
+	0x8 extra: all_data_rd Demand and prefetch data reads
+name:uops_executed type:exclusive default:0x2
+	0x2 extra: core Number of uops executed on the core. Errata: HSM31
+	0x1 extra:cmask=1,inv stall_cycles Counts number of cycles no uops were dispatched to be executed on this thread.
+	0x1 extra:cmask=1 cycles_ge_1_uops_exec This event counts the cycles where at least one uop was executed. It is counted per thread. Errata: HSM31
+	0x1 extra:cmask=2 cycles_ge_2_uops_exec This event counts the cycles where at least two uops were executed. It is counted per thread. Errata: HSM31
+	0x1 extra:cmask=3 cycles_ge_3_uops_exec This event counts the cycles where at least three uops were executed. It is counted per thread. Errata: HSM31
+	0x1 extra:cmask=4 cycles_ge_4_uops_exec Cycles where at least 4 uops were executed per-thread Errata: HSM31
+name:page_walker_loads type:exclusive default:0x11
+	0x11 extra: dtlb_l1 Number of DTLB page walker hits in the L1+FB
+	0x21 extra: itlb_l1 Number of ITLB page walker hits in the L1+FB
+	0x41 extra: ept_dtlb_l1 Counts the number of Extended Page Table walks from the DTLB that hit in the L1 and FB.
+	0x81 extra: ept_itlb_l1 Counts the number of Extended Page Table walks from the ITLB that hit in the L1 and FB.
+	0x12 extra: dtlb_l2 Number of DTLB page walker hits in the L2
+	0x22 extra: itlb_l2 Number of ITLB page walker hits in the L2
+	0x42 extra: ept_dtlb_l2 Counts the number of Extended Page Table walks from the DTLB that hit in the L2.
+	0x82 extra: ept_itlb_l2 Counts the number of Extended Page Table walks from the ITLB that hit in the L2.
+	0x14 extra: dtlb_l3 Number of DTLB page walker hits in the L3 + XSNP
+	0x24 extra: itlb_l3 Number of ITLB page walker hits in the L3 + XSNP
+	0x44 extra: ept_dtlb_l3 Counts the number of Extended Page Table walks from the DTLB that hit in the L3.
+	0x84 extra: ept_itlb_l3 Counts the number of Extended Page Table walks from the ITLB that hit in the L3.
+	0x18 extra: dtlb_memory Number of DTLB page walker hits in Memory
+	0x48 extra: ept_dtlb_memory Counts the number of Extended Page Table walks from the DTLB that hit in memory.
+	0x88 extra: ept_itlb_memory Counts the number of Extended Page Table walks from the ITLB that hit in memory.
+name:tlb_flush type:exclusive default:0x1
+	0x1 extra: dtlb_thread DTLB flush attempts of the thread-specific entries
+	0x20 extra: stlb_any STLB flush attempts
+name:other_assists type:exclusive default:0x8
+	0x8 extra: avx_to_sse Number of transitions from AVX-256 to legacy SSE when penalty applicable. Errata: HSM57
+	0x10 extra: sse_to_avx Number of transitions from SSE to AVX-256 when penalty applicable. Errata: HSM57
+	0x40 extra: any_wb_assist Number of times any microcode assist is invoked by HW upon uop writeback.
+name:uops_retired type:exclusive default:0x1
+	0x1 extra: all Actually retired uops.
+	0x1 extra: all_pebs Actually retired uops.
+	0x2 extra: retire_slots This event counts the number of retirement slots used each cycle.  There are potentially 4 slots that can be used each cycle - meaning, 4 uops or 4 instructions could retire each cycle.
+	0x2 extra: retire_slots_pebs This event counts the number of retirement slots used each cycle.  There are potentially 4 slots that can be used each cycle - meaning, 4 uops or 4 instructions could retire each cycle.
+	0x1 extra:cmask=1,inv stall_cycles Cycles without actually retired uops.
+	0x1 extra:cmask=a,inv total_cycles Cycles with less than 10 actually retired uops.
+	0x1 extra:cmask=1,inv core_stall_cycles Cycles without actually retired uops.
+name:machine_clears type:exclusive default:0x1
+	0x1 extra: cycles Cycles there was a Nuke. Account for both thread-specific and All Thread Nukes.
+	0x2 extra: memory_ordering This event counts the number of memory ordering machine clears detected. Memory ordering machine clears can result from memory address aliasing or snoops from another hardware thread or core to data inflight in the pipeline.  Machine clears can have a significant performance impact if they are happening frequently.
+	0x4 extra: smc This event is incremented when self-modifying code (SMC) is detected, which causes a machine clear.  Machine clears can have a significant performance impact if they are happening frequently.
+	0x20 extra: maskmov This event counts the number of executed Intel AVX masked load operations that refer to an illegal address range with the mask bits set to 0.
+	0x1 extra:cmask=1,edge count Number of machine clears (nukes) of any type.
+name:br_inst_retired type:exclusive default:0x1
+	0x1 extra: conditional Conditional branch instructions retired.
+	0x1 extra: conditional_pebs Conditional branch instructions retired.
+	0x2 extra: near_call Direct and indirect near call instructions retired.
+	0x2 extra: near_call_pebs Direct and indirect near call instructions retired.
+	0x8 extra: near_return Return instructions retired.
+	0x8 extra: near_return_pebs Return instructions retired.
+	0x10 extra: not_taken Not taken branch instructions retired.
+	0x20 extra: near_taken Taken branch instructions retired.
+	0x20 extra: near_taken_pebs Taken branch instructions retired.
+	0x40 extra: far_branch Far branch instructions retired.
+	0x4 extra:pebs all_branches_pebs All (macro) branch instructions retired.
+name:br_misp_retired type:exclusive default:0x1
+	0x1 extra: conditional Mispredicted conditional branch instructions retired.
+	0x1 extra: conditional_pebs Mispredicted conditional branch instructions retired.
+	0x4 extra:pebs all_branches_pebs This event counts all mispredicted branch instructions retired. This is a precise event.
+	0x20 extra: near_taken number of near branch instructions retired that were mispredicted and taken.
+	0x20 extra: near_taken_pebs number of near branch instructions retired that were mispredicted and taken.
+name:hle_retired type:exclusive default:0x1
+	0x1 extra: start Number of times an HLE execution started.
+	0x2 extra: commit Number of times an HLE execution successfully committed
+	0x4 extra: aborted Number of times an HLE execution aborted due to any reasons (multiple categories may count as one).
+	0x4 extra: aborted_pebs Number of times an HLE execution aborted due to any reasons (multiple categories may count as one).
+	0x8 extra: aborted_misc1 Number of times an HLE execution aborted due to various memory events (e.g., read/write capacity and conflicts).
+	0x10 extra: aborted_misc2 Number of times an HLE execution aborted due to uncommon conditions
+	0x20 extra: aborted_misc3 Number of times an HLE execution aborted due to HLE-unfriendly instructions
+	0x40 extra: aborted_misc4 Number of times an HLE execution aborted due to incompatible memory type
+	0x80 extra: aborted_misc5 Number of times an HLE execution aborted due to none of the previous 4 categories (e.g. interrupts)
+name:rtm_retired type:exclusive default:0x1
+	0x1 extra: start Number of times an RTM execution started.
+	0x2 extra: commit Number of times an RTM execution successfully committed
+	0x4 extra: aborted Number of times an RTM execution aborted due to any reasons (multiple categories may count as one).
+	0x4 extra: aborted_pebs Number of times an RTM execution aborted due to any reasons (multiple categories may count as one).
+	0x8 extra: aborted_misc1 Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts)
+	0x10 extra: aborted_misc2 Number of times an RTM execution aborted due to uncommon conditions
+	0x20 extra: aborted_misc3 Number of times an RTM execution aborted due to HLE-unfriendly instructions
+	0x40 extra: aborted_misc4 Number of times an RTM execution aborted due to incompatible memory type
+	0x80 extra: aborted_misc5 Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt)
+name:fp_assist type:exclusive default:0x1e
+	0x1e extra:cmask=1 any Cycles with any input/output SSE or FP assist
+	0x2 extra: x87_output Number of X87 assists due to output value.
+	0x4 extra: x87_input Number of X87 assists due to input value.
+	0x8 extra: simd_output Number of SIMD FP assists due to Output values
+	0x10 extra: simd_input Number of SIMD FP assists due to input values
+name:mem_uops_retired type:exclusive default:0x11
+	0x11 extra: stlb_miss_loads Load uops with true STLB miss retired to architected path. Errata: HSM30
+	0x11 extra: stlb_miss_loads_pebs Load uops with true STLB miss retired to architected path. Errata: HSM30
+	0x12 extra: stlb_miss_stores Store uops with true STLB miss retired to architected path. Errata: HSM30
+	0x12 extra: stlb_miss_stores_pebs Store uops with true STLB miss retired to architected path. Errata: HSM30
+	0x21 extra: lock_loads Load uops with locked access retired to architected path. Errata: HSM30
+	0x21 extra: lock_loads_pebs Load uops with locked access retired to architected path. Errata: HSM30
+	0x41 extra: split_loads Line-splitted load uops retired to architected path. Errata: HSM30
+	0x41 extra: split_loads_pebs Line-splitted load uops retired to architected path. Errata: HSM30
+	0x42 extra: split_stores Line-splitted store uops retired to architected path. Errata: HSM30
+	0x42 extra: split_stores_pebs Line-splitted store uops retired to architected path. Errata: HSM30
+	0x81 extra: all_loads Load uops retired to architected path with filter on bits 0 and 1 applied. Errata: HSM30
+	0x81 extra: all_loads_pebs Load uops retired to architected path with filter on bits 0 and 1 applied. Errata: HSM30
+	0x82 extra: all_stores Store uops retired to architected path with filter on bits 0 and 1 applied. Errata: HSM30
+	0x82 extra: all_stores_pebs Store uops retired to architected path with filter on bits 0 and 1 applied. Errata: HSM30
+name:mem_load_uops_retired type:exclusive default:0x1
+	0x1 extra: l1_hit Retired load uops with L1 cache hits as data sources. Errata: HSM30
+	0x1 extra: l1_hit_pebs Retired load uops with L1 cache hits as data sources. Errata: HSM30
+	0x2 extra: l2_hit Retired load uops with L2 cache hits as data sources. Errata: HSM30
+	0x2 extra: l2_hit_pebs Retired load uops with L2 cache hits as data sources. Errata: HSM30
+	0x4 extra: l3_hit Retired load uops which data sources were data hits in L3 without snoops required. Errata: HSM26, HSM30
+	0x4 extra: l3_hit_pebs Retired load uops which data sources were data hits in L3 without snoops required. Errata: HSM26, HSM30
+	0x8 extra: l1_miss Retired load uops misses in L1 cache as data sources. Errata: HSM30
+	0x8 extra: l1_miss_pebs Retired load uops misses in L1 cache as data sources. Errata: HSM30
+	0x10 extra: l2_miss Miss in mid-level (L2) cache. Excludes Unknown data-source. Errata: HSM30
+	0x10 extra: l2_miss_pebs Miss in mid-level (L2) cache. Excludes Unknown data-source. Errata: HSM30
+	0x20 extra: l3_miss Miss in last-level (L3) cache. Excludes Unknown data-source. Errata: HSM26, HSM30
+	0x20 extra: l3_miss_pebs Miss in last-level (L3) cache. Excludes Unknown data-source. Errata: HSM26, HSM30
+	0x40 extra: hit_lfb Retired load uops which data sources were load uops missed L1 but hit FB due to preceding miss to the same cache line with data not ready. Errata: HSM30
+	0x40 extra: hit_lfb_pebs Retired load uops which data sources were load uops missed L1 but hit FB due to preceding miss to the same cache line with data not ready. Errata: HSM30
+name:mem_load_uops_l3_hit_retired type:exclusive default:0x1
+	0x1 extra: xsnp_miss Retired load uops which data sources were L3 hit and cross-core snoop missed in on-pkg core cache. Errata: HSM26, HSM30
+	0x1 extra: xsnp_miss_pebs Retired load uops which data sources were L3 hit and cross-core snoop missed in on-pkg core cache. Errata: HSM26, HSM30
+	0x2 extra: xsnp_hit Retired load uops which data sources were L3 and cross-core snoop hits in on-pkg core cache. Errata: HSM26, HSM30
+	0x2 extra: xsnp_hit_pebs Retired load uops which data sources were L3 and cross-core snoop hits in on-pkg core cache. Errata: HSM26, HSM30
+	0x4 extra: xsnp_hitm Retired load uops which data sources were HitM responses from shared L3. Errata: HSM26, HSM30
+	0x4 extra: xsnp_hitm_pebs Retired load uops which data sources were HitM responses from shared L3. Errata: HSM26, HSM30
+	0x8 extra: xsnp_none Retired load uops which data sources were hits in L3 without snoops required. Errata: HSM26, HSM30
+	0x8 extra: xsnp_none_pebs Retired load uops which data sources were hits in L3 without snoops required. Errata: HSM26, HSM30
+name:mem_load_uops_l3_miss_retired type:exclusive default:0x1
+	0x1 extra: local_dram This event counts retired load uops where the data came from local DRAM. This does not include hardware prefetches. Errata: HSM30
+	0x1 extra: local_dram_pebs This event counts retired load uops where the data came from local DRAM. This does not include hardware prefetches. Errata: HSM30
+name:l2_trans type:exclusive default:0x80
+	0x80 extra: all_requests Transactions accessing L2 pipe
+	0x1 extra: demand_data_rd Demand Data Read requests that access L2 cache
+	0x2 extra: rfo RFO requests that access L2 cache
+	0x4 extra: code_rd L2 cache accesses when fetching instructions
+	0x8 extra: all_pf L2 or L3 HW prefetches that access L2 cache
+	0x10 extra: l1d_wb L1D writebacks that access L2 cache
+	0x20 extra: l2_fill L2 fill requests that access L2 cache
+	0x40 extra: l2_wb L2 writebacks that access L2 cache
+name:l2_lines_in type:exclusive default:0x7
+	0x7 extra: all This event counts the number of L2 cache lines brought into the L2 cache.  Lines are filled into the L2 cache when there was an L2 miss.
+	0x1 extra: i L2 cache lines in I state filling L2
+	0x2 extra: s L2 cache lines in S state filling L2
+	0x4 extra: e L2 cache lines in E state filling L2
+name:l2_lines_out type:exclusive default:0x5
+	0x5 extra: demand_clean Clean L2 cache lines evicted by demand
+	0x6 extra: demand_dirty Dirty L2 cache lines evicted by demand
diff --git a/events/i386/ivybridge/unit_masks b/events/i386/ivybridge/unit_masks
index ddb59a0..7786904 100644
--- a/events/i386/ivybridge/unit_masks
+++ b/events/i386/ivybridge/unit_masks
@@ -5,163 +5,163 @@
 #
 include:i386/arch_perfmon
 name:ld_blocks type:mandatory default:0x2
-	0x2 store_forward loads blocked by overlapping with store buffer that cannot be forwarded
+	0x2 extra: store_forward loads blocked by overlapping with store buffer that cannot be forwarded
 name:misalign_mem_ref type:bitmask default:0x1
-	0x1 loads Speculative cache line split load uops dispatched to L1 cache
-	0x2 stores Speculative cache line split STA uops dispatched to L1 cache
+	0x1 extra: loads Speculative cache line split load uops dispatched to L1 cache
+	0x2 extra: stores Speculative cache line split STA uops dispatched to L1 cache
 name:ld_blocks_partial type:mandatory default:0x1
-	0x1 address_alias False dependencies in MOB due to partial compare on address
+	0x1 extra: address_alias False dependencies in MOB due to partial compare on address
 name:dtlb_load_misses type:exclusive default:0x81
-	0x81 demand_ld_miss_causes_a_walk Demand load Miss in all translation lookaside buffer (TLB) levels causes an page walk of any page size.
-	0x82 demand_ld_walk_completed Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes of any page size.
-	0x84 demand_ld_walk_duration Demand load cycles page miss handler (PMH) is busy with this walk.
-name:int_misc type:exclusive default:0x3
+	0x81 extra: demand_ld_miss_causes_a_walk Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk of any page size.
+	0x82 extra: demand_ld_walk_completed Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes of any page size.
+	0x84 extra: demand_ld_walk_duration Demand load cycles page miss handler (PMH) is busy with this walk.
+name:int_misc type:exclusive default:recovery_cycles
 	0x3 extra:cmask=1 recovery_cycles Number of cycles waiting for the checkpoints in Resource Allocation Table (RAT) to be recovered after Nuke due to all other cases except JEClear (e.g. whenever a ucode assist is needed like SSE exception, memory disambiguation, etc...)
 	0x3 extra:cmask=1,edge recovery_stalls_count Number of occurences waiting for the checkpoints in Resource Allocation Table (RAT) to be recovered after Nuke due to all other cases except JEClear (e.g. whenever a ucode assist is needed like SSE exception, memory disambiguation, etc...)
-name:uops_issued type:exclusive default:0x1
-	0x1 any Uops that Resource Allocation Table (RAT) issues to Reservation Station (RS)
+name:uops_issued type:exclusive default:any
+	0x1 extra: any Uops that Resource Allocation Table (RAT) issues to Reservation Station (RS)
 	0x1 extra:cmask=1,inv stall_cycles Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread
 	0x1 extra:cmask=1,inv,any core_stall_cycles Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for all threads
-	0x10 flags_merge Number of flags-merge uops being allocated.
-	0x20 slow_lea Number of slow LEA uops being allocated. A uop is generally considered SlowLea if it has 3 sources (e.g. 2 sources + immediate) regardless if as a result of LEA instruction or not.
-	0x40 single_mul Number of Multiply packed/scalar single precision uops allocated
-name:arith type:bitmask default:0x1
-	0x1 fpu_div_active Cycles when divider is busy executing divide operations
+	0x10 extra: flags_merge Number of flags-merge uops being allocated.
+	0x20 extra: slow_lea Number of slow LEA uops being allocated. A uop is generally considered SlowLea if it has 3 sources (e.g. 2 sources + immediate) regardless if as a result of LEA instruction or not.
+	0x40 extra: single_mul Number of Multiply packed/scalar single precision uops allocated
+name:arith type:bitmask default:fpu_div_active
+	0x1 extra: fpu_div_active Cycles when divider is busy executing divide operations
 	0x4 extra:cmask=1,edge fpu_div Divide operations executed
 name:l2_rqsts type:exclusive default:0x1
-	0x1 demand_data_rd_hit Demand Data Read requests that hit L2 cache
-	0x3 all_demand_data_rd Demand Data Read requests
-	0x4 rfo_hit RFO requests that hit L2 cache
-	0x8 rfo_miss RFO requests that miss L2 cache
-	0xc all_rfo RFO requests to L2 cache
-	0x10 code_rd_hit L2 cache hits when fetching instructions, code reads.
-	0x20 code_rd_miss L2 cache misses when fetching instructions
-	0x30 all_code_rd L2 code requests
-	0x40 pf_hit Requests from the L2 hardware prefetchers that hit L2 cache
-	0x80 pf_miss Requests from the L2 hardware prefetchers that miss L2 cache
-	0xc0 all_pf Requests from L2 hardware prefetchers
+	0x1 extra: demand_data_rd_hit Demand Data Read requests that hit L2 cache
+	0x3 extra: all_demand_data_rd Demand Data Read requests
+	0x4 extra: rfo_hit RFO requests that hit L2 cache
+	0x8 extra: rfo_miss RFO requests that miss L2 cache
+	0xc extra: all_rfo RFO requests to L2 cache
+	0x10 extra: code_rd_hit L2 cache hits when fetching instructions, code reads.
+	0x20 extra: code_rd_miss L2 cache misses when fetching instructions
+	0x30 extra: all_code_rd L2 code requests
+	0x40 extra: pf_hit Requests from the L2 hardware prefetchers that hit L2 cache
+	0x80 extra: pf_miss Requests from the L2 hardware prefetchers that miss L2 cache
+	0xc0 extra: all_pf Requests from L2 hardware prefetchers
 name:l2_store_lock_rqsts type:exclusive default:0x1
-	0x1 miss RFOs that miss cache lines
-	0x8 hit_m RFOs that hit cache lines in M state
-	0xf all RFOs that access cache lines in any state
+	0x1 extra: miss RFOs that miss cache lines
+	0x8 extra: hit_m RFOs that hit cache lines in M state
+	0xf extra: all RFOs that access cache lines in any state
 name:l2_l1d_wb_rqsts type:exclusive default:0x1
-	0x1 miss Count the number of modified Lines evicted from L1 and missed L2. (Non-rejected WBs from the DCU.)
-	0x4 hit_e Not rejected writebacks from L1D to L2 cache lines in E state
-	0x8 hit_m Not rejected writebacks from L1D to L2 cache lines in M state
-	0xf all Not rejected writebacks from L1D to L2 cache lines in any state.
-name:l1d_pend_miss type:exclusive default:0x1
-	0x1 pending L1D miss oustandings duration in cycles
+	0x1 extra: miss Count the number of modified Lines evicted from L1 and missed L2. (Non-rejected WBs from the DCU.)
+	0x4 extra: hit_e Not rejected writebacks from L1D to L2 cache lines in E state
+	0x8 extra: hit_m Not rejected writebacks from L1D to L2 cache lines in M state
+	0xf extra: all Not rejected writebacks from L1D to L2 cache lines in any state.
+name:l1d_pend_miss type:exclusive default:pending_cycles
+	0x1 extra: pending L1D miss outstandings duration in cycles
 	0x1 extra:cmask=1 pending_cycles Cycles with L1D load Misses outstanding.
 	0x1 extra:cmask=1,edge occurences This event counts the number of L1D misses outstanding, using an edge detect to count transitions.
 name:dtlb_store_misses type:bitmask default:0x1
-	0x1 miss_causes_a_walk Store misses in all DTLB levels that cause page walks
-	0x2 walk_completed Store misses in all DTLB levels that cause completed page walks
-	0x4 walk_duration Cycles when PMH is busy with page walks
-	0x10 stlb_hit Store operations that miss the first TLB level but hit the second and do not cause page walks
+	0x1 extra: miss_causes_a_walk Store misses in all DTLB levels that cause page walks
+	0x2 extra: walk_completed Store misses in all DTLB levels that cause completed page walks
+	0x4 extra: walk_duration Cycles when PMH is busy with page walks
+	0x10 extra: stlb_hit Store operations that miss the first TLB level but hit the second and do not cause page walks
 name:load_hit_pre type:bitmask default:0x1
-	0x1 sw_pf Not software-prefetch load dispatches that hit forward buffer allocated for software prefetch
-	0x2 hw_pf Not software-prefetch load dispatches that hit forward buffer allocated for hardware prefetch
+	0x1 extra: sw_pf Not software-prefetch load dispatches that hit forward buffer allocated for software prefetch
+	0x2 extra: hw_pf Not software-prefetch load dispatches that hit forward buffer allocated for hardware prefetch
 name:l1d type:mandatory default:0x1
-	0x1 replacement L1D data line replacements
+	0x1 extra: replacement L1D data line replacements
 name:move_elimination type:bitmask default:0x1
-	0x1 int_not_eliminated Number of integer Move Elimination candidate uops that were not eliminated.
-	0x2 simd_not_eliminated Number of SIMD Move Elimination candidate uops that were not eliminated.
-	0x4 int_eliminated Number of integer Move Elimination candidate uops that were eliminated.
-	0x8 simd_eliminated Number of SIMD Move Elimination candidate uops that were eliminated.
-name:cpl_cycles type:exclusive default:0x1
-	0x1 ring0 Unhalted core cycles when the thread is in ring 0
+	0x1 extra: int_not_eliminated Number of integer Move Elimination candidate uops that were not eliminated.
+	0x2 extra: simd_not_eliminated Number of SIMD Move Elimination candidate uops that were not eliminated.
+	0x4 extra: int_eliminated Number of integer Move Elimination candidate uops that were eliminated.
+	0x8 extra: simd_eliminated Number of SIMD Move Elimination candidate uops that were eliminated.
+name:cpl_cycles type:exclusive default:ring0
+	0x1 extra: ring0 Unhalted core cycles when the thread is in ring 0
 	0x1 extra:cmask=1,edge ring0_trans Number of intervals between processor halts while thread is in ring 0
-	0x2 ring123 Unhalted core cycles when thread is in rings 1, 2, or 3
+	0x2 extra: ring123 Unhalted core cycles when thread is in rings 1, 2, or 3
 name:rs_events type:mandatory default:0x1
-	0x1 empty_cycles Cycles when Reservation Station (RS) is empty for the thread
+	0x1 extra: empty_cycles Cycles when Reservation Station (RS) is empty for the thread
 name:tlb_access type:mandatory default:0x4
-	0x4 load_stlb_hit Load operations that miss the first DTLB level but hit the second and do not cause page walks
-name:offcore_requests_outstanding type:exclusive default:0x1
-	0x1 demand_data_rd Offcore outstanding Demand Data Read transactions in uncore queue.
+	0x4 extra: load_stlb_hit Load operations that miss the first DTLB level but hit the second and do not cause page walks
+name:offcore_requests_outstanding type:exclusive default:cycles_with_demand_data_rd
+	0x1 extra: demand_data_rd Offcore outstanding Demand Data Read transactions in uncore queue.
 	0x1 extra:cmask=1 cycles_with_demand_data_rd Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore
-	0x2 demand_code_rd Offcore outstanding code reads transactions in SuperQueue (SQ), queue to uncore, every cycle
-	0x4 demand_rfo Offcore outstanding RFO store transactions in SuperQueue (SQ), queue to uncore
+	0x2 extra: demand_code_rd Offcore outstanding code reads transactions in SuperQueue (SQ), queue to uncore, every cycle
+	0x4 extra: demand_rfo Offcore outstanding RFO store transactions in SuperQueue (SQ), queue to uncore
 	0x4 extra:cmask=1 cycles_with_demand_rfo Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle
-	0x8 all_data_rd Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore
+	0x8 extra: all_data_rd Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore
 	0x8 extra:cmask=1 cycles_with_data_rd Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore
 name:lock_cycles type:bitmask default:0x1
-	0x1 split_lock_uc_lock_duration Cycles when L1 and L2 are locked due to UC or split lock
-	0x2 cache_lock_duration Cycles when L1D is locked
-name:idq type:exclusive default:0x2
-	0x2 empty Instruction Decode Queue (IDQ) empty cycles
-	0x4 mite_uops Uops delivered to Instruction Decode Queue (IDQ) from MITE path
+	0x1 extra: split_lock_uc_lock_duration Cycles when L1 and L2 are locked due to UC or split lock
+	0x2 extra: cache_lock_duration Cycles when L1D is locked
+name:idq type:exclusive default:empty
+	0x2 extra: empty Instruction Decode Queue (IDQ) empty cycles
+	0x4 extra: mite_uops Uops delivered to Instruction Decode Queue (IDQ) from MITE path
 	0x4 extra:cmask=1 mite_cycles Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path
-	0x8 dsb_uops Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path
+	0x8 extra: dsb_uops Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path
 	0x8 extra:cmask=1 dsb_cycles Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path
-	0x10 ms_dsb_uops Uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy
+	0x10 extra: ms_dsb_uops Uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy
 	0x10 extra:cmask=1 ms_dsb_cycles Cycles when uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy
 	0x10 extra:cmask=1,edge ms_dsb_occur Deliveries to Instruction Decode Queue (IDQ) initiated by Decode Stream Buffer (DSB) while Microcode Sequenser (MS) is busy
 	0x18 extra:cmask=1 all_dsb_cycles_any_uops Cycles Decode Stream Buffer (DSB) is delivering any Uop
 	0x18 extra:cmask=4 all_dsb_cycles_4_uops Cycles Decode Stream Buffer (DSB) is delivering 4 Uops
-	0x20 ms_mite_uops Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy
+	0x20 extra: ms_mite_uops Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy
 	0x24 extra:cmask=1 all_mite_cycles_any_uops Cycles MITE is delivering any Uop
 	0x24 extra:cmask=4 all_mite_cycles_4_uops Cycles MITE is delivering 4 Uops
-	0x30 ms_uops Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy
+	0x30 extra: ms_uops Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy
 	0x30 extra:cmask=1 ms_cycles Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy
-	0x3c mite_all_uops Uops delivered to Instruction Decode Queue (IDQ) from MITE path
+	0x3c extra: mite_all_uops Uops delivered to Instruction Decode Queue (IDQ) from MITE path
 name:icache type:mandatory default:0x2
-	0x2 misses Instruction cache, streaming buffer and victim cache misses
+	0x2 extra: misses Instruction cache, streaming buffer and victim cache misses
 name:itlb_misses type:bitmask default:0x1
-	0x1 miss_causes_a_walk Misses at all ITLB levels that cause page walks
-	0x2 walk_completed Misses in all ITLB levels that cause completed page walks
-	0x4 walk_duration Cycles when PMH is busy with page walks
-	0x10 stlb_hit Operations that miss the first ITLB level but hit the second and do not cause any page walks
+	0x1 extra: miss_causes_a_walk Misses at all ITLB levels that cause page walks
+	0x2 extra: walk_completed Misses in all ITLB levels that cause completed page walks
+	0x4 extra: walk_duration Cycles when PMH is busy with page walks
+	0x10 extra: stlb_hit Operations that miss the first ITLB level but hit the second and do not cause any page walks
 name:ild_stall type:bitmask default:0x1
-	0x1 lcp Stalls caused by changing prefix length of the instruction.
-	0x4 iq_full Stall cycles because IQ is full
+	0x1 extra: lcp Stalls caused by changing prefix length of the instruction.
+	0x4 extra: iq_full Stall cycles because IQ is full
 name:br_inst_exec type:exclusive default:0x41
-	0x41 nontaken_conditional Not taken macro-conditional branches
-	0x81 taken_conditional Taken speculative and retired macro-conditional branches
-	0x82 taken_direct_jump Taken speculative and retired macro-conditional branch instructions excluding calls and indirects
-	0x84 taken_indirect_jump_non_call_ret Taken speculative and retired indirect branches excluding calls and returns
-	0x88 taken_indirect_near_return Taken speculative and retired indirect branches with return mnemonic
-	0x90 taken_direct_near_call Taken speculative and retired direct near calls
-	0xa0 taken_indirect_near_call Taken speculative and retired indirect calls
-	0xc1 all_conditional Speculative and retired macro-conditional branches
-	0xc2 all_direct_jmp Speculative and retired macro-unconditional branches excluding calls and indirects
-	0xc4 all_indirect_jump_non_call_ret Speculative and retired indirect branches excluding calls and returns
-	0xc8 all_indirect_near_return Speculative and retired indirect return branches.
-	0xd0 all_direct_near_call Speculative and retired direct near calls
-	0xff all_branches Speculative and retired  branches
+	0x41 extra: nontaken_conditional Not taken macro-conditional branches
+	0x81 extra: taken_conditional Taken speculative and retired macro-conditional branches
+	0x82 extra: taken_direct_jump Taken speculative and retired macro-conditional branch instructions excluding calls and indirects
+	0x84 extra: taken_indirect_jump_non_call_ret Taken speculative and retired indirect branches excluding calls and returns
+	0x88 extra: taken_indirect_near_return Taken speculative and retired indirect branches with return mnemonic
+	0x90 extra: taken_direct_near_call Taken speculative and retired direct near calls
+	0xa0 extra: taken_indirect_near_call Taken speculative and retired indirect calls
+	0xc1 extra: all_conditional Speculative and retired macro-conditional branches
+	0xc2 extra: all_direct_jmp Speculative and retired macro-unconditional branches excluding calls and indirects
+	0xc4 extra: all_indirect_jump_non_call_ret Speculative and retired indirect branches excluding calls and returns
+	0xc8 extra: all_indirect_near_return Speculative and retired indirect return branches.
+	0xd0 extra: all_direct_near_call Speculative and retired direct near calls
+	0xff extra: all_branches Speculative and retired  branches
 name:br_misp_exec type:exclusive default:0x41
-	0x41 nontaken_conditional Not taken speculative and retired mispredicted macro conditional branches
-	0x81 taken_conditional Taken speculative and retired mispredicted macro conditional branches
-	0x84 taken_indirect_jump_non_call_ret Taken speculative and retired mispredicted indirect branches excluding calls and returns
-	0x88 taken_return_near Taken speculative and retired mispredicted indirect branches with return mnemonic
-	0xa0 taken_indirect_near_call Taken speculative and retired mispredicted indirect calls
-	0xc1 all_conditional Speculative and retired mispredicted macro conditional branches
-	0xc4 all_indirect_jump_non_call_ret Mispredicted indirect branches excluding calls and returns
-	0xff all_branches Speculative and retired mispredicted macro conditional branches
-name:idq_uops_not_delivered type:exclusive default:0x1
-	0x1 core Uops not delivered by the Frontend to the Backend of the machine, while there is no Backend stall 
+	0x41 extra: nontaken_conditional Not taken speculative and retired mispredicted macro conditional branches
+	0x81 extra: taken_conditional Taken speculative and retired mispredicted macro conditional branches
+	0x84 extra: taken_indirect_jump_non_call_ret Taken speculative and retired mispredicted indirect branches excluding calls and returns
+	0x88 extra: taken_return_near Taken speculative and retired mispredicted indirect branches with return mnemonic
+	0xa0 extra: taken_indirect_near_call Taken speculative and retired mispredicted indirect calls
+	0xc1 extra: all_conditional Speculative and retired mispredicted macro conditional branches
+	0xc4 extra: all_indirect_jump_non_call_ret Mispredicted indirect branches excluding calls and returns
+	0xff extra: all_branches Speculative and retired mispredicted macro conditional branches
+name:idq_uops_not_delivered type:exclusive default:core
+	0x1 extra: core Uops not delivered by the Frontend to the Backend of the machine, while there is no Backend stall
 	0x1 extra:cmask=1 cycles_le_3_uop_deliv.core Cycles with 3 or less uops delivered by the Frontend to the Backend of the machine, while there is no Backend stall
 	0x1 extra:cmask=1,inv cycles_fe_was_ok Cycles with 4 uops delivered by the Frontend to the Backend of the machine, or the Backend was stalling
 	0x1 extra:cmask=2 cycles_le_2_uop_deliv.core Cycles with 2 or less uops delivered by the Frontend to the Backend of the machine, while there is no Backend stall
 	0x1 extra:cmask=3 cycles_le_1_uop_deliv.core Cycles with 1 or less uops delivered by the Frontend to the Backend of the machine, while there is no Backend stall
 	0x1 extra:cmask=4 cycles_0_uops_deliv.core Cycles with no uops delivered by the Frontend to the Backend of the machine, while there is no Backend stall
-name:uops_dispatched_port type:exclusive default:0x1
-	0x1 port_0 Cycles per thread when uops are dispatched to port 0
+name:uops_dispatched_port type:exclusive default:port_0
+	0x1 extra: port_0 Cycles per thread when uops are dispatched to port 0
 	0x1 extra:any port_0_core Cycles per core when uops are dispatched to port 0
-	0x2 port_1 Cycles per thread when uops are dispatched to port 1
+	0x2 extra: port_1 Cycles per thread when uops are dispatched to port 1
 	0x2 extra:any port_1_core Cycles per core when uops are dispatched to port 1
-	0xc port_2 Cycles per thread when load or STA uops are dispatched to port 2
+	0xc extra: port_2 Cycles per thread when load or STA uops are dispatched to port 2
 	0xc extra:any port_2_core Cycles per core when load or STA uops are dispatched to port 2
-	0x30 port_3 Cycles per thread when load or STA uops are dispatched to port 3
+	0x30 extra: port_3 Cycles per thread when load or STA uops are dispatched to port 3
 	0x30 extra:any port_3_core Cycles per core when load or STA uops are dispatched to port 3
-	0x40 port_4 Cycles per thread when uops are dispatched to port 4
+	0x40 extra: port_4 Cycles per thread when uops are dispatched to port 4
 	0x40 extra:any port_4_core Cycles per core when uops are dispatched to port 4
-	0x80 port_5 Cycles per thread when uops are dispatched to port 5
+	0x80 extra: port_5 Cycles per thread when uops are dispatched to port 5
 	0x80 extra:any port_5_core Cycles per core when uops are dispatched to port 5
 name:resource_stalls type:bitmask default:0x1
-	0x1 any Resource-related stall cycles
-	0x4 rs Cycles stalled due to no eligible RS entry available.
-	0x8 sb Cycles stalled due to no store buffers available. (not including draining form sync).
-	0x10 rob Cycles stalled due to re-order buffer full.
+	0x1 extra: any Resource-related stall cycles
+	0x4 extra: rs Cycles stalled due to no eligible RS entry available.
+	0x8 extra: sb Cycles stalled due to no store buffers available. (not including draining from sync).
+	0x10 extra: rob Cycles stalled due to re-order buffer full.
 name:cycle_activity type:exclusive default:0x1
 	0x1 extra:cmask=1 cycles_l2_pending Cycles with pending L2 cache miss loads.
 	0x2 extra:cmask=2 cycles_ldm_pending Cycles with pending memory loads.
@@ -171,99 +171,99 @@ name:cycle_activity type:exclusive default:0x1
 	0x8 extra:cmask=8 cycles_l1d_pending Cycles with pending L1 cache miss loads.
 	0xc extra:cmask=c stalls_l1d_pending Execution stalls due to L1 data cache misses
 name:dsb2mite_switches type:mandatory default:0x1
-	0x1 count Decode Stream Buffer (DSB)-to-MITE switches
+	0x1 extra: count Decode Stream Buffer (DSB)-to-MITE switches
 name:dsb_fill type:mandatory default:0x8
-	0x8 exceed_dsb_lines Cycles when Decode Stream Buffer (DSB) fill encounter more than 3 Decode Stream Buffer (DSB) lines
+	0x8 extra: exceed_dsb_lines Cycles when Decode Stream Buffer (DSB) fill encounter more than 3 Decode Stream Buffer (DSB) lines
 name:itlb type:mandatory default:0x1
-	0x1 itlb_flush Flushing of the Instruction TLB (ITLB) pages, includes 4k/2M/4M pages.
+	0x1 extra: itlb_flush Flushing of the Instruction TLB (ITLB) pages, includes 4k/2M/4M pages.
 name:offcore_requests type:bitmask default:0x1
-	0x1 demand_data_rd Demand Data Read requests sent to uncore
-	0x2 demand_code_rd Cacheable and noncachaeble code read requests
-	0x4 demand_rfo Demand RFO requests including regular RFOs, locks, ItoM
-	0x8 all_data_rd Demand and prefetch data reads
-name:uops_executed type:exclusive default:0x1
-	0x1 thread Counts the number of uops to be executed per-thread each cycle.
+	0x1 extra: demand_data_rd Demand Data Read requests sent to uncore
+	0x2 extra: demand_code_rd Cacheable and noncacheable code read requests
+	0x4 extra: demand_rfo Demand RFO requests including regular RFOs, locks, ItoM
+	0x8 extra: all_data_rd Demand and prefetch data reads
+name:uops_executed type:exclusive default:thread
+	0x1 extra: thread Counts the number of uops to be executed per-thread each cycle.
 	0x1 extra:cmask=1 cycles_ge_1_uop_exec Cycles where at least 1 uop was executed per-thread
 	0x1 extra:cmask=1,inv stall_cycles Counts number of cycles no uops were dispatched to be executed on this thread.
 	0x1 extra:cmask=2 cycles_ge_2_uops_exec Cycles where at least 2 uops were executed per-thread
 	0x1 extra:cmask=3 cycles_ge_3_uops_exec Cycles where at least 3 uops were executed per-thread
 	0x1 extra:cmask=4 cycles_ge_4_uops_exec Cycles where at least 4 uops were executed per-thread
-	0x2 core Number of uops executed on the core.
+	0x2 extra: core Number of uops executed on the core.
 name:tlb_flush type:bitmask default:0x1
-	0x1 dtlb_thread DTLB flush attempts of the thread-specific entries
-	0x20 stlb_any STLB flush attempts
+	0x1 extra: dtlb_thread DTLB flush attempts of the thread-specific entries
+	0x20 extra: stlb_any STLB flush attempts
 name:other_assists type:bitmask default:0x8
-	0x8 avx_store Number of AVX memory assist for stores. AVX microcode assist is being invoked whenever the hardware is unable to properly handle AVX-256b operations.
-	0x10 avx_to_sse Number of transitions from AVX-256 to legacy SSE when penalty applicable.
-	0x20 sse_to_avx Number of transitions from SSE to AVX-256 when penalty applicable.
-name:uops_retired type:exclusive default:0x1
-	0x1 all Actually retired uops. 
+	0x8 extra: avx_store Number of AVX memory assist for stores. AVX microcode assist is being invoked whenever the hardware is unable to properly handle AVX-256b operations.
+	0x10 extra: avx_to_sse Number of transitions from AVX-256 to legacy SSE when penalty applicable.
+	0x20 extra: sse_to_avx Number of transitions from SSE to AVX-256 when penalty applicable.
+name:uops_retired type:exclusive default:all
+	0x1 extra: all Actually retired uops.
 	0x1 extra:cmask=1,inv stall_cycles Cycles without actually retired uops. 
 	0x1 extra:cmask=1,inv,any core_stall_cycles Cycles without actually retired uops. 
 	0x1 extra:cmask=10,inv total_cycles Cycles with less than 10 actually retired uops. 
-	0x2 retire_slots Retirement slots used. 
+	0x2 extra: retire_slots Retirement slots used.
 name:machine_clears type:bitmask default:0x2
-	0x2 memory_ordering Counts the number of machine clears due to memory order conflicts.
-	0x4 smc Self-modifying code (SMC) detected.
-	0x20 maskmov This event counts the number of executed Intel AVX masked load operations that refer to an illegal address range with the mask bits set to 0. 
+	0x2 extra: memory_ordering Counts the number of machine clears due to memory order conflicts.
+	0x4 extra: smc Self-modifying code (SMC) detected.
+	0x20 extra: maskmov This event counts the number of executed Intel AVX masked load operations that refer to an illegal address range with the mask bits set to 0.
 name:br_inst_retired type:exclusive default:0x1
-	0x1 conditional Conditional branch instructions retired. 
-	0x2 near_call_r3 Direct and indirect macro near call instructions retired (captured in ring 3). 
-	0x2 near_call Direct and indirect near call instructions retired. 
-	0x8 near_return Return instructions retired. 
-	0x10 not_taken Not taken branch instructions retired. 
-	0x20 near_taken Taken branch instructions retired. 
-	0x40 far_branch Far branch instructions retired. 
+	0x1 extra: conditional Conditional branch instructions retired.
+	0x2 extra: near_call_r3 Direct and indirect macro near call instructions retired (captured in ring 3).
+	0x2 extra: near_call Direct and indirect near call instructions retired.
+	0x8 extra: near_return Return instructions retired.
+	0x10 extra: not_taken Not taken branch instructions retired.
+	0x20 extra: near_taken Taken branch instructions retired.
+	0x40 extra: far_branch Far branch instructions retired.
 name:br_misp_retired type:bitmask default:0x1
-	0x1 conditional Mispredicted conditional branch instructions retired. 
-	0x20 near_taken number of near branch instructions retired that were mispredicted and taken. 
+	0x1 extra: conditional Mispredicted conditional branch instructions retired.
+	0x20 extra: near_taken number of near branch instructions retired that were mispredicted and taken.
 name:fp_assist type:exclusive default:0x1e
-	0x2 x87_output Number of X87 assists due to output value.
-	0x4 x87_input Number of X87 assists due to input value.
-	0x8 simd_output Number of SIMD FP assists due to Output values
-	0x10 simd_input Number of SIMD FP assists due to input values
+	0x2 extra: x87_output Number of X87 assists due to output value.
+	0x4 extra: x87_input Number of X87 assists due to input value.
+	0x8 extra: simd_output Number of SIMD FP assists due to Output values
+	0x10 extra: simd_input Number of SIMD FP assists due to input values
 	0x1e extra:cmask=1 any Cycles with any input/output SSE or FP assist
 name:rob_misc_events type:mandatory default:0x20
-	0x20 lbr_inserts Count cases of saving new LBR
+	0x20 extra: lbr_inserts Count cases of saving new LBR
 name:mem_uops_retired type:exclusive default:0x81
-	0x11 stlb_miss_loads Load uops with true STLB miss retired to architected path. 
-	0x12 stlb_miss_stores Store uops with true STLB miss retired to architected path. 
-	0x21 lock_loads Load uops with locked access retired to architected path. 
-	0x41 split_loads Line-splitted load uops retired to architected path. 
-	0x42 split_stores Line-splitted store uops retired to architected path. 
-	0x81 all_loads Load uops retired to architected path with filter on bits 0 and 1 applied. 
-	0x82 all_stores Store uops retired to architected path with filter on bits 0 and 1 applied. 
+	0x11 extra: stlb_miss_loads Load uops with true STLB miss retired to architected path.
+	0x12 extra: stlb_miss_stores Store uops with true STLB miss retired to architected path.
+	0x21 extra: lock_loads Load uops with locked access retired to architected path.
+	0x41 extra: split_loads Line-splitted load uops retired to architected path.
+	0x42 extra: split_stores Line-splitted store uops retired to architected path.
+	0x81 extra: all_loads Load uops retired to architected path with filter on bits 0 and 1 applied.
+	0x82 extra: all_stores Store uops retired to architected path with filter on bits 0 and 1 applied.
 name:mem_load_uops_retired type:bitmask default:0x1
-	0x1 l1_hit Retired load uops with L1 cache hits as data sources. 
-	0x2 l2_hit Retired load uops with L2 cache hits as data sources. 
-	0x4 llc_hit Retired load uops which data sources were data hits in LLC without snoops required. 
-	0x40 hit_lfb Retired load uops which data sources were load uops missed L1 but hit forward buffer due to preceding miss to the same cache line with data not ready. 
+	0x1 extra: l1_hit Retired load uops with L1 cache hits as data sources.
+	0x2 extra: l2_hit Retired load uops with L2 cache hits as data sources.
+	0x4 extra: llc_hit Retired load uops which data sources were data hits in LLC without snoops required.
+	0x40 extra: hit_lfb Retired load uops which data sources were load uops missed L1 but hit forward buffer due to preceding miss to the same cache line with data not ready.
 name:mem_load_uops_llc_hit_retired type:bitmask default:0x1
-	0x1 xsnp_miss Retired load uops which data sources were LLC hit and cross-core snoop missed in on-pkg core cache. 
-	0x2 xsnp_hit Retired load uops which data sources were LLC and cross-core snoop hits in on-pkg core cache. 
-	0x4 xsnp_hitm Retired load uops which data sources were HitM responses from shared LLC. 
-	0x8 xsnp_none Retired load uops which data sources were hits in LLC without snoops required. 
+	0x1 extra: xsnp_miss Retired load uops which data sources were LLC hit and cross-core snoop missed in on-pkg core cache.
+	0x2 extra: xsnp_hit Retired load uops which data sources were LLC and cross-core snoop hits in on-pkg core cache.
+	0x4 extra: xsnp_hitm Retired load uops which data sources were HitM responses from shared LLC.
+	0x8 extra: xsnp_none Retired load uops which data sources were hits in LLC without snoops required.
 name:mem_load_uops_llc_miss_retired type:mandatory default:0x1
-	0x1 local_dram Data from local DRAM either Snoop not needed or Snoop Miss (RspI)
+	0x1 extra: local_dram Data from local DRAM either Snoop not needed or Snoop Miss (RspI)
 name:baclears type:mandatory default:0x1f
-	0x1f any Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.
+	0x1f extra: any Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.
 name:l2_trans type:bitmask default:0x80
-	0x1 demand_data_rd Demand Data Read requests that access L2 cache
-	0x2 rfo RFO requests that access L2 cache
-	0x4 code_rd L2 cache accesses when fetching instructions
-	0x8 all_pf L2 or LLC HW prefetches that access L2 cache
-	0x10 l1d_wb L1D writebacks that access L2 cache
-	0x20 l2_fill L2 fill requests that access L2 cache
-	0x40 l2_wb L2 writebacks that access L2 cache
-	0x80 all_requests Transactions accessing L2 pipe
+	0x1 extra: demand_data_rd Demand Data Read requests that access L2 cache
+	0x2 extra: rfo RFO requests that access L2 cache
+	0x4 extra: code_rd L2 cache accesses when fetching instructions
+	0x8 extra: all_pf L2 or LLC HW prefetches that access L2 cache
+	0x10 extra: l1d_wb L1D writebacks that access L2 cache
+	0x20 extra: l2_fill L2 fill requests that access L2 cache
+	0x40 extra: l2_wb L2 writebacks that access L2 cache
+	0x80 extra: all_requests Transactions accessing L2 pipe
 name:l2_lines_in type:exclusive default:0x7
-	0x1 i L2 cache lines in I state filling L2
-	0x2 s L2 cache lines in S state filling L2
-	0x4 e L2 cache lines in E state filling L2
-	0x7 all L2 cache lines filling L2
+	0x1 extra: i L2 cache lines in I state filling L2
+	0x2 extra: s L2 cache lines in S state filling L2
+	0x4 extra: e L2 cache lines in E state filling L2
+	0x7 extra: all L2 cache lines filling L2
 name:l2_lines_out type:exclusive default:0x1
-	0x1 demand_clean Clean L2 cache lines evicted by demand
-	0x2 demand_dirty Dirty L2 cache lines evicted by demand
-	0x4 pf_clean Clean L2 cache lines evicted by L2 prefetch
-	0x8 pf_dirty Dirty L2 cache lines evicted by L2 prefetch
-	0xa dirty_all Dirty L2 cache lines filling the L2
+	0x1 extra: demand_clean Clean L2 cache lines evicted by demand
+	0x2 extra: demand_dirty Dirty L2 cache lines evicted by demand
+	0x4 extra: pf_clean Clean L2 cache lines evicted by L2 prefetch
+	0x8 extra: pf_dirty Dirty L2 cache lines evicted by L2 prefetch
+	0xa extra: dirty_all Dirty L2 cache lines filling the L2
diff --git a/events/i386/nehalem/unit_masks b/events/i386/nehalem/unit_masks
index d800e5d..8f60292 100644
--- a/events/i386/nehalem/unit_masks
+++ b/events/i386/nehalem/unit_masks
@@ -4,369 +4,369 @@
 #
 include:i386/arch_perfmon
 name:sb_forward type:mandatory default:0x01
-	0x01 any Counts the number of store forwards
+	0x01 extra: any Counts the number of store forwards
 name:load_block type:bitmask default:0x01
-	0x01 std Counts the number of loads blocked by a preceding store with unknown data
-	0x04 address_offset Counts the number of loads blocked by a preceding store address
+	0x01 extra: std Counts the number of loads blocked by a preceding store with unknown data
+	0x04 extra: address_offset Counts the number of loads blocked by a preceding store address
 name:sb_drain type:mandatory default:0x01
-	0x01 cycles Counts the cycles of store buffer drains
+	0x01 extra: cycles Counts the cycles of store buffer drains
 name:misalign_mem_ref type:bitmask default:0x03
-	0x01 load Counts the number of misaligned load references
-	0x02 store Counts the number of misaligned store references
-	0x03 any Counts the number of misaligned memory references
+	0x01 extra: load Counts the number of misaligned load references
+	0x02 extra: store Counts the number of misaligned store references
+	0x03 extra: any Counts the number of misaligned memory references
 name:store_blocks type:bitmask default:0x0f
-	0x01 not_sta This event counts the number of load operations delayed caused by preceding stores whose addresses are known but whose data is unknown, and preceding stores that conflict with the load but which incompletely overlap the load
-	0x02 sta This event counts load operations delayed caused by preceding stores whose addresses are unknown (STA block)
-	0x04 at_ret Counts number of loads delayed with at-Retirement block code
-	0x08 l1d_block Cacheable  loads delayed with L1D block code
+	0x01 extra: not_sta This event counts the number of load operations delayed caused by preceding stores whose addresses are known but whose data is unknown, and preceding stores that conflict with the load but which incompletely overlap the load
+	0x02 extra: sta This event counts load operations delayed caused by preceding stores whose addresses are unknown (STA block)
+	0x04 extra: at_ret Counts number of loads delayed with at-Retirement block code
+	0x08 extra: l1d_block Cacheable  loads delayed with L1D block code
 	0x0F any All loads delayed due to store blocks
 name:dtlb_load_misses type:bitmask default:0x01
-	0x01 any Counts all load misses that cause a page walk
-	0x02 walk_completed Counts number of completed page walks due to load miss in the STLB
-	0x10 stlb_hit Number of cache load STLB hits
-	0x20 pde_miss Number of DTLB cache load misses where the low part of the linear to physical address translation was missed
-	0x40 pdp_miss Number of DTLB cache load misses where the high part of the linear to physical address translation was missed
-	0x80 large_walk_completed Counts number of completed large page walks due to load miss in the STLB
+	0x01 extra: any Counts all load misses that cause a page walk
+	0x02 extra: walk_completed Counts number of completed page walks due to load miss in the STLB
+	0x10 extra: stlb_hit Number of cache load STLB hits
+	0x20 extra: pde_miss Number of DTLB cache load misses where the low part of the linear to physical address translation was missed
+	0x40 extra: pdp_miss Number of DTLB cache load misses where the high part of the linear to physical address translation was missed
+	0x80 extra: large_walk_completed Counts number of completed large page walks due to load miss in the STLB
 name:memory_disambiguration type:bitmask default:0x01
-	0x01 reset Counts memory disambiguration reset cycles
-	0x02 success Counts the number of loads that memory disambiguration succeeded
-	0x04 watchdog Counts the number of times the memory disambiguration watchdog kicked in
-	0x08 watch_cycles Counts the cycles that the memory disambiguration watchdog is active
+	0x01 extra: reset Counts memory disambiguation reset cycles
+	0x02 extra: success Counts the number of loads for which memory disambiguation succeeded
+	0x04 extra: watchdog Counts the number of times the memory disambiguation watchdog kicked in
+	0x08 extra: watch_cycles Counts the cycles that the memory disambiguation watchdog is active
 name:mem_inst_retired type:bitmask default:0x01
-	0x01 loads Counts the number of instructions with an architecturally-visible store retired on the architected path
-	0x02 stores Counts the number of instructions with an architecturally-visible store retired on the architected path
+	0x01 extra: loads Counts the number of instructions with an architecturally-visible load retired on the architected path
+	0x02 extra: stores Counts the number of instructions with an architecturally-visible store retired on the architected path
 name:mem_store_retired type:mandatory default:0x01
-	0x01 dtlb_miss The event counts the number of retired stores that missed the DTLB
+	0x01 extra: dtlb_miss The event counts the number of retired stores that missed the DTLB
 name:uops_issued type:bitmask default:0x01
-	0x01 any Counts the number of Uops issued by the Register Allocation Table to the Reservation Station, i
-	0x01 stalled_cycles Counts the number of cycles no Uops issued by the Register Allocation Table to the Reservation Station, i
-	0x02 fused Counts the number of fused Uops that were issued from the Register Allocation Table to the Reservation Station
+	0x01 extra: any Counts the number of Uops issued by the Register Allocation Table to the Reservation Station
+	0x01 extra: stalled_cycles Counts the number of cycles no Uops issued by the Register Allocation Table to the Reservation Station
+	0x02 extra: fused Counts the number of fused Uops that were issued from the Register Allocation Table to the Reservation Station
 name:mem_uncore_retired type:bitmask default:0x02
-	0x02 other_core_l2_hitm Counts number of memory load instructions retired where the memory reference hit modified data in a sibling core residing on the same socket
-	0x08 remote_cache_local_home_hit Counts number of memory load instructions retired where the memory reference missed the L1, L2 and LLC caches and HIT in a remote socket's cache
-	0x10 remote_dram Counts number of memory load instructions retired where the memory reference missed the L1, L2 and LLC caches and was remotely homed
-	0x20 local_dram Counts number of memory load instructions retired where the memory reference missed the L1, L2 and LLC caches and required a local socket memory reference
+	0x02 extra: other_core_l2_hitm Counts number of memory load instructions retired where the memory reference hit modified data in a sibling core residing on the same socket
+	0x08 extra: remote_cache_local_home_hit Counts number of memory load instructions retired where the memory reference missed the L1, L2 and LLC caches and HIT in a remote socket's cache
+	0x10 extra: remote_dram Counts number of memory load instructions retired where the memory reference missed the L1, L2 and LLC caches and was remotely homed
+	0x20 extra: local_dram Counts number of memory load instructions retired where the memory reference missed the L1, L2 and LLC caches and required a local socket memory reference
 name:fp_comp_ops_exe type:bitmask default:0x01
-	0x01 x87 Counts the number of FP Computational Uops Executed
-	0x02 mmx Counts number of MMX Uops executed
-	0x04 sse_fp Counts number of SSE and SSE2 FP uops executed
-	0x08 sse2_integer Counts number of SSE2 integer uops executed
-	0x10 sse_fp_packed Counts number of SSE FP packed uops executed
-	0x20 sse_fp_scalar Counts number of SSE FP scalar uops executed
-	0x40 sse_single_precision Counts number of SSE* FP single precision uops executed
-	0x80 sse_double_precision Counts number of SSE* FP double precision uops executed
+	0x01 extra: x87 Counts the number of FP Computational Uops Executed
+	0x02 extra: mmx Counts number of MMX Uops executed
+	0x04 extra: sse_fp Counts number of SSE and SSE2 FP uops executed
+	0x08 extra: sse2_integer Counts number of SSE2 integer uops executed
+	0x10 extra: sse_fp_packed Counts number of SSE FP packed uops executed
+	0x20 extra: sse_fp_scalar Counts number of SSE FP scalar uops executed
+	0x40 extra: sse_single_precision Counts number of SSE* FP single precision uops executed
+	0x80 extra: sse_double_precision Counts number of SSE* FP double precision uops executed
 name:simd_int_128 type:bitmask default:0x01
-	0x01 packed_mpy Counts number of 128 bit SIMD integer multiply operations
-	0x02 packed_shift Counts number of 128 bit SIMD integer shift operations
-	0x04 pack Counts number of 128 bit SIMD integer pack operations
-	0x08 unpack Counts number of 128 bit SIMD integer unpack operations
-	0x10 packed_logical Counts number of 128 bit SIMD integer logical  operations
-	0x20 packed_arith Counts number of 128 bit SIMD integer arithmetic operations
-	0x40 shuffle_move Counts number of 128 bit SIMD integer shuffle and move operations
+	0x01 extra: packed_mpy Counts number of 128 bit SIMD integer multiply operations
+	0x02 extra: packed_shift Counts number of 128 bit SIMD integer shift operations
+	0x04 extra: pack Counts number of 128 bit SIMD integer pack operations
+	0x08 extra: unpack Counts number of 128 bit SIMD integer unpack operations
+	0x10 extra: packed_logical Counts number of 128 bit SIMD integer logical  operations
+	0x20 extra: packed_arith Counts number of 128 bit SIMD integer arithmetic operations
+	0x40 extra: shuffle_move Counts number of 128 bit SIMD integer shuffle and move operations
 name:load_dispatch type:bitmask default:0x07
-	0x01 rs Counts number of loads dispatched from the Reservation Station that bypass the Memory Order Buffer
-	0x02 rs_delayed Counts the number of delayed RS dispatches at the stage latch
-	0x04 mob Counts the number of loads dispatched from the Reservation Station to the Memory Order Buffer
-	0x07 any Counts all loads dispatched from the Reservation Station
+	0x01 extra: rs Counts number of loads dispatched from the Reservation Station that bypass the Memory Order Buffer
+	0x02 extra: rs_delayed Counts the number of delayed RS dispatches at the stage latch
+	0x04 extra: mob Counts the number of loads dispatched from the Reservation Station to the Memory Order Buffer
+	0x07 extra: any Counts all loads dispatched from the Reservation Station
 name:arith type:bitmask default:0x01
-	0x01 cycles_div_busy Counts the number of cycles the divider is busy executing divide or square root operations
-	0x02 mul Counts the number of multiply operations executed
+	0x01 extra: cycles_div_busy Counts the number of cycles the divider is busy executing divide or square root operations
+	0x02 extra: mul Counts the number of multiply operations executed
 name:inst_decoded type:mandatory default:0x01
-	0x01 dec0 Counts number of instructions that require  decoder 0 to be decoded
+	0x01 extra: dec0 Counts number of instructions that require  decoder 0 to be decoded
 name:hw_int type:bitmask default:0x01
-	0x01 rcv Number of interrupt received
-	0x02 cycles_masked Number of cycles interrupt are masked
-	0x04 cycles_pending_and_masked Number of cycles interrupts are pending and masked
+	0x01 extra: rcv Number of interrupts received
+	0x02 extra: cycles_masked Number of cycles interrupts are masked
+	0x04 extra: cycles_pending_and_masked Number of cycles interrupts are pending and masked
 name:l2_rqsts type:bitmask default:0x01
-	0x01 ld_hit Counts number of loads that hit the L2 cache
-	0x02 ld_miss Counts the number of loads that miss the L2 cache
-	0x03 loads Counts all L2 load requests
-	0x04 rfo_hit Counts the number of store RFO requests that hit the L2 cache
-	0x08 rfo_miss Counts the number of store RFO requests that miss the L2 cache
+	0x01 extra: ld_hit Counts number of loads that hit the L2 cache
+	0x02 extra: ld_miss Counts the number of loads that miss the L2 cache
+	0x03 extra: loads Counts all L2 load requests
+	0x04 extra: rfo_hit Counts the number of store RFO requests that hit the L2 cache
+	0x08 extra: rfo_miss Counts the number of store RFO requests that miss the L2 cache
 	0x0C rfos Counts all L2 store RFO requests
-	0x10 ifetch_hit Counts number of instruction fetches that hit the L2 cache
-	0x20 ifetch_miss Counts number of instruction fetches that miss the L2 cache
-	0x30 ifetches Counts all instruction fetches
-	0x40 prefetch_hit Counts L2 prefetch hits for both code and data
-	0x80 prefetch_miss Counts L2 prefetch misses for both code and data
+	0x10 extra: ifetch_hit Counts number of instruction fetches that hit the L2 cache
+	0x20 extra: ifetch_miss Counts number of instruction fetches that miss the L2 cache
+	0x30 extra: ifetches Counts all instruction fetches
+	0x40 extra: prefetch_hit Counts L2 prefetch hits for both code and data
+	0x80 extra: prefetch_miss Counts L2 prefetch misses for both code and data
 	0xC0 prefetches Counts all L2 prefetches for both code and data
 	0xAA miss Counts all L2 misses for both code and data
 	0xFF references Counts all L2 requests for both code and data
 name:l2_data_rqsts type:bitmask default:0xff
-	0x01 i_state Counts number of L2 data demand loads where the cache line to be loaded is in the I (invalid) state, i
-	0x02 s_state Counts number of L2 data demand loads where the cache line to be loaded is in the S (shared) state
-	0x04 e_state Counts number of L2 data demand loads where the cache line to be loaded is in the E (exclusive) state
-	0x08 m_state Counts number of L2 data demand loads where the cache line to be loaded is in the M (modified) state
+	0x01 extra: i_state Counts number of L2 data demand loads where the cache line to be loaded is in the I (invalid) state, i
+	0x02 extra: s_state Counts number of L2 data demand loads where the cache line to be loaded is in the S (shared) state
+	0x04 extra: e_state Counts number of L2 data demand loads where the cache line to be loaded is in the E (exclusive) state
+	0x08 extra: m_state Counts number of L2 data demand loads where the cache line to be loaded is in the M (modified) state
 	0x0F mesi Counts all L2 data demand requests
-	0x10 i_state Counts number of L2 prefetch data loads where the cache line to be loaded is in the I (invalid) state, i
-	0x20 s_state Counts number of L2 prefetch data loads where the cache line to be loaded is in the S (shared) state
-	0x40 e_state Counts number of L2 prefetch data loads where the cache line to be loaded is in the E (exclusive) state
-	0x80 m_state Counts number of L2 prefetch data loads where the cache line to be loaded is in the M (modified) state
+	0x10 extra: i_state Counts number of L2 prefetch data loads where the cache line to be loaded is in the I (invalid) state, i
+	0x20 extra: s_state Counts number of L2 prefetch data loads where the cache line to be loaded is in the S (shared) state
+	0x40 extra: e_state Counts number of L2 prefetch data loads where the cache line to be loaded is in the E (exclusive) state
+	0x80 extra: m_state Counts number of L2 prefetch data loads where the cache line to be loaded is in the M (modified) state
 	0xF0 mesi Counts all L2 prefetch requests
 	0xFF any Counts all L2 data requests
 name:l2_write type:bitmask default:0x01
-	0x01 i_state Counts number of L2 demand store RFO requests where the cache line to be loaded is in the I (invalid) state, i
-	0x02 s_state Counts number of L2 store RFO requests where the cache line to be loaded is in the S (shared) state
-	0x04 e_state Counts number of L2 store RFO requests where the cache line to be loaded is in the E (exclusive) state
-	0x08 m_state Counts number of L2 store RFO requests where the cache line to be loaded is in the M (modified) state
+	0x01 extra: i_state Counts number of L2 demand store RFO requests where the cache line to be loaded is in the I (invalid) state, i
+	0x02 extra: s_state Counts number of L2 store RFO requests where the cache line to be loaded is in the S (shared) state
+	0x04 extra: e_state Counts number of L2 store RFO requests where the cache line to be loaded is in the E (exclusive) state
+	0x08 extra: m_state Counts number of L2 store RFO requests where the cache line to be loaded is in the M (modified) state
 	0x0E hit Counts number of L2 store RFO requests where the cache line to be loaded is in either the S, E or M states
 	0x0F mesi Counts all L2 store RFO requests
-	0x10 i_state Counts number of L2 demand lock RFO requests where the cache line to be loaded is in the I (invalid) state, i
-	0x20 s_state Counts number of L2 lock RFO requests where the cache line to be loaded is in the S (shared) state
-	0x40 e_state Counts number of L2 demand lock RFO requests where the cache line to be loaded is in the E (exclusive) state
-	0x80 m_state Counts number of L2 demand lock RFO requests where the cache line to be loaded is in the M (modified) state
+	0x10 extra: i_state Counts number of L2 demand lock RFO requests where the cache line to be loaded is in the I (invalid) state, i
+	0x20 extra: s_state Counts number of L2 lock RFO requests where the cache line to be loaded is in the S (shared) state
+	0x40 extra: e_state Counts number of L2 demand lock RFO requests where the cache line to be loaded is in the E (exclusive) state
+	0x80 extra: m_state Counts number of L2 demand lock RFO requests where the cache line to be loaded is in the M (modified) state
 	0xE0 hit Counts number of L2 demand lock RFO requests where the cache line to be loaded is in either the S, E, or M state
 	0xF0 mesi Counts all L2 demand lock RFO requests
 name:l1d_wb_l2 type:bitmask default:0x01
-	0x01 i_state Counts number of L1 writebacks to the L2 where the cache line to be written is in the I (invalid) state, i
-	0x02 s_state Counts number of L1 writebacks to the L2 where the cache line to be written is in the S state
-	0x04 e_state Counts number of L1 writebacks to the L2 where the cache line to be written is in the E (exclusive) state
-	0x08 m_state Counts number of L1 writebacks to the L2 where the cache line to be written is in the M (modified) state
+	0x01 extra: i_state Counts number of L1 writebacks to the L2 where the cache line to be written is in the I (invalid) state, i
+	0x02 extra: s_state Counts number of L1 writebacks to the L2 where the cache line to be written is in the S state
+	0x04 extra: e_state Counts number of L1 writebacks to the L2 where the cache line to be written is in the E (exclusive) state
+	0x08 extra: m_state Counts number of L1 writebacks to the L2 where the cache line to be written is in the M (modified) state
 	0x0F mesi Counts all L1 writebacks to the L2
 name:longest_lat_cache type:bitmask default:0x4F
 	0x4F reference This event counts requests originating from the core that reference a cache line in the last level cache
-	0x41 miss This event counts each cache miss condition for references to the last level cache
+	0x41 extra: miss This event counts each cache miss condition for references to the last level cache
 name:cpu_clk_unhalted type:bitmask default:0x00
-	0x00 thread_p Counts the number of thread cycles while the thread is not in a halt state
-	0x01 ref_p Increments at the frequency of a slower reference clock when not halted
+	0x00 extra: thread_p Counts the number of thread cycles while the thread is not in a halt state
+	0x01 extra: ref_p Increments at the frequency of a slower reference clock when not halted
 name:l1d_cache_ld type:bitmask default:0x01
-	0x01 i_state Counts L1 data cache read requests where the cache line to be loaded is in the I (invalid) state, i
-	0x02 s_state Counts L1 data cache read requests where the cache line to be loaded is in the S (shared) state
-	0x04 e_state Counts L1 data cache read requests where the cache line to be loaded is in the E (exclusive) state
-	0x08 m_state Counts L1 data cache read requests where the cache line to be loaded is in the M (modified) state
+	0x01 extra: i_state Counts L1 data cache read requests where the cache line to be loaded is in the I (invalid) state, i
+	0x02 extra: s_state Counts L1 data cache read requests where the cache line to be loaded is in the S (shared) state
+	0x04 extra: e_state Counts L1 data cache read requests where the cache line to be loaded is in the E (exclusive) state
+	0x08 extra: m_state Counts L1 data cache read requests where the cache line to be loaded is in the M (modified) state
 	0x0F mesi Counts L1 data cache read requests
 name:l1d_cache_st type:bitmask default:0x01
-	0x01 i_state Counts L1 data cache store RFO requests where the cache line to be loaded is in the I state
-	0x02 s_state Counts L1 data cache store RFO requests where the cache line to be loaded is in the S (shared) state
-	0x04 e_state Counts L1 data cache store RFO requests where the cache line to be loaded is in the E (exclusive) state
-	0x08 m_state Counts L1 data cache store RFO requests where cache line to be loaded is in the M (modified) state
+	0x01 extra: i_state Counts L1 data cache store RFO requests where the cache line to be loaded is in the I state
+	0x02 extra: s_state Counts L1 data cache store RFO requests where the cache line to be loaded is in the S (shared) state
+	0x04 extra: e_state Counts L1 data cache store RFO requests where the cache line to be loaded is in the E (exclusive) state
+	0x08 extra: m_state Counts L1 data cache store RFO requests where cache line to be loaded is in the M (modified) state
 	0x0F mesi Counts L1 data cache store RFO requests
 name:l1d_cache_lock type:bitmask default:0x01
-	0x01 hit Counts retired load locks that hit in the L1 data cache or hit in an already allocated fill buffer
-	0x02 s_state Counts L1 data cache retired load locks that hit the target cache line in the shared state
-	0x04 e_state Counts L1 data cache retired load locks that hit the target cache line in the exclusive state
-	0x08 m_state Counts L1 data cache retired load locks that hit the target cache line in the modified state
+	0x01 extra: hit Counts retired load locks that hit in the L1 data cache or hit in an already allocated fill buffer
+	0x02 extra: s_state Counts L1 data cache retired load locks that hit the target cache line in the shared state
+	0x04 extra: e_state Counts L1 data cache retired load locks that hit the target cache line in the exclusive state
+	0x08 extra: m_state Counts L1 data cache retired load locks that hit the target cache line in the modified state
 name:l1d_all_ref type:bitmask default:0x01
-	0x01 any Counts all references (uncached, speculated and retired) to the L1 data cache, including all loads and stores with any memory types
-	0x02 cacheable Counts all data reads and writes (speculated and retired) from cacheable memory, including locked operations
+	0x01 extra: any Counts all references (uncached, speculated and retired) to the L1 data cache, including all loads and stores with any memory types
+	0x02 extra: cacheable Counts all data reads and writes (speculated and retired) from cacheable memory, including locked operations
 #name:l1d_pend_miss type:mandatory default:0x02
-#	0x02 load_buffers_full Counts cycles of L1 data cache load fill buffers full
+#	0x02 extra: load_buffers_full Counts cycles of L1 data cache load fill buffers full
 name:dtlb_misses type:bitmask default:0x01
-	0x01 any Counts the number of misses in the STLB which causes a page walk
-	0x02 walk_completed Counts number of misses in the STLB which resulted in a completed page walk
-	0x10 stlb_hit Counts the number of DTLB first level misses that hit in the second level TLB
-	0x20 pde_miss Number of DTLB cache misses where the low part of the linear to physical address translation was missed
-	0x40 pdp_miss Number of DTLB misses where the high part of the linear to physical address translation was missed
-	0x80 large_walk_completed Counts number of completed large page walks due to misses in the STLB
+	0x01 extra: any Counts the number of misses in the STLB which causes a page walk
+	0x02 extra: walk_completed Counts number of misses in the STLB which resulted in a completed page walk
+	0x10 extra: stlb_hit Counts the number of DTLB first level misses that hit in the second level TLB
+	0x20 extra: pde_miss Number of DTLB cache misses where the low part of the linear to physical address translation was missed
+	0x40 extra: pdp_miss Number of DTLB misses where the high part of the linear to physical address translation was missed
+	0x80 extra: large_walk_completed Counts number of completed large page walks due to misses in the STLB
 name:sse_mem_exec type:bitmask default:0x01
-	0x01 nta Counts number of SSE NTA prefetch/weakly-ordered instructions which missed the L1 data cache
-	0x08 streaming_stores Counts number of SSE nontemporal stores
+	0x01 extra: nta Counts number of SSE NTA prefetch/weakly-ordered instructions which missed the L1 data cache
+	0x08 extra: streaming_stores Counts number of SSE nontemporal stores
 name:l1d_prefetch type:bitmask default:0x01
-	0x01 requests Counts number of hardware prefetch requests dispatched out of the prefetch FIFO
-	0x02 miss Counts number of hardware prefetch requests that miss the L1D
-	0x04 triggers Counts number of prefetch requests triggered by the Finite State Machine and pushed into the prefetch FIFO
+	0x01 extra: requests Counts number of hardware prefetch requests dispatched out of the prefetch FIFO
+	0x02 extra: miss Counts number of hardware prefetch requests that miss the L1D
+	0x04 extra: triggers Counts number of prefetch requests triggered by the Finite State Machine and pushed into the prefetch FIFO
 name:ept type:bitmask default:0x02
-	0x02 epde_miss Counts Extended Page Directory Entry misses
-	0x04 epdpe_hit Counts Extended Page Directory Pointer Entry hits
-	0x08 epdpe_miss Counts Extended Page Directory Pointer Entry misses
+	0x02 extra: epde_miss Counts Extended Page Directory Entry misses
+	0x04 extra: epdpe_hit Counts Extended Page Directory Pointer Entry hits
+	0x08 extra: epdpe_miss Counts Extended Page Directory Pointer Entry misses
 name:l1d type:bitmask default:0x01
-	0x01 repl Counts the number of lines brought into the L1 data cache
-	0x02 m_repl Counts the number of modified lines brought into the L1 data cache
-	0x04 m_evict Counts the number of modified lines evicted from the L1 data cache due to replacement
-	0x08 m_snoop_evict Counts the number of modified lines evicted from the L1 data cache due to snoop HITM intervention
+	0x01 extra: repl Counts the number of lines brought into the L1 data cache
+	0x02 extra: m_repl Counts the number of modified lines brought into the L1 data cache
+	0x04 extra: m_evict Counts the number of modified lines evicted from the L1 data cache due to replacement
+	0x08 extra: m_snoop_evict Counts the number of modified lines evicted from the L1 data cache due to snoop HITM intervention
 name:offcore_requests_outstanding type:bitmask default:0x01
-	0x01 read_data Counts weighted cycles of offcore demand data read requests
-	0x02 read_code Counts weighted cycles of offcore demand code read requests
-	0x04 rfo Counts weighted cycles of offcore demand RFO requests
-	0x08 read Counts weighted cycles of offcore read requests of any kind
+	0x01 extra: read_data Counts weighted cycles of offcore demand data read requests
+	0x02 extra: read_code Counts weighted cycles of offcore demand code read requests
+	0x04 extra: rfo Counts weighted cycles of offcore demand RFO requests
+	0x08 extra: read Counts weighted cycles of offcore read requests of any kind
 name:cache_lock_cycles type:bitmask default:0x01
-	0x01 l1d_l2 Cycle count during which the L1D and L2 are locked
-	0x02 l1d Counts the number of cycles that cacheline in the L1 data cache unit is locked
+	0x01 extra: l1d_l2 Cycle count during which the L1D and L2 are locked
+	0x02 extra: l1d Counts the number of cycles that cacheline in the L1 data cache unit is locked
 name:l1i type:bitmask default:0x01
-	0x01 hits Counts all instruction fetches that hit the L1 instruction cache
-	0x02 misses Counts all instruction fetches that miss the L1I cache
-	0x03 reads Counts all instruction fetches, including uncacheable fetches that bypass the L1I
-	0x04 cycles_stalled Cycle counts for which an instruction fetch stalls due to a L1I cache miss, ITLB miss or ITLB fault
+	0x01 extra: hits Counts all instruction fetches that hit the L1 instruction cache
+	0x02 extra: misses Counts all instruction fetches that miss the L1I cache
+	0x03 extra: reads Counts all instruction fetches, including uncacheable fetches that bypass the L1I
+	0x04 extra: cycles_stalled Cycle counts for which an instruction fetch stalls due to a L1I cache miss, ITLB miss or ITLB fault
 name:ifu_ivc type:bitmask default:0x01
-	0x01 full Instruction Fetche unit victim cache full
-	0x02 l1i_eviction L1 Instruction cache evictions
+	0x01 extra: full Instruction Fetch unit victim cache full
+	0x02 extra: l1i_eviction L1 Instruction cache evictions
 name:large_itlb type:mandatory default:0x01
-	0x01 hit Counts number of large ITLB hits
+	0x01 extra: hit Counts number of large ITLB hits
 name:itlb_misses type:bitmask default:0x01
-	0x01 any Counts the number of misses in all levels of the ITLB which causes a page walk
-	0x02 walk_completed Counts number of misses in all levels of the ITLB which resulted in a completed page walk
-	0x04 walk_cycles Counts ITLB miss page walk cycles
-	0x04 pmh_busy_cycles Counts PMH busy cycles
-	0x10 stlb_hit Counts the number of ITLB misses that hit in the second level TLB
-	0x20 pde_miss Number of ITLB misses where the low part of the linear to physical address translation was missed
-	0x40 pdp_miss Number of ITLB misses where the high part of the linear to physical address translation was missed
-	0x80 large_walk_completed Counts number of completed large page walks due to misses in the STLB
+	0x01 extra: any Counts the number of misses in all levels of the ITLB which causes a page walk
+	0x02 extra: walk_completed Counts number of misses in all levels of the ITLB which resulted in a completed page walk
+	0x04 extra: walk_cycles Counts ITLB miss page walk cycles
+	0x04 extra: pmh_busy_cycles Counts PMH busy cycles
+	0x10 extra: stlb_hit Counts the number of ITLB misses that hit in the second level TLB
+	0x20 extra: pde_miss Number of ITLB misses where the low part of the linear to physical address translation was missed
+	0x40 extra: pdp_miss Number of ITLB misses where the high part of the linear to physical address translation was missed
+	0x80 extra: large_walk_completed Counts number of completed large page walks due to misses in the STLB
 name:ild_stall type:bitmask default:0x0f
-	0x01 lcp Cycles Instruction Length Decoder stalls due to length changing prefixes: 66, 67 or REX
-	0x02 mru Instruction Length Decoder stall cycles due to Brand Prediction Unit (PBU) Most Recently Used (MRU) bypass
-	0x04 iq_full Stall cycles due to a full instruction queue
-	0x08 regen Counts the number of regen stalls
+	0x01 extra: lcp Cycles Instruction Length Decoder stalls due to length changing prefixes: 66, 67 or REX
+	0x02 extra: mru Instruction Length Decoder stall cycles due to Branch Prediction Unit (BPU) Most Recently Used (MRU) bypass
+	0x04 extra: iq_full Stall cycles due to a full instruction queue
+	0x08 extra: regen Counts the number of regen stalls
 	0x0F any Counts any cycles the Instruction Length Decoder is stalled
 name:br_inst_exec type:bitmask default:0x7f
-	0x01 cond Counts the number of conditional near branch instructions executed, but not necessarily retired
-	0x02 direct Counts all unconditional near branch instructions excluding calls and indirect branches
-	0x04 indirect_non_call Counts the number of executed indirect near branch instructions that are not calls
-	0x07 non_calls Counts all non call near branch instructions executed, but not necessarily retired
-	0x08 return_near Counts indirect near branches that have a return mnemonic
-	0x10 direct_near_call Counts unconditional near call branch instructions, excluding non call branch, executed
-	0x20 indirect_near_call Counts indirect near calls, including both register and memory indirect, executed
-	0x30 near_calls Counts all near call branches executed,  but not necessarily retired
-	0x40 taken Counts taken near branches executed, but not necessarily retired
+	0x01 extra: cond Counts the number of conditional near branch instructions executed, but not necessarily retired
+	0x02 extra: direct Counts all unconditional near branch instructions excluding calls and indirect branches
+	0x04 extra: indirect_non_call Counts the number of executed indirect near branch instructions that are not calls
+	0x07 extra: non_calls Counts all non call near branch instructions executed, but not necessarily retired
+	0x08 extra: return_near Counts indirect near branches that have a return mnemonic
+	0x10 extra: direct_near_call Counts unconditional near call branch instructions, excluding non call branch, executed
+	0x20 extra: indirect_near_call Counts indirect near calls, including both register and memory indirect, executed
+	0x30 extra: near_calls Counts all near call branches executed,  but not necessarily retired
+	0x40 extra: taken Counts taken near branches executed, but not necessarily retired
 	0x7F any Counts all near executed branches (not necessarily retired)
 name:br_misp_exec type:bitmask default:0x7f
-	0x01 cond Counts the number of mispredicted conditional near branch instructions executed, but not necessarily retired
-	0x02 direct Counts mispredicted macro unconditional near branch instructions, excluding calls and indirect branches (should always be 0)
-	0x04 indirect_non_call Counts the number of executed mispredicted indirect near branch instructions that are not calls
-	0x07 non_calls Counts mispredicted non call near branches executed,  but not necessarily retired
-	0x08 return_near Counts mispredicted indirect branches that have a rear return mnemonic
-	0x10 direct_near_call Counts mispredicted non-indirect near calls executed, (should always be 0)
-	0x20 indirect_near_call Counts mispredicted indirect near calls exeucted, including both register and memory indirect
-	0x30 near_calls Counts all mispredicted near call branches executed, but not necessarily retired
-	0x40 taken Counts executed mispredicted near branches that are taken, but not necessarily retired
+	0x01 extra: cond Counts the number of mispredicted conditional near branch instructions executed, but not necessarily retired
+	0x02 extra: direct Counts mispredicted macro unconditional near branch instructions, excluding calls and indirect branches (should always be 0)
+	0x04 extra: indirect_non_call Counts the number of executed mispredicted indirect near branch instructions that are not calls
+	0x07 extra: non_calls Counts mispredicted non call near branches executed,  but not necessarily retired
+	0x08 extra: return_near Counts mispredicted indirect branches that have a near return mnemonic
+	0x10 extra: direct_near_call Counts mispredicted non-indirect near calls executed, (should always be 0)
+	0x20 extra: indirect_near_call Counts mispredicted indirect near calls executed, including both register and memory indirect
+	0x30 extra: near_calls Counts all mispredicted near call branches executed, but not necessarily retired
+	0x40 extra: taken Counts executed mispredicted near branches that are taken, but not necessarily retired
 	0x7F any Counts the number of mispredicted near branch instructions that were executed, but not necessarily retired
 name:resource_stalls type:bitmask default:0x01
-	0x01 any Counts the number of Allocator resource related stalls
-	0x02 load Counts the cycles of stall due to lack of load buffer for load operation
-	0x04 rs_full This event counts the number of cycles when the number of instructions in the pipeline waiting for execution reaches the limit the processor can handle
-	0x08 store This event counts the number of cycles that a resource related stall will occur due to the number of store instructions reaching the limit of the pipeline, (i
-	0x10 rob_full Counts the cycles of stall due to reorder buffer full
-	0x20 fpcw Counts the number of cycles while execution was stalled due to writing the floating-point unit (FPU) control word
-	0x40 mxcsr Stalls due to the MXCSR register rename occurring to close to a previous MXCSR rename
-	0x80 other Counts the number of cycles while execution was stalled due to other resource issues
+	0x01 extra: any Counts the number of Allocator resource related stalls
+	0x02 extra: load Counts the cycles of stall due to lack of load buffer for load operation
+	0x04 extra: rs_full This event counts the number of cycles when the number of instructions in the pipeline waiting for execution reaches the limit the processor can handle
+	0x08 extra: store This event counts the number of cycles that a resource related stall will occur due to the number of store instructions reaching the limit of the pipeline, (i
+	0x10 extra: rob_full Counts the cycles of stall due to reorder buffer full
+	0x20 extra: fpcw Counts the number of cycles while execution was stalled due to writing the floating-point unit (FPU) control word
+	0x40 extra: mxcsr Stalls due to the MXCSR register rename occurring too close to a previous MXCSR rename
+	0x80 extra: other Counts the number of cycles while execution was stalled due to other resource issues
 name:offcore_requests type:bitmask default:0x80
-	0x01 demand_read_data Counts number of offcore demand data read requests
-	0x02 demand_read_code Counts number of offcore demand code read requests
-	0x04 demand_rfo Counts number of offcore demand RFO requests
-	0x08 any_read Counts number of offcore read requests
-	0x10 any_rfo Counts number of offcore RFO requests
-	0x20 uncached_mem Counts number of offcore uncached memory requests
-	0x40 l1d_writeback Counts number of L1D writebacks to the uncore
-	0x80 any Counts all offcore requests
+	0x01 extra: demand_read_data Counts number of offcore demand data read requests
+	0x02 extra: demand_read_code Counts number of offcore demand code read requests
+	0x04 extra: demand_rfo Counts number of offcore demand RFO requests
+	0x08 extra: any_read Counts number of offcore read requests
+	0x10 extra: any_rfo Counts number of offcore RFO requests
+	0x20 extra: uncached_mem Counts number of offcore uncached memory requests
+	0x40 extra: l1d_writeback Counts number of L1D writebacks to the uncore
+	0x80 extra: any Counts all offcore requests
 name:uops_executed type:bitmask default:0x3f
-	0x01 port0 Counts number of Uops executed that were issued on port 0
-	0x02 port1 Counts number of Uops executed that were issued on port 1
-	0x04 port2_core Counts number of Uops executed that were issued on port 2
-	0x08 port3_core Counts number of Uops executed that were issued on port 3
-	0x10 port4_core Counts number of Uops executed that where issued on port  4
-	0x20 port5 Counts number of Uops executed that where issued on port 5
-	0x40 port015 Counts number of Uops executed that where issued on port  0, 1, or 5
-	0x80 port234 Counts number of Uops executed that where issued on port 2, 3, or 4
+	0x01 extra: port0 Counts number of Uops executed that were issued on port 0
+	0x02 extra: port1 Counts number of Uops executed that were issued on port 1
+	0x04 extra: port2_core Counts number of Uops executed that were issued on port 2
+	0x08 extra: port3_core Counts number of Uops executed that were issued on port 3
+	0x10 extra: port4_core Counts number of Uops executed that were issued on port  4
+	0x20 extra: port5 Counts number of Uops executed that were issued on port 5
+	0x40 extra: port015 Counts number of Uops executed that were issued on port  0, 1, or 5
+	0x80 extra: port234 Counts number of Uops executed that were issued on port 2, 3, or 4
 name:snoopq_requests_outstanding type:bitmask default:0x01
-	0x01 data Counts weighted cycles of snoopq requests for data
-	0x02 invalidate Counts weighted cycles of snoopq invalidate requests
-	0x04 code Counts weighted cycles of snoopq requests for code
+	0x01 extra: data Counts weighted cycles of snoopq requests for data
+	0x02 extra: invalidate Counts weighted cycles of snoopq invalidate requests
+	0x04 extra: code Counts weighted cycles of snoopq requests for code
 name:snoop_response type:bitmask default:0x01
-	0x01 hit Counts HIT snoop response sent by this thread in response to a snoop request
-	0x02 hite Counts HIT E snoop response sent by this thread in response to a snoop request
-	0x04 hitm Counts HIT M snoop response sent by this thread in response to a snoop request
+	0x01 extra: hit Counts HIT snoop response sent by this thread in response to a snoop request
+	0x02 extra: hite Counts HIT E snoop response sent by this thread in response to a snoop request
+	0x04 extra: hitm Counts HIT M snoop response sent by this thread in response to a snoop request
 name:pic_accesses type:bitmask default:0x01
-	0x01 tpr_reads Counts number of TPR reads
-	0x02 tpr_writes Counts number of TPR writes
+	0x01 extra: tpr_reads Counts number of TPR reads
+	0x02 extra: tpr_writes Counts number of TPR writes
 name:inst_retired type:bitmask default:0x01
-	0x01 any_p instructions retired
-	0x02 x87 Counts the number of floating point computational operations retired: floating point computational operations executed by the assist handler and sub-operations of complex floating point instructions like transcendental instructions
+	0x01 extra: any_p instructions retired
+	0x02 extra: x87 Counts the number of floating point computational operations retired: floating point computational operations executed by the assist handler and sub-operations of complex floating point instructions like transcendental instructions
 name:uops_retired type:bitmask default:0x01
-	0x01 any Counts the number of micro-ops retired, (macro-fused=1, micro-fused=2, others=1; maximum count of 8 per cycle)
-	0x02 retire_slots Counts the number of retirement slots used each cycle
-	0x04 macro_fused Counts number of macro-fused uops retired
+	0x01 extra: any Counts the number of micro-ops retired, (macro-fused=1, micro-fused=2, others=1; maximum count of 8 per cycle)
+	0x02 extra: retire_slots Counts the number of retirement slots used each cycle
+	0x04 extra: macro_fused Counts number of macro-fused uops retired
 name:machine_clears type:bitmask default:0x01
-	0x01 cycles Counts the cycles machine clear is asserted
-	0x02 mem_order Counts the number of machine clears due to memory order conflicts
-	0x04 smc Counts the number of times that a program writes to a code section
-	0x10 fusion_assist Counts the number of macro-fusion assists
+	0x01 extra: cycles Counts the cycles machine clear is asserted
+	0x02 extra: mem_order Counts the number of machine clears due to memory order conflicts
+	0x04 extra: smc Counts the number of times that a program writes to a code section
+	0x10 extra: fusion_assist Counts the number of macro-fusion assists
 name:br_inst_retired type:bitmask default:0x00
-	0x00 all_branches See Table A-1
-	0x01 conditional Counts the number of conditional branch instructions retired
-	0x02 near_call Counts the number of direct & indirect near unconditional calls retired
-	0x04 all_branches Counts the number of branch instructions retired
+	0x00 extra: all_branches See Table A-1
+	0x01 extra: conditional Counts the number of conditional branch instructions retired
+	0x02 extra: near_call Counts the number of direct & indirect near unconditional calls retired
+	0x04 extra: all_branches Counts the number of branch instructions retired
 name:br_misp_retired type:bitmask default:0x00
-	0x00 all_branches See Table A-1
-	0x02 near_call Counts mispredicted direct & indirect near unconditional retired calls
+	0x00 extra: all_branches See Table A-1
+	0x02 extra: near_call Counts mispredicted direct & indirect near unconditional retired calls
 name:ssex_uops_retired type:bitmask default:0x01
-	0x01 packed_single Counts SIMD packed single-precision floating point Uops retired
-	0x02 scalar_single Counts SIMD calar single-precision floating point Uops retired
-	0x04 packed_double Counts SIMD packed double-precision floating point Uops retired
-	0x08 scalar_double Counts SIMD scalar double-precision floating point Uops retired
-	0x10 vector_integer Counts 128-bit SIMD vector integer Uops retired
+	0x01 extra: packed_single Counts SIMD packed single-precision floating point Uops retired
+	0x02 extra: scalar_single Counts SIMD scalar single-precision floating point Uops retired
+	0x04 extra: packed_double Counts SIMD packed double-precision floating point Uops retired
+	0x08 extra: scalar_double Counts SIMD scalar double-precision floating point Uops retired
+	0x10 extra: vector_integer Counts 128-bit SIMD vector integer Uops retired
 name:mem_load_retired type:bitmask default:0x01
-	0x01 l1d_hit Counts number of retired loads that hit the L1 data cache
-	0x02 l2_hit Counts number of retired loads that hit the L2 data cache
-	0x04 llc_unshared_hit Counts number of retired loads that hit their own, unshared lines in the LLC cache
-	0x08 other_core_l2_hit_hitm Counts number of retired loads that hit in a sibling core's L2 (on die core)
-	0x10 llc_miss Counts number of retired loads that miss the LLC cache
-	0x40 hit_lfb Counts number of retired loads that miss the L1D and the address is located in an allocated line fill buffer and will soon be committed to cache
-	0x80 dtlb_miss Counts the number of retired loads that missed the DTLB
+	0x01 extra: l1d_hit Counts number of retired loads that hit the L1 data cache
+	0x02 extra: l2_hit Counts number of retired loads that hit the L2 data cache
+	0x04 extra: llc_unshared_hit Counts number of retired loads that hit their own, unshared lines in the LLC cache
+	0x08 extra: other_core_l2_hit_hitm Counts number of retired loads that hit in a sibling core's L2 (on die core)
+	0x10 extra: llc_miss Counts number of retired loads that miss the LLC cache
+	0x40 extra: hit_lfb Counts number of retired loads that miss the L1D and the address is located in an allocated line fill buffer and will soon be committed to cache
+	0x80 extra: dtlb_miss Counts the number of retired loads that missed the DTLB
 name:fp_mmx_trans type:bitmask default:0x03
-	0x01 to_fp Counts the first floating-point instruction following any MMX instruction
-	0x02 to_mmx Counts the first MMX instruction following a floating-point instruction
-	0x03 any Counts all transitions from floating point to MMX instructions and from MMX instructions to floating point instructions
+	0x01 extra: to_fp Counts the first floating-point instruction following any MMX instruction
+	0x02 extra: to_mmx Counts the first MMX instruction following a floating-point instruction
+	0x03 extra: any Counts all transitions from floating point to MMX instructions and from MMX instructions to floating point instructions
 name:macro_insts type:mandatory default:0x01
-	0x01 decoded Counts the number of instructions decoded, (but not necessarily executed or retired)
+	0x01 extra: decoded Counts the number of instructions decoded, (but not necessarily executed or retired)
 name:uops_decoded type:bitmask default:0x0e
-	0x02 ms Counts the number of Uops decoded by the Microcode Sequencer, MS
-	0x04 esp_folding Counts number of stack pointer (ESP) instructions decoded: push , pop , call , ret, etc
-	0x08 esp_sync Counts number of stack pointer (ESP) sync operations where an ESP instruction is corrected  by adding the ESP offset register to the current value of the ESP register
+	0x02 extra: ms Counts the number of Uops decoded by the Microcode Sequencer, MS
+	0x04 extra: esp_folding Counts number of stack pointer (ESP) instructions decoded: push , pop , call , ret, etc
+	0x08 extra: esp_sync Counts number of stack pointer (ESP) sync operations where an ESP instruction is corrected  by adding the ESP offset register to the current value of the ESP register
 name:rat_stalls type:bitmask default:0x0f
-	0x01 flags Counts the number of cycles during which execution stalled due to several reasons, one of which is a partial flag register stall
-	0x02 registers This event counts the number of cycles instruction execution latency became longer than the defined latency because the instruction used a register that was partially written by previous instruction
-	0x04 rob_read_port Counts the number of cycles when ROB read port stalls occurred, which did not allow new micro-ops to enter the out-of-order pipeline
-	0x08 scoreboard Counts the cycles where we stall due to microarchitecturally required serialization
+	0x01 extra: flags Counts the number of cycles during which execution stalled due to several reasons, one of which is a partial flag register stall
+	0x02 extra: registers This event counts the number of cycles instruction execution latency became longer than the defined latency because the instruction used a register that was partially written by previous instruction
+	0x04 extra: rob_read_port Counts the number of cycles when ROB read port stalls occurred, which did not allow new micro-ops to enter the out-of-order pipeline
+	0x08 extra: scoreboard Counts the cycles where we stall due to microarchitecturally required serialization
 	0x0F any Counts all Register Allocation Table stall cycles due to:  Cycles when ROB read port stalls occurred, which did not allow new micro-ops to enter the execution pipe
 name:baclear type:bitmask default:0x01
-	0x01 clear Counts the number of times the front end is resteered, mainly when the Branch Prediction Unit cannot provide a correct prediction and this is corrected by the Branch Address Calculator at the front end
-	0x02 bad_target Counts number of Branch Address Calculator clears (BACLEAR) asserted due to conditional branch instructions in which there was a target hit but the direction was wrong
+	0x01 extra: clear Counts the number of times the front end is resteered, mainly when the Branch Prediction Unit cannot provide a correct prediction and this is corrected by the Branch Address Calculator at the front end
+	0x02 extra: bad_target Counts number of Branch Address Calculator clears (BACLEAR) asserted due to conditional branch instructions in which there was a target hit but the direction was wrong
 name:bpu_clears type:bitmask default:0x03
-	0x01 early Counts early (normal) Branch Prediction Unit clears: BPU predicted a taken branch after incorrectly assuming that it was not taken
-	0x02 late Counts late Branch Prediction Unit clears due to Most Recently Used conflicts
-	0x03 any Counts all BPU clears
+	0x01 extra: early Counts early (normal) Branch Prediction Unit clears: BPU predicted a taken branch after incorrectly assuming that it was not taken
+	0x02 extra: late Counts late Branch Prediction Unit clears due to Most Recently Used conflicts
+	0x03 extra: any Counts all BPU clears
 name:l2_transactions type:bitmask default:0x80
-	0x01 load Counts L2 load operations due to HW prefetch or demand loads
-	0x02 rfo Counts L2 RFO operations due to HW prefetch or demand RFOs
-	0x04 ifetch Counts L2 instruction fetch operations due to HW prefetch or demand ifetch
-	0x08 prefetch Counts L2 prefetch operations
-	0x10 l1d_wb Counts L1D writeback operations to the L2
-	0x20 fill Counts L2 cache line fill operations due to load, RFO, L1D writeback or prefetch
-	0x40 wb Counts L2 writeback operations to the LLC
-	0x80 any Counts all L2 cache operations
+	0x01 extra: load Counts L2 load operations due to HW prefetch or demand loads
+	0x02 extra: rfo Counts L2 RFO operations due to HW prefetch or demand RFOs
+	0x04 extra: ifetch Counts L2 instruction fetch operations due to HW prefetch or demand ifetch
+	0x08 extra: prefetch Counts L2 prefetch operations
+	0x10 extra: l1d_wb Counts L1D writeback operations to the L2
+	0x20 extra: fill Counts L2 cache line fill operations due to load, RFO, L1D writeback or prefetch
+	0x40 extra: wb Counts L2 writeback operations to the LLC
+	0x80 extra: any Counts all L2 cache operations
 name:l2_lines_in type:bitmask default:0x07
-	0x02 s_state Counts the number of cache lines allocated in the L2 cache in the S (shared) state
-	0x04 e_state Counts the number of cache lines allocated in the L2 cache in the E (exclusive) state
-	0x07 any Counts the number of cache lines allocated in the L2 cache
+	0x02 extra: s_state Counts the number of cache lines allocated in the L2 cache in the S (shared) state
+	0x04 extra: e_state Counts the number of cache lines allocated in the L2 cache in the E (exclusive) state
+	0x07 extra: any Counts the number of cache lines allocated in the L2 cache
 name:l2_lines_out type:bitmask default:0x0f
-	0x01 demand_clean Counts L2 clean cache lines evicted by a demand request
-	0x02 demand_dirty Counts L2 dirty (modified) cache lines evicted by a demand request
-	0x04 prefetch_clean Counts L2 clean cache line evicted by a prefetch request
-	0x08 prefetch_dirty Counts L2 modified cache line evicted by a prefetch request
+	0x01 extra: demand_clean Counts L2 clean cache lines evicted by a demand request
+	0x02 extra: demand_dirty Counts L2 dirty (modified) cache lines evicted by a demand request
+	0x04 extra: prefetch_clean Counts L2 clean cache line evicted by a prefetch request
+	0x08 extra: prefetch_dirty Counts L2 modified cache line evicted by a prefetch request
 	0x0F any Counts all L2 cache lines evicted for any reason
 name:l2_hw_prefetch type:bitmask default:0x01
-	0x01 hit Count L2 HW prefetcher detector hits
-	0x02 alloc Count L2 HW prefetcher allocations
-	0x04 data_trigger Count L2 HW data prefetcher triggered
-	0x08 code_trigger Count L2 HW code prefetcher triggered
-	0x10 dca_trigger Count L2 HW DCA prefetcher triggered
-	0x20 kick_start Count L2 HW prefetcher kick started
+	0x01 extra: hit Count L2 HW prefetcher detector hits
+	0x02 extra: alloc Count L2 HW prefetcher allocations
+	0x04 extra: data_trigger Count L2 HW data prefetcher triggered
+	0x08 extra: code_trigger Count L2 HW code prefetcher triggered
+	0x10 extra: dca_trigger Count L2 HW DCA prefetcher triggered
+	0x20 extra: kick_start Count L2 HW prefetcher kick started
 name:sq_misc type:bitmask default:0x01
-	0x01 promotion Counts the number of L2 secondary misses that hit the Super Queue
-	0x02 promotion_post_go Counts the number of L2 secondary misses during the Super Queue filling L2
-	0x04 lru_hints Counts number of Super Queue LRU hints sent to L3
-	0x08 fill_dropped Counts the number of SQ L2 fills dropped due to L2 busy
-	0x10 split_lock Counts the number of SQ lock splits across a cache line
+	0x01 extra: promotion Counts the number of L2 secondary misses that hit the Super Queue
+	0x02 extra: promotion_post_go Counts the number of L2 secondary misses during the Super Queue filling L2
+	0x04 extra: lru_hints Counts number of Super Queue LRU hints sent to L3
+	0x08 extra: fill_dropped Counts the number of SQ L2 fills dropped due to L2 busy
+	0x10 extra: split_lock Counts the number of SQ lock splits across a cache line
 name:fp_assist type:bitmask default:0x01
-	0x01 all Counts the number of floating point operations executed that required micro-code assist intervention
-	0x02 output Counts number of floating point micro-code assist when the output value (destination register) is invalid
-	0x04 input Counts number of floating point micro-code assist when the input value (one of the source operands to an FP instruction) is invalid
+	0x01 extra: all Counts the number of floating point operations executed that required micro-code assist intervention
+	0x02 extra: output Counts number of floating point micro-code assist when the output value (destination register) is invalid
+	0x04 extra: input Counts number of floating point micro-code assist when the input value (one of the source operands to an FP instruction) is invalid
 name:simd_int_64 type:bitmask default:0x01
-	0x01 packed_mpy Counts number of SID integer 64 bit packed multiply operations
-	0x02 packed_shift Counts number of SID integer 64 bit packed shift operations
-	0x04 pack Counts number of SID integer 64 bit pack operations
-	0x08 unpack Counts number of SID integer 64 bit unpack operations
-	0x10 packed_logical Counts number of SID integer 64 bit logical operations
-	0x20 packed_arith Counts number of SID integer 64 bit arithmetic operations
-	0x40 shuffle_move Counts number of SID integer 64 bit shift or move operations
+	0x01 extra: packed_mpy Counts number of SIMD integer 64 bit packed multiply operations
+	0x02 extra: packed_shift Counts number of SIMD integer 64 bit packed shift operations
+	0x04 extra: pack Counts number of SIMD integer 64 bit pack operations
+	0x08 extra: unpack Counts number of SIMD integer 64 bit unpack operations
+	0x10 extra: packed_logical Counts number of SIMD integer 64 bit logical operations
+	0x20 extra: packed_arith Counts number of SIMD integer 64 bit arithmetic operations
+	0x40 extra: shuffle_move Counts number of SIMD integer 64 bit shift or move operations
 name:x20 type:mandatory default:0x20
 	0x20 No unit mask
diff --git a/events/i386/sandybridge/unit_masks b/events/i386/sandybridge/unit_masks
index e02bb33..f35f32d 100644
--- a/events/i386/sandybridge/unit_masks
+++ b/events/i386/sandybridge/unit_masks
@@ -11,100 +11,100 @@ name:x10 type:mandatory default:0x10
 name:x20 type:mandatory default:0x20
 	0x20 No unit mask
 name:ld_blocks type:bitmask default:0x1
-	0x1 data_unknown blocked loads due to store buffer blocks with unknown data.
-	0x2 store_forward loads blocked by overlapping with store buffer that cannot be forwarded
-	0x8 no_sr This event counts the number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.
-	0x10 all_block Number of cases where any load is blocked but has no DCU miss.
+	0x1 extra: data_unknown blocked loads due to store buffer blocks with unknown data.
+	0x2 extra: store_forward loads blocked by overlapping with store buffer that cannot be forwarded
+	0x8 extra: no_sr This event counts the number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.
+	0x10 extra: all_block Number of cases where any load is blocked but has no DCU miss.
 name:misalign_mem_ref type:bitmask default:0x1
-	0x1 loads Speculative cache-line split load uops dispatched to the L1D.
-	0x2 stores Speculative cache-line split Store-address uops dispatched to L1D
+	0x1 extra: loads Speculative cache-line split load uops dispatched to the L1D.
+	0x2 extra: stores Speculative cache-line split Store-address uops dispatched to L1D
 name:ld_blocks_partial type:bitmask default:0x1
-	0x1 address_alias False dependencies in MOB due to partial compare on address
-	0x8 all_sta_block This event counts the number of times that load operations are temporarily blocked because of older stores, with addresses that are not yet known. A load operation may incur more than one block of this type.
+	0x1 extra: address_alias False dependencies in MOB due to partial compare on address
+	0x8 extra: all_sta_block This event counts the number of times that load operations are temporarily blocked because of older stores, with addresses that are not yet known. A load operation may incur more than one block of this type.
 name:dtlb_load_misses type:bitmask default:0x1
-	0x1 miss_causes_a_walk Miss in all TLB levels causes an page walk of any page size (4K/2M/4M/1G)
-	0x2 walk_completed Miss in all TLB levels causes a page walk that completes of any page size (4K/2M/4M/1G)
-	0x4 walk_duration Cycles PMH is busy with this walk
-	0x10 stlb_hit First level miss but second level hit; no page walk.
+	0x1 extra: miss_causes_a_walk Miss in all TLB levels causes a page walk of any page size (4K/2M/4M/1G)
+	0x2 extra: walk_completed Miss in all TLB levels causes a page walk that completes of any page size (4K/2M/4M/1G)
+	0x4 extra: walk_duration Cycles PMH is busy with this walk
+	0x10 extra: stlb_hit First level miss but second level hit; no page walk.
 name:int_misc type:bitmask default:0x40
-	0x40 rat_stall_cycles Cycles Resource Allocation Table (RAT) external stall is sent to Instruction Decode Queue (IDQ) for this thread.
+	0x40 extra: rat_stall_cycles Cycles Resource Allocation Table (RAT) external stall is sent to Instruction Decode Queue (IDQ) for this thread.
 	0x3 extra:cmask=1 recovery_cycles Number of cycles waiting to be recover after Nuke due to all other cases except JEClear.
 	0x3 extra:cmask=1,edge recovery_stalls_count Edge applied to recovery_cycles, thus counts occurrences.
-name:uops_issued type:bitmask default:0x1
-	0x1 any Number of Uops issued by the Resource Allocation Table (RAT) to the Reservation Station (RS)
+name:uops_issued type:bitmask default:any
+	0x1 extra: any Number of Uops issued by the Resource Allocation Table (RAT) to the Reservation Station (RS)
 	0x1 extra:cmask=1,inv stall_cycles cycles no uops issued by this thread.
-name:arith type:bitmask default:0x1
-	0x1 fpu_div_active Cycles that the divider is busy with any divide or sqrt operation.
+name:arith type:bitmask default:fpu_div_active
+	0x1 extra: fpu_div_active Cycles that the divider is busy with any divide or sqrt operation.
 	0x1 extra:cmask=1,edge fpu_div Number of times that the divider is actived, includes INT, SIMD and FP.
 name:l2_rqsts type:bitmask default:0x1
-	0x1 demand_data_rd_hit Demand Data Read hit L2, no rejects
-	0x4 rfo_hit RFO requests that hit L2 cache
-	0x8 rfo_miss RFO requests that miss L2 cache
-	0x10 code_rd_hit L2 cache hits when fetching instructions, code reads.
-	0x20 code_rd_miss L2 cache misses when fetching instructions
-	0x40 pf_hit Requests from the L2 hardware prefetchers that hit L2 cache
-	0x80 pf_miss Requests from the L2 hardware prefetchers that miss L2 cache
-	0x3 all_demand_data_rd Any data read request to L2 cache
-	0xc all_rfo Any data RFO request to L2 cache
-	0x30 all_code_rd Any code read request to L2 cache
-	0xc0 all_pf Any L2 HW prefetch request to L2 cache
+	0x1 extra: demand_data_rd_hit Demand Data Read hit L2, no rejects
+	0x4 extra: rfo_hit RFO requests that hit L2 cache
+	0x8 extra: rfo_miss RFO requests that miss L2 cache
+	0x10 extra: code_rd_hit L2 cache hits when fetching instructions, code reads.
+	0x20 extra: code_rd_miss L2 cache misses when fetching instructions
+	0x40 extra: pf_hit Requests from the L2 hardware prefetchers that hit L2 cache
+	0x80 extra: pf_miss Requests from the L2 hardware prefetchers that miss L2 cache
+	0x3 extra: all_demand_data_rd Any data read request to L2 cache
+	0xc extra: all_rfo Any data RFO request to L2 cache
+	0x30 extra: all_code_rd Any code read request to L2 cache
+	0xc0 extra: all_pf Any L2 HW prefetch request to L2 cache
 name:l2_store_lock_rqsts type:bitmask default:0xf
-	0xf all RFOs that access cache lines in any state
-	0x1 miss RFO (as a result of regular RFO or Lock request) miss cache - I state
-	0x4 hit_e RFO (as a result of regular RFO or Lock request) hits cache in E state
-	0x8 hit_m RFO (as a result of regular RFO or Lock request) hits cache in M state
+	0xf extra: all RFOs that access cache lines in any state
+	0x1 extra: miss RFO (as a result of regular RFO or Lock request) miss cache - I state
+	0x4 extra: hit_e RFO (as a result of regular RFO or Lock request) hits cache in E state
+	0x8 extra: hit_m RFO (as a result of regular RFO or Lock request) hits cache in M state
 name:l2_l1d_wb_rqsts type:bitmask default:0x4
-	0x4 hit_e writebacks from L1D to L2 cache lines in E state
-	0x8 hit_m writebacks from L1D to L2 cache lines in M state
-name:l1d_pend_miss type:bitmask default:0x1
-	0x1 pending Cycles with L1D load Misses outstanding.
+	0x4 extra: hit_e writebacks from L1D to L2 cache lines in E state
+	0x8 extra: hit_m writebacks from L1D to L2 cache lines in M state
+name:l1d_pend_miss type:bitmask default:pending
+	0x1 extra: pending Cycles with L1D load Misses outstanding.
 	0x1 extra:cmask=1,edge occurences This event counts the number of L1D misses outstanding occurences.
 name:dtlb_store_misses type:bitmask default:0x1
-	0x1 miss_causes_a_walk Miss in all TLB levels causes an page walk of any page size (4K/2M/4M/1G)
-	0x2 walk_completed Miss in all TLB levels causes a page walk that completes of any page size (4K/2M/4M/1G)
-	0x4 walk_duration Cycles PMH is busy with this walk
-	0x10 stlb_hit First level miss but second level hit; no page walk.  Only relevant if multiple levels.
+	0x1 extra: miss_causes_a_walk Miss in all TLB levels causes a page walk of any page size (4K/2M/4M/1G)
+	0x2 extra: walk_completed Miss in all TLB levels causes a page walk that completes of any page size (4K/2M/4M/1G)
+	0x4 extra: walk_duration Cycles PMH is busy with this walk
+	0x10 extra: stlb_hit First level miss but second level hit; no page walk.  Only relevant if multiple levels.
 name:load_hit_pre type:bitmask default:0x1
-	0x1 sw_pf Load  dispatches that hit fill buffer allocated for S/W prefetch.
-	0x2 hw_pf Load  dispatches that hit fill buffer allocated for HW prefetch.
+	0x1 extra: sw_pf Load  dispatches that hit fill buffer allocated for S/W prefetch.
+	0x2 extra: hw_pf Load  dispatches that hit fill buffer allocated for HW prefetch.
 name:l1d type:bitmask default:0x1
-	0x1 replacement L1D Data line replacements.
-	0x2 allocated_in_m L1D M-state Data Cache Lines Allocated
-	0x4 eviction L1D M-state Data Cache Lines Evicted due to replacement (only)
-	0x8 all_m_replacement All Modified lines evicted out of L1D
-name:partial_rat_stalls type:bitmask default:0x20
-	0x20 flags_merge_uop Number of perf sensitive flags-merge uops added by Sandy Bridge u-arch.
-	0x40 slow_lea_window Number of cycles with at least 1 slow Load Effective Address (LEA) uop being allocated.
-	0x80 mul_single_uop Number of Multiply packed/scalar single precision uops allocated
+	0x1 extra: replacement L1D Data line replacements.
+	0x2 extra: allocated_in_m L1D M-state Data Cache Lines Allocated
+	0x4 extra: eviction L1D M-state Data Cache Lines Evicted due to replacement (only)
+	0x8 extra: all_m_replacement All Modified lines evicted out of L1D
+name:partial_rat_stalls type:bitmask default:flags_merge_uop
+	0x20 extra: flags_merge_uop Number of perf sensitive flags-merge uops added by Sandy Bridge u-arch.
+	0x40 extra: slow_lea_window Number of cycles with at least 1 slow Load Effective Address (LEA) uop being allocated.
+	0x80 extra: mul_single_uop Number of Multiply packed/scalar single precision uops allocated
 	0x20 extra:cmask=1 flags_merge_uop_cycles Cycles with perf sensitive flags-merge uops added by SandyBridge u-arch.
 name:resource_stalls2 type:bitmask default:0x40
-	0x40 bob_full Cycles Allocator is stalled due Branch Order Buffer (BOB).
-	0xf all_prf_control Resource stalls2 control structures full for physical registers
-	0xc all_fl_empty Cycles with either free list is empty
-	0x4f ooo_rsrc Resource stalls2 control structures full Physical Register Reclaim Table (PRRT),  Physical History Table (PHT), INT or SIMD Free List (FL), Branch Order Buffer (BOB)
-name:cpl_cycles type:bitmask default:0x1
-	0x1 ring0 Unhalted core cycles the Thread was in Rings 0.
+	0x40 extra: bob_full Cycles Allocator is stalled due to Branch Order Buffer (BOB).
+	0xf extra: all_prf_control Resource stalls2 control structures full for physical registers
+	0xc extra: all_fl_empty Cycles with either free list is empty
+	0x4f extra: ooo_rsrc Resource stalls2 control structures full Physical Register Reclaim Table (PRRT),  Physical History Table (PHT), INT or SIMD Free List (FL), Branch Order Buffer (BOB)
+name:cpl_cycles type:bitmask default:ring0
+	0x1 extra: ring0 Unhalted core cycles the Thread was in Rings 0.
 	0x1 extra:cmask=1,edge ring0_trans Transitions from ring123 to Ring0.
-	0x2 ring123 Unhalted core cycles the Thread was in Rings 1/2/3.
-name:offcore_requests_outstanding type:bitmask default:0x1
-	0x1 demand_data_rd Offcore outstanding Demand Data Read transactions in the SuperQueue (SQ), queue to uncore, every cycle. Includes L1D data hardware prefetches.
+	0x2 extra: ring123 Unhalted core cycles the Thread was in Rings 1/2/3.
+name:offcore_requests_outstanding type:bitmask default:cycles_with_demand_data_rd
+	0x1 extra: demand_data_rd Offcore outstanding Demand Data Read transactions in the SuperQueue (SQ), queue to uncore, every cycle. Includes L1D data hardware prefetches.
 	0x1 extra:cmask=1 cycles_with_demand_data_rd cycles there are Offcore outstanding RD data transactions in the SuperQueue (SQ), queue to uncore.
-	0x2 demand_code_rd Offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore, every cycle.
-	0x4 demand_rfo Offcore outstanding RFO (store) transactions in the SuperQueue (SQ), queue to uncore, every cycle.
-	0x8 all_data_rd Offcore outstanding all cacheable Core Data Read transactions in the SuperQueue (SQ), queue to uncore, every cycle.
+	0x2 extra: demand_code_rd Offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore, every cycle.
+	0x4 extra: demand_rfo Offcore outstanding RFO (store) transactions in the SuperQueue (SQ), queue to uncore, every cycle.
+	0x8 extra: all_data_rd Offcore outstanding all cacheable Core Data Read transactions in the SuperQueue (SQ), queue to uncore, every cycle.
 	0x8 extra:cmask=1 cycles_with_data_rd Cycles there are Offcore outstanding all Data read transactions in the SuperQueue (SQ), queue to uncore, every cycle.
 	0x2 extra:cmask=1 cycles_with_demand_code_rd Cycles with offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore, every cycle.
 	0x4 extra:cmask=1 cycles_with_demand_rfo Cycles with offcore outstanding demand RFO Reads transactions in the SuperQueue (SQ), queue to uncore, every cycle.
 name:lock_cycles type:bitmask default:0x1
-	0x1 split_lock_uc_lock_duration Cycles in which the L1D and L2  are locked, due to a UC lock or split lock
-	0x2 cache_lock_duration cycles that theL1D is locked
+	0x1 extra: split_lock_uc_lock_duration Cycles in which the L1D and L2  are locked, due to a UC lock or split lock
+	0x2 extra: cache_lock_duration cycles that the L1D is locked
 name:idq type:bitmask default:0x2
-	0x2 empty Cycles the Instruction Decode Queue (IDQ) is empty.
-	0x4 mite_uops Number of uops delivered to Instruction Decode Queue (IDQ) from MITE path.
-	0x8 dsb_uops Number of uops delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path.
-	0x10 ms_dsb_uops Number of Uops delivered into Instruction Decode Queue (IDQ) when MS_Busy, initiated by Decode Stream Buffer (DSB).
-	0x20 ms_mite_uops Number of Uops delivered into Instruction Decode Queue (IDQ) when MS_Busy, initiated by MITE.
-	0x30 ms_uops Number of Uops were delivered into Instruction Decode Queue (IDQ) from MS, initiated by Decode Stream Buffer (DSB) or MITE.
+	0x2 extra: empty Cycles the Instruction Decode Queue (IDQ) is empty.
+	0x4 extra: mite_uops Number of uops delivered to Instruction Decode Queue (IDQ) from MITE path.
+	0x8 extra: dsb_uops Number of uops delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path.
+	0x10 extra: ms_dsb_uops Number of Uops delivered into Instruction Decode Queue (IDQ) when MS_Busy, initiated by Decode Stream Buffer (DSB).
+	0x20 extra: ms_mite_uops Number of Uops delivered into Instruction Decode Queue (IDQ) when MS_Busy, initiated by MITE.
+	0x30 extra: ms_uops Number of Uops were delivered into Instruction Decode Queue (IDQ) from MS, initiated by Decode Stream Buffer (DSB) or MITE.
 	0x30 extra:cmask=1 ms_cycles Number of cycles that Uops were delivered into Instruction Decode Queue (IDQ) when MS_Busy, initiated by Decode Stream Buffer (DSB) or MITE.
 	0x4 extra:cmask=1 mite_cycles Cycles MITE is active
 	0x8 extra:cmask=1 dsb_cycles Cycles Decode Stream Buffer (DSB) is active
@@ -114,42 +114,42 @@ name:idq type:bitmask default:0x2
 	0x18 extra:cmask=4 all_dsb_cycles_4_uops Cycles Decode Stream Buffer (DSB) is delivering 4 Uops
 	0x24 extra:cmask=1 all_mite_cycles_any_uops Cycles MITE is delivering anything
 	0x24 extra:cmask=4 all_mite_cycles_4_uops Cycles MITE is delivering 4 Uops
-	0x3c mite_all_uops Number of uops delivered to Instruction Decode Queue (IDQ) from any path.
+	0x3c extra: mite_all_uops Number of uops delivered to Instruction Decode Queue (IDQ) from any path.
 name:itlb_misses type:bitmask default:0x1
-	0x1 miss_causes_a_walk Miss in all TLB levels causes an page walk of any page size (4K/2M/4M)
-	0x2 walk_completed Miss in all TLB levels causes a page walk that completes of any page size (4K/2M/4M)
-	0x4 walk_duration Cycles PMH is busy with this walk.
-	0x10 stlb_hit First level miss but second level hit; no page walk.
+	0x1 extra: miss_causes_a_walk Miss in all TLB levels causes a page walk of any page size (4K/2M/4M)
+	0x2 extra: walk_completed Miss in all TLB levels causes a page walk that completes of any page size (4K/2M/4M)
+	0x4 extra: walk_duration Cycles PMH is busy with this walk.
+	0x10 extra: stlb_hit First level miss but second level hit; no page walk.
 name:ild_stall type:bitmask default:0x1
-	0x1 lcp Stall "occurrences" due to length changing prefixes (LCP).
-	0x4 iq_full Stall cycles when instructions cannot be written because the Instruction Queue (IQ) is full.
+	0x1 extra: lcp Stall "occurrences" due to length changing prefixes (LCP).
+	0x4 extra: iq_full Stall cycles when instructions cannot be written because the Instruction Queue (IQ) is full.
 name:br_inst_exec type:bitmask default:0xff
-	0xff all_branches All branch instructions executed.
-	0x41 nontaken_conditional All macro conditional nontaken branch instructions.
-	0x81 taken_conditional All macro conditional taken branch instructions.
-	0x82 taken_direct_jump All macro unconditional taken branch instructions, excluding calls and indirects.
-	0x84 taken_indirect_jump_non_call_ret All taken indirect branches that are not calls nor returns.
-	0x88 taken_indirect_near_return All taken indirect branches that have a return mnemonic.
-	0x90 taken_direct_near_call All taken non-indirect calls.
-	0xa0 taken_indirect_near_call All taken indirect calls, including both register and memory indirect.
-	0xc1 all_conditional All macro conditional branch instructions.
-	0xc2 all_direct_jmp All macro unconditional branch instructions, excluding calls and indirects
-	0xc4 all_indirect_jump_non_call_ret All indirect branches that are not calls nor returns.
-	0xc8 all_indirect_near_return All indirect return branches.
-	0xd0 all_direct_near_call All non-indirect calls executed.
+	0xff extra: all_branches All branch instructions executed.
+	0x41 extra: nontaken_conditional All macro conditional nontaken branch instructions.
+	0x81 extra: taken_conditional All macro conditional taken branch instructions.
+	0x82 extra: taken_direct_jump All macro unconditional taken branch instructions, excluding calls and indirects.
+	0x84 extra: taken_indirect_jump_non_call_ret All taken indirect branches that are not calls nor returns.
+	0x88 extra: taken_indirect_near_return All taken indirect branches that have a return mnemonic.
+	0x90 extra: taken_direct_near_call All taken non-indirect calls.
+	0xa0 extra: taken_indirect_near_call All taken indirect calls, including both register and memory indirect.
+	0xc1 extra: all_conditional All macro conditional branch instructions.
+	0xc2 extra: all_direct_jmp All macro unconditional branch instructions, excluding calls and indirects
+	0xc4 extra: all_indirect_jump_non_call_ret All indirect branches that are not calls nor returns.
+	0xc8 extra: all_indirect_near_return All indirect return branches.
+	0xd0 extra: all_direct_near_call All non-indirect calls executed.
 name:br_misp_exec type:bitmask default:0xff
-	0xff all_branches All mispredicted branch instructions executed.
-	0x41 nontaken_conditional All nontaken mispredicted macro conditional branch instructions.
-	0x81 taken_conditional All taken mispredicted macro conditional branch instructions.
-	0x84 taken_indirect_jump_non_call_ret All taken mispredicted indirect branches that are not calls nor returns.
-	0x88 taken_return_near All taken mispredicted indirect branches that have a return mnemonic.
-	0x90 taken_direct_near_call All taken mispredicted non-indirect calls.
-	0xa0 taken_indirect_near_call All taken mispredicted indirect calls, including both register and memory indirect.
-	0xc1 all_conditional All mispredicted macro conditional branch instructions.
-	0xc4 all_indirect_jump_non_call_ret All mispredicted indirect branches that are not calls nor returns.
-	0xd0 all_direct_near_call All mispredicted non-indirect calls
-name:idq_uops_not_delivered type:bitmask default:0x1
-	0x1 core Count number of non-delivered uops to Resource Allocation Table (RAT).
+	0xff extra: all_branches All mispredicted branch instructions executed.
+	0x41 extra: nontaken_conditional All nontaken mispredicted macro conditional branch instructions.
+	0x81 extra: taken_conditional All taken mispredicted macro conditional branch instructions.
+	0x84 extra: taken_indirect_jump_non_call_ret All taken mispredicted indirect branches that are not calls nor returns.
+	0x88 extra: taken_return_near All taken mispredicted indirect branches that have a return mnemonic.
+	0x90 extra: taken_direct_near_call All taken mispredicted non-indirect calls.
+	0xa0 extra: taken_indirect_near_call All taken mispredicted indirect calls, including both register and memory indirect.
+	0xc1 extra: all_conditional All mispredicted macro conditional branch instructions.
+	0xc4 extra: all_indirect_jump_non_call_ret All mispredicted indirect branches that are not calls nor returns.
+	0xd0 extra: all_direct_near_call All mispredicted non-indirect calls
+name:idq_uops_not_delivered type:bitmask default:core
+	0x1 extra: core Count number of non-delivered uops to Resource Allocation Table (RAT).
 	0x1 extra:cmask=4 cycles_0_uops_deliv.core Counts the cycles no uops were delivered
 	0x1 extra:cmask=3 cycles_le_1_uop_deliv.core Counts the cycles less than 1 uops were delivered
 	0x1 extra:cmask=2 cycles_le_2_uop_deliv.core Counts the cycles less than 2 uops were delivered
@@ -157,119 +157,119 @@ name:idq_uops_not_delivered type:bitmask default:0x1
 	0x1 extra:cmask=4,inv cycles_ge_1_uop_deliv.core Cycles when 1 or more uops were delivered to the by the front end.
 	0x1 extra:cmask=1,inv cycles_fe_was_ok Counts cycles FE delivered 4 uops or Resource Allocation Table (RAT) was stalling FE.
 name:uops_dispatched_port type:bitmask default:0x1
-	0x1 port_0 Cycles which a Uop is dispatched on port 0
-	0x2 port_1 Cycles which a Uop is dispatched on port 1
-	0x4 port_2_ld Cycles which a load Uop is dispatched on port 2
-	0x8 port_2_sta Cycles which a STA Uop is dispatched on port 2
-	0x10 port_3_ld Cycles which a load Uop is dispatched on port 3
-	0x20 port_3_sta Cycles which a STA Uop is dispatched on port 3
-	0x40 port_4 Cycles which a Uop is dispatched on port 4
-	0x80 port_5 Cycles which a Uop is dispatched on port 5
-	0xc port_2 Uops disptached to port 2, loads and stores (speculative and retired)
-	0x30 port_3 Uops disptached to port 3, loads and stores (speculative and retired)
-	0xc port_2_core Uops disptached to port 2, loads and stores per core (speculative and retired)
-	0x30 port_3_core Uops disptached to port 3, loads and stores per core (speculative and retired)
+	0x1 extra: port_0 Cycles which a Uop is dispatched on port 0
+	0x2 extra: port_1 Cycles which a Uop is dispatched on port 1
+	0x4 extra: port_2_ld Cycles which a load Uop is dispatched on port 2
+	0x8 extra: port_2_sta Cycles which a STA Uop is dispatched on port 2
+	0x10 extra: port_3_ld Cycles which a load Uop is dispatched on port 3
+	0x20 extra: port_3_sta Cycles which a STA Uop is dispatched on port 3
+	0x40 extra: port_4 Cycles which a Uop is dispatched on port 4
+	0x80 extra: port_5 Cycles which a Uop is dispatched on port 5
+	0xc extra: port_2 Uops dispatched to port 2, loads and stores (speculative and retired)
+	0x30 extra: port_3 Uops dispatched to port 3, loads and stores (speculative and retired)
+	0xc extra: port_2_core Uops dispatched to port 2, loads and stores per core (speculative and retired)
+	0x30 extra: port_3_core Uops dispatched to port 3, loads and stores per core (speculative and retired)
 name:resource_stalls type:bitmask default:0x1
-	0x1 any Cycles Allocation is stalled due to Resource Related reason.
-	0x2 lb Cycles Allocator is stalled due to Load Buffer full
-	0x4 rs Stall due to no eligible Reservation Station (RS) entry available.
-	0x8 sb Cycles Allocator is stalled due to Store Buffer full (not including draining from synch).
-	0x10 rob ROB full cycles.
-	0xe mem_rs Resource stalls due to LB, SB or Reservation Station (RS) being completely in use
-	0xf0 ooo_rsrc Resource stalls due to Rob being full, FCSW, MXCSR and OTHER
-	0xa lb_sb Resource stalls due to load or store buffers
+	0x1 extra: any Cycles Allocation is stalled due to Resource Related reason.
+	0x2 extra: lb Cycles Allocator is stalled due to Load Buffer full
+	0x4 extra: rs Stall due to no eligible Reservation Station (RS) entry available.
+	0x8 extra: sb Cycles Allocator is stalled due to Store Buffer full (not including draining from synch).
+	0x10 extra: rob ROB full cycles.
+	0xe extra: mem_rs Resource stalls due to LB, SB or Reservation Station (RS) being completely in use
+	0xf0 extra: ooo_rsrc Resource stalls due to Rob being full, FCSW, MXCSR and OTHER
+	0xa extra: lb_sb Resource stalls due to load or store buffers
 name:dsb2mite_switches type:bitmask default:0x1
-	0x1 count Number of Decode Stream Buffer (DSB) to MITE switches
-	0x2 penalty_cycles Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles.
+	0x1 extra: count Number of Decode Stream Buffer (DSB) to MITE switches
+	0x2 extra: penalty_cycles Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles.
 name:dsb_fill type:bitmask default:0x2
-	0x2 other_cancel Count number of times a valid DSB fill has been actually cancelled for any reason.
-	0x8 exceed_dsb_lines Decode Stream Buffer (DSB) Fill encountered > 3 Decode Stream Buffer (DSB) lines.
-	0xa all_cancel Count number of times a valid Decode Stream Buffer (DSB) fill has been actually cancelled for any reason.
+	0x2 extra: other_cancel Count number of times a valid DSB fill has been actually cancelled for any reason.
+	0x8 extra: exceed_dsb_lines Decode Stream Buffer (DSB) Fill encountered > 3 Decode Stream Buffer (DSB) lines.
+	0xa extra: all_cancel Count number of times a valid Decode Stream Buffer (DSB) fill has been actually cancelled for any reason.
 name:offcore_requests type:bitmask default:0x1
-	0x1 demand_data_rd Demand Data Read requests sent to uncore
-	0x2 demand_code_rd Offcore Code read requests. Includes Cacheable and Un-cacheables.
-	0x4 demand_rfo Offcore Demand RFOs. Includes regular RFO, Locks, ItoM.
-	0x8 all_data_rd Offcore Demand and prefetch data reads returned to the core.
-name:uops_dispatched type:bitmask default:0x1
-	0x1 thread Counts total number of uops to be dispatched per-thread each cycle.
+	0x1 extra: demand_data_rd Demand Data Read requests sent to uncore
+	0x2 extra: demand_code_rd Offcore Code read requests. Includes Cacheable and Un-cacheables.
+	0x4 extra: demand_rfo Offcore Demand RFOs. Includes regular RFO, Locks, ItoM.
+	0x8 extra: all_data_rd Offcore Demand and prefetch data reads returned to the core.
+name:uops_dispatched type:bitmask default:thread
+	0x1 extra: thread Counts total number of uops to be dispatched per-thread each cycle.
 	0x1 extra:cmask=1,inv stall_cycles Counts number of cycles no uops were dispatced to be executed on this thread.
-	0x2 core Counts total number of uops dispatched from any thread
+	0x2 extra: core Counts total number of uops dispatched from any thread
 name:tlb_flush type:bitmask default:0x1
-	0x1 dtlb_thread Count number of DTLB flushes of thread-specific entries.
-	0x20 stlb_any Count number of any STLB flushes
-name:l1d_blocks type:bitmask default:0x1
-	0x1 ld_bank_conflict Any dispatched loads cancelled due to DCU bank conflict
+	0x1 extra: dtlb_thread Count number of DTLB flushes of thread-specific entries.
+	0x20 extra: stlb_any Count number of any STLB flushes
+name:l1d_blocks type:bitmask default:bank_conflict_cycles
+	0x1 extra: ld_bank_conflict Any dispatched loads cancelled due to DCU bank conflict
 	0x5 extra:cmask=1 bank_conflict_cycles Cycles with l1d blocks due to bank conflicts
 name:other_assists type:bitmask default:0x2
-	0x2 itlb_miss_retired Instructions that experienced an ITLB miss.  Non Pebs
-	0x10 avx_to_sse Number of transitions from AVX-256 to legacy SSE when penalty applicable Non Pebs
-	0x20 sse_to_avx Number of transitions from legacy SSE to AVX-256 when penalty applicable Non Pebs
-name:uops_retired type:bitmask default:0x1
-	0x1 all All uops that actually retired.
-	0x2 retire_slots number of retirement slots used non PEBS
+	0x2 extra: itlb_miss_retired Instructions that experienced an ITLB miss.  Non Pebs
+	0x10 extra: avx_to_sse Number of transitions from AVX-256 to legacy SSE when penalty applicable Non Pebs
+	0x20 extra: sse_to_avx Number of transitions from legacy SSE to AVX-256 when penalty applicable Non Pebs
+name:uops_retired type:bitmask default:all
+	0x1 extra: all All uops that actually retired.
+	0x2 extra: retire_slots number of retirement slots used non PEBS
 	0x1 extra:cmask=1,inv stall_cycles Cycles no executable uops retired
 	0x1 extra:cmask=10,inv total_cycles Number of cycles using always true condition applied to  non PEBS uops retired event.
 name:machine_clears type:bitmask default:0x2
-	0x2 memory_ordering Number of Memory Ordering Machine Clears detected.
-	0x4 smc Number of Self-modifying code (SMC) Machine Clears detected.
-	0x20 maskmov Number of AVX masked mov Machine Clears detected.
+	0x2 extra: memory_ordering Number of Memory Ordering Machine Clears detected.
+	0x4 extra: smc Number of Self-modifying code (SMC) Machine Clears detected.
+	0x20 extra: maskmov Number of AVX masked mov Machine Clears detected.
 name:br_inst_retired type:bitmask default:0x1
-	0x1 conditional Counts all taken and not taken macro conditional branch instructions.
-	0x2 near_call Counts all macro direct and indirect near calls. non PEBS
-	0x8 near_return This event counts the number of near ret instructions retired.
-	0x10 not_taken Counts all not taken macro branch instructions retired.
-	0x20 near_taken Counts the number of near branch taken instructions retired.
-	0x40 far_branch Counts the number of far branch  instructions retired.
-	0x4 all_branches_ps Counts all taken and not taken macro branches including far branches.(Precise Event)
-	0x2 near_call_r3 Ring123 only near calls (non precise)
-	0x2 near_call_r3_ps Ring123 only near calls (precise event)
+	0x1 extra: conditional Counts all taken and not taken macro conditional branch instructions.
+	0x2 extra: near_call Counts all macro direct and indirect near calls. non PEBS
+	0x8 extra: near_return This event counts the number of near ret instructions retired.
+	0x10 extra: not_taken Counts all not taken macro branch instructions retired.
+	0x20 extra: near_taken Counts the number of near branch taken instructions retired.
+	0x40 extra: far_branch Counts the number of far branch  instructions retired.
+	0x4 extra: all_branches_ps Counts all taken and not taken macro branches including far branches.(Precise Event)
+	0x2 extra: near_call_r3 Ring123 only near calls (non precise)
+	0x2 extra: near_call_r3_ps Ring123 only near calls (precise event)
 name:br_misp_retired type:bitmask default:0x1
-	0x1 conditional All mispredicted macro conditional branch instructions.
-	0x2 near_call All macro direct and indirect near calls
-	0x10 not_taken number of branch instructions retired that were mispredicted and not-taken.
-	0x20 taken number of branch instructions retired that were mispredicted and taken.
-	0x4 all_branches_ps all macro branches (Precise Event)
+	0x1 extra: conditional All mispredicted macro conditional branch instructions.
+	0x2 extra: near_call All macro direct and indirect near calls
+	0x10 extra: not_taken number of branch instructions retired that were mispredicted and not-taken.
+	0x20 extra: taken number of branch instructions retired that were mispredicted and taken.
+	0x4 extra: all_branches_ps all macro branches (Precise Event)
 name:fp_assist type:bitmask default:0x1e
 	0x1e extra:cmask=1 any Counts any FP_ASSIST umask was incrementing.
-	0x2 x87_output output - Numeric Overflow, Numeric Underflow, Inexact Result
-	0x4 x87_input input - Invalid Operation, Denormal Operand, SNaN Operand
-	0x8 simd_output Any output SSE* FP Assist - Numeric Overflow, Numeric Underflow.
-	0x10 simd_input Any input SSE* FP Assist
+	0x2 extra: x87_output output - Numeric Overflow, Numeric Underflow, Inexact Result
+	0x4 extra: x87_input input - Invalid Operation, Denormal Operand, SNaN Operand
+	0x8 extra: simd_output Any output SSE* FP Assist - Numeric Overflow, Numeric Underflow.
+	0x10 extra: simd_input Any input SSE* FP Assist
 name:mem_uops_retired type:bitmask default:0x11
-	0x11 stlb_miss_loads STLB misses dues to retired loads
-	0x12 stlb_miss_stores STLB misses dues to retired stores
-	0x21 lock_loads Locked retired loads
-	0x41 split_loads Retired loads causing cacheline splits
-	0x42 split_stores Retired stores causing cacheline splits
-	0x81 all_loads Any retired loads
-	0x82 all_stores Any retired stores
+	0x11 extra: stlb_miss_loads STLB misses dues to retired loads
+	0x12 extra: stlb_miss_stores STLB misses dues to retired stores
+	0x21 extra: lock_loads Locked retired loads
+	0x41 extra: split_loads Retired loads causing cacheline splits
+	0x42 extra: split_stores Retired stores causing cacheline splits
+	0x81 extra: all_loads Any retired loads
+	0x82 extra: all_stores Any retired stores
 name:mem_load_uops_retired type:bitmask default:0x1
-	0x1 l1_hit Load hit in nearest-level (L1D) cache
-	0x2 l2_hit Load hit in mid-level (L2) cache
-	0x4 llc_hit Load hit in last-level (L3) cache with no snoop needed
-	0x40 hit_lfb A load missed L1D but hit the Fill Buffer
+	0x1 extra: l1_hit Load hit in nearest-level (L1D) cache
+	0x2 extra: l2_hit Load hit in mid-level (L2) cache
+	0x4 extra: llc_hit Load hit in last-level (L3) cache with no snoop needed
+	0x40 extra: hit_lfb A load missed L1D but hit the Fill Buffer
 name:mem_load_uops_llc_hit_retired type:bitmask default:0x1
-	0x1 xsnp_miss Load LLC Hit and a cross-core Snoop missed in on-pkg core cache
-	0x2 xsnp_hit Load LLC Hit and a cross-core Snoop hits in on-pkg core cache
-	0x4 xsnp_hitm Load had HitM Response from a core on same socket (shared LLC).
-	0x8 xsnp_none Load hit in last-level (L3) cache with no snoop needed.
+	0x1 extra: xsnp_miss Load LLC Hit and a cross-core Snoop missed in on-pkg core cache
+	0x2 extra: xsnp_hit Load LLC Hit and a cross-core Snoop hits in on-pkg core cache
+	0x4 extra: xsnp_hitm Load had HitM Response from a core on same socket (shared LLC).
+	0x8 extra: xsnp_none Load hit in last-level (L3) cache with no snoop needed.
 name:l2_trans type:bitmask default:0x80
-	0x80 all_requests Transactions accessing L2 pipe
-	0x1 demand_data_rd Demand Data Read requests that access L2 cache, includes L1D prefetches.
-	0x2 rfo RFO requests that access L2 cache
-	0x4 code_rd L2 cache accesses when fetching instructions including L1D code prefetches
-	0x8 all_pf L2 or LLC HW prefetches that access L2 cache
-	0x10 l1d_wb L1D writebacks that access L2 cache
-	0x20 l2_fill L2 fill requests that access L2 cache
-	0x40 l2_wb L2 writebacks that access L2 cache
+	0x80 extra: all_requests Transactions accessing L2 pipe
+	0x1 extra: demand_data_rd Demand Data Read requests that access L2 cache, includes L1D prefetches.
+	0x2 extra: rfo RFO requests that access L2 cache
+	0x4 extra: code_rd L2 cache accesses when fetching instructions including L1D code prefetches
+	0x8 extra: all_pf L2 or LLC HW prefetches that access L2 cache
+	0x10 extra: l1d_wb L1D writebacks that access L2 cache
+	0x20 extra: l2_fill L2 fill requests that access L2 cache
+	0x40 extra: l2_wb L2 writebacks that access L2 cache
 name:l2_lines_in type:bitmask default:0x7
-	0x7 all L2 cache lines filling L2
-	0x1 i L2 cache lines in I state filling L2
-	0x2 s L2 cache lines in S state filling L2
-	0x4 e L2 cache lines in E state filling L2
+	0x7 extra: all L2 cache lines filling L2
+	0x1 extra: i L2 cache lines in I state filling L2
+	0x2 extra: s L2 cache lines in S state filling L2
+	0x4 extra: e L2 cache lines in E state filling L2
 name:l2_lines_out type:bitmask default:0x1
-	0x1 demand_clean Clean line evicted by a demand
-	0x2 demand_dirty Dirty line evicted by a demand
-	0x4 pf_clean Clean line evicted by an L2 Prefetch
-	0x8 pf_dirty Dirty line evicted by an L2 Prefetch
-	0xa dirty_all Any Dirty line evicted
+	0x1 extra: demand_clean Clean line evicted by a demand
+	0x2 extra: demand_dirty Dirty line evicted by a demand
+	0x4 extra: pf_clean Clean line evicted by an L2 Prefetch
+	0x8 extra: pf_dirty Dirty line evicted by an L2 Prefetch
+	0xa extra: dirty_all Any Dirty line evicted
diff --git a/events/i386/silvermont/events b/events/i386/silvermont/events
new file mode 100644
index 0000000..434538f
--- /dev/null
+++ b/events/i386/silvermont/events
@@ -0,0 +1,24 @@
+#
+# Intel "Silvermont" microarchitecture core events.
+#
+# See http://ark.intel.com/ for help in identifying Silvermont based CPUs
+#
+# Note that the minimum counts were not determined experimentally and could
+# likely be lowered in many cases without ill effect.
+#
+include:i386/arch_perfmon
+event:0x03 counters:0,1 um:rehabq minimum:200003 name:rehabq :
+event:0x04 counters:0,1 um:mem_uops_retired minimum:200003 name:mem_uops_retired :
+event:0x05 counters:0,1 um:page_walks minimum:200003 name:page_walks :
+event:0x30 counters:0,1 um:zero minimum:200003 name:l2_reject_xq_all :
+event:0x31 counters:0,1 um:zero minimum:200003 name:core_reject_l2q_all :
+event:0x80 counters:0,1 um:icache minimum:200003 name:icache :
+event:0xc2 counters:0,1 um:uops_retired minimum:2000003 name:uops_retired :
+event:0xc3 counters:0,1 um:machine_clears minimum:200003 name:machine_clears :
+event:0xc4 counters:0,1 um:br_inst_retired minimum:200003 name:br_inst_retired :
+event:0xc5 counters:0,1 um:br_misp_retired minimum:200003 name:br_misp_retired :
+event:0xca counters:0,1 um:no_alloc_cycles minimum:200003 name:no_alloc_cycles :
+event:0xcb counters:0,1 um:rs_full_stall minimum:200003 name:rs_full_stall :
+event:0xcd counters:0,1 um:one minimum:2000003 name:cycles_div_busy_all :
+event:0xe6 counters:0,1 um:baclears minimum:200003 name:baclears :
+event:0xe7 counters:0,1 um:one minimum:200003 name:ms_decoded_ms_entry :
diff --git a/events/i386/silvermont/unit_masks b/events/i386/silvermont/unit_masks
new file mode 100644
index 0000000..c0dac26
--- /dev/null
+++ b/events/i386/silvermont/unit_masks
@@ -0,0 +1,89 @@
+#
+# Unit masks for the Intel "Silvermont" microarchitecture
+#
+# See http://ark.intel.com/ for help in identifying Silvermont based CPUs
+#
+include:i386/arch_perfmon
+name:rehabq type:exclusive default:0x1
+	0x1 extra: ld_block_st_forward This event counts the number of retired loads that were prohibited from receiving forwarded data from the store because of address mismatch.
+	0x1 extra:pebs ld_block_st_forward_pebs This event counts the number of retired loads that were prohibited from receiving forwarded data from the store because of address mismatch.
+	0x2 extra: ld_block_std_notready This event counts the cases where a forward was technically possible, but did not occur because the store data was not available at the right time
+	0x4 extra: st_splits This event counts the number of retired stores that experienced cache line boundary splits
+	0x8 extra: ld_splits This event counts the number of retired loads that experienced cache line boundary splits
+	0x8 extra:pebs ld_splits_pebs This event counts the number of retired loads that experienced cache line boundary splits
+	0x10 extra: lock This event counts the number of retired memory operations with lock semantics. These are either implicit locked instructions such as the XCHG instruction or instructions with an explicit LOCK prefix (0xF0).
+	0x20 extra: sta_full This event counts the number of retired stores that are delayed because there is not a store address buffer available.
+	0x40 extra: any_ld This event counts the number of load uops reissued from Rehabq
+	0x80 extra: any_st This event counts the number of store uops reissued from Rehabq
+name:mem_uops_retired type:exclusive default:0x1
+	0x1 extra: l1_miss_loads This event counts the number of load ops retired that miss in L1 Data cache. Note that prefetch misses will not be counted.
+	0x2 extra: l2_hit_loads This event counts the number of load ops retired that hit in the L2
+	0x2 extra:pebs l2_hit_loads_pebs This event counts the number of load ops retired that hit in the L2
+	0x4 extra: l2_miss_loads This event counts the number of load ops retired that miss in the L2
+	0x4 extra:pebs l2_miss_loads_pebs This event counts the number of load ops retired that miss in the L2
+	0x8 extra: dtlb_miss_loads This event counts the number of load ops retired that had DTLB miss.
+	0x8 extra:pebs dtlb_miss_loads_pebs This event counts the number of load ops retired that had DTLB miss.
+	0x10 extra: utlb_miss This event counts the number of load ops retired that had UTLB miss.
+	0x20 extra: hitm This event counts the number of load ops retired that got data from the other core or from the other module.
+	0x20 extra:pebs hitm_pebs This event counts the number of load ops retired that got data from the other core or from the other module.
+	0x40 extra: all_loads This event counts the number of load ops retired
+	0x80 extra: all_stores This event counts the number of store ops retired
+name:page_walks type:exclusive default:0x1
+	0x1 extra:edge d_side_walks This event counts when a data (D) page walk is completed or started.  Since a page walk implies a TLB miss, the number of TLB misses can be counted by counting the number of pagewalks.
+	0x1 extra: d_side_cycles This event counts every cycle when a D-side (walks due to a load) page walk is in progress. Page walk duration divided by number of page walks is the average duration of page-walks.
+	0x2 extra:edge i_side_walks This event counts when an instruction (I) page walk is completed or started.  Since a page walk implies a TLB miss, the number of TLB misses can be counted by counting the number of pagewalks.
+	0x2 extra: i_side_cycles This event counts every cycle when an I-side (walks due to an instruction fetch) page walk is in progress. Page walk duration divided by number of page walks is the average duration of page-walks.
+	0x3 extra:edge walks This event counts when a data (D) page walk or an instruction (I) page walk is completed or started.  Since a page walk implies a TLB miss, the number of TLB misses can be counted by counting the number of pagewalks.
+	0x3 extra: cycles This event counts every cycle when a data (D) page walk or instruction (I) page walk is in progress.  Since a pagewalk implies a TLB miss, the approximate cost of a TLB miss can be determined from this event.
+name:icache type:exclusive default:0x3
+	0x3 extra: accesses This event counts all instruction fetches, including uncacheable fetches.
+	0x1 extra: hit This event counts all instruction fetches from the instruction cache.
+	0x2 extra: misses This event counts all instruction fetches that miss the Instruction cache or produce memory requests. This includes uncacheable fetches. An instruction fetch miss is counted only once and not once for every cycle it is outstanding.
+name:uops_retired type:exclusive default:0x10
+	0x10 extra: all This event counts the number of micro-ops retired. The processor decodes complex macro instructions into a sequence of simpler micro-ops. Most instructions are composed of one or two micro-ops. Some instructions are decoded into longer sequences such as repeat instructions, floating point transcendental instructions, and assists. In some cases micro-op sequences are fused or whole instructions are fused into one micro-op. See other UOPS_RETIRED events for differentiating retired fused and non-fused micro-ops.
+	0x1 extra: ms This event counts the number of micro-ops retired that were supplied from MSROM.
+name:machine_clears type:exclusive default:0x8
+	0x8 extra: all Machine clears happen when something happens in the machine that causes the hardware to need to take special care to get the right answer. When such a condition is signaled on an instruction, the front end of the machine is notified that it must restart, so no more instructions will be decoded from the current path.  All instructions "older" than this one will be allowed to finish.  This instruction and all "younger" instructions must be cleared, since they must not be allowed to complete.  Essentially, the hardware waits until the problematic instruction is the oldest instruction in the machine.  This means all older instructions are retired, and all pending stores (from older instructions) are completed.  Then the new path of instructions from the front end are allowed to start into the machine.  There are many conditions that might cause a machine clear (including the receipt of an interrupt, or a trap or a fault).  All those conditions (including but not limited to MACHINE_CLEARS.MEMORY_ORDERING, MACHINE_CLEARS.SMC, and MACHINE_CLEARS.FP_ASSIST) are captured in the ANY event. In addition, some conditions can be specifically counted (i.e. SMC, MEMORY_ORDERING, FP_ASSIST).  However, the sum of SMC, MEMORY_ORDERING, and FP_ASSIST machine clears will not necessarily equal the number of ANY.
+	0x1 extra: smc This event counts the number of times that a program writes to a code section. Self-modifying code causes a severe penalty in all Intel(R) architecture processors.
+	0x2 extra: memory_ordering This event counts the number of times that pipeline was cleared due to memory ordering issues.
+	0x4 extra: fp_assist This event counts the number of times that pipeline stalled due to FP operations needing assists.
+name:br_inst_retired type:exclusive default:0x7e
+	0x7e extra: jcc JCC counts the number of conditional branch (JCC) instructions retired. Branch prediction predicts the branch target and enables the processor to begin executing instructions long before the branch true execution path is known. All branches utilize the branch prediction unit (BPU) for prediction. This unit predicts the target address not only based on the EIP of the branch but also based on the execution path through which execution reached this EIP. The BPU can efficiently predict the following branch types: conditional branches, direct calls and jumps, indirect calls and jumps, returns.
+	0x7e extra:pebs jcc_pebs JCC counts the number of conditional branch (JCC) instructions retired. Branch prediction predicts the branch target and enables the processor to begin executing instructions long before the branch true execution path is known. All branches utilize the branch prediction unit (BPU) for prediction. This unit predicts the target address not only based on the EIP of the branch but also based on the execution path through which execution reached this EIP. The BPU can efficiently predict the following branch types: conditional branches, direct calls and jumps, indirect calls and jumps, returns.
+	0xfe extra: taken_jcc TAKEN_JCC counts the number of taken conditional branch (JCC) instructions retired. Branch prediction predicts the branch target and enables the processor to begin executing instructions long before the branch true execution path is known. All branches utilize the branch prediction unit (BPU) for prediction. This unit predicts the target address not only based on the EIP of the branch but also based on the execution path through which execution reached this EIP. The BPU can efficiently predict the following branch types: conditional branches, direct calls and jumps, indirect calls and jumps, returns.
+	0xfe extra:pebs taken_jcc_pebs TAKEN_JCC counts the number of taken conditional branch (JCC) instructions retired. Branch prediction predicts the branch target and enables the processor to begin executing instructions long before the branch true execution path is known. All branches utilize the branch prediction unit (BPU) for prediction. This unit predicts the target address not only based on the EIP of the branch but also based on the execution path through which execution reached this EIP. The BPU can efficiently predict the following branch types: conditional branches, direct calls and jumps, indirect calls and jumps, returns.
+	0xf9 extra: call CALL counts the number of near CALL branch instructions retired.  Branch prediction predicts the branch target and enables the processor to begin executing instructions long before the branch true execution path is known. All branches utilize the branch prediction unit (BPU) for prediction. This unit predicts the target address not only based on the EIP of the branch but also based on the execution path through which execution reached this EIP. The BPU can efficiently predict the following branch types: conditional branches, direct calls and jumps, indirect calls and jumps, returns.
+	0xf9 extra:pebs call_pebs CALL counts the number of near CALL branch instructions retired.  Branch prediction predicts the branch target and enables the processor to begin executing instructions long before the branch true execution path is known. All branches utilize the branch prediction unit (BPU) for prediction. This unit predicts the target address not only based on the EIP of the branch but also based on the execution path through which execution reached this EIP. The BPU can efficiently predict the following branch types: conditional branches, direct calls and jumps, indirect calls and jumps, returns.
+	0xfd extra: rel_call REL_CALL counts the number of near relative CALL branch instructions retired.  Branch prediction predicts the branch target and enables the processor to begin executing instructions long before the branch true execution path is known. All branches utilize the branch prediction unit (BPU) for prediction. This unit predicts the target address not only based on the EIP of the branch but also based on the execution path through which execution reached this EIP. The BPU can efficiently predict the following branch types: conditional branches, direct calls and jumps, indirect calls and jumps, returns.
+	0xfd extra:pebs rel_call_pebs REL_CALL counts the number of near relative CALL branch instructions retired.  Branch prediction predicts the branch target and enables the processor to begin executing instructions long before the branch true execution path is known. All branches utilize the branch prediction unit (BPU) for prediction. This unit predicts the target address not only based on the EIP of the branch but also based on the execution path through which execution reached this EIP. The BPU can efficiently predict the following branch types: conditional branches, direct calls and jumps, indirect calls and jumps, returns.
+	0xfb extra: ind_call IND_CALL counts the number of near indirect CALL branch instructions retired.  Branch prediction predicts the branch target and enables the processor to begin executing instructions long before the branch true execution path is known. All branches utilize the branch prediction unit (BPU) for prediction. This unit predicts the target address not only based on the EIP of the branch but also based on the execution path through which execution reached this EIP. The BPU can efficiently predict the following branch types: conditional branches, direct calls and jumps, indirect calls and jumps, returns.
+	0xfb extra:pebs ind_call_pebs IND_CALL counts the number of near indirect CALL branch instructions retired.  Branch prediction predicts the branch target and enables the processor to begin executing instructions long before the branch true execution path is known. All branches utilize the branch prediction unit (BPU) for prediction. This unit predicts the target address not only based on the EIP of the branch but also based on the execution path through which execution reached this EIP. The BPU can efficiently predict the following branch types: conditional branches, direct calls and jumps, indirect calls and jumps, returns.
+	0xf7 extra: return RETURN counts the number of near RET branch instructions retired.  Branch prediction predicts the branch target and enables the processor to begin executing instructions long before the branch true execution path is known. All branches utilize the branch prediction unit (BPU) for prediction. This unit predicts the target address not only based on the EIP of the branch but also based on the execution path through which execution reached this EIP. The BPU can efficiently predict the following branch types: conditional branches, direct calls and jumps, indirect calls and jumps, returns.
+	0xf7 extra:pebs return_pebs RETURN counts the number of near RET branch instructions retired.  Branch prediction predicts the branch target and enables the processor to begin executing instructions long before the branch true execution path is known. All branches utilize the branch prediction unit (BPU) for prediction. This unit predicts the target address not only based on the EIP of the branch but also based on the execution path through which execution reached this EIP. The BPU can efficiently predict the following branch types: conditional branches, direct calls and jumps, indirect calls and jumps, returns.
+	0xeb extra: non_return_ind NON_RETURN_IND counts the number of near indirect JMP and near indirect CALL branch instructions retired.  Branch prediction predicts the branch target and enables the processor to begin executing instructions long before the branch true execution path is known. All branches utilize the branch prediction unit (BPU) for prediction. This unit predicts the target address not only based on the EIP of the branch but also based on the execution path through which execution reached this EIP. The BPU can efficiently predict the following branch types: conditional branches, direct calls and jumps, indirect calls and jumps, returns.
+	0xeb extra:pebs non_return_ind_pebs NON_RETURN_IND counts the number of near indirect JMP and near indirect CALL branch instructions retired.  Branch prediction predicts the branch target and enables the processor to begin executing instructions long before the branch true execution path is known. All branches utilize the branch prediction unit (BPU) for prediction. This unit predicts the target address not only based on the EIP of the branch but also based on the execution path through which execution reached this EIP. The BPU can efficiently predict the following branch types: conditional branches, direct calls and jumps, indirect calls and jumps, returns.
+	0xbf extra: far_branch FAR counts the number of far branch instructions retired.  Branch prediction predicts the branch target and enables the processor to begin executing instructions long before the branch true execution path is known. All branches utilize the branch prediction unit (BPU) for prediction. This unit predicts the target address not only based on the EIP of the branch but also based on the execution path through which execution reached this EIP. The BPU can efficiently predict the following branch types: conditional branches, direct calls and jumps, indirect calls and jumps, returns.
+	0xbf extra:pebs far_branch_pebs FAR counts the number of far branch instructions retired.  Branch prediction predicts the branch target and enables the processor to begin executing instructions long before the branch true execution path is known. All branches utilize the branch prediction unit (BPU) for prediction. This unit predicts the target address not only based on the EIP of the branch but also based on the execution path through which execution reached this EIP. The BPU can efficiently predict the following branch types: conditional branches, direct calls and jumps, indirect calls and jumps, returns.
+name:br_misp_retired type:exclusive default:0x7e
+	0x7e extra: jcc JCC counts the number of mispredicted conditional branches (JCC) instructions retired.  This event counts the number of retired branch instructions that were mispredicted by the processor, categorized by type. A branch misprediction occurs when the processor predicts that the branch would be taken, but it is not, or vice-versa.  When the misprediction is discovered, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.
+	0x7e extra:pebs jcc_pebs JCC counts the number of mispredicted conditional branches (JCC) instructions retired.  This event counts the number of retired branch instructions that were mispredicted by the processor, categorized by type. A branch misprediction occurs when the processor predicts that the branch would be taken, but it is not, or vice-versa.  When the misprediction is discovered, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.
+	0xfe extra: taken_jcc TAKEN_JCC counts the number of mispredicted taken conditional branch (JCC) instructions retired.  This event counts the number of retired branch instructions that were mispredicted by the processor, categorized by type. A branch misprediction occurs when the processor predicts that the branch would be taken, but it is not, or vice-versa.  When the misprediction is discovered, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.
+	0xfe extra:pebs taken_jcc_pebs TAKEN_JCC counts the number of mispredicted taken conditional branch (JCC) instructions retired.  This event counts the number of retired branch instructions that were mispredicted by the processor, categorized by type. A branch misprediction occurs when the processor predicts that the branch would be taken, but it is not, or vice-versa.  When the misprediction is discovered, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.
+	0xfb extra: ind_call IND_CALL counts the number of mispredicted near indirect CALL branch instructions retired.  This event counts the number of retired branch instructions that were mispredicted by the processor, categorized by type. A branch misprediction occurs when the processor predicts that the branch would be taken, but it is not, or vice-versa.  When the misprediction is discovered, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.
+	0xfb extra:pebs ind_call_pebs IND_CALL counts the number of mispredicted near indirect CALL branch instructions retired.  This event counts the number of retired branch instructions that were mispredicted by the processor, categorized by type. A branch misprediction occurs when the processor predicts that the branch would be taken, but it is not, or vice-versa.  When the misprediction is discovered, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.
+	0xf7 extra: return RETURN counts the number of mispredicted near RET branch instructions retired.  This event counts the number of retired branch instructions that were mispredicted by the processor, categorized by type. A branch misprediction occurs when the processor predicts that the branch would be taken, but it is not, or vice-versa.  When the misprediction is discovered, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.
+	0xf7 extra:pebs return_pebs RETURN counts the number of mispredicted near RET branch instructions retired.  This event counts the number of retired branch instructions that were mispredicted by the processor, categorized by type. A branch misprediction occurs when the processor predicts that the branch would be taken, but it is not, or vice-versa.  When the misprediction is discovered, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.
+	0xeb extra: non_return_ind NON_RETURN_IND counts the number of mispredicted near indirect JMP and near indirect CALL branch instructions retired.  This event counts the number of retired branch instructions that were mispredicted by the processor, categorized by type. A branch misprediction occurs when the processor predicts that the branch would be taken, but it is not, or vice-versa.  When the misprediction is discovered, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.
+	0xeb extra:pebs non_return_ind_pebs NON_RETURN_IND counts the number of mispredicted near indirect JMP and near indirect CALL branch instructions retired.  This event counts the number of retired branch instructions that were mispredicted by the processor, categorized by type. A branch misprediction occurs when the processor predicts that the branch would be taken, but it is not, or vice-versa.  When the misprediction is discovered, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.
+name:no_alloc_cycles type:exclusive default:0x3f
+	0x3f extra: all The NO_ALLOC_CYCLES.ALL event counts the number of cycles when the front-end does not provide any instructions to be allocated for any reason. This event indicates the cycles where an allocation stall occurs, and no UOPS are allocated in that cycle.
+	0x1 extra: rob_full Counts the number of cycles when no uops are allocated and the ROB is full (less than 2 entries available)
+	0x20 extra: rat_stall Counts the number of cycles when no uops are allocated and a RATstall is asserted.
+	0x50 extra: not_delivered The NO_ALLOC_CYCLES.NOT_DELIVERED event is used to measure front-end inefficiencies, i.e. when front-end of the machine is not delivering micro-ops to the back-end and the back-end is not stalled. This event can be used to identify if the machine is truly front-end bound.  When this event occurs, it is an indication that the front-end of the machine is operating at less than its theoretical peak performance.  Background: We can think of the processor pipeline as being divided into 2 broader parts: Front-end and Back-end. Front-end is responsible for fetching the instruction, decoding into micro-ops (uops) in machine understandable format and putting them into a micro-op queue to be consumed by back end. The back-end then takes these micro-ops, allocates the required resources.  When all resources are ready, micro-ops are executed. If the back-end is not ready to accept micro-ops from the front-end, then we do not want to count these as front-end bottlenecks.  However, whenever we have bottlenecks in the back-end, we will have allocation unit stalls and eventually forcing the front-end to wait until the back-end is ready to receive more UOPS. This event counts the cycles only when back-end is requesting more uops and front-end is not able to provide them. Some examples of conditions that cause front-end inefficiencies are: Icache misses, ITLB misses, and decoder restrictions that limit the front-end bandwidth.
+name:rs_full_stall type:exclusive default:0x1f
+	0x1f extra: all Counts the number of cycles the Alloc pipeline is stalled when any one of the RSs (IEC, FPC and MEC) is full. This event is a superset of all the individual RS stall event counts.
+	0x1 extra: mec Counts the number of cycles and allocation pipeline is stalled and is waiting for a free MEC reservation station entry.  The cycles should be appropriately counted in case of the cracked ops e.g. In case of a cracked load-op, the load portion is sent to M
+name:baclears type:exclusive default:0x1
+	0x1 extra: all The BACLEARS event counts the number of times the front end is resteered, mainly when the Branch Prediction Unit cannot provide a correct prediction and this is corrected by the Branch Address Calculator at the front end.  The BACLEARS.ANY event counts the number of baclears for any type of branch.
+	0x8 extra: return The BACLEARS event counts the number of times the front end is resteered, mainly when the Branch Prediction Unit cannot provide a correct prediction and this is corrected by the Branch Address Calculator at the front end.  The BACLEARS.RETURN event counts the number of RETURN baclears.
+	0x10 extra: cond The BACLEARS event counts the number of times the front end is resteered, mainly when the Branch Prediction Unit cannot provide a correct prediction and this is corrected by the Branch Address Calculator at the front end.  The BACLEARS.COND event counts the number of JCC (Jump on Conditional Code) baclears.
diff --git a/events/i386/westmere/unit_masks b/events/i386/westmere/unit_masks
index c98d81a..56206ce 100644
--- a/events/i386/westmere/unit_masks
+++ b/events/i386/westmere/unit_masks
@@ -16,291 +16,291 @@ name:x10 type:mandatory default:0x10
 name:x20 type:mandatory default:0x20
 	0x20 No unit mask
 name:arith type:bitmask default:0x01
-	0x01 cycles_div_busy Cycles the divider is busy
-	0x02 mul Multiply operations executed
+	0x01 extra: cycles_div_busy Cycles the divider is busy
+	0x02 extra: mul Multiply operations executed
 name:baclear type:bitmask default:0x01
-	0x01 clear BACLEAR asserted, regardless of cause 
-	0x02 bad_target BACLEAR asserted with bad target address
+	0x01 extra: clear BACLEAR asserted, regardless of cause
+	0x02 extra: bad_target BACLEAR asserted with bad target address
 name:bpu_clears type:bitmask default:0x01
-	0x01 early Early Branch Prediction Unit clears
-	0x02 late Late Branch Prediction Unit clears
+	0x01 extra: early Early Branch Prediction Unit clears
+	0x02 extra: late Late Branch Prediction Unit clears
 name:br_inst_exec type:bitmask default:0x7f
-	0x01 cond Conditional branch instructions executed
-	0x02 direct Unconditional branches executed
-	0x04 indirect_non_call Indirect non call branches executed
-	0x07 non_calls All non call branches executed
-	0x08 return_near Indirect return branches executed
-	0x10 direct_near_call Unconditional call branches executed
-	0x20 indirect_near_call Indirect call branches executed
-	0x30 near_calls Call branches executed
-	0x40 taken Taken branches executed
-	0x7f any Branch instructions executed
+	0x01 extra: cond Conditional branch instructions executed
+	0x02 extra: direct Unconditional branches executed
+	0x04 extra: indirect_non_call Indirect non call branches executed
+	0x07 extra: non_calls All non call branches executed
+	0x08 extra: return_near Indirect return branches executed
+	0x10 extra: direct_near_call Unconditional call branches executed
+	0x20 extra: indirect_near_call Indirect call branches executed
+	0x30 extra: near_calls Call branches executed
+	0x40 extra: taken Taken branches executed
+	0x7f extra: any Branch instructions executed
 name:br_inst_retired type:bitmask default:0x04
-	0x01 conditional Retired conditional branch instructions (Precise Event)
-	0x02 near_call Retired near call instructions (Precise Event)
-	0x04 all_branches Retired branch instructions (Precise Event)
+	0x01 extra: conditional Retired conditional branch instructions (Precise Event)
+	0x02 extra: near_call Retired near call instructions (Precise Event)
+	0x04 extra: all_branches Retired branch instructions (Precise Event)
 name:br_misp_exec type:bitmask default:0x7f
-	0x01 cond Mispredicted conditional branches executed
-	0x02 direct Mispredicted unconditional branches executed
-	0x04 indirect_non_call Mispredicted indirect non call branches executed
-	0x07 non_calls Mispredicted non call branches executed
-	0x08 return_near Mispredicted return branches executed
-	0x10 direct_near_call Mispredicted non call branches executed
-	0x20 indirect_near_call Mispredicted indirect call branches executed
-	0x30 near_calls Mispredicted call branches executed
-	0x40 taken Mispredicted taken branches executed
-	0x7f any Mispredicted branches executed
+	0x01 extra: cond Mispredicted conditional branches executed
+	0x02 extra: direct Mispredicted unconditional branches executed
+	0x04 extra: indirect_non_call Mispredicted indirect non call branches executed
+	0x07 extra: non_calls Mispredicted non call branches executed
+	0x08 extra: return_near Mispredicted return branches executed
+	0x10 extra: direct_near_call Mispredicted non call branches executed
+	0x20 extra: indirect_near_call Mispredicted indirect call branches executed
+	0x30 extra: near_calls Mispredicted call branches executed
+	0x40 extra: taken Mispredicted taken branches executed
+	0x7f extra: any Mispredicted branches executed
 name:br_misp_retired type:bitmask default:0x04
-	0x01 conditional Mispredicted conditional retired branches (Precise Event)
-	0x02 near_call Mispredicted near retired calls (Precise Event)
-	0x04 all_branches Mispredicted retired branch instructions (Precise Event)
+	0x01 extra: conditional Mispredicted conditional retired branches (Precise Event)
+	0x02 extra: near_call Mispredicted near retired calls (Precise Event)
+	0x04 extra: all_branches Mispredicted retired branch instructions (Precise Event)
 name:cache_lock_cycles type:bitmask default:0x01
-	0x01 l1d_l2 Cycles L1D and L2 locked
-	0x02 l1d Cycles L1D locked
+	0x01 extra: l1d_l2 Cycles L1D and L2 locked
+	0x02 extra: l1d Cycles L1D locked
 name:cpu_clk_unhalted type:bitmask default:0x00
-	0x00 thread_p Cycles when thread is not halted (programmable counter)
-	0x01 ref_p Reference base clock (133 Mhz) cycles when thread is not halted (programmable counter)
+	0x00 extra: thread_p Cycles when thread is not halted (programmable counter)
+	0x01 extra: ref_p Reference base clock (133 MHz) cycles when thread is not halted (programmable counter)
 name:dtlb_load_misses type:bitmask default:0x01
-	0x01 any DTLB load misses
-	0x02 walk_completed DTLB load miss page walks complete
-	0x04 walk_cycles DTLB load miss page walk cycles
-	0x10 stlb_hit DTLB second level hit
-	0x20 pde_miss DTLB load miss caused by low part of address
-	0x80 large_walk_completed DTLB load miss large page walks
+	0x01 extra: any DTLB load misses
+	0x02 extra: walk_completed DTLB load miss page walks complete
+	0x04 extra: walk_cycles DTLB load miss page walk cycles
+	0x10 extra: stlb_hit DTLB second level hit
+	0x20 extra: pde_miss DTLB load miss caused by low part of address
+	0x80 extra: large_walk_completed DTLB load miss large page walks
 name:dtlb_misses type:bitmask default:0x01
-	0x01 any DTLB misses
-	0x02 walk_completed DTLB miss page walks
-	0x04 walk_cycles DTLB miss page walk cycles
-	0x10 stlb_hit DTLB first level misses but second level hit
-	0x20 pde_miss DTLB misses casued by low part of address
-	0x80 large_walk_completed DTLB miss large page walks
+	0x01 extra: any DTLB misses
+	0x02 extra: walk_completed DTLB miss page walks
+	0x04 extra: walk_cycles DTLB miss page walk cycles
+	0x10 extra: stlb_hit DTLB first level misses but second level hit
+	0x20 extra: pde_miss DTLB misses caused by low part of address
+	0x80 extra: large_walk_completed DTLB miss large page walks
 name:fp_assist type:bitmask default:0x01
-	0x01 all X87 Floating point assists (Precise Event)
-	0x02 output X87 Floating point assists for invalid output value (Precise Event)
-	0x04 input X87 Floating poiint assists for invalid input value (Precise Event)
+	0x01 extra: all X87 Floating point assists (Precise Event)
+	0x02 extra: output X87 Floating point assists for invalid output value (Precise Event)
+	0x04 extra: input X87 Floating point assists for invalid input value (Precise Event)
 name:fp_comp_ops_exe type:bitmask default:0x01
-	0x01 x87 Computational floating-point operations executed
-	0x02 mmx MMX Uops
-	0x04 sse_fp SSE and SSE2 FP Uops
-	0x08 sse2_integer SSE2 integer Uops
-	0x10 sse_fp_packed SSE FP packed Uops
-	0x20 sse_fp_scalar SSE FP scalar Uops
-	0x40 sse_single_precision SSE* FP single precision Uops
-	0x80 sse_double_precision SSE* FP double precision Uops
+	0x01 extra: x87 Computational floating-point operations executed
+	0x02 extra: mmx MMX Uops
+	0x04 extra: sse_fp SSE and SSE2 FP Uops
+	0x08 extra: sse2_integer SSE2 integer Uops
+	0x10 extra: sse_fp_packed SSE FP packed Uops
+	0x20 extra: sse_fp_scalar SSE FP scalar Uops
+	0x40 extra: sse_single_precision SSE* FP single precision Uops
+	0x80 extra: sse_double_precision SSE* FP double precision Uops
 name:fp_mmx_trans type:bitmask default:0x03
-	0x01 to_fp Transitions from MMX to Floating Point instructions
-	0x02 to_mmx Transitions from Floating Point to MMX instructions
-	0x03 any All Floating Point to and from MMX transitions
+	0x01 extra: to_fp Transitions from MMX to Floating Point instructions
+	0x02 extra: to_mmx Transitions from Floating Point to MMX instructions
+	0x03 extra: any All Floating Point to and from MMX transitions
 name:ild_stall type:bitmask default:0x0f
-	0x01 lcp Length Change Prefix stall cycles
-	0x02 mru Stall cycles due to BPU MRU bypass
-	0x04 iq_full Instruction Queue full stall cycles
-	0x08 regen Regen stall cycles
-	0x0f any Any Instruction Length Decoder stall cycles
+	0x01 extra: lcp Length Change Prefix stall cycles
+	0x02 extra: mru Stall cycles due to BPU MRU bypass
+	0x04 extra: iq_full Instruction Queue full stall cycles
+	0x08 extra: regen Regen stall cycles
+	0x0f extra: any Any Instruction Length Decoder stall cycles
 name:inst_retired type:bitmask default:0x01
-	0x01 any_p Instructions retired (Programmable counter and Precise Event)
-	0x02 x87 Retired floating-point operations (Precise Event)
-	0x04 mmx Retired MMX instructions (Precise Event)
+	0x01 extra: any_p Instructions retired (Programmable counter and Precise Event)
+	0x02 extra: x87 Retired floating-point operations (Precise Event)
+	0x04 extra: mmx Retired MMX instructions (Precise Event)
 name:itlb_misses type:bitmask default:0x01
-	0x01 any ITLB miss
-	0x02 walk_completed ITLB miss page walks
-	0x04 walk_cycles ITLB miss page walk cycles
-	0x80 large_walk_completed ITLB miss large page walks
+	0x01 extra: any ITLB miss
+	0x02 extra: walk_completed ITLB miss page walks
+	0x04 extra: walk_cycles ITLB miss page walk cycles
+	0x80 extra: large_walk_completed ITLB miss large page walks
 name:l1d type:bitmask default:0x01
-	0x01 repl L1 data cache lines allocated
-	0x02 m_repl L1D cache lines allocated in the M state
-	0x04 m_evict L1D cache lines replaced in M state 
-	0x08 m_snoop_evict L1D snoop eviction of cache lines in M state
+	0x01 extra: repl L1 data cache lines allocated
+	0x02 extra: m_repl L1D cache lines allocated in the M state
+	0x04 extra: m_evict L1D cache lines replaced in M state
+	0x08 extra: m_snoop_evict L1D snoop eviction of cache lines in M state
 name:l1d_prefetch type:bitmask default:0x01
-	0x01 requests L1D hardware prefetch requests
-	0x02 miss L1D hardware prefetch misses
-	0x04 triggers L1D hardware prefetch requests triggered
+	0x01 extra: requests L1D hardware prefetch requests
+	0x02 extra: miss L1D hardware prefetch misses
+	0x04 extra: triggers L1D hardware prefetch requests triggered
 name:l1d_wb_l2 type:bitmask default:0x0f
-	0x01 i_state L1 writebacks to L2 in I state (misses)
-	0x02 s_state L1 writebacks to L2 in S state
-	0x04 e_state L1 writebacks to L2 in E state
-	0x08 m_state L1 writebacks to L2 in M state
-	0x0f mesi All L1 writebacks to L2
+	0x01 extra: i_state L1 writebacks to L2 in I state (misses)
+	0x02 extra: s_state L1 writebacks to L2 in S state
+	0x04 extra: e_state L1 writebacks to L2 in E state
+	0x08 extra: m_state L1 writebacks to L2 in M state
+	0x0f extra: mesi All L1 writebacks to L2
 name:l1i type:bitmask default:0x01
-	0x01 hits L1I instruction fetch hits
-	0x02 misses L1I instruction fetch misses
-	0x03 reads L1I Instruction fetches
-	0x04 cycles_stalled L1I instruction fetch stall cycles
+	0x01 extra: hits L1I instruction fetch hits
+	0x02 extra: misses L1I instruction fetch misses
+	0x03 extra: reads L1I Instruction fetches
+	0x04 extra: cycles_stalled L1I instruction fetch stall cycles
 name:l2_data_rqsts type:bitmask default:0xff
-	0x01 demand_i_state L2 data demand loads in I state (misses)
-	0x02 demand_s_state L2 data demand loads in S state
-	0x04 demand_e_state L2 data demand loads in E state
-	0x08 demand_m_state L2 data demand loads in M state
-	0x0f demand_mesi L2 data demand requests
-	0x10 prefetch_i_state L2 data prefetches in the I state (misses)
-	0x20 prefetch_s_state L2 data prefetches in the S state
-	0x40 prefetch_e_state L2 data prefetches in E state
-	0x80 prefetch_m_state L2 data prefetches in M state
-	0xf0 prefetch_mesi All L2 data prefetches
-	0xff any All L2 data requests
+	0x01 extra: demand_i_state L2 data demand loads in I state (misses)
+	0x02 extra: demand_s_state L2 data demand loads in S state
+	0x04 extra: demand_e_state L2 data demand loads in E state
+	0x08 extra: demand_m_state L2 data demand loads in M state
+	0x0f extra: demand_mesi L2 data demand requests
+	0x10 extra: prefetch_i_state L2 data prefetches in the I state (misses)
+	0x20 extra: prefetch_s_state L2 data prefetches in the S state
+	0x40 extra: prefetch_e_state L2 data prefetches in E state
+	0x80 extra: prefetch_m_state L2 data prefetches in M state
+	0xf0 extra: prefetch_mesi All L2 data prefetches
+	0xff extra: any All L2 data requests
 name:l2_lines_in type:bitmask default:0x07
-	0x02 s_state L2 lines allocated in the S state
-	0x04 e_state L2 lines allocated in the E state
-	0x07 any L2 lines alloacated
+	0x02 extra: s_state L2 lines allocated in the S state
+	0x04 extra: e_state L2 lines allocated in the E state
+	0x07 extra: any L2 lines allocated
 name:l2_lines_out type:bitmask default:0x0f
-	0x01 demand_clean L2 lines evicted by a demand request
-	0x02 demand_dirty L2 modified lines evicted by a demand request
-	0x04 prefetch_clean L2 lines evicted by a prefetch request
-	0x08 prefetch_dirty L2 modified lines evicted by a prefetch request
-	0x0f any L2 lines evicted
+	0x01 extra: demand_clean L2 lines evicted by a demand request
+	0x02 extra: demand_dirty L2 modified lines evicted by a demand request
+	0x04 extra: prefetch_clean L2 lines evicted by a prefetch request
+	0x08 extra: prefetch_dirty L2 modified lines evicted by a prefetch request
+	0x0f extra: any L2 lines evicted
 name:l2_rqsts type:bitmask default:0x01
-	0x01 ld_hit L2 load hits
-	0x02 ld_miss L2 load misses
-	0x03 loads L2 requests
-	0x04 rfo_hit L2 RFO hits
-	0x08 rfo_miss L2 RFO misses
-	0x0c rfos L2 RFO requests
-	0x10 ifetch_hit L2 instruction fetch hits
-	0x20 ifetch_miss L2 instruction fetch misses
-	0x30 ifetches L2 instruction fetches
-	0x40 prefetch_hit L2 prefetch hits
-	0x80 prefetch_miss L2 prefetch misses
-	0xaa miss All L2 misses
-	0xc0 prefetches All L2 prefetches
-	0xff references All L2 requests
+	0x01 extra: ld_hit L2 load hits
+	0x02 extra: ld_miss L2 load misses
+	0x03 extra: loads L2 requests
+	0x04 extra: rfo_hit L2 RFO hits
+	0x08 extra: rfo_miss L2 RFO misses
+	0x0c extra: rfos L2 RFO requests
+	0x10 extra: ifetch_hit L2 instruction fetch hits
+	0x20 extra: ifetch_miss L2 instruction fetch misses
+	0x30 extra: ifetches L2 instruction fetches
+	0x40 extra: prefetch_hit L2 prefetch hits
+	0x80 extra: prefetch_miss L2 prefetch misses
+	0xaa extra: miss All L2 misses
+	0xc0 extra: prefetches All L2 prefetches
+	0xff extra: references All L2 requests
 name:l2_transactions type:bitmask default:0x80
-	0x01 load L2 Load transactions
-	0x02 rfo L2 RFO transactions
-	0x04 ifetch L2 instruction fetch transactions
-	0x08 prefetch L2 prefetch transactions
-	0x10 l1d_wb L1D writeback to L2 transactions
-	0x20 fill L2 fill transactions
-	0x40 wb L2 writeback to LLC transactions
-	0x80 any All L2 transactions
+	0x01 extra: load L2 Load transactions
+	0x02 extra: rfo L2 RFO transactions
+	0x04 extra: ifetch L2 instruction fetch transactions
+	0x08 extra: prefetch L2 prefetch transactions
+	0x10 extra: l1d_wb L1D writeback to L2 transactions
+	0x20 extra: fill L2 fill transactions
+	0x40 extra: wb L2 writeback to LLC transactions
+	0x80 extra: any All L2 transactions
 name:l2_write type:bitmask default:0x01
-	0x01 rfo_i_state L2 demand store RFOs in I state (misses)
-	0x02 rfo_s_state L2 demand store RFOs in S state
-	0x08 rfo_m_state L2 demand store RFOs in M state
-	0x0e rfo_hit All L2 demand store RFOs that hit the cache
-	0x0f rfo_mesi All L2 demand store RFOs
-	0x10 lock_i_state L2 demand lock RFOs in I state (misses)
-	0x20 lock_s_state L2 demand lock RFOs in S state
-	0x40 lock_e_state L2 demand lock RFOs in E state
-	0x80 lock_m_state L2 demand lock RFOs in M state
-	0xe0 lock_hit All demand L2 lock RFOs that hit the cache
-	0xf0 lock_mesi All demand L2 lock RFOs
+	0x01 extra: rfo_i_state L2 demand store RFOs in I state (misses)
+	0x02 extra: rfo_s_state L2 demand store RFOs in S state
+	0x08 extra: rfo_m_state L2 demand store RFOs in M state
+	0x0e extra: rfo_hit All L2 demand store RFOs that hit the cache
+	0x0f extra: rfo_mesi All L2 demand store RFOs
+	0x10 extra: lock_i_state L2 demand lock RFOs in I state (misses)
+	0x20 extra: lock_s_state L2 demand lock RFOs in S state
+	0x40 extra: lock_e_state L2 demand lock RFOs in E state
+	0x80 extra: lock_m_state L2 demand lock RFOs in M state
+	0xe0 extra: lock_hit All demand L2 lock RFOs that hit the cache
+	0xf0 extra: lock_mesi All demand L2 lock RFOs
 name:load_dispatch type:bitmask default:0x07
-	0x01 rs Loads dispatched that bypass the MOB
-	0x02 rs_delayed Loads dispatched from stage 305
-	0x04 mob Loads dispatched from the MOB
-	0x07 any All loads dispatched
+	0x01 extra: rs Loads dispatched that bypass the MOB
+	0x02 extra: rs_delayed Loads dispatched from stage 305
+	0x04 extra: mob Loads dispatched from the MOB
+	0x07 extra: any All loads dispatched
 name:longest_lat_cache type:bitmask default:0x01
-	0x01 miss Longest latency cache miss
-	0x02 reference Longest latency cache reference
+	0x01 extra: miss Longest latency cache miss
+	0x02 extra: reference Longest latency cache reference
 name:machine_clears type:bitmask default:0x01
-	0x01 cycles Cycles machine clear asserted
-	0x02 mem_order Execution pipeline restart due to Memory ordering conflicts 
-	0x04 smc Self-Modifying Code detected
+	0x01 extra: cycles Cycles machine clear asserted
+	0x02 extra: mem_order Execution pipeline restart due to Memory ordering conflicts
+	0x04 extra: smc Self-Modifying Code detected
 name:mem_inst_retired type:bitmask default:0x01
-	0x01 loads Instructions retired which contains a load (Precise Event)
-	0x02 stores Instructions retired which contains a store (Precise Event)
+	0x01 extra: loads Instructions retired which contains a load (Precise Event)
+	0x02 extra: stores Instructions retired which contains a store (Precise Event)
 name:mem_load_retired type:bitmask default:0x01
-	0x01 l1d_hit Retired loads that hit the L1 data cache (Precise Event)
-	0x02 l2_hit Retired loads that hit the L2 cache (Precise Event)
-	0x04 llc_unshared_hit Retired loads that hit valid versions in the LLC cache (Precise Event)
-	0x08 other_core_l2_hit_hitm Retired loads that hit sibling core's L2 in modified or unmodified states (Precise Event)
-	0x10 llc_miss Retired loads that miss the LLC cache (Precise Event)
-	0x40 hit_lfb Retired loads that miss L1D and hit an previously allocated LFB (Precise Event)
-	0x80 dtlb_miss Retired loads that miss the DTLB (Precise Event)
+	0x01 extra: l1d_hit Retired loads that hit the L1 data cache (Precise Event)
+	0x02 extra: l2_hit Retired loads that hit the L2 cache (Precise Event)
+	0x04 extra: llc_unshared_hit Retired loads that hit valid versions in the LLC cache (Precise Event)
+	0x08 extra: other_core_l2_hit_hitm Retired loads that hit sibling core's L2 in modified or unmodified states (Precise Event)
+	0x10 extra: llc_miss Retired loads that miss the LLC cache (Precise Event)
+	0x40 extra: hit_lfb Retired loads that miss L1D and hit a previously allocated LFB (Precise Event)
+	0x80 extra: dtlb_miss Retired loads that miss the DTLB (Precise Event)
 name:mem_uncore_retired type:bitmask default:0x02
-	0x02 local_hitm Load instructions retired that HIT modified data in sibling core (Precise Event)
-	0x04 remote_hitm Retired loads that hit remote socket in modified state (Precise Event)
-	0x08 local_dram_and_remote_cache_hit Load instructions retired local dram and remote cache HIT data sources (Precise Event)
-	0x10 remote_dram Load instructions retired remote DRAM and remote home-remote cache HITM (Precise Event)
-	0x80 uncacheable Load instructions retired IO (Precise Event)
+	0x02 extra: local_hitm Load instructions retired that HIT modified data in sibling core (Precise Event)
+	0x04 extra: remote_hitm Retired loads that hit remote socket in modified state (Precise Event)
+	0x08 extra: local_dram_and_remote_cache_hit Load instructions retired local dram and remote cache HIT data sources (Precise Event)
+	0x10 extra: remote_dram Load instructions retired remote DRAM and remote home-remote cache HITM (Precise Event)
+	0x80 extra: uncacheable Load instructions retired IO (Precise Event)
 name:offcore_requests type:bitmask default:0x80
-	0x01 demand_read_data Offcore demand data read requests
-	0x02 demand_read_code Offcore demand code read requests
-	0x04 demand_rfo Offcore demand RFO requests
-	0x08 any_read Offcore read requests
-	0x10 any_rfo Offcore RFO requests
-	0x40 l1d_writeback Offcore L1 data cache writebacks
-	0x80 any All offcore requests
+	0x01 extra: demand_read_data Offcore demand data read requests
+	0x02 extra: demand_read_code Offcore demand code read requests
+	0x04 extra: demand_rfo Offcore demand RFO requests
+	0x08 extra: any_read Offcore read requests
+	0x10 extra: any_rfo Offcore RFO requests
+	0x40 extra: l1d_writeback Offcore L1 data cache writebacks
+	0x80 extra: any All offcore requests
 name:offcore_requests_outstanding type:bitmask default:0x08
-	0x01 demand_read_data Outstanding offcore demand data reads
-	0x02 demand_read_code Outstanding offcore demand code reads
-	0x04 demand_rfo Outstanding offcore demand RFOs
-	0x08 any_read Outstanding offcore reads
+	0x01 extra: demand_read_data Outstanding offcore demand data reads
+	0x02 extra: demand_read_code Outstanding offcore demand code reads
+	0x04 extra: demand_rfo Outstanding offcore demand RFOs
+	0x08 extra: any_read Outstanding offcore reads
 name:rat_stalls type:bitmask default:0x0f
-	0x01 flags Flag stall cycles
-	0x02 registers Partial register stall cycles
-	0x04 rob_read_port ROB read port stalls cycles
-	0x08 scoreboard Scoreboard stall cycles
-	0x0f any All RAT stall cycles
+	0x01 extra: flags Flag stall cycles
+	0x02 extra: registers Partial register stall cycles
+	0x04 extra: rob_read_port ROB read port stalls cycles
+	0x08 extra: scoreboard Scoreboard stall cycles
+	0x0f extra: any All RAT stall cycles
 name:resource_stalls type:bitmask default:0x01
-	0x01 any Resource related stall cycles
-	0x02 load Load buffer stall cycles
-	0x04 rs_full Reservation Station full stall cycles
-	0x08 store Store buffer stall cycles
-	0x10 rob_full ROB full stall cycles
-	0x20 fpcw FPU control word write stall cycles
-	0x40 mxcsr MXCSR rename stall cycles
-	0x80 other Other Resource related stall cycles
+	0x01 extra: any Resource related stall cycles
+	0x02 extra: load Load buffer stall cycles
+	0x04 extra: rs_full Reservation Station full stall cycles
+	0x08 extra: store Store buffer stall cycles
+	0x10 extra: rob_full ROB full stall cycles
+	0x20 extra: fpcw FPU control word write stall cycles
+	0x40 extra: mxcsr MXCSR rename stall cycles
+	0x80 extra: other Other Resource related stall cycles
 name:simd_int_128 type:bitmask default:0x01
-	0x01 packed_mpy 128 bit SIMD integer multiply operations
-	0x02 packed_shift 128 bit SIMD integer shift operations
-	0x04 pack 128 bit SIMD integer pack operations
-	0x08 unpack 128 bit SIMD integer unpack operations
-	0x10 packed_logical 128 bit SIMD integer logical operations
-	0x20 packed_arith 128 bit SIMD integer arithmetic operations
-	0x40 shuffle_move 128 bit SIMD integer shuffle/move operations
+	0x01 extra: packed_mpy 128 bit SIMD integer multiply operations
+	0x02 extra: packed_shift 128 bit SIMD integer shift operations
+	0x04 extra: pack 128 bit SIMD integer pack operations
+	0x08 extra: unpack 128 bit SIMD integer unpack operations
+	0x10 extra: packed_logical 128 bit SIMD integer logical operations
+	0x20 extra: packed_arith 128 bit SIMD integer arithmetic operations
+	0x40 extra: shuffle_move 128 bit SIMD integer shuffle/move operations
 name:simd_int_64 type:bitmask default:0x01
-	0x01 packed_mpy SIMD integer 64 bit packed multiply operations
-	0x02 packed_shift SIMD integer 64 bit shift operations
-	0x04 pack SIMD integer 64 bit pack operations
-	0x08 unpack SIMD integer 64 bit unpack operations
-	0x10 packed_logical SIMD integer 64 bit logical operations
-	0x20 packed_arith SIMD integer 64 bit arithmetic operations
-	0x40 shuffle_move SIMD integer 64 bit shuffle/move operations
+	0x01 extra: packed_mpy SIMD integer 64 bit packed multiply operations
+	0x02 extra: packed_shift SIMD integer 64 bit shift operations
+	0x04 extra: pack SIMD integer 64 bit pack operations
+	0x08 extra: unpack SIMD integer 64 bit unpack operations
+	0x10 extra: packed_logical SIMD integer 64 bit logical operations
+	0x20 extra: packed_arith SIMD integer 64 bit arithmetic operations
+	0x40 extra: shuffle_move SIMD integer 64 bit shuffle/move operations
 name:snoopq_requests type:bitmask default:0x01
-	0x01 data Snoop data requests
-	0x02 invalidate Snoop invalidate requests
-	0x04 code Snoop code requests
+	0x01 extra: data Snoop data requests
+	0x02 extra: invalidate Snoop invalidate requests
+	0x04 extra: code Snoop code requests
 name:snoopq_requests_outstanding type:bitmask default:0x01
-	0x01 data Outstanding snoop data requests
-	0x02 invalidate Outstanding snoop invalidate requests
-	0x04 code Outstanding snoop code requests
+	0x01 extra: data Outstanding snoop data requests
+	0x02 extra: invalidate Outstanding snoop invalidate requests
+	0x04 extra: code Outstanding snoop code requests
 name:snoop_response type:bitmask default:0x01
-	0x01 hit Thread responded HIT to snoop
-	0x02 hite Thread responded HITE to snoop
-	0x04 hitm Thread responded HITM to snoop
+	0x01 extra: hit Thread responded HIT to snoop
+	0x02 extra: hite Thread responded HITE to snoop
+	0x04 extra: hitm Thread responded HITM to snoop
 name:sq_misc type:bitmask default:0x04
-	0x04 lru_hints Super Queue LRU hints sent to LLC
-	0x10 split_lock Super Queue lock splits across a cache line
+	0x04 extra: lru_hints Super Queue LRU hints sent to LLC
+	0x10 extra: split_lock Super Queue lock splits across a cache line
 name:ssex_uops_retired type:bitmask default:0x01
-	0x01 packed_single SIMD Packed-Single Uops retired (Precise Event)
-	0x02 scalar_single SIMD Scalar-Single Uops retired (Precise Event)
-	0x04 packed_double SIMD Packed-Double Uops retired (Precise Event)
-	0x08 scalar_double SIMD Scalar-Double Uops retired (Precise Event)
-	0x10 vector_integer SIMD Vector Integer Uops retired (Precise Event)
+	0x01 extra: packed_single SIMD Packed-Single Uops retired (Precise Event)
+	0x02 extra: scalar_single SIMD Scalar-Single Uops retired (Precise Event)
+	0x04 extra: packed_double SIMD Packed-Double Uops retired (Precise Event)
+	0x08 extra: scalar_double SIMD Scalar-Double Uops retired (Precise Event)
+	0x10 extra: vector_integer SIMD Vector Integer Uops retired (Precise Event)
 name:store_blocks type:bitmask default:0x04
-	0x04 at_ret Loads delayed with at-Retirement block code
-	0x08 l1d_block Cacheable loads delayed with L1D block code
+	0x04 extra: at_ret Loads delayed with at-Retirement block code
+	0x08 extra: l1d_block Cacheable loads delayed with L1D block code
 name:uops_decoded type:bitmask default:0x01
-	0x01 stall_cycles Cycles no Uops are decoded
-	0x02 ms_cycles_active Uops decoded by Microcode Sequencer
-	0x04 esp_folding Stack pointer instructions decoded
-	0x08 esp_sync Stack pointer sync operations
+	0x01 extra: stall_cycles Cycles no Uops are decoded
+	0x02 extra: ms_cycles_active Uops decoded by Microcode Sequencer
+	0x04 extra: esp_folding Stack pointer instructions decoded
+	0x08 extra: esp_sync Stack pointer sync operations
 name:uops_executed type:bitmask default:0x3f
-	0x01 port0 Uops executed on port 0
-	0x02 port1 Uops executed on port 1
-	0x04 port2_core Uops executed on port 2 (core count)
-	0x08 port3_core Uops executed on port 3 (core count)
-	0x10 port4_core Uops executed on port 4 (core count)
-	0x1f core_active_cycles_no_port5 Cycles Uops executed on ports 0-4 (core count)
-	0x20 port5 Uops executed on port 5
-	0x3f core_active_cycles Cycles Uops executed on any port (core count)
-	0x40 port015 Uops issued on ports 0, 1 or 5
-	0x80 port234_core Uops issued on ports 2, 3 or 4
+	0x01 extra: port0 Uops executed on port 0
+	0x02 extra: port1 Uops executed on port 1
+	0x04 extra: port2_core Uops executed on port 2 (core count)
+	0x08 extra: port3_core Uops executed on port 3 (core count)
+	0x10 extra: port4_core Uops executed on port 4 (core count)
+	0x1f extra: core_active_cycles_no_port5 Cycles Uops executed on ports 0-4 (core count)
+	0x20 extra: port5 Uops executed on port 5
+	0x3f extra: core_active_cycles Cycles Uops executed on any port (core count)
+	0x40 extra: port015 Uops issued on ports 0, 1 or 5
+	0x80 extra: port234_core Uops issued on ports 2, 3 or 4
 name:uops_issued type:bitmask default:0x01
-	0x01 any Uops issued
-	0x02 fused Fused Uops issued
+	0x01 extra: any Uops issued
+	0x02 extra: fused Fused Uops issued
 name:uops_retired type:bitmask default:0x01
-	0x01 active_cycles Cycles Uops are being retired
-	0x02 retire_slots Retirement slots used (Precise Event)
-	0x04 macro_fused Macro-fused Uops retired (Precise Event)
+	0x01 extra: active_cycles Cycles Uops are being retired
+	0x02 extra: retire_slots Retirement slots used (Precise Event)
+	0x04 extra: macro_fused Macro-fused Uops retired (Precise Event)
diff --git a/events/ia64/ia64/events b/events/ia64/ia64/events
deleted file mode 100644
index 8ae41dd..0000000
--- a/events/ia64/ia64/events
+++ /dev/null
@@ -1,3 +0,0 @@
-# IA-64 events
-event:0x12 counters:0,1,2,3 um:zero minimum:500 name:CPU_CYCLES : CPU Cycles
-event:0x08 counters:0,1,2,3 um:zero minimum:500 name:IA64_INST_RETIRED : IA-64 Instructions Retired
diff --git a/events/ia64/ia64/unit_masks b/events/ia64/ia64/unit_masks
deleted file mode 100644
index 7dd854a..0000000
--- a/events/ia64/ia64/unit_masks
+++ /dev/null
@@ -1,4 +0,0 @@
-# IA-64 possible unit masks
-#
-name:zero type:mandatory default:0x0
-	0x0 No unit mask
diff --git a/events/ia64/itanium/events b/events/ia64/itanium/events
deleted file mode 100644
index b0ce10f..0000000
--- a/events/ia64/itanium/events
+++ /dev/null
@@ -1,5 +0,0 @@
-# IA-64 Itanium 1 events
-event:0x12 counters:0,1,2,3 um:zero minimum:500 name:CPU_CYCLES : CPU Cycles
-event:0x08 counters:0,1 um:zero minimum:500 name:IA64_INST_RETIRED : IA-64 Instructions Retired
-event:0x15 counters:0,1,2,3 um:zero minimum:500 name:IA32_INST_RETIRED : IA-32 Instructions Retired
-# FIXME: itanium doc describe a lot of other events, should we add them w/o any testing ?
diff --git a/events/ia64/itanium/unit_masks b/events/ia64/itanium/unit_masks
deleted file mode 100644
index 6a9f77b..0000000
--- a/events/ia64/itanium/unit_masks
+++ /dev/null
@@ -1,4 +0,0 @@
-# IA-64 Itanium 1 possible unit masks
-#
-name:zero type:mandatory default:0x0
-	0x0 No unit mask
diff --git a/events/ia64/itanium2/events b/events/ia64/itanium2/events
deleted file mode 100644
index c979022..0000000
--- a/events/ia64/itanium2/events
+++ /dev/null
@@ -1,267 +0,0 @@
-# IA-64 Itanium 2 events
-
-# IA64_2 Basic Events, Table 11-1
-event:0x12 counters:0,1,2,3 um:zero minimum:500 name:CPU_CYCLES : CPU Cycles
-event:0x08 counters:0,1,2,3 um:zero minimum:500 name:IA64_INST_RETIRED : IA-64 Instructions Retired
-event:0x59 counters:0,1,2,3 um:zero minimum:5000 name:IA32_INST_RETIRED : IA-32 Instructions Retired
-event:0x07 counters:0,1,2,3 um:zero minimum:500 name:IA32_ISA_TRANSITIONS : Itanium to/from IA-32 ISA Transitions
-
-# IA64_2 Instruction Disperal Events, Table 11-3
-event:0x49 counters:0,1,2,3 um:zero minimum:5000 name:DISP_STALLED : Number of cycles dispersal stalled
-event:0x4d counters:0,1,2,3 um:zero minimum:5000 name:INST_DISPERSED : Syllables Dispersed from REN to REG stage
-event:0x4e counters:0,1,2,3 um:syll_not_dispersed minimum:5000 name:SYLL_NOT_DISPERSED : Syllables not dispersed
-event:0x4f counters:0,1,2,3 um:syll_overcount minimum:5000 name:SYLL_OVERCOUNT : Syllables overcounted
-
-# IA64_2 Instruction Execution Events, Table 11-4
-event:0x58 counters:0,1,2,3 um:alat_capacity_miss minimum:5000 name:ALAT_CAPACITY_MISS : ALAT Entry Replaced
-event:0x06 counters:0,1,2,3 um:zero minimum:5000 name:FP_FAILED_FCHKF : Failed fchkf
-event:0x05 counters:0,1,2,3 um:zero minimum:5000 name:FP_FALSE_SIRSTALL : SIR stall without a trap
-event:0x0b counters:0,1,2,3 um:zero minimum:5000 name:FP_FLUSH_TO_ZERO : Result Flushed to Zero
-event:0x09 counters:0,1,2,3 um:zero minimum:5000 name:FP_OPS_RETIRED : Retired FP operations
-event:0x03 counters:0,1,2,3 um:zero minimum:5000 name:FP_TRUE_SIRSTALL : SIR stall asserted and leads to a trap
-event:0x08 counters:0,1,2,3 um:tagged_inst_retired minimum:5000 name:IA64_TAGGED_INST_RETIRED : Retired Tagged Instructions
-event:0x56 counters:0,1,2,3 um:alat_capacity_miss minimum:5000 name:INST_CHKA_LDC_ALAT : Advanced Check Loads
-event:0x57 counters:0,1,2,3 um:alat_capacity_miss minimum:5000 name:INST_FAILED_CHKA_LDC_ALAT : Failed Advanced Check Loads
-event:0x55 counters:0,1,2,3 um:alat_capacity_miss minimum:5000 name:INST_FAILED_CHKS_RETIRED : Failed Speculative Check Loads
-# To avoid duplication from other tables the following events commented out
-#event:0xcd counters:0,1,2,3 um:zero minimum:5000 name:LOADS_RETIRED : Retired Loads
-#event:0xce counters:0,1,2,3 um:zero minimum:5000 name:MISALIGNED_LOADS_RETIRED : Retired Misaligned Load Instructions
-#event:0xcf counters:0,1,2,3 um:zero minimum:5000 name:UC_LOADS_RETIRED : Retired Uncacheable Loads
-#event:0xd1 counters:0,1,2,3 um:zero minimum:5000 name:STORES_RETIRED : Retired Stores
-#event:0xd2 counters:0,1,2,3 um:zero minimum:5000 name:MISALIGNED_STORES_RETIRED : Retired Misaligned Store Instructions
-#event:0xd0 counters:0,1,2,3 um:zero minimum:5000 name:UC_STORES_RETIRED : Retired Uncacheable Stores
-event:0x50 counters:0,1,2,3 um:zero minimum:5000 name:NOPS_RETIRED : Retired NOP Instructions
-event:0x51 counters:0,1,2,3 um:zero minimum:5000 name:PREDICATE_SQUASHED_RETIRED : Instructions Squashed Due to Predicate Off`
-
-# IA64_2 Stall Events, Table 11-6
-event:0x00 counters:0,1,2,3 um:back_end_bubble minimum:5000 name:BACK_END_BUBBLE : Full pipe bubbles in main pipe
-event:0x02 counters:0,1,2,3 um:be_exe_bubble minimum:5000 name:BE_EXE_BUBBLE : Full pipe bubbles in main pipe due to Execution unit stalls
-event:0x04 counters:0,1,2,3 um:be_flush_bubble minimum:5000 name:BE_FLUSH_BUBBLE : Full pipe bubbles in main pipe due to flushes
-event:0xca counters:0,1,2,3 um:be_l1d_fpu_bubble minimum:5000 name:BE_L1D_FPU_BUBBLE : Full pipe bubbles in main pipe due to FP or L1 dcache
-# To avoid duplication from other tables the following events commented out
-#event:0x72 counters:0,1,2,3 um:be_lost_bw_due_to_fe minimum:5000 name:BE_LOST_BW_DUE_TO_FE : Invalid bundles if BE not stalled for other reasons
-event:0x01 counters:0,1,2,3 um:be_rse_bubble minimum:5000 name:BE_RSE_BUBBLE : Full pipe bubbles in main pipe due to RSE stalls
-event:0x71 counters:0,1,2,3 um:fe_bubble minimum:5000 name:FE_BUBBLE : Bubbles seen by FE
-event:0x70 counters:0,1,2,3 um:fe_lost minimum:5000 name:FE_LOST_BW : Invalid bundles at the entrance to IB
-event:0x73 counters:0,1,2,3 um:fe_lost minimum:5000 name:IDEAL_BE_LOST_BW_DUE_TO_FE : Invalid bundles at the exit from IB
-
-# IA64_2 Branch Events, Table 11-7
-event:0x61 counters:0,1,2,3 um:be_br_mispredict_detail minimum:5000 name:BE_BR_MISPRED_DETAIL : BE branch misprediction detail
-event:0x11 counters:0,1,2,3 um:zero minimum:5000 name:BRANCH_EVENT : Branch Event Captured
-event:0x5b counters:0,1,2,3 um:br_mispred_detail minimum:5000 name:BR_MISPRED_DETAIL : Branch Mispredict Detail
-event:0x68 counters:0,1,2,3 um:br_mispredict_detail2 minimum:5000 name:BR_MISPRED_DETAIL2 : FE Branch Mispredict Detail (Unknown path component)
-event:0x54 counters:0,1,2,3 um:br_path_pred minimum:5000 name:BR_PATH_PRED : FE Branch Path Prediction Detail
-event:0x6a counters:0,1,2,3 um:br_path_pred2 minimum:5000 name:BR_PATH_PRED2 : FE Branch Path Prediction Detail (Unknown prediction component)
-event:0x63 counters:0,1,2,3 um:encbr_mispred_detail minimum:5000 name:ENCBR_MISPRED_DETAIL : Number of encoded branches retired
-
-# IA64_2 L1 Instruction Cache and Prefetch Events, Table 11-8
-event:0x46 counters:0,1,2,3 um:zero minimum:5000 name:ISB_BUNPAIRS_IN : Bundle pairs written from L2 into FE
-event:0x43 counters:0,1,2,3 um:zero minimum:5000 name:L1I_EAR_EVENTS : Instruction EAR Events
-event:0x66 counters:0,1,2,3 um:zero minimum:5000 name:L1I_FETCH_ISB_HIT : "\"Just-in-time\" instruction fetch hitting in and being bypassed from ISB
-event:0x65 counters:0,1,2,3 um:zero minimum:5000 name:L1I_FETCH_RAB_HIT : Instruction fetch hitting in RAB
-event:0x41 counters:0,1,2,3 um:zero minimum:5000 name:L1I_FILLS : L1 Instruction Cache Fills
-event:0x44 counters:0,1,2,3 um:zero minimum:5000 name:L1I_PREFETCHES : Instruction Prefetch Requests
-event:0x42 counters:0,1,2,3 um:zero minimum:5000 name:L2_INST_DEMAND_READS : L1 Instruction Cache and ISB Misses
-event:0x67 counters:0,1,2,3 um:l1i_prefetch_stall minimum:5000 name:L1I_PREFETCH_STALL : Why prefetch pipeline is stalled?
-event:0x4b counters:0,1,2,3 um:zero minimum:5000 name:L1I_PURGE : L1ITLB purges handled by L1I
-event:0x69 counters:0,1,2,3 um:zero minimum:5000 name:L1I_PVAB_OVERFLOW : PVAB overflow
-event:0x64 counters:0,1,2,3 um:zero minimum:5000 name:L1I_RAB_ALMOST_FULL : Is RAB almost full?
-event:0x60 counters:0,1,2,3 um:zero minimum:500 name:L1I_RAB_FULL : Is RAB full?
-event:0x40 counters:0,1,2,3 um:zero minimum:5000 name:L1I_READS : L1 Instruction Cache Read
-event:0x4a counters:0,1,2,3 um:zero minimum:5000 name:L1I_SNOOP : Snoop requests handled by L1I
-event:0x5f counters:0,1,2,3 um:zero minimum:5000 name:L1I_STRM_PREFETCHES : L1 Instruction Cache line prefetch requests
-event:0x45 counters:0,1,2,3 um:zero minimum:5000 name:L2_INST_PREFETCHES : Instruction Prefetch Requests
-
-# IA64_2 L1 Data Cache Events, Table 11-10
-event:0xc8 counters:0,1,2,3 um:zero minimum:5000 name:DATA_EAR_EVENTS : Data Cache EAR Events
-# To avoid duplication from other tables the following events commented out
-#event:0xc2 counters:0,1,2,3 um:zero minimum:5000 name:L1D_READS_SET0 : L1 Data Cache Reads
-#event:0xc3 counters:0,1,2,3 um:zero minimum:5000 name:DATA_REFERENCES_SET0 : Data memory references issued to memory pipeline
-#event:0xc4 counters:0,1,2,3 um:zero minimum:5000 name:L1D_READS_SET1 : L1 Data Cache Reads
-#event:0xc5 counters:0,1,2,3 um:zero minimum:5000 name:DATA_REFERENCES_SET1 : Data memory references issued to memory pipeline
-#event:0xc7 counters:0,1,2,3 um:l1d_read_misses minimum:5000 name:L1D_READ_MISSES : L1 Data Cache Read Misses
-
-# IA64_2 L1 Data Cache Set 0 Events, Table 11-11
-event:0xc0 counters:1 um:zero minimum:5000 name:L1DTLB_TRANSFER : L1DTLB misses that hit in the L2DTLB for accesses counted in L1D_READS
-event:0xc1 counters:1 um:zero minimum:5000 name:L2DTLB_MISSES : L2DTLB Misses
-event:0xc2 counters:1 um:zero minimum:5000 name:L1D_READS_SET0 : L1 Data Cache Reads
-event:0xc3 counters:1 um:zero minimum:5000 name:DATA_REFERENCES_SET0 : Data memory references issued to memory pipeline
-
-# IA64_2 L1 Data Cache Set 1 Events, Table 11-12
-event:0xc4 counters:1 um:zero minimum:5000 name:L1D_READS_SET1 : L1 Data Cache Reads
-event:0xc5 counters:1 um:zero minimum:5000 name:DATA_REFERENCES_SET1 : Data memory references issued to memory pipeline
-event:0xc7 counters:1 um:l1d_read_misses minimum:5000 name:L1D_READ_MISSES : L1 Data Cache Read Misses
-
-# IA64_2 L1 Data Cache Set 2 Events, Table 11-13
-event:0xca counters:1 um:be_l1d_fpu_bubble minimum:5000 name:BE_L1D_FPU_BUBBLE : Full pipe bubbles in main pipe due to FP or L1 dcache
-
-# IA64_2 L1 Data Cache Set 3 Events, Table 11-14
-event:0xcd counters:1 um:zero minimum:5000 name:LOADS_RETIRED : Retired Loads
-event:0xce counters:1 um:zero minimum:5000 name:MISALIGNED_LOADS_RETIRED : Retired Misaligned Load Instructions
-event:0xcf counters:1 um:zero minimum:5000 name:UC_LOADS_RETIRED : Retired Uncacheable Loads
-
-# IA64_2 L1 Data Cache Set 4 Events, Table 11-15
-event:0xd1 counters:1 um:zero minimum:5000 name:STORES_RETIRED : Retired Stores
-event:0xd2 counters:1 um:zero minimum:5000 name:MISALIGNED_STORES_RETIRED : Retired Misaligned Store Instructions
-event:0xd0 counters:1 um:zero minimum:5000 name:UC_STORES_RETIRED : Retired Uncacheable Stores
-
-# IA64_2 L2 Unified Cache Events, Table 11-16
-# To avoid duplication from other tables the following events commented out
-#event:0xb9 counters:0,1,2,3 um:zero minimum:5000 name:L2_BAD_LINES_SELECTED : Valid line replaced when invalid line is available
-#event:0xb8 counters:0,1,2,3 um:l2_bypass minimum:5000 name:L2_BYPASS : Count bypass
-#event:0xb2 counters:0,1,2,3 um:l2_data_references minimum:5000 name:L2_DATA_REFERENCES : Data read/write access to L2
-event:0xbf counters:0,1,2,3 um:zero minimum:5000 name:L2_FILLB_FULL : L2D Fill buffer is full
-#event:0xb4 counters:0,1,2,3 um:l2_force_recirc minimum:5000 name:L2_FORCE_RECIRC : Forced recirculates
-event:0xba counters:0,1,2,3 um:recirc_ifetch minimum:5000 name:L2_GOT_RECIRC_IFETCH : Instruction fetch recirculates received by L2D
-#event:0xb6 counters:0,1,2,3 um:zero minimum:5000 name:L2_GOT_RECIRC_OZQ_ACC : Counts number of OZQ accesses recirculated back to L1D
-#event:0xa1 counters:0,1,2,3 um:l2_ifet_cancels minimum:5000 name:L2_IFET_CANCELS : Instruction fetch cancels by the L2.
-#event:0xa5 counters:0,1,2,3 um:l2_ifet_cancels minimum:5000 name:L2_IFET_CANCELS : Instruction fetch cancels by the L2.
-#event:0xa9 counters:0,1,2,3 um:l2_ifet_cancels minimum:5000 name:L2_IFET_CANCELS : Instruction fetch cancels by the L2.
-#event:0xad counters:0,1,2,3 um:l2_ifet_cancels minimum:5000 name:L2_IFET_CANCELS : Instruction fetch cancels by the L2.
-event:0xb9 counters:0,1,2,3 um:recirc_ifetch minimum:5000 name:L2_ISSUED_RECIRC_IFETCH : Instruction fetch recirculates issued by L2D
-#event:0xb5 counters:0,1,2,3 um:zero minimum:5000 name:L2_ISSUED_RECIRC_OZQ_ACC : Count number of times a recirculate issue was attempted and not preempted
-#event:0xb0 counters:0,1,2,3 um:l2_l3_access_cancel minimum:5000 name:L2_L3ACCESS_CANCEL : Canceled L3 accesses
-event:0xcb counters:0,1,2,3 um:zero minimum:5000 name:L2_MISSES : L2 Misses
-event:0xb8 counters:0,1,2,3 um:l2_ops_issued minimum:5000 name:L2_OPS_ISSUED : Different operations issued by L2D
-#event:0xbd counters:0,1,2,3 um:zero minimum:5000 name:L2_OZDB_FULL : L2D OZQ is full
-#event:0xa2 counters:0,1,2,3 um:zero minimum:5000 name:L2_OZQ_ACQUIRE : Clocks with acquire ordering attribute existed in L2 OZQ
-#event:0xa6 counters:0,1,2,3 um:zero minimum:5000 name:L2_OZQ_ACQUIRE : Clocks with acquire ordering attribute existed in L2 OZQ
-#event:0xaa counters:0,1,2,3 um:zero minimum:5000 name:L2_OZQ_ACQUIRE : Clocks with acquire ordering attribute existed in L2 OZQ
-#event:0xae counters:0,1,2,3 um:zero minimum:5000 name:L2_OZQ_ACQUIRE : Clocks with acquire ordering attribute existed in L2 OZQ
-#event:0xa0 counters:0,1,2,3 um:l2_ozq_cancels0 minimum:5000 name:L2_OZQ_CANCELS0 : L2 OZQ cancels
-#event:0xac counters:0,1,2,3 um:l2_ozq_cancels1 minimum:5000 name:L2_OZQ_CANCELS1 : L2 OZQ cancels
-#event:0xa8 counters:0,1,2,3 um:l2_ozq_cancels2 minimum:5000 name:L2_OZQ_CANCELS2 : L2 OZQ cancels
-#event:0xbc counters:0,1,2,3 um:zero minimum:5000 name:L2_OZQ_FULL : L2D OZQ is full
-#event:0xa3 counters:0,1,2,3 um:zero minimum:5000 name:L2_OZQ_RELEASE : Clocks with release ordering attribute existed in L2 OZQ
-#event:0xa7 counters:0,1,2,3 um:zero minimum:5000 name:L2_OZQ_RELEASE : Clocks with release ordering attribute existed in L2 OZQ
-#event:0xab counters:0,1,2,3 um:zero minimum:5000 name:L2_OZQ_RELEASE : Clocks with release ordering attribute existed in L2 OZQ
-#event:0xaf counters:0,1,2,3 um:zero minimum:5000 name:L2_OZQ_RELEASE : Clocks with release ordering attribute existed in L2 OZQ
-#event:0xb1 counters:0,1,2,3 um:zero minimum:5000 name:L2_REFERENCES : Requests made from L2
-#event:0xba counters:0,1,2,3 um:zero minimum:5000 name:L2_STORE_HIT_SHARED : Store hit a shared line
-#event:0xb7 counters:0,1,2,3 um:zero minimum:5000 name:L2_SYNTH_PROBE : Synthesized Probe
-#event:0xbe counters:0,1,2,3 um:zero minimum:5000 name:L2_VICTIMB_FULL : L2D victim buffer is full
-
-# IA64_2 L2 Cache Events Set 0, Table 11-18
-# FIXME all sorts of restrictions on how these can be combined
-event:0xa1 counters:0 um:l2_ifet_cancels minimum:5000 name:L2_IFET_CANCELS : Instruction fetch cancels by the L2.
-event:0xa5 counters:0 um:l2_ifet_cancels minimum:5000 name:L2_IFET_CANCELS : Instruction fetch cancels by the L2.
-event:0xa9 counters:0 um:l2_ifet_cancels minimum:5000 name:L2_IFET_CANCELS : Instruction fetch cancels by the L2.
-event:0xad counters:0 um:l2_ifet_cancels minimum:5000 name:L2_IFET_CANCELS : Instruction fetch cancels by the L2.
-event:0xa2 counters:0 um:zero minimum:5000 name:L2_OZQ_ACQUIRE : Clocks with acquire ordering attribute existed in L2 OZQ
-event:0xa6 counters:0 um:zero minimum:5000 name:L2_OZQ_ACQUIRE : Clocks with acquire ordering attribute existed in L2 OZQ
-event:0xaa counters:0 um:zero minimum:5000 name:L2_OZQ_ACQUIRE : Clocks with acquire ordering attribute existed in L2 OZQ
-event:0xae counters:0 um:zero minimum:5000 name:L2_OZQ_ACQUIRE : Clocks with acquire ordering attribute existed in L2 OZQ
-event:0xa0 counters:0 um:l2_ozq_cancels0 minimum:5000 name:L2_OZQ_CANCELS0 : L2 OZQ cancels
-event:0xac counters:0 um:l2_ozq_cancels1 minimum:5000 name:L2_OZQ_CANCELS1 : L2 OZQ cancels
-event:0xa8 counters:0 um:l2_ozq_cancels2 minimum:5000 name:L2_OZQ_CANCELS2 : L2 OZQ cancels
-event:0xa3 counters:0 um:zero minimum:5000 name:L2_OZQ_RELEASE : Clocks with release ordering attribute existed in L2 OZQ
-event:0xa7 counters:0 um:zero minimum:5000 name:L2_OZQ_RELEASE : Clocks with release ordering attribute existed in L2 OZQ
-event:0xab counters:0 um:zero minimum:5000 name:L2_OZQ_RELEASE : Clocks with release ordering attribute existed in L2 OZQ
-event:0xaf counters:0 um:zero minimum:5000 name:L2_OZQ_RELEASE : Clocks with release ordering attribute existed in L2 OZQ
-
-# IA64_2 L2 Cache Events Set 1, Table 11-19
-# manual states that L2_L3ACCESS_CANCEL must be measured in PMD4.
-# FIXME Don't have any way of enforcing the constraints
-# so only l2_l3_access_cancel allowed.
-event:0xb0 counters:0 um:l2_l3_access_cancel minimum:5000 name:L2_L3ACCESS_CANCEL : Canceled L3 accesses
-#event:0xb2 counters:0,1,2,3 um:l2_data_references minimum:5000 name:L2_DATA_REFERENCES : Data read/write access to L2
-#event:0xb1 counters:0,1,2,3 um:zero minimum:5000 name:L2_REFERENCES : Requests made from L2
-
-# IA64_2 L2 Cache Events Set 2, Table 11-20
-# manual states that L2_FORCE_RECIRC must be measured in PMD4.
-# FIXME Don't have anyway of enforcing thes constraint
-# so only L2_FORCE_RECIRC allowed.
-event:0xb4 counters:0 um:l2_force_recirc minimum:5000 name:L2_FORCE_RECIRC : Forced recirculates
-#event:0xb5 counters:0,1,2,3 um:zero minimum:5000 name:L2_ISSUED_RECIRC_OZQ_ACC : Count number of times a recirculate issue was attempted and not preempted
-#event:0xb6 counters:0,1,2,3 um:zero minimum:5000 name:L2_GOT_RECIRC_OZQ_ACC : Counts number of OZQ accesses recirculated back to L1D
-#event:0xb7 counters:0,1,2,3 um:zero minimum:5000 name:L2_SYNTH_PROBE : Synthesized Probe
-
-# IA64_2 L2 Cache Events Set 3, Table 11-21
-# The manual states that all events in this set share the same umask.
-event:0xb9 counters:0 um:zero minimum:5000 name:L2_BAD_LINES_SELECTED : Valid line replaced when invalid line is available
-event:0xb8 counters:0 um:l2_bypass minimum:5000 name:L2_BYPASS : Count bypass
-event:0xba counters:0 um:zero minimum:5000 name:L2_STORE_HIT_SHARED : Store hit a shared line
-
-# IA64_2 L2 Cache Events Set 4, Table 11-22
-# The manual states one of the following needs to be in pmd4 and these events
-# share the same umask.
-event:0xba counters:0 um:recirc_ifetch minimum:5000 name:L2_GOT_RECIRC_IFETCH : Instruction fetch recirculates received by L2D
-event:0xb9 counters:0 um:recirc_ifetch minimum:5000 name:L2_ISSUED_RECIRC_IFETCH : Instruction fetch recirculates issued by L2D
-event:0xb8 counters:0 um:l2_ops_issued minimum:5000 name:L2_OPS_ISSUED : Different operations issued by L2D
-
-# IA64_2 L2 Cache Events Set 5, Table 11-23
-# manual states one of the following needs to be in pmd4 and
-#       these events share the same umask
-event:0xbc counters:0 um:zero minimum:5000 name:L2_OZQ_FULL : L2D OZQ is full
-event:0xbd counters:0 um:zero minimum:5000 name:L2_OZDB_FULL : L2D OZQ is full
-event:0xbe counters:0 um:zero minimum:5000 name:L2_VICTIMB_FULL : L2D victim buffer is full
-event:0xbf counters:0 um:zero minimum:5000 name:L2_FILLB_FULL : L2D Fill buffer is full
-
-# IA64_2 L3 Cache Events, Table 11-24
-event:0xdf counters:0,1,2,3 um:zero minimum:5000 name:L3_LINES_REPLACED : Cache Lines Replaced
-event:0xdc counters:0,1,2,3 um:zero minimum:5000 name:L3_MISSES : L3 Misses
-event:0xdb counters:0,1,2,3 um:zero minimum:5000 name:L3_REFERENCES : L3 References
-event:0xdd counters:0,1,2,3 um:l3_reads minimum:5000 name:L3_READS : L3 Reads
-event:0xde counters:0,1,2,3 um:l3_writes minimum:5000 name:L3_WRITES : L3 Writes
-
-# IA64_2 System Events, Table 11-26
-event:0x13 counters:0,1,2,3 um:zero minimum:5000 name:CPU_CPL_CHANGES : Privilege Level Changes
-event:0x52 counters:0,1,2,3 um:zero minimum:5000 name:DATA_DEBUG_REGISTER_FAULT : Fault due to data debug reg. Match to load/store instruction
-event:0xc6 counters:0,1,2,3 um:zero minimum:5000 name:DATA_DEBUG_REGISTER_MATCHES : Data debug register matches data address of memory reference
-event:0x9e counters:0,1,2,3 um:extern_dp_pins_0_to_3 minimum:5000 name:EXTERN_DP_PINS_0_TO_3 : DP pins 0-3 asserted
-event:0x9f counters:0,1,2,3 um:extern_dp_pins_4_to_5 minimum:5000 name:EXTERN_DP_PINS_4_TO_5 : DP pins 4-5 asserted
-event:0x53 counters:0,1,2,3 um:zero minimum:5000 name:SERIALIZATION_EVENTS : Number of srlz.I instructions
-
-# IA64_2 TLB Events, Table 11-28
-event:0xc9 counters:0,1,2,3 um:zero minimum:5000 name:DTLB_INSERTS_HPW : Hardware Page Walker Installs to DTLB"
-event:0x2c counters:0,1,2,3 um:zero minimum:500 name:DTLB_INSERTS_HPW_RETIRED : VHPT entries inserted into DTLB by HW PW
-event:0x2d counters:0,1,2,3 um:zero minimum:500 name:HPW_DATA_REFERENCES : Data memory references to VHPT
-#event:0xc1 counters:1 um:zero minimum:5000 name:L2DTLB_MISSES : L2DTLB Misses
-event:0x48 counters:0,1,2,3 um:zero minimum:5000 name:L1ITLB_INSERTS_HPW : L1ITLB Hardware Page Walker Inserts
-event:0x47 counters:0,1,2,3 um:itlb_misses_fetch minimum:5000 name:ITLB_MISSES_FETCH : ITLB Misses Demand Fetch
-#event:0xc0 counters:1 um:zero minimum:5000 name:L1DTLB_TRANSFER : L1DTLB misses that hit in the L2DTLB for accesses counted in L1D_READS
-
-# IA64_2 System Bus Events, Table 11-30
-event:0x87 counters:0,1,2,3 um:bus minimum:5000 name:BUS_ALL : Bus Transactions
-event:0x9c counters:0,1,2,3 um:zero minimum:5000 name:BUS_BRQ_LIVE_REQ_HI : BRQ Live Requests (two most-significant-bit of the 5-bit outstanding BRQ request count)
-event:0x9b counters:0,1,2,3 um:zero minimum:5000 name:BUS_BRQ_LIVE_REQ_LO : BRQ Live Requests (three least-significant-bit of the 5-bit outstanding BRQ request count
-event:0x9d counters:0,1,2,3 um:zero minimum:5000 name:BUS_BRQ_REQ_INSERTED : BRQ Requests Inserted
-event:0x88 counters:0,1,2,3 um:zero minimum:5000 name:BUS_DATA_CYCLE : Valid data cycle on the Bus
-event:0x84 counters:0,1,2,3 um:zero minimum:5000 name:BUS_HITM : Bus Hit Modified Line Transactions
-event:0x90 counters:0,1,2,3 um:bus minimum:5000 name:BUS_IO : IA-32 Compatible IO Bus Transactions
-event:0x98 counters:0,1,2,3 um:zero minimum:5000 name:BUS_IOQ_LIVE_REQ_HI : Inorder Bus Queue Requests (two most-significant-bit of the 4-bit outstanding IOQ request count)
-event:0x97 counters:0,1,2,3 um:zero minimum:5000 name:BUS_IOQ_LIVE_REQ_LO : Inorder Bus Queue Requests (two least-significant-bit of the 4-bit outstanding IOQ request count)
-event:0x93 counters:0,1,2,3 um:bus_lock minimum:5000 name:BUS_LOCK : IA-32 Compatible Bus Lock Transactions
-event:0x8e counters:0,1,2,3 um:bus_backsnp_req minimum:5000 name:BUS_BACKSNP_REQ : Bus Back Snoop Requests
-event:0x8a counters:0,1,2,3 um:bus_memory minimum:5000 name:BUS_MEMORY : Bus Memory Transactions
-event:0x8b counters:0,1,2,3 um:bus_mem_read minimum:5000 name:BUS_MEM_READ : Full Cache line D/I memory RD, RD invalidate, and BRIL
-event:0x94 counters:0,1,2,3 um:zero minimum:5000 name:BUS_MEM_READ_OUT_HI : Outstanding memory RD transactions
-event:0x95 counters:0,1,2,3 um:zero minimum:5000 name:BUS_MEM_READ_OUT_LO : Outstanding memory RD transactions
-event:0x9a counters:0,1,2,3 um:zero minimum:5000 name:BUS_OOQ_LIVE_REQ_HI : Out-of-order Bus Queue Requests (two most-significant-bit of the 4-bit outstanding OOQ request count)
-event:0x99 counters:0,1,2,3 um:zero minimum:5000 name:BUS_OOQ_LIVE_REQ_LO : Out-of-order Bus Queue Requests (three least-significant-bit of the 4-bit outstanding OOQ request count)
-event:0x8c counters:0,1,2,3 um:bus minimum:5000 name:BUS_RD_DATA : Bus Read Data Transactions
-event:0x80 counters:0,1,2,3 um:zero minimum:5000 name:BUS_RD_HIT : Bus Read Hit Clean Non-local Cache Transactions
-event:0x81 counters:0,1,2,3 um:zero minimum:5000 name:BUS_RD_HITM : Bus Read Hit Modified Non-local Cache Transactions
-event:0x83 counters:0,1,2,3 um:zero minimum:5000 name:BUS_RD_INVAL_ALL_HITM : Bus BIL or BRIL Transaction Results in HITM
-event:0x82 counters:0,1,2,3 um:zero minimum:5000 name:BUS_RD_INVAL_HITM : Bus BIL Transaction Results in HITM
-event:0x91 counters:0,1,2,3 um:bus minimum:5000 name:BUS_RD_IO : IA-32 Compatible IO Read Transactions
-event:0x8d counters:0,1,2,3 um:bus minimum:5000 name:BUS_RD_PRTL : Bus Read Partial Transactions
-event:0x96 counters:0,1,2,3 um:zero minimum:5000 name:BUS_SNOOPQ_REQ : Bus Snoop Queue Requests
-event:0x86 counters:0,1,2,3 um:bus minimum:5000 name:BUS_SNOOPS : Bus Snoops Total
-event:0x85 counters:0,1,2,3 um:bus_snoop minimum:5000 name:BUS_SNOOPS_HITM : Bus Snoops HIT Modified Cache Line
-event:0x8f counters:0,1,2,3 um:bus_snoop minimum:5000 name:BUS_SNOOP_STALL_CYCLES : Bus Snoop Stall Cycles (from any agent)
-event:0x92 counters:0,1,2,3 um:bus_wr_wb minimum:5000 name:BUS_WR_WB : Bus Write Back Transactions
-event:0x89 counters:0,1,2,3 um:mem_read_current minimum:5000 name:MEM_READ_CURRENT : Current Mem Read Transactions On Bus
-
-# RSE Events, Table 11-34
-event:0x2b counters:0,1,2,3 um:zero minimum:500 name:RSE_CURRENT_REGS_2_TO_0 : Current RSE registers
-event:0x2a counters:0,1,2,3 um:zero minimum:500 name:RSE_CURRENT_REGS_5_TO_3 : Current RSE registers
-event:0x26 counters:0,1,2,3 um:zero minimum:500 name:RSE_CURRENT_REGS_6 : Current RSE registers
-event:0x29 counters:0,1,2,3 um:zero minimum:500 name:RSE_DIRTY_REGS_2_TO_0 : Dirty RSE registers
-event:0x28 counters:0,1,2,3 um:zero minimum:500 name:RSE_DIRTY_REGS_5_TO_3 : Dirty RSE registers
-event:0x24 counters:0,1,2,3 um:zero minimum:500 name:RSE_DIRTY_REGS_6 : Dirty RSE registers
-event:0x32 counters:0,1,2,3 um:zero minimum:500 name:RSE_EVENT_RETIRED : Retired RSE operations
-event:0x20 counters:0,1,2,3 um:rse_references_retired minimum:500 name:RSE_REFERENCES_RETIRED : RSE Accesses
-
-# IA64 Performance Monitors Ordered by Code, Table 11-36
-event:0xbb counters:0,1,2,3 um:zero minimum:5000 name:TAGGED_L2_DATA_RETURN_POR : Tagged L2 Data Return Ports 0/1
diff --git a/events/ia64/itanium2/unit_masks b/events/ia64/itanium2/unit_masks
deleted file mode 100644
index bc74f5d..0000000
--- a/events/ia64/itanium2/unit_masks
+++ /dev/null
@@ -1,465 +0,0 @@
-# IA-64 Itanium 2 possible unit masks
-#
-# The information for the following entries for the Itanium 2
-# came from Intel Itanium 2 Processor Reference Manual For
-# Software Development and Optimization, June 2002, Document
-# number 251110-001.
-
-name:zero type:mandatory default:0x0
-	0x0 No unit mask
-
-# CPU_IA64_2 Table 11-37, 11-72
-name:alat_capacity_miss type:bitmask default:0x03
-	0x1 INT
-	0x2 FP
-	0x3 ALL
-
-# CPU_IA64_2 Table 11-38
-name:back_end_bubble type:exclusive default:0x00
-	0x0 ALL
-	0x1 FE
-	0x2 L1D_FPU_RSE
-
-# CPU_IA64_2 Table 11-39
-name:be_br_mispredict_detail type:exclusive default:0x00
-	0x0 ANY
-	0x1 STG
-	0x2 ROT
-	0x3 PFS
-
-# CPU_IA64_2 Table 11-40
-name:be_exe_bubble type:exclusive default:0x00
-	0x0 ALL
-	0x1 GRALL
-	0x2 FRALL
-	0x3 PR
-	0x4 ARCR
-	0x5 GRCR
-	0x6 CANCEL
-	0x7 BANK_SWITCH
-	0x8 ARCR_PR_CANCEL_BANK
-
-# CPU_IA64_2 Table 11-41
-name:be_flush_bubble type:exclusive default:0x00
-	0x0 ALL
-	0x1 BRU
-	0x2 XPN
-
-# CPU_IA64_2 Table 11-42
-name:be_l1d_fpu_bubble type:exclusive default:0x00
-	0x0 ALL
-	0x1 FPU
-	0x2 L1D
-	0x3 L1D_FULLSTBUF
-	0x4 L1D_DCURECIR
-	0x5 L1D_HPW
-	0x7 L1D_FILLCONF
-	0x8 L1D_DCS
-	0x9 L1D_L2BPRESS
-	0xa L1D_TLB
-	0xb L1D_LDCONF
-	0xc L1D_LDCHK
-	0xd L1D_NAT
-	0xe L1D_STBUFRECIR
-	0xf L1D_NATCONF
-
-# CPU_IA64_2 Table 11-43
-# FIXME: events using this is commented out in events
-#name:be_lost_bw_due_to_fe type:exclusive default:0x00
-#	0x0 ALL
-#	0x1 FEFLUSH
-#	0x4 UNREACHED
-#	0x5 IBFULL
-#	0x6 IMISS
-#	0x7 TLBMISS
-#	0x8 FILL_RECIRC
-#	0x9 BI
-#	0xa BRQ
-#	0xb PLP
-#	0xc BR_ILOCK
-#	0xd BUBBLE
-
-# CPU_IA64_2 Table 11-44
-name:be_rse_bubble type:exclusive default:0x00
-	0x0 ALL
-	0x1 BANK_SWITCH
-	0x2 AR_DEP
-	0x3 OVERFLOW
-	0x4 UNDERFLOW
-	0x5 LOADRS
-
-# CPU_IA64_2 Table 11-45
-name:br_mispred_detail type:exclusive default:0x00
-	0x0 ALL.ALL_PRED
-	0x1 ALL.CORRECT_PRED
-	0x2 ALL.WRONG_PATH
-	0x3 ALL.WRONG_TARGET
-	0x4 IPREL.ALL_PRED
-	0x5 IPREL.CORRECT_PRED
-	0x6 IPREL.WRONG_PATH
-	0x7 IPREL.WRONG_TARGET
-	0x8 RETURN.ALL_PRED
-	0x9 RETURN.CORRECT_PRED
-	0xa RETURN.WRONG_PATH
-	0xb RETURN.WRONG_TARGET
-	0xc NRETIND.ALL_PRED
-	0xd NRETIND.CORRECT_PRED
-	0xe NRETIND.WRONG_PATH
-	0xf NRETIND.WRONG_TARGET
-
-# CPU_IA64_2 Table 11-46
-name:br_mispredict_detail2 type:exclusive default:0x00
-	0x0 ALL.ALL_UNKNOWN_PRED
-	0x1 ALL.UKNOWN_PATH_CORRECT_PRED
-	0x2 ALL.UKNOWN_PATH_WRONG_PATH
-	0x4 IPREL.ALL_UNKNOWN_PRED
-	0x5 IPREL.UNKNOWN_PATH_CORRECT_PRED
-	0x6 IPREL.UNKNOWN_PATH_WRONG_PATH
-	0x8 RETURN.ALL_UNKNOWN_PRED
-	0x9 RETURN.UNKNOWN_PATH_CORRECT_PRED
-	0xa RETURN.UNKNOWN_PATH_WRONG_PATH
-	0xc NRETIND.ALL_UNKNOWN_PRED
-	0xd NRETIND.UNKNOWN_PATH_CORRECT_PRED
-	0xe NRETIND.UNKNOWN_PATH_WRONG_PATH
-
-# CPU_IA64_2 Table 11-47
-name:br_path_pred type:exclusive default:0x00
-	0x0 ALL.MISPRED_NOTTAKEN
-	0x1 ALL.MISPRED_TAKEN
-	0x2 ALL.OKPRED_NOTTAKEN
-	0x3 ALL.OKPRED_TAKEN
-	0x4 IPREL.MISPRED_NOTTAKEN
-	0x5 IPREL.MISPRED_TAKEN
-	0x6 IPREL.OKPRED_NOTTAKEN
-	0x7 IPREL.OKPRED_TAKEN
-	0x8 RETURN.MISPRED_NOTTAKEN
-	0x9 RETURN.MISPRED_TAKEN
-	0xa RETURN.OKPRED_NOTTAKEN
-	0xb RETURN.OKPRED_TAKEN
-	0xc NRETIND.MISPRED_NOTTAKEN
-	0xd NRETIND.MISPRED_TAKEN
-	0xe NRETIND.OKPRED_NOTTAKEN
-	0xf NRETIND.OKPRED_TAKEN
-
-# CPU_IA64_2 Table 11-48
-name:br_path_pred2 type:exclusive default:0x00
-	0x0 ALL.UNKNOWNPRED_NOTTAKEN
-	0x1 ALL.UNKNOWNPRED_TAKEN
-	0x4 IPREL.UNKNOWNPRED_NOTTAKEN
-	0x5 IPREL.UNKNOWNPRED__TAKEN
-	0x8 RETURN.UNKNOWNPRED_NOTTAKEN
-	0x9 RETURN.UNKNOWNPRED_TAKEN
-	0xc NRETIND.UNKNOWNPRED_NOTTAKEN
-	0xd NRETIND.UNKNOWNPRED_TAKEN
-
-# CPU_IA64_2 Table 11-49, 11-51, 11-55, 11-56, 11-57, 11-58
-name:bus type:exclusive default:0x03
-	0x1 IO
-	0x2 SELF
-	0x3 ANY
-
-# CPU_IA64_2 Table 11-50  b0001
-name:bus_backsnp_req type:mandatory default:0x01
-	0x1 0x0
-
-# CPU_IA64_2 Table 11-52
-name:bus_lock type:exclusive default:0x03
-	0x2 SELF
-	0x3 ANY
-
-# CPU_IA64_2 Table 11-53
-name:bus_memory type:exclusive default:0x0f
-	0x5 EQ_128BYTEIO
-	0x6 EQ_128BYTE_SELF
-	0x7 EQ_128BYTE_ANY
-	0x9 LT_128BYTEIO
-	0xa LT_128BYTE_SELF
-	0xb LT_128BYTE_ANY
-	0xd ALL IO
-	0xe ALL SELF
-	0xf ALL ANY
-
-# CPU_IA64_2 Table 11-54
-name:bus_mem_read type:exclusive default:0x0f
-	0x1 BIL IO
-	0x2 BIL SELF
-	0x3 BIL ANY
-	0x5 BRL IO
-	0x6 BRL SELF
-	0x7 BRL_ANY
-	0x9 BRIL IO
-	0xa BRIL SELF
-	0xb BRIL ANY
-	0xd ALL IO
-	0xe ALL SELF
-	0xf ALL ANY
-
-# CPU_IA64_2 Table 11-59, 11-60
-name:bus_snoop type:exclusive default:0x03
-	0x2 SELF
-	0x3 ANY
-
-# CPU_IA64_2 Table 11-61
-name:bus_wr_wb type:exclusive default:0x0f
-	0x5 EQ_128BYTE IO
-	0x6 EQ_128BYTE SELF
-	0x7 EQ_128BYTE ANY
-	0xa CCASTOUT SELF
-	0xb CCASTOUT ANY
-	0xd ALL IO
-	0xe ALL SELF
-	0xf ALL ANY
-
-# CPU_IA64_2 Table 11-62
-name:encbr_mispred_detail type:exclusive default:0x0
-	0x0 ALL.ALL_PRED
-	0x1 ALL.CORRECT_PRED
-	0x2 ALL.WRONG_PATH
-	0x3 ALL.WRONG_TARGET
-	0x8 OVERSUB.ALL_PRED
-	0x9 OVERSUB.CORRECT_PRED
-	0xa OVERSUB.CORRECT_PRED
-	0xb OVERSUB.WRONGPATH
-	0xc ALL2.ALL_PRED
-	0xd ALL2.CORRECT_PRED
-	0xe ALL2.WRONG_PATH
-	0xf ALL2.WRONG_TARGET
-
-# CPU_IA64_2 Table 11-63
-name:extern_dp_pins_0_to_3 type:bitmask default:0xf
-	0x1 PIN0
-	0x2 PIN1
-	0x4 PIN2
-	0x8 PIN3
-	0xf ALL
-
-# CPU_IA64_2 Table 11-64
-name:extern_dp_pins_4_to_5 type:bitmask default:0x03
-	0x1 PIN4
-	0x2 PIN5
-	0xf ALL
-
-# CPU_IA64_2 Table 11-65
-name:fe_bubble type:exclusive default:0x0
-	0x0 ALL
-	0x1 FEFLUSH
-	0x3 GROUP1
-	0x4 GROUP2
-	0x5 IBFULL
-	0x6 IMISS
-	0x7 TLBMISS
-	0x8 FILL_RECIRC
-	0x9 BRANCH
-	0xa GROUP3
-	0xb ALLBUT_FEFLUSH_BUBBLE
-	0xc ALLBUT_IBFULL
-	0xd BUBBLE
-
-# CPU_IA64_2 Table 11-66, 11-69*/
-name:fe_lost type:exclusive default:0x0
-	0x0 ALL
-	0x1 FEFLUSH
-	0x4 UNREACHED
-	0x5 IBFULL
-	0x6 IMISS
-	0x7 TLBMISS
-	0x8 FILL_RECIRC
-	0x9 BI
-	0xa BRQ
-	0xb PLP
-	0xc BR_ILOCK
-	0xd BUBBLE
-
-# CPU_IA64_2 Table 11-67, 11-79, 11-86, 11-90, 11-92 b0000 
-# FIXME: events using this is commented out in events
-#name:this type:exclusive default:0x0 
-#	0x0 THIS
-
-# CPU_IA64_2 Table 11-68
-name:tagged_inst_retired type:exclusive default:0x0 
-	0x0 IBRP0_PMB8
-	0x1 IBRP1_PMB9
-	0x2 IBRP2_PMC8
-	0x3 IBRP3_PMC9
-
-# CPU_IA64_2 Table 11-73
-name:itlb_misses_fetch type:exclusive default:0x3
-	0x1 L1ITLB
-	0x2 L2ITLB
-	0x3 ALL
-
-# CPU_IA64_2 Table 11-74
-name:l1d_read_misses type:exclusive default:0x0
-	0x0 ALL
-	0x1 RSE_FILL
-
-# CPU_IA64_2 Table 11-75
-name:l1i_prefetch_stall type:exclusive default:0x3
-	0x2 FLOW
-	0x3 ALL
-
-# CPU_IA64_2 Table 11-76, 11-91 b0000
-# FIXME: events using this is commented out in events
-#name:l2_lines type:exclusive default:0x0
-#	0x0 ANY
-
-# CPU_IA64_2 Table 11-77
-name:l2_bypass type:exclusive default:0x0
-	0x0 L2_DATA1
-	0x1 L2_DATA2
-	0x2 L3_DATA1
-	0x4 L2_INST1
-	0x5 L2_INST2
-	0x6 L3_INST1
-
-# CPU_IA64_2 Table 11-78
-# FIXME: events using this is commented out in events
-#name:l2_data_references type:bitmask default:0x3
-#	0x1 L2_DATA_READS
-#	0x2 L2_DATA_WRITES
-#	0x3 L2_ALL
-
-# CPU_IA64_2 Table 11-80
-name:l2_force_recirc type:exclusive default:0x0
-	0x0 ANY
-	0x1 SMC_HIT
-	0x2 L1W
-	0x4 TAG_NOTOK
-	0x5 TRAN_PREF
-	0x6 SNP_OR_L3 
-	0x8 VIC_PEND
-	0x9 FILL_HIT
-	0xa IPF_MISS
-	0xb VIC_BUF_FULL
-	0xc OZQ_MISS
-	0xd SAME_INDEX
-	0xe FRC_RECIRC
-
-# CPU_IA64_2 Table 11-81, 11-83 b1000
-name:recirc_ifetch type:mandatory default:0x8
-	0x8 default:0x0} } };
-
-# CPU_IA64_2 Table 11-82
-name:l2_ifet_cancels type:exclusive default:0x0
-	0x0 ANY
-	0x2 BYPASS
-	0x4 DIDNT_RECIR
-	0x5 RECIRC_OVER_SUB
-	0x6 ST_FILL_WB
-	0x7 DATA_RD
-	0x8 PREEMPT
-	0xc CHG_PRIO
-	0xd IFETCH_BYP
-
-# CPU_IA64_2 Table 11-84
-name:l2_l3_access_cancel type:exclusive default:0x9
-	0x1 SPEC_L3_BYP
-	0x2 FILLD_FULL
-	0x5 UC_BLOCKED
-	0x6 INV_L3_BYP
-	0x8 EBL_REJECT
-	0x9 ANY
-	0xa DFETCH
-	0xb IFETCH
-
-# CPU_IA64_2 Table 11-85
-name:l2_ops_issued type:exclusive default:0x8
-	0x8 INT_LOAD
-	0x9 FP_LOAD
-	0xa RMW
-	0xb STORE
-	0xc NST_NLD
-
-# CPU_IA64_2 Table 11-87
-name:l2_ozq_cancels0 type:exclusive default:0x0
-	0x0 ANY
-	0x1 LATE_SPEC_BYP
-	0x2 LATE_RELEASE
-	0x3 LATE_ACQUIRE
-	0x4 LATE_BYP_EFFRELEASE
-
-# CPU_IA64_2 Table 11-88
-name:l2_ozq_cancels1 type:exclusive default:0x1
-	0x0 REL
-	0x1 BANK_CONF
-	0x2 L2D_ST_MAT
-	0x4 SYNC
-	0x5 HPW_IFETCH_CONF
-	0x6 CANC_L2M_ST
-	0x7 L1_FILL_CONF
-	0x8 ST_FILL_CONF
-	0x9 CCV
-	0xa SEM
-	0xb L2M_ST_MAT
-	0xc MFA
-	0xd L2A_ST_MAT
-	0xe L1DF_L2M
-	0xf ECC
-
-# CPU_IA64_2 Table 11-89
-name:l2_ozq_cancels2 type:exclusive default:0x0
-	0x0 RECIRC_OVER_SUB
-	0x1 CANC_L2C_ST
-	0x2 L2C_ST_MAT
-	0x3 SCRUB
-	0x4 ACQ
-	0x5 READ_WB_CONF
-	0x6 OZ_DATA_CONF
-	0x8 L2FILL_ST_CONF
-	0x9 DIDNT_RECIRC
-	0xa WEIRD
-	0xc OVER_SUB
-	0xd CANC_L2D_ST
-	0xf D_IFET
-
-# CPU_IA64_2 Table 11-93
-name:l3_reads type:exclusive default:0x3
-	0x1 DINST_FETCH.HIT
-	0x2 DINST_FETCH.MISS
-	0x3 DINST_FETCH.ALL
-	0x5 INST_FETCH.HIT
-	0x6 INST_FETCH.MISS
-	0x7 INST_FETCH.ALL
-	0x9 DATA_READ.HIT
-	0xa DATA_READ.MISS
-	0xb DATA_READ.ALL
-	0xd ALL.HIT
-	0xe ALL.MISS
-	0xf ALL.ALL
-
-# CPU_IA64_2 Table 11-94
-name:l3_writes type:exclusive default:0x7
-	0x5 DATA_WRITE.HIT
-	0x6 DATA_WRITE.MISS
-	0x7 DATA_WRITE.ALL
-	0x9 L2_WB.HIT
-	0xa L2_WB.MISS
-	0xb L2_WB.ALL
-	0xd ALL.HIT
-	0xe ALL.MISS
-	0xf ALL.ALL
-
-# CPU_IA64_2 Table 11-95
-name:mem_read_current type:exclusive default:0x3
-	0x1 IO
-	0x3 ANY
-
-# CPU_IA64_2 Table 11-96
-name:rse_references_retired type:bitmask default:0x3
-	0x1 LOAD
-	0x2 STORE
-	0x3 ALL
-
-# CPU_IA64_2 Table 11-97 bitmask
-name:syll_not_dispersed type:bitmask default:0xf
-	0x1 EXPL
-	0x2 IMPL
-	0x4 FE
-	0x8 MLI
-	0xf ALL
-
-# CPU_IA64_2 Table 11-98
-name:syll_overcount type:exclusive default:0x3
-	0x1 EXPL
-	0x2 IMPL
-	0x3 ALL
diff --git a/events/ppc/e500mc/events b/events/ppc/e500mc/events
new file mode 100644
index 0000000..8197a7d
--- /dev/null
+++ b/events/ppc/e500mc/events
@@ -0,0 +1,120 @@
+# e500mc Events
+#
+# Copyright (C) 2010 Freescale Semiconductor, Inc.
+#
+event:0x1 counters:0,1,2,3 um:zero minimum:100 name:CPU_CLK : Cycles
+event:0x2 counters:0,1,2,3 um:zero minimum:500 name:COMPLETED_INSNS : Completed Instructions (0, 1, or 2 per cycle)
+event:0x3 counters:0,1,2,3 um:zero minimum:500 name:COMPLETED_OPS : Completed Micro-ops (counts 2 for load/store w/update)
+event:0x4 counters:0,1,2,3 um:zero minimum:500 name:INSTRUCTION_FETCHES : Instruction fetches
+event:0x5 counters:0,1,2,3 um:zero minimum:500 name:DECODED_OPS : Micro-ops decoded
+event:0x8 counters:0,1,2,3 um:zero minimum:500 name:COMPLETED_BRANCHES : Branch Instructions completed
+event:0x9 counters:0,1,2,3 um:zero minimum:500 name:COMPLETED_LOAD_OPS : Load micro-ops completed
+event:0xa counters:0,1,2,3 um:zero minimum:500 name:COMPLETED_STORE_OPS : Store micro-ops completed
+event:0xb counters:0,1,2,3 um:zero minimum:500 name:COMPLETION_REDIRECTS : Number of completion buffer redirects
+event:0xc counters:0,1,2,3 um:zero minimum:500 name:BRANCHES_FINISHED : Branches finished
+event:0xd counters:0,1,2,3 um:zero minimum:500 name:TAKEN_BRANCHES_FINISHED : Taken branches finished
+event:0xe counters:0,1,2,3 um:zero minimum:500 name:BIFFED_BRANCHES_FINISHED : Biffed branches finished
+event:0xf counters:0,1,2,3 um:zero minimum:500 name:BRANCHES_MISPREDICTED : Branch instructions mispredicted due to direction, target, or IAB prediction
+event:0x10 counters:0,1,2,3 um:zero minimum:500 name:BRANCHES_MISPREDICTED_DIRECTION : Branches mispredicted due to direction prediction
+event:0x11 counters:0,1,2,3 um:zero minimum:500 name:BTB_HITS : Branches that hit in the BTB, or missed but are not taken
+event:0x12 counters:0,1,2,3 um:zero minimum:500 name:DECODE_STALLED : Cycles the instruction buffer was not empty, but 0 instructions decoded
+event:0x13 counters:0,1,2,3 um:zero minimum:500 name:ISSUE_STALLED : Cycles the issue buffer is not empty but 0 instructions issued
+event:0x14 counters:0,1,2,3 um:zero minimum:500 name:BRANCH_ISSUE_STALLED : Cycles the branch buffer is not empty but 0 instructions issued
+event:0x15 counters:0,1,2,3 um:zero minimum:500 name:SRS0_SCHEDULE_STALLED : Cycles SRS0 is not empty but 0 instructions scheduled
+event:0x16 counters:0,1,2,3 um:zero minimum:500 name:SRS1_SCHEDULE_STALLED : Cycles SRS1 is not empty but 0 instructions scheduled
+event:0x17 counters:0,1,2,3 um:zero minimum:500 name:VRS_SCHEDULE_STALLED : Cycles VRS is not empty but 0 instructions scheduled
+event:0x18 counters:0,1,2,3 um:zero minimum:500 name:LRS_SCHEDULE_STALLED : Cycles LRS is not empty but 0 instructions scheduled
+event:0x19 counters:0,1,2,3 um:zero minimum:500 name:BRS_SCHEDULE_STALLED : Cycles BRS is not empty but 0 instructions scheduled
+event:0x1a counters:0,1,2,3 um:zero minimum:500 name:TOTAL_TRANSLATED : Total Ldst microops translated.
+event:0x1b counters:0,1,2,3 um:zero minimum:500 name:LOADS_TRANSLATED : Number of cacheable L* or EVL* microops translated. (This includes microops from load-multiple, load-update, and load-context instructions.)
+event:0x1c counters:0,1,2,3 um:zero minimum:500 name:STORES_TRANSLATED : Number of cacheable ST* or EVST* microops translated. (This includes microops from store-multiple, store-update, and save-context instructions.)
+event:0x1d counters:0,1,2,3 um:zero minimum:500 name:TOUCHES_TRANSLATED : Number of cacheable DCBT and DCBTST instructions translated (L1 only) (Does not count touches that are converted to nops i.e. exceptions, noncacheable, hid0[nopti] bit is set.)
+event:0x1e counters:0,1,2,3 um:zero minimum:500 name:CACHEOPS_TRANSLATED : Number of dcba, dcbf, dcbst, and dcbz instructions translated (e500 traps on dcbi)
+event:0x1f counters:0,1,2,3 um:zero minimum:500 name:CACHEINHIBITED_ACCESSES_TRANSLATED : Number of cache inhibited accesses translated
+event:0x20 counters:0,1,2,3 um:zero minimum:500 name:GUARDED_LOADS_TRANSLATED : Number of guarded loads translated
+event:0x21 counters:0,1,2,3 um:zero minimum:500 name:WRITETHROUGH_STORES_TRANSLATED : Number of write-through stores translated
+event:0x22 counters:0,1,2,3 um:zero minimum:500 name:MISALIGNED_ACCESSES_TRANSLATED : Number of misaligned load or store accesses translated.
+event:0x23 counters:0,1,2,3 um:zero minimum:500 name:TOTAL_ALLOCATED_DLFB : Total allocated to dLFB
+event:0x24 counters:0,1,2,3 um:zero minimum:500 name:LOADS_TRANSLATED_ALLOCATED_DLFB : Loads translated and allocated to dLFB (Applies to same class of instructions as loads translated.)
+event:0x25 counters:0,1,2,3 um:zero minimum:500 name:STORES_COMPLETED_ALLOCATED_DLFB : Stores completed and allocated to dLFB (Applies to same class of instructions as stores translated.)
+event:0x26 counters:0,1,2,3 um:zero minimum:500 name:TOUCHES_TRANSLATED_ALLOCATED_DLFB : Touches translated and allocated to dLFB (Applies to same class of instructions as touches translated.)
+event:0x27 counters:0,1,2,3 um:zero minimum:500 name:STORES_COMPLETED : Number of cacheable ST* or EVST* microops completed. (Applies to the same class of instructions as stores translated.)
+event:0x28 counters:0,1,2,3 um:zero minimum:500 name:DL1_LOCKS : Number of cache lines locked in the dL1. (Counts a lock even if an overlock condition is encountered.)
+event:0x29 counters:0,1,2,3 um:zero minimum:500 name:DL1_RELOADS : This is historically used to determine dcache miss rate (along with loads/stores completed). This counts dL1 reloads for any reason.
+event:0x2a counters:0,1,2,3 um:zero minimum:500 name:DL1_CASTOUTS : dL1 castouts. Does not count castouts due to DCBF.
+event:0x2b counters:0,1,2,3 um:zero minimum:500 name:DETECTED_REPLAYS : Times detected replay condition - Load miss with dLFB full.
+event:0x2c counters:0,1,2,3 um:zero minimum:500 name:LOAD_MISS_QUEUE_FULL_REPLAYS : Load miss with load queue full.
+event:0x2d counters:0,1,2,3 um:zero minimum:500 name:LOAD_GUARDED_MISS_NOT_LAST_REPLAYS : Load guarded miss when the load is not yet at the bottom of the completion buffer.
+event:0x2e counters:0,1,2,3 um:zero minimum:500 name:STORE_TRANSLATED_QUEUE_FULL_REPLAYS : Translate a store when the StQ is full.
+event:0x2f counters:0,1,2,3 um:zero minimum:500 name:ADDRESS_COLLISION_REPLAYS : Address collision.
+event:0x30 counters:0,1,2,3 um:zero minimum:500 name:DMMU_MISS_REPLAYS : DMMU miss.
+event:0x31 counters:0,1,2,3 um:zero minimum:500 name:DMMU_BUSY_REPLAYS : DMMU busy.
+event:0x32 counters:0,1,2,3 um:zero minimum:500 name:SECOND_PART_MISALIGNED_AFTER_MISS_REPLAYS : Second part of misaligned access when first part missed in cache.
+event:0x33 counters:0,1,2,3 um:zero minimum:500 name:LOAD_MISS_DLFB_FULL_CYCLES : Cycles stalled on replay condition - Load miss with dLFB full.
+event:0x34 counters:0,1,2,3 um:zero minimum:500 name:LOAD_MISS_QUEUE_FULL_CYCLES : Cycles stalled on replay condition - Load miss with load queue full.
+event:0x35 counters:0,1,2,3 um:zero minimum:500 name:LOAD_GUARDED_MISS_NOT_LAST_CYCLES : Cycles stalled on replay condition - Load guarded miss when the load is not yet at the bottom of the completion buffer.
+event:0x36 counters:0,1,2,3 um:zero minimum:500 name:STORE_TRANSLATED_QUEUE_FULL_CYCLES : Cycles stalled on replay condition - Translate a store when the StQ is full.
+event:0x37 counters:0,1,2,3 um:zero minimum:500 name:ADDRESS_COLLISION_CYCLES : Cycles stalled on replay condition - Address collision.
+event:0x38 counters:0,1,2,3 um:zero minimum:500 name:DMMU_MISS_CYCLES : Cycles stalled on replay condition - DMMU miss.
+event:0x39 counters:0,1,2,3 um:zero minimum:500 name:DMMU_BUSY_CYCLES : Cycles stalled on replay condition - DMMU busy.
+event:0x3a counters:0,1,2,3 um:zero minimum:500 name:SECOND_PART_MISALIGNED_AFTER_MISS_CYCLES : Cycles stalled on replay condition - Second part of misaligned access when first part missed in cache.
+event:0x3b counters:0,1,2,3 um:zero minimum:500 name:IL1_LOCKS : Number of cache lines locked in the iL1. (Counts a lock even if an overlock condition is encountered.)
+event:0x3c counters:0,1,2,3 um:zero minimum:500 name:IL1_FETCH_RELOADS : This is historically used to determine icache miss rate (along with instructions completed) Reloads due to demand fetch.
+event:0x3d counters:0,1,2,3 um:zero minimum:500 name:FETCHES : Counts the number of fetches that write at least one instruction to the instruction buffer. (With instructions fetched, can be used to compute instructions-per-fetch)
+event:0x3e counters:0,1,2,3 um:zero minimum:500 name:IMMU_TLB4K_RELOADS : iMMU TLB4K reloads
+event:0x3f counters:0,1,2,3 um:zero minimum:500 name:IMMU_VSP_RELOADS : iMMU VSP reloads
+event:0x40 counters:0,1,2,3 um:zero minimum:500 name:DMMU_TLB4K_RELOADS : dMMU TLB4K reloads
+event:0x41 counters:0,1,2,3 um:zero minimum:500 name:DMMU_VSP_RELOADS : dMMU VSP reloads
+event:0x42 counters:0,1,2,3 um:zero minimum:500 name:L2MMU_MISSES : Counts iTLB/dTLB error interrupt
+event:0x43 counters:0,1,2,3 um:zero minimum:500 name:BIU_MASTER_REQUESTS : Number of master transactions. (Number of master TSs.)
+event:0x44 counters:0,1,2,3 um:zero minimum:500 name:BIU_MASTER_I_REQUESTS : Number of master I-Side transactions. (Number of master I-Side TSs.)
+event:0x45 counters:0,1,2,3 um:zero minimum:500 name:BIU_MASTER_D_REQUESTS : Number of master D-Side transactions. (Number of master D-Side TSs.)
+event:0x46 counters:0,1,2,3 um:zero minimum:500 name:BIU_MASTER_D_CASTOUT_REQUESTS : Number of master D-Side non-program-demand castout transactions. This counts replacement pushes and snoop pushes. This does not count DCBF castouts. (Number of master D-side non-program-demand castout TSs.)
+event:0x48 counters:0,1,2,3 um:zero minimum:500 name:SNOOP_REQUESTS : Number of externally generated snoop requests. (Counts snoop TSs.)
+event:0x49 counters:0,1,2,3 um:zero minimum:500 name:SNOOP_HITS : Number of snoop hits on all D-side resources regardless of the cache state (modified, exclusive, or shared)
+event:0x4a counters:0,1,2,3 um:zero minimum:500 name:SNOOP_PUSHES : Number of snoop pushes from all D-side resources. (Counts snoop ARTRY/WOPs.)
+event:0x52 counters:0,1,2,3 um:zero minimum:500 name:PMC0_OVERFLOW : Counts the number of times PMC0[32] transitioned from 1 to 0.
+event:0x53 counters:0,1,2,3 um:zero minimum:500 name:PMC1_OVERFLOW : Counts the number of times PMC1[32] transitioned from 1 to 0.
+event:0x54 counters:0,1,2,3 um:zero minimum:500 name:PMC2_OVERFLOW : Counts the number of times PMC2[32] transitioned from 1 to 0.
+event:0x55 counters:0,1,2,3 um:zero minimum:500 name:PMC3_OVERFLOW : Counts the number of times PMC3[32] transitioned from 1 to 0.
+event:0x56 counters:0,1,2,3 um:zero minimum:500 name:INTERRUPTS : Number of interrupts taken
+event:0x57 counters:0,1,2,3 um:zero minimum:500 name:EXTERNAL_INTERRUPTS : Number of external input interrupts taken
+event:0x58 counters:0,1,2,3 um:zero minimum:500 name:CRITICAL_INTERRUPTS : Number of critical input interrupts taken
+event:0x59 counters:0,1,2,3 um:zero minimum:500 name:SC_TRAP_INTERRUPTS : Number of system call and trap interrupts
+event:0x5b counters:0,1,2,3 um:zero minimum:500 name:L2_LINEFILL_REQ : Number L2 Linefill requests
+event:0x5c counters:0,1,2,3 um:zero minimum:500 name:L2_VICTIM_SELECT : Number L2 Victim selects
+event:0x6e counters:0,1,2,3 um:zero minimum:500 name:L2_ACCESS : Number L2 cache accesses
+event:0x6f counters:0,1,2,3 um:zero minimum:500 name:L2_HIT_ACCESS : Number L2 hit cache accesses
+event:0x70 counters:0,1,2,3 um:zero minimum:500 name:L2_DATA_ACCESS : Number L2 data cache accesses
+event:0x71 counters:0,1,2,3 um:zero minimum:500 name:L2_HIT_DATA_ACCESS : Number L2 hit data cache accesses
+event:0x72 counters:0,1,2,3 um:zero minimum:500 name:L2_INST_ACCESS : Number L2 instruction cache accesses
+event:0x73 counters:0,1,2,3 um:zero minimum:500 name:L2_HIT_INST_ACCESS : Number L2 hit instruction cache accesses
+event:0x74 counters:0,1,2,3 um:zero minimum:500 name:L2_ALLOC : Number L2 cache allocations
+event:0x75 counters:0,1,2,3 um:zero minimum:500 name:L2_DATA_ALLOC : Number L2 data cache allocations
+event:0x76 counters:0,1,2,3 um:zero minimum:500 name:L2_DIRTY_DATA_ALLOC : Number L2 dirty data cache allocations
+event:0x77 counters:0,1,2,3 um:zero minimum:500 name:L2_INST_ALLOC : Number L2 instruction cache allocations
+event:0x78 counters:0,1,2,3 um:zero minimum:500 name:L2_UPDATE : Number L2 cache updates
+event:0x79 counters:0,1,2,3 um:zero minimum:500 name:L2_CLEAN_UPDATE : Number L2 cache clean updates
+event:0x7a counters:0,1,2,3 um:zero minimum:500 name:L2_DIRTY_UPDATE : Number L2 cache dirty updates
+event:0x7b counters:0,1,2,3 um:zero minimum:500 name:L2_CLEAN_REDU_UPDATE : Number L2 cache clean redundant updates
+event:0x7c counters:0,1,2,3 um:zero minimum:500 name:L2_DIRTY_REDU_UPDATE : Number L2 cache dirty redundant updates
+event:0x7d counters:0,1,2,3 um:zero minimum:500 name:L2_LOCKS : Number L2 cache locks
+event:0x7e counters:0,1,2,3 um:zero minimum:500 name:L2_CASTOUT : Number L2 cache castouts
+event:0x7f counters:0,1,2,3 um:zero minimum:500 name:L2_HIT_DATA_DIRTY : Number L2 cache data dirty hits
+event:0x82 counters:0,1,2,3 um:zero minimum:500 name:L2_INV_CLEAN : Number L2 cache invalidation of clean lines
+event:0x83 counters:0,1,2,3 um:zero minimum:500 name:L2_INV_INCOHER : Number L2 cache invalidation of incoherent lines
+event:0x84 counters:0,1,2,3 um:zero minimum:500 name:L2_INV_COHER : Number L2 cache invalidation of coherent lines
+event:0x94 counters:0,1,2,3 um:zero minimum:500 name:DVT0 : Detection of write to DEVENT with DVT0 set
+event:0x95 counters:0,1,2,3 um:zero minimum:500 name:DVT1 : Detection of write to DEVENT with DVT1 set
+event:0x96 counters:0,1,2,3 um:zero minimum:500 name:DVT2 : Detection of write to DEVENT with DVT2 set
+event:0x97 counters:0,1,2,3 um:zero minimum:500 name:DVT3 : Detection of write to DEVENT with DVT3 set
+event:0x98 counters:0,1,2,3 um:zero minimum:500 name:DVT4 : Detection of write to DEVENT with DVT4 set
+event:0x99 counters:0,1,2,3 um:zero minimum:500 name:DVT5 : Detection of write to DEVENT with DVT5 set
+event:0x9a counters:0,1,2,3 um:zero minimum:500 name:DVT6 : Detection of write to DEVENT with DVT6 set
+event:0x9b counters:0,1,2,3 um:zero minimum:500 name:DVT7 : Detection of write to DEVENT with DVT7 set
+event:0x9c counters:0,1,2,3 um:zero minimum:500 name:CYCLES_NEXUS_STALLED : Number of completion cycles stalled due to Nexus FIFO full
+event:0xb0 counters:0,1,2,3 um:zero minimum:500 name:DECORATED_LOAD : Number of decorated loads.
+event:0xb1 counters:0,1,2,3 um:zero minimum:500 name:DECORATED_STORE : Number of decorated stores
+event:0xb2 counters:0,1,2,3 um:zero minimum:500 name:LOAD_RETRY : Number of load retries
+event:0xb3 counters:0,1,2,3 um:zero minimum:500 name:STWCX_SUCCESS : Number of successful stwcx. instructions
+event:0xb4 counters:0,1,2,3 um:zero minimum:500 name:STWCX_UNSUCCESS : Number of unsuccessful stwcx. instructions
diff --git a/events/rtc/unit_masks b/events/ppc/e500mc/unit_masks
similarity index 67%
rename from events/rtc/unit_masks
rename to events/ppc/e500mc/unit_masks
index 6984b62..395c653 100644
--- a/events/rtc/unit_masks
+++ b/events/ppc/e500mc/unit_masks
@@ -1,4 +1,4 @@
-# RTC possible unit masks
+# e500 possible unit masks
 #
 name:zero type:mandatory default:0x0
 	0x0 No unit mask
diff --git a/events/ppc/e6500/events b/events/ppc/e6500/events
new file mode 100644
index 0000000..f34f82d
--- /dev/null
+++ b/events/ppc/e6500/events
@@ -0,0 +1,266 @@
+# e6500 Events
+#
+# Copyright (C) 2012 Freescale Semiconductor, Inc.
+#
+event:0x1 counters:0,1,2,3,4,5 um:zero minimum:100 name:CPU_CLK : Cycles
+event:0x2 counters:0,1,2,3,4,5 um:zero minimum:500 name:COMPLETED_INSNS : Completed Instructions (0, 1, or 2 per cycle)
+event:0x3 counters:0,1,2,3,4,5 um:zero minimum:500 name:COMPLETED_OPS : Completed Micro-ops
+event:0x5 counters:0,1,2,3,4,5 um:zero minimum:500 name:DECODED_OPS : Micro-ops decoded
+event:0x6 counters:0,1,2,3,4,5 um:zero minimum:500 name:TRANSITIONS_PM_EVENT : 0 to 1 transitions on the pm_event input
+event:0x7 counters:0,1,2,3,4,5 um:zero minimum:500 name:CPU_CLK_PM_EVENT : Processor cycles that occur when the pm_event input is asserted
+event:0x8 counters:0,1,2,3,4,5 um:zero minimum:500 name:COMPLETED_BRANCHES : Branch Instructions completed
+event:0x9 counters:0,1,2,3,4,5 um:zero minimum:500 name:COMPLETED_LOAD_OPS : Load micro-ops completed
+event:0xa counters:0,1,2,3,4,5 um:zero minimum:500 name:COMPLETED_STORE_OPS : Store micro-ops completed
+event:0xb counters:0,1,2,3,4,5 um:zero minimum:500 name:COMPLETION_REDIRECTS : Number of completion buffer redirects
+event:0xc counters:0,1,2,3,4,5 um:zero minimum:500 name:BRANCHES_FINISHED : Branches finished
+event:0xd counters:0,1,2,3,4,5 um:zero minimum:500 name:TAKEN_BRANCHES_FINISHED : Taken branches finished
+event:0xe counters:0,1,2,3,4,5 um:zero minimum:500 name:TAKEN_BRANCHES_FINISHED_NOT_BTB : Finished unconditional branches that miss the BTB
+event:0xf counters:0,1,2,3,4,5 um:zero minimum:500 name:BRANCHES_MISPREDICTED : Branch instructions mispredicted due to direction, target, or IAB prediction
+event:0x10 counters:0,1,2,3,4,5 um:zero minimum:500 name:BRANCHES_MISPREDICTED_DIRECTION : Branches mispredicted due to direction prediction
+event:0x11 counters:0,1,2,3,4,5 um:zero minimum:500 name:BTB_HITS : Branches that hit in the BTB, or missed but are not taken
+event:0x12 counters:0,1,2,3,4,5 um:zero minimum:500 name:DECODE_STALLED : Cycles the instruction buffer was not empty, but 0 instructions decoded
+event:0x13 counters:0,1,2,3,4,5 um:zero minimum:500 name:ISSUE_STALLED : Cycles the SFX/CFX issue queue is not empty but 0 instructions issued
+event:0x14 counters:0,1,2,3,4,5 um:zero minimum:500 name:BRANCH_ISSUE_STALLED : Cycles the branch buffer is not empty but 0 instructions issued
+event:0x15 counters:0,1,2,3,4,5 um:zero minimum:500 name:SFX0_SCHEDULE_STALLED : Cycles SFX0 is not empty but 0 instructions scheduled
+event:0x16 counters:0,1,2,3,4,5 um:zero minimum:500 name:SFX1_SCHEDULE_STALLED : Cycles SFX1 is not empty but 0 instructions scheduled
+event:0x17 counters:0,1,2,3,4,5 um:zero minimum:500 name:CFX_SCHEDULE_STALLED : Cycles CFX is not empty but 0 instructions scheduled
+event:0x18 counters:0,1,2,3,4,5 um:zero minimum:500 name:LSU_SCHEDULE_STALLED : Cycles LSU is not empty but 0 instructions scheduled
+event:0x19 counters:0,1,2,3,4,5 um:zero minimum:500 name:BU_SCHEDULE_STALLED : Cycles BU is not empty but 0 instructions scheduled
+event:0x1a counters:0,1,2,3,4,5 um:zero minimum:500 name:TOTAL_TRANSLATED : Total LSU micro-ops that reach the second stage of the LSU
+event:0x1b counters:0,1,2,3,4,5 um:zero minimum:500 name:LOADS_TRANSLATED : Cacheable load micro-ops translated. (Does not include WT)
+event:0x1c counters:0,1,2,3,4,5 um:zero minimum:500 name:STORES_TRANSLATED : Cacheable store micro-ops translated. (Does not include WT)
+event:0x1d counters:0,1,2,3,4,5 um:zero minimum:500 name:TOUCHES_TRANSLATED : Cacheable touch instructions translated. Includes: dcbt / dcbtep dcbtst / dcbtstep icbt ct=2
+event:0x1e counters:0,1,2,3,4,5 um:zero minimum:500 name:CACHEOPS_TRANSLATED : Number of dcba, dcbf, dcbst, and dcbz instructions translated (e500 traps on dcbi)
+event:0x1f counters:0,1,2,3,4,5 um:zero minimum:500 name:CACHEINHIBITED_ACCESSES_TRANSLATED : Number of cache inhibited accesses translated
+event:0x20 counters:0,1,2,3,4,5 um:zero minimum:500 name:GUARDED_LOADS_TRANSLATED : Number of guarded loads translated
+event:0x21 counters:0,1,2,3,4,5 um:zero minimum:500 name:WRITETHROUGH_STORES_TRANSLATED : Number of write-through stores translated
+event:0x22 counters:0,1,2,3,4,5 um:zero minimum:500 name:MISALIGNED_ACCESSES_TRANSLATED : Number of misaligned load or store accesses translated.
+event:0x23 counters:0,1,2,3,4,5 um:zero minimum:500 name:FETCH_2X4_HITS : Each fetch retrieves up to 8 instructions, but only the first 4 are required. This event increments if at least one instruction of the second 4 is actually used.
+event:0x24 counters:0,1,2,3,4,5 um:zero minimum:500 name:FETCH_HITS_ON_PREFETCHES : Fetch hits on instruction prefetch when the data is still in the ILFB.
+event:0x25 counters:0,1,2,3,4,5 um:zero minimum:500 name:GENERATED_FETCH_PREFETCHES : Number of prefetches generated.
+event:0x29 counters:0,1,2,3,4,5 um:zero minimum:500 name:DL1_RELOADS : This is historically used to determine dcache miss rate (along with loads/stores completed). This counts dL1 reloads for any reason.
+event:0x2c counters:0,1,2,3,4,5 um:zero minimum:500 name:LOAD_MISS_WITH_LOAD_QUEUE_FULL : Counts number of stalls; Com:52 counts cycles stalled. Includes: cacheable loads, CI loads, loadec, larx, touches, ibll, ibsl,ibllsl
+event:0x2d counters:0,1,2,3,4,5 um:zero minimum:500 name:LOAD_GUARDED_MISS_NOT_LAST_REPLAYS : Load guarded miss when the load is not yet at the bottom of the completion buffer.
+event:0x2e counters:0,1,2,3,4,5 um:zero minimum:500 name:STORE_TRANSLATED_QUEUE_FULL_REPLAYS : Translate a store when the StQ is full.
+event:0x2f counters:0,1,2,3,4,5 um:zero minimum:500 name:ADDRESS_COLLISION_REPLAYS : Address collision.
+event:0x30 counters:0,1,2,3,4,5 um:zero minimum:500 name:DTLB_MISS_REPLAYS : Counts number of stalls; Com:56 counts cycles stalled.
+event:0x31 counters:0,1,2,3,4,5 um:zero minimum:500 name:DTLB_BUSY_REPLAYS : Counts number of stalls; Com:57 counts cycles stalled.
+event:0x32 counters:0,1,2,3,4,5 um:zero minimum:500 name:SECOND_PART_MISALIGNED_AFTER_MISS_REPLAYS : Second part of misaligned access when first part missed in cache.
+event:0x34 counters:0,1,2,3,4,5 um:zero minimum:500 name:LOAD_MISS_QUEUE_FULL_CYCLES : Cycles stalled on replay condition - Load miss with load queue full.
+event:0x35 counters:0,1,2,3,4,5 um:zero minimum:500 name:LOAD_GUARDED_MISS_NOT_LAST_CYCLES : Cycles stalled on replay condition - Load guarded miss when the load is not yet at the bottom of the completion buffer.
+event:0x36 counters:0,1,2,3,4,5 um:zero minimum:500 name:STORE_TRANSLATED_QUEUE_FULL_CYCLES : Cycles stalled on replay condition - Translate a store when the StQ is full.
+event:0x37 counters:0,1,2,3,4,5 um:zero minimum:500 name:ADDRESS_COLLISION_CYCLES : Cycles stalled on replay condition - Address collision.
+event:0x38 counters:0,1,2,3,4,5 um:zero minimum:500 name:DTLB_MISS_CYCLES : Cycles stalled on replay condition - DTLB miss.
+event:0x39 counters:0,1,2,3,4,5 um:zero minimum:500 name:DTLB_BUSY_CYCLES : Cycles stalled on replay condition - DTLB busy.
+event:0x3a counters:0,1,2,3,4,5 um:zero minimum:500 name:SECOND_PART_MISALIGNED_AFTER_MISS_CYCLES : Cycles stalled on replay condition - Second part of misaligned access when first part missed in cache.
+event:0x3c counters:0,1,2,3,4,5 um:zero minimum:500 name:IL1_FETCH_RELOADS : This is historically used to determine icache miss rate (along with instructions completed) Reloads due to demand fetch.
+event:0x3d counters:0,1,2,3,4,5 um:zero minimum:500 name:FETCHES : Counts fetches that write at least one instruction to the Instruction Buffer.
+event:0x3e counters:0,1,2,3,4,5 um:zero minimum:500 name:IMMU_TLB4K_RELOADS : iMMU TLB4K reloads
+event:0x3f counters:0,1,2,3,4,5 um:zero minimum:500 name:IMMU_VSP_RELOADS : iMMU VSP reloads
+event:0x40 counters:0,1,2,3,4,5 um:zero minimum:500 name:DMMU_TLB4K_RELOADS : dMMU TLB4K reloads
+event:0x41 counters:0,1,2,3,4,5 um:zero minimum:500 name:DMMU_VSP_RELOADS : dMMU VSP reloads
+event:0x42 counters:0,1,2,3,4,5 um:zero minimum:500 name:L2MMU_MISSES : Counts iTLB/dTLB error interrupt
+event:0x43 counters:0,1,2,3,4,5 um:zero minimum:500 name:TAKEN_BRANCHES : Completed branch instructions that were taken.
+event:0x44 counters:0,1,2,3,4,5 um:zero minimum:500 name:TAKEN_BLR : Completed blr instructions that were taken.
+event:0x45 counters:0,1,2,3,4,5 um:zero minimum:500 name:BTB_TARGET_MISPREDICT : Number of target mispredicts (BTB).
+event:0x46 counters:0,1,2,3,4,5 um:zero minimum:500 name:MISPREDICT_TARGET_BLR : Number of link stack mispredicts (LS).
+event:0x47 counters:0,1,2,3,4,5 um:zero minimum:500 name:TAKEN_BTB_BUT_MISS : Number of BTB misses, but taken (BTB allocates).
+event:0x52 counters:0,1,2,3,4,5 um:zero minimum:500 name:PMC0_OVERFLOW : Counts the number of times PMC0[32] transitioned from 1 to 0.
+event:0x53 counters:0,1,2,3,4,5 um:zero minimum:500 name:PMC1_OVERFLOW : Counts the number of times PMC1[32] transitioned from 1 to 0.
+event:0x54 counters:0,1,2,3,4,5 um:zero minimum:500 name:PMC2_OVERFLOW : Counts the number of times PMC2[32] transitioned from 1 to 0.
+event:0x55 counters:0,1,2,3,4,5 um:zero minimum:500 name:PMC3_OVERFLOW : Counts the number of times PMC3[32] transitioned from 1 to 0.
+event:0x56 counters:0,1,2,3,4,5 um:zero minimum:500 name:INTERRUPTS : Number of interrupts taken
+event:0x57 counters:0,1,2,3,4,5 um:zero minimum:500 name:EXTERNAL_INTERRUPTS : Number of external input interrupts taken
+event:0x58 counters:0,1,2,3,4,5 um:zero minimum:500 name:CRITICAL_INTERRUPTS : Number of critical input interrupts taken
+event:0x59 counters:0,1,2,3,4,5 um:zero minimum:500 name:SC_TRAP_INTERRUPTS : Number of system call and trap interrupts
+event:0x5a counters:0,1,2,3,4,5 um:zero minimum:500 name:TBL_BIT_TRANS_PMGC0 : Counts transitions of the TBL bit selected by PMGC0[TBSEL].
+event:0x5b counters:0,1,2,3,4,5 um:zero minimum:500 name:PMC4_OVERFLOW : Counts the number of times PMC4[32] transitioned from 1 to 0.
+event:0x5c counters:0,1,2,3,4,5 um:zero minimum:500 name:PMC5_OVERFLOW : Counts the number of times PMC5[32] transitioned from 1 to 0.
+event:0x61 counters:0,1,2,3,4,5 um:zero minimum:500 name:L1_STASH_HIT : Stash hits in L1 Data Cache.
+event:0x63 counters:0,1,2,3,4,5 um:zero minimum:500 name:L1_STASH_REQ : Stash requests to L1 Data Cache.
+event:0x64 counters:0,1,2,3,4,5 um:zero minimum:500 name:TIMES_LSU_THREAD_PRIO_SWTICHED : Number of times the Load Store Unit thread priority switched based on resource collisions.
+event:0x65 counters:0,1,2,3,4,5 um:zero minimum:500 name:CLK_THREAD_REQ_FPU_DENIED : Number of cycles both threads had Floating Point Unit requests and one was denied.
+event:0x66 counters:0,1,2,3,4,5 um:zero minimum:500 name:CLK_THREAD_REQ_VPERM_DENIED : Number of cycles both threads had Altivec Permute requests and one was denied.
+event:0x67 counters:0,1,2,3,4,5 um:zero minimum:500 name:CLK_THREAD_REQ_VGEN_DENIED : Number of cycles both threads had Altivec General requests and one was denied.
+event:0x68 counters:0,1,2,3,4,5 um:zero minimum:500 name:CLK_THREAD_REQ_CFX_DENIED : Number of cycles both threads had Complex Fixed-Point Unit requests and one was denied.
+event:0x69 counters:0,1,2,3,4,5 um:zero minimum:500 name:CLK_THREAD_REQ_FETCH_DENIED : Number of cycles both threads made a Fetch request to the L1 Instruction Cache and one thread wins arbitration.
+event:0x6e counters:0,1,2,3,4,5 um:zero minimum:500 name:CLK_LSU_ISSUE_STALLED : Cycles the LSU issue queue is not empty but 0 instructions issued.
+event:0x6f counters:0,1,2,3,4,5 um:zero minimum:500 name:CLK_FPU_ISSUE_STALLED : Cycles the FPU issue queue is not empty but 0 instructions issued.
+event:0x70 counters:0,1,2,3,4,5 um:zero minimum:500 name:CLK_ALTIVEC_ISSUE_STALLED : Cycles the AltiVec issue queue is not empty but 0 instructions issued.
+event:0x71 counters:0,1,2,3,4,5 um:zero minimum:500 name:CLK_FPU_SCHEDULE_STALLED : Cycles FPU is not empty but 0 instructions scheduled.
+event:0x72 counters:0,1,2,3,4,5 um:zero minimum:500 name:CLK_VPERM_SCHEDULE_STALLED : Cycles VPERM is not empty but 0 instructions scheduled.
+event:0x73 counters:0,1,2,3,4,5 um:zero minimum:500 name:CLK_VGEN_SCHEDULE_STALLED : Cycles VGEN is not empty but 0 instructions scheduled.
+event:0x74 counters:0,1,2,3,4,5 um:zero minimum:500 name:CLK_VPU_INSTRUCTION_WAIT_FOR_OPERA : Cycles VPU instruction waits for operands.
+event:0x75 counters:0,1,2,3,4,5 um:zero minimum:500 name:CLK_VFPU_INSTRUCTION_WAIT_FOR_OPERA : Cycles VFPU instruction waits for operands.
+event:0x76 counters:0,1,2,3,4,5 um:zero minimum:500 name:CLK_VSFX_INSTRUCTION_WAIT_FOR_OPERA : Cycles VSFX instruction waits for operands
+event:0x77 counters:0,1,2,3,4,5 um:zero minimum:500 name:CLK_VCFX_INSTRUCTION_WAIT_FOR_OPERA : Cycles VCFX instruction waits for operands.
+event:0x7a counters:0,1,2,3,4,5 um:zero minimum:500 name:CLK_IB_EMPT : Number of cycles the Instruction Buffer is empty
+event:0x7b counters:0,1,2,3,4,5 um:zero minimum:500 name:CLK_IB_FULL : Number of cycles the Instruction Buffer is full enough such that fetch stops fetching.
+event:0x7c counters:0,1,2,3,4,5 um:zero minimum:500 name:CLK_CB_EMPT : Number of cycles the Completion Buffer is empty.
+event:0x7d counters:0,1,2,3,4,5 um:zero minimum:500 name:CLK_CB_FULL : Number of cycles the Completion Buffer is full enough such that decode stops.
+event:0x7e counters:0,1,2,3,4,5 um:zero minimum:500 name:CLK_PRESYNC_SI_IB : Number of cycles a pre-sync serialized instruction holds in the Instruction Buffer and is not decoded.
+event:0x7f counters:0,1,2,3,4,5 um:zero minimum:500 name:COMPLETED_CLK_0_INSTRUCTIONS : Increments if 0 instructions (micro-ops) completed.
+event:0x80 counters:0,1,2,3,4,5 um:zero minimum:500 name:COMPLETED_CLK_1_INSTRUCTIONS : Increments if 1 instruction (micro-op) completed.
+event:0x81 counters:0,1,2,3,4,5 um:zero minimum:500 name:COMPLETED_CLK_2_INSTRUCTIONS : Increments if 2 instructions (micro-ops) completed.
+event:0x88 counters:0,1,2,3,4,5 um:zero minimum:500 name:DETECTED_IAC5S : Every valid IAC5 detection.
+event:0x89 counters:0,1,2,3,4,5 um:zero minimum:500 name:DETECTED_IAC6S : Every valid IAC6 detection.
+event:0x8a counters:0,1,2,3,4,5 um:zero minimum:500 name:DETECTED_IAC7S : Every valid IAC7 detection.
+event:0x8b counters:0,1,2,3,4,5 um:zero minimum:500 name:DETECTED_IAC8S : Every valid IAC8 detection.
+event:0x8c counters:0,1,2,3,4,5 um:zero minimum:500 name:DETECTED_IAC1S : Every valid IAC1 detection.
+event:0x8d counters:0,1,2,3,4,5 um:zero minimum:500 name:DETECTED_IAC2S : Every valid IAC2 detection.
+event:0x8e counters:0,1,2,3,4,5 um:zero minimum:500 name:DETECTED_IAC3S : Every valid IAC3 detection.
+event:0x8f counters:0,1,2,3,4,5 um:zero minimum:500 name:DETECTED_IAC4S : Every valid IAC4 detection.
+event:0x90 counters:0,1,2,3,4,5 um:zero minimum:500 name:DETECTED_DAC1S : Every valid DAC1 detection.
+event:0x91 counters:0,1,2,3,4,5 um:zero minimum:500 name:DETECTED_DAC2S : Every valid DAC2 detection.
+event:0x94 counters:0,1,2,3,4,5 um:zero minimum:500 name:DETECTED_DVT0 : Detection of a write to DEVENT SPR with DVT0 set.
+event:0x95 counters:0,1,2,3,4,5 um:zero minimum:500 name:DETECTED_DVT1 : Detection of a write to DEVENT SPR with DVT1 set.
+event:0x96 counters:0,1,2,3,4,5 um:zero minimum:500 name:DETECTED_DVT2 : Detection of a write to DEVENT SPR with DVT2 set.
+event:0x97 counters:0,1,2,3,4,5 um:zero minimum:500 name:DETECTED_DVT3 : Detection of a write to DEVENT SPR with DVT3 set.
+event:0x98 counters:0,1,2,3,4,5 um:zero minimum:500 name:DETECTED_DVT4 : Detection of a write to DEVENT SPR with DVT4 set.
+event:0x99 counters:0,1,2,3,4,5 um:zero minimum:500 name:DETECTED_DVT5 : Detection of a write to DEVENT SPR with DVT5 set.
+event:0x9a counters:0,1,2,3,4,5 um:zero minimum:500 name:DETECTED_DVT6 : Detection of a write to DEVENT SPR with DVT6 set.
+event:0x9b counters:0,1,2,3,4,5 um:zero minimum:500 name:DETECTED_DVT7 : Detection of a write to DEVENT SPR with DVT7 set.
+event:0x9c counters:0,1,2,3,4,5 um:zero minimum:500 name:CLK_COMPLETION_STALLED : Number of completion cycles stalled due to Nexus FIFO full.
+event:0xa1 counters:0,1,2,3,4,5 um:zero minimum:500 name:FPU_FINISH : FPU finish.
+event:0xa2 counters:0,1,2,3,4,5 um:zero minimum:500 name:CLK_FPU_DIV : Counts once for every cycle of divide execution. (fdivs and fdiv).
+event:0xa3 counters:0,1,2,3,4,5 um:zero minimum:500 name:FPU_DENORM_INPUT : Counts extra cycles of delay due to denormalized inputs. If there is one denormalized operand, this is incremented 4 times; two operands increment it 5 times. This shows the real penalty due to denorms, not just how often they occur.
+event:0xa4 counters:0,1,2,3,4,5 um:zero minimum:500 name:FPU_DENORM_OUTPUT : FPU denorm output.
+event:0xa5 counters:0,1,2,3,4,5 um:zero minimum:500 name:FPU_FPSCR_FULL_STALL : FPU FPSCR stall.
+event:0xa6 counters:0,1,2,3,4,5 um:zero minimum:500 name:FPU_PIPE_SYNC_STALL : Synchronization-op stalls: count once for each cycle that a 'break-before' FPU op is in the RS/issue stage but cannot issue. Also count once for each cycle that an FPU op is in the RS/issue stage but cannot issue due to 'break-after' of an FPU op currently in progress.
+event:0xa7 counters:0,1,2,3,4,5 um:zero minimum:500 name:FPU_INPUT_DATA_STALL : FPU data-ready stall: cycles in which there is an op in the RS/issue stage that cannot issue because one or more of its operands is not yet available.
+event:0xa8 counters:0,1,2,3,4,5 um:zero minimum:500 name:FPU_INSTRUCTIONS_GEN_FLAG : FPU instruction sets FPSCR[FEX].
+event:0xac counters:0,1,2,3,4,5 um:zero minimum:500 name:PW20_CNT : Number of times the core enters the PW20 power management state.
+event:0xb0 counters:0,1,2,3,4,5 um:zero minimum:500 name:DECORATED_LOADS : Number of decorated loads to cache inhibited memory performed.
+event:0xb1 counters:0,1,2,3,4,5 um:zero minimum:500 name:DECORATED_STORES : Number of decorated stores to cache inhibited memory performed.
+event:0xb3 counters:0,1,2,3,4,5 um:zero minimum:500 name:NUM_INSTRUCTIONS_SUCC : Number of successful stbcx., sthcx., stwcx., or stdcx. instructions.
+event:0xb4 counters:0,1,2,3,4,5 um:zero minimum:500 name:NUM_INSTRUCTIONS_UNSUCC : Number of unsuccessful stbcx., sthcx., stwcx., or stdcx. instructions.
+event:0xb5 counters:0,1,2,3,4,5 um:zero minimum:500 name:COMPLETED_LSU_MICROOPS : Completed Load Store Unit micro-ops. Every micro-op that goes down the LSU pipe. Includes: GPR loads / GPR stores, FPR loads / FPR stores, VR loads / VR stores, Cache ops.  Memory barriers Other LSU ops (dsn, msgsnd, mvidsplt, mviwsplt, tlbilx, tlbivax, tlbsync)
+event:0xb6 counters:0,1,2,3,4,5 um:zero minimum:500 name:COMPLETED_GPR_LOADS : GPR load micro-ops completed. This event only counts once for misaligns. Note that lmw that causes a fault may end up double-counting micro-ops -- once for first pass, once for second pass.
+event:0xb7 counters:0,1,2,3,4,5 um:zero minimum:500 name:COMPLETED_GPR_STORES : GPR store micro-ops completed. This event only counts once for misaligns. Note that stmw that causes a fault may end up double-counting micro-ops -- once for first pass, once for second pass.
+event:0xb8 counters:0,1,2,3,4,5 um:zero minimum:500 name:COMPLETED_CACHEOPS : Cache ops completed. Includes: dcba / dcbal, dcbf / dcbfep, dcbi, dcblc / dcblq, dcbst / dcbstep, dcbt / dcbtep / dcbtls, dcbtst / dcbtstep / dcbtstls, dcbz / dcbzep / dcbzl / dcbzlep, icbi / icbiep, icblc / icblq., icbt / icbtls
+event:0xb9 counters:0,1,2,3,4,5 um:zero minimum:500 name:COMPLETED_MEM_BARRIERS : Memory barriers completed. Includes: msync (sync, lwsync, elemental barriers) mbar (eieio) miso.
+event:0xba counters:0,1,2,3,4,5 um:zero minimum:500 name:COMPLETED_SFX_MICROOPS : SFX micro-ops completed.
+event:0xbb counters:0,1,2,3,4,5 um:zero minimum:500 name:COMPLETED_SINCLK_SFX_MICROOPS : SFX single-cycle micro-ops completed.
+event:0xbc counters:0,1,2,3,4,5 um:zero minimum:500 name:COMPLETED_DBLCLK_SFX_MICROOPS : SFX double-cycle micro-ops completed.
+event:0xbe counters:0,1,2,3,4,5 um:zero minimum:500 name:COMPLETED_CFX_INSTRUCTIONS : CFX instructions completed.
+event:0xbf counters:0,1,2,3,4,5 um:zero minimum:500 name:COMPLETED_SFX_CFX_INSTRUCTIONS : SFX or CFX instructions completed.
+event:0xc0 counters:0,1,2,3,4,5 um:zero minimum:500 name:COMPLETED_FPU_INSTRUCTIONS : FPU instructions completed.
+event:0xc1 counters:0,1,2,3,4,5 um:zero minimum:500 name:COMPLETED_FPR_MICROOPS_LOADS : FPR load micro-ops completed.
+event:0xc2 counters:0,1,2,3,4,5 um:zero minimum:500 name:COMPLETED_FPR_MICROOPS_STORES : FPR store micro-ops completed.
+event:0xc3 counters:0,1,2,3,4,5 um:zero minimum:500 name:COMPLETED_FPR_MICROOPS_LOADS_STORES : FPR load and store micro-ops completed.
+event:0xc4 counters:0,1,2,3,4,5 um:zero minimum:500 name:COMPLETED_FPR_SINPRECISE_LOADS_STORES : FPR single-precision load and store micro-ops completed.
+event:0xc5 counters:0,1,2,3,4,5 um:zero minimum:500 name:COMPLETED_FPR_DBLPRECISE_LOADS_STORES : FPR double-precision load and store micro-ops completed.
+event:0xc6 counters:0,1,2,3,4,5 um:zero minimum:500 name:COMPLETED_ALTIVEC_INSTRUCTIONS : AltiVec instructions completed. (non-LSU).
+event:0xc7 counters:0,1,2,3,4,5 um:zero minimum:500 name:COMPLETED_ALTIVEC_VSFX_INSTRUCTIONS : AltiVec VSFX instructions completed.
+event:0xc8 counters:0,1,2,3,4,5 um:zero minimum:500 name:COMPLETED_ALTIVEC_VCFX_INSTRUCTIONS : AltiVec VCFX instructions completed.
+event:0xc9 counters:0,1,2,3,4,5 um:zero minimum:500 name:COMPLETED_ALTIVEC_VPU_INSTRUCTIONS : AltiVec VPU instructions completed.
+event:0xca counters:0,1,2,3,4,5 um:zero minimum:500 name:COMPLETED_ALTIVEC_VFPU_INSTRUCTIONS : AltiVec VFPU instructions completed.
+event:0xcb counters:0,1,2,3,4,5 um:zero minimum:500 name:COMPLETED_VR_LOADS_MICROOPS : VR load micro-ops completed.
+event:0xcc counters:0,1,2,3,4,5 um:zero minimum:500 name:COMPLETED_VR_STORES_MICROOPS : VR store micro-ops completed.
+event:0xcd counters:0,1,2,3,4,5 um:zero minimum:500 name:VSCR_SAT_SET : Number of times the saturate bit flips from 0 to 1.
+event:0xd2 counters:0,1,2,3,4,5 um:zero minimum:500 name:CLK_SFX0_IDLE : Cycles Simple Fixed Point Unit 0 is idle.
+event:0xd3 counters:0,1,2,3,4,5 um:zero minimum:500 name:CLK_SFX1_IDLE : Cycles Simple Fixed Point Unit 1 is idle.
+event:0xd4 counters:0,1,2,3,4,5 um:zero minimum:500 name:CLK_CFX_IDLE : Cycles Complex Fixed Point Unit is idle.
+event:0xd5 counters:0,1,2,3,4,5 um:zero minimum:500 name:CLK_LSU_IDLE : Cycles Load Store Unit is idle.
+event:0xd6 counters:0,1,2,3,4,5 um:zero minimum:500 name:CLK_BU_IDLE : Cycles Branch Unit is idle.
+event:0xd7 counters:0,1,2,3,4,5 um:zero minimum:500 name:CLK_FPU_IDLE : Cycles Floating Point Unit is idle.
+event:0xd8 counters:0,1,2,3,4,5 um:zero minimum:500 name:CLK_VPU_IDLE : Cycles AltiVec Permute Unit is idle.
+event:0xd9 counters:0,1,2,3,4,5 um:zero minimum:500 name:CLK_VFPU_IDLE : Cycles AltiVec Floating Point Unit is idle.
+event:0xda counters:0,1,2,3,4,5 um:zero minimum:500 name:CLK_VSFX_IDLE : Cycles AltiVec Simple Fixed Point Unit is idle.
+event:0xdb counters:0,1,2,3,4,5 um:zero minimum:500 name:CLK_VCFX_IDLE : Cycles AltiVec Complex Fixed Point Unit is idle.
+event:0xdd counters:0,1,2,3,4,5 um:zero minimum:500 name:L1_CACHE_MISSES : Data L1 cache misses. (Includes load, store, cache ops).
+event:0xde counters:0,1,2,3,4,5 um:zero minimum:500 name:L1_CACHE_LOAD_MISSES : Data L1 cache load misses.
+event:0xdf counters:0,1,2,3,4,5 um:zero minimum:500 name:L1_CACHE_STORE_MISSES : Data L1 cache store misses.
+event:0xe0 counters:0,1,2,3,4,5 um:zero minimum:500 name:LMQ_ALLOCATED_LOADS : Loads that allocate into Load Miss Queue. (Data L1 cache misses, but may not be to different cache lines).
+event:0xe1 counters:0,1,2,3,4,5 um:zero minimum:500 name:LOAD_THREAD_MISS_COLLISION : Number of times that this thread's load hits a line that is valid for the other thread but not this thread.
+event:0xe2 counters:0,1,2,3,4,5 um:zero minimum:500 name:INTERTHREAD_STATUS_ARRAY_COLLISION : Number of times that two threads collide on status array access.
+event:0xe3 counters:0,1,2,3,4,5 um:zero minimum:500 name:NUM_SGB_ALLOC : Number of Store Gather Buffer allocates.
+event:0xe4 counters:0,1,2,3,4,5 um:zero minimum:500 name:NUM_SGB_GATHERS : Number of Store Gather Buffer gathers.
+event:0xe5 counters:0,1,2,3,4,5 um:zero minimum:500 name:NUM_SGB_OVERFLOWS : Number of Store Gather Buffer overflows. (Causes SGB full condition when additional store request is made).
+event:0xe6 counters:0,1,2,3,4,5 um:zero minimum:500 name:NUM_SGB_PROMOTIONS : Number of Store Gather Buffer promotions.
+event:0xe7 counters:0,1,2,3,4,5 um:zero minimum:500 name:NUM_SGB_INORDER_PROMOTIONS : Number of Store Gather Buffer in-order promotions. (Also includes oldest-entry timeout condition).
+event:0xe8 counters:0,1,2,3,4,5 um:zero minimum:500 name:NUM_SGB_OUTOFORDER_PROMOTIONS : Number of Store Gather Buffer out-of-order promotions.
+event:0xe9 counters:0,1,2,3,4,5 um:zero minimum:500 name:NUM_SGB_HP_PROMOTIONS : Number of Store Gather Buffer high-priority promotions. (Load hits on pending store).
+event:0xea counters:0,1,2,3,4,5 um:zero minimum:500 name:NUM_SGB_MISO_PROMOTIONS : Number of Store Gather Buffer miso promotions. (Load hits on pending store).
+event:0xeb counters:0,1,2,3,4,5 um:zero minimum:500 name:NUM_SGB_WATERMARK_PROMOTIONS : Number of Store Gather Buffer watermark promotions.
+event:0xec counters:0,1,2,3,4,5 um:zero minimum:500 name:NUM_SGB_OVERFLOW_PROMOTIONS : Number of Store Gather Buffer overflow promotions.
+event:0xed counters:0,1,2,3,4,5 um:zero minimum:500 name:CLK_DLAQ_FULL : Number of cycles the DLink Age Queue is full.
+event:0xee counters:0,1,2,3,4,5 um:zero minimum:500 name:TIMES_DLAQ_FULL : Number of times the DLink Age Queue is full.
+event:0xef counters:0,1,2,3,4,5 um:zero minimum:500 name:CLK_LRSAQ_FULL : Number of cycles the Load Reservation Set Age Queue is full.
+event:0xf0 counters:0,1,2,3,4,5 um:zero minimum:500 name:TIMES_LRSAQ_FULL : Number of times the Load Reservation Set Age Queue is full.
+event:0xf1 counters:0,1,2,3,4,5 um:zero minimum:500 name:CLK_FWDAQ_FULL : Number of cycles the Forward Age Queue is full.
+event:0xf2 counters:0,1,2,3,4,5 um:zero minimum:500 name:TIMES_FWDAQ_FULL : Number of times the Forward Age Queue is full.
+event:0xf3 counters:0,1,2,3,4,5 um:zero minimum:500 name:NUM_FWD_STQ_COLLISION_TIMES : Number of times a Store Queue collision is forwardable. The following cases are not forwardable: store address + size does not contain the load, cache-inhibited store, denormalized, floating point store, stcx, guarded load.
+event:0xf4 counters:0,1,2,3,4,5 um:zero minimum:500 name:NUM_FWD_STQ_COLLISION_TIMES_DATA_RDY : Number of times a Store Queue collision is forwardable and is ready with data to forward.
+event:0xf5 counters:0,1,2,3,4,5 um:zero minimum:500 name:NUM_FWD_STQ_COLLISION_TIMES_DATA_NORDY : Number of times a Store Queue collision is forwardable but is not ready with data to forward.
+event:0xf6 counters:0,1,2,3,4,5 um:zero minimum:500 name:NUM_NOFWD_STQ_COLLISION_TIMES : Number of times a Store Queue collision is not forwardable and must wait until the store leaves the Store Queue.
+event:0xf7 counters:0,1,2,3,4,5 um:zero minimum:500 name:NUM_FWD_STQ_COLLISION_CLK : Number of cycles a Store Queue collision is forwardable. (Number of cycles from the detection of a forwardable Store Queue entry until the load is replayed in stg1).
+event:0xf8 counters:0,1,2,3,4,5 um:zero minimum:500 name:NUM_FWD_STQ_COLLISION_CLK_DATA_RDY : Number of cycles a Store Queue collision is forwardable and is ready with data to forward. (Number of cycles from the detection of a forwardable Store Queue entry with valid data until the load is replayed in stg1).
+event:0xf9 counters:0,1,2,3,4,5 um:zero minimum:500 name:NUM_FWD_STQ_COLLISION_CLK_DATA_NORDY : Number of cycles a Store Queue collision is forwardable but is not ready with data to forward. (Number of cycles from the detection of a forwardable Store Queue entry without valid data until the load is replayed in stg1).
+event:0xfa counters:0,1,2,3,4,5 um:zero minimum:500 name:NUM_NOFWD_STQ_COLLISION_CLK : Number of cycles a Store Queue collision is not forwardable and has to wait until the store leaves the Store Queue. (Number of cycles from the detection of a non-forwardable Store Queue entry until the load is replayed in stg1).
+event:0xfb counters:0,1,2,3,4,5 um:zero minimum:500 name:NUM_FALSE_EA_COLLISION : Number of times the lower 12-bits of EA matched but the upper bits did not, leading to a false load-on-store replay. Cycle penalty is 4x the number of times.
+event:0xfc counters:0,1,2,3,4,5 um:zero minimum:500 name:NUM_LSO_BUS_COLLISION : Number of LS0 result bus collisions. Cycle penalty is 3x this measurement.
+event:0xfd counters:0,1,2,3,4,5 um:zero minimum:500 name:NUM_INTERTHREAD_DBLWORKD_BANK_COLLISION : Number of inter-thread double-word bank collisions. Measures when both threads attempt to access the same double-word bank. Cycle penalty is 3x this measurement.
+event:0xfe counters:0,1,2,3,4,5 um:zero minimum:500 name:L1_CACHE_IM : Instruction L1 cache demand fetch misses. (Includes icbtls. Does not include prefetch).
+event:0x100 counters:0,1,2,3,4,5 um:zero minimum:500 name:IMMU_MISSES : Counts misses in the level 1 Instruction MMU.
+event:0x101 counters:0,1,2,3,4,5 um:zero minimum:500 name:IMMU_TLB4K_HITS : Counts hits in the level 1 Instruction MMU TLB-4K.
+event:0x102 counters:0,1,2,3,4,5 um:zero minimum:500 name:IMMU_VSP_HITS : Counts hits in the level 1 Instruction MMU VSP.
+event:0x103 counters:0,1,2,3,4,5 um:zero minimum:500 name:CLK_IMMU_HW_TABLEWALK : Counts IMMU cycles spent in hardware tablewalk. This represents the cycles from the point where the L2 MMU miss occurs to when the page table walk completes with a valid translation or exception.
+event:0x104 counters:0,1,2,3,4,5 um:zero minimum:500 name:DMMU_MISSES : Counts misses in the level 1 Data MMU. (Does not count replayed operations).
+event:0x105 counters:0,1,2,3,4,5 um:zero minimum:500 name:DMMU_TLB4K_HITS : Counts hits in the level 1 Data MMU TLB-4K. (Does not count replayed operations).
+event:0x106 counters:0,1,2,3,4,5 um:zero minimum:500 name:DMMU_VSP_HITS : Counts hits in the level 1 Data MMU VSP. (Does not count replayed operations).
+event:0x107 counters:0,1,2,3,4,5 um:zero minimum:500 name:CLK_DMMU_HW_TABLEWALK : Counts DMMU cycles spent in hardware tablewalk. This represents the cycles from the point where the L2 MMU miss occurs to when the page table walk completes with a valid translation or exception.
+event:0x108 counters:0,1,2,3,4,5 um:zero minimum:500 name:L2MMU_MISSES : Counts level 2 MMU misses. (Does not count misses that occur due to dcbt / dcbtst / dcba / dcbal instructions that fail translation and are no-oped. Does not count misses in L2MMU-VSP when looking up an indirect entry).
+event:0x109 counters:0,1,2,3,4,5 um:zero minimum:500 name:L2MMU_4K_HITS : Counts level 2 MMU hits in L2MMU-4K.
+event:0x10a counters:0,1,2,3,4,5 um:zero minimum:500 name:L2MMU_VSP_HITS : Counts level 2 MMU hits in L2MMU-VSP. (Does not count indirect lookups).
+event:0x10b counters:0,1,2,3,4,5 um:zero minimum:500 name:L2MMU_INDIRECT_MISSES : Counts level 2 MMU indirect misses. This represents indirect entry lookups that do not have a matching indirect entry.
+event:0x10c counters:0,1,2,3,4,5 um:zero minimum:500 name:L2MMU_INDIRECT_VALID_MISSES : Counts level 2 MMU indirect valid misses. This occurs when the indirect entry is valid, but the corresponding PTE[V] = 0 or the permissions in the PTE are not sufficient for the requested access.
+event:0x10d counters:0,1,2,3,4,5 um:zero minimum:500 name:LRAT_MISSES : Counts Logical to Real Address Translation misses. This includes LRAT misses from tlbwe instructions or from page table translations.
+event:0x110 counters:0,1,2,3,4,5 um:zero minimum:500 name:CLK_LMQ_LOSE_DLINK_DUE_SGB : Cycles the Load Miss Queue loses DLINK arbitration due to the Store Gather Buffer.
+event:0x111 counters:0,1,2,3,4,5 um:zero minimum:500 name:CLK_SGB_LOSE_DLINK_DUE_LMQ : Cycles the Store Gather Buffer loses DLINK arbitration due to the Load Miss Queue.
+event:0x112 counters:0,1,2,3,4,5 um:zero minimum:500 name:CLK_THREAD_LOSE_DLINK_DUE_OTHER_THREAD : Cycles thread loses DLINK arbitration due to other thread.
+event:0x116 counters:0,1,2,3,4,5 um:zero minimum:500 name:DECODE_MASK_VALUE : One mask/value pair that allows instructions to be counted in Decode.
+event:0x1bb counters:0,1,2,3,4,5 um:zero minimum:500 name:SHR_L2_DLINK_REQ : Number of DLINK requests made from core to Shared L2.
+event:0x1bc counters:0,1,2,3,4,5 um:zero minimum:500 name:SHR_L2_ILINK_REQ : Number of ILINK requests made from core to Shared L2. (Includes instruction fetches and L2MMU hardware tablewalk requests).
+event:0x1bd counters:0,1,2,3,4,5 um:zero minimum:500 name:SHR_L2_RLINK_REQ : Number of RLINK requests made from Shared L2 to core. (back invalidates, stashes, barriers).
+event:0x1be counters:0,1,2,3,4,5 um:zero minimum:500 name:SHR_L2_BLINK_REQ : Number of BLINK requests made from Shared L2 to core. (back invalidates, stashes, barriers).
+event:0x1bf counters:0,1,2,3,4,5 um:zero minimum:500 name:SHR_L2_CLINK_REQ : Number of CLINK requests made from Shared L2 to core. (back invalidates, stashes, barriers).
+event:0x1c8 counters:0,1,2,3,4,5 um:zero minimum:500 name:L2_HITS : Number of L2 Cache hits. Counts 0, 1, 2, 3, or 4 per cycle.
+event:0x1c9 counters:0,1,2,3,4,5 um:zero minimum:500 name:L2_MISSES : Number of L2 Cache misses. Counts 0, 1, 2, 3, or 4 per cycle.
+event:0x1ca counters:0,1,2,3,4,5 um:zero minimum:500 name:L2_DEMAND_ACCESS : Number of L2 Cache demand accesses. Counts 0, 1, 2, 3, or 4 per cycle.
+event:0x1cb counters:0,1,2,3,4,5 um:zero minimum:500 name:L2_ACCESSES : Number of L2 Cache accesses from all sources (demand, reload, snoop, etc). Counts 0, 1, 2, 3, or 4 per cycle.
+event:0x1cc counters:0,1,2,3,4,5 um:zero minimum:500 name:L2_STORE_ALLOCATE : Number of L2 Cache store allocates. Counts 0, 1, 2, 3, or 4 per cycle.
+event:0x1cd counters:0,1,2,3,4,5 um:zero minimum:500 name:L2_INSTRUCTIONS_ACCESS : Number of L2 Cache instruction accesses. Counts 0, 1, 2, 3, or 4 per cycle.
+event:0x1ce counters:0,1,2,3,4,5 um:zero minimum:500 name:L2_DATA_ACCESS : Number of L2 Cache data accesses. Counts 0, 1, 2, 3, or 4 per cycle.
+event:0x1cf counters:0,1,2,3,4,5 um:zero minimum:500 name:L2_INSTRUCTIONS_MISSES : Number of L2 Cache instruction misses. Counts 0, 1, 2, 3, or 4 per cycle.
+event:0x1d0 counters:0,1,2,3,4,5 um:zero minimum:500 name:L2_DATA_MISSES : Number of L2 Cache data misses. Counts 0, 1, 2, 3, or 4 per cycle.
+event:0x1d1 counters:0,1,2,3,4,5 um:zero minimum:500 name:L2_HITS_PER_THREAD : Number of times this core/thread hits in the L2 Cache. Counts 0, 1, 2, 3, or 4 per cycle.
+event:0x1d2 counters:0,1,2,3,4,5 um:zero minimum:500 name:L2_MISSES_PER_THREAD : Number of times this core/thread misses in the L2 Cache. Counts 0, 1, 2, 3, or 4 per cycle.
+event:0x1d3 counters:0,1,2,3,4,5 um:zero minimum:500 name:L2_DEMAND_ACCESS_PER_THREAD : Number of times this core/thread makes a demand access to the L2 Cache. Counts 0, 1, 2, 3, or 4 per cycle.
+event:0x1d4 counters:0,1,2,3,4,5 um:zero minimum:500 name:L2_STORE_ALLOC_PER_THREAD : Number of times a store from this core/thread allocates in the L2 Cache. Counts 0, 1, 2, 3, or 4 per cycle.
+event:0x1d5 counters:0,1,2,3,4,5 um:zero minimum:500 name:L2_INSTRUCTIONS_ACCESS_PER_THREAD : Number of times an instruction from this core/thread accesses the L2 Cache. Counts 0, 1, 2, 3, or 4 per cycle.
+event:0x1d6 counters:0,1,2,3,4,5 um:zero minimum:500 name:L2_DATA_ACCESS_PER_THREAD : Number of times a data operation from this core/thread accesses the L2 Cache. Counts 0, 1, 2, 3, or 4 per cycle.
+event:0x1d7 counters:0,1,2,3,4,5 um:zero minimum:500 name:L2_INSTRUCTION_MISSES_PER_THREAD : Number of times an instruction from this core/thread misses in the L2 Cache. Counts 0, 1, 2, 3, or 4 per cycle.
+event:0x1d8 counters:0,1,2,3,4,5 um:zero minimum:500 name:L2_DATA_MISSES_PER_THREAD : Number of times a data operation from this core/thread misses in the L2 Cache. Counts 0, 1, 2, 3, or 4 per cycle.
+event:0x1d9 counters:0,1,2,3,4,5 um:zero minimum:500 name:L2_RELOAD_FROM_CORENET : Number of L2 Cache reloads from CoreNet. Counts 0, 1, 2, 3, or 4 per cycle.
+event:0x1da counters:0,1,2,3,4,5 um:zero minimum:500 name:L2_IN_STASH_REQ : Number of incoming L2 Cache stash requests. Counts 0, 1, 2, 3, or 4 per cycle.
+event:0x1db counters:0,1,2,3,4,5 um:zero minimum:500 name:L2_STASH_REQ_DOWNGRD_TO_SNOOPS : Number of incoming L2 Cache stash requests downgraded to snoops. Counts 0, 1, 2, 3, or 4 per cycle.
+event:0x1dc counters:0,1,2,3,4,5 um:zero minimum:500 name:L2_SNOOPS_HITS : Number of L2 Cache snoop hits. Counts 0, 1, 2, 3, or 4 per cycle.
+event:0x1dd counters:0,1,2,3,4,5 um:zero minimum:500 name:L2_SNOOPS_MINT : Number of L2 Cache snoops causing MINT.
+event:0x1de counters:0,1,2,3,4,5 um:zero minimum:500 name:L2_SNOOPS_SINT : Number of L2 Cache snoops causing SINT.
+event:0x1df counters:0,1,2,3,4,5 um:zero minimum:500 name:L2_SNOOPS_PUSHES : Number of L2 Cache snoop pushes.
+event:0x1e0 counters:0,1,2,3,4,5 um:zero minimum:500 name:CLK_BIB_STALL : Stall for Back Invalidate Buffer entry (cycles). Counts 0, 1, 2, 3, or 4 per cycle.
+event:0x1e2 counters:0,1,2,3,4,5 um:zero minimum:500 name:CLK_RLT_STALL : Stall for Reload Table entry (cycles). Counts 0, 1, 2, 3, or 4 per cycle.
+event:0x1e4 counters:0,1,2,3,4,5 um:zero minimum:500 name:CLK_RLFQ_STALL : Stall for Reload Fold Queue entry (cycles). Counts 0, 1, 2, 3, or 4 per cycle.
+event:0x1e6 counters:0,1,2,3,4,5 um:zero minimum:500 name:CLK_DTQ_STALL : Stall for Data Transaction Queue entry (cycles). Counts 0, 1, 2, 3, or 4 per cycle.
+event:0x1e8 counters:0,1,2,3,4,5 um:zero minimum:500 name:CLK_COB_STALL : Stall for Castout Buffer entry (cycles). Counts 0, 1, 2, 3, or 4 per cycle.
+event:0x1ea counters:0,1,2,3,4,5 um:zero minimum:500 name:CLK_WDB_STALL : Stall for Write Data Buffer entry (cycles). Counts 0, 1, 2, 3, or 4 per cycle.
+event:0x1ec counters:0,1,2,3,4,5 um:zero minimum:500 name:CLK_RLDB_STALL : Stall for Reload Data Buffer entry (cycles). Counts 0, 1, 2, 3, or 4 per cycle.
+event:0x1ee counters:0,1,2,3,4,5 um:zero minimum:500 name:CLK_SNPQ_STALL : Stall for Snoop Queue entry (cycles).
+event:0x1fa counters:0,1,2,3,4,5 um:zero minimum:500 name:BIU_MASTER_REQ : Master transaction starts. (Number of AOut sent to CoreNet).
+event:0x1fb counters:0,1,2,3,4,5 um:zero minimum:500 name:BIU_MASTER_GLOBAL_REQ : Master transaction starts that are global. (Number of AOut with M=1 sent to CoreNet).
+event:0x1fc counters:0,1,2,3,4,5 um:zero minimum:500 name:BIU_MASTER_DATA_SIDE_REQ : Master data-side transaction starts. (Number of D-side AOut sent to CoreNet).
+event:0x1fd counters:0,1,2,3,4,5 um:zero minimum:500 name:BIU_MASTER_INSTRUCTION_SIDE_REQ : Master instruction-side transaction starts. (Number of I-side AOut sent to CoreNet).
+event:0x1fe counters:0,1,2,3,4,5 um:zero minimum:500 name:L2_STASH_REQ : Stash request on AIn matches stash IDs for core or L2.
+event:0x1ff counters:0,1,2,3,4,5 um:zero minimum:500 name:L2_SNOOP_REQ : Externally generated snoop requests. (Number of AIn from CoreNet not from self).
+
diff --git a/events/ppc/e6500/unit_masks b/events/ppc/e6500/unit_masks
new file mode 100644
index 0000000..b7e7a23
--- /dev/null
+++ b/events/ppc/e6500/unit_masks
@@ -0,0 +1,4 @@
+# e6500 possible unit masks
+#
+name:zero type:mandatory default:0x0
+	0x0 no unit mask
diff --git a/events/ppc64/architected_events_v1/events b/events/ppc64/architected_events_v1/events
new file mode 100644
index 0000000..a52d9ee
--- /dev/null
+++ b/events/ppc64/architected_events_v1/events
@@ -0,0 +1,62 @@
+#
+#  Copyright OProfile authors
+#  Copyright (c) International Business Machines, 2013.
+#  Contributed by Maynard Johnson <maynardj@us.ibm.com>.
+#
+#  IBM Power Architected Events -- Version 1: Power ISA 2.07
+
+# Manually add CYCLES for backward compatibility for default event
+event:0x100f0 counters:0 um:zero minimum:100000 name:CYCLES : Cycles
+
+event:0x100f2 counters:0 um:zero minimum:100000 name:PM_1PLUS_PPC_CMPL : 1 or more ppc insts finished (completed).
+event:0x400f2 counters:3 um:zero minimum:100000 name:PM_1PLUS_PPC_DISP : Cycles at least one Instr Dispatched. Could be a group with only microcode. Issue HW016521
+event:0x100fa counters:0 um:zero minimum:100000 name:PM_ANY_THRD_RUN_CYC : Any thread in run_cycles (was one thread in run_cycles).
+event:0x400f6 counters:3 um:zero minimum:10000 name:PM_BR_MPRED_CMPL : Number of Branch Mispredicts.
+event:0x200fa counters:1 um:zero minimum:10000 name:PM_BR_TAKEN_CMPL : Branch Taken.
+event:0x1e counters:0,1,2,3 um:zero minimum:100000 name:PM_CYC : Cycles.
+event:0x200fe counters:1 um:zero minimum:10000 name:PM_DATA_FROM_L2MISS : Demand LD - L2 Miss (not L2 hit).
+event:0x300fe counters:2 um:zero minimum:10000 name:PM_DATA_FROM_L3MISS : Demand LD - L3 Miss (not L2 hit and not L3 hit).
+event:0x400fe counters:3 um:zero minimum:10000 name:PM_DATA_FROM_MEM : Data cache reload from memory (including L4).
+event:0x300fc counters:2 um:zero minimum:10000 name:PM_DTLB_MISS : Data PTEG Reloaded (DTLB Miss).
+event:0x200f8 counters:1 um:zero minimum:10000 name:PM_EXT_INT : external interrupt.
+event:0x100f4 counters:0 um:zero minimum:10000 name:PM_FLOP : Floating Point Operations Finished.
+event:0x400f8 counters:3 um:zero minimum:10000 name:PM_FLUSH : Flush (any type).
+event:0x100f8 counters:0 um:zero minimum:10000 name:PM_GCT_NOSLOT_CYC : Pipeline empty (No itags assigned, no GCT slots used).
+event:0x100f6 counters:0 um:zero minimum:10000 name:PM_IERAT_RELOAD : IERAT Reloaded (Miss).
+event:0x200f2 counters:1 um:zero minimum:100000 name:PM_INST_DISP : PPC Dispatched.
+event:0x300fa counters:2 um:zero minimum:10000 name:PM_INST_FROM_L3MISS : Inst from L3 miss.
+event:0x400fc counters:3 um:zero minimum:10000 name:PM_ITLB_MISS : ITLB Reloaded.
+event:0x300f6 counters:2 um:zero minimum:10000 name:PM_L1_DCACHE_RELOAD_VALID : DL1 reloaded due to Demand Load.
+event:0x200fd counters:1 um:zero minimum:10000 name:PM_L1_ICACHE_MISS : Demand iCache Miss.
+event:0x3e054 counters:2 um:zero minimum:10000 name:PM_LD_MISS_L1 : Load Missed L1.
+event:0x200f6 counters:1 um:zero minimum:10000 name:PM_LSU_DERAT_MISS : DERAT Reloaded (Miss).
+event:0x301e4 counters:2 um:zero minimum:1000 name:PM_MRK_BR_MPRED_CMPL : Marked Branch Mispredicted.
+event:0x101e2 counters:0 um:zero minimum:1000 name:PM_MRK_BR_TAKEN_CMPL : Marked Branch Taken.
+event:0x401e8 counters:3 um:zero minimum:1000 name:PM_MRK_DATA_FROM_L2MISS : Data cache reload L2 miss.
+event:0x201e4 counters:1 um:zero minimum:1000 name:PM_MRK_DATA_FROM_L3MISS : The processor's data cache was reloaded from a location other than the local core's L3 due to a marked load.
+event:0x201e0 counters:1 um:zero minimum:1000 name:PM_MRK_DATA_FROM_MEM : The processor's data cache was reloaded from a memory location including L4 from local, remote, or distant due to a marked load.
+event:0x301e6 counters:2 um:zero minimum:1000 name:PM_MRK_DERAT_MISS : Erat Miss (TLB Access) All page sizes.
+event:0x401e4 counters:3 um:zero minimum:1000 name:PM_MRK_DTLB_MISS : Marked dtlb miss.
+event:0x401e0 counters:3 um:zero minimum:1000 name:PM_MRK_INST_CMPL : marked instruction completed.
+event:0x101e0 counters:0 um:zero minimum:1000 name:PM_MRK_INST_DISP : Marked Instruction dispatched.
+event:0x401e6 counters:3 um:zero minimum:1000 name:PM_MRK_INST_FROM_L3MISS : n/a
+event:0x101e4 counters:0 um:zero minimum:1000 name:PM_MRK_L1_ICACHE_MISS : Marked L1 Icache Miss.
+event:0x101ea counters:0 um:zero minimum:1000 name:PM_MRK_L1_RELOAD_VALID : Marked demand reload.
+event:0x201e2 counters:1 um:zero minimum:1000 name:PM_MRK_LD_MISS_L1 : Marked DL1 Demand Miss counted at exec time.
+event:0x10134 counters:0 um:zero minimum:1000 name:PM_MRK_ST_CMPL : Marked store completed.
+event:0x600f4 counters:5 um:zero minimum:100000 name:PM_RUN_CYC : Run_cycles.
+event:0x500fa counters:4 um:zero minimum:100000 name:PM_RUN_INST_CMPL : Run_Instructions.
+event:0x400f4 counters:3 um:zero minimum:10000 name:PM_RUN_PURR : Run_PURR.
+event:0x200f0 counters:1 um:zero minimum:10000 name:PM_ST_FIN : Store Instructions Finished (store sent to nest).
+event:0x300f0 counters:2 um:zero minimum:10000 name:PM_ST_MISS_L1 : Store Missed L1.
+event:0x300f8 counters:2 um:zero minimum:10000 name:PM_TB_BIT_TRANS : timebase event.
+event:0x300f4 counters:2 um:zero minimum:100000 name:PM_THRD_CONC_RUN_INST : Concurrent Run Instructions.
+event:0x301ea counters:2 um:zero minimum:1000 name:PM_THRESH_EXC_1024 : Threshold counter exceeded a value of 1024.
+event:0x401ea counters:3 um:zero minimum:1000 name:PM_THRESH_EXC_128 : Threshold counter exceeded a value of 128.
+event:0x401ec counters:3 um:zero minimum:1000 name:PM_THRESH_EXC_2048 : Threshold counter exceeded a value of 2048.
+event:0x101e8 counters:0 um:zero minimum:1000 name:PM_THRESH_EXC_256 : Threshold counter exceeded a value of 256.
+event:0x201e6 counters:1 um:zero minimum:1000 name:PM_THRESH_EXC_32 : Threshold counter exceeded a value of 32.
+event:0x101e6 counters:0 um:zero minimum:1000 name:PM_THRESH_EXC_4096 : Threshold counter exceeded a value of 4096.
+event:0x201e8 counters:1 um:zero minimum:1000 name:PM_THRESH_EXC_512 : Threshold counter exceeded a value of 512.
+event:0x301e8 counters:2 um:zero minimum:1000 name:PM_THRESH_EXC_64 : Threshold counter exceeded a value of 64.
+event:0x101ec counters:0 um:zero minimum:10000 name:PM_THRESH_MET : threshold exceeded.
diff --git a/events/ppc64/ibm-compat-v1/unit_masks b/events/ppc64/architected_events_v1/unit_masks
similarity index 78%
rename from events/ppc64/ibm-compat-v1/unit_masks
rename to events/ppc64/architected_events_v1/unit_masks
index 170c53b..999ebfe 100644
--- a/events/ppc64/ibm-compat-v1/unit_masks
+++ b/events/ppc64/architected_events_v1/unit_masks
@@ -1,6 +1,6 @@
 #
 # Copyright OProfile authors
-# Copyright (c) International Business Machines, 2009.
+# Copyright (c) International Business Machines, 2013.
 # Contributed by Maynard Johnson <maynardj@us.ibm.com>.
 #
 # ppc64 compat mode version 1 possible unit masks
diff --git a/events/ppc64/cell-be/events b/events/ppc64/cell-be/events
deleted file mode 100644
index 3bcb393..0000000
--- a/events/ppc64/cell-be/events
+++ /dev/null
@@ -1,517 +0,0 @@
-#ppc64 Cell Broadband Engine events
-#
-# Copyright OProfile authors
-#
-#(C) COPYRIGHT International Business Machines Corp. 2006
-# Contributed by Maynard Johnson <maynardj@us.ibm.com>
-#
-#
-#  As many as 4 signals may be specified when they are from the same group.
-#  In some instances, signals from other groups in the same island or one
-#  other island may also be specified.
-#
-#  Each signal is assigned to a unique counter.  There are 4 32-bit hardware
-#  counters.  The signals are defined in the Cell Broadband Engine
-#  Performance manual.
-#
-#  Each event is given a unique event number.  The event number is used by the
-#  Oprofile code to resolve event names for the postprocessing.  This is done
-#  to preserve compatibility with the rest of the Oprofile code.  The event
-#  number format group_num followed by the counter number for the event within
-#  the group.
-
-# Signal Default
-event:0x1 counters:0,1,2,3 um:zero minimum:100000 name:CYCLES : Processor Cycles
-event:0x2 counters:0,1,2,3 um:zero minimum:60000 name:SPU_CYCLES : SPU Processor Cycles
-
-
-# Cell BE Island 2 - PowerPC Processing Unit (PPU)
-
-# CBE Signal Group 21 - PPU Instruction Unit - Group 1 (NClk)
-event:0x834 counters:0,1,2,3 um:PPU_01_edges           minimum:10000 	name:Branch_Commit		: Branch instruction committed. 
-event:0x835 counters:0,1,2,3 um:PPU_01_edges           minimum:10000	name:Branch_Flush		: Branch instruction that caused a misprediction flush is committed. Branch misprediction includes: (1) misprediction of taken or not-taken on conditional branch, (2) misprediction of branch target address on bclr[1] and bcctr[1]. 
-event:0x836 counters:0,1,2,3 um:PPU_01_cycles          minimum:10000	name:Ibuf_Empty		: Instruction buffer empty. 
-event:0x837 counters:0,1,2,3 um:PPU_01_edges           minimum:10000	name:IERAT_Miss		: Instruction effective-address-to-real-address translation (I-ERAT) miss. 
-event:0x838 counters:0,1,2,3 um:PPU_01_cycles_or_edges minimum:10000	name:IL1_Miss_Cycles	: L1 Instruction cache miss cycles. Counts the cycles from the miss event until the returned instruction is dispatched or cancelled due to branch misprediction, completion restart, or exceptions (see Note 1). 
-event:0x83a counters:0,1,2,3 um:PPU_01_cycles          minimum:10000	name:Dispatch_Blocked	: Valid instruction available for dispatch, but dispatch is blocked.
-event:0x83d counters:0,1,2,3 um:PPU_01_edges           minimum:10000	name:Instr_Flushed		: Instruction in pipeline stage EX7 causes a flush. 
-event:0x83f counters:0,1,2,3 um:PPU_01_edges           minimum:10000	name:PPC_Commit		: Two PowerPC instructions committed. For microcode sequences, only the last microcode operation is counted. Committed instructions are counted two at a time. If only one instruction has committed for a given cycle, this event will not be raised until another instruction has been committed in a future cycle. 
-
-
-# CBE Signal Group 22 - PPU Execution Unit (NClk)
-event:0x89a counters:0,1,2,3 um:PPU_01_cycles          minimum:10000	name:DERAT_Miss		: Data effective-address-to-real-address translation (D-ERAT) miss. Not speculative. 
-event:0x89b counters:0,1,2,3 um:PPU_01_cycles          minimum:10000	name:Store_Request		: Store request counted at the L2 interface. Counts microcoded PPE sequences more than once (see Note 1 for exceptions). (Thread 0 and 1)
-event:0x89c counters:0,1,2,3 um:PPU_01_cycles          minimum:10000	name:Load_Valid		: Load valid at a particular pipe stage. Speculative, since flushed operations are counted as well. Counts microcoded PPE sequences more than once. Misaligned flushes might be counted the first time as well. Load operations include all loads that read data from the cache, dcbt and dcbtst. Does not include load Vector/SIMD multimedia extension pattern instructions. 
-event:0x89d counters:0,1,2,3 um:PPU_01_cycles          minimum:10000	name:DL1_Miss		: L1 D-cache load miss. Pulsed when there is a miss request that has a tag miss but not an ERAT miss. Speculative, since flushed operations are counted as well. 
-
-
-# Cell BE Island 3 - PowerPC Storage Subsystem (PPSS)
-
-# CBE Signal Group 31 - PPSS Bus Interface Unit (NClk/2)
-event:0xc1c counters:0,1,2,3 um:PPU_2_edges           minimum:10000	name:rcv_mmio_rd_ev	: Load from MFC memory-mapped I/O (MMIO) space.
-event:0xc1d counters:0,1,2,3 um:PPU_2_edges           minimum:10000	name:rcv_mmio_wr_ev	: Stores to MFC MMIO space.
-event:0xc22 counters:0,1,2,3 um:PPU_2_edges           minimum:10000	name:even_token_req_ev	: Request token for even memory bank numbers 0-14.
-event:0xc2b counters:0,1,2,3 um:PPU_2_edges           minimum:10000	name:rcv_data_ev		: Receive 8-beat data from the Element Interconnect Bus (EIB).
-event:0xc2c counters:0,1,2,3 um:PPU_2_edges           minimum:10000	name:send_data_ev		: Send 8-beat data to the EIB.
-event:0xc2d counters:0,1,2,3 um:PPU_2_edges           minimum:10000	name:send_cmd_ev		: Send a command to the EIB; includes retried commands.
-event:0xc2e counters:0,1,2,3 um:PPU_2_cycles          minimum:10000	name:dgnt_dly_cy		: Cycles between data request and data grant.
-event:0xc33 counters:0,1,2,3 um:PPU_2_cycles          minimum:10000	name:nc_wr_not_emp_cy	: The five-entry Non-Cacheable Unit (NCU) Store Command queue not empty.
-
-
-# CBE Signal Group 32 - PPSS L2 Cache Controller - Group 1 (NClk/2)
-event:0xc80 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:cache_hit		: Cache hit for core interface unit (CIU) loads and stores.
-event:0xc81 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:cache_miss		: Cache miss for CIU loads and stores.
-event:0xc84 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:load_miss		: CIU load miss.
-event:0xc85 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:store_miss		: CIU store to Invalid state (miss).
-event:0xc87 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:larx_miss_th1		: Load word and reserve indexed (lwarx/ldarx) for Thread 0 hits Invalid cache state
-event:0xc8e counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:stcx_miss_th1		: Store word conditional indexed (stwcx/stdcx) for Thread 0 hits Invalid cache state when reservation is set.
-event:0xc99 counters:0,1,2,3 um:PPU_02_cycles          minimum:10000	name:all_snp_busy		: All four snoop state machines busy.
-
-# CBE Signal Group 33 - PPSS L2 Cache Controller - Group 2 (NClk/2)
-event:0xce8 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:dclaim_srt		: Data line claim (dclaim) that received good combined response; includes store/stcx/dcbz to Shared (S), Shared Last (SL),or Tagged (T) cache state; does not include dcbz to Invalid (I) cache state (see Note 1).
-event:0xcef counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:dclaim_to_rwitm	: Dclaim converted into rwitm; may still not get to the bus if stcx is aborted (see Note 2).
-event:0xcf0 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:store_mxe		: Store to modified (M), modified unsolicited (MU), or exclusive (E) cache state.
-event:0xcf1 counters:0,1,2,3 um:PPU_02_cycles          minimum:10000	name:stq_full		: 8-entry store queue (STQ) full.
-event:0xcf2 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:store_rc_ack		: Store dispatched to RC machine is acknowledged.
-event:0xcf3 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:gather_store		: Gatherable store (type = 00000) received from CIU.
-event:0xcf6 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:snp_push		: Snoop push.
-event:0xcf7 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:intv_snode_er		: Send intervention from (SL | E) cache state to a destination within the same CBE chip.
-event:0xcf8 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:intv_snode_mx		: Send intervention from (M | MU) cache state to a destination within the same CBE chip.
-event:0xcfd counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:snp_retry		: Respond with Retry to a snooped request due to one of the following conflicts: read-and-claim state machine (RC) full address, castout (CO) congruence class, snoop (SNP) machine full address, all snoop machines busy, directory lockout, or parity error.
-event:0xcfe counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:snp_busy_retry	: Respond with Retry to a snooped request because all snoop machines are busy.
-event:0xcff counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:snp_mx_to_est		: Snooped response causes a cache state transition from (M | MU) to (E | S | T).
-event:0xd00 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:snp_e_to_s		: Snooped response causes a cache state transition from E to S.
-event:0xd01 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:snp_esrt_to_i		: Snooped response causes a cache state transition from (E | SL | S | T) to Invalid (I).
-event:0xd02 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:snp_mx_to_i		: Snooped response causes a cache state transition from (M | MU) to I.
-
-# CBE Signal Group 34 - PPSS L2 Cache Controller - Group 3 (NClk/2)
-event:0xd54 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:larx_miss		: Load and reserve indexed (lwarx/ldarx) for Thread 1 hits Invalid cache state.
-event:0xd5b counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:stcx_miss_th2		: Store conditional indexed (stwcx/stdcx) for Thread 1 hits Invalid cache state.
-
-# CBE Signal Group 35 - PPSS Non-Cacheable Unit (NClk/2)
-event:0xdac counters:0,1,2,3 um:PPU_0_edges           minimum:10000	name:st_req_any		: Non-cacheable store request received from CIU; includes all synchronization operations such as sync and eieio.
-event:0xdad counters:0,1,2,3 um:PPU_0_edges           minimum:10000	name:st_req_sync		: sync received from CIU.
-event:0xdb0 counters:0,1,2,3 um:PPU_0_edges           minimum:10000	name:st_req_store		: Non-cacheable store request received from CIU; includes only stores.
-event:0xdb2 counters:0,1,2,3 um:PPU_0_edges           minimum:10000	name:st_req_eieio		: eieio received from CIU.
-event:0xdb3 counters:0,1,2,3 um:PPU_0_edges           minimum:10000	name:st_req_tlbie		: tlbie received from CIU.
-event:0xdb4 counters:0,1,2,3 um:PPU_0_cycles          minimum:10000	name:stq_bot_sync		: sync at the bottom of the store queue, while waiting on st_done signal from the Bus Interface Unit (BIU) and sync_done signal from L2.
-event:0xdb5 counters:0,1,2,3 um:PPU_0_cycles          minimum:10000	name:stq_bot_lsync		: lwsync at the bottom of the store queue, while waiting for a sync_done signal from the L2.
-event:0xdb6 counters:0,1,2,3 um:PPU_0_cycles          minimum:10000	name:stq_bot_eieio		: eieio at the bottom of the store queue, while waiting for a st_done signal from the BIU and a sync_done signal from the L2.
-event:0xdb7 counters:0,1,2,3 um:PPU_0_cycles          minimum:10000	name:stq_bot_tlbieg	: tlbie at the bottom of the store queue, while waiting for a st_done signal from the BIU.
-event:0xdb8 counters:0,1,2,3 um:PPU_0_edges           minimum:10000	name:st_combined		: Non-cacheable store combined with the previous non-cacheable store with a contiguous address.
-event:0xdb9 counters:0,1,2,3 um:PPU_0_edges           minimum:10000	name:ld_cancel		: Load request canceled by CIU due to late detection of load-hit-store condition (128B boundary).
-event:0xdba counters:0,1,2,3 um:PPU_0_edges           minimum:10000	name:ld_hit_st		: NCU detects a load hitting a previous store to an overlapping address (32B boundary).
-event:0xdbb counters:0,1,2,3 um:PPU_0_cycles          minimum:10000	name:stb_full		: All four store-gather buffers full.
-event:0xdbc counters:0,1,2,3 um:PPU_0_edges           minimum:10000	name:ld_req		: Non-cacheable load request received from CIU; includes instruction and data fetches.
-event:0xdbd counters:0,1,2,3 um:PPU_0_cycles          minimum:10000	name:stq_not_empty		: The four-deep store queue not empty.
-event:0xdbe counters:0,1,2,3 um:PPU_0_cycles          minimum:10000	name:stq_full		: The four-deep store queue full.
-event:0xdbf counters:0,1,2,3 um:PPU_0_cycles          minimum:10000	name:stb_not_empty		: At least one store gather buffer not empty.
-
-# Cell BE Island 4 - Synergistic Processor Unit (SPU)
-#
-# OPROFILE FOR CELL ONLY SUPPORTS PROFILING ON ONE SPU EVENT AT A TIME
-#
-# CBE Signal Group 41 - SPU (NClk)
-event:0x1004 counters:0 um:SPU_02_cycles          minimum:10000	name:dual_instrctn_commit	: Dual instruction committed.
-event:0x1005 counters:0 um:SPU_02_cycles          minimum:10000	name:sngl_instrctn_commit	: Single instruction committed.
-event:0x1006 counters:0 um:SPU_02_cycles          minimum:10000	name:ppln0_instrctn_commit	: Pipeline 0 instruction committed.
-event:0x1007 counters:0 um:SPU_02_cycles          minimum:10000	name:ppln1_instrctn_commit	: Pipeline 1 instruction committed.
-event:0x1008 counters:0 um:SPU_02_cycles_or_edges minimum:10000	name:instrctn_ftch_stll	: Instruction fetch stall.
-event:0x1009 counters:0 um:SPU_02_cycles_or_edges minimum:10000	name:lcl_strg_bsy		: Local storage busy.
-event:0x100A counters:0 um:SPU_02_cycles          minimum:10000	name:dma_cnflct_ld_st	: DMA may conflict with load or store.
-event:0x100B counters:0 um:SPU_02_cycles          minimum:10000	name:str_to_lcl_strg	: Store instruction to local storage issued.
-event:0x100C counters:0 um:SPU_02_cycles          minimum:10000	name:ld_frm_lcl_strg	: Load intruction from local storage issued.
-event:0x100D counters:0 um:SPU_02_cycles          minimum:10000	name:fpu_exctn		: Floating-Point Unit (FPU) exception.
-event:0x100E counters:0 um:SPU_02_cycles          minimum:10000	name:brnch_instrctn_commit	: Branch instruction committed.
-event:0x100F counters:0 um:SPU_02_cycles          minimum:10000	name:change_of_flow	: Non-sequential change of the SPU program counter, which can be caused by branch, asynchronous interrupt, stalled wait on channel, error correction code (ECC) error, and so forth.
-event:0x1010 counters:0 um:SPU_02_cycles          minimum:10000	name:brnch_not_tkn		: Branch not taken.
-event:0x1011 counters:0 um:SPU_02_cycles          minimum:10000	name:brnch_mss_prdctn	: Branch miss prediction; not exact. Certain other code sequences can cause additional pulses on this signal (see Note 2).
-event:0x1012 counters:0 um:SPU_02_cycles          minimum:10000	name:brnch_hnt_mss_prdctn	: Branch hint miss prediction; not exact. Certain other code sequences can cause additional pulses on this signal (see Note 2).
-event:0x1013 counters:0 um:SPU_02_cycles          minimum:10000	name:instrctn_seqnc_err	: Instruction sequence error.
-event:0x1015 counters:0 um:SPU_02_cycles_or_edges minimum:10000	name:stlld_wait_on_chnl_wrt	: Stalled waiting on any blocking channel write (see Note 3).
-event:0x1016 counters:0 um:SPU_02_cycles_or_edges minimum:10000	name:stlld_wait_on_chnl0	: Stalled waiting on External Event Status (Channel 0) (see Note 3).
-event:0x1017 counters:0 um:SPU_02_cycles_or_edges minimum:10000	name:stlld_wait_on_chnl3	: Stalled waiting on Signal Notification 1 (Channel 3) (see Note 3).
-event:0x1018 counters:0 um:SPU_02_cycles_or_edges minimum:10000	name:stlld_wait_on_chnl4	: Stalled waiting on Signal Notification 2 (Channel 4) (see Note 3).
-event:0x1019 counters:0 um:SPU_02_cycles_or_edges minimum:10000	name:stlld_wait_on_chnl21	: Stalled waiting on DMA Command Opcode or ClassID Register (Channel 21) (see Note 3).
-event:0x101A counters:0 um:SPU_02_cycles_or_edges minimum:10000	name:stlld_wait_on_chnl24	: Stalled waiting on Tag Group Status (Channel 24) (see Note 3).
-event:0x101B counters:0 um:SPU_02_cycles_or_edges minimum:10000	name:stlld_wait_on_chnl25	: Stalled waiting on List Stall-and-Notify Tag Status (Channel 25) (see Note 3).
-event:0x101C counters:0 um:SPU_02_cycles_or_edges minimum:10000	name:stlld_wait_on_chnl28	: Stalled waiting on PPU Mailbox (Channel 28) (see Note 3).
-event:0x1022 counters:0 um:SPU_02_cycles_or_edges minimum:10000	name:stlld_wait_on_chnl29	: Stalled waiting on SPU Mailbox (Channel 29) (see Note 3).
-
-
-# CBE Signal Group 42 - SPU Trigger (NClk)
-event:0x10A1 counters:0 um:SPU_Trigger_cycles_or_edges minimum:10000	name:stld_wait_chnl_op	: Stalled waiting on channel operation (See Note 2).
-
-# CBE Signal Group 43 - SPU Event (NClk)
-event:0x1107 counters:0 um:SPU_Event_cycles_or_edges minimum:10000	name:instrctn_ftch_stll	: Instruction fetch stall.
-
-# Cell BE Island 6 - Element Interconnect Bus (EIB)
-
-# CBE Signal Group 61 - EIB Address Concentrator 0 (NClk/2)
-event:0x17d4 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WAC0_W_ICMD_PERF(0)	: Number of read and rwitm commands (including atomic) AC1 to AC0. (Group 1)
-event:0x17d5 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WAC0_W_ICMD_PERF(1)	: Number of dclaim commands (including atomic) AC1 to AC0. (Group 1)
-event:0x17d6 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WAC0_W_ICMD_PERF(2)	: Number of wwk, wwc, and wwf commands from AC1 to AC0. (Group 1)
-event:0x17d7 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WAC0_W_ICMD_PERF(3)	: Number of sync, tlbsync, and eieio commands from AC1 to AC0. (Group 1)
-event:0x17d8 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WAC0_W_ICMD_PERF(4)	: Number of tlbie commands from AC1 to AC0. (Group 1)
-event:0x17df counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WAC0_W_CAM_PERF(1)	: Previous adjacent address match (PAAM) Content Addressable Memory (CAM) hit. (Group 1)
-event:0x17e0 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WAC0_W_CAM_PERF(2)	: PAAM CAM miss. (Group 1)
-event:0x17e2 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WAC0_W_CAM_CMD_REFLECTED	: Command reflected. (Group 1)
-event:0x17e4 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WAC0_W_ICMD_PERF(0)	: Number of read and rwitm commands (including atomic) AC1 to AC0. (Group 2)
-event:0x17e5 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WAC0_W_ICMD_PERF(1)	: Number of dclaim commands (including atomic) AC1 to AC0. (Group 2)
-event:0x17e6 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WAC0_W_ICMD_PERF(2)	: Number of wwk, wwc, and wwf commands from AC1 to AC0. (Group 2)
-event:0x17e7 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WAC0_W_ICMD_PERF(3)	: Number of sync, tlbsync, and eieio commands from AC1 to AC0. (Group 2)
-event:0x17e8 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WAC0_W_ICMD_PERF(4)	: Number of tlbie commands from AC1 to AC0. (Group 2)
-event:0x17ef counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WAC0_W_CAM_PERF(1)	: PAAM CAM hit. (Group 2)
-event:0x17f0 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WAC0_W_CAM_PERF(2)	: PAAM CAM miss. (Group 2)
-event:0x17f2 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WAC0_W_CAM_CMD_REFLECTED	: Command reflected. (Group 2)
-
-# CBE Signal Group 62 - EIB Address Concentrator 1 (NClk/2)
-event:0x1839 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WAC1_WAC1_TRCMUX_W_TRCGRP_ACPERF(1) : Local command from SPE 6.
-event:0x183a counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WAC1_WAC1_TRCMUX_W_TRCGRP_ACPERF(2) : Local command from SPE 4.
-event:0x183b counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WAC1_WAC1_TRCMUX_W_TRCGRP_ACPERF(3) : Local command from SPE 2.
-event:0x183c counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WAC1_WAC1_TRCMUX_W_TRCGRP_ACPERF(4) : Local command from SPE 0.
-event:0x183d counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WAC1_WAC1_TRCMUX_W_TRCGRP_ACPERF(5) : Local command from PPE.
-event:0x183e counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WAC1_WAC1_TRCMUX_W_TRCGRP_ACPERF(6) : Local command from SPE 1.
-event:0x183f counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WAC1_WAC1_TRCMUX_W_TRCGRP_ACPERF(7) : Local command from SPE 3.
-event:0x1840 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WAC1_WAC1_TRCMUX_W_TRCGRP_ACPERF(8) : Local command from SPE 5.
-event:0x1841 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WAC1_WAC1_TRCMUX_W_TRCGRP_ACPERF(9) : Local command from SPE 7.
-event:0x1844 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WAC1_WAC1_TRCMUX_W_TRCGRP_ACPERF(12) : AC1-to-AC0 global command from SPE 6.
-event:0x1845 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WAC1_WAC1_TRCMUX_W_TRCGRP_ACPERF(13) : AC1-to-AC0 global command from SPE 4.
-event:0x1846 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WAC1_WAC1_TRCMUX_W_TRCGRP_ACPERF(14) : AC1-to-AC0 global command from SPE 2.
-event:0x1847 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WAC1_WAC1_TRCMUX_W_TRCGRP_ACPERF(15) : AC1-to-AC0 global command from SPE 0.
-event:0x1848 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WAC1_WAC1_TRCMUX_W_TRCGRP_ACPERF(16) : AC1-to-AC0 global command from PPE.
-event:0x1849 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WAC1_WAC1_TRCMUX_W_TRCGRP_ACPERF(17) : AC1-to-AC0 global command from SPE 1.
-event:0x184a counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WAC1_WAC1_TRCMUX_W_TRCGRP_ACPERF(18) : AC1-to-AC0 global command from SPE 3.
-event:0x184b counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WAC1_WAC1_TRCMUX_W_TRCGRP_ACPERF(19) : AC1-to-AC0 global command from SPE 5.
-event:0x184c counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WAC1_WAC1_TRCMUX_W_TRCGRP_ACPERF(20) : AC1-to-AC0 global command from SPE 7.
-event:0x184f counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WAC1_WAC1_TRCMUX_W_TRCGRP_ACPERF(23) : AC1 sends a global command to AC0.
-event:0x1850 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WAC1_WAC1_TRCMUX_W_TRCGRP_ACPERF(24) : AC0 reflects a global command back to AC1.
-event:0x1851 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WAC1_WAC1_TRCMUX_W_TRCGRP_ACPERF(25) : AC1 reflects a command back to the bus masters.
-
-# CBE Signal Group 63 - EIB Data Ring Arbitrator - Group 1 (NClk/2)
-event:0x189c counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WDA_DTRC_TRCGRPE(0)	: Grant on data ring 0.
-event:0x189d counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WDA_DTRC_TRCGRPE(1)	: Grant on data ring 1.
-event:0x189e counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WDA_DTRC_TRCGRPE(2)	: Grant on data ring 2.
-event:0x189f counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WDA_DTRC_TRCGRPE(3)	: Grant on data ring 3.
-event:0x18a0 counters:0,1,2,3 um:PPU_02_cycles          minimum:10000	name:WDA_DTRC_TRCGRPE(4)	: Data ring 0 is in use.
-event:0x18a1 counters:0,1,2,3 um:PPU_02_cycles          minimum:10000	name:WDA_DTRC_TRCGRPE(5)	: Data ring 1 is in use.
-event:0x18a2 counters:0,1,2,3 um:PPU_02_cycles          minimum:10000	name:WDA_DTRC_TRCGRPE(6)	: Data ring 2 is in use.
-event:0x18a3 counters:0,1,2,3 um:PPU_02_cycles          minimum:10000	name:WDA_DTRC_TRCGRPE(7)	: Data ring 3 is in use.
-event:0x18a4 counters:0,1,2,3 um:PPU_02_cycles          minimum:10000	name:WDA_DTRC_TRCGRPE(8)	: All data rings are idle.
-event:0x18a5 counters:0,1,2,3 um:PPU_02_cycles          minimum:10000	name:WDA_DTRC_TRCGRPE(9)	: One data ring is busy.
-event:0x18a6 counters:0,1,2,3 um:PPU_02_cycles          minimum:10000	name:WDA_DTRC_TRCGRPE(10)	: Two or three data rings are busy.
-event:0x18a7 counters:0,1,2,3 um:PPU_02_cycles          minimum:10000	name:WDA_DTRC_TRCGRPE(11)	: All data rings are busy.
-event:0x18a8 counters:0,1,2,3 um:PPU_02_cycles_or_edges minimum:10000	name:WDA_DTRC_TRCGRPE(12)	: BIC data request pending.
-event:0x18a9 counters:0,1,2,3 um:PPU_02_cycles_or_edges minimum:10000	name:WDA_DTRC_TRCGRPE(13)	: SPE 6 data request pending.
-event:0x18aa counters:0,1,2,3 um:PPU_02_cycles_or_edges minimum:10000	name:WDA_DTRC_TRCGRPE(14)	: SPE 4 data request pending.
-event:0x18ab counters:0,1,2,3 um:PPU_02_cycles_or_edges minimum:10000	name:WDA_DTRC_TRCGRPE(15)	: SPE 2 data request pending.
-event:0x18ac counters:0,1,2,3 um:PPU_02_cycles_or_edges minimum:10000	name:WDA_DTRC_TRCGRPE(16)	: SPE 0 data request pending.
-event:0x18ad counters:0,1,2,3 um:PPU_02_cycles_or_edges minimum:10000	name:WDA_DTRC_TRCGRPE(17)	: MIC data request pending.
-event:0x18ae counters:0,1,2,3 um:PPU_02_cycles_or_edges minimum:10000	name:WDA_DTRC_TRCGRPE(18)	: PPE data request pending.
-event:0x18af counters:0,1,2,3 um:PPU_02_cycles_or_edges minimum:10000	name:WDA_DTRC_TRCGRPE(19)	: SPE 1 data request pending.
-event:0x18b0 counters:0,1,2,3 um:PPU_02_cycles_or_edges minimum:10000	name:WDA_DTRC_TRCGRPE(20)	: SPE 3 data request pending.
-event:0x18b1 counters:0,1,2,3 um:PPU_02_cycles_or_edges minimum:10000	name:WDA_DTRC_TRCGRPE(21)	: SPE 5 data request pending.
-event:0x18b2 counters:0,1,2,3 um:PPU_02_cycles_or_edges minimum:10000	name:WDA_DTRC_TRCGRPE(22)	: SPE 7 data request pending.
-event:0x18b3 counters:0,1,2,3 um:PPU_02_cycles_or_edges minimum:10000	name:WDA_DTRC_TRCGRPE(23)	: IOC data request pending.
-event:0x18b4 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WDA_DTRC_TRCGRPE(24)	: BIC is data destination.
-event:0x18b5 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WDA_DTRC_TRCGRPE(25)	: SPE 6 is data destination.
-event:0x18b6 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WDA_DTRC_TRCGRPE(26)	: SPE 4 is data destination.
-event:0x18b7 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WDA_DTRC_TRCGRPE(27)	: SPE 2 is data destination.
-event:0x18b8 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WDA_DTRC_TRCGRPE(28)	: SPE 0 is data destination.
-event:0x18b9 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WDA_DTRC_TRCGRPE(29)	: MIC is data destination.
-event:0x18ba counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WDA_DTRC_TRCGRPE(30)	: PPE is data destination.
-event:0x18bb counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WDA_DTRC_TRCGRPE(31)	: SPE 1 is data destination.
-
-# CBE Signal Group 64 - EIB Data Ring Arbitrator - Group 2 (NClk/2)
-event:0x1900 counters:0,1,2,3 um:PPU_02_cycles_or_edges minimum:10000	name:WDA_DTRC_TRCGRPF(0)	: BIC data request pending.
-event:0x1901 counters:0,1,2,3 um:PPU_02_cycles_or_edges minimum:10000	name:WDA_DTRC_TRCGRPF(1)	: SPE 6 data request pending.
-event:0x1902 counters:0,1,2,3 um:PPU_02_cycles_or_edges minimum:10000	name:WDA_DTRC_TRCGRPF(2)	: SPE 4 data request pending.
-event:0x1903 counters:0,1,2,3 um:PPU_02_cycles_or_edges minimum:10000	name:WDA_DTRC_TRCGRPF(3)	: SPE 2 data request pending.
-event:0x1904 counters:0,1,2,3 um:PPU_02_cycles_or_edges minimum:10000	name:WDA_DTRC_TRCGRPF(4)	: SPE 0 data request pending.
-event:0x1905 counters:0,1,2,3 um:PPU_02_cycles_or_edges minimum:10000	name:WDA_DTRC_TRCGRPF(5)	: MIC data request pending.
-event:0x1906 counters:0,1,2,3 um:PPU_02_cycles_or_edges minimum:10000	name:WDA_DTRC_TRCGRPF(6)	: PPE data request pending.
-event:0x1907 counters:0,1,2,3 um:PPU_02_cycles_or_edges minimum:10000	name:WDA_DTRC_TRCGRPF(7)	: SPE 1 data request pending.
-event:0x1908 counters:0,1,2,3 um:PPU_02_cycles_or_edges minimum:10000	name:WDA_DTRC_TRCGRPF(8)	: SPE 3 data request pending.
-event:0x1909 counters:0,1,2,3 um:PPU_02_cycles_or_edges minimum:10000	name:WDA_DTRC_TRCGRPF(9)	: SPE 5 data request pending.
-event:0x190a counters:0,1,2,3 um:PPU_02_cycles_or_edges minimum:10000	name:WDA_DTRC_TRCGRPF(10)	: SPE 7 data request pending.
-event:0x190b counters:0,1,2,3 um:PPU_02_cycles_or_edges minimum:10000	name:WDA_DTRC_TRCGRPF(11)	: IOC data request pending.
-event:0x190c counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WDA_DTRC_TRCGRPF(12)	: BIC is data destination.
-event:0x190d counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WDA_DTRC_TRCGRPF(13)	: SPE 6 is data destination.
-event:0x190e counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WDA_DTRC_TRCGRPF(14)	: SPE 4 is data destination.
-event:0x190f counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WDA_DTRC_TRCGRPF(15)	: SPE 2 is data destination.
-event:0x1910 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WDA_DTRC_TRCGRPF(16)	: SPE 0 is data destination.
-event:0x1911 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WDA_DTRC_TRCGRPF(17)	: MIC is data destination.
-event:0x1912 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WDA_DTRC_TRCGRPF(18)	: PPE is data destination.
-event:0x1913 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WDA_DTRC_TRCGRPF(19)	: SPE 1 is data destination.
-event:0x1914 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WDA_DTRC_TRCGRPF(20)	: SPE 3 is data destination.
-event:0x1915 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WDA_DTRC_TRCGRPF(21)	: SPE 5 is data destination.
-event:0x1916 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WDA_DTRC_TRCGRPF(22)	: SPE 7 is data destination.
-event:0x1917 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WDA_DTRC_TRCGRPF(23)	: IOC is data destination.
-event:0x1918 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WDA_DTRC_TRCGRPF(24)	: Grant on data ring 0.
-event:0x1919 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WDA_DTRC_TRCGRPF(25)	: Grant on data ring 1.
-event:0x191a counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WDA_DTRC_TRCGRPF(26)	: Grant on data ring 2.
-event:0x191b counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:WDA_DTRC_TRCGRPF(27)	: Grant on data ring 3.
-event:0x191c counters:0,1,2,3 um:PPU_02_cycles          minimum:10000	name:WDA_DTRC_TRCGRPF(28)	: All data rings are idle.
-event:0x191d counters:0,1,2,3 um:PPU_02_cycles          minimum:10000	name:WDA_DTRC_TRCGRPF(29)	: One data ring is busy.
-event:0x191e counters:0,1,2,3 um:PPU_02_cycles          minimum:10000	name:WDA_DTRC_TRCGRPF(30)	: Two or three data rings are busy.
-event:0x191f counters:0,1,2,3 um:PPU_02_cycles          minimum:10000	name:WDA_DTRC_TRCGRPF(31)	: All four data rings are busy.
-
-# CBE Signal Group 651 - EIB Token Manager - Group A0/B0 (NClk/2)
-event:0xfe4c counters:0,1,2,3 um:PPU_0_cycles          minimum:10000	name:rag0_xio_e_unused	: Even XIO token unused by RAG 0.
-event:0xfe4d counters:0,1,2,3 um:PPU_0_cycles          minimum:10000	name:rag0_xio_o_unused	: Odd XIO token unused by RAG 0.
-event:0xfe4e counters:0,1,2,3 um:PPU_0_cycles          minimum:10000	name:rag0_bank_e_unused	: Even bank token unused by RAG 0.
-event:0xfe4f counters:0,1,2,3 um:PPU_0_cycles          minimum:10000	name:rag0_bank_o_unused	: Odd bank token unused by RAG 0.
-event:0xfe54 counters:0,1,2,3 um:PPU_0_cycles          minimum:10000	name:token_granted_spc0	: Token granted for SPE 0.
-event:0xfe55 counters:0,1,2,3 um:PPU_0_cycles          minimum:10000	name:token_granted_spc1	: Token granted for SPE 1.
-event:0xfe56 counters:0,1,2,3 um:PPU_0_cycles          minimum:10000	name:token_granted_spc2	: Token granted for SPE 2.
-event:0xfe57 counters:0,1,2,3 um:PPU_0_cycles          minimum:10000	name:token_granted_spc3	: Token granted for SPE 3.
-event:0xfe58 counters:0,1,2,3 um:PPU_0_cycles          minimum:10000	name:token_granted_spc4	: Token granted for SPE 4.
-event:0xfe59 counters:0,1,2,3 um:PPU_0_cycles          minimum:10000	name:token_granted_spc5	: Token granted for SPE 5.
-event:0xfe5a counters:0,1,2,3 um:PPU_0_cycles          minimum:10000	name:token_granted_spc6	: Token granted for SPE 6.
-event:0xfe5b counters:0,1,2,3 um:PPU_0_cycles          minimum:10000	name:token_granted_spc7	: Token granted for SPE 7.
-
-
-# CBE Signal Group 652 - EIB Token Manager - Group A1/B1 (NClk/2)
-event:0xfeb0 counters:0,1,2,3 um:PPU_0_cycles          minimum:10000	name:rag0_xio_e_wasted	: Even XIO token wasted by RAG 0; valid only when Unused Enable (UE) = 1 in TKM_CR register.
-event:0xfeb1 counters:0,1,2,3 um:PPU_0_cycles          minimum:10000	name:rag0_xio_o_wasted	: Odd XIO token wasted by RAG 0; valid only when Unused Enable (UE) = 1 in TKM_CR register.
-event:0xfeb2 counters:0,1,2,3 um:PPU_0_cycles          minimum:10000	name:rag0_bank_e_wasted	: Even bank token wasted by RAG 0; valid only when Unused Enable (UE) = 1 in TKM_CR register.
-event:0xfeb3 counters:0,1,2,3 um:PPU_0_cycles          minimum:10000	name:rag0_bank_o_wasted	: Odd bank token wasted by RAG 0; valid only when Unused Enable (UE) = 1 in TKM_CR register.
-event:0xfebc counters:0,1,2,3 um:PPU_0_cycles          minimum:10000	name:ragu_xio_e_wasted	: Even XIO token wasted by RAG U.
-event:0xfebd counters:0,1,2,3 um:PPU_0_cycles          minimum:10000	name:ragu_xio_o_wasted	: Odd XIO token wasted by RAG U.
-event:0xfebe counters:0,1,2,3 um:PPU_0_cycles          minimum:10000	name:ragu_bank_e_wasted	: Even bank token wasted by RAG U.
-event:0xfebf counters:0,1,2,3 um:PPU_0_cycles          minimum:10000	name:ragu_bank_o_wasted	: Odd bank token wasted by RAG U.
-
-# CBE Signal Group 653 - EIB Token Manager - Group A2/B2 (NClk/2)
-event:0xff14 counters:0,1,2,3 um:PPU_0_cycles          minimum:10000	name:rag0_xio_e_shared_to_rag1	: Even XIO token from RAG 0 shared with RAG 1
-event:0xff15 counters:0,1,2,3 um:PPU_0_cycles          minimum:10000	name:rag0_xio_e_shared_to_rag2	: Even XIO token from RAG 0 shared with RAG 2
-event:0xff16 counters:0,1,2,3 um:PPU_0_cycles          minimum:10000	name:rag0_xio_e_shared_to_rag3	: Even XIO token from RAG 0 shared with RAG 3
-event:0xff17 counters:0,1,2,3 um:PPU_0_cycles          minimum:10000	name:rag0_xio_o_shared_to_rag1	: Odd XIO token from RAG 0 shared with RAG 1
-event:0xff18 counters:0,1,2,3 um:PPU_0_cycles          minimum:10000	name:rag0_xio_o_shared_to_rag2	: Odd XIO token from RAG 0 shared with RAG 2
-event:0xff19 counters:0,1,2,3 um:PPU_0_cycles          minimum:10000	name:rag0_xio_o_shared_to_rag3	: Odd XIO token from RAG 0 shared with RAG 3
-event:0xff1a counters:0,1,2,3 um:PPU_0_cycles          minimum:10000	name:rag0_bank_e_shared_to_rag1	: Even bank token from RAG 0 shared with RAG 1
-event:0xff1b counters:0,1,2,3 um:PPU_0_cycles          minimum:10000	name:rag0_bank_e_shared_to_rag2	: Even bank token from RAG 0 shared with RAG 2
-event:0xff1c counters:0,1,2,3 um:PPU_0_cycles          minimum:10000	name:rag0_bank_e_shared_to_rag3	: Even bank token from RAG 0 shared with RAG 3
-event:0xff1d counters:0,1,2,3 um:PPU_0_cycles          minimum:10000	name:rag0_bank_o_shared_to_rag1	: Odd bank token from RAG 0 shared with RAG 1
-event:0xff1e counters:0,1,2,3 um:PPU_0_cycles          minimum:10000	name:rag0_bank_o_shared_to_rag2	: Odd bank token from RAG 0 shared with RAG 2
-event:0xff1f counters:0,1,2,3 um:PPU_0_cycles          minimum:10000	name:rag0_bank_o_shared_to_rag3	: Odd bank token from RAG 0 shared with RAG 3
-
-
-# CBE Signal Group 654 - EIB Token Manager - Group A0/B0 (NClk/2)
-# Repeat of the 65400, 65401, 65402, 65403, 65416, 65417, 65418, 65419 events
-
-
-# CBE Signal Group 655 - EIB Token Manager - Group A1/B1 (NClk/2)
-#repeat of the 65200 events
-
-
-# CBE Signal Group 656 - EIB Token Manager - Group A2/B2 (NClk/2)
-event:0x1004f counters:0,1,2,3 um:PPU_0_cycles          minimum:10000	name:ragu_bank_o_shared_to_rag0	: Odd bank token from RAG U shared with RAG 0
-event:0x10050 counters:0,1,2,3 um:PPU_0_cycles          minimum:10000	name:rag1_xio_e_shared_to_rag0	: Even XIO token from RAG 1 shared with RAG 0
-event:0x10051 counters:0,1,2,3 um:PPU_0_cycles          minimum:10000	name:rag1_xio_e_shared_to_rag2	: Even XIO token from RAG 1 shared with RAG 2
-event:0x10052 counters:0,1,2,3 um:PPU_0_cycles          minimum:10000	name:rag1_xio_e_shared_to_rag3	: Even XIO token from RAG 1 shared with RAG 3
-event:0x10053 counters:0,1,2,3 um:PPU_0_cycles          minimum:10000	name:rag1_xio_o_shared_to_rag0	: Odd XIO token from RAG 1 shared with RAG 0
-event:0x10054 counters:0,1,2,3 um:PPU_0_cycles          minimum:10000	name:rag1_xio_o_shared_to_rag2	: Odd XIO token from RAG 1 shared with RAG 2
-event:0x10055 counters:0,1,2,3 um:PPU_0_cycles          minimum:10000	name:rag1_xio_o_shared_to_rag3	: Odd XIO token from RAG 1 shared with RAG 3
-event:0x10056 counters:0,1,2,3 um:PPU_0_cycles          minimum:10000	name:rag1_bank_e_shared_to_rag0	: Even bank token from RAG 1 shared with RAG 0
-event:0x10057 counters:0,1,2,3 um:PPU_0_cycles          minimum:10000	name:rag1_bank_e_shared_to_rag2	: Even bank token from RAG 1 shared with RAG 2
-event:0x10058 counters:0,1,2,3 um:PPU_0_cycles          minimum:10000	name:rag1_bank_e_shared_to_rag3	: Even bank token from RAG 1 shared with RAG 3
-event:0x10059 counters:0,1,2,3 um:PPU_0_cycles          minimum:10000	name:rag1_bank_o_shared_to_rag0	: Odd bank token from RAG 1 shared with RAG 0
-event:0x1005a counters:0,1,2,3 um:PPU_0_cycles          minimum:10000	name:rag1_bank_o_shared_to_rag2	: Odd bank token from RAG 1 shared with RAG 2
-event:0x1005b counters:0,1,2,3 um:PPU_0_cycles          minimum:10000	name:rag1_bank_o_shared_to_rag3	: Odd bank token from RAG 1 shared with RAG 3
-event:0x1005c counters:0,1,2,3 um:PPU_0_cycles          minimum:10000	name:ragu_xio_e_shared_to_rag1	: Even XIO token from RAG U shared with RAG 1
-event:0x1005d counters:0,1,2,3 um:PPU_0_cycles          minimum:10000	name:ragu_xio_o_shared_to_rag1	: Odd XIO token from RAG U shared with RAG 1
-event:0x1005e counters:0,1,2,3 um:PPU_0_cycles          minimum:10000	name:ragu_bank_e_shared_to_rag1	: Even bank token from RAG U shared with RAG 1
-event:0x1005f counters:0,1,2,3 um:PPU_0_cycles          minimum:10000	name:ragu_bank_o_shared_to_rag1	: Odd bank token from RAG U shared with RAG 1
-
-# CBE Signal Group 657 - EIB Token Manager - Group C0/D0 (NClk/2)
-event:0x100e4 counters:0,1,2,3 um:PPU_2_cycles          minimum:10000	name:rag2_xio_e_unused	: Even XIO token unused by RAG 2
-event:0x100e5 counters:0,1,2,3 um:PPU_2_cycles          minimum:10000	name:rag2_xio_o_unused	: Odd XIO token unused by RAG 2
-event:0x100e6 counters:0,1,2,3 um:PPU_2_cycles          minimum:10000	name:rag2_bank_e_unused	: Even bank token unused by RAG 2
-event:0x100e7 counters:0,1,2,3 um:PPU_2_cycles          minimum:10000	name:rag2_bank_o_unused	: Odd bank token unused by RAG 2
-event:0x100e8 counters:0,1,2,3 um:PPU_2_cycles          minimum:10000	name:rag0_ioif0_in_unused	: IOIF0 In token unused by RAG 0
-event:0x100e9 counters:0,1,2,3 um:PPU_2_cycles          minimum:10000	name:rag0_ioif0_out_unused	: IOIF0 Out token unused by RAG 0
-event:0x100ea counters:0,1,2,3 um:PPU_2_cycles          minimum:10000	name:rag0_ioif1_in_unused	: IOIF1 In token unused by RAG 0
-event:0x100eb counters:0,1,2,3 um:PPU_2_cycles          minimum:10000	name:rag0_ioif1_out_unused	: IOIF1 Out token unused by RAG 0
-
-
-# CBE Signal Group 658 - EIB Token Manager - Group C1/D1 (NClk/2)
-event:0x10148 counters:0,1,2,3 um:PPU_2_cycles          minimum:10000	name:rag2_xio_e_wasted	: Even XIO token wasted by RAG 2
-event:0x10149 counters:0,1,2,3 um:PPU_2_cycles          minimum:10000	name:rag2_xio_o_wasted	: Odd XIO token wasted by RAG 2
-event:0x1014a counters:0,1,2,3 um:PPU_2_cycles          minimum:10000	name:rag2_bank_e_wasted	: Even bank token wasted by RAG 2
-event:0x1014b counters:0,1,2,3 um:PPU_2_cycles          minimum:10000	name:rag2_bank_o_wasted	: Odd bank token wasted by RAG 2
-
-
-# CBE Signal Group 659 - EIB Token Manager - Group C2/D2 (NClk/2)
-event:0x101ac counters:0,1,2,3 um:PPU_2_cycles          minimum:10000	name:rag2_xio_e_shared_to_rag0	: Even XIO token from RAG 2 shared with RAG 0
-event:0x101ad counters:0,1,2,3 um:PPU_2_cycles          minimum:10000	name:rag2_xio_e_shared_to_rag1	: Even XIO token from RAG 2 shared with RAG 1
-event:0x101ae counters:0,1,2,3 um:PPU_2_cycles          minimum:10000	name:rag2_xio_e_shared_to_rag3	: Even XIO token from RAG 2 shared with RAG 3
-event:0x101af counters:0,1,2,3 um:PPU_2_cycles          minimum:10000	name:rag2_xio_o_shared_to_rag0	: Odd XIO token from RAG 2 shared with RAG 0
-event:0x101b0 counters:0,1,2,3 um:PPU_2_cycles          minimum:10000	name:rag2_xio_o_shared_to_rag1	: Odd XIO token from RAG 2 shared with RAG 1
-event:0x101b1 counters:0,1,2,3 um:PPU_2_cycles          minimum:10000	name:rag2_xio_o_shared_to_rag3	: Odd XIO token from RAG 2 shared with RAG 3
-event:0x101b2 counters:0,1,2,3 um:PPU_2_cycles          minimum:10000	name:rag2_bank_e_shared_to_rag0	: Even bank token from RAG 2 shared with RAG 0
-event:0x101b3 counters:0,1,2,3 um:PPU_2_cycles          minimum:10000	name:rag2_bank_e_shared_to_rag1	: Even bank token from RAG 2 shared with RAG 1
-event:0x101b4 counters:0,1,2,3 um:PPU_2_cycles          minimum:10000	name:rag2_bank_e_shared_to_rag3	: Even bank token from RAG 2 shared with RAG 3
-event:0x101b5 counters:0,1,2,3 um:PPU_2_cycles          minimum:10000	name:rag2_bank_o_shared_to_rag0	: Odd bank token from RAG 2 shared with RAG 0
-event:0x101b6 counters:0,1,2,3 um:PPU_2_cycles          minimum:10000	name:rag2_bank_o_shared_to_rag1	: Odd bank token from RAG 2 shared with RAG 1
-event:0x101b7 counters:0,1,2,3 um:PPU_2_cycles          minimum:10000	name:rag2_bank_o_shared_to_rag3	: Odd bank token from RAG 2 shared with RAG 3
-
-
-# CBE Signal Group 6510 - EIB Token Manager - Group C3 (NClk/2)
-event:0x9ef38 counters:0,1,2,3 um:PPU_2_cycles          minimum:10000	name:rag0_ioif0_in_wasted	: IOIF0 In token wasted by RAG 0
-event:0x9ef39 counters:0,1,2,3 um:PPU_2_cycles          minimum:10000	name:rag0_ioif0_out_wasted	: IOIF0 Out token wasted by RAG 0
-event:0x9ef3a counters:0,1,2,3 um:PPU_2_cycles          minimum:10000	name:rag0_ioif1_in_wasted	: IOIF1 In token wasted by RAG 0
-event:0x9ef3b counters:0,1,2,3 um:PPU_2_cycles          minimum:10000	name:rag0_ioif1_out_wasted	: IOIF1 Out token wasted by RAG 0
-
-
-# CBE Signal Group 6511 - EIB Token Manager - Group C0/D0 (NClk/2)
-# repeat of the events 65764 - 65771
-
-# CBE Signal Group 6512 - EIB Token Manager - Group C1/D1 (NClk/2)
-event:0x9f010 counters:0,1,2,3 um:PPU_2_cycles          minimum:10000	name:rag3_xio_e_wasted	: Even XIO token wasted by RAG 3
-event:0x9f011 counters:0,1,2,3 um:PPU_2_cycles          minimum:10000	name:rag3_xio_o_wasted	: Odd XIO token wasted by RAG 3
-event:0x9f012 counters:0,1,2,3 um:PPU_2_cycles          minimum:10000	name:rag3_bank_e_wasted	: Even bank token wasted by RAG 3
-event:0x9f013 counters:0,1,2,3 um:PPU_2_cycles          minimum:10000	name:rag3_bank_o_wasted	: Odd bank token wasted by RAG 3
-
-# CBE Signal Group 6513 - EIB Token Manager - Group C2/D2 (NClk/2)
-event:0x9f074 counters:0,1,2,3 um:PPU_2_cycles          minimum:10000	name:rag3_xio_e_shared_to_rag0	: Even XIO token from RAG 3 shared with RAG 0
-event:0x9f075 counters:0,1,2,3 um:PPU_2_cycles          minimum:10000	name:rag3_xio_e_shared_to_rag1	: Even XIO token from RAG 3 shared with RAG 1
-event:0x9f076 counters:0,1,2,3 um:PPU_2_cycles          minimum:10000	name:rag3_xio_e_shared_to_rag2	: Even XIO token from RAG 3 shared with RAG 2
-event:0x9f077 counters:0,1,2,3 um:PPU_2_cycles          minimum:10000	name:rag3_xio_o_shared_to_rag0	: Odd XIO token from RAG 3 shared with RAG 0
-event:0x9f078 counters:0,1,2,3 um:PPU_2_cycles          minimum:10000	name:rag3_xio_o_shared_to_rag1	: Odd XIO token from RAG 3 shared with RAG 1
-event:0x9f079 counters:0,1,2,3 um:PPU_2_cycles          minimum:10000	name:rag3_xio_o_shared_to_rag2	: Odd XIO token from RAG 3 shared with RAG 2
-event:0x9f07a counters:0,1,2,3 um:PPU_2_cycles          minimum:10000	name:rag3_bank_e_shared_to_rag0	: Even bank token from RAG 3 shared with RAG 0
-event:0x9f07b counters:0,1,2,3 um:PPU_2_cycles          minimum:10000	name:rag3_bank_e_shared_to_rag1	: Even bank token from RAG 3 shared with RAG 1
-event:0x9f07c counters:0,1,2,3 um:PPU_2_cycles          minimum:10000	name:rag3_bank_e_shared_to_rag2	: Even bank token from RAG 3 shared with RAG 2
-event:0x9f07d counters:0,1,2,3 um:PPU_2_cycles          minimum:10000	name:rag3_bank_o_shared_to_rag0	: Odd bank token from RAG 3 shared with RAG 0
-event:0x9f07e counters:0,1,2,3 um:PPU_2_cycles          minimum:10000	name:rag3_bank_o_shared_to_rag1	: Odd bank token from RAG 3 shared with RAG 1
-event:0x9f07f counters:0,1,2,3 um:PPU_2_cycles          minimum:10000	name:rag3_bank_o_shared_to_rag2	: Odd bank token from RAG 3 shared with RAG 2
-
-
-# Cell BE Island 7 - Memory Interface Controller (MIC)
-
-# CBE Signal Group 71 - MIC Group 1 (NClk/2)
-event:0x1bc5 counters:0,1,2,3 um:PPU_0123_cycles          minimum:10000	name:YM_COMMON_YMB_CSR_PERFORM1(1)	: XIO1 - Read command queue is empty.
-event:0x1bc6 counters:0,1,2,3 um:PPU_0123_cycles          minimum:10000	name:YM_COMMON_YMB_CSR_PERFORM1(2)	: XIO1 - Write command queue is empty.
-event:0x1bc8 counters:0,1,2,3 um:PPU_0123_cycles          minimum:10000	name:YM_COMMON_YMB_CSR_PERFORM1(4)	: XIO1 - Read command queue is full.
-event:0x1bc9 counters:0,1,2,3 um:PPU_0123_cycles          minimum:10000	name:YM_COMMON_YMB_CSR_PERFORM1(5)	: XIO1 - MIC responds with a Retry for a read command because the read command queue is full.
-event:0x1bca counters:0,1,2,3 um:PPU_0123_cycles          minimum:10000	name:YM_COMMON_YMB_CSR_PERFORM1(6)	: XIO1 - Write command queue is full.
-event:0x1bcb counters:0,1,2,3 um:PPU_0123_cycles          minimum:10000	name:YM_COMMON_YMB_CSR_PERFORM1(7)	: XIO1 - MIC responds with a Retry for a write command because the write command queue is full.
-event:0x1bde counters:0,1,2,3 um:PPU_0123_cycles          minimum:10000	name:YM_CTL1_YMM_CCS_PERFORM(2)	: XIO1 - Read command dispatched; includes high-priority and fast-path reads (see Note 1).
-event:0x1bdf counters:0,1,2,3 um:PPU_0123_cycles          minimum:10000	name:YM_CTL1_YMM_CCS_PERFORM(3)	: XIO1 - Write command dispatched (see Note 1).
-event:0x1be0 counters:0,1,2,3 um:PPU_0123_cycles          minimum:10000	name:YM_CTL1_YMM_CCS_PERFORM(4)	: XIO1 - Read-Modify-Write command (data size < 16 bytes) dispatched (see Note 1).
-event:0x1be1 counters:0,1,2,3 um:PPU_0123_cycles          minimum:10000	name:YM_CTL1_YMM_CCS_PERFORM(5)	: XIO1 - Refresh dispatched (see Note 1).
-event:0x1be3 counters:0,1,2,3 um:PPU_0123_cycles          minimum:10000	name:YM_CTL1_YMM_CCS_PERFORM(7)	: XIO1 - Byte-masking write command (data size >= 16 bytes) dispatched (see Note 1).
-event:0x1be5 counters:0,1,2,3 um:PPU_0123_cycles          minimum:10000	name:YM_CTL1_YMM_CRW_PERFORM(1)	: XIO1 - Write command dispatched after a read command was previously dispatched (see Note 1).
-event:0x1be6 counters:0,1,2,3 um:PPU_0123_cycles          minimum:10000	name:YM_CTL1_YMM_CRW_PERFORM(2)	: XIO1 - Read command dispatched after a write command was previously dispatched (see Note 1).
-
-
-# CBE Signal Group 72 - MIC Group 2 (NClk/2)
-event:0x1c29 counters:0,1,2,3 um:PPU_0123_cycles          minimum:10000	name:YM_COMMON_YMB_CSR_PERFORM2(1)	: XIO0 - Read command queue is empty.
-event:0x1c2a counters:0,1,2,3 um:PPU_0123_cycles          minimum:10000	name:YM_COMMON_YMB_CSR_PERFORM2(2)	: XIO0 - Write command queue is empty.
-event:0x1c2c counters:0,1,2,3 um:PPU_0123_cycles          minimum:10000	name:YM_COMMON_YMB_CSR_PERFORM2(4)	: XIO0 - Read command queue is full.
-event:0x1c2d counters:0,1,2,3 um:PPU_0123_cycles          minimum:10000	name:YM_COMMON_YMB_CSR_PERFORM2(5)	: XIO0 - MIC responds with a Retry for a read command because the read command queue is full.
-event:0x1c2e counters:0,1,2,3 um:PPU_0123_cycles          minimum:10000	name:YM_COMMON_YMB_CSR_PERFORM2(6)	: XIO0 - Write command queue is full.
-event:0x1c2f counters:0,1,2,3 um:PPU_0123_cycles          minimum:10000	name:YM_COMMON_YMB_CSR_PERFORM2(7)	: XIO0 - MIC responds with a Retry for a write command because the write command queue is full.
-event:0x1c42 counters:0,1,2,3 um:PPU_0123_cycles          minimum:10000	name:YM_CTL0_YMM_CCS_PERFORM(2)	: XIO0 - Read command dispatched; includes high-priority and fast-path reads (see Note 1).
-event:0x1c43 counters:0,1,2,3 um:PPU_0123_cycles          minimum:10000	name:YM_CTL0_YMM_CCS_PERFORM(3)	: XIO0 - Write command dispatched (see Note 1).
-event:0x1c44 counters:0,1,2,3 um:PPU_0123_cycles          minimum:10000	name:YM_CTL0_YMM_CCS_PERFORM(4)	: XIO0 - Read-Modify-Write command (data size < 16 bytes) dispatched (see Note 1).
-event:0x1c45 counters:0,1,2,3 um:PPU_0123_cycles          minimum:10000	name:YM_CTL0_YMM_CCS_PERFORM(5)	: XIO0 - Refresh dispatched (see Note 1).
-event:0x1c49 counters:0,1,2,3 um:PPU_0123_cycles          minimum:10000	name:YM_CTL0_YMM_CRW_PERFORM(1)	: XIO0 - Write command dispatched after a read command was previously dispatched (see Note 1).
-event:0x1c4a counters:0,1,2,3 um:PPU_0123_cycles          minimum:10000	name:YM_CTL0_YMM_CRW_PERFORM(2)	: XIO0 - Read command dispatched after a write command was previously dispatched (see Note 1).
-
-# CBE Signal Group 73 - MIC Group 3 (NClk/2)
-event:0x1ca7 counters:0,1,2,3 um:PPU_0123_cycles          minimum:10000	name:YM_CTL0_YMM_CCS_PERFORM(3)	: XIO0 - Write command dispatched (see Note 1).
-event:0x1ca8 counters:0,1,2,3 um:PPU_0123_cycles          minimum:10000	name:YM_CTL0_YMM_CCS_PERFORM(4)	: XIO0 - Read-Modify-Write command (data size < 16 bytes) dispatched (see Note 1).
-event:0x1ca9 counters:0,1,2,3 um:PPU_0123_cycles          minimum:10000	name:YM_CTL0_YMM_CCS_PERFORM(5)	: XIO0 - Refresh dispatched (see Note 1).
-event:0x1cab counters:0,1,2,3 um:PPU_0123_cycles          minimum:10000	name:YM_CTL0_YMM_CCS_PERFORM(7)	: XIO0 - Byte-masking write command (data size >= 16 bytes) dispatched (see Note 1).
-
-
-# Cell BE Island 8 - Broadband Engine Interface (BEI)
-
-# CBE Signal Group 81 - BIF Controller - IOIF0 Word 0 (NClk/2)
-event:0x1fb0 counters:0,1,2,3 um:PPU_02_cycles          minimum:10000	name:B2F_Type_A_Data	: Type A data physical layer group (PLG). Does not include header-only or credit-only data PLGs. In IOIF mode, counts I/O device read data; in BIF mode, counts all outbound data.
-event:0x1fb1 counters:0,1,2,3 um:PPU_02_cycles          minimum:10000	name:B2F_Type_B_Data	: Type B data PLG. In IOIF mode, counts I/O device read data; in BIF mode, counts all outbound data.
-event:0x1fb2 counters:0,1,2,3 um:PPU_02_cycles          minimum:10000	name:IOC_Type_A_Data	: Type A data PLG. Does not include header-only or credit-only PLGs. In IOIF mode, counts CBE store data to I/O device. Does not apply in BIF mode.
-event:0x1fb3 counters:0,1,2,3 um:PPU_02_cycles          minimum:10000	name:IOC_Type_B_Data	: Type B data PLG. In IOIF mode, counts CBE store data to an I/O device. Does not apply in BIF mode.
-event:0x1fb4 counters:0,1,2,3 um:PPU_02_cycles          minimum:10000	name:Data_PLG		: Data PLG. Does not include header-only or credit-only PLGs.
-event:0x1fb5 counters:0,1,2,3 um:PPU_02_cycles          minimum:10000	name:Command_PLG		: Command PLG (no credit-only PLG). In IOIF mode, counts I/O command or reply PLGs. In BIF mode, counts command/ reflected command or snoop/combined responses.
-event:0x1fb6 counters:0,1,2,3 um:PPU_02_cycles          minimum:10000	name:Type_A_Transfer	: Type A data transfer regardless of length. Can also be used to count Type A data header PLGs (but not credit-only PLGs).
-event:0x1fb7 counters:0,1,2,3 um:PPU_02_cycles          minimum:10000	name:Type_B_Transfer	: Type B data transfer.
-event:0x1fb8 counters:0,1,2,3 um:PPU_02_cycles          minimum:10000	name:Cmd_Credit_Only_PLG	: Command-credit-only command PLG in either IOIF or BIF mode.
-event:0x1fb9 counters:0,1,2,3 um:PPU_02_cycles          minimum:10000	name:Data_Credit_Only_PLG	: Data-credit-only data PLG sent in either IOIF or BIF mode.
-event:0x1fba counters:0,1,2,3 um:PPU_02_cycles          minimum:10000	name:Non-Null_Envelopes	: Non-null envelope sent (does not include long envelopes).
-event:0x1fbc counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:null_env_sent		: Null envelope sent (see Note 1).
-event:0x1fbd counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:no_valid_data		: No valid data sent this cycle (see Note 1).
-event:0x1fbe counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:norm_env_sent		: Normal envelope sent (see Note 1).
-event:0x1fbf counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:lnog_env_sent		: Long envelope sent (see Note 1).
-event:0x1fc0 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:per_mon_null_sent	: A Null PLG inserted in an outgoing envelope.
-event:0x1fc1 counters:0,1,2,3 um:PPU_02_cycles          minimum:10000	name:per_mon_array_full	: Outbound envelope array is full.
-
-# CBE Signal Group 82 - BIF Controller - IOIF1 Word 0 (NClk/2)
-event:0x201b counters:0,1,2,3 um:PPU_02_cycles          minimum:10000	name:Type_B_Transfer	: Type B data transfer.
-
-
-# CBE Signal Group 83 - BIF Controller - IOIF0 Word 2 (NClk/2)
-event:0x206d counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:null_env_rcvd		: Null envelope received (see Note 1).
-event:0x207a counters:0,1,2,3 um:PPU_02_cycles          minimum:10000	name:Command_PLG		: Command PLG, but not credit-only PLG. In IOIF mode, counts I/O command or reply PLGs. In BIF mode, counts command/reflected command or snoop/combined responses.
-event:0x207b counters:0,1,2,3 um:PPU_02_cycles          minimum:10000	name:Command_Credit_Only_PLG	: Command-credit-only command PLG.
-event:0x2080 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:norm_env_rcvd_good	: Normal envelope received is good (see Note 1).
-event:0x2081 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:long_env_rcvd_good	: Long envelope received is good (see Note 1).
-event:0x2082 counters:0,1,2,3 um:PPU_02_cycles          minimum:10000	name:cmd_credit_only_PLG	: Data-credit-only data PLG in either IOIF or BIF mode; will count a maximum of one per envelope (see Note 1).
-event:0x2083 counters:0,1,2,3 um:PPU_02_cycles          minimum:10000	name:non-null_envelope	: Non-null envelope; does not include long envelopes; includes retried envelopes (see Note 1).
-event:0x2084 counters:0,1,2,3 um:PPU_02_cycles          minimum:10000	name:data_grnt_rcvd	: Data grant received.
-event:0x2088 counters:0,1,2,3 um:PPU_02_cycles          minimum:10000	name:Data_PLG		: Data PLG. Does not include header-only or credit-only PLGs.
-event:0x2089 counters:0,1,2,3 um:PPU_02_cycles          minimum:10000	name:Type_A_transfer	: Type A data transfer regardless of length. Can also be used to count Type A data header PLGs, but not credit-only PLGs.
-event:0x208a counters:0,1,2,3 um:PPU_02_cycles          minimum:10000	name:Type_B_transfer	: Type B data transfer.
-
-# CBE Signal Group 84 - BIF Controller - IOIF1 Word 2 (NClk/2)
-event:0x20d1 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:null_env_rcvd		: Null envelope received (see Note 1).
-event:0x20de counters:0,1,2,3 um:PPU_02_cycles          minimum:10000	name:Command_PLG		: Command PLG (no credit-only PLG). Counts I/O command or reply PLGs.
-event:0x20df counters:0,1,2,3 um:PPU_02_cycles          minimum:10000	name:Command_Credit_Only_PLG	: Command-credit-only command PLG.
-event:0x20e4 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:norm_env_rcvd_good	: Normal envelope received is good (see Note 1).
-event:0x20e5 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:long_env_rcvd_good	: Long envelope received is good (see Note 1).
-event:0x20e6 counters:0,1,2,3 um:PPU_02_cycles          minimum:10000	name:cmd_credit_only_PLG	: Data-credit-only data PLG received; will count a maximum of one per envelope (see Note 1).
-event:0x20e7 counters:0,1,2,3 um:PPU_02_cycles          minimum:10000	name:non-null_envelope	: Non-Null envelope received; does not include long envelopes; includes retried envelopes (see Note 1).
-event:0x20e8 counters:0,1,2,3 um:PPU_02_cycles          minimum:10000	name:data_grnt_rcvd	: Data grant received.
-event:0x20ec counters:0,1,2,3 um:PPU_02_cycles          minimum:10000	name:Data_PLG		: Data PLG received. Does not include header-only or credit-only PLGs.
-event:0x20ed counters:0,1,2,3 um:PPU_02_cycles          minimum:10000	name:Type_A_transfer	: Type I A data transfer regardless of length. Can also be used to count Type A data header PLGs (but not credit-only PLGs).
-event:0x20ee counters:0,1,2,3 um:PPU_02_cycles          minimum:10000	name:Type_B_transfer	: Type B data transfer received.
-
-# CBE Signal Group 85 - I/O Controller Word 0 - Group 1 (NClk/2)
-event:0x213c counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:mmio_rd_to_ioif1	: Received MMIO read targeted to IOIF1.
-event:0x213d counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:mmio_wrt_to_ioif1	: Received MMIO write targeted to IOIF1.
-event:0x213e counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:mmio_rd_to_ioif0	: Received MMIO read targeted to IOIF0.
-event:0x213f counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:mmio_wrt_to_ioif0	: Received MMIO write targeted to IOIF0.
-event:0x2140 counters:0,1,2,3 um:PPU_02_cycles          minimum:10000	name:cmd_to_slice0		: Sent command to IOIF0.
-event:0x2141 counters:0,1,2,3 um:PPU_02_cycles          minimum:10000	name:cmd_to_slice1		: Sent command to IOIF1.
-
-# CBE Signal Group 86 - I/O Controller Word 2 - Group 2 (NClk/2)
-event:0x219d counters:0,1,2,3 um:PPU_02_cycles_or_edges minimum:10000	name:re_dep_dm3		: IOIF0 Dependency Matrix 3 is occupied by a dependent command (see Note 1).
-event:0x219e counters:0,1,2,3 um:PPU_02_cycles_or_edges minimum:10000	name:re_dep_dm4		: IOIF0 Dependency Matrix 4 is occupied by a dependent command (see Note 1).
-event:0x219f counters:0,1,2,3 um:PPU_02_cycles_or_edges minimum:10000	name:re_dep_dm5		: IOIF0 Dependency Matrix 5 is occupied by a dependent command (see Note 1).
-event:0x21a2 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:slice0_ld_rqst	: Received read request from IOIF0.
-event:0x21a3 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:slice0_str_rqst	: Received write request from IOIF0.
-event:0x21a6 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:intrpt_from_realizer	: Received interrupt from the IOIF0.
-
-# CBE Signal Group 87 - I/O Controller - Group 3 (NClk/2)
-event:0x220c counters:0,1,2,3 um:PPU_02_cycles          minimum:10000	name:slice0_rqst_tkn_even	: IOIF0 request for token for even memory banks 0-14 (see Note 1).
-event:0x220d counters:0,1,2,3 um:PPU_02_cycles          minimum:10000	name:slice0_rqst_tkn_odd	: IOIF0 request for token for odd memory banks 1-15 (see Note 1).
-event:0x220e counters:0,1,2,3 um:PPU_02_cycles          minimum:10000	name:slice0_rqst_tkn1_3_5_7	: IOIF0 request for token type 1, 3, 5, or 7 (see Note 1).
-event:0x220f counters:0,1,2,3 um:PPU_02_cycles          minimum:10000	name:slice0_rqst_tkn9_11_13_15	: IOIF0 request for token type 9, 11, 13, or 15 (see Note 1).
-event:0x2214 counters:0,1,2,3 um:PPU_02_cycles          minimum:10000	name:slice0_rqst_tkn16	: IOIF0 request for token type 16 (see Note 1).
-event:0x2215 counters:0,1,2,3 um:PPU_02_cycles          minimum:10000	name:slice0_rqst_tkn17	: IOIF0 request for token type 17 (see Note 1).
-event:0x2216 counters:0,1,2,3 um:PPU_02_cycles          minimum:10000	name:slice0_rqst_tkn18	: IOIF0 request for token type 18 (see Note 1).
-event:0x2217 counters:0,1,2,3 um:PPU_02_cycles          minimum:10000	name:slice0_rqst_tkn19	: IOIF0 request for token type 19 (see Note 1).
-
-
-# CBE Signal Group 88 - I/O Controller Word 0 - Group 4 (NClk/2)
-event:0x2260 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:io_pt_hit		: I/O page table cache hit for commands from IOIF.
-event:0x2261 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:io_pt_miss		: I/O page table cache miss for commands from IOIF.
-event:0x2263 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:io_seg_tbl_hit	: I/O segment table cache hit.
-event:0x2264 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:io_seg_tbl_miss	: I/O segment table cache miss.
-event:0x2278 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:intrrpt_frm_spu	: Interrupt received from any SPU (reflected cmd when IIC has sent ACK response).
-event:0x2279 counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:iic_intrrpt_to_pu_thrd0	: Internal interrupt controller (IIC) generated interrupt to PPU thread 0.
-event:0x227a counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:iic_intrrpt_to_pu_thrd1	: IIC generated interrupt to PPU thread 1.
-event:0x227b counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:pu_intrrpt_to_pu_thrd0	: Received external interrupt (using MMIO) from PPU to PPU thread 0.
-event:0x227c counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:pu_intrrpt_to_pu_thrd1	: Received external interrupt (using MMIO) from PPU to PPU thread 1.
-event:0x227c counters:0,1,2,3 um:PPU_02_edges           minimum:10000	name:pu_intrrpt_to_pu_thrd1	: Received external interrupt (using MMIO) from PPU to PPU thread 1.
diff --git a/events/ppc64/cell-be/unit_masks b/events/ppc64/cell-be/unit_masks
deleted file mode 100644
index 64a4959..0000000
--- a/events/ppc64/cell-be/unit_masks
+++ /dev/null
@@ -1,137 +0,0 @@
-# Cell Broadband Engine possible unit masks
-#
-# Copyright OProfile authors
-#
-#(C) COPYRIGHT International Business Machines Corp. 2006
-# Contributed by Maynard Johnson <maynardj@us.ibm.com>
-#
-#
-name:zero type:mandatory default:0x0
-	0x000 Count cycles				[mandatory]
-name:PPU_0_cycles type:bitmask default:0x013
-	0x001 Count cycles				[mandatory]
-	0x000 Negative polarity				[optional ]
-	0x002 Positive polarity				[default  ]
-	0x010 PPU Bus Word 0				[mandatory]
-name:PPU_0_edges type:bitmask default:0x012
-	0x000 Count edges				[mandatory]
-	0x000 Negative polarity				[optional ]
-	0x002 Positive polarity				[default  ]
-	0x010 PPU Bus Word 0				[mandatory]
-name:PPU_2_cycles type:bitmask default:0x043
-	0x001 Count cycles				[mandatory]
-	0x000 Negative polarity				[optional ]
-	0x002 Positive polarity				[default  ]
-	0x040 PPU Bus Word 2				[mandatory]
-name:PPU_2_edges type:bitmask default:0x042
-	0x000 Count edges				[mandatory]
-	0x000 Negative polarity				[optional ]
-	0x002 Positive polarity				[default  ]
-	0x040 PPU Bus Word 2				[mandatory]
-name:PPU_01_cycles type:bitmask default:0x023
-	0x001 Count cycles				[mandatory]
-	0x000 Negative polarity				[optional ]
-	0x002 Positive polarity				[default  ]
-	0x010 PPU Bus Word 0				[optional ]
-	0x020 PPU Bus Word 1				[default  ]
-name:PPU_01_edges type:bitmask default:0x022
-	0x000 Count edges				[mandatory]
-	0x000 Negative polarity				[optional ]
-	0x002 Positive polarity				[default  ]
-	0x010 PPU Bus Word 0				[optional ]
-	0x020 PPU Bus Word 1				[default  ]
-name:PPU_01_cycles_or_edges type:bitmask default:0x023
-	0x000 Count edges				[optional ]
-	0x001 Count cycles				[default  ]
-	0x000 Negative polarity				[optional ]
-	0x002 Positive polarity				[default  ]
-	0x010 PPU Bus Word 0				[optional ]
-	0x020 PPU Bus Word 1				[default  ]
-name:PPU_02_cycles type:bitmask default:0x013
-	0x001 Count cycles				[mandatory]
-	0x000 Negative polarity				[optional ]
-	0x002 Positive polarity				[default  ]
-	0x010 PPU Bus Word 0				[default  ]
-	0x040 PPU Bus Word 2				[optional ]
-name:PPU_02_edges type:bitmask default:0x012
-	0x000 Count edges				[mandatory]
-	0x000 Negative polarity				[optional ]
-	0x002 Positive polarity				[default  ]
-	0x010 PPU Bus Word 0				[default  ]
-	0x040 PPU Bus Word 2				[optional ]
-name:PPU_02_cycles_or_edges type:bitmask default:0x013
-	0x000 Count edges				[optional ]
-	0x001 Count cycles				[default  ]
-	0x000 Negative polarity				[optional ]
-	0x002 Positive polarity				[default  ]
-	0x010 PPU Bus Word 0				[default  ]
-	0x040 PPU Bus Word 2				[optional ]
-name:PPU_0123_cycles type:bitmask default:0x033
-	0x001 Count cycles				[mandatory]
-	0x000 Negative polarity				[optional ]
-	0x002 Positive polarity				[default  ]
-	0x030 PPU Bus Word 0/1				[default  ]
-	0x0c0 PPU Bus Word 2/3				[optional ]
-name:SPU_02_cycles type:bitmask default:0x0113
-	0x0001 Count cycles				[mandatory]
-	0x0000 Negative polarity			[optional ]
-	0x0002 Positive polarity			[default  ]
-	0x0110 SPU Bus Word 0				[default  ]
-	0x0140 SPU Bus Word 2				[optional ]
-	0x0000 SPU 0					[default  ]
-	0x1000 SPU 1					[optional ]
-	0x2000 SPU 2					[optional ]
-	0x3000 SPU 3					[optional ]
-	0x4000 SPU 4					[optional ]
-	0x5000 SPU 5					[optional ]
-	0x6000 SPU 6					[optional ]
-	0x7000 SPU 7					[optional ]
-name:SPU_02_cycles_or_edges type:bitmask default:0x0113
-	0x0000 Count edges				[optional ]
-	0x0001 Count cycles				[default  ]
-	0x0000 Negative polarity			[optional ]
-	0x0002 Positive polarity			[default  ]
-	0x0110 SPU Bus Word 0				[default  ]
-	0x0140 SPU Bus Word 2				[optional ]
-	0x0000 SPU 0					[default  ]
-	0x1000 SPU 1					[optional ]
-	0x2000 SPU 2					[optional ]
-	0x3000 SPU 3					[optional ]
-	0x4000 SPU 4					[optional ]
-	0x5000 SPU 5					[optional ]
-	0x6000 SPU 6					[optional ]
-	0x7000 SPU 7					[optional ]
-name:SPU_Trigger_cycles_or_edges type:bitmask default:0x0107
-	0x0000 Count edges				[optional ]
-	0x0001 Count cycles				[default  ]
-	0x0000 Negative polarity			[optional ]
-	0x0002 Positive polarity			[default  ]
-	0x0104 SPU Trigger 0				[default  ]
-	0x0114 SPU Trigger 1				[optional ]
-	0x0124 SPU Trigger 2				[optional ]
-	0x0134 SPU Trigger 3				[optional ]
-	0x0000 SPU 0					[default  ]
-	0x1000 SPU 1					[optional ]
-	0x2000 SPU 2					[optional ]
-	0x3000 SPU 3					[optional ]
-	0x4000 SPU 4					[optional ]
-	0x5000 SPU 5					[optional ]
-	0x6000 SPU 6					[optional ]
-	0x7000 SPU 7					[optional ]
-name:SPU_Event_cycles_or_edges type:bitmask default:0x0147
-	0x0000 Count edges				[optional ]
-	0x0001 Count cycles				[default  ]
-	0x0000 Negative polarity			[optional ]
-	0x0002 Positive polarity			[default  ]
-	0x0144 SPU Event 0				[default  ]
-	0x0154 SPU Event 1				[optional ]
-	0x0164 SPU Event 2				[optional ]
-	0x0174 SPU Event 3				[optional ]
-	0x0000 SPU 0					[default  ]
-	0x1000 SPU 1					[optional ]
-	0x2000 SPU 2					[optional ]
-	0x3000 SPU 3					[optional ]
-	0x4000 SPU 4					[optional ]
-	0x5000 SPU 5					[optional ]
-	0x6000 SPU 6					[optional ]
-	0x7000 SPU 7					[optional ]
diff --git a/events/ppc64/ibm-compat-v1/event_mappings b/events/ppc64/ibm-compat-v1/event_mappings
deleted file mode 100644
index 5805604..0000000
--- a/events/ppc64/ibm-compat-v1/event_mappings
+++ /dev/null
@@ -1,82 +0,0 @@
-#PPC64 pmu-compat event mappings, version 1
-#
-# Copyright OProfile authors
-# Copyright (c) International Business Machines, 2009.
-# Contributed by Maynard Johnson <maynardj@us.ibm.com>.
-#
-#Mapping of event groups to MMCR values
-
-#Group Default
-event:0X001 mmcr0:0X00000000 mmcr1:0X00000000FAF41EF4 mmcra:0X00000000
-
-#Group 1 pm_compat_utilization1, Basic CPU utilization
-event:0X0010 mmcr0:0X00000000 mmcr1:0X00000000FAF41EF4 mmcra:0X00000000
-event:0X0011 mmcr0:0X00000000 mmcr1:0X00000000FAF41EF4 mmcra:0X00000000
-event:0X0012 mmcr0:0X00000000 mmcr1:0X00000000FAF41EF4 mmcra:0X00000000
-event:0X0013 mmcr0:0X00000000 mmcr1:0X00000000FAF41EF4 mmcra:0X00000000
-
-#Group 2 pm_compat_utilization2, CPI and utilization data
-event:0X0020 mmcr0:0X00000000 mmcr1:0X00000000F4F41EFA mmcra:0X00000000
-event:0X0021 mmcr0:0X00000000 mmcr1:0X00000000F4F41EFA mmcra:0X00000000
-event:0X0022 mmcr0:0X00000000 mmcr1:0X00000000F4F41EFA mmcra:0X00000000
-event:0X0023 mmcr0:0X00000000 mmcr1:0X00000000F4F41EFA mmcra:0X00000000
-
-#Group 3 pm_compat_dsource, Data Access sources
-event:0X0030 mmcr0:0X00000000 mmcr1:0X00000000FEFEFEFA mmcra:0X00000000
-event:0X0031 mmcr0:0X00000000 mmcr1:0X00000000FEFEFEFA mmcra:0X00000000
-event:0X0032 mmcr0:0X00000000 mmcr1:0X00000000FEFEFEFA mmcra:0X00000000
-event:0X0033 mmcr0:0X00000000 mmcr1:0X00000000FEFEFEFA mmcra:0X00000000
-
-#Group 4 pm_compat_l1_dcache_load_store_miss, L1 D-Cache load/store miss
-event:0X0040 mmcr0:0X00000000 mmcr1:0X0000000002F0F0F0 mmcra:0X00000000
-event:0X0041 mmcr0:0X00000000 mmcr1:0X0000000002F0F0F0 mmcra:0X00000000
-event:0X0042 mmcr0:0X00000000 mmcr1:0X0000000002F0F0F0 mmcra:0X00000000
-event:0X0043 mmcr0:0X00000000 mmcr1:0X0000000002F0F0F0 mmcra:0X00000000
-
-#Group 5 pm_compat_l1_cache_load, L1 Cache loads
-event:0X0050 mmcr0:0X00000000 mmcr1:0X0000000002FEF6F0 mmcra:0X00000000
-event:0X0051 mmcr0:0X00000000 mmcr1:0X0000000002FEF6F0 mmcra:0X00000000
-event:0X0052 mmcr0:0X00000000 mmcr1:0X0000000002FEF6F0 mmcra:0X00000000
-event:0X0053 mmcr0:0X00000000 mmcr1:0X0000000002FEF6F0 mmcra:0X00000000
-
-#Group 6 pm_compat_instruction_directory, Instruction Directory
-event:0X0060 mmcr0:0X00000000 mmcr1:0X00000000F6FC02FC mmcra:0X00000000
-event:0X0061 mmcr0:0X00000000 mmcr1:0X00000000F6FC02FC mmcra:0X00000000
-event:0X0062 mmcr0:0X00000000 mmcr1:0X00000000F6FC02FC mmcra:0X00000000
-event:0X0063 mmcr0:0X00000000 mmcr1:0X00000000F6FC02FC mmcra:0X00000000
-
-#Group 7 pm_compat_data_directory, Data Directory
-event:0X0070 mmcr0:0X00000000 mmcr1:0X00000000FCF6FCFA mmcra:0X00000000
-event:0X0071 mmcr0:0X00000000 mmcr1:0X00000000FCF6FCFA mmcra:0X00000000
-event:0X0072 mmcr0:0X00000000 mmcr1:0X00000000FCF6FCFA mmcra:0X00000000
-event:0X0073 mmcr0:0X00000000 mmcr1:0X00000000FCF6FCFA mmcra:0X00000000
-
-#Group 8 pm_compat_cpi_1plus_ppc, Misc CPI and utilization data
-event:0X0080 mmcr0:0X00000000 mmcr1:0X00000000F2F4F2F2 mmcra:0X00000000
-event:0X0081 mmcr0:0X00000000 mmcr1:0X00000000F2F4F2F2 mmcra:0X00000000
-event:0X0082 mmcr0:0X00000000 mmcr1:0X00000000F2F4F2F2 mmcra:0X00000000
-event:0X0083 mmcr0:0X00000000 mmcr1:0X00000000F2F4F2F2 mmcra:0X00000000
-
-#Group 9 pm_compat_misc_events1, Misc Events
-event:0X0090 mmcr0:0X00000000 mmcr1:0X0000000002F8F81E mmcra:0X00000000
-event:0X0091 mmcr0:0X00000000 mmcr1:0X0000000002F8F81E mmcra:0X00000000
-event:0X0092 mmcr0:0X00000000 mmcr1:0X0000000002F8F81E mmcra:0X00000000
-event:0X0093 mmcr0:0X00000000 mmcr1:0X0000000002F8F81E mmcra:0X00000000
-
-#Group 10 pm_compat_misc_events2, Misc Events
-event:0X00A0 mmcr0:0X00000000 mmcr1:0X00000000F0F2F4F8 mmcra:0X00000000
-event:0X00A1 mmcr0:0X00000000 mmcr1:0X00000000F0F2F4F8 mmcra:0X00000000
-event:0X00A2 mmcr0:0X00000000 mmcr1:0X00000000F0F2F4F8 mmcra:0X00000000
-event:0X00A3 mmcr0:0X00000000 mmcr1:0X00000000F0F2F4F8 mmcra:0X00000000
-
-#Group 11 pm_compat_misc_events3, Misc Events
-event:0X00B0 mmcr0:0X00000000 mmcr1:0X00000000F8F2F8F6 mmcra:0X00000000
-event:0X00B1 mmcr0:0X00000000 mmcr1:0X00000000F8F2F8F6 mmcra:0X00000000
-event:0X00B2 mmcr0:0X00000000 mmcr1:0X00000000F8F2F8F6 mmcra:0X00000000
-event:0X00B3 mmcr0:0X00000000 mmcr1:0X00000000F8F2F8F6 mmcra:0X00000000
-
-#Group 12 pm_compat_suspend, Suspend Events
-event:0X00C0 mmcr0:0X00000000 mmcr1:0X0000000000000000 mmcra:0X00000000
-event:0X00C1 mmcr0:0X00000000 mmcr1:0X0000000000000000 mmcra:0X00000000
-event:0X00C2 mmcr0:0X00000000 mmcr1:0X0000000000000000 mmcra:0X00000000
-event:0X00C3 mmcr0:0X00000000 mmcr1:0X0000000000000000 mmcra:0X00000000
diff --git a/events/ppc64/ibm-compat-v1/events b/events/ppc64/ibm-compat-v1/events
deleted file mode 100644
index 9d5e9c6..0000000
--- a/events/ppc64/ibm-compat-v1/events
+++ /dev/null
@@ -1,91 +0,0 @@
-#PPC64 pmu-compat events, version 1
-#
-# Copyright OProfile authors
-# Copyright (c) International Business Machines, 2009.
-# Contributed by Maynard Johnson <maynardj@us.ibm.com>.
-#
-#
-#  Within each group, the event names must be unique.  Each event in a group is
-#  assigned to a unique counter.
-#
-#  Only events within the same group can be selected simultaneously.
-#  Each event is given a unique event number.  The event number is used by the
-#  OProfile code to resolve event names for the post-processing.  This is done
-#  to preserve compatibility with the rest of the OProfile code.  The event
-#  numbers are formatted as follows: <group_num>concat(<counter for the event>).
-
-#Group Default
-event:0X001 counters:2 um:zero minimum:10000 name:CYCLES : Processor Cycles
-
-
-#Group 1 pm_compat_utilization1, Basic CPU utilization
-event:0X0010 counters:0 um:zero minimum:1000 name:PM_THRD_ONE_RUN_CYC_GRP1 : (Group 1 pm_compat_utilization1) At least one thread in run cycles
-event:0X0011 counters:1 um:zero minimum:10000 name:PM_RUN_CYC_GRP1 : (Group 1 pm_compat_utilization1) Run cycles
-event:0X0012 counters:2 um:zero minimum:10000 name:PM_CYC_GRP1 : (Group 1 pm_compat_utilization1) Processor cycles
-event:0X0013 counters:3 um:zero minimum:1000 name:PM_RUN_PURR_GRP1 : (Group 1 pm_compat_utilization1) Run PURR Even
-
-#Group 2 pm_compat_utilization2, CPI and utilization data
-event:0X0020 counters:0 um:zero minimum:1000 name:PM_FPU_FLOP_GRP2 : (Group 2 pm_compat_utilization2) FPU executed 1FLOP, FMA, FSQRT or FDIV instruction
-event:0X0021 counters:1 um:zero minimum:10000 name:PM_RUN_CYC_GRP2 : (Group 2 pm_compat_utilization2) Run cycles
-event:0X0022 counters:2 um:zero minimum:10000 name:PM_CYC_GRP2 : (Group 2 pm_compat_utilization2) Processor cycles
-event:0X0023 counters:3 um:zero minimum:1000 name:PM_RUN_INST_CMPL_GRP2 : (Group 2 pm_compat_utilization2) Run instructions completed
-
-#Group 3 pm_compat_dsource, Data Access sources
-event:0X0030 counters:0 um:zero minimum:1000 name:PM_DATA_FROM_L1-5_GRP3 : (Group 3 pm_compat_dsource) Data loaded from L1.5
-event:0X0031 counters:1 um:zero minimum:1000 name:PM_DATA_FROM_L2MISS_GRP3 : (Group 3 pm_compat_dsource) Data loaded missed L2
-event:0X0032 counters:2 um:zero minimum:1000 name:PM_DATA_FROM_L3MISS_GRP3 : (Group 3 pm_compat_dsource) Data loaded from private L3 miss
-event:0X0033 counters:3 um:zero minimum:1000 name:PM_RUN_INST_CMPL_GRP3 : (Group 3 pm_compat_dsource) Run instructions completed
-
-#Group 4 pm_compat_l1_dcache_load_store_miss, L1 D-Cache load/store miss
-event:0X0040 counters:0 um:zero minimum:10000 name:PM_INST_CMPL_GRP4 : (Group 4 pm_compat_l1_dcache_load_store_miss) Instruction completed
-event:0X0041 counters:1 um:zero minimum:1000 name:PM_ST_FIN_GRP4 : (Group 4 pm_compat_l1_dcache_load_store_miss) Store instructions finished
-event:0X0042 counters:2 um:zero minimum:1000 name:PM_ST_MISS_L1_GRP4 : (Group 4 pm_compat_l1_dcache_load_store_miss) L1 D cache store misses
-event:0X0043 counters:3 um:zero minimum:1000 name:PM_LD_MISS_L1_GRP4 : (Group 4 pm_compat_l1_dcache_load_store_miss) L1 D cache load misses
-
-#Group 5 pm_compat_l1_cache_load, L1 Cache loads
-event:0X0050 counters:0 um:zero minimum:10000 name:PM_INST_CMPL_GRP5 : (Group 5 pm_compat_l1_cache_load) Instruction completed
-event:0X0051 counters:1 um:zero minimum:1000 name:PM_DATA_FROM_L2MISS_GRP5 : (Group 5 pm_compat_l1_cache_load) Data loaded missed L2
-event:0X0052 counters:2 um:zero minimum:1000 name:PM_L1_DCACHE_RELOAD_VALID_GRP5 : (Group 5 pm_compat_l1_cache_load) L1 reload data source valid
-event:0X0053 counters:3 um:zero minimum:1000 name:PM_LD_MISS_L1_GRP5 : (Group 5 pm_compat_l1_cache_load) L1 D cache load misses
-
-#Group 6 pm_compat_instruction_directory, Instruction Directory
-event:0X0060 counters:0 um:zero minimum:1000 name:PM_IERAT_MISS_GRP6 : (Group 6 pm_compat_instruction_directory) IERAT miss coun
-event:0X0061 counters:1 um:zero minimum:1000 name:PM_L1_ICACHE_MISS_GRP6 : (Group 6 pm_compat_instruction_directory) L1 I cache miss coun
-event:0X0062 counters:2 um:zero minimum:10000 name:PM_INST_CMPL_GRP6 : (Group 6 pm_compat_instruction_directory) Instruction completed
-event:0X0063 counters:3 um:zero minimum:1000 name:PM_ITLB_MISS_GRP6 : (Group 6 pm_compat_instruction_directory) Instruction TLB misses
-
-#Group 7 pm_compat_data_directory, Data Directory
-event:0X0070 counters:0 um:zero minimum:1000 name:PM_LSU_DERAT_MISS_CYC_GRP7 : (Group 7 pm_compat_data_directory) DERAT miss latency
-event:0X0071 counters:1 um:zero minimum:1000 name:PM_LSU_DERAT_MISS_GRP7 : (Group 7 pm_compat_data_directory) DERAT misses
-event:0X0072 counters:2 um:zero minimum:1000 name:PM_DTLB_MISS_GRP7 : (Group 7 pm_compat_data_directory) Data TLB misses
-event:0X0073 counters:3 um:zero minimum:1000 name:PM_RUN_INST_CMPL_GRP7 : (Group 7 pm_compat_data_directory) Run instructions completed
-
-#Group 8 pm_compat_cpi_1plus_ppc, Misc CPI and utilization data
-event:0X0080 counters:0 um:zero minimum:1000 name:PM_1PLUS_PPC_CMPL_GRP8 : (Group 8 pm_compat_cpi_1plus_ppc) One or more PPC instruction completed
-event:0X0081 counters:1 um:zero minimum:10000 name:PM_RUN_CYC_GRP8 : (Group 8 pm_compat_cpi_1plus_ppc) Run cycles
-event:0X0082 counters:2 um:zero minimum:1000 name:PM_INST_DISP_GRP8 : (Group 8 pm_compat_cpi_1plus_ppc) Instructions dispatched
-event:0X0083 counters:3 um:zero minimum:1000 name:PM_1PLUS_PPC_DISP_GRP8 : (Group 8 pm_compat_cpi_1plus_ppc) Cycles at least one instruction dispatched
-
-#Group 9 pm_compat_misc_events1, Misc Events
-event:0X0090 counters:0 um:zero minimum:10000 name:PM_INST_CMPL_GRP9 : (Group 9 pm_compat_misc_events1) Instruction completed
-event:0X0091 counters:1 um:zero minimum:1000 name:PM_EXT_INT_GRP9 : (Group 9 pm_compat_misc_events1) External interrupts
-event:0X0092 counters:2 um:zero minimum:1000 name:PM_TB_BIT_TRANS_GRP9 : (Group 9 pm_compat_misc_events1) Time Base bit transition
-event:0X0093 counters:3 um:zero minimum:10000 name:PM_CYC_GRP9 : (Group 9 pm_compat_misc_events1) Processor cycles
-
-#Group 10 pm_compat_misc_events2, Misc Events
-event:0X00A0 counters:0 um:zero minimum:1000 name:PM_INST_IMC_MATCH_CMPL_GRP10 : (Group 10 pm_compat_misc_events2) IMC matched instructions completed
-event:0X00A1 counters:1 um:zero minimum:1000 name:PM_INST_DISP_GRP10 : (Group 10 pm_compat_misc_events2) Instructions dispatched
-event:0X00A2 counters:2 um:zero minimum:1000 name:PM_THRD_CONC_RUN_INST_GRP10 : (Group 10 pm_compat_misc_events2) Concurrent run instructions
-event:0X00A3 counters:3 um:zero minimum:1000 name:PM_FLUSH_GRP10 : (Group 10 pm_compat_misc_events2) Flushes
-
-#Group 11 pm_compat_misc_events3, Misc Events
-event:0X00B0 counters:0 um:zero minimum:1000 name:PM_GCT_EMPTY_CYC_GRP11 : (Group 11 pm_compat_misc_events3) Cycles GCT empty
-event:0X00B1 counters:1 um:zero minimum:1000 name:PM_INST_DISP_GRP11 : (Group 11 pm_compat_misc_events3) Instructions dispatched
-event:0X00B2 counters:2 um:zero minimum:1000 name:PM_TB_BIT_TRANS_GRP11 : (Group 11 pm_compat_misc_events3) Time Base bit transition
-event:0X00B3 counters:3 um:zero minimum:1000 name:PM_BR_MPRED_GRP11 : (Group 11 pm_compat_misc_events3) Branches incorrectly predicted
-
-#Group 12 pm_compat_suspend, Suspend Events
-event:0X00C0 counters:0 um:zero minimum:1000 name:PM_SUSPENDED_GRP12 : (Group 12 pm_compat_suspend) Suspended
-event:0X00C1 counters:1 um:zero minimum:1000 name:PM_SUSPENDED_GRP12 : (Group 12 pm_compat_suspend) Suspended
-event:0X00C2 counters:2 um:zero minimum:1000 name:PM_SUSPENDED_GRP12 : (Group 12 pm_compat_suspend) Suspended
-event:0X00C3 counters:3 um:zero minimum:1000 name:PM_SUSPENDED_GRP12 : (Group 12 pm_compat_suspend) Suspended
diff --git a/events/ppc64/pa6t/event_mappings b/events/ppc64/pa6t/event_mappings
deleted file mode 100644
index 0bbddcb..0000000
--- a/events/ppc64/pa6t/event_mappings
+++ /dev/null
@@ -1,48 +0,0 @@
-# pa6t does not have an mmcra. mmcr0 has all the enables and config
-# bits. mmcr1 contains the event selectors for the four programmable
-# events
-
-# Group Default
-event:0x1 mmcr0:0x000000000005b81b mmcr1:0x0000000000949f00 mmcra:0x0
-event:0x3 mmcr0:0x000000000005b81b mmcr1:0x0000000000949f00 mmcra:0x0
-event:0x4 mmcr0:0x000000000005b81b mmcr1:0x0000000000949f00 mmcra:0x0
-
-# Group 1, Load/Store
-event:0x10 mmcr0:0x000000000007f83f mmcr1:0x00000000a8c0cab1 mmcra:0x0
-event:0x11 mmcr0:0x000000000007f83f mmcr1:0x00000000a8c0cab1 mmcra:0x0
-event:0x12 mmcr0:0x000000000007f83f mmcr1:0x00000000a8c0cab1 mmcra:0x0
-event:0x13 mmcr0:0x000000000007f83f mmcr1:0x00000000a8c0cab1 mmcra:0x0
-event:0x14 mmcr0:0x000000000007f83f mmcr1:0x00000000a8c0cab1 mmcra:0x0
-event:0x15 mmcr0:0x000000000007f83f mmcr1:0x00000000a8c0cab1 mmcra:0x0
-
-# Group 2, Frontend
-event:0x20 mmcr0:0x000000000007f83f mmcr1:0x0000000002058401 mmcra:0x0
-event:0x21 mmcr0:0x000000000007f83f mmcr1:0x0000000002058401 mmcra:0x0
-event:0x22 mmcr0:0x000000000007f83f mmcr1:0x0000000002058401 mmcra:0x0
-event:0x23 mmcr0:0x000000000007f83f mmcr1:0x0000000002058401 mmcra:0x0
-event:0x24 mmcr0:0x000000000007f83f mmcr1:0x0000000002058401 mmcra:0x0
-event:0x25 mmcr0:0x000000000007f83f mmcr1:0x0000000002058401 mmcra:0x0
-
-# Group 3, Branches
-event:0x30 mmcr0:0x000000000007f83f mmcr1:0x000000008d8b8988 mmcra:0x0
-event:0x31 mmcr0:0x000000000007f83f mmcr1:0x000000008d8b8988 mmcra:0x0
-event:0x32 mmcr0:0x000000000007f83f mmcr1:0x000000008d8b8988 mmcra:0x0
-event:0x33 mmcr0:0x000000000007f83f mmcr1:0x000000008d8b8988 mmcra:0x0
-event:0x34 mmcr0:0x000000000007f83f mmcr1:0x000000008d8b8988 mmcra:0x0
-event:0x35 mmcr0:0x000000000007f83f mmcr1:0x000000008d8b8988 mmcra:0x0
-
-# Group 4, Translation
-event:0x40 mmcr0:0x000000000007f83f mmcr1:0x0000000086baa7a8 mmcra:0x0
-event:0x41 mmcr0:0x000000000007f83f mmcr1:0x0000000086baa7a8 mmcra:0x0
-event:0x42 mmcr0:0x000000000007f83f mmcr1:0x0000000086baa7a8 mmcra:0x0
-event:0x43 mmcr0:0x000000000007f83f mmcr1:0x0000000086baa7a8 mmcra:0x0
-event:0x44 mmcr0:0x000000000007f83f mmcr1:0x0000000086baa7a8 mmcra:0x0
-event:0x45 mmcr0:0x000000000007f83f mmcr1:0x0000000086baa7a8 mmcra:0x0
-
-# Group 5, Memory
-event:0x50 mmcr0:0x000000000007f83f mmcr1:0x00000000c030cab1 mmcra:0x0
-event:0x51 mmcr0:0x000000000007f83f mmcr1:0x00000000c030cab1 mmcra:0x0
-event:0x52 mmcr0:0x000000000007f83f mmcr1:0x00000000c030cab1 mmcra:0x0
-event:0x53 mmcr0:0x000000000007f83f mmcr1:0x00000000c030cab1 mmcra:0x0
-event:0x54 mmcr0:0x000000000007f83f mmcr1:0x00000000c030cab1 mmcra:0x0
-event:0x55 mmcr0:0x000000000007f83f mmcr1:0x00000000c030cab1 mmcra:0x0
diff --git a/events/ppc64/pa6t/events b/events/ppc64/pa6t/events
deleted file mode 100644
index 5e2bc2f..0000000
--- a/events/ppc64/pa6t/events
+++ /dev/null
@@ -1,52 +0,0 @@
-# ppc64 pa6t events
-#
-# Unlike the IBM ppc64 chips, any of pa6t's events can be programmed into any
-# of the counters (pmc2-5). The notion of groups on pa6t is thus
-# artificial. That said, we can still define useful aggregations to guide the
-# user in his choice of group for a profiling session.
-
-# Group Default
-event:0x1 counters:0 um:zero minimum:10000 name:CYCLES : Processor Cycles
-event:0x3 counters:3 um:zero minimum:10000 name:ISS_CYCLES : Processor Cycles with instructions issued
-event:0x4 counters:4 um:zero minimum:10000 name:RET_UOP : Retired Micro-operatioins
-
-# Group 1, Load/Store
-event:0x10 counters:0 um:zero minimum:10000 name:GRP1_CYCLES : Processor Cycles
-event:0x11 counters:1 um:zero minimum:10000 name:GRP1_INST_RETIRED : Instructions retired
-event:0x12 counters:2 um:zero minimum:1000 name:GRP1_DCACHE_RD_MISS__NS : Dcache read misses NS
-event:0x13 counters:3 um:zero minimum:500 name:GRP1_MRB_LD_MISS_L2__NS : Load misses filling from memory
-event:0x14 counters:4 um:zero minimum:500 name:GRP1_MRB_ST_MISS_ALLOC__NS : Store misses in L1D and allocates an MRB entry
-event:0x15 counters:5 um:zero minimum:500 name:GRP1_TLB_MISS_D__NS : TLB misses NS (D- only)
-
-# Group 2, Frontend
-event:0x20 counters:0 um:zero minimum:10000 name:GRP2_CYCLES : Processor Cycles
-event:0x21 counters:1 um:zero minimum:10000 name:GRP2_INST_RETIRED : Instructions retired
-event:0x22 counters:2 um:zero minimum:2000 name:GRP2_FETCH_REQ : Demand fetch requests made to the Icache
-event:0x23 counters:3 um:zero minimum:500 name:GRP2_ICACHE_MISS_DEM__NS : Demand fetch requests missing in the Icache
-event:0x24 counters:4 um:zero minimum:500 name:GRP2_ICACHE_MISS_ALL : Demand and spec fetch requests missing in the Icache
-event:0x25 counters:5 um:zero minimum:2000 name:GRP2_ICACHE_ACC : Icache accesses
-
-# Group 3, Branches
-event:0x30 counters:0 um:zero minimum:10000 name:GRP3_CYCLES : Processor Cycles
-event:0x31 counters:1 um:zero minimum:10000 name:GRP3_INST_RETIRED : Instructions retired
-event:0x32 counters:2 um:zero minimum:500 name:GRP3_NXT_LINE_MISPRED__NS : Next fetch address mispredict
-event:0x33 counters:3 um:zero minimum:500 name:GRP3_DIRN_MISPRED__NS : Branch direction mispredict
-event:0x34 counters:4 um:zero minimum:500 name:GRP3_TGT_ADDR_MISPRED__NS : Branch target address mispredict
-event:0x35 counters:5 um:zero minimum:2000 name:GRP3_BRA_TAKEN__NS : Taken branches
-
-# Group 4, Translation
-event:0x40 counters:0 um:zero minimum:10000 name:GRP4_CYCLES : Processor Cycles
-event:0x41 counters:1 um:zero minimum:10000 name:GRP4_INST_RETIRED : Instructions retired
-event:0x42 counters:2 um:zero minimum:500 name:GRP4_TLB_MISS_D__NS : TLB Misses (D-)
-event:0x43 counters:3 um:zero minimum:500 name:GRP4_TLB_MISS_I__NS : TLB MIsses (I-)
-event:0x44 counters:4 um:zero minimum:500 name:GRP4_DERAT_MISS__NS : DERAT Misses
-event:0x45 counters:5 um:zero minimum:500 name:GRP4_IERAT_MISS__NS : IERAT Misses
-
-# Group 5, Memory
-event:0x50 counters:0 um:zero minimum:10000 name:GRP5_CYCLES : Processor Cycles
-event:0x51 counters:1 um:zero minimum:10000 name:GRP5_INST_RETIRED : Instructions retired
-event:0x52 counters:2 um:zero minimum:500 name:GRP5_DCACHE_RD_MISS__NS : Dcache read misses NS
-event:0x53 counters:3 um:zero minimum:500 name:GRP5_MRB_LD_MISS_L2__NS : Load misses filling from memory
-event:0x54 counters:4 um:zero minimum:500 name:GRP5_DCACHE_VIC : Dcache line evicted (snoops not included)
-event:0x55 counters:5 um:zero minimum:500 name:GRP5_MRB_ST_MISS_ALLOC__NS : Store misses in L1D and allocates an MRB entry
-
diff --git a/events/ppc64/pa6t/unit_masks b/events/ppc64/pa6t/unit_masks
deleted file mode 100644
index ccc3ddd..0000000
--- a/events/ppc64/pa6t/unit_masks
+++ /dev/null
@@ -1,4 +0,0 @@
-# ppc64 pa6t possible unit masks
-#
-name:zero type:mandatory default:0x0
-		 0x0 No unit mask
diff --git a/events/ppc64/power5++/event_mappings b/events/ppc64/power5++/event_mappings
index 57ed17b..07ff5b2 100644
--- a/events/ppc64/power5++/event_mappings
+++ b/events/ppc64/power5++/event_mappings
@@ -8,9 +8,6 @@
 #Group Default
 event:0X001 mmcr0:0X00000000 mmcr1:0X000000000A02121E mmcra:0X00000000
 
-#Group 0 with random sampling
-event:0X002 mmcr0:0X00000000 mmcr1:0X4000000002341E36 mmcra:0X00000001
-
 #Group 1 pm_utilization, CPI and utilization data
 event:0X0010 mmcr0:0X00000000 mmcr1:0X000000000A12121E mmcra:0X00000000
 event:0X0011 mmcr0:0X00000000 mmcr1:0X000000000A12121E mmcra:0X00000000
diff --git a/events/ppc64/power5++/events b/events/ppc64/power5++/events
index e4d055b..550dbf0 100644
--- a/events/ppc64/power5++/events
+++ b/events/ppc64/power5++/events
@@ -9,7 +9,11 @@
 #  assigned to a unique counter.  The groups are from the groups defined in the
 #  Performance Monitor Unit user guide for this processor.
 #
-#  Only events within the same group can be selected simultaneously.
+#  Only events within the same group can be selected simultaneously when
+#  using legacy opcontrol to do profiling.  When profiling with operf,
+#  events from different groups may be specified, and the Linux Performance
+#  Events Kernel Subsystem code will handle the necessary multiplexing.
+#
 #  Each event is given a unique event number.  The event number is used by the
 #  OProfile code to resolve event names for the post-processing.  This is done
 #  to preserve compatibility with the rest of the OProfile code.  The event
@@ -18,10 +22,6 @@
 #Group Default
 event:0X001 counters:1 um:zero minimum:10000 name:CYCLES : Processor Cycles
 
-#Group 0 with random sampling
-event:0X002 counters:2 um:zero minimum:10000 name:CYCLES_RND_SMPL : Processor Cycles with random sampling
-
-
 #Group 1 pm_utilization, CPI and utilization data
 event:0X0010 counters:0 um:zero minimum:10000 name:PM_RUN_CYC_GRP1 : (Group 1 pm_utilization) Run cycles
 event:0X0011 counters:1 um:zero minimum:10000 name:PM_INST_CMPL_GRP1 : (Group 1 pm_utilization) Instructions completed
diff --git a/events/ppc64/power5+/event_mappings b/events/ppc64/power5+/event_mappings
index 735d2d1..77e4957 100644
--- a/events/ppc64/power5+/event_mappings
+++ b/events/ppc64/power5+/event_mappings
@@ -3,10 +3,6 @@
 #Group Default
 event:0X001 mmcr0:0X00000000 mmcr1:0X000000000A02121E mmcra:0X00000000
 
-#Group 0 with random sampling
-event:0X002 mmcr0:0X00000000 mmcr1:0X4000000002341E36 mmcra:0X00000001
-
-
 #Group 1 pm_utilization, CPI and utilization data
 event:0X010 mmcr0:0X00000000 mmcr1:0X000000000A12121E mmcra:0X00000000
 event:0X011 mmcr0:0X00000000 mmcr1:0X000000000A12121E mmcra:0X00000000
diff --git a/events/ppc64/power5+/events b/events/ppc64/power5+/events
index 0624c39..deba0d0 100644
--- a/events/ppc64/power5+/events
+++ b/events/ppc64/power5+/events
@@ -4,7 +4,11 @@
 #  assigned to a unique counter.  The groups are from the groups defined in the
 #  Performance Monitor Unit user guide for this processor.
 #
-#  Only events within the same group can be selected simultaneously.
+#  Only events within the same group can be selected simultaneously when
+#  using legacy opcontrol to do profiling.  When profiling with operf,
+#  events from different groups may be specified, and the Linux Performance
+#  Events Kernel Subsystem code will handle the necessary multiplexing.
+#
 #  Each event is given a unique event number.  The event number is used by the
 #  OProfile code to resolve event names for the post-processing.  This is done
 #  to preserve compatibility with the rest of the OProfile code.  The event
@@ -13,10 +17,6 @@
 #Group Default
 event:0X001 counters:3 um:zero minimum:10000 name:CYCLES : Processor Cycles using continuous sampling
 
-#Group 0 with random sampling
-event:0X002 counters:2 um:zero minimum:10000 name:CYCLES_RND_SMPL : Processor Cycles with random sampling
-
-
 #Group 1 pm_utilization, CPI and utilization data
 event:0X010 counters:0 um:zero minimum:10000 name:PM_RUN_CYC_GRP1 : (Group 1 pm_utilization) Run cycles
 event:0X011 counters:1 um:zero minimum:10000 name:PM_INST_CMPL_GRP1 : (Group 1 pm_utilization) Instructions completed
diff --git a/events/ppc64/power5/event_mappings b/events/ppc64/power5/event_mappings
index dd3c779..52dd76f 100644
--- a/events/ppc64/power5/event_mappings
+++ b/events/ppc64/power5/event_mappings
@@ -3,10 +3,6 @@
 #Group Default
 event:0X001 mmcr0:0X00000000 mmcr1:0X000000000A02121E mmcra:0X00000000
 
-#Group 0 with random sampling
-event:0X002 mmcr0:0X00000000 mmcr1:0X4000000002341E36 mmcra:0X00000001
-
-
 #Group 1 pm_utilization, CPI and utilization data
 event:0X010 mmcr0:0X00000000 mmcr1:0X000000000A02121E mmcra:0X00000000
 event:0X011 mmcr0:0X00000000 mmcr1:0X000000000A02121E mmcra:0X00000000
diff --git a/events/ppc64/power5/events b/events/ppc64/power5/events
index 8f438bd..c40f78f 100644
--- a/events/ppc64/power5/events
+++ b/events/ppc64/power5/events
@@ -4,7 +4,11 @@
 #  assigned to a unique counter.  The groups are from the groups defined in the
 #  Performance Monitor Unit user guide for this processor.
 #
-#  Only events within the same group can be selected simultaneously.
+#  Only events within the same group can be selected simultaneously when
+#  using legacy opcontrol to do profiling.  When profiling with operf,
+#  events from different groups may be specified, and the Linux Performance
+#  Events Kernel Subsystem code will handle the necessary multiplexing.
+#
 #  Each event is given a unique event number.  The event number is used by the
 #  OProfile code to resolve event names for the post-processing.  This is done
 #  to preserve compatibility with the rest of the OProfile code.  The event
@@ -13,10 +17,6 @@
 #Group Default
 event:0X001 counters:3 um:zero minimum:10000 name:CYCLES : Processor Cycles using continuous sampling
 
-#Group 0 with random sampling
-event:0X002 counters:2 um:zero minimum:10000 name:CYCLES_RND_SMPL : Processor Cycles with random sampling
-
-
 #Group 1 pm_utilization, CPI and utilization data
 event:0X010 counters:0 um:zero minimum:10000 name:PM_RUN_CYC_GRP1 : (Group 1 pm_utilization) Run cycles
 event:0X011 counters:1 um:zero minimum:1000 name:PM_IOPS_CMPL_GRP1 : (Group 1 pm_utilization) IOPS instructions completed
diff --git a/events/ppc64/power6/event_mappings b/events/ppc64/power6/event_mappings
index 0d627b3..fdde90b 100644
--- a/events/ppc64/power6/event_mappings
+++ b/events/ppc64/power6/event_mappings
@@ -9,9 +9,6 @@
 #Group Default
 event:0X001 mmcr0:0X00000000 mmcr1:0X000000000A02121E mmcra:0X00000000
 
-#Group 0 with random sampling
-event:0X002 mmcr0:0X00000000 mmcr1:0X000000001E1E021A mmcra:0X00000001
-
 #Group 1 pm_utilization, CPI and utilization data
 event:0X0010 mmcr0:0X00000000 mmcr1:0X000000000A02121E mmcra:0X00000000
 event:0X0011 mmcr0:0X00000000 mmcr1:0X000000000A02121E mmcra:0X00000000
diff --git a/events/ppc64/power6/events b/events/ppc64/power6/events
index c1e2c76..df48b86 100644
--- a/events/ppc64/power6/events
+++ b/events/ppc64/power6/events
@@ -9,7 +9,11 @@
 #  assigned to a unique counter.  The groups are from the groups defined in the
 #  Performance Monitor Unit user guide for this processor.
 #
-#  Only events within the same group can be selected simultaneously.
+#  Only events within the same group can be selected simultaneously when
+#  using legacy opcontrol to do profiling.  When profiling with operf,
+#  events from different groups may be specified, and the Linux Performance
+#  Events Kernel Subsystem code will handle the necessary multiplexing.
+#
 #  Each event is given a unique event number.  The event number is used by the
 #  OProfile code to resolve event names for the post-processing.  This is done
 #  to preserve compatibility with the rest of the OProfile code.  The event
@@ -18,10 +22,6 @@
 #Group Default
 event:0X001 counters:3 um:zero minimum:10000 name:CYCLES : Processor Cycles
 
-#Group 0 with random sampling
-event:0X002 counters:1 um:zero minimum:10000 name:CYCLES_RND_SMPL : Processor Cycles with random sampling
-
-
 #Group 1 pm_utilization, CPI and utilization data
 event:0X0010 counters:0 um:zero minimum:10000 name:PM_RUN_CYC_GRP1 : (Group 1 pm_utilization) Run cycles
 event:0X0011 counters:1 um:zero minimum:10000 name:PM_INST_CMPL_GRP1 : (Group 1 pm_utilization) Instructions completed
diff --git a/events/ppc64/power7/event_mappings b/events/ppc64/power7/event_mappings
index 7de556d..fb752b0 100644
--- a/events/ppc64/power7/event_mappings
+++ b/events/ppc64/power7/event_mappings
@@ -8,9 +8,6 @@
 #Group Default
 event:0X001 mmcr0:0X00000000 mmcr1:0X000000001EF4F202 mmcra:0X00000000
 
-#Group 0 with random sampling
-event:0X002 mmcr0:0X00000000 mmcr1:0XDD0000008486021E mmcra:0X00000001
-
 #Group 1 pm_utilization, CPI and utilization data
 event:0X0010 mmcr0:0X00000000 mmcr1:0X000000001EF4F202 mmcra:0X00000000
 event:0X0011 mmcr0:0X00000000 mmcr1:0X000000001EF4F202 mmcra:0X00000000
@@ -2114,3 +2111,49 @@ event:0X1072 mmcr0:0X00000000 mmcr1:0X000000001E1E0232 mmcra:0X00000001
 event:0X1073 mmcr0:0X00000000 mmcr1:0X000000001E1E0232 mmcra:0X00000001
 event:0X1074 mmcr0:0X00000000 mmcr1:0X000000001E1E0232 mmcra:0X00000001
 event:0X1075 mmcr0:0X00000000 mmcr1:0X000000001E1E0232 mmcra:0X00000001
+
+#Group 264 pm_gct_noslot, GCT no slot events
+###### DO NOT REMOVE ######
+# Manually added group
+event:0X1080 mmcr0:0X00000000 mmcr1:0X00400000F908021B mmcra:0X00000000
+event:0X1081 mmcr0:0X00000000 mmcr1:0X00400000F908021B mmcra:0X00000000
+event:0X1082 mmcr0:0X00000000 mmcr1:0X00400000F908021B mmcra:0X00000000
+event:0X1083 mmcr0:0X00000000 mmcr1:0X00400000F908021B mmcra:0X00000000
+event:0X1084 mmcr0:0X00000000 mmcr1:0X00400000F908021B mmcra:0X00000000
+event:0X1085 mmcr0:0X00000000 mmcr1:0X00400000F908021B mmcra:0X00000000
+
+#Group 265 pm_cmplu_stall, CMPLU stall events
+###### DO NOT REMOVE ######
+event:0X1090 mmcr0:0X00000000 mmcr1:0X000000001D3C021C mmcra:0X00000000
+event:0X1091 mmcr0:0X00000000 mmcr1:0X000000001D3C021C mmcra:0X00000000
+event:0X1092 mmcr0:0X00000000 mmcr1:0X000000001D3C021C mmcra:0X00000000
+event:0X1093 mmcr0:0X00000000 mmcr1:0X000000001D3C021C mmcra:0X00000000
+event:0X1094 mmcr0:0X00000000 mmcr1:0X000000001D3C021C mmcra:0X00000000
+event:0X1095 mmcr0:0X00000000 mmcr1:0X000000001D3C021C mmcra:0X00000000
+
+#Group 266 pm_cmplu_stall2, CMPLU stall (vector)
+###### DO NOT REMOVE ######
+event:0X10A0 mmcr0:0X00000000 mmcr1:0X00000000281D3F0B mmcra:0X00000000
+event:0X10A1 mmcr0:0X00000000 mmcr1:0X00000000281D3F0B mmcra:0X00000000
+event:0X10A2 mmcr0:0X00000000 mmcr1:0X00000000281D3F0B mmcra:0X00000000
+event:0X10A3 mmcr0:0X00000000 mmcr1:0X00000000281D3F0B mmcra:0X00000000
+event:0X10A4 mmcr0:0X00000000 mmcr1:0X00000000281D3F0B mmcra:0X00000000
+event:0X10A5 mmcr0:0X00000000 mmcr1:0X00000000281D3F0B mmcra:0X00000000
+
+#Group 267 pm_cmplu_stall3, CMPLU stall (scalar)
+###### DO NOT REMOVE ######
+event:0X10B0 mmcr0:0X00000000 mmcr1:0X00000000F4183E13 mmcra:0X00000000
+event:0X10B1 mmcr0:0X00000000 mmcr1:0X00000000F4183E13 mmcra:0X00000000
+event:0X10B2 mmcr0:0X00000000 mmcr1:0X00000000F4183E13 mmcra:0X00000000
+event:0X10B3 mmcr0:0X00000000 mmcr1:0X00000000F4183E13 mmcra:0X00000000
+event:0X10B4 mmcr0:0X00000000 mmcr1:0X00000000F4183E13 mmcra:0X00000000
+event:0X10B5 mmcr0:0X00000000 mmcr1:0X00000000F4183E13 mmcra:0X00000000
+
+#Group 268 pm_cmplu_ifu, IFU stall
+###### DO NOT REMOVE ######
+event:0X10C0 mmcr0:0X00000000 mmcr1:0X0CC00000289C9E4D mmcra:0X00000000
+event:0X10C1 mmcr0:0X00000000 mmcr1:0X0CC00000289C9E4D mmcra:0X00000000
+event:0X10C2 mmcr0:0X00000000 mmcr1:0X0CC00000289C9E4D mmcra:0X00000000
+event:0X10C3 mmcr0:0X00000000 mmcr1:0X0CC00000289C9E4D mmcra:0X00000000
+event:0X10C4 mmcr0:0X00000000 mmcr1:0X0CC00000289C9E4D mmcra:0X00000000
+event:0X10C5 mmcr0:0X00000000 mmcr1:0X0CC00000289C9E4D mmcra:0X00000000
diff --git a/events/ppc64/power7/events b/events/ppc64/power7/events
index 10775a0..851cb93 100644
--- a/events/ppc64/power7/events
+++ b/events/ppc64/power7/events
@@ -5,7 +5,11 @@
 # Contributed by Maynard Johnson <maynardj@us.ibm.com>.
 #
 #
-#  Only events within the same group can be selected simultaneously.
+#  Only events within the same group can be selected simultaneously when
+#  using legacy opcontrol to do profiling.  When profiling with operf,
+#  events from different groups may be specified, and the Linux Performance
+#  Events Kernel Subsystem code will handle the necessary multiplexing.
+#
 #  Each event is given a unique event number.  The event number is used by the
 #  OProfile code to resolve event names for the post-processing.  This is done
 #  to preserve compatibility with the rest of the OProfile code.  The event
@@ -14,10 +18,6 @@
 #Group Default
 event:0X001 counters:0 um:zero minimum:10000 name:CYCLES : Processor Cycles
 
-#Group 0 with random sampling
-event:0X002 counters:3 um:zero minimum:10000 name:CYCLES_RND_SMPL : Processor Cycles with random sampling
-
-
 #Group 1 pm_utilization, CPI and utilization data
 event:0X0010 counters:0 um:zero minimum:10000 name:PM_CYC_GRP1 : (Group 1 pm_utilization) Processor Cycles
 event:0X0011 counters:1 um:zero minimum:10000 name:PM_RUN_CYC_GRP1 : (Group 1 pm_utilization) Processor Cycles gated by the run latch.  Operating systems use the run latch to indicate when they are doing useful work.  The run latch is typically cleared in the OS idle loop.  Gating by the run latch filters out the idle loop.
@@ -2121,3 +2121,48 @@ event:0X1072 counters:2 um:zero minimum:10000 name:PM_INST_CMPL_GRP263 : (Group
 event:0X1073 counters:3 um:zero minimum:1000 name:PM_MRK_LSU_FIN_GRP263 : (Group 263 pm_mrk_misc8) One of the Load/Store Units finished a marked instruction. Instructions that finish may not necessary complete
 event:0X1074 counters:4 um:zero minimum:1000 name:PM_RUN_INST_CMPL_GRP263 : (Group 263 pm_mrk_misc8) Number of run instructions completed.
 event:0X1075 counters:5 um:zero minimum:10000 name:PM_RUN_CYC_GRP263 : (Group 263 pm_mrk_misc8) Processor Cycles gated by the run latch.  Operating systems use the run latch to indicate when they are doing useful work.  The run latch is typically cleared in the OS idle loop.  Gating by the run latch filters out the idle loop.
+
+#Group 264 pm_gct_noslot, GCT no slot events
+###### DO NOT REMOVE ######
+# Manually added group
+event:0X1080 counters:0 um:zero minimum:1000 name:PM_GCT_NOSLOT_CYC_EDGE_COUNT_GRP264 : (Group 264 pm_gct_noslot) Number of distinct occurrences when the Global Completion Table has no slots from this thread.
+event:0X1081 counters:1 um:zero minimum:1000 name:PM_GCT_EMPTY_CYC_GRP264 : (Group 264 pm_gct_noslot) Cycles when the Global Completion Table was completely empty.  No thread had an entry allocated.
+event:0X1082 counters:2 um:zero minimum:10000 name:PM_INST_CMPL_GRP264 : (Group 264 pm_gct_noslot) Number of PowerPC Instructions that completed.
+event:0X1083 counters:3 um:zero minimum:1000 name:PM_GCT_NOSLOT_BR_MPRED_EDGE_COUNT_GRP264 : (Group 264 pm_gct_noslot) Number of distinct occurrences when the Global Completion Table has no slots from this thread because of a branch misprediction.
+event:0X1084 counters:4 um:zero minimum:1000 name:PM_RUN_INST_CMPL_GRP264 : (Group 264 pm_gct_noslot) Number of run instructions completed.
+event:0X1085 counters:5 um:zero minimum:10000 name:PM_RUN_CYC_GRP264 : (Group 264 pm_gct_noslot) Processor Cycles gated by the run latch.  Operating systems use the run latch to indicate when they are doing useful work.  The run latch is typically cleared in the OS idle loop.  Gating by the run latch filters out the idle loop.
+
+#Group 265 pm_cmplu_stall, CMPLU stall events
+###### DO NOT REMOVE ######
+event:0X1090 counters:0 um:zero minimum:1000 name:PM_CMPLU_STALL_THRD_EDGE_COUNT_GRP265 : (Group 265 pm_cmplu_stall) Number of distinct occurrences when completion stalled due to thread conflict.  Group ready to complete but it was another thread's turn
+event:0X1091 counters:1 um:zero minimum:1000 name:PM_CMPLU_STALL_DFU_GRP265 : (Group 265 pm_cmplu_stall) Completion stall caused by Decimal Floating Point Unit
+event:0X1092 counters:2 um:zero minimum:10000 name:PM_INST_CMPL_GRP265 : (Group 265 pm_cmplu_stall) Number of PowerPC Instructions that completed.
+event:0X1093 counters:3 um:zero minimum:1000 name:PM_GCT_NOSLOT_BR_MPRED_IC_MISS_GRP265 : (Group 265 pm_cmplu_stall) No slot in GCT caused by branch mispredict or I cache miss
+event:0X1094 counters:4 um:zero minimum:1000 name:PM_RUN_INST_CMPL_GRP265 : (Group 265 pm_cmplu_stall) Number of run instructions completed.
+event:0X1095 counters:5 um:zero minimum:10000 name:PM_RUN_CYC_GRP265 : (Group 265 pm_cmplu_stall) Processor Cycles gated by the run latch.  Operating systems use the run latch to indicate when they are doing useful work.  The run latch is typically cleared in the OS idle loop.  Gating by the run latch filters out the idle loop.
+
+#Group 266 pm_cmplu_stall2, CMPLU stall (vector)
+###### DO NOT REMOVE ######
+event:0X10A0 counters:0 um:zero minimum:1000 name:PM_CMPLU_STALL_END_GCT_NOSLOT_GRP266 : (Group 266 pm_cmplu_stall2) Count ended because GCT went empty
+event:0X10A1 counters:1 um:zero minimum:1000 name:PM_CMPLU_STALL_VECTOR_EDGE_COUNT_GRP266 : (Group 266 pm_cmplu_stall2) Number of distinct occurrences when completion stalled caused by Vector instruction
+event:0X10A2 counters:2 um:zero minimum:1000 name:PM_MRK_STALL_CMPLU_CYC_COUNT_GRP266 : (Group 266 pm_cmplu_stall2) Marked Group Completion Stall cycles (use edge detect to count #)
+event:0X10A3 counters:3 um:zero minimum:1000 name:PM_CMPLU_STALL_EDGE_COUNT_GRP266 : (Group 266 pm_cmplu_stall2) Number of distinct occurrences when no groups completed, GCT not empty
+event:0X10A4 counters:4 um:zero minimum:1000 name:PM_RUN_INST_CMPL_GRP266 : (Group 266 pm_cmplu_stall2) Number of run instructions completed.
+event:0X10A5 counters:5 um:zero minimum:10000 name:PM_RUN_CYC_GRP266 : (Group 266 pm_cmplu_stall2) Processor Cycles gated by the run latch.  Operating systems use the run latch to indicate when they are doing useful work.  The run latch is typically cleared in the OS idle loop.  Gating by the run latch filters out the idle loop.
+
+#Group 267 pm_cmplu_stall3, CMPLU stall (scalar)
+###### DO NOT REMOVE ######
+event:0X10B0 counters:0 um:zero minimum:1000 name:PM_FLOP_GRP267 : (Group 267 pm_cmplu_stall3) A floating point operation has completed
+event:0X10B1 counters:1 um:zero minimum:1000 name:PM_CMPLU_STALL_SCALAR_LONG_GRP267 : (Group 267 pm_cmplu_stall3) Completion stall caused by long latency scalar instruction
+event:0X10B2 counters:2 um:zero minimum:1000 name:PM_MRK_STALL_CMPLU_CYC_GRP267 : (Group 267 pm_cmplu_stall3) Marked Group Completion Stall cycles
+event:0X10B3 counters:3 um:zero minimum:1000 name:PM_CMPLU_STALL_SCALAR_EDGE_COUNT_GRP267 : (Group 267 pm_cmplu_stall3) Number of distinct occurrences when completion stalled caused by FPU instruction
+event:0X10B4 counters:4 um:zero minimum:1000 name:PM_RUN_INST_CMPL_GRP267 : (Group 267 pm_cmplu_stall3) Number of run instructions completed.
+event:0X10B5 counters:5 um:zero minimum:10000 name:PM_RUN_CYC_GRP267 : (Group 267 pm_cmplu_stall3) Processor Cycles gated by the run latch.  Operating systems use the run latch to indicate when they are doing useful work.  The run latch is typically cleared in the OS idle loop.  Gating by the run latch filters out the idle loop.
+
+#Group 268 pm_cmplu_ifu, IFU stall
+event:0X10C0 counters:0 um:zero minimum:1000 name:PM_CMPLU_STALL_END_GCT_NOSLOT_GRP268 : (Group 268 pm_cmplu_ifu) Count ended because GCT went empty
+event:0X10C1 counters:1 um:zero minimum:1000 name:PM_LSU0_L1_SW_PREF_GRP268 : (Group 268 pm_cmplu_ifu) LSU0 Software L1 Prefetches, including SW Transient Prefetches
+event:0X10C2 counters:2 um:zero minimum:1000 name:PM_LSU1_L1_SW_PREF_GRP268 : (Group 268 pm_cmplu_ifu) LSU1 Software L1 Prefetches, including SW Transient Prefetches
+event:0X10C3 counters:3 um:zero minimum:1000 name:PM_CMPLU_STALL_IFU_EDGE_COUNT_GRP268 : (Group 268 pm_cmplu_ifu) Number of distinct occurrences when completion stalled due to IFU
+event:0X10C4 counters:4 um:zero minimum:1000 name:PM_RUN_INST_CMPL_GRP268 : (Group 268 pm_cmplu_ifu) Number of run instructions completed.
+event:0X10C5 counters:5 um:zero minimum:10000 name:PM_RUN_CYC_GRP268 : (Group 268 pm_cmplu_ifu) Processor Cycles gated by the run latch.  Operating systems use the run latch to indicate when they are doing useful work.  The run latch is typically cleared in the OS idle loop.  Gating by the run latch filters out the idle loop.
diff --git a/events/ppc64/power8/events b/events/ppc64/power8/events
new file mode 100644
index 0000000..6e4e688
--- /dev/null
+++ b/events/ppc64/power8/events
@@ -0,0 +1,1020 @@
+#
+#  Copyright OProfile authors
+#  Copyright (c) International Business Machines, 2013.
+#  Contributed by Maynard Johnson <maynardj@us.ibm.com>.
+#
+#  IBM POWER8 Events
+
+include:ppc64/architected_events_v1
+
+event:0x1f05e counters:0 um:zero minimum:100000 name:PM_1LPAR_CYC : Number of cycles in single lpar mode.
+event:0x2006e counters:1 um:zero minimum:10000 name:PM_2LPAR_CYC : Number of cycles in 2 lpar mode.
+event:0x4e05e counters:3 um:zero minimum:100000 name:PM_4LPAR_CYC : Number of cycles in 4 LPAR mode.
+event:0x610050 counters:0 um:zero minimum:10000 name:PM_ALL_CHIP_PUMP_CPRED : Initial and Final Pump Scope and data sourced across this scope was chip pump (prediction=correct) for all data types (demand load,data,inst prefetch,inst fetch,xlate (I or d))
+event:0x520050 counters:1 um:zero minimum:10000 name:PM_ALL_GRP_PUMP_CPRED : Initial and Final Pump Scope and data sourced across this scope was group pump for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)
+event:0x620052 counters:1 um:zero minimum:10000 name:PM_ALL_GRP_PUMP_MPRED : Final Pump Scope(Group) to get data sourced, ended up larger than Initial Pump Scope OR Final Pump Scope(Group) got data from source that was at smaller scope(Chip) Final pump was group pump and initial pump was chip or final and initial pump was gro
+event:0x610052 counters:0 um:zero minimum:10000 name:PM_ALL_GRP_PUMP_MPRED_RTY : Final Pump Scope(Group) to get data sourced, ended up larger than Initial Pump Scope (Chip) Final pump was group pump and initial pump was chip pump for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)
+event:0x610054 counters:0 um:zero minimum:10000 name:PM_ALL_PUMP_CPRED : Pump prediction correct. Counts across all types of pumps for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)
+event:0x640052 counters:3 um:zero minimum:10000 name:PM_ALL_PUMP_MPRED : Pump Mis prediction Counts across all types of pumps for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)
+event:0x630050 counters:2 um:zero minimum:10000 name:PM_ALL_SYS_PUMP_CPRED : Initial and Final Pump Scope and data sourced across this scope was system pump for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)
+event:0x630052 counters:2 um:zero minimum:10000 name:PM_ALL_SYS_PUMP_MPRED : Final Pump Scope(system) to get data sourced, ended up larger than Initial Pump Scope(Chip/Group) OR Final Pump Scope(system) got data from source that was at smaller scope(Chip/group) Final pump was system pump and initial pump was chip or group or
+event:0x640050 counters:3 um:zero minimum:10000 name:PM_ALL_SYS_PUMP_MPRED_RTY : Final Pump Scope(system) to get data sourced, ended up larger than Initial Pump Scope (Chip or Group) for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)
+event:0x2505e counters:1 um:zero minimum:10000 name:PM_BACK_BR_CMPL : Branch instruction completed with a target address less than current instruction address.
+event:0x4082 counters:0,1,2,3 um:zero minimum:10000 name:PM_BANK_CONFLICT : Read blocked due to interleave conflict.  The ifar logic will detect an interleave conflict and kill the data that was read that cycle.
+event:0x10068 counters:0 um:zero minimum:10000 name:PM_BRU_FIN : Branch Instruction Finished .
+event:0x20036 counters:1 um:zero minimum:10000 name:PM_BR_2PATH : two path branch.
+event:0x5086 counters:0,1,2,3 um:zero minimum:10000 name:PM_BR_BC_8 : Pairable BC+8 branch that has not been converted to a Resolve Finished in the BRU pipeline
+event:0x5084 counters:0,1,2,3 um:zero minimum:10000 name:PM_BR_BC_8_CONV : Pairable BC+8 branch that was converted to a Resolve Finished in the BRU pipeline.
+event:0x40060 counters:3 um:zero minimum:10000 name:PM_BR_CMPL : Branch Instruction completed.
+event:0x40ac counters:0,1,2,3 um:zero minimum:10000 name:PM_BR_MPRED_CCACHE : Conditional Branch Completed that was Mispredicted due to the Count Cache Target Prediction
+event:0x40b8 counters:0,1,2,3 um:zero minimum:10000 name:PM_BR_MPRED_CR : Conditional Branch Completed that was Mispredicted due to the BHT Direction Prediction (taken/not taken).
+event:0x40ae counters:0,1,2,3 um:zero minimum:10000 name:PM_BR_MPRED_LSTACK : Conditional Branch Completed that was Mispredicted due to the Link Stack Target Prediction
+event:0x40ba counters:0,1,2,3 um:zero minimum:10000 name:PM_BR_MPRED_TA : Conditional Branch Completed that was Mispredicted due to the Target Address Prediction from the Count Cache or Link Stack.  Only XL-form branches that resolved Taken set this event.
+event:0x10138 counters:0 um:zero minimum:10000 name:PM_BR_MRK_2PATH : marked two path branch.
+event:0x409c counters:0,1,2,3 um:zero minimum:10000 name:PM_BR_PRED_BR0 : Conditional Branch Completed on BR0 (1st branch in group) in which the HW predicted the Direction or Target
+event:0x409e counters:0,1,2,3 um:zero minimum:10000 name:PM_BR_PRED_BR1 : Conditional Branch Completed on BR1 (2nd branch in group) in which the HW predicted the Direction or Target.  Note: BR1 can only be used in Single Thread Mode.  In all of the SMT modes, only one branch can complete, thus BR1 is unused.
+event:0x489c counters:0,1,2,3 um:zero minimum:10000 name:PM_BR_PRED_BR_CMPL : IFU
+event:0x40a4 counters:0,1,2,3 um:zero minimum:10000 name:PM_BR_PRED_CCACHE_BR0 : Conditional Branch Completed on BR0 that used the Count Cache for Target Prediction
+event:0x40a6 counters:0,1,2,3 um:zero minimum:10000 name:PM_BR_PRED_CCACHE_BR1 : Conditional Branch Completed on BR1 that used the Count Cache for Target Prediction
+event:0x48a4 counters:0,1,2,3 um:zero minimum:10000 name:PM_BR_PRED_CCACHE_CMPL : IFU
+event:0x40b0 counters:0,1,2,3 um:zero minimum:10000 name:PM_BR_PRED_CR_BR0 : Conditional Branch Completed on BR0 that had its direction predicted. I-form branches do not set this event.  In addition, B-form branches which do not use the BHT do not set this event - these are branches with BO-field set to 'always taken' and bra
+event:0x40b2 counters:0,1,2,3 um:zero minimum:10000 name:PM_BR_PRED_CR_BR1 : Conditional Branch Completed on BR1 that had its direction predicted. I-form branches do not set this event.  In addition, B-form branches which do not use the BHT do not set this event - these are branches with BO-field set to 'always taken' and bra
+event:0x48b0 counters:0,1,2,3 um:zero minimum:10000 name:PM_BR_PRED_CR_CMPL : IFU
+event:0x40a8 counters:0,1,2,3 um:zero minimum:10000 name:PM_BR_PRED_LSTACK_BR0 : Conditional Branch Completed on BR0 that used the Link Stack for Target Prediction
+event:0x40aa counters:0,1,2,3 um:zero minimum:10000 name:PM_BR_PRED_LSTACK_BR1 : Conditional Branch Completed on BR1 that used the Link Stack for Target Prediction
+event:0x48a8 counters:0,1,2,3 um:zero minimum:10000 name:PM_BR_PRED_LSTACK_CMPL : IFU
+event:0x40b4 counters:0,1,2,3 um:zero minimum:10000 name:PM_BR_PRED_TA_BR0 : Conditional Branch Completed on BR0 that had its target address predicted. Only XL-form branches set this event.
+event:0x40b6 counters:0,1,2,3 um:zero minimum:10000 name:PM_BR_PRED_TA_BR1 : Conditional Branch Completed on BR1 that had its target address predicted. Only XL-form branches set this event.
+event:0x48b4 counters:0,1,2,3 um:zero minimum:10000 name:PM_BR_PRED_TA_CMPL : IFU
+event:0x40a0 counters:0,1,2,3 um:zero minimum:10000 name:PM_BR_UNCOND_BR0 : Unconditional Branch Completed on BR0. HW branch prediction was not used for this branch. This can be an I-form branch, a B-form branch with BO-field set to branch always, or a B-form branch which was converted to a Resolve.
+event:0x40a2 counters:0,1,2,3 um:zero minimum:10000 name:PM_BR_UNCOND_BR1 : Unconditional Branch Completed on BR1. HW branch prediction was not used for this branch. This can be an I-form branch, a B-form branch with BO-field set to branch always, or a B-form branch which was converted to a Resolve.
+event:0x48a0 counters:0,1,2,3 um:zero minimum:10000 name:PM_BR_UNCOND_CMPL : IFU
+event:0x3094 counters:0,1,2,3 um:zero minimum:10000 name:PM_CASTOUT_ISSUED : Castouts issued
+event:0x3096 counters:0,1,2,3 um:zero minimum:10000 name:PM_CASTOUT_ISSUED_GPR : Castouts issued GPR
+event:0x10050 counters:0 um:zero minimum:10000 name:PM_CHIP_PUMP_CPRED : Initial and Final Pump Scope and data sourced across this scope was chip pump (prediction=correct) for all data types (demand load,data,inst prefetch,inst fetch,xlate (I or d)).
+event:0x2090 counters:0,1,2,3 um:zero minimum:10000 name:PM_CLB_HELD : CLB Hold: Any Reason
+event:0x4000a counters:3 um:zero minimum:10000 name:PM_CMPLU_STALL : Completion stall.
+event:0x4d018 counters:3 um:zero minimum:10000 name:PM_CMPLU_STALL_BRU : Completion stall due to a Branch Unit.
+event:0x2d018 counters:1 um:zero minimum:10000 name:PM_CMPLU_STALL_BRU_CRU : Completion stall due to IFU.
+event:0x30026 counters:2 um:zero minimum:10000 name:PM_CMPLU_STALL_COQ_FULL : Completion stall due to CO q full.
+event:0x2c012 counters:1 um:zero minimum:10000 name:PM_CMPLU_STALL_DCACHE_MISS : Completion stall by Dcache miss.
+event:0x2c018 counters:1 um:zero minimum:10000 name:PM_CMPLU_STALL_DMISS_L21_L31 : Completion stall by Dcache miss which resolved on chip ( excluding local L2/L3).
+event:0x2c016 counters:1 um:zero minimum:10000 name:PM_CMPLU_STALL_DMISS_L2L3 : Completion stall by Dcache miss which resolved in L2/L3.
+event:0x4c016 counters:3 um:zero minimum:10000 name:PM_CMPLU_STALL_DMISS_L2L3_CONFLICT : Completion stall due to cache miss resolving in core's L2/L3 with a conflict.
+event:0x4c01a counters:3 um:zero minimum:10000 name:PM_CMPLU_STALL_DMISS_L3MISS : Completion stall due to cache miss resolving missed the L3.
+event:0x4c018 counters:3 um:zero minimum:10000 name:PM_CMPLU_STALL_DMISS_LMEM : Completion stall due to cache miss resolving in core's Local Memory.
+event:0x2c01c counters:1 um:zero minimum:10000 name:PM_CMPLU_STALL_DMISS_REMOTE : Completion stall by Dcache miss which resolved from remote chip (cache or memory).
+event:0x4c012 counters:3 um:zero minimum:10000 name:PM_CMPLU_STALL_ERAT_MISS : Completion stall due to LSU reject ERAT miss.
+event:0x30038 counters:2 um:zero minimum:10000 name:PM_CMPLU_STALL_FLUSH : completion stall due to flush by own thread.
+event:0x4d016 counters:3 um:zero minimum:10000 name:PM_CMPLU_STALL_FXLONG : Completion stall due to a long latency fixed point instruction.
+event:0x2d016 counters:1 um:zero minimum:10000 name:PM_CMPLU_STALL_FXU : Completion stall due to FXU.
+event:0x30036 counters:2 um:zero minimum:10000 name:PM_CMPLU_STALL_HWSYNC : completion stall due to hwsync.
+event:0x4d014 counters:3 um:zero minimum:10000 name:PM_CMPLU_STALL_LOAD_FINISH : Completion stall due to a Load finish.
+event:0x2c010 counters:1 um:zero minimum:10000 name:PM_CMPLU_STALL_LSU : Completion stall by LSU instruction.
+event:0x10036 counters:0 um:zero minimum:10000 name:PM_CMPLU_STALL_LWSYNC : completion stall due to isync/lwsync.
+event:0x30028 counters:2 um:zero minimum:10000 name:PM_CMPLU_STALL_MEM_ECC_DELAY : Completion stall due to mem ECC delay.
+event:0x2e01c counters:1 um:zero minimum:10000 name:PM_CMPLU_STALL_NO_NTF : Completion stall due to nop.
+event:0x2e01e counters:1 um:zero minimum:10000 name:PM_CMPLU_STALL_NTCG_FLUSH : Completion stall due to reject (load hit store).
+event:0x30006 counters:2 um:zero minimum:10000 name:PM_CMPLU_STALL_OTHER_CMPL : Instructions core completed while this thread was stalled.
+event:0x4c010 counters:3 um:zero minimum:10000 name:PM_CMPLU_STALL_REJECT : Completion stall due to LSU reject.
+event:0x2c01a counters:1 um:zero minimum:10000 name:PM_CMPLU_STALL_REJECT_LHS : Completion stall due to reject (load hit store).
+event:0x4c014 counters:3 um:zero minimum:10000 name:PM_CMPLU_STALL_REJ_LMQ_FULL : Completion stall due to LSU reject LMQ full.
+event:0x4d010 counters:3 um:zero minimum:10000 name:PM_CMPLU_STALL_SCALAR : Completion stall due to VSU scalar instruction.
+event:0x2d010 counters:1 um:zero minimum:10000 name:PM_CMPLU_STALL_SCALAR_LONG : Completion stall due to VSU scalar long latency instruction.
+event:0x2c014 counters:1 um:zero minimum:10000 name:PM_CMPLU_STALL_STORE : Completion stall by stores.
+event:0x4c01c counters:3 um:zero minimum:10000 name:PM_CMPLU_STALL_ST_FWD : Completion stall due to store forward.
+event:0x1001c counters:0 um:zero minimum:10000 name:PM_CMPLU_STALL_THRD : Completion stall due to thread conflict.
+event:0x2d014 counters:1 um:zero minimum:10000 name:PM_CMPLU_STALL_VECTOR : Completion stall due to VSU vector instruction.
+event:0x4d012 counters:3 um:zero minimum:10000 name:PM_CMPLU_STALL_VECTOR_LONG : Completion stall due to VSU vector long instruction.
+event:0x2d012 counters:1 um:zero minimum:10000 name:PM_CMPLU_STALL_VSU : Completion stall due to VSU instruction.
+event:0x16083 counters:0 um:zero minimum:10000 name:PM_CO0_ALLOC : 0.0
+event:0x16082 counters:0 um:zero minimum:10000 name:PM_CO0_BUSY : CO mach 0 Busy. Used by PMU to sample ave RC livetime(mach0 used as sample point)
+event:0x517082 counters:0 um:zero minimum:10000 name:PM_CO_DISP_FAIL : CO dispatch failed due to all CO machines being busy
+event:0x527084 counters:1 um:zero minimum:10000 name:PM_CO_TM_SC_FOOTPRINT : L2 did a cleanifdirty CO to the L3 (ie created an SC line in the L3)
+event:0x3608a counters:2 um:zero minimum:10000 name:PM_CO_USAGE : Continuous 16 cycle(2to1) window where this signals rotates thru sampling each L2 CO machine busy. PMU uses this wave to then do 16 cyc count to sample total number of machs running
+event:0x40066 counters:3 um:zero minimum:10000 name:PM_CRU_FIN : IFU Finished a (non-branch) instruction.
+event:0x61c050 counters:0 um:zero minimum:10000 name:PM_DATA_ALL_CHIP_PUMP_CPRED : Initial and Final Pump Scope and data sourced across this scope was chip pump (prediction=correct) for a demand load
+event:0x64c048 counters:3 um:zero minimum:10000 name:PM_DATA_ALL_FROM_DL2L3_MOD : The processor's data cache was reloaded with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1
+event:0x63c048 counters:2 um:zero minimum:10000 name:PM_DATA_ALL_FROM_DL2L3_SHR : The processor's data cache was reloaded with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1
+event:0x63c04c counters:2 um:zero minimum:10000 name:PM_DATA_ALL_FROM_DL4 : The processor's data cache was reloaded from another chip's L4 on a different Node or Group (Distant) due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1
+event:0x64c04c counters:3 um:zero minimum:10000 name:PM_DATA_ALL_FROM_DMEM : The processor's data cache was reloaded from another chip's memory on a different Node or Group (Distant) due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1
+event:0x61c042 counters:0 um:zero minimum:10000 name:PM_DATA_ALL_FROM_L2 : The processor's data cache was reloaded from local core's L2 due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1
+event:0x64c046 counters:3 um:zero minimum:10000 name:PM_DATA_ALL_FROM_L21_MOD : The processor's data cache was reloaded with Modified (M) data from another core's L2 on the same chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1
+event:0x63c046 counters:2 um:zero minimum:10000 name:PM_DATA_ALL_FROM_L21_SHR : The processor's data cache was reloaded with Shared (S) data from another core's L2 on the same chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1
+event:0x61c04e counters:0 um:zero minimum:10000 name:PM_DATA_ALL_FROM_L2MISS_MOD : The processor's data cache was reloaded from a location other than the local core's L2 due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1
+event:0x63c040 counters:2 um:zero minimum:10000 name:PM_DATA_ALL_FROM_L2_DISP_CONFLICT_LDHITST : The processor's data cache was reloaded from local core's L2 with load hit store conflict due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1
+event:0x64c040 counters:3 um:zero minimum:10000 name:PM_DATA_ALL_FROM_L2_DISP_CONFLICT_OTHER : The processor's data cache was reloaded from local core's L2 with dispatch conflict due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1
+event:0x62c040 counters:1 um:zero minimum:10000 name:PM_DATA_ALL_FROM_L2_MEPF : The processor's data cache was reloaded from local core's L2 hit without dispatch conflicts on Mepf state due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1
+event:0x61c040 counters:0 um:zero minimum:10000 name:PM_DATA_ALL_FROM_L2_NO_CONFLICT : The processor's data cache was reloaded from local core's L2 without conflict due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1
+event:0x64c042 counters:3 um:zero minimum:10000 name:PM_DATA_ALL_FROM_L3 : The processor's data cache was reloaded from local core's L3 due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1
+event:0x64c044 counters:3 um:zero minimum:10000 name:PM_DATA_ALL_FROM_L31_ECO_MOD : The processor's data cache was reloaded with Modified (M) data from another core's ECO L3 on the same chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1
+event:0x63c044 counters:2 um:zero minimum:10000 name:PM_DATA_ALL_FROM_L31_ECO_SHR : The processor's data cache was reloaded with Shared (S) data from another core's ECO L3 on the same chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1
+event:0x62c044 counters:1 um:zero minimum:10000 name:PM_DATA_ALL_FROM_L31_MOD : The processor's data cache was reloaded with Modified (M) data from another core's L3 on the same chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1
+event:0x61c046 counters:0 um:zero minimum:10000 name:PM_DATA_ALL_FROM_L31_SHR : The processor's data cache was reloaded with Shared (S) data from another core's L3 on the same chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1
+event:0x64c04e counters:3 um:zero minimum:10000 name:PM_DATA_ALL_FROM_L3MISS_MOD : The processor's data cache was reloaded from a location other than the local core's L3 due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1
+event:0x63c042 counters:2 um:zero minimum:10000 name:PM_DATA_ALL_FROM_L3_DISP_CONFLICT : The processor's data cache was reloaded from local core's L3 with dispatch conflict due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1
+event:0x62c042 counters:1 um:zero minimum:10000 name:PM_DATA_ALL_FROM_L3_MEPF : The processor's data cache was reloaded from local core's L3 without dispatch conflicts hit on Mepf state due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1
+event:0x61c044 counters:0 um:zero minimum:10000 name:PM_DATA_ALL_FROM_L3_NO_CONFLICT : The processor's data cache was reloaded from local core's L3 without conflict due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1
+event:0x61c04c counters:0 um:zero minimum:10000 name:PM_DATA_ALL_FROM_LL4 : The processor's data cache was reloaded from the local chip's L4 cache due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1
+event:0x62c048 counters:1 um:zero minimum:10000 name:PM_DATA_ALL_FROM_LMEM : The processor's data cache was reloaded from the local chip's Memory due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1
+event:0x62c04c counters:1 um:zero minimum:10000 name:PM_DATA_ALL_FROM_MEMORY : The processor's data cache was reloaded from a memory location including L4 from local remote or distant due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1
+event:0x64c04a counters:3 um:zero minimum:10000 name:PM_DATA_ALL_FROM_OFF_CHIP_CACHE : The processor's data cache was reloaded either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1
+event:0x61c048 counters:0 um:zero minimum:10000 name:PM_DATA_ALL_FROM_ON_CHIP_CACHE : The processor's data cache was reloaded either shared or modified data from another core's L2/L3 on the same chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1
+event:0x62c046 counters:1 um:zero minimum:10000 name:PM_DATA_ALL_FROM_RL2L3_MOD : The processor's data cache was reloaded with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1
+event:0x61c04a counters:0 um:zero minimum:10000 name:PM_DATA_ALL_FROM_RL2L3_SHR : The processor's data cache was reloaded with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1
+event:0x62c04a counters:1 um:zero minimum:10000 name:PM_DATA_ALL_FROM_RL4 : The processor's data cache was reloaded from another chip's L4 on the same Node or Group ( Remote) due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1
+event:0x63c04a counters:2 um:zero minimum:10000 name:PM_DATA_ALL_FROM_RMEM : The processor's data cache was reloaded from another chip's memory on the same Node or Group ( Remote) due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1
+event:0x62c050 counters:1 um:zero minimum:10000 name:PM_DATA_ALL_GRP_PUMP_CPRED : Initial and Final Pump Scope and data sourced across this scope was group pump for a demand load
+event:0x62c052 counters:1 um:zero minimum:10000 name:PM_DATA_ALL_GRP_PUMP_MPRED : Final Pump Scope(Group) to get data sourced, ended up larger than Initial Pump Scope OR Final Pump Scope(Group) got data from source that was at smaller scope(Chip) Final pump was group pump and initial pump was chip or final and initial pump was gro
+event:0x61c052 counters:0 um:zero minimum:10000 name:PM_DATA_ALL_GRP_PUMP_MPRED_RTY : Final Pump Scope(Group) to get data sourced, ended up larger than Initial Pump Scope (Chip) Final pump was group pump and initial pump was chip pump for a demand load
+event:0x61c054 counters:0 um:zero minimum:10000 name:PM_DATA_ALL_PUMP_CPRED : Pump prediction correct. Counts across all types of pumps for a demand load
+event:0x64c052 counters:3 um:zero minimum:10000 name:PM_DATA_ALL_PUMP_MPRED : Pump Mis prediction Counts across all types of pumps for a demand load
+event:0x63c050 counters:2 um:zero minimum:10000 name:PM_DATA_ALL_SYS_PUMP_CPRED : Initial and Final Pump Scope and data sourced across this scope was system pump for a demand load
+event:0x63c052 counters:2 um:zero minimum:10000 name:PM_DATA_ALL_SYS_PUMP_MPRED : Final Pump Scope(system) to get data sourced, ended up larger than Initial Pump Scope(Chip/Group) OR Final Pump Scope(system) got data from source that was at smaller scope(Chip/group) Final pump was system pump and initial pump was chip or group or
+event:0x64c050 counters:3 um:zero minimum:10000 name:PM_DATA_ALL_SYS_PUMP_MPRED_RTY : Final Pump Scope(system) to get data sourced, ended up larger than Initial Pump Scope (Chip or Group) for a demand load
+event:0x1c050 counters:0 um:zero minimum:10000 name:PM_DATA_CHIP_PUMP_CPRED : Initial and Final Pump Scope and data sourced across this scope was chip pump (prediction=correct) for a demand load.
+event:0x4c048 counters:3 um:zero minimum:10000 name:PM_DATA_FROM_DL2L3_MOD : The processor's data cache was reloaded with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1.
+event:0x3c048 counters:2 um:zero minimum:10000 name:PM_DATA_FROM_DL2L3_SHR : The processor's data cache was reloaded with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1.
+event:0x3c04c counters:2 um:zero minimum:10000 name:PM_DATA_FROM_DL4 : The processor's data cache was reloaded from another chip's L4 on a different Node or Group (Distant) due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1.
+event:0x4c04c counters:3 um:zero minimum:10000 name:PM_DATA_FROM_DMEM : The processor's data cache was reloaded from another chip's memory on a different Node or Group (Distant) due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1.
+event:0x1c042 counters:0 um:zero minimum:10000 name:PM_DATA_FROM_L2 : The processor's data cache was reloaded from local core's L2 due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1.
+event:0x4c046 counters:3 um:zero minimum:10000 name:PM_DATA_FROM_L21_MOD : The processor's data cache was reloaded with Modified (M) data from another core's L2 on the same chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1.
+event:0x3c046 counters:2 um:zero minimum:10000 name:PM_DATA_FROM_L21_SHR : The processor's data cache was reloaded with Shared (S) data from another core's L2 on the same chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1.
+event:0x1c04e counters:0 um:zero minimum:10000 name:PM_DATA_FROM_L2MISS_MOD : The processor's data cache was reloaded from a location other than the local core's L2 due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1.
+event:0x3c040 counters:2 um:zero minimum:10000 name:PM_DATA_FROM_L2_DISP_CONFLICT_LDHITST : The processor's data cache was reloaded from local core's L2 with load hit store conflict due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1.
+event:0x4c040 counters:3 um:zero minimum:10000 name:PM_DATA_FROM_L2_DISP_CONFLICT_OTHER : The processor's data cache was reloaded from local core's L2 with dispatch conflict due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1.
+event:0x2c040 counters:1 um:zero minimum:10000 name:PM_DATA_FROM_L2_MEPF : The processor's data cache was reloaded from local core's L2 hit without dispatch conflicts on Mepf state due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1.
+event:0x1c040 counters:0 um:zero minimum:10000 name:PM_DATA_FROM_L2_NO_CONFLICT : The processor's data cache was reloaded from local core's L2 without conflict due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1.
+event:0x4c042 counters:3 um:zero minimum:10000 name:PM_DATA_FROM_L3 : The processor's data cache was reloaded from local core's L3 due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1.
+event:0x4c044 counters:3 um:zero minimum:10000 name:PM_DATA_FROM_L31_ECO_MOD : The processor's data cache was reloaded with Modified (M) data from another core's ECO L3 on the same chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1.
+event:0x3c044 counters:2 um:zero minimum:10000 name:PM_DATA_FROM_L31_ECO_SHR : The processor's data cache was reloaded with Shared (S) data from another core's ECO L3 on the same chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1.
+event:0x2c044 counters:1 um:zero minimum:10000 name:PM_DATA_FROM_L31_MOD : The processor's data cache was reloaded with Modified (M) data from another core's L3 on the same chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1.
+event:0x1c046 counters:0 um:zero minimum:10000 name:PM_DATA_FROM_L31_SHR : The processor's data cache was reloaded with Shared (S) data from another core's L3 on the same chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1.
+event:0x4c04e counters:3 um:zero minimum:10000 name:PM_DATA_FROM_L3MISS_MOD : The processor's data cache was reloaded from a location other than the local core's L3 due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1.
+event:0x3c042 counters:2 um:zero minimum:10000 name:PM_DATA_FROM_L3_DISP_CONFLICT : The processor's data cache was reloaded from local core's L3 with dispatch conflict due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1.
+event:0x2c042 counters:1 um:zero minimum:10000 name:PM_DATA_FROM_L3_MEPF : The processor's data cache was reloaded from local core's L3 without dispatch conflicts hit on Mepf state due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1.
+event:0x1c044 counters:0 um:zero minimum:10000 name:PM_DATA_FROM_L3_NO_CONFLICT : The processor's data cache was reloaded from local core's L3 without conflict due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1.
+event:0x1c04c counters:0 um:zero minimum:10000 name:PM_DATA_FROM_LL4 : The processor's data cache was reloaded from the local chip's L4 cache due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1.
+event:0x2c048 counters:1 um:zero minimum:10000 name:PM_DATA_FROM_LMEM : The processor's data cache was reloaded from the local chip's Memory due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1.
+event:0x2c04c counters:1 um:zero minimum:10000 name:PM_DATA_FROM_MEMORY : The processor's data cache was reloaded from a memory location including L4 from local remote or distant due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1.
+event:0x4c04a counters:3 um:zero minimum:10000 name:PM_DATA_FROM_OFF_CHIP_CACHE : The processor's data cache was reloaded either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1.
+event:0x1c048 counters:0 um:zero minimum:10000 name:PM_DATA_FROM_ON_CHIP_CACHE : The processor's data cache was reloaded either shared or modified data from another core's L2/L3 on the same chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1.
+event:0x2c046 counters:1 um:zero minimum:10000 name:PM_DATA_FROM_RL2L3_MOD : The processor's data cache was reloaded with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1.
+event:0x1c04a counters:0 um:zero minimum:10000 name:PM_DATA_FROM_RL2L3_SHR : The processor's data cache was reloaded with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1.
+event:0x2c04a counters:1 um:zero minimum:10000 name:PM_DATA_FROM_RL4 : The processor's data cache was reloaded from another chip's L4 on the same Node or Group ( Remote) due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1.
+event:0x3c04a counters:2 um:zero minimum:10000 name:PM_DATA_FROM_RMEM : The processor's data cache was reloaded from another chip's memory on the same Node or Group ( Remote) due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1.
+event:0x2c050 counters:1 um:zero minimum:10000 name:PM_DATA_GRP_PUMP_CPRED : Initial and Final Pump Scope and data sourced across this scope was group pump for a demand load.
+event:0x2c052 counters:1 um:zero minimum:10000 name:PM_DATA_GRP_PUMP_MPRED : Final Pump Scope(Group) to get data sourced, ended up larger than Initial Pump Scope OR Final Pump Scope(Group) got data from source that was at smaller scope(Chip) Final pump was group pump and initial pump was chip or final and initial pump was gro
+event:0x1c052 counters:0 um:zero minimum:10000 name:PM_DATA_GRP_PUMP_MPRED_RTY : Final Pump Scope(Group) to get data sourced, ended up larger than Initial Pump Scope (Chip) Final pump was group pump and initial pump was chip pump for a demand load.
+event:0x1c054 counters:0 um:zero minimum:10000 name:PM_DATA_PUMP_CPRED : Pump prediction correct. Counts across all types of pumps for a demand load.
+event:0x4c052 counters:3 um:zero minimum:10000 name:PM_DATA_PUMP_MPRED : Pump misprediction. Counts across all types of pumps for a demand load.
+event:0x3c050 counters:2 um:zero minimum:10000 name:PM_DATA_SYS_PUMP_CPRED : Initial and Final Pump Scope and data sourced across this scope was system pump for a demand load.
+event:0x3c052 counters:2 um:zero minimum:10000 name:PM_DATA_SYS_PUMP_MPRED : Final Pump Scope(system) to get data sourced, ended up larger than Initial Pump Scope(Chip/Group) OR Final Pump Scope(system) got data from source that was at smaller scope(Chip/group) Final pump was system pump and initial pump was chip or group or
+event:0x4c050 counters:3 um:zero minimum:10000 name:PM_DATA_SYS_PUMP_MPRED_RTY : Final Pump Scope(system) to get data sourced, ended up larger than Initial Pump Scope (Chip or Group) for a demand load.
+event:0x3001a counters:2 um:zero minimum:10000 name:PM_DATA_TABLEWALK_CYC : Data Tablewalk Active.
+event:0xe0bc counters:0,1,2,3 um:zero minimum:10000 name:PM_DC_COLLISIONS : DATA Cache collisions
+event:0x1e050 counters:0 um:zero minimum:10000 name:PM_DC_PREF_STREAM_ALLOC : Stream marked valid. The stream could have been allocated through the hardware prefetch mechanism or through software. This is combined ls0 and ls1.
+event:0x2e050 counters:1 um:zero minimum:10000 name:PM_DC_PREF_STREAM_CONF : A demand load referenced a line in an active prefetch stream. The stream could have been allocated through the hardware prefetch mechanism or through software. Combine up + down.
+event:0x4e050 counters:3 um:zero minimum:10000 name:PM_DC_PREF_STREAM_FUZZY_CONF : A demand load referenced a line in an active fuzzy prefetch stream. The stream could have been allocated through the hardware prefetch mechanism or through software. Fuzzy stream confirm (out of order effects, or pf can't keep up).
+event:0x3e050 counters:2 um:zero minimum:10000 name:PM_DC_PREF_STREAM_STRIDED_CONF : A demand load referenced a line in an active strided prefetch stream. The stream could have been allocated through the hardware prefetch mechanism or through software.
+event:0x4c054 counters:3 um:zero minimum:10000 name:PM_DERAT_MISS_16G : Data ERAT Miss (Data TLB Access) page size 16G.
+event:0x3c054 counters:2 um:zero minimum:10000 name:PM_DERAT_MISS_16M : Data ERAT Miss (Data TLB Access) page size 16M.
+event:0x1c056 counters:0 um:zero minimum:10000 name:PM_DERAT_MISS_4K : Data ERAT Miss (Data TLB Access) page size 4K.
+event:0x2c054 counters:1 um:zero minimum:10000 name:PM_DERAT_MISS_64K : Data ERAT Miss (Data TLB Access) page size 64K.
+event:0xb0ba counters:0,1,2,3 um:zero minimum:10000 name:PM_DFU : Finish DFU (all finish)
+event:0xb0be counters:0,1,2,3 um:zero minimum:10000 name:PM_DFU_DCFFIX : Convert from fixed opcode finish  (dcffix,dcffixq)
+event:0xb0bc counters:0,1,2,3 um:zero minimum:10000 name:PM_DFU_DENBCD : BCD->DPD opcode finish  (denbcd, denbcdq)
+event:0xb0b8 counters:0,1,2,3 um:zero minimum:10000 name:PM_DFU_MC : Finish DFU multicycle
+event:0x2092 counters:0,1,2,3 um:zero minimum:10000 name:PM_DISP_CLB_HELD_BAL : Dispatch/CLB Hold: Balance
+event:0x2094 counters:0,1,2,3 um:zero minimum:10000 name:PM_DISP_CLB_HELD_RES : Dispatch/CLB Hold: Resource
+event:0x20a8 counters:0,1,2,3 um:zero minimum:10000 name:PM_DISP_CLB_HELD_SB : Dispatch/CLB Hold: Scoreboard
+event:0x2098 counters:0,1,2,3 um:zero minimum:10000 name:PM_DISP_CLB_HELD_SYNC : Dispatch/CLB Hold: Sync type instruction
+event:0x2096 counters:0,1,2,3 um:zero minimum:10000 name:PM_DISP_CLB_HELD_TLBIE : Dispatch Hold: Due to TLBIE
+event:0x10006 counters:0 um:zero minimum:10000 name:PM_DISP_HELD : Dispatch Held.
+event:0x20006 counters:1 um:zero minimum:10000 name:PM_DISP_HELD_IQ_FULL : Dispatch held due to Issue q full.
+event:0x1002a counters:0 um:zero minimum:10000 name:PM_DISP_HELD_MAP_FULL : Dispatch held due to Mapper full.
+event:0x30018 counters:2 um:zero minimum:10000 name:PM_DISP_HELD_SRQ_FULL : Dispatch held due SRQ no room.
+event:0x4003c counters:3 um:zero minimum:10000 name:PM_DISP_HELD_SYNC_HOLD : Dispatch held due to SYNC hold.
+event:0x30a6 counters:0,1,2,3 um:zero minimum:10000 name:PM_DISP_HOLD_GCT_FULL : Dispatch Hold Due to no space in the GCT
+event:0x30008 counters:2 um:zero minimum:10000 name:PM_DISP_WT : Dispatched Starved (not held, nothing to dispatch).
+event:0x4e048 counters:3 um:zero minimum:10000 name:PM_DPTEG_FROM_DL2L3_MOD : A Page Table Entry was loaded into the TLB with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a data side request.
+event:0x3e048 counters:2 um:zero minimum:10000 name:PM_DPTEG_FROM_DL2L3_SHR : A Page Table Entry was loaded into the TLB with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a data side request.
+event:0x3e04c counters:2 um:zero minimum:10000 name:PM_DPTEG_FROM_DL4 : A Page Table Entry was loaded into the TLB from another chip's L4 on a different Node or Group (Distant) due to a data side request.
+event:0x4e04c counters:3 um:zero minimum:10000 name:PM_DPTEG_FROM_DMEM : A Page Table Entry was loaded into the TLB from another chip's memory on a different Node or Group (Distant) due to a data side request.
+event:0x1e042 counters:0 um:zero minimum:10000 name:PM_DPTEG_FROM_L2 : A Page Table Entry was loaded into the TLB from local core's L2 due to a data side request.
+event:0x4e046 counters:3 um:zero minimum:10000 name:PM_DPTEG_FROM_L21_MOD : A Page Table Entry was loaded into the TLB with Modified (M) data from another core's L2 on the same chip due to a data side request.
+event:0x3e046 counters:2 um:zero minimum:10000 name:PM_DPTEG_FROM_L21_SHR : A Page Table Entry was loaded into the TLB with Shared (S) data from another core's L2 on the same chip due to a data side request.
+event:0x1e04e counters:0 um:zero minimum:10000 name:PM_DPTEG_FROM_L2MISS : A Page Table Entry was loaded into the TLB from a location other than the local core's L2 due to a data side request.
+event:0x3e040 counters:2 um:zero minimum:10000 name:PM_DPTEG_FROM_L2_DISP_CONFLICT_LDHITST : A Page Table Entry was loaded into the TLB from local core's L2 with load hit store conflict due to a data side request.
+event:0x4e040 counters:3 um:zero minimum:10000 name:PM_DPTEG_FROM_L2_DISP_CONFLICT_OTHER : A Page Table Entry was loaded into the TLB from local core's L2 with dispatch conflict due to a data side request.
+event:0x2e040 counters:1 um:zero minimum:10000 name:PM_DPTEG_FROM_L2_MEPF : A Page Table Entry was loaded into the TLB from local core's L2 hit without dispatch conflicts on Mepf state. due to a data side request.
+event:0x1e040 counters:0 um:zero minimum:10000 name:PM_DPTEG_FROM_L2_NO_CONFLICT : A Page Table Entry was loaded into the TLB from local core's L2 without conflict due to a data side request.
+event:0x4e042 counters:3 um:zero minimum:10000 name:PM_DPTEG_FROM_L3 : A Page Table Entry was loaded into the TLB from local core's L3 due to a data side request.
+event:0x4e044 counters:3 um:zero minimum:10000 name:PM_DPTEG_FROM_L31_ECO_MOD : A Page Table Entry was loaded into the TLB with Modified (M) data from another core's ECO L3 on the same chip due to a data side request.
+event:0x3e044 counters:2 um:zero minimum:10000 name:PM_DPTEG_FROM_L31_ECO_SHR : A Page Table Entry was loaded into the TLB with Shared (S) data from another core's ECO L3 on the same chip due to a data side request.
+event:0x2e044 counters:1 um:zero minimum:10000 name:PM_DPTEG_FROM_L31_MOD : A Page Table Entry was loaded into the TLB with Modified (M) data from another core's L3 on the same chip due to a data side request.
+event:0x1e046 counters:0 um:zero minimum:10000 name:PM_DPTEG_FROM_L31_SHR : A Page Table Entry was loaded into the TLB with Shared (S) data from another core's L3 on the same chip due to a data side request.
+event:0x4e04e counters:3 um:zero minimum:10000 name:PM_DPTEG_FROM_L3MISS : A Page Table Entry was loaded into the TLB from a location other than the local core's L3 due to a data side request.
+event:0x3e042 counters:2 um:zero minimum:10000 name:PM_DPTEG_FROM_L3_DISP_CONFLICT : A Page Table Entry was loaded into the TLB from local core's L3 with dispatch conflict due to a data side request.
+event:0x2e042 counters:1 um:zero minimum:10000 name:PM_DPTEG_FROM_L3_MEPF : A Page Table Entry was loaded into the TLB from local core's L3 without dispatch conflicts hit on Mepf state. due to a data side request.
+event:0x1e044 counters:0 um:zero minimum:10000 name:PM_DPTEG_FROM_L3_NO_CONFLICT : A Page Table Entry was loaded into the TLB from local core's L3 without conflict due to a data side request.
+event:0x1e04c counters:0 um:zero minimum:10000 name:PM_DPTEG_FROM_LL4 : A Page Table Entry was loaded into the TLB from the local chip's L4 cache due to a data side request.
+event:0x2e048 counters:1 um:zero minimum:10000 name:PM_DPTEG_FROM_LMEM : A Page Table Entry was loaded into the TLB from the local chip's Memory due to a data side request.
+event:0x2e04c counters:1 um:zero minimum:10000 name:PM_DPTEG_FROM_MEMORY : A Page Table Entry was loaded into the TLB from a memory location including L4 from local remote or distant due to a data side request.
+event:0x4e04a counters:3 um:zero minimum:10000 name:PM_DPTEG_FROM_OFF_CHIP_CACHE : A Page Table Entry was loaded into the TLB either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a data side request.
+event:0x1e048 counters:0 um:zero minimum:10000 name:PM_DPTEG_FROM_ON_CHIP_CACHE : A Page Table Entry was loaded into the TLB either shared or modified data from another core's L2/L3 on the same chip due to a data side request.
+event:0x2e046 counters:1 um:zero minimum:10000 name:PM_DPTEG_FROM_RL2L3_MOD : A Page Table Entry was loaded into the TLB with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a data side request.
+event:0x1e04a counters:0 um:zero minimum:10000 name:PM_DPTEG_FROM_RL2L3_SHR : A Page Table Entry was loaded into the TLB with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a data side request.
+event:0x2e04a counters:1 um:zero minimum:10000 name:PM_DPTEG_FROM_RL4 : A Page Table Entry was loaded into the TLB from another chip's L4 on the same Node or Group ( Remote) due to a data side request.
+event:0x3e04a counters:2 um:zero minimum:10000 name:PM_DPTEG_FROM_RMEM : A Page Table Entry was loaded into the TLB from another chip's memory on the same Node or Group ( Remote) due to a data side request.
+event:0xd094 counters:0,1,2,3 um:zero minimum:10000 name:PM_DSLB_MISS : Data SLB Miss - Total of all segment sizes. Data SLB misses
+event:0x1c058 counters:0 um:zero minimum:10000 name:PM_DTLB_MISS_16G : Data TLB Miss page size 16G.
+event:0x4c056 counters:3 um:zero minimum:10000 name:PM_DTLB_MISS_16M : Data TLB Miss page size 16M.
+event:0x2c056 counters:1 um:zero minimum:10000 name:PM_DTLB_MISS_4K : Data TLB Miss page size 4k.
+event:0x3c056 counters:2 um:zero minimum:10000 name:PM_DTLB_MISS_64K : Data TLB Miss page size 64K.
+event:0x50a8 counters:0,1,2,3 um:zero minimum:10000 name:PM_EAT_FORCE_MISPRED : XL-form branch was mispredicted due to the predicted target address missing from EAT.  The EAT forces a mispredict in this case since there is no predicated target to validate.  This is a rare case that may occur when the EAT is full and a branch is
+event:0x4084 counters:0,1,2,3 um:zero minimum:10000 name:PM_EAT_FULL_CYC : Cycles No room in EAT. Set on bank conflict and case where no ibuffers available.
+event:0x2080 counters:0,1,2,3 um:zero minimum:10000 name:PM_EE_OFF_EXT_INT : Ee off and external interrupt
+event:0x20b4 counters:0,1,2,3 um:zero minimum:10000 name:PM_FAV_TBEGIN : Dispatch time Favored tbegin
+event:0xa0ae counters:0,1,2,3 um:zero minimum:10000 name:PM_FLOP_SUM_SCALAR : flops summary scalar instructions
+event:0xa0ac counters:0,1,2,3 um:zero minimum:10000 name:PM_FLOP_SUM_VEC : flops summary vector instructions
+event:0x2084 counters:0,1,2,3 um:zero minimum:10000 name:PM_FLUSH_BR_MPRED : Flush caused by branch mispredict
+event:0x30012 counters:2 um:zero minimum:10000 name:PM_FLUSH_COMPLETION : Completion Flush.
+event:0x2082 counters:0,1,2,3 um:zero minimum:10000 name:PM_FLUSH_DISP : Dispatch flush
+event:0x208c counters:0,1,2,3 um:zero minimum:10000 name:PM_FLUSH_DISP_SB : Dispatch Flush: Scoreboard
+event:0x2088 counters:0,1,2,3 um:zero minimum:10000 name:PM_FLUSH_DISP_SYNC : Dispatch Flush: Sync
+event:0x208a counters:0,1,2,3 um:zero minimum:10000 name:PM_FLUSH_DISP_TLBIE : Dispatch Flush: TLBIE
+event:0x208e counters:0,1,2,3 um:zero minimum:10000 name:PM_FLUSH_LSU : Flush initiated by LSU
+event:0x2086 counters:0,1,2,3 um:zero minimum:10000 name:PM_FLUSH_PARTIAL : Partial flush
+event:0xa0b0 counters:0,1,2,3 um:zero minimum:10000 name:PM_FPU0_FCONV : Convert instruction executed
+event:0xa0b8 counters:0,1,2,3 um:zero minimum:10000 name:PM_FPU0_FEST : Estimate instruction executed
+event:0xa0b4 counters:0,1,2,3 um:zero minimum:10000 name:PM_FPU0_FRSP : Round to single precision instruction executed
+event:0xa0b2 counters:0,1,2,3 um:zero minimum:10000 name:PM_FPU1_FCONV : Convert instruction executed
+event:0xa0ba counters:0,1,2,3 um:zero minimum:10000 name:PM_FPU1_FEST : Estimate instruction executed
+event:0xa0b6 counters:0,1,2,3 um:zero minimum:10000 name:PM_FPU1_FRSP : Round to single precision instruction executed
+event:0x3000c counters:2 um:zero minimum:10000 name:PM_FREQ_DOWN : Frequency is being slewed down due to Power Management.
+event:0x4000c counters:3 um:zero minimum:10000 name:PM_FREQ_UP : Frequency is being slewed up due to Power Management.
+event:0x50b0 counters:0,1,2,3 um:zero minimum:10000 name:PM_FUSION_TOC_GRP0_1 : One pair of instructions fused with TOC in Group0
+event:0x50ae counters:0,1,2,3 um:zero minimum:10000 name:PM_FUSION_TOC_GRP0_2 : Two pairs of instructions fused with TOC in Group0
+event:0x50ac counters:0,1,2,3 um:zero minimum:10000 name:PM_FUSION_TOC_GRP0_3 : Three pairs of instructions fused with TOC in Group0
+event:0x50b2 counters:0,1,2,3 um:zero minimum:10000 name:PM_FUSION_TOC_GRP1_1 : One pair of instructions fused with TOC in Group1
+event:0x50b8 counters:0,1,2,3 um:zero minimum:10000 name:PM_FUSION_VSX_GRP0_1 : One pair of instructions fused with VSX in Group0
+event:0x50b6 counters:0,1,2,3 um:zero minimum:10000 name:PM_FUSION_VSX_GRP0_2 : Two pairs of instructions fused with VSX in Group0
+event:0x50b4 counters:0,1,2,3 um:zero minimum:10000 name:PM_FUSION_VSX_GRP0_3 : Three pairs of instructions fused with VSX in Group0
+event:0x50ba counters:0,1,2,3 um:zero minimum:10000 name:PM_FUSION_VSX_GRP1_1 : One pair of instructions fused with VSX in Group1
+event:0x3000e counters:2 um:zero minimum:10000 name:PM_FXU0_BUSY_FXU1_IDLE : fxu0 busy and fxu1 idle.
+event:0x10004 counters:0 um:zero minimum:10000 name:PM_FXU0_FIN : FXU0 Finished.
+event:0x4000e counters:3 um:zero minimum:10000 name:PM_FXU1_BUSY_FXU0_IDLE : fxu0 idle and fxu1 busy.
+event:0x40004 counters:3 um:zero minimum:10000 name:PM_FXU1_FIN : FXU1 Finished.
+event:0x2000e counters:1 um:zero minimum:10000 name:PM_FXU_BUSY : fxu0 busy and fxu1 busy.
+event:0x1000e counters:0 um:zero minimum:10000 name:PM_FXU_IDLE : fxu0 idle and fxu1 idle.
+event:0x20008 counters:1 um:zero minimum:10000 name:PM_GCT_EMPTY_CYC : No itags assigned either thread (GCT Empty).
+event:0x30a4 counters:0,1,2,3 um:zero minimum:10000 name:PM_GCT_MERGE : Group dispatched on a merged GCT empty.  GCT entries can be merged only within the same thread
+event:0x4d01e counters:3 um:zero minimum:10000 name:PM_GCT_NOSLOT_BR_MPRED : Gct empty for this thread due to branch mispred.
+event:0x4d01a counters:3 um:zero minimum:10000 name:PM_GCT_NOSLOT_BR_MPRED_ICMISS : Gct empty for this thread due to Icache Miss and branch mispred.
+event:0x2d01e counters:1 um:zero minimum:10000 name:PM_GCT_NOSLOT_DISP_HELD_ISSQ : Gct empty for this thread due to dispatch hold on this thread due to Issue q full.
+event:0x4d01c counters:3 um:zero minimum:10000 name:PM_GCT_NOSLOT_DISP_HELD_MAP : Gct empty for this thread due to dispatch hold on this thread due to Mapper full.
+event:0x2e010 counters:1 um:zero minimum:10000 name:PM_GCT_NOSLOT_DISP_HELD_OTHER : Gct empty for this thread due to dispatch hold on this thread due to sync.
+event:0x2d01c counters:1 um:zero minimum:10000 name:PM_GCT_NOSLOT_DISP_HELD_SRQ : Gct empty for this thread due to dispatch hold on this thread due to SRQ full.
+event:0x4e010 counters:3 um:zero minimum:10000 name:PM_GCT_NOSLOT_IC_L3MISS : Gct empty for this thread due to icache L3 miss.
+event:0x2d01a counters:1 um:zero minimum:10000 name:PM_GCT_NOSLOT_IC_MISS : Gct empty for this thread due to Icache Miss.
+event:0x20a2 counters:0,1,2,3 um:zero minimum:10000 name:PM_GCT_UTIL_11_14_ENTRIES : GCT Utilization 11-14 entries
+event:0x20a4 counters:0,1,2,3 um:zero minimum:10000 name:PM_GCT_UTIL_15_17_ENTRIES : GCT Utilization 15-17 entries
+event:0x20a6 counters:0,1,2,3 um:zero minimum:10000 name:PM_GCT_UTIL_18_ENTRIES : GCT Utilization 18+ entries
+event:0x209c counters:0,1,2,3 um:zero minimum:10000 name:PM_GCT_UTIL_1_2_ENTRIES : GCT Utilization 1-2 entries
+event:0x209e counters:0,1,2,3 um:zero minimum:10000 name:PM_GCT_UTIL_3_6_ENTRIES : GCT Utilization 3-6 entries
+event:0x20a0 counters:0,1,2,3 um:zero minimum:10000 name:PM_GCT_UTIL_7_10_ENTRIES : GCT Utilization 7-10 entries
+event:0x1000a counters:0 um:zero minimum:10000 name:PM_GRP_BR_MPRED_NONSPEC : Group experienced Non-speculative br mispredict.
+event:0x30004 counters:2 um:zero minimum:100000 name:PM_GRP_CMPL : group completed.
+event:0x3000a counters:2 um:zero minimum:100000 name:PM_GRP_DISP : dispatch_success (Group Dispatched).
+event:0x1000c counters:0 um:zero minimum:10000 name:PM_GRP_IC_MISS_NONSPEC : Group experienced Non-speculative I cache miss.
+event:0x10130 counters:0 um:zero minimum:10000 name:PM_GRP_MRK : Instruction marked in idu.
+event:0x509c counters:0,1,2,3 um:zero minimum:10000 name:PM_GRP_NON_FULL_GROUP : GROUPs where we did not have 6 non branch instructions in the group(ST mode), in SMT mode 3 non branches
+event:0x20050 counters:1 um:zero minimum:10000 name:PM_GRP_PUMP_CPRED : Initial and Final Pump Scope and data sourced across this scope was group pump for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate).
+event:0x20052 counters:1 um:zero minimum:10000 name:PM_GRP_PUMP_MPRED : Final Pump Scope(Group) to get data sourced, ended up larger than Initial Pump Scope OR Final Pump Scope(Group) got data from source that was at smaller scope(Chip) Final pump was group pump and initial pump was chip or final and initial pump was gro
+event:0x10052 counters:0 um:zero minimum:10000 name:PM_GRP_PUMP_MPRED_RTY : Final Pump Scope(Group) to get data sourced, ended up larger than Initial Pump Scope (Chip) Final pump was group pump and initial pump was chip pump for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate).
+event:0x50a4 counters:0,1,2,3 um:zero minimum:10000 name:PM_GRP_TERM_2ND_BRANCH : There were enough instructions in the Ibuffer, but 2nd branch ends group
+event:0x50a6 counters:0,1,2,3 um:zero minimum:10000 name:PM_GRP_TERM_FPU_AFTER_BR : There were enough instructions in the Ibuffer, but FPU OP IN same group after a branch terminates a group, cant do partial flushes
+event:0x509e counters:0,1,2,3 um:zero minimum:10000 name:PM_GRP_TERM_NOINST : Do not fill every slot in the group, Not enough instructions in the Ibuffer.  This includes cases where the group started with enough instructions, but some got knocked out by a cache miss or branch redirect (which would also empty the Ibuffer).
+event:0x50a0 counters:0,1,2,3 um:zero minimum:10000 name:PM_GRP_TERM_OTHER : There were enough instructions in the Ibuffer, but the group terminated early for some other reason, most likely due to a First or Last.
+event:0x50a2 counters:0,1,2,3 um:zero minimum:10000 name:PM_GRP_TERM_SLOT_LIMIT : There were enough instructions in the Ibuffer, but 3 src RA/RB/RC , 2 way crack caused a group termination
+event:0x2000a counters:1 um:zero minimum:10000 name:PM_HV_CYC : cycles in hypervisor mode .
+event:0x4086 counters:0,1,2,3 um:zero minimum:10000 name:PM_IBUF_FULL_CYC : Cycles No room in ibuff. Fully qualified transfer (if5 valid).
+event:0x10018 counters:0 um:zero minimum:10000 name:PM_IC_DEMAND_CYC : Demand ifetch pending.
+event:0x4098 counters:0,1,2,3 um:zero minimum:10000 name:PM_IC_DEMAND_L2_BHT_REDIRECT :  L2 I cache demand request due to BHT redirect, branch redirect ( 2 bubbles 3 cycles)
+event:0x409a counters:0,1,2,3 um:zero minimum:10000 name:PM_IC_DEMAND_L2_BR_REDIRECT :  L2 I cache demand request due to branch Mispredict ( 15 cycle path)
+event:0x4088 counters:0,1,2,3 um:zero minimum:10000 name:PM_IC_DEMAND_REQ : Demand Instruction fetch request
+event:0x508a counters:0,1,2,3 um:zero minimum:10000 name:PM_IC_INVALIDATE : Ic line invalidated
+event:0x4092 counters:0,1,2,3 um:zero minimum:10000 name:PM_IC_PREF_CANCEL_HIT : Prefetch Canceled due to icache hit
+event:0x4094 counters:0,1,2,3 um:zero minimum:10000 name:PM_IC_PREF_CANCEL_L2 : L2 Squashed request
+event:0x4090 counters:0,1,2,3 um:zero minimum:10000 name:PM_IC_PREF_CANCEL_PAGE : Prefetch Canceled due to page boundary
+event:0x408a counters:0,1,2,3 um:zero minimum:10000 name:PM_IC_PREF_REQ : Instruction prefetch requests
+event:0x408e counters:0,1,2,3 um:zero minimum:10000 name:PM_IC_PREF_WRITE : Instruction prefetch written into IL1
+event:0x4096 counters:0,1,2,3 um:zero minimum:10000 name:PM_IC_RELOAD_PRIVATE : Reloading line was brought in private for a specific thread.  Most lines are brought in shared for all eight threads.  If RA does not match then invalidates and then brings it shared to other thread. In P7 line brought in private , then line was inv
+event:0x4006a counters:3 um:zero minimum:10000 name:PM_IERAT_RELOAD_16M : IERAT Reloaded (Miss) for a 16M page.
+event:0x20064 counters:1 um:zero minimum:10000 name:PM_IERAT_RELOAD_4K : IERAT Reloaded (Miss) for a 4k page.
+event:0x3006a counters:2 um:zero minimum:10000 name:PM_IERAT_RELOAD_64K : IERAT Reloaded (Miss) for a 64k page.
+event:0x3405e counters:2 um:zero minimum:10000 name:PM_IFETCH_THROTTLE : Cycles instruction fetch was throttled in IFU.
+event:0x5088 counters:0,1,2,3 um:zero minimum:10000 name:PM_IFU_L2_TOUCH : L2 touch to update MRU on a line
+event:0x514050 counters:0 um:zero minimum:10000 name:PM_INST_ALL_CHIP_PUMP_CPRED : Initial and Final Pump Scope and data sourced across this scope was chip pump (prediction=correct) for an instruction fetch
+event:0x544048 counters:3 um:zero minimum:10000 name:PM_INST_ALL_FROM_DL2L3_MOD : The processor's Instruction cache was reloaded with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1
+event:0x534048 counters:2 um:zero minimum:10000 name:PM_INST_ALL_FROM_DL2L3_SHR : The processor's Instruction cache was reloaded with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1
+event:0x53404c counters:2 um:zero minimum:10000 name:PM_INST_ALL_FROM_DL4 : The processor's Instruction cache was reloaded from another chip's L4 on a different Node or Group (Distant) due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1
+event:0x54404c counters:3 um:zero minimum:10000 name:PM_INST_ALL_FROM_DMEM : The processor's Instruction cache was reloaded from another chip's memory on a different Node or Group (Distant) due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1
+event:0x514042 counters:0 um:zero minimum:10000 name:PM_INST_ALL_FROM_L2 : The processor's Instruction cache was reloaded from local core's L2 due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1
+event:0x544046 counters:3 um:zero minimum:10000 name:PM_INST_ALL_FROM_L21_MOD : The processor's Instruction cache was reloaded with Modified (M) data from another core's L2 on the same chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1
+event:0x534046 counters:2 um:zero minimum:10000 name:PM_INST_ALL_FROM_L21_SHR : The processor's Instruction cache was reloaded with Shared (S) data from another core's L2 on the same chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1
+event:0x51404e counters:0 um:zero minimum:10000 name:PM_INST_ALL_FROM_L2MISS : The processor's Instruction cache was reloaded from a location other than the local core's L2 due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1
+event:0x534040 counters:2 um:zero minimum:10000 name:PM_INST_ALL_FROM_L2_DISP_CONFLICT_LDHITST : The processor's Instruction cache was reloaded from local core's L2 with load hit store conflict due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1
+event:0x544040 counters:3 um:zero minimum:10000 name:PM_INST_ALL_FROM_L2_DISP_CONFLICT_OTHER : The processor's Instruction cache was reloaded from local core's L2 with dispatch conflict due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1
+event:0x524040 counters:1 um:zero minimum:10000 name:PM_INST_ALL_FROM_L2_MEPF : The processor's Instruction cache was reloaded from local core's L2 hit without dispatch conflicts on Mepf state. due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1
+event:0x514040 counters:0 um:zero minimum:10000 name:PM_INST_ALL_FROM_L2_NO_CONFLICT : The processor's Instruction cache was reloaded from local core's L2 without conflict due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1
+event:0x544042 counters:3 um:zero minimum:10000 name:PM_INST_ALL_FROM_L3 : The processor's Instruction cache was reloaded from local core's L3 due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1
+event:0x544044 counters:3 um:zero minimum:10000 name:PM_INST_ALL_FROM_L31_ECO_MOD : The processor's Instruction cache was reloaded with Modified (M) data from another core's ECO L3 on the same chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1
+event:0x534044 counters:2 um:zero minimum:10000 name:PM_INST_ALL_FROM_L31_ECO_SHR : The processor's Instruction cache was reloaded with Shared (S) data from another core's ECO L3 on the same chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1
+event:0x524044 counters:1 um:zero minimum:10000 name:PM_INST_ALL_FROM_L31_MOD : The processor's Instruction cache was reloaded with Modified (M) data from another core's L3 on the same chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1
+event:0x514046 counters:0 um:zero minimum:10000 name:PM_INST_ALL_FROM_L31_SHR : The processor's Instruction cache was reloaded with Shared (S) data from another core's L3 on the same chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1
+event:0x54404e counters:3 um:zero minimum:10000 name:PM_INST_ALL_FROM_L3MISS_MOD : The processor's Instruction cache was reloaded from a location other than the local core's L3 due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1
+event:0x534042 counters:2 um:zero minimum:10000 name:PM_INST_ALL_FROM_L3_DISP_CONFLICT : The processor's Instruction cache was reloaded from local core's L3 with dispatch conflict due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1
+event:0x524042 counters:1 um:zero minimum:10000 name:PM_INST_ALL_FROM_L3_MEPF : The processor's Instruction cache was reloaded from local core's L3 without dispatch conflicts hit on Mepf state. due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1
+event:0x514044 counters:0 um:zero minimum:10000 name:PM_INST_ALL_FROM_L3_NO_CONFLICT : The processor's Instruction cache was reloaded from local core's L3 without conflict due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1
+event:0x51404c counters:0 um:zero minimum:10000 name:PM_INST_ALL_FROM_LL4 : The processor's Instruction cache was reloaded from the local chip's L4 cache due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1
+event:0x524048 counters:1 um:zero minimum:10000 name:PM_INST_ALL_FROM_LMEM : The processor's Instruction cache was reloaded from the local chip's Memory due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1
+event:0x52404c counters:1 um:zero minimum:10000 name:PM_INST_ALL_FROM_MEMORY : The processor's Instruction cache was reloaded from a memory location including L4 from local remote or distant due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1
+event:0x54404a counters:3 um:zero minimum:10000 name:PM_INST_ALL_FROM_OFF_CHIP_CACHE : The processor's Instruction cache was reloaded either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1
+event:0x514048 counters:0 um:zero minimum:10000 name:PM_INST_ALL_FROM_ON_CHIP_CACHE : The processor's Instruction cache was reloaded either shared or modified data from another core's L2/L3 on the same chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1
+event:0x524046 counters:1 um:zero minimum:10000 name:PM_INST_ALL_FROM_RL2L3_MOD : The processor's Instruction cache was reloaded with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1
+event:0x51404a counters:0 um:zero minimum:10000 name:PM_INST_ALL_FROM_RL2L3_SHR : The processor's Instruction cache was reloaded with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1
+event:0x52404a counters:1 um:zero minimum:10000 name:PM_INST_ALL_FROM_RL4 : The processor's Instruction cache was reloaded from another chip's L4 on the same Node or Group ( Remote) due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1
+event:0x53404a counters:2 um:zero minimum:10000 name:PM_INST_ALL_FROM_RMEM : The processor's Instruction cache was reloaded from another chip's memory on the same Node or Group ( Remote) due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1
+event:0x524050 counters:1 um:zero minimum:10000 name:PM_INST_ALL_GRP_PUMP_CPRED : Initial and Final Pump Scope and data sourced across this scope was group pump for an instruction fetch
+event:0x524052 counters:1 um:zero minimum:10000 name:PM_INST_ALL_GRP_PUMP_MPRED : Final Pump Scope(Group) to get data sourced, ended up larger than Initial Pump Scope OR Final Pump Scope(Group) got data from source that was at smaller scope(Chip) Final pump was group pump and initial pump was chip or final and initial pump was gro
+event:0x514052 counters:0 um:zero minimum:10000 name:PM_INST_ALL_GRP_PUMP_MPRED_RTY : Final Pump Scope(Group) to get data sourced, ended up larger than Initial Pump Scope (Chip) Final pump was group pump and initial pump was chip pump for an instruction fetch
+event:0x514054 counters:0 um:zero minimum:10000 name:PM_INST_ALL_PUMP_CPRED : Pump prediction correct. Counts across all types of pumps for an instruction fetch
+event:0x544052 counters:3 um:zero minimum:10000 name:PM_INST_ALL_PUMP_MPRED : Pump misprediction. Counts across all types of pumps for an instruction fetch
+event:0x534050 counters:2 um:zero minimum:10000 name:PM_INST_ALL_SYS_PUMP_CPRED : Initial and Final Pump Scope and data sourced across this scope was system pump for an instruction fetch
+event:0x534052 counters:2 um:zero minimum:10000 name:PM_INST_ALL_SYS_PUMP_MPRED : Final Pump Scope(system) to get data sourced, ended up larger than Initial Pump Scope(Chip/Group) OR Final Pump Scope(system) got data from source that was at smaller scope(Chip/group) Final pump was system pump and initial pump was chip or group or
+event:0x544050 counters:3 um:zero minimum:10000 name:PM_INST_ALL_SYS_PUMP_MPRED_RTY : Final Pump Scope(system) to get data sourced, ended up larger than Initial Pump Scope (Chip or Group) for an instruction fetch
+event:0x14050 counters:0 um:zero minimum:10000 name:PM_INST_CHIP_PUMP_CPRED : Initial and Final Pump Scope and data sourced across this scope was chip pump (prediction=correct) for an instruction fetch.
+event:0x2 counters:0,1,2,3 um:zero minimum:100000 name:PM_INST_CMPL : PPC Instructions Finished (completed).
+event:0x44048 counters:3 um:zero minimum:10000 name:PM_INST_FROM_DL2L3_MOD : The processor's Instruction cache was reloaded with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1 .
+event:0x34048 counters:2 um:zero minimum:10000 name:PM_INST_FROM_DL2L3_SHR : The processor's Instruction cache was reloaded with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1 .
+event:0x3404c counters:2 um:zero minimum:10000 name:PM_INST_FROM_DL4 : The processor's Instruction cache was reloaded from another chip's L4 on a different Node or Group (Distant) due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1 .
+event:0x4404c counters:3 um:zero minimum:10000 name:PM_INST_FROM_DMEM : The processor's Instruction cache was reloaded from another chip's memory on a different Node or Group (Distant) due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1 .
+event:0x4080 counters:0,1,2,3 um:zero minimum:10000 name:PM_INST_FROM_L1 : Instruction fetches from L1
+event:0x14042 counters:0 um:zero minimum:10000 name:PM_INST_FROM_L2 : The processor's Instruction cache was reloaded from local core's L2 due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1 .
+event:0x44046 counters:3 um:zero minimum:10000 name:PM_INST_FROM_L21_MOD : The processor's Instruction cache was reloaded with Modified (M) data from another core's L2 on the same chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1 .
+event:0x34046 counters:2 um:zero minimum:10000 name:PM_INST_FROM_L21_SHR : The processor's Instruction cache was reloaded with Shared (S) data from another core's L2 on the same chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1 .
+event:0x1404e counters:0 um:zero minimum:10000 name:PM_INST_FROM_L2MISS : The processor's Instruction cache was reloaded from a location other than the local core's L2 due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1 .
+event:0x34040 counters:2 um:zero minimum:10000 name:PM_INST_FROM_L2_DISP_CONFLICT_LDHITST : The processor's Instruction cache was reloaded from local core's L2 with load hit store conflict due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1 .
+event:0x44040 counters:3 um:zero minimum:10000 name:PM_INST_FROM_L2_DISP_CONFLICT_OTHER : The processor's Instruction cache was reloaded from local core's L2 with dispatch conflict due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1 .
+event:0x24040 counters:1 um:zero minimum:10000 name:PM_INST_FROM_L2_MEPF : The processor's Instruction cache was reloaded from local core's L2 hit without dispatch conflicts on Mepf state. due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1 .
+event:0x14040 counters:0 um:zero minimum:10000 name:PM_INST_FROM_L2_NO_CONFLICT : The processor's Instruction cache was reloaded from local core's L2 without conflict due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1 .
+event:0x44042 counters:3 um:zero minimum:10000 name:PM_INST_FROM_L3 : The processor's Instruction cache was reloaded from local core's L3 due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1 .
+event:0x44044 counters:3 um:zero minimum:10000 name:PM_INST_FROM_L31_ECO_MOD : The processor's Instruction cache was reloaded with Modified (M) data from another core's ECO L3 on the same chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1 .
+event:0x34044 counters:2 um:zero minimum:10000 name:PM_INST_FROM_L31_ECO_SHR : The processor's Instruction cache was reloaded with Shared (S) data from another core's ECO L3 on the same chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1 .
+event:0x24044 counters:1 um:zero minimum:10000 name:PM_INST_FROM_L31_MOD : The processor's Instruction cache was reloaded with Modified (M) data from another core's L3 on the same chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1 .
+event:0x14046 counters:0 um:zero minimum:10000 name:PM_INST_FROM_L31_SHR : The processor's Instruction cache was reloaded with Shared (S) data from another core's L3 on the same chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1 .
+event:0x4404e counters:3 um:zero minimum:10000 name:PM_INST_FROM_L3MISS_MOD : The processor's Instruction cache was reloaded from a location other than the local core's L3 due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1 .
+event:0x34042 counters:2 um:zero minimum:10000 name:PM_INST_FROM_L3_DISP_CONFLICT : The processor's Instruction cache was reloaded from local core's L3 with dispatch conflict due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1 .
+event:0x24042 counters:1 um:zero minimum:10000 name:PM_INST_FROM_L3_MEPF : The processor's Instruction cache was reloaded from local core's L3 without dispatch conflicts hit on Mepf state. due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1 .
+event:0x14044 counters:0 um:zero minimum:10000 name:PM_INST_FROM_L3_NO_CONFLICT : The processor's Instruction cache was reloaded from local core's L3 without conflict due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1 .
+event:0x1404c counters:0 um:zero minimum:10000 name:PM_INST_FROM_LL4 : The processor's Instruction cache was reloaded from the local chip's L4 cache due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1 .
+event:0x24048 counters:1 um:zero minimum:10000 name:PM_INST_FROM_LMEM : The processor's Instruction cache was reloaded from the local chip's Memory due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1 .
+event:0x2404c counters:1 um:zero minimum:10000 name:PM_INST_FROM_MEMORY : The processor's Instruction cache was reloaded from a memory location including L4 from local remote or distant due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1 .
+event:0x4404a counters:3 um:zero minimum:10000 name:PM_INST_FROM_OFF_CHIP_CACHE : The processor's Instruction cache was reloaded either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1 .
+event:0x14048 counters:0 um:zero minimum:10000 name:PM_INST_FROM_ON_CHIP_CACHE : The processor's Instruction cache was reloaded either shared or modified data from another core's L2/L3 on the same chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1 .
+event:0x24046 counters:1 um:zero minimum:10000 name:PM_INST_FROM_RL2L3_MOD : The processor's Instruction cache was reloaded with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1 .
+event:0x1404a counters:0 um:zero minimum:10000 name:PM_INST_FROM_RL2L3_SHR : The processor's Instruction cache was reloaded with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1 .
+event:0x2404a counters:1 um:zero minimum:10000 name:PM_INST_FROM_RL4 : The processor's Instruction cache was reloaded from another chip's L4 on the same Node or Group ( Remote) due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1 .
+event:0x3404a counters:2 um:zero minimum:10000 name:PM_INST_FROM_RMEM : The processor's Instruction cache was reloaded from another chip's memory on the same Node or Group ( Remote) due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1 .
+event:0x24050 counters:1 um:zero minimum:10000 name:PM_INST_GRP_PUMP_CPRED : Initial and Final Pump Scope and data sourced across this scope was group pump for an instruction fetch.
+event:0x24052 counters:1 um:zero minimum:10000 name:PM_INST_GRP_PUMP_MPRED : Final Pump Scope(Group) to get data sourced, ended up larger than Initial Pump Scope OR Final Pump Scope(Group) got data from source that was at smaller scope(Chip) Final pump was group pump and initial pump was chip or final and initial pump was gro
+event:0x14052 counters:0 um:zero minimum:10000 name:PM_INST_GRP_PUMP_MPRED_RTY : Final Pump Scope(Group) to get data sourced, ended up larger than Initial Pump Scope (Chip) Final pump was group pump and initial pump was chip pump for an instruction fetch.
+event:0x1003a counters:0 um:zero minimum:10000 name:PM_INST_IMC_MATCH_CMPL : IMC Match Count.
+event:0x30016 counters:2 um:zero minimum:10000 name:PM_INST_IMC_MATCH_DISP : IMC Matches dispatched.
+event:0x14054 counters:0 um:zero minimum:10000 name:PM_INST_PUMP_CPRED : Pump prediction correct. Counts across all types of pumps for an instruction fetch.
+event:0x44052 counters:3 um:zero minimum:10000 name:PM_INST_PUMP_MPRED : Pump misprediction. Counts across all types of pumps for an instruction fetch.
+event:0x34050 counters:2 um:zero minimum:10000 name:PM_INST_SYS_PUMP_CPRED : Initial and Final Pump Scope and data sourced across this scope was system pump for an instruction fetch.
+event:0x34052 counters:2 um:zero minimum:10000 name:PM_INST_SYS_PUMP_MPRED : Final Pump Scope(system) to get data sourced, ended up larger than Initial Pump Scope(Chip/Group) OR Final Pump Scope(system) got data from source that was at smaller scope(Chip/group) Final pump was system pump and initial pump was chip or group or
+event:0x44050 counters:3 um:zero minimum:10000 name:PM_INST_SYS_PUMP_MPRED_RTY : Final Pump Scope(system) to get data sourced, ended up larger than Initial Pump Scope (Chip or Group) for an instruction fetch.
+event:0x10014 counters:0 um:zero minimum:100000 name:PM_IOPS_CMPL : IOPS Completed.
+event:0x30014 counters:2 um:zero minimum:100000 name:PM_IOPS_DISP : IOPS dispatched.
+event:0x45048 counters:3 um:zero minimum:10000 name:PM_IPTEG_FROM_DL2L3_MOD : A Page Table Entry was loaded into the TLB with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a instruction side request.
+event:0x35048 counters:2 um:zero minimum:10000 name:PM_IPTEG_FROM_DL2L3_SHR : A Page Table Entry was loaded into the TLB with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a instruction side request.
+event:0x3504c counters:2 um:zero minimum:10000 name:PM_IPTEG_FROM_DL4 : A Page Table Entry was loaded into the TLB from another chip's L4 on a different Node or Group (Distant) due to a instruction side request.
+event:0x4504c counters:3 um:zero minimum:10000 name:PM_IPTEG_FROM_DMEM : A Page Table Entry was loaded into the TLB from another chip's memory on a different Node or Group (Distant) due to a instruction side request.
+event:0x15042 counters:0 um:zero minimum:10000 name:PM_IPTEG_FROM_L2 : A Page Table Entry was loaded into the TLB from local core's L2 due to a instruction side request.
+event:0x45046 counters:3 um:zero minimum:10000 name:PM_IPTEG_FROM_L21_MOD : A Page Table Entry was loaded into the TLB with Modified (M) data from another core's L2 on the same chip due to a instruction side request.
+event:0x35046 counters:2 um:zero minimum:10000 name:PM_IPTEG_FROM_L21_SHR : A Page Table Entry was loaded into the TLB with Shared (S) data from another core's L2 on the same chip due to a instruction side request.
+event:0x1504e counters:0 um:zero minimum:10000 name:PM_IPTEG_FROM_L2MISS : A Page Table Entry was loaded into the TLB from a location other than the local core's L2 due to a instruction side request.
+event:0x35040 counters:2 um:zero minimum:10000 name:PM_IPTEG_FROM_L2_DISP_CONFLICT_LDHITST : A Page Table Entry was loaded into the TLB from local core's L2 with load hit store conflict due to a instruction side request.
+event:0x45040 counters:3 um:zero minimum:10000 name:PM_IPTEG_FROM_L2_DISP_CONFLICT_OTHER : A Page Table Entry was loaded into the TLB from local core's L2 with dispatch conflict due to a instruction side request.
+event:0x25040 counters:1 um:zero minimum:10000 name:PM_IPTEG_FROM_L2_MEPF : A Page Table Entry was loaded into the TLB from local core's L2 hit without dispatch conflicts on Mepf state. due to a instruction side request.
+event:0x15040 counters:0 um:zero minimum:10000 name:PM_IPTEG_FROM_L2_NO_CONFLICT : A Page Table Entry was loaded into the TLB from local core's L2 without conflict due to a instruction side request.
+event:0x45042 counters:3 um:zero minimum:10000 name:PM_IPTEG_FROM_L3 : A Page Table Entry was loaded into the TLB from local core's L3 due to a instruction side request.
+event:0x45044 counters:3 um:zero minimum:10000 name:PM_IPTEG_FROM_L31_ECO_MOD : A Page Table Entry was loaded into the TLB with Modified (M) data from another core's ECO L3 on the same chip due to a instruction side request.
+event:0x35044 counters:2 um:zero minimum:10000 name:PM_IPTEG_FROM_L31_ECO_SHR : A Page Table Entry was loaded into the TLB with Shared (S) data from another core's ECO L3 on the same chip due to a instruction side request.
+event:0x25044 counters:1 um:zero minimum:10000 name:PM_IPTEG_FROM_L31_MOD : A Page Table Entry was loaded into the TLB with Modified (M) data from another core's L3 on the same chip due to a instruction side request.
+event:0x15046 counters:0 um:zero minimum:10000 name:PM_IPTEG_FROM_L31_SHR : A Page Table Entry was loaded into the TLB with Shared (S) data from another core's L3 on the same chip due to a instruction side request.
+event:0x4504e counters:3 um:zero minimum:10000 name:PM_IPTEG_FROM_L3MISS : A Page Table Entry was loaded into the TLB from a location other than the local core's L3 due to a instruction side request.
+event:0x35042 counters:2 um:zero minimum:10000 name:PM_IPTEG_FROM_L3_DISP_CONFLICT : A Page Table Entry was loaded into the TLB from local core's L3 with dispatch conflict due to a instruction side request.
+event:0x25042 counters:1 um:zero minimum:10000 name:PM_IPTEG_FROM_L3_MEPF : A Page Table Entry was loaded into the TLB from local core's L3 without dispatch conflicts hit on Mepf state. due to a instruction side request.
+event:0x15044 counters:0 um:zero minimum:10000 name:PM_IPTEG_FROM_L3_NO_CONFLICT : A Page Table Entry was loaded into the TLB from local core's L3 without conflict due to a instruction side request.
+event:0x1504c counters:0 um:zero minimum:10000 name:PM_IPTEG_FROM_LL4 : A Page Table Entry was loaded into the TLB from the local chip's L4 cache due to a instruction side request.
+event:0x25048 counters:1 um:zero minimum:10000 name:PM_IPTEG_FROM_LMEM : A Page Table Entry was loaded into the TLB from the local chip's Memory due to a instruction side request.
+event:0x2504c counters:1 um:zero minimum:10000 name:PM_IPTEG_FROM_MEMORY : A Page Table Entry was loaded into the TLB from a memory location including L4 from local remote or distant due to a instruction side request.
+event:0x4504a counters:3 um:zero minimum:10000 name:PM_IPTEG_FROM_OFF_CHIP_CACHE : A Page Table Entry was loaded into the TLB either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a instruction side request.
+event:0x15048 counters:0 um:zero minimum:10000 name:PM_IPTEG_FROM_ON_CHIP_CACHE : A Page Table Entry was loaded into the TLB either shared or modified data from another core's L2/L3 on the same chip due to a instruction side request.
+event:0x25046 counters:1 um:zero minimum:10000 name:PM_IPTEG_FROM_RL2L3_MOD : A Page Table Entry was loaded into the TLB with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a instruction side request.
+event:0x1504a counters:0 um:zero minimum:10000 name:PM_IPTEG_FROM_RL2L3_SHR : A Page Table Entry was loaded into the TLB with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a instruction side request.
+event:0x2504a counters:1 um:zero minimum:10000 name:PM_IPTEG_FROM_RL4 : A Page Table Entry was loaded into the TLB from another chip's L4 on the same Node or Group ( Remote) due to a instruction side request.
+event:0x3504a counters:2 um:zero minimum:10000 name:PM_IPTEG_FROM_RMEM : A Page Table Entry was loaded into the TLB from another chip's memory on the same Node or Group ( Remote) due to a instruction side request.
+event:0x617082 counters:0 um:zero minimum:10000 name:PM_ISIDE_DISP : All i-side dispatch attempts
+event:0x627084 counters:1 um:zero minimum:10000 name:PM_ISIDE_DISP_FAIL : All i-side dispatch attempts that failed due to a addr collision with another machine
+event:0x627086 counters:1 um:zero minimum:10000 name:PM_ISIDE_DISP_FAIL_OTHER : All i-side dispatch attempts that failed due to a reason other than addrs collision
+event:0x4608e counters:3 um:zero minimum:10000 name:PM_ISIDE_L2MEMACC : valid when first beat of data comes in for an i-side fetch where data came from mem(or L4)
+event:0x44608e counters:3 um:zero minimum:10000 name:PM_ISIDE_MRU_TOUCH : Iside L2 MRU touch
+event:0xd096 counters:0,1,2,3 um:zero minimum:10000 name:PM_ISLB_MISS : I SLB Miss.
+event:0x30ac counters:0,1,2,3 um:zero minimum:10000 name:PM_ISU_REF_FX0 : FX0 ISU reject
+event:0x30ae counters:0,1,2,3 um:zero minimum:10000 name:PM_ISU_REF_FX1 : FX1 ISU reject
+event:0x38ac counters:0,1,2,3 um:zero minimum:10000 name:PM_ISU_REF_FXU : ISU
+event:0x30b0 counters:0,1,2,3 um:zero minimum:10000 name:PM_ISU_REF_LS0 : LS0 ISU reject
+event:0x30b2 counters:0,1,2,3 um:zero minimum:10000 name:PM_ISU_REF_LS1 : LS1 ISU reject
+event:0x30b4 counters:0,1,2,3 um:zero minimum:10000 name:PM_ISU_REF_LS2 : LS2 ISU reject
+event:0x30b6 counters:0,1,2,3 um:zero minimum:10000 name:PM_ISU_REF_LS3 : LS3 ISU reject
+event:0x309c counters:0,1,2,3 um:zero minimum:10000 name:PM_ISU_REJECTS_ALL : All isu rejects could be more than 1 per cycle
+event:0x30a2 counters:0,1,2,3 um:zero minimum:10000 name:PM_ISU_REJECT_RES_NA : ISU reject due to resource not available
+event:0x309e counters:0,1,2,3 um:zero minimum:10000 name:PM_ISU_REJECT_SAR_BYPASS : Reject because of SAR bypass
+event:0x30a0 counters:0,1,2,3 um:zero minimum:10000 name:PM_ISU_REJECT_SRC_NA : ISU reject due to source not available
+event:0x30a8 counters:0,1,2,3 um:zero minimum:10000 name:PM_ISU_REJ_VS0 : VS0 ISU reject
+event:0x30aa counters:0,1,2,3 um:zero minimum:10000 name:PM_ISU_REJ_VS1 : VS1 ISU reject
+event:0x38a8 counters:0,1,2,3 um:zero minimum:10000 name:PM_ISU_REJ_VSU : ISU
+event:0x30b8 counters:0,1,2,3 um:zero minimum:10000 name:PM_ISYNC : Isync count per thread
+event:0x200301ea counters:2 um:zero minimum:10000 name:PM_L1MISS_LAT_EXC_1024 : Reload latency exceeded 1024 cyc
+event:0x200401ec counters:3 um:zero minimum:10000 name:PM_L1MISS_LAT_EXC_2048 : Reload latency exceeded 2048 cyc
+event:0x200101e8 counters:0 um:zero minimum:10000 name:PM_L1MISS_LAT_EXC_256 : Reload latency exceeded 256 cyc
+event:0x200201e6 counters:1 um:zero minimum:10000 name:PM_L1MISS_LAT_EXC_32 : Reload latency exceeded 32 cyc
+event:0x26086 counters:1 um:zero minimum:10000 name:PM_L1PF_L2MEMACC : valid when first beat of data comes in for an L1pref where data came from mem(or L4)
+event:0x1002c counters:0 um:zero minimum:10000 name:PM_L1_DCACHE_RELOADED_ALL : L1 data cache reloaded for demand or prefetch .
+event:0x408c counters:0,1,2,3 um:zero minimum:10000 name:PM_L1_DEMAND_WRITE : Instruction Demand sectors written into IL1
+event:0x40012 counters:3 um:zero minimum:10000 name:PM_L1_ICACHE_RELOADED_ALL : Counts all Icache reloads includes demand, prefetch, prefetch turned into demand and demand turned into prefetch.
+event:0x30068 counters:2 um:zero minimum:10000 name:PM_L1_ICACHE_RELOADED_PREF : Counts all Icache prefetch reloads ( includes demand turned into prefetch).
+event:0x417080 counters:0 um:zero minimum:10000 name:PM_L2_CASTOUT_MOD : L2 Castouts - Modified (M, Mu, Me)
+event:0x417082 counters:0 um:zero minimum:10000 name:PM_L2_CASTOUT_SHR : L2 Castouts - Shared (T, Te, Si, S)
+event:0x27084 counters:1 um:zero minimum:10000 name:PM_L2_CHIP_PUMP : RC requests that were local on chip pump attempts
+event:0x427086 counters:1 um:zero minimum:10000 name:PM_L2_DC_INV : Dcache invalidates from L2
+event:0x44608c counters:3 um:zero minimum:10000 name:PM_L2_DISP_ALL_L2MISS : All successful Ld/St dispatches for this thread that were an L2miss.
+event:0x64608e counters:3 um:zero minimum:10000 name:PM_L2_GROUP_PUMP : RC requests that were on Node Pump attempts
+event:0x626084 counters:1 um:zero minimum:10000 name:PM_L2_GRP_GUESS_CORRECT : L2 guess grp and guess was correct (data intra-6chip AND ^on-chip)
+event:0x626086 counters:1 um:zero minimum:10000 name:PM_L2_GRP_GUESS_WRONG : L2 guess grp and guess was not correct (ie data on-chip OR beyond-6chip)
+event:0x427084 counters:1 um:zero minimum:10000 name:PM_L2_IC_INV : Icache Invalidates from L2
+event:0x436088 counters:2 um:zero minimum:10000 name:PM_L2_INST : All successful I-side dispatches for this thread   (excludes i_l2mru_tch reqs)
+event:0x43608a counters:2 um:zero minimum:10000 name:PM_L2_INST_MISS : All successful i-side dispatches that were an L2miss for this thread (excludes i_l2mru_tch reqs)
+event:0x416080 counters:0 um:zero minimum:10000 name:PM_L2_LD : All successful D-side Load dispatches for this thread
+event:0x437088 counters:2 um:zero minimum:10000 name:PM_L2_LD_DISP : All successful load dispatches
+event:0x43708a counters:2 um:zero minimum:10000 name:PM_L2_LD_HIT : All successful load dispatches that were L2 hits
+event:0x426084 counters:1 um:zero minimum:10000 name:PM_L2_LD_MISS : All successful D-Side Load dispatches that were an L2miss for this thread
+event:0x616080 counters:0 um:zero minimum:10000 name:PM_L2_LOC_GUESS_CORRECT : L2 guess loc and guess was correct (ie data local)
+event:0x616082 counters:0 um:zero minimum:10000 name:PM_L2_LOC_GUESS_WRONG : L2 guess loc and guess was not correct (ie data not on chip)
+event:0x516080 counters:0 um:zero minimum:10000 name:PM_L2_RCLD_DISP :  L2  RC load dispatch attempt
+event:0x516082 counters:0 um:zero minimum:10000 name:PM_L2_RCLD_DISP_FAIL_ADDR :  L2  RC load dispatch attempt failed due to address collision with RC/CO/SN/SQ
+event:0x526084 counters:1 um:zero minimum:10000 name:PM_L2_RCLD_DISP_FAIL_OTHER :  L2  RC load dispatch attempt failed due to other reasons
+event:0x536088 counters:2 um:zero minimum:10000 name:PM_L2_RCST_DISP :  L2  RC store dispatch attempt
+event:0x53608a counters:2 um:zero minimum:10000 name:PM_L2_RCST_DISP_FAIL_ADDR :  L2  RC store dispatch attempt failed due to address collision with RC/CO/SN/SQ
+event:0x54608c counters:3 um:zero minimum:10000 name:PM_L2_RCST_DISP_FAIL_OTHER :  L2  RC store dispatch attempt failed due to other reasons
+event:0x537088 counters:2 um:zero minimum:10000 name:PM_L2_RC_ST_DONE : RC did st to line that was Tx or Sx
+event:0x63708a counters:2 um:zero minimum:10000 name:PM_L2_RTY_LD : RC retries on PB for any load from core
+event:0x3708a counters:2 um:zero minimum:10000 name:PM_L2_RTY_ST : RC retries on PB for any store from core
+event:0x54708c counters:3 um:zero minimum:10000 name:PM_L2_SN_M_RD_DONE : SNP dispatched for a read and was M
+event:0x54708e counters:3 um:zero minimum:10000 name:PM_L2_SN_M_WR_DONE : SNP dispatched for a write and was M
+event:0x53708a counters:2 um:zero minimum:10000 name:PM_L2_SN_SX_I_DONE : SNP dispatched and went from Sx or Tx to Ix
+event:0x17080 counters:0 um:zero minimum:10000 name:PM_L2_ST : All successful D-side store dispatches for this thread
+event:0x44708c counters:3 um:zero minimum:10000 name:PM_L2_ST_DISP : All successful store dispatches
+event:0x44708e counters:3 um:zero minimum:10000 name:PM_L2_ST_HIT : All successful store dispatches that were L2Hits
+event:0x17082 counters:0 um:zero minimum:10000 name:PM_L2_ST_MISS : All successful D-side store dispatches for this thread that were L2 Miss
+event:0x636088 counters:2 um:zero minimum:10000 name:PM_L2_SYS_GUESS_CORRECT : L2 guess sys and guess was correct (ie data beyond-6chip)
+event:0x63608a counters:2 um:zero minimum:10000 name:PM_L2_SYS_GUESS_WRONG : L2 guess sys and guess was not correct (ie data ^beyond-6chip)
+event:0x37088 counters:2 um:zero minimum:10000 name:PM_L2_SYS_PUMP : RC requests that were system pump attempts
+event:0x1e05e counters:0 um:zero minimum:10000 name:PM_L2_TM_REQ_ABORT : TM abort.
+event:0x3e05c counters:2 um:zero minimum:10000 name:PM_L2_TM_ST_ABORT_SISTER : TM marked store abort.
+event:0x23808a counters:2 um:zero minimum:10000 name:PM_L3_CINJ : l3 ci of cache inject
+event:0x128084 counters:1 um:zero minimum:10000 name:PM_L3_CI_HIT : L3 Castins Hit (total count)
+event:0x128086 counters:1 um:zero minimum:10000 name:PM_L3_CI_MISS : L3 castins miss (total count)
+event:0x819082 counters:0 um:zero minimum:10000 name:PM_L3_CI_USAGE : rotating sample of 16 CI or CO actives
+event:0x438088 counters:2 um:zero minimum:10000 name:PM_L3_CO : l3 castout occurring (does not include casthrough or log writes (cinj/dmaw))
+event:0x83908b counters:2 um:zero minimum:10000 name:PM_L3_CO0_ALLOC : 0.0
+event:0x83908a counters:2 um:zero minimum:10000 name:PM_L3_CO0_BUSY : lifetime, sample of CO machine 0 valid
+event:0x28086 counters:1 um:zero minimum:10000 name:PM_L3_CO_L31 : L3 CO to L3.1 OR of port 0 and 1 ( lossy)
+event:0x238088 counters:2 um:zero minimum:10000 name:PM_L3_CO_LCO : Total L3 castouts occurred on LCO
+event:0x28084 counters:1 um:zero minimum:10000 name:PM_L3_CO_MEM : L3 CO to memory OR of port 0 and 1 ( lossy)
+event:0x18082 counters:0 um:zero minimum:10000 name:PM_L3_CO_MEPF : L3 CO of line in Mep state (includes casthrough)
+event:0xb19082 counters:0 um:zero minimum:10000 name:PM_L3_GRP_GUESS_CORRECT : Initial scope=group and data from same group (near) (pred successful)
+event:0xb3908a counters:2 um:zero minimum:10000 name:PM_L3_GRP_GUESS_WRONG_HIGH : Initial scope=group but data from local node. Prediction too high
+event:0xb39088 counters:2 um:zero minimum:10000 name:PM_L3_GRP_GUESS_WRONG_LOW : Initial scope=group but data from outside group (far or rem). Prediction too Low
+event:0x218080 counters:0 um:zero minimum:10000 name:PM_L3_HIT : L3 Hits
+event:0x138088 counters:2 um:zero minimum:10000 name:PM_L3_L2_CO_HIT : L2 castout hits
+event:0x13808a counters:2 um:zero minimum:10000 name:PM_L3_L2_CO_MISS : L2 castout miss
+event:0x14808c counters:3 um:zero minimum:10000 name:PM_L3_LAT_CI_HIT : L3 Lateral Castins Hit
+event:0x14808e counters:3 um:zero minimum:10000 name:PM_L3_LAT_CI_MISS : L3 Lateral Castins Miss
+event:0x228084 counters:1 um:zero minimum:10000 name:PM_L3_LD_HIT : L3 demand LD Hits
+event:0x228086 counters:1 um:zero minimum:10000 name:PM_L3_LD_MISS : L3 demand LD Miss
+event:0x1e052 counters:0 um:zero minimum:10000 name:PM_L3_LD_PREF : L3 Load Prefetches.
+event:0xb19080 counters:0 um:zero minimum:10000 name:PM_L3_LOC_GUESS_CORRECT : initial scope=node/chip and data from local node (local) (pred successful)
+event:0xb29086 counters:1 um:zero minimum:10000 name:PM_L3_LOC_GUESS_WRONG : Initial scope=node but data from out side local node (near or far or rem). Prediction too Low
+event:0x218082 counters:0 um:zero minimum:10000 name:PM_L3_MISS : L3 Misses
+event:0x54808c counters:3 um:zero minimum:10000 name:PM_L3_P0_CO_L31 : l3 CO to L3.1 (lco) port 0
+event:0x538088 counters:2 um:zero minimum:10000 name:PM_L3_P0_CO_MEM : l3 CO to memory port 0
+event:0x929084 counters:1 um:zero minimum:10000 name:PM_L3_P0_CO_RTY : L3 CO received retry port 0
+event:0xa29084 counters:1 um:zero minimum:10000 name:PM_L3_P0_GRP_PUMP : L3 pf sent with grp scope port 0
+event:0x528084 counters:1 um:zero minimum:10000 name:PM_L3_P0_LCO_DATA : lco sent with data port 0
+event:0x518080 counters:0 um:zero minimum:10000 name:PM_L3_P0_LCO_NO_DATA : dataless l3 lco sent port 0
+event:0xa4908c counters:3 um:zero minimum:10000 name:PM_L3_P0_LCO_RTY : L3 LCO received retry port 0
+event:0xa19080 counters:0 um:zero minimum:10000 name:PM_L3_P0_NODE_PUMP : L3 pf sent with nodal scope port 0
+event:0x919080 counters:0 um:zero minimum:10000 name:PM_L3_P0_PF_RTY : L3 PF received retry port 0
+event:0x939088 counters:2 um:zero minimum:10000 name:PM_L3_P0_SN_HIT : L3 snoop hit port 0
+event:0x118080 counters:0 um:zero minimum:10000 name:PM_L3_P0_SN_INV : Port0 snooper detects someone doing a store to a line thats Sx
+event:0x94908c counters:3 um:zero minimum:10000 name:PM_L3_P0_SN_MISS : L3 snoop miss port 0
+event:0xa39088 counters:2 um:zero minimum:10000 name:PM_L3_P0_SYS_PUMP : L3 pf sent with sys scope port 0
+event:0x54808e counters:3 um:zero minimum:10000 name:PM_L3_P1_CO_L31 : l3 CO to L3.1 (lco) port 1
+event:0x53808a counters:2 um:zero minimum:10000 name:PM_L3_P1_CO_MEM : l3 CO to memory port 1
+event:0x929086 counters:1 um:zero minimum:10000 name:PM_L3_P1_CO_RTY : L3 CO received retry port 1
+event:0xa29086 counters:1 um:zero minimum:10000 name:PM_L3_P1_GRP_PUMP : L3 pf sent with grp scope port 1
+event:0x528086 counters:1 um:zero minimum:10000 name:PM_L3_P1_LCO_DATA : lco sent with data port 1
+event:0x518082 counters:0 um:zero minimum:10000 name:PM_L3_P1_LCO_NO_DATA : dataless l3 lco sent port 1
+event:0xa4908e counters:3 um:zero minimum:10000 name:PM_L3_P1_LCO_RTY : L3 LCO received retry port 1
+event:0xa19082 counters:0 um:zero minimum:10000 name:PM_L3_P1_NODE_PUMP : L3 pf sent with nodal scope port 1
+event:0x919082 counters:0 um:zero minimum:10000 name:PM_L3_P1_PF_RTY : L3 PF received retry port 1
+event:0x93908a counters:2 um:zero minimum:10000 name:PM_L3_P1_SN_HIT : L3 snoop hit port 1
+event:0x118082 counters:0 um:zero minimum:10000 name:PM_L3_P1_SN_INV : Port1 snooper detects someone doing a store to a line thats Sx
+event:0x94908e counters:3 um:zero minimum:10000 name:PM_L3_P1_SN_MISS : L3 snoop miss port 1
+event:0xa3908a counters:2 um:zero minimum:10000 name:PM_L3_P1_SYS_PUMP : L3 pf sent with sys scope port 1
+event:0x84908d counters:3 um:zero minimum:10000 name:PM_L3_PF0_ALLOC : 0.0
+event:0x84908c counters:3 um:zero minimum:10000 name:PM_L3_PF0_BUSY : lifetime, sample of PF machine 0 valid
+event:0x428084 counters:1 um:zero minimum:10000 name:PM_L3_PF_HIT_L3 : l3 pf hit in l3
+event:0x18080 counters:0 um:zero minimum:10000 name:PM_L3_PF_MISS_L3 : L3 Prefetch missed in L3
+event:0x3808a counters:2 um:zero minimum:10000 name:PM_L3_PF_OFF_CHIP_CACHE : L3 Prefetch from Off chip cache
+event:0x4808e counters:3 um:zero minimum:10000 name:PM_L3_PF_OFF_CHIP_MEM : L3 Prefetch from Off chip memory
+event:0x38088 counters:2 um:zero minimum:10000 name:PM_L3_PF_ON_CHIP_CACHE : L3 Prefetch from On chip cache
+event:0x4808c counters:3 um:zero minimum:10000 name:PM_L3_PF_ON_CHIP_MEM : L3 Prefetch from On chip memory
+event:0x829084 counters:1 um:zero minimum:10000 name:PM_L3_PF_USAGE : rotating sample of 32 PF actives
+event:0x4e052 counters:3 um:zero minimum:10000 name:PM_L3_PREF_ALL : Total HW L3 prefetches(Load+store).
+event:0x84908f counters:3 um:zero minimum:10000 name:PM_L3_RD0_ALLOC : 0.0
+event:0x84908e counters:3 um:zero minimum:10000 name:PM_L3_RD0_BUSY : lifetime, sample of RD machine 0 valid
+event:0x829086 counters:1 um:zero minimum:10000 name:PM_L3_RD_USAGE : rotating sample of 16 RD actives
+event:0x839089 counters:2 um:zero minimum:10000 name:PM_L3_SN0_ALLOC : 0.0
+event:0x839088 counters:2 um:zero minimum:10000 name:PM_L3_SN0_BUSY : lifetime, sample of snooper machine 0 valid
+event:0x819080 counters:0 um:zero minimum:10000 name:PM_L3_SN_USAGE : rotating sample of 8 snoop valids
+event:0x2e052 counters:1 um:zero minimum:10000 name:PM_L3_ST_PREF : L3 store Prefetches.
+event:0x3e052 counters:2 um:zero minimum:10000 name:PM_L3_SW_PREF : Data stream touch to L3.
+event:0xb29084 counters:1 um:zero minimum:10000 name:PM_L3_SYS_GUESS_CORRECT : Initial scope=system and data from outside group (far or rem)(pred successful)
+event:0xb4908c counters:3 um:zero minimum:10000 name:PM_L3_SYS_GUESS_WRONG : Initial scope=system but data from local or near. Prediction too high
+event:0x24808e counters:3 um:zero minimum:10000 name:PM_L3_TRANS_PF : L3 Transient prefetch
+event:0x18081 counters:0 um:zero minimum:10000 name:PM_L3_WI0_ALLOC : 0.0
+event:0x418080 counters:0 um:zero minimum:10000 name:PM_L3_WI0_BUSY : lifetime, sample of Write Inject machine 0 valid
+event:0x418082 counters:0 um:zero minimum:10000 name:PM_L3_WI_USAGE : rotating sample of 8 WI actives
+event:0x3c058 counters:2 um:zero minimum:10000 name:PM_LARX_FIN : Larx finished .
+event:0x1002e counters:0 um:zero minimum:10000 name:PM_LD_CMPL : count of Loads completed.
+event:0x10062 counters:0 um:zero minimum:10000 name:PM_LD_L3MISS_PEND_CYC : Cycles L3 miss was pending for this thread.
+event:0x100ee counters:0 um:zero minimum:10000 name:PM_LD_REF_L1 : Load Ref count combined for all units.
+event:0xc080 counters:0,1,2,3 um:zero minimum:10000 name:PM_LD_REF_L1_LSU0 :  LS0 L1 D cache load references counted at finish, gated by rejectLSU0 L1 D cache load references
+event:0xc082 counters:0,1,2,3 um:zero minimum:10000 name:PM_LD_REF_L1_LSU1 :  LS1 L1 D cache load references counted at finish, gated by rejectLSU1 L1 D cache load references
+event:0xc094 counters:0,1,2,3 um:zero minimum:10000 name:PM_LD_REF_L1_LSU2 :  LS2  L1 D cache load references counted at finish, gated by reject42
+event:0xc096 counters:0,1,2,3 um:zero minimum:10000 name:PM_LD_REF_L1_LSU3 :  LS3  L1 D cache load references counted at finish, gated by reject42
+event:0x509a counters:0,1,2,3 um:zero minimum:10000 name:PM_LINK_STACK_INVALID_PTR : A flush where LS ptr is invalid, results in a pop, A lot of interrupts between push and pops
+event:0x5098 counters:0,1,2,3 um:zero minimum:10000 name:PM_LINK_STACK_WRONG_ADD_PRED : Link stack predicts wrong address, because of link stack design limitation.
+event:0xe080 counters:0,1,2,3 um:zero minimum:10000 name:PM_LS0_ERAT_MISS_PREF : LS0 Erat miss due to prefetch42
+event:0xd0b8 counters:0,1,2,3 um:zero minimum:10000 name:PM_LS0_L1_PREF :  LS0 L1 cache data prefetches42
+event:0xc098 counters:0,1,2,3 um:zero minimum:10000 name:PM_LS0_L1_SW_PREF : Software L1 Prefetches, including SW Transient Prefetches42
+event:0xe082 counters:0,1,2,3 um:zero minimum:10000 name:PM_LS1_ERAT_MISS_PREF : LS1 Erat miss due to prefetch42
+event:0xd0ba counters:0,1,2,3 um:zero minimum:10000 name:PM_LS1_L1_PREF :  LS1 L1 cache data prefetches42
+event:0xc09a counters:0,1,2,3 um:zero minimum:10000 name:PM_LS1_L1_SW_PREF : Software L1 Prefetches, including SW Transient Prefetches42
+event:0xc0b0 counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU0_FLUSH_LRQ : LS0 Flush: LRQLSU0 LRQ flushes
+event:0xc0b8 counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU0_FLUSH_SRQ : LS0 Flush: SRQLSU0 SRQ lhs flushes
+event:0xc0a4 counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU0_FLUSH_ULD : LS0 Flush: Unaligned LoadLSU0 unaligned load flushes
+event:0xc0ac counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU0_FLUSH_UST : LS0 Flush: Unaligned StoreLSU0 unaligned store flushes
+event:0xf088 counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU0_L1_CAM_CANCEL : ls0 l1 tm cam cancel42
+event:0x1e056 counters:0 um:zero minimum:10000 name:PM_LSU0_LARX_FIN :  .
+event:0xd08c counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU0_LMQ_LHR_MERGE : LS0  Load Merged with another cacheline request42
+event:0xc08c counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU0_NCLD : LS0 Non-cachable Loads counted at finishLSU0 non-cacheable loads
+event:0xe090 counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU0_PRIMARY_ERAT_HIT : Primary ERAT hit42
+event:0x1e05a counters:0 um:zero minimum:10000 name:PM_LSU0_REJECT : LSU0 reject .
+event:0xc09c counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU0_SRQ_STFWD : LS0 SRQ forwarded data to a loadLSU0 SRQ store forwarded
+event:0xf084 counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU0_STORE_REJECT : ls0 store reject42
+event:0xe0a8 counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU0_TMA_REQ_L2 :  addrs only req to L2 only on the first one,Indication that Load footprint is not expanding42
+event:0xe098 counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU0_TM_L1_HIT : Load tm hit in L142
+event:0xe0a0 counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU0_TM_L1_MISS : Load tm L1 miss42
+event:0xc0b2 counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU1_FLUSH_LRQ : LS1 Flush: LRQLSU1 LRQ flushes
+event:0xc0ba counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU1_FLUSH_SRQ : LS1 Flush: SRQLSU1 SRQ lhs flushes
+event:0xc0a6 counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU1_FLUSH_ULD : LS 1 Flush: Unaligned LoadLSU1 unaligned load flushes
+event:0xc0ae counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU1_FLUSH_UST : LS1 Flush: Unaligned StoreLSU1 unaligned store flushes
+event:0xf08a counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU1_L1_CAM_CANCEL : ls1 l1 tm cam cancel42
+event:0x2e056 counters:1 um:zero minimum:10000 name:PM_LSU1_LARX_FIN : Larx finished in LSU pipe1.
+event:0xd08e counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU1_LMQ_LHR_MERGE : LS1 Load Merge with another cacheline request42
+event:0xc08e counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU1_NCLD : LS1 Non-cachable Loads counted at finishLSU1 non-cacheable loads
+event:0xe092 counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU1_PRIMARY_ERAT_HIT : Primary ERAT hit42
+event:0x2e05a counters:1 um:zero minimum:10000 name:PM_LSU1_REJECT : LSU1 reject .
+event:0xc09e counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU1_SRQ_STFWD : LS1 SRQ forwarded data to a loadLSU1 SRQ store forwarded
+event:0xf086 counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU1_STORE_REJECT : ls1 store reject42
+event:0xe0aa counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU1_TMA_REQ_L2 :  addrs only req to L2 only on the first one,Indication that Load footprint is not expanding42
+event:0xe09a counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU1_TM_L1_HIT : Load tm hit in L142
+event:0xe0a2 counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU1_TM_L1_MISS : Load tm L1 miss42
+event:0xc0b4 counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU2_FLUSH_LRQ : LS02Flush: LRQ42
+event:0xc0bc counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU2_FLUSH_SRQ : LS2  Flush: SRQ42
+event:0xc0a8 counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU2_FLUSH_ULD : LS3 Flush: Unaligned Load42
+event:0xf08c counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU2_L1_CAM_CANCEL : ls2 l1 tm cam cancel42
+event:0x3e056 counters:2 um:zero minimum:10000 name:PM_LSU2_LARX_FIN : Larx finished in LSU pipe2.
+event:0xc084 counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU2_LDF : LS2  Scalar  Loads42
+event:0xc088 counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU2_LDX : LS0 Vector Loads42
+event:0xd090 counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU2_LMQ_LHR_MERGE : LS0  Load Merged with another cacheline request42
+event:0xe094 counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU2_PRIMARY_ERAT_HIT : Primary ERAT hit42
+event:0x3e05a counters:2 um:zero minimum:10000 name:PM_LSU2_REJECT : LSU2 reject .
+event:0xc0a0 counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU2_SRQ_STFWD : LS2 SRQ forwarded data to a load42
+event:0xe0ac counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU2_TMA_REQ_L2 :  addrs only req to L2 only on the first one,Indication that Load footprint is not expanding42
+event:0xe09c counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU2_TM_L1_HIT : Load tm hit in L142
+event:0xe0a4 counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU2_TM_L1_MISS : Load tm L1 miss42
+event:0xc0b6 counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU3_FLUSH_LRQ : LS3 Flush: LRQ42
+event:0xc0be counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU3_FLUSH_SRQ : LS13 Flush: SRQ42
+event:0xc0aa counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU3_FLUSH_ULD : LS 14Flush: Unaligned Load42
+event:0xf08e counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU3_L1_CAM_CANCEL : ls3 l1 tm cam cancel42
+event:0x4e056 counters:3 um:zero minimum:10000 name:PM_LSU3_LARX_FIN : Larx finished in LSU pipe3.
+event:0xc086 counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU3_LDF : LS3  Scalar Loads 42
+event:0xc08a counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU3_LDX : LS1  Vector Loads42
+event:0xd092 counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU3_LMQ_LHR_MERGE : LS1 Load Merge with another cacheline request42
+event:0xe096 counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU3_PRIMARY_ERAT_HIT : Primary ERAT hit42
+event:0x4e05a counters:3 um:zero minimum:10000 name:PM_LSU3_REJECT : LSU3 reject .
+event:0xc0a2 counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU3_SRQ_STFWD : LS3  SRQ forwarded data to a load42
+event:0xe0ae counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU3_TMA_REQ_L2 :  addrs only req to L2 only on the first one,Indication that Load footprint is not expanding42
+event:0xe09e counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU3_TM_L1_HIT : Load tm hit in L142
+event:0xe0a6 counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU3_TM_L1_MISS : Load tm L1 miss42
+event:0xe880 counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU_ERAT_MISS_PREF : LSU
+event:0x30066 counters:2 um:zero minimum:10000 name:PM_LSU_FIN : LSU Finished an instruction (up to 2 per cycle).
+event:0xc8ac counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU_FLUSH_UST : LSU
+event:0xd0a4 counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU_FOUR_TABLEWALK_CYC : Cycles when four tablewalks pending on this thread42
+event:0x10066 counters:0 um:zero minimum:10000 name:PM_LSU_FX_FIN : LSU Finished a FX operation (up to 2 per cycle).
+event:0xd8b8 counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU_L1_PREF : LSU
+event:0xc898 counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU_L1_SW_PREF : LSU
+event:0xc884 counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU_LDF : LSU
+event:0xc888 counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU_LDX : LSU
+event:0xd0a2 counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU_LMQ_FULL_CYC : LMQ fullCycles LMQ full,
+event:0xd0a1 counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU_LMQ_S0_ALLOC : 0.0
+event:0xd0a0 counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU_LMQ_S0_VALID : Slot 0 of LMQ validLMQ slot 0 valid
+event:0x3001c counters:2 um:zero minimum:10000 name:PM_LSU_LMQ_SRQ_EMPTY_ALL_CYC : ALL threads lsu empty (lmq and srq empty). Issue HW016541
+event:0x2003e counters:1 um:zero minimum:10000 name:PM_LSU_LMQ_SRQ_EMPTY_CYC : LSU empty (lmq and srq empty).
+event:0xd09f counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU_LRQ_S0_ALLOC : 0.0
+event:0xd09e counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU_LRQ_S0_VALID : Slot 0 of LRQ validLRQ slot 0 valid
+event:0xf091 counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU_LRQ_S43_ALLOC : 0.0
+event:0xf090 counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU_LRQ_S43_VALID : LRQ slot 43 was busy42
+event:0x30162 counters:2 um:zero minimum:10000 name:PM_LSU_MRK_DERAT_MISS : DERAT Reloaded (Miss).
+event:0xc88c counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU_NCLD : LSU
+event:0xc092 counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU_NCST : Non-cachable Stores sent to nest42
+event:0x10064 counters:0 um:zero minimum:10000 name:PM_LSU_REJECT : LSU Reject (up to 4 per cycle).
+event:0x2e05c counters:1 um:zero minimum:10000 name:PM_LSU_REJECT_ERAT_MISS : LSU Reject due to ERAT (up to 4 per cycles).
+event:0x4e05c counters:3 um:zero minimum:10000 name:PM_LSU_REJECT_LHS : LSU Reject due to LHS (up to 4 per cycle).
+event:0x1e05c counters:0 um:zero minimum:10000 name:PM_LSU_REJECT_LMQ_FULL : LSU reject due to LMQ full ( 4 per cycle).
+event:0xd082 counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU_SET_MPRED : Line already in cache at reload time42
+event:0x40008 counters:3 um:zero minimum:10000 name:PM_LSU_SRQ_EMPTY_CYC : All threads srq empty.
+event:0x1001a counters:0 um:zero minimum:10000 name:PM_LSU_SRQ_FULL_CYC : SRQ is Full.
+event:0xd09d counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU_SRQ_S0_ALLOC : 0.0
+event:0xd09c counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU_SRQ_S0_VALID : Slot 0 of SRQ validSRQ slot 0 valid
+event:0xf093 counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU_SRQ_S39_ALLOC : 0.0
+event:0xf092 counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU_SRQ_S39_VALID : SRQ slot 39 was busy42
+event:0xd09b counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU_SRQ_SYNC : 0.0
+event:0xd09a counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU_SRQ_SYNC_CYC : A sync is in the SRQ (edge detect to count)SRQ sync duration
+event:0xf084 counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU_STORE_REJECT : LSU
+event:0xd0a6 counters:0,1,2,3 um:zero minimum:10000 name:PM_LSU_TWO_TABLEWALK_CYC : Cycles when two tablewalks pending on this thread42
+event:0x5094 counters:0,1,2,3 um:zero minimum:10000 name:PM_LWSYNC : threaded version, IC Misses where we got EA dir hit but no sector valids were on. ICBI took line out
+event:0x209a counters:0,1,2,3 um:zero minimum:10000 name:PM_LWSYNC_HELD : LWSYNC held at dispatch
+event:0x4c058 counters:3 um:zero minimum:10000 name:PM_MEM_CO : Memory castouts from this lpar.
+event:0x10058 counters:0 um:zero minimum:10000 name:PM_MEM_LOC_THRESH_IFU : Local Memory above threshold for IFU speculation control.
+event:0x40056 counters:3 um:zero minimum:10000 name:PM_MEM_LOC_THRESH_LSU_HIGH : Local memory above threshold for LSU medium.
+event:0x1c05e counters:0 um:zero minimum:10000 name:PM_MEM_LOC_THRESH_LSU_MED : Local memory above threshold for data prefetch.
+event:0x2c058 counters:1 um:zero minimum:10000 name:PM_MEM_PREF : Memory prefetch for this lpar.
+event:0x10056 counters:0 um:zero minimum:10000 name:PM_MEM_READ : Reads from Memory from this lpar (includes data/inst/xlate/l1prefetch/inst prefetch).
+event:0x3c05e counters:2 um:zero minimum:10000 name:PM_MEM_RWITM : Memory rwitm for this lpar.
+event:0x3515e counters:2 um:zero minimum:1000 name:PM_MRK_BACK_BR_CMPL : Marked branch instruction completed with a target address less than current instruction address.
+event:0x2013a counters:1 um:zero minimum:1000 name:PM_MRK_BRU_FIN : bru marked instr finish.
+event:0x1016e counters:0 um:zero minimum:1000 name:PM_MRK_BR_CMPL : Branch Instruction completed.
+event:0x3013a counters:2 um:zero minimum:1000 name:PM_MRK_CRU_FIN : IFU non-branch marked instruction finished.
+event:0x4d148 counters:3 um:zero minimum:1000 name:PM_MRK_DATA_FROM_DL2L3_MOD : The processor's data cache was reloaded with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a marked load.
+event:0x2d128 counters:1 um:zero minimum:1000 name:PM_MRK_DATA_FROM_DL2L3_MOD_CYC : Duration in cycles to reload with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a marked load.
+event:0x3d148 counters:2 um:zero minimum:1000 name:PM_MRK_DATA_FROM_DL2L3_SHR : The processor's data cache was reloaded with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a marked load.
+event:0x2c128 counters:1 um:zero minimum:1000 name:PM_MRK_DATA_FROM_DL2L3_SHR_CYC : Duration in cycles to reload with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a marked load.
+event:0x3d14c counters:2 um:zero minimum:1000 name:PM_MRK_DATA_FROM_DL4 : The processor's data cache was reloaded from another chip's L4 on a different Node or Group (Distant) due to a marked load.
+event:0x2c12c counters:1 um:zero minimum:1000 name:PM_MRK_DATA_FROM_DL4_CYC : Duration in cycles to reload from another chip's L4 on a different Node or Group (Distant) due to a marked load.
+event:0x4d14c counters:3 um:zero minimum:1000 name:PM_MRK_DATA_FROM_DMEM : The processor's data cache was reloaded from another chip's memory on the same Node or Group (Distant) due to a marked load.
+event:0x2d12c counters:1 um:zero minimum:1000 name:PM_MRK_DATA_FROM_DMEM_CYC : Duration in cycles to reload from another chip's memory on the same Node or Group (Distant) due to a marked load.
+event:0x1d142 counters:0 um:zero minimum:1000 name:PM_MRK_DATA_FROM_L2 : The processor's data cache was reloaded from local core's L2 due to a marked load.
+event:0x4d146 counters:3 um:zero minimum:1000 name:PM_MRK_DATA_FROM_L21_MOD : The processor's data cache was reloaded with Modified (M) data from another core's L2 on the same chip due to a marked load.
+event:0x2d126 counters:1 um:zero minimum:1000 name:PM_MRK_DATA_FROM_L21_MOD_CYC : Duration in cycles to reload with Modified (M) data from another core's L2 on the same chip due to a marked load.
+event:0x3d146 counters:2 um:zero minimum:1000 name:PM_MRK_DATA_FROM_L21_SHR : The processor's data cache was reloaded with Shared (S) data from another core's L2 on the same chip due to a marked load.
+event:0x2c126 counters:1 um:zero minimum:1000 name:PM_MRK_DATA_FROM_L21_SHR_CYC : Duration in cycles to reload with Shared (S) data from another core's L2 on the same chip due to a marked load.
+event:0x4c12e counters:3 um:zero minimum:1000 name:PM_MRK_DATA_FROM_L2MISS_CYC : Duration in cycles to reload from a location other than the local core's L2 due to a marked load.
+event:0x4c122 counters:3 um:zero minimum:1000 name:PM_MRK_DATA_FROM_L2_CYC : Duration in cycles to reload from local core's L2 due to a marked load.
+event:0x3d140 counters:2 um:zero minimum:1000 name:PM_MRK_DATA_FROM_L2_DISP_CONFLICT_LDHITST : The processor's data cache was reloaded from local core's L2 with load hit store conflict due to a marked load.
+event:0x2c120 counters:1 um:zero minimum:1000 name:PM_MRK_DATA_FROM_L2_DISP_CONFLICT_LDHITST_CYC : Duration in cycles to reload from local core's L2 with load hit store conflict due to a marked load.
+event:0x4d140 counters:3 um:zero minimum:1000 name:PM_MRK_DATA_FROM_L2_DISP_CONFLICT_OTHER : The processor's data cache was reloaded from local core's L2 with dispatch conflict due to a marked load.
+event:0x2d120 counters:1 um:zero minimum:1000 name:PM_MRK_DATA_FROM_L2_DISP_CONFLICT_OTHER_CYC : Duration in cycles to reload from local core's L2 with dispatch conflict due to a marked load.
+event:0x2d140 counters:1 um:zero minimum:1000 name:PM_MRK_DATA_FROM_L2_MEPF : The processor's data cache was reloaded from local core's L2 hit without dispatch conflicts on Mepf state. due to a marked load.
+event:0x4d120 counters:3 um:zero minimum:1000 name:PM_MRK_DATA_FROM_L2_MEPF_CYC : Duration in cycles to reload from local core's L2 hit without dispatch conflicts on Mepf state. due to a marked load.
+event:0x1d140 counters:0 um:zero minimum:1000 name:PM_MRK_DATA_FROM_L2_NO_CONFLICT : The processor's data cache was reloaded from local core's L2 without conflict due to a marked load.
+event:0x4c120 counters:3 um:zero minimum:1000 name:PM_MRK_DATA_FROM_L2_NO_CONFLICT_CYC : Duration in cycles to reload from local core's L2 without conflict due to a marked load.
+event:0x4d142 counters:3 um:zero minimum:1000 name:PM_MRK_DATA_FROM_L3 : The processor's data cache was reloaded from local core's L3 due to a marked load.
+event:0x4d144 counters:3 um:zero minimum:1000 name:PM_MRK_DATA_FROM_L31_ECO_MOD : The processor's data cache was reloaded with Modified (M) data from another core's ECO L3 on the same chip due to a marked load.
+event:0x2d124 counters:1 um:zero minimum:1000 name:PM_MRK_DATA_FROM_L31_ECO_MOD_CYC : Duration in cycles to reload with Modified (M) data from another core's ECO L3 on the same chip due to a marked load.
+event:0x3d144 counters:2 um:zero minimum:1000 name:PM_MRK_DATA_FROM_L31_ECO_SHR : The processor's data cache was reloaded with Shared (S) data from another core's ECO L3 on the same chip due to a marked load.
+event:0x2c124 counters:1 um:zero minimum:1000 name:PM_MRK_DATA_FROM_L31_ECO_SHR_CYC : Duration in cycles to reload with Shared (S) data from another core's ECO L3 on the same chip due to a marked load.
+event:0x2d144 counters:1 um:zero minimum:1000 name:PM_MRK_DATA_FROM_L31_MOD : The processor's data cache was reloaded with Modified (M) data from another core's L3 on the same chip due to a marked load.
+event:0x4d124 counters:3 um:zero minimum:1000 name:PM_MRK_DATA_FROM_L31_MOD_CYC : Duration in cycles to reload with Modified (M) data from another core's L3 on the same chip due to a marked load.
+event:0x1d146 counters:0 um:zero minimum:1000 name:PM_MRK_DATA_FROM_L31_SHR : The processor's data cache was reloaded with Shared (S) data from another core's L3 on the same chip due to a marked load.
+event:0x4c126 counters:3 um:zero minimum:1000 name:PM_MRK_DATA_FROM_L31_SHR_CYC : Duration in cycles to reload with Shared (S) data from another core's L3 on the same chip due to a marked load.
+event:0x2d12e counters:1 um:zero minimum:1000 name:PM_MRK_DATA_FROM_L3MISS_CYC : Duration in cycles to reload from a location other than the local core's L3 due to a marked load.
+event:0x2d122 counters:1 um:zero minimum:1000 name:PM_MRK_DATA_FROM_L3_CYC : Duration in cycles to reload from local core's L3 due to a marked load.
+event:0x3d142 counters:2 um:zero minimum:1000 name:PM_MRK_DATA_FROM_L3_DISP_CONFLICT : The processor's data cache was reloaded from local core's L3 with dispatch conflict due to a marked load.
+event:0x2c122 counters:1 um:zero minimum:1000 name:PM_MRK_DATA_FROM_L3_DISP_CONFLICT_CYC : Duration in cycles to reload from local core's L3 with dispatch conflict due to a marked load.
+event:0x2d142 counters:1 um:zero minimum:1000 name:PM_MRK_DATA_FROM_L3_MEPF : The processor's data cache was reloaded from local core's L3 without dispatch conflicts hit on Mepf state. due to a marked load.
+event:0x4d122 counters:3 um:zero minimum:1000 name:PM_MRK_DATA_FROM_L3_MEPF_CYC : Duration in cycles to reload from local core's L3 without dispatch conflicts hit on Mepf state. due to a marked load.
+event:0x1d144 counters:0 um:zero minimum:1000 name:PM_MRK_DATA_FROM_L3_NO_CONFLICT : The processor's data cache was reloaded from local core's L3 without conflict due to a marked load.
+event:0x4c124 counters:3 um:zero minimum:1000 name:PM_MRK_DATA_FROM_L3_NO_CONFLICT_CYC : Duration in cycles to reload from local core's L3 without conflict due to a marked load.
+event:0x1d14c counters:0 um:zero minimum:1000 name:PM_MRK_DATA_FROM_LL4 : The processor's data cache was reloaded from the local chip's L4 cache due to a marked load.
+event:0x4c12c counters:3 um:zero minimum:1000 name:PM_MRK_DATA_FROM_LL4_CYC : Duration in cycles to reload  from the local chip's L4 cache due to a marked load.
+event:0x2d148 counters:1 um:zero minimum:1000 name:PM_MRK_DATA_FROM_LMEM : The processor's data cache was reloaded from the local chip's Memory due to a marked load.
+event:0x4d128 counters:3 um:zero minimum:1000 name:PM_MRK_DATA_FROM_LMEM_CYC : Duration in cycles to reload  from the local chip's Memory due to a marked load.
+event:0x2d14c counters:1 um:zero minimum:1000 name:PM_MRK_DATA_FROM_MEMORY : The processor's data cache was reloaded from a memory location including L4 from local remote or distant due to a marked load.
+event:0x4d12c counters:3 um:zero minimum:1000 name:PM_MRK_DATA_FROM_MEMORY_CYC : Duration in cycles to reload from a memory location including L4 from local remote or distant due to a marked load.
+event:0x4d14a counters:3 um:zero minimum:1000 name:PM_MRK_DATA_FROM_OFF_CHIP_CACHE : The processor's data cache was reloaded either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a marked load.
+event:0x2d12a counters:1 um:zero minimum:1000 name:PM_MRK_DATA_FROM_OFF_CHIP_CACHE_CYC : Duration in cycles to reload either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a marked load.
+event:0x1d148 counters:0 um:zero minimum:1000 name:PM_MRK_DATA_FROM_ON_CHIP_CACHE : The processor's data cache was reloaded either shared or modified data from another core's L2/L3 on the same chip due to a marked load.
+event:0x4c128 counters:3 um:zero minimum:1000 name:PM_MRK_DATA_FROM_ON_CHIP_CACHE_CYC : Duration in cycles to reload either shared or modified data from another core's L2/L3 on the same chip due to a marked load.
+event:0x2d146 counters:1 um:zero minimum:1000 name:PM_MRK_DATA_FROM_RL2L3_MOD : The processor's data cache was reloaded with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a marked load.
+event:0x4d126 counters:3 um:zero minimum:1000 name:PM_MRK_DATA_FROM_RL2L3_MOD_CYC : Duration in cycles to reload with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a marked load.
+event:0x1d14a counters:0 um:zero minimum:1000 name:PM_MRK_DATA_FROM_RL2L3_SHR : The processor's data cache was reloaded with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a marked load.
+event:0x4c12a counters:3 um:zero minimum:1000 name:PM_MRK_DATA_FROM_RL2L3_SHR_CYC : Duration in cycles to reload with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a marked load.
+event:0x2d14a counters:1 um:zero minimum:1000 name:PM_MRK_DATA_FROM_RL4 : The processor's data cache was reloaded from another chip's L4 on the same Node or Group ( Remote) due to a marked load.
+event:0x4d12a counters:3 um:zero minimum:1000 name:PM_MRK_DATA_FROM_RL4_CYC : Duration in cycles to reload from another chip's L4 on the same Node or Group ( Remote) due to a marked load.
+event:0x3d14a counters:2 um:zero minimum:1000 name:PM_MRK_DATA_FROM_RMEM : The processor's data cache was reloaded from another chip's memory on the same Node or Group ( Remote) due to a marked load.
+event:0x2c12a counters:1 um:zero minimum:1000 name:PM_MRK_DATA_FROM_RMEM_CYC : Duration in cycles to reload from another chip's memory on the same Node or Group ( Remote) due to a marked load.
+event:0x40118 counters:3 um:zero minimum:1000 name:PM_MRK_DCACHE_RELOAD_INTV : Combined Intervention event.
+event:0x4d154 counters:3 um:zero minimum:1000 name:PM_MRK_DERAT_MISS_16G : Marked Data ERAT Miss (Data TLB Access) page size 16G.
+event:0x3d154 counters:2 um:zero minimum:1000 name:PM_MRK_DERAT_MISS_16M : Marked Data ERAT Miss (Data TLB Access) page size 16M.
+event:0x1d156 counters:0 um:zero minimum:1000 name:PM_MRK_DERAT_MISS_4K : Marked Data ERAT Miss (Data TLB Access) page size 4K.
+event:0x2d154 counters:1 um:zero minimum:1000 name:PM_MRK_DERAT_MISS_64K : Marked Data ERAT Miss (Data TLB Access) page size 64K.
+event:0x20132 counters:1 um:zero minimum:1000 name:PM_MRK_DFU_FIN : Decimal Unit marked Instruction Finish.
+event:0x4f148 counters:3 um:zero minimum:1000 name:PM_MRK_DPTEG_FROM_DL2L3_MOD : A Page Table Entry was loaded into the TLB with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a marked data side request.
+event:0x3f148 counters:2 um:zero minimum:1000 name:PM_MRK_DPTEG_FROM_DL2L3_SHR : A Page Table Entry was loaded into the TLB with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a marked data side request.
+event:0x3f14c counters:2 um:zero minimum:1000 name:PM_MRK_DPTEG_FROM_DL4 : A Page Table Entry was loaded into the TLB from another chip's L4 on a different Node or Group (Distant) due to a marked data side request.
+event:0x4f14c counters:3 um:zero minimum:1000 name:PM_MRK_DPTEG_FROM_DMEM : A Page Table Entry was loaded into the TLB from another chip's memory on the same Node or Group (Distant) due to a marked data side request.
+event:0x1f142 counters:0 um:zero minimum:1000 name:PM_MRK_DPTEG_FROM_L2 : A Page Table Entry was loaded into the TLB from local core's L2 due to a marked data side request.
+event:0x4f146 counters:3 um:zero minimum:1000 name:PM_MRK_DPTEG_FROM_L21_MOD : A Page Table Entry was loaded into the TLB with Modified (M) data from another core's L2 on the same chip due to a marked data side request.
+event:0x3f146 counters:2 um:zero minimum:1000 name:PM_MRK_DPTEG_FROM_L21_SHR : A Page Table Entry was loaded into the TLB with Shared (S) data from another core's L2 on the same chip due to a marked data side request.
+event:0x1f14e counters:0 um:zero minimum:1000 name:PM_MRK_DPTEG_FROM_L2MISS : A Page Table Entry was loaded into the TLB from a location other than the local core's L2 due to a marked data side request.
+event:0x3f140 counters:2 um:zero minimum:1000 name:PM_MRK_DPTEG_FROM_L2_DISP_CONFLICT_LDHITST : A Page Table Entry was loaded into the TLB from local core's L2 with load hit store conflict due to a marked data side request.
+event:0x4f140 counters:3 um:zero minimum:1000 name:PM_MRK_DPTEG_FROM_L2_DISP_CONFLICT_OTHER : A Page Table Entry was loaded into the TLB from local core's L2 with dispatch conflict due to a marked data side request.
+event:0x2f140 counters:1 um:zero minimum:1000 name:PM_MRK_DPTEG_FROM_L2_MEPF : A Page Table Entry was loaded into the TLB from local core's L2 hit without dispatch conflicts on Mepf state. due to a marked data side request.
+event:0x1f140 counters:0 um:zero minimum:1000 name:PM_MRK_DPTEG_FROM_L2_NO_CONFLICT : A Page Table Entry was loaded into the TLB from local core's L2 without conflict due to a marked data side request.
+event:0x4f142 counters:3 um:zero minimum:1000 name:PM_MRK_DPTEG_FROM_L3 : A Page Table Entry was loaded into the TLB from local core's L3 due to a marked data side request.
+event:0x4f144 counters:3 um:zero minimum:1000 name:PM_MRK_DPTEG_FROM_L31_ECO_MOD : A Page Table Entry was loaded into the TLB with Modified (M) data from another core's ECO L3 on the same chip due to a marked data side request.
+event:0x3f144 counters:2 um:zero minimum:1000 name:PM_MRK_DPTEG_FROM_L31_ECO_SHR : A Page Table Entry was loaded into the TLB with Shared (S) data from another core's ECO L3 on the same chip due to a marked data side request.
+event:0x2f144 counters:1 um:zero minimum:1000 name:PM_MRK_DPTEG_FROM_L31_MOD : A Page Table Entry was loaded into the TLB with Modified (M) data from another core's L3 on the same chip due to a marked data side request.
+event:0x1f146 counters:0 um:zero minimum:1000 name:PM_MRK_DPTEG_FROM_L31_SHR : A Page Table Entry was loaded into the TLB with Shared (S) data from another core's L3 on the same chip due to a marked data side request.
+event:0x4f14e counters:3 um:zero minimum:1000 name:PM_MRK_DPTEG_FROM_L3MISS : A Page Table Entry was loaded into the TLB from a location other than the local core's L3 due to a marked data side request.
+event:0x3f142 counters:2 um:zero minimum:1000 name:PM_MRK_DPTEG_FROM_L3_DISP_CONFLICT : A Page Table Entry was loaded into the TLB from local core's L3 with dispatch conflict due to a marked data side request.
+event:0x2f142 counters:1 um:zero minimum:1000 name:PM_MRK_DPTEG_FROM_L3_MEPF : A Page Table Entry was loaded into the TLB from local core's L3 without dispatch conflicts hit on Mepf state due to a marked data side request.
+event:0x1f144 counters:0 um:zero minimum:1000 name:PM_MRK_DPTEG_FROM_L3_NO_CONFLICT : A Page Table Entry was loaded into the TLB from local core's L3 without conflict due to a marked data side request.
+event:0x1f14c counters:0 um:zero minimum:1000 name:PM_MRK_DPTEG_FROM_LL4 : A Page Table Entry was loaded into the TLB from the local chip's L4 cache due to a marked data side request.
+event:0x2f148 counters:1 um:zero minimum:1000 name:PM_MRK_DPTEG_FROM_LMEM : A Page Table Entry was loaded into the TLB from the local chip's Memory due to a marked data side request.
+event:0x2f14c counters:1 um:zero minimum:1000 name:PM_MRK_DPTEG_FROM_MEMORY : A Page Table Entry was loaded into the TLB from a memory location including L4 from local remote or distant due to a marked data side request.
+event:0x4f14a counters:3 um:zero minimum:1000 name:PM_MRK_DPTEG_FROM_OFF_CHIP_CACHE : A Page Table Entry was loaded into the TLB either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a marked data side request.
+event:0x1f148 counters:0 um:zero minimum:1000 name:PM_MRK_DPTEG_FROM_ON_CHIP_CACHE : A Page Table Entry was loaded into the TLB either shared or modified data from another core's L2/L3 on the same chip due to a marked data side request.
+event:0x2f146 counters:1 um:zero minimum:1000 name:PM_MRK_DPTEG_FROM_RL2L3_MOD : A Page Table Entry was loaded into the TLB with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a marked data side request.
+event:0x1f14a counters:0 um:zero minimum:1000 name:PM_MRK_DPTEG_FROM_RL2L3_SHR : A Page Table Entry was loaded into the TLB with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a marked data side request.
+event:0x2f14a counters:1 um:zero minimum:1000 name:PM_MRK_DPTEG_FROM_RL4 : A Page Table Entry was loaded into the TLB from another chip's L4 on the same Node or Group ( Remote) due to a marked data side request.
+event:0x3f14a counters:2 um:zero minimum:1000 name:PM_MRK_DPTEG_FROM_RMEM : A Page Table Entry was loaded into the TLB from another chip's memory on the same Node or Group ( Remote) due to a marked data side request.
+event:0x1d158 counters:0 um:zero minimum:1000 name:PM_MRK_DTLB_MISS_16G : Marked Data TLB Miss page size 16G.
+event:0x4d156 counters:3 um:zero minimum:1000 name:PM_MRK_DTLB_MISS_16M : Marked Data TLB Miss page size 16M.
+event:0x2d156 counters:1 um:zero minimum:1000 name:PM_MRK_DTLB_MISS_4K : Marked Data TLB Miss page size 4k.
+event:0x3d156 counters:2 um:zero minimum:1000 name:PM_MRK_DTLB_MISS_64K : Marked Data TLB Miss page size 64K.
+event:0x40154 counters:3 um:zero minimum:1000 name:PM_MRK_FAB_RSP_BKILL : Marked store had to do a bkill.
+event:0x2f150 counters:1 um:zero minimum:1000 name:PM_MRK_FAB_RSP_BKILL_CYC : cycles L2 RC took for a bkill.
+event:0x3015e counters:2 um:zero minimum:1000 name:PM_MRK_FAB_RSP_CLAIM_RTY : Sampled store did a rwitm and got a rty.
+event:0x30154 counters:2 um:zero minimum:1000 name:PM_MRK_FAB_RSP_DCLAIM : Marked store had to do a dclaim.
+event:0x2f152 counters:1 um:zero minimum:1000 name:PM_MRK_FAB_RSP_DCLAIM_CYC : cycles L2 RC took for a dclaim.
+event:0x30156 counters:2 um:zero minimum:1000 name:PM_MRK_FAB_RSP_MATCH : ttype and cresp matched as specified in MMCR1.
+event:0x4f152 counters:3 um:zero minimum:1000 name:PM_MRK_FAB_RSP_MATCH_CYC : cresp/ttype match cycles.
+event:0x4015e counters:3 um:zero minimum:1000 name:PM_MRK_FAB_RSP_RD_RTY : Sampled L2 reads retry count.
+event:0x1015e counters:0 um:zero minimum:1000 name:PM_MRK_FAB_RSP_RD_T_INTV : Sampled Read got a T intervention.
+event:0x4f150 counters:3 um:zero minimum:1000 name:PM_MRK_FAB_RSP_RWITM_CYC : cycles L2 RC took for a rwitm.
+event:0x2015e counters:1 um:zero minimum:1000 name:PM_MRK_FAB_RSP_RWITM_RTY : Sampled store did a rwitm and got a rty.
+event:0x3012e counters:2 um:zero minimum:1000 name:PM_MRK_FILT_MATCH : Marked filter Match.
+event:0x1013c counters:0 um:zero minimum:1000 name:PM_MRK_FIN_STALL_CYC : Marked instruction Finish Stall cycles (marked finish after NTC) (use edge detect to count #).
+event:0x20134 counters:1 um:zero minimum:1000 name:PM_MRK_FXU_FIN : fxu marked instr finish.
+event:0x40130 counters:3 um:zero minimum:1000 name:PM_MRK_GRP_CMPL : marked instruction finished (completed).
+event:0x4013a counters:3 um:zero minimum:1000 name:PM_MRK_GRP_IC_MISS : Marked Group experienced I cache miss.
+event:0x3013c counters:2 um:zero minimum:1000 name:PM_MRK_GRP_NTC : Marked group ntc cycles.
+event:0x20130 counters:1 um:zero minimum:1000 name:PM_MRK_INST_DECODED : marked instruction decoded. Name from ISU?
+event:0x30130 counters:2 um:zero minimum:1000 name:PM_MRK_INST_FIN : marked instr finish any unit .
+event:0x10132 counters:0 um:zero minimum:1000 name:PM_MRK_INST_ISSUED : Marked instruction issued.
+event:0x40134 counters:3 um:zero minimum:1000 name:PM_MRK_INST_TIMEO : marked Instruction finish timeout (instruction lost).
+event:0x20114 counters:1 um:zero minimum:1000 name:PM_MRK_L2_RC_DISP : Marked Instruction RC dispatched in L2.
+event:0x3012a counters:2 um:zero minimum:1000 name:PM_MRK_L2_RC_DONE : Marked RC done.
+event:0x40116 counters:3 um:zero minimum:1000 name:PM_MRK_LARX_FIN : Larx finished .
+event:0x1013f counters:0 um:zero minimum:1000 name:PM_MRK_LD_MISS_EXPOSED : Marked Load exposed Miss (use edge detect to count #)
+event:0x1013e counters:0 um:zero minimum:1000 name:PM_MRK_LD_MISS_EXPOSED_CYC : Marked Load exposed Miss (use edge detect to count #).
+event:0x4013e counters:3 um:zero minimum:1000 name:PM_MRK_LD_MISS_L1_CYC : Marked ld latency.
+event:0x40132 counters:3 um:zero minimum:1000 name:PM_MRK_LSU_FIN : lsu marked instr finish.
+event:0xd180 counters:0,1,2,3 um:zero minimum:1000 name:PM_MRK_LSU_FLUSH : Flush: (marked) : All Cases42
+event:0xd188 counters:0,1,2,3 um:zero minimum:1000 name:PM_MRK_LSU_FLUSH_LRQ : Flush: (marked) LRQMarked LRQ flushes
+event:0xd18a counters:0,1,2,3 um:zero minimum:1000 name:PM_MRK_LSU_FLUSH_SRQ : Flush: (marked) SRQMarked SRQ lhs flushes
+event:0xd184 counters:0,1,2,3 um:zero minimum:1000 name:PM_MRK_LSU_FLUSH_ULD : Flush: (marked) Unaligned LoadMarked unaligned load flushes
+event:0xd186 counters:0,1,2,3 um:zero minimum:1000 name:PM_MRK_LSU_FLUSH_UST : Flush: (marked) Unaligned StoreMarked unaligned store flushes
+event:0x40164 counters:3 um:zero minimum:1000 name:PM_MRK_LSU_REJECT : LSU marked reject (up to 2 per cycle).
+event:0x30164 counters:2 um:zero minimum:1000 name:PM_MRK_LSU_REJECT_ERAT_MISS : LSU marked reject due to ERAT (up to 2 per cycle).
+event:0x20112 counters:1 um:zero minimum:1000 name:PM_MRK_NTF_FIN : Marked next to finish instruction finished.
+event:0x1d15e counters:0 um:zero minimum:10000 name:PM_MRK_RUN_CYC : Marked run cycles.
+event:0x1d15a counters:0 um:zero minimum:1000 name:PM_MRK_SRC_PREF_TRACK_EFF : Marked src pref track was effective.
+event:0x3d15a counters:2 um:zero minimum:1000 name:PM_MRK_SRC_PREF_TRACK_INEFF : Prefetch tracked was ineffective for marked src.
+event:0x4d15c counters:3 um:zero minimum:1000 name:PM_MRK_SRC_PREF_TRACK_MOD : Prefetch tracked was moderate for marked src.
+event:0x1d15c counters:0 um:zero minimum:1000 name:PM_MRK_SRC_PREF_TRACK_MOD_L2 : Marked src Prefetch Tracked was moderate (source L2).
+event:0x3d15c counters:2 um:zero minimum:1000 name:PM_MRK_SRC_PREF_TRACK_MOD_L3 : Prefetch tracked was moderate (L3 hit) for marked src.
+event:0x3013e counters:2 um:zero minimum:1000 name:PM_MRK_STALL_CMPLU_CYC : Marked Group Completion Stall cycles (use edge detect to count #).
+event:0x3e158 counters:2 um:zero minimum:1000 name:PM_MRK_STCX_FAIL : marked stcx failed.
+event:0x30134 counters:2 um:zero minimum:1000 name:PM_MRK_ST_CMPL_INT : marked store complete (data home) with intervention.
+event:0x3f150 counters:2 um:zero minimum:1000 name:PM_MRK_ST_DRAIN_TO_L2DISP_CYC : cycles to drain st from core to L2.
+event:0x3012c counters:2 um:zero minimum:1000 name:PM_MRK_ST_FWD : Marked st forwards.
+event:0x1f150 counters:0 um:zero minimum:1000 name:PM_MRK_ST_L2DISP_TO_CMPL_CYC : cycles from L2 rc disp to l2 rc completion.
+event:0x20138 counters:1 um:zero minimum:1000 name:PM_MRK_ST_NEST : Marked store sent to nest.
+event:0x1c15a counters:0 um:zero minimum:1000 name:PM_MRK_TGT_PREF_TRACK_EFF : Marked target pref track was effective.
+event:0x3c15a counters:2 um:zero minimum:1000 name:PM_MRK_TGT_PREF_TRACK_INEFF : Prefetch tracked was ineffective for marked target.
+event:0x4c15c counters:3 um:zero minimum:1000 name:PM_MRK_TGT_PREF_TRACK_MOD : Prefetch tracked was moderate for marked target.
+event:0x1c15c counters:0 um:zero minimum:1000 name:PM_MRK_TGT_PREF_TRACK_MOD_L2 : Marked target Prefetch Tracked was moderate (source L2).
+event:0x3c15c counters:2 um:zero minimum:1000 name:PM_MRK_TGT_PREF_TRACK_MOD_L3 : Prefetch tracked was moderate (L3 hit) for marked target.
+event:0x30132 counters:2 um:zero minimum:1000 name:PM_MRK_VSU_FIN : vsu (fpu) marked instr finish.
+event:0x3d15e counters:2 um:zero minimum:10000 name:PM_MULT_MRK : mult marked instr.
+event:0x20b0 counters:0,1,2,3 um:zero minimum:10000 name:PM_NESTED_TEND : Completion time nested tend
+event:0x3006e counters:2 um:zero minimum:10000 name:PM_NEST_REF_CLK : Nest reference clocks.
+event:0x20b6 counters:0,1,2,3 um:zero minimum:10000 name:PM_NON_FAV_TBEGIN : Dispatch time non favored tbegin
+event:0x328084 counters:1 um:zero minimum:10000 name:PM_NON_TM_RST_SC : non tm snp rst tm sc
+event:0x2001a counters:1 um:zero minimum:10000 name:PM_NTCG_ALL_FIN : Cycles after all instructions have finished to group completed.
+event:0x20ac counters:0,1,2,3 um:zero minimum:10000 name:PM_OUTER_TBEGIN : Completion time outer tbegin
+event:0x20ae counters:0,1,2,3 um:zero minimum:10000 name:PM_OUTER_TEND : Completion time outer tend
+event:0x20010 counters:1 um:zero minimum:10000 name:PM_PMC1_OVERFLOW : Overflow from counter 1.
+event:0x30010 counters:2 um:zero minimum:10000 name:PM_PMC2_OVERFLOW : Overflow from counter 2.
+event:0x30020 counters:2 um:zero minimum:10000 name:PM_PMC2_REWIND : PMC2 Rewind Event (did not match condition).
+event:0x10022 counters:0 um:zero minimum:10000 name:PM_PMC2_SAVED : PMC2 Rewind Value saved (matched condition).
+event:0x40010 counters:3 um:zero minimum:10000 name:PM_PMC3_OVERFLOW : Overflow from counter 3.
+event:0x10010 counters:0 um:zero minimum:10000 name:PM_PMC4_OVERFLOW : Overflow from counter 4.
+event:0x10020 counters:0 um:zero minimum:10000 name:PM_PMC4_REWIND : PMC4 Rewind Event (did not match condition).
+event:0x30022 counters:2 um:zero minimum:10000 name:PM_PMC4_SAVED : PMC4 Rewind Value saved (matched condition).
+event:0x10024 counters:0 um:zero minimum:10000 name:PM_PMC5_OVERFLOW : Overflow from counter 5.
+event:0x30024 counters:2 um:zero minimum:10000 name:PM_PMC6_OVERFLOW : Overflow from counter 6.
+event:0x2005a counters:1 um:zero minimum:10000 name:PM_PREF_TRACKED : Total number of Prefetch Operations that were tracked.
+event:0x1005a counters:0 um:zero minimum:10000 name:PM_PREF_TRACK_EFF : Prefetch Tracked was effective.
+event:0x3005a counters:2 um:zero minimum:10000 name:PM_PREF_TRACK_INEFF : Prefetch tracked was ineffective.
+event:0x4005a counters:3 um:zero minimum:10000 name:PM_PREF_TRACK_MOD : Prefetch tracked was moderate.
+event:0x1005c counters:0 um:zero minimum:10000 name:PM_PREF_TRACK_MOD_L2 : Prefetch Tracked was moderate (source L2).
+event:0x3005c counters:2 um:zero minimum:10000 name:PM_PREF_TRACK_MOD_L3 : Prefetch tracked was moderate (L3).
+event:0x40014 counters:3 um:zero minimum:10000 name:PM_PROBE_NOP_DISP : ProbeNops dispatched.
+event:0xe084 counters:0,1,2,3 um:zero minimum:10000 name:PM_PTE_PREFETCH : PTE prefetches42
+event:0x10054 counters:0 um:zero minimum:10000 name:PM_PUMP_CPRED : Pump prediction correct. Counts across all types of pumps for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate).
+event:0x40052 counters:3 um:zero minimum:10000 name:PM_PUMP_MPRED : Pump Mis prediction Counts across all types of pumps for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate).
+event:0x16081 counters:0 um:zero minimum:10000 name:PM_RC0_ALLOC : 0.0
+event:0x16080 counters:0 um:zero minimum:10000 name:PM_RC0_BUSY : RC mach 0 Busy. Used by PMU to sample ave RC livetime(mach0 used as sample point)
+event:0x200301ea counters:2 um:zero minimum:10000 name:PM_RC_LIFETIME_EXC_1024 : Reload latency exceeded 1024 cyc
+event:0x200401ec counters:3 um:zero minimum:10000 name:PM_RC_LIFETIME_EXC_2048 : Threshold counter exceeded a value of 2048
+event:0x200101e8 counters:0 um:zero minimum:10000 name:PM_RC_LIFETIME_EXC_256 : Threshold counter exceed a count of 256
+event:0x200201e6 counters:1 um:zero minimum:10000 name:PM_RC_LIFETIME_EXC_32 : Reload latency exceeded 32 cyc
+event:0x36088 counters:2 um:zero minimum:10000 name:PM_RC_USAGE : Continuous 16 cycle(2to1) window where this signals rotates thru sampling each L2 RC machine busy. PMU uses this wave to then do 16 cyc count to sample total number of machs running
+event:0x34808e counters:3 um:zero minimum:10000 name:PM_RD_CLEARING_SC : rd clearing sc
+event:0x34808c counters:3 um:zero minimum:10000 name:PM_RD_FORMING_SC : rd forming sc
+event:0x428086 counters:1 um:zero minimum:10000 name:PM_RD_HIT_PF : rd machine hit l3 pf machine
+event:0x20004 counters:1 um:zero minimum:10000 name:PM_REAL_SRQ_FULL : Out of real srq entries.
+event:0x3006c counters:2 um:zero minimum:10000 name:PM_RUN_CYC_SMT2_MODE : Cycles run latch is set and core is in SMT2 mode.
+event:0x2006a counters:1 um:zero minimum:10000 name:PM_RUN_CYC_SMT2_SHRD_MODE : Cycles run latch is set and core is in SMT2-shared mode.
+event:0x1006a counters:0 um:zero minimum:100000 name:PM_RUN_CYC_SMT2_SPLIT_MODE : Cycles run latch is set and core is in SMT2-split mode.
+event:0x2006c counters:1 um:zero minimum:10000 name:PM_RUN_CYC_SMT4_MODE : Cycles run latch is set and core is in SMT4 mode.
+event:0x4006c counters:3 um:zero minimum:100000 name:PM_RUN_CYC_SMT8_MODE : Cycles run latch is set and core is in SMT8 mode.
+event:0x1006c counters:0 um:zero minimum:100000 name:PM_RUN_CYC_ST_MODE : Cycles run latch is set and core is in ST mode.
+event:0x10008 counters:0 um:zero minimum:10000 name:PM_RUN_SPURR : Run SPURR.
+event:0xf082 counters:0,1,2,3 um:zero minimum:10000 name:PM_SEC_ERAT_HIT : secondary ERAT Hit42
+event:0x508c counters:0,1,2,3 um:zero minimum:10000 name:PM_SHL_CREATED : Store-Hit-Load Table Entry Created
+event:0x508e counters:0,1,2,3 um:zero minimum:10000 name:PM_SHL_ST_CONVERT : Store-Hit-Load Table Read Hit with entry Enabled
+event:0x5090 counters:0,1,2,3 um:zero minimum:10000 name:PM_SHL_ST_DISABLE : Store-Hit-Load Table Read Hit with entry Disabled (entry was disabled due to the entry shown to not prevent the flush)
+event:0x26085 counters:1 um:zero minimum:10000 name:PM_SN0_ALLOC : 0.0
+event:0x26084 counters:1 um:zero minimum:10000 name:PM_SN0_BUSY : SN mach 0 Busy. Used by PMU to sample ave RC livetime(mach0 used as sample point)
+event:0xd0b2 counters:0,1,2,3 um:zero minimum:10000 name:PM_SNOOP_TLBIE : TLBIE snoopSnoop TLBIE
+event:0x338088 counters:2 um:zero minimum:10000 name:PM_SNP_TM_HIT_M :  snp tm st hit m mu
+event:0x33808a counters:2 um:zero minimum:10000 name:PM_SNP_TM_HIT_T : snp tm_st_hit t tn te
+event:0x4608c counters:3 um:zero minimum:10000 name:PM_SN_USAGE : Continuous 16 cycle(2to1) window where this signals rotates thru sampling each L2 SN machine busy. PMU uses this wave to then do 16 cyc count to sample total number of machs running
+event:0x10028 counters:0 um:zero minimum:10000 name:PM_STALL_END_GCT_EMPTY : Count ended because GCT went empty.
+event:0x1e058 counters:0 um:zero minimum:10000 name:PM_STCX_FAIL : stcx failed .
+event:0xc090 counters:0,1,2,3 um:zero minimum:10000 name:PM_STCX_LSU : STCX executed reported at sent to nest42
+event:0x717080 counters:0 um:zero minimum:10000 name:PM_ST_CAUSED_FAIL : Non TM St caused any thread to fail
+event:0x20016 counters:1 um:zero minimum:10000 name:PM_ST_CMPL : Store completion count.
+event:0x20018 counters:1 um:zero minimum:10000 name:PM_ST_FWD : Store forwards that finished.
+event:0x0 counters:0,1,2,3 um:zero minimum:10000 name:PM_SUSPENDED : Counter OFF.
+event:0x3090 counters:0,1,2,3 um:zero minimum:10000 name:PM_SWAP_CANCEL : SWAP cancel , rtag not available
+event:0x3092 counters:0,1,2,3 um:zero minimum:10000 name:PM_SWAP_CANCEL_GPR : SWAP cancel , rtag not available for gpr
+event:0x308c counters:0,1,2,3 um:zero minimum:10000 name:PM_SWAP_COMPLETE : swap cast in completed
+event:0x308e counters:0,1,2,3 um:zero minimum:10000 name:PM_SWAP_COMPLETE_GPR : swap cast in completed fpr gpr
+event:0x15152 counters:0 um:zero minimum:10000 name:PM_SYNC_MRK_BR_LINK : Marked Branch and link branch that can cause a synchronous interrupt.
+event:0x1515c counters:0 um:zero minimum:10000 name:PM_SYNC_MRK_BR_MPRED : Marked Branch mispredict that can cause a synchronous interrupt.
+event:0x15156 counters:0 um:zero minimum:10000 name:PM_SYNC_MRK_FX_DIVIDE : Marked fixed point divide that can cause a synchronous interrupt.
+event:0x15158 counters:0 um:zero minimum:10000 name:PM_SYNC_MRK_L2HIT : Marked L2 Hits that can throw a synchronous interrupt.
+event:0x1515a counters:0 um:zero minimum:10000 name:PM_SYNC_MRK_L2MISS : Marked L2 Miss that can throw a synchronous interrupt.
+event:0x15154 counters:0 um:zero minimum:10000 name:PM_SYNC_MRK_L3MISS : Marked L3 misses that can throw a synchronous interrupt.
+event:0x15150 counters:0 um:zero minimum:10000 name:PM_SYNC_MRK_PROBE_NOP : Marked probeNops which can cause synchronous interrupts.
+event:0x30050 counters:2 um:zero minimum:10000 name:PM_SYS_PUMP_CPRED : Initial and Final Pump Scope and data sourced across this scope was system pump for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate).
+event:0x30052 counters:2 um:zero minimum:10000 name:PM_SYS_PUMP_MPRED : Final Pump Scope(system) to get data sourced, ended up larger than Initial Pump Scope(Chip/Group) OR Final Pump Scope(system) got data from source that was at smaller scope(Chip/group) Final pump was system pump and initial pump was chip or group or
+event:0x40050 counters:3 um:zero minimum:10000 name:PM_SYS_PUMP_MPRED_RTY : Final Pump Scope(system) to get data sourced, ended up larger than Initial Pump Scope (Chip or Group) for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate).
+event:0x10026 counters:0 um:zero minimum:10000 name:PM_TABLEWALK_CYC : Tablewalk Active.
+event:0xe086 counters:0,1,2,3 um:zero minimum:10000 name:PM_TABLEWALK_CYC_PREF : tablewalk qualified for pte  prefetches42
+event:0x20b2 counters:0,1,2,3 um:zero minimum:10000 name:PM_TABORT_TRECLAIM : Completion time tabortnoncd, tabortcd, treclaim
+event:0xe0ba counters:0,1,2,3 um:zero minimum:10000 name:PM_TEND_PEND_CYC : TEND latency per thread42
+event:0x2000c counters:1 um:zero minimum:100000 name:PM_THRD_ALL_RUN_CYC : All Threads in Run_cycles (was both threads in run_cycles).
+event:0x10012 counters:0 um:zero minimum:10000 name:PM_THRD_GRP_CMPL_BOTH_CYC : Two threads finished same cycle (gated by run latch).
+event:0x40bc counters:0,1,2,3 um:zero minimum:1000 name:PM_THRD_PRIO_0_1_CYC :  Cycles thread running at priority level 0 or 1
+event:0x40be counters:0,1,2,3 um:zero minimum:1000 name:PM_THRD_PRIO_2_3_CYC :  Cycles thread running at priority level 2 or 3
+event:0x5080 counters:0,1,2,3 um:zero minimum:1000 name:PM_THRD_PRIO_4_5_CYC :  Cycles thread running at priority level 4 or 5
+event:0x5082 counters:0,1,2,3 um:zero minimum:1000 name:PM_THRD_PRIO_6_7_CYC :  Cycles thread running at priority level 6 or 7
+event:0x3098 counters:0,1,2,3 um:zero minimum:10000 name:PM_THRD_REBAL_CYC : cycles rebalance was active
+event:0x4016e counters:3 um:zero minimum:10000 name:PM_THRESH_NOT_MET : Threshold counter did not meet threshold.
+event:0x30058 counters:2 um:zero minimum:10000 name:PM_TLBIE_FIN : tlbie finished.
+event:0x20066 counters:1 um:zero minimum:10000 name:PM_TLB_MISS : TLB Miss (I + D).
+event:0x20b8 counters:0,1,2,3 um:zero minimum:10000 name:PM_TM_BEGIN_ALL : Tm any tbegin
+event:0x318082 counters:0 um:zero minimum:10000 name:PM_TM_CAM_OVERFLOW : l3 tm cam overflow during L2 co of SC
+event:0x74708c counters:3 um:zero minimum:10000 name:PM_TM_CAP_OVERFLOW : TM Footprint Capacity Overflow
+event:0x20ba counters:0,1,2,3 um:zero minimum:10000 name:PM_TM_END_ALL : Tm any tend
+event:0x3086 counters:0,1,2,3 um:zero minimum:10000 name:PM_TM_FAIL_CONF_NON_TM : TEXAS fail reason @ completion
+event:0x3088 counters:0,1,2,3 um:zero minimum:10000 name:PM_TM_FAIL_CON_TM : TEXAS fail reason @ completion
+event:0xe0b2 counters:0,1,2,3 um:zero minimum:10000 name:PM_TM_FAIL_DISALLOW : TM fail disallow42
+event:0x3084 counters:0,1,2,3 um:zero minimum:10000 name:PM_TM_FAIL_FOOTPRINT_OVERFLOW : TEXAS fail reason @ completion
+event:0xe0b8 counters:0,1,2,3 um:zero minimum:10000 name:PM_TM_FAIL_NON_TX_CONFLICT : Non transactional conflict from LSU, whatever gets reported to texas42
+event:0x308a counters:0,1,2,3 um:zero minimum:10000 name:PM_TM_FAIL_SELF : TEXAS fail reason @ completion
+event:0xe0b4 counters:0,1,2,3 um:zero minimum:10000 name:PM_TM_FAIL_TLBIE : TLBIE hit bloom filter42
+event:0xe0b6 counters:0,1,2,3 um:zero minimum:10000 name:PM_TM_FAIL_TX_CONFLICT : Transactional conflict from LSU, whatever gets reported to texas 42
+event:0x727086 counters:1 um:zero minimum:10000 name:PM_TM_FAV_CAUSED_FAIL : TM Load (fav) caused another thread to fail
+event:0x717082 counters:0 um:zero minimum:10000 name:PM_TM_LD_CAUSED_FAIL : Non TM Ld caused any thread to fail
+event:0x727084 counters:1 um:zero minimum:10000 name:PM_TM_LD_CONF : TM Load (fav or non-fav) ran into conflict (failed)
+event:0x328086 counters:1 um:zero minimum:10000 name:PM_TM_RST_SC : tm snp rst tm sc
+event:0x318080 counters:0 um:zero minimum:10000 name:PM_TM_SC_CO : l3 castout tm Sc line
+event:0x73708a counters:2 um:zero minimum:10000 name:PM_TM_ST_CAUSED_FAIL : TM Store (fav or non-fav) caused another thread to fail
+event:0x737088 counters:2 um:zero minimum:10000 name:PM_TM_ST_CONF : TM Store (fav or non-fav) ran into conflict (failed)
+event:0x20bc counters:0,1,2,3 um:zero minimum:10000 name:PM_TM_TBEGIN : Tm nested tbegin
+event:0x10060 counters:0 um:zero minimum:10000 name:PM_TM_TRANS_RUN_CYC : run cycles in transactional state.
+event:0x30060 counters:2 um:zero minimum:10000 name:PM_TM_TRANS_RUN_INST : Instructions completed in transactional state.
+event:0x3080 counters:0,1,2,3 um:zero minimum:10000 name:PM_TM_TRESUME : Tm resume
+event:0x20be counters:0,1,2,3 um:zero minimum:10000 name:PM_TM_TSUSPEND : Tm suspend
+event:0x2e012 counters:1 um:zero minimum:10000 name:PM_TM_TX_PASS_RUN_CYC : run cycles spent in successful transactions.
+event:0x4e014 counters:3 um:zero minimum:10000 name:PM_TM_TX_PASS_RUN_INST : run instructions spent in successful transactions.
+event:0xe08c counters:0,1,2,3 um:zero minimum:10000 name:PM_UP_PREF_L3 : Micropartition prefetch42
+event:0xe08e counters:0,1,2,3 um:zero minimum:10000 name:PM_UP_PREF_POINTER : Micropartition pointer prefetches42
+event:0xa0a4 counters:0,1,2,3 um:zero minimum:10000 name:PM_VSU0_16FLOP : Sixteen flops operation (SP vector versions of fdiv,fsqrt)
+event:0xa080 counters:0,1,2,3 um:zero minimum:10000 name:PM_VSU0_1FLOP : one flop (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg) operation finishedDecode into 1,2,4 FLOP according to instr IOP, multiplied by #vector elements according to route( eg x1, x2, x4) Only if instr sends finish to ISU
+event:0xa098 counters:0,1,2,3 um:zero minimum:10000 name:PM_VSU0_2FLOP : two flops operation (scalar fmadd, fnmadd, fmsub, fnmsub and DP vector versions of single flop instructions)
+event:0xa09c counters:0,1,2,3 um:zero minimum:10000 name:PM_VSU0_4FLOP : four flops operation (scalar fdiv, fsqrt, DP vector version of fmadd, fnmadd, fmsub, fnmsub, SP vector versions of single flop instructions)
+event:0xa0a0 counters:0,1,2,3 um:zero minimum:10000 name:PM_VSU0_8FLOP : eight flops operation (DP vector versions of fdiv,fsqrt and SP vector versions of fmadd,fnmadd,fmsub,fnmsub)
+event:0xb0a4 counters:0,1,2,3 um:zero minimum:10000 name:PM_VSU0_COMPLEX_ISSUED : Complex VMX instruction issued
+event:0xb0b4 counters:0,1,2,3 um:zero minimum:10000 name:PM_VSU0_CY_ISSUED : Cryptographic instruction RFC02196 Issued
+event:0xb0a8 counters:0,1,2,3 um:zero minimum:10000 name:PM_VSU0_DD_ISSUED : 64BIT Decimal Issued
+event:0xa08c counters:0,1,2,3 um:zero minimum:10000 name:PM_VSU0_DP_2FLOP : DP vector version of fmul, fsub, fcmp, fsel, fabs, fnabs, fres ,fsqrte, fneg
+event:0xa090 counters:0,1,2,3 um:zero minimum:10000 name:PM_VSU0_DP_FMA : DP vector version of fmadd,fnmadd,fmsub,fnmsub
+event:0xa094 counters:0,1,2,3 um:zero minimum:10000 name:PM_VSU0_DP_FSQRT_FDIV : DP vector versions of fdiv,fsqrt
+event:0xb0ac counters:0,1,2,3 um:zero minimum:10000 name:PM_VSU0_DQ_ISSUED : 128BIT Decimal Issued
+event:0xb0b0 counters:0,1,2,3 um:zero minimum:10000 name:PM_VSU0_EX_ISSUED : Direct move 32/64b VRFtoGPR RFC02206 Issued
+event:0xa0bc counters:0,1,2,3 um:zero minimum:10000 name:PM_VSU0_FIN : VSU0 Finished an instruction
+event:0xa084 counters:0,1,2,3 um:zero minimum:10000 name:PM_VSU0_FMA : two flops operation (fmadd, fnmadd, fmsub, fnmsub) Scalar instructions only!
+event:0xb098 counters:0,1,2,3 um:zero minimum:10000 name:PM_VSU0_FPSCR : Move to/from FPSCR type instruction issued on Pipe 0
+event:0xa088 counters:0,1,2,3 um:zero minimum:10000 name:PM_VSU0_FSQRT_FDIV : four flops operation (fdiv,fsqrt) Scalar Instructions only!
+event:0xb090 counters:0,1,2,3 um:zero minimum:10000 name:PM_VSU0_PERMUTE_ISSUED : Permute VMX Instruction Issued
+event:0xb088 counters:0,1,2,3 um:zero minimum:10000 name:PM_VSU0_SCALAR_DP_ISSUED : Double Precision scalar instruction issued on Pipe0
+event:0xb094 counters:0,1,2,3 um:zero minimum:10000 name:PM_VSU0_SIMPLE_ISSUED : Simple VMX instruction issued
+event:0xa0a8 counters:0,1,2,3 um:zero minimum:10000 name:PM_VSU0_SINGLE : FPU single precision
+event:0xb09c counters:0,1,2,3 um:zero minimum:10000 name:PM_VSU0_SQ : Store Vector Issued
+event:0xb08c counters:0,1,2,3 um:zero minimum:10000 name:PM_VSU0_STF : FPU store (SP or DP) issued on Pipe0
+event:0xb080 counters:0,1,2,3 um:zero minimum:10000 name:PM_VSU0_VECTOR_DP_ISSUED : Double Precision vector instruction issued on Pipe0
+event:0xb084 counters:0,1,2,3 um:zero minimum:10000 name:PM_VSU0_VECTOR_SP_ISSUED : Single Precision vector instruction issued (executed)
+event:0xa0a6 counters:0,1,2,3 um:zero minimum:10000 name:PM_VSU1_16FLOP : Sixteen flops operation (SP vector versions of fdiv,fsqrt)
+event:0xa082 counters:0,1,2,3 um:zero minimum:10000 name:PM_VSU1_1FLOP : one flop (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg) operation finished
+event:0xa09a counters:0,1,2,3 um:zero minimum:10000 name:PM_VSU1_2FLOP : two flops operation (scalar fmadd, fnmadd, fmsub, fnmsub and DP vector versions of single flop instructions)
+event:0xa09e counters:0,1,2,3 um:zero minimum:10000 name:PM_VSU1_4FLOP : four flops operation (scalar fdiv, fsqrt, DP vector version of fmadd, fnmadd, fmsub, fnmsub, SP vector versions of single flop instructions)
+event:0xa0a2 counters:0,1,2,3 um:zero minimum:10000 name:PM_VSU1_8FLOP : eight flops operation (DP vector versions of fdiv,fsqrt and SP vector versions of fmadd,fnmadd,fmsub,fnmsub)
+event:0xb0a6 counters:0,1,2,3 um:zero minimum:10000 name:PM_VSU1_COMPLEX_ISSUED : Complex VMX instruction issued
+event:0xb0b6 counters:0,1,2,3 um:zero minimum:10000 name:PM_VSU1_CY_ISSUED : Cryptographic instruction RFC02196 Issued
+event:0xb0aa counters:0,1,2,3 um:zero minimum:10000 name:PM_VSU1_DD_ISSUED : 64BIT Decimal Issued
+event:0xa08e counters:0,1,2,3 um:zero minimum:10000 name:PM_VSU1_DP_2FLOP : DP vector version of fmul, fsub, fcmp, fsel, fabs, fnabs, fres ,fsqrte, fneg
+event:0xa092 counters:0,1,2,3 um:zero minimum:10000 name:PM_VSU1_DP_FMA : DP vector version of fmadd,fnmadd,fmsub,fnmsub
+event:0xa096 counters:0,1,2,3 um:zero minimum:10000 name:PM_VSU1_DP_FSQRT_FDIV : DP vector versions of fdiv,fsqrt
+event:0xb0ae counters:0,1,2,3 um:zero minimum:10000 name:PM_VSU1_DQ_ISSUED : 128BIT Decimal Issued
+event:0xb0b2 counters:0,1,2,3 um:zero minimum:10000 name:PM_VSU1_EX_ISSUED : Direct move 32/64b VRFtoGPR RFC02206 Issued
+event:0xa0be counters:0,1,2,3 um:zero minimum:10000 name:PM_VSU1_FIN : VSU1 Finished an instruction
+event:0xa086 counters:0,1,2,3 um:zero minimum:10000 name:PM_VSU1_FMA : two flops operation (fmadd, fnmadd, fmsub, fnmsub) Scalar instructions only!
+event:0xb09a counters:0,1,2,3 um:zero minimum:10000 name:PM_VSU1_FPSCR : Move to/from FPSCR type instruction issued on Pipe 0
+event:0xa08a counters:0,1,2,3 um:zero minimum:10000 name:PM_VSU1_FSQRT_FDIV : four flops operation (fdiv,fsqrt) Scalar Instructions only!
+event:0xb092 counters:0,1,2,3 um:zero minimum:10000 name:PM_VSU1_PERMUTE_ISSUED : Permute VMX Instruction Issued
+event:0xb08a counters:0,1,2,3 um:zero minimum:10000 name:PM_VSU1_SCALAR_DP_ISSUED : Double Precision scalar instruction issued on Pipe1
+event:0xb096 counters:0,1,2,3 um:zero minimum:10000 name:PM_VSU1_SIMPLE_ISSUED : Simple VMX instruction issued
+event:0xa0aa counters:0,1,2,3 um:zero minimum:10000 name:PM_VSU1_SINGLE : FPU single precision
+event:0xb09e counters:0,1,2,3 um:zero minimum:10000 name:PM_VSU1_SQ : Store Vector Issued
+event:0xb08e counters:0,1,2,3 um:zero minimum:10000 name:PM_VSU1_STF : FPU store (SP or DP) issued on Pipe1
+event:0xb082 counters:0,1,2,3 um:zero minimum:10000 name:PM_VSU1_VECTOR_DP_ISSUED : Double Precision vector instruction issued on Pipe1
+event:0xb086 counters:0,1,2,3 um:zero minimum:10000 name:PM_VSU1_VECTOR_SP_ISSUED : Single Precision vector instruction issued (executed)
diff --git a/events/ppc64/power8/unit_masks b/events/ppc64/power8/unit_masks
new file mode 100644
index 0000000..988dd41
--- /dev/null
+++ b/events/ppc64/power8/unit_masks
@@ -0,0 +1,9 @@
+#
+# Copyright OProfile authors
+# Copyright (c) International Business Machines, 2013.
+# Contributed by Maynard Johnson <maynardj@us.ibm.com>.
+#
+# ppc64 POWER8 possible unit masks
+#
+name:zero type:mandatory default:0x0
+	0x0 No unit mask
diff --git a/events/rtc/events b/events/rtc/events
deleted file mode 100644
index cce44b0..0000000
--- a/events/rtc/events
+++ /dev/null
@@ -1,3 +0,0 @@
-# RTC events
-#
-name:RTC_INTERRUPTS event:0xff counters:0 um:zero minimum:2 : RTC interrupts/sec (rounded up to power of two)
diff --git a/events/s390/z10/events b/events/s390/z10/events
index 08a2e74..9c975ae 100644
--- a/events/s390/z10/events
+++ b/events/s390/z10/events
@@ -2,6 +2,7 @@
 # Copyright (c) International Business Machines, 2011.
 # Contributed by Andreas Krebbel <krebbel@linux.vnet.ibm.com>.
 #
-# IBM System z10 Basic Mode Sampling events
+# IBM System z10 events for operf/ocount
 #
-event:0x00 counters:0 um:zero minimum:2202 name:HWSAMPLING : Sampling using Basic Mode Hardware Sampling
+event:0x00 counters:0 um:zero minimum:2202 name:CPU_CYCLES : Processor cycles
+event:0x01 counters:0 um:zero minimum:2202 name:INSTRUCTIONS : Instructions completed
diff --git a/events/s390/z196/events b/events/s390/z196/events
index 6c4bd65..c9a7526 100644
--- a/events/s390/z196/events
+++ b/events/s390/z196/events
@@ -2,6 +2,6 @@
 # Copyright (c) International Business Machines, 2011.
 # Contributed by Andreas Krebbel <krebbel@linux.vnet.ibm.com>.
 #
-# zEnterprise z196 Basic Mode Sampling events
+# zEnterprise z196 events for operf/ocount
 #
 include:s390/z10
diff --git a/events/s390/zEC12/events b/events/s390/zEC12/events
new file mode 100644
index 0000000..f2fb415
--- /dev/null
+++ b/events/s390/zEC12/events
@@ -0,0 +1,8 @@
+# Copyright OProfile authors
+# Copyright (c) International Business Machines, 2013.
+# Contributed by Andreas Krebbel <krebbel@linux.vnet.ibm.com>.
+#
+# IBM Enterprise EC12 events for operf/ocount
+#
+event:0x00 counters:0 um:zero minimum:19264 name:CPU_CYCLES : Processor cycles
+event:0x01 counters:0 um:zero minimum:19264 name:INSTRUCTIONS : Instructions completed
diff --git a/events/s390/zEC12/unit_masks b/events/s390/zEC12/unit_masks
new file mode 100644
index 0000000..cfc4dc1
--- /dev/null
+++ b/events/s390/zEC12/unit_masks
@@ -0,0 +1,7 @@
+# Copyright OProfile authors
+# Copyright (c) International Business Machines, 2013.
+# Contributed by Andreas Krebbel <krebbel@linux.vnet.ibm.com>.
+#
+# IBM Enterprise EC12 unit masks for operf/ocount
+#
+include:s390/z10
diff --git a/events/x86-64/family10/events b/events/x86-64/family10/events
index 0213f26..3f6ae1e 100644
--- a/events/x86-64/family10/events
+++ b/events/x86-64/family10/events
@@ -9,13 +9,12 @@
 # Sources: BIOS and Kernel Developer's Guide for AMD Family 10h Processors,
 #          Publication# 31116, Revision 3.48, April 22, 2010
 #
-#          Software Optimization Guide for AMD Family 10h and Family 12h Processors,
-#          Publication# 40546, Revision 3.13, February 2011
-#          (Note: For IBS Derived Performance Events)
-#
-# Revision: 1.4
+# Revision: 1.5
 #
 # ChangeLog: 
+# 	1.5: 11 August 2014
+# 	- Removal of IBS events due to missing support in Operf
+#
 #	1.4: 11 March 2011
 #       - Update to BKDG revision 3.48
 #       - Fix typo in the description for event 0xf244
@@ -179,72 +178,3 @@ event:0x4e1 counters:0,1,2,3 um:l3_cache minimum:500 name:L3_CACHE_MISSES : Numb
 event:0x4e2 counters:0,1,2,3 um:l3_fill minimum:500 name:L3_FILLS_CAUSED_BY_L2_EVICTIONS : Number of L3 fills caused by L2 evictions per core
 event:0x4e3 counters:0,1,2,3 um:l3_evict minimum:500 name:L3_EVICTIONS : Number of L3 cache line evictions by cache state
 event:0x4ed counters:0,1,2,3 um:non_cancelled_l3_read_requests minimum:500 name:NON_CANCELLED_L3_READ_REQUESTS : Non-cancelled L3 Read Requests (Rev D)
-
-###############################
-# IBS FETCH EVENTS
-###############################
-event:0xf000 ext:ibs_fetch um:zero minimum:50000 name:IBS_FETCH_ALL : All IBS fetch samples
-event:0xf001 ext:ibs_fetch um:zero minimum:50000 name:IBS_FETCH_KILLED : IBS fetch killed
-event:0xf002 ext:ibs_fetch um:zero minimum:50000 name:IBS_FETCH_ATTEMPTED : IBS fetch attempted
-event:0xf003 ext:ibs_fetch um:zero minimum:50000 name:IBS_FETCH_COMPLETED : IBS fetch completed
-event:0xf004 ext:ibs_fetch um:zero minimum:50000 name:IBS_FETCH_ABORTED : IBS fetch aborted
-event:0xf005 ext:ibs_fetch um:zero minimum:50000 name:IBS_FETCH_ITLB_HITS : IBS ITLB hit
-event:0xf006 ext:ibs_fetch um:zero minimum:50000 name:IBS_FETCH_L1_ITLB_MISSES_L2_ITLB_HITS : IBS L1 ITLB misses (and L2 ITLB hits)
-event:0xf007 ext:ibs_fetch um:zero minimum:50000 name:IBS_FETCH_L1_ITLB_MISSES_L2_ITLB_MISSES : IBS L1 L2 ITLB miss
-event:0xf008 ext:ibs_fetch um:zero minimum:50000 name:IBS_FETCH_ICACHE_MISSES : IBS Instruction cache misses
-event:0xf009 ext:ibs_fetch um:zero minimum:50000 name:IBS_FETCH_ICACHE_HITS : IBS Instruction cache hit
-event:0xf00A ext:ibs_fetch um:zero minimum:50000 name:IBS_FETCH_4K_PAGE : IBS 4K page translation
-event:0xf00B ext:ibs_fetch um:zero minimum:50000 name:IBS_FETCH_2M_PAGE : IBS 2M page translation
-#
-event:0xf00E ext:ibs_fetch um:zero minimum:50000 name:IBS_FETCH_LATENCY : IBS fetch latency
-
-###############################
-# IBS OP EVENTS
-###############################
-event:0xf100 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_ALL : All IBS op samples
-event:0xf101 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_TAG_TO_RETIRE : IBS tag-to-retire cycles
-event:0xf102 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_COMP_TO_RET : IBS completion-to-retire cycles
-event:0xf103 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_BRANCH_RETIRED : IBS branch op
-event:0xf104 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_MISPREDICTED_BRANCH : IBS mispredicted branch op
-event:0xf105 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_TAKEN_BRANCH : IBS taken branch op
-event:0xf106 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_MISPREDICTED_BRANCH_TAKEN : IBS mispredicted taken branch op
-event:0xf107 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_RETURNS : IBS return op
-event:0xf108 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_MISPREDICTED_RETURNS : IBS mispredicted return op
-event:0xf109 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_RESYNC : IBS resync op
-event:0xf200 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_ALL_LOAD_STORE : IBS all load store ops
-event:0xf201 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_LOAD : IBS load ops
-event:0xf202 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_STORE : IBS store ops
-event:0xf203 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_L1_DTLB_HITS : IBS L1 DTLB hit
-event:0xf204 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_L1_DTLB_MISS_L2_DTLB_HIT : IBS L1 DTLB misses L2 hits
-event:0xf205 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_L1_L2_DTLB_MISS : IBS L1 and L2 DTLB misses
-event:0xf206 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_DATA_CACHE_MISS : IBS data cache misses
-event:0xf207 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_DATA_HITS : IBS data cache hits
-event:0xf208 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_MISALIGNED_DATA_ACC : IBS misaligned data access
-event:0xf209 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_BANK_CONF_LOAD : IBS bank conflict on load op
-event:0xf20A ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_BANK_CONF_STORE : IBS bank conflict on store op
-event:0xf20B ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_FORWARD : IBS store-to-load forwarded
-event:0xf20C ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_CANCELLED : IBS store-to-load cancelled
-event:0xf20D ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_DCUC_MEM_ACC : IBS UC memory access
-event:0xf20E ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_DCWC_MEM_ACC : IBS WC memory access
-event:0xf20F ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_LOCKED : IBS locked operation
-event:0xf210 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_MAB_HIT : IBS MAB hit
-event:0xf211 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_L1_DTLB_4K : IBS L1 DTLB 4K page
-event:0xf212 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_L1_DTLB_2M : IBS L1 DTLB 2M page
-event:0xf213 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_L1_DTLB_1G : IBS L1 DTLB 1G page
-event:0xf215 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_L2_DTLB_4K : IBS L2 DTLB 4K page
-event:0xf216 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_L2_DTLB_2M : IBS L2 DTLB 2M page
-event:0xf217 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_L2_DTLB_1G : IBS L2 DTLB 1G page
-event:0xf219 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_DC_LOAD_LAT : IBS data cache miss load latency
-event:0xf240 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_NB_LOCAL_ONLY : IBS northbridge local
-event:0xf241 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_NB_REMOTE_ONLY : IBS northbridge remote
-event:0xf242 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_NB_LOCAL_L3 : IBS northbridge local L3
-event:0xf243 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_NB_LOCAL_CACHE : IBS northbridge local core L1 or L2 cache
-event:0xf244 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_NB_REMOTE_CACHE : IBS northbridge remote core L1, L2, L3 cache
-event:0xf245 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_NB_LOCAL_DRAM : IBS northbridge local DRAM
-event:0xf246 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_NB_REMOTE_DRAM : IBS northbridge remote DRAM
-event:0xf247 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_NB_LOCAL_OTHER : IBS northbridge local APIC MMIO Config PCI
-event:0xf248 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_NB_REMOTE_OTHER : IBS northbridge remote APIC MMIO Config PCI
-event:0xf249 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_NB_CACHE_MODIFIED : IBS northbridge cache modified state
-event:0xf24A ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_NB_CACHE_OWNED : IBS northbridge cache owned state
-event:0xf24B ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_NB_LOCAL_CACHE_LAT : IBS northbridge local cache latency
-event:0xf24C ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_NB_REMOTE_CACHE_LAT : IBS northbridge remote cache latency
diff --git a/events/x86-64/family10/unit_masks b/events/x86-64/family10/unit_masks
index 5c42206..e18504b 100644
--- a/events/x86-64/family10/unit_masks
+++ b/events/x86-64/family10/unit_masks
@@ -13,9 +13,12 @@
 #          Publication# 40546, Revision 3.13, February 2011
 #          (Note: For IBS Derived Performance Events)
 #
-# Revision: 1.4
+# Revision: 1.5
 #
 # ChangeLog: 
+# 	1.5: 11 August 2014
+# 	- Removal of IBS events due to missing support in Operf
+#
 #	1.4: 11 March 2011
 #       - Update to BKDG revision 3.48
 #       - Fix typo in the description for event 0xf244
@@ -377,10 +380,6 @@ name:retired_x87_fp type:bitmask default:0x07
 	0x01 Add/subtract ops
 	0x02 Multiply ops
 	0x04 Divide ops
-name:ibs_op type:bitmask default:0x01
-	0x00 Using IBS OP cycle count mode
-	0x01 Using IBS OP dispatch count mode
-	0x02 Enable IBS OP Memory Access Log 
 name:non_cancelled_l3_read_requests type:bitmask default:0xf7
 	0x01 RbBlk
 	0x02 RbBlkS
diff --git a/events/x86-64/family12h/events b/events/x86-64/family12h/events
index eb5ac5c..b99bbe5 100644
--- a/events/x86-64/family12h/events
+++ b/events/x86-64/family12h/events
@@ -10,13 +10,12 @@
 # Sources: BIOS and Kernel Developer's Guide for AMD Family 12h Processors,
 #          Publication# 41131, Revision 1.13, March 01, 2011
 #
-#          Software Optimization Guide for AMD Family 10h and Family 12h Processors,
-#          Publication# 40546, Revision 3.13, February 2011
-#          (Note: For IBS Derived Performance Events)
-#
-# Revision: 1.2
+# Revision: 1.3
 #
 # ChangeLog:
+# 	1.3: 11 August 2014
+# 	- Remove IBS events due to missing operf support
+#
 #	1.2: 09 March 2011
 # 	- Update with BKDG Rev.1.13 (preliminary)
 #
@@ -130,63 +129,3 @@ event:0x0ee counters:0,1,2,3 um:gart minimum:500 name:DEV_EVENTS : DEV Events
 event:0x1f0 counters:0,1,2,3 um:mem_control_request minimum:500 name:MEMORY_CONTROLLER_REQUESTS : Memory Controller Requests
 event:0x1e9 counters:0,1,2,3 um:sideband_signals minimum:500 name:SIDEBAND_SIGNALS : Sideband Signals and Special Cycles
 event:0x1ea counters:0,1,2,3 um:interrupt_events minimum:500 name:INTERRUPT_EVENTS : Interrupt Events
-event:0xf000 ext:ibs_fetch um:zero minimum:50000 name:IBS_FETCH_ALL : All IBS fetch samples
-event:0xf001 ext:ibs_fetch um:zero minimum:50000 name:IBS_FETCH_KILLED : IBS fetch killed
-event:0xf002 ext:ibs_fetch um:zero minimum:50000 name:IBS_FETCH_ATTEMPTED : IBS fetch attempted
-event:0xf003 ext:ibs_fetch um:zero minimum:50000 name:IBS_FETCH_COMPLETED : IBS fetch completed
-event:0xf004 ext:ibs_fetch um:zero minimum:50000 name:IBS_FETCH_ABORTED : IBS fetch aborted
-event:0xf005 ext:ibs_fetch um:zero minimum:50000 name:IBS_FETCH_ITLB_HITS : IBS ITLB hit
-event:0xf006 ext:ibs_fetch um:zero minimum:50000 name:IBS_FETCH_L1_ITLB_MISSES_L2_ITLB_HITS : IBS L1 ITLB misses (and L2 ITLB hits)
-event:0xf007 ext:ibs_fetch um:zero minimum:50000 name:IBS_FETCH_L1_ITLB_MISSES_L2_ITLB_MISSES : IBS L1 L2 ITLB miss
-event:0xf008 ext:ibs_fetch um:zero minimum:50000 name:IBS_FETCH_ICACHE_MISSES : IBS instruction cache misses
-event:0xf009 ext:ibs_fetch um:zero minimum:50000 name:IBS_FETCH_ICACHE_HITS : IBS instruction cache hit
-event:0xf00a ext:ibs_fetch um:zero minimum:50000 name:IBS_FETCH_4K_PAGE : IBS 4K page translation
-event:0xf00b ext:ibs_fetch um:zero minimum:50000 name:IBS_FETCH_2M_PAGE : IBS 2M page translation
-event:0xf00e ext:ibs_fetch um:zero minimum:50000 name:IBS_FETCH_LATENCY : IBS fetch latency
-event:0xf100 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_ALL : All IBS op samples
-event:0xf101 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_TAG_TO_RETIRE : IBS tag-to-retire cycles
-event:0xf102 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_COMP_TO_RET : IBS completion-to-retire cycles
-event:0xf103 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_BRANCH_RETIRED : IBS branch op
-event:0xf104 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_MISPREDICTED_BRANCH : IBS mispredicted branch op
-event:0xf105 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_TAKEN_BRANCH : IBS taken branch op
-event:0xf106 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_MISPREDICTED_BRANCH_TAKEN : IBS mispredicted taken branch op
-event:0xf107 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_RETURNS : IBS return op
-event:0xf108 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_MISPREDICTED_RETURNS : IBS mispredicted return op
-event:0xf109 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_RESYNC : IBS resync op
-event:0xf200 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_ALL_LOAD_STORE : IBS all load store ops
-event:0xf201 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_LOAD : IBS load ops
-event:0xf202 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_STORE : IBS store ops
-event:0xf203 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_L1_DTLB_HITS : IBS L1 DTLB hit
-event:0xf204 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_L1_DTLB_MISS_L2_DTLB_HIT : IBS L1 DTLB misses L2 hits
-event:0xf205 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_L1_L2_DTLB_MISS : IBS L1 and L2 DTLB misses
-event:0xf206 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_DATA_CACHE_MISS : IBS data cache misses
-event:0xf207 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_DATA_HITS : IBS data cache hits
-event:0xf208 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_MISALIGNED_DATA_ACC : IBS misaligned data access
-event:0xf209 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_BANK_CONF_LOAD : IBS bank conflict on load op
-event:0xf20a ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_BANK_CONF_STORE : IBS bank conflict on store op
-event:0xf20b ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_FORWARD : IBS store-to-load forwarded
-event:0xf20c ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_CANCELLED : IBS store-to-load cancelled
-event:0xf20d ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_DCUC_MEM_ACC : IBS UC memory access
-event:0xf20e ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_DCWC_MEM_ACC : IBS WC memory access
-event:0xf20f ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_LOCKED : IBS locked operation
-event:0xf210 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_MAB_HIT : IBS MAB hit
-event:0xf211 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_L1_DTLB_4K : IBS L1 DTLB 4K page
-event:0xf212 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_L1_DTLB_2M : IBS L1 DTLB 2M page
-event:0xf213 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_L1_DTLB_1G : IBS L1 DTLB 1G page
-event:0xf215 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_L2_DTLB_4K : IBS L2 DTLB 4K page
-event:0xf216 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_L2_DTLB_2M : IBS L2 DTLB 2M page
-event:0xf217 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_L2_DTLB_1G : IBS L2 DTLB 1G page
-event:0xf219 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_DC_LOAD_LAT : IBS data cache miss load latency
-event:0xf240 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_NB_LOCAL_ONLY : IBS Northbridge local
-event:0xf241 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_NB_REMOTE_ONLY : IBS Northbridge remote
-event:0xf242 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_NB_LOCAL_L3 : IBS Northbridge local L3
-event:0xf243 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_NB_LOCAL_CACHE : IBS Northbridge local core L1 or L2 cache
-event:0xf244 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_NB_REMOTE_CACHE : IBS Northbridge local core L1, L2, L3 cache
-event:0xf245 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_NB_LOCAL_DRAM : IBS Northbridge local DRAM
-event:0xf246 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_NB_REMOTE_DRAM : IBS Northbridge remote DRAM
-event:0xf247 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_NB_LOCAL_OTHER : IBS Northbridge local APIC MMIO Config PCI
-event:0xf248 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_NB_REMOTE_OTHER : IBS Northbridge remote APIC MMIO Config PCI
-event:0xf249 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_NB_CACHE_MODIFIED : IBS Northbridge cache modified state
-event:0xf24a ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_NB_CACHE_OWNED : IBS Northbridge cache owned state
-event:0xf24b ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_NB_LOCAL_CACHE_LAT : IBS Northbridge local cache latency
-event:0xf24c ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_NB_REMOTE_CACHE_LAT : IBS Northbridge remote cache latency
diff --git a/events/x86-64/family12h/unit_masks b/events/x86-64/family12h/unit_masks
index 4f97c3b..f824490 100644
--- a/events/x86-64/family12h/unit_masks
+++ b/events/x86-64/family12h/unit_masks
@@ -10,13 +10,12 @@
 # Sources: BIOS and Kernel Developer's Guide for AMD Family 12h Processors,
 #          Publication# 41131, Revision 1.13, March 01, 2011
 #
-#          Software Optimization Guide for AMD Family 10h and Family 12h Processors,
-#          Publication# 40546, Revision 3.13, February 2011
-#          (Note: For IBS Derived Performance Events)
-#
-# Revision: 1.2
+# Revision: 1.3
 #
 # ChangeLog: 
+# 	1.3: 11 August 2014
+# 	- Remove IBS events due to missing operf support
+#
 #	1.2: 09 March 2011
 # 	- Update with BKDG Rev.1.13 (preliminary)
 #
@@ -266,8 +265,3 @@ name:interrupt_events type:bitmask default:0xff
 	0x20 STARTUP
 	0x40 INT
 	0x80 EOI
-name:ibs_op type:bitmask default:0x01
-	0x00 Using IBS OP cycle count mode
-	0x01 Using IBS OP dispatch count mode
-	0x02 Enable IBS OP Memory Access Log 
-	0x04 Enable IBS OP Branch Target Address Log
diff --git a/events/x86-64/family14h/events b/events/x86-64/family14h/events
index cd05d28..956bc24 100644
--- a/events/x86-64/family14h/events
+++ b/events/x86-64/family14h/events
@@ -10,13 +10,12 @@
 # Sources: BIOS and Kernel Developer's Guide for AMD Family 14h Processors,
 #          Publication# 43170, Revision 3.04, Feb 16, 2011
 #
-#          Software Optimization Guide for AMD Family 10h and Family 12h Processors,
-#          Publication# 40546, Revision 3.13, February 2011
-#          (Note: For IBS Derived Performance Events)
-#
-# Revision: 1.2
+# Revision: 1.3
 #
 # ChangeLog:
+# 	1.3: 11 August 2014
+# 	- Remove IBS events due to missing support in Operf
+#
 #	1.2: 11 March 2011
 # 	- Update to BKDG Rev.3.04
 #
@@ -109,63 +108,3 @@ event:0x0ee counters:0,1,2,3 um:gart minimum:500 name:DEV_EVENTS : DEV Events
 event:0x1f0 counters:0,1,2,3 um:mem_control_request minimum:500 name:MEMORY_CONTROLLER_REQUESTS : Memory Controller Requests
 event:0x1e9 counters:0,1,2,3 um:sideband_signals minimum:500 name:SIDEBAND_SIGNALS : Sideband Signals and Special Cycles
 event:0x1ea counters:0,1,2,3 um:interrupt_events minimum:500 name:INTERRUPT_EVENTS : Interrupt Events
-event:0xf000 ext:ibs_fetch um:zero minimum:50000 name:IBS_FETCH_ALL : All IBS fetch samples
-event:0xf001 ext:ibs_fetch um:zero minimum:50000 name:IBS_FETCH_KILLED : IBS fetch killed
-event:0xf002 ext:ibs_fetch um:zero minimum:50000 name:IBS_FETCH_ATTEMPTED : IBS fetch attempted
-event:0xf003 ext:ibs_fetch um:zero minimum:50000 name:IBS_FETCH_COMPLETED : IBS fetch completed
-event:0xf004 ext:ibs_fetch um:zero minimum:50000 name:IBS_FETCH_ABORTED : IBS fetch aborted
-event:0xf005 ext:ibs_fetch um:zero minimum:50000 name:IBS_FETCH_ITLB_HITS : IBS ITLB hit
-event:0xf006 ext:ibs_fetch um:zero minimum:50000 name:IBS_FETCH_L1_ITLB_MISSES_L2_ITLB_HITS : IBS L1 ITLB misses (and L2 ITLB hits)
-event:0xf007 ext:ibs_fetch um:zero minimum:50000 name:IBS_FETCH_L1_ITLB_MISSES_L2_ITLB_MISSES : IBS L1 L2 ITLB miss
-event:0xf008 ext:ibs_fetch um:zero minimum:50000 name:IBS_FETCH_ICACHE_MISSES : IBS instruction cache misses
-event:0xf009 ext:ibs_fetch um:zero minimum:50000 name:IBS_FETCH_ICACHE_HITS : IBS instruction cache hit
-event:0xf00a ext:ibs_fetch um:zero minimum:50000 name:IBS_FETCH_4K_PAGE : IBS 4K page translation
-event:0xf00b ext:ibs_fetch um:zero minimum:50000 name:IBS_FETCH_2M_PAGE : IBS 2M page translation
-event:0xf00e ext:ibs_fetch um:zero minimum:50000 name:IBS_FETCH_LATENCY : IBS fetch latency
-event:0xf100 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_ALL : All IBS op samples
-event:0xf101 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_TAG_TO_RETIRE : IBS tag-to-retire cycles
-event:0xf102 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_COMP_TO_RET : IBS completion-to-retire cycles
-event:0xf103 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_BRANCH_RETIRED : IBS branch op
-event:0xf104 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_MISPREDICTED_BRANCH : IBS mispredicted branch op
-event:0xf105 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_TAKEN_BRANCH : IBS taken branch op
-event:0xf106 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_MISPREDICTED_BRANCH_TAKEN : IBS mispredicted taken branch op
-event:0xf107 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_RETURNS : IBS return op
-event:0xf108 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_MISPREDICTED_RETURNS : IBS mispredicted return op
-event:0xf109 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_RESYNC : IBS resync op
-event:0xf200 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_ALL_LOAD_STORE : IBS all load store ops
-event:0xf201 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_LOAD : IBS load ops
-event:0xf202 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_STORE : IBS store ops
-event:0xf203 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_L1_DTLB_HITS : IBS L1 DTLB hit
-event:0xf204 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_L1_DTLB_MISS_L2_DTLB_HIT : IBS L1 DTLB misses L2 hits
-event:0xf205 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_L1_L2_DTLB_MISS : IBS L1 and L2 DTLB misses
-event:0xf206 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_DATA_CACHE_MISS : IBS data cache misses
-event:0xf207 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_DATA_HITS : IBS data cache hits
-event:0xf208 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_MISALIGNED_DATA_ACC : IBS misaligned data access
-event:0xf209 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_BANK_CONF_LOAD : IBS bank conflict on load op
-event:0xf20a ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_BANK_CONF_STORE : IBS bank conflict on store op
-event:0xf20b ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_FORWARD : IBS store-to-load forwarded
-event:0xf20c ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_CANCELLED : IBS store-to-load cancelled
-event:0xf20d ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_DCUC_MEM_ACC : IBS UC memory access
-event:0xf20e ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_DCWC_MEM_ACC : IBS WC memory access
-event:0xf20f ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_LOCKED : IBS locked operation
-event:0xf210 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_MAB_HIT : IBS MAB hit
-event:0xf211 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_L1_DTLB_4K : IBS L1 DTLB 4K page
-event:0xf212 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_L1_DTLB_2M : IBS L1 DTLB 2M page
-event:0xf213 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_L1_DTLB_1G : IBS L1 DTLB 1G page
-event:0xf215 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_L2_DTLB_4K : IBS L2 DTLB 4K page
-event:0xf216 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_L2_DTLB_2M : IBS L2 DTLB 2M page
-event:0xf217 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_L2_DTLB_1G : IBS L2 DTLB 1G page
-event:0xf219 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_DC_LOAD_LAT : IBS data cache miss load latency
-event:0xf240 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_NB_LOCAL_ONLY : IBS Northbridge local
-event:0xf241 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_NB_REMOTE_ONLY : IBS Northbridge remote
-event:0xf242 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_NB_LOCAL_L3 : IBS Northbridge local L3
-event:0xf243 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_NB_LOCAL_CACHE : IBS Northbridge local core L1 or L2 cache
-event:0xf244 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_NB_REMOTE_CACHE : IBS Northbridge local core L1, L2, L3 cache
-event:0xf245 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_NB_LOCAL_DRAM : IBS Northbridge local DRAM
-event:0xf246 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_NB_REMOTE_DRAM : IBS Northbridge remote DRAM
-event:0xf247 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_NB_LOCAL_OTHER : IBS Northbridge local APIC MMIO Config PCI
-event:0xf248 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_NB_REMOTE_OTHER : IBS Northbridge remote APIC MMIO Config PCI
-event:0xf249 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_NB_CACHE_MODIFIED : IBS Northbridge cache modified state
-event:0xf24a ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_NB_CACHE_OWNED : IBS Northbridge cache owned state
-event:0xf24b ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_NB_LOCAL_CACHE_LAT : IBS Northbridge local cache latency
-event:0xf24c ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_NB_REMOTE_CACHE_LAT : IBS Northbridge remote cache latency
diff --git a/events/x86-64/family14h/unit_masks b/events/x86-64/family14h/unit_masks
index 9e4484e..b722ced 100644
--- a/events/x86-64/family14h/unit_masks
+++ b/events/x86-64/family14h/unit_masks
@@ -10,13 +10,12 @@
 # Sources: BIOS and Kernel Developer's Guide for AMD Family 14h Processors,
 #          Publication# 43170, Revision 3.04, Feb 16, 2011
 #
-#          Software Optimization Guide for AMD Family 10h and Family 12h Processors,
-#          Publication# 40546, Revision 3.13, February 2011
-#          (Note: For IBS Derived Performance Events)
-#
-# Revision: 1.2
+# Revision: 1.3
 #
 # ChangeLog:
+# 	1.3: 11 August 2014
+# 	- Remove IBS events due to missing support in Operf
+#
 #	1.2: 11 March 2011
 # 	- Update to BKDG Rev.3.04
 #
@@ -239,8 +238,3 @@ name:interrupt_events type:bitmask default:0xff
 	0x20 STARTUP
 	0x40 INT
 	0x80 EOI
-name:ibs_op type:bitmask default:0x01
-	0x00 Using IBS OP cycle count mode
-	0x01 Using IBS OP dispatch count mode
-	0x02 Enable IBS OP Memory Access Log
-	0x04 Enable IBS OP Branch Target Address Log
diff --git a/events/x86-64/family15h/events b/events/x86-64/family15h/events
index faa9b90..cc7b49e 100644
--- a/events/x86-64/family15h/events
+++ b/events/x86-64/family15h/events
@@ -10,13 +10,12 @@
 # Sources: BIOS and Kernel Developer's Guide for AMD Family 15h Models 00h-0Fh Processors,
 #          Publication# 42301, Revision 1.12, February 16, 2011
 #
-#          Software Optimization Guide for AMD Family 10h and Family 12h Processors,
-#          Publication# 40546, Revision 3.13, February 2011
-#          (Note: For IBS Derived Performance Events)
-#
-# Revision: 1.3
+# Revision: 1.4
 #
 # ChangeLog: 
+# 	1.4: 11 August 2014
+# 	- Remove IBS events due to missing support in Operf
+#
 # 	1.3: 9 March 2011
 # 	- Update to BKDG Rev 1.12 (still preliminary)
 #
@@ -111,63 +110,3 @@ event:0x0de counters:0,1,2,3,4,5 um:zero minimum:500 name:DR2_BREAKPOINTS : DR2
 event:0x0df counters:0,1,2,3,4,5 um:zero minimum:500 name:DR3_BREAKPOINTS : DR3 Breakpoint Match
 event:0x1cf counters:0,1,2,3,4,5 um:ibs_ops_tagged minimum:50000 name:IBS_OPS_TAGGED : Tagged IBS Ops
 event:0x1d8 counters:0,1,2,3,4,5 um:zero minimum:500 name:DISPATCH_STALL_FOR_STQ_FULL : Dispatch Stall for STQ Full
-event:0xf000 ext:ibs_fetch um:zero minimum:50000 name:IBS_FETCH_ALL : All IBS fetch samples
-event:0xf001 ext:ibs_fetch um:zero minimum:50000 name:IBS_FETCH_KILLED : IBS fetch killed
-event:0xf002 ext:ibs_fetch um:zero minimum:50000 name:IBS_FETCH_ATTEMPTED : IBS fetch attempted
-event:0xf003 ext:ibs_fetch um:zero minimum:50000 name:IBS_FETCH_COMPLETED : IBS fetch completed
-event:0xf004 ext:ibs_fetch um:zero minimum:50000 name:IBS_FETCH_ABORTED : IBS fetch aborted
-event:0xf005 ext:ibs_fetch um:zero minimum:50000 name:IBS_FETCH_ITLB_HITS : IBS ITLB hit
-event:0xf006 ext:ibs_fetch um:zero minimum:50000 name:IBS_FETCH_L1_ITLB_MISSES_L2_ITLB_HITS : IBS L1 ITLB misses (and L2 ITLB hits)
-event:0xf007 ext:ibs_fetch um:zero minimum:50000 name:IBS_FETCH_L1_ITLB_MISSES_L2_ITLB_MISSES : IBS L1 L2 ITLB miss
-event:0xf008 ext:ibs_fetch um:zero minimum:50000 name:IBS_FETCH_ICACHE_MISSES : IBS instruction cache misses
-event:0xf009 ext:ibs_fetch um:zero minimum:50000 name:IBS_FETCH_ICACHE_HITS : IBS instruction cache hit
-event:0xf00a ext:ibs_fetch um:zero minimum:50000 name:IBS_FETCH_4K_PAGE : IBS 4K page translation
-event:0xf00b ext:ibs_fetch um:zero minimum:50000 name:IBS_FETCH_2M_PAGE : IBS 2M page translation
-event:0xf00e ext:ibs_fetch um:zero minimum:50000 name:IBS_FETCH_LATENCY : IBS fetch latency
-event:0xf100 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_ALL : All IBS op samples
-event:0xf101 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_TAG_TO_RETIRE : IBS tag-to-retire cycles
-event:0xf102 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_COMP_TO_RET : IBS completion-to-retire cycles
-event:0xf103 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_BRANCH_RETIRED : IBS branch op
-event:0xf104 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_MISPREDICTED_BRANCH : IBS mispredicted branch op
-event:0xf105 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_TAKEN_BRANCH : IBS taken branch op
-event:0xf106 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_MISPREDICTED_BRANCH_TAKEN : IBS mispredicted taken branch op
-event:0xf107 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_RETURNS : IBS return op
-event:0xf108 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_MISPREDICTED_RETURNS : IBS mispredicted return op
-event:0xf109 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_RESYNC : IBS resync op
-event:0xf200 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_ALL_LOAD_STORE : IBS all load store ops
-event:0xf201 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_LOAD : IBS load ops
-event:0xf202 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_STORE : IBS store ops
-event:0xf203 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_L1_DTLB_HITS : IBS L1 DTLB hit
-event:0xf204 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_L1_DTLB_MISS_L2_DTLB_HIT : IBS L1 DTLB misses L2 hits
-event:0xf205 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_L1_L2_DTLB_MISS : IBS L1 and L2 DTLB misses
-event:0xf206 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_DATA_CACHE_MISS : IBS data cache misses
-event:0xf207 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_DATA_HITS : IBS data cache hits
-event:0xf208 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_MISALIGNED_DATA_ACC : IBS misaligned data access
-event:0xf209 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_BANK_CONF_LOAD : IBS bank conflict on load op
-event:0xf20a ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_BANK_CONF_STORE : IBS bank conflict on store op
-event:0xf20b ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_FORWARD : IBS store-to-load forwarded
-event:0xf20c ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_CANCELLED : IBS store-to-load cancelled
-event:0xf20d ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_DCUC_MEM_ACC : IBS UC memory access
-event:0xf20e ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_DCWC_MEM_ACC : IBS WC memory access
-event:0xf20f ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_LOCKED : IBS locked operation
-event:0xf210 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_MAB_HIT : IBS MAB hit
-event:0xf211 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_L1_DTLB_4K : IBS L1 DTLB 4K page
-event:0xf212 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_L1_DTLB_2M : IBS L1 DTLB 2M page
-event:0xf213 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_L1_DTLB_1G : IBS L1 DTLB 1G page
-event:0xf215 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_L2_DTLB_4K : IBS L2 DTLB 4K page
-event:0xf216 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_L2_DTLB_2M : IBS L2 DTLB 2M page
-event:0xf217 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_L2_DTLB_1G : IBS L2 DTLB 1G page
-event:0xf219 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_DC_LOAD_LAT : IBS data cache miss load latency
-event:0xf240 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_NB_LOCAL_ONLY : IBS Northbridge local
-event:0xf241 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_NB_REMOTE_ONLY : IBS Northbridge remote
-event:0xf242 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_NB_LOCAL_L3 : IBS Northbridge local L3
-event:0xf243 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_NB_LOCAL_CACHE : IBS Northbridge local core L1 or L2 cache
-event:0xf244 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_NB_REMOTE_CACHE : IBS Northbridge local core L1, L2, L3 cache
-event:0xf245 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_NB_LOCAL_DRAM : IBS Northbridge local DRAM
-event:0xf246 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_NB_REMOTE_DRAM : IBS Northbridge remote DRAM
-event:0xf247 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_NB_LOCAL_OTHER : IBS Northbridge local APIC MMIO Config PCI
-event:0xf248 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_NB_REMOTE_OTHER : IBS Northbridge remote APIC MMIO Config PCI
-event:0xf249 ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_NB_CACHE_MODIFIED : IBS Northbridge cache modified state
-event:0xf24a ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_NB_CACHE_OWNED : IBS Northbridge cache owned state
-event:0xf24b ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_NB_LOCAL_CACHE_LAT : IBS Northbridge local cache latency
-event:0xf24c ext:ibs_op um:ibs_op minimum:50000 name:IBS_OP_NB_REMOTE_CACHE_LAT : IBS Northbridge remote cache latency
diff --git a/events/x86-64/family15h/unit_masks b/events/x86-64/family15h/unit_masks
index 071eb1b..4c207ff 100644
--- a/events/x86-64/family15h/unit_masks
+++ b/events/x86-64/family15h/unit_masks
@@ -10,15 +10,11 @@
 # Sources: BIOS and Kernel Developer's Guide for AMD Family 15h Models 00h-0Fh Processors,
 #          Publication# 42301, Revision 1.12, February 16, 2011
 #
-#          Software Optimization Guide for AMD Family 10h and Family 12h Processors,
-#          Publication# 40546, Revision 3.13, February 2011
-#          (Note: For IBS Derived Performance Events)
-#
-# Revision: 1.3
+# Revision: 1.4
 #
 # ChangeLog: 
-# 	1.3: 9 March 2011
-# 	- Update to BKDG Rev 1.12 (still preliminary)
+# 	1.4: 11 August 2014
+# 	- Remove IBS events due to missing support in Operf
 #
 #	1.2: 25 Januray 2011
 #	- Updated to BKDG Rev 1.09 (still preliminary)
@@ -185,8 +181,3 @@ name:ls_dispatch type:bitmask default:0x07
 name:l2_prefetcher_trigger type:bitmask default:0x03
 	0x01 Load L1 miss seen by prefetcher
 	0x02 Store L1 miss seen by prefetcher
-name:ibs_op type:bitmask default:0x01
-	0x00 Using IBS OP cycle count mode
-	0x01 Using IBS OP dispatch count mode
-	0x02 Enable IBS OP Memory Access Log
-	0x04 Enable IBS OP Branch Target Address Log
diff --git a/events/x86-64/generic/events b/events/x86-64/generic/events
new file mode 100644
index 0000000..3edf5ce
--- /dev/null
+++ b/events/x86-64/generic/events
@@ -0,0 +1,40 @@
+# AMD Generic processor performance events
+#
+# Copyright OProfile authors
+# Copyright (c) 2006-2013 Advanced Micro Devices
+# Contributed by Ray Bryant <raybry at amd.com>,
+#		Jason Yeh <jason.yeh at amd.com>
+#		Suravee Suthikulpanit <suravee.suthikulpanit at amd.com>
+#		Paul Drongowski <paul.drongowski at amd.com>
+#
+# Sources: BIOS and Kernel Developer's Guide for AMD processors
+#
+# Revision: 1.0
+#
+# ChangeLog:
+#	1.0: 07 Feb 2013
+# 	- Preliminary version
+
+# L1 DATA CACHE
+event:0x040 counters:0,1,2,3 um:zero minimum:500 name:DATA_CACHE_ACCESSES : Data Cache Accesses
+event:0x041 counters:0,1,2,3 um:dcache_misses minimum:500 name:DATA_CACHE_MISSES : Data Cache Misses
+event:0x042 counters:0,1,2,3 um:dcache_refills minimum:500 name:DATA_CACHE_REFILLS_FROM_L2_OR_NORTHBRIDGE : Data Cache Refills from L2 or System
+event:0x043 counters:0,1,2,3  um:zero minimum:500 name:DATA_CACHE_REFILLS_FROM_NORTHBRIDGE : Data Cache Refills from System
+
+# CYCLE
+event:0x076 counters:0,1,2,3 um:zero minimum:50000 name:CPU_CLK_UNHALTED : CPU Clocks not Halted
+
+# INSTRUCTION CACHE
+event:0x080 counters:0,1,2,3 um:zero minimum:500 name:INSTRUCTION_CACHE_FETCHES : Instruction Cache Fetches
+event:0x081 counters:0,1,2,3 um:zero minimum:500 name:INSTRUCTION_CACHE_MISSES : Instruction Cache Misses
+event:0x082 counters:0,1,2,3 um:zero minimum:500 name:INSTRUCTION_CACHE_REFILLS_FROM_L2 : Instruction Cache Refills from L2
+event:0x083 counters:0,1,2,3 um:zero minimum:500 name:INSTRUCTION_CACHE_REFILLS_FROM_SYSTEM : Instruction Cache Refills from System
+
+# INSTRUCTIONS
+event:0x0c0 counters:0,1,2,3 um:zero minimum:50000 name:RETIRED_INSTRUCTIONS : Retired Instructions
+event:0x0c1 counters:0,1,2,3 um:zero minimum:50000 name:RETIRED_UOPS : Retired uops
+event:0x0c2 counters:0,1,2,3 um:zero minimum:500 name:RETIRED_BRANCH_INSTRUCTIONS : Retired Branch Instructions
+event:0x0c3 counters:0,1,2,3 um:zero minimum:500 name:RETIRED_MISPREDICTED_BRANCH_INSTRUCTIONS : Retired Mispredicted Branch Instructions
+event:0x0c4 counters:0,1,2,3 um:zero minimum:500 name:RETIRED_TAKEN_BRANCH_INSTRUCTIONS : Retired Taken Branch Instructions
+event:0x0c5 counters:0,1,2,3 um:zero minimum:500 name:RETIRED_TAKEN_BRANCH_INSTRUCTIONS_MISPREDICTED : Retired Taken Branch Instructions Mispredicted
+event:0x0ca counters:0,1,2,3 um:zero minimum:500 name:RETIRED_INDIRECT_BRANCHES_MISPREDICTED : Retired Indirect Branches Mispredicted
diff --git a/events/x86-64/generic/unit_masks b/events/x86-64/generic/unit_masks
new file mode 100644
index 0000000..b111b82
--- /dev/null
+++ b/events/x86-64/generic/unit_masks
@@ -0,0 +1,26 @@
+# AMD Generic processor performance events
+#
+# Copyright OProfile authors
+# Copyright (c) 2006-2013 Advanced Micro Devices
+# Contributed by Ray Bryant <raybry at amd.com>,
+#		Jason Yeh <jason.yeh at amd.com>
+#		Suravee Suthikulpanit <suravee.suthikulpanit at amd.com>
+#		Paul Drongowski <paul.drongowski at amd.com>
+#
+# Sources: BIOS and Kernel Developer's Guide for AMD processors
+#
+# Revision: 1.0
+#
+# ChangeLog:
+#	1.0: 07 Feb 2013
+# 	- Preliminary version
+
+name:zero type:mandatory default:0x00
+	0x00 No unit mask
+name:dcache_misses type:bitmask default:0x01
+	0x01 First data cache miss or streaming store to a 64B cache line
+	0x02 First streaming store to a 64B cache line
+name:dcache_refills type:bitmask default:0x0b
+	0x01 Fill with good data. (Final valid status is valid)
+	0x02 Early valid status turned out to be invalid
+	0x08 Fill with read data error
diff --git a/gui/Makefile.am b/gui/Makefile.am
deleted file mode 100644
index c079e9b..0000000
--- a/gui/Makefile.am
+++ /dev/null
@@ -1,43 +0,0 @@
-SUBDIRS = ui
-
-dist_sources = \
-	oprof_start.cpp \
-	oprof_start_config.cpp \
-	oprof_start_util.cpp \
-	oprof_start_main.cpp \
-	oprof_start.h \
-	oprof_start_config.h \
-	oprof_start_util.h
-
-EXTRA_DIST = $(dist_sources)
-
-if have_qt
-
-AM_CPPFLAGS = \
-	@QT_CFLAGS@ \
-	-I ${top_srcdir}/libop \
-	-I ${top_srcdir}/libutil++ \
-	-I ${top_srcdir}/libutil \
-	@OP_CPPFLAGS@
-
-AM_CXXFLAGS = @OP_CXXFLAGS@
-
-bin_PROGRAMS = oprof_start
-
-oprof_start_SOURCES = $(dist_sources)
-nodist_oprof_start_SOURCES = oprof_start.moc.cpp
-oprof_start_LDADD = \
-	../libutil++/libutil++.a \
-	../libop/libop.a \
-	../libutil/libutil.a \
-	ui/liboprof_start.a \
-	@QT_LIBS@ \
-	@X_LIBS@
-
-oprof_start.moc.cpp: ${top_srcdir}/gui/oprof_start.h
-	$(MOC) -o $@ ${top_srcdir}/gui/oprof_start.h
-
-clean-local:
-	rm -f oprof_start.moc.cpp
-
-endif
diff --git a/gui/Makefile.in b/gui/Makefile.in
deleted file mode 100644
index a1d5b12..0000000
--- a/gui/Makefile.in
+++ /dev/null
@@ -1,767 +0,0 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
-# @configure_input@
-
-# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009  Free Software Foundation,
-# Inc.
-# This Makefile.in is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
-# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
-# PARTICULAR PURPOSE.
-
-@SET_MAKE@
-
-VPATH = @srcdir@
-pkgdatadir = $(datadir)/@PACKAGE@
-pkgincludedir = $(includedir)/@PACKAGE@
-pkglibdir = $(libdir)/@PACKAGE@
-pkglibexecdir = $(libexecdir)/@PACKAGE@
-am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
-install_sh_DATA = $(install_sh) -c -m 644
-install_sh_PROGRAM = $(install_sh) -c
-install_sh_SCRIPT = $(install_sh) -c
-INSTALL_HEADER = $(INSTALL_DATA)
-transform = $(program_transform_name)
-NORMAL_INSTALL = :
-PRE_INSTALL = :
-POST_INSTALL = :
-NORMAL_UNINSTALL = :
-PRE_UNINSTALL = :
-POST_UNINSTALL = :
-build_triplet = @build@
-host_triplet = @host@
-@have_qt_TRUE@bin_PROGRAMS = oprof_start$(EXEEXT)
-subdir = gui
-DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
-ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/m4/binutils.m4 \
-	$(top_srcdir)/m4/builtinexpect.m4 \
-	$(top_srcdir)/m4/cellspubfdsupport.m4 \
-	$(top_srcdir)/m4/compileroption.m4 \
-	$(top_srcdir)/m4/copyifchange.m4 $(top_srcdir)/m4/docbook.m4 \
-	$(top_srcdir)/m4/extradirs.m4 \
-	$(top_srcdir)/m4/kernelversion.m4 $(top_srcdir)/m4/libtool.m4 \
-	$(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \
-	$(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
-	$(top_srcdir)/m4/mallocattribute.m4 \
-	$(top_srcdir)/m4/poptconst.m4 \
-	$(top_srcdir)/m4/precompiledheader.m4 $(top_srcdir)/m4/qt.m4 \
-	$(top_srcdir)/m4/sstream.m4 $(top_srcdir)/m4/typedef.m4 \
-	$(top_srcdir)/configure.ac
-am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
-	$(ACLOCAL_M4)
-mkinstalldirs = $(install_sh) -d
-CONFIG_HEADER = $(top_builddir)/config.h
-CONFIG_CLEAN_FILES =
-CONFIG_CLEAN_VPATH_FILES =
-am__installdirs = "$(DESTDIR)$(bindir)"
-PROGRAMS = $(bin_PROGRAMS)
-am__oprof_start_SOURCES_DIST = oprof_start.cpp oprof_start_config.cpp \
-	oprof_start_util.cpp oprof_start_main.cpp oprof_start.h \
-	oprof_start_config.h oprof_start_util.h
-am__objects_1 = oprof_start.$(OBJEXT) oprof_start_config.$(OBJEXT) \
-	oprof_start_util.$(OBJEXT) oprof_start_main.$(OBJEXT)
-@have_qt_TRUE@am_oprof_start_OBJECTS = $(am__objects_1)
-@have_qt_TRUE@nodist_oprof_start_OBJECTS = oprof_start.moc.$(OBJEXT)
-oprof_start_OBJECTS = $(am_oprof_start_OBJECTS) \
-	$(nodist_oprof_start_OBJECTS)
-@have_qt_TRUE@oprof_start_DEPENDENCIES = ../libutil++/libutil++.a \
-@have_qt_TRUE@	../libop/libop.a ../libutil/libutil.a \
-@have_qt_TRUE@	ui/liboprof_start.a
-DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
-depcomp = $(SHELL) $(top_srcdir)/depcomp
-am__depfiles_maybe = depfiles
-am__mv = mv -f
-CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
-	$(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
-LTCXXCOMPILE = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
-	--mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
-	$(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
-CXXLD = $(CXX)
-CXXLINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
-	--mode=link $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
-	$(LDFLAGS) -o $@
-COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
-	$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
-LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
-	--mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
-	$(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
-CCLD = $(CC)
-LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
-	--mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
-	$(LDFLAGS) -o $@
-SOURCES = $(oprof_start_SOURCES) $(nodist_oprof_start_SOURCES)
-DIST_SOURCES = $(am__oprof_start_SOURCES_DIST)
-RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \
-	html-recursive info-recursive install-data-recursive \
-	install-dvi-recursive install-exec-recursive \
-	install-html-recursive install-info-recursive \
-	install-pdf-recursive install-ps-recursive install-recursive \
-	installcheck-recursive installdirs-recursive pdf-recursive \
-	ps-recursive uninstall-recursive
-RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive	\
-  distclean-recursive maintainer-clean-recursive
-AM_RECURSIVE_TARGETS = $(RECURSIVE_TARGETS:-recursive=) \
-	$(RECURSIVE_CLEAN_TARGETS:-recursive=) tags TAGS ctags CTAGS \
-	distdir
-ETAGS = etags
-CTAGS = ctags
-DIST_SUBDIRS = $(SUBDIRS)
-DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
-am__relativize = \
-  dir0=`pwd`; \
-  sed_first='s,^\([^/]*\)/.*$$,\1,'; \
-  sed_rest='s,^[^/]*/*,,'; \
-  sed_last='s,^.*/\([^/]*\)$$,\1,'; \
-  sed_butlast='s,/*[^/]*$$,,'; \
-  while test -n "$$dir1"; do \
-    first=`echo "$$dir1" | sed -e "$$sed_first"`; \
-    if test "$$first" != "."; then \
-      if test "$$first" = ".."; then \
-        dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \
-        dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \
-      else \
-        first2=`echo "$$dir2" | sed -e "$$sed_first"`; \
-        if test "$$first2" = "$$first"; then \
-          dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \
-        else \
-          dir2="../$$dir2"; \
-        fi; \
-        dir0="$$dir0"/"$$first"; \
-      fi; \
-    fi; \
-    dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \
-  done; \
-  reldir="$$dir2"
-ACLOCAL = @ACLOCAL@
-AMTAR = @AMTAR@
-AR = @AR@
-AUTOCONF = @AUTOCONF@
-AUTOHEADER = @AUTOHEADER@
-AUTOMAKE = @AUTOMAKE@
-AWK = @AWK@
-BFD_LIBS = @BFD_LIBS@
-CAT_ENTRY_END = @CAT_ENTRY_END@
-CAT_ENTRY_START = @CAT_ENTRY_START@
-CC = @CC@
-CCDEPMODE = @CCDEPMODE@
-CFLAGS = @CFLAGS@
-CPP = @CPP@
-CPPFLAGS = @CPPFLAGS@
-CXX = @CXX@
-CXXCPP = @CXXCPP@
-CXXDEPMODE = @CXXDEPMODE@
-CXXFLAGS = @CXXFLAGS@
-CYGPATH_W = @CYGPATH_W@
-DATE = @DATE@
-DEFS = @DEFS@
-DEPDIR = @DEPDIR@
-DOCBOOK_ROOT = @DOCBOOK_ROOT@
-DSYMUTIL = @DSYMUTIL@
-DUMPBIN = @DUMPBIN@
-ECHO_C = @ECHO_C@
-ECHO_N = @ECHO_N@
-ECHO_T = @ECHO_T@
-EGREP = @EGREP@
-EXEEXT = @EXEEXT@
-EXTRA_CFLAGS_MODULE = @EXTRA_CFLAGS_MODULE@
-FGREP = @FGREP@
-GREP = @GREP@
-INSTALL = @INSTALL@
-INSTALL_DATA = @INSTALL_DATA@
-INSTALL_PROGRAM = @INSTALL_PROGRAM@
-INSTALL_SCRIPT = @INSTALL_SCRIPT@
-INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
-JAVA_HOMEDIR = @JAVA_HOMEDIR@
-LD = @LD@
-LDFLAGS = @LDFLAGS@
-LIBERTY_LIBS = @LIBERTY_LIBS@
-LIBOBJS = @LIBOBJS@
-LIBS = @LIBS@
-LIBTOOL = @LIBTOOL@
-LIPO = @LIPO@
-LN_S = @LN_S@
-LTLIBOBJS = @LTLIBOBJS@
-MAKEINFO = @MAKEINFO@
-MKDIR_P = @MKDIR_P@
-MOC = @MOC@
-NM = @NM@
-NMEDIT = @NMEDIT@
-OBJDUMP = @OBJDUMP@
-OBJEXT = @OBJEXT@
-OP_CFLAGS = @OP_CFLAGS@
-OP_CPPFLAGS = @OP_CPPFLAGS@
-OP_CXXFLAGS = @OP_CXXFLAGS@
-OP_DOCDIR = @OP_DOCDIR@
-OP_LDFLAGS = @OP_LDFLAGS@
-OTOOL = @OTOOL@
-OTOOL64 = @OTOOL64@
-PACKAGE = @PACKAGE@
-PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
-PACKAGE_NAME = @PACKAGE_NAME@
-PACKAGE_STRING = @PACKAGE_STRING@
-PACKAGE_TARNAME = @PACKAGE_TARNAME@
-PACKAGE_VERSION = @PACKAGE_VERSION@
-PATH_SEPARATOR = @PATH_SEPARATOR@
-PERF_EVENT_FLAGS = @PERF_EVENT_FLAGS@
-PFM_LIB = @PFM_LIB@
-PKG_CONFIG = @PKG_CONFIG@
-POPT_LIBS = @POPT_LIBS@
-PTRDIFF_T_TYPE = @PTRDIFF_T_TYPE@
-QT_CFLAGS = @QT_CFLAGS@
-QT_INCLUDES = @QT_INCLUDES@
-QT_LDFLAGS = @QT_LDFLAGS@
-QT_LIB = @QT_LIB@
-QT_LIBS = @QT_LIBS@
-QT_VERSION = @QT_VERSION@
-RANLIB = @RANLIB@
-SED = @SED@
-SET_MAKE = @SET_MAKE@
-SHELL = @SHELL@
-SIZE_T_TYPE = @SIZE_T_TYPE@
-STRIP = @STRIP@
-UIC = @UIC@
-UIChelp = @UIChelp@
-VERSION = @VERSION@
-XMKMF = @XMKMF@
-XML_CATALOG = @XML_CATALOG@
-XSLTPROC = @XSLTPROC@
-XSLTPROC_FLAGS = @XSLTPROC_FLAGS@
-X_CFLAGS = @X_CFLAGS@
-X_EXTRA_LIBS = @X_EXTRA_LIBS@
-X_LIBS = @X_LIBS@
-X_PRE_LIBS = @X_PRE_LIBS@
-abs_builddir = @abs_builddir@
-abs_srcdir = @abs_srcdir@
-abs_top_builddir = @abs_top_builddir@
-abs_top_srcdir = @abs_top_srcdir@
-ac_ct_CC = @ac_ct_CC@
-ac_ct_CXX = @ac_ct_CXX@
-ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
-am__include = @am__include@
-am__leading_dot = @am__leading_dot@
-am__quote = @am__quote@
-am__tar = @am__tar@
-am__untar = @am__untar@
-bindir = @bindir@
-build = @build@
-build_alias = @build_alias@
-build_cpu = @build_cpu@
-build_os = @build_os@
-build_vendor = @build_vendor@
-builddir = @builddir@
-datadir = @datadir@
-datarootdir = @datarootdir@
-docdir = @docdir@
-dvidir = @dvidir@
-exec_prefix = @exec_prefix@
-host = @host@
-host_alias = @host_alias@
-host_cpu = @host_cpu@
-host_os = @host_os@
-host_vendor = @host_vendor@
-htmldir = @htmldir@
-includedir = @includedir@
-infodir = @infodir@
-install_sh = @install_sh@
-libdir = @libdir@
-libexecdir = @libexecdir@
-localedir = @localedir@
-localstatedir = @localstatedir@
-lt_ECHO = @lt_ECHO@
-mandir = @mandir@
-mkdir_p = @mkdir_p@
-oldincludedir = @oldincludedir@
-pdfdir = @pdfdir@
-prefix = @prefix@
-program_transform_name = @program_transform_name@
-psdir = @psdir@
-sbindir = @sbindir@
-sharedstatedir = @sharedstatedir@
-srcdir = @srcdir@
-sysconfdir = @sysconfdir@
-target_alias = @target_alias@
-top_build_prefix = @top_build_prefix@
-top_builddir = @top_builddir@
-top_srcdir = @top_srcdir@
-topdir = @topdir@
-SUBDIRS = ui
-dist_sources = \
-	oprof_start.cpp \
-	oprof_start_config.cpp \
-	oprof_start_util.cpp \
-	oprof_start_main.cpp \
-	oprof_start.h \
-	oprof_start_config.h \
-	oprof_start_util.h
-
-EXTRA_DIST = $(dist_sources)
-@have_qt_TRUE@AM_CPPFLAGS = \
-@have_qt_TRUE@	@QT_CFLAGS@ \
-@have_qt_TRUE@	-I ${top_srcdir}/libop \
-@have_qt_TRUE@	-I ${top_srcdir}/libutil++ \
-@have_qt_TRUE@	-I ${top_srcdir}/libutil \
-@have_qt_TRUE@	@OP_CPPFLAGS@
-
-@have_qt_TRUE@AM_CXXFLAGS = @OP_CXXFLAGS@
-@have_qt_TRUE@oprof_start_SOURCES = $(dist_sources)
-@have_qt_TRUE@nodist_oprof_start_SOURCES = oprof_start.moc.cpp
-@have_qt_TRUE@oprof_start_LDADD = \
-@have_qt_TRUE@	../libutil++/libutil++.a \
-@have_qt_TRUE@	../libop/libop.a \
-@have_qt_TRUE@	../libutil/libutil.a \
-@have_qt_TRUE@	ui/liboprof_start.a \
-@have_qt_TRUE@	@QT_LIBS@ \
-@have_qt_TRUE@	@X_LIBS@
-
-all: all-recursive
-
-.SUFFIXES:
-.SUFFIXES: .cpp .lo .o .obj
-$(srcdir)/Makefile.in:  $(srcdir)/Makefile.am  $(am__configure_deps)
-	@for dep in $?; do \
-	  case '$(am__configure_deps)' in \
-	    *$$dep*) \
-	      ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
-	        && { if test -f $@; then exit 0; else break; fi; }; \
-	      exit 1;; \
-	  esac; \
-	done; \
-	echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign gui/Makefile'; \
-	$(am__cd) $(top_srcdir) && \
-	  $(AUTOMAKE) --foreign gui/Makefile
-.PRECIOUS: Makefile
-Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
-	@case '$?' in \
-	  *config.status*) \
-	    cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
-	  *) \
-	    echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
-	    cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
-	esac;
-
-$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
-	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-
-$(top_srcdir)/configure:  $(am__configure_deps)
-	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(ACLOCAL_M4):  $(am__aclocal_m4_deps)
-	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(am__aclocal_m4_deps):
-install-binPROGRAMS: $(bin_PROGRAMS)
-	@$(NORMAL_INSTALL)
-	test -z "$(bindir)" || $(MKDIR_P) "$(DESTDIR)$(bindir)"
-	@list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \
-	for p in $$list; do echo "$$p $$p"; done | \
-	sed 's/$(EXEEXT)$$//' | \
-	while read p p1; do if test -f $$p || test -f $$p1; \
-	  then echo "$$p"; echo "$$p"; else :; fi; \
-	done | \
-	sed -e 'p;s,.*/,,;n;h' -e 's|.*|.|' \
-	    -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \
-	sed 'N;N;N;s,\n, ,g' | \
-	$(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \
-	  { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \
-	    if ($$2 == $$4) files[d] = files[d] " " $$1; \
-	    else { print "f", $$3 "/" $$4, $$1; } } \
-	  END { for (d in files) print "f", d, files[d] }' | \
-	while read type dir files; do \
-	    if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \
-	    test -z "$$files" || { \
-	    echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(bindir)$$dir'"; \
-	    $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(bindir)$$dir" || exit $$?; \
-	    } \
-	; done
-
-uninstall-binPROGRAMS:
-	@$(NORMAL_UNINSTALL)
-	@list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \
-	files=`for p in $$list; do echo "$$p"; done | \
-	  sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \
-	      -e 's/$$/$(EXEEXT)/' `; \
-	test -n "$$list" || exit 0; \
-	echo " ( cd '$(DESTDIR)$(bindir)' && rm -f" $$files ")"; \
-	cd "$(DESTDIR)$(bindir)" && rm -f $$files
-
-clean-binPROGRAMS:
-	@list='$(bin_PROGRAMS)'; test -n "$$list" || exit 0; \
-	echo " rm -f" $$list; \
-	rm -f $$list || exit $$?; \
-	test -n "$(EXEEXT)" || exit 0; \
-	list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
-	echo " rm -f" $$list; \
-	rm -f $$list
-oprof_start$(EXEEXT): $(oprof_start_OBJECTS) $(oprof_start_DEPENDENCIES) 
-	@rm -f oprof_start$(EXEEXT)
-	$(CXXLINK) $(oprof_start_OBJECTS) $(oprof_start_LDADD) $(LIBS)
-
-mostlyclean-compile:
-	-rm -f *.$(OBJEXT)
-
-distclean-compile:
-	-rm -f *.tab.c
-
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oprof_start.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oprof_start.moc.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oprof_start_config.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oprof_start_main.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oprof_start_util.Po@am__quote@
-
-.cpp.o:
-@am__fastdepCXX_TRUE@	$(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
-@am__fastdepCXX_TRUE@	$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@	source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@	$(CXXCOMPILE) -c -o $@ $<
-
-.cpp.obj:
-@am__fastdepCXX_TRUE@	$(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
-@am__fastdepCXX_TRUE@	$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@	source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@	$(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
-
-.cpp.lo:
-@am__fastdepCXX_TRUE@	$(LTCXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
-@am__fastdepCXX_TRUE@	$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@	source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@	$(LTCXXCOMPILE) -c -o $@ $<
-
-mostlyclean-libtool:
-	-rm -f *.lo
-
-clean-libtool:
-	-rm -rf .libs _libs
-
-# This directory's subdirectories are mostly independent; you can cd
-# into them and run `make' without going through this Makefile.
-# To change the values of `make' variables: instead of editing Makefiles,
-# (1) if the variable is set in `config.status', edit `config.status'
-#     (which will cause the Makefiles to be regenerated when you run `make');
-# (2) otherwise, pass the desired values on the `make' command line.
-$(RECURSIVE_TARGETS):
-	@fail= failcom='exit 1'; \
-	for f in x $$MAKEFLAGS; do \
-	  case $$f in \
-	    *=* | --[!k]*);; \
-	    *k*) failcom='fail=yes';; \
-	  esac; \
-	done; \
-	dot_seen=no; \
-	target=`echo $@ | sed s/-recursive//`; \
-	list='$(SUBDIRS)'; for subdir in $$list; do \
-	  echo "Making $$target in $$subdir"; \
-	  if test "$$subdir" = "."; then \
-	    dot_seen=yes; \
-	    local_target="$$target-am"; \
-	  else \
-	    local_target="$$target"; \
-	  fi; \
-	  ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
-	  || eval $$failcom; \
-	done; \
-	if test "$$dot_seen" = "no"; then \
-	  $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \
-	fi; test -z "$$fail"
-
-$(RECURSIVE_CLEAN_TARGETS):
-	@fail= failcom='exit 1'; \
-	for f in x $$MAKEFLAGS; do \
-	  case $$f in \
-	    *=* | --[!k]*);; \
-	    *k*) failcom='fail=yes';; \
-	  esac; \
-	done; \
-	dot_seen=no; \
-	case "$@" in \
-	  distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \
-	  *) list='$(SUBDIRS)' ;; \
-	esac; \
-	rev=''; for subdir in $$list; do \
-	  if test "$$subdir" = "."; then :; else \
-	    rev="$$subdir $$rev"; \
-	  fi; \
-	done; \
-	rev="$$rev ."; \
-	target=`echo $@ | sed s/-recursive//`; \
-	for subdir in $$rev; do \
-	  echo "Making $$target in $$subdir"; \
-	  if test "$$subdir" = "."; then \
-	    local_target="$$target-am"; \
-	  else \
-	    local_target="$$target"; \
-	  fi; \
-	  ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
-	  || eval $$failcom; \
-	done && test -z "$$fail"
-tags-recursive:
-	list='$(SUBDIRS)'; for subdir in $$list; do \
-	  test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \
-	done
-ctags-recursive:
-	list='$(SUBDIRS)'; for subdir in $$list; do \
-	  test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) ctags); \
-	done
-
-ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
-	list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
-	unique=`for i in $$list; do \
-	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
-	  done | \
-	  $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
-	      END { if (nonempty) { for (i in files) print i; }; }'`; \
-	mkid -fID $$unique
-tags: TAGS
-
-TAGS: tags-recursive $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
-		$(TAGS_FILES) $(LISP)
-	set x; \
-	here=`pwd`; \
-	if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \
-	  include_option=--etags-include; \
-	  empty_fix=.; \
-	else \
-	  include_option=--include; \
-	  empty_fix=; \
-	fi; \
-	list='$(SUBDIRS)'; for subdir in $$list; do \
-	  if test "$$subdir" = .; then :; else \
-	    test ! -f $$subdir/TAGS || \
-	      set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \
-	  fi; \
-	done; \
-	list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
-	unique=`for i in $$list; do \
-	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
-	  done | \
-	  $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
-	      END { if (nonempty) { for (i in files) print i; }; }'`; \
-	shift; \
-	if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
-	  test -n "$$unique" || unique=$$empty_fix; \
-	  if test $$# -gt 0; then \
-	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
-	      "$$@" $$unique; \
-	  else \
-	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
-	      $$unique; \
-	  fi; \
-	fi
-ctags: CTAGS
-CTAGS: ctags-recursive $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
-		$(TAGS_FILES) $(LISP)
-	list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
-	unique=`for i in $$list; do \
-	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
-	  done | \
-	  $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
-	      END { if (nonempty) { for (i in files) print i; }; }'`; \
-	test -z "$(CTAGS_ARGS)$$unique" \
-	  || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
-	     $$unique
-
-GTAGS:
-	here=`$(am__cd) $(top_builddir) && pwd` \
-	  && $(am__cd) $(top_srcdir) \
-	  && gtags -i $(GTAGS_ARGS) "$$here"
-
-distclean-tags:
-	-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
-
-distdir: $(DISTFILES)
-	@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
-	topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
-	list='$(DISTFILES)'; \
-	  dist_files=`for file in $$list; do echo $$file; done | \
-	  sed -e "s|^$$srcdirstrip/||;t" \
-	      -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
-	case $$dist_files in \
-	  */*) $(MKDIR_P) `echo "$$dist_files" | \
-			   sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
-			   sort -u` ;; \
-	esac; \
-	for file in $$dist_files; do \
-	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
-	  if test -d $$d/$$file; then \
-	    dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
-	    if test -d "$(distdir)/$$file"; then \
-	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
-	    fi; \
-	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
-	      cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
-	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
-	    fi; \
-	    cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
-	  else \
-	    test -f "$(distdir)/$$file" \
-	    || cp -p $$d/$$file "$(distdir)/$$file" \
-	    || exit 1; \
-	  fi; \
-	done
-	@list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
-	  if test "$$subdir" = .; then :; else \
-	    test -d "$(distdir)/$$subdir" \
-	    || $(MKDIR_P) "$(distdir)/$$subdir" \
-	    || exit 1; \
-	  fi; \
-	done
-	@list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
-	  if test "$$subdir" = .; then :; else \
-	    dir1=$$subdir; dir2="$(distdir)/$$subdir"; \
-	    $(am__relativize); \
-	    new_distdir=$$reldir; \
-	    dir1=$$subdir; dir2="$(top_distdir)"; \
-	    $(am__relativize); \
-	    new_top_distdir=$$reldir; \
-	    echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \
-	    echo "     am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \
-	    ($(am__cd) $$subdir && \
-	      $(MAKE) $(AM_MAKEFLAGS) \
-	        top_distdir="$$new_top_distdir" \
-	        distdir="$$new_distdir" \
-		am__remove_distdir=: \
-		am__skip_length_check=: \
-		am__skip_mode_fix=: \
-	        distdir) \
-	      || exit 1; \
-	  fi; \
-	done
-check-am: all-am
-check: check-recursive
-all-am: Makefile $(PROGRAMS)
-installdirs: installdirs-recursive
-installdirs-am:
-	for dir in "$(DESTDIR)$(bindir)"; do \
-	  test -z "$$dir" || $(MKDIR_P) "$$dir"; \
-	done
-install: install-recursive
-install-exec: install-exec-recursive
-install-data: install-data-recursive
-uninstall: uninstall-recursive
-
-install-am: all-am
-	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
-
-installcheck: installcheck-recursive
-install-strip:
-	$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
-	  install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
-	  `test -z '$(STRIP)' || \
-	    echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
-mostlyclean-generic:
-
-clean-generic:
-
-distclean-generic:
-	-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
-	-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
-
-maintainer-clean-generic:
-	@echo "This command is intended for maintainers to use"
-	@echo "it deletes files that may require special tools to rebuild."
-@have_qt_FALSE@clean-local:
-clean: clean-recursive
-
-clean-am: clean-binPROGRAMS clean-generic clean-libtool clean-local \
-	mostlyclean-am
-
-distclean: distclean-recursive
-	-rm -rf ./$(DEPDIR)
-	-rm -f Makefile
-distclean-am: clean-am distclean-compile distclean-generic \
-	distclean-tags
-
-dvi: dvi-recursive
-
-dvi-am:
-
-html: html-recursive
-
-html-am:
-
-info: info-recursive
-
-info-am:
-
-install-data-am:
-
-install-dvi: install-dvi-recursive
-
-install-dvi-am:
-
-install-exec-am: install-binPROGRAMS
-
-install-html: install-html-recursive
-
-install-html-am:
-
-install-info: install-info-recursive
-
-install-info-am:
-
-install-man:
-
-install-pdf: install-pdf-recursive
-
-install-pdf-am:
-
-install-ps: install-ps-recursive
-
-install-ps-am:
-
-installcheck-am:
-
-maintainer-clean: maintainer-clean-recursive
-	-rm -rf ./$(DEPDIR)
-	-rm -f Makefile
-maintainer-clean-am: distclean-am maintainer-clean-generic
-
-mostlyclean: mostlyclean-recursive
-
-mostlyclean-am: mostlyclean-compile mostlyclean-generic \
-	mostlyclean-libtool
-
-pdf: pdf-recursive
-
-pdf-am:
-
-ps: ps-recursive
-
-ps-am:
-
-uninstall-am: uninstall-binPROGRAMS
-
-.MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) ctags-recursive \
-	install-am install-strip tags-recursive
-
-.PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \
-	all all-am check check-am clean clean-binPROGRAMS \
-	clean-generic clean-libtool clean-local ctags ctags-recursive \
-	distclean distclean-compile distclean-generic \
-	distclean-libtool distclean-tags distdir dvi dvi-am html \
-	html-am info info-am install install-am install-binPROGRAMS \
-	install-data install-data-am install-dvi install-dvi-am \
-	install-exec install-exec-am install-html install-html-am \
-	install-info install-info-am install-man install-pdf \
-	install-pdf-am install-ps install-ps-am install-strip \
-	installcheck installcheck-am installdirs installdirs-am \
-	maintainer-clean maintainer-clean-generic mostlyclean \
-	mostlyclean-compile mostlyclean-generic mostlyclean-libtool \
-	pdf pdf-am ps ps-am tags tags-recursive uninstall uninstall-am \
-	uninstall-binPROGRAMS
-
-
-@have_qt_TRUE@oprof_start.moc.cpp: ${top_srcdir}/gui/oprof_start.h
-@have_qt_TRUE@	$(MOC) -o $@ ${top_srcdir}/gui/oprof_start.h
-
-@have_qt_TRUE@clean-local:
-@have_qt_TRUE@	rm -f oprof_start.moc.cpp
-
-# Tell versions [3.59,3.63) of GNU make to not export all variables.
-# Otherwise a system limit (for SysV at least) may be exceeded.
-.NOEXPORT:
diff --git a/gui/oprof_start.cpp b/gui/oprof_start.cpp
deleted file mode 100644
index 725b215..0000000
--- a/gui/oprof_start.cpp
+++ /dev/null
@@ -1,1087 +0,0 @@
-/**
- * @file oprof_start.cpp
- * The GUI start main class
- *
- * @remark Copyright 2002 OProfile authors
- * @remark Read the file COPYING
- *
- * @author Philippe Elie
- * @author John Levon
- */
-
-#include <sys/stat.h>
-#include <unistd.h>
-
-#include <ctime>
-#include <cstdio>
-#include <cmath>
-#include <sstream>
-#include <iostream>
-#include <fstream>
-#include <algorithm>
-
-#if QT3_SUPPORT
-#include <Qt/qlineedit.h>
-#include <Qt/qcheckbox.h>
-#include <Qt/qtabwidget.h>
-#include <Qt/qmessagebox.h>
-#include <Qt/qvalidator.h>
-#include <Qt/qlabel.h>
-#include <Qt/qpushbutton.h>
-#include <Qt/q3listview.h>
-#include <Qt/q3combobox.h>
-#include <Qt/q3listbox.h>
-#include <Qt/q3filedialog.h>
-#include <Qt/q3buttongroup.h>
-#include <Qt/q3header.h>
-#else
-#include <qlineedit.h>
-#include <qcheckbox.h>
-#include <qtabwidget.h>
-#include <qmessagebox.h>
-#include <qvalidator.h>
-#include <qlabel.h>
-#include <qpushbutton.h>
-#include <qlistview.h>
-#include <qcombobox.h>
-#include <qlistbox.h>
-#include <qfiledialog.h>
-#include <qbuttongroup.h>
-#include <qheader.h>
-#define Q3ListView QListView
-#endif
-
-#include "oprof_start.h"
-#include "op_config.h"
-#include "op_config_24.h"
-#include "string_manip.h"
-#include "op_cpufreq.h"
-#include "op_alloc_counter.h"
-#include "oprof_start_util.h"
-#include "file_manip.h"
-
-#include "op_hw_config.h"
-
-using namespace std;
-
-static char const * green_xpm[] = {
-"16 16 2 1",
-" 	c None",
-".	c #00FF00",
-"    .......     ",
-"  ...........   ",
-" .............  ",
-" .............  ",
-"............... ",
-"............... ",
-"............... ",
-"............... ",
-"............... ",
-"............... ",
-"............... ",
-" .............  ",
-" .............  ",
-"  ...........   ",
-"    .......     ",
-"                " };
-
-static char const * red_xpm[] = {
-"16 16 2 1",
-" 	c None",
-".	c #FF0000",
-"    .......     ",
-"  ...........   ",
-" .............  ",
-" .............  ",
-"............... ",
-"............... ",
-"............... ",
-"............... ",
-"............... ",
-"............... ",
-"............... ",
-" .............  ",
-" .............  ",
-"  ...........   ",
-"    .......     ",
-"                " };
-
-static QPixmap * green_pixmap;
-static QPixmap * red_pixmap;
-
-
-op_event_descr::op_event_descr()
-	:
-	counter_mask(0),
-	val(0),
-	unit(0),
-	min_count(0)
-{
-}
-
-
-oprof_start::oprof_start()
-	:
-	oprof_start_base(0, 0, false, 0),
-	event_count_validator(new QIntValidator(event_count_edit)),
-	current_event(0),
-	cpu_speed(op_cpu_frequency()),
-	total_nr_interrupts(0)
-{
-	green_pixmap = new QPixmap(green_xpm);
-	red_pixmap = new QPixmap(red_xpm);
-	vector<string> args;
-	args.push_back("--init");
-
-	if (do_exec_command(OP_BINDIR "/opcontrol", args))
-		exit(EXIT_FAILURE);
-
-	cpu_type = op_get_cpu_type();
-	op_nr_counters = op_get_nr_counters(cpu_type);
-
-	if (cpu_type == CPU_TIMER_INT) {
-		setup_config_tab->removePage(counter_setup_page);
-	} else {
-		fill_events();
-	}
-
-	op_interface interface = op_get_interface();
-	if (interface == OP_INTERFACE_NO_GOOD) {
-		QMessageBox::warning(this, 0, "Couldn't determine kernel"
-		                     " interface version");
-		exit(EXIT_FAILURE);
-	}
-	bool is_26 = interface == OP_INTERFACE_26;
-
-	if (is_26) {
-		note_table_size_edit->hide();
-		note_table_size_label->hide();
-		if (!op_file_readable("/dev/oprofile/backtrace_depth")) {
-			callgraph_depth_label->hide();
-			callgraph_depth_edit->hide();
-		}
-	} else {
-		callgraph_depth_label->hide();
-		callgraph_depth_edit->hide();
-		buffer_watershed_label->hide();
-		buffer_watershed_edit->hide();
-		cpu_buffer_size_label->hide();
-		cpu_buffer_size_edit->hide();
-	}
-
-	// setup the configuration page.
-	kernel_filename_edit->setText(config.kernel_filename.c_str());
-
-	no_vmlinux->setChecked(config.no_kernel);
-
-	buffer_size_edit->setText(QString().setNum(config.buffer_size));
-	buffer_watershed_edit->setText(QString().setNum(config.buffer_watershed));
-	cpu_buffer_size_edit->setText(QString().setNum(config.cpu_buffer_size));
-	note_table_size_edit->setText(QString().setNum(config.note_table_size));
-	callgraph_depth_edit->setText(QString().setNum(config.callgraph_depth));
-	verbose->setChecked(config.verbose);
-	separate_lib_cb->setChecked(config.separate_lib);
-	separate_kernel_cb->setChecked(config.separate_kernel);
-	separate_cpu_cb->setChecked(config.separate_cpu);
-	separate_thread_cb->setChecked(config.separate_thread);
-
-	// the unit mask check boxes
-	hide_masks();
-
-	event_count_edit->setValidator(event_count_validator);
-	QIntValidator * iv;
-	iv = new QIntValidator(OP_MIN_BUF_SIZE, OP_MAX_BUF_SIZE, buffer_size_edit);
-	buffer_size_edit->setValidator(iv);
-	iv = new QIntValidator(OP_MIN_NOTE_TABLE_SIZE, OP_MAX_NOTE_TABLE_SIZE, note_table_size_edit);
-	note_table_size_edit->setValidator(iv);
-	iv = new QIntValidator(0, INT_MAX, callgraph_depth_edit);
-	callgraph_depth_edit->setValidator(iv);
-	iv = new QIntValidator(0, INT_MAX, buffer_watershed_edit);
-	buffer_watershed_edit->setValidator(iv);
-	iv = new QIntValidator(0, OP_MAX_CPU_BUF_SIZE, cpu_buffer_size_edit);
-	cpu_buffer_size_edit->setValidator(iv);
-
-	// daemon status timer
-	startTimer(5000);
-	timerEvent(0);
-
-	resize(minimumSizeHint());
-
-	// force the pixmap re-draw
-	event_selected();
-}
-
-
-void oprof_start::fill_events()
-{
-	// we need to build the event descr stuff before loading the
-	// configuration because we use locate_event to get an event descr
-	// from its name.
-	struct list_head * pos;
-	struct list_head * events = op_events(cpu_type);
-
-	list_for_each(pos, events) {
-		struct op_event * event = list_entry(pos, struct op_event, event_next);
-
-		op_event_descr descr;
-
-		descr.counter_mask = event->counter_mask;
-		descr.val = event->val;
-		if (event->unit->num) {
-			descr.unit = event->unit;
-		} else {
-			descr.unit = 0;
-		}
-
-		descr.name = event->name;
-		descr.help_str = event->desc;
-		descr.min_count = event->min_count;
-
-		for (uint ctr = 0; ctr < op_nr_counters; ++ctr) {
-			uint count;
-
-			if (!(descr.counter_mask & (1 << ctr)))
-				continue;
-
-			if (cpu_type == CPU_RTC) {
-				count = 1024;
-			} else {
-				/* setting to cpu Hz / 2000 gives a safe value for
-				 * all events, and a good one for most.
-				 */
-				if (cpu_speed)
-					count = int(cpu_speed * 500);
-				else
-					count = descr.min_count * 100;
-			}
-
-			event_cfgs[descr.name].count = count;
-			event_cfgs[descr.name].umask = 0;
-			if (descr.unit)
-				event_cfgs[descr.name].umask = descr.unit->default_mask;
-			event_cfgs[descr.name].os_ring_count = 1;
-			event_cfgs[descr.name].user_ring_count = 1;
-		}
-
-		v_events.push_back(descr);
-	}
-
-	events_list->header()->hide();
-	events_list->setSorting(-1);
-
-	fill_events_listbox();
-
-	read_set_events();
-
-	// FIXME: why this ?
-	if (cpu_type == CPU_RTC)
-		events_list->setCurrentItem(events_list->firstChild());
-
-	load_config_file();
-}
-
-
-namespace {
-
-/// find the first item with the given text in column 0 or return NULL
-Q3ListViewItem * findItem(Q3ListView * view, char const * name)
-{
-	// Qt 2.3.1 does not have QListView::findItem()
-	Q3ListViewItem * item = view->firstChild();
-
-	while (item && strcmp(item->text(0).latin1(), name))
-		item = item->nextSibling();
-
-	return item;
-}
-
-};
-
-
-void oprof_start::setup_default_event()
-{
-	struct op_default_event_descr descr;
-	op_default_event(cpu_type, &descr);
-
-	event_cfgs[descr.name].umask = descr.um;
-	event_cfgs[descr.name].count = descr.count;
-	event_cfgs[descr.name].user_ring_count = 1;
-	event_cfgs[descr.name].os_ring_count = 1;
-
-	Q3ListViewItem * item = findItem(events_list, descr.name);
-	if (item)
-		item->setSelected(true);
-}
-
-
-void oprof_start::read_set_events()
-{
-	string name = get_config_filename(".oprofile/daemonrc");
-
-	ifstream in(name.c_str());
-
-	if (!in) {
-		setup_default_event();
-		return;
-	}
-
-	string str;
-
-	bool one_enabled = false;
-
-	while (getline(in, str)) {
-		string const val = split(str, '=');
-		string const name = str;
-
-		if (!is_prefix(name, "CHOSEN_EVENTS_"))
-			continue;
-
-		one_enabled = true;
-
-		// CHOSEN_EVENTS_#nr=CPU_CLK_UNHALTED:10000:0:1:1
-		vector<string> parts = separate_token(val, ':');
-
-		if (parts.size() != 5 && parts.size() != 2) {
-			cerr << "invalid configuration file\n";
-			// FIXME
-			exit(EXIT_FAILURE);
-		}
-
-		string ev_name = parts[0];
-		event_cfgs[ev_name].count =
-			op_lexical_cast<unsigned int>(parts[1]);
-
-		// CPU_CLK_UNHALTED:10000 is also valid
-		if (parts.size() == 5) {
-			event_cfgs[ev_name].umask =
-				op_lexical_cast<unsigned int>(parts[2]);
-			event_cfgs[ev_name].user_ring_count =
-				op_lexical_cast<unsigned int>(parts[3]);
-			event_cfgs[ev_name].os_ring_count =
-				op_lexical_cast<unsigned int>(parts[4]);
-		} else {
-			event_cfgs[ev_name].umask = 0;
-			event_cfgs[ev_name].user_ring_count = 1;
-			event_cfgs[ev_name].os_ring_count = 1;
-		}
-
-		Q3ListViewItem * item = findItem(events_list, ev_name.c_str());
-		if (item)
-			item->setSelected(true);
-	}
-
-	// use default event if none set
-	if (!one_enabled)
-		setup_default_event();
-}
-
-
-void oprof_start::load_config_file()
-{
-	string name = get_config_filename(".oprofile/daemonrc");
-
-	ifstream in(name.c_str());
-	if (!in) {
-		if (!check_and_create_config_dir())
-			return;
-
-		ofstream out(name.c_str());
-		if (!out) {
-			QMessageBox::warning(this, 0, "Unable to open configuration "
-				"file ~/.oprofile/daemonrc");
-		}
-		return;
-	}
-
-	in >> config;
-}
-
-
-// user request a "normal" exit so save the config file.
-void oprof_start::accept()
-{
-	// record the previous settings
-	record_selected_event_config();
-
-	save_config();
-
-	QDialog::accept();
-}
-
-
-void oprof_start::closeEvent(QCloseEvent *)
-{
-	accept();
-}
-
-
-void oprof_start::timerEvent(QTimerEvent *)
-{
-	static time_t last = time(0);
-
-	daemon_status dstat;
-
-	flush_profiler_data_btn->setEnabled(dstat.running);
-	stop_profiler_btn->setEnabled(dstat.running);
-	start_profiler_btn->setEnabled(!dstat.running);
-	reset_sample_files_btn->setEnabled(!dstat.running);
-
-	if (!dstat.running) {
-		daemon_label->setText("Profiler is not running.");
-		return;
-	}
-
-	ostringstream ss;
-	ss << "Profiler running:";
-
-	time_t curr = time(0);
-	total_nr_interrupts += dstat.nr_interrupts;
-
-	if (curr - last)
-		ss << " (" << dstat.nr_interrupts / (curr - last) << " interrupts / second, total " << total_nr_interrupts << ")";
-
-	daemon_label->setText(ss.str().c_str());
-
-	last = curr;
-}
-
-
-void oprof_start::fill_events_listbox()
-{
-	setUpdatesEnabled(false);
-
-	for (vector<op_event_descr>::reverse_iterator cit = v_events.rbegin();
-	     cit != v_events.rend(); ++cit) {
-		new Q3ListViewItem(events_list, cit->name.c_str());
-	}
-
-	setUpdatesEnabled(true);
-	update();
-}
-
-
-void oprof_start::display_event(op_event_descr const & descr)
-{
-	setUpdatesEnabled(false);
-
-	setup_unit_masks(descr);
-	os_ring_count_cb->setEnabled(true);
-	user_ring_count_cb->setEnabled(true);
-	event_count_edit->setEnabled(true);
-
-	event_setting & cfg = event_cfgs[descr.name];
-
-	os_ring_count_cb->setChecked(cfg.os_ring_count);
-	user_ring_count_cb->setChecked(cfg.user_ring_count);
-	QString count_text;
-	count_text.setNum(cfg.count);
-	event_count_edit->setText(count_text);
-	event_count_validator->setRange(descr.min_count, max_perf_count());
-
-	setUpdatesEnabled(true);
-	update();
-}
-
-
-bool oprof_start::is_selectable_event(Q3ListViewItem * item)
-{
-	if (item->isSelected())
-		return true;
-
-	selected_events.insert(item);
-
-	bool ret = false;
-	if (alloc_selected_events())
-		ret = true;
-
-	selected_events.erase(item);
-
-	return ret;
-}
-
-
-void oprof_start::draw_event_list()
-{
-	Q3ListViewItem * cur;
-	for (cur = events_list->firstChild(); cur; cur = cur->nextSibling()) {
-		if (is_selectable_event(cur))
-			cur->setPixmap(0, *green_pixmap);
-		else
-			cur->setPixmap(0, *red_pixmap);
-	}
-}
-
-
-bool oprof_start::alloc_selected_events() const
-{
-	vector<op_event const *> events;
-
-	set<Q3ListViewItem *>::const_iterator it;
-	for (it = selected_events.begin(); it != selected_events.end(); ++it)
-		events.push_back(find_event_by_name((*it)->text(0).latin1(),0,0));
-
-	size_t * map =
-		map_event_to_counter(&events[0], events.size(), cpu_type);
-
-	if (!map)
-		return false;
-
-	free(map);
-	return true;
-}
-
-void oprof_start::event_selected()
-{
-	// The deal is simple: QT lack of a way to know what item was the last
-	// (de)selected item so we record a set of selected items and diff
-	// it in the appropriate way with the previous list of selected items.
-
-	set<Q3ListViewItem *> current_selection;
-	Q3ListViewItem * cur;
-	for (cur = events_list->firstChild(); cur; cur = cur->nextSibling()) {
-		if (cur->isSelected())
-			current_selection.insert(cur);
-	}
-
-	// First remove the deselected item.
-	vector<Q3ListViewItem *> new_deselected;
-	set_difference(selected_events.begin(), selected_events.end(),
-		       current_selection.begin(), current_selection.end(),
-		       back_inserter(new_deselected));
-	vector<Q3ListViewItem *>::const_iterator it;
-	for (it = new_deselected.begin(); it != new_deselected.end(); ++it)
-		selected_events.erase(*it);
-
-	// Now try to add the newly selected item if enough HW resource exists
-	vector<Q3ListViewItem *> new_selected;
-	set_difference(current_selection.begin(), current_selection.end(),
-		       selected_events.begin(), selected_events.end(),
-		       back_inserter(new_selected));
-	for (it = new_selected.begin(); it != new_selected.end(); ++it) {
-		selected_events.insert(*it);
-		if (!alloc_selected_events()) {
-			(*it)->setSelected(false);
-			selected_events.erase(*it);
-		} else {
-			current_event = *it;
-		}
-	}
-
-	draw_event_list();
-
-	if (current_event)
-		display_event(locate_event(current_event->text(0).latin1()));
-}
-
-
-void oprof_start::event_over(Q3ListViewItem * item)
-{
-	op_event_descr const & descr = locate_event(item->text(0).latin1());
-
-	string help_str = descr.help_str.c_str();
-	if (!is_selectable_event(item)) {
-		help_str += " conflicts with:";
-
-		set<Q3ListViewItem *>::const_iterator it;
-		for (it = selected_events.begin(); 
-		     it != selected_events.end(); ) {
-			Q3ListViewItem * temp = *it;
-			selected_events.erase(it++);
-			if (is_selectable_event(item)) {
-				help_str += " ";
-				help_str += temp->text(0).latin1();
-			}
-			selected_events.insert(temp);
-		}
-	}
-
-	event_help_label->setText(help_str.c_str());
-}
-
-
-/// select the kernel image filename
-void oprof_start::choose_kernel_filename()
-{
-	string name = kernel_filename_edit->text().latin1();
-	string result = do_open_file_or_dir(name, false);
-
-	if (!result.empty())
-		kernel_filename_edit->setText(result.c_str());
-}
-
-
-// this record the current selected event setting in the event_cfg[] stuff.
-// FIXME: need validation?
-void oprof_start::record_selected_event_config()
-{
-	if (!current_event)
-		return;
-
-	string name(current_event->text(0).latin1());
-
-	event_setting & cfg = event_cfgs[name];
-	op_event_descr const & curr = locate_event(name);
-
-	cfg.count = event_count_edit->text().toUInt();
-	cfg.os_ring_count = os_ring_count_cb->isChecked();
-	cfg.user_ring_count = user_ring_count_cb->isChecked();
-	cfg.umask = get_unit_mask(curr);
-}
-
-
-// validate and save the configuration (The qt validator installed
-// are not sufficient to do the validation)
-bool oprof_start::record_config()
-{
-	config.kernel_filename = kernel_filename_edit->text().latin1();
-	config.no_kernel = no_vmlinux->isChecked();
-
-	uint temp = buffer_size_edit->text().toUInt();
-	if (temp < OP_MIN_BUF_SIZE || temp > OP_MAX_BUF_SIZE) {
-		ostringstream error;
-
-		error << "buffer size out of range: " << temp
-		      << " valid range is [" << OP_MIN_BUF_SIZE << ", "
-		      << OP_MAX_BUF_SIZE << "]";
-
-		QMessageBox::warning(this, 0, error.str().c_str());
-
-		return false;
-	}
-	config.buffer_size = temp;
-
-	temp = buffer_watershed_edit->text().toUInt();
-	// watershed above half of buffer size make little sense.
-	if (temp > config.buffer_size / 2) {
-		ostringstream error;
-
-		error << "buffer watershed out of range: " << temp
-		      << " valid range is [0 (use default), buffer size/2] "
-		      << "generally 0.25 * buffer size is fine";
-
-		QMessageBox::warning(this, 0, error.str().c_str());
-
-		return false;
-	}
-	config.buffer_watershed = temp;
-
-	temp = cpu_buffer_size_edit->text().toUInt();
-	if ((temp != 0 && temp < OP_MIN_CPU_BUF_SIZE) ||
-	    temp > OP_MAX_CPU_BUF_SIZE) {
-		ostringstream error;
-
-		error << "cpu buffer size out of range: " << temp
-		      << " valid range is [" << OP_MIN_CPU_BUF_SIZE << ", "
-		      << OP_MAX_CPU_BUF_SIZE << "] (size = 0: use default)";
-
-		QMessageBox::warning(this, 0, error.str().c_str());
-
-		return false;
-	}
-	config.cpu_buffer_size = temp;
-
-	temp = note_table_size_edit->text().toUInt();
-	if (temp < OP_MIN_NOTE_TABLE_SIZE || temp > OP_MAX_NOTE_TABLE_SIZE) {
-		ostringstream error;
-
-		error << "note table size out of range: " << temp
-		      << " valid range is [" << OP_MIN_NOTE_TABLE_SIZE << ", "
-		      << OP_MAX_NOTE_TABLE_SIZE << "]";
-
-		QMessageBox::warning(this, 0, error.str().c_str());
-
-		return false;
-	}
-	config.note_table_size = temp;
-
-	temp = callgraph_depth_edit->text().toUInt();
-	if (temp > INT_MAX) {
-		ostringstream error;
-
-		error << "callgraph depth  out of range: " << temp
-		      << " valid range is [" << 0 << ", "
-		      << INT_MAX << "]";
-
-		QMessageBox::warning(this, 0, error.str().c_str());
-
-		return false;
-	}
-	config.callgraph_depth = temp;
-
-	config.verbose = verbose->isChecked();
-	config.separate_lib = separate_lib_cb->isChecked();
-	config.separate_kernel = separate_kernel_cb->isChecked();
-	config.separate_cpu = separate_cpu_cb->isChecked();
-	config.separate_thread = separate_thread_cb->isChecked();
-
-	return true;
-}
-
-
-void oprof_start::get_unit_mask_part(op_event_descr const & descr, uint num,
-                                     bool selected, uint & mask)
-{
-	if (!selected)
-		return;
-	if  (num >= descr.unit->num)
-		return;
-
-	if (descr.unit->unit_type_mask == utm_bitmask)
-		mask |= descr.unit->um[num].value;
-	else
-		mask = descr.unit->um[num].value;
-}
-
-
-// return the unit mask selected through the unit mask check box
-uint oprof_start::get_unit_mask(op_event_descr const & descr)
-{
-	uint mask = 0;
-
-	if (!descr.unit)
-		return 0;
-
-	// mandatory mask is transparent for user.
-	if (descr.unit->unit_type_mask == utm_mandatory) {
-		mask = descr.unit->default_mask;
-		return mask;
-	}
-
-	get_unit_mask_part(descr, 0, check0->isChecked(), mask);
-	get_unit_mask_part(descr, 1, check1->isChecked(), mask);
-	get_unit_mask_part(descr, 2, check2->isChecked(), mask);
-	get_unit_mask_part(descr, 3, check3->isChecked(), mask);
-	get_unit_mask_part(descr, 4, check4->isChecked(), mask);
-	get_unit_mask_part(descr, 5, check5->isChecked(), mask);
-	get_unit_mask_part(descr, 6, check6->isChecked(), mask);
-	get_unit_mask_part(descr, 7, check7->isChecked(), mask);
-	get_unit_mask_part(descr, 8, check8->isChecked(), mask);
-	get_unit_mask_part(descr, 9, check9->isChecked(), mask);
-	get_unit_mask_part(descr, 10, check10->isChecked(), mask);
-	get_unit_mask_part(descr, 11, check11->isChecked(), mask);
-	get_unit_mask_part(descr, 12, check12->isChecked(), mask);
-	get_unit_mask_part(descr, 13, check13->isChecked(), mask);
-	get_unit_mask_part(descr, 14, check14->isChecked(), mask);
-	get_unit_mask_part(descr, 15, check15->isChecked(), mask);
-	return mask;
-}
-
-
-void oprof_start::hide_masks()
-{
-	check0->hide();
-	check1->hide();
-	check2->hide();
-	check3->hide();
-	check4->hide();
-	check5->hide();
-	check6->hide();
-	check7->hide();
-	check8->hide();
-	check9->hide();
-	check10->hide();
-	check11->hide();
-	check12->hide();
-	check13->hide();
-	check14->hide();
-	check15->hide();
-}
-
-
-void oprof_start::setup_unit_masks(op_event_descr const & descr)
-{
-	op_unit_mask const * um = descr.unit;
-
-	hide_masks();
-
-	if (!um || um->unit_type_mask == utm_mandatory)
-		return;
-
-	event_setting & cfg = event_cfgs[descr.name];
-
-	unit_mask_group->setExclusive(um->unit_type_mask == utm_exclusive);
-
-	for (size_t i = 0; i < um->num ; ++i) {
-		QCheckBox * check = 0;
-		switch (i) {
-			case 0: check = check0; break;
-			case 1: check = check1; break;
-			case 2: check = check2; break;
-			case 3: check = check3; break;
-			case 4: check = check4; break;
-			case 5: check = check5; break;
-			case 6: check = check6; break;
-			case 7: check = check7; break;
-			case 8: check = check8; break;
-			case 9: check = check9; break;
-			case 10: check = check10; break;
-			case 11: check = check11; break;
-			case 12: check = check12; break;
-			case 13: check = check13; break;
-			case 14: check = check14; break;
-			case 15: check = check15; break;
-		}
-		check->setText(um->um[i].desc);
-		if (um->unit_type_mask == utm_exclusive)
-			check->setChecked(cfg.umask == um->um[i].value);
-		else
-			check->setChecked(cfg.umask & um->um[i].value);
-
-		check->show();
-	}
-	unit_mask_group->setMinimumSize(unit_mask_group->sizeHint());
-	setup_config_tab->setMinimumSize(setup_config_tab->sizeHint());
-}
-
-
-uint oprof_start::max_perf_count() const
-{
-	return cpu_type == CPU_RTC ? OP_MAX_RTC_COUNT : OP_MAX_PERF_COUNT;
-}
-
-
-void oprof_start::on_flush_profiler_data()
-{
-	vector<string> args;
-	args.push_back("--dump");
-
-	if (daemon_status().running)
-		do_exec_command(OP_BINDIR "/opcontrol", args);
-	else
-		QMessageBox::warning(this, 0, "The profiler is not started.");
-}
-
-
-// user is happy of its setting.
-void oprof_start::on_start_profiler()
-{
-	// save the current settings
-	record_selected_event_config();
-
-	bool one_enable = false;
-
-	Q3ListViewItem * cur;
-	for (cur = events_list->firstChild(); cur; cur = cur->nextSibling()) {
-		if (!cur->isSelected())
-			continue;
-
-		// the missing reference is intended: gcc 2.91.66 can compile
-		// "op_event_descr const & descr = ..." w/o a warning
-		op_event_descr const descr =
-			locate_event(cur->text(0).latin1());
-
-		event_setting & cfg = event_cfgs[cur->text(0).latin1()];
-
-		one_enable = true;
-
-		if (!cfg.os_ring_count && !cfg.user_ring_count) {
-			QMessageBox::warning(this, 0, "You must select to "
-					 "profile at least one of user binaries/kernel");
-			return;
-		}
-
-		if (cfg.count < descr.min_count || 
-		    cfg.count > max_perf_count()) {
-			ostringstream out;
-
-			out << "event " << descr.name << " count of range: "
-			    << cfg.count << " must be in [ "
-			    << descr.min_count << ", "
-			    << max_perf_count()
-			    << "]";
-
-			QMessageBox::warning(this, 0, out.str().c_str());
-			return;
-		}
-
-		if (descr.unit &&
-		    descr.unit->unit_type_mask == utm_bitmask &&
-		    cfg.umask == 0) {
-			ostringstream out;
-
-			out << "event " << descr.name << " invalid unit mask: "
-			    << cfg.umask << endl;
-
-			QMessageBox::warning(this, 0, out.str().c_str());
-			return;
-		}
-	}
-
-	if (one_enable == false && cpu_type != CPU_TIMER_INT) {
-		QMessageBox::warning(this, 0, "No counters enabled.\n");
-		return;
-	}
-
-	if (daemon_status().running) {
-		// gcc 2.91 work around
-		int user_choice = 0;
-		user_choice =
-			QMessageBox::warning(this, 0,
-					     "Profiler already started:\n\n"
-					     "stop and restart it?",
-					     "&Restart", "&Cancel", 0, 0, 1);
-
-		if (user_choice == 1)
-			return;
-
-		// this flush profiler data also.
-		on_stop_profiler();
-	}
-
-	vector<string> args;
-
-	// save_config validate and setup the config
-	if (save_config()) {
-		// now actually start
-		args.push_back("--start");
-		if (config.verbose)
-			args.push_back("--verbose");
-		do_exec_command(OP_BINDIR "/opcontrol", args);
-	}
-
-	total_nr_interrupts = 0;
-	timerEvent(0);
-}
-
-
-bool oprof_start::save_config()
-{
-	if (!record_config())
-		return false;
-
-	vector<string> args;
-
-	// saving config is done by running opcontrol --setup with appropriate
-	// setted parameters so we use the same config file as command line
-	// tools
-
-	args.push_back("--setup");
-
-	bool one_enabled = false;
-
-	vector<string> tmpargs;
-	tmpargs.push_back("--setup");
-
-	Q3ListViewItem * cur;
-	for (cur = events_list->firstChild(); cur; cur = cur->nextSibling()) {
-		if (!cur->isSelected())
-			continue;
-
-		event_setting & cfg = event_cfgs[cur->text(0).latin1()];
-
-		op_event_descr const & descr =
-			locate_event(cur->text(0).latin1());
-
-		one_enabled = true;
-
-		string arg = "--event=" + descr.name;
-		arg += ":" + op_lexical_cast<string>(cfg.count);
-		arg += ":" + op_lexical_cast<string>(cfg.umask);
-		arg += ":" + op_lexical_cast<string>(cfg.os_ring_count);
-		arg += ":" + op_lexical_cast<string>(cfg.user_ring_count);
-
-		tmpargs.push_back(arg);
-	}
-
-	// only set counters if at least one is enabled
-	if (one_enabled)
-		args = tmpargs;
-
-	if (config.no_kernel) {
-		args.push_back("--no-vmlinux");
-	} else {
-		args.push_back("--vmlinux=" + config.kernel_filename);
-	}
-
-	args.push_back("--buffer-size=" +
-	       op_lexical_cast<string>(config.buffer_size));
-
-	if (op_get_interface() == OP_INTERFACE_24) {
-		args.push_back("--note-table-size=" +
-		       op_lexical_cast<string>(config.note_table_size));
-	} else {
-		args.push_back("--buffer-watershed=" +
-		       op_lexical_cast<string>(config.buffer_watershed));
-		args.push_back("--cpu-buffer-size=" +
-		       op_lexical_cast<string>(config.cpu_buffer_size));
-		if (op_file_readable("/dev/oprofile/backtrace_depth")) {
-			args.push_back("--callgraph=" +
-		              op_lexical_cast<string>(config.callgraph_depth));
-		}
-	}
-
-	string sep = "--separate=";
-
-	if (config.separate_lib)
-		sep += "library,";
-	if (config.separate_kernel)
-		sep += "kernel,";
-	if (config.separate_cpu)
-		sep += "cpu,";
-	if (config.separate_thread)
-		sep += "thread,";
-
-	if (sep == "--separate=")
-		sep += "none";
-	args.push_back(sep);
-
-	// 2.95 work-around, it didn't like return !do_exec_command() 
-	bool ret = !do_exec_command(OP_BINDIR "/opcontrol", args);
-	return ret;
-}
-
-
-// flush and stop the profiler if it was started.
-void oprof_start::on_stop_profiler()
-{
-	vector<string> args;
-	args.push_back("--shutdown");
-
-	if (daemon_status().running)
-		do_exec_command(OP_BINDIR "/opcontrol", args);
-	else
-		QMessageBox::warning(this, 0, "The profiler is already stopped.");
-
-	timerEvent(0);
-}
-
-
-void oprof_start::on_separate_kernel_cb_changed(int state)
-{
-	if (state == 2)
-		separate_lib_cb->setChecked(true);
-}
-
-void oprof_start::on_reset_sample_files()
-{
-	int ret = QMessageBox::warning(this, 0, "Are you sure you want to "
-	       "reset your last profile session ?", "Yes", "No", 0, 0, 1);
-	if (!ret) {
-		vector<string> args;
-		args.push_back("--reset");
-		if (!do_exec_command(OP_BINDIR "/opcontrol", args))
-			// the next timer event will overwrite the message
-			daemon_label->setText("Last profile session reseted.");
-		else
-			QMessageBox::warning(this, 0,
-			     "Can't reset profiling session.");
-	}
-}
-
-
-/// function object for matching against name
-class event_name_eq {
-	string name_;
-public:
-	explicit event_name_eq(string const & s) : name_(s) {}
-	bool operator()(op_event_descr const & d) const {
-		return d.name == name_;
-	}
-};
-
-
-// helper to retrieve an event descr through its name.
-op_event_descr const & oprof_start::locate_event(string const & name) const
-{
-	return *(find_if(v_events.begin(), v_events.end(), event_name_eq(name)));
-}
diff --git a/gui/oprof_start.h b/gui/oprof_start.h
deleted file mode 100644
index 477e3f4..0000000
--- a/gui/oprof_start.h
+++ /dev/null
@@ -1,170 +0,0 @@
-/**
- * @file oprof_start.h
- * The GUI start main class
- *
- * @remark Copyright 2002 OProfile authors
- * @remark Read the file COPYING
- *
- * @author Philippe Elie
- * @author John Levon
- */
-
-#ifndef OPROF_START_H
-#define OPROF_START_H
-
-#include <vector>
-#include <map>
-#include <set>
-
-#include "config.h"
-
-#include "ui/oprof_start.base.h"
-#include "oprof_start_config.h"
-
-#include "op_events.h"
-
-#ifndef QT3_SUPPORT
-#define Q3ListViewItem QListViewItem
-#endif
-
-class QIntValidator;
-class QListViewItem;
-class QTimerEvent;
-
-/// a struct describing a particular event type
-struct op_event_descr {
-	op_event_descr();
-
-	/// bit mask of allowed counters
-	uint counter_mask;
-	/// hardware event number
-	u32 val;
-	/// unit mask values if applicable
-	op_unit_mask const * unit;
-	/// name of event
-	std::string name;
-	/// description of event
-	std::string help_str;
-	/// minimum counter value
-	uint min_count;
-};
-
-class oprof_start : public oprof_start_base
-{
-	Q_OBJECT
-
-public:
-	oprof_start();
-
-protected slots:
-	/// select the kernel image filename
-	void choose_kernel_filename();
-	/// flush profiler
-	void on_flush_profiler_data();
-	/// start profiler
-	void on_start_profiler();
-	/// stop profiler
-	void on_stop_profiler();
-	/// events selection change
-	void event_selected();
-	/// the mouse is over an event
-	void event_over(Q3ListViewItem *);
-	/// state of separate_kernel_cb changed
-	void on_separate_kernel_cb_changed(int);
-	/// reset sample files
-	void on_reset_sample_files();
-
-	/// close the dialog
-	void accept();
-
-	/// WM hide event
-	void closeEvent(QCloseEvent * e);
-
-	/// timer event
-	void timerEvent(QTimerEvent * e);
-
-private:
-	/// the counter combo has been activated
-	void fill_events_listbox();
-
-	/// fill the event details and gui setup
-	void fill_events();
-
-	/// find an event description by name
-	op_event_descr const & locate_event(std::string const & name) const;
-
-	/// update config on user change
-	void record_selected_event_config();
-	/// update config and validate
-	bool record_config();
-
-	/// calculate unit mask for given event and unit mask part
-	void get_unit_mask_part(op_event_descr const & descr, uint num, bool selected, uint & mask);
-	/// calculate unit mask for given event
-	uint get_unit_mask(op_event_descr const & descr);
-	/// set the unit mask widgets for given event
-	void setup_unit_masks(op_event_descr const & descr);
-
-	/// return the maximum perf counter value for the current cpu type
-	uint max_perf_count() const;
-
-	/// show an event's settings
-	void display_event(op_event_descr const & descrp);
-
-	/// hide unit mask widgets
-	void hide_masks(void);
-
-	/// read the events set in daemonrc
-	void read_set_events();
-	/// use the default event
-	void setup_default_event();
-	/// load the extra config file
-	void load_config_file();
-	/// save the config
-	bool save_config();
-
-	/// redraw the event list by changing icon status
-	void draw_event_list();
-
-	/// return true if item is selectable or already selected
-	bool is_selectable_event(Q3ListViewItem * item);
-
-	/// try to alloc counters for the selected_events
-	bool alloc_selected_events() const;
-
-	/// validator for event count
-	QIntValidator* event_count_validator;
-
-	/// all available events for this hardware
-	std::vector<op_event_descr> v_events;
-
-	/// current event configs for each counter
-	typedef std::map<std::string, event_setting> event_setting_map;
-	event_setting_map event_cfgs;
-
-	/// The currently selected events. We must track this because
-	/// with multiple selection listbox QT doesn't allow to know
-	/// what is the last selected item. events_selected() update it
-	std::set<Q3ListViewItem *> selected_events;
-	Q3ListViewItem * current_event;
-
-	/// current config
-	config_setting config;
-
-	/// the expansion of "~" directory
-	std::string user_dir;
-
-	/// CPU type
-	op_cpu cpu_type;
-
-	/// CPU speed in MHz
-	double cpu_speed;
-
-	/// total number of available HW counters
-	uint op_nr_counters;
-
-	/// Total number of samples for this run
-	unsigned long total_nr_interrupts;
-};
-
-#endif // OPROF_START_H
diff --git a/gui/oprof_start_config.cpp b/gui/oprof_start_config.cpp
deleted file mode 100644
index b120af4..0000000
--- a/gui/oprof_start_config.cpp
+++ /dev/null
@@ -1,112 +0,0 @@
-/**
- * @file oprof_start_config.cpp
- * GUI startup config management
- *
- * @remark Copyright 2002 OProfile authors
- * @remark Read the file COPYING
- *
- * @author John Levon
- * @author Philippe Elie
- */
-
-#include <stdio.h>
-
-#include <sstream>
-#include <fstream>
-#include <iomanip>
-#include <sys/utsname.h>
-
-#include "string_manip.h"
-#include "oprof_start_config.h"
-#include "op_config.h"
-#include "op_config_24.h"
-
-using namespace std;
-
-event_setting::event_setting()
-	:
-	count(0),
-	umask(0),
-	os_ring_count(0),
-	user_ring_count(0)
-{
-}
-
-
-config_setting::config_setting()
-	:
-	buffer_size(OP_DEFAULT_BUF_SIZE),
-	note_table_size(OP_DEFAULT_NOTE_SIZE),
-	no_kernel(false),
-	verbose(false),
-	separate_lib(false),
-	separate_kernel(false),
-	separate_cpu(false),
-	separate_thread(false),
-	callgraph_depth(0),
-	buffer_watershed(0),
-	cpu_buffer_size(0)
-{
-	struct utsname info;
-
-	/* Guess path to vmlinux based on kernel currently running. */
-	if (uname(&info)) {
-		perror("oprof_start: Unable to determine OS release.");
-	} else {
-		string const version(info.release);
-		string const vmlinux_path("/lib/modules/" + version
-					 + "/build/vmlinux");
-		kernel_filename = vmlinux_path;
-	}
-}
-
-
-void config_setting::load(istream & in)
-{
-	buffer_size = OP_DEFAULT_BUF_SIZE;
-	note_table_size = OP_DEFAULT_NOTE_SIZE;
-
-	string str;
-
-	while (getline(in, str)) {
-		string val = split(str, '=');
-		if (str == "BUF_SIZE") {
-			buffer_size = op_lexical_cast<unsigned int>(val);
-			if (buffer_size < OP_DEFAULT_BUF_SIZE)
-				buffer_size = OP_DEFAULT_BUF_SIZE;
-		} else if (str == "NOTE_SIZE") {
-			note_table_size = op_lexical_cast<unsigned int>(val);
-			if (note_table_size < OP_DEFAULT_NOTE_SIZE)
-				note_table_size = OP_DEFAULT_NOTE_SIZE;
-		} else if (str == "VMLINUX") {
-			if (val == "none") {
-				kernel_filename = "";
-				no_kernel = true;
-			} else if (!val.empty()) {
-				no_kernel = false;
-				kernel_filename = val;
-			}
-		} else if (str == "SEPARATE_LIB") {
-			separate_lib = op_lexical_cast<bool>(val);
-		} else if (str == "SEPARATE_KERNEL") {
-			separate_kernel = op_lexical_cast<bool>(val);
-		} else if (str == "SEPARATE_CPU") {
-			separate_cpu = op_lexical_cast<bool>(val);
-		} else if (str == "SEPARATE_THREAD") {
-			separate_thread = op_lexical_cast<bool>(val);
-		} else if (str == "CALLGRAPH") {
-			callgraph_depth = op_lexical_cast<unsigned int>(val);
-		} else if (str == "BUF_WATERSHED") {
-			buffer_watershed = op_lexical_cast<unsigned int>(val);
-		} else if (str == "CPU_BUF_SIZE") {
-			cpu_buffer_size = op_lexical_cast<unsigned int>(val);
-		}
-	}
-}
-
-
-istream & operator>>(istream & in, config_setting & object)
-{
-	object.load(in);
-	return in;
-}
diff --git a/gui/oprof_start_config.h b/gui/oprof_start_config.h
deleted file mode 100644
index 07b25d8..0000000
--- a/gui/oprof_start_config.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/**
- * @file oprof_start_config.h
- * GUI startup config management
- *
- * @remark Copyright 2002 OProfile authors
- * @remark Read the file COPYING
- *
- * @author John Levon
- * @author Philippe Elie
- */
-
-#ifndef OPROF_START_CONFIG_H
-#define OPROF_START_CONFIG_H
-
-#include <sys/types.h>
-#include <string>
-#include <iosfwd>
-
-/// Store the setup of one event
-struct event_setting {
-
-	event_setting();
-
-	uint count;
-	uint umask;
-	bool os_ring_count;
-	bool user_ring_count;
-};
-
-/**
- * Store the general  configuration of the profiler.
- * There is no save(), instead opcontrol --setup must be
- * called. This uses opcontrol's daemonrc file.
- */
-struct config_setting {
-	config_setting();
-
-	void load(std::istream & in);
-
-	uint buffer_size;
-	uint note_table_size;
-	std::string kernel_filename;
-	bool no_kernel;
-	bool verbose;
-	bool separate_lib;
-	bool separate_kernel;
-	bool separate_cpu;
-	bool separate_thread;
-	uint callgraph_depth;
-	uint buffer_watershed;
-	uint cpu_buffer_size;
-};
-
-std::istream & operator>>(std::istream & in, config_setting & object);
-
-#endif // ! OPROF_START_CONFIG_H
diff --git a/gui/oprof_start_main.cpp b/gui/oprof_start_main.cpp
deleted file mode 100644
index 44da5de..0000000
--- a/gui/oprof_start_main.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-/**
- * @file oprof_start_main.cpp
- * main routine for GUI start
- *
- * @remark Copyright 2002 OProfile authors
- * @remark Read the file COPYING
- *
- * @author Philippe Elie
- * @author John Levon
- */
-
-#include <qapplication.h>
-
-#include "oprof_start.h"
-
-int main(int argc, char* argv[])
-{
-	QApplication a(argc, argv);
-
-	oprof_start* dlg = new oprof_start();
-
-	a.setMainWidget(dlg);
-
-	dlg->show();
-
-	return a.exec();
-}
diff --git a/gui/oprof_start_util.cpp b/gui/oprof_start_util.cpp
deleted file mode 100644
index d293431..0000000
--- a/gui/oprof_start_util.cpp
+++ /dev/null
@@ -1,331 +0,0 @@
-/**
- * @file oprof_start_util.cpp
- * Miscellaneous helpers for the GUI start
- *
- * @remark Copyright 2002 OProfile authors
- * @remark Read the file COPYING
- *
- * @author Philippe Elie
- * @author John Levon
- */
-
-#include <dirent.h>
-#include <unistd.h>
-#include <glob.h>
-
-#include <cerrno>
-#include <vector>
-#include <cmath>
-#include <sstream>
-#include <iostream>
-#include <fstream>
-#include <cstdlib>
-
-#include <qfiledialog.h>
-#include <qmessagebox.h>
-
-#include "op_file.h"
-#include "file_manip.h"
-#include "child_reader.h"
-#include "op_libiberty.h"
-
-#include "oprof_start.h"
-#include "oprof_start_util.h"
-
-using namespace std;
-
-namespace {
-
-// return the ~ expansion suffixed with a '/'
-string const get_config_dir()
-{
-	return "/root";
-}
-
-string daemon_pid;
-
-} // namespace anon
-
-daemon_status::daemon_status()
-	: running(false),
-	  nr_interrupts(0)
-{
-	int HZ;
-	if (!daemon_pid.empty()) {
-		string proc_filename = string("/proc/") + daemon_pid + "/exe";
-		string const exec = op_realpath(proc_filename);
-		if (exec == proc_filename)
-			daemon_pid.erase();
-		else
-			running = true;
-	}
-
-	if (daemon_pid.empty()) {
-		DIR * dir;
-		struct dirent * dirent;
-
-		if (!(dir = opendir("/proc"))) {
-			perror("oprofiled: /proc directory could not be opened. ");
-			exit(EXIT_FAILURE);
-		}
-
-		while ((dirent = readdir(dir))) {
-			string const exec =
-				op_realpath(string("/proc/")
-				               + dirent->d_name + "/exe");
-			string const name = op_basename(exec);
-			if (name != "oprofiled")
-				continue;
-
-			daemon_pid = dirent->d_name;
-			running = true;
-		}
-
-		closedir(dir);
-	}
-
-	HZ = sysconf(_SC_CLK_TCK);
-	if (HZ == -1) {
-		perror("oprofiled: Unable to determine clock ticks per second. ");
-		exit(EXIT_FAILURE);
-	}
-
-	if (daemon_pid.empty())
-		return;
-
-	nr_interrupts = 0;
-
-	switch (op_get_interface()) {
-	case OP_INTERFACE_24:
-		{
-			ifstream ifs3("/proc/sys/dev/oprofile/nr_interrupts");
-			if (ifs3)
-				ifs3 >> nr_interrupts;
-		}
-		break;
-	case OP_INTERFACE_26:
-		{
-			static unsigned int old_sum_interrupts;
-			unsigned int sum_interrupts = 0;
-			glob_t file_names;
-
-			file_names.gl_offs = 0;
-			glob("/dev/oprofile/stats/cpu*/sample_received",
-			     GLOB_DOOFFS, NULL, &file_names);
-
-			for (size_t i = 0; i < file_names.gl_pathc; ++i) {
-				ifstream ifs3(file_names.gl_pathv[i]);
-				if (ifs3) {
-					unsigned int file_interrupts;
-					ifs3 >> file_interrupts;
-					sum_interrupts += file_interrupts;
-				}
-			}
-			if (old_sum_interrupts > sum_interrupts)
-				// occur if we stop/restart daemon.
-				old_sum_interrupts = 0;
-			nr_interrupts = sum_interrupts - old_sum_interrupts;
-			old_sum_interrupts = sum_interrupts;
-			globfree(&file_names);
-		}
-		break;
-	default:
-		break;
-	}
-}
-
-
-/**
- * get_config_filename - get absolute filename of file in user $HOME
- * @param filename  the relative filename
- *
- * Get the absolute path of a file in a user's home directory.
- */
-string const get_config_filename(string const & filename)
-{
-	return get_config_dir() + "/" + filename;
-}
-
-
-/**
- * check_and_create_config_dir - make sure config dir is accessible
- *
- * Returns %true if the dir is accessible.
- */
-bool check_and_create_config_dir()
-{
-	string dir = get_config_filename(".oprofile");
-
-	char * name = xstrdup(dir.c_str());
-
-	if (create_dir(name)) {
-		ostringstream out;
-		out << "unable to create " << dir << " directory ";
-		out << "cause: " << strerror(errno);
-		QMessageBox::warning(0, 0, out.str().c_str());
-
-		free(name);
-
-		return false;
-	}
-
-	free(name);
-	return true;
-}
-
-
-/**
- * format - re-format a string
- * @param orig  string to format
- * @param maxlen  width of line
- *
- * Re-formats a string to fit into a certain width,
- * breaking lines at spaces between words.
- *
- * Returns the formatted string
- */
-string const format(string const & orig, uint const maxlen)
-{
-	string text(orig);
-
-	istringstream ss(text);
-	vector<string> lines;
-
-	string oline;
-	string line;
-
-	while (getline(ss, oline)) {
-		if (line.size() + oline.size() < maxlen) {
-			lines.push_back(line + oline);
-			line.erase();
-		} else {
-			lines.push_back(line);
-			line.erase();
-			string s;
-			string word;
-			istringstream oss(oline);
-			while (oss >> word) {
-				if (line.size() + word.size() > maxlen) {
-					lines.push_back(line);
-					line.erase();
-				}
-				line += word + " ";
-			}
-		}
-	}
-
-	if (line.size())
-		lines.push_back(line);
-
-	string ret;
-
-	for(vector<string>::const_iterator it = lines.begin(); it != lines.end(); ++it)
-		ret += *it + "\n";
-
-	return ret;
-}
-
-
-/**
- * do_exec_command - execute a command
- * @param cmd  command name
- * @param args  arguments to command
- *
- * Execute a command synchronously. An error message is shown
- * if the command returns a non-zero status, which is also returned.
- *
- * The arguments are verified and will refuse to execute if they contain
- * shell metacharacters.
- */
-int do_exec_command(string const & cmd, vector<string> const & args)
-{
-	ostringstream err;
-	bool ok = true;
-
-	// verify arguments
-	for (vector<string>::const_iterator cit = args.begin();
-		cit != args.end(); ++cit) {
-		if (verify_argument(*cit))
-			continue;
-
-		QMessageBox::warning(0, 0,
-			string(
-			"Could not execute: Argument \"" + *cit +
-			"\" contains shell metacharacters.\n").c_str());
-		return EINVAL;
-	}
-
-	child_reader reader(cmd, args);
-	if (reader.error())
-		ok = false;
-
-	if (ok)
-		reader.get_data(cout, err);
-
-	int ret = reader.terminate_process();
-	if (ret) {
-		string error = reader.error_str() + "\n";
-		error += "Failed: \n" + err.str() + "\n";
-		string cmdline = cmd;
-		for (vector<string>::const_iterator cit = args.begin();
-		     cit != args.end(); ++cit) {
-			cmdline += " " + *cit + " ";
-		}
-		error += "\n\nCommand was :\n\n" + cmdline + "\n";
-
-		QMessageBox::warning(0, 0, format(error, 50).c_str());
-	}
-
-	return ret;
-}
-
-
-/**
- * do_open_file_or_dir - open file/directory
- * @param base_dir  directory to start at
- * @param dir_only  directory or filename to select
- *
- * Select a file or directory. The selection is returned;
- * an empty string if the selection was cancelled.
- */
-string const do_open_file_or_dir(string const & base_dir, bool dir_only)
-{
-	QString result;
-
-	if (dir_only) {
-		result = QFileDialog::getExistingDirectory(base_dir.c_str(), 0,
-			"open_file_or_dir", "Get directory name", true);
-	} else {
-		result = QFileDialog::getOpenFileName(base_dir.c_str(), 0, 0,
-			"open_file_or_dir", "Get filename");
-	}
-
-	if (result.isNull())
-		return string();
-	else
-		return result.latin1();
-}
-
-/**
- * verify_argument - check string for potentially dangerous characters
- *
- * This function returns false if the string contains dangerous shell
- * metacharacters.
- *
- * WWW Security FAQ dangerous chars:
- *
- * & ; ` ' \ " | * ? ~ < > ^ ( ) [ ] { } $ \n \r
- *
- * David Wheeler: ! #
- *
- * We allow '-' because we disallow whitespace. We allow ':' and '='
- */
-bool verify_argument(string const & str)
-{
-	if (str.find_first_not_of(
-		"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
-		"abcdefghijklmnopqrstuvwxyz0123456789_:=-+%,./")
-		!= string::npos)
-		return false;
-	return true;
-}
diff --git a/gui/oprof_start_util.h b/gui/oprof_start_util.h
deleted file mode 100644
index cce646d..0000000
--- a/gui/oprof_start_util.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/**
- * @file oprof_start_util.h
- * Miscellaneous helpers for the GUI start
- *
- * @remark Copyright 2002 OProfile authors
- * @remark Read the file COPYING
- *
- * @author Philippe Elie
- * @author John Levon
- */
-
-#ifndef OPROF_START_UTIL_H
-#define OPROF_START_UTIL_H
-
-#include <cmath>
-#include <string>
-#include <vector>
-
-/// Store various daemon status data
-struct daemon_status {
-	/// fill various data member according to the current daemon status
-	daemon_status();
-	bool running;			///< true if daemon is running
-	unsigned int nr_interrupts;	///< nr_interrupts from profiling start
-};
-
-inline double ratio(double x1, double x2)
-{
-	return fabs(((x1 - x2) / x2)) * 100;
-}
-
-std::string const get_config_filename(std::string const & filename);
-bool check_and_create_config_dir();
-std::string const format(std::string const & orig, uint const maxlen);
-int do_exec_command(std::string const & cmd, std::vector<std::string> const & args = std::vector<std::string>());
-std::string const do_open_file_or_dir(std::string const & base_dir, bool dir_only);
-bool verify_argument(std::string const & str);
-
-#endif // OPROF_START_UTIL_H
diff --git a/gui/ui/Makefile.am b/gui/ui/Makefile.am
deleted file mode 100644
index 235d370..0000000
--- a/gui/ui/Makefile.am
+++ /dev/null
@@ -1,24 +0,0 @@
-EXTRA_DIST = oprof_start.base.ui
-
-if have_qt
-
-AM_CPPFLAGS = @QT_CFLAGS@ @OP_CPPFLAGS@
-
-AM_CXXFLAGS = @OP_CXXFLAGS@
-
-noinst_LIBRARIES = liboprof_start.a
-nodist_liboprof_start_a_SOURCES = oprof_start.base.cpp oprof_start.base.moc.cpp
-
-oprof_start.base.h: oprof_start.base.ui
-	$(UIC) -o $@ $<
-
-oprof_start.base.cpp: oprof_start.base.h oprof_start.base.ui
-	$(UIC) -o $@ -impl $^
-
-oprof_start.base.moc.cpp: oprof_start.base.h
-	$(MOC) -o $@ $<
-
-clean-local:
-	rm -f oprof_start.base.h oprof_start.base.cpp oprof_start.base.moc.cpp
-
-endif
diff --git a/gui/ui/oprof_start.base.ui b/gui/ui/oprof_start.base.ui
deleted file mode 100644
index 4fcc43f..0000000
--- a/gui/ui/oprof_start.base.ui
+++ /dev/null
@@ -1,1190 +0,0 @@
-<!DOCTYPE UI><UI>
-<class>oprof_start_base</class>
-<widget>
-    <class>QDialog</class>
-    <property stdset="1">
-        <name>name</name>
-        <cstring>oprof_start_base</cstring>
-    </property>
-    <property stdset="1">
-        <name>geometry</name>
-        <rect>
-            <x>0</x>
-            <y>0</y>
-            <width>625</width>
-            <height>735</height>
-        </rect>
-    </property>
-    <property stdset="1">
-        <name>caption</name>
-        <string>Start profiler</string>
-    </property>
-    <property stdset="1">
-        <name>sizeGripEnabled</name>
-        <bool>true</bool>
-    </property>
-    <vbox>
-        <property stdset="1">
-            <name>margin</name>
-            <number>11</number>
-        </property>
-        <property stdset="1">
-            <name>spacing</name>
-            <number>6</number>
-        </property>
-        <widget>
-            <class>QTabWidget</class>
-            <property stdset="1">
-                <name>name</name>
-                <cstring>setup_config_tab</cstring>
-            </property>
-            <widget>
-                <class>QWidget</class>
-                <property stdset="1">
-                    <name>name</name>
-                    <cstring>counter_setup_page</cstring>
-                </property>
-                <attribute>
-                    <name>title</name>
-                    <string>&amp;Setup</string>
-                </attribute>
-                <vbox>
-                    <property stdset="1">
-                        <name>margin</name>
-                        <number>11</number>
-                    </property>
-                    <property stdset="1">
-                        <name>spacing</name>
-                        <number>6</number>
-                    </property>
-                    <widget>
-                        <class>QGroupBox</class>
-                        <property stdset="1">
-                            <name>name</name>
-                            <cstring>counter_group</cstring>
-                        </property>
-                        <property stdset="1">
-                            <name>title</name>
-                            <string>Events</string>
-                        </property>
-                        <vbox>
-                            <property stdset="1">
-                                <name>margin</name>
-                                <number>11</number>
-                            </property>
-                            <property stdset="1">
-                                <name>spacing</name>
-                                <number>6</number>
-                            </property>
-                            <widget>
-                                <class>QLayoutWidget</class>
-                                <property stdset="1">
-                                    <name>name</name>
-                                    <cstring>Layout10</cstring>
-                                </property>
-                                <hbox>
-                                    <property stdset="1">
-                                        <name>margin</name>
-                                        <number>0</number>
-                                    </property>
-                                    <property stdset="1">
-                                        <name>spacing</name>
-                                        <number>6</number>
-                                    </property>
-                                    <widget>
-                                        <class>QListView</class>
-                                        <column>
-                                            <property>
-                                                <name>text</name>
-                                                <string>Removedincode</string>
-                                            </property>
-                                            <property>
-                                                <name>clickable</name>
-                                                <bool>false</bool>
-                                            </property>
-                                            <property>
-                                                <name>resizeable</name>
-                                                <bool>true</bool>
-                                            </property>
-                                        </column>
-                                        <property stdset="1">
-                                            <name>name</name>
-                                            <cstring>events_list</cstring>
-                                        </property>
-                                        <property stdset="1">
-                                            <name>minimumSize</name>
-                                            <size>
-                                                <width>250</width>
-                                                <height>0</height>
-                                            </size>
-                                        </property>
-                                        <property stdset="1">
-                                            <name>selectionMode</name>
-                                            <enum>Multi</enum>
-                                        </property>
-                                        <property>
-                                            <name>toolTip</name>
-                                            <string>Available events</string>
-                                        </property>
-                                    </widget>
-                                    <widget>
-                                        <class>QLayoutWidget</class>
-                                        <property stdset="1">
-                                            <name>name</name>
-                                            <cstring>Layout9</cstring>
-                                        </property>
-                                        <vbox>
-                                            <property stdset="1">
-                                                <name>margin</name>
-                                                <number>0</number>
-                                            </property>
-                                            <property stdset="1">
-                                                <name>spacing</name>
-                                                <number>6</number>
-                                            </property>
-                                            <widget>
-                                                <class>QCheckBox</class>
-                                                <property stdset="1">
-                                                    <name>name</name>
-                                                    <cstring>os_ring_count_cb</cstring>
-                                                </property>
-                                                <property stdset="1">
-                                                    <name>text</name>
-                                                    <string>Profile &amp;kernel</string>
-                                                </property>
-                                                <property>
-                                                    <name>toolTip</name>
-                                                    <string>Profile kernel code</string>
-                                                </property>
-                                            </widget>
-                                            <widget>
-                                                <class>QCheckBox</class>
-                                                <property stdset="1">
-                                                    <name>name</name>
-                                                    <cstring>user_ring_count_cb</cstring>
-                                                </property>
-                                                <property stdset="1">
-                                                    <name>text</name>
-                                                    <string>Profile &amp;user binaries</string>
-                                                </property>
-                                                <property>
-                                                    <name>toolTip</name>
-                                                    <string>Profile user libraries and applications</string>
-                                                </property>
-                                            </widget>
-                                            <widget>
-                                                <class>QLayoutWidget</class>
-                                                <property stdset="1">
-                                                    <name>name</name>
-                                                    <cstring>Layout16</cstring>
-                                                </property>
-                                                <hbox>
-                                                    <property stdset="1">
-                                                        <name>margin</name>
-                                                        <number>0</number>
-                                                    </property>
-                                                    <property stdset="1">
-                                                        <name>spacing</name>
-                                                        <number>6</number>
-                                                    </property>
-                                                    <widget>
-                                                        <class>QLabel</class>
-                                                        <property stdset="1">
-                                                            <name>name</name>
-                                                            <cstring>TextLabel1_2</cstring>
-                                                        </property>
-                                                        <property stdset="1">
-                                                            <name>text</name>
-                                                            <string>C&amp;ount</string>
-                                                        </property>
-                                                        <property>
-                                                            <name>buddy</name>
-                                                            <cstring>event_count_edit</cstring>
-                                                        </property>
-                                                    </widget>
-                                                    <widget>
-                                                        <class>QLineEdit</class>
-                                                        <property stdset="1">
-                                                            <name>name</name>
-                                                            <cstring>event_count_edit</cstring>
-                                                        </property>
-                                                        <property stdset="1">
-                                                            <name>sizePolicy</name>
-                                                            <sizepolicy>
-                                                                <hsizetype>1</hsizetype>
-                                                                <vsizetype>0</vsizetype>
-                                                            </sizepolicy>
-                                                        </property>
-                                                        <property>
-                                                            <name>toolTip</name>
-                                                            <string>Set the count value</string>
-                                                        </property>
-                                                    </widget>
-                                                    <spacer>
-                                                        <property>
-                                                            <name>name</name>
-                                                            <cstring>Spacer13_2</cstring>
-                                                        </property>
-                                                        <property stdset="1">
-                                                            <name>orientation</name>
-                                                            <enum>Horizontal</enum>
-                                                        </property>
-                                                        <property stdset="1">
-                                                            <name>sizeType</name>
-                                                            <enum>Expanding</enum>
-                                                        </property>
-                                                        <property>
-                                                            <name>sizeHint</name>
-                                                            <size>
-                                                                <width>20</width>
-                                                                <height>20</height>
-                                                            </size>
-                                                        </property>
-                                                    </spacer>
-                                                </hbox>
-                                            </widget>
-                                            <widget>
-                                                <class>QButtonGroup</class>
-                                                <property stdset="1">
-                                                    <name>name</name>
-                                                    <cstring>unit_mask_group</cstring>
-                                                </property>
-                                                <property stdset="1">
-                                                    <name>sizePolicy</name>
-                                                    <sizepolicy>
-                                                        <hsizetype>1</hsizetype>
-                                                        <vsizetype>5</vsizetype>
-                                                    </sizepolicy>
-                                                </property>
-                                                <property stdset="1">
-                                                    <name>title</name>
-                                                    <string>Unit mask</string>
-                                                </property>
-                                                <property>
-                                                    <name>toolTip</name>
-                                                    <string>Unit mask settings for this event</string>
-                                                </property>
-                                                <grid>
-                                                    <property stdset="1">
-                                                        <name>margin</name>
-                                                        <number>11</number>
-                                                    </property>
-                                                    <property stdset="1">
-                                                        <name>spacing</name>
-                                                        <number>6</number>
-                                                    </property>
-                                                    <widget row="0"  column="0" >
-                                                        <class>QCheckBox</class>
-                                                        <property stdset="1">
-                                                            <name>name</name>
-                                                            <cstring>check0</cstring>
-                                                        </property>
-                                                        <property stdset="1">
-                                                            <name>text</name>
-                                                            <string>check0</string>
-                                                        </property>
-                                                    </widget>
-                                                    <widget row="1"  column="0"  rowspan="2"  colspan="2" >
-                                                        <class>QCheckBox</class>
-                                                        <property stdset="1">
-                                                            <name>name</name>
-                                                            <cstring>check1</cstring>
-                                                        </property>
-                                                        <property stdset="1">
-                                                            <name>text</name>
-                                                            <string>check1</string>
-                                                        </property>
-                                                    </widget>
-                                                    <widget row="3"  column="0"  rowspan="2"  colspan="2" >
-                                                        <class>QCheckBox</class>
-                                                        <property stdset="1">
-                                                            <name>name</name>
-                                                            <cstring>check2</cstring>
-                                                        </property>
-                                                        <property stdset="1">
-                                                            <name>text</name>
-                                                            <string>check2</string>
-                                                        </property>
-                                                    </widget>
-                                                    <widget row="5"  column="0" >
-                                                        <class>QCheckBox</class>
-                                                        <property stdset="1">
-                                                            <name>name</name>
-                                                            <cstring>check3</cstring>
-                                                        </property>
-                                                        <property stdset="1">
-                                                            <name>text</name>
-                                                            <string>check3</string>
-                                                        </property>
-                                                    </widget>
-                                                    <widget row="6"  column="0" >
-                                                        <class>QCheckBox</class>
-                                                        <property stdset="1">
-                                                            <name>name</name>
-                                                            <cstring>check4</cstring>
-                                                        </property>
-                                                        <property stdset="1">
-                                                            <name>text</name>
-                                                            <string>check4</string>
-                                                        </property>
-                                                    </widget>
-                                                    <widget row="7"  column="0" >
-                                                        <class>QCheckBox</class>
-                                                        <property stdset="1">
-                                                            <name>name</name>
-                                                            <cstring>check5</cstring>
-                                                        </property>
-                                                        <property stdset="1">
-                                                            <name>text</name>
-                                                            <string>check5</string>
-                                                        </property>
-                                                    </widget>
-                                                    <widget row="8"  column="0"  rowspan="2"  colspan="2" >
-                                                        <class>QCheckBox</class>
-                                                        <property stdset="1">
-                                                            <name>name</name>
-                                                            <cstring>check6</cstring>
-                                                        </property>
-                                                        <property stdset="1">
-                                                            <name>text</name>
-                                                            <string>check6</string>
-                                                        </property>
-                                                    </widget>
-                                                    <widget row="10"  column="0" >
-                                                        <class>QCheckBox</class>
-                                                        <property stdset="1">
-                                                            <name>name</name>
-                                                            <cstring>check7</cstring>
-                                                        </property>
-                                                        <property stdset="1">
-                                                            <name>text</name>
-                                                            <string>check7</string>
-                                                        </property>
-                                                    </widget>
-                                                    <spacer row="11"  column="1" >
-                                                        <property>
-                                                            <name>name</name>
-                                                            <cstring>Spacer14</cstring>
-                                                        </property>
-                                                        <property stdset="1">
-                                                            <name>orientation</name>
-                                                            <enum>Vertical</enum>
-                                                        </property>
-                                                        <property stdset="1">
-                                                            <name>sizeType</name>
-                                                            <enum>Expanding</enum>
-                                                        </property>
-                                                        <property>
-                                                            <name>sizeHint</name>
-                                                            <size>
-                                                                <width>20</width>
-                                                                <height>20</height>
-                                                            </size>
-                                                        </property>
-                                                    </spacer>
-                                                    <widget row="5"  column="1" >
-                                                        <class>QCheckBox</class>
-                                                        <property stdset="1">
-                                                            <name>name</name>
-                                                            <cstring>check11</cstring>
-                                                        </property>
-                                                        <property stdset="1">
-                                                            <name>text</name>
-                                                            <string>check11</string>
-                                                        </property>
-                                                    </widget>
-                                                    <widget row="7"  column="1" >
-                                                        <class>QCheckBox</class>
-                                                        <property stdset="1">
-                                                            <name>name</name>
-                                                            <cstring>check13</cstring>
-                                                        </property>
-                                                        <property stdset="1">
-                                                            <name>text</name>
-                                                            <string>check13</string>
-                                                        </property>
-                                                    </widget>
-                                                    <widget row="10"  column="1" >
-                                                        <class>QCheckBox</class>
-                                                        <property stdset="1">
-                                                            <name>name</name>
-                                                            <cstring>check15</cstring>
-                                                        </property>
-                                                        <property stdset="1">
-                                                            <name>text</name>
-                                                            <string>check15</string>
-                                                        </property>
-                                                    </widget>
-                                                    <widget row="4"  column="1" >
-                                                        <class>QCheckBox</class>
-                                                        <property stdset="1">
-                                                            <name>name</name>
-                                                            <cstring>check10</cstring>
-                                                        </property>
-                                                        <property stdset="1">
-                                                            <name>text</name>
-                                                            <string>check10</string>
-                                                        </property>
-                                                    </widget>
-                                                    <widget row="9"  column="1" >
-                                                        <class>QCheckBox</class>
-                                                        <property stdset="1">
-                                                            <name>name</name>
-                                                            <cstring>check14</cstring>
-                                                        </property>
-                                                        <property stdset="1">
-                                                            <name>text</name>
-                                                            <string>check14</string>
-                                                        </property>
-                                                    </widget>
-                                                    <widget row="2"  column="1" >
-                                                        <class>QCheckBox</class>
-                                                        <property stdset="1">
-                                                            <name>name</name>
-                                                            <cstring>check9</cstring>
-                                                        </property>
-                                                        <property stdset="1">
-                                                            <name>text</name>
-                                                            <string>check9</string>
-                                                        </property>
-                                                    </widget>
-                                                    <widget row="0"  column="1" >
-                                                        <class>QCheckBox</class>
-                                                        <property stdset="1">
-                                                            <name>name</name>
-                                                            <cstring>check8</cstring>
-                                                        </property>
-                                                        <property stdset="1">
-                                                            <name>text</name>
-                                                            <string>check8</string>
-                                                        </property>
-                                                    </widget>
-                                                    <widget row="6"  column="1" >
-                                                        <class>QCheckBox</class>
-                                                        <property stdset="1">
-                                                            <name>name</name>
-                                                            <cstring>check12</cstring>
-                                                        </property>
-                                                        <property stdset="1">
-                                                            <name>text</name>
-                                                            <string>check12</string>
-                                                        </property>
-                                                    </widget>
-                                                </grid>
-                                            </widget>
-                                        </vbox>
-                                    </widget>
-                                </hbox>
-                            </widget>
-                            <widget>
-                                <class>QLabel</class>
-                                <property stdset="1">
-                                    <name>name</name>
-                                    <cstring>event_help_label</cstring>
-                                </property>
-                                <property stdset="1">
-                                    <name>enabled</name>
-                                    <bool>true</bool>
-                                </property>
-                                <property stdset="1">
-                                    <name>sizePolicy</name>
-                                    <sizepolicy>
-                                        <hsizetype>1</hsizetype>
-                                        <vsizetype>1</vsizetype>
-                                    </sizepolicy>
-                                </property>
-                                <property stdset="1">
-                                    <name>frameShape</name>
-                                    <enum>WinPanel</enum>
-                                </property>
-                                <property stdset="1">
-                                    <name>frameShadow</name>
-                                    <enum>Sunken</enum>
-                                </property>
-                                <property stdset="1">
-                                    <name>text</name>
-                                    <string></string>
-                                </property>
-                            </widget>
-                        </vbox>
-                    </widget>
-                </vbox>
-            </widget>
-            <widget>
-                <class>QWidget</class>
-                <property stdset="1">
-                    <name>name</name>
-                    <cstring>configuration_page</cstring>
-                </property>
-                <attribute>
-                    <name>title</name>
-                    <string>&amp;Configuration</string>
-                </attribute>
-                <vbox>
-                    <property stdset="1">
-                        <name>margin</name>
-                        <number>11</number>
-                    </property>
-                    <property stdset="1">
-                        <name>spacing</name>
-                        <number>6</number>
-                    </property>
-                    <widget>
-                        <class>QLayoutWidget</class>
-                        <property stdset="1">
-                            <name>name</name>
-                            <cstring>Layout11</cstring>
-                        </property>
-                        <hbox>
-                            <property stdset="1">
-                                <name>margin</name>
-                                <number>0</number>
-                            </property>
-                            <property stdset="1">
-                                <name>spacing</name>
-                                <number>6</number>
-                            </property>
-                            <widget>
-                                <class>QLabel</class>
-                                <property stdset="1">
-                                    <name>name</name>
-                                    <cstring>TextLabel1</cstring>
-                                </property>
-                                <property stdset="1">
-                                    <name>text</name>
-                                    <string>&amp;Kernel image file</string>
-                                </property>
-                                <property>
-                                    <name>buddy</name>
-                                    <cstring>kernel_filename_edit</cstring>
-                                </property>
-                            </widget>
-                            <widget>
-                                <class>QLineEdit</class>
-                                <property stdset="1">
-                                    <name>name</name>
-                                    <cstring>kernel_filename_edit</cstring>
-                                </property>
-                                <property>
-                                    <name>toolTip</name>
-                                    <string>The vmlinux file of the running kernel</string>
-                                </property>
-                            </widget>
-                            <widget>
-                                <class>QToolButton</class>
-                                <property stdset="1">
-                                    <name>name</name>
-                                    <cstring>kernel_filename_tb</cstring>
-                                </property>
-                                <property stdset="1">
-                                    <name>text</name>
-                                    <string>...</string>
-                                </property>
-                            </widget>
-                            <widget>
-                                <class>QCheckBox</class>
-                                <property stdset="1">
-                                    <name>name</name>
-                                    <cstring>no_vmlinux</cstring>
-                                </property>
-                                <property stdset="1">
-                                    <name>text</name>
-                                    <string>No kernel image</string>
-                                </property>
-                                <property>
-                                    <name>toolTip</name>
-                                    <string>No kernel image available. Disables kernel profiling.</string>
-                                </property>
-                            </widget>
-                        </hbox>
-                    </widget>
-                    <widget>
-                        <class>QLayoutWidget</class>
-                        <property stdset="1">
-                            <name>name</name>
-                            <cstring>Layout12</cstring>
-                        </property>
-                        <hbox>
-                            <property stdset="1">
-                                <name>margin</name>
-                                <number>0</number>
-                            </property>
-                            <property stdset="1">
-                                <name>spacing</name>
-                                <number>6</number>
-                            </property>
-                            <widget>
-                                <class>QLayoutWidget</class>
-                                <property stdset="1">
-                                    <name>name</name>
-                                    <cstring>Layout11</cstring>
-                                </property>
-                                <grid>
-                                    <property stdset="1">
-                                        <name>margin</name>
-                                        <number>0</number>
-                                    </property>
-                                    <property stdset="1">
-                                        <name>spacing</name>
-                                        <number>6</number>
-                                    </property>
-                                    <widget row="0"  column="1" >
-                                        <class>QLineEdit</class>
-                                        <property stdset="1">
-                                            <name>name</name>
-                                            <cstring>buffer_size_edit</cstring>
-                                        </property>
-                                        <property>
-                                            <name>toolTip</name>
-                                            <string>The size of the profiler's buffers</string>
-                                        </property>
-                                    </widget>
-                                    <widget row="1"  column="1" >
-                                        <class>QLineEdit</class>
-                                        <property stdset="1">
-                                            <name>name</name>
-                                            <cstring>note_table_size_edit</cstring>
-                                        </property>
-                                    </widget>
-                                    <widget row="3"  column="0" >
-                                        <class>QLabel</class>
-                                        <property stdset="1">
-                                            <name>name</name>
-                                            <cstring>cpu_buffer_size_label</cstring>
-                                        </property>
-                                        <property stdset="1">
-                                            <name>text</name>
-                                            <string>Cpu buffer size</string>
-                                        </property>
-                                        <property>
-                                            <name>buddy</name>
-                                            <cstring>cpu_buffer_size_edit</cstring>
-                                        </property>
-                                    </widget>
-                                    <widget row="3"  column="1" >
-                                        <class>QLineEdit</class>
-                                        <property stdset="1">
-                                            <name>name</name>
-                                            <cstring>cpu_buffer_size_edit</cstring>
-                                        </property>
-                                    </widget>
-                                    <widget row="0"  column="0" >
-                                        <class>QLabel</class>
-                                        <property stdset="1">
-                                            <name>name</name>
-                                            <cstring>buffer_size_label</cstring>
-                                        </property>
-                                        <property stdset="1">
-                                            <name>text</name>
-                                            <string>&amp;Buffer size</string>
-                                        </property>
-                                        <property>
-                                            <name>buddy</name>
-                                            <cstring>buffer_size_edit</cstring>
-                                        </property>
-                                    </widget>
-                                    <widget row="1"  column="0" >
-                                        <class>QLabel</class>
-                                        <property stdset="1">
-                                            <name>name</name>
-                                            <cstring>note_table_size_label</cstring>
-                                        </property>
-                                        <property stdset="1">
-                                            <name>text</name>
-                                            <string>Note Size</string>
-                                        </property>
-                                        <property>
-                                            <name>buddy</name>
-                                            <cstring>note_table_size_edit</cstring>
-                                        </property>
-                                    </widget>
-                                    <widget row="2"  column="1" >
-                                        <class>QLineEdit</class>
-                                        <property stdset="1">
-                                            <name>name</name>
-                                            <cstring>buffer_watershed_edit</cstring>
-                                        </property>
-                                    </widget>
-                                    <widget row="2"  column="0" >
-                                        <class>QLabel</class>
-                                        <property stdset="1">
-                                            <name>name</name>
-                                            <cstring>buffer_watershed_label</cstring>
-                                        </property>
-                                        <property stdset="1">
-                                            <name>text</name>
-                                            <string>Buffer watershed</string>
-                                        </property>
-                                        <property>
-                                            <name>buddy</name>
-                                            <cstring>buffer_watershed_edit</cstring>
-                                        </property>
-                                    </widget>
-                                </grid>
-                            </widget>
-                            <spacer>
-                                <property>
-                                    <name>name</name>
-                                    <cstring>Spacer11</cstring>
-                                </property>
-                                <property stdset="1">
-                                    <name>orientation</name>
-                                    <enum>Horizontal</enum>
-                                </property>
-                                <property stdset="1">
-                                    <name>sizeType</name>
-                                    <enum>Expanding</enum>
-                                </property>
-                                <property>
-                                    <name>sizeHint</name>
-                                    <size>
-                                        <width>20</width>
-                                        <height>20</height>
-                                    </size>
-                                </property>
-                            </spacer>
-                        </hbox>
-                    </widget>
-                    <widget>
-                        <class>QLayoutWidget</class>
-                        <property stdset="1">
-                            <name>name</name>
-                            <cstring>Layout36</cstring>
-                        </property>
-                        <grid>
-                            <property stdset="1">
-                                <name>margin</name>
-                                <number>0</number>
-                            </property>
-                            <property stdset="1">
-                                <name>spacing</name>
-                                <number>6</number>
-                            </property>
-                            <widget row="0"  column="0" >
-                                <class>QLayoutWidget</class>
-                                <property stdset="1">
-                                    <name>name</name>
-                                    <cstring>Layout34</cstring>
-                                </property>
-                                <vbox>
-                                    <property stdset="1">
-                                        <name>margin</name>
-                                        <number>0</number>
-                                    </property>
-                                    <property stdset="1">
-                                        <name>spacing</name>
-                                        <number>6</number>
-                                    </property>
-                                    <widget>
-                                        <class>QCheckBox</class>
-                                        <property stdset="1">
-                                            <name>name</name>
-                                            <cstring>verbose</cstring>
-                                        </property>
-                                        <property stdset="1">
-                                            <name>text</name>
-                                            <string>&amp;Verbose</string>
-                                        </property>
-                                        <property>
-                                            <name>toolTip</name>
-                                            <string>Very verbose output in log file</string>
-                                        </property>
-                                    </widget>
-                                    <widget>
-                                        <class>QCheckBox</class>
-                                        <property stdset="1">
-                                            <name>name</name>
-                                            <cstring>separate_lib_cb</cstring>
-                                        </property>
-                                        <property stdset="1">
-                                            <name>text</name>
-                                            <string>Per-application profiles</string>
-                                        </property>
-                                        <property>
-                                            <name>toolTip</name>
-                                            <string>Separate samples for each shared library. This increases the time and space overhead of OProfile.</string>
-                                        </property>
-                                    </widget>
-                                    <widget>
-                                        <class>QCheckBox</class>
-                                        <property stdset="1">
-                                            <name>name</name>
-                                            <cstring>separate_kernel_cb</cstring>
-                                        </property>
-                                        <property stdset="1">
-                                            <name>text</name>
-                                            <string>Per-application profiles, including kernel</string>
-                                        </property>
-                                        <property>
-                                            <name>toolTip</name>
-                                            <string>Separate samples for each shared library and kernel samples. This increases the time and space overhead of OProfile.</string>
-                                        </property>
-                                    </widget>
-                                    <widget>
-                                        <class>QCheckBox</class>
-                                        <property stdset="1">
-                                            <name>name</name>
-                                            <cstring>separate_cpu_cb</cstring>
-                                        </property>
-                                        <property stdset="1">
-                                            <name>text</name>
-                                            <string>Per-CPU profiles</string>
-                                        </property>
-                                        <property>
-                                            <name>toolTip</name>
-                                            <string>Separate samples for each shared library and kernel samples. This increases the time and space overhead of OProfile.</string>
-                                        </property>
-                                    </widget>
-                                    <widget>
-                                        <class>QCheckBox</class>
-                                        <property stdset="1">
-                                            <name>name</name>
-                                            <cstring>separate_thread_cb</cstring>
-                                        </property>
-                                        <property stdset="1">
-                                            <name>text</name>
-                                            <string>Per-thread/task profiles</string>
-                                        </property>
-                                        <property>
-                                            <name>toolTip</name>
-                                            <string>Separate samples for each shared library and kernel samples. This increases the time and space overhead of OProfile.</string>
-                                        </property>
-                                    </widget>
-                                    <widget>
-                                        <class>QLayoutWidget</class>
-                                        <property stdset="1">
-                                            <name>name</name>
-                                            <cstring>Layout33</cstring>
-                                        </property>
-                                        <hbox>
-                                            <property stdset="1">
-                                                <name>margin</name>
-                                                <number>0</number>
-                                            </property>
-                                            <property stdset="1">
-                                                <name>spacing</name>
-                                                <number>6</number>
-                                            </property>
-                                            <widget>
-                                                <class>QLabel</class>
-                                                <property stdset="1">
-                                                    <name>name</name>
-                                                    <cstring>callgraph_depth_label</cstring>
-                                                </property>
-                                                <property stdset="1">
-                                                    <name>text</name>
-                                                    <string>callgraph depth, zero to disable</string>
-                                                </property>
-                                                <property>
-                                                    <name>buddy</name>
-                                                    <cstring>callgraph_depth_edit</cstring>
-                                                </property>
-                                            </widget>
-                                            <widget>
-                                                <class>QLineEdit</class>
-                                                <property stdset="1">
-                                                    <name>name</name>
-                                                    <cstring>callgraph_depth_edit</cstring>
-                                                </property>
-                                            </widget>
-                                        </hbox>
-                                    </widget>
-                                </vbox>
-                            </widget>
-                            <spacer row="0"  column="1" >
-                                <property>
-                                    <name>name</name>
-                                    <cstring>Spacer12</cstring>
-                                </property>
-                                <property stdset="1">
-                                    <name>orientation</name>
-                                    <enum>Horizontal</enum>
-                                </property>
-                                <property stdset="1">
-                                    <name>sizeType</name>
-                                    <enum>Expanding</enum>
-                                </property>
-                                <property>
-                                    <name>sizeHint</name>
-                                    <size>
-                                        <width>20</width>
-                                        <height>20</height>
-                                    </size>
-                                </property>
-                            </spacer>
-                        </grid>
-                    </widget>
-                    <spacer>
-                        <property>
-                            <name>name</name>
-                            <cstring>Spacer9</cstring>
-                        </property>
-                        <property stdset="1">
-                            <name>orientation</name>
-                            <enum>Vertical</enum>
-                        </property>
-                        <property stdset="1">
-                            <name>sizeType</name>
-                            <enum>Expanding</enum>
-                        </property>
-                        <property>
-                            <name>sizeHint</name>
-                            <size>
-                                <width>20</width>
-                                <height>20</height>
-                            </size>
-                        </property>
-                    </spacer>
-                </vbox>
-            </widget>
-        </widget>
-        <widget>
-            <class>QLabel</class>
-            <property stdset="1">
-                <name>name</name>
-                <cstring>daemon_label</cstring>
-            </property>
-            <property stdset="1">
-                <name>frameShape</name>
-                <enum>Panel</enum>
-            </property>
-            <property stdset="1">
-                <name>frameShadow</name>
-                <enum>Sunken</enum>
-            </property>
-            <property stdset="1">
-                <name>text</name>
-                <string></string>
-            </property>
-            <property>
-                <name>toolTip</name>
-                <string>Current daemon status</string>
-            </property>
-        </widget>
-        <widget>
-            <class>QLayoutWidget</class>
-            <property stdset="1">
-                <name>name</name>
-                <cstring>Layout37</cstring>
-            </property>
-            <hbox>
-                <property stdset="1">
-                    <name>margin</name>
-                    <number>0</number>
-                </property>
-                <property stdset="1">
-                    <name>spacing</name>
-                    <number>6</number>
-                </property>
-                <widget>
-                    <class>QPushButton</class>
-                    <property stdset="1">
-                        <name>name</name>
-                        <cstring>start_profiler_btn</cstring>
-                    </property>
-                    <property stdset="1">
-                        <name>text</name>
-                        <string>St&amp;art</string>
-                    </property>
-                </widget>
-                <widget>
-                    <class>QPushButton</class>
-                    <property stdset="1">
-                        <name>name</name>
-                        <cstring>flush_profiler_data_btn</cstring>
-                    </property>
-                    <property stdset="1">
-                        <name>text</name>
-                        <string>&amp;Flush</string>
-                    </property>
-                </widget>
-                <widget>
-                    <class>QPushButton</class>
-                    <property stdset="1">
-                        <name>name</name>
-                        <cstring>stop_profiler_btn</cstring>
-                    </property>
-                    <property stdset="1">
-                        <name>text</name>
-                        <string>Stop</string>
-                    </property>
-                </widget>
-                <spacer>
-                    <property>
-                        <name>name</name>
-                        <cstring>Spacer5</cstring>
-                    </property>
-                    <property stdset="1">
-                        <name>orientation</name>
-                        <enum>Horizontal</enum>
-                    </property>
-                    <property stdset="1">
-                        <name>sizeType</name>
-                        <enum>Expanding</enum>
-                    </property>
-                    <property>
-                        <name>sizeHint</name>
-                        <size>
-                            <width>20</width>
-                            <height>20</height>
-                        </size>
-                    </property>
-                </spacer>
-                <widget>
-                    <class>QPushButton</class>
-                    <property stdset="1">
-                        <name>name</name>
-                        <cstring>reset_sample_files_btn</cstring>
-                    </property>
-                    <property stdset="1">
-                        <name>text</name>
-                        <string>Reset sample files</string>
-                    </property>
-                </widget>
-                <widget>
-                    <class>QPushButton</class>
-                    <property stdset="1">
-                        <name>name</name>
-                        <cstring>quit_and_save_btn</cstring>
-                    </property>
-                    <property stdset="1">
-                        <name>text</name>
-                        <string>Save and &amp;quit</string>
-                    </property>
-                    <property stdset="1">
-                        <name>autoDefault</name>
-                        <bool>true</bool>
-                    </property>
-                </widget>
-            </hbox>
-        </widget>
-    </vbox>
-</widget>
-<connections>
-    <connection>
-        <sender>start_profiler_btn</sender>
-        <signal>clicked()</signal>
-        <receiver>oprof_start_base</receiver>
-        <slot>on_start_profiler()</slot>
-    </connection>
-    <connection>
-        <sender>stop_profiler_btn</sender>
-        <signal>clicked()</signal>
-        <receiver>oprof_start_base</receiver>
-        <slot>on_stop_profiler()</slot>
-    </connection>
-    <connection>
-        <sender>flush_profiler_data_btn</sender>
-        <signal>clicked()</signal>
-        <receiver>oprof_start_base</receiver>
-        <slot>on_flush_profiler_data()</slot>
-    </connection>
-    <connection>
-        <sender>quit_and_save_btn</sender>
-        <signal>clicked()</signal>
-        <receiver>oprof_start_base</receiver>
-        <slot>accept()</slot>
-    </connection>
-    <connection>
-        <sender>kernel_filename_tb</sender>
-        <signal>clicked()</signal>
-        <receiver>oprof_start_base</receiver>
-        <slot>choose_kernel_filename()</slot>
-    </connection>
-    <connection>
-        <sender>no_vmlinux</sender>
-        <signal>toggled(bool)</signal>
-        <receiver>kernel_filename_tb</receiver>
-        <slot>setDisabled(bool)</slot>
-    </connection>
-    <connection>
-        <sender>no_vmlinux</sender>
-        <signal>toggled(bool)</signal>
-        <receiver>kernel_filename_edit</receiver>
-        <slot>setDisabled(bool)</slot>
-    </connection>
-    <connection>
-        <sender>no_vmlinux</sender>
-        <signal>toggled(bool)</signal>
-        <receiver>TextLabel1</receiver>
-        <slot>setDisabled(bool)</slot>
-    </connection>
-    <connection>
-        <sender>separate_kernel_cb</sender>
-        <signal>stateChanged(int)</signal>
-        <receiver>oprof_start_base</receiver>
-        <slot>on_separate_kernel_cb_changed(int)</slot>
-    </connection>
-    <connection>
-        <sender>reset_sample_files_btn</sender>
-        <signal>clicked()</signal>
-        <receiver>oprof_start_base</receiver>
-        <slot>on_reset_sample_files()</slot>
-    </connection>
-    <connection>
-        <sender>events_list</sender>
-        <signal>selectionChanged()</signal>
-        <receiver>oprof_start_base</receiver>
-        <slot>event_selected()</slot>
-    </connection>
-    <connection>
-        <sender>events_list</sender>
-        <signal>currentChanged(QListViewItem*)</signal>
-        <receiver>oprof_start_base</receiver>
-        <slot>event_over(QListViewItem *)</slot>
-    </connection>
-    <connection>
-        <sender>events_list</sender>
-        <signal>onItem(QListViewItem*)</signal>
-        <receiver>oprof_start_base</receiver>
-        <slot>event_over(QListViewItem *)</slot>
-    </connection>
-    <slot access="protected">choose_kernel_filename()</slot>
-    <slot access="protected">event_over(QListViewItem *)</slot>
-    <slot access="protected">event_selected()</slot>
-    <slot access="protected">on_reset_sample_files()</slot>
-    <slot access="protected">on_flush_profiler_data()</slot>
-    <slot access="protected">on_separate_kernel_cb_changed(int)</slot>
-    <slot access="protected">on_start_profiler()</slot>
-    <slot access="protected">on_stop_profiler()</slot>
-</connections>
-<tabstops>
-    <tabstop>setup_config_tab</tabstop>
-    <tabstop>events_list</tabstop>
-    <tabstop>os_ring_count_cb</tabstop>
-    <tabstop>user_ring_count_cb</tabstop>
-    <tabstop>event_count_edit</tabstop>
-    <tabstop>check0</tabstop>
-    <tabstop>check1</tabstop>
-    <tabstop>check2</tabstop>
-    <tabstop>check3</tabstop>
-    <tabstop>check4</tabstop>
-    <tabstop>check5</tabstop>
-    <tabstop>check6</tabstop>
-    <tabstop>check7</tabstop>
-    <tabstop>check8</tabstop>
-    <tabstop>check9</tabstop>
-    <tabstop>check10</tabstop>
-    <tabstop>check11</tabstop>
-    <tabstop>check12</tabstop>
-    <tabstop>check13</tabstop>
-    <tabstop>check14</tabstop>
-    <tabstop>check15</tabstop>
-    <tabstop>start_profiler_btn</tabstop>
-    <tabstop>flush_profiler_data_btn</tabstop>
-    <tabstop>stop_profiler_btn</tabstop>
-    <tabstop>reset_sample_files_btn</tabstop>
-    <tabstop>quit_and_save_btn</tabstop>
-    <tabstop>kernel_filename_edit</tabstop>
-    <tabstop>no_vmlinux</tabstop>
-    <tabstop>buffer_size_edit</tabstop>
-    <tabstop>note_table_size_edit</tabstop>
-    <tabstop>buffer_watershed_edit</tabstop>
-    <tabstop>verbose</tabstop>
-    <tabstop>separate_lib_cb</tabstop>
-    <tabstop>separate_kernel_cb</tabstop>
-    <tabstop>separate_cpu_cb</tabstop>
-    <tabstop>separate_thread_cb</tabstop>
-    <tabstop>callgraph_depth_edit</tabstop>
-</tabstops>
-</UI>
diff --git a/libabi/Makefile.in b/libabi/Makefile.in
index 05032d7..c28d916 100644
--- a/libabi/Makefile.in
+++ b/libabi/Makefile.in
@@ -41,7 +41,6 @@ DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
 am__aclocal_m4_deps = $(top_srcdir)/m4/binutils.m4 \
 	$(top_srcdir)/m4/builtinexpect.m4 \
-	$(top_srcdir)/m4/cellspubfdsupport.m4 \
 	$(top_srcdir)/m4/compileroption.m4 \
 	$(top_srcdir)/m4/copyifchange.m4 $(top_srcdir)/m4/docbook.m4 \
 	$(top_srcdir)/m4/extradirs.m4 \
@@ -50,7 +49,7 @@ am__aclocal_m4_deps = $(top_srcdir)/m4/binutils.m4 \
 	$(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
 	$(top_srcdir)/m4/mallocattribute.m4 \
 	$(top_srcdir)/m4/poptconst.m4 \
-	$(top_srcdir)/m4/precompiledheader.m4 $(top_srcdir)/m4/qt.m4 \
+	$(top_srcdir)/m4/precompiledheader.m4 \
 	$(top_srcdir)/m4/sstream.m4 $(top_srcdir)/m4/typedef.m4 \
 	$(top_srcdir)/configure.ac
 am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
@@ -188,7 +187,6 @@ LN_S = @LN_S@
 LTLIBOBJS = @LTLIBOBJS@
 MAKEINFO = @MAKEINFO@
 MKDIR_P = @MKDIR_P@
-MOC = @MOC@
 NM = @NM@
 NMEDIT = @NMEDIT@
 OBJDUMP = @OBJDUMP@
@@ -212,20 +210,13 @@ PFM_LIB = @PFM_LIB@
 PKG_CONFIG = @PKG_CONFIG@
 POPT_LIBS = @POPT_LIBS@
 PTRDIFF_T_TYPE = @PTRDIFF_T_TYPE@
-QT_CFLAGS = @QT_CFLAGS@
-QT_INCLUDES = @QT_INCLUDES@
-QT_LDFLAGS = @QT_LDFLAGS@
-QT_LIB = @QT_LIB@
-QT_LIBS = @QT_LIBS@
-QT_VERSION = @QT_VERSION@
 RANLIB = @RANLIB@
+RT_LIB = @RT_LIB@
 SED = @SED@
 SET_MAKE = @SET_MAKE@
 SHELL = @SHELL@
 SIZE_T_TYPE = @SIZE_T_TYPE@
 STRIP = @STRIP@
-UIC = @UIC@
-UIChelp = @UIChelp@
 VERSION = @VERSION@
 XMKMF = @XMKMF@
 XML_CATALOG = @XML_CATALOG@
diff --git a/libabi/opimport.cpp b/libabi/opimport.cpp
index 9dca6bb..c2234fe 100644
--- a/libabi/opimport.cpp
+++ b/libabi/opimport.cpp
@@ -15,6 +15,7 @@
 
 #include <fstream>
 #include <iostream>
+#include <sstream>
 #include <vector>
 #include <cassert>
 #include <cstring>
@@ -87,10 +88,13 @@ void extractor::extract(T & targ, void const * src_,
 	assert(src >= begin);
 	assert(src + nbytes <= end);
 	
-	if (verbose)
-		cerr << hex << "get " << sz << " = " << nbytes
-		     << " bytes @ " << off << " = " << (src - begin)
-		     << " : ";
+	if (verbose) {
+		ostringstream message;
+		message << hex << "get " << sz << " = " << nbytes
+		        << " bytes @ " << off << " = " << (src - begin)
+		        << " : ";
+		cerr << message.str();
+	}
 
 	if (little_endian)
 		while(nbytes--)
@@ -209,10 +213,11 @@ int main(int argc, char const ** argv)
 	odb_t dest;
 	int rc;
 
-	assert((in_fd = open(inputs[0].c_str(), O_RDONLY)) > 0);		
+	in_fd = open(inputs[0].c_str(), O_RDONLY);
+	assert(in_fd > 0);
 	assert(fstat(in_fd, &statb) == 0);
-	assert((in = mmap(0, statb.st_size, PROT_READ,
-			  MAP_PRIVATE, in_fd, 0)) != (void *)-1);
+	in = mmap(0, statb.st_size, PROT_READ, MAP_PRIVATE, in_fd, 0);
+	assert(in != (void *)-1);
 
 	rc = odb_open(&dest, output_filename.c_str(), ODB_RDWR,
 		      sizeof(struct opd_header));
diff --git a/libabi/tests/Makefile.in b/libabi/tests/Makefile.in
index 3885536..6d07577 100644
--- a/libabi/tests/Makefile.in
+++ b/libabi/tests/Makefile.in
@@ -39,7 +39,6 @@ DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
 am__aclocal_m4_deps = $(top_srcdir)/m4/binutils.m4 \
 	$(top_srcdir)/m4/builtinexpect.m4 \
-	$(top_srcdir)/m4/cellspubfdsupport.m4 \
 	$(top_srcdir)/m4/compileroption.m4 \
 	$(top_srcdir)/m4/copyifchange.m4 $(top_srcdir)/m4/docbook.m4 \
 	$(top_srcdir)/m4/extradirs.m4 \
@@ -48,7 +47,7 @@ am__aclocal_m4_deps = $(top_srcdir)/m4/binutils.m4 \
 	$(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
 	$(top_srcdir)/m4/mallocattribute.m4 \
 	$(top_srcdir)/m4/poptconst.m4 \
-	$(top_srcdir)/m4/precompiledheader.m4 $(top_srcdir)/m4/qt.m4 \
+	$(top_srcdir)/m4/precompiledheader.m4 \
 	$(top_srcdir)/m4/sstream.m4 $(top_srcdir)/m4/typedef.m4 \
 	$(top_srcdir)/configure.ac
 am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
@@ -131,7 +130,6 @@ LN_S = @LN_S@
 LTLIBOBJS = @LTLIBOBJS@
 MAKEINFO = @MAKEINFO@
 MKDIR_P = @MKDIR_P@
-MOC = @MOC@
 NM = @NM@
 NMEDIT = @NMEDIT@
 OBJDUMP = @OBJDUMP@
@@ -155,20 +153,13 @@ PFM_LIB = @PFM_LIB@
 PKG_CONFIG = @PKG_CONFIG@
 POPT_LIBS = @POPT_LIBS@
 PTRDIFF_T_TYPE = @PTRDIFF_T_TYPE@
-QT_CFLAGS = @QT_CFLAGS@
-QT_INCLUDES = @QT_INCLUDES@
-QT_LDFLAGS = @QT_LDFLAGS@
-QT_LIB = @QT_LIB@
-QT_LIBS = @QT_LIBS@
-QT_VERSION = @QT_VERSION@
 RANLIB = @RANLIB@
+RT_LIB = @RT_LIB@
 SED = @SED@
 SET_MAKE = @SET_MAKE@
 SHELL = @SHELL@
 SIZE_T_TYPE = @SIZE_T_TYPE@
 STRIP = @STRIP@
-UIC = @UIC@
-UIChelp = @UIChelp@
 VERSION = @VERSION@
 XMKMF = @XMKMF@
 XML_CATALOG = @XML_CATALOG@
diff --git a/libdb/Makefile.in b/libdb/Makefile.in
index d0f86a1..722c840 100644
--- a/libdb/Makefile.in
+++ b/libdb/Makefile.in
@@ -39,7 +39,6 @@ DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
 am__aclocal_m4_deps = $(top_srcdir)/m4/binutils.m4 \
 	$(top_srcdir)/m4/builtinexpect.m4 \
-	$(top_srcdir)/m4/cellspubfdsupport.m4 \
 	$(top_srcdir)/m4/compileroption.m4 \
 	$(top_srcdir)/m4/copyifchange.m4 $(top_srcdir)/m4/docbook.m4 \
 	$(top_srcdir)/m4/extradirs.m4 \
@@ -48,7 +47,7 @@ am__aclocal_m4_deps = $(top_srcdir)/m4/binutils.m4 \
 	$(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
 	$(top_srcdir)/m4/mallocattribute.m4 \
 	$(top_srcdir)/m4/poptconst.m4 \
-	$(top_srcdir)/m4/precompiledheader.m4 $(top_srcdir)/m4/qt.m4 \
+	$(top_srcdir)/m4/precompiledheader.m4 \
 	$(top_srcdir)/m4/sstream.m4 $(top_srcdir)/m4/typedef.m4 \
 	$(top_srcdir)/configure.ac
 am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
@@ -171,7 +170,6 @@ LN_S = @LN_S@
 LTLIBOBJS = @LTLIBOBJS@
 MAKEINFO = @MAKEINFO@
 MKDIR_P = @MKDIR_P@
-MOC = @MOC@
 NM = @NM@
 NMEDIT = @NMEDIT@
 OBJDUMP = @OBJDUMP@
@@ -195,20 +193,13 @@ PFM_LIB = @PFM_LIB@
 PKG_CONFIG = @PKG_CONFIG@
 POPT_LIBS = @POPT_LIBS@
 PTRDIFF_T_TYPE = @PTRDIFF_T_TYPE@
-QT_CFLAGS = @QT_CFLAGS@
-QT_INCLUDES = @QT_INCLUDES@
-QT_LDFLAGS = @QT_LDFLAGS@
-QT_LIB = @QT_LIB@
-QT_LIBS = @QT_LIBS@
-QT_VERSION = @QT_VERSION@
 RANLIB = @RANLIB@
+RT_LIB = @RT_LIB@
 SED = @SED@
 SET_MAKE = @SET_MAKE@
 SHELL = @SHELL@
 SIZE_T_TYPE = @SIZE_T_TYPE@
 STRIP = @STRIP@
-UIC = @UIC@
-UIChelp = @UIChelp@
 VERSION = @VERSION@
 XMKMF = @XMKMF@
 XML_CATALOG = @XML_CATALOG@
diff --git a/libdb/db_stat.c b/libdb/db_stat.c
index 6d29e9a..c85d93a 100644
--- a/libdb/db_stat.c
+++ b/libdb/db_stat.c
@@ -65,7 +65,8 @@ odb_hash_stat_t * odb_hash_stat(odb_t const * odb)
 	}
 
 	result->max_list_length = max_length;
-	result->average_list_length = total_length / nr_non_empty_list;
+	result->average_list_length = (!nr_non_empty_list) ? 0
+	                                                   : total_length / nr_non_empty_list;
 
 	return result;
 }
diff --git a/libdb/tests/Makefile.in b/libdb/tests/Makefile.in
index e7bc3fa..eaaea7d 100644
--- a/libdb/tests/Makefile.in
+++ b/libdb/tests/Makefile.in
@@ -40,7 +40,6 @@ DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
 am__aclocal_m4_deps = $(top_srcdir)/m4/binutils.m4 \
 	$(top_srcdir)/m4/builtinexpect.m4 \
-	$(top_srcdir)/m4/cellspubfdsupport.m4 \
 	$(top_srcdir)/m4/compileroption.m4 \
 	$(top_srcdir)/m4/copyifchange.m4 $(top_srcdir)/m4/docbook.m4 \
 	$(top_srcdir)/m4/extradirs.m4 \
@@ -49,7 +48,7 @@ am__aclocal_m4_deps = $(top_srcdir)/m4/binutils.m4 \
 	$(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
 	$(top_srcdir)/m4/mallocattribute.m4 \
 	$(top_srcdir)/m4/poptconst.m4 \
-	$(top_srcdir)/m4/precompiledheader.m4 $(top_srcdir)/m4/qt.m4 \
+	$(top_srcdir)/m4/precompiledheader.m4 \
 	$(top_srcdir)/m4/sstream.m4 $(top_srcdir)/m4/typedef.m4 \
 	$(top_srcdir)/configure.ac
 am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
@@ -132,7 +131,6 @@ LN_S = @LN_S@
 LTLIBOBJS = @LTLIBOBJS@
 MAKEINFO = @MAKEINFO@
 MKDIR_P = @MKDIR_P@
-MOC = @MOC@
 NM = @NM@
 NMEDIT = @NMEDIT@
 OBJDUMP = @OBJDUMP@
@@ -156,20 +154,13 @@ PFM_LIB = @PFM_LIB@
 PKG_CONFIG = @PKG_CONFIG@
 POPT_LIBS = @POPT_LIBS@
 PTRDIFF_T_TYPE = @PTRDIFF_T_TYPE@
-QT_CFLAGS = @QT_CFLAGS@
-QT_INCLUDES = @QT_INCLUDES@
-QT_LDFLAGS = @QT_LDFLAGS@
-QT_LIB = @QT_LIB@
-QT_LIBS = @QT_LIBS@
-QT_VERSION = @QT_VERSION@
 RANLIB = @RANLIB@
+RT_LIB = @RT_LIB@
 SED = @SED@
 SET_MAKE = @SET_MAKE@
 SHELL = @SHELL@
 SIZE_T_TYPE = @SIZE_T_TYPE@
 STRIP = @STRIP@
-UIC = @UIC@
-UIChelp = @UIChelp@
 VERSION = @VERSION@
 XMKMF = @XMKMF@
 XML_CATALOG = @XML_CATALOG@
diff --git a/libop/Makefile.am b/libop/Makefile.am
index f893118..0452f70 100644
--- a/libop/Makefile.am
+++ b/libop/Makefile.am
@@ -13,17 +13,16 @@ libop_a_SOURCES = \
 	op_cpu_type.h \
 	op_mangle.c \
 	op_mangle.h \
-	op_get_interface.c \
-	op_interface.h \
 	op_alloc_counter.c \
 	op_alloc_counter.h \
 	op_hw_config.h \
 	op_config.c \
 	op_config.h \
-	op_config_24.h \
 	op_sample_file.h \
 	op_xml_events.c \
 	op_xml_events.h \
 	op_xml_out.c \
 	op_xml_out.h \
-	op_hw_specific.h
+	op_hw_specific.h \
+	op_netburst.c \
+	op_netburst.h
diff --git a/libop/Makefile.in b/libop/Makefile.in
index 8ce1c94..6e36c87 100644
--- a/libop/Makefile.in
+++ b/libop/Makefile.in
@@ -39,7 +39,6 @@ DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
 am__aclocal_m4_deps = $(top_srcdir)/m4/binutils.m4 \
 	$(top_srcdir)/m4/builtinexpect.m4 \
-	$(top_srcdir)/m4/cellspubfdsupport.m4 \
 	$(top_srcdir)/m4/compileroption.m4 \
 	$(top_srcdir)/m4/copyifchange.m4 $(top_srcdir)/m4/docbook.m4 \
 	$(top_srcdir)/m4/extradirs.m4 \
@@ -48,7 +47,7 @@ am__aclocal_m4_deps = $(top_srcdir)/m4/binutils.m4 \
 	$(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
 	$(top_srcdir)/m4/mallocattribute.m4 \
 	$(top_srcdir)/m4/poptconst.m4 \
-	$(top_srcdir)/m4/precompiledheader.m4 $(top_srcdir)/m4/qt.m4 \
+	$(top_srcdir)/m4/precompiledheader.m4 \
 	$(top_srcdir)/m4/sstream.m4 $(top_srcdir)/m4/typedef.m4 \
 	$(top_srcdir)/configure.ac
 am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
@@ -63,9 +62,9 @@ libop_a_AR = $(AR) $(ARFLAGS)
 libop_a_LIBADD =
 am_libop_a_OBJECTS = op_events.$(OBJEXT) op_parse_event.$(OBJEXT) \
 	op_cpu_type.$(OBJEXT) op_mangle.$(OBJEXT) \
-	op_get_interface.$(OBJEXT) op_alloc_counter.$(OBJEXT) \
-	op_config.$(OBJEXT) op_xml_events.$(OBJEXT) \
-	op_xml_out.$(OBJEXT)
+	op_alloc_counter.$(OBJEXT) op_config.$(OBJEXT) \
+	op_xml_events.$(OBJEXT) op_xml_out.$(OBJEXT) \
+	op_netburst.$(OBJEXT)
 libop_a_OBJECTS = $(am_libop_a_OBJECTS)
 DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
 depcomp = $(SHELL) $(top_srcdir)/depcomp
@@ -174,7 +173,6 @@ LN_S = @LN_S@
 LTLIBOBJS = @LTLIBOBJS@
 MAKEINFO = @MAKEINFO@
 MKDIR_P = @MKDIR_P@
-MOC = @MOC@
 NM = @NM@
 NMEDIT = @NMEDIT@
 OBJDUMP = @OBJDUMP@
@@ -198,20 +196,13 @@ PFM_LIB = @PFM_LIB@
 PKG_CONFIG = @PKG_CONFIG@
 POPT_LIBS = @POPT_LIBS@
 PTRDIFF_T_TYPE = @PTRDIFF_T_TYPE@
-QT_CFLAGS = @QT_CFLAGS@
-QT_INCLUDES = @QT_INCLUDES@
-QT_LDFLAGS = @QT_LDFLAGS@
-QT_LIB = @QT_LIB@
-QT_LIBS = @QT_LIBS@
-QT_VERSION = @QT_VERSION@
 RANLIB = @RANLIB@
+RT_LIB = @RT_LIB@
 SED = @SED@
 SET_MAKE = @SET_MAKE@
 SHELL = @SHELL@
 SIZE_T_TYPE = @SIZE_T_TYPE@
 STRIP = @STRIP@
-UIC = @UIC@
-UIChelp = @UIChelp@
 VERSION = @VERSION@
 XMKMF = @XMKMF@
 XML_CATALOG = @XML_CATALOG@
@@ -288,20 +279,19 @@ libop_a_SOURCES = \
 	op_cpu_type.h \
 	op_mangle.c \
 	op_mangle.h \
-	op_get_interface.c \
-	op_interface.h \
 	op_alloc_counter.c \
 	op_alloc_counter.h \
 	op_hw_config.h \
 	op_config.c \
 	op_config.h \
-	op_config_24.h \
 	op_sample_file.h \
 	op_xml_events.c \
 	op_xml_events.h \
 	op_xml_out.c \
 	op_xml_out.h \
-	op_hw_specific.h
+	op_hw_specific.h \
+	op_netburst.c \
+	op_netburst.h
 
 all: all-recursive
 
@@ -355,8 +345,8 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/op_config.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/op_cpu_type.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/op_events.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/op_get_interface.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/op_mangle.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/op_netburst.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/op_parse_event.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/op_xml_events.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/op_xml_out.Po@am__quote@
diff --git a/libop/op_alloc_counter.c b/libop/op_alloc_counter.c
index 7aec316..1595e5e 100644
--- a/libop/op_alloc_counter.c
+++ b/libop/op_alloc_counter.c
@@ -109,17 +109,17 @@ static void delete_counter_arc(counter_arc_head * ctr_arc, int nr_events)
  * mapping number.
  *
  * Solution is searched through a simple backtracking exploring recursively all
- * possible solution until one is found, prunning is done in O(1) by tracking
+ * possible solution until one is found, pruning is done in O(1) by tracking
  * a bitmask of already allocated counter. Walking through node is done in
  * preorder left to right.
  *
- * In case of extended events (required no phisical counters), the associated
+ * In case of extended events (required no physical counters), the associated
  * counter_map entry will be -1.
  *
- * Possible improvment if neccessary: partition counters in class of counter,
+ * Possible improvement if necessary: partition counters in class of counter,
  * two counter belong to the same class if they allow exactly the same set of
  * event. Now using a variant of the backtrack algo can works on class of
- * counter rather on counter (this is not an improvment if each counter goes
+ * counter rather on counter (this is not an improvement if each counter goes
  * in it's own class)
  */
 static int
@@ -157,47 +157,6 @@ allocate_counter(counter_arc_head const * ctr_arc, int max_depth, int depth,
 	return 0;
 }
 
-/* determine which directories are counter directories
- */
-static int perfcounterdir(const struct dirent * entry)
-{
-	return (isdigit(entry->d_name[0]));
-}
-
-
-/**
- * @param mask pointer where to place bit mask of unavailable counters
- *
- * return >= 0 number of counters that are available
- *        < 0  could not determine number of counters
- *
- */
-static int op_get_counter_mask(u32 * mask)
-{
-	struct dirent **counterlist;
-	int count, i;
-	/* assume nothing is available */
-	u32 available=0;
-
-	count = scandir("/dev/oprofile", &counterlist, perfcounterdir,
-			alphasort);
-	if (count < 0)
-		/* unable to determine bit mask */
-		return -1;
-	/* convert to bit map (0 where counter exists) */
-	for (i=0; i<count; ++i) {
-		available |= 1 << atoi(counterlist[i]->d_name);
-		free(counterlist[i]);
-	}
-	/* Append the timer counter to the mask of hardware counters.  */
-	if (op_cpu_has_timer_fs()) {
-		available |= 1 << (op_get_nr_counters(op_get_cpu_type()) - 1);
-		count++;
-	}
-	*mask=~available;
-	free(counterlist);
-	return count;
-}
 
 size_t * map_event_to_counter(struct op_event const * pev[], int nr_events,
                               op_cpu cpu_type)
@@ -205,19 +164,9 @@ size_t * map_event_to_counter(struct op_event const * pev[], int nr_events,
 	counter_arc_head * ctr_arc;
 	size_t * counter_map;
 	int i, nr_counters, nr_pmc_events;
-	op_cpu curr_cpu_type;
 	u32 unavailable_counters = 0;
 
-	/* Either ophelp or one of the libop tests may invoke this
-	 * function with a non-native cpu_type.  If so, we should not
-	 * call op_get_counter_mask because that will look for real counter
-	 * information in oprofilefs.
-	 */
-	curr_cpu_type = op_get_cpu_type();
-	if (cpu_type != curr_cpu_type)
-		nr_counters = op_get_nr_counters(cpu_type);
-	else
-		nr_counters = op_get_counter_mask(&unavailable_counters);
+	nr_counters = op_get_nr_counters(cpu_type);
 
 	/* no counters then probably perfmon managing perfmon hw */
 	if (nr_counters <= 0) {
diff --git a/libop/op_config.c b/libop/op_config.c
index 837242b..7dd0ec4 100644
--- a/libop/op_config.c
+++ b/libop/op_config.c
@@ -10,7 +10,6 @@
  */
 
 #include "op_config.h"
-#include "op_config_24.h"
 
 #include <limits.h>
 #include <stdlib.h>
@@ -32,6 +31,21 @@ char op_device[PATH_MAX];
 char op_note_device[PATH_MAX];
 char op_hash_device[PATH_MAX];
 
+char * stats_filenames[OPERF_MAX_STATS] = {
+                                           "total_samples",
+                                           "",
+                                           "",
+                                           "lost_invalid_domain",
+                                           "lost_kernel",
+                                           "lost_samplefile",
+                                           "lost_no_mapping",
+                                           "lost_no_app_for_kernel_sample",
+                                           "lost_no_app_for_user_sample",
+                                           "lost_bt_no_mapping",
+                                           "lost_invalid_hypervisor_addr",
+                                           "lost_records_by_kernel",
+};
+
 void
 init_op_config_dirs(char const * session_dir)
 {
@@ -54,24 +68,4 @@ init_op_config_dirs(char const * session_dir)
 	strcpy(op_samples_current_dir, op_samples_dir);
 	strcat(op_samples_current_dir, "/current/");
 
-	strcpy(op_lock_file, op_session_dir);
-	strcat(op_lock_file, "/lock");
-
-	strcpy(op_pipe_file, op_session_dir);
-	strcat(op_pipe_file, "/opd_pipe");
-
-	strcpy(op_log_file, op_samples_dir);
-	strcat(op_log_file, "oprofiled.log");
-
-	strcpy(op_dump_status, op_session_dir);
-	strcat(op_dump_status, "/complete_dump");
-
-	strcpy(op_device, op_session_dir);
-	strcat(op_device, "/opdev");
-
-	strcpy(op_note_device, op_session_dir);
-	strcat(op_note_device, "/opnotedev");
-
-	strcpy(op_hash_device, op_session_dir);
-	strcat(op_hash_device, "/ophashmapdev");
 }
diff --git a/libop/op_config.h b/libop/op_config.h
index e94aa1d..8fe70c8 100644
--- a/libop/op_config.h
+++ b/libop/op_config.h
@@ -19,6 +19,30 @@
 extern "C" {
 #endif
   
+
+enum {	OPERF_SAMPLES, /**< nr. samples */
+	OPERF_KERNEL, /**< nr. kernel samples */
+	OPERF_PROCESS, /**< nr. userspace samples */
+	OPERF_INVALID_CTX, /**< nr. samples lost due to sample address not in expected range for domain */
+	OPERF_LOST_KERNEL,  /**< nr. kernel samples lost */
+	OPERF_LOST_SAMPLEFILE, /**< nr samples for which sample file can't be opened */
+	OPERF_LOST_NO_MAPPING, /**< nr samples lost due to no mapping */
+	OPERF_NO_APP_KERNEL_SAMPLE, /**<nr. user ctx kernel samples dropped due to no app context available */
+	OPERF_NO_APP_USER_SAMPLE, /**<nr. user samples dropped due to no app context available */
+	OPERF_BT_LOST_NO_MAPPING, /**<nr. backtrace samples dropped due to no mapping */
+	OPERF_LOST_INVALID_HYPERV_ADDR, /**<nr. hypervisor samples dropped due to address out-of-range */
+	OPERF_RECORD_LOST_SAMPLE, /**<nr. samples lost reported by perf_events kernel */
+	OPERF_MAX_STATS /**< end of stats */
+};
+#define OPERF_INDEX_OF_FIRST_LOST_STAT 3
+
+/* Warn on lost samples if number of lost samples is greater than this fraction
+ * of the total samples
+*/
+#define OPERF_WARN_LOST_SAMPLES_THRESHOLD   0.0001
+
+extern char * stats_filenames[];
+
 /** 
  * must be called to initialize the paths below.
  * @param session_dir  the non-NULL value of the base session directory
@@ -27,17 +51,14 @@ void init_op_config_dirs(char const * session_dir);
 
 #define OP_SESSION_DIR_DEFAULT "/var/lib/oprofile/"
 
+
 /* 
- * various paths, corresponding to opcontrol, that should be
+ * various paths used by various oprofile tools, that should be
  * initialized by init_op_config_dirs() above. 
  */
 extern char op_session_dir[];
 extern char op_samples_dir[];
 extern char op_samples_current_dir[];
-extern char op_lock_file[];
-extern char op_log_file[];
-extern char op_pipe_file[];
-extern char op_dump_status[];
 
 /* Global directory that stores debug files */
 #ifndef DEBUGDIR
@@ -45,10 +66,7 @@ extern char op_dump_status[];
 #endif
 
 #define OPD_MAGIC "DAE\n"
-#define OPD_VERSION 0x12
-
-#define OP_MIN_CPU_BUF_SIZE 2048
-#define OP_MAX_CPU_BUF_SIZE 131072
+#define OPD_VERSION 0x13
 
 #if defined(__cplusplus)
 }
diff --git a/libop/op_config_24.h b/libop/op_config_24.h
deleted file mode 100644
index 1786fae..0000000
--- a/libop/op_config_24.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/**
- * @file op_config_24.h
- *
- * Parameters a user may want to change
- *
- * @remark Copyright 2002 OProfile authors
- * @remark Read the file COPYING
- *
- * @author John Levon
- * @author Philippe Elie
- */
-
-#ifndef OP_CONFIG_24_H
-#define OP_CONFIG_24_H
-
-#define OP_MOUNT "/proc/sys/dev/oprofile/"
-
-extern char op_device[];
-extern char op_note_device[];
-extern char op_hash_device[];
-
-/*@{\name module default/min/max settings */
-
-/** 65536 * sizeof(op_sample) */
-#define OP_DEFAULT_BUF_SIZE 65536
-/** 
- * we don't try to wake-up daemon until it remains more than this free entry
- * in eviction buffer 
- */
-#define OP_PRE_WATERMARK(buffer_size)			\
-	(((buffer_size) / 8) < OP_MIN_PRE_WATERMARK	\
-		? OP_MIN_PRE_WATERMARK			\
-		: (buffer_size) / 8)
-/** minimal buffer water mark before we try to wakeup daemon */
-#define OP_MIN_PRE_WATERMARK 8192
-/** maximum number of entry in samples eviction buffer */
-#define OP_MAX_BUF_SIZE	1048576
-/** minimum number of entry in samples eviction buffer */
-#define OP_MIN_BUF_SIZE	(32768 + OP_PRE_WATERMARK(32768))
-
-/** 16384 * sizeof(op_note) = 273680 bytes default */
-#define OP_DEFAULT_NOTE_SIZE 16384
-/** 
- * we don't try to wake-up daemon until it remains more than this free entry
- * in note buffer 
- */
-#define OP_PRE_NOTE_WATERMARK(note_size)		\
-	(((note_size) / 32) < OP_MIN_NOTE_PRE_WATERMARK	\
-		? OP_MIN_NOTE_PRE_WATERMARK		\
-		: (note_size) / 32)
-/** minimal note buffer water mark before we try to wakeup daemon */
-#define OP_MIN_NOTE_PRE_WATERMARK 512
-/** maximum number of entry in note buffer */
-#define OP_MAX_NOTE_TABLE_SIZE	1048576
-/** minimum number of entry in note buffer */
-#define OP_MIN_NOTE_TABLE_SIZE	(1024 + OP_PRE_NOTE_WATERMARK(1024))
-
-/** maximum sampling rate when using RTC */
-#define OP_MAX_RTC_COUNT	4096
-/** minimum sampling rate when using RTC */
-#define OP_MIN_RTC_COUNT	2
-
-/*@}*/
-
-/** 
- * nr entries in hash map. This is the maximum number of name components
- * allowed. Must be a prime number 
- */
-#define OP_HASH_MAP_NR 4093
-
-/** size of string pool in bytes */
-#define POOL_SIZE 65536
-
-#ifndef NR_CPUS
-/** maximum number of cpus present in the box */
-#define NR_CPUS 32
-#endif
-
-#endif /* OP_CONFIG_24_H */
diff --git a/libop/op_cpu_type.c b/libop/op_cpu_type.c
index 07e26a0..6ce1f13 100644
--- a/libop/op_cpu_type.c
+++ b/libop/op_cpu_type.c
@@ -11,15 +11,28 @@
 
 #include <stdio.h>
 #include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
 #include <string.h>
 #include <sys/utsname.h>
 #include <ctype.h>
 #include <errno.h>
 #include <fnmatch.h>
+#include <elf.h>
+#include <link.h>
 
+#include "config.h"
 #include "op_cpu_type.h"
 #include "op_hw_specific.h"
 
+/* A macro to be used for ppc64 architecture-specific code.  The '__powerpc__' macro
+ * is defined for both ppc64 and ppc32 architectures, so we must further qualify by
+ * including the 'HAVE_LIBPFM' macro, since that macro will be defined only for ppc64.
+ */
+#define PPC64_ARCH (HAVE_LIBPFM) && ((defined(__powerpc__) || defined(__powerpc64__)))
+
 struct cpu_descr {
 	char const * pretty;
 	char const * name;
@@ -32,18 +45,10 @@ static struct cpu_descr const cpu_descrs[MAX_CPU_TYPE] = {
 	{ "PII", "i386/pii", CPU_PII, 2 },
 	{ "PIII", "i386/piii", CPU_PIII, 2 },
 	{ "Athlon", "i386/athlon", CPU_ATHLON, 4 },
-	{ "CPU with timer interrupt", "timer", CPU_TIMER_INT, 1 },
-	{ "CPU with RTC device", "rtc", CPU_RTC, 1 },
+        { "CPU with timer interrupt", "timer", CPU_TIMER_INT, 1 },
 	{ "P4 / Xeon", "i386/p4", CPU_P4, 8 },
-	{ "IA64", "ia64/ia64", CPU_IA64, 4 },
-	{ "Itanium", "ia64/itanium", CPU_IA64_1, 4 },
-	{ "Itanium 2", "ia64/itanium2", CPU_IA64_2, 4 },
 	{ "AMD64 processors", "x86-64/hammer", CPU_HAMMER, 4 },
 	{ "P4 / Xeon with 2 hyper-threads", "i386/p4-ht", CPU_P4_HT2, 4 },
-	{ "Alpha EV4", "alpha/ev4", CPU_AXP_EV4, 2 },
-	{ "Alpha EV5", "alpha/ev5", CPU_AXP_EV5, 3 },
-	{ "Alpha PCA56", "alpha/pca56", CPU_AXP_PCA56, 3 },
-	{ "Alpha EV6", "alpha/ev6", CPU_AXP_EV6, 2 },
 	{ "Alpha EV67", "alpha/ev67", CPU_AXP_EV67, 20 },
 	{ "Pentium M (P6 core)", "i386/p6_mobile", CPU_P6_MOBILE, 2 },
 	{ "ARM/XScale PMU1", "arm/xscale1", CPU_ARM_XSCALE1, 3 },
@@ -71,19 +76,15 @@ static struct cpu_descr const cpu_descrs[MAX_CPU_TYPE] = {
 	{ "Core 2", "i386/core_2", CPU_CORE_2, 2 },
 	{ "ppc64 POWER6", "ppc64/power6", CPU_PPC64_POWER6, 4 },
 	{ "ppc64 970MP", "ppc64/970MP", CPU_PPC64_970MP, 8 },
-	{ "ppc64 Cell Broadband Engine", "ppc64/cell-be", CPU_PPC64_CELL, 8 },
 	{ "AMD64 family10", "x86-64/family10", CPU_FAMILY10, 4 },
-	{ "ppc64 PA6T", "ppc64/pa6t", CPU_PPC64_PA6T, 6 },
 	{ "ARM 11MPCore", "arm/mpcore", CPU_ARM_MPCORE, 2 },
 	{ "ARM V6 PMU", "arm/armv6", CPU_ARM_V6, 3 },
 	{ "ppc64 POWER5++", "ppc64/power5++", CPU_PPC64_POWER5pp, 6 },
 	{ "e300", "ppc/e300", CPU_PPC_E300, 4 },
-	{ "AVR32", "avr32", CPU_AVR32, 3 },
 	{ "ARM Cortex-A8", "arm/armv7", CPU_ARM_V7, 5 },
  	{ "Intel Architectural Perfmon", "i386/arch_perfmon", CPU_ARCH_PERFMON, 0},
 	{ "AMD64 family11h", "x86-64/family11h", CPU_FAMILY11H, 4 },
 	{ "ppc64 POWER7", "ppc64/power7", CPU_PPC64_POWER7, 6 },
-	{ "ppc64 compat version 1", "ppc64/ibm-compat-v1", CPU_PPC64_IBM_COMPAT_V1, 4 },
    	{ "Intel Core/i7", "i386/core_i7", CPU_CORE_I7, 4 },
    	{ "Intel Atom", "i386/atom", CPU_ATOM, 2 },
 	{ "Loongson2", "mips/loongson2", CPU_MIPS_LOONGSON2, 2 },
@@ -107,6 +108,18 @@ static struct cpu_descr const cpu_descrs[MAX_CPU_TYPE] = {
 	{ "ARM Cortex-A5", "arm/armv7-ca5", CPU_ARM_V7_CA5, 3 },
 	{ "ARM Cortex-A7", "arm/armv7-ca7", CPU_ARM_V7_CA7, 5 },
 	{ "ARM Cortex-A15", "arm/armv7-ca15", CPU_ARM_V7_CA15, 7 },
+	{ "Intel Haswell microarchitecture", "i386/haswell", CPU_HASWELL, 4 },
+	{ "IBM zEnterprise EC12", "s390/zEC12", CPU_S390_ZEC12, 1 },	{ "AMD64 generic", "x86-64/generic", CPU_AMD64_GENERIC, 4 },
+	{ "IBM Power Architected Events V1", "ppc64/architected_events_v1", CPU_PPC64_ARCH_V1, 6 },
+	{ "ppc64 POWER8", "ppc64/power8", CPU_PPC64_POWER8, 6 },
+	{ "e500mc", "ppc/e500mc", CPU_PPC_E500MC, 4 },
+	{ "e6500", "ppc/e6500", CPU_PPC_E6500, 6 },
+	{ "Intel Silvermont microarchitecture", "i386/silvermont", CPU_SILVERMONT, 2 },
+	{ "ARMv7 Krait", "arm/armv7-krait", CPU_ARM_KRAIT, 5 },
+	{ "APM X-Gene", "arm/armv8-xgene", CPU_ARM_V8_APM_XGENE, 6 },
+	{ "Intel Broadwell microarchitecture", "i386/broadwell", CPU_BROADWELL, 4 },
+	{ "ARM Cortex-A57", "arm/armv8-ca57", CPU_ARM_V8_CA57, 6},
+	{ "ARM Cortex-A53", "arm/armv8-ca53", CPU_ARM_V8_CA53, 6},
 };
  
 static size_t const nr_cpu_descrs = sizeof(cpu_descrs) / sizeof(struct cpu_descr);
@@ -165,11 +178,139 @@ static char * _get_cpuinfo_cpu_type(char * buf, int len, const char * prefix)
 	return _get_cpuinfo_cpu_type_line(buf, len, prefix, 1);
 }
 
+#if PPC64_ARCH
+// The aux vector stuff below is currently only used by ppc64 arch
+static ElfW(auxv_t) * auxv_buf = NULL;
+
+static ElfW(auxv_t) * _auxv_fetch()
+{
+	ElfW(auxv_t) * auxv_temp = (ElfW(auxv_t) *)auxv_buf;
+	int auxv_f;
+	size_t page_size = getpagesize();
+	ssize_t bytes;
+
+
+	if(auxv_temp == NULL) {
+		auxv_f = open("/proc/self/auxv", O_RDONLY);
+
+		if(auxv_f == -1) {
+			perror("Cannot open /proc/self/auxv");
+			fprintf(stderr, "Assuming native platform profiling is supported.\n");
+			return NULL;
+		}
+		else {
+			auxv_temp = (ElfW(auxv_t) *)malloc(page_size);
+			if (!auxv_temp) {
+				perror("Allocation of space for auxv failed.");
+				close(auxv_f);
+				return NULL;
+			}
+			bytes = read(auxv_f, (void *)auxv_temp, page_size);
+
+			if (bytes <= 0) {
+				free(auxv_temp);
+				close(auxv_f);
+				perror("Error /proc/self/auxv read failed");
+				return NULL;
+			}
+
+			if (close(auxv_f)) {
+				perror("Error close failed");
+				fprintf(stderr, "Recoverable error. Continuing.\n");
+			}
+		}
+		auxv_buf = auxv_temp;
+	}
+	return (ElfW(auxv_t) *)auxv_temp;
+}
+
+
+static const char * fetch_at_hw_platform(ElfW(Addr) type)
+{
+	int i = 0;
+	const char * platform = NULL;
+	ElfW(auxv_t) * my_auxv = NULL;
+
+	if ((my_auxv = (ElfW(auxv_t)*) _auxv_fetch()) == NULL)
+		return NULL;
+
+	do {
+		if(my_auxv[i].a_type == type) {
+			platform = (const char *)my_auxv[i].a_un.a_val;
+			break;
+		}
+		i++;
+	} while (my_auxv[i].a_type != AT_NULL);
+
+	return platform;
+}
+
+static void release_at_hw_platform(void)
+{
+	if (auxv_buf) {
+		free(auxv_buf);
+		auxv_buf = NULL;
+	}
+}
+
+static op_cpu _try_ppc64_arch_generic_cpu(void)
+{
+	const char * platform, * base_platform;
+	op_cpu cpu_type = CPU_NO_GOOD;
+
+	platform = fetch_at_hw_platform(AT_PLATFORM);
+	base_platform = fetch_at_hw_platform(AT_BASE_PLATFORM);
+	if (!platform || !base_platform) {
+		fprintf(stderr, "NULL returned for one or both of AT_PLATFORM/AT_BASE_PLATFORM\n");
+		fprintf(stderr, "AT_PLATFORM: %s; \tAT_BASE_PLATFORM: %s\n", platform, base_platform);
+		release_at_hw_platform();
+		return cpu_type;
+	}
+	// FIXME whenever a new IBM Power processor is added -- need to ensure
+	// we're returning the correct version of the architected events file.
+	if (strcmp(platform, base_platform)) {
+		// If platform and base_platform differ by only a "+" at the end of the name, we
+		// consider these equal.
+		int platforms_are_equivalent = 0;
+		size_t p1_len, p2_len;
+		p1_len = strlen(platform);
+		p2_len = strlen(base_platform);
+		if (p2_len == (p1_len + 1)) {
+			if ((strncmp(platform, base_platform, p1_len) == 0) &&
+					(base_platform[p2_len - 1] == '+')) {
+				platforms_are_equivalent = 1;
+			}
+		}
+		if (!platforms_are_equivalent) {
+			//  FIXME
+			/* For POWER8 running in POWER7 compat mode (RHEL 6.5 and SLES 11 SP4),
+			 * the kernel will have enough POWER8-specific PMU code so we can utilize
+			 * all of the POWER8 events. In general, this is not necessarily the case
+			 * when running in compat mode.  This code needs to be inspected for every
+			 * new IBM Power processor released, but for now, we'll assume that for the
+			 * next processor model (assuming there will be something like a POWER9?),
+			 * we should use just the architected events when running POWER8 compat mode.
+			 */
+			if ((strcmp(platform, "power7") == 0) && (strcmp(base_platform, "power8") == 0))
+				cpu_type = CPU_PPC64_POWER8;
+			else
+				cpu_type = CPU_PPC64_ARCH_V1;
+		}
+	}
+	release_at_hw_platform();
+	return cpu_type;
+}
+
 static op_cpu _get_ppc64_cpu_type(void)
 {
 	int i;
 	size_t len;
 	char line[100], cpu_type_str[64], cpu_name_lowercase[64], * cpu_name;
+	op_cpu cpu_type = CPU_NO_GOOD;
+
+	cpu_type = _try_ppc64_arch_generic_cpu();
+	if (cpu_type != CPU_NO_GOOD)
+		return cpu_type;
 
 	cpu_name = _get_cpuinfo_cpu_type(line, 100, "cpu");
 	if (!cpu_name)
@@ -179,11 +320,48 @@ static op_cpu _get_ppc64_cpu_type(void)
 	for (i = 0; i < (int)len ; i++)
 		cpu_name_lowercase[i] = tolower(cpu_name[i]);
 
+	if (strncmp(cpu_name_lowercase, "power7+", 7) == 0)
+		cpu_name_lowercase[6] = '\0';
+	if (strncmp(cpu_name_lowercase, "power8e", 7) == 0)
+		cpu_name_lowercase[6] = '\0';
+
 	cpu_type_str[0] = '\0';
 	strcat(cpu_type_str, "ppc64/");
 	strncat(cpu_type_str, cpu_name_lowercase, len);
-	return op_get_cpu_number(cpu_type_str);
+	cpu_type = op_get_cpu_number(cpu_type_str);
+	return cpu_type;
 }
+#else
+static op_cpu _get_ppc64_cpu_type(void)
+{
+	return CPU_NO_GOOD;
+}
+#endif
+
+
+static char *alpha_cpu_models[] = {
+	"EV67", "EV68CB", "EV68AL", "EV68CX", "EV7", "EV79", "EV69", NULL
+};
+
+
+static op_cpu _get_alpha_cpu_type(void)
+{
+	char *cpu_model;
+	char **p;
+	char line[100];
+
+	cpu_model = _get_cpuinfo_cpu_type(line, 100, "cpu model");
+	if (!cpu_model)
+		return CPU_NO_GOOD;
+
+	for (p = alpha_cpu_models; *p; p++) {
+		if (strcmp(cpu_model, *p) == 0)
+			return CPU_AXP_EV67;
+	}
+
+	return CPU_NO_GOOD;
+}
+
 
 static op_cpu _get_arm_cpu_type(void)
 {
@@ -231,6 +409,15 @@ static op_cpu _get_arm_cpu_type(void)
 			return op_get_cpu_number("arm/armv7-ca9");
 		case 0xc0f:
 			return op_get_cpu_number("arm/armv7-ca15");
+		case 0xd07:
+			return op_get_cpu_number("arm/armv8-ca57");
+		case 0xd03:
+			return op_get_cpu_number("arm/armv8-ca53");
+		}
+	} else if (vendorid == 0x50) {	/* Applied Micro Circuits Corporation */
+		switch (cpuid) {
+		case 0x000:
+			return op_get_cpu_number("arm/armv8-xgene");
 		}
 	} else if (vendorid == 0x69) {	/* Intel xscale */
 		switch (cpuid >> 9) {
@@ -343,12 +530,16 @@ static op_cpu _get_intel_cpu_type(void)
 
 static op_cpu _get_amd_cpu_type(void)
 {
-	unsigned eax, family, model;
+	unsigned eax, family;
 	op_cpu ret = CPU_NO_GOOD;
+	char buf[20] = {'\0'};
 
 	eax = cpuid_signature();
 	family = cpu_family(eax);
-	model = cpu_model(eax);
+
+	/* These families did not exist in the past. */
+	if (family < 0x0f || family == 0x13)
+		return ret;
 
 	switch (family) {
 	case 0x0f:
@@ -358,24 +549,17 @@ static op_cpu _get_amd_cpu_type(void)
 		ret = op_get_cpu_number("x86-64/family10");
 		break;
 	case 0x11:
-		ret = op_get_cpu_number("x86-64/family11h");
-		break;
 	case 0x12:
-		ret = op_get_cpu_number("x86-64/family12h");
-		break;
 	case 0x14:
-		ret = op_get_cpu_number("x86-64/family14h");
-		break;
 	case 0x15:
-		switch (model) {
-		case 0x00 ... 0x0f:
-			ret = op_get_cpu_number("x86-64/family15h");
-			break;		
-		default:
-			break;
-		}
+		/* From family11h and forward, we use the same naming scheme */
+		snprintf(buf, 20, "x86-64/family%xh", family);
+		ret = op_get_cpu_number(buf);
 		break;
 	default:
+		/* Future processors */
+		snprintf(buf, 20, "x86-64/generic");
+		ret = op_get_cpu_number(buf);
 		break;
 	}
 
@@ -456,7 +640,40 @@ static op_cpu _get_mips_cpu_type(void)
 	return CPU_NO_GOOD;
 }
 
-static op_cpu __get_cpu_type_alt_method(void)
+static op_cpu _get_s390_cpu_type(void)
+{
+	char line[100];
+	char *ptr;
+	const char prefix[] = "machine = ";
+	unsigned model;
+
+	ptr = _get_cpuinfo_cpu_type_line(line, sizeof(line), "processor", 0);
+	if (!ptr)
+		return CPU_NO_GOOD;
+
+	ptr = strstr(ptr, prefix);
+	if (!ptr)
+		return CPU_NO_GOOD;
+
+	ptr += sizeof(prefix) - 1;
+	if (sscanf(ptr, "%u", &model) != 1)
+		return CPU_NO_GOOD;
+
+	switch (model) {
+	case 2097:
+	case 2098:
+		return CPU_S390_Z10;
+	case 2817:
+	case 2818:
+		return CPU_S390_Z196;
+	case 2827:
+	case 2828:
+		return CPU_S390_ZEC12;
+	}
+	return CPU_NO_GOOD;
+}
+
+static op_cpu __get_cpu_type(void)
 {
 	struct utsname uname_info;
 	if (uname(&uname_info) < 0) {
@@ -467,10 +684,15 @@ static op_cpu __get_cpu_type_alt_method(void)
 	    fnmatch("i?86", uname_info.machine, 0) == 0) {
 		return _get_x86_64_cpu_type();
 	}
-	if (strncmp(uname_info.machine, "ppc64", 5) == 0) {
+	if ((strncmp(uname_info.machine, "ppc64", 5) == 0) ||
+			(strncmp(uname_info.machine, "ppc64le", 7) == 0)) {
 		return _get_ppc64_cpu_type();
 	}
-	if (strncmp(uname_info.machine, "arm", 3) == 0) {
+	if (strncmp(uname_info.machine, "alpha", 5) == 0) {
+		return _get_alpha_cpu_type();
+	}
+	if (strncmp(uname_info.machine, "arm", 3) == 0 ||
+	    strncmp(uname_info.machine, "aarch64", 7) == 0) {
 		return _get_arm_cpu_type();
 	}
 	if (strncmp(uname_info.machine, "tile", 4) == 0) {
@@ -479,6 +701,9 @@ static op_cpu __get_cpu_type_alt_method(void)
 	if (strncmp(uname_info.machine, "mips", 4) == 0) {
 		return _get_mips_cpu_type();
 	}
+	if (strncmp(uname_info.machine, "s390", 4) == 0) {
+		return _get_s390_cpu_type();
+	}
 	return CPU_NO_GOOD;
 }
 
@@ -501,6 +726,9 @@ op_cpu op_cpu_base_type(op_cpu cpu_type)
 	case CPU_CORE_I7:
 	case CPU_ATOM:
 	case CPU_NEHALEM:
+	case CPU_HASWELL:
+	case CPU_BROADWELL:
+	case CPU_SILVERMONT:
 	case CPU_WESTMERE:
 	case CPU_SANDYBRIDGE:
 	case CPU_IVYBRIDGE:
@@ -514,35 +742,13 @@ op_cpu op_cpu_base_type(op_cpu cpu_type)
 op_cpu op_get_cpu_type(void)
 {
 	int cpu_type = CPU_NO_GOOD;
-	char str[100];
-	FILE * fp;
 
-	fp = fopen("/proc/sys/dev/oprofile/cpu_type", "r");
-	if (!fp) {
-		/* Try 2.6's oprofilefs one instead. */
-		fp = fopen("/dev/oprofile/cpu_type", "r");
-		if (!fp) {
-			if ((cpu_type = __get_cpu_type_alt_method()) == CPU_NO_GOOD) {
-				fprintf(stderr, "Unable to open cpu_type file for reading\n");
-				fprintf(stderr, "Make sure you have done opcontrol --init\n");
-			}
-			return cpu_type;
-		}
+	if ((cpu_type = __get_cpu_type()) == CPU_NO_GOOD) {
+		fprintf(stderr, "Unable to obtain cpu_type\n");
+		fprintf(stderr, "Verify that a pre-1.0 version of OProfile is not in use.\n"
+		        "If the /dev/oprofile/cpu_type file exists, locate the pre-1.0 OProfile\n"
+		        "installation, and use its 'opcontrol' command, passing the --deinit option.\n");
 	}
-
-	if (!fgets(str, 99, fp)) {
-		fprintf(stderr, "Could not read cpu type.\n");
-		fclose(fp);
-		return cpu_type;
-	}
-
-	cpu_type = op_get_cpu_number(str);
-
-	if (op_cpu_variations(cpu_type))
-		cpu_type = op_cpu_specific_type(cpu_type);
-
-	fclose(fp);
-
 	return cpu_type;
 }
 
@@ -550,6 +756,7 @@ op_cpu op_get_cpu_type(void)
 op_cpu op_get_cpu_number(char const * cpu_string)
 {
 	int cpu_type = CPU_NO_GOOD;
+	int scan_matches = 0;
 	size_t i;
 	
 	for (i = 0; i < nr_cpu_descrs; ++i) {
@@ -560,12 +767,11 @@ op_cpu op_get_cpu_number(char const * cpu_string)
 	}
 
 	/* Attempt to convert into a number */
-	if (cpu_type == CPU_NO_GOOD)
-		sscanf(cpu_string, "%d\n", &cpu_type);
-	
-	if (cpu_type <= CPU_NO_GOOD || cpu_type >= MAX_CPU_TYPE)
-		cpu_type = CPU_NO_GOOD;
-
+	if (cpu_type == CPU_NO_GOOD) {
+		scan_matches = sscanf(cpu_string, "%d\n", &cpu_type);
+		if (scan_matches && (cpu_type <= CPU_NO_GOOD || cpu_type >= MAX_CPU_TYPE))
+			cpu_type = CPU_NO_GOOD;
+	}
 	return cpu_type;
 }
 
@@ -596,26 +802,8 @@ int op_get_nr_counters(op_cpu cpu_type)
 		return 0;
 
 	cnt = arch_num_counters(cpu_type);
-	if (cnt >= 0)
-		return cnt;
-
-	return op_cpu_has_timer_fs()
-		? cpu_descrs[cpu_type].nr_counters + 1
-		: cpu_descrs[cpu_type].nr_counters;
+	if (cnt < 0)
+		cnt = cpu_descrs[cpu_type].nr_counters;
+	return cnt;
 }
 
-int op_cpu_has_timer_fs(void)
-{
-	static int cached_has_timer_fs_p = -1;
-	FILE * fp;
-
-	if (cached_has_timer_fs_p != -1)
-		return cached_has_timer_fs_p;
-
-	fp = fopen("/dev/oprofile/timer", "r");
-	cached_has_timer_fs_p = !!fp;
-	if (fp)
-		fclose(fp);
-
-	return cached_has_timer_fs_p;
-}
diff --git a/libop/op_cpu_type.h b/libop/op_cpu_type.h
index c7e953f..bd8170a 100644
--- a/libop/op_cpu_type.h
+++ b/libop/op_cpu_type.h
@@ -25,18 +25,10 @@ typedef enum {
 	CPU_PII, /**< Pentium II series */
 	CPU_PIII, /**< Pentium III series */
 	CPU_ATHLON, /**< AMD P6 series */
-	CPU_TIMER_INT, /**< CPU using the timer interrupt */
-	CPU_RTC, /**< other CPU to use the RTC */
+        CPU_TIMER_INT, /**< CPU using the timer interrupt */
 	CPU_P4,  /**< Pentium 4 / Xeon series */
-	CPU_IA64, /**< Generic IA64 */
-	CPU_IA64_1, /**< IA64 Merced */
-	CPU_IA64_2, /**< IA64 McKinley */
 	CPU_HAMMER, /**< AMD Hammer family */
 	CPU_P4_HT2, /**< Pentium 4 / Xeon series with 2 hyper-threads */
-	CPU_AXP_EV4, /**< Alpha EV4 family */
-	CPU_AXP_EV5, /**< Alpha EV5 family */
-	CPU_AXP_PCA56, /**< Alpha PCA56 family */
-	CPU_AXP_EV6, /**< Alpha EV6 family */
 	CPU_AXP_EV67, /**< Alpha EV67 family */
 	CPU_P6_MOBILE, /**< Pentium M series */
 	CPU_ARM_XSCALE1, /**< ARM XScale 1 */
@@ -64,19 +56,15 @@ typedef enum {
 	CPU_CORE_2, /**< Intel Core 2 */
 	CPU_PPC64_POWER6, /**< ppc64 POWER6 family */
 	CPU_PPC64_970MP, /**< ppc64 970MP */
-	CPU_PPC64_CELL, /**< ppc64 Cell Broadband Engine*/
 	CPU_FAMILY10, /**< AMD family 10 */
- 	CPU_PPC64_PA6T, /**< ppc64 PA6T */
 	CPU_ARM_MPCORE, /**< ARM MPCore */
 	CPU_ARM_V6, /**< ARM V6 */
 	CPU_PPC64_POWER5pp,  /**< ppc64 Power5++ family */
 	CPU_PPC_E300, /**< e300 */
-	CPU_AVR32, /**< AVR32 */
 	CPU_ARM_V7, /**< ARM Cortex-A8 */
  	CPU_ARCH_PERFMON, /**< Intel architectural perfmon */
 	CPU_FAMILY11H, /**< AMD family 11h */
 	CPU_PPC64_POWER7, /**< ppc64 POWER7 family */
-	CPU_PPC64_IBM_COMPAT_V1, /**< IBM PPC64 processor compat mode version 1 */
    	CPU_CORE_I7, /* Intel Core i7, Nehalem */
    	CPU_ATOM, /* First generation Intel Atom */
 	CPU_MIPS_LOONGSON2, /* < loongson2 family */
@@ -100,6 +88,18 @@ typedef enum {
 	CPU_ARM_V7_CA5, /**< ARM Cortex-A5 */
 	CPU_ARM_V7_CA7, /**< ARM Cortex-A7 */
 	CPU_ARM_V7_CA15, /**< ARM Cortex-A15 */
+	CPU_HASWELL, /**< Intel Haswell microarchitecture */
+	CPU_S390_ZEC12, /**< IBM zEnterprise EC12 */	CPU_AMD64_GENERIC, /**< AMD64 Generic */
+	CPU_PPC64_ARCH_V1, /**< IBM Power architected events version 1 */
+	CPU_PPC64_POWER8, /**< ppc64 POWER8 family */
+	CPU_PPC_E500MC, /**< e500mc */
+	CPU_PPC_E6500, /**< e6500 */
+	CPU_SILVERMONT, /**< Intel Silvermont microarchitecture */
+	CPU_ARM_KRAIT, /**< ARM KRAIT */
+	CPU_ARM_V8_APM_XGENE, /* APM X-Gene */
+	CPU_BROADWELL, /**< Intel Broadwell (Core-M) microarchitecture */
+	CPU_ARM_V8_CA57, /* ARM Cortex-A57 */
+	CPU_ARM_V8_CA53, /* ARM Cortex-A53 */
 	MAX_CPU_TYPE
 } op_cpu;
 
@@ -163,29 +163,6 @@ int op_is_cpu_vendor(char * vendor);
  */
 int op_get_nr_counters(op_cpu cpu_type);
 
-typedef enum {
-	OP_INTERFACE_NO_GOOD = -1,
-	OP_INTERFACE_24,
-	OP_INTERFACE_26
-} op_interface;
-
-/**
- * get the INTERFACE used to communicate between daemon and the kernel
- *
- * returns OP_INTERFACE_NO_GOOD if the INTERFACE could not be identified.
- * This function will identify the interface as OP_INTERFACE_NO_GOOD if
- * the module is not loaded.
- */
-op_interface op_get_interface(void);
-
-/**
- * determine if the /dev/oprofile/timer is available
- *
- * return true if the kernel modules provides the /dev/oprofile
- * interface for timer mode sampling.
- */
-int op_cpu_has_timer_fs(void);
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/libop/op_events.c b/libop/op_events.c
index ae8e1e5..8bfd3d2 100644
--- a/libop/op_events.c
+++ b/libop/op_events.c
@@ -114,7 +114,8 @@ unsigned parse_extra(const char *s)
 	unsigned v, w;
 	int o;
 
-	v = 0;
+	/* This signifies that the first word of the description is unique */
+	v = EXTRA_NONE;
 	while (*s) {
 		if (isspace(*s))
 			break;
@@ -130,6 +131,9 @@ unsigned parse_extra(const char *s)
 		} else if (strisprefix(s, "any")) {
 			v |= EXTRA_ANY;
 			s += 3;
+		} else if (strisprefix(s, "pebs")) {
+			v |= EXTRA_PEBS;
+			s += 4;
 		} else {
 			parse_error("Illegal extra field modifier");
 		}
@@ -193,7 +197,12 @@ static void parse_um(struct op_unit_mask * um, char const * line)
 			if (seen_default)
 				parse_error("duplicate default: tag");
 			seen_default = 1;
-			um->default_mask = parse_hex(tagend);
+			if (0 != strncmp(tagend, "0x", 2)) {
+				um->default_mask_name = op_xstrndup(
+					tagend, valueend - tagend);
+			} else {
+				um->default_mask = parse_hex(tagend);
+			}
 		} else {
 			parse_error("invalid unit mask tag");
 		}
@@ -211,32 +220,50 @@ static void parse_um(struct op_unit_mask * um, char const * line)
 
 
 /* \t0x08 (M)odified cache state */
-/* \t0x08 extra:inv,cmask=... (M)odified cache state */
+/* \t0x08 extra:inv,cmask=... mod_cach_state (M)odified cache state */
 static void parse_um_entry(struct op_described_um * entry, char const * line)
 {
 	char const * c = line;
 
+	/* value */
 	c = skip_ws(c);
 	entry->value = parse_hex(c);
-	c = skip_nonws(c);
 
+	/* extra: */
+	c = skip_nonws(c);
 	c = skip_ws(c);
+	if (!*c)
+		goto invalid_out;
+
 	if (strisprefix(c, "extra:")) {
 		c += 6;
 		entry->extra = parse_extra(c);
+		/* named mask */
 		c = skip_nonws(c);
-	} else
-		entry->extra = 0;
-
-	if (!*c)
-		parse_error("invalid unit mask entry");
+		c = skip_ws(c);
+		if (!*c)
+			goto invalid_out;
 
-	c = skip_ws(c);
+		/* "extra:" !!ALWAYS!! followed by named mask */
+		entry->name = op_xstrndup(c, strcspn(c, " \t"));
+		c = skip_nonws(c);
+		c = skip_ws(c);
+	} else {
+		entry->extra = 0;
+	}
 
-	if (!*c)
-		parse_error("invalid unit mask entry");
+	/* desc */
+	if (!*c) {
+		/* This is a corner case where the named unit mask entry
+		 * only has one word.  This should really be fixed in the
+		 * unit_mask file */
+		entry->desc = xstrdup(entry->name);
+	} else
+		entry->desc = xstrdup(c);
+	return;
 
-	entry->desc = xstrdup(c);
+invalid_out:
+	parse_error("invalid unit mask entry");
 }
 
 
@@ -253,6 +280,7 @@ static void free_unit_mask(struct op_unit_mask * um)
 {
 	list_del(&um->um_next);
 	free(um);
+	um = NULL;
 }
 
 /*
@@ -286,8 +314,6 @@ static void read_unit_masks(char const * file)
 		} else {
 			if (!um)
 				parse_error("no unit mask name line");
-			if (um->num >= MAX_UNIT_MASK)
-				parse_error("oprofile: maximum unit mask entries exceeded");
 
 			parse_um_entry(&um->um[um->num], line);
 			++(um->num);
@@ -383,7 +409,7 @@ static struct op_unit_mask * merge_um(char * value)
 	if (type == -1U)
 		parse_error("Empty unit mask");
 	new->unit_type_mask = type;
-	return new;		
+	return new;
 }
 
 /* parse either a "tag:value" or a ": trailing description string" */
@@ -497,7 +523,7 @@ static void read_events(char const * file)
 		event = new_event();
 		event->filter = -1;
 		event->ext = NULL;
-		
+
 		c = line;
 		while (next_token(&c, &name, &value)) {
 			if (strcmp(name, "name") == 0) {
@@ -558,6 +584,7 @@ static void read_events(char const * file)
 				c = skip_ws(c);
 				if (*c != '\0' && *c != '#')
 					parse_error("non whitespace after include:");
+				break;
 			} else {
 				parse_error("unknown tag");
 			}
@@ -601,10 +628,18 @@ static int check_unit_mask(struct op_unit_mask const * um,
 				"(%s)\n", um->name, cpu_name);
 			err = EXIT_FAILURE;
 		}
-	} else {
-		for (i = 0; i < um->num; ++i) {
-			if (um->default_mask == um->um[i].value)
-				break;
+	} else if (um->unit_type_mask == utm_exclusive) {
+		if (um->default_mask_name) {
+			for (i = 0; i < um->num; ++i) {
+				if (0 == strcmp(um->default_mask_name,
+						um->um[i].name))
+					break;
+			}
+		} else {
+			for (i = 0; i < um->num; ++i) {
+				if (um->default_mask == um->um[i].value)
+					break;
+			}
 		}
 
 		if (i == um->num) {
@@ -639,7 +674,7 @@ static void load_events_name(const char *cpu_name)
 
 	read_unit_masks(um_file);
 	read_events(event_file);
-	
+
 	free(um_file);
 	free(event_file);
 }
@@ -648,8 +683,6 @@ static void load_events(op_cpu cpu_type)
 {
 	const char * cpu_name = op_get_cpu_name(cpu_type);
 	struct list_head * pos;
-	struct op_event *event;
-	struct op_unit_mask *unit_mask;
 	int err = 0;
 
 	if (!list_empty(&events_list))
@@ -667,56 +700,6 @@ static void load_events(op_cpu cpu_type)
 	if (err)
 		exit(err);
 
-	if (!op_cpu_has_timer_fs())
-		return;
-
-	/* sanity check: Don't use event `TIMER' since it is predefined.  */
-	list_for_each(pos, &events_list) {
-		struct op_event * event = list_entry(pos, struct op_event,
-						     event_next);
-
-		if (strcmp(event->name, TIMER_EVENT_NAME) == 0) {
-			fprintf(stderr, "Error: " TIMER_EVENT_NAME
-				" event cannot be redefined.\n");
-			exit(EXIT_FAILURE);
-		}
-		if (event->val == TIMER_EVENT_VALUE) {
-			fprintf(stderr, "Error: Event %s uses " TIMER_EVENT_NAME
-				" which is reserverd for timer based sampling.\n",
-				event->name);
-			exit(EXIT_FAILURE);
-		}
-	}
-
-	list_for_each(pos, &um_list) {
-		struct op_unit_mask * um = list_entry(pos, struct op_unit_mask,
-						      um_next);
-		if (strcmp(um->name, TIMER_EVENT_UNIT_MASK_NAME) == 0) {
-			fprintf(stderr, "Error: " TIMER_EVENT_UNIT_MASK_NAME
-				" unit mask cannot be redefined.\n");
-			exit(EXIT_FAILURE);
-		}
-	}
-
-	unit_mask = new_unit_mask();
-	unit_mask->name = xstrdup(TIMER_EVENT_UNIT_MASK_NAME);
-	unit_mask->num = 1;
-	unit_mask->unit_type_mask = utm_mandatory;
-	unit_mask->um[0].extra = 0;
-	unit_mask->um[0].value = 0;
-	unit_mask->um[0].desc = xstrdup("No unit mask");
-	unit_mask->used = 1;
-
-	event = new_event();
-	event->name = xstrdup(TIMER_EVENT_NAME);
-	event->desc = xstrdup(TIMER_EVENT_DESC);
-	event->val = TIMER_EVENT_VALUE;
-	event->unit = unit_mask;
-	event->min_count = 0;
-	event->filter = 0;
-	event->counter_mask = 1 << (op_get_nr_counters(cpu_type) - 1);
-	event->ext = NULL;
-	event->filter = -1;
 }
 
 struct list_head * op_events(op_cpu cpu_type)
@@ -912,7 +895,6 @@ char const * find_mapping_for_event(u32 nr, op_cpu cpu_type)
 	FILE * fp = open_event_mapping_file(cpu_name);
 	char const * map = NULL;
 	switch (cpu_type) {
-		case CPU_PPC64_PA6T:
 		case CPU_PPC64_970:
 		case CPU_PPC64_970MP:
 		case CPU_PPC64_POWER4:
@@ -921,14 +903,14 @@ char const * find_mapping_for_event(u32 nr, op_cpu cpu_type)
 		case CPU_PPC64_POWER5pp:
 		case CPU_PPC64_POWER6:
 		case CPU_PPC64_POWER7:
-		case CPU_PPC64_IBM_COMPAT_V1:
+		// For ppc64 types of CPU_PPC64_ARCH_V1 and higher, we don't need an event_mappings file
 			if (!fp) {
 				fprintf(stderr, "oprofile: could not open event mapping file %s\n", filename);
 				exit(EXIT_FAILURE);
 			} else {
 				map = get_mapping(nr, fp);
 			}
-			break;			
+			break;
 		default:
 			break;
 	}
@@ -1008,6 +990,97 @@ struct op_event * op_find_event_any(op_cpu cpu_type, u32 nr)
 	return find_event_any(nr);
 }
 
+/*
+ * Check whether passed_um is an acceptable value for an event whose unit
+ * mask is of bitmask type.
+ *
+ * event:     event whose unit->um[0 .. unit->num-1].value entries are the
+ *            defined unit mask values; event->name is only used in the
+ *            diagnostic printed for an ambiguous bitmask.
+ * passed_um: unit mask value requested by the caller.
+ *
+ * Returns 1 if passed_um is valid, 0 otherwise:
+ *  - passed_um == 0 is valid only if one of the defined values is 0;
+ *  - a non-zero passed_um is valid only if OR-ing together every defined
+ *    value whose bits are all set in passed_um yields exactly passed_um.
+ *
+ * Duplicate unit mask values were not allowed until the "named unit mask"
+ * support was added in release 0.9.7.  The down side to this is that if the
+ * user passes a unit mask value that includes one of the duplicated values,
+ * we have no way of differentiating between the duplicates, so the meaning
+ * of the bitmask would be ambiguous if we were to allow it.  Such a bitmask
+ * is therefore rejected, with an explanation written to stderr.
+ * (Note that a duplicated value of 0 is contained in every passed_um, so an
+ * event defining 0 more than once accepts no non-zero bitmask at all.)
+ *
+ * Only fixed-size local arrays are used (no heap allocation), so none of
+ * the early returns below can leak memory.
+ */
+static int _is_um_valid_bitmask(struct op_event * event, u32 passed_um)
+{
+	u32 vals[MAX_UNIT_MASK];	/* sorted, later de-duplicated, values */
+	int duped[MAX_UNIT_MASK];	/* duped[i]: vals[i] was defined more than once */
+	u32 num = event->unit->num;
+	u32 nr_uniq = 0;
+	u32 masked_val = 0;
+	u32 i, j;
+
+	/* um[] and the local arrays have MAX_UNIT_MASK slots; stay within them. */
+	if (num > MAX_UNIT_MASK)
+		num = MAX_UNIT_MASK;
+
+	/* Copy the unit mask values, insertion-sorting them in ascending order:
+	 * vals[0 .. i-1] is already sorted when um[i].value gets inserted. */
+	for (i = 0; i < num; i++) {
+		u32 tmp = event->unit->um[i].value;
+
+		for (j = i; j > 0 && tmp < vals[j - 1]; j--)
+			vals[j] = vals[j - 1];
+		vals[j] = tmp;
+	}
+
+	/* Remove duplicates in place.  Equal values are adjacent after sorting,
+	 * so each value only needs comparing with the last one kept; nr_uniq
+	 * never exceeds i, hence the compaction cannot overwrite unread input. */
+	for (i = 0; i < num; i++) {
+		if (nr_uniq > 0 && vals[nr_uniq - 1] == vals[i]) {
+			duped[nr_uniq - 1] = 1;
+			continue;
+		}
+		duped[nr_uniq] = 0;
+		vals[nr_uniq] = vals[i];
+		nr_uniq++;
+	}
+
+	/* passed_um == 0 matches only a defined unit mask of value 0, which
+	 * (the values being sorted) can only be the first one. */
+	if (!passed_um)
+		return nr_uniq > 0 && vals[0] == 0;
+
+	/* Finally, see if the passed unit mask value can be matched with a
+	 * mask of available unit mask values.  Whenever the exact bits set in
+	 * a defined value are also set in the passed um, those bits are OR'ed
+	 * into masked_val.  Simultaneously, a passed um containing a
+	 * non-unique unit mask value is rejected as ambiguous. */
+	for (i = 0; i < nr_uniq; i++) {
+		if ((vals[i] & passed_um) != vals[i])
+			continue;
+
+		if (duped[i]) {
+			fprintf(stderr, "Ambiguous bitmask: Unit mask values"
+				" cannot include non-unique numerical values (i.e., 0x%x).\n",
+				vals[i]);
+			fprintf(stderr, "Use ophelp to see the unit mask values for event %s.\n",
+				event->name);
+			return 0;
+		}
+		masked_val |= vals[i];
+	}
+
+	/* Valid only if the contained values account for every requested bit. */
+	return masked_val == passed_um;
+}
+
 int op_check_events(int ctr, u32 nr, u32 um, op_cpu cpu_type)
 {
 	int ret = OP_INVALID_EVENT;
@@ -1028,21 +1101,15 @@ int op_check_events(int ctr, u32 nr, u32 um, op_cpu cpu_type)
 			ret |= OP_INVALID_COUNTER;
 
 		if (event->unit->unit_type_mask == utm_bitmask) {
-			for (i = 0; i < event->unit->num; ++i)
-				um &= ~(event->unit->um[i].value);			
-			
-			if (um)
+			if (!_is_um_valid_bitmask(event, um))
 				ret |= OP_INVALID_UM;
-			
 		} else {
 			for (i = 0; i < event->unit->num; ++i) {
 				if (event->unit->um[i].value == um)
 					break;
 			}
-			
 			if (i == event->unit->num)
 				ret |= OP_INVALID_UM;
-
 		}
 
 		if (ret == OP_OK_EVENT)
@@ -1078,6 +1145,9 @@ void op_default_event(op_cpu cpu_type, struct op_default_event_descr * descr)
  		case CPU_ATOM:
  		case CPU_CORE_I7:
 		case CPU_NEHALEM:
+		case CPU_HASWELL:
+		case CPU_BROADWELL:
+		case CPU_SILVERMONT:
 		case CPU_WESTMERE:
 		case CPU_SANDYBRIDGE:
 		case CPU_IVYBRIDGE:
@@ -1085,33 +1155,19 @@ void op_default_event(op_cpu cpu_type, struct op_default_event_descr * descr)
 		case CPU_FAMILY12H:
 		case CPU_FAMILY14H:
 		case CPU_FAMILY15H:
+		case CPU_AMD64_GENERIC:
 			descr->name = "CPU_CLK_UNHALTED";
 			break;
 
-		case CPU_RTC:
-			descr->name = "RTC_INTERRUPTS";
-			descr->count = 1024;
-			break;
-
 		case CPU_P4:
 		case CPU_P4_HT2:
 			descr->name = "GLOBAL_POWER_EVENTS";
 			descr->um = 0x1;
 			break;
 
-		case CPU_IA64:
-		case CPU_IA64_1:
-		case CPU_IA64_2:
-			descr->count = 1000000;
-			descr->name = "CPU_CYCLES";
-			break;
-
-		case CPU_AXP_EV4:
-		case CPU_AXP_EV5:
-		case CPU_AXP_PCA56:
-		case CPU_AXP_EV6:
 		case CPU_AXP_EV67:
 			descr->name = "CYCLES";
+			descr->um = 0x1;
 			break;
 
 		// we could possibly use the CCNT
@@ -1124,13 +1180,15 @@ void op_default_event(op_cpu cpu_type, struct op_default_event_descr * descr)
 		case CPU_ARM_V7_CA7:
 		case CPU_ARM_V7_CA9:
 		case CPU_ARM_V7_CA15:
-		case CPU_AVR32:
 		case CPU_ARM_SCORPION:
 		case CPU_ARM_SCORPIONMP:
+		case CPU_ARM_KRAIT:
+		case CPU_ARM_V8_APM_XGENE:
+		case CPU_ARM_V8_CA57:
+		case CPU_ARM_V8_CA53:
 			descr->name = "CPU_CYCLES";
 			break;
 
-		case CPU_PPC64_PA6T:
 		case CPU_PPC64_970:
 		case CPU_PPC64_970MP:
 		case CPU_PPC_7450:
@@ -1139,9 +1197,9 @@ void op_default_event(op_cpu cpu_type, struct op_default_event_descr * descr)
 		case CPU_PPC64_POWER6:
 		case CPU_PPC64_POWER5p:
 		case CPU_PPC64_POWER5pp:
-		case CPU_PPC64_CELL:
 		case CPU_PPC64_POWER7:
-		case CPU_PPC64_IBM_COMPAT_V1:
+		case CPU_PPC64_ARCH_V1:
+		case CPU_PPC64_POWER8:
 			descr->name = "CYCLES";
 			break;
 
@@ -1182,19 +1240,18 @@ void op_default_event(op_cpu cpu_type, struct op_default_event_descr * descr)
 
 		case CPU_PPC_E500:
 		case CPU_PPC_E500_2:
+		case CPU_PPC_E500MC:
+		case CPU_PPC_E6500:
 		case CPU_PPC_E300:
 			descr->name = "CPU_CLK";
 			break;
-  	        case CPU_S390_Z10:
-  	        case CPU_S390_Z196:
- 			if (op_get_nr_counters(cpu_type) > 1) {
- 				descr->name = "HWSAMPLING";
- 				descr->count = 4127518;
- 			} else {
- 				descr->name = TIMER_EVENT_NAME;
- 				descr->count = 10000;
- 			}
-  			break;
+
+		case CPU_S390_Z10:
+		case CPU_S390_Z196:
+		case CPU_S390_ZEC12:
+			descr->name = "CPU_CYCLES";
+			descr->count = 4127518;
+			break;
 
 		case CPU_TILE_TILE64:
 		case CPU_TILE_TILEPRO:
@@ -1211,33 +1268,16 @@ void op_default_event(op_cpu cpu_type, struct op_default_event_descr * descr)
 	}
 }
 
-static void extra_check(struct op_event *e, u32 unit_mask)
-{
-	unsigned i;
-	int found = 0;
-
-	for (i = 0; i < e->unit->num; i++)
-		if (e->unit->um[i].value == unit_mask)
-			found++;
-	if (found > 1) {
-		fprintf(stderr,
-"Named unit masks not allowed for events without 'extra:' values.\n"
-"Please specify the numerical value for the unit mask. See 'opcontrol'"
-" man page for more info.\n");
-		exit(EXIT_FAILURE);
-	}
-}
-
-static void another_extra_check(struct op_event *e, char *name, unsigned w)
+static void extra_check(struct op_event *e, char *name, unsigned w)
 {
 	int found;
 	unsigned i;
 
 	if (!e->unit->um[w].extra) {
 		fprintf(stderr,
-"Named unit mask (%s) not allowed for event without 'extra:' values.\n"
-"Please specify the numerical value for the unit mask. See 'opcontrol'"
-" man page for more info.\n", name);
+			"Named unit mask (%s) not allowed for event without 'extra:' values.\n"
+			"Please specify the numerical value for the unit mask. See the 'operf'\n"
+			"man page for more info.\n", name);
 		exit(EXIT_FAILURE);
 	}
 
@@ -1248,69 +1288,99 @@ static void another_extra_check(struct op_event *e, char *name, unsigned w)
 		    name[len] == '\0')
 			found++;
 	}
+
 	if (found > 1) {
 		fprintf(stderr,
-	"Unit mask name `%s' not unique. Sorry please use a numerical unit mask\n", name);
+			"Unit mask name `%s' not unique. Please use a numerical unit mask\n", name);
 		exit(EXIT_FAILURE);
 	}
 }
 
-static void do_resolve_unit_mask(struct op_event *e, struct parsed_event *pe,
-				 u32 *extra)
+static void do_resolve_unit_mask(struct op_event *e,
+	struct parsed_event *pe, u32 *extra)
 {
 	unsigned i;
-	int found;
+
+	/* If no unit mask was specified and the default unit mask is a named
+	 * one, use the default name as the parsed event's unit mask name */
+	if ((e->unit->default_mask_name != NULL) &&
+			(pe->unit_mask_name == NULL) && (!pe->unit_mask_valid)) {
+		pe->unit_mask_name = xstrdup(e->unit->default_mask_name);
+	}
 
 	for (;;) {
 		if (pe->unit_mask_name == NULL) {
-			int had_unit_mask = pe->unit_mask_valid;
+			/* For numerical unit mask */
+			int found = 0;
+			int old_um_valid = pe->unit_mask_valid;
+
+			/* Use default unitmask if not specified */
+			if (!pe->unit_mask_valid) {
+				pe->unit_mask_valid = 1;
+				pe->unit_mask = e->unit->default_mask;
+			}
 
-			found = 0;
+			/* Check whether the numerical unit mask value is duplicated,
+			 * in which case a named unit mask must be used instead.
+			 */
 			for (i = 0; i < e->unit->num; i++) {
-				if (!pe->unit_mask_valid &&
-				e->unit->um[i].value == e->unit->default_mask) {
-					pe->unit_mask_valid = 1;
-					pe->unit_mask = e->unit->default_mask;
-					break;
-				}
+				if (e->unit->um[i].value == (unsigned int)pe->unit_mask)
+					found++;
 			}
-			if (found > 1 && had_unit_mask) {
-				fprintf(stderr,
-	"Non unique numerical unit mask.\n"
-	"Please specify the unit mask using the first word of the description\n");
+			if (found > 1) {
+				if (!old_um_valid)
+					fprintf(stderr,
+						"Default unit mask not supported for this event.\n"
+						"Please speicfy a unit mask by name, using the first "
+						"word of the unit mask description\n");
+				else
+					fprintf(stderr,
+						"Unit mask (0x%x) is non unique.\n"
+						"Please specify the unit mask using the first "
+						"word of the description\n",
+					pe->unit_mask);
 				exit(EXIT_FAILURE);
 			}
-			extra_check(e, pe->unit_mask);
+
 			if (i == e->unit->num) {
 				e = find_next_event(e);
 				if (e != NULL)
 					continue;
-			} else {
-				if (extra)
+			}
+			return;
+		} else {
+			/* For named unit mask */
+			for (i = 0; i < e->unit->num; i++) {
+				int len = 0;
+
+				if (e->unit->um[i].name)
+					len = strlen(e->unit->um[i].name);
+
+				if (len
+				&&  (!strncmp(pe->unit_mask_name,
+					      e->unit->um[i].name, len))
+				&&  (pe->unit_mask_name[len] == '\0'))
+					break;
+			}
+			if (i == e->unit->num) {
+				e = find_next_event(e);
+				if (e != NULL)
+					continue;
+				fprintf(stderr, "Cannot find unit mask %s for %s\n",
+					pe->unit_mask_name, pe->name);
+				exit(EXIT_FAILURE);
+			}
+			extra_check(e, pe->unit_mask_name, i);
+			pe->unit_mask_valid = 1;
+			pe->unit_mask = e->unit->um[i].value;
+			if (extra) {
+				if (e->unit->um[i].extra == EXTRA_NONE)
+					*extra = e->unit->um[i].value;
+				else
 					*extra = e->unit->um[i].extra;
 			}
 			return;
 		}
-		for (i = 0; i < e->unit->num; i++) {
-			int len = strcspn(e->unit->um[i].desc, " \t");
-			if (!strncmp(pe->unit_mask_name, e->unit->um[i].desc,
-				    len) && pe->unit_mask_name[len] == '\0')
-				break;
-		}
-		if (i == e->unit->num) {
-			e = find_next_event(e);
-			if (e != NULL)
-				continue;
-			fprintf(stderr, "Cannot find unit mask %s for %s\n",
-				pe->unit_mask_name, pe->name);
-			exit(EXIT_FAILURE);
-		}
-		another_extra_check(e, pe->unit_mask_name, i);
-		pe->unit_mask_valid = 1;
-		pe->unit_mask = e->unit->um[i].value;
-		if (extra)
-			*extra = e->unit->um[i].extra;
-		return;
 	}
 }
 
diff --git a/libop/op_events.h b/libop/op_events.h
index 1a65a7a..be609f7 100644
--- a/libop/op_events.h
+++ b/libop/op_events.h
@@ -21,21 +21,15 @@ extern "C" {
 #include "op_list.h"
 
 #define EXTRA_EDGE (1U << 18)
+#define EXTRA_MIN_VAL EXTRA_EDGE
+
 #define EXTRA_ANY  (1U << 21)
 #define EXTRA_INV  (1U << 23)
 #define EXTRA_CMASK_SHIFT 24
 #define EXTRA_CMASK_MASK 0xff
+#define EXTRA_PEBS (1U << 19) /* fake, mapped to pin control, but mapped back for perf */
+#define EXTRA_NONE (1U << 22) /* mapped to enabled */
 
-/*
- * For timer based sampling some targets (e.g. s390) use a virtual
- * counter whose file system resides in /dev/oprofile/timer.  These
- * macros set the values used to define a specific timer event solely
- * used by the timer counter.
- */
-#define TIMER_EVENT_NAME           "TIMER"
-#define TIMER_EVENT_UNIT_MASK_NAME "timer_unit_mask"
-#define TIMER_EVENT_DESC           "Timer based sampling"
-#define TIMER_EVENT_VALUE          (u32)-1
 
 /** Describe an unit mask type. Events can optionally use a filter called
  * the unit mask. the mask type can be a bitmask or a discrete value */
@@ -45,8 +39,7 @@ enum unit_mask_type {
 	utm_bitmask		/**< bitmask */
 };
 
-/** up to thirty two allowed unit masks */
-#define MAX_UNIT_MASK 32
+#define MAX_UNIT_MASK 64
 
 
 /** Describe an unit mask. */
@@ -55,9 +48,11 @@ struct op_unit_mask {
 	u32 num;		/**< number of possible unit masks */
 	enum unit_mask_type unit_type_mask;
 	u32 default_mask;	/**< only the gui use it */
+	char * default_mask_name;
 	struct op_described_um {
 	        u32 extra;
 		u32 value;
+		char * name;
 		char * desc;
 	} um[MAX_UNIT_MASK];
 	struct list_head um_next; /**< next um in list */
diff --git a/libop/op_get_interface.c b/libop/op_get_interface.c
deleted file mode 100644
index bdf72a5..0000000
--- a/libop/op_get_interface.c
+++ /dev/null
@@ -1,32 +0,0 @@
-/**
- * @file op_get_interface.c
- * Determine which oprofile kernel interface used
- *
- * @remark Copyright 2002 OProfile authors
- * @remark Read the file COPYING
- *
- * @author Will Cohen
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "op_cpu_type.h"
-#include "op_file.h"
-
-op_interface op_get_interface(void)
-{
-	static op_interface current_interface = OP_INTERFACE_NO_GOOD;
-
-	if (current_interface != OP_INTERFACE_NO_GOOD)
-		return current_interface;
-
-	if (op_file_readable("/proc/sys/dev/oprofile/cpu_type")) {
-		current_interface = OP_INTERFACE_24;
-	} else if (op_file_readable("/dev/oprofile/cpu_type")) {
-		current_interface = OP_INTERFACE_26;
-	}
-
-	return current_interface;
-}
diff --git a/libop/op_hw_config.h b/libop/op_hw_config.h
index 169b36b..739ee4d 100644
--- a/libop/op_hw_config.h
+++ b/libop/op_hw_config.h
@@ -12,15 +12,11 @@
 #ifndef OP_HW_CONFIG_H
 #define OP_HW_CONFIG_H
 
-/** maximum number of counters, up to 4 for Athlon (18 for P4). The primary
- * use of this variable is for static/local array dimension. Never use it in
- * loop or in array index access/index checking unless you know what you
- * made. */
-#ifdef __alpha__
-#define OP_MAX_COUNTERS	20
-#else
+/** Maximum number of counters.
+ * The primary use of this variable is for static/local array dimension.
+ * Never use it in loops or in array index access/index checking unless
+ * you know what you are doing. */
 #define OP_MAX_COUNTERS	8
-#endif
 
 /** maximum number of events between interrupts. Counters are 40 bits, but
  * for convenience we only use 32 bits. The top bit is used for overflow
diff --git a/libop/op_hw_specific.h b/libop/op_hw_specific.h
index 36e47d8..1d39692 100644
--- a/libop/op_hw_specific.h
+++ b/libop/op_hw_specific.h
@@ -142,7 +142,20 @@ static inline op_cpu op_cpu_specific_type(op_cpu cpu_type)
 		case 0x2d:
 			return CPU_SANDYBRIDGE;
 		case 0x3a:
+		case 0x3e:
 			return CPU_IVYBRIDGE;
+		case 0x3c:
+		case 0x3f:
+		case 0x45:
+		case 0x46:
+			return CPU_HASWELL;
+		case 0x3d:
+		case 0x47:
+		case 0x4f:
+			return CPU_BROADWELL;
+		case 0x37:
+		case 0x4d:
+			return CPU_SILVERMONT;
 		}
 	}
 	return cpu_type;
diff --git a/libop/op_interface.h b/libop/op_interface.h
deleted file mode 100644
index fa2ecbd..0000000
--- a/libop/op_interface.h
+++ /dev/null
@@ -1,87 +0,0 @@
-/**
- * @file op_interface.h
- *
- * Module / user space interface for 2.4
- *
- * @remark Copyright 2002 OProfile authors
- * @remark Read the file COPYING
- *
- * @author John Levon
- * @author Philippe Elie
- */
-
-#ifndef OP_INTERFACE_H
-#define OP_INTERFACE_H
-
-#include "op_config.h"
-#include "op_types.h"
-
-/*@{\name notifications types encoded in op_note::type */
-/** fork(),vfork(),clone() */
-#define OP_FORK 1
-/** mapping */
-#define OP_MAP 2
-/** execve() */
-#define OP_EXEC 4
-/** init_module() */
-#define OP_DROP_MODULES 8
-/** exit() */
-#define OP_EXIT 16
-/*@}*/
-
-/** Data type to transfer samples counts from the module to the daemon */
-struct op_sample {
-	unsigned long eip; /**< eip value where occur interrupt */
-	u32 counter; /**< counter nr */
-	u32 pid; /**< 32 bits can hold any pid */
-	u32 tgid; /**< always equal to pid for kernel < 2.4.0 */
-};
-
-/** the current kernel-side profiler state */
-enum oprof_state {
-	STOPPED = 0,
-	STOPPING = 1,
-	RUNNING = 2
-};
- 
-/**
- * The head structure of a kernel sample buffer.
- */
-struct op_buffer_head {
-	int cpu_nr; /**< the CPU number of this buffer */
-	size_t count; /**< number of samples in this buffer */
-	enum oprof_state state; /**< current profiler state */
-	struct op_sample buffer[0]; /**< the sample buffer */
-} __attribute__((__packed__));
-	 
-/**
- * Data type used by the module to notify daemon of fork/exit/mapping etc.
- * Meanings of fields depend on the type of notification encoded in the type
- * field.
- * \sa OP_FORK, OP_EXEC, OP_MAP, OP_DROP_MODULES and OP_EXIT
- */
-struct op_note {
-	unsigned long   addr;
-	unsigned long   len;
-	unsigned long   offset;
-	unsigned int    hash;
-	unsigned int	pid;
-	unsigned int    tgid;
-	unsigned short	type;
-};
-
-/**
- * A path component. Directory name are stored as a stack of path components.
- * Note than the name index acts also as an unique identifier
- */
-struct op_hash_index {
-	/** index inside the string pool */
-	u32 name;
-	/** parent component, zero if this component is the root */
-	u32 parent;
-} __attribute__((__packed__));
-
-/** size of hash map in bytes */
-#define OP_HASH_MAP_SIZE (OP_HASH_MAP_NR * sizeof(struct op_hash_index) + POOL_SIZE)
-
-#endif /* OP_INTERFACE_H */
diff --git a/libop/op_mangle.c b/libop/op_mangle.c
index 1efe5b1..17a0ab7 100644
--- a/libop/op_mangle.c
+++ b/libop/op_mangle.c
@@ -23,7 +23,7 @@ static void append_image(char * dest, int flags, int anon, char const * name, ch
 {
 	if ((flags & MANGLE_KERNEL) && !strchr(name, '/')) {
 		strcat(dest, "{kern}/");
-	} else if (anon) {
+	} else if (anon && anon_name) {
 		strcat(dest, "{anon:");
 		strcat(dest, anon_name);
 		strcat(dest,"}/");
@@ -53,10 +53,11 @@ char * op_mangle_filename(struct mangle_values const * values)
 	len = strlen(op_samples_current_dir) + strlen(dep_name) + 1
 		+ strlen(values->event_name) + 1 + strlen(image_name) + 1;
 
-	if (values->flags & MANGLE_CALLGRAPH)
+	// Just to silence Coverity, check cg_image_name and anon_name below for !=NULL.
+	if (cg_image_name && (values->flags & MANGLE_CALLGRAPH))
 		len += strlen(cg_image_name) + 1;
 
-	if (anon || cg_anon)
+	if (anon_name && (anon || cg_anon))
 		len += strlen(anon_name);
 
 	/* provision for tgid, tid, unit_mask, cpu and some {root}, {dep},
@@ -72,7 +73,7 @@ char * op_mangle_filename(struct mangle_values const * values)
 	strcat(mangled, "{dep}" "/");
 	append_image(mangled, values->flags, anon, dep_name, anon_name);
 
-	if (values->flags & MANGLE_CALLGRAPH) {
+	if (cg_image_name && (values->flags & MANGLE_CALLGRAPH)) {
 		strcat(mangled, "{cg}" "/");
 		append_image(mangled, values->flags, cg_anon,
 		             cg_image_name, anon_name);
diff --git a/libop/op_netburst.c b/libop/op_netburst.c
new file mode 100644
index 0000000..04198a2
--- /dev/null
+++ b/libop/op_netburst.c
@@ -0,0 +1,1597 @@
+/**
+ * @file libop/op_netburst.c
+ * Definitions of Netburst events and a function for obtaining an encoding
+ * for a given event/unit mask in perf_events format.
+ *
+ * @remark Copyright 2013 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * Created on: May 14, 2013
+ * @author Maynard Johnson
+ * (C) Copyright IBM Corp. 2013
+ *
+ */
+
+#include "config.h"
+
+#include <string.h>
+
+#include "op_types.h"
+#include "op_netburst.h"
+
+/**
+ * netburst_events
+ *
+ * Array of events that can be counted on Pentium4.
+ **/
+netburst_entry_t op_netburst_events[] = {
+
+	/* 0 */
+	{.name = "TC_DELIVER_MODE",
+	 .desc = "The duration (in clock cycles) of the operating modes of "
+		 "the trace cache and decode engine in the processor package",
+	 .event_select = 0x1,
+	 .escr_select = 0x1,
+	 .allowed_escrs = { 9, 32 },
+	 .perf_code = P4_EVENT_TC_DELIVER_MODE,
+	 .event_masks = {
+		{.name = "DD",
+		 .desc = "Both logical CPUs in deliver mode",
+		 .bit = 0,
+		},
+		{.name = "DB",
+		 .desc = "Logical CPU 0 in deliver mode and "
+			 "logical CPU 1 in build mode",
+		 .bit = 1,
+		},
+		{.name = "DI",
+		 .desc = "Logical CPU 0 in deliver mode and logical CPU 1 "
+			 "either halted, under machine clear condition, or "
+			 "transitioning to a long microcode flow",
+		 .bit = 2,
+		},
+		{.name = "BD",
+		 .desc = "Logical CPU 0 in build mode and "
+			 "logical CPU 1 is in deliver mode",
+		 .bit = 3,
+		},
+		{.name = "BB",
+		 .desc = "Both logical CPUs in build mode",
+		 .bit = 4,
+		},
+		{.name = "BI",
+		 .desc = "Logical CPU 0 in build mode and logical CPU 1 "
+			 "either halted, under machine clear condition, or "
+			 "transitioning to a long microcode flow",
+		 .bit = 5,
+		},
+		{.name = "ID",
+		 .desc = "Logical CPU 0 either halted, under machine clear "
+			 "condition, or transitioning to a long microcode "
+			 "flow, and logical CPU 1 in deliver mode",
+		 .bit = 6,
+		},
+		{.name = "IB",
+		 .desc = "Logical CPU 0 either halted, under machine clear "
+			 "condition, or transitioning to a long microcode "
+			 "flow, and logical CPU 1 in build mode",
+		 .bit = 7,
+		},
+	 },
+	},
+
+	/* 1 */
+	{.name = "BPU_FETCH_REQUEST",
+	 .desc = "Instruction fetch requests by the Branch Prediction Unit",
+	 .event_select = 0x3,
+	 .escr_select = 0x0,
+	 .allowed_escrs = { 0, 23 },
+	 .perf_code = P4_EVENT_BPU_FETCH_REQUEST,
+	 .event_masks = {
+		{.name = "TCMISS",
+		 .desc = "Trace cache lookup miss",
+		 .bit = 0,
+		 .flags = NETBURST_FL_DFL,
+		},
+	 },
+	},
+
+	/* 2 */
+	{.name = "ITLB_REFERENCE",
+	 .desc = "Translations using the Instruction "
+		 "Translation Look-Aside Buffer",
+	 .event_select = 0x18,
+	 .escr_select = 0x3,
+	 .allowed_escrs = { 3, 26 },
+	 .perf_code = P4_EVENT_ITLB_REFERENCE,
+	 .event_masks = {
+		{.name = "HIT",
+		 .desc = "ITLB hit",
+		 .bit = 0,
+		},
+		{.name = "MISS",
+		 .desc = "ITLB miss",
+		 .bit = 1,
+		},
+		{.name = "HIT_UC",
+		 .desc = "Uncacheable ITLB hit",
+		 .bit = 2,
+		},
+	 },
+	},
+
+	/* 3 */
+	{.name = "MEMORY_CANCEL",
+	 .desc = "Canceling of various types of requests in the "
+		 "Data cache Address Control unit (DAC)",
+	 .event_select = 0x2,
+	 .escr_select = 0x5,
+	 .allowed_escrs = { 15, 38 },
+	 .perf_code = P4_EVENT_MEMORY_CANCEL,
+	 .event_masks = {
+		{.name = "ST_RB_FULL",
+		 .desc = "Replayed because no store request "
+			 "buffer is available",
+		 .bit = 2,
+		},
+		{.name = "64K_CONF",
+		 .desc = "Conflicts due to 64K aliasing",
+		 .bit = 3,
+		},
+	 },
+	},
+
+	/* 4 */
+	{.name = "MEMORY_COMPLETE",
+	 .desc = "Completions of a load split, store split, "
+		 "uncacheable (UC) split, or UC load",
+	 .event_select = 0x8,
+	 .escr_select = 0x2,
+	 .allowed_escrs = { 13, 36 },
+	 .perf_code = P4_EVENT_MEMORY_COMPLETE,
+	 .event_masks = {
+		{.name = "LSC",
+		 .desc = "Load split completed, excluding UC/WC loads",
+		 .bit = 0,
+		},
+		{.name = "SSC",
+		 .desc = "Any split stores completed",
+		 .bit = 1,
+		},
+	 },
+	},
+
+	/* 5 */
+	{.name = "LOAD_PORT_REPLAY",
+	 .desc = "Replayed events at the load port",
+	 .event_select = 0x4,
+	 .escr_select = 0x2,
+	 .allowed_escrs = { 13, 36 },
+	 .perf_code = P4_EVENT_LOAD_PORT_REPLAY,
+	 .event_masks = {
+		{.name = "SPLIT_LD",
+		 .desc = "Split load",
+		 .bit = 1,
+		 .flags = NETBURST_FL_DFL,
+		},
+	 },
+	},
+
+	/* 6 */
+	{.name = "STORE_PORT_REPLAY",
+	 .desc = "Replayed events at the store port",
+	 .event_select = 0x5,
+	 .escr_select = 0x2,
+	 .allowed_escrs = { 13, 36 },
+	 .perf_code = P4_EVENT_STORE_PORT_REPLAY,
+	 .event_masks = {
+		{.name = "SPLIT_ST",
+		 .desc = "Split store",
+		 .bit = 1,
+		 .flags = NETBURST_FL_DFL,
+		},
+	 },
+	},
+
+	/* 7 */
+	{.name = "MOB_LOAD_REPLAY",
+	 .desc = "Count of times the memory order buffer (MOB) "
+		 "caused a load operation to be replayed",
+	 .event_select = 0x3,
+	 .escr_select = 0x2,
+	 .allowed_escrs = { 2, 25 },
+	 .perf_code = P4_EVENT_MOB_LOAD_REPLAY,
+	 .event_masks = {
+		{.name = "NO_STA",
+		 .desc = "Replayed because of unknown store address",
+		 .bit = 1,
+		},
+		{.name = "NO_STD",
+		 .desc = "Replayed because of unknown store data",
+		 .bit = 3,
+		},
+		{.name = "PARTIAL_DATA",
+		 .desc = "Replayed because of partially overlapped data "
+			 "access between the load and store operations",
+		 .bit = 4,
+		},
+		{.name = "UNALGN_ADDR",
+		 .desc = "Replayed because the lower 4 bits of the "
+			 "linear address do not match between the "
+			 "load and store operations",
+		 .bit = 5,
+		},
+	 },
+	},
+
+	/* 8 */
+	{.name = "PAGE_WALK_TYPE",
+	 .desc = "Page walks that the page miss handler (PMH) performs",
+	 .event_select = 0x1,
+	 .escr_select = 0x4,
+	 .allowed_escrs = { 4, 27 },
+	 .perf_code = P4_EVENT_PAGE_WALK_TYPE,
+	 .event_masks = {
+		{.name = "DTMISS",
+		 .desc = "Page walk for a data TLB miss (load or store)",
+		 .bit = 0,
+		},
+		{.name = "ITMISS",
+		 .desc = "Page walk for an instruction TLB miss",
+		 .bit = 1,
+		},
+	 },
+	},
+
+	/* 9 */
+	{.name = "BSQ_CACHE_REFERENCE",
+	 .desc = "Cache references (2nd or 3rd level caches) as seen by the "
+		 "bus unit. Read types include both load and RFO, and write "
+		 "types include writebacks and evictions",
+	 .event_select = 0xC,
+	 .escr_select = 0x7,
+	 .allowed_escrs = { 7, 30 },
+	 .perf_code = P4_EVENT_BSQ_CACHE_REFERENCE,
+	 .event_masks = {
+		{.name = "RD_2ndL_HITS",
+		 .desc = "Read 2nd level cache hit Shared",
+		 .bit = 0,
+		},
+		{.name = "RD_2ndL_HITE",
+		 .desc = "Read 2nd level cache hit Exclusive",
+		 .bit = 1,
+		},
+		{.name = "RD_2ndL_HITM",
+		 .desc = "Read 2nd level cache hit Modified",
+		 .bit = 2,
+		},
+		{.name = "RD_3rdL_HITS",
+		 .desc = "Read 3rd level cache hit Shared",
+		 .bit = 3,
+		},
+		{.name = "RD_3rdL_HITE",
+		 .desc = "Read 3rd level cache hit Exclusive",
+		 .bit = 4,
+		},
+		{.name = "RD_3rdL_HITM",
+		 .desc = "Read 3rd level cache hit Modified",
+		 .bit = 5,
+		},
+		{.name = "RD_2ndL_MISS",
+		 .desc = "Read 2nd level cache miss",
+		 .bit = 8,
+		},
+		{.name = "RD_3rdL_MISS",
+		 .desc = "Read 3rd level cache miss",
+		 .bit = 9,
+		},
+		{.name = "WR_2ndL_MISS",
+		 .desc = "A writeback lookup from DAC misses the 2nd "
+			 "level cache (unlikely to happen)",
+		 .bit = 10,
+		},
+	 },
+	},
+
+	/* 10 */
+	{.name = "IOQ_ALLOCATION",
+	 .desc = "Count of various types of transactions on the bus. A count "
+		 "is generated each time a transaction is allocated into the "
+		 "IOQ that matches the specified mask bits. An allocated entry "
+		 "can be a sector (64 bytes) or a chunk of 8 bytes. Requests "
+		 "are counted once per retry. All 'TYPE_BIT*' event-masks "
+		 "together are treated as a single 5-bit value",
+	 .event_select = 0x3,
+	 .escr_select = 0x6,
+	 .allowed_escrs = { 6, 29 },
+	 .perf_code = P4_EVENT_IOQ_ALLOCATION,
+	 .event_masks = {
+		{.name = "TYPE_BIT0",
+		 .desc = "Bus request type (bit 0)",
+		 .bit = 0,
+		},
+		{.name = "TYPE_BIT1",
+		 .desc = "Bus request type (bit 1)",
+		 .bit = 1,
+		},
+		{.name = "TYPE_BIT2",
+		 .desc = "Bus request type (bit 2)",
+		 .bit = 2,
+		},
+		{.name = "TYPE_BIT3",
+		 .desc = "Bus request type (bit 3)",
+		 .bit = 3,
+		},
+		{.name = "TYPE_BIT4",
+		 .desc = "Bus request type (bit 4)",
+		 .bit = 4,
+		},
+		{.name = "ALL_READ",
+		 .desc = "Count read entries",
+		 .bit = 5,
+		},
+		{.name = "ALL_WRITE",
+		 .desc = "Count write entries",
+		 .bit = 6,
+		},
+		{.name = "MEM_UC",
+		 .desc = "Count UC memory access entries",
+		 .bit = 7,
+		},
+		{.name = "MEM_WC",
+		 .desc = "Count WC memory access entries",
+		 .bit = 8,
+		},
+		{.name = "MEM_WT",
+		 .desc = "Count write-through (WT) memory access entries",
+		 .bit = 9,
+		},
+		{.name = "MEM_WP",
+		 .desc = "Count write-protected (WP) memory access entries",
+		 .bit = 10,
+		},
+		{.name = "MEM_WB",
+		 .desc = "Count WB memory access entries",
+		 .bit = 11,
+		},
+		{.name = "OWN",
+		 .desc = "Count all store requests driven by processor, as "
+			 "opposed to other processor or DMA",
+		 .bit = 13,
+		},
+		{.name = "OTHER",
+		 .desc = "Count all requests driven by other "
+			 "processors or DMA",
+		 .bit = 14,
+		},
+		{.name = "PREFETCH",
+		 .desc = "Include HW and SW prefetch requests in the count",
+		 .bit = 15,
+		},
+	 },
+	},
+
+	/* 11 */
+	{.name = "IOQ_ACTIVE_ENTRIES",
+	 .desc = "Number of entries (clipped at 15) in the IOQ that are "
+		 "active. An allocated entry can be a sector (64 bytes) "
+		 "or a chunk of 8 bytes. This event must be programmed in "
+		 "conjuction with IOQ_allocation. All 'TYPE_BIT*' event-masks "
+		 "together are treated as a single 5-bit value",
+	 .event_select = 0x1A,
+	 .escr_select = 0x6,
+	 .allowed_escrs = { 29, -1 },
+	 .perf_code = P4_EVENT_IOQ_ACTIVE_ENTRIES,
+	 .event_masks = {
+		{.name = "TYPE_BIT0",
+		 .desc = "Bus request type (bit 0)",
+		 .bit = 0,
+		},
+		{.name = "TYPE_BIT1",
+		 .desc = "Bus request type (bit 1)",
+		 .bit = 1,
+		},
+		{.name = "TYPE_BIT2",
+		 .desc = "Bus request type (bit 2)",
+		 .bit = 2,
+		},
+		{.name = "TYPE_BIT3",
+		 .desc = "Bus request type (bit 3)",
+		 .bit = 3,
+		},
+		{.name = "TYPE_BIT4",
+		 .desc = "Bus request type (bit 4)",
+		 .bit = 4,
+		},
+		{.name = "ALL_READ",
+		 .desc = "Count read entries",
+		 .bit = 5,
+		},
+		{.name = "ALL_WRITE",
+		 .desc = "Count write entries",
+		 .bit = 6,
+		},
+		{.name = "MEM_UC",
+		 .desc = "Count UC memory access entries",
+		 .bit = 7,
+		},
+		{.name = "MEM_WC",
+		 .desc = "Count WC memory access entries",
+		 .bit = 8,
+		},
+		{.name = "MEM_WT",
+		 .desc = "Count write-through (WT) memory access entries",
+		 .bit = 9,
+		},
+		{.name = "MEM_WP",
+		 .desc = "Count write-protected (WP) memory access entries",
+		 .bit = 10,
+		},
+		{.name = "MEM_WB",
+		 .desc = "Count WB memory access entries",
+		 .bit = 11,
+		},
+		{.name = "OWN",
+		 .desc = "Count all store requests driven by processor, as "
+			 "opposed to other processor or DMA",
+		 .bit = 13,
+		},
+		{.name = "OTHER",
+		 .desc = "Count all requests driven by other "
+			 "processors or DMA",
+		 .bit = 14,
+		},
+		{.name = "PREFETCH",
+		 .desc = "Include HW and SW prefetch requests in the count",
+		 .bit = 15,
+		},
+	 },
+	},
+
+	/* 12 */
+	{.name = "FSB_DATA_ACTIVITY",
+	 .desc = "Count of DRDY or DBSY events that "
+		 "occur on the front side bus",
+	 .event_select = 0x17,
+	 .escr_select = 0x6,
+	 .allowed_escrs = { 6, 29 },
+	 .perf_code = P4_EVENT_FSB_DATA_ACTIVITY,
+	 .event_masks = {
+		{.name = "DRDY_DRV",
+		 .desc = "Count when this processor drives data onto the bus. "
+			 "Includes writes and implicit writebacks",
+		 .bit = 0,
+		},
+		{.name = "DRDY_OWN",
+		 .desc = "Count when this processor reads data from the bus. "
+			 "Includes loads and some PIC transactions. Count "
+			 "DRDY events that we drive. Count DRDY events sampled "
+			 "that we own",
+		 .bit = 1,
+		},
+		{.name = "DRDY_OTHER",
+		 .desc = "Count when data is on the bus but not being sampled "
+			 "by the processor. It may or may not be driven by "
+			 "this processor",
+		 .bit = 2,
+		},
+		{.name = "DBSY_DRV",
+		 .desc = "Count when this processor reserves the bus for use "
+			 "in the next bus cycle in order to drive data",
+		 .bit = 3,
+		},
+		{.name = "DBSY_OWN",
+		 .desc = "Count when some agent reserves the bus for use in "
+			 "the next bus cycle to drive data that this processor "
+			 "will sample",
+		 .bit = 4,
+		},
+		{.name = "DBSY_OTHER",
+		 .desc = "Count when some agent reserves the bus for use in "
+			 "the next bus cycle to drive data that this processor "
+			 "will NOT sample. It may or may not be being driven "
+			 "by this processor",
+		 .bit = 5,
+		},
+	 },
+	},
+
+	/* 13 */
+	{.name = "BSQ_ALLOCATION",
+	 .desc = "Allocations in the Bus Sequence Unit (BSQ). The event mask "
+		 "bits consist of four sub-groups: request type, request "
+		 "length, memory type, and a sub-group consisting mostly of "
+		 "independent bits (5 through 10). Must specify a mask for "
+		 "each sub-group",
+	 .event_select = 0x5,
+	 .escr_select = 0x7,
+	 .allowed_escrs = { 7, -1 },
+	 .perf_code = P4_EVENT_BSQ_ALLOCATION,
+	 .event_masks = {
+		{.name = "REQ_TYPE0",
+		 .desc = "Along with REQ_TYPE1, request type encodings are: "
+			 "0 - Read (excludes read invalidate), 1 - Read "
+			 "invalidate, 2 - Write (other than writebacks), 3 - "
+			 "Writeback (evicted from cache)",
+		 .bit = 0,
+		},
+		{.name = "REQ_TYPE1",
+		 .desc = "Along with REQ_TYPE0, request type encodings are: "
+			 "0 - Read (excludes read invalidate), 1 - Read "
+			 "invalidate, 2 - Write (other than writebacks), 3 - "
+			 "Writeback (evicted from cache)",
+		 .bit = 1,
+		},
+		{.name = "REQ_LEN0",
+		 .desc = "Along with REQ_LEN1, request length encodings are: "
+			 "0 - zero chunks, 1 - one chunk, 3 - eight chunks",
+		 .bit = 2,
+		},
+		{.name = "REQ_LEN1",
+		 .desc = "Along with REQ_LEN0, request length encodings are: "
+			 "0 - zero chunks, 1 - one chunk, 3 - eight chunks",
+		 .bit = 3,
+		},
+		{.name = "REQ_IO_TYPE",
+		 .desc = "Request type is input or output",
+		 .bit = 5,
+		},
+		{.name = "REQ_LOCK_TYPE",
+		 .desc = "Request type is bus lock",
+		 .bit = 6,
+		},
+		{.name = "REQ_CACHE_TYPE",
+		 .desc = "Request type is cacheable",
+		 .bit = 7,
+		},
+		{.name = "REQ_SPLIT_TYPE",
+		 .desc = "Request type is a bus 8-byte chunk split across "
+			 "an 8-byte boundary",
+		 .bit = 8,
+		},
+		{.name = "REQ_DEM_TYPE",
+		 .desc = "0: Request type is HW.SW prefetch. "
+			 "1: Request type is a demand",
+		 .bit = 9,
+		},
+		{.name = "REQ_ORD_TYPE",
+		 .desc = "Request is an ordered type",
+		 .bit = 10,
+		},
+		{.name = "MEM_TYPE0",
+		 .desc = "Along with MEM_TYPE1 and MEM_TYPE2, "
+			 "memory type encodings are: 0 - UC, "
+			 "1 - USWC, 4- WT, 5 - WP, 6 - WB",
+		 .bit = 11,
+		},
+		{.name = "MEM_TYPE1",
+		 .desc = "Along with MEM_TYPE0 and MEM_TYPE2, "
+			 "memory type encodings are: 0 - UC, "
+			 "1 - USWC, 4- WT, 5 - WP, 6 - WB",
+		 .bit = 12,
+		},
+		{.name = "MEM_TYPE2",
+		 .desc = "Along with MEM_TYPE0 and MEM_TYPE1, "
+			 "memory type encodings are: 0 - UC, "
+			 "1 - USWC, 4- WT, 5 - WP, 6 - WB",
+		 .bit = 13,
+		},
+	 },
+	},
+
+	/* 14 */
+	{.name = "BSQ_ACTIVE_ENTRIES",
+	 .desc = "Number of BSQ entries (clipped at 15) currently active "
+		 "(valid) which meet the subevent mask criteria during "
+		 "allocation in the BSQ. Active request entries are allocated "
+		 "on the BSQ until de-allocated. De-allocation of an entry "
+		 "does not necessarily imply the request is filled. This "
+		 "event must be programmed in conjunction with BSQ_allocation",
+	 .event_select = 0x6,
+	 .escr_select = 0x7,
+	 .allowed_escrs = { 30, -1 },
+	 .perf_code = P4_EVENT_BSQ_ACTIVE_ENTRIES,
+	 .event_masks = {
+		{.name = "REQ_TYPE0",
+		 .desc = "Along with REQ_TYPE1, request type encodings are: "
+			 "0 - Read (excludes read invalidate), 1 - Read "
+			 "invalidate, 2 - Write (other than writebacks), 3 - "
+			 "Writeback (evicted from cache)",
+		 .bit = 0,
+		},
+		{.name = "REQ_TYPE1",
+		 .desc = "Along with REQ_TYPE0, request type encodings are: "
+			 "0 - Read (excludes read invalidate), 1 - Read "
+			 "invalidate, 2 - Write (other than writebacks), 3 - "
+			 "Writeback (evicted from cache)",
+		 .bit = 1,
+		},
+		{.name = "REQ_LEN0",
+		 .desc = "Along with REQ_LEN1, request length encodings are: "
+			 "0 - zero chunks, 1 - one chunk, 3 - eight chunks",
+		 .bit = 2,
+		},
+		{.name = "REQ_LEN1",
+		 .desc = "Along with REQ_LEN0, request length encodings are: "
+			 "0 - zero chunks, 1 - one chunk, 3 - eight chunks",
+		 .bit = 3,
+		},
+		{.name = "REQ_IO_TYPE",
+		 .desc = "Request type is input or output",
+		 .bit = 5,
+		},
+		{.name = "REQ_LOCK_TYPE",
+		 .desc = "Request type is bus lock",
+		 .bit = 6,
+		},
+		{.name = "REQ_CACHE_TYPE",
+		 .desc = "Request type is cacheable",
+		 .bit = 7,
+		},
+		{.name = "REQ_SPLIT_TYPE",
+		 .desc = "Request type is a bus 8-byte chunk split across "
+			 "an 8-byte boundary",
+		 .bit = 8,
+		},
+		{.name = "REQ_DEM_TYPE",
+		 .desc = "0: Request type is HW.SW prefetch. "
+			 "1: Request type is a demand",
+		 .bit = 9,
+		},
+		{.name = "REQ_ORD_TYPE",
+		 .desc = "Request is an ordered type",
+		 .bit = 10,
+		},
+		{.name = "MEM_TYPE0",
+		 .desc = "Along with MEM_TYPE1 and MEM_TYPE2, "
+			 "memory type encodings are: 0 - UC, "
+			 "1 - USWC, 4- WT, 5 - WP, 6 - WB",
+		 .bit = 11,
+		},
+		{.name = "MEM_TYPE1",
+		 .desc = "Along with MEM_TYPE0 and MEM_TYPE2, "
+			 "memory type encodings are: 0 - UC, "
+			 "1 - USWC, 4- WT, 5 - WP, 6 - WB",
+		 .bit = 12,
+		},
+		{.name = "MEM_TYPE2",
+		 .desc = "Along with MEM_TYPE0 and MEM_TYPE1, "
+			 "memory type encodings are: 0 - UC, "
+			 "1 - USWC, 4- WT, 5 - WP, 6 - WB",
+		 .bit = 13,
+		},
+	 },
+	},
+
+	/* 15 */
+	{.name = "SSE_INPUT_ASSIST",
+	 .desc = "Number of times an assist is requested to handle problems "
+		 "with input operands for SSE/SSE2/SSE3 operations; most "
+		 "notably denormal source operands when the DAZ bit isn't set",
+	 .event_select = 0x34,
+	 .escr_select = 0x1,
+	 .allowed_escrs = { 12, 35 },
+	 .perf_code = P4_EVENT_SSE_INPUT_ASSIST,
+	 .event_masks = {
+		{.name = "ALL",
+		 .desc = "Count assists for SSE/SSE2/SSE3 uops",
+		 .bit = 15,
+		 .flags = NETBURST_FL_DFL,
+		},
+	 },
+	},
+
+	/* 16 */
+	{.name = "PACKED_SP_UOP",
+	 .desc = "Number of packed single-precision uops",
+	 .event_select = 0x8,
+	 .escr_select = 0x1,
+	 .perf_code = P4_EVENT_PACKED_SP_UOP,
+	 .allowed_escrs = { 12, 35 },
+	 .event_masks = {
+		{.name = "ALL",
+		 .desc = "Count all uops operating on packed "
+			 "single-precisions operands",
+		 .bit = 15,
+		 .flags = NETBURST_FL_DFL,
+		},
+		{.name = "TAG0",
+		 .desc = "Tag this event with tag bit 0 "
+			 "for retirement counting with execution_event",
+		 .bit = 16,
+		},
+		{.name = "TAG1",
+		 .desc = "Tag this event with tag bit 1 "
+			 "for retirement counting with execution_event",
+		 .bit = 17,
+		},
+		{.name = "TAG2",
+		 .desc = "Tag this event with tag bit 2 "
+			 "for retirement counting with execution_event",
+		 .bit = 18,
+		},
+		{.name = "TAG3",
+		 .desc = "Tag this event with tag bit 3 "
+			 "for retirement counting with execution_event",
+		 .bit = 19,
+		},
+	 },
+	},
+
+	/* 17 */
+	{.name = "PACKED_DP_UOP",
+	 .desc = "Number of packed double-precision uops",
+	 .event_select = 0xC,
+	 .escr_select = 0x1,
+	 .allowed_escrs = { 12, 35 },
+	 .perf_code = P4_EVENT_PACKED_DP_UOP,
+	 .event_masks = {
+		{.name = "ALL",
+		 .desc = "Count all uops operating on packed "
+			 "double-precisions operands",
+		 .bit = 15,
+		 .flags = NETBURST_FL_DFL,
+		},
+		{.name = "TAG0",
+		 .desc = "Tag this event with tag bit 0 "
+			 "for retirement counting with execution_event",
+		 .bit = 16,
+		},
+		{.name = "TAG1",
+		 .desc = "Tag this event with tag bit 1 "
+			 "for retirement counting with execution_event",
+		 .bit = 17,
+		},
+		{.name = "TAG2",
+		 .desc = "Tag this event with tag bit 2 "
+			 "for retirement counting with execution_event",
+		 .bit = 18,
+		},
+		{.name = "TAG3",
+		 .desc = "Tag this event with tag bit 3 "
+			 "for retirement counting with execution_event",
+		 .bit = 19,
+		},
+	 },
+	},
+
+	/* 18 */
+	{.name = "SCALAR_SP_UOP",
+	 .desc = "Number of scalar single-precision uops",
+	 .event_select = 0xA,
+	 .escr_select = 0x1,
+	 .allowed_escrs = { 12, 35 },
+	 .perf_code = P4_EVENT_SCALAR_SP_UOP,
+	 .event_masks = {
+		{.name = "ALL",
+		 .desc = "Count all uops operating on scalar "
+			 "single-precisions operands",
+		 .bit = 15,
+		 .flags = NETBURST_FL_DFL,
+		},
+		{.name = "TAG0",
+		 .desc = "Tag this event with tag bit 0 "
+			 "for retirement counting with execution_event",
+		 .bit = 16,
+		},
+		{.name = "TAG1",
+		 .desc = "Tag this event with tag bit 1 "
+			 "for retirement counting with execution_event",
+		 .bit = 17,
+		},
+		{.name = "TAG2",
+		 .desc = "Tag this event with tag bit 2 "
+			 "for retirement counting with execution_event",
+		 .bit = 18,
+		},
+		{.name = "TAG3",
+		 .desc = "Tag this event with tag bit 3 "
+			 "for retirement counting with execution_event",
+		 .bit = 19,
+		},
+	 },
+	},
+
+	/* 19 */
+	{.name = "SCALAR_DP_UOP",
+	 .desc = "Number of scalar double-precision uops",
+	 .event_select = 0xE,
+	 .escr_select = 0x1,
+	 .allowed_escrs = { 12, 35 },
+	 .perf_code = P4_EVENT_SCALAR_DP_UOP,
+	 .event_masks = {
+		{.name = "ALL",
+		 .desc = "Count all uops operating on scalar "
+			 "double-precisions operands",
+		 .bit = 15,
+		 .flags = NETBURST_FL_DFL,
+		},
+		{.name = "TAG0",
+		 .desc = "Tag this event with tag bit 0 "
+			 "for retirement counting with execution_event",
+		 .bit = 16,
+		},
+		{.name = "TAG1",
+		 .desc = "Tag this event with tag bit 1 "
+			 "for retirement counting with execution_event",
+		 .bit = 17,
+		},
+		{.name = "TAG2",
+		 .desc = "Tag this event with tag bit 2 "
+			 "for retirement counting with execution_event",
+		 .bit = 18,
+		},
+		{.name = "TAG3",
+		 .desc = "Tag this event with tag bit 3 "
+			 "for retirement counting with execution_event",
+		 .bit = 19,
+		},
+	 },
+	},
+
+	/* 20 */
+	{.name = "64BIT_MMX_UOP",
+	 .desc = "Number of MMX instructions which "
+		 "operate on 64-bit SIMD operands",
+	 .event_select = 0x2,
+	 .escr_select = 0x1,
+	 .allowed_escrs = { 12, 35 },
+	 .perf_code = P4_EVENT_64BIT_MMX_UOP,
+	 .event_masks = {
+		{.name = "ALL",
+		 .desc = "Count all uops operating on 64-bit SIMD integer "
+			 "operands in memory or MMX registers",
+		 .bit = 15,
+		 .flags = NETBURST_FL_DFL,
+		},
+		{.name = "TAG0",
+		 .desc = "Tag this event with tag bit 0 "
+			 "for retirement counting with execution_event",
+		 .bit = 16,
+		},
+		{.name = "TAG1",
+		 .desc = "Tag this event with tag bit 1 "
+			 "for retirement counting with execution_event",
+		 .bit = 17,
+		},
+		{.name = "TAG2",
+		 .desc = "Tag this event with tag bit 2 "
+			 "for retirement counting with execution_event",
+		 .bit = 18,
+		},
+		{.name = "TAG3",
+		 .desc = "Tag this event with tag bit 3 "
+			 "for retirement counting with execution_event",
+		 .bit = 19,
+		},
+	 },
+	},
+
+	/* 21 */
+	{.name = "128BIT_MMX_UOP",
+	 .desc = "Number of MMX instructions which "
+		 "operate on 128-bit SIMD operands",
+	 .event_select = 0x1A,
+	 .escr_select = 0x1,
+	 .allowed_escrs = { 12, 35 },
+	 .perf_code = P4_EVENT_128BIT_MMX_UOP,
+	 .event_masks = {
+		{.name = "ALL",
+		 .desc = "Count all uops operating on 128-bit SIMD integer "
+			 "operands in memory or MMX registers",
+		 .bit = 15,
+		 .flags = NETBURST_FL_DFL,
+		},
+		{.name = "TAG0",
+		 .desc = "Tag this event with tag bit 0 "
+			 "for retirement counting with execution_event",
+		 .bit = 16,
+		},
+		{.name = "TAG1",
+		 .desc = "Tag this event with tag bit 1 "
+			 "for retirement counting with execution_event",
+		 .bit = 17,
+		},
+		{.name = "TAG2",
+		 .desc = "Tag this event with tag bit 2 "
+			 "for retirement counting with execution_event",
+		 .bit = 18,
+		},
+		{.name = "TAG3",
+		 .desc = "Tag this event with tag bit 3 "
+			 "for retirement counting with execution_event",
+		 .bit = 19,
+		},
+	 },
+	},
+
+	/* 22 */
+	{.name = "X87_FP_UOP",
+	 .desc = "Number of x87 floating-point uops",
+	 .event_select = 0x4,
+	 .escr_select = 0x1,
+	 .allowed_escrs = { 12, 35 },
+	 .perf_code = P4_EVENT_X87_FP_UOP,
+	 .event_masks = {
+		{.name = "ALL",
+		 .desc = "Count all x87 FP uops",
+		 .bit = 15,
+		 .flags = NETBURST_FL_DFL,
+		},
+		{.name = "TAG0",
+		 .desc = "Tag this event with tag bit 0 "
+			 "for retirement counting with execution_event",
+		 .bit = 16,
+		},
+		{.name = "TAG1",
+		 .desc = "Tag this event with tag bit 1 "
+			 "for retirement counting with execution_event",
+		 .bit = 17,
+		},
+		{.name = "TAG2",
+		 .desc = "Tag this event with tag bit 2 "
+			 "for retirement counting with execution_event",
+		 .bit = 18,
+		},
+		{.name = "TAG3",
+		 .desc = "Tag this event with tag bit 3 "
+			 "for retirement counting with execution_event",
+		 .bit = 19,
+		},
+	 },
+	},
+
+	/* 23 */
+	{.name = "TC_misc",
+	 .desc = "Miscellaneous events detected by the TC. The counter will "
+		 "count twice for each occurrence",
+	 .event_select = 0x6,
+	 .escr_select = 0x1,
+	 .allowed_escrs = { 9, 32 },
+	 .perf_code = P4_EVENT_TC_MISC,
+	 .event_masks = {
+		{.name = "FLUSH",
+		 .desc = "Number of flushes",
+		 .bit = 4,
+		 .flags = NETBURST_FL_DFL,
+		},
+	 },
+	},
+
+	/* 24 */
+	{.name = "GLOBAL_POWER_EVENTS",
+	 .desc = "Counts the time during which a processor is not stopped",
+	 .event_select = 0x13,
+	 .escr_select = 0x6,
+	 .allowed_escrs = { 6, 29 },
+	 .perf_code = P4_EVENT_GLOBAL_POWER_EVENTS,
+	 .event_masks = {
+		{.name = "RUNNING",
+		 .desc = "The processor is active (includes the "
+			 "handling of HLT STPCLK and throttling",
+		 .bit = 0,
+		 .flags = NETBURST_FL_DFL,
+		},
+	 },
+	},
+
+	/* 25 */
+	{.name = "TC_MS_XFER",
+	 .desc = "Number of times that uop delivery changed from TC to MS ROM",
+	 .event_select = 0x5,
+	 .escr_select = 0x0,
+	 .allowed_escrs = { 8, 31 },
+	 .perf_code = P4_EVENT_TC_MS_XFER,
+	 .event_masks = {
+		{.name = "CISC",
+		 .desc = "A TC to MS transfer occurred",
+		 .bit = 0,
+		 .flags = NETBURST_FL_DFL,
+		},
+	 },
+	},
+
+	/* 26 */
+	{.name = "UOP_QUEUE_WRITES",
+	 .desc = "Number of valid uops written to the uop queue",
+	 .event_select = 0x9,
+	 .escr_select = 0x0,
+	 .allowed_escrs = { 8, 31 },
+	 .perf_code = P4_EVENT_UOP_QUEUE_WRITES,
+	 .event_masks = {
+		{.name = "FROM_TC_BUILD",
+		 .desc = "The uops being written are from TC build mode",
+		 .bit = 0,
+		},
+		{.name = "FROM_TC_DELIVER",
+		 .desc = "The uops being written are from TC deliver mode",
+		 .bit = 1,
+		},
+		{.name = "FROM_ROM",
+		 .desc = "The uops being written are from microcode ROM",
+		 .bit = 2,
+		},
+	 },
+	},
+
+	/* 27 */
+	{.name = "RETIRED_MISPRED_BRANCH_TYPE",
+	 .desc = "Number of retiring mispredicted branches by type",
+	 .event_select = 0x5,
+	 .escr_select = 0x2,
+	 .allowed_escrs = { 10, 33 },
+	 .perf_code = P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE,
+	 .event_masks = {
+		{.name = "CONDITIONAL",
+		 .desc = "Conditional jumps",
+		 .bit = 1,
+		},
+		{.name = "CALL",
+		 .desc = "Indirect call branches",
+		 .bit = 2,
+		},
+		{.name = "RETURN",
+		 .desc = "Return branches",
+		 .bit = 3,
+		},
+		{.name = "INDIRECT",
+		 .desc = "Returns, indirect calls, or indirect jumps",
+		 .bit = 4,
+		},
+	 },
+	},
+
+	/* 28 */
+	{.name = "RETIRED_BRANCH_TYPE",
+	 .desc = "Number of retiring branches by type",
+	 .event_select = 0x4,
+	 .escr_select = 0x2,
+	 .allowed_escrs = { 10, 33 },
+	 .perf_code = P4_EVENT_RETIRED_BRANCH_TYPE,
+	 .event_masks = {
+		{.name = "CONDITIONAL",
+		 .desc = "Conditional jumps",
+		 .bit = 1,
+		},
+		{.name = "CALL",
+		 .desc = "Indirect call branches",
+		 .bit = 2,
+		},
+		{.name = "RETURN",
+		 .desc = "Return branches",
+		 .bit = 3,
+		},
+		{.name = "INDIRECT",
+		 .desc = "Returns, indirect calls, or indirect jumps",
+		 .bit = 4,
+		},
+	 },
+	},
+
+	/* 29 */
+	{.name = "resource_stall",
+	 .desc = "Occurrences of latency or stalls in the Allocator",
+	 .event_select = 0x1,
+	 .escr_select = 0x1,
+	 .allowed_escrs = { 17, 40 },
+	 .perf_code = P4_EVENT_RESOURCE_STALL,
+	 .event_masks = {
+		{.name = "SBFULL",
+		 .desc = "A stall due to lack of store buffers",
+		 .bit = 5,
+		 .flags = NETBURST_FL_DFL,
+		},
+	 },
+	},
+
+	/* 30 */
+	{.name = "WC_Buffer",
+	 .desc = "Number of Write Combining Buffer operations",
+	 .event_select = 0x5,
+	 .escr_select = 0x5,
+	 .allowed_escrs = { 15, 38 },
+	 .perf_code = P4_EVENT_WC_BUFFER,
+	 .event_masks = {
+		{.name = "WCB_EVICTS",
+		 .desc = "WC Buffer evictions of all causes",
+		 .bit = 0,
+		},
+		{.name = "WCB_FULL_EVICT",
+		 .desc = "WC Buffer eviction; no WC buffer is available",
+		 .bit = 1,
+		},
+	 },
+	},
+
+	/* 31 */
+	{.name = "b2b_cycles",
+	 .desc = "Number of back-to-back bus cycles",
+	 .event_select = 0x16,
+	 .escr_select = 0x3,
+	 .allowed_escrs = { 6, 29 },
+	 .perf_code = P4_EVENT_B2B_CYCLES,
+	 .event_masks = {
+		{.name = "BIT1",
+		 .desc = "bit 1",
+		 .bit = 1,
+		},
+		{.name = "BIT2",
+		 .desc = "bit 2",
+		 .bit = 2,
+		},
+		{.name = "BIT3",
+		 .desc = "bit 3",
+		 .bit = 3,
+		},
+		{.name = "BIT4",
+		 .desc = "bit 4",
+		 .bit = 4,
+		},
+		{.name = "BIT5",
+		 .desc = "bit 5",
+		 .bit = 4,
+		},
+		{.name = "BIT6",
+		 .desc = "bit 6",
+		 .bit = 4,
+		},
+	 },
+	},
+	/* 32 */
+	{.name = "bnr",
+	 .desc = "Number of bus-not-ready conditions",
+	 .event_select = 0x8,
+	 .escr_select = 0x3,
+	 .allowed_escrs = { 6, 29 },
+	 .perf_code = P4_EVENT_BNR,
+	 .event_masks = {
+		{.name = "BIT0",
+		 .desc = "bit 0",
+		 .bit = 0,
+		},
+		{.name = "BIT1",
+		 .desc = "bit 1",
+		 .bit = 1,
+		},
+		{.name = "BIT2",
+		 .desc = "bit 2",
+		 .bit = 2,
+		},
+	 },
+	},
+
+	/* 33 */
+	{.name = "snoop",
+	 .desc = "Number of snoop hit modified bus traffic",
+	 .event_select = 0x6,
+	 .escr_select = 0x3,
+	 .allowed_escrs = { 6, 29 },
+	 .perf_code = P4_EVENT_SNOOP,
+	 .event_masks = {
+		{.name = "BIT2",
+		 .desc = "bit 2",
+		 .bit = 2,
+		},
+		{.name = "BIT6",
+		 .desc = "bit 6",
+		 .bit = 6,
+		},
+		{.name = "BIT7",
+		 .desc = "bit 7",
+		 .bit = 7,
+		},
+	 },
+	},
+
+	/* 34 */
+	{.name = "response",
+	 .desc = "Count of different types of responses",
+	 .event_select = 0x4,
+	 .escr_select = 0x3,
+	 .allowed_escrs = { 6, 29 },
+	 .perf_code = P4_EVENT_RESPONSE,
+	 .event_masks = {
+		{.name = "BIT1",
+		 .desc = "bit 1",
+		 .bit = 1,
+		},
+		{.name = "BIT2",
+		 .desc = "bit 2",
+		 .bit = 2,
+		},
+		{.name = "BIT8",
+		 .desc = "bit 8",
+		 .bit = 8,
+		},
+		{.name = "BIT9",
+		 .desc = "bit 9",
+		 .bit = 9,
+		},
+	 },
+	},
+
+	/* 35 */
+	{.name = "front_end_event",
+	 .desc = "Number of retirements of tagged uops which are specified "
+		 "through the front-end tagging mechanism",
+	 .event_select = 0x8,
+	 .escr_select = 0x5,
+	 .allowed_escrs = { 21, 43 },
+	 .perf_code = P4_EVENT_FRONT_END_EVENT,
+	 .event_masks = {
+		{.name = "NBOGUS",
+		 .desc = "The marked uops are not bogus",
+		 .bit = 0,
+		},
+		{.name = "BOGUS",
+		 .desc = "The marked uops are bogus",
+		 .bit = 1,
+		},
+	 },
+	},
+
+	/* 36 */
+	{.name = "execution_event",
+	 .desc = "Number of retirements of tagged uops which are specified "
+		 "through the execution tagging mechanism. The event-mask "
+		 "allows from one to four types of uops to be tagged",
+	 .event_select = 0xC,
+	 .escr_select = 0x5,
+	 .allowed_escrs = { 21, 43 },
+	 .perf_code = P4_EVENT_EXECUTION_EVENT,
+	 .event_masks = {
+		{.name = "NBOGUS0",
+		 .desc = "The marked uops are not bogus",
+		 .bit = 0,
+		},
+		{.name = "NBOGUS1",
+		 .desc = "The marked uops are not bogus",
+		 .bit = 1,
+		},
+		{.name = "NBOGUS2",
+		 .desc = "The marked uops are not bogus",
+		 .bit = 2,
+		},
+		{.name = "NBOGUS3",
+		 .desc = "The marked uops are not bogus",
+		 .bit = 3,
+		},
+		{.name = "BOGUS0",
+		 .desc = "The marked uops are bogus",
+		 .bit = 4,
+		},
+		{.name = "BOGUS1",
+		 .desc = "The marked uops are bogus",
+		 .bit = 5,
+		},
+		{.name = "BOGUS2",
+		 .desc = "The marked uops are bogus",
+		 .bit = 6,
+		},
+		{.name = "BOGUS3",
+		 .desc = "The marked uops are bogus",
+		 .bit = 7,
+		},
+	 },
+	},
+
+	/* 37 */
+	{.name = "replay_event",
+	 .desc = "Number of retirements of tagged uops which are specified "
+		 "through the replay tagging mechanism",
+	 .event_select = 0x9,
+	 .escr_select = 0x5,
+	 .allowed_escrs = { 21, 43 },
+	 .perf_code = P4_EVENT_REPLAY_EVENT,
+	 .event_masks = {
+		{.name = "NBOGUS",
+		 .desc = "The marked uops are not bogus",
+		 .bit = 0,
+		},
+		{.name = "BOGUS",
+		 .desc = "The marked uops are bogus",
+		 .bit = 1,
+		},
+		{.name = "L1_LD_MISS",
+		 .desc = "Virtual mask for L1 cache load miss replays",
+		 .bit = 2,
+		},
+		{.name = "L2_LD_MISS",
+		 .desc = "Virtual mask for L2 cache load miss replays",
+		 .bit = 3,
+		},
+		{.name = "DTLB_LD_MISS",
+		 .desc = "Virtual mask for DTLB load miss replays",
+		 .bit = 4,
+		},
+		{.name = "DTLB_ST_MISS",
+		 .desc = "Virtual mask for DTLB store miss replays",
+		 .bit = 5,
+		},
+		{.name = "DTLB_ALL_MISS",
+		 .desc = "Virtual mask for all DTLB miss replays",
+		 .bit = 6,
+		},
+		{.name = "BR_MSP",
+		 .desc = "Virtual mask for tagged mispredicted branch replays",
+		 .bit = 7,
+		},
+		{.name = "MOB_LD_REPLAY",
+		 .desc = "Virtual mask for MOB load replays",
+		 .bit = 8,
+		},
+		{.name = "SP_LD_RET",
+		 .desc = "Virtual mask for split load replays. Use with load_port_replay event",
+		 .bit = 9,
+		},
+		{.name = "SP_ST_RET",
+		 .desc = "Virtual mask for split store replays. Use with store_port_replay event",
+		 .bit = 10,
+		},
+	 },
+	},
+
+	/* 38 */
+	{.name = "INSTR_RETIRED",
+	 .desc = "Number of instructions retired during a clock cycle",
+	 .event_select = 0x2,
+	 .escr_select = 0x4,
+	 .allowed_escrs = { 20, 42 },
+	 .perf_code = P4_EVENT_INSTR_RETIRED,
+	 .event_masks = {
+		{.name = "NBOGUSNTAG",
+		 .desc = "Non-bogus instructions that are not tagged",
+		 .bit = 0,
+		},
+		{.name = "NBOGUSTAG",
+		 .desc = "Non-bogus instructions that are tagged",
+		 .bit = 1,
+		},
+		{.name = "BOGUSNTAG",
+		 .desc = "Bogus instructions that are not tagged",
+		 .bit = 2,
+		},
+		{.name = "BOGUSTAG",
+		 .desc = "Bogus instructions that are tagged",
+		 .bit = 3,
+		},
+	 },
+	},
+
+	/* 39 */
+	{.name = "UOPS_RETIRED",
+	 .desc = "Number of uops retired during a clock cycle",
+	 .event_select = 0x1,
+	 .escr_select = 0x4,
+	 .allowed_escrs = { 20, 42 },
+	 .perf_code = P4_EVENT_UOPS_RETIRED,
+	 .event_masks = {
+		{.name = "NBOGUS",
+		 .desc = "The marked uops are not bogus",
+		 .bit = 0,
+		},
+		{.name = "BOGUS",
+		 .desc = "The marked uops are bogus",
+		 .bit = 1,
+		},
+	 },
+	},
+
+	/* 40 */
+	{.name = "UOP_TYPE",
+	 .desc = "This event is used in conjunction with with the front-end "
+		 "mechanism to tag load and store uops",
+	 .event_select = 0x2,
+	 .escr_select = 0x2,
+	 .allowed_escrs = { 18, 41 },
+	 .perf_code = P4_EVENT_UOP_TYPE,
+	 .event_masks = {
+		{.name = "TAGLOADS",
+		 .desc = "The uop is a load operation",
+		 .bit = 1,
+		},
+		{.name = "TAGSTORES",
+		 .desc = "The uop is a store operation",
+		 .bit = 2,
+		},
+	 },
+	},
+
+	/* 41 */
+	{.name = "BRANCH_RETIRED",
+	 .desc = "Number of retirements of a branch",
+	 .event_select = 0x6,
+	 .escr_select = 0x5,
+	 .allowed_escrs = { 21, 43 },
+	 .perf_code = P4_EVENT_BRANCH_RETIRED,
+	 .event_masks = {
+		{.name = "MMNP",
+		 .desc = "Branch not-taken predicted",
+		 .bit = 0,
+		},
+		{.name = "MMNM",
+		 .desc = "Branch not-taken mispredicted",
+		 .bit = 1,
+		},
+		{.name = "MMTP",
+		 .desc = "Branch taken predicted",
+		 .bit = 2,
+		},
+		{.name = "MMTM",
+		 .desc = "Branch taken mispredicted",
+		 .bit = 3,
+		},
+	 },
+	},
+
+	/* 42 */
+	{.name = "MISPRED_BRANCH_RETIRED",
+	 .desc = "Number of retirements of mispredicted "
+		 "IA-32 branch instructions",
+	 .event_select = 0x3,
+	 .escr_select = 0x4,
+	 .allowed_escrs = { 20, 42 },
+	 .perf_code = P4_EVENT_MISPRED_BRANCH_RETIRED,
+	 .event_masks = {
+		{.name = "BOGUS",
+		 .desc = "The retired instruction is not bogus",
+		 .bit = 0,
+		 .flags = NETBURST_FL_DFL,
+		},
+	 },
+	},
+
+	/* 43 */
+	{.name = "X87_ASSIST",
+	 .desc = "Number of retirements of x87 instructions that required "
+		 "special handling",
+	 .event_select = 0x3,
+	 .escr_select = 0x5,
+	 .allowed_escrs = { 21, 43 },
+	 .perf_code = P4_EVENT_X87_ASSIST,
+	 .event_masks = {
+		{.name = "FPSU",
+		 .desc = "Handle FP stack underflow",
+		 .bit = 0,
+		},
+		{.name = "FPSO",
+		 .desc = "Handle FP stack overflow",
+		 .bit = 1,
+		},
+		{.name = "POAO",
+		 .desc = "Handle x87 output overflow",
+		 .bit = 2,
+		},
+		{.name = "POAU",
+		 .desc = "Handle x87 output underflow",
+		 .bit = 3,
+		},
+		{.name = "PREA",
+		 .desc = "Handle x87 input assist",
+		 .bit = 4,
+		},
+	 },
+	},
+
+	/* 44 */
+	{.name = "MACHINE_CLEAR",
+	 .desc = "Number of occurances when the entire "
+		 "pipeline of the machine is cleared",
+	 .event_select = 0x2,
+	 .escr_select = 0x5,
+	 .allowed_escrs = { 21, 43 },
+	 .perf_code = P4_EVENT_MACHINE_CLEAR,
+	 .event_masks = {
+		{.name = "CLEAR",
+		 .desc = "Counts for a portion of the many cycles while the "
+			 "machine is cleared for any cause. Use edge-"
+			 "triggering for this bit only to get a count of "
+			 "occurances versus a duration",
+		 .bit = 0,
+		},
+		{.name = "MOCLEAR",
+		 .desc = "Increments each time the machine is cleared due to "
+			 "memory ordering issues",
+		 .bit = 2,
+		},
+		{.name = "SMCLEAR",
+		 .desc = "Increments each time the machine is cleared due to "
+			 "self-modifying code issues",
+		 .bit = 6,
+		},
+	 },
+	},
+
+	/* 45 */
+	{.name = "instr_completed",
+	 .desc = "Instructions that have completed and "
+		 "retired during a clock cycle (models 3, 4, 6 only)",
+	 .event_select = 0x7,
+	 .escr_select = 0x4,
+	 .allowed_escrs = { 21, 42 },
+	 .perf_code = P4_EVENT_INSTR_COMPLETED,
+	 .event_masks = {
+		{.name = "NBOGUS",
+		 .desc = "Non-bogus instructions",
+		 .bit = 0,
+		},
+		{.name = "BOGUS",
+		 .desc = "Bogus instructions",
+		 .bit = 1,
+		},
+	 },
+	},
+};
+
+#define NETBURST_EVENT_COUNT (sizeof(op_netburst_events) / sizeof(op_netburst_events[0]))
+
+/*
+ * Build the perf_event config word for the Netburst event named evt_name.
+ * evt_um must equal exactly one event-mask bit (1 << .bit) of that event.
+ * do_kernel / do_user, when non-zero, set the t0/t1 os / usr ESCR fields.
+ * On success *config = (ESCR value, bits 25-30 replaced by perf_code) << 32
+ * | CCCR value, and 0 is returned.  Returns -1 for a NULL evt_name/config,
+ * an unknown event name, or a unit mask that matches no single mask bit.
+ */
+int op_netburst_get_perf_encoding(const char * evt_name, unsigned long evt_um, int do_kernel,
+                                  int do_user, u64 * config)
+{
+	unsigned int evmask = 0;
+	unsigned int i, evt_idx = 0xffffffff, um_idx = 0xffffffff;
+	int bit;
+	int tag_enable = 0, tag_value = 0;
+	netburst_escr_value_t escr;
+	netburst_cccr_value_t cccr;
+	u64 escr_val;
+
+	if (!evt_name || !config)
+		return -1;
+
+	// Match up event name with netburst event index
+	for (i = 0; i < NETBURST_EVENT_COUNT; i++) {
+		if (!strcmp(evt_name, op_netburst_events[i].name)) {
+			evt_idx = i;
+			break;
+		}
+	}
+	if (evt_idx == 0xffffffff)
+		return -1;
+
+	// Iterate through unit masks of the event to find UM idx
+	for (i = 0; op_netburst_events[evt_idx].event_masks[i].name; i++) {
+		if (evt_um == (1UL << op_netburst_events[evt_idx].event_masks[i].bit)) {
+			um_idx = i;
+			break;
+		}
+	}
+	if (um_idx == 0xffffffff)
+		return -1;
+
+	// Bits below EVENT_MASK_BITS go into event_mask; higher ones are tag bits
+	bit = op_netburst_events[evt_idx].event_masks[um_idx].bit;
+	if (bit < EVENT_MASK_BITS) {
+		evmask |= (1 << bit);
+	} else {
+		tag_value |= (1 << (bit - EVENT_MASK_BITS));
+		tag_enable = 1;
+	}
+
+	// Zero both registers first: only some bit-fields are assigned below, but
+	// the whole .val words are read when the result is composed.
+	escr.val = 0;
+	cccr.val = 0;
+	escr.bits.t1_usr = escr.bits.t0_usr = do_user ? 1 : 0;
+	escr.bits.t1_os  = escr.bits.t0_os  = do_kernel ? 1 : 0;
+	escr.bits.tag_enable   = tag_enable;
+	escr.bits.tag_value    = tag_value;
+	escr.bits.event_mask   = evmask;
+	escr.bits.event_select = op_netburst_events[evt_idx].event_select;
+	cccr.bits.enable        = 1;
+	cccr.bits.escr_select   = op_netburst_events[evt_idx].escr_select;
+	cccr.bits.active_thread = 3;
+
+	escr_val  = escr.val & ~(0x3full << 25);
+	escr_val |= (u64)op_netburst_events[evt_idx].perf_code << 25;
+	*config = (escr_val << 32) | cccr.val;
+	return 0;
+}
diff --git a/libop/op_netburst.h b/libop/op_netburst.h
new file mode 100644
index 0000000..809b550
--- /dev/null
+++ b/libop/op_netburst.h
@@ -0,0 +1,256 @@
+/*
+ * @file libop/op_netburst.h
+ * Definitions of structures and methods for handling Intel Netburst events.
+ *
+ * @remark Copyright 2013 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * Created on: May 14, 2013
+ * @author Maynard Johnson
+ * (C) Copyright IBM Corp. 2013
+ *
+ * NOTE:  The code in this file was largely borrowed from a libpfm file,
+ * so we include below the Copyright and licensing information from that file.
+ */
+/*
+ * Copyright (c) 2006 IBM Corp.
+ * Contributed by Kevin Corry <kevcorry@us.ibm.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * pfmlib_netburst_priv.h
+ *
+ * Structures and definitions for use in the Pentium4/Xeon/EM64T libpfm code.
+ */
+
+#ifndef OP_NETBURST_H_
+#define OP_NETBURST_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* ESCR: Event Selection Control Register
+ *
+ * These registers are used to select which event to count along with options
+ * for that event. There are (up to) 45 ESCRs, but each data counter is
+ * restricted to a specific set of ESCRs.
+ */
+
+/**
+ * netburst_escr_value_t
+ *
+ * Bit-wise breakdown of the ESCR registers.
+ *
+ *    Bits     Description
+ *   -------   -----------
+ *   63 - 31   Reserved
+ *   30 - 25   Event Select
+ *   24 - 9    Event Mask
+ *    8 - 5    Tag Value
+ *      4      Tag Enable
+ *      3      T0 OS - Enable counting in kernel mode (thread 0)
+ *      2      T0 USR - Enable counting in user mode (thread 0)
+ *      1      T1 OS - Enable counting in kernel mode (thread 1)
+ *      0      T1 USR - Enable counting in user mode (thread 1)
+ **/
+
+#define EVENT_MASK_BITS 16
+#define EVENT_SELECT_BITS 6
+
+typedef union {
+	unsigned long long val;	/* raw value overlaying the bitfields below */
+	struct {	/* NOTE(review): bit numbers assume LSB-first bitfield allocation (compiler/ABI dependent) - confirm */
+		unsigned long t1_usr:1;	/* bit 0 */
+		unsigned long t1_os:1;	/* bit 1 */
+		unsigned long t0_usr:1;	/* bit 2 */
+		unsigned long t0_os:1;	/* bit 3 */
+		unsigned long tag_enable:1;	/* bit 4 */
+		unsigned long tag_value:4;	/* bits 8 - 5 */
+		unsigned long event_mask:EVENT_MASK_BITS;	/* bits 24 - 9 */
+		unsigned long event_select:EVENT_SELECT_BITS;	/* bits 30 - 25 */
+		unsigned long reserved:1;	/* bit 31 */
+	} bits;
+} netburst_escr_value_t;
+
+/* CCCR: Counter Configuration Control Register
+ *
+ * These registers are used to configure the data counters. There are 18
+ * CCCRs, one for each data counter.
+ */
+
+/**
+ * netburst_cccr_value_t
+ *
+ * Bit-wise breakdown of the CCCR registers.
+ *
+ *    Bits     Description
+ *   -------   -----------
+ *   63 - 32   Reserved
+ *     31      OVF - The data counter overflowed.
+ *     30      Cascade - Enable cascading of data counter when alternate
+ *             counter overflows.
+ *   29 - 28   Reserved
+ *     27      OVF_PMI_T1 - Generate interrupt for LP1 on counter overflow
+ *     26      OVF_PMI_T0 - Generate interrupt for LP0 on counter overflow
+ *     25      FORCE_OVF - Force interrupt on every counter increment
+ *     24      Edge - Enable rising edge detection of the threshold comparison
+ *             output for filtering event counts.
+ *   23 - 20   Threshold Value - Select the threshold value for comparing to
+ *             incoming event counts.
+ *     19      Complement - Select how incoming event count is compared with
+ *             the threshold value.
+ *     18      Compare - Enable filtering of event counts.
+ *   17 - 16   Active Thread - Only used with HT enabled.
+ *             00 - None: Count when neither LP is active.
+ *             01 - Single: Count when only one LP is active.
+ *             10 - Both: Count when both LPs are active.
+ *             11 - Any: Count when either LP is active.
+ *   15 - 13   ESCR Select - Select which ESCR to use for selecting the
+ *             event to count.
+ *     12      Enable - Turns the data counter on or off.
+ *   11 - 0    Reserved
+ **/
+typedef union {
+	unsigned long long val;	/* raw value overlaying the bitfields below */
+	struct {	/* NOTE(review): bit numbers assume LSB-first bitfield allocation (compiler/ABI dependent) - confirm */
+		unsigned long reserved1:12;	/* bits 11 - 0 */
+		unsigned long enable:1;	/* bit 12 */
+		unsigned long escr_select:3;	/* bits 15 - 13 */
+		unsigned long active_thread:2;	/* bits 17 - 16 */
+		unsigned long compare:1;	/* bit 18 */
+		unsigned long complement:1;	/* bit 19 */
+		unsigned long threshold:4;	/* bits 23 - 20 */
+		unsigned long edge:1;	/* bit 24 */
+		unsigned long force_ovf:1;	/* bit 25 */
+		unsigned long ovf_pmi_t0:1;	/* bit 26 */
+		unsigned long ovf_pmi_t1:1;	/* bit 27 */
+		unsigned long reserved2:2;	/* bits 29 - 28 */
+		unsigned long cascade:1;	/* bit 30 */
+		unsigned long overflow:1;	/* bit 31 */
+	} bits;
+} netburst_cccr_value_t;
+
+/**
+ * netburst_event_mask_t
+ *
+ * Defines one bit of the event-mask for one Pentium4 event.
+ *
+ * @name: Event mask name
+ * @desc: Event mask description
+ * @bit: The bit position within the event_mask field.
+ **/
+typedef struct {
+	char *name;	/* mask name; op_netburst_get_perf_encoding() stops scanning a mask list at a NULL name */
+	char *desc;	/* human-readable description of the mask */
+	unsigned int bit;	/* bit position; values >= EVENT_MASK_BITS are encoded as ESCR tag-value bits */
+	unsigned int flags;	/* NETBURST_FL_* flags (see below) */
+} netburst_event_mask_t;
+/*
+ * netburst_event_mask_t->flags
+ */
+#define NETBURST_FL_DFL	0x1 /* event mask is default */
+
+#define MAX_ESCRS_PER_EVENT 2
+
+/*
+ * These are the unique event codes used by perf_events.
+ * They need to be encoded in the ESCR.event_select field when
+ * programming for perf_events.
+ * NOTE: Only 36 of the events specified below have counterparts
+ * in oprofile's p4 event list
+ */
+enum netburst_event_code {	/* implicit values 0, 1, 2, ...; presumably must keep the order perf_events expects - verify before inserting or reordering */
+	P4_EVENT_TC_DELIVER_MODE,
+	P4_EVENT_BPU_FETCH_REQUEST,
+	P4_EVENT_ITLB_REFERENCE,
+	P4_EVENT_MEMORY_CANCEL,
+	P4_EVENT_MEMORY_COMPLETE,
+	P4_EVENT_LOAD_PORT_REPLAY,
+	P4_EVENT_STORE_PORT_REPLAY,
+	P4_EVENT_MOB_LOAD_REPLAY,
+	P4_EVENT_PAGE_WALK_TYPE,
+	P4_EVENT_BSQ_CACHE_REFERENCE,
+	P4_EVENT_IOQ_ALLOCATION,
+	P4_EVENT_IOQ_ACTIVE_ENTRIES,
+	P4_EVENT_FSB_DATA_ACTIVITY,
+	P4_EVENT_BSQ_ALLOCATION,
+	P4_EVENT_BSQ_ACTIVE_ENTRIES,
+	P4_EVENT_SSE_INPUT_ASSIST,
+	P4_EVENT_PACKED_SP_UOP,
+	P4_EVENT_PACKED_DP_UOP,
+	P4_EVENT_SCALAR_SP_UOP,
+	P4_EVENT_SCALAR_DP_UOP,
+	P4_EVENT_64BIT_MMX_UOP,
+	P4_EVENT_128BIT_MMX_UOP,
+	P4_EVENT_X87_FP_UOP,
+	P4_EVENT_TC_MISC,
+	P4_EVENT_GLOBAL_POWER_EVENTS,
+	P4_EVENT_TC_MS_XFER,
+	P4_EVENT_UOP_QUEUE_WRITES,
+	P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE,
+	P4_EVENT_RETIRED_BRANCH_TYPE,
+	P4_EVENT_RESOURCE_STALL,
+	P4_EVENT_WC_BUFFER,
+	P4_EVENT_B2B_CYCLES,
+	P4_EVENT_BNR,
+	P4_EVENT_SNOOP,
+	P4_EVENT_RESPONSE,
+	P4_EVENT_FRONT_END_EVENT,
+	P4_EVENT_EXECUTION_EVENT,
+	P4_EVENT_REPLAY_EVENT,
+	P4_EVENT_INSTR_RETIRED,
+	P4_EVENT_UOPS_RETIRED,
+	P4_EVENT_UOP_TYPE,
+	P4_EVENT_BRANCH_RETIRED,
+	P4_EVENT_MISPRED_BRANCH_RETIRED,
+	P4_EVENT_X87_ASSIST,
+	P4_EVENT_MACHINE_CLEAR,
+	P4_EVENT_INSTR_COMPLETED,
+};
+
+typedef struct {
+	char *name;	/* event name, matched with strcmp() by op_netburst_get_perf_encoding() */
+	char *desc;	/* human-readable event description */
+	unsigned int event_select;	/* value copied into the ESCR event_select field */
+	unsigned int escr_select;	/* value copied into the CCCR escr_select field */
+	enum netburst_event_code perf_code;	/* perf_event event code, enum P4_EVENTS */
+	int allowed_escrs[MAX_ESCRS_PER_EVENT];	/* presumably the ESCRs usable for this event - not read by the encoder; confirm */
+	netburst_event_mask_t event_masks[EVENT_MASK_BITS];	/* unit masks; the used entries end at the first NULL name */
+} netburst_entry_t;
+
+#define NETBURST_ATTR_U	0
+#define NETBURST_ATTR_K	1
+#define NETBURST_ATTR_C	2
+#define NETBURST_ATTR_E	3
+#define NETBURST_ATTR_T	4
+
+#define _NETBURST_ATTR_U (1 << NETBURST_ATTR_U)
+#define _NETBURST_ATTR_K (1 << NETBURST_ATTR_K)
+
+#define P4_REPLAY_REAL_MASK 0x00000003
+
+int op_netburst_get_perf_encoding(const char * evt_name, unsigned long evt_um, int do_kernel,
+                                  int do_user, u64 * config);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // OP_NETBURST_H_
diff --git a/libop/op_parse_event.c b/libop/op_parse_event.c
index 05fcf73..a42966b 100644
--- a/libop/op_parse_event.c
+++ b/libop/op_parse_event.c
@@ -59,10 +59,9 @@ static int parse_ulong(char const * str)
 
 
 size_t parse_events(struct parsed_event * parsed_events, size_t max_events,
-                  char const * const * events)
+                  char const * const * events, int check_count)
 {
 	size_t i = 0;
-	int timer_event_found_p = 0;
 
 	while (events[i]) {
 		char const * cp = events[i];
@@ -80,21 +79,19 @@ size_t parse_events(struct parsed_event * parsed_events, size_t max_events,
 			exit(EXIT_FAILURE);
 		}
 
-		if (strcmp(part, TIMER_EVENT_NAME) == 0)
-			timer_event_found_p = 1;
-
 		parsed_events[i].name = part;
 
-		part = next_part(&cp);
+		if (check_count) {
+			part = next_part(&cp);
+			if (!part) {
+				fprintf(stderr, "Invalid count for event %s\n", events[i]);
+				exit(EXIT_FAILURE);
+			}
 
-		if (!part) {
-			fprintf(stderr, "Invalid count for event %s\n", events[i]);
-			exit(EXIT_FAILURE);
+			parsed_events[i].count = parse_ulong(part);
+			free(part);
 		}
 
-		parsed_events[i].count = parse_ulong(part);
-		free(part);
-
 		parsed_events[i].unit_mask = 0;
 		part = next_part(&cp);
 
@@ -126,12 +123,5 @@ size_t parse_events(struct parsed_event * parsed_events, size_t max_events,
 	
 		++i;
 	}
-
-	if (i > 1 && timer_event_found_p) {
-		fprintf(stderr, "TIMER event cannot be used in combination with"
-			" hardware counters.\n");
-		exit(EXIT_FAILURE);
-	}
-
 	return i;
 }
diff --git a/libop/op_parse_event.h b/libop/op_parse_event.h
index 2519b0d..c958f78 100644
--- a/libop/op_parse_event.h
+++ b/libop/op_parse_event.h
@@ -39,6 +39,6 @@ struct parsed_event {
  * Return the number of events parsed.
  */
 size_t parse_events(struct parsed_event * parsed_events, size_t max_events,
-                    char const * const * events);
+                    char const * const * events, int check_count);
 
 #endif /* !OP_PARSE_EVENT_H */
diff --git a/libop/op_sample_file.h b/libop/op_sample_file.h
index c29191c..3637d76 100644
--- a/libop/op_sample_file.h
+++ b/libop/op_sample_file.h
@@ -30,13 +30,9 @@ struct opd_header {
 	double cpu_speed;
 	u64 mtime;
 	u32 cg_to_is_kernel;
-	/* spu_profile=1 says sample file contains Cell BE SPU profile data */
-	u32 spu_profile;
-	uint64_t embedded_offset;
 	u64 anon_start;
 	u64 cg_to_anon_start;
 	/* binary compatibility reserve */
-	u32 reserved1[1];
 };
 
 #endif /* OP_SAMPLE_FILE_H */
diff --git a/libop/op_xml_events.c b/libop/op_xml_events.c
index 3b1af21..ed1dce2 100644
--- a/libop/op_xml_events.c
+++ b/libop/op_xml_events.c
@@ -21,7 +21,7 @@ static char buffer[MAX_BUFFER];
 
 void open_xml_events(char const * title, char const * doc, op_cpu the_cpu_type)
 {
-	char const * schema_version = "1.1";
+	char const * schema_version = "2.0";
 
 	buffer[0] = '\0';
 	cpu_type = the_cpu_type;
@@ -42,20 +42,6 @@ void close_xml_events(void)
 	printf("%s", buffer);
 }
 
-static void xml_do_arch_specific_event_help(struct op_event const *event,
-					    char *buffer, size_t size)
-{
-	switch (cpu_type) {
-	case CPU_PPC64_CELL:
-		init_xml_int_attr(HELP_EVENT_GROUP, event->val / 100, buffer,
-				  size);
-		break;
-	default:
-		break;
-	}
-}
-
-
 void xml_help_for_event(struct op_event const * event)
 {
 	uint i;
@@ -64,7 +50,6 @@ void xml_help_for_event(struct op_event const * event)
 	buffer[0] = '\0';
 	open_xml_element(HELP_EVENT, 1, buffer, MAX_BUFFER);
 	init_xml_str_attr(HELP_EVENT_NAME, event->name, buffer, MAX_BUFFER);
-	xml_do_arch_specific_event_help(event, buffer, MAX_BUFFER);
 	init_xml_str_attr(HELP_EVENT_DESC, event->desc, buffer, MAX_BUFFER);
 
 	init_xml_int_attr(HELP_COUNTER_MASK, event->counter_mask, buffer,
@@ -95,16 +80,16 @@ void xml_help_for_event(struct op_event const * event)
 		close_xml_element(NONE, 1, buffer, MAX_BUFFER);
 		for (i = 0; i < event->unit->num; i++) {
 			open_xml_element(HELP_UNIT_MASK, 1, buffer, MAX_BUFFER);
+			if (event->unit->um[i].name)
+				init_xml_str_attr(HELP_UNIT_MASK_NAME,
+					  event->unit->um[i].name,
+					  buffer, MAX_BUFFER);
 			init_xml_int_attr(HELP_UNIT_MASK_VALUE,
 					  event->unit->um[i].value,
 					  buffer, MAX_BUFFER);
 			init_xml_str_attr(HELP_UNIT_MASK_DESC,
 					  event->unit->um[i].desc,
 					  buffer, MAX_BUFFER);
-			if (event->unit->um[i].extra)
-				init_xml_int_attr(HELP_UNIT_EXTRA_VALUE,
-					          event->unit->um[i].extra,
-					          buffer, MAX_BUFFER);
 			close_xml_element(NONE, 0, buffer, MAX_BUFFER);
 		}
 		close_xml_element(HELP_UNIT_MASKS, 0, buffer, MAX_BUFFER);
diff --git a/libop/op_xml_out.c b/libop/op_xml_out.c
index 0b3deea..63ee41c 100644
--- a/libop/op_xml_out.c
+++ b/libop/op_xml_out.c
@@ -84,7 +84,7 @@ char const * xml_tag_map[] = {
 	"unit_mask",
 		"mask",
 		"desc",
-		"extra"
+		"name"
 };
 
 #define MAX_BUF_LEN 2048
diff --git a/libop/op_xml_out.h b/libop/op_xml_out.h
index 544bd51..a829f66 100644
--- a/libop/op_xml_out.h
+++ b/libop/op_xml_out.h
@@ -58,7 +58,7 @@ typedef enum {
 	HELP_UNIT_MASK,
 	HELP_UNIT_MASK_VALUE,
 	HELP_UNIT_MASK_DESC,
-	HELP_UNIT_EXTRA_VALUE,
+	HELP_UNIT_MASK_NAME,
 	} tag_t;
 
 char const * xml_tag_name(tag_t tag);
diff --git a/libop/tests/Makefile.in b/libop/tests/Makefile.in
index 9ce6393..53d8795 100644
--- a/libop/tests/Makefile.in
+++ b/libop/tests/Makefile.in
@@ -42,7 +42,6 @@ DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
 am__aclocal_m4_deps = $(top_srcdir)/m4/binutils.m4 \
 	$(top_srcdir)/m4/builtinexpect.m4 \
-	$(top_srcdir)/m4/cellspubfdsupport.m4 \
 	$(top_srcdir)/m4/compileroption.m4 \
 	$(top_srcdir)/m4/copyifchange.m4 $(top_srcdir)/m4/docbook.m4 \
 	$(top_srcdir)/m4/extradirs.m4 \
@@ -51,7 +50,7 @@ am__aclocal_m4_deps = $(top_srcdir)/m4/binutils.m4 \
 	$(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
 	$(top_srcdir)/m4/mallocattribute.m4 \
 	$(top_srcdir)/m4/poptconst.m4 \
-	$(top_srcdir)/m4/precompiledheader.m4 $(top_srcdir)/m4/qt.m4 \
+	$(top_srcdir)/m4/precompiledheader.m4 \
 	$(top_srcdir)/m4/sstream.m4 $(top_srcdir)/m4/typedef.m4 \
 	$(top_srcdir)/configure.ac
 am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
@@ -152,7 +151,6 @@ LN_S = @LN_S@
 LTLIBOBJS = @LTLIBOBJS@
 MAKEINFO = @MAKEINFO@
 MKDIR_P = @MKDIR_P@
-MOC = @MOC@
 NM = @NM@
 NMEDIT = @NMEDIT@
 OBJDUMP = @OBJDUMP@
@@ -176,20 +174,13 @@ PFM_LIB = @PFM_LIB@
 PKG_CONFIG = @PKG_CONFIG@
 POPT_LIBS = @POPT_LIBS@
 PTRDIFF_T_TYPE = @PTRDIFF_T_TYPE@
-QT_CFLAGS = @QT_CFLAGS@
-QT_INCLUDES = @QT_INCLUDES@
-QT_LDFLAGS = @QT_LDFLAGS@
-QT_LIB = @QT_LIB@
-QT_LIBS = @QT_LIBS@
-QT_VERSION = @QT_VERSION@
 RANLIB = @RANLIB@
+RT_LIB = @RT_LIB@
 SED = @SED@
 SET_MAKE = @SET_MAKE@
 SHELL = @SHELL@
 SIZE_T_TYPE = @SIZE_T_TYPE@
 STRIP = @STRIP@
-UIC = @UIC@
-UIChelp = @UIChelp@
 VERSION = @VERSION@
 XMKMF = @XMKMF@
 XML_CATALOG = @XML_CATALOG@
diff --git a/libop/tests/alloc_counter_tests.c b/libop/tests/alloc_counter_tests.c
index 5aeb605..d0da7a7 100644
--- a/libop/tests/alloc_counter_tests.c
+++ b/libop/tests/alloc_counter_tests.c
@@ -18,8 +18,6 @@
 #include "op_cpu_type.h"
 #include "op_events.h"
 
-/* FIXME: alpha description events need 20 but when running test on x86
- * OP_MAX_COUNTERS is 8, so we can't use it */
 #define MAX_EVENTS 20
 
 
@@ -39,17 +37,6 @@ struct allocated_counter {
 };
 
 
-/* not more than MAX_EVENTS string for all these arrays */
-static char const * const events_alpha_ev4_1[] = {
-	"ISSUES:4096:0:1:1",
-	NULL
-};
-
-static char const * const events_alpha_ev4_2[] = {
-	"UNKNOWN_EVENT:4096:0:1:1",
-	NULL
-};
-
 static char const * const events_ppro_1[] = {
 	"CPU_CLK_UNHALTED:4096:0:1:1",
 	NULL
@@ -114,8 +101,6 @@ static char const * const events_mips_34k[] = {
 };
 
 static struct allocated_counter const tests[] = {
-	{ CPU_AXP_EV4, events_alpha_ev4_1, { 0 }, no_failure },
-	{ CPU_AXP_EV4, events_alpha_ev4_2, { -1 }, fail_to_find_event },
 	{ CPU_PPRO, events_ppro_1, { 0 }, no_failure },
 	{ CPU_PPRO, events_ppro_2, { 0, 1 }, no_failure },
 	{ CPU_PPRO, events_ppro_3, { -1 }, fail_to_alloc_counter },
@@ -154,7 +139,7 @@ static void do_test(struct allocated_counter const * it)
 
 	op_events(it->cpu_type);
 
-	nr_events = parse_events(parsed, MAX_EVENTS, it->events);
+	nr_events = parse_events(parsed, MAX_EVENTS, it->events, 1);
 
 	for (i = 0; i < nr_events; ++i) {
 		event[i] = find_event_by_name(parsed[i].name, parsed[i].unit_mask,
diff --git a/libop/tests/cpu_type_tests.c b/libop/tests/cpu_type_tests.c
index 3cbde5e..468d351 100644
--- a/libop/tests/cpu_type_tests.c
+++ b/libop/tests/cpu_type_tests.c
@@ -24,17 +24,9 @@ static struct cpu_type {
 	{ "i386/piii", CPU_PIII },
 	{ "i386/athlon", CPU_ATHLON },
 	{ "timer", CPU_TIMER_INT },
-	{ "rtc", CPU_RTC },
 	{ "i386/p4", CPU_P4 },
-	{ "ia64/ia64", CPU_IA64 },
-	{ "ia64/itanium", CPU_IA64_1 },
-	{ "ia64/itanium2", CPU_IA64_2 },
 	{ "x86-64/hammer", CPU_HAMMER },
 	{ "i386/p4-ht", CPU_P4_HT2 },
-	{ "alpha/ev4", CPU_AXP_EV4 },
-	{ "alpha/ev5", CPU_AXP_EV5 },
-	{ "alpha/pca56", CPU_AXP_PCA56 },
-	{ "alpha/ev6", CPU_AXP_EV6 },
 	{ "alpha/ev67", CPU_AXP_EV67 },
 	{ "tile/tile64", CPU_TILE_TILE64 },
 	{ "tile/tilepro", CPU_TILE_TILEPRO },
diff --git a/libop/tests/parse_event_tests.c b/libop/tests/parse_event_tests.c
index 8e9dabe..67addbb 100644
--- a/libop/tests/parse_event_tests.c
+++ b/libop/tests/parse_event_tests.c
@@ -32,7 +32,7 @@ static void do_test(struct events_test const * ev)
 {
 	struct parsed_event parsed;
 
-	parse_events(&parsed, 1, ev->tests);
+	parse_events(&parsed, 1, ev->tests, 1);
 
 	if (strcmp(ev->expected.name, parsed.name) ||
 	    ev->expected.count != parsed.count ||
diff --git a/libopagent/Makefile.am b/libopagent/Makefile.am
index caea70e..afc2d4c 100644
--- a/libopagent/Makefile.am
+++ b/libopagent/Makefile.am
@@ -21,9 +21,13 @@ libopagent_la_LIBADD = $(BFD_LIBS)
 # intentionally break backward ABI compatability.  Use the
 # symbol versioning technique (via the version script) to add new or
 # change existing functions; then just increment the minor version.
-# See http://www.gnu.org/software/binutils/manual/ld-2.9.1/html_node/ld_25.html
+#
+# See http://www.gnu.org/software/libtool/manual/html_node/Updating-version-info.html
+# for information about library versioning.
+#
+# See http://www.gnu.org/software/gnulib/manual/html_node/LD-Version-Scripts.html
 # for details about the --version-script option.
-libopagent_la_LDFLAGS = -version-info  1:0:0 \
+libopagent_la_LDFLAGS = -version-info  1:1:0 \
 			-Wl,--version-script=${top_srcdir}/libopagent/opagent_symbols.ver \
 			@OP_LDFLAGS@
 
diff --git a/libopagent/Makefile.in b/libopagent/Makefile.in
index e9b638c..1f7aa2e 100644
--- a/libopagent/Makefile.in
+++ b/libopagent/Makefile.in
@@ -41,7 +41,6 @@ DIST_COMMON = $(include_HEADERS) $(srcdir)/Makefile.am \
 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
 am__aclocal_m4_deps = $(top_srcdir)/m4/binutils.m4 \
 	$(top_srcdir)/m4/builtinexpect.m4 \
-	$(top_srcdir)/m4/cellspubfdsupport.m4 \
 	$(top_srcdir)/m4/compileroption.m4 \
 	$(top_srcdir)/m4/copyifchange.m4 $(top_srcdir)/m4/docbook.m4 \
 	$(top_srcdir)/m4/extradirs.m4 \
@@ -50,7 +49,7 @@ am__aclocal_m4_deps = $(top_srcdir)/m4/binutils.m4 \
 	$(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
 	$(top_srcdir)/m4/mallocattribute.m4 \
 	$(top_srcdir)/m4/poptconst.m4 \
-	$(top_srcdir)/m4/precompiledheader.m4 $(top_srcdir)/m4/qt.m4 \
+	$(top_srcdir)/m4/precompiledheader.m4 \
 	$(top_srcdir)/m4/sstream.m4 $(top_srcdir)/m4/typedef.m4 \
 	$(top_srcdir)/configure.ac
 am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
@@ -159,7 +158,6 @@ LN_S = @LN_S@
 LTLIBOBJS = @LTLIBOBJS@
 MAKEINFO = @MAKEINFO@
 MKDIR_P = @MKDIR_P@
-MOC = @MOC@
 NM = @NM@
 NMEDIT = @NMEDIT@
 OBJDUMP = @OBJDUMP@
@@ -183,20 +181,13 @@ PFM_LIB = @PFM_LIB@
 PKG_CONFIG = @PKG_CONFIG@
 POPT_LIBS = @POPT_LIBS@
 PTRDIFF_T_TYPE = @PTRDIFF_T_TYPE@
-QT_CFLAGS = @QT_CFLAGS@
-QT_INCLUDES = @QT_INCLUDES@
-QT_LDFLAGS = @QT_LDFLAGS@
-QT_LIB = @QT_LIB@
-QT_LIBS = @QT_LIBS@
-QT_VERSION = @QT_VERSION@
 RANLIB = @RANLIB@
+RT_LIB = @RT_LIB@
 SED = @SED@
 SET_MAKE = @SET_MAKE@
 SHELL = @SHELL@
 SIZE_T_TYPE = @SIZE_T_TYPE@
 STRIP = @STRIP@
-UIC = @UIC@
-UIChelp = @UIChelp@
 VERSION = @VERSION@
 XMKMF = @XMKMF@
 XML_CATALOG = @XML_CATALOG@
@@ -280,9 +271,13 @@ libopagent_la_LIBADD = $(BFD_LIBS)
 # intentionally break backward ABI compatability.  Use the
 # symbol versioning technique (via the version script) to add new or
 # change existing functions; then just increment the minor version.
-# See http://www.gnu.org/software/binutils/manual/ld-2.9.1/html_node/ld_25.html
+#
+# See http://www.gnu.org/software/libtool/manual/html_node/Updating-version-info.html
+# for information about library versioning.
+#
+# See http://www.gnu.org/software/gnulib/manual/html_node/LD-Version-Scripts.html
 # for details about the --version-script option.
-libopagent_la_LDFLAGS = -version-info  1:0:0 \
+libopagent_la_LDFLAGS = -version-info  1:1:0 \
 			-Wl,--version-script=${top_srcdir}/libopagent/opagent_symbols.ver \
 			@OP_LDFLAGS@
 
diff --git a/libopagent/opagent.c b/libopagent/opagent.c
index 860413f..431dfae 100644
--- a/libopagent/opagent.c
+++ b/libopagent/opagent.c
@@ -57,11 +57,13 @@
 #include <stdint.h>
 #include <limits.h>
 #include <sys/types.h>
+#include <dirent.h>
 #include <sys/stat.h>
 #include <fcntl.h>
 #include <unistd.h>
 #include <time.h>
 #include <bfd.h>
+#include <sys/file.h>
 
 #include "opagent.h"
 #include "op_config.h"
@@ -118,34 +120,72 @@ static int define_bfd_vars(void)
 #define OP_MAJOR_VERSION 1
 #define OP_MINOR_VERSION 0
 
-#define AGENT_DIR OP_SESSION_DIR_DEFAULT "jitdump"
+#define TMP_OPROFILE_DIR "/tmp/.oprofile"
+#define JITDUMP_DIR TMP_OPROFILE_DIR "/jitdump"
 
 #define MSG_MAXLEN 20
 
 op_agent_t op_open_agent(void)
 {
+#define OP_JITCONV_USECS_TO_WAIT 1000
+	unsigned int usecs_waited = 0;
 	char pad_bytes[7] = {0, 0, 0, 0, 0, 0, 0};
 	int pad_cnt;
 	char dump_path[PATH_MAX];
 	char err_msg[PATH_MAX + 16];
-	struct stat dirstat;
 	int rc;
 	struct jitheader header;
 	int fd;
 	struct timeval tv;
 	FILE * dumpfile = NULL;
 
-	rc = stat(AGENT_DIR, &dirstat);
-	if (rc || !S_ISDIR(dirstat.st_mode)) {
-		if (!rc)
-			errno = ENOTDIR;
-		fprintf(stderr,"libopagent: Jitdump agent directory %s "
-			"missing\n", AGENT_DIR);
-		fprintf(stderr,"libopagent: do opcontrol --setup or "
-			"opcontrol --reset, first\n");
-		return NULL;
+	/* Coverity complains about 'time-of-check-time-of-use' race if we do stat() on
+	 * a file (or directory) and then open or create it afterwards.  So instead,
+	 * we'll try to open it and see what happens.
+	 */
+	int create_dir = 0;
+	DIR * dir1 = opendir(TMP_OPROFILE_DIR);
+	if (!dir1) {
+		if (errno == ENOENT) {
+			create_dir = 1;
+		} else if (errno == ENOTDIR) {
+			fprintf(stderr, "Error: Creation of directory %s failed. File exists where directory is expected.\n",
+			        TMP_OPROFILE_DIR);
+			return NULL;
+		}
+	} else {
+		closedir(dir1);
+	}
+	if (create_dir) {
+		create_dir = 0;
+		rc = mkdir(TMP_OPROFILE_DIR, S_IRWXU | S_IRWXG | S_IRWXO);
+		if (rc && (errno != EEXIST)) {
+			fprintf(stderr, "Error trying to create %s dir.\n", TMP_OPROFILE_DIR);
+			return NULL;
+		}
+	}
+
+	dir1 = opendir(JITDUMP_DIR);
+	if (!dir1) {
+		if (errno == ENOENT) {
+			create_dir = 1;
+		} else if (errno == ENOTDIR) {
+			fprintf(stderr, "Error: Creation of directory %s failed. File exists where directory is expected.\n",
+			        JITDUMP_DIR);
+			return NULL;
+		}
+	} else {
+		closedir(dir1);
 	}
-	snprintf(dump_path, PATH_MAX, "%s/%i.dump", AGENT_DIR, getpid());
+
+	if (create_dir) {
+		rc = mkdir(JITDUMP_DIR, S_IRWXU | S_IRWXG | S_IRWXO);
+		if (rc && (errno != EEXIST)) {
+			fprintf(stderr, "Error trying to create %s dir.\n", JITDUMP_DIR);
+			return NULL;
+		}
+	}
+	snprintf(dump_path, PATH_MAX, "%s/%i.dump", JITDUMP_DIR, getpid());
 	snprintf(err_msg, PATH_MAX + 16, "Error opening %s\n", dump_path);
 	// make the dump file only accessible for the user for security reason.
 	fd = creat(dump_path, S_IRUSR|S_IWUSR);
@@ -156,10 +196,31 @@ op_agent_t op_open_agent(void)
 	dumpfile = fdopen(fd, "w");
 	if (!dumpfile) {
 		fprintf(stderr, "%s\n", err_msg);
+		close(fd);
 		return NULL;
 	}
-	if (define_bfd_vars())
+
+again:
+	/* We need OS-level file locking here because the opjitconv process may need to
+	 * copy the dumpfile while the JIT agent is still writing to it. */
+	rc = flock(fd, LOCK_EX | LOCK_NB);
+	if (rc) {
+		if (usecs_waited < OP_JITCONV_USECS_TO_WAIT) {
+			usleep(100);
+			usecs_waited += 100;
+			goto again;
+		} else {
+			printf("opagent: Unable to obtain lock on JIT dumpfile\n");
+			fclose(dumpfile);
+			return NULL;
+		}
+	}
+
+
+	if (define_bfd_vars()) {
+		fclose(dumpfile);
 		return NULL;
+	}
 	header.magic = JITHEADER_MAGIC;
 	header.version = JITHEADER_VERSION;
 	header.totalsize = sizeof(header) + strlen(_bfd_target_name) + 1;
@@ -169,33 +230,42 @@ op_agent_t op_open_agent(void)
 	header.bfd_arch = _bfd_arch;
 	header.bfd_mach = _bfd_mach;
 	if (gettimeofday(&tv, NULL)) {
+		fclose(dumpfile);
 		fprintf(stderr, "gettimeofday failed\n");
 		return NULL;
 	}
 
 	header.timestamp = tv.tv_sec;
 	snprintf(err_msg, PATH_MAX + 16, "Error writing to %s", dump_path);
-	if (!fwrite(&header, sizeof(header), 1, dumpfile)) {
+	if (!fwrite_unlocked(&header, sizeof(header), 1, dumpfile)) {
+		fclose(dumpfile);
 		fprintf(stderr, "%s\n", err_msg);
 		return NULL;
 	}
-	if (!fwrite(_bfd_target_name, strlen(_bfd_target_name) + 1, 1,
+	if (!fwrite_unlocked(_bfd_target_name, strlen(_bfd_target_name) + 1, 1,
 		    dumpfile)) {
+		fclose(dumpfile);
 		fprintf(stderr, "%s\n", err_msg);
 		return NULL;
 	}
 	/* write padding '\0' if necessary */
-	if (pad_cnt && !fwrite(pad_bytes, pad_cnt, 1, dumpfile)) {
+	if (pad_cnt && !fwrite_unlocked(pad_bytes, pad_cnt, 1, dumpfile)) {
+		fclose(dumpfile);
 		fprintf(stderr, "%s\n", err_msg);
 		return NULL;
 	}
-	fflush(dumpfile);
+	fflush_unlocked(dumpfile);
+	flock(fd, LOCK_UN);
+#undef OP_JITCONV_USECS_TO_WAIT
 	return (op_agent_t)dumpfile;
 }
 
 
 int op_close_agent(op_agent_t hdl)
 {
+#define OP_JITCONV_USECS_TO_WAIT 1000
+	unsigned int usecs_waited = 0;
+	int dumpfd, rc;
 	struct jr_code_close rec;
 	struct timeval tv;
 	FILE * dumpfile = (FILE *) hdl;
@@ -211,9 +281,30 @@ int op_close_agent(op_agent_t hdl)
 	}
 	rec.timestamp = tv.tv_sec;
 
-	if (!fwrite(&rec, sizeof(rec), 1, dumpfile))
+	if ((dumpfd = fileno(dumpfile)) < 0) {
+		fprintf(stderr, "opagent: Unable to get file descriptor for JIT dumpfile\n");
+		return -1;
+	}
+again:
+	/* We need OS-level file locking here because the opjitconv process may need to
+	 * copy the dumpfile while the JIT agent is still writing to it. */
+	rc = flock(dumpfd, LOCK_EX | LOCK_NB);
+	if (rc) {
+		if (usecs_waited < OP_JITCONV_USECS_TO_WAIT) {
+			usleep(100);
+			usecs_waited += 100;
+			goto again;
+		} else {
+			printf("opagent: Unable to obtain lock on JIT dumpfile\n");
+			return -1;
+		}
+	}
+
+	if (!fwrite_unlocked(&rec, sizeof(rec), 1, dumpfile))
 		return -1;
 	fclose(dumpfile);
+	flock(dumpfd, LOCK_UN);
+#undef OP_JITCONV_USECS_TO_WAIT
 	dumpfile = NULL;
 	return 0;
 }
@@ -222,6 +313,9 @@ int op_close_agent(op_agent_t hdl)
 int op_write_native_code(op_agent_t hdl, char const * symbol_name,
 	uint64_t vma, void const * code, unsigned int const size)
 {
+#define OP_JITCONV_USECS_TO_WAIT 1000
+	unsigned int usecs_waited = 0;
+	int dumpfd, rc;
 	struct jr_code_load rec;
 	struct timeval tv;
 	size_t sz_symb_name;
@@ -252,26 +346,59 @@ int op_write_native_code(op_agent_t hdl, char const * symbol_name,
 
 	rec.timestamp = tv.tv_sec;
 
-	/* locking makes sure that we continuously write this record, if
+	if ((dumpfd = fileno(dumpfile)) < 0) {
+		fprintf(stderr, "opagent: Unable to get file descriptor for JIT dumpfile\n");
+		return -1;
+	}
+again:
+	/* We need OS-level file locking here because the opjitconv process may need to
+	 * copy the dumpfile while the JIT agent is still writing to it.
+	 */
+	rc = flock(dumpfd, LOCK_EX | LOCK_NB);
+	if (rc) {
+		if (usecs_waited < OP_JITCONV_USECS_TO_WAIT) {
+			usleep(100);
+			usecs_waited += 100;
+			goto again;
+		} else {
+			printf("opagent: Unable to obtain lock on JIT dumpfile\n");
+			return -1;
+		}
+	}
+
+	/* This locking makes sure that we continuously write this record if
 	 * we are called within a multi-threaded context */
 	flockfile(dumpfile);
 	/* Write record, symbol name, code (optionally), and (if necessary)
-	 * additonal padding \0 bytes.
+	 * additional padding \0 bytes.
 	 */
 	if (fwrite_unlocked(&rec, sizeof(rec), 1, dumpfile) &&
 	    fwrite_unlocked(symbol_name, sz_symb_name, 1, dumpfile)) {
-		if (code)
-			fwrite_unlocked(code, size, 1, dumpfile);
-		if (padding_count)
-			fwrite_unlocked(pad_bytes, padding_count, 1, dumpfile);
+		size_t expected_sz, sz;
+		expected_sz = sz = 0;
+		if (code) {
+			sz = fwrite_unlocked(code, size, 1, dumpfile);
+			expected_sz++;
+		}
+		if (padding_count) {
+			sz += fwrite_unlocked(pad_bytes, padding_count, 1, dumpfile);
+			expected_sz++;
+		}
 		/* Always flush to ensure conversion code to elf will see
 		 * data as soon as possible */
 		fflush_unlocked(dumpfile);
 		funlockfile(dumpfile);
+		flock(dumpfd, LOCK_UN);
+		if (sz != expected_sz) {
+			printf("opagent: fwrite_unlocked failed");
+			return -1;
+		}
 		return 0;
 	}
 	fflush_unlocked(dumpfile);
 	funlockfile(dumpfile);
+	flock(dumpfd, LOCK_UN);
+#undef OP_JITCONV_USECS_TO_WAIT
 	return -1;
 }
 
@@ -280,13 +407,15 @@ int op_write_debug_line_info(op_agent_t hdl, void const * code,
 			     size_t nr_entry,
 			     struct debug_line_info const * compile_map)
 {
+#define OP_JITCONV_USECS_TO_WAIT 1000
+	unsigned int usecs_waited = 0;
 	struct jr_code_debug_info rec;
 	long cur_pos, last_pos;
 	struct timeval tv;
 	size_t i;
 	size_t padding_count;
 	char padd_bytes[7] = {0, 0, 0, 0, 0, 0, 0};
-	int rc = -1;
+	int dumpfd, rc = -1;
 	FILE * dumpfile = (FILE *) hdl;
 
 	if (!dumpfile) {
@@ -311,6 +440,27 @@ int op_write_debug_line_info(op_agent_t hdl, void const * code,
 
 	rec.timestamp = tv.tv_sec;
 
+	if ((dumpfd = fileno(dumpfile)) < 0) {
+		fprintf(stderr, "opagent: Unable to get file descriptor for JIT dumpfile\n");
+		return -1;
+	}
+again:
+	/* We need OS-level file locking here because the opjitconv process may need to
+	 * copy the dumpfile while the JIT agent is still writing to it.  The flock()
+	 * result is tested directly so that rc keeps its initial -1 for the error path. */
+	if (flock(dumpfd, LOCK_EX | LOCK_NB)) {
+		if (usecs_waited < OP_JITCONV_USECS_TO_WAIT) {
+			usleep(100);
+			usecs_waited += 100;
+			goto again;
+		} else {
+			printf("opagent: Unable to obtain lock on JIT dumpfile\n");
+			return -1;
+		}
+	}
+
+	/* This locking makes sure that we continuously write this record if
+	 * we are called within a multi-threaded context. */
+	flockfile(dumpfile);
 
 	if ((cur_pos = ftell(dumpfile)) == -1l)
@@ -348,12 +498,17 @@ int op_write_debug_line_info(op_agent_t hdl, void const * code,
 error:
 	fflush_unlocked(dumpfile);
 	funlockfile(dumpfile);
+	flock(dumpfd, LOCK_UN);
+#undef OP_JITCONV_USECS_TO_WAIT
 	return rc;
 }
 
 
 int op_unload_native_code(op_agent_t hdl, uint64_t vma)
 {
+#define OP_JITCONV_USECS_TO_WAIT 1000
+	int dumpfd, rc;
+	unsigned int usecs_waited = 0;
 	struct jr_code_unload rec;
 	struct timeval tv;
 	FILE * dumpfile = (FILE *) hdl;
@@ -373,9 +528,35 @@ int op_unload_native_code(op_agent_t hdl, uint64_t vma)
 	}
 	rec.timestamp = tv.tv_sec;
 
-	if (!fwrite(&rec, sizeof(rec), 1, dumpfile))
+	if ((dumpfd = fileno(dumpfile)) < 0) {
+		fprintf(stderr, "opagent: Unable to get file descriptor for JIT dumpfile\n");
 		return -1;
-	fflush(dumpfile);
+	}
+again:
+	/* We need OS-level file locking here because the opjitconv process may need to
+	 * copy the dumpfile while the JIT agent is still writing to it. */
+	rc = flock(dumpfd, LOCK_EX | LOCK_NB);
+	if (rc) {
+		if (usecs_waited < OP_JITCONV_USECS_TO_WAIT) {
+			usleep(100);
+			usecs_waited += 100;
+			goto again;
+		} else {
+			printf("opagent: Unable to obtain lock on JIT dumpfile\n");
+			return -1;
+		}
+	}
+
+	/* This locking makes sure that we continuously write this record if
+	 * we are called within a multi-threaded context. */
+	flockfile(dumpfile);
+
+	/* Record the result rather than returning early, so both locks taken above are always released. */
+	rc = fwrite_unlocked(&rec, sizeof(rec), 1, dumpfile) ? 0 : -1;
+	fflush_unlocked(dumpfile);
+	funlockfile(dumpfile);
+	flock(dumpfd, LOCK_UN);
+#undef OP_JITCONV_USECS_TO_WAIT
-	return 0;
+	return rc;
 }
 
diff --git a/libopagent/opagent.h b/libopagent/opagent.h
index 920e399..58a0619 100644
--- a/libopagent/opagent.h
+++ b/libopagent/opagent.h
@@ -29,6 +29,8 @@
 #define _LIB_OPAGENT_H
 
 #include <sys/types.h>
+#include <stdint.h>
+
 
 #if defined(__cplusplus)
 extern "C" {
@@ -45,7 +47,7 @@ typedef void * op_agent_t;
 
 /**
  * This function must be called by agents before any other function.
- * Creates and opens a JIT dump file in /var/lib/oprofile/jitdump
+ * Creates and opens a JIT dump file in /tmp/.oprofile/jitdump
  * using the naming convention <process_id>.dump.
  *
  * Returns a valid op_agent_t handle or NULL.  If NULL is returned, errno
diff --git a/libopt++/Makefile.in b/libopt++/Makefile.in
index 3e95aaf..9b8d3da 100644
--- a/libopt++/Makefile.in
+++ b/libopt++/Makefile.in
@@ -39,7 +39,6 @@ DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
 am__aclocal_m4_deps = $(top_srcdir)/m4/binutils.m4 \
 	$(top_srcdir)/m4/builtinexpect.m4 \
-	$(top_srcdir)/m4/cellspubfdsupport.m4 \
 	$(top_srcdir)/m4/compileroption.m4 \
 	$(top_srcdir)/m4/copyifchange.m4 $(top_srcdir)/m4/docbook.m4 \
 	$(top_srcdir)/m4/extradirs.m4 \
@@ -48,7 +47,7 @@ am__aclocal_m4_deps = $(top_srcdir)/m4/binutils.m4 \
 	$(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
 	$(top_srcdir)/m4/mallocattribute.m4 \
 	$(top_srcdir)/m4/poptconst.m4 \
-	$(top_srcdir)/m4/precompiledheader.m4 $(top_srcdir)/m4/qt.m4 \
+	$(top_srcdir)/m4/precompiledheader.m4 \
 	$(top_srcdir)/m4/sstream.m4 $(top_srcdir)/m4/typedef.m4 \
 	$(top_srcdir)/configure.ac
 am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
@@ -141,7 +140,6 @@ LN_S = @LN_S@
 LTLIBOBJS = @LTLIBOBJS@
 MAKEINFO = @MAKEINFO@
 MKDIR_P = @MKDIR_P@
-MOC = @MOC@
 NM = @NM@
 NMEDIT = @NMEDIT@
 OBJDUMP = @OBJDUMP@
@@ -165,20 +163,13 @@ PFM_LIB = @PFM_LIB@
 PKG_CONFIG = @PKG_CONFIG@
 POPT_LIBS = @POPT_LIBS@
 PTRDIFF_T_TYPE = @PTRDIFF_T_TYPE@
-QT_CFLAGS = @QT_CFLAGS@
-QT_INCLUDES = @QT_INCLUDES@
-QT_LDFLAGS = @QT_LDFLAGS@
-QT_LIB = @QT_LIB@
-QT_LIBS = @QT_LIBS@
-QT_VERSION = @QT_VERSION@
 RANLIB = @RANLIB@
+RT_LIB = @RT_LIB@
 SED = @SED@
 SET_MAKE = @SET_MAKE@
 SHELL = @SHELL@
 SIZE_T_TYPE = @SIZE_T_TYPE@
 STRIP = @STRIP@
-UIC = @UIC@
-UIChelp = @UIChelp@
 VERSION = @VERSION@
 XMKMF = @XMKMF@
 XML_CATALOG = @XML_CATALOG@
diff --git a/libopt++/popt_options.cpp b/libopt++/popt_options.cpp
index 5d3e17f..c8a15e8 100644
--- a/libopt++/popt_options.cpp
+++ b/libopt++/popt_options.cpp
@@ -251,7 +251,7 @@ option_base::option_base(char const * name, char short_name,
                          void * data, unsigned int popt_flags)
 	: option_name(name)
 {
-	poptOption const opt = { name, short_name, popt_flags,
+	poptOption const opt = { name, short_name, (typeof(opt.argInfo))popt_flags,
 	                         data, 0, help, arg_help };
 
 	popt_options().push_back(opt);
diff --git a/libpe_utils/Makefile.am b/libpe_utils/Makefile.am
new file mode 100644
index 0000000..5dbf3d7
--- /dev/null
+++ b/libpe_utils/Makefile.am
@@ -0,0 +1,20 @@
+if BUILD_FOR_PERF_EVENT
+
+AM_CPPFLAGS = \
+	-I ${top_srcdir}/libutil \
+	-I ${top_srcdir}/libutil++ \
+	-I ${top_srcdir}/libop \
+	-I ${top_srcdir}/libpe_utils \
+	-I ${top_srcdir}/libperf_events \
+	@PERF_EVENT_FLAGS@ \
+	@OP_CPPFLAGS@
+
+AM_CXXFLAGS = @OP_CXXFLAGS@
+
+noinst_LIBRARIES = libpe_utils.a
+libpe_utils_a_SOURCES =  \
+	op_pe_utils.h \
+	op_pe_utils.cpp
+
+
+endif
diff --git a/gui/ui/Makefile.in b/libpe_utils/Makefile.in
similarity index 83%
rename from gui/ui/Makefile.in
rename to libpe_utils/Makefile.in
index 91a1fce..ebf63b1 100644
--- a/gui/ui/Makefile.in
+++ b/libpe_utils/Makefile.in
@@ -34,12 +34,11 @@ PRE_UNINSTALL = :
 POST_UNINSTALL = :
 build_triplet = @build@
 host_triplet = @host@
-subdir = gui/ui
+subdir = libpe_utils
 DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
 am__aclocal_m4_deps = $(top_srcdir)/m4/binutils.m4 \
 	$(top_srcdir)/m4/builtinexpect.m4 \
-	$(top_srcdir)/m4/cellspubfdsupport.m4 \
 	$(top_srcdir)/m4/compileroption.m4 \
 	$(top_srcdir)/m4/copyifchange.m4 $(top_srcdir)/m4/docbook.m4 \
 	$(top_srcdir)/m4/extradirs.m4 \
@@ -48,7 +47,7 @@ am__aclocal_m4_deps = $(top_srcdir)/m4/binutils.m4 \
 	$(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
 	$(top_srcdir)/m4/mallocattribute.m4 \
 	$(top_srcdir)/m4/poptconst.m4 \
-	$(top_srcdir)/m4/precompiledheader.m4 $(top_srcdir)/m4/qt.m4 \
+	$(top_srcdir)/m4/precompiledheader.m4 \
 	$(top_srcdir)/m4/sstream.m4 $(top_srcdir)/m4/typedef.m4 \
 	$(top_srcdir)/configure.ac
 am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
@@ -59,12 +58,12 @@ CONFIG_CLEAN_FILES =
 CONFIG_CLEAN_VPATH_FILES =
 LIBRARIES = $(noinst_LIBRARIES)
 ARFLAGS = cru
-liboprof_start_a_AR = $(AR) $(ARFLAGS)
-liboprof_start_a_LIBADD =
-@have_qt_TRUE@nodist_liboprof_start_a_OBJECTS =  \
-@have_qt_TRUE@	oprof_start.base.$(OBJEXT) \
-@have_qt_TRUE@	oprof_start.base.moc.$(OBJEXT)
-liboprof_start_a_OBJECTS = $(nodist_liboprof_start_a_OBJECTS)
+libpe_utils_a_AR = $(AR) $(ARFLAGS)
+libpe_utils_a_LIBADD =
+am__libpe_utils_a_SOURCES_DIST = op_pe_utils.h op_pe_utils.cpp
+@BUILD_FOR_PERF_EVENT_TRUE@am_libpe_utils_a_OBJECTS =  \
+@BUILD_FOR_PERF_EVENT_TRUE@	op_pe_utils.$(OBJEXT)
+libpe_utils_a_OBJECTS = $(am_libpe_utils_a_OBJECTS)
 DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
 depcomp = $(SHELL) $(top_srcdir)/depcomp
 am__depfiles_maybe = depfiles
@@ -78,8 +77,17 @@ CXXLD = $(CXX)
 CXXLINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
 	--mode=link $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
 	$(LDFLAGS) -o $@
-SOURCES = $(nodist_liboprof_start_a_SOURCES)
-DIST_SOURCES =
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+	$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+	--mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+	$(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+CCLD = $(CC)
+LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+	--mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
+	$(LDFLAGS) -o $@
+SOURCES = $(libpe_utils_a_SOURCES)
+DIST_SOURCES = $(am__libpe_utils_a_SOURCES_DIST)
 ETAGS = etags
 CTAGS = ctags
 DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
@@ -134,7 +142,6 @@ LN_S = @LN_S@
 LTLIBOBJS = @LTLIBOBJS@
 MAKEINFO = @MAKEINFO@
 MKDIR_P = @MKDIR_P@
-MOC = @MOC@
 NM = @NM@
 NMEDIT = @NMEDIT@
 OBJDUMP = @OBJDUMP@
@@ -158,20 +165,13 @@ PFM_LIB = @PFM_LIB@
 PKG_CONFIG = @PKG_CONFIG@
 POPT_LIBS = @POPT_LIBS@
 PTRDIFF_T_TYPE = @PTRDIFF_T_TYPE@
-QT_CFLAGS = @QT_CFLAGS@
-QT_INCLUDES = @QT_INCLUDES@
-QT_LDFLAGS = @QT_LDFLAGS@
-QT_LIB = @QT_LIB@
-QT_LIBS = @QT_LIBS@
-QT_VERSION = @QT_VERSION@
 RANLIB = @RANLIB@
+RT_LIB = @RT_LIB@
 SED = @SED@
 SET_MAKE = @SET_MAKE@
 SHELL = @SHELL@
 SIZE_T_TYPE = @SIZE_T_TYPE@
 STRIP = @STRIP@
-UIC = @UIC@
-UIChelp = @UIChelp@
 VERSION = @VERSION@
 XMKMF = @XMKMF@
 XML_CATALOG = @XML_CATALOG@
@@ -235,11 +235,21 @@ top_build_prefix = @top_build_prefix@
 top_builddir = @top_builddir@
 top_srcdir = @top_srcdir@
 topdir = @topdir@
-EXTRA_DIST = oprof_start.base.ui
-@have_qt_TRUE@AM_CPPFLAGS = @QT_CFLAGS@ @OP_CPPFLAGS@
-@have_qt_TRUE@AM_CXXFLAGS = @OP_CXXFLAGS@
-@have_qt_TRUE@noinst_LIBRARIES = liboprof_start.a
-@have_qt_TRUE@nodist_liboprof_start_a_SOURCES = oprof_start.base.cpp oprof_start.base.moc.cpp
+@BUILD_FOR_PERF_EVENT_TRUE@AM_CPPFLAGS = \
+@BUILD_FOR_PERF_EVENT_TRUE@	-I ${top_srcdir}/libutil \
+@BUILD_FOR_PERF_EVENT_TRUE@	-I ${top_srcdir}/libutil++ \
+@BUILD_FOR_PERF_EVENT_TRUE@	-I ${top_srcdir}/libop \
+@BUILD_FOR_PERF_EVENT_TRUE@	-I ${top_srcdir}/libpe_utils \
+@BUILD_FOR_PERF_EVENT_TRUE@	-I ${top_srcdir}/libperf_events \
+@BUILD_FOR_PERF_EVENT_TRUE@	@PERF_EVENT_FLAGS@ \
+@BUILD_FOR_PERF_EVENT_TRUE@	@OP_CPPFLAGS@
+
+@BUILD_FOR_PERF_EVENT_TRUE@AM_CXXFLAGS = @OP_CXXFLAGS@
+@BUILD_FOR_PERF_EVENT_TRUE@noinst_LIBRARIES = libpe_utils.a
+@BUILD_FOR_PERF_EVENT_TRUE@libpe_utils_a_SOURCES = \
+@BUILD_FOR_PERF_EVENT_TRUE@	op_pe_utils.h \
+@BUILD_FOR_PERF_EVENT_TRUE@	op_pe_utils.cpp
+
 all: all-am
 
 .SUFFIXES:
@@ -253,9 +263,9 @@ $(srcdir)/Makefile.in:  $(srcdir)/Makefile.am  $(am__configure_deps)
 	      exit 1;; \
 	  esac; \
 	done; \
-	echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign gui/ui/Makefile'; \
+	echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign libpe_utils/Makefile'; \
 	$(am__cd) $(top_srcdir) && \
-	  $(AUTOMAKE) --foreign gui/ui/Makefile
+	  $(AUTOMAKE) --foreign libpe_utils/Makefile
 .PRECIOUS: Makefile
 Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
 	@case '$?' in \
@@ -277,10 +287,10 @@ $(am__aclocal_m4_deps):
 
 clean-noinstLIBRARIES:
 	-test -z "$(noinst_LIBRARIES)" || rm -f $(noinst_LIBRARIES)
-liboprof_start.a: $(liboprof_start_a_OBJECTS) $(liboprof_start_a_DEPENDENCIES) 
-	-rm -f liboprof_start.a
-	$(liboprof_start_a_AR) liboprof_start.a $(liboprof_start_a_OBJECTS) $(liboprof_start_a_LIBADD)
-	$(RANLIB) liboprof_start.a
+libpe_utils.a: $(libpe_utils_a_OBJECTS) $(libpe_utils_a_DEPENDENCIES) 
+	-rm -f libpe_utils.a
+	$(libpe_utils_a_AR) libpe_utils.a $(libpe_utils_a_OBJECTS) $(libpe_utils_a_LIBADD)
+	$(RANLIB) libpe_utils.a
 
 mostlyclean-compile:
 	-rm -f *.$(OBJEXT)
@@ -288,8 +298,7 @@ mostlyclean-compile:
 distclean-compile:
 	-rm -f *.tab.c
 
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oprof_start.base.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oprof_start.base.moc.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/op_pe_utils.Po@am__quote@
 
 .cpp.o:
 @am__fastdepCXX_TRUE@	$(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@@ -429,11 +438,10 @@ distclean-generic:
 maintainer-clean-generic:
 	@echo "This command is intended for maintainers to use"
 	@echo "it deletes files that may require special tools to rebuild."
-@have_qt_FALSE@clean-local:
 clean: clean-am
 
-clean-am: clean-generic clean-libtool clean-local \
-	clean-noinstLIBRARIES mostlyclean-am
+clean-am: clean-generic clean-libtool clean-noinstLIBRARIES \
+	mostlyclean-am
 
 distclean: distclean-am
 	-rm -rf ./$(DEPDIR)
@@ -504,31 +512,18 @@ uninstall-am:
 .MAKE: install-am install-strip
 
 .PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
-	clean-libtool clean-local clean-noinstLIBRARIES ctags \
-	distclean distclean-compile distclean-generic \
-	distclean-libtool distclean-tags distdir dvi dvi-am html \
-	html-am info info-am install install-am install-data \
-	install-data-am install-dvi install-dvi-am install-exec \
-	install-exec-am install-html install-html-am install-info \
-	install-info-am install-man install-pdf install-pdf-am \
-	install-ps install-ps-am install-strip installcheck \
-	installcheck-am installdirs maintainer-clean \
-	maintainer-clean-generic mostlyclean mostlyclean-compile \
-	mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
-	tags uninstall uninstall-am
-
-
-@have_qt_TRUE@oprof_start.base.h: oprof_start.base.ui
-@have_qt_TRUE@	$(UIC) -o $@ $<
-
-@have_qt_TRUE@oprof_start.base.cpp: oprof_start.base.h oprof_start.base.ui
-@have_qt_TRUE@	$(UIC) -o $@ -impl $^
-
-@have_qt_TRUE@oprof_start.base.moc.cpp: oprof_start.base.h
-@have_qt_TRUE@	$(MOC) -o $@ $<
-
-@have_qt_TRUE@clean-local:
-@have_qt_TRUE@	rm -f oprof_start.base.h oprof_start.base.cpp oprof_start.base.moc.cpp
+	clean-libtool clean-noinstLIBRARIES ctags distclean \
+	distclean-compile distclean-generic distclean-libtool \
+	distclean-tags distdir dvi dvi-am html html-am info info-am \
+	install install-am install-data install-data-am install-dvi \
+	install-dvi-am install-exec install-exec-am install-html \
+	install-html-am install-info install-info-am install-man \
+	install-pdf install-pdf-am install-ps install-ps-am \
+	install-strip installcheck installcheck-am installdirs \
+	maintainer-clean maintainer-clean-generic mostlyclean \
+	mostlyclean-compile mostlyclean-generic mostlyclean-libtool \
+	pdf pdf-am ps ps-am tags uninstall uninstall-am
+
 
 # Tell versions [3.59,3.63) of GNU make to not export all variables.
 # Otherwise a system limit (for SysV at least) may be exceeded.
diff --git a/libpe_utils/op_pe_utils.cpp b/libpe_utils/op_pe_utils.cpp
new file mode 100644
index 0000000..8c69894
--- /dev/null
+++ b/libpe_utils/op_pe_utils.cpp
@@ -0,0 +1,1023 @@
+/**
+ * @file op_pe_utils.cpp
+ * General utility functions for tools using Linux Performance Events Subsystem.
+ *
+ * @remark Copyright 2013 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * Created on: May 21, 2013
+ * @author Maynard Johnson
+ * (C) Copyright IBM Corp. 2013
+ *
+ */
+
+#include <linux/perf_event.h>
+#include <dirent.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <asm/unistd.h>
+#include <string.h>
+#include <signal.h>
+
+#include <iostream>
+#include <set>
+#include <stdexcept>
+#include <sstream>
+#include <string>
+
+#include "config.h"
+// HAVE_LIBPFM is defined in config.h
+#ifdef HAVE_LIBPFM
+#include <perfmon/pfmlib.h>
+#endif
+#include "op_config.h"
+#include "op_types.h"
+#include "op_pe_utils.h"
+#include "operf_event.h"
+#include "op_libiberty.h"
+#include "cverb.h"
+#include "op_string.h"
+#include "op_netburst.h"
+#include "op_events.h"
+
+
+extern verbose vdebug;
+extern std::vector<operf_event_t> events;
+extern op_cpu cpu_type;
+
+
+using namespace std;
+
+// Global functions
+// Return the number of the next online CPU found by reading 'dir', or -1 when readdir() has no more entries or a cpu<N>/online file cannot be opened.
+int op_pe_utils::op_get_next_online_cpu(DIR * dir, struct dirent *entry)
+{
+#define OFFLINE 0x30	// ASCII '0'
+	unsigned int cpu_num;
+	char cpu_online_pathname[40];
+	int res;
+	FILE * online;
+	again:
+	do {
+		entry = readdir(dir);	// 'entry' is passed by value, so the caller's pointer is not updated
+		if (!entry)
+			return -1;
+	} while (entry->d_type != DT_DIR);
+	// Only directory entries whose name parses as "cpu<N>" are considered.
+	res = sscanf(entry->d_name, "cpu%u", &cpu_num);
+	if (res <= 0)
+		goto again;
+	// The first byte of cpu<N>/online decides whether this CPU is skipped or returned.
+	errno = 0;
+	snprintf(cpu_online_pathname, 40, "/sys/devices/system/cpu/cpu%u/online", cpu_num);
+	if ((online = fopen(cpu_online_pathname, "r")) == NULL) {
+		cerr << "Unable to open " << cpu_online_pathname << endl;
+		if (errno)
+			cerr << strerror(errno) << endl;
+		return -1;
+	}
+	res = fgetc(online);
+	fclose(online);
+	if (res == OFFLINE)
+		goto again;
+	else
+		return cpu_num;
+}
+// Read an integer from the start of 'filename'; yields -999 when the file cannot be opened or holds no number.
+int op_pe_utils::op_get_sys_value(const char * filename)
+{
+	int value = -999;	// sentinel kept when nothing could be parsed
+	char text[10];
+	FILE * stream = fopen(filename, "r");
+	if (!stream)
+		return value;
+	if (fgets(text, 9, stream) != NULL)
+		sscanf(text, "%d", &value);
+	fclose(stream);
+	return value;
+}
+// Choose the CPU number used when probing perf_events: 0 when /sys/devices/system/cpu/online starts with '0' and has no comma, otherwise whatever op_get_next_online_cpu() returns first; -1 on failure.
+int op_pe_utils::op_get_cpu_for_perf_events_cap(void)
+{
+	int retval;
+	string err_msg;	// NOTE(review): assigned on every failure path below but never printed or returned here
+	char cpus_online[257];
+	FILE * online_cpus;
+	DIR *dir = NULL;
+	// NOTE(review): sysconf() signals failure with -1, which the zero test below does not catch -- confirm intent.
+	int total_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+	if (!total_cpus) {
+		err_msg = "Internal Error (1): Number of online cpus cannot be determined.";
+		retval = -1;
+		goto error;
+	}
+
+	online_cpus = fopen("/sys/devices/system/cpu/online", "r");
+	if (!online_cpus) {
+		err_msg = "Internal Error (2): Number of online cpus cannot be determined.";
+		retval = -1;
+		goto error;
+	}
+	memset(cpus_online, 0, sizeof(cpus_online));
+
+	if ( fgets(cpus_online, sizeof(cpus_online), online_cpus) == NULL) {
+		fclose(online_cpus);
+		err_msg = "Internal Error (3): Number of online cpus cannot be determined.";
+		retval = -1;
+		goto error;
+	}
+
+	if (!cpus_online[0]) {
+		fclose(online_cpus);
+		err_msg = "Internal Error (4): Number of online cpus cannot be determined.";
+		retval = -1;
+		goto error;
+
+	}
+	if (index(cpus_online, ',') || cpus_online[0] != '0') {
+		// A comma in cpus_online implies a gap, which in turn implies that not all
+		// CPUs are online.
+		if ((dir = opendir("/sys/devices/system/cpu")) == NULL) {
+			fclose(online_cpus);
+			err_msg = "Internal Error (5): Number of online cpus cannot be determined.";
+			retval = -1;
+			goto error;
+		} else {
+			struct dirent *entry = NULL;
+			retval = op_get_next_online_cpu(dir, entry);	// may itself yield -1
+			closedir(dir);
+		}
+	} else {
+		// All CPUs are available, so we just arbitrarily choose CPU 0.
+		retval = 0;
+	}
+	fclose(online_cpus);	// every earlier failure path closed the stream before jumping to 'error'
+error:
+	return retval;
+}
+// Probe perf_event_open(): returns 0 when the syscall succeeds here, otherwise the errno it failed with.
+int op_pe_utils::op_check_perf_events_cap(bool use_cpu_minus_one)
+{
+	/* If perf_events syscall is not implemented, the syscall below will fail
+	 * with ENOSYS (38).  If implemented, but the processor type on which this
+	 * program is running is not supported by perf_events, the syscall returns
+	 * ENOENT (2).
+	 */
+	struct perf_event_attr attr;
+	int cpu_to_try = use_cpu_minus_one ? -1 : op_get_cpu_for_perf_events_cap();
+	memset(&attr, 0, sizeof(attr));
+	attr.size = sizeof(attr);
+	attr.sample_type = PERF_SAMPLE_IP;
+	long fd = syscall(__NR_perf_event_open, &attr, getpid(), cpu_to_try, -1, 0);
+	if (fd < 0)
+		return errno;
+	// The descriptor only served as proof of support; close it so the probe does not leak it.
+	close((int) fd);
+	return 0;
+}
+// Name that find_app_file_in_dir() compares directory entries against.
+static const char * appname;
+static int find_app_file_in_dir(const struct dirent * d)
+{
+	// Non-zero selects the entry, zero rejects it.
+	if (strcmp(d->d_name, appname) != 0)
+		return 0;
+	return 1;
+}
+// Search each directory listed in $PATH for app_name; on a hit store "<dir>/<app_name>" in full_pathname and return 0, otherwise return -1.
+static char full_pathname[PATH_MAX];
+static int _get_PATH_based_pathname(const char * app_name)
+{
+	int retval = -1;
+	// getenv() returns NULL when PATH is unset; fall back to an empty list so the loop is skipped.
+	char * real_path = getenv("PATH");
+	char * path = (char *) xstrdup(real_path ? real_path : "");
+	char * segment = strtok(path, ":");
+	appname = app_name;
+	while (segment) {
+		struct dirent ** namelist;
+		int rc = scandir(segment, &namelist, find_app_file_in_dir, NULL);
+		if (rc < 0) {
+			if (errno != ENOENT) {
+				cerr << strerror(errno) << endl;
+				cerr << app_name << " cannot be found in your PATH." << endl;
+				break;
+			}
+		} else if (rc == 1) {
+			size_t applen = strlen(app_name);
+			size_t dirlen = strlen(segment);
+			// Only the match count is needed; free the list up front so no exit path below leaks it.
+			free(namelist[0]);
+			free(namelist);
+			if (applen + dirlen + 2 > PATH_MAX) {
+				cerr << "Path segment " << segment
+				     << " prepended to the passed app name is too long"
+				     << endl;
+				retval = -1;
+				break;
+			}
+
+			if (!strcmp(segment, ".")) {
+				if (getcwd(full_pathname, PATH_MAX) == NULL) {
+					retval = -1;
+					cerr << "getcwd [3] failed when processing <cur-dir>/" << app_name << " found via PATH. Aborting."
+							<< endl;
+					break;
+				}
+			} else {
+				// Copy the terminating NUL as well: strncpy() adds none when the count equals strlen().
+				strncpy(full_pathname, segment, dirlen + 1);
+			}
+			strcat(full_pathname, "/");
+			strncat(full_pathname, app_name, applen);
+			retval = 0;
+			break;
+		}
+		segment = strtok(NULL, ":");
+	}
+	free(path);
+	return retval;
+}
+// Resolve *app to a full pathname (kept in the static full_pathname buffer) and stat() it; *save_appname receives the name as passed. Returns 0 on success, -1 on failure.
+int op_pe_utils::op_validate_app_name(char ** app, char ** save_appname)
+{
+	int rc = 0;
+	struct stat filestat;
+	char * app_name = *app;
+	size_t len = strlen(app_name);
+	if (len > (size_t) (OP_APPNAME_LEN - 1)) {
+		cerr << "app name longer than max allowed (" << OP_APPNAME_LEN
+		     << " chars)\n";
+		cerr << app_name << endl;
+		rc = -1;
+		goto out;
+	}
+
+	if (index(app_name, '/') == app_name) {
+		// Full pathname of app was specified, starting with "/".
+		snprintf(full_pathname, PATH_MAX, "%s", app_name);	// always NUL-terminates, unlike strncpy(..., len)
+	} else if ((app_name[0] == '.') && (app_name[1] == '/')) {
+		// Passed app is in current directory; e.g., "./myApp"
+		if (getcwd(full_pathname, PATH_MAX) == NULL) {
+			rc = -1;
+			cerr << "getcwd [1] failed when trying to find app name " << app_name << ". Aborting." << endl;
+			goto out;
+		}
+		if ((strlen(full_pathname) + strlen(app_name + 2) + 2) > PATH_MAX) {	// "/" + name + NUL must fit before appending
+			rc = -1;
+			cerr << "Length of current dir (" << full_pathname << ") and app name ("
+			     << (app_name + 2) << ") exceeds max allowed (" << PATH_MAX << "). Aborting." << endl;
+			goto out;
+		}
+		strcat(full_pathname, "/");
+		strcat(full_pathname, (app_name + 2));
+	} else if (index(app_name, '/')) {
+		// Passed app is in a subdirectory of cur dir; e.g., "test-stuff/myApp"
+		if (getcwd(full_pathname, PATH_MAX) == NULL) {
+			rc = -1;
+			cerr << "getcwd [2] failed when trying to find app name " << app_name << ". Aborting." << endl;
+			goto out;
+		}
+		if ((strlen(full_pathname) + strlen(app_name) + 2) > PATH_MAX) {	// same bound as the "./" case
+			rc = -1;
+			cerr << "Length of current dir (" << full_pathname << ") and app name ("
+			     << app_name << ") exceeds max allowed (" << PATH_MAX << "). Aborting." << endl;
+			goto out;
+		}
+		strcat(full_pathname, "/");
+		strcat(full_pathname, app_name);
+	} else {
+		// Passed app name, at this point, MUST be found in PATH
+		rc = _get_PATH_based_pathname(app_name);
+	}
+
+	if (rc) {
+		cerr << "Problem finding app name " << app_name << ". Aborting." << endl;
+		goto out;
+	}
+	*save_appname = app_name;
+	*app = full_pathname;
+	if (stat(*app, &filestat)) {
+		char msg[OP_APPNAME_LEN + 50];
+		snprintf(msg, OP_APPNAME_LEN + 50, "Non-existent app name \"%s\"",
+		         *app);
+		perror(msg);
+		rc = -1;
+	}
+	out: return rc;
+}
+// Build a set of max_num_cpus CPU numbers: 0..max_num_cpus-1 when /sys/devices/system/cpu/online starts with '0' and has no comma, otherwise the values returned by op_get_next_online_cpu(). Returns an empty set when max_num_cpus is -1; throws runtime_error on failure.
+set<int> op_pe_utils::op_get_available_cpus(int max_num_cpus)
+{
+	struct dirent *entry = NULL;
+	int rc = 0;
+	bool all_cpus_avail = true;
+	DIR *dir = NULL;
+	string err_msg;
+	char cpus_online[257];
+	set<int> available_cpus;
+	FILE * online_cpus = fopen("/sys/devices/system/cpu/online", "r");
+	// The file is opened before this check, so it has to be closed again on the early return.
+	if (max_num_cpus == -1) {
+		if (online_cpus)
+			fclose(online_cpus);
+		return available_cpus;
+	}
+
+	if (!online_cpus) {
+		err_msg = "Internal Error: Number of online cpus cannot be determined.";
+		rc = -1;
+		goto out;
+	}
+	memset(cpus_online, 0, sizeof(cpus_online));
+	if (fgets(cpus_online, sizeof(cpus_online), online_cpus) == NULL) {
+		fclose(online_cpus);
+		err_msg = "Internal Error: Number of online cpus cannot be determined.";
+		rc = -1;
+		goto out;
+
+	}
+	if (index(cpus_online, ',') || cpus_online[0] != '0') {	// list has a gap or does not begin at CPU 0
+		all_cpus_avail = false;
+		if ((dir = opendir("/sys/devices/system/cpu")) == NULL) {
+			fclose(online_cpus);
+			err_msg = "Internal Error: Number of online cpus cannot be determined.";
+			rc = -1;
+			goto out;
+		}
+	}
+	fclose(online_cpus);
+	// One set entry per iteration: either the loop index itself or the next CPU read from 'dir'.
+	for (int cpu = 0; cpu < max_num_cpus; cpu++) {
+		int real_cpu;
+		if (all_cpus_avail) {
+			available_cpus.insert(cpu);
+		} else {
+			real_cpu = op_get_next_online_cpu(dir, entry);
+			if (real_cpu < 0) {
+				err_msg = "Internal Error: Number of online cpus cannot be determined.";
+				rc = -1;
+				goto out;
+			}
+			available_cpus.insert(real_cpu);
+		}
+	}
+out:
+	if (dir)
+		closedir(dir);
+	if (rc)
+		throw runtime_error(err_msg);	// err_msg was set on every path that set rc
+	return available_cpus;
+
+}
+
+// Fill in event->op_evt_code and event->evt_code (and, on x86, event->evt_um) from the output of the ophelp utility; exits the process on any failure.
+static void _get_event_code(operf_event_t * event, op_cpu cpu_type)
+{
+	FILE * fp;
+	char oprof_event_code[9];
+	string command;
+	u64 base_code, config;
+	char buf[20];	// decimal text of event->count, appended to the ophelp queries below
+	if ((snprintf(buf, 20, "%lu", event->count)) < 0) {
+		cerr << "Error parsing event count of " << event->count << endl;
+		exit(EXIT_FAILURE);
+	}
+	base_code = config = 0ULL;
+
+	command = OP_BINDIR;
+	command += "ophelp ";
+	command += event->name;
+
+	fp = popen(command.c_str(), "r");
+	if (fp == NULL) {
+		cerr << "Unable to execute ophelp to get info for event "
+		     << event->name << endl;
+		exit(EXIT_FAILURE);
+	}
+	if (fgets(oprof_event_code, sizeof(oprof_event_code), fp) == NULL) {
+		pclose(fp);
+		cerr << "Unable to find info for event "
+		     << event->name << endl;
+		exit(EXIT_FAILURE);
+	}
+
+	pclose(fp);
+
+	base_code = strtoull(oprof_event_code, (char **) NULL, 10);	// first line of ophelp output, read as decimal
+
+
+#if defined(__i386__) || defined(__x86_64__)
+	char mask[OP_MAX_UM_NAME_LEN];
+	// Setup EventSelect[11:8] field for AMD
+	const char * vendor_AMD = "AuthenticAMD";
+	if (op_is_cpu_vendor((char *)vendor_AMD)) {
+		config = base_code & 0xF00ULL;
+		config = config << 32;	// code bits 11:8 land in config bits 43:40
+	}
+
+	// Setup EventSelect[7:0] field
+	config |= base_code & 0xFFULL;
+
+	// Setup unitmask field
+handle_named_um:
+	if (event->um_name[0]) {
+		command = OP_BINDIR;
+		command += "ophelp ";
+		command += "--extra-mask ";
+		command += event->name;
+		command += ":";
+		command += buf;
+		command += ":";
+		command += event->um_name;
+		fp = popen(command.c_str(), "r");
+		if (fp == NULL) {
+			cerr << "Unable to execute ophelp to get info for event "
+			     << event->name << endl;
+			exit(EXIT_FAILURE);
+		}
+		if (fgets(mask, sizeof(mask), fp) == NULL) {
+			pclose(fp);
+			cerr << "Unable to find unit mask info for " << event->um_name << " for event "
+			     << event->name << endl;
+			exit(EXIT_FAILURE);
+		}
+		pclose(fp);
+		// FIXME:  The mask value here is the extra bits from the named unit mask.  It's not
+		// ideal to put that value into the UM's mask, since that's what will show up in
+		// opreport.  It would be better if we could somehow have the unit mask name that the
+		// user passed to us show up in opreport.
+		event->evt_um = strtoull(mask, (char **) NULL, 10);
+		/* A value >= EXTRA_MIN_VAL returned by 'ophelp --extra-mask' is interpreted as a
+		 * valid extra value; otherwise we interpret it as a simple unit mask value
+		 * for a named unit mask with EXTRA_NONE.
+		 */
+		if (event->evt_um >= EXTRA_MIN_VAL)
+			config |= event->evt_um;
+		else
+			config |= ((event->evt_um & 0xFFULL) << 8);
+	} else if (!event->evt_um) {	// unit mask is zero and unnamed: query 'ophelp --unit-mask'
+		char * endptr;
+		command.clear();
+		command = OP_BINDIR;
+		command += "ophelp ";
+		command += "--unit-mask ";
+		command += event->name;
+		command += ":";
+		command += buf;
+		fp = popen(command.c_str(), "r");
+		if (fp == NULL) {
+			cerr << "Unable to execute ophelp to get unit mask for event "
+			     << event->name << endl;
+			exit(EXIT_FAILURE);
+		}
+		if (fgets(mask, sizeof(mask), fp) == NULL) {
+			pclose(fp);
+			cerr << "Unable to find unit mask info for event " << event->name << endl;
+			exit(EXIT_FAILURE);
+		}
+		pclose(fp);
+		event->evt_um = strtoull(mask, &endptr, 10);
+		if ((endptr >= mask) &&
+				(endptr <= (mask + strlen(mask) - 2))) { // '- 2' to account for linefeed and '\0'
+
+			// Must be a default named unit mask
+			strncpy(event->um_name, mask, OP_MAX_UM_NAME_LEN - 1);
+			goto handle_named_um;	// um_name is now non-empty, so the first branch above runs
+		}
+		config |= ((event->evt_um & 0xFFULL) << 8);
+	} else {
+		config |= ((event->evt_um & 0xFFULL) << 8);
+	}
+#else
+	config = base_code;
+#endif
+
+	event->op_evt_code = base_code;
+	if (cpu_type == CPU_P4 || cpu_type == CPU_P4_HT2) {
+		if (op_netburst_get_perf_encoding(event->name, event->evt_um, 1, 1, &config)) {
+			cerr << "Unable to get event encoding for " << event->name << endl;
+			exit(EXIT_FAILURE);
+		}
+	}
+	event->evt_code = config;
+}
+
+#if PPC64_ARCH
+/* All ppc64 events (except CYCLES) have a _GRP<n> suffix.  This is
+ * because the legacy opcontrol profiler can only profile events in
+ * the same group (i.e., having the same _GRP<n> suffix).  But operf
+ * can multiplex events, so we should allow the user to pass event
+ * names without the _GRP<n> suffix.
+ *
+ * If event name is not CYCLES or does not have a _GRP<n> suffix,
+ * we'll call ophelp and scan the list of events, searching for one
+ * that matches up to the _GRP<n> suffix.  If we don't find a match,
+ * then we'll exit with the expected error message for invalid event name.
+ */
+static string _handle_powerpc_event_spec(string event_spec)
+{
+	FILE * fp;
+	char line[MAX_INPUT];
+	size_t grp_pos;
+	string evt, err_msg;
+	size_t evt_name_len;
+	bool first_non_cyc_evt_found = false;
+	bool event_found = false;
+	char event_name[OP_MAX_EVT_NAME_LEN], * remaining_evt_spec, * colon_start;
+	string cmd = OP_BINDIR;
+	cmd += "/ophelp";
+
+	colon_start = (char *)index(event_spec.c_str(), ':');
+	evt_name_len = colon_start ? (size_t) (colon_start - event_spec.c_str()) : event_spec.length();
+	if (evt_name_len >= OP_MAX_EVT_NAME_LEN) {	// name plus NUL would overrun event_name
+		cerr << "Error retrieving info for event " << event_spec << endl;
+		exit(EXIT_FAILURE);
+	}
+	strncpy(event_name, event_spec.c_str(), evt_name_len);
+	event_name[evt_name_len] = '\0';
+	remaining_evt_spec = colon_start ? ((char *)event_spec.c_str() + strlen(event_name) + 1)
+	                                  : NULL;
+	if (!strcmp("CYCLES", event_name)) {
+		event_found = true;
+		goto out;
+	}
+
+	evt = event_name;
+	// Need to make sure the event name truly has a _GRP<n> suffix.
+	grp_pos = evt.rfind("_GRP");
+	if ((grp_pos != string::npos) && ((evt = evt.substr(grp_pos, string::npos))).length() > 4) {
+		char * end;
+		strtoul(evt.substr(4, string::npos).c_str(), &end, 0);
+		if (end && (*end == '\0')) {
+		// Valid group number found after _GRP, so we can skip to the end.
+			event_found = true;
+			goto out;
+		}
+	}
+
+	// If we get here, it implies the user passed a non-CYCLES event without a GRP suffix.
+	// Lets try to find a valid suffix for it.
+	fp = popen(cmd.c_str(), "r");
+	if (fp == NULL) {
+		cerr << "Unable to execute ophelp to get info for event " << event_spec << endl;
+		exit(EXIT_FAILURE);
+	}
+	err_msg = "Cannot find event ";
+	while (fgets(line, MAX_INPUT, fp)) {
+		if (!first_non_cyc_evt_found) {
+			if (!strncmp(line, "PM_", 3))
+				first_non_cyc_evt_found = true;
+			else
+				continue;
+		}
+		if (line[0] == ' ' || line[0] == '\t')
+			continue;
+		if (!strncmp(line, event_name, evt_name_len)) {
+			// Found a potential match.  Check if it's a perfect match.
+			char * colon = index(line, ':');
+			if (!colon || (size_t) (colon - line) >= OP_MAX_EVT_NAME_LEN)
+				continue;	// no "<event>:" prefix on this line, or too long for event_name
+			string save_event_name = event_name;
+			size_t full_evt_len = colon - line;
+			memset(event_name, '\0', OP_MAX_EVT_NAME_LEN);
+			strncpy(event_name, line, full_evt_len);
+			string candidate = event_name;
+			if (candidate.rfind("_GRP") == evt_name_len) {
+				event_found = true;
+				break;
+			} else {
+				memset(event_name, '\0', OP_MAX_EVT_NAME_LEN);
+				strncpy(event_name, save_event_name.c_str(), evt_name_len);
+			}
+		}
+	}
+	pclose(fp);
+
+out:
+	if (!event_found) {
+		cerr << err_msg << event_name << endl;
+		cerr << "Error retrieving info for event " << event_spec << endl;
+		exit(EXIT_FAILURE);
+	}
+	ostringstream ret_strm;
+	if (remaining_evt_spec)
+		ret_strm << event_name << ":" << remaining_evt_spec;
+	else
+		ret_strm << event_name;
+	return ret_strm.str();
+}
+
+
+/* Some architectures (e.g., ppc64) do not use the same event value (code) for oprofile
+ * and for perf_events.  The operf-record process requires event values that perf_events
+ * understands, but the operf-read process requires oprofile event values.  The purpose of
+ * the following method is to map the operf-record event value to a value that
+ * opreport can understand.
+ */
+
+extern op_cpu cpu_type;
+#define NIL_CODE ~0U
+// _get_codes_for_match(): give each not-yet-converted event in *evt_vec whose normalized name equals 'name' the libpfm code of event index pfm_idx; returns true once every event in the vector has a code.
+#if HAVE_LIBPFM3
+static bool _get_codes_for_match(unsigned int pfm_idx, const char name[],
+                                 vector<operf_event_t> * evt_vec)
+{
+	unsigned int num_events = evt_vec->size();
+	int tmp_code, ret;
+	char evt_name[OP_MAX_EVT_NAME_LEN];
+	unsigned int events_converted = 0;
+	for (unsigned int i = 0; i < num_events; i++) {
+		operf_event_t event = (*evt_vec)[i];
+		bool edge_detect = false;	// per event: must not carry over from an earlier _EDGE_COUNT event
+		if (event.evt_code != NIL_CODE) {
+			events_converted++;
+			continue;
+		}
+		memset(evt_name, 0, OP_MAX_EVT_NAME_LEN);
+		if (!strcmp(event.name, "CYCLES")) {
+			strcpy(evt_name ,"PM_CYC") ;
+		} else if (strstr(event.name, "_GRP")) {
+			string str = event.name;
+			strncpy(evt_name, event.name, str.rfind("_GRP"));
+		} else {
+			strncpy(evt_name, event.name, strlen(event.name));
+		}
+
+		/* Events where the "_EDGE_COUNT" suffix has been appended to a
+		 * real native event name are pseudo events (events that have
+		 * not been formally defined in processor documentation), where
+		 * we wish to detect the rising edge of the real native event.
+		 * This "edge detection" technique is useful for events that normally
+		 * count the number of cycles that a particular condition is true.
+		 * Since libpfm does not know about pseudo events, we need to
+		 * convert them to their real native event equivalent, and then
+		 * set the "edge detect" bit (the LSB) in the event code.
+		 */
+		string evt = evt_name;
+		size_t edge_suffix_pos = evt.rfind("_EDGE_COUNT");
+		if (edge_suffix_pos != string::npos) {
+			evt = evt.substr(0, edge_suffix_pos);
+			strncpy(evt_name, evt.c_str(), evt.length() + 1);
+			edge_detect = true;
+		}
+
+		if (strncmp(name, evt_name, OP_MAX_EVT_NAME_LEN))
+			continue;
+		ret = pfm_get_event_code(pfm_idx, &tmp_code);
+		if (ret != PFMLIB_SUCCESS) {
+			string evt_name_str = event.name;
+			string msg = "libpfm cannot find event code for " + evt_name_str +
+					"; cannot continue";
+			throw runtime_error(msg);
+		}
+		event.evt_code = tmp_code;
+		// Setting LSB of code makes this a "rising edge detection" type of event
+		if (edge_detect)
+			event.evt_code |= 1;
+		(*evt_vec)[i] = event;
+		events_converted++;
+		cverb << vdebug << "Successfully converted " << event.name << " to perf_event code "
+		      << hex << event.evt_code << endl;
+	}
+	return (events_converted == num_events);
+}
+#else
+/* Uses libpfm to fill in the perf_events code of each evt_vec entry still set to
+ * NIL_CODE; returns true when every entry has a code.  Throws runtime_error if
+ * libpfm cannot be initialized or cannot encode an event.
+ */
+static bool _op_get_event_codes(vector<operf_event_t> * evt_vec)
+{
+	unsigned int num_events = evt_vec->size();
+	char evt_name[OP_MAX_EVT_NAME_LEN];
+	unsigned int events_converted = 0;
+
+	typedef struct {
+		u64    *codes;
+		char        **fstr;
+		size_t      size;
+		int         count;
+		int         idx;
+	} pfm_raw_pmu_encode_t;
+
+	pfm_raw_pmu_encode_t raw;
+
+	if (pfm_initialize() != PFM_SUCCESS)
+		throw runtime_error("Unable to initialize libpfm; cannot continue");
+
+	for (unsigned int i = 0; i < num_events; i++) {
+		operf_event_t event = (*evt_vec)[i];
+		if (event.evt_code != NIL_CODE) {
+			events_converted++;
+			continue;
+		}
+		memset(evt_name, 0, OP_MAX_EVT_NAME_LEN);
+		if (!strcmp(event.name, "CYCLES")) {
+			strcpy(evt_name ,"PM_CYC") ;
+		} else if (strstr(event.name, "_GRP")) {
+			string str = event.name;
+			strncpy(evt_name, event.name, str.rfind("_GRP"));
+		} else {
+			strncpy(evt_name, event.name, strlen(event.name));
+		}
+
+		/* Events where the "_EDGE_COUNT" suffix has been appended to a
+		 * real native event name are pseudo events (events that have
+		 * not been formally defined in processor documentation), where
+		 * we wish to detect the rising edge of the real native event.
+		 * This "edge detection" technique is useful for events that normally
+		 * count the number of cycles that a particular condition is true.
+		 * Since libpfm does not know about pseudo events, we need to
+		 * convert them to their real native event equivalent, and then
+		 * set the "edge detect" bit (the LSB) in the event code.
+		 */
+		string evt = evt_name;
+		size_t edge_suffix_pos = evt.rfind("_EDGE_COUNT");
+		// Per-event flag: a value carried over from an earlier event would mis-tag this one.
+		const bool edge_detect = (edge_suffix_pos != string::npos);
+		if (edge_detect) {
+			evt = evt.substr(0, edge_suffix_pos);
+			strncpy(evt_name, evt.c_str(), evt.length() + 1);
+		}
+
+		// A zeroed 'raw' (codes == NULL, count == 0) asks libpfm to allocate the
+		// codes array itself, so the array must be freed here after the code
+		// has been copied out.
+		memset(&raw, 0, sizeof(raw));
+		if (pfm_get_os_event_encoding(evt_name, PFM_PLM3, PFM_OS_NONE, &raw) != PFM_SUCCESS) {
+			string msg = string("libpfm cannot find event code for ") + event.name + "; cannot continue";
+			throw runtime_error(msg);
+		}
+		event.evt_code = raw.codes[0];
+		free(raw.codes);
+		// Setting LSB of code makes this a "rising edge detection" type of event
+		if (edge_detect)
+			event.evt_code |= 1;
+		(*evt_vec)[i] = event;
+		events_converted++;
+		cverb << vdebug << "Successfully converted " << event.name << " to perf_event code "
+		      << hex << event.evt_code << endl;
+	}
+	return (events_converted == num_events);
+}
+#endif
+
+/* Resets every event code in evt_vec to NIL_CODE (except the two hard-wired
+ * POWER7 codes), then asks libpfm for the perf_events code of each remaining
+ * event.  Returns true only if all events ended up with a code.
+ */
+static bool convert_event_vals(vector<operf_event_t> * evt_vec)
+{
+	const bool is_power7 = (cpu_type == CPU_PPC64_POWER7);
+
+	for (unsigned int idx = 0; idx < evt_vec->size(); idx++) {
+		operf_event_t & evt = (*evt_vec)[idx];
+		// Prefix comparison: any name that starts with these strings matches.
+		if (is_power7 && !strncmp(evt.name, "PM_RUN_CYC", strlen("PM_RUN_CYC")))
+			evt.evt_code = 0x600f4;
+		else if (is_power7 && !strncmp(evt.name, "PM_RUN_INST_CMPL", strlen("PM_RUN_INST_CMPL")))
+			evt.evt_code = 0x500fa;
+		else
+			evt.evt_code = NIL_CODE;
+	}
+
+#if HAVE_LIBPFM3
+	unsigned int pfm_idx, pfm_count;
+	char pfm_name[256];
+
+	if (pfm_initialize() != PFMLIB_SUCCESS)
+		throw runtime_error("Unable to initialize libpfm; cannot continue");
+
+	if (pfm_get_num_events(&pfm_count) != PFMLIB_SUCCESS)
+		throw runtime_error("Unable to use libpfm to obtain event code; cannot continue");
+
+	// Walk libpfm's event table until every requested event has been converted.
+	for (pfm_idx = 0; pfm_idx < pfm_count; pfm_idx++) {
+		if (pfm_get_event_name(pfm_idx, pfm_name, 256) != PFMLIB_SUCCESS)
+			continue;
+		if (_get_codes_for_match(pfm_idx, pfm_name, evt_vec))
+			break;
+	}
+	// Reaching the end of the table means some event was never matched.
+	return (pfm_idx != pfm_count);
+#else
+	return _op_get_event_codes(evt_vec);
+#endif
+}
+
+#endif // PPC64_ARCH
+
+
+
+/* Checks each spec in passed_evts with 'ophelp --check-events', parses it (name,
+ * count when do_profiling, then optional unit mask, kernel and user fields, all
+ * ':'-separated) and appends the event to 'events'.  Exits the process on failure.
+ */
+void op_pe_utils::op_process_events_list(set<string> & passed_evts,
+                                         bool do_profiling, bool do_callgraph)
+{
+	string cmd = OP_BINDIR;
+
+	if (passed_evts.size() > OP_MAX_EVENTS) {
+		cerr << "Number of events specified is greater than allowed maximum of "
+		     << OP_MAX_EVENTS << "." << endl;
+		exit(EXIT_FAILURE);
+	}
+	cmd += "/ophelp --check-events ";
+	if (!do_profiling)
+		cmd += "--ignore-count ";
+	for (set<string>::iterator it = passed_evts.begin(); it != passed_evts.end(); it++) {
+		FILE * fp;
+		string full_cmd = cmd;
+		string event_spec = *it;
+
+#if PPC64_ARCH
+		// Starting with CPU_PPC64_ARCH_V1, ppc64 events files are formatted like
+		// other architectures, so no special handling is needed.
+		if (cpu_type < CPU_PPC64_ARCH_V1)
+			event_spec = _handle_powerpc_event_spec(event_spec);
+#endif
+
+		if (do_callgraph)
+			full_cmd += " --callgraph=1 ";
+		full_cmd += event_spec;
+		fp = popen(full_cmd.c_str(), "r");
+		if (fp == NULL) {
+			cerr << "Unable to execute ophelp to get info for event " << event_spec << endl;
+			exit(EXIT_FAILURE);
+		}
+		if (fgetc(fp) == EOF) {
+			pclose(fp);
+			cerr << "Error retrieving info for event " << event_spec << endl;
+			if (do_callgraph)
+				cerr << "Note: When doing callgraph profiling, the sample count must be"
+				     << endl << "15 times the minimum count value for the event."  << endl;
+			exit(EXIT_FAILURE);
+		}
+		pclose(fp);
+		char * event_str = op_xstrndup(event_spec.c_str(), event_spec.length());
+		operf_event_t event;
+		memset(&event, 0, sizeof(event));
+		strncpy(event.name, strtok(event_str, ":"), OP_MAX_EVT_NAME_LEN - 1);
+		if (do_profiling)
+			event.count = atoi(strtok(NULL, ":"));
+		else
+			event.count = 0UL;
+		/* Event name is required in the event spec in order for
+		 * 'ophelp --check-events' to pass.  But since unit mask
+		 *  and domain control bits are optional, we need to ensure the result of
+		 *  strtok is valid.
+		 */
+		char * info;
+#define	_OP_UM 1
+#define	_OP_KERNEL 2
+#define	_OP_USER 3
+		int place =  _OP_UM;
+		char * endptr = NULL;
+		event.evt_um = 0UL;
+		event.no_kernel = 0;
+		event.no_user = 0;
+		event.throttled = false;
+		event.mode_specified = false;
+		event.umask_specified = false;
+		memset(event.um_name, '\0', OP_MAX_UM_NAME_LEN);
+		memset(event.um_numeric_val_as_str, '\0', OP_MAX_UM_NAME_STR_LEN);
+		while ((info = strtok(NULL, ":"))) {
+			switch (place) {
+			case _OP_UM:
+				event.evt_um = strtoul(info, &endptr, 0);
+				event.umask_specified = true;
+				// If any of the UM part is not a number, then we
+				// consider the entire part a string.
+				if (*endptr) {
+					event.evt_um = 0;
+					strncpy(event.um_name, info, OP_MAX_UM_NAME_LEN - 1);
+				} else {
+					/* event.evt_um gets modified later,
+					 * save the specified number as a
+					 * string to output later.
+					 */
+					stringstream strs;
+					strs << "0x" << hex << event.evt_um;
+					// Bound by the destination (zeroed above), not the source length.
+					strncpy(event.um_numeric_val_as_str, strs.str().c_str(),
+						OP_MAX_UM_NAME_STR_LEN - 1);
+				}
+				break;
+			case _OP_KERNEL:
+				event.mode_specified = true;
+				if (atoi(info) == 0)
+					event.no_kernel = 1;
+				break;
+			case _OP_USER:
+				event.mode_specified = true;
+				if (atoi(info) == 0)
+					event.no_user = 1;
+				break;
+			}
+			place++;
+		}
+		free(event_str);
+
+#ifdef __s390__
+		if (do_profiling) {
+			if (strncmp(event.name, "CPU_CYCLES", strlen(event.name)) != 0) {
+				cerr << "Profiling with " << event.name << " is not supported." << endl
+				     << "Only CPU_CYCLES is allowed to use with operf." << endl;
+				exit(EXIT_FAILURE);
+			}
+		} else {
+			if (!event.no_kernel && event.no_user) {
+				cerr << "Counting for just the kernel is not supported." << endl
+				     << "Re-run the command and simply pass the event name " << endl
+				     << "(" << event.name << ") for the event spec, without" << endl
+				     << "unit mask/kernel/user bits." << endl;
+				exit(EXIT_FAILURE);
+			}
+		}
+#endif
+
+#ifdef __alpha__
+		// Alpha arch does not support any mode exclusion, so if either user or kernel
+		// mode are excluded by the user, we'll exit with an error message.
+		if (event.no_kernel || event.no_user) {
+			cerr << "Mode exclusion is not supported on Alpha." << endl
+			     << "Re-run the command and simply pass the event name " << endl
+			     << "(" << event.name << ") for the event spec, without" << endl
+			     << "unit mask/kernel/user bits." << endl;
+			exit(EXIT_FAILURE);
+		}
+#endif
+
+		_get_event_code(&event, cpu_type);
+		events.push_back(event);
+	}
+#if PPC64_ARCH
+	{
+		/* For ppc64 architecture processors prior to the introduction of
+		 * architected_events_v1, the oprofile event code needs to be converted
+		 * to the appropriate event code to pass to the perf_event_open syscall.
+		 * But as of the introduction of architected_events_v1, the events
+		 * file contains the necessary event code information, so this conversion
+		 * step is no longer needed.
+		 */
+		using namespace op_pe_utils;
+		if ((cpu_type < CPU_PPC64_ARCH_V1) && !convert_event_vals(&events)) {
+			cerr << "Unable to convert all oprofile event values to perf_event values" << endl;
+			exit(EXIT_FAILURE);
+		}
+	}
+#endif
+}
+
+/* Appends the default event for cpu_type (from op_default_event) to the 'events'
+ * list.  With do_callgraph, the sample count is the event's min_count times
+ * CALLGRAPH_MIN_COUNT_SCALE; otherwise descr.count.  Exits the process on failure.
+ */
+void op_pe_utils::op_get_default_event(bool do_callgraph)
+{
+	operf_event_t dft_evt;
+	struct op_default_event_descr descr;
+
+	op_default_event(cpu_type, &descr);
+	if (descr.name[0] == '\0') {
+		cerr << "Unable to find default event" << endl;
+		exit(EXIT_FAILURE);
+	}
+
+	memset(&dft_evt, 0, sizeof(dft_evt));
+	if (do_callgraph) {
+		op_events(cpu_type);
+		struct op_event * found = find_event_by_name(descr.name, 0, 0);
+		if (!found) {
+			cerr << "Error getting event info for " << descr.name << endl;
+			exit(EXIT_FAILURE);
+		}
+		dft_evt.count = found->min_count * CALLGRAPH_MIN_COUNT_SCALE;
+	} else {
+		dft_evt.count = descr.count;
+	}
+	dft_evt.evt_um = descr.um;
+	strncpy(dft_evt.name, descr.name, OP_MAX_EVT_NAME_LEN - 1);
+	_get_event_code(&dft_evt, cpu_type);
+	events.push_back(dft_evt);
+
+#if PPC64_ARCH
+	{
+		/* This section of code is for architectures such as ppc[64] for which
+		 * the oprofile event code needs to be converted to the appropriate event
+		 * code to pass to the perf_event_open syscall.
+		 */
+		using namespace op_pe_utils;
+		if ((cpu_type < CPU_PPC64_ARCH_V1) && !convert_event_vals(&events)) {
+			cerr << "Unable to convert all oprofile event values to perf_event values" << endl;
+			exit(EXIT_FAILURE);
+		}
+	}
+#endif
+}
diff --git a/libpe_utils/op_pe_utils.h b/libpe_utils/op_pe_utils.h
new file mode 100644
index 0000000..3a4eb38
--- /dev/null
+++ b/libpe_utils/op_pe_utils.h
@@ -0,0 +1,51 @@
+/**
+ * @file op_pe_utils.h
+ * Definitions and prototypes for tools using Linux Performance Events Subsystem.
+ *
+ * @remark Copyright 2013 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * Created on: May 21, 2013
+ * @author Maynard Johnson
+ * (C) Copyright IBM Corp. 2013
+ *
+ */
+
+
+#ifndef OP_PE_UTILS_H_
+#define OP_PE_UTILS_H_
+
+#include <dirent.h>
+#include <set>
+#include <string>
+#include <vector>
+
+#include "op_cpu_type.h"
+
+#define OP_APPNAME_LEN 1024
+#define OP_MAX_EVENTS 24
+#define CALLGRAPH_MIN_COUNT_SCALE 15
+
+/* PPC64_ARCH is 1 for ppc64-specific code, else 0.  '__powerpc__' is defined for both
+ * ppc64 and ppc32, so we further qualify with 'HAVE_LIBPFM', which is defined only for
+ * ppc64.  Evaluated in an #if because 'defined' inside a macro expansion is undefined.
+ */
+#if (HAVE_LIBPFM) && (defined(__powerpc__) || defined(__powerpc64__))
+#define PPC64_ARCH 1
+#else
+#define PPC64_ARCH 0
+#endif
+
+// Prototypes (candidates for refactoring of operf)
+namespace op_pe_utils {
+extern int op_check_perf_events_cap(bool use_cpu_minus_one);
+extern int op_get_sys_value(const char * filename);
+extern int op_get_cpu_for_perf_events_cap(void);
+extern int op_validate_app_name(char ** app, char ** save_appname);
+extern void op_get_default_event(bool do_callgraph);
+extern void op_process_events_list(std::set<std::string> & passed_evts, bool do_profiling, bool do_callgraph);
+extern int op_get_next_online_cpu(DIR * dir, struct dirent *entry);
+extern std::set<int> op_get_available_cpus(int max_num_cpus);
+}
+
+#endif /* OP_PE_UTILS_H_ */
diff --git a/libperf_events/Makefile.am b/libperf_events/Makefile.am
index 7163610..2ec09f1 100644
--- a/libperf_events/Makefile.am
+++ b/libperf_events/Makefile.am
@@ -7,9 +7,12 @@ AM_CPPFLAGS = \
 	-I ${top_srcdir}/libop \
 	-I ${top_srcdir}/libdb \
 	-I ${top_srcdir}/libperf_events \
+	-I ${top_srcdir}/libpe_utils \
 	@PERF_EVENT_FLAGS@ \
 	@OP_CPPFLAGS@
 
+AM_CXXFLAGS = @OP_CXXFLAGS@
+
 noinst_LIBRARIES = libperf_events.a
 libperf_events_a_SOURCES =  \
 	operf_utils.h \
diff --git a/libperf_events/Makefile.in b/libperf_events/Makefile.in
index c579099..ddd1977 100644
--- a/libperf_events/Makefile.in
+++ b/libperf_events/Makefile.in
@@ -39,7 +39,6 @@ DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
 am__aclocal_m4_deps = $(top_srcdir)/m4/binutils.m4 \
 	$(top_srcdir)/m4/builtinexpect.m4 \
-	$(top_srcdir)/m4/cellspubfdsupport.m4 \
 	$(top_srcdir)/m4/compileroption.m4 \
 	$(top_srcdir)/m4/copyifchange.m4 $(top_srcdir)/m4/docbook.m4 \
 	$(top_srcdir)/m4/extradirs.m4 \
@@ -48,7 +47,7 @@ am__aclocal_m4_deps = $(top_srcdir)/m4/binutils.m4 \
 	$(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
 	$(top_srcdir)/m4/mallocattribute.m4 \
 	$(top_srcdir)/m4/poptconst.m4 \
-	$(top_srcdir)/m4/precompiledheader.m4 $(top_srcdir)/m4/qt.m4 \
+	$(top_srcdir)/m4/precompiledheader.m4 \
 	$(top_srcdir)/m4/sstream.m4 $(top_srcdir)/m4/typedef.m4 \
 	$(top_srcdir)/configure.ac
 am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
@@ -153,7 +152,6 @@ LN_S = @LN_S@
 LTLIBOBJS = @LTLIBOBJS@
 MAKEINFO = @MAKEINFO@
 MKDIR_P = @MKDIR_P@
-MOC = @MOC@
 NM = @NM@
 NMEDIT = @NMEDIT@
 OBJDUMP = @OBJDUMP@
@@ -177,20 +175,13 @@ PFM_LIB = @PFM_LIB@
 PKG_CONFIG = @PKG_CONFIG@
 POPT_LIBS = @POPT_LIBS@
 PTRDIFF_T_TYPE = @PTRDIFF_T_TYPE@
-QT_CFLAGS = @QT_CFLAGS@
-QT_INCLUDES = @QT_INCLUDES@
-QT_LDFLAGS = @QT_LDFLAGS@
-QT_LIB = @QT_LIB@
-QT_LIBS = @QT_LIBS@
-QT_VERSION = @QT_VERSION@
 RANLIB = @RANLIB@
+RT_LIB = @RT_LIB@
 SED = @SED@
 SET_MAKE = @SET_MAKE@
 SHELL = @SHELL@
 SIZE_T_TYPE = @SIZE_T_TYPE@
 STRIP = @STRIP@
-UIC = @UIC@
-UIChelp = @UIChelp@
 VERSION = @VERSION@
 XMKMF = @XMKMF@
 XML_CATALOG = @XML_CATALOG@
@@ -261,9 +252,11 @@ topdir = @topdir@
 @BUILD_FOR_PERF_EVENT_TRUE@	-I ${top_srcdir}/libop \
 @BUILD_FOR_PERF_EVENT_TRUE@	-I ${top_srcdir}/libdb \
 @BUILD_FOR_PERF_EVENT_TRUE@	-I ${top_srcdir}/libperf_events \
+@BUILD_FOR_PERF_EVENT_TRUE@	-I ${top_srcdir}/libpe_utils \
 @BUILD_FOR_PERF_EVENT_TRUE@	@PERF_EVENT_FLAGS@ \
 @BUILD_FOR_PERF_EVENT_TRUE@	@OP_CPPFLAGS@
 
+@BUILD_FOR_PERF_EVENT_TRUE@AM_CXXFLAGS = @OP_CXXFLAGS@
 @BUILD_FOR_PERF_EVENT_TRUE@noinst_LIBRARIES = libperf_events.a
 @BUILD_FOR_PERF_EVENT_TRUE@libperf_events_a_SOURCES = \
 @BUILD_FOR_PERF_EVENT_TRUE@	operf_utils.h \
diff --git a/libperf_events/operf_counter.cpp b/libperf_events/operf_counter.cpp
index 34fe17e..42c0cd1 100644
--- a/libperf_events/operf_counter.cpp
+++ b/libperf_events/operf_counter.cpp
@@ -1,5 +1,5 @@
 /**
- * @file pe_profiling/operf_counter.cpp
+ * @file libperf_events/operf_counter.cpp
  * C++ class implementation that abstracts the user-to-kernel interface
  * for using Linux Performance Events Subsystem.
  *
@@ -11,7 +11,7 @@
  * (C) Copyright IBM Corp. 2011
  *
  * Modified by Maynard Johnson <maynardj@us.ibm.com>
- * (C) Copyright IBM Corporation 2012
+ * (C) Copyright IBM Corporation 2012, 2014
  *
 */
 
@@ -22,6 +22,7 @@
 #include <errno.h>
 #include <string.h>
 #include <iostream>
+#include <sstream>
 #include <stdlib.h>
 #include "op_events.h"
 #include "operf_counter.h"
@@ -30,6 +31,7 @@
 #include "operf_process_info.h"
 #include "op_libiberty.h"
 #include "operf_stats.h"
+#include "op_pe_utils.h"
 
 
 using namespace std;
@@ -37,7 +39,6 @@ using namespace OP_perf_utils;
 
 
 volatile bool quit;
-volatile bool read_quit;
 int sample_reads;
 int num_mmap_pages;
 unsigned int pagesize;
@@ -48,6 +49,8 @@ extern bool first_time_processing;
 extern bool throttled;
 extern size_t mmap_size;
 extern size_t pg_sz;
+extern bool use_cpu_minus_one;
+extern bool track_new_forks;
 
 namespace {
 
@@ -57,14 +60,32 @@ static const char *__op_magic = "OPFILE";
 
 #define OP_MAGIC	(*(u64 *)__op_magic)
 
+/* Tries to read one int from fd and discards its value.  Returns true when
+ * read() yields a positive byte count, false on end-of-file or error.
+ */
+static bool _print_pp_progress(int fd)
+{
+	int msg;
+	return read(fd, &msg, sizeof(msg)) > 0;
+}
 
-int _get_perf_event_from_pipe(event_t * event, int sample_data_fd)
+/* This function for reading an event from the sample data pipe must
+ * be robust enough to handle the situation where the operf_record process
+ * writes an event record to the pipe in multiple chunks.
+ */
+#define OP_PIPE_READ_OK 0
+#define OP_PIPE_CLOSED -1
+static int _get_perf_event_from_pipe(event_t * event, int sample_data_fd)
 {
 	static size_t pe_header_size = sizeof(perf_event_header);
+	size_t read_size = pe_header_size;
+	int rc = OP_PIPE_READ_OK;
 	char * evt = (char *)event;
 	ssize_t num_read;
 	perf_event_header * header = (perf_event_header *)event;
 
+	memset(header, '\0', pe_header_size);
+
 	/* A signal handler was setup for the operf_read process to handle interrupts
 	 * (i.e., from ctrl-C), so the read syscalls below may get interrupted.  But the
 	 * operf_read process should ignore the interrupt and continue processing
@@ -74,52 +95,87 @@ int _get_perf_event_from_pipe(event_t * event, int sample_data_fd)
 	 */
 again:
 	errno = 0;
-	if ((num_read = read(sample_data_fd, header, pe_header_size)) < 0) {
+	if ((num_read = read(sample_data_fd, header, read_size)) < 0) {
 		cverb << vdebug << "Read 1 of sample data pipe returned with " << strerror(errno) << endl;
-		if (errno == EINTR)
+		if (errno == EINTR) {
 			goto again;
-		else
-			return -1;
+		} else {
+			rc = OP_PIPE_CLOSED;
+			goto out;
+		}
 	} else if (num_read == 0) {
-		return -1;
+		// Implies pipe has been closed on the write end, so return -1 to quit reading
+		rc = OP_PIPE_CLOSED;
+		goto out;
+	} else if (num_read != (ssize_t)read_size) {
+		header += num_read;
+		read_size -= num_read;
+		goto again;
 	}
+
+	read_size = header->size - pe_header_size;
+	if (read_size == 0)
+		/* This is technically a valid record -- it's just empty. I'm not
+		 * sure if this can happen (i.e., if the kernel ever creates empty
+		 * records), but we'll handle it just in case.
+		 */
+		goto again;
+
+	if (!header->size || (header->size < pe_header_size))
+		/* Bogus header size detected. In this case, we don't set rc to -1,
+		 * because the caller will catch this error when it calls is_header_valid().
+		 * I've seen such bogus stuff occur when profiling lots of processes at
+		 * a very high sampling frequency. This issue is still being investigated,
+		 * so for now, we'll just do our best to detect and handle gracefully.
+		 */
+		goto out;
+
 	evt += pe_header_size;
-	if (!header->size)
-		return -1;
 
 again2:
-	if ((num_read = read(sample_data_fd, evt, header->size - pe_header_size)) < 0) {
+	if ((num_read = read(sample_data_fd, evt, read_size)) < 0) {
 		cverb << vdebug << "Read 2 of sample data pipe returned with " << strerror(errno) << endl;
-		if (errno == EINTR)
+		if (errno == EINTR) {
 			goto again2;
-		else
-			return -1;
+		} else {
+			rc = OP_PIPE_CLOSED;
+			if (errno == EFAULT)
+				cerr << "Size of event record: " << header->size << endl;
+			goto out;
+		}
 	} else if (num_read == 0) {
-		return -1;
+		// Implies pipe has been closed on the write end, so return -1 to quit reading
+		rc = OP_PIPE_CLOSED;
+		goto out;
+	} else if (num_read != (ssize_t)read_size) {
+		evt += num_read;
+		read_size -= num_read;
+		goto again2;
 	}
-	return 0;
+
+out:
+	return rc;
 }
 
-event_t * _get_perf_event_from_file(struct mmap_info & info)
+static event_t * _get_perf_event_from_file(struct mmap_info & info)
 {
-	uint32_t size;
+	uint32_t size = 0;
+	static int num_remaps = 0;
 	event_t * event;
-
-	if (info.offset + info.head >= info.file_data_offset + info.file_data_size)
-		return NULL;
-
-	if (!pg_sz)
-		pg_sz = sysconf(_SC_PAGESIZE);
+	size_t pe_header_size = sizeof(struct perf_event_header);
 
 try_again:
-	event = (event_t *)(info.buf + info.head);
+	event = NULL;
+	if (info.offset + info.head + pe_header_size > info.file_data_size)
+		goto out;
+
+	if (info.head + pe_header_size <= mmap_size)
+		event = (event_t *)(info.buf + info.head);
 
-	if ((mmap_size != info.file_data_size) &&
-			(((info.head + sizeof(event->header)) > mmap_size) ||
-					(info.head + event->header.size > mmap_size))) {
+	if (unlikely(!event || (info.head + event->header.size > mmap_size))) {
 		int ret;
 		u64 shift = pg_sz * (info.head / pg_sz);
-		cverb << vconvert << "Remapping perf data file" << endl;
+		cverb << vdebug << "Remapping perf data file: " << dec << ++num_remaps << endl;
 		ret = munmap(info.buf, mmap_size);
 		if (ret) {
 			string errmsg = "Internal error:  munmap of perf data file failed with errno: ";
@@ -139,23 +195,20 @@ try_again:
 	}
 
 	size = event->header.size;
-
-	// The tail end of the operf data file may be zero'ed out, so we assume if we
-	// find size==0, we're now in that area of the file, so we're done.
-	if (size == 0)
-		return NULL;
-
 	info.head += size;
-	if (info.offset + info.head >= info.file_data_offset + info.file_data_size)
-		return NULL;
-
+out:
+	if (unlikely(!event)) {
+		cverb << vdebug << "No more event records in file.  info.offset: " << dec << info.offset
+		      << "; info.head: " << info.head << "; info.file_data_size: " << info.file_data_size
+		      << endl << "; mmap_size: " << mmap_size  << "; current record size: " << size << endl;
+	}
 	return event;
 }
 
 }  // end anonymous namespace
 
 operf_counter::operf_counter(operf_event_t & evt,  bool enable_on_exec, bool do_cg,
-                             bool separate_cpu)
+                             bool separate_cpu, bool inherit, int event_number)
 {
 	memset(&attr, 0, sizeof(attr));
 	attr.size = sizeof(attr);
@@ -164,50 +217,67 @@ operf_counter::operf_counter(operf_event_t & evt,  bool enable_on_exec, bool do_
 		attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
 	if (separate_cpu)
 		attr.sample_type |= PERF_SAMPLE_CPU;
+
+#ifdef __s390__
+	attr.type = PERF_TYPE_HARDWARE;
+#else
 	attr.type = PERF_TYPE_RAW;
+#endif
+#if ((defined(__i386__) || defined(__x86_64__)) && (HAVE_PERF_PRECISE_IP))
+	if (evt.evt_code & EXTRA_PEBS) {
+		attr.precise_ip = 2;
+		evt.evt_code ^= EXTRA_PEBS;
+	}
+#endif
+	attr.exclude_hv = evt.no_hv;
 	attr.config = evt.evt_code;
 	attr.sample_period = evt.count;
-	attr.inherit = 1;
+	attr.inherit = inherit ? 1 : 0;
 	attr.enable_on_exec = enable_on_exec ? 1 : 0;
 	attr.disabled  = 1;
 	attr.exclude_idle = 0;
 	attr.exclude_kernel = evt.no_kernel;
-	attr.exclude_hv = evt.no_hv;
 	attr.read_format = PERF_FORMAT_ID;
 	event_name = evt.name;
 	fd = id = -1;
+	evt_num = event_number;
 }
 
 operf_counter::~operf_counter() {
 }
 
 
-int operf_counter::perf_event_open(pid_t ppid, int cpu, unsigned event, operf_record * rec)
+int operf_counter::perf_event_open(pid_t pid, int cpu, operf_record * rec, bool print_error)
 {
 	struct {
 		u64 count;
 		u64 id;
 	} read_data;
 
-	if (event == 0) {
+	if (evt_num == 0) {
 		attr.mmap = 1;
 		attr.comm = 1;
 	}
-	fd = op_perf_event_open(&attr, ppid, cpu, -1, 0);
+	fd = op_perf_event_open(&attr, pid, cpu, -1, 0);
 	if (fd < 0) {
 		int ret = -1;
 		cverb << vrecord << "perf_event_open failed: " << strerror(errno) << endl;
 		if (errno == EBUSY) {
-			cerr << "The performance monitoring hardware reports EBUSY. Is another profiling tool in use?" << endl
-			     << "On some architectures, tools such as oprofile and perf being used in system-wide "
-			     << "mode can cause this problem." << endl;
+			if (print_error) {
+				cerr << "The performance monitoring hardware reports EBUSY. Is another profiling tool in use?" << endl
+				     << "On some architectures, tools such as oprofile and perf being used in system-wide "
+				     << "mode can cause this problem." << endl;
+			}
 			ret = OP_PERF_HANDLED_ERROR;
 		} else if (errno == ESRCH) {
-			cerr << "!!!! No samples collected !!!" << endl;
-			cerr << "The target program/command ended before profiling was started." << endl;
+			if (print_error) {
+				cerr << "!!!! No samples collected !!!" << endl;
+				cerr << "The target program/command ended before profiling was started." << endl;
+			}
 			ret = OP_PERF_HANDLED_ERROR;
 		} else {
-			cerr << "perf_event_open failed with " << strerror(errno) << endl;
+			if (print_error)
+				cerr << "perf_event_open failed with " << strerror(errno) << endl;
 		}
 		return ret;
 	}
@@ -215,7 +285,7 @@ int operf_counter::perf_event_open(pid_t ppid, int cpu, unsigned event, operf_re
 		perror("Error reading perf_event fd");
 		return -1;
 	}
-	rec->register_perf_event_id(event, read_data.id, attr);
+	rec->register_perf_event_id(evt_num, read_data.id, attr);
 
 	cverb << vrecord << "perf_event_open returning fd " << fd << endl;
 	return fd;
@@ -225,32 +295,38 @@ operf_record::~operf_record()
 {
 	cverb << vrecord << "operf_record::~operf_record()" << endl;
 	opHeader.data_size = total_bytes_recorded;
-	if (total_bytes_recorded)
+	// If recording to a file, we re-write the op_header info
+	// in order to update the data_size field.
+	if (total_bytes_recorded && write_to_file)
 		write_op_header_info();
 
 	if (poll_data)
 		delete[] poll_data;
-	close(output_fd);
-	for (int i = 0; i < samples_array.size(); i++) {
+	for (size_t i = 0; i < samples_array.size(); i++) {
 		struct mmap_data *md = &samples_array[i];
 		munmap(md->base, (num_mmap_pages + 1) * pagesize);
 	}
 	samples_array.clear();
 	evts.clear();
 	perfCounters.clear();
+	/* Close output_fd last. If sample data was being written to a pipe, we want
+	 * to give the pipe reader (i.e., operf_read::convertPerfData) as much time
+	 * as possible in order to drain the pipe of any remaining data.
+	 */
+	close(output_fd);
 }
 
 operf_record::operf_record(int out_fd, bool sys_wide, pid_t the_pid, bool pid_running,
                            vector<operf_event_t> & events, vmlinux_info_t vi, bool do_cg,
-bool separate_by_cpu, bool out_fd_is_file)
+                           bool separate_by_cpu, bool out_fd_is_file,
+                           int _convert_read_pipe, int _convert_write_pipe)
 {
-	int flags = O_CREAT|O_RDWR|O_TRUNC;
 	struct sigaction sa;
 	sigset_t ss;
 	vmlinux_file = vi.image_name;
 	kernel_start = vi.start;
 	kernel_end = vi.end;
-	pid = the_pid;
+	pid_to_profile = the_pid;
 	pid_started = pid_running;
 	system_wide = sys_wide;
 	callgraph = do_cg;
@@ -261,11 +337,13 @@ bool separate_by_cpu, bool out_fd_is_file)
 	valid = false;
 	poll_data = NULL;
 	output_fd = out_fd;
+	read_comm_pipe = _convert_read_pipe;
+	write_comm_pipe = _convert_write_pipe;
 	write_to_file = out_fd_is_file;
 	opHeader.data_size = 0;
 	num_cpus = -1;
 
-	if (system_wide && (pid != -1 || pid_started))
+	if (system_wide && (pid_to_profile != -1 || pid_started))
 		return;  // object is not valid
 
 	cverb << vrecord << "operf_record ctor using output fd " << output_fd << endl;
@@ -294,15 +372,21 @@ int operf_record::_write_header_to_file(void)
 	struct op_file_attr f_attr;
 	int total = 0;
 
-	lseek(output_fd, sizeof(f_header), SEEK_SET);
+	if (lseek(output_fd, sizeof(f_header), SEEK_SET) == (off_t)-1)
+		goto err_out;
+
 
 	for (unsigned i = 0; i < evts.size(); i++) {
 		opHeader.h_attrs[i].id_offset = lseek(output_fd, 0, SEEK_CUR);
+		if (opHeader.h_attrs[i].id_offset == (off_t)-1)
+			goto err_out;
 		total += op_write_output(output_fd, &opHeader.h_attrs[i].ids[0],
 		                         opHeader.h_attrs[i].ids.size() * sizeof(u64));
 	}
 
 	opHeader.attr_offset = lseek(output_fd, 0, SEEK_CUR);
+	if (opHeader.attr_offset == (off_t)-1)
+		goto err_out;
 
 	for (unsigned i = 0; i < evts.size(); i++) {
 		struct op_header_evt_info attr = opHeader.h_attrs[i];
@@ -313,6 +397,9 @@ int operf_record::_write_header_to_file(void)
 	}
 
 	opHeader.data_offset = lseek(output_fd, 0, SEEK_CUR);
+	if (opHeader.data_offset == (off_t)-1)
+		goto err_out;
+
 
 	f_header.magic = OP_MAGIC;
 	f_header.size = sizeof(f_header);
@@ -322,10 +409,17 @@ int operf_record::_write_header_to_file(void)
 	f_header.data.offset = opHeader.data_offset;
 	f_header.data.size = opHeader.data_size;
 
-	lseek(output_fd, 0, SEEK_SET);
+	if (lseek(output_fd, 0, SEEK_SET) == (off_t)-1)
+		goto err_out;
 	total += op_write_output(output_fd, &f_header, sizeof(f_header));
-	lseek(output_fd, opHeader.data_offset + opHeader.data_size, SEEK_SET);
+	if (lseek(output_fd, opHeader.data_offset + opHeader.data_size, SEEK_SET) == (off_t)-1)
+		goto err_out;
 	return total;
+
+err_out:
+	string errmsg = "Internal error doing lseek: ";
+	errmsg += strerror(errno);
+	throw runtime_error(errmsg);
 }
 
 int operf_record::_write_header_to_pipe(void)
@@ -362,8 +456,10 @@ void operf_record::register_perf_event_id(unsigned event, u64 id, perf_event_att
 	// is invoked once for each event for each cpu; but it's not worth the bother of trying
 	// to avoid it.
 	opHeader.h_attrs[event].attr = attr;
-	cverb << vrecord << "Perf header: id = " << hex << (unsigned long long)id << " for event num "
-			<< event << ", code " << attr.config <<  endl;
+	ostringstream message;
+	message  << "Perf header: id = " << hex << (unsigned long long)id << " for event num "
+	         << event << ", code " << attr.config <<  endl;
+	cverb << vrecord << message.str();
 	opHeader.h_attrs[event].ids.push_back(id);
 }
 
@@ -375,16 +471,19 @@ void operf_record::write_op_header_info()
 		add_to_total(_write_header_to_pipe());
 }
 
-int operf_record::prepareToRecord(int cpu, int fd)
+int operf_record::_prepare_to_record_one_fd(int idx, int fd)
 {
-	struct mmap_data md;;
+	struct mmap_data md;
 	md.prev = 0;
 	md.mask = num_mmap_pages * pagesize - 1;
 
-	fcntl(fd, F_SETFL, O_NONBLOCK);
+	if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0) {
+		perror("fcntl failed");
+		return -1;
+	}
 
-	poll_data[cpu].fd = fd;
-	poll_data[cpu].events = POLLIN;
+	poll_data[idx].fd = fd;
+	poll_data[idx].events = POLLIN;
 	poll_count++;
 
 	md.base = mmap(NULL, (num_mmap_pages + 1) * pagesize,
@@ -407,6 +506,88 @@ int operf_record::prepareToRecord(int cpu, int fd)
 }
 
 
+int operf_record::prepareToRecord(void)
+{
+	int op_ctr_idx = 0;
+	int rc = 0;
+	errno = 0;
+	if (pid_started && (procs.size() > 1)) {
+		/* Implies we're profiling a thread group, where we call perf_event_open
+		 * on each thread (process) in the group, passing cpu=-1.  So we'll do
+		 * one mmap per thread (by way of the _prepare_to_record_one_fd function).
+		 * If more than one event has been specified to profile on, we just do an
+		 * ioctl PERF_EVENT_IOC_SET_OUTPUT to tie that perf_event fd with the fd
+		 * of the first event of the thread.
+		 */
+
+		// Sanity check
+		if ((procs.size() * evts.size()) != perfCounters.size()) {
+			cerr << "Internal error: Number of fds[] (" << perfCounters.size()
+			     << ") != number of processes x number of events ("
+			     << procs.size() << " x " << evts.size() << ")." << endl;
+			return -1;
+		}
+		for (unsigned int proc_idx = 0; proc_idx < procs.size(); proc_idx++) {
+			int fd_for_set_output = perfCounters[op_ctr_idx].get_fd();
+			for (unsigned event = 0; event < evts.size(); event++) {
+				int fd =  perfCounters[op_ctr_idx].get_fd();
+				if (event == 0) {
+					rc = _prepare_to_record_one_fd(proc_idx, fd);
+				} else {
+					if ((rc = ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT,
+					                fd_for_set_output)) < 0)
+						perror("prepareToRecord: ioctl #1 failed");
+				}
+
+				if (rc < 0)
+					return rc;
+
+				if ((rc = ioctl(fd, PERF_EVENT_IOC_ENABLE)) < 0) {
+					perror("prepareToRecord: ioctl #2 failed");
+					return rc;
+				}
+				op_ctr_idx++;
+			}
+		}
+	} else {
+		/* We're either doing a system-wide profile or a profile of a single process.
+		 * We'll do one mmap per cpu.  If more than one event has been specified
+		 * to profile on, we just do an ioctl PERF_EVENT_IOC_SET_OUTPUT to tie
+		 * that perf_event fd with the fd of the first event of the cpu.
+		 */
+		if ((num_cpus * evts.size()) != perfCounters.size()) {
+			cerr << "Internal error: Number of fds[] (" << perfCounters.size()
+			     << ") != number of cpus x number of events ("
+			     << num_cpus << " x " << evts.size() << ")." << endl;
+			return -1;
+		}
+		for (int cpu = 0; cpu < num_cpus; cpu++) {
+			int fd_for_set_output = perfCounters[op_ctr_idx].get_fd();
+			for (unsigned event = 0; event < evts.size(); event++) {
+				int fd = perfCounters[op_ctr_idx].get_fd();
+				if (event == 0) {
+					rc = _prepare_to_record_one_fd(cpu, fd);
+				} else {
+					if ((rc = ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT,
+					                fd_for_set_output)) < 0)
+						perror("prepareToRecord: ioctl #3 failed");
+				}
+
+				if (rc < 0)
+					return rc;
+
+				if ((rc = ioctl(fd, PERF_EVENT_IOC_ENABLE)) < 0) {
+					perror("prepareToRecord: ioctl #4 failed");
+					return rc;
+				}
+				op_ctr_idx++;
+			}
+		}
+	}
+	return rc;
+}
+
+
 void operf_record::setup()
 {
 	bool all_cpus_avail = true;
@@ -414,8 +595,8 @@ void operf_record::setup()
 	struct dirent *entry = NULL;
 	DIR *dir = NULL;
 	string err_msg;
-	char cpus_online[129];
-	bool need_IOC_enable = (system_wide || pid_started);
+	char cpus_online[257];
+	bool profile_process_group = false;
 
 
 	if (system_wide)
@@ -423,32 +604,34 @@ void operf_record::setup()
 	else
 		cverb << vrecord << "operf_record::setup() with pid_started = " << pid_started << endl;
 
-	if (!system_wide && pid_started) {
-		/* We need to verify the existence of the passed PID before trying
-		 * perf_event_open or all hell will break loose.
-		 */
-		char fname[PATH_MAX];
-		FILE *fp;
-		snprintf(fname, sizeof(fname), "/proc/%d/status", pid);
-		fp = fopen(fname, "r");
-		if (fp == NULL) {
-			// Process must have finished or invalid PID passed into us.
-			// We'll bail out now.
-			cerr << "Unable to find process information for PID " << pid << "." << endl;
-			cverb << vrecord << "couldn't open " << fname << endl;
-			return;
-		}
-		fclose(fp);
+	if (pid_started || system_wide) {
+		if ((rc = op_get_process_info(system_wide, pid_to_profile, this)) < 0) {
+			if (rc == OP_PERF_HANDLED_ERROR)
+				return;
+			else
+				throw runtime_error("Unexpected error in operf_record setup");
+		}
+		// 'pid_started && (procs.size() > 1)' implies the process that the user
+		// has requested us to profile has cloned one or more children.
+		profile_process_group = pid_started && (procs.size() > 1);
 	}
+
 	pagesize = sysconf(_SC_PAGE_SIZE);
-	num_mmap_pages = (512 * 1024)/pagesize;
-	num_cpus = sysconf(_SC_NPROCESSORS_ONLN);
-	if (!num_cpus)
-		throw runtime_error("Number of online CPUs is zero; cannot continue");;
+	// If profiling a process group, use a smaller mmap length to avoid EINVAL.
+	num_mmap_pages = profile_process_group ? 1 : (512 * 1024)/pagesize;
 
-	poll_data = new struct pollfd [num_cpus];
+	/* To set up to profile an existing thread group, we need to call perf_event_open
+	 * for each thread, and we need to pass cpu=-1 on the syscall.
+	 */
+	use_cpu_minus_one = use_cpu_minus_one ? true : profile_process_group;
+	num_cpus = use_cpu_minus_one ? 1 : sysconf(_SC_NPROCESSORS_ONLN);
+	if (num_cpus < 1) {
+		char int_str[256];
+		sprintf(int_str, "Number of online CPUs is %d; cannot continue", num_cpus);
+		throw runtime_error(int_str);
+	}
 
-	cverb << vrecord << "calling perf_event_open for pid " << pid << " on "
+	cverb << vrecord << "calling perf_event_open for pid " << pid_to_profile << " on "
 	      << num_cpus << " cpus" << endl;
 	FILE * online_cpus = fopen("/sys/devices/system/cpu/online", "r");
 	if (!online_cpus) {
@@ -457,15 +640,14 @@ void operf_record::setup()
 		goto error;
 	}
 	memset(cpus_online, 0, sizeof(cpus_online));
-	fgets(cpus_online, sizeof(cpus_online), online_cpus);
-	if (!cpus_online[0]) {
+	if (fgets(cpus_online, sizeof(cpus_online), online_cpus) == NULL) {
 		fclose(online_cpus);
 		err_msg = "Internal Error: Number of online cpus cannot be determined.";
 		rc = -1;
 		goto error;
 
 	}
-	if (index(cpus_online, ',')) {
+	if (index(cpus_online, ',') || cpus_online[0] != '0') {
 		all_cpus_avail = false;
 		if ((dir = opendir("/sys/devices/system/cpu")) == NULL) {
 			fclose(online_cpus);
@@ -478,62 +660,73 @@ void operf_record::setup()
 
 	for (int cpu = 0; cpu < num_cpus; cpu++) {
 		int real_cpu;
-		int mmap_fd;
-		bool mmap_done_for_cpu = false;
-		if (all_cpus_avail) {
+		if (use_cpu_minus_one) {
+			real_cpu = -1;
+		} else if (all_cpus_avail) {
 			real_cpu = cpu;
 		} else {
-			real_cpu = op_get_next_online_cpu(dir, entry);
+			real_cpu = op_pe_utils::op_get_next_online_cpu(dir, entry);
 			if (real_cpu < 0) {
 				err_msg = "Internal Error: Number of online cpus cannot be determined.";
 				rc = -1;
 				goto error;
 			}
 		}
-
-		// Create new row to hold operf_counter objects since we need one
-		// row for each cpu. Do the same for samples_array.
-		vector<operf_counter> tmp_pcvec;
-
-		perfCounters.push_back(tmp_pcvec);
-		for (unsigned event = 0; event < evts.size(); event++) {
-			evts[event].counter = event;
-			perfCounters[cpu].push_back(operf_counter(evts[event],
-			                                          (!pid_started && !system_wide),
-			                                          callgraph, separate_cpu));
-			if ((rc = perfCounters[cpu][event].perf_event_open(pid, real_cpu, event, this)) < 0) {
-				err_msg = "Internal Error.  Perf event setup failed.";
-				goto error;
-			}
-			if (!mmap_done_for_cpu) {
-				if (((rc = prepareToRecord(cpu, perfCounters[cpu][event].get_fd()))) < 0) {
+		size_t num_procs = profile_process_group ? procs.size() : 1;
+		/* To profile a parent and its children, the perf_events kernel subsystem
+		 * requires us to use cpu=-1 on the perf_event_open call for each of the
+		 * processes in the group.  But perf_events also prevents us from specifying
+		 * "inherit" on the perf_event_attr we pass to perf_event_open when cpu is '-1'.
+		 */
+		bool inherit = !profile_process_group;
+		std::map<u32, struct comm_event>::iterator proc_it = procs.begin();
+		for (unsigned proc_idx = 0; proc_idx < num_procs; proc_idx++) {
+			for (unsigned event = 0; event < evts.size(); event++) {
+				/* For a parent process, comm.tid==comm.pid, but for child
+				 * processes in a process group, comm.pid is the parent, so
+				 * we must use comm.tid for the perf_event_open call.  So
+				 * we can use comm.tid for all cases.
+				 */
+				// Every event of a process is opened on the same tid, so step to
+				// the next process only after its last event has been handled.
+				pid_t pid_for_open = pid_to_profile;
+				if (profile_process_group)
+					pid_for_open = (event + 1 < evts.size() ? proc_it : proc_it++)->second.tid;
+				operf_counter op_ctr(operf_counter(evts[event],
+				                                   (!pid_started && !system_wide),
+				                                   callgraph, separate_cpu,
+				                                   inherit, event));
+				if ((rc = op_ctr.perf_event_open(pid_for_open,
+				                                 real_cpu, this, true)) < 0) {
 					err_msg = "Internal Error.  Perf event setup failed.";
 					goto error;
 				}
-				mmap_fd = perfCounters[cpu][event].get_fd();
-				mmap_done_for_cpu = true;
-			} else {
-				if (ioctl(perfCounters[cpu][event].get_fd(),
-				          PERF_EVENT_IOC_SET_OUTPUT, mmap_fd) < 0)
-					goto error;
+				perfCounters.push_back(op_ctr);
 			}
-			if (need_IOC_enable)
-				if (ioctl(perfCounters[cpu][event].get_fd(), PERF_EVENT_IOC_ENABLE) < 0)
-					goto error;
 		}
 	}
-	if (dir)
-		closedir(dir);
+	int num_mmaps;
+	if (pid_started && (procs.size() > 1))
+		num_mmaps = procs.size();
+	else
+		num_mmaps = num_cpus;
+	poll_data = new struct pollfd [num_mmaps];
+	if ((rc = prepareToRecord()) < 0) {
+		err_msg = "Internal Error.  Perf event setup failed.";
+		goto error;
+	}
 	write_op_header_info();
 
 	// Set bit to indicate we're set to go.
 	valid = true;
+	if (dir)
+		closedir(dir);
 	return;
 
 error:
 	delete[] poll_data;
 	poll_data = NULL;
-	for (int i = 0; i < samples_array.size(); i++) {
+	for (size_t i = 0; i < samples_array.size(); i++) {
 		struct mmap_data *md = &samples_array[i];
 		munmap(md->base, (num_mmap_pages + 1) * pagesize);
 	}
@@ -545,25 +738,113 @@ error:
 		throw runtime_error(err_msg);
 }
 
-void operf_record::recordPerfData(void)
+void operf_record::record_process_info(void)
 {
-	bool disabled = false;
-	if (pid_started || system_wide) {
-		if (op_record_process_info(system_wide, pid, this, output_fd) < 0) {
-			for (int i = 0; i < num_cpus; i++) {
-				for (unsigned int evt = 0; evt < evts.size(); evt++)
-					ioctl(perfCounters[i][evt].get_fd(), PERF_EVENT_IOC_DISABLE);
-			}
-			throw runtime_error("operf_record: error recording process info");
+	map<unsigned int, unsigned int> pids_mapped;
+	pid_t last_tgid = -1;
+	std::map<u32, struct comm_event>::iterator proc_it = procs.begin();
+	for (unsigned int proc_idx = 0; proc_idx < procs.size(); proc_idx++, proc_it++)
+	{
+		struct comm_event ce = proc_it->second;
+		int num = OP_perf_utils::op_write_output(output_fd, &ce, ce.header.size);
+		add_to_total(num);
+		if (cverb << vrecord)
+			cout << "Created COMM event for " << ce.comm << endl;
+
+		if (((pid_t)(ce.pid) == last_tgid) ||
+				(pids_mapped.find(ce.pid) != pids_mapped.end()))
+			continue;
+		OP_perf_utils::op_record_process_exec_mmaps(ce.tid,
+		                                            ce.pid,
+		                                            output_fd, this);
+		pids_mapped[ce.pid] = last_tgid = ce.pid;
+	}
+}
+
+int operf_record::_start_recoding_new_thread(pid_t id)
+{
+	int num_mmaps, rc, fd_for_set_output = -1;
+	struct comm_event ce;
+	u64 sample_id;
+	struct pollfd * old_polldata = poll_data;
+
+	// poll_data is a heap pointer, so sizeof() cannot yield its element count;
+	// poll_count (the nfds given to poll()) does.  Grow by one for the new fd.
+	num_mmaps = poll_count + 1;
+	poll_data = new struct pollfd [num_mmaps];
+	// Copy only the existing pollfd objects from the array.  The new pollfd will
+	// be filled in via the call to _prepare_to_record_one_fd.
+	for (int i = 0; i < num_mmaps - 1; i++)
+		poll_data[i] = old_polldata[i];
+	delete[] old_polldata;
+	// Make a pseudo comm_event object.  At this point, the
+	// only field we need to set is tid.
+	memset(&ce, 0, sizeof(ce));
+	ce.tid = id;
+	add_process(ce);
+
+	for (unsigned event = 0; event < evts.size(); event++) {
+		operf_counter op_ctr(operf_counter(evts[event],
+		                                   (!pid_started && !system_wide),
+		                                   callgraph, separate_cpu,
+		                                   false, event));
+		if (op_ctr.perf_event_open(id, -1, this, false) < 0) {
+			sample_id = OP_PERF_NO_SAMPLE_ID;
+			// Send special value to convert process to indicate failure
+			ssize_t len = write(write_comm_pipe, &sample_id, sizeof(sample_id));
+			if (len < 0)
+				perror("Internal error on convert write_comm_pipe");
+			return -1;
+		}
+		perfCounters.push_back(op_ctr);
+		int fd = op_ctr.get_fd();
+		if (event == 0) {
+			rc = _prepare_to_record_one_fd(num_mmaps - 1, fd);
+			fd_for_set_output = fd;
+		} else {
+			if ((rc = ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT,
+			                fd_for_set_output)) < 0)
+				perror("_start_recoding_new_thread: ioctl #1 failed");
 		}
+
+		if (rc < 0)
+			return rc;
+
+		if ((rc = ioctl(fd, PERF_EVENT_IOC_ENABLE)) < 0) {
+			perror("_start_recoding_new_thread: ioctl #2 failed");
+			return rc;
+		}
+
+		sample_id = opHeader.h_attrs[event].ids.back();
+		ssize_t len = write(write_comm_pipe, &sample_id, sizeof(sample_id));
+		if (len < 0)
+			perror("Internal error on convert write_comm_pipe");
+		else if (len != sizeof(sample_id))
+			cerr << "Incomplete write convert to write_comm_pipe" << endl;
+		else
+			cverb << vrecord << "Sent sample_id " << sample_id << " to convert process" << endl;
 	}
-	op_record_kernel_info(vmlinux_file, kernel_start, kernel_end, output_fd, this);
 
+	return 0;
+}
+
+
+void operf_record::recordPerfData(void)
+{
+	bool disabled = false;
+	if (pid_started || system_wide)
+		record_process_info();
+	else
+		op_get_vsyscall_mapping(pid_to_profile, output_fd, this);
+
+	op_record_kernel_info(vmlinux_file, kernel_start, kernel_end, output_fd, this);
+	cerr << "operf: Profiler started" << endl;
 	while (1) {
 		int prev = sample_reads;
+		pid_t pi;
+		ssize_t len;
 
-
-		for (int i = 0; i < samples_array.size(); i++) {
+		for (size_t i = 0; i < samples_array.size(); i++) {
 			if (samples_array[i].base)
 				op_get_kernel_event_data(&samples_array[i], this);
 		}
@@ -571,47 +852,46 @@ void operf_record::recordPerfData(void)
 			break;
 
 		if (prev == sample_reads) {
-			poll(poll_data, poll_count, -1);
+			(void)poll(poll_data, poll_count, -1);
+		}
+		if (!quit && track_new_forks && procs.size() > 1) {
+			len = read(read_comm_pipe, &pi, sizeof(pi));
+
+			if (len < 0 && errno != EAGAIN) {
+				cverb << vrecord << "Non-fatal error: read_comm_pipe returned too few bytes" << endl;
+			} else if (len == sizeof(pi) && (procs.find(pi) == procs.end())) {
+				// Start profiling this new thread
+				cverb << vrecord << "Start recording for new thread " << pi << endl;
+				// Don't treat as fatal error if it doesn't work
+				if (_start_recoding_new_thread(pi) < 0)
+					cerr << "Unable to collect samples for forked process " << pi
+					     << ". Process may have ended before recording could be started." << endl;
+			}
 		}
 
 		if (quit) {
-			for (int i = 0; i < num_cpus; i++) {
-				for (unsigned int evt = 0; evt < evts.size(); evt++)
-					ioctl(perfCounters[i][evt].get_fd(), PERF_EVENT_IOC_DISABLE);
-			}
+			for (unsigned int i = 0; i < perfCounters.size(); i++)
+				ioctl(perfCounters[i].get_fd(), PERF_EVENT_IOC_DISABLE);
 			disabled = true;
 			cverb << vrecord << "operf_record::recordPerfData received signal to quit." << endl;
 		}
 	}
+
 	cverb << vdebug << "operf recording finished." << endl;
 }
 
 void operf_read::init(int sample_data_pipe_fd, string input_filename, string samples_loc, op_cpu cputype,
-                      vector<operf_event_t> & events, bool systemwide)
+                      bool systemwide, int _record_write_pipe, int _record_read_pipe,
+                      int _post_profiling_pipe)
 {
-	struct sigaction sa;
-	sigset_t ss;
 	sample_data_fd = sample_data_pipe_fd;
+	read_comm_pipe = _record_read_pipe;
+	write_comm_pipe = _record_write_pipe;
+	post_profiling_pipe = _post_profiling_pipe;
 	inputFname = input_filename;
 	sampledir = samples_loc;
-	evts = events;
 	cpu_type = cputype;
 	syswide = systemwide;
-	memset(&sa, 0, sizeof(struct sigaction));
-	sa.sa_sigaction = op_perfread_sigusr1_handler;
-	sigemptyset(&sa.sa_mask);
-	sigemptyset(&ss);
-	sigaddset(&ss, SIGUSR1);
-	sigprocmask(SIG_UNBLOCK, &ss, NULL);
-	sa.sa_mask = ss;
-	sa.sa_flags = SA_NOCLDSTOP | SA_SIGINFO;
-	cverb << vconvert << "operf-read calling sigaction" << endl;
-	if (sigaction(SIGUSR1, &sa, NULL) == -1) {
-		cverb << vconvert << "operf-read init: sigaction failed; errno is: "
-		      << strerror(errno) << endl;
-		_exit(EXIT_FAILURE);
-	}
-
 }
 
 operf_read::~operf_read()
@@ -619,6 +899,11 @@ operf_read::~operf_read()
 	evts.clear();
 }
 
+void operf_read::add_sample_id_to_opHeader(u64 sample_id)
+{
+	for (unsigned int i = 0; i < evts.size(); i++)
+		opHeader.h_attrs[i].ids.push_back(sample_id);
+}
 
 int operf_read::_read_header_info_with_ifstream(void)
 {
@@ -673,7 +958,9 @@ int operf_read::_read_header_info_with_ifstream(void)
 				ret = OP_PERF_HANDLED_ERROR;
 				goto out;
 			}
-			cverb << vconvert << "Perf header: id = " << hex << (unsigned long long)perf_id << endl;
+			ostringstream message;
+			message << "Perf header: id = " << hex << (unsigned long long)perf_id << endl;
+			cverb << vconvert << message.str();
 			opHeader.h_attrs[i].ids.push_back(perf_id);
 		}
 		istrm.seekg(next_f_attr, ios_base::beg);
@@ -763,7 +1050,9 @@ int operf_read::_read_perf_header_from_pipe(void)
 				errmsg = "Error reading perf ID on sample data pipe: " + string(strerror(errno));
 				goto fail;
 			}
-			cverb << vconvert << "Perf header: id = " << hex << (unsigned long long)perf_id << endl;
+			ostringstream message;
+			message << "Perf header: id = " << hex << (unsigned long long)perf_id << endl;
+			cverb << vconvert << message.str();
 			opHeader.h_attrs[i].ids.push_back(perf_id);
 		}
 
@@ -797,21 +1086,32 @@ int operf_read::get_eventnum_by_perf_event_id(u64 id) const
 	return -1;
 }
 
-int operf_read::convertPerfData(void)
+
+unsigned int operf_read::convertPerfData(void)
 {
-	int num_bytes = 0;
+	unsigned int num_bytes = 0;
 	struct mmap_info info;
-	event_t * event;
+	bool error = false;
+	event_t * event = NULL;
+
+	if (fcntl(post_profiling_pipe, F_SETFL, O_NONBLOCK) < 0) {
+		cerr << "Error: fcntl failed with errno:\n\t" << strerror(errno) << endl;
+		throw runtime_error("Error: Unable to set post_profiling_pipe to non blocking");
+	}
 
 	if (!inputFname.empty()) {
 		info.file_data_offset = opHeader.data_offset;
 		info.file_data_size = opHeader.data_size;
+		cverb << vdebug << "Expecting to read approximately " << dec
+		      << info.file_data_size - info.file_data_offset
+		      << " bytes from operf sample data file." << endl;
 		info.traceFD = open(inputFname.c_str(), O_RDONLY);
 		if (info.traceFD == -1) {
 			cerr << "Error: open failed with errno:\n\t" << strerror(errno) << endl;
 			throw runtime_error("Error: Unable to open operf data file");
 		}
 		cverb << vdebug << "operf_read opened " << inputFname << endl;
+		pg_sz = sysconf(_SC_PAGESIZE);
 		if (op_mmap_trace_file(info, true) < 0) {
 			close(info.traceFD);
 			throw runtime_error("Error: Unable to mmap operf data file");
@@ -825,13 +1125,15 @@ int operf_read::convertPerfData(void)
 	for (int i = 0; i < OPERF_MAX_STATS; i++)
 		operf_stats[i] = 0;
 
-	cverb << vdebug << "Converting operf data to oprofile sample data format" << endl;
-	cverb << vdebug << "sample type is " << hex <<  opHeader.h_attrs[0].attr.sample_type << endl;
+	ostringstream message;
+	message << "Converting operf data to oprofile sample data format" << endl;
+	message << "sample type is " << hex <<  opHeader.h_attrs[0].attr.sample_type << endl;
+	cverb << vdebug << message.str();
 	first_time_processing = true;
 	int num_recs = 0;
+	struct perf_event_header last_header;
 	bool print_progress = !inputFname.empty() && syswide;
-	if (print_progress)
-		cerr << "Converting profile data to OProfile format" << endl;
+	bool printed_progress_msg = false;
 	while (1) {
 		streamsize rec_size = 0;
 		if (!inputFname.empty()) {
@@ -843,20 +1145,45 @@ int operf_read::convertPerfData(void)
 				break;
 		}
 		rec_size = event->header.size;
-		op_write_event(event, opHeader.h_attrs[0].attr.sample_type);
+
+		if ((!is_header_valid(event->header)) ||
+				((op_write_event(event, opHeader.h_attrs[0].attr.sample_type)) < 0)) {
+			error = true;
+			last_header = event->header;
+			break;
+		}
 		num_bytes += rec_size;
 		num_recs++;
-		if ((num_recs % 1000000 == 0) && print_progress)
+		if ((num_recs % 1000000 == 0) && (print_progress || _print_pp_progress(post_profiling_pipe))) {
+			if (!printed_progress_msg) {
+				cerr << "\nConverting profile data to OProfile format " << endl;
+				printed_progress_msg = true;
+			}
 			cerr << ".";
+		}
+	}
+	if (unlikely(error)) {
+		if (!inputFname.empty()) {
+			cerr << "ERROR: operf_read::convertPerfData quitting. Bad data read from file." << endl;
+		} else {
+			cerr << "ERROR: operf_read::convertPerfData quitting. Bad data read from pipe." << endl;
+			cerr << "Closing read end of data pipe. operf-record process will stop with SIGPIPE (13)."
+			     << endl;
+		}
+		cerr << "Try lowering the sample frequency to avoid this error; e.g., double the 'count'"
+		     << endl << "value in your event specification." << endl;
+		cverb << vdebug << "Event header type: " << last_header.type << "; size: " << last_header.size << endl;
 	}
-	if (print_progress)
-		cerr << endl;
 
 	first_time_processing = false;
-	op_reprocess_unresolved_events(opHeader.h_attrs[0].attr.sample_type);
+	if (!error)
+		op_reprocess_unresolved_events(opHeader.h_attrs[0].attr.sample_type, print_progress);
+
+	if (printed_progress_msg)
+		cerr << endl;
 
 	op_release_resources();
-	operf_print_stats(operf_options::session_dir, start_time_human_readable, throttled);
+	operf_print_stats(operf_options::session_dir, start_time_human_readable, throttled, evts);
 
 	char * cbuf;
 	cbuf = (char *)xmalloc(operf_options::session_dir.length() + 5);
@@ -864,7 +1191,7 @@ int operf_read::convertPerfData(void)
 	strcat(cbuf, "/abi");
 	op_write_abi_to_file(cbuf);
 	free(cbuf);
-	if (inputFname.empty())
+	if (!inputFname.empty())
 		close(info.traceFD);
 	else
 		free(event);
diff --git a/libperf_events/operf_counter.h b/libperf_events/operf_counter.h
index 01e2b2a..4eb7775 100644
--- a/libperf_events/operf_counter.h
+++ b/libperf_events/operf_counter.h
@@ -1,5 +1,5 @@
 /**
- * @file pe_profiling/operf_counter.h
+ * @file libperf_events/operf_counter.h
  * C++ class definition that abstracts the user-to-kernel interface
  * for using Linux Performance Events Subsystem.
  *
@@ -11,7 +11,7 @@
  * (C) Copyright IBM Corp. 2011
  *
  * Modified by Maynard Johnson <maynardj@us.ibm.com>
- * (C) Copyright IBM Corporation 2012
+ * (C) Copyright IBM Corporation 2012, 2014
  *
  */
 
@@ -51,23 +51,26 @@ op_perf_event_open(struct perf_event_attr * attr,
 }
 
 #define OP_PERF_HANDLED_ERROR -101
+#define OP_PERF_NO_SAMPLE_ID 0xdeadbeefdeadbeefULL
 
 
 class operf_counter {
 public:
 	operf_counter(operf_event_t & evt, bool enable_on_exec, bool callgraph,
-	              bool separate_by_cpu);
+	              bool separate_by_cpu, bool inherit, int event_number);
 	~operf_counter();
-	int perf_event_open(pid_t ppid, int cpu, unsigned counter, operf_record * pr);
+	int perf_event_open(pid_t pid, int cpu, operf_record * pr, bool print_error);
 	const struct perf_event_attr * the_attr(void) const { return &attr; }
 	int get_fd(void) const { return fd; }
 	int get_id(void) const { return id; }
+	int get_evt_num(void) const { return evt_num; }
 	const std::string get_event_name(void) const { return event_name; }
 
 private:
 	struct perf_event_attr attr;
 	int fd;
 	int id;
+	int evt_num;
 	std::string event_name;
 };
 
@@ -80,34 +83,48 @@ public:
 	 */
 	operf_record(int output_fd, bool sys_wide, pid_t the_pid, bool pid_running,
 	             std::vector<operf_event_t> & evts, OP_perf_utils::vmlinux_info_t vi,
-	             bool callgraph, bool separate_by_cpu, bool output_fd_is_file);
+	             bool callgraph, bool separate_by_cpu, bool output_fd_is_file,
+	             int _convert_read_pipe, int _convert_write_pipe);
 	~operf_record();
 	void recordPerfData(void);
 	int out_fd(void) const { return output_fd; }
 	void add_to_total(int n) { total_bytes_recorded += n; }
-	int get_total_bytes_recorded(void) const { return total_bytes_recorded; }
+	void add_process(struct comm_event proc) { procs[proc.tid] = proc; }
+	unsigned int get_total_bytes_recorded(void) const { return total_bytes_recorded; }
 	void register_perf_event_id(unsigned counter, u64 id, perf_event_attr evt_attr);
 	bool get_valid(void) { return valid; }
 
 private:
 	void create(std::string outfile, std::vector<operf_event_t> & evts);
 	void setup(void);
-	int prepareToRecord(int cpu, int fd);
+	int prepareToRecord(void);
+	int _prepare_to_record_one_fd(int idx, int fd);
+	int _start_recoding_new_thread(pid_t id);
+	void record_process_info(void);
 	void write_op_header_info(void);
 	int _write_header_to_file(void);
 	int _write_header_to_pipe(void);
 	int output_fd;
+	int read_comm_pipe;
+	int write_comm_pipe;
 	bool write_to_file;
+	// Array of size 'num_cpus_used_for_perf_event_open * num_pids' (one pollfd per mmap'ed fd)
 	struct pollfd * poll_data;
 	std::vector<struct mmap_data> samples_array;
 	int num_cpus;
-	pid_t pid;
+	pid_t pid_to_profile;
+	/* When doing --pid or --system-wide profiling, we'll obtain process information
+	 * for all processes to be profiled (including forked/cloned processes) and store
+	 * that information in a collection of type 'comm_event'.  We'll use this collection
+	 * for synthesizing PERF_RECORD_COMM events into the profile data stream.
+	 */
+	std::map<u32, struct comm_event> procs;
 	bool pid_started;
 	bool system_wide;
 	bool callgraph;
 	bool separate_cpu;
-	std::vector< std::vector<operf_counter> > perfCounters;
-	int total_bytes_recorded;
+	std::vector<operf_counter> perfCounters;
+	unsigned int total_bytes_recorded;
 	int poll_count;
 	struct OP_header opHeader;
 	std::vector<operf_event_t> evts;
@@ -118,27 +135,37 @@ private:
 
 class operf_read {
 public:
-	operf_read(void) : sample_data_fd(-1), inputFname(""), cpu_type(CPU_NO_GOOD) { valid = syswide = false;}
+	// Pipe fds start at -1 (no descriptor) until init() supplies the real ones.
+	operf_read(std::vector<operf_event_t> & _evts)
+	: sample_data_fd(-1), inputFname(""), evts(_evts), cpu_type(CPU_NO_GOOD)
+	  { valid = syswide = false;
+	  write_comm_pipe = read_comm_pipe = post_profiling_pipe = -1; }
 	void init(int sample_data_pipe_fd, std::string input_filename, std::string samples_dir, op_cpu cputype,
-	          std::vector<operf_event_t> & evts, bool systemwide);
+	          bool systemwide, int _record_write_pipe, int _record_read_pipe,
+	          int _post_profiling_pipe);
 	~operf_read();
 	int readPerfHeader(void);
-	int convertPerfData(void);
+	unsigned int convertPerfData(void);
 	bool is_valid(void) {return valid; }
 	int get_eventnum_by_perf_event_id(u64 id) const;
 	inline const operf_event_t * get_event_by_counter(u32 counter) { return &evts[counter]; }
+	int get_write_comm_pipe(void) { return write_comm_pipe; }
+	int get_read_comm_pipe(void)  { return read_comm_pipe; }
+	void add_sample_id_to_opHeader(u64 sample_id);
 
 private:
 	int sample_data_fd;
+	int write_comm_pipe;
+	int read_comm_pipe;
+	int post_profiling_pipe;
 	std::string inputFname;
 	std::string sampledir;
 	std::ifstream istrm;
 	struct OP_header opHeader;
-	std::vector<operf_event_t> evts;
+	std::vector<operf_event_t> & evts;
 	bool valid;
 	bool syswide;
 	op_cpu cpu_type;
-	int _get_one_perf_event(event_t *);
 	int _read_header_info_with_ifstream(void);
 	int _read_perf_header_from_file(void);
 	int _read_perf_header_from_pipe(void);
diff --git a/libperf_events/operf_event.h b/libperf_events/operf_event.h
index 2b35826..c64a63f 100644
--- a/libperf_events/operf_event.h
+++ b/libperf_events/operf_event.h
@@ -22,6 +22,7 @@
 
 #define OP_MAX_EVT_NAME_LEN 64
 #define OP_MAX_UM_NAME_LEN 64
+#define OP_MAX_UM_NAME_STR_LEN 17
 #define OP_MAX_NUM_EVENTS 512
 
 struct ip_event {
@@ -73,6 +74,13 @@ struct sample_event {
 	u64 array[];
 };
 
+struct throttle_event {
+	struct perf_event_header header;
+	u64 time;
+	u64 id;
+	u64 stream_id;
+};
+
 typedef union event_union {
 	struct perf_event_header header;
 	struct ip_event	ip;
@@ -81,7 +89,8 @@ typedef union event_union {
 	struct fork_event fork;
 	struct lost_event lost;
 	struct read_event read;
-	struct sample_event	sample;
+	struct sample_event sample;
+	struct throttle_event throttle;
 } event_t;
 
 struct mmap_data {
@@ -122,10 +131,13 @@ typedef struct operf_event {
 	unsigned long evt_um;
 	char um_name[OP_MAX_UM_NAME_LEN];
 	unsigned long count;
-	u32 counter;
 	bool no_kernel;
 	bool no_user;
 	bool no_hv;
+	bool mode_specified; /* user specified user or kernel modes */
+	bool umask_specified; /* user specified a unit mask */
+	char um_numeric_val_as_str[OP_MAX_UM_NAME_STR_LEN];
+	bool throttled;  /* set to true if the event is ever throttled */
 } operf_event_t;
 
 struct mmap_info {
diff --git a/libperf_events/operf_kernel.cpp b/libperf_events/operf_kernel.cpp
index dbe22de..e7ba024 100644
--- a/libperf_events/operf_kernel.cpp
+++ b/libperf_events/operf_kernel.cpp
@@ -13,6 +13,7 @@
 
 #include <stdio.h>
 #include <iostream>
+#include <sstream>
 #include <unistd.h>
 #include <stdlib.h>
 #include "operf_kernel.h"
@@ -47,12 +48,16 @@ void operf_create_vmlinux(char const * name, char const * arg)
 
 	sscanf(arg, "%llx,%llx", &vmlinux_image.start, &vmlinux_image.end);
 
-	cverb << vmisc << "kernel_start = " << hex <<  vmlinux_image.start
-			<< "; kernel_end = " << vmlinux_image.end << endl;
+	ostringstream message;
+	message << "kernel_start = " << hex <<  vmlinux_image.start
+	        << "; kernel_end = " << vmlinux_image.end << endl;
+	cverb << vmisc << message.str();
 
 	if (!vmlinux_image.start && !vmlinux_image.end) {
-		cerr << "error: mis-parsed kernel range: " << hex << vmlinux_image.start
-				<< "; kernel_end = " << vmlinux_image.end << endl;
+		ostringstream message;
+		message << "error: mis-parsed kernel range: " << hex << vmlinux_image.start
+		        << "; kernel_end = " << vmlinux_image.end << endl;
+		cerr << message.str();
 		exit(EXIT_FAILURE);
 	}
 }
diff --git a/libperf_events/operf_mangling.cpp b/libperf_events/operf_mangling.cpp
index 7ded563..0c160d7 100644
--- a/libperf_events/operf_mangling.cpp
+++ b/libperf_events/operf_mangling.cpp
@@ -25,6 +25,7 @@
 #include "op_events.h"
 #include "op_libiberty.h"
 #include "cverb.h"
+#include "utility.h"
 
 #include <limits.h>
 #include <stdio.h>
@@ -108,8 +109,7 @@ mangle_filename(struct operf_sfile * last, struct operf_sfile const * sf, int co
 
 static void fill_header(struct opd_header * header, unsigned long counter,
                         vma_t anon_start, vma_t cg_to_anon_start,
-                        int is_kernel, int cg_to_is_kernel,
-                        int spu_samples, uint64_t embed_offset, time_t mtime)
+                        int is_kernel, int cg_to_is_kernel, time_t mtime)
 {
 	const operf_event_t * event = operfRead.get_event_by_counter(counter);
 
@@ -125,8 +125,6 @@ static void fill_header(struct opd_header * header, unsigned long counter,
 	header->cpu_speed = cpu_speed;
 	header->mtime = mtime;
 	header->anon_start = anon_start;
-	header->spu_profile = spu_samples;
-	header->embedded_offset = embed_offset;
 	header->cg_to_anon_start = cg_to_anon_start;
 }
 
@@ -137,6 +135,7 @@ int operf_open_sample_file(odb_t *file, struct operf_sfile *last,
 	char const * binary;
 	vma_t last_start = 0;
 	int err;
+	time_t mtime;
 
 	mangled = mangle_filename(last, sf, counter, cg);
 
@@ -145,7 +144,11 @@ int operf_open_sample_file(odb_t *file, struct operf_sfile *last,
 
 	cverb << vsfile << "Opening \"" << mangled << "\"" << endl;
 
-	create_path(mangled);
+	err = create_path(mangled);
+	if (err) {
+		cerr << "operf: create path for " << mangled << " failed: " << strerror(err) << endl;
+		goto out;
+	}
 
 	/* locking sf will lock associated cg files too */
 	operf_sfile_get(sf);
@@ -173,19 +176,32 @@ retry:
 		goto out;
 	}
 
-	if (!sf->kernel)
+	if (!sf->kernel) {
 		binary = sf->image_name;
-	else
+		mtime = op_get_mtime(binary);
+	} else {
 		binary = sf->kernel->name;
 
+		if (binary) {
+			if (strncmp(KALL_SYM_FILE, binary,
+				    strlen(KALL_SYM_FILE)) == 0 )
+			  /* The Kallsyms file is not a real file.  op_get_mtime() may
+			   * return different values for each call.
+			   */
+				mtime = 0;
+			else
+				mtime = op_get_mtime(binary);
+		} else {
+			mtime = 0;
+		}
+	}
+
 	if (last && last->is_anon)
 		last_start = last->start_addr;
 
 	fill_header((struct opd_header *)odb_get_data(file), counter,
 		    sf->is_anon ? sf->start_addr : 0, last_start,
-		    !!sf->kernel, last ? !!last->kernel : 0,
-		    0, 0,
-		    binary ? op_get_mtime(binary) : 0);
+		    !!sf->kernel, last ? !!last->kernel : 0, mtime);
 
 out:
 	operf_sfile_put(sf);
diff --git a/libperf_events/operf_process_info.cpp b/libperf_events/operf_process_info.cpp
index a5710cc..00063af 100644
--- a/libperf_events/operf_process_info.cpp
+++ b/libperf_events/operf_process_info.cpp
@@ -9,12 +9,19 @@
  * Created on: Dec 13, 2011
  * @author Maynard Johnson
  * (C) Copyright IBM Corp. 2011
+ *
+ * Modified by Maynard Johnson <maynardj@us.ibm.com>
+ * (C) Copyright IBM Corporation 2013
+ *
  */
 
 #include <stdio.h>
+#include <unistd.h>
 #include <iostream>
+#include <sstream>
 #include <map>
 #include <string.h>
+#include <errno.h>
 #include "operf_process_info.h"
 #include "file_manip.h"
 #include "operf_utils.h"
@@ -22,23 +29,13 @@
 using namespace std;
 using namespace OP_perf_utils;
 
-operf_process_info::operf_process_info(pid_t tgid, const char * appname, bool app_arg_is_fullname, bool is_valid)
-: pid(tgid), _appname(appname ? appname : ""), valid(is_valid)
+operf_process_info::operf_process_info(pid_t tgid, const char * appname,
+                                       bool app_arg_is_fullname, bool is_valid)
+: pid(tgid), valid(is_valid), appname_valid(false), look_for_appname_match(false),
+  forked(false), appname_is_fullname(NOT_FULLNAME), num_app_chars_matched(-1)
 {
-	if (app_arg_is_fullname && appname) {
-		appname_is_fullname = YES_FULLNAME;
-		app_basename = op_basename(appname);
-		num_app_chars_matched = (int)app_basename.length();
-	} else if (appname) {
-		appname_is_fullname = MAYBE_FULLNAME;
-		num_app_chars_matched = -1;
-		app_basename = appname;
-	} else {
-		appname_is_fullname = NOT_FULLNAME;
-		num_app_chars_matched = -1;
-		app_basename = "";
-	}
-	forked = false;
+	_appname = "";
+	set_appname(appname, app_arg_is_fullname);
 	parent_of_fork = NULL;
 }
 
@@ -50,64 +47,101 @@ operf_process_info::~operf_process_info()
 	if (valid) {
 		it = mmappings.begin();
 		end = mmappings.end();
-	} else {
-		it = deferred_mmappings.begin();
-		end = deferred_mmappings.end();
 	}
 	mmappings.clear();
-	deferred_mmappings.clear();
 }
 
-void operf_process_info::process_new_mapping(struct operf_mmap * mapping)
+void operf_process_info::set_appname(const char * appname, bool app_arg_is_fullname)
 {
-	// If we do not know the full pathname of our app yet,
-	// let's try to determine if the passed filename is a good
-	// candidate appname.
+	char exe_symlink[64];
+	char exe_realpath[PATH_MAX];
+	/* A combination of non-null appname and app_arg_is_fullname==true may be passed
+	 * from various locations.  But a non-null appname and app_arg_is_fullname==false
+	 * may only be passed as a result of a PERF_RECORD_COMM event.
+	 */
+	bool from_COMM_event = (appname && !app_arg_is_fullname);
 
-	if (!mapping->is_anon_mapping && (appname_is_fullname < YES_FULLNAME) && (num_app_chars_matched < (int)app_basename.length())) {
-		string basename;
-		int num_matched_chars = get_num_matching_chars(mapping->filename, basename);
-		if (num_matched_chars > num_app_chars_matched) {
-			appname_is_fullname = MAYBE_FULLNAME;
-			_appname = mapping->filename;
-			app_basename = basename;
-			num_app_chars_matched = num_matched_chars;
-			cverb << vmisc << "Best appname match is " << _appname << endl;
-		}
-	}
-	mmappings[mapping->start_addr] = mapping;
-	vector<operf_process_info *>::iterator it = forked_processes.begin();
-	while (it != forked_processes.end()) {
-		operf_process_info * p = *it;
-		p->copy_new_parent_mapping(mapping);
-		cverb << vmisc << "Copied new parent mapping for " << mapping->filename
-		      << " for forked process " << p->pid << endl;
-		it++;
+	if (appname_valid)
+		return;
+	/* If stored _appname is not empty, it implies we've been through this function before
+	 * (and would have tried the readlink method or, perhaps, fallen back to some other
+	 * method to set the stored _appname).  If we're here because of something other than
+	 * a COMM event (e.g. MMAP event), then we should compare our stored _appname with our
+	 * collection of mmapping basenames to see if we can find an appname match; otherwise,
+	 * if the passed appname is NULL, we just return, since a NULL appname won't help us here.
+	 */
+	if (_appname.length()) {
+		if (look_for_appname_match && !from_COMM_event)
+			return find_best_match_appname_all_mappings();
+		else if (!appname)
+			return;
 	}
 
+	snprintf(exe_symlink, 64, "/proc/%d/exe", pid);
+	memset(exe_realpath, '\0', PATH_MAX);
+
+	/* If the user is running a command via taskset, the kernel will send us a PERF_RECORD_COMM
+	 * for both comm=taskset and comm=<user_command> for the same process ID !!
+	 * The user will not be interested in taskset samples; thus, we ignore such COMM events.
+	 * This is a hack, but there doesn't seem to be a better way around the possibility of having
+	 * application samples attributed to "taskset" instead of the application.
+	 */
+	if (readlink(exe_symlink, exe_realpath, sizeof(exe_realpath)-1) > 0) {
+		_appname = exe_realpath;
+		app_basename = op_basename(_appname);
+		if (!strncmp(app_basename.c_str(), "taskset", strlen("taskset"))) {
+			_appname = "unknown";
+			app_basename = "unknown";
+		} else {
+			appname_valid = true;
+		}
+	} else {
+		/* Most likely that the process has ended already, so we'll need to determine
+		 * the appname through different means.
+		 */
+		if (cverb << vmisc) {
+			ostringstream message;
+			message << "PID: " << hex << pid << " Unable to obtain appname from " << exe_symlink << endl
+			        <<  "\t" << strerror(errno) << endl;
+			cout << message.str();
+		}
+		if (appname && strcmp(appname, "taskset")) {
+			_appname = appname;
+			if (app_arg_is_fullname) {
+				appname_valid = true;
+			} else {
+				look_for_appname_match = true;
+			}
+		} else {
+			_appname = "unknown";
+		}
+		app_basename = _appname;
+	}
+	ostringstream message;
+	message << "PID: " << hex << pid << " appname is set to "
+	        << _appname << endl;
+	cverb << vmisc << message.str();
+	if (look_for_appname_match)
+		find_best_match_appname_all_mappings();
 }
 
-/* This method should only be invoked when a "delayed" COMM event is processed.
- * By "delayed", I mean that we have already received MMAP events for the associated
- * process, for which we've had to create a partial operf_process_info object -- one
- * that has no _appname yet and is marked invalid.
- *
- * Given the above statement, the passed app_shortname "must" come from a comm.comm
- * field, which is 16 chars in length (thus the name of the arg).
+/* This operf_process_info object may be a parent to processes that it has forked.
+ * If the forked process has not done an 'exec' yet (i.e., we've not received a
+ * COMM event for it), then it's still a dependent process of its parent.
+ * If so, it will be in the parent's collection of forked processes.  So,
+ * when adding a new mapping, we should copy that mapping to each forked
+ * child's operf_process_info object.  Then, if samples are taken for that
+ * mapping for that forked process, the samples can be correctly attributed.
  */
-void operf_process_info::process_deferred_mappings(string app_shortname)
+void operf_process_info::process_mapping(struct operf_mmap * mapping, bool do_self)
 {
-	_appname = app_shortname;
-	app_basename = app_shortname;
-	valid = true;
-	map<u64, struct operf_mmap *>::iterator it = deferred_mmappings.begin();
-	while (it != deferred_mmappings.end()) {
-		process_new_mapping(it->second);
-		cverb << vmisc << "Processed deferred mapping for " << it->second->filename << endl;
-		it++;
+	if (!appname_valid && !is_forked()) {
+		if (look_for_appname_match)
+			check_mapping_for_appname(mapping);
+		else
+			set_appname(NULL, false);
 	}
-	deferred_mmappings.clear();
-	process_deferred_forked_processes();
+	set_new_mapping_recursive(mapping, do_self);
 }
 
 int operf_process_info::get_num_matching_chars(string mapped_filename, string & basename)
@@ -141,11 +175,54 @@ int operf_process_info::get_num_matching_chars(string mapped_filename, string &
 	return num_matched_chars ? num_matched_chars : -1;
 }
 
-const struct operf_mmap * operf_process_info::find_mapping_for_sample(u64 sample_addr)
+/* Consider one mapping's filename as a candidate for the app's full pathname.
+ * A candidate replaces the stored appname only when it matches more characters
+ * than the best match so far; matching all of app_basename ends the search.
+ * ASSUMPTION: This function is called only when look_for_appname_match==true.
+ */
+void operf_process_info::check_mapping_for_appname(struct operf_mmap * mapping)
+{
+	if (mapping->is_anon_mapping)
+		return;
+	string candidate_base;
+	int matched = get_num_matching_chars(mapping->filename, candidate_base);
+	if (matched <= num_app_chars_matched)
+		return;
+	if (matched == (int)app_basename.length()) {
+		appname_is_fullname = YES_FULLNAME;
+		look_for_appname_match = false;
+		appname_valid = true;
+	} else {
+		appname_is_fullname = MAYBE_FULLNAME;
+	}
+	_appname = mapping->filename;
+	app_basename = candidate_base;
+	num_app_chars_matched = matched;
+	cverb << vmisc << "Best appname match is " << _appname << endl;
+}
+
+/* Run check_mapping_for_appname() over every mapping recorded so far, so the
+ * stored appname ends up as the best-matching mapped filename.
+ */
+void operf_process_info::find_best_match_appname_all_mappings(void)
+{
+	// set_appname() stores "unknown" when it had no usable candidate name,
+	// in which case there is nothing to match the mappings against yet.
+	if (_appname == "unknown")
+		return;
+
+	map<u64, struct operf_mmap *>::iterator cur = mmappings.begin();
+	map<u64, struct operf_mmap *>::iterator const last = mmappings.end();
+	for (; cur != last; ++cur)
+		check_mapping_for_appname(cur->second);
+}
+
+const struct operf_mmap * operf_process_info::find_mapping_for_sample(u64 sample_addr, bool hypervisor_sample)
 {
 	map<u64, struct operf_mmap *>::iterator it = mmappings.begin();
 	while (it != mmappings.end()) {
-		if (sample_addr >= it->second->start_addr && sample_addr <= it->second->end_addr)
+		if (sample_addr >= it->second->start_addr && sample_addr <= it->second->end_addr &&
+				it->second->is_hypervisor == hypervisor_sample)
 			return it->second;
 		it++;
 	}
@@ -178,23 +255,15 @@ void operf_process_info::process_hypervisor_mapping(u64 ip)
 	map<u64, struct operf_mmap *>::iterator end;
 
 	curr_end = curr_start = ~0ULL;
-	if (valid) {
-		it = mmappings.begin();
-		end = mmappings.end();
-	} else {
-		it = deferred_mmappings.begin();
-		end = deferred_mmappings.end();
-	}
+	it = mmappings.begin();
+	end = mmappings.end();
 	while (it != end) {
 		if (it->second->is_hypervisor) {
 			struct operf_mmap * _mmap = it->second;
 			curr_start = _mmap->start_addr;
 			curr_end = _mmap->end_addr;
 			if (curr_start > ip) {
-				if (valid)
-					mmappings.erase(it);
-				else
-					deferred_mmappings.erase(it);
+				mmappings.erase(it);
 				delete _mmap;
 			} else {
 				create_new_hyperv_mmap = false;
@@ -216,14 +285,13 @@ void operf_process_info::process_hypervisor_mapping(u64 ip)
 		hypervisor_mmap->pgoff = 0;
 		hypervisor_mmap->is_hypervisor = true;
 		if (cverb << vmisc) {
-			cout << "Synthesize mmapping for " << hypervisor_mmap->filename << endl;
-			cout << "\tstart_addr: " << hex << hypervisor_mmap->start_addr;
-			cout << "; end addr: " << hypervisor_mmap->end_addr << endl;
+			ostringstream message;
+			message << "Synthesize mmapping for " << hypervisor_mmap->filename << endl;
+			message << "\tstart_addr: " << hex << hypervisor_mmap->start_addr;
+			message << "; end addr: " << hypervisor_mmap->end_addr << endl;
+			cout << message.str();
 		}
-		if (valid)
-			process_new_mapping(hypervisor_mmap);
-		else
-			add_deferred_mapping(hypervisor_mmap);
+		process_mapping(hypervisor_mmap, false);
 	}
 }
 
@@ -236,51 +304,40 @@ void operf_process_info::copy_mappings_to_forked_process(operf_process_info * fo
 		 * original object is created in operf_utils:__handle_mmap_event and
 		 * is saved in the global all_images_map.
 		 */
-	        forked_pid->process_new_mapping(mapping);
+	        forked_pid->process_mapping(mapping, true);
 	        it++;
 	}
 }
 
-void operf_process_info::connect_forked_process_to_parent(operf_process_info * parent)
+void operf_process_info::set_fork_info(operf_process_info * parent)
 {
 	forked = true;
 	parent_of_fork = parent;
-	if (parent->is_valid()) {
-		valid = true;
-		_appname = parent->get_app_name();
-		if (parent->is_appname_valid() && !_appname.empty()) {
-			appname_is_fullname = YES_FULLNAME;
-			app_basename = op_basename(_appname);
-			num_app_chars_matched = (int)app_basename.length();
-		} else if (!_appname.empty()) {
-			appname_is_fullname = MAYBE_FULLNAME;
-			num_app_chars_matched = -1;
-			app_basename = _appname;
-		} else {
-			appname_is_fullname = NOT_FULLNAME;
-			num_app_chars_matched = -1;
-			app_basename = "";
-		}
-		parent->copy_mappings_to_forked_process(this);
-	}
+	parent_of_fork->add_forked_pid_association(this);
+	parent_of_fork->copy_mappings_to_forked_process(this);
 }
 
-void operf_process_info::process_deferred_forked_processes(void)
+/* ASSUMPTION: This function should only be called during reprocessing phase
+ * since we blindly set the _appname to that of the parent.  If this function
+ * were called from elsewhere, the parent's _appname might not yet be fully baked.
+ */
+void operf_process_info::connect_forked_process_to_parent(void)
 {
-	vector<operf_process_info *>::iterator it = forked_processes.begin();
-	while (it != forked_processes.end()) {
-		operf_process_info * p = *it;
-		p->connect_forked_process_to_parent(this);
-		cverb << vmisc << "Processed deferred forked process " << p->pid << endl;
-		it++;
-	}
+	// Log the parent's pid; streaming the pointer itself would print an address.
+	if (cverb << vmisc)
+		cout << "Connecting forked proc " << pid << " to parent " << parent_of_fork->pid << endl;
+	_appname = parent_of_fork->get_app_name();
+	app_basename = op_basename(_appname);
+	valid = appname_valid = true;
 }
 
+
 void operf_process_info::remove_forked_process(pid_t forked_pid)
 {
 	std::vector<operf_process_info *>::iterator it = forked_processes.begin();
 	while (it != forked_processes.end()) {
-		if ((*it)->pid == forked_pid) {
+		operf_process_info * p = *it;
+		if (p->pid == forked_pid) {
 			forked_processes.erase(it);
 			break;
 		}
@@ -288,29 +345,75 @@ void operf_process_info::remove_forked_process(pid_t forked_pid)
 	}
 }
 
-/* This function is called as a result of the following scenario:
- *   1. An operf_process_info was created for a FORK event
- *   2. The forked process was connected to (associated with) its parent,
- *      adding the parent's mmappings to the forked process's operf_process_info.
- *   3. Then the forked process does an exec, which results in a COMM
- *      event. The forked process is now considered completely separate
- *      from its parent, so we need to disassociate it from the parent.
+/* See comment in operf_utils::__handle_comm_event for conditions under
+ * which this function is called.
  */
-void operf_process_info::disassociate_from_parent(char * app_shortname)
+void operf_process_info::try_disassociate_from_parent(char * app_shortname)
 {
-	_appname = app_shortname;
-	app_basename = app_shortname;
-	appname_is_fullname = NOT_FULLNAME;
+	if (parent_of_fork && (parent_of_fork->pid == this->pid))
+		return;
+
+	if (cverb << vmisc && parent_of_fork)
+		cout << "Dis-associating forked proc " << pid
+		     << " from parent " << parent_of_fork->pid << endl;
+
 	valid = true;
-	/* Now that we have a valid app shortname (from the COMM event data),
-	 * let's spin through our mmappings and process them -- see if we can
-	 * find one that has a good appname candidate.
-	 */
-	num_app_chars_matched = 0;
+	set_appname(app_shortname, false);
+
 	map<u64, struct operf_mmap *>::iterator it = mmappings.begin();
 	while (it != mmappings.end()) {
-		process_new_mapping(it->second);
+		operf_mmap * cur = it->second;
+		/* mmappings from the parent may have been added to this proc info prior
+		 * to this proc info becoming valid since we could not know at the time if
+		 * this proc would ever be valid. But now we know it's valid (which is why
+		 * we're dis-associating from the parent), so we remove these unnecessary
+		 * parent mmappings.
+		 */
+		if (mmappings_from_parent[cur->start_addr]) {
+			mmappings_from_parent[cur->start_addr] = false;
+			mmappings.erase(it++);
+		} else {
+			process_mapping(cur, false);
+			it++;
+		}
+	}
+	if (parent_of_fork) {
+		parent_of_fork->remove_forked_process(this->pid);
+		parent_of_fork = NULL;
+	}
+	forked = false;
+}
+
+/* This function adds a new mapping to the current operf_process_info
+ * and then calls the same function on each of its forked children.
+ * If do_self==true, it means this function is being called by a parent
+ * on a forked child's operf_process_info.  Then, if the mapping already
+ * exists, we do not set the corresponding mmappings_from_parent since we
+ * want to retain the knowledge that the mapping had already been added for
+ * this process versus from the parent. If do_self==false, it means this
+ * operf_process_info is the top-level parent and should set the corresponding
+ * mmappings_from_parent to false. The mmappings_from_parent map allows us to
+ * know whether to keep or discard the mapping if/when we dis-associate from
+ * the parent.
+ */
+void operf_process_info::set_new_mapping_recursive(struct operf_mmap * mapping, bool do_self)
+{
+	if (do_self) {
+		map<u64, struct operf_mmap *>::iterator it = mmappings.find(mapping->start_addr);
+		if (it == mmappings.end())
+			mmappings_from_parent[mapping->start_addr] = true;
+		else
+			mmappings_from_parent[mapping->start_addr] = false;
+	} else {
+		mmappings_from_parent[mapping->start_addr] = false;
+	}
+	mmappings[mapping->start_addr] = mapping;
+	std::vector<operf_process_info *>::iterator it = forked_processes.begin();
+	while (it != forked_processes.end()) {
+		operf_process_info * fp = *it;
+		fp->set_new_mapping_recursive(mapping, true);
+		cverb << vmisc << "Copied new parent mapping for " << mapping->filename
+		      << " for forked process " << fp->pid << endl;
 		it++;
 	}
-	parent_of_fork->remove_forked_process(this->pid);
 }
diff --git a/libperf_events/operf_process_info.h b/libperf_events/operf_process_info.h
index 675cae3..f98591f 100644
--- a/libperf_events/operf_process_info.h
+++ b/libperf_events/operf_process_info.h
@@ -46,54 +46,32 @@ struct operf_mmap {
  * be "ET_EXEC").
  *
  * This class is designed to handle the possibility that MMAP events may occur for a process
- * prior to the COMM event.  I don't know if this is possible, but it didn't take much to
- * add code to handle this exigency.
+ * prior to the COMM event.
  */
 class operf_process_info {
 public:
-	operf_process_info(pid_t tgid, const char * appname, bool app_arg_is_fullname, bool is_valid);
+	operf_process_info(pid_t tgid, const char * appname, bool app_arg_is_fullname,
+	                   bool is_valid);
 	~operf_process_info(void);
 	bool is_valid(void) { return (valid); }
+	bool is_appname_valid(void) { return (valid && appname_valid); }
+	void set_valid(void) { valid = true; }
+	void set_appname_valid(void) { appname_valid = true; }
 	bool is_forked(void) { return forked; }
-	void process_new_mapping(struct operf_mmap * mapping);
+	void process_mapping(struct operf_mmap * mapping, bool do_self);
 	void process_hypervisor_mapping(u64 ip);
-	void process_deferred_mappings(std::string app_shortname);
-	void connect_forked_process_to_parent(operf_process_info * parent);
-	void copy_new_parent_mapping(struct operf_mmap * mapping)
-	{ mmappings[mapping->start_addr] = mapping; }
+	void connect_forked_process_to_parent(void);
+	void set_fork_info(operf_process_info * parent);
 	void add_forked_pid_association(operf_process_info * forked_pid)
 	{ forked_processes.push_back(forked_pid); }
 	void copy_mappings_to_forked_process(operf_process_info * forked_pid);
-	void disassociate_from_parent(char * appname);
+	void try_disassociate_from_parent(char * appname);
 	void remove_forked_process(pid_t forked_pid);
 	std::string get_app_name(void) { return _appname; }
-	void add_deferred_mapping(struct operf_mmap * mapping)
-	{ deferred_mmappings[mapping->start_addr] = mapping; }
-	const struct operf_mmap * find_mapping_for_sample(u64 sample_addr);
+	const struct operf_mmap * find_mapping_for_sample(u64 sample_addr, bool hypervisor_sample);
+	void set_appname(const char * appname, bool app_arg_is_fullname);
+	void check_mapping_for_appname(struct operf_mmap * mapping);
 
-	/* The valid bit is set when a COMM event has been received for the process
-	 * represented by this object.  But since the COMM event only gives a shortname
-	 * for the app (16 chars at most), the process_info object is not completely
-	 * baked until appname_valid() returns true.  In truth, if appname_valid returns
-	 * true, we can't really be sure we've got a valid full app name since the true
-	 * result could be from:
-	 *    (appname_is_fullname == MAYBE_FULLNAME) &&(num_app_chars_matched > 0)
-	 * But this is the best guess we can make.
-	 */
-	bool is_appname_valid(void)
-	{
-		bool result;
-		if (!valid)
-			return false;
-		if (appname_is_fullname == YES_FULLNAME)
-			result = true;
-		else if ((appname_is_fullname == MAYBE_FULLNAME) &&
-				(num_app_chars_matched > 0))
-			result = true;
-		else
-			result = false;
-		return result;
-	}
 
 private:
 	typedef enum {
@@ -103,24 +81,28 @@ private:
 	} op_fullname_t;
 	pid_t pid;
 	std::string _appname;
-	bool valid;
+	bool valid, appname_valid, look_for_appname_match;
 	bool forked;
 	op_fullname_t appname_is_fullname;
 	std::string app_basename;
 	int  num_app_chars_matched;
 	std::map<u64, struct operf_mmap *> mmappings;
-	std::map<u64, struct operf_mmap *> deferred_mmappings;
-	/* When a FORK event is recieved, we try to associate that forked
-	 * process with its parent, but if the parent operf_process_info is
-	 * not yet valid, we have to defer this association until
-	 * after the parent becomes valid.  This forked_processes collection
-	 * holds those forked processes for which the association to the
-	 * parent has been deferred.
+	std::map<u64, bool> mmappings_from_parent;
+	/* When a FORK event is received, we associate that forked process
+	 * with its parent by adding it to the parent's forked_processes
+	 * collection. The main reason we need this collection is because
+	 * PERF_RECORD_MMAP events may arrive for the parent out of order,
+	 * after a PERF_RECORD_FORK.  Since forked processes inherit their
+	 * parent's mmappings, we want to make sure those mmappings exist
+	 * for the forked process so that samples may be properly attributed.
+	 * Therefore, every path that adds mmappings to a parent also adds
+	 * those mmappings to its forked children.
 	 */
 	std::vector<operf_process_info *> forked_processes;
 	operf_process_info * parent_of_fork;
+	void set_new_mapping_recursive(struct operf_mmap * mapping, bool do_self);
 	int get_num_matching_chars(std::string mapped_filename, std::string & basename);
-	void process_deferred_forked_processes(void);
+	void find_best_match_appname_all_mappings(void);
 };
 
 
diff --git a/libperf_events/operf_sfile.cpp b/libperf_events/operf_sfile.cpp
index bda9138..3ffa325 100644
--- a/libperf_events/operf_sfile.cpp
+++ b/libperf_events/operf_sfile.cpp
@@ -13,7 +13,9 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <assert.h>
 #include <iostream>
+#include <sstream>
 
 #include "operf_sfile.h"
 #include "operf_kernel.h"
@@ -151,7 +153,7 @@ create_sfile(unsigned long hash, struct operf_transient const * trans,
 	sf->start_addr = trans->start_addr;
 	sf->end_addr = trans->end_addr;
 
-	for (i = 0 ; i < op_nr_counters ; ++i)
+	for (i = 0 ; i < op_nr_events ; ++i)
 		odb_init(&sf->files[i]);
 
 	// TODO:  handle extended
@@ -179,12 +181,18 @@ struct operf_sfile * operf_sfile_find(struct operf_transient const * trans)
 	struct operf_kernel_image * ki = NULL;
 	unsigned long hash;
 
+	// The code that calls this function would always have set trans->image_name, but coverity
+	// isn't smart enough to know that.  So we add the assert here just to shut up coverity.
+	assert(trans->image_name);
 
 	if (trans->in_kernel) {
 		ki = operf_find_kernel_image(trans->pc);
 		if (!ki) {
-			if (cverb << vsfile)
-				cout << "Lost kernel sample " << std::hex << trans->pc << std::endl;;
+			if (cverb << vsfile) {
+				ostringstream message;
+				message << "Lost kernel sample " << std::hex << trans->pc << std::endl;;
+				cout << message.str();
+			}
 			operf_stats[OPERF_LOST_KERNEL]++;
 			return NULL;
 		}
@@ -218,7 +226,7 @@ void operf_sfile_dup(struct operf_sfile * to, struct operf_sfile * from)
 
 	memcpy(to, from, sizeof (struct operf_sfile));
 
-	for (i = 0 ; i < op_nr_counters ; ++i)
+	for (i = 0 ; i < op_nr_events ; ++i)
 		odb_init(&to->files[i]);
 
 	// TODO: handle extended
@@ -246,7 +254,7 @@ static odb_t * get_file(struct operf_transient const * trans, int is_cg)
 		return opd_ext_operf_sfile_get(trans, is_cg);
 	 */
 
-	if (trans->event >= (int)op_nr_counters) {
+	if (trans->event >= (int)op_nr_events) {
 		fprintf(stderr, "%s: Invalid counter %d\n", __FUNCTION__,
 			trans->event);
 		abort();
@@ -410,7 +418,7 @@ static int close_sfile(struct operf_sfile * sf, void * data __attribute__((unuse
 	size_t i;
 
 	/* it's OK to close a non-open odb file */
-	for (i = 0; i < op_nr_counters; ++i)
+	for (i = 0; i < op_nr_events; ++i)
 		odb_close(&sf->files[i]);
 
 	// TODO: handle extended
@@ -432,7 +440,7 @@ static int sync_sfile(struct operf_sfile * sf, void * data __attribute__((unused
 {
 	size_t i;
 
-	for (i = 0; i < op_nr_counters; ++i)
+	for (i = 0; i < op_nr_events; ++i)
 		odb_sync(&sf->files[i]);
 
 	// TODO: handle extended
diff --git a/libperf_events/operf_sfile.h b/libperf_events/operf_sfile.h
index 9b91180..bc00355 100644
--- a/libperf_events/operf_sfile.h
+++ b/libperf_events/operf_sfile.h
@@ -29,6 +29,7 @@ struct operf_kernel_image;
 #define INVALID_IMAGE "INVALID IMAGE"
 
 #define VMA_SHIFT 13
+
 /**
  * Each set of sample files (where a set is over the physical counter
  * types) will have one of these for it. We match against the
@@ -61,7 +62,7 @@ struct operf_sfile {
 	/** true if this file should be ignored in profiles */
 	int ignored;
 	/** opened sample files */
-	odb_t files[OP_MAX_COUNTERS];
+	odb_t files[OP_MAX_EVENTS];
 	/** extended sample files */
 	odb_t * ext_files;
 	/** hash table of opened cg sample files */
@@ -89,7 +90,7 @@ struct operf_transient {
 	operf_process_info * cur_procinfo;
 	vma_t pc;
 	const char * image_name;
-	const char * app_filename;
+	char app_filename[PATH_MAX];
 	size_t image_len, app_len;
 	vma_t last_pc;
 	int event;
diff --git a/libperf_events/operf_stats.cpp b/libperf_events/operf_stats.cpp
index 1d93f89..3cc28a7 100644
--- a/libperf_events/operf_stats.cpp
+++ b/libperf_events/operf_stats.cpp
@@ -11,9 +11,12 @@
  */
 
 #include <stdio.h>
+#include <string.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <fcntl.h>
+#include <iostream>
+#include <errno.h>
 
 #include "operf_stats.h"
 #include "op_get_time.h"
@@ -24,10 +27,38 @@ unsigned long operf_stats[OPERF_MAX_STATS];
  * operf_print_stats - print out latest statistics to operf.log
  */
 using namespace std;
-void operf_print_stats(string sessiondir, char * starttime, bool throttled)
+
+static string create_stats_dir(string const & cur_sampledir);
+static void write_throttled_event_files(vector< operf_event_t> const & events,
+                                        string const & stats_dir);
+
+/* Write lost_sample_count, formatted as text, to the file stats_filename. */
+static void _write_stats_file(string const & stats_filename, unsigned long lost_sample_count)
+{
+	ofstream out(stats_filename.c_str(), ios_base::out);
+	if (!out.good()) {
+		cerr << "Unable to write to stats file " << stats_filename << endl;
+		return;
+	}
+	out << lost_sample_count;	// the stream's destructor closes the file
+}
+
+void operf_print_stats(string sessiondir, char * starttime, bool throttled,
+                       vector< operf_event_t> const & events)
 {
 	string operf_log (sessiondir);
-	int total_lost_samples = 0;
+	unsigned long total_lost_samples = 0;
+	bool stats_dir_valid = true;
+
+	string stats_dir = create_stats_dir(sessiondir + "/" + "samples/current/");
+	if (strcmp(stats_dir.c_str(), "") != 0) {
+		// If there are throttled events print them
+		if (throttled)
+			write_throttled_event_files(events, stats_dir);
+	} else {
+		stats_dir_valid = false;
+		perror("Unable to create stats dir");
+	}
 
 	operf_log.append("/samples/operf.log");
 	FILE * fp = fopen(operf_log.c_str(), "a");
@@ -70,16 +101,85 @@ void operf_print_stats(string sessiondir, char * starttime, bool throttled)
 		fprintf(stderr, "best option if you want to avoid throttling.\n");
 	}
 
-	// TODO: handle extended stats
-	//operf_ext_print_stats();
-
-	for (int i = OPERF_INDEX_OF_FIRST_LOST_STAT; i < OPERF_MAX_STATS; i++)
+	for (int i = OPERF_INDEX_OF_FIRST_LOST_STAT; i < OPERF_MAX_STATS; i++) {
+		if (stats_dir_valid && operf_stats[i])
+			_write_stats_file(stats_dir + "/" + stats_filenames[i], operf_stats[i]);
 		total_lost_samples += operf_stats[i];
+	}
+	// Write total_samples into stats file if we see any indication of lost samples
+	if (stats_dir_valid && total_lost_samples)
+		_write_stats_file(stats_dir + "/" + stats_filenames[OPERF_SAMPLES], operf_stats[OPERF_SAMPLES]);
 
 	if (total_lost_samples > (int)(OPERF_WARN_LOST_SAMPLES_THRESHOLD
-				       * operf_stats[OPERF_SAMPLES]))
-		fprintf(stderr, "\nSee the %s file for statistics about lost samples.\n", operf_log.c_str());
-
+				       * operf_stats[OPERF_SAMPLES])) {
+		fprintf(stderr, "\nWARNING: Lost samples detected! See %s for details.\n", operf_log.c_str());
+		fprintf(stderr, "Lowering the sampling rate may reduce or eliminate lost samples.\n");
+		fprintf(stderr, "See the '--events' option description in the operf man page for help.\n");
+	}
 	fflush(fp);
 	fclose(fp);
+};
+
+/* Create an empty marker file, named after the event, in <stats_dir>/throttled
+ * for each event whose throttled flag is set; the directory is created on
+ * first use.  A mkdir failure aborts the whole operation, while a failure to
+ * create one marker file is reported and the remaining events are processed.
+ */
+static void write_throttled_event_files(vector< operf_event_t> const & events,
+                                        string const & stats_dir)
+{
+	string const throttled_dir = stats_dir + "/throttled";
+	bool throttled_dir_created = false;
+
+	for (unsigned index = 0; index < events.size(); index++) {
+		if (!events[index].throttled)
+			continue;
+
+		if (!throttled_dir_created) {
+			int rc = mkdir(throttled_dir.c_str(),
+			               S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
+			if (rc && (errno != EEXIST)) {
+				int saved_errno = errno;
+				cerr << "Error trying to create " << throttled_dir
+				     << endl;
+				// Stream output may clobber errno; restore it for perror.
+				errno = saved_errno;
+				perror("mkdir failed with");
+				return;
+			}
+			throttled_dir_created = true;
+		}
+
+		/* Nothing is written to the file; creating it is what records
+		 * that this event was throttled.
+		 */
+		string outputfile = throttled_dir + "/" + events[index].name;
+		// A fresh stream per file keeps error state from carrying over.
+		ofstream outfile(outputfile.c_str());
+
+		if (!outfile.is_open()) {
+			int saved_errno = errno;
+			cerr << "Internal error: Could not create " << outputfile
+			     << ": " << strerror(saved_errno) << endl;
+		}
+	}
+}
+
+
+/* Create the "stats" directory under cur_sampledir, which must end in '/'.
+ * Returns the directory path, or an empty string if mkdir fails for any
+ * reason other than the directory already existing.
+ */
+static string create_stats_dir(string const & cur_sampledir)
+{
+	string const stats_dir = cur_sampledir + "stats";
+	int rc = mkdir(stats_dir.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
+
+	if (rc && (errno != EEXIST)) {
+		cerr << "Error trying to create stats dir. " << endl;
+		perror("mkdir failed with");
+		// Never build a std::string from NULL; callers test for empty.
+		return string();
+	}
+	return stats_dir;
 }
diff --git a/libperf_events/operf_stats.h b/libperf_events/operf_stats.h
index 15d39b3..24590d2 100644
--- a/libperf_events/operf_stats.h
+++ b/libperf_events/operf_stats.h
@@ -11,33 +11,15 @@
  */
 
 #include <string>
+#include <vector>
+#include "operf_counter.h"
 
 #ifndef OPERF_STATS_H
 #define OPERF_STATS_H
 
 extern unsigned long operf_stats[];
 
-enum {	OPERF_SAMPLES, /**< nr. samples */
-	OPERF_KERNEL, /**< nr. kernel samples */
-	OPERF_PROCESS, /**< nr. userspace samples */
-	OPERF_INVALID_CTX, /**< nr. samples lost due to sample address not in expected range for domain */
-	OPERF_LOST_KERNEL,  /**< nr. kernel samples lost */
-	OPERF_LOST_SAMPLEFILE, /**< nr samples for which sample file can't be opened */
-	OPERF_LOST_NO_MAPPING, /**< nr samples lost due to no mapping */
-	OPERF_NO_APP_KERNEL_SAMPLE, /**<nr. user ctx kernel samples dropped due to no app context available */
-	OPERF_NO_APP_USER_SAMPLE, /**<nr. user samples dropped due to no app context available */
-	OPERF_BT_LOST_NO_MAPPING, /**<nr. backtrace samples dropped due to no mapping */
-	OPERF_LOST_INVALID_HYPERV_ADDR, /**<nr. hypervisor samples dropped due to address out-of-range */
-	OPERF_RECORD_LOST_SAMPLE, /**<nr. samples lost reported by perf_events kernel */
-	OPERF_MAX_STATS /**< end of stats */
-};
-#define OPERF_INDEX_OF_FIRST_LOST_STAT 3
-
-/* Warn on lost samples if number of lost samples is greater the this fraction
- * of the total samples
-*/
-#define OPERF_WARN_LOST_SAMPLES_THRESHOLD   0.0001
-
-void operf_print_stats(std::string sampledir, char * starttime, bool throttled);
+void operf_print_stats(std::string sampledir, char * starttime, bool throttled,
+                       std::vector< operf_event_t> const & events);
 
 #endif /* OPERF_STATS_H */
diff --git a/libperf_events/operf_utils.cpp b/libperf_events/operf_utils.cpp
index 06cd566..a87524b 100644
--- a/libperf_events/operf_utils.cpp
+++ b/libperf_events/operf_utils.cpp
@@ -10,7 +10,7 @@
  * (C) Copyright IBM Corp. 2011
  *
  * Modified by Maynard Johnson <maynardj@us.ibm.com>
- * (C) Copyright IBM Corporation 2012
+ * (C) Copyright IBM Corporation 2012, 2013, 2014
  *
  */
 
@@ -22,6 +22,7 @@
 #include <fcntl.h>
 #include <cverb.h>
 #include <iostream>
+#include <sstream>
 #include "operf_counter.h"
 #include "operf_utils.h"
 #ifdef HAVE_LIBPFM
@@ -35,11 +36,10 @@
 #include "op_fileio.h"
 #include "op_libiberty.h"
 #include "operf_stats.h"
+#include "utility.h"
 
 
-extern verbose vmisc;
 extern volatile bool quit;
-extern volatile bool read_quit;
 extern operf_read operfRead;
 extern int sample_reads;
 extern unsigned int pagesize;
@@ -47,6 +47,8 @@ extern char * app_name;
 extern pid_t app_PID;
 extern verbose vrecord;
 extern verbose vconvert;
+extern void __set_event_throttled(int index);
+extern bool track_new_forks;
 
 using namespace std;
 
@@ -63,159 +65,6 @@ static list<event_t *> unresolved_events;
 static struct operf_transient trans;
 static bool sfile_init_done;
 
-/* The handling of mmap's for a process was a bit tricky to get right, in particular,
- * the handling of what I refer to as "deferred mmap's" -- i.e., when we receive an
- * mmap event for which we've not yet received a comm event (so we don't know app name
- * for the process).  I have left in some debugging code here (compiled out via #ifdef)
- * so we can easily test and validate any changes we ever may need to make to this code.
- */
-//#define _TEST_DEFERRED_MAPPING
-#ifdef _TEST_DEFERRED_MAPPING
-static bool do_comm_event;
-static event_t comm_event;
-#endif
-
-
-/* Some architectures (e.g., ppc64) do not use the same event value (code) for oprofile
- * and for perf_events.  The operf-record process requires event values that perf_events
- * understands, but the operf-read process requires oprofile event values.  The purpose of
- * the following method is to map the operf-record event value to a value that
- * opreport can understand.
- */
-#if (defined(__powerpc__) || defined(__powerpc64__))
-#define NIL_CODE ~0U
-
-#if HAVE_LIBPFM3
-static bool _get_codes_for_match(unsigned int pfm_idx, const char name[],
-                                 vector<operf_event_t> * evt_vec)
-{
-	unsigned int num_events = evt_vec->size();
-	int tmp_code, ret;
-	char evt_name[OP_MAX_EVT_NAME_LEN];
-	char * grp_name;
-	unsigned int events_converted = 0;
-	for (unsigned int i = 0; i < num_events; i++) {
-		operf_event_t event = (*evt_vec)[i];
-		if (event.evt_code != NIL_CODE) {
-			events_converted++;
-			continue;
-		}
-		memset(evt_name, 0, OP_MAX_EVT_NAME_LEN);
-		if (!strcmp(event.name, "CYCLES")) {
-			strcpy(evt_name ,"PM_CYC") ;
-		} else if ((grp_name = strstr(event.name, "_GRP"))) {
-			strncpy(evt_name, event.name, grp_name - event.name);
-		} else {
-			strncpy(evt_name, event.name, strlen(event.name));
-		}
-		if (strncmp(name, evt_name, OP_MAX_EVT_NAME_LEN))
-			continue;
-		ret = pfm_get_event_code(pfm_idx, &tmp_code);
-		if (ret != PFMLIB_SUCCESS) {
-			string evt_name_str = event.name;
-			string msg = "libpfm cannot find event code for " + evt_name_str +
-					"; cannot continue";
-			throw runtime_error(msg);
-		}
-		event.evt_code = tmp_code;
-		(*evt_vec)[i] = event;
-		events_converted++;
-		cverb << vrecord << "Successfully converted " << event.name << " to perf_event code "
-		      << hex << tmp_code << endl;
-	}
-	return (events_converted == num_events);
-}
-#else
-static bool _op_get_event_codes(vector<operf_event_t> * evt_vec)
-{
-	int ret, i;
-	unsigned int num_events = evt_vec->size();
-	char evt_name[OP_MAX_EVT_NAME_LEN];
-	char * grp_name;
-	unsigned int events_converted = 0;
-	uint64_t code[1];
-
-	typedef struct {
-		uint64_t    *codes;
-		char        **fstr;
-		size_t      size;
-		int         count;
-		int         idx;
-	} pfm_raw_pmu_encode_t;
-
-	pfm_raw_pmu_encode_t raw;
-	raw.codes = code;
-	raw.count = 1;
-	raw.fstr = NULL;
-
-	if (pfm_initialize() != PFM_SUCCESS)
-		throw runtime_error("Unable to initialize libpfm; cannot continue");
-
-	for (unsigned int i = 0; i < num_events; i++) {
-		operf_event_t event = (*evt_vec)[i];
-		memset(evt_name, 0, OP_MAX_EVT_NAME_LEN);
-		if (!strcmp(event.name, "CYCLES")) {
-			strcpy(evt_name ,"PM_CYC") ;
-		} else if ((grp_name = strstr(event.name, "_GRP"))) {
-			strncpy(evt_name, event.name, grp_name - event.name);
-		} else {
-			strncpy(evt_name, event.name, strlen(event.name));
-		}
-
-		memset(&raw, 0, sizeof(raw));
-		ret = pfm_get_os_event_encoding(evt_name, PFM_PLM3, PFM_OS_NONE, &raw);
-		if (ret != PFM_SUCCESS) {
-			string evt_name_str = event.name;
-			string msg = "libpfm cannot find event code for " + evt_name_str +
-					"; cannot continue";
-			throw runtime_error(msg);
-		}
-
-		event.evt_code = raw.codes[0];
-		(*evt_vec)[i] = event;
-		events_converted++;
-		cverb << vrecord << "Successfully converted " << event.name << " to perf_event code "
-		      << hex << event.evt_code << endl;
-	}
-	return (events_converted == num_events);
-}
-#endif
-
-bool OP_perf_utils::op_convert_event_vals(vector<operf_event_t> * evt_vec)
-{
-	unsigned int i, count;
-	char name[256];
-	int ret;
-	for (unsigned int i = 0; i < evt_vec->size(); i++) {
-		operf_event_t event = (*evt_vec)[i];
-		event.evt_code = NIL_CODE;
-		(*evt_vec)[i] = event;
-	}
-
-#if HAVE_LIBPFM3
-	if (pfm_initialize() != PFMLIB_SUCCESS)
-		throw runtime_error("Unable to initialize libpfm; cannot continue");
-
-	ret = pfm_get_num_events(&count);
-	if (ret != PFMLIB_SUCCESS)
-		throw runtime_error("Unable to use libpfm to obtain event code; cannot continue");
-	for(i =0 ; i < count; i++)
-	{
-		ret = pfm_get_event_name(i, name, 256);
-		if (ret != PFMLIB_SUCCESS)
-			continue;
-		if (_get_codes_for_match(i, name, evt_vec))
-			break;
-	}
-	return (i != count);
-#else
-	return _op_get_event_codes(evt_vec);
-#endif
-}
-
-#endif
-
-
 static inline void update_trans_last(struct operf_transient * trans)
 {
 	trans->last = trans->current;
@@ -232,12 +81,15 @@ static void __handle_fork_event(event_t * event)
 {
 	if (cverb << vconvert)
 		cout << "PERF_RECORD_FORK for tgid/tid = " << event->fork.pid
-		     << "/" << event->fork.tid << endl;
+		     << "/" << event->fork.tid << "; parent " << event->fork.ppid
+		     << "/" << event->fork.ptid << endl;
 
 	map<pid_t, operf_process_info *>::iterator it;
 	operf_process_info * parent = NULL;
 	operf_process_info * forked_proc = NULL;
 
+	// First, see if we already have a proc_info object for the parent process
+	// that did the fork
 	it = process_map.find(event->fork.ppid);
 	if (it != process_map.end()) {
 		parent = it->second;
@@ -245,62 +97,96 @@ static void __handle_fork_event(event_t * event)
 		// Create a new proc info object for the parent, but mark it invalid since we have
 		// not yet received a COMM event for this PID.
 		parent = new operf_process_info(event->fork.ppid, app_name ? app_name : NULL,
-		                                                           app_name != NULL, false);
+		                                app_name != NULL, false);
 		if (cverb << vconvert)
-			cout << "Adding new proc info to collection for PID " << event->fork.ppid << endl;
+			cout << "Adding new proc info to collection for parent PID "
+			     << event->fork.ppid << endl;
 		process_map[event->fork.ppid] = parent;
 	}
 
+	/* If the user requested to profile by "--pid", then we must notify the
+	 * recording process whenever we see a fork event. If the record process
+	 * isn't already recording samples for this thread/process, it will start
+	 * recording now.
+	 */
+	if (track_new_forks) {
+		if (cverb << vconvert)
+			cout << "Inform record process of new pid/tid "
+			     << event->fork.pid << "/" << event->fork.tid << endl;
+		pid_t id = (event->fork.pid == event->fork.ppid) ? event->fork.tid :
+				event->fork.pid;
+		ssize_t len = write(operfRead.get_write_comm_pipe(), &id, sizeof(id));
+		if (len < 0)
+			perror("Internal error on record write_comm_pipe");
+		else if (len != sizeof(id))
+			cerr << "Incomplete write to record write_comm_pipe" << endl;
+		u64 sample_id;
+		// get sample id from recording process
+		len = read(operfRead.get_read_comm_pipe(), &sample_id, sizeof(sample_id));
+		if (sample_id == OP_PERF_NO_SAMPLE_ID) {
+			cverb << vconvert << "convert: No sample_id from record process" << endl;
+		} else {
+			cverb << vconvert << "Add sample_id " << sample_id << " to opHeader" << endl;
+			operfRead.add_sample_id_to_opHeader(sample_id);
+		}
+	}
+
+	/* If the forked process's pid is the same as the parent's, we simply ignore
+	 * the FORK event. This is because operf_process_info objects are stored in the map
+	 * collection by pid, meaning that the forked process and its parent reference the same
+	 * operf_process_info object.
+	 */
+	if (event->fork.pid == event->fork.ppid)
+		return;
+
+	// Now try to find a proc_info for the forked process itself.
 	it = process_map.find(event->fork.pid);
 	if (it == process_map.end()) {
-		forked_proc = new operf_process_info(event->fork.pid,
-		                                     parent->get_app_name().c_str(),
-		                                     parent->is_appname_valid(), parent->is_valid());
+		forked_proc = new operf_process_info(event->fork.pid, NULL, false, false);
 		if (cverb << vconvert)
-			cout << "Adding new proc info to collection for PID " << event->fork.pid << endl;
+			cout << "Adding new proc info to collection for forked PID "
+			     << event->fork.pid << endl;
 		process_map[event->fork.pid] = forked_proc;
-		forked_proc->connect_forked_process_to_parent(parent);
-		parent->add_forked_pid_association(forked_proc);
-		if (cverb << vconvert)
-			cout << "Connecting forked proc " << event->fork.pid << " to parent" << endl;
+		forked_proc->set_fork_info(parent);
 	} else {
-		/* There are two ways that we may get to this point. One way is if
-		 * we've received a COMM event for the forked process before the FORK event.
+		 /*
 		 * Normally, if parent process A forks child process B which then does an exec, we
-		 * first see a FORK event, followed by a COMM event. But apparently there's no
-		 * guarantee in what order these events may be seen by userspace. No matter -- since
-		 * the exec'ed process is now a standalone process (which will get MMAP events
-		 * for all of its mmappings, there's no need to re-associate it back to the parent
-		 * as we do for a non-exec'ed forked process.  So we'll just ignore it.
+		 * first see a FORK event, followed by a COMM event. In this case, the
+		 * operf_process_info created for the forked process is marked as valid.  But there's
+		 * no guarantee what order these events may be seen by userspace -- we could easily
+		 * get MMAP, FORK, and finally a COMM event, which is opposite of "expected". So we
+		 * must handle this.
 		 *
-		 * But the second way that there may be an existing operf_process_info object is if
-		 * a new mmap event (a real MMAP event or a synthesized event (e.g. for hypervisor
-		 * mmapping) occurred for the forked process before a COMM event was received for it.
-		 * In this case, the forked process will be marked invalid until the COMM event
-		 * is received. But if this process does *not* do an exec, there will never be a
-		 * COMM event for it.  Such forked processes should be tightly connected to their
-		 * parent, so we'll go ahead and associate the forked process with its parent.
-		 * If a COMM event comes later for the forked process, we'll disassociate them.
+		 * For a valid operf_process_info, if the forked process pid is unique from that of
+		 * the parent, it implies a COMM event was already received for this forked process.
+		 * Such processes are treated as standalone processes, so we ignore the FORK event.
+		 * For all other cases, if the forked process has not already been associated with
+		 * its parent (i.e., !is_forked()), we go ahead and set that association.
 		 */
+
 		forked_proc = it->second;
-		if (!forked_proc->is_valid()) {
-			forked_proc->connect_forked_process_to_parent(parent);
-			parent->add_forked_pid_association(forked_proc);
+		if (forked_proc->is_valid()) {
+			// Ignore the FORK event
 			if (cverb << vconvert)
-				cout << "Connecting existing incomplete forked proc " << event->fork.pid
-				     << " to parent" << endl;
+				cout << "Forked proc " << event->fork.pid
+				     << " is currently valid (i.e., PERF_RECORD_COMM already received),"
+				     << " so is independent from parent "
+				     << event->fork.ppid << endl;
+			return;
+		}
+
+		if (!forked_proc->is_forked()) {
+			forked_proc->set_fork_info(parent);
+			if (cverb << vconvert)
+				cout << "Set fork info for PID " << event->fork.pid
+				     << " with parent " << event->fork.ppid << endl;
 		}
 	}
 }
 
+
 static void __handle_comm_event(event_t * event)
 {
-#ifdef _TEST_DEFERRED_MAPPING
-	if (!do_comm_event) {
-		comm_event = event;
-		return;
-	}
-#endif
 	if (cverb << vconvert)
 		cout << "PERF_RECORD_COMM for " << event->comm.comm << ", tgid/tid = "
 		     << event->comm.pid << "/" << event->comm.tid << endl;
@@ -308,55 +194,71 @@ static void __handle_comm_event(event_t * event)
 	map<pid_t, operf_process_info *>::iterator it;
 	it = process_map.find(event->comm.pid);
 	if (it == process_map.end()) {
-		/* TODO: Handle system housekeeping tasks.  For certain kinds of processes,
-		 * we will get a COMM event, but never get an MMAP event (e.g, kpsmoused).
-		 * Without receiving an MMAP event, we have no clue whether the name given
-		 * with the COMM event is a full "appname" or not, so the operf_process_info
-		 * is marked invalid.  We end up dropping all samples for such tasks when
-		 * doing a system-wide profile.
-		 */
-
 		/* A COMM event can occur as the result of the app doing a fork/exec,
 		 * where the COMM event is for the forked process.  In that case, we
 		 * pass the event->comm field as the appname argument to the ctor.
 		 */
 		const char * appname_arg;
 		bool is_complete_appname;
-		if (app_name && (app_PID == event->comm.pid)) {
+		if (app_name && (app_PID == (pid_t) event->comm.pid)) {
 			appname_arg = app_name;
 			is_complete_appname = true;
 		} else {
 			appname_arg = event->comm.comm;
 			is_complete_appname = false;
 		}
-		operf_process_info * proc = new operf_process_info(event->comm.pid,appname_arg,
-		                                                   is_complete_appname, true);
+		/* If tid != pid, this may be a forked process for which we've not yet received
+		 * the PERF_RECORD_FORK event, nor have we received any other events for the
+		 * process (e.g., COMM event for parent).  We mark such proc infos as "invalid" so we
+		 * don't falsely attribute samples to a child thread which should, instead,
+		 * be attributed to its parent.  If this is indeed a forked process, we should
+		 * eventually receive a COMM event for the parent (where tid==pid), at which time,
+		 * we'll mark the proc info valid.  If we never receive a COMM event for a parent,
+		 * the proc info will get marked valid during reprocessing so we can attribute
+		 * deferred samples at that time.
+		 */
+
+		bool valid_bit = (event->comm.pid == event->comm.tid);
+		operf_process_info * proc = new operf_process_info(event->comm.pid, appname_arg,
+		                                                   is_complete_appname, valid_bit);
 		if (cverb << vconvert)
 			cout << "Adding new proc info to collection for PID " << event->comm.pid << endl;
 		process_map[event->comm.pid] = proc;
 	} else {
-		if (it->second->is_valid()) {
-			if (it->second->is_forked()) {
-				/* If the operf_process_info object we found was created as a result of
-				 * a FORK event, then it was associated with the parent process and contains
-				 * the parent's appname.  But now we're getting a COMM event for this forked
-				 * process, which means it did an exec, so we need to change the appname
-				 * to the executable associated with this COMM event, which is done via
-				 * calling disassociate_from_parent().
-				 */
-				if (cverb << vconvert)
-					cout << "Disassociating forked proc " << event->comm.pid
-					     << " from parent" << endl;
-				it->second->disassociate_from_parent(event->comm.comm);
-			} else {
-				if (cverb << vconvert)
-					cout << "Received extraneous COMM event for " << event->comm.comm
-					<< ", PID " << event->comm.pid << endl;
+		/* If we reach this point, it means a proc info object for this pid already exists;
+		 * however, if it was created by something other than a "valid" COMM event (e.g., MMAP event),
+		 * its 'valid' bit will be set to false.  NOTE: A "valid" COMM event is one in which
+		 * tid==pid.
+		 *
+		 * We must handle the following situations:
+		 *  o If valid:
+		 *  	- Existing proc info created for a parent (i.e., tid == pid), and the current
+		 *  	  COMM event is for a child -- and we ignore all child COMM events.
+		 *  	- Existing proc info may have invalid appname, so we call set_appname()
+		 *        and see if this COMM event has an appropriate appname.
+		 *
+		 *  o If not valid:
+		 *  	- Existing proc info was created for the parent by an MMAP type of event, and the
+		 *  	  current COMM event is for the parent.
+		 *  	- Existing proc info was created by FORK; now that we have a COMM event for it,
+		 *  	  the process should be treated as a standalone process, so we call
+		 *  	  try_disassociate_from_parent().
+		 */
+		if (!it->second->is_valid()) {
+			// Ignore child COMM events (i.e., pid != tid).
+			if (event->comm.pid == event->comm.tid) {
+				if (it->second->is_forked()) {
+					it->second->try_disassociate_from_parent(event->comm.comm);
+				} else {
+					// Existing proc info created by MMAP event or some such
+					it->second->set_valid();
+					it->second->set_appname(event->comm.comm, false);
+				}
 			}
 		} else {
-			if (cverb << vconvert)
-				cout << "Processing deferred mappings" << endl;
-			it->second->process_deferred_mappings(event->comm.comm);
+			if ((event->comm.pid == event->comm.tid) && !it->second->is_appname_valid()) {
+				it->second->set_appname(event->comm.comm, false);
+			}
 		}
 	}
 }
@@ -398,9 +300,10 @@ static void __handle_mmap_event(event_t * event)
 		mapping->pgoff = event->mmap.pgoff;
 
 		if (cverb << vconvert) {
-			cout << "PERF_RECORD_MMAP for " << event->mmap.filename << endl;
-			cout << "\tstart_addr: " << hex << mapping->start_addr;
-			cout << "; end addr: " << mapping->end_addr << endl;
+			cout << "PERF_RECORD_MMAP for process " << hex << event->mmap.pid << "/"
+			     << event->mmap.tid << ": " << event->mmap.filename << endl;
+			cout << "\tstart_addr: " << hex << mapping->start_addr
+			     << "; end addr: " << mapping->end_addr << endl;
 		}
 
 		if (event->header.misc & PERF_RECORD_MISC_USER)
@@ -447,7 +350,7 @@ static void __handle_mmap_event(event_t * event)
 			 */
 			const char * appname_arg;
 			bool is_complete_appname;
-			if (app_name && (app_PID == event->mmap.pid)) {
+			if (app_name && (app_PID == (pid_t)event->mmap.pid)) {
 				appname_arg = app_name;
 				is_complete_appname = true;
 			} else {
@@ -457,28 +360,14 @@ static void __handle_mmap_event(event_t * event)
 
 			operf_process_info * proc = new operf_process_info(event->mmap.pid, appname_arg,
 			                                                   is_complete_appname, false);
-			proc->add_deferred_mapping(mapping);
-			if (cverb << vconvert)
-				cout << "Added deferred mapping " << event->mmap.filename
-				      << " for new process_info object" << endl;
 			process_map[event->mmap.pid] = proc;
-#ifdef _TEST_DEFERRED_MAPPING
-			if (!do_comm_event) {
-				do_comm_event = true;
-				__handle_comm_event(comm_event, out);
-			}
-#endif
-		} else if (!it->second->is_valid()) {
-			it->second->add_deferred_mapping(mapping);
-			if (cverb << vconvert)
-				cout << "Added deferred mapping " << event->mmap.filename
-				      << " for existing but incomplete process_info object" << endl;
+			proc->process_mapping(mapping, false);
 		} else {
-			if (cverb << vconvert)
-				cout << "Process mapping for " << event->mmap.filename << " on behalf of "
-				     << event->mmap.pid << endl;
-			it->second->process_new_mapping(mapping);
+			it->second->process_mapping(mapping, false);
 		}
+		if (cverb << vconvert)
+			cout << "Process mapping for " << event->mmap.filename << " on behalf of "
+			<< event->mmap.pid << endl;
 	}
 }
 
@@ -497,22 +386,12 @@ static struct operf_transient * __get_operf_trans(struct sample_data * data, boo
 	} else {
 		// Find operf_process info for data.tgid.
 		std::map<pid_t, operf_process_info *>::const_iterator it = process_map.find(data->pid);
-		if (it != process_map.end() && (it->second->is_appname_valid())) {
+		if (it != process_map.end() && it->second->is_appname_valid()) {
 			proc = it->second;
 		} else {
-			/* This can happen for the following reasons:
-			 *   - We get a sample before getting a COMM or MMAP
-			 *     event for the process being profiled
-			 *   - The COMM event has been processed, but since that
-			 *     only gives 16 chars of the app name, we don't
-			 *     have a valid app name yet
-			 *   - The kernel incorrectly records a sample for a
-			 *     process other than the one we requested (not
-			 *     likely -- this would be a kernel bug if it did)
-			 *
-			*/
+			// This can validly happen if we get a sample before getting a COMM event for the process
 			if ((cverb << vconvert) && !first_time_processing) {
-				cerr << "Dropping sample -- process info unavailable" << endl;
+				cout << "Dropping sample -- process info unavailable for PID " << data->pid << endl;
 				if (kernel_mode)
 					operf_stats[OPERF_NO_APP_KERNEL_SAMPLE]++;
 				else
@@ -551,7 +430,7 @@ static struct operf_transient * __get_operf_trans(struct sample_data * data, boo
 			 */
 		}
 	} else {
-		op_mmap = proc->find_mapping_for_sample(data->ip);
+		op_mmap = proc->find_mapping_for_sample(data->ip, hypervisor_domain);
 		if (op_mmap && op_mmap->is_hypervisor && !hypervisor_domain) {
 			cverb << vconvert << "Invalid sample: Address falls within hypervisor address range, but is not a hypervisor domain sample." << endl;
 			operf_stats[OPERF_INVALID_CTX]++;
@@ -563,9 +442,10 @@ static struct operf_transient * __get_operf_trans(struct sample_data * data, boo
 			cout << "Found mmap for sample; image_name is " << op_mmap->filename <<
 			" and app name is " << proc->get_app_name() << endl;
 		trans.image_name = op_mmap->filename;
-		trans.app_filename = proc->get_app_name().c_str();
+		trans.app_len = proc->get_app_name().size();
+		strncpy(trans.app_filename, proc->get_app_name().c_str(), trans.app_len);
+		trans.app_filename[trans.app_len] = '\0';
 		trans.image_len = strlen(trans.image_name);
-		trans.app_len = strlen(trans.app_filename);
 		trans.start_addr = op_mmap->start_addr;
 		trans.end_addr = op_mmap->end_addr;
 		trans.tgid = data->pid;
@@ -582,11 +462,15 @@ static struct operf_transient * __get_operf_trans(struct sample_data * data, boo
 		trans.sample_id = data->id;
 		retval = &trans;
 	} else {
-		if ((cverb << vconvert) && !first_time_processing) {
-			string domain = trans.in_kernel ? "kernel" : "userspace";
-			cerr << "Discarding " << domain << " sample for process " << data->pid
-			     << " where no appropriate mapping was found. (pc=0x"
-			     << hex << data->ip <<")" << endl;
+		if (!first_time_processing) {
+			if (cverb << vconvert) {
+				string domain = trans.in_kernel ? "kernel" : "userspace";
+				ostringstream message;
+				message << "Discarding " << domain << " sample for process " << data->pid
+						<< " where no appropriate mapping was found. (pc=0x"
+						<< hex << data->ip <<")" << endl;
+				cout << message.str();
+			}
 			operf_stats[OPERF_LOST_NO_MAPPING]++;
 		}
 		retval = NULL;
@@ -598,11 +482,12 @@ out:
 static void __handle_callchain(u64 * array, struct sample_data * data)
 {
 	bool in_kernel = false;
+	u64 sampled_addr = data->ip;
 	data->callchain = (struct ip_callchain *) array;
 	if (data->callchain->nr) {
 		if (cverb << vconvert)
 			cout << "Processing callchain" << endl;
-		for (int i = 0; i < data->callchain->nr; i++) {
+		for (u64 i = 0; i < data->callchain->nr; i++) {
 			data->ip = data->callchain->ips[i];
 			if (data->ip >= PERF_CONTEXT_MAX) {
 				switch (data->ip) {
@@ -619,6 +504,8 @@ static void __handle_callchain(u64 * array, struct sample_data * data)
 					default:
 						break;
 				}
+				if (i == 0 && (data->callchain->ips[i+1]==sampled_addr))
+					i++;
 				continue;
 			}
 			if (data->ip && __get_operf_trans(data, false, in_kernel)) {
@@ -627,13 +514,14 @@ static void __handle_callchain(u64 * array, struct sample_data * data)
 					update_trans_last(&trans);
 				}
 			} else {
-				if (data->ip)
+				if (data->ip && !first_time_processing)
 					operf_stats[OPERF_BT_LOST_NO_MAPPING]++;
 			}
 		}
 	}
 }
 
+#if PPC64_ARCH
 static void __map_hypervisor_sample(u64 ip, u32 pid)
 {
 	operf_process_info * proc;
@@ -650,7 +538,7 @@ static void __map_hypervisor_sample(u64 ip, u32 pid)
 		 */
 		const char * appname_arg;
 		bool is_complete_appname;
-		if (app_name && (app_PID == pid)) {
+		if (app_name && (app_PID == (pid_t)pid)) {
 			appname_arg = app_name;
 			is_complete_appname = true;
 		} else {
@@ -670,19 +558,39 @@ static void __map_hypervisor_sample(u64 ip, u32 pid)
 	}
 	proc->process_hypervisor_mapping(ip);
 }
+#endif
+
+/* Flag the throttled event; returns 0, or -1 if the id lookup is negative. */
+static int __handle_throttle_event(event_t * event)
+{
+	/* Keep the result local: trans.event caches the event number of the
+	 * last sample and is refreshed only when the sample id changes. */
+	int const evt = operfRead.get_eventnum_by_perf_event_id(event->throttle.id);
+	if (evt >= 0)
+		__set_event_throttled(evt);
+	return (evt >= 0) ? 0 : -1;
+}
 
-static void __handle_sample_event(event_t * event, u64 sample_type)
+static int __handle_sample_event(event_t * event, u64 sample_type)
 {
 	struct sample_data data;
 	bool found_trans = false;
 	bool in_kernel;
-	const struct operf_mmap * op_mmap = NULL;
+	int rc = 0;
 	bool hypervisor = (event->header.misc == PERF_RECORD_MISC_HYPERVISOR);
 	u64 *array = event->sample.array;
 
+	/* As we extract the various pieces of information from the sample data array,
+	 * if we find that the sample type does not match up with an expected mandatory
+	 * perf_event_sample_format, we consider this as corruption of the sample data
+	 * stream.  Since it wouldn't make sense to continue with suspect data, we quit.
+	 */
 	if (sample_type & PERF_SAMPLE_IP) {
 		data.ip = event->ip.ip;
 		array++;
+	} else {
+		rc = -1;
+		goto done;
 	}
 
 	if (sample_type & PERF_SAMPLE_TID) {
@@ -690,14 +598,21 @@ static void __handle_sample_event(event_t * event, u64 sample_type)
 		data.pid = p[0];
 		data.tid = p[1];
 		array++;
+	} else {
+		rc = -1;
+		goto done;
 	}
 
 	data.id = ~0ULL;
 	if (sample_type & PERF_SAMPLE_ID) {
 		data.id = *array;
 		array++;
+	} else {
+		rc = -1;
+		goto done;
 	}
 
+	// PERF_SAMPLE_CPU is optional (see --separate-cpu).
 	if (sample_type & PERF_SAMPLE_CPU) {
 		u_int32_t *p = (u_int32_t *)array;
 		data.cpu = *p;
@@ -708,7 +623,7 @@ static void __handle_sample_event(event_t * event, u64 sample_type)
 	} else if (event->header.misc == PERF_RECORD_MISC_USER) {
 		in_kernel = false;
 	}
-#if (defined(__powerpc__) || defined(__powerpc64__))
+#if PPC64_ARCH
 	else if (event->header.misc == PERF_RECORD_MISC_HYPERVISOR) {
 #define MAX_HYPERVISOR_ADDRESS 0xfffffffULL
 		if (data.ip > MAX_HYPERVISOR_ADDRESS) {
@@ -732,18 +647,22 @@ static void __handle_sample_event(event_t * event, u64 sample_type)
 			case PERF_RECORD_MISC_HYPERVISOR:
 				domain = "hypervisor";
 				break;
+#if HAVE_PERF_GUEST_MACROS
 			case PERF_RECORD_MISC_GUEST_KERNEL:
 				domain = "guest OS";
 				break;
 			case PERF_RECORD_MISC_GUEST_USER:
 				domain = "guest user";
 				break;
+#endif
 			default:
 				domain = "unknown";
 				break;
 			}
-			cerr << "Discarding sample from " << domain << " domain: "
-			     << hex << data.ip << endl;
+			ostringstream message;
+			message << "Discarding sample from " << domain << " domain: "
+			        << hex << data.ip << endl;
+			cout << message.str();
 		}
 		goto out;
 	}
@@ -759,17 +678,23 @@ static void __handle_sample_event(event_t * event, u64 sample_type)
 		goto out;
 	}
 
-	if (cverb << vconvert)
-		cout << "(IP, " <<  event->header.misc << "): " << dec << data.pid << "/"
-		      << data.tid << ": " << hex << (unsigned long long)data.ip
-		      << endl << "\tdata ID: " << data.id << endl;
+	if (cverb << vconvert) {
+		ostringstream message;
+		message << "(IP, " <<  event->header.misc << "): " << dec << data.pid << "/"
+		        << data.tid << ": " << hex << (unsigned long long)data.ip
+		        << endl << "\tdata ID: " << data.id << endl;
+		cout << message.str();
+	}
 
 	// Verify the sample.
-	trans.event = operfRead.get_eventnum_by_perf_event_id(data.id);
-	if (trans.event < 0) {
-		cerr << "Event num " << trans.event << " for id " << data.id
-		     << " is invalid. Skipping sample." << endl;
-		goto out;
+	if (data.id != trans.sample_id) {
+		trans.event = operfRead.get_eventnum_by_perf_event_id(data.id);
+		if (trans.event < 0) {
+			cerr << "Event num " << trans.event << " for id " << data.id
+					<< " is invalid. Sample data appears to be corrupted." << endl;
+			rc = -1;
+			goto out;
+		}
 	}
 
 	/* Only need to check for "no_user" since "no_kernel" is done by
@@ -798,6 +723,10 @@ static void __handle_sample_event(event_t * event, u64 sample_type)
 			cout << "Deferring processing of hypervisor sample." << endl;
 		goto out;
 	}
+	// This sample is for a different event than the last sample
+	if (data.id != trans.sample_id)
+		goto find_trans;
+
 	/* Check for the common case first -- i.e., where the current sample is from
 	 * the same context as the previous sample.  For the "no-vmlinux" case, start_addr
 	 * and end_addr will be zero, so need to make sure we detect that.
@@ -825,6 +754,7 @@ static void __handle_sample_event(event_t * event, u64 sample_type)
 		found_trans = true;
 	}
 
+find_trans:
 	if (!found_trans && __get_operf_trans(&data, hypervisor, in_kernel)) {
 		trans.current = operf_sfile_find(&trans);
 		found_trans = true;
@@ -845,7 +775,7 @@ static void __handle_sample_event(event_t * event, u64 sample_type)
 	}
 
 	if (first_time_processing) {
-		event_t * ev = (event_t *)xmalloc(event->header.size);
+		event_t * ev = (event_t *)malloc(event->header.size);
 		memcpy(ev, event, event->header.size);
 		unresolved_events.push_back(ev);
 	}
@@ -853,7 +783,7 @@ static void __handle_sample_event(event_t * event, u64 sample_type)
 out:
 	clear_trans(&trans);
 done:
-	return;
+	return rc;
 }
 
 
@@ -865,8 +795,11 @@ done:
  * when profiling began.  Additional PERF_RECORD_MMAP records may appear later in the data
  * stream (e.g., dlopen for single-process profiling or new process startup for system-wide
  * profiling.
+ *
+ * This function returns '0' on success and '-1' on failure.  A failure implies the sample
+ * data is probably corrupt and the calling function should handle appropriately.
  */
-void OP_perf_utils::op_write_event(event_t * event, u64 sample_type)
+int OP_perf_utils::op_write_event(event_t * event, u64 sample_type)
 {
 #if 0
 	if (event->header.type < PERF_RECORD_MAX) {
@@ -876,47 +809,80 @@ void OP_perf_utils::op_write_event(event_t * event, u64 sample_type)
 
 	switch (event->header.type) {
 	case PERF_RECORD_SAMPLE:
-		__handle_sample_event(event, sample_type);
-		return;
+		return __handle_sample_event(event, sample_type);
 	case PERF_RECORD_MMAP:
 		__handle_mmap_event(event);
-		return;
+		return 0;
 	case PERF_RECORD_COMM:
 		if (!sfile_init_done) {
 			operf_sfile_init();
 			sfile_init_done = true;
 		}
 		__handle_comm_event(event);
-		return;
+		return 0;
 	case PERF_RECORD_FORK:
 		__handle_fork_event(event);
-		return;
+		return 0;
 	case PERF_RECORD_THROTTLE:
-		throttled = true;
-		return;
+		return __handle_throttle_event(event);
 	case PERF_RECORD_LOST:
 		operf_stats[OPERF_RECORD_LOST_SAMPLE] += event->lost.lost;
-		return;
+		return 0;
 	case PERF_RECORD_EXIT:
-		return;
+		return 0;
 	default:
-		// OK, ignore all other header types.
-		cverb << vconvert << "No matching event type for " << hex << event->header.type << endl;
-		return;
+		if (event->header.type > PERF_RECORD_MAX) {
+			// Bad header
+			ostringstream message;
+			message << "Invalid event type " << hex << event->header.type << endl;
+			message << "Sample data is probably corrupted." << endl;
+			cerr << message.str();
+			return -1;
+		} else {
+			ostringstream message;
+			message << "Event type "<< hex << event->header.type
+			        << " is ignored." << endl;
+			cverb << vconvert << message.str();
+			return 0;
+		}
 	}
 }
 
-void OP_perf_utils::op_reprocess_unresolved_events(u64 sample_type)
+void OP_perf_utils::op_reprocess_unresolved_events(u64 sample_type, bool print_progress)
 {
+	int num_recs = 0;
+
 	cverb << vconvert << "Reprocessing samples" << endl;
+
+	map<pid_t, operf_process_info *>::iterator procs = process_map.begin();
+	for (; procs != process_map.end(); procs++) {
+		if (!procs->second->is_valid()) {
+			if (procs->second->is_forked()) {
+				procs->second->connect_forked_process_to_parent();
+			} else {
+				procs->second->set_valid();
+			}
+		}
+		// Force the appname_valid to true so we don't drop any samples for this process.
+		// The appname may not be accurate, but it's the best we can do now.
+		procs->second->set_appname_valid();
+	}
 	list<event_t *>::const_iterator it = unresolved_events.begin();
+	int data_error = 0;
 	for (; it != unresolved_events.end(); it++) {
 		event_t * evt = (*it);
+		if (data_error < 0) {
+			free(evt);
+			continue;
+		}
 		// This is just a sanity check, since all events in this list
 		// are unresolved sample events.
 		if (evt->header.type == PERF_RECORD_SAMPLE) {
-			__handle_sample_event(evt, sample_type);
+			data_error = __handle_sample_event(evt, sample_type);
 			free(evt);
+			num_recs++;
+			if ((num_recs % 1000000 == 0) && print_progress)
+				cerr << ".";
 		}
 	}
 }
@@ -946,13 +912,6 @@ void OP_perf_utils::op_perfrecord_sigusr1_handler(int sig __attribute__((unused)
 	quit = true;
 }
 
-void OP_perf_utils::op_perfread_sigusr1_handler(int sig __attribute__((unused)),
-		siginfo_t * siginfo __attribute__((unused)),
-		void *u_context __attribute__((unused)))
-{
-	read_quit = true;
-}
-
 int OP_perf_utils::op_read_from_stream(ifstream & is, char * buf, streamsize sz)
 {
 	int rc = 0;
@@ -975,13 +934,18 @@ static int __mmap_trace_file(struct mmap_info & info)
 	info.buf = (char *) mmap(NULL, mmap_size, mmap_prot,
 	                         mmap_flags, info.traceFD, info.offset);
 	if (info.buf == MAP_FAILED) {
-		cerr << "Error: mmap failed with errno:\n\t" << strerror(errno) << endl;
+		ostringstream message;
+		message << "Error: mmap failed with errno:\n\t" << strerror(errno) << endl;
+		message << "\tmmap_size: 0x" << hex << mmap_size << "; offset: 0x" << info.offset << endl;
+		cerr << message.str();
 		return -1;
 	}
 	else {
-		cverb << vconvert << hex << "mmap with the following parameters" << endl
-		      << "\tinfo.head: " << info.head << endl
-		      << "\tinfo.offset: " << info.offset << endl;
+		ostringstream message;
+		message << hex << "mmap with the following parameters" << endl
+		        << "\tinfo.head: " << info.head << endl
+		        << "\tinfo.offset: " << info.offset << endl;
+		cverb << vconvert << message.str();
 		return 0;
 	}
 }
@@ -991,8 +955,6 @@ int OP_perf_utils::op_mmap_trace_file(struct mmap_info & info, bool init)
 {
 	u64 shift;
 	if (init) {
-		if (!pg_sz)
-			pg_sz = sysconf(_SC_PAGESIZE);
 		if (!mmap_size) {
 			if (MMAP_WINDOW_SZ > info.file_data_size) {
 				mmap_size = info.file_data_size;
@@ -1017,7 +979,10 @@ int OP_perf_utils::op_write_output(int output, void *buf, size_t size)
 		int ret = write(output, buf, size);
 
 		if (ret < 0) {
-			string errmsg = "Internal error:  Failed to write sample data to pipe. errno is ";
+			if (errno == EINTR)
+				continue;
+
+			string errmsg = "Internal error:  Failed to write sample data to output fd. errno is ";
 			errmsg += strerror(errno);
 			throw runtime_error(errmsg);
 		}
@@ -1029,8 +994,74 @@ int OP_perf_utils::op_write_output(int output, void *buf, size_t size)
 	return sum;
 }
 
+/* On certain architectures and older kernels (3.0 and older, I think), a static mapping
+ * was placed into every process's memory map to provide vsyscall functionality.  The
+ * mapping is labeled '[vsyscall]'.  For some reason (which I don't care to investigate,
+ * since vsyscall is now obsolete), the kernel's perf_events subsystem does not send a
+ * PERF_RECORD_MMAP message for this mapping.  The function below is used to synthesize
+ * such a message so that samples taken in the vsyscall memory range can be correctly
+ * attributed.
+ */
+void OP_perf_utils::op_get_vsyscall_mapping(pid_t tgid, int output_fd, operf_record * pr)
+{
+	// Look through /proc/<tgid>/maps for an executable mapping whose line names
+	// "[vsyscall]".  For the first one found, build a PERF_RECORD_MMAP record,
+	// write it to output_fd and hand the op_write_output() result to
+	// pr->add_to_total().  Returns without writing anything if the maps file
+	// cannot be opened or no such mapping is listed.
+	const char * const imagename = "[vsyscall]";
+	char fname[PATH_MAX];
+	FILE *fp;
+	char line_buffer[BUFSIZ];
+	char perms[5] = "";
+	unsigned long long start_addr, end_addr;
+	struct mmap_event mmap;
+	size_t size;
+
+	memset(&mmap, 0, sizeof(mmap));
+	mmap.header.type = PERF_RECORD_MMAP;
+	mmap.header.misc = PERF_RECORD_MISC_USER;
+
+	snprintf(fname, sizeof(fname), "/proc/%d/maps", tgid);
+
+	fp = fopen(fname, "r");
+	if (fp == NULL) {
+		// Process must have exited already or invalid pid.
+		cverb << vrecord << "couldn't open " << fname << endl;
+		return;
+	}
+
+	while (fgets(line_buffer, sizeof(line_buffer), fp) != NULL) {
+		// Only the address range and permissions are parsed; the field width
+		// keeps a malformed line from overrunning perms, and a line that does
+		// not yield all three fields is skipped instead of being trusted.
+		if (sscanf(line_buffer, "%llx-%llx %4s",
+		           &start_addr, &end_addr, perms) != 3)
+			continue;
+		if (perms[2] != 'x' || strstr(line_buffer, imagename) == NULL)
+			continue;
+
+		size = strlen(imagename) + 1;
+		strcpy(mmap.filename, imagename);
+		size = align_64bit(size);
+		mmap.start = start_addr;
+		mmap.len = end_addr - mmap.start;
+		mmap.pid = tgid;
+		mmap.tid = tgid;
+		mmap.header.size = (sizeof(mmap) -
+				(sizeof(mmap.filename) - size));
+		int num = OP_perf_utils::op_write_output(output_fd, &mmap, mmap.header.size);
+		if (cverb << vrecord)
+			cout << "Created MMAP event for " << imagename << endl;
+		pr->add_to_total(num);
+		break;
+	}
+
+	fclose(fp);
+	return;
+}
 
-static void op_record_process_exec_mmaps(pid_t pid, pid_t tgid, int output_fd, operf_record * pr)
+void OP_perf_utils::op_record_process_exec_mmaps(pid_t pid, pid_t tgid, int output_fd, operf_record * pr)
 {
 	char fname[PATH_MAX];
 	FILE *fp;
@@ -1048,6 +1079,8 @@ static void op_record_process_exec_mmaps(pid_t pid, pid_t tgid, int output_fd, o
 		char line_buffer[BUFSIZ];
 		char perms[5], pathname[PATH_MAX], dev[16];
 		unsigned long long start_addr, end_addr, offset;
+		const char * anon_mem = "//anon";
+
 		u_int32_t inode;
 
 		memset(pathname, '\0', sizeof(pathname));
@@ -1069,6 +1102,12 @@ static void op_record_process_exec_mmaps(pid_t pid, pid_t tgid, int output_fd, o
 			if (imagename == NULL)
 				imagename = strstr(pathname, "[vdso]");
 
+			if (imagename == NULL)
+				imagename = strstr(pathname, "[vsyscall]");
+
+			if ((imagename == NULL) && !strstr(pathname, "["))
+				imagename = (char *)anon_mem;
+
 			if (imagename == NULL)
 				continue;
 
@@ -1092,8 +1131,7 @@ static void op_record_process_exec_mmaps(pid_t pid, pid_t tgid, int output_fd, o
 	return;
 }
 
-static int _record_one_process_info(pid_t pid, bool sys_wide, operf_record * pr,
-                                    int output_fd)
+static int _get_one_process_info(bool sys_wide, pid_t pid, operf_record * pr)
 {
 	struct comm_event comm;
 	char fname[PATH_MAX];
@@ -1116,7 +1154,7 @@ static int _record_one_process_info(pid_t pid, bool sys_wide, operf_record * pr,
 		if (!sys_wide) {
 			cerr << "Unable to find process information for process " << pid << "." << endl;
 			cverb << vrecord << "couldn't open " << fname << endl;
-			return -1;
+			return OP_PERF_HANDLED_ERROR;
 		} else {
 			return 0;
 		}
@@ -1151,10 +1189,10 @@ static int _record_one_process_info(pid_t pid, bool sys_wide, operf_record * pr,
 	size = align_64bit(size);
 	comm.header.size = sizeof(comm) - (sizeof(comm.comm) - size);
 	if (tgid != pid) {
-		// passed pid must have been a secondary thread
+		// passed pid must have been a secondary thread, and we
+		// don't go looking at the /proc/<pid>/task of such processes.
 		comm.tid = pid;
-		int num = OP_perf_utils::op_write_output(output_fd, &comm, comm.header.size);
-		pr->add_to_total(num);
+		pr->add_process(comm);
 		goto out;
 	}
 
@@ -1163,7 +1201,8 @@ static int _record_one_process_info(pid_t pid, bool sys_wide, operf_record * pr,
 	if (tids == NULL) {
 		// process must have exited
 		ret = -1;
-		cverb << vrecord << "opendir returned NULL" << endl;
+		cverb << vrecord << "Process " << pid << " apparently exited while "
+		      << "process info was being collected"<< endl;
 		goto out;
 	}
 
@@ -1174,43 +1213,31 @@ static int _record_one_process_info(pid_t pid, bool sys_wide, operf_record * pr,
 			continue;
 
 		comm.tid = pid;
-
-		int num = OP_perf_utils::op_write_output(output_fd, &comm, comm.header.size);
-		pr->add_to_total(num);
+		pr->add_process(comm);
 	}
 	closedir(tids);
-	if (cverb << vrecord)
-		cout << "Created COMM event for " << comm.comm << endl;
 
 out:
-	op_record_process_exec_mmaps(pid, tgid, output_fd, pr);
-
 	fclose(fp);
 	if (ret) {
 		cverb << vrecord << "couldn't get app name and tgid for pid "
 		      << dec << pid << " from /proc fs." << endl;
 	}
 	return ret;
-
 }
 
 /* Obtain process information for an active process (where the user has
  * passed in a process ID via the --pid option) or all active processes
- * (where system_wide==true).  Then generate the necessary PERF_RECORD_COMM
- * and PERF_RECORD_MMAP entries into the profile data stream.
+ * (where system_wide==true).
  */
-int OP_perf_utils::op_record_process_info(bool system_wide, pid_t pid, operf_record * pr,
-                                          int output_fd)
+int OP_perf_utils::op_get_process_info(bool system_wide, pid_t pid, operf_record * pr)
 {
 	int ret = 0;
 	if (cverb << vrecord)
-		cout << "op_record_process_info" << endl;
+		cout << "op_get_process_info" << endl;
 	if (!system_wide) {
-		ret = _record_one_process_info(pid, system_wide, pr, output_fd);
+		ret = _get_one_process_info(system_wide, pid, pr);
 	} else {
-		char buff[BUFSIZ];
-		pid_t tgid = 0;
-		size_t size = 0;
 		DIR *pids;
 		struct dirent dirent, *next;
 
@@ -1223,12 +1250,9 @@ int OP_perf_utils::op_record_process_info(bool system_wide, pid_t pid, operf_rec
 		while (!readdir_r(pids, &dirent, &next) && next) {
 			char *end;
 			pid = strtol(dirent.d_name, &end, 10);
-			if (((errno == ERANGE && (pid == LONG_MAX || pid == LONG_MIN))
-					|| (errno != 0 && pid == 0)) || (end == dirent.d_name)) {
-				cverb << vmisc << "/proc entry " << dirent.d_name << " is not a PID" << endl;
+			if (*end)
 				continue;
-			}
-			if ((ret = _record_one_process_info(pid, system_wide, pr, output_fd)) < 0)
+			if ((ret = _get_one_process_info(system_wide, pid, pr)) < 0)
 				break;
 		}
 		closedir(pids);
@@ -1249,7 +1273,6 @@ static void _record_module_info(int output_fd, operf_record * pr)
 	const char * fname = "/proc/modules";
 	FILE *fp;
 	char * line;
-	struct operf_kernel_image * image;
 	int module_size;
 	char ref_count[32+1];
 	int ret;
@@ -1289,6 +1312,14 @@ static void _record_module_info(int output_fd, operf_record * pr)
 			continue;
 		}
 
+		if (start_address == 0) {
+			cerr << "Unable to obtain module information. Set "
+			     << "/proc/sys/kernel/kptr_restrict to 0 to "
+			     << "collect kernel module samples." << endl;
+			fclose(fp);
+			return;
+		}
+
 		mmap.header.type = PERF_RECORD_MMAP;
 		mmap.header.misc = PERF_RECORD_MISC_KERNEL;
 		size = strlen(module_name) + 1;
@@ -1321,10 +1352,21 @@ void OP_perf_utils::op_record_kernel_info(string vmlinux_file, u64 start_addr, u
 	mmap.header.type = PERF_RECORD_MMAP;
 	mmap.header.misc = PERF_RECORD_MISC_KERNEL;
 	if (vmlinux_file.empty()) {
-		size = strlen( "no_vmlinux") + 1;
-		strncpy(mmap.filename, "no-vmlinux", size);
-		mmap.start = 0ULL;
-		mmap.len = 0ULL;
+		if ((start_addr == 0) && (end_addr == 0)) {
+			/* Did not have permission to read
+			 * /proc/kallsyms and no vmlinux file
+			 */
+			size = strlen( "no_vmlinux") + 1;
+			strncpy(mmap.filename, "no-vmlinux", size);
+			mmap.start = 0ULL;
+			mmap.len = 0ULL;
+		} else {
+			size = sizeof(KALL_SYM_FILE) + 1;
+			strncpy(mmap.filename, KALL_SYM_FILE, size);
+			mmap.start = start_addr;
+			mmap.len = end_addr - mmap.start;
+		}
+
 	} else {
 		size = vmlinux_file.length() + 1;
 		strncpy(mmap.filename, vmlinux_file.c_str(), size);
@@ -1337,11 +1379,16 @@ void OP_perf_utils::op_record_kernel_info(string vmlinux_file, u64 start_addr, u
 	mmap.header.size = (sizeof(mmap) -
 			(sizeof(mmap.filename) - size));
 	int num = op_write_output(output_fd, &mmap, mmap.header.size);
-	if (cverb << vrecord)
-		cout << "Created MMAP event of size " << mmap.header.size << " for " <<mmap.filename << ". length: "
-		     << hex << mmap.len << "; start addr: " << mmap.start << endl;
+	if (cverb << vrecord) {
+		ostringstream message;
+		message << "Created MMAP event of size " << mmap.header.size << " for " <<mmap.filename << ". length: "
+		        << hex << mmap.len << "; start addr: " << mmap.start << endl;
+		cout << message.str();
+	}
 	pr->add_to_total(num);
-	_record_module_info(output_fd, pr);
+
+	if (start_addr && end_addr)
+		_record_module_info(output_fd, pr);
 }
 
 void OP_perf_utils::op_get_kernel_event_data(struct mmap_data *md, operf_record * pr)
@@ -1360,6 +1407,9 @@ void OP_perf_utils::op_get_kernel_event_data(struct mmap_data *md, operf_record
 	void *buf;
 	int64_t diff;
 
+	if (old == head)
+		return;
+
 	diff = head - old;
 	if (diff < 0) {
 		throw runtime_error("ERROR: event buffer wrapped, which should NEVER happen.");
@@ -1384,38 +1434,3 @@ void OP_perf_utils::op_get_kernel_event_data(struct mmap_data *md, operf_record
 	md->prev = old;
 	pc->data_tail = old;
 }
-
-
-int OP_perf_utils::op_get_next_online_cpu(DIR * dir, struct dirent *entry)
-{
-#define OFFLINE 0x30
-	unsigned int cpu_num;
-	char cpu_online_pathname[40];
-	int res;
-	FILE * online;
-	again:
-	do {
-		entry = readdir(dir);
-		if (!entry)
-			return -1;
-	} while (entry->d_type != DT_DIR);
-
-	res = sscanf(entry->d_name, "cpu%u", &cpu_num);
-	if (res <= 0)
-		goto again;
-
-	errno = 0;
-	snprintf(cpu_online_pathname, 40, "/sys/devices/system/cpu/cpu%u/online", cpu_num);
-	if ((online = fopen(cpu_online_pathname, "r")) == NULL) {
-		cerr << "Unable to open " << cpu_online_pathname << endl;
-		if (errno)
-			cerr << strerror(errno) << endl;
-		return -1;
-	}
-	res = fgetc(online);
-	fclose(online);
-	if (res == OFFLINE)
-		goto again;
-	else
-		return cpu_num;
-}
diff --git a/libperf_events/operf_utils.h b/libperf_events/operf_utils.h
index c14942d..32954cc 100644
--- a/libperf_events/operf_utils.h
+++ b/libperf_events/operf_utils.h
@@ -45,7 +45,23 @@ extern bool throttled;
 #define MMAP_WINDOW_SZ (32 * 1024 * 1024ULL)
 #endif
 
-extern unsigned int op_nr_counters;
+#define OP_MAX_EVENTS 24
+
+/* A macro to be used for ppc64 architecture-specific code.  The '__powerpc__' macro
+ * is defined for both ppc64 and ppc32 architectures, so we must further qualify by
+ * including the 'HAVE_LIBPFM' macro, since that macro will be defined only for ppc64.
+ */
+#define PPC64_ARCH (HAVE_LIBPFM) && ((defined(__powerpc__) || defined(__powerpc64__)))
+
+# define likely(x)	__builtin_expect(!!(x), 1)
+# define unlikely(x)	__builtin_expect(!!(x), 0)
+
+/* Non-zero when hdr.size is larger than sizeof(hdr) and hdr.type is non-zero. */
+#define is_header_valid(hdr)			\
+	(((hdr).size > sizeof(hdr)) &&		\
+	((hdr).type) && ((hdr).size))
+
+extern unsigned int op_nr_events;
 
 static inline size_t align_64bit(u64 x)
 {
@@ -65,17 +81,14 @@ void op_get_kernel_event_data(struct mmap_data *md, operf_record * pr);
 void op_perfrecord_sigusr1_handler(int sig __attribute__((unused)),
 		siginfo_t * siginfo __attribute__((unused)),
 		void *u_context __attribute__((unused)));
-void op_perfread_sigusr1_handler(int sig __attribute__((unused)),
-		siginfo_t * siginfo __attribute__((unused)),
-		void *u_context __attribute__((unused)));
-int op_record_process_info(bool system_wide, pid_t pid, operf_record * pr, int output_fd);
+int op_get_process_info(bool system_wide, pid_t pid, operf_record * pr);
+void op_record_process_exec_mmaps(pid_t pid, pid_t tgid, int output_fd, operf_record * pr);
+void op_get_vsyscall_mapping(pid_t tgid, int output_fd, operf_record * pr);
 int op_write_output(int output, void *buf, size_t size);
-void op_write_event(event_t * event, u64 sample_type);
+int op_write_event(event_t * event, u64 sample_type);
 int op_read_from_stream(std::ifstream & is, char * buf, std::streamsize sz);
 int op_mmap_trace_file(struct mmap_info & info, bool init);
-int op_get_next_online_cpu(DIR * dir, struct dirent *entry);
-bool op_convert_event_vals(std::vector<operf_event_t> * evt_vec);
-void op_reprocess_unresolved_events(u64 sample_type);
+void op_reprocess_unresolved_events(u64 sample_type, bool print_progress);
 void op_release_resources(void);
 }
 
@@ -148,6 +161,11 @@ void op_release_resources(void);
 #define cpu_relax()	asm volatile("":::"memory")
 #endif
 
+#ifdef __aarch64__
+#define rmb()		asm volatile("dmb ld" ::: "memory")
+#define cpu_relax()	asm volatile("yield" ::: "memory")
+#endif
+
 #ifdef __mips__
 #include <asm/unistd.h>
 #define rmb()		asm volatile(					\
@@ -166,4 +184,10 @@ void op_release_resources(void);
 #define cpu_relax()	({__insn_mfspr(SPR_PASS); barrier();})
 #endif
 
+#ifdef __arc__
+#include <asm/unistd.h>
+#define rmb()          asm volatile("" ::: "memory")
+#define cpu_relax()    rmb()
+#endif
+
 #endif // OPERF_H_
diff --git a/libpp/Makefile.am b/libpp/Makefile.am
index d3fcbdd..79c415e 100644
--- a/libpp/Makefile.am
+++ b/libpp/Makefile.am
@@ -52,7 +52,5 @@ libpp_a_SOURCES = \
 	symbol_sort.cpp \
 	symbol_sort.h \
 	xml_utils.h \
-	xml_utils.cpp \
-	populate_for_spu.cpp \
-	populate_for_spu.h
+	xml_utils.cpp
 
diff --git a/libpp/Makefile.in b/libpp/Makefile.in
index 3aeb313..b9ee648 100644
--- a/libpp/Makefile.in
+++ b/libpp/Makefile.in
@@ -39,7 +39,6 @@ DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
 am__aclocal_m4_deps = $(top_srcdir)/m4/binutils.m4 \
 	$(top_srcdir)/m4/builtinexpect.m4 \
-	$(top_srcdir)/m4/cellspubfdsupport.m4 \
 	$(top_srcdir)/m4/compileroption.m4 \
 	$(top_srcdir)/m4/copyifchange.m4 $(top_srcdir)/m4/docbook.m4 \
 	$(top_srcdir)/m4/extradirs.m4 \
@@ -48,7 +47,7 @@ am__aclocal_m4_deps = $(top_srcdir)/m4/binutils.m4 \
 	$(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
 	$(top_srcdir)/m4/mallocattribute.m4 \
 	$(top_srcdir)/m4/poptconst.m4 \
-	$(top_srcdir)/m4/precompiledheader.m4 $(top_srcdir)/m4/qt.m4 \
+	$(top_srcdir)/m4/precompiledheader.m4 \
 	$(top_srcdir)/m4/sstream.m4 $(top_srcdir)/m4/typedef.m4 \
 	$(top_srcdir)/configure.ac
 am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
@@ -70,7 +69,7 @@ am_libpp_a_OBJECTS = arrange_profiles.$(OBJEXT) \
 	profile_container.$(OBJEXT) profile_spec.$(OBJEXT) \
 	sample_container.$(OBJEXT) symbol_container.$(OBJEXT) \
 	symbol_functors.$(OBJEXT) symbol_sort.$(OBJEXT) \
-	xml_utils.$(OBJEXT) populate_for_spu.$(OBJEXT)
+	xml_utils.$(OBJEXT)
 libpp_a_OBJECTS = $(am_libpp_a_OBJECTS)
 DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
 depcomp = $(SHELL) $(top_srcdir)/depcomp
@@ -150,7 +149,6 @@ LN_S = @LN_S@
 LTLIBOBJS = @LTLIBOBJS@
 MAKEINFO = @MAKEINFO@
 MKDIR_P = @MKDIR_P@
-MOC = @MOC@
 NM = @NM@
 NMEDIT = @NMEDIT@
 OBJDUMP = @OBJDUMP@
@@ -174,20 +172,13 @@ PFM_LIB = @PFM_LIB@
 PKG_CONFIG = @PKG_CONFIG@
 POPT_LIBS = @POPT_LIBS@
 PTRDIFF_T_TYPE = @PTRDIFF_T_TYPE@
-QT_CFLAGS = @QT_CFLAGS@
-QT_INCLUDES = @QT_INCLUDES@
-QT_LDFLAGS = @QT_LDFLAGS@
-QT_LIB = @QT_LIB@
-QT_LIBS = @QT_LIBS@
-QT_VERSION = @QT_VERSION@
 RANLIB = @RANLIB@
+RT_LIB = @RT_LIB@
 SED = @SED@
 SET_MAKE = @SET_MAKE@
 SHELL = @SHELL@
 SIZE_T_TYPE = @SIZE_T_TYPE@
 STRIP = @STRIP@
-UIC = @UIC@
-UIChelp = @UIChelp@
 VERSION = @VERSION@
 XMKMF = @XMKMF@
 XML_CATALOG = @XML_CATALOG@
@@ -304,9 +295,7 @@ libpp_a_SOURCES = \
 	symbol_sort.cpp \
 	symbol_sort.h \
 	xml_utils.h \
-	xml_utils.cpp \
-	populate_for_spu.cpp \
-	populate_for_spu.h
+	xml_utils.cpp
 
 all: all-am
 
@@ -367,7 +356,6 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/op_header.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/parse_filename.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/populate.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/populate_for_spu.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/profile.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/profile_container.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/profile_spec.Po@am__quote@
diff --git a/libpp/arrange_profiles.cpp b/libpp/arrange_profiles.cpp
index 13e7feb..338a748 100644
--- a/libpp/arrange_profiles.cpp
+++ b/libpp/arrange_profiles.cpp
@@ -351,7 +351,9 @@ void name_classes(profile_classes & classes, merge_option const & merge_by)
 			it->name += it->ptemplate.cpu;
 			it->longname = "Samples on CPU " + it->ptemplate.cpu;
 			break;
-		case AXIS_MAX:;
+		case AXIS_MAX:
+			cerr << "Internal error - no equivalence class axis" << endl;
+			abort();
 		}
 	}
 }
@@ -676,6 +678,11 @@ arrange_profiles(list<string> const & files, merge_option const & merge_by,
 	copy(temp_classes.begin(), temp_classes.end(),
 	     back_inserter(classes.v));
 
+	/* Coverity complains about classes.axis not being initialized upon
+	 * returning a copy of the classes object, so we'll silence it by
+	 * initializing axis to the max value.
+	 */
+	classes.axis = AXIS_MAX;
 	if (classes.v.empty())
 		return classes;
 
diff --git a/libpp/callgraph_container.cpp b/libpp/callgraph_container.cpp
index 1d42ee4..2affa8f 100644
--- a/libpp/callgraph_container.cpp
+++ b/libpp/callgraph_container.cpp
@@ -29,18 +29,13 @@
 #include "op_bfd.h"
 #include "op_sample_file.h"
 #include "locate_images.h"
+#include "utility.h"
+#include <string.h>
 
 using namespace std;
 
 namespace {
 
-bool operator==(cg_symbol const & lhs, cg_symbol const & rhs)
-{
-	less_symbol cmp_symb;
-	return !cmp_symb(lhs, rhs) && !cmp_symb(rhs, lhs);
-}
-
-
 // we store {caller,callee} inside a single u64
 odb_key_t caller_to_key(u32 value)
 {
@@ -128,8 +123,8 @@ public:
 	call_data(profile_container const & p, profile_t const & pr,
 	          op_bfd const & bfd, u32 boff, image_name_id iid,
 	          image_name_id aid, bool debug_info)
-		: pc(p), profile(pr), b(bfd), boffset(boff), image(iid),
-		  app(aid), debug(debug_info) {}
+		: callee_end(0), pc(p), profile(pr), b(bfd), boffset(boff),
+		  image(iid), app(aid), debug(debug_info) {}
 
 	/// point to a caller symbol
 	void caller_sym(symbol_index_t i) {
@@ -449,6 +444,8 @@ void callgraph_container::populate(list<string> const & cg_files,
 	list<string>::const_iterator const end = cg_files.end();
 	for (it = cg_files.begin(); it != end; ++it) {
 		cverb << vdebug << "samples file : " << *it << endl;
+		op_bfd *caller_bfd;
+		op_bfd *callee_bfd;
 
 		parsed_filename caller_file =
 			parse_filename(*it, extra_found_images);
@@ -463,8 +460,15 @@ void callgraph_container::populate(list<string> const & cg_files,
 					   error, false, extra_found_images);
 
 		bool caller_bfd_ok = true;
-		op_bfd caller_bfd(caller_file.lib_image,
-			string_filter(), extra_found_images, caller_bfd_ok);
+
+		// Match the kallsyms name exactly; comparing only strlen(lib_image)
+		// bytes would also accept an empty name or any prefix of it.
+		if (caller_file.lib_image == KALL_SYM_FILE)
+			caller_bfd = new op_bfd(caller_file.lib_image, extra_found_images);
+		else
+			caller_bfd = new op_bfd(caller_file.lib_image, string_filter(),
+						extra_found_images, caller_bfd_ok);
+
 		if (!caller_bfd_ok)
 			report_image_error(caller_file.lib_image,
 			                   image_format_failure, false,
@@ -480,8 +484,14 @@ void callgraph_container::populate(list<string> const & cg_files,
 					   error, false, extra_found_images);
 
 		bool callee_bfd_ok = true;
-		op_bfd callee_bfd(callee_file.cg_image,
-			string_filter(), extra_found_images, callee_bfd_ok);
+		// Match the kallsyms name exactly; comparing only strlen(cg_image)
+		// bytes would also accept an empty name or any prefix of it.
+		if (callee_file.cg_image == KALL_SYM_FILE)
+			callee_bfd = new op_bfd(callee_file.cg_image, extra_found_images);
+		else
+			callee_bfd = new op_bfd(callee_file.cg_image, string_filter(),
+						extra_found_images, callee_bfd_ok);
+
 		if (!callee_bfd_ok)
 			report_image_error(callee_file.cg_image,
 		                           image_format_failure, false,
@@ -491,9 +501,13 @@ void callgraph_container::populate(list<string> const & cg_files,
 		// We can't use start_offset support in profile_t, give
 		// it a zero offset and we will fix that in add()
 		profile.add_sample_file(*it);
-		add(profile, caller_bfd, caller_bfd_ok, callee_bfd,
+
+		add(profile, *caller_bfd, caller_bfd_ok, *callee_bfd,
 		    merge_lib ? app_image : app_name, pc,
 		    debug_info, pclass);
+
+		delete caller_bfd;
+		delete callee_bfd;
 	}
 }
 
diff --git a/libpp/filename_spec.cpp b/libpp/filename_spec.cpp
index 9ba74db..a139fd5 100644
--- a/libpp/filename_spec.cpp
+++ b/libpp/filename_spec.cpp
@@ -27,7 +27,7 @@ filename_spec::filename_spec(string const & filename,
 
 
 filename_spec::filename_spec()
-	: image("*"), lib_image("*")
+	: image("*"), lib_image("*"), count(0), unitmask(0)
 {
 }
 
diff --git a/libpp/format_output.cpp b/libpp/format_output.cpp
index de42dc8..11cd396 100644
--- a/libpp/format_output.cpp
+++ b/libpp/format_output.cpp
@@ -98,6 +98,7 @@ formatter::formatter(extra_images const & extra)
 	vma_64(false),
 	long_filenames(false),
 	need_header(true),
+	global_percent(false),
 	extra_found_images(extra)
 {
 	format_map[ff_vma] = field_description(9, "vma", &formatter::format_vma);
@@ -511,6 +512,9 @@ void cg_formatter::output(ostream & out, symbol_collection const & syms)
 
 	for (it = syms.begin(); it < end; ++it) {
 		cg_symbol const * sym = dynamic_cast<cg_symbol const *>(*it);
+		// To silence coverity (since dynamic cast can theoretically return NULL)
+		if (!sym)
+			continue;
 
 		cg_symbol::children::const_iterator cit;
 		cg_symbol::children::const_iterator cend = sym->callers.end();
@@ -610,6 +614,7 @@ xml_formatter(profile_container const * p,
 	profile(p),
 	symbols(s),
 	need_details(false),
+	detail_count(0),
 	symbol_filter(sf)
 {
 	if (profile)
@@ -660,26 +665,12 @@ xml_formatter::get_bfd_object(symbol_entry const * symb, op_bfd * & abfd) const
 	bool ok = true;
 
 	string const & image_name = get_image_name(symb->image_name,
-		image_name_storage::int_filename, extra_found_images);
-	if (symb->spu_offset) {
-		// FIXME: what about archive:tmp, actually it's not supported
-		// for spu since oparchive doesn't archive the real file but
-		// in future it would work ?
-		string tmp = get_image_name(symb->embedding_filename, 
-			image_name_storage::int_filename, extra_found_images);
-		if (abfd && abfd->get_filename() == tmp)
-			return true;
-		delete abfd;
-		abfd = new op_bfd(symb->spu_offset, tmp,
-				  symbol_filter, extra_found_images, ok);
-	} else {
-		if (abfd && abfd->get_filename() == image_name)
-			return true;
-		delete abfd;
-		abfd = new op_bfd(image_name, symbol_filter,
-				  extra_found_images, ok);
-
-	}
+	                                           image_name_storage::int_filename, extra_found_images);
+	if (abfd && abfd->get_filename() == image_name)
+		return true;
+	delete abfd;
+	abfd = new op_bfd(image_name, symbol_filter,
+	                  extra_found_images, ok);
 
 	if (!ok) {
 		report_image_error(image_name, image_format_failure,
diff --git a/libpp/format_output.h b/libpp/format_output.h
index 8e527d5..6531bdc 100644
--- a/libpp/format_output.h
+++ b/libpp/format_output.h
@@ -119,7 +119,8 @@ protected:
  
 	/// decribe one field of the colummned output.
 	struct field_description {
-		field_description() {}
+		field_description()
+			: width(0), header_name(""), formatter(NULL) {}
 		field_description(std::size_t w, std::string h,
 				  fct_format f)
 			: width(w), header_name(h), formatter(f) {}
diff --git a/libpp/image_errors.cpp b/libpp/image_errors.cpp
index 09d211e..eab21d9 100644
--- a/libpp/image_errors.cpp
+++ b/libpp/image_errors.cpp
@@ -38,8 +38,11 @@ void report_image_error(string const & image, image_error error, bool fatal,
 		reported_images_error.insert(image_name);
 
 		// FIXME: hacky
-		if (error == image_not_found && is_prefix(image, "anon "))
+		if (error == image_not_found && (is_prefix(image, "anon ") ||
+				image == "/no-vmlinux" || is_prefix(image, "[vdso]") ||
+                                is_prefix(image, "[hypervisor_bucket]"))) {
 			return;
+		}
 
 		cerr << (fatal ? "error: " : "warning: ");
 		cerr << image_name << ' ';
diff --git a/libpp/op_header.cpp b/libpp/op_header.cpp
index 7029c61..29801fd 100644
--- a/libpp/op_header.cpp
+++ b/libpp/op_header.cpp
@@ -34,6 +34,7 @@
 #include "format_output.h"
 #include "xml_utils.h"
 #include "cverb.h"
+#include "utility.h"
 
 using namespace std;
 
@@ -89,6 +90,13 @@ void check_mtime(string const & file, opd_header const & header)
 {
 	u64 newmtime = op_get_mtime(file.c_str());
 
+	if (file == KALL_SYM_FILE)
+		/* /proc/kallsyms is generated when read rather than being a
+		 * real file, so the mtime comparison doesn't apply.  Match the
+		 * name exactly: a prefix (or empty) name is not kallsyms.
+		 */
+		return;
+
 	if (newmtime == header.mtime)
 		return;
 
@@ -186,17 +194,15 @@ string const op_print_event(op_cpu cpu_type, u32 type, u32 um, u32 count)
 	str += string("Counted ") + event->name;
 	str += string(" events (") + event->desc + ")";
 
-	if (cpu_type != CPU_RTC) {
-		str += " with a unit mask of 0x";
+	str += " with a unit mask of 0x";
 
-		ostringstream ss;
-		ss << hex << setw(2) << setfill('0') << unsigned(um);
-		str += ss.str();
+	ostringstream ss;
+	ss << hex << setw(2) << setfill('0') << unsigned(um);
+	str += ss.str();
 
-		str += " (";
-		str += um_desc ? um_desc : "multiple flags";
-		str += ")";
-	}
+	str += " (";
+	str += um_desc ? um_desc : "multiple flags";
+	str += ")";
 
 	str += " count " + op_lexical_cast<string>(count);
 	return str;
@@ -206,7 +212,7 @@ string const op_xml_print_event(op_cpu cpu_type, u32 type, u32 um, u32 count)
 {
 	string unit_mask;
 
-	if (cpu_type == CPU_TIMER_INT || cpu_type == CPU_RTC)
+	if (cpu_type == CPU_TIMER_INT)
 		return xml_utils::get_timer_setup((size_t)count);
 
 	struct op_event * event = op_find_event(cpu_type, type, um);
@@ -218,11 +224,9 @@ string const op_xml_print_event(op_cpu cpu_type, u32 type, u32 um, u32 count)
 		}
 	}
 
-	if (cpu_type != CPU_RTC) {
-		ostringstream str_out;
-		str_out << um;
-		unit_mask = str_out.str();
-	}
+	ostringstream str_out;
+	str_out << um;
+	unit_mask = str_out.str();
 
 	return xml_utils::get_event_setup(string(event->name),
 		(size_t)count, unit_mask);
diff --git a/libpp/parse_filename.h b/libpp/parse_filename.h
index d024f9b..0996bd6 100644
--- a/libpp/parse_filename.h
+++ b/libpp/parse_filename.h
@@ -41,6 +41,12 @@ struct parsed_filename
 	 */
 	std::string filename;
 	bool jit_dumpfile_exists;
+
+	/* Provide default constructor to initialize jit_dumpfile_exists,
+	 * otherwise it can contain garbage when its containing object is
+	 * instantiated on the stack.
+	 */
+	parsed_filename() : jit_dumpfile_exists(false) {}
 };
 
 
diff --git a/libpp/populate.cpp b/libpp/populate.cpp
index 7183531..bd49276 100644
--- a/libpp/populate.cpp
+++ b/libpp/populate.cpp
@@ -18,9 +18,10 @@
 #include "op_bfd.h"
 #include "op_header.h"
 #include "populate.h"
-#include "populate_for_spu.h"
 
 #include "image_errors.h"
+#include "utility.h"
+#include <string.h>
 
 #include <iostream>
 
@@ -59,15 +60,17 @@ void
 populate_for_image(profile_container & samples, inverted_profile const & ip,
 	string_filter const & symbol_filter, bool * has_debug_info)
 {
-	if (is_spu_profile(ip)) {
-		populate_for_spu_image(samples, ip, symbol_filter,
-				       has_debug_info);
-		return;
-	}
+	op_bfd *abfd;
 
 	bool ok = ip.error == image_ok;
-	op_bfd abfd(ip.image, symbol_filter,
-		    samples.extra_found_images, ok);
+
+	// Whole-name compare: a length-bounded strncmp() also matched "" and prefixes.
+	if (ip.image == KALL_SYM_FILE)
+		abfd = new op_bfd(ip.image, samples.extra_found_images);
+	else
+		abfd = new op_bfd(ip.image, symbol_filter,
+				  samples.extra_found_images, ok);
+
 	if (!ok && ip.error == image_ok)
 		ip.error = image_format_failure;
 
@@ -89,9 +92,9 @@ populate_for_image(profile_container & samples, inverted_profile const & ip,
 		// to the wrong app_image otherwise
 		for (; it != end; ++it) {
 			profile_t profile;
-			if (populate_from_files(profile, abfd, it->files)) {
+			if (populate_from_files(profile, *abfd, it->files)) {
 				header = profile.get_header();
-				samples.add(profile, abfd, it->app_image, i);
+				samples.add(profile, *abfd, it->app_image, i);
 				found = true;
 			}
 		}
@@ -106,5 +109,7 @@ populate_for_image(profile_container & samples, inverted_profile const & ip,
 	}
 
 	if (has_debug_info)
-		*has_debug_info = abfd.has_debug_info();
+		*has_debug_info = abfd->has_debug_info();
+
+	delete abfd;
 }
diff --git a/libpp/populate_for_spu.cpp b/libpp/populate_for_spu.cpp
deleted file mode 100644
index 0f4606b..0000000
--- a/libpp/populate_for_spu.cpp
+++ /dev/null
@@ -1,166 +0,0 @@
-/**
- * @file libpp/populate_for_spu.cpp
- * Fill up a profile_container from inverted profiles for
- * a Cell BE SPU profile
- *
- * @remark Copyright 2007 OProfile authors
- * @remark Read the file COPYING
- *
- * @author Maynard Johnson
- * (C) Copyright IBM Corporation 2007
- */
-
-#include "profile.h"
-#include "profile_container.h"
-#include "arrange_profiles.h"
-#include "op_bfd.h"
-#include "op_header.h"
-#include "populate.h"
-#include "populate_for_spu.h"
-
-#include "image_errors.h"
-
-#include <iostream>
-
-using namespace std;
-
-namespace {
-
-static int spu_profile = unknown_profile;
-
-/*
- * On Cell Broadband Engine, an application executing on an SPE may
- * have been loaded from a separate SPU executable binary file or may
- * have been loaded from an embedded section of a PPE application or
- * shared library.  In the embedded case, the embedding file may actually
- * contain multiple SPU images, resulting in different SPU images being loaded
- * onto different SPUs.  Thus, the SPUs may be executing different code, even
- * though the application of the parent PPE process is the same.  Therefore,
- * we must be sure to create a separate op_bfd object for each SPU.  When doing
- * so below, we examine header.embedded_offset.  If embedded_offset is > 0, it's
- * interpreted as the offset of an SPU image embedded in the containing file,
- * so the filename to do the check_mtime on is the containing file, ip.image;
- * otherwise, the filename to do the check_mtime on is the separate backing
- * file of the SPU image, abfd->filename.
- */
-void
-populate_spu_profile_from_files(list<profile_sample_files> const & files,
-				string const app_image,
-				profile_container & samples,
-				inverted_profile const & ip,
-				string_filter const & symbol_filter,
-				size_t ip_grp_num, bool * has_debug_info)
-{
-	string archive_path = samples.extra_found_images.get_archive_path();
-	bool ok = ip.error == image_ok;
-	op_bfd * abfd = NULL;
-	string fname_to_check;
-	list<profile_sample_files>::const_iterator it = files.begin();
-	list<profile_sample_files>::const_iterator const end = files.end();
-	for (; it != end; ++it) {
-		profile_t profile;
-		if (it->sample_filename.empty())
-			continue;
-
-		profile.add_sample_file(it->sample_filename);
-		opd_header header = profile.get_header();
-		if (header.embedded_offset) {
-			abfd = new op_bfd(header.embedded_offset,
-					  ip.image,
-					  symbol_filter,
-					  samples.extra_found_images,
-					  ok);
-			fname_to_check = ip.image;
-		} else {
-			abfd = new op_bfd(ip.image,
-					  symbol_filter,
-					  samples.extra_found_images,
-					  ok);
-			fname_to_check = abfd->get_filename();
-		}
-		profile.set_offset(*abfd);
-		if (!ok && ip.error == image_ok)
-			ip.error = image_format_failure;
-
-		if (ip.error == image_format_failure)
-			report_image_error(ip, false,
-					   samples.extra_found_images);
-
-		samples.add(profile, *abfd, app_image, ip_grp_num);
-		if (ip.error == image_ok) {
-			image_error error;
-			string filename =
-				samples.extra_found_images.find_image_path(
-					fname_to_check, error, true);
-			check_mtime(filename, profile.get_header());
-		}
-
-		if (has_debug_info && !*has_debug_info)
-			*has_debug_info = abfd->has_debug_info();
-		delete abfd;
-	}
-}
-}  // anon namespace
-
-void
-populate_for_spu_image(profile_container & samples,
-		       inverted_profile const & ip,
-		       string_filter const & symbol_filter,
-		       bool * has_debug_info)
-{
-
-	for (size_t i = 0; i < ip.groups.size(); ++i) {
-		list < image_set >::const_iterator it=
-			ip.groups[i].begin();
-		list < image_set >::const_iterator const end
-			= ip.groups[i].end();
-
-		for (; it != end; ++it)
-			populate_spu_profile_from_files(it->files,
-				it->app_image, samples, ip,
-				symbol_filter, i, has_debug_info);
-	}
-}
-
-bool is_spu_profile(inverted_profile const & ip)
-{
-	bool retval = false;
-	string sfname = "";
-	if (spu_profile != unknown_profile)
-		return spu_profile;
-
-	if (!ip.groups.size())
-		return false;
-
-	for (size_t i = 0; i < ip.groups.size(); ++i) {
-		list<image_set>::const_iterator grp_it
-			= ip.groups[i].begin();
-		list<image_set>::const_iterator const grp_end
-			= ip.groups[i].end();
-
-		for (; grp_it != grp_end; ++grp_it) {
-			list<profile_sample_files>::const_iterator sfiles_it =
-				grp_it->files.begin();
-			list<profile_sample_files>::const_iterator sfiles_end =
-				grp_it->files.end();
-			for (; sfiles_it != sfiles_end; ++sfiles_it) {
-				if (!sfiles_it->sample_filename.empty()) {
-					sfname = sfiles_it->sample_filename;
-					goto do_check;
-				}
-			}
-		}
-	}
-	goto out;
-
-do_check:
-	spu_profile = profile_t::is_spu_sample_file(sfname);
-
-	if (spu_profile == cell_spu_profile)
-		retval = true;
-
-out:
-	return retval;
-}
-
-
diff --git a/libpp/populate_for_spu.h b/libpp/populate_for_spu.h
deleted file mode 100644
index ec48099..0000000
--- a/libpp/populate_for_spu.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/**
- * @file libpp/populate_for_spu.h
- * Fill up a profile_container from inverted profiles for
- * a Cell BE SPU profile
- *
- * @remark Copyright 2007 OProfile authors
- * @remark Read the file COPYING
- *
- * @author Maynard Johnson
- * (C) Copyright IBM Corporation 2007
- */
-
-#ifndef POPULATE_FOR_SPU_H
-#define POPULATE_FOR_SPU_H
-
-class profile_container;
-class inverted_profile;
-class string_filter;
-
-/*
- * When profiling SPUs on Cell Broadband Engine, all sample file
- * headers get a flag set indicating "spu_profile".  This function
- * checks the first sample file for this indicator.
- */
-bool is_spu_profile(inverted_profile const & ip);
-
-/*
- * This is a special-purpose function for CELL BE SPU profiling.
- * See populate_spu_profile_from_files prologue for more details.
- */
-void populate_for_spu_image(profile_container & samples,
-			    inverted_profile const & ip,
-			    string_filter const & symbol_filter,
-			    bool * has_debug_info);
-
-enum profile_type {
-	unknown_profile = -1,
-	normal_profile,
-	cell_spu_profile
-};
-
-#endif /* POPULATE_FOR_SPU_H */
diff --git a/libpp/profile.cpp b/libpp/profile.cpp
index f117508..be3ff93 100644
--- a/libpp/profile.cpp
+++ b/libpp/profile.cpp
@@ -27,7 +27,6 @@
 #include "profile.h"
 #include "op_bfd.h"
 #include "cverb.h"
-#include "populate_for_spu.h"
 
 using namespace std;
 
@@ -56,19 +55,6 @@ count_type profile_t::sample_count(string const & filename)
 	return count;
 }
 
-//static member
-enum profile_type profile_t::is_spu_sample_file(string const & filename)
-{
-	profile_type retval;
-	odb_t samples_db;
-	open_sample_file(filename, samples_db);
-	opd_header const & hdr =
-		*static_cast<opd_header *>(odb_get_data(&samples_db));
-	retval = hdr.spu_profile ? cell_spu_profile: normal_profile;
-	odb_close(&samples_db);
-	return retval;
-}
-
 //static member
 void profile_t::open_sample_file(string const & filename, odb_t & db)
 {
@@ -78,9 +64,9 @@ void profile_t::open_sample_file(string const & filename, odb_t & db)
 
 	if (head.version != OPD_VERSION) {
 		ostringstream os;
-		os << "oprofpp: samples files version mismatch, are you "
-		   << "running a daemon and post-profile tools with version "
-		   <<  "mismatch ?\n";
+		os << "oprofpp: samples files version mismatch." << endl
+		   << "Be sure you are running the oprofile post-profile tool that" << endl
+		   << "matches the version of operf used to collect the profile" << endl;
 		throw op_fatal_error(os.str());
 	}
 
diff --git a/libpp/profile.h b/libpp/profile.h
index 163355d..78cda72 100644
--- a/libpp/profile.h
+++ b/libpp/profile.h
@@ -20,7 +20,6 @@
 #include "odb.h"
 #include "op_types.h"
 #include "utility.h"
-#include "populate_for_spu.h"
 
 class opd_header;
 class op_bfd;
@@ -55,16 +54,6 @@ public:
 	 */
 	static count_type sample_count(std::string const & filename);
 
-	/**
-	 * Indicate if given sample file is from a Cell Broadband Engine
-	 * SPU profile
-	 * @param filename sample filename
-	 *
-	 * Convenience interface put here so all access to samples files
-	 * go through profile_t static or non static member.
-	 */
-	static enum profile_type is_spu_sample_file(std::string const & filename);
-
 	/**
 	 * cumulate sample file to our container of samples
 	 * @param filename  sample file name
@@ -77,6 +66,7 @@ public:
 
 	/// Set an appropriate start offset, see comments below.
 	void set_offset(op_bfd const & abfd);
+	u64 get_offset(void) const { return start_offset; }
 
 	class const_iterator;
 	typedef std::pair<const_iterator, const_iterator> iterator_pair;
diff --git a/libpp/profile_container.cpp b/libpp/profile_container.cpp
index 6ac330a..dca6fdd 100644
--- a/libpp/profile_container.cpp
+++ b/libpp/profile_container.cpp
@@ -24,7 +24,7 @@
 #include "profile_container.h"
 #include "sample_container.h"
 #include "symbol_container.h"
-#include "populate_for_spu.h"
+#include "cverb.h"
 
 using namespace std;
 
@@ -75,7 +75,7 @@ void profile_container::add(profile_t const & profile,
                             size_t pclass)
 {
 	string const image_name = abfd.get_filename();
-	opd_header header = profile.get_header();
+	count_type sym_count_total = 0;
 
 	for (symbol_index_t i = 0; i < abfd.syms.size(); ++i) {
 
@@ -92,6 +92,7 @@ void profile_container::add(profile_t const & profile,
 		if (count == 0)
 			continue;
 
+		sym_count_total += count;
 		symb_entry.sample.counts[pclass] = count;
 		total_count[pclass] += count;
 
@@ -115,19 +116,24 @@ void profile_container::add(profile_t const & profile,
 		symb_entry.app_name = image_names.create(app_name);
 
 		symb_entry.sample.vma = abfd.syms[i].vma();
-		if ((header.spu_profile == cell_spu_profile) &&
-		    header.embedded_offset) {
-			symb_entry.spu_offset = header.embedded_offset;
-			symb_entry.embedding_filename =
-				image_names.create(abfd.get_embedding_filename());
-		} else {
-			symb_entry.spu_offset = 0;
-		}
 		symbol_entry const * symbol = symbols->insert(symb_entry);
 
 		if (need_details)
 			add_samples(abfd, i, p_it, symbol, pclass, start);
 	}
+
+	if (cverb << vdebug) {
+		profile_t::iterator_pair summary_it =
+			profile.samples_range(profile.get_offset(), ~0ULL);
+		count_type module_summary_count = accumulate(summary_it.first, summary_it.second, 0ull);
+		if (sym_count_total < module_summary_count)
+			cout << "INFO: Sample counts differ:  Module summary count: " << dec
+			     << module_summary_count << "; total symbols count: " << sym_count_total
+			     << endl << "\timage name: " << image_name << endl;
+		else if (module_summary_count < sym_count_total)
+			cout << "Warning: Number of samples for module unexpectedly less than total "
+			     "symbols count!"  << endl << "\timage name: " << image_name << endl;
+	}
 }
 
 
@@ -177,13 +183,16 @@ profile_container::select_symbols(symbol_choice & choice) const
 		    && (image_names.name(it->image_name) != choice.image_name))
 			continue;
 
-		double const percent =
-			op_ratio(it->sample.counts[0], total_count[0]);
+		for (size_t j = 0; j < total_count.size(); j++) {
+			double const percent =
+					op_ratio(it->sample.counts[j], total_count[j]);
 
-		if (percent >= threshold) {
-			result.push_back(&*it);
+			if (percent >= threshold) {
+				result.push_back(&*it);
 
-			choice.hints = it->output_hint(choice.hints);
+				choice.hints = it->output_hint(choice.hints);
+				break;
+			}
 		}
 	}
 
@@ -220,9 +229,13 @@ profile_container::select_filename(double threshold) const
 		// FIXME: is samples_count() the right interface now ?
 		count_array_t counts = samples_count(*it);
 
-		double const ratio = op_ratio(counts[0], total_count[0]);
-		filename_by_samples const f(*it, ratio);
-
+		double highest_ratio = 0.0;
+		for (size_t j = 0; j < total_count.size(); j++ ) {
+			double const ratio = op_ratio(counts[j], total_count[j]);
+			if (ratio > highest_ratio)
+				highest_ratio = ratio;
+		}
+		filename_by_samples const f(*it, highest_ratio);
 		file_by_samples.push_back(f);
 	}
 
diff --git a/libpp/profile_spec.cpp b/libpp/profile_spec.cpp
index af05e20..cd4bd80 100644
--- a/libpp/profile_spec.cpp
+++ b/libpp/profile_spec.cpp
@@ -385,7 +385,13 @@ bool valid_candidate(string const & base_dir, string const & filename,
 	if (!is_prefix(sub, "/{root}/") && !is_prefix(sub, "/{kern}/"))
 		return false;
 
-	/* When overflows occur in the oprofile kernel driver's sample
+	/** NOTE: This comment and associated code is actually obsolete now,
+	 * since opcontrol and the oprofile kernel driver are no longer
+	 * in use with oprofile (as of release 1.0).  It seems extremely
+	 * unlikely we could encounter the same sort of issue using operf,
+	 * but it doesn't hurt to keep the code to be on the safe side.
+	 *
+	 * When overflows occur in the oprofile kernel driver's sample
 	 * buffers (caused by too high of a sampling rate), it's possible
 	 * for samples to be mis-attributed.  A common scenario is that,
 	 * while profiling process 'abc' running binary 'xzy', the task
@@ -453,7 +459,7 @@ bool valid_candidate(string const & base_dir, string const & filename,
  * Print a warning message if we detect any sample buffer overflows
  * occurred in the kernel driver. 
  */
-void warn_if_kern_buffs_overflow(string const & session_samples_dir)
+static void warn_if_kern_buffs_overflow(string const & session_samples_dir)
 {
 	DIR * dir;
 	struct dirent * dirent;
@@ -494,6 +500,61 @@ void warn_if_kern_buffs_overflow(string const & session_samples_dir)
 	}
 }
 
+static void warn_if_kern_throttling(string const & session_samples_dir)
+{
+	DIR * dir;
+	string stats_path;
+
+	/* check for throttled */
+	stats_path = session_samples_dir + "stats/throttled";
+	dir = opendir(stats_path.c_str());
+	if (dir != NULL) {
+		cerr << "\nWARNING! Some of the events were throttled. "
+		     << "Throttling occurs when\n";
+		cerr << "the initial sample rate is too high, causing an "
+		     << "excessive number of\n";
+		cerr << "interrupts.  Decrease the sampling frequency. "
+		     << "Check the directory\n";
+		cerr << stats_path << "\n"
+		     << "for the throttled event names.\n\n";
+		closedir(dir);
+	}
+}
+
+/* Warn on stderr when the lost-sample stats exceed the warning threshold. */
+static void warn_if_lost_samples(string const & session_samples_dir)
+{
+	string const stats_path = session_samples_dir + "stats/";
+	unsigned long total_lost_samples = 0;
+
+	unsigned long const total_samples =
+		op_read_long_from_file((stats_path + "total_samples").c_str(), 0);
+	if (total_samples == ((unsigned long)-1))
+		return;
+
+	for (int i = OPERF_INDEX_OF_FIRST_LOST_STAT; i < OPERF_MAX_STATS; i++) {
+		unsigned long lost_samples_count =
+			op_read_long_from_file((stats_path + stats_filenames[i]).c_str(), 0);
+		if (lost_samples_count != ((unsigned long)-1))
+			total_lost_samples += lost_samples_count;
+	}
+
+	/* Keep the limit as wide as the counters; unsigned int could truncate. */
+	if (total_lost_samples > (unsigned long)(OPERF_WARN_LOST_SAMPLES_THRESHOLD
+				       * total_samples)) {
+		string const operf_log = string(op_session_dir) + "/samples/operf.log";
+		cerr << "\nWARNING: Lost samples detected! See " << operf_log
+		     << " for details." << endl;
+	}
+}
+
+static void warn_if_sampling_problems(string const & session_samples_dir)
+{
+	warn_if_kern_buffs_overflow(session_samples_dir);
+	warn_if_kern_throttling(session_samples_dir);
+	warn_if_lost_samples(session_samples_dir);
+}
+
 
 }  // anonymous namespace
 
@@ -540,7 +601,7 @@ list<string> profile_spec::generate_file_list(bool exclude_dependent,
 
 		if (!files.empty()) {
 			found_file = true;
-			warn_if_kern_buffs_overflow(base_dir + "/");
+			warn_if_sampling_problems(base_dir + "/");
 		}
 
 		list<string>::const_iterator it = files.begin();
@@ -561,9 +622,8 @@ list<string> profile_spec::generate_file_list(bool exclude_dependent,
 
 	if (!found_file) {
 		ostringstream os;
-		os  << "No sample file found: If using opcontrol for profiling,\n"
-		    << "try running 'opcontrol --dump'; otherwise, specify a session containing\n"
-		    << "sample files.\n";
+		os  << "No sample found: Please specify a session containing \n"
+		    << "sample data.\n";
 		throw op_fatal_error(os.str());
 	}
 
diff --git a/libpp/symbol.h b/libpp/symbol.h
index 0071324..017e04c 100644
--- a/libpp/symbol.h
+++ b/libpp/symbol.h
@@ -62,7 +62,7 @@ struct sample_entry {
 /// associate a symbol with a file location, samples count and vma address
 class symbol_entry {
 public:
-	symbol_entry() : size(0) {}
+	symbol_entry() :  sym_index(0), size(0), vma_adj(0) {}
 	virtual ~symbol_entry() {}
 
 	/// which image this symbol belongs to
@@ -88,8 +88,6 @@ public:
 	 * the start is below it, but the the hint is only used for formatting
 	 */
 	column_flags output_hint(column_flags fl) const;
-	uint64_t spu_offset;
-	image_name_id embedding_filename;
 
 	/**
 	 * The vma_adj is set according to the corresponding op_bfd::vma_adj.
diff --git a/libpp/xml_utils.cpp b/libpp/xml_utils.cpp
index 11dfcdb..3de41e5 100644
--- a/libpp/xml_utils.cpp
+++ b/libpp/xml_utils.cpp
@@ -101,12 +101,6 @@ bool has_separated_thread_info()
 }
 
 
-string get_cpu_num(size_t pclass)
-{
-	return classes.v[pclass].ptemplate.cpu;
-}
-
-
 };  // anonymous namespace
 
 xml_utils::xml_utils(format_output::xml_formatter * xo,
@@ -245,11 +239,11 @@ void xml_utils::add_option(tag_t tag, bool value)
 void xml_utils::output_xml_header(string const & command_options,
                        string const & cpu_info, string const & events)
 {
-	// the integer portion indicates the schema version and should change
+	// The integer portion indicates the schema version and should change
 	// both here and in the schema file when major changes are made to
-	// the schema.  changes to opreport, or minor changes to the schema
+	// the schema.  Changes to opreport, or minor changes to the schema
 	// can be indicated by changes to the fraction part.
-	string const schema_version = "3.0";
+	string const schema_version = "3.1";
 
 	// This is the XML version, not schema version.
 	string const xml_header = "<?xml version=\"1.0\" ?>";
@@ -683,12 +677,14 @@ summarize_binaries(extra_images const & extra_found_images)
 			current_binary = binaries_root.add_binary(binary, it);
 			current_binary_name = binary;
 		}
-
-		current_binary->add_module_symbol(module, binary, it);
+		// To silence coverity, check current_binary !=NULL
+		if (current_binary)
+			current_binary->add_module_symbol(module, binary, it);
 	}
 
 	// close out last binary and module
-	current_binary->close_binary(symbols_end);
+	if (current_binary)
+		current_binary->close_binary(symbols_end);
 }
 
 
diff --git a/libregex/Makefile.in b/libregex/Makefile.in
index 5a93808..1246a33 100644
--- a/libregex/Makefile.in
+++ b/libregex/Makefile.in
@@ -41,7 +41,6 @@ DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in \
 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
 am__aclocal_m4_deps = $(top_srcdir)/m4/binutils.m4 \
 	$(top_srcdir)/m4/builtinexpect.m4 \
-	$(top_srcdir)/m4/cellspubfdsupport.m4 \
 	$(top_srcdir)/m4/compileroption.m4 \
 	$(top_srcdir)/m4/copyifchange.m4 $(top_srcdir)/m4/docbook.m4 \
 	$(top_srcdir)/m4/extradirs.m4 \
@@ -50,7 +49,7 @@ am__aclocal_m4_deps = $(top_srcdir)/m4/binutils.m4 \
 	$(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
 	$(top_srcdir)/m4/mallocattribute.m4 \
 	$(top_srcdir)/m4/poptconst.m4 \
-	$(top_srcdir)/m4/precompiledheader.m4 $(top_srcdir)/m4/qt.m4 \
+	$(top_srcdir)/m4/precompiledheader.m4 \
 	$(top_srcdir)/m4/sstream.m4 $(top_srcdir)/m4/typedef.m4 \
 	$(top_srcdir)/configure.ac
 am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
@@ -205,7 +204,6 @@ LN_S = @LN_S@
 LTLIBOBJS = @LTLIBOBJS@
 MAKEINFO = @MAKEINFO@
 MKDIR_P = @MKDIR_P@
-MOC = @MOC@
 NM = @NM@
 NMEDIT = @NMEDIT@
 OBJDUMP = @OBJDUMP@
@@ -229,20 +227,13 @@ PFM_LIB = @PFM_LIB@
 PKG_CONFIG = @PKG_CONFIG@
 POPT_LIBS = @POPT_LIBS@
 PTRDIFF_T_TYPE = @PTRDIFF_T_TYPE@
-QT_CFLAGS = @QT_CFLAGS@
-QT_INCLUDES = @QT_INCLUDES@
-QT_LDFLAGS = @QT_LDFLAGS@
-QT_LIB = @QT_LIB@
-QT_LIBS = @QT_LIBS@
-QT_VERSION = @QT_VERSION@
 RANLIB = @RANLIB@
+RT_LIB = @RT_LIB@
 SED = @SED@
 SET_MAKE = @SET_MAKE@
 SHELL = @SHELL@
 SIZE_T_TYPE = @SIZE_T_TYPE@
 STRIP = @STRIP@
-UIC = @UIC@
-UIChelp = @UIChelp@
 VERSION = @VERSION@
 XMKMF = @XMKMF@
 XML_CATALOG = @XML_CATALOG@
diff --git a/libregex/op_regex.cpp b/libregex/op_regex.cpp
index d5ffa12..9140ec5 100644
--- a/libregex/op_regex.cpp
+++ b/libregex/op_regex.cpp
@@ -28,8 +28,9 @@ string op_regerror(int err, regex_t const & regexp)
 	size_t needed_size = regerror(err, &regexp, 0, 0);
 	char * buffer = new char[needed_size];
 	regerror(err, &regexp, buffer, needed_size);
-
-	return buffer;
+	string retval = buffer;
+	delete [] buffer;
+	return retval;
 }
 
 
diff --git a/libregex/tests/Makefile.in b/libregex/tests/Makefile.in
index 94a12cd..9a8763e 100644
--- a/libregex/tests/Makefile.in
+++ b/libregex/tests/Makefile.in
@@ -41,7 +41,6 @@ DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in \
 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
 am__aclocal_m4_deps = $(top_srcdir)/m4/binutils.m4 \
 	$(top_srcdir)/m4/builtinexpect.m4 \
-	$(top_srcdir)/m4/cellspubfdsupport.m4 \
 	$(top_srcdir)/m4/compileroption.m4 \
 	$(top_srcdir)/m4/copyifchange.m4 $(top_srcdir)/m4/docbook.m4 \
 	$(top_srcdir)/m4/extradirs.m4 \
@@ -50,7 +49,7 @@ am__aclocal_m4_deps = $(top_srcdir)/m4/binutils.m4 \
 	$(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
 	$(top_srcdir)/m4/mallocattribute.m4 \
 	$(top_srcdir)/m4/poptconst.m4 \
-	$(top_srcdir)/m4/precompiledheader.m4 $(top_srcdir)/m4/qt.m4 \
+	$(top_srcdir)/m4/precompiledheader.m4 \
 	$(top_srcdir)/m4/sstream.m4 $(top_srcdir)/m4/typedef.m4 \
 	$(top_srcdir)/configure.ac
 am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
@@ -136,7 +135,6 @@ LN_S = @LN_S@
 LTLIBOBJS = @LTLIBOBJS@
 MAKEINFO = @MAKEINFO@
 MKDIR_P = @MKDIR_P@
-MOC = @MOC@
 NM = @NM@
 NMEDIT = @NMEDIT@
 OBJDUMP = @OBJDUMP@
@@ -160,20 +158,13 @@ PFM_LIB = @PFM_LIB@
 PKG_CONFIG = @PKG_CONFIG@
 POPT_LIBS = @POPT_LIBS@
 PTRDIFF_T_TYPE = @PTRDIFF_T_TYPE@
-QT_CFLAGS = @QT_CFLAGS@
-QT_INCLUDES = @QT_INCLUDES@
-QT_LDFLAGS = @QT_LDFLAGS@
-QT_LIB = @QT_LIB@
-QT_LIBS = @QT_LIBS@
-QT_VERSION = @QT_VERSION@
 RANLIB = @RANLIB@
+RT_LIB = @RT_LIB@
 SED = @SED@
 SET_MAKE = @SET_MAKE@
 SHELL = @SHELL@
 SIZE_T_TYPE = @SIZE_T_TYPE@
 STRIP = @STRIP@
-UIC = @UIC@
-UIChelp = @UIChelp@
 VERSION = @VERSION@
 XMKMF = @XMKMF@
 XML_CATALOG = @XML_CATALOG@
diff --git a/libutil++/Makefile.am b/libutil++/Makefile.am
index cfd8551..2f2f87b 100644
--- a/libutil++/Makefile.am
+++ b/libutil++/Makefile.am
@@ -40,6 +40,4 @@ libutil___a_SOURCES = \
 	cached_value.h \
 	comma_list.h \
 	xml_output.h \
-	xml_output.cpp \
-	bfd_spu_support.cpp \
-	op_spu_bfd.cpp
+	xml_output.cpp
diff --git a/libutil++/Makefile.in b/libutil++/Makefile.in
index fddc9e8..a6d7c08 100644
--- a/libutil++/Makefile.in
+++ b/libutil++/Makefile.in
@@ -39,7 +39,6 @@ DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
 am__aclocal_m4_deps = $(top_srcdir)/m4/binutils.m4 \
 	$(top_srcdir)/m4/builtinexpect.m4 \
-	$(top_srcdir)/m4/cellspubfdsupport.m4 \
 	$(top_srcdir)/m4/compileroption.m4 \
 	$(top_srcdir)/m4/copyifchange.m4 $(top_srcdir)/m4/docbook.m4 \
 	$(top_srcdir)/m4/extradirs.m4 \
@@ -48,7 +47,7 @@ am__aclocal_m4_deps = $(top_srcdir)/m4/binutils.m4 \
 	$(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
 	$(top_srcdir)/m4/mallocattribute.m4 \
 	$(top_srcdir)/m4/poptconst.m4 \
-	$(top_srcdir)/m4/precompiledheader.m4 $(top_srcdir)/m4/qt.m4 \
+	$(top_srcdir)/m4/precompiledheader.m4 \
 	$(top_srcdir)/m4/sstream.m4 $(top_srcdir)/m4/typedef.m4 \
 	$(top_srcdir)/configure.ac
 am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
@@ -66,8 +65,7 @@ am_libutil___a_OBJECTS = op_bfd.$(OBJEXT) bfd_support.$(OBJEXT) \
 	path_filter.$(OBJEXT) file_manip.$(OBJEXT) \
 	stream_util.$(OBJEXT) string_manip.$(OBJEXT) cverb.$(OBJEXT) \
 	op_exception.$(OBJEXT) child_reader.$(OBJEXT) \
-	xml_output.$(OBJEXT) bfd_spu_support.$(OBJEXT) \
-	op_spu_bfd.$(OBJEXT)
+	xml_output.$(OBJEXT)
 libutil___a_OBJECTS = $(am_libutil___a_OBJECTS)
 DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
 depcomp = $(SHELL) $(top_srcdir)/depcomp
@@ -185,7 +183,6 @@ LN_S = @LN_S@
 LTLIBOBJS = @LTLIBOBJS@
 MAKEINFO = @MAKEINFO@
 MKDIR_P = @MKDIR_P@
-MOC = @MOC@
 NM = @NM@
 NMEDIT = @NMEDIT@
 OBJDUMP = @OBJDUMP@
@@ -209,20 +206,13 @@ PFM_LIB = @PFM_LIB@
 PKG_CONFIG = @PKG_CONFIG@
 POPT_LIBS = @POPT_LIBS@
 PTRDIFF_T_TYPE = @PTRDIFF_T_TYPE@
-QT_CFLAGS = @QT_CFLAGS@
-QT_INCLUDES = @QT_INCLUDES@
-QT_LDFLAGS = @QT_LDFLAGS@
-QT_LIB = @QT_LIB@
-QT_LIBS = @QT_LIBS@
-QT_VERSION = @QT_VERSION@
 RANLIB = @RANLIB@
+RT_LIB = @RT_LIB@
 SED = @SED@
 SET_MAKE = @SET_MAKE@
 SHELL = @SHELL@
 SIZE_T_TYPE = @SIZE_T_TYPE@
 STRIP = @STRIP@
-UIC = @UIC@
-UIChelp = @UIChelp@
 VERSION = @VERSION@
 XMKMF = @XMKMF@
 XML_CATALOG = @XML_CATALOG@
@@ -326,9 +316,7 @@ libutil___a_SOURCES = \
 	cached_value.h \
 	comma_list.h \
 	xml_output.h \
-	xml_output.cpp \
-	bfd_spu_support.cpp \
-	op_spu_bfd.cpp
+	xml_output.cpp
 
 all: all-recursive
 
@@ -378,7 +366,6 @@ mostlyclean-compile:
 distclean-compile:
 	-rm -f *.tab.c
 
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/bfd_spu_support.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/bfd_support.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/child_reader.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cverb.Po@am__quote@
@@ -386,7 +373,6 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/glob_filter.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/op_bfd.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/op_exception.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/op_spu_bfd.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/path_filter.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/stream_util.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/string_filter.Po@am__quote@
diff --git a/libutil++/bfd_spu_support.cpp b/libutil++/bfd_spu_support.cpp
deleted file mode 100644
index 2dd497d..0000000
--- a/libutil++/bfd_spu_support.cpp
+++ /dev/null
@@ -1,116 +0,0 @@
-/**
- * @file libutil++/bfd_spu_support.cpp
- * Special BFD functions for processing a Cell BE SPU profile
- *
- * @remark Copyright 2007 OProfile authors
- * @remark Read the file COPYING
- *
- * @author Maynard Johnson
- * (C) Copyright IBM Corporation 2007
- */
-
-#include "bfd_support.h"
-#include "op_bfd.h"
-#include "config.h"
-#include "cverb.h"
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <iostream>
-#include <fstream>
-#include <sstream>
-#include <string>
-#include <cstring>
-#include <sys/types.h>
-
-struct spu_elf {
-	FILE * stream;
-	off_t spu_offset;
-};
-
-using namespace std;
-
-extern verbose vbfd;
-
-#ifdef HAVE_BFD_OPENR_IOVEC_WITH_7PARMS
-
-namespace {
-
-static void *
-spu_bfd_iovec_open(bfd * nbfd, void * open_closure)
-{
-	/* Checking nbfd isn't really necessary, except to silence
-	 * compile warning.  In fact, nbfd will always be non-NULL.
-	 */
-	if (nbfd)
-		return open_closure;
-	else
-		return NULL;
-}
-
-static int
-spu_bfd_iovec_close(bfd * nbfd, void * stream)
-{
-	spu_elf * my_stream = (spu_elf *) stream;
-
-	fclose(my_stream->stream);
-	free(my_stream);
-	/* Checking nbfd isn't really necessary, except to silence
-	 * compile warning.  In fact, nbfd will always be non-NULL.
-	 */
-	if (nbfd)
-		return 1;
-	else
-		return 0;
-}
-
-static file_ptr
-spu_bfd_iovec_pread(bfd * abfd, void * stream, void * buf,
-		    file_ptr nbytes, file_ptr offset)
-{
-	spu_elf * my_stream = (spu_elf *) stream;
-	fseek(my_stream->stream, my_stream->spu_offset + offset,
-	      SEEK_SET);
-	nbytes = fread(buf, sizeof(char), nbytes, my_stream->stream);
-	/* Checking abfd isn't really necessary, except to silence
-	 * compile warning.  In fact, abfd will always be non-NULL.
-	 */
-	if (abfd)
-		return nbytes;
-	else
-		return 0;
-}
-} // namespace anon
-#endif
-
-bfd *
-spu_open_bfd(string const name, int fd, uint64_t offset_to_spu_elf)
-{
-
-	bfd * nbfd = NULL;
-	spu_elf * spu_elf_stream = (spu_elf *)malloc(sizeof(spu_elf));
-
-	FILE * fp = fdopen(fd, "r");
-	spu_elf_stream->stream = fp;
-	spu_elf_stream->spu_offset = offset_to_spu_elf;
-#ifdef HAVE_BFD_OPENR_IOVEC_WITH_7PARMS
-	nbfd = bfd_openr_iovec(strdup(name.c_str()), "elf32-spu",
-			       spu_bfd_iovec_open, spu_elf_stream,
-			       spu_bfd_iovec_pread, spu_bfd_iovec_close, NULL);
-#else
-	ostringstream os;
-	os << "Attempt to process a Cell Broadband Engine SPU profile without"
-	   << "proper BFD support.\n"
-	   << "Rebuild the opreport utility with the correct BFD library.\n"
-	   << "See the OProfile user manual for more information.\n";
-	throw op_runtime_error(os.str());
-#endif
-	if (!nbfd) {
-		cverb << vbfd << "spu_open_bfd failed for " << name << endl;
-		return NULL;
-	}
-
-	bfd_check_format(nbfd, bfd_object);
-
-	return nbfd;
-}
diff --git a/libutil++/bfd_support.cpp b/libutil++/bfd_support.cpp
index d1383f8..d5fd70d 100644
--- a/libutil++/bfd_support.cpp
+++ b/libutil++/bfd_support.cpp
@@ -17,11 +17,17 @@
 #include "file_manip.h"
 #include "cverb.h"
 #include "locate_images.h"
+#include "op_libiberty.h"
+#include "op_exception.h"
 
+#include <unistd.h>
+#include <errno.h>
+#include <elf.h>
 #include <cstdlib>
 #include <cstring>
 #include <cassert>
 #include <iostream>
+#include <sstream>
 #include <fstream>
 #include <sstream>
 #include <string>
@@ -34,6 +40,11 @@ extern verbose vbfd;
 
 namespace {
 
+#ifndef NT_GNU_BUILD_ID
+#define NT_GNU_BUILD_ID 3
+#endif
+static size_t build_id_size;
+
 
 void check_format(string const & file, bfd ** ibfd)
 {
@@ -71,10 +82,103 @@ bool separate_debug_file_exists(string & name, unsigned long const crc,
 				      reinterpret_cast<unsigned char *>(&buffer[0]),
 				      file.gcount());
 	}
-	cverb << vbfd << " with crc32 = " << hex << file_crc << endl;
+	ostringstream message;
+	message << " with crc32 = " << hex << file_crc << endl;
+	cverb << vbfd << message.str();
 	return crc == file_crc;
 }
 
+static bool find_debuginfo_file_by_buildid(unsigned char * buildid, string & debug_filename)
+{
+	size_t build_id_fname_size = strlen (DEBUGDIR) + (sizeof "/.build-id/" - 1) + 1	// the two "+ 1" terms cover the '/'
+			+ (2 * build_id_size) + (sizeof ".debug" - 1) + 1;	// after the first byte and the final NUL
+	char * buildid_symlink = (char *) xmalloc(build_id_fname_size);
+	char * sptr = buildid_symlink;
+	unsigned char * bptr = buildid;
+	bool retval = false;
+	size_t build_id_segment_len = strlen("/.build-id/");
+	/* Check for "<DEBUGDIR>/.build-id/<xx>/<rest>.debug", where <xx> is the
+	 * first build-id byte in hex and <rest> the remaining bytes in hex. */
+	memcpy(sptr, DEBUGDIR, strlen(DEBUGDIR));
+	sptr += strlen(DEBUGDIR);
+	memcpy(sptr, "/.build-id/", build_id_segment_len);
+	sptr += build_id_segment_len;
+	sptr += sprintf(sptr, "%02x", (unsigned) *bptr++);
+	*sptr++ = '/';
+	for (int i = build_id_size - 1; i > 0; i--)	// remaining build_id_size - 1 bytes
+		sptr += sprintf(sptr, "%02x", (unsigned) *bptr++);
+
+	strcpy(sptr, ".debug");
+
+	if (access (buildid_symlink, F_OK) == 0) {
+		debug_filename = op_realpath (buildid_symlink);	// assigned even when false is returned below
+		if (debug_filename.compare(buildid_symlink)) {	// non-zero: result differs from the path built above
+			retval = true;
+			cverb << vbfd << "Using build-id symlink" << endl;
+		}
+	}
+	free(buildid_symlink);
+	if (!retval)
+		cverb << vbfd << "build-id file not found; falling back to CRC method." << endl;
+
+	return retval;
+}
+
+/* Copy the NT_GNU_BUILD_ID note descriptor of ibfd into build_id and store
+ * its length in build_id_size.  Returns false when no such note exists;
+ * throws op_fatal_error when the note section cannot be read. */
+static bool get_build_id(bfd * ibfd, unsigned char * build_id)
+{
+	Elf32_Nhdr op_note_hdr;
+	asection * sect;
+	char * ptr;
+	bool retval = false;
+
+	cverb << vbfd << "fetching build-id from runtime binary ...";
+	if (!(sect = bfd_get_section_by_name(ibfd, ".note.gnu.build-id"))) {
+		if (!(sect = bfd_get_section_by_name(ibfd, ".notes"))) {
+			cverb << vbfd << " No build-id section found" << endl;
+			return false;
+		}
+	}
+
+	bfd_size_type buildid_sect_size = bfd_section_size(ibfd, sect);
+	char * contents = (char *) xmalloc(buildid_sect_size);
+	errno = 0;
+	if (!bfd_get_section_contents(ibfd, sect,
+				 reinterpret_cast<unsigned char *>(contents),
+				 static_cast<file_ptr>(0), buildid_sect_size)) {
+		string msg = "bfd_get_section_contents:get_build_id";
+		if (errno) {
+			msg += ": ";
+			msg += strerror(errno);
+		}
+		throw op_fatal_error(msg);
+	}
+
+	ptr = contents;
+	while (ptr + sizeof(op_note_hdr) <= contents + buildid_sect_size) {
+		op_note_hdr.n_namesz = bfd_get_32(ibfd, reinterpret_cast<bfd_byte *>(ptr));
+		op_note_hdr.n_descsz = bfd_get_32(ibfd, reinterpret_cast<bfd_byte *>(ptr + 4));
+		op_note_hdr.n_type = bfd_get_32(ibfd, reinterpret_cast<bfd_byte *>(ptr + 8));
+		ptr += sizeof(op_note_hdr);
+		if ((op_note_hdr.n_type == NT_GNU_BUILD_ID) &&
+				(op_note_hdr.n_namesz == sizeof("GNU")) &&
+				(strcmp("GNU", ptr ) == 0)) {
+			build_id_size = op_note_hdr.n_descsz;
+			memcpy(build_id, ptr + op_note_hdr.n_namesz, build_id_size);
+			retval = true;
+			cverb << vbfd << "Found build-id" << endl;
+			break;
+		}
+		/* name and descriptor are each padded to a 4-byte boundary */
+		ptr += ((op_note_hdr.n_namesz + 3) & ~3U) + ((op_note_hdr.n_descsz + 3) & ~3U);
+	}
+	if (!retval)
+		cverb << vbfd << " No build-id found" << endl;
+	free(contents);
+	return retval;
+}
 
 bool get_debug_link_info(bfd * ibfd, string & filename, unsigned long & crc32)
 {
@@ -87,15 +191,19 @@ bool get_debug_link_info(bfd * ibfd, string & filename, unsigned long & crc32)
 		return false;
 	
 	bfd_size_type debuglink_size = bfd_section_size(ibfd, sect);  
-	char contents[debuglink_size];
+	char * contents = (char *) xmalloc(debuglink_size);
 	cverb << vbfd
 	      << ".gnu_debuglink section has size " << debuglink_size << endl;
 	
 	if (!bfd_get_section_contents(ibfd, sect, 
 				 reinterpret_cast<unsigned char *>(contents), 
 				 static_cast<file_ptr>(0), debuglink_size)) {
-		bfd_perror("bfd_get_section_contents:get_debug:");
-		exit(2);
+		string msg = "bfd_get_section_contents:get_debug";
+		if (errno) {
+			msg += ": ";
+			msg += strerror(errno);
+		}
+		throw op_fatal_error(msg);
 	}
 	
 	/* CRC value is stored after the filename, aligned up to 4 bytes. */
@@ -107,6 +215,7 @@ bool get_debug_link_info(bfd * ibfd, string & filename, unsigned long & crc32)
 			       reinterpret_cast<bfd_byte *>(contents + crc_offset));
 	filename = string(contents, filename_len);
 	cverb << vbfd << ".gnu_debuglink filename is " << filename << endl;
+	free(contents);
 	return true;
 }
 
@@ -298,11 +407,26 @@ bool find_separate_debug_file(bfd * ibfd, string const & filepath_in,
 {
 	string filepath(filepath_in);
 	string basename;
-	unsigned long crc32;
+	unsigned long crc32 = 0;
+	// The readelf program uses a char [64], so that's what we'll use.
+	// To my knowledge, the build-id should not be bigger than 20 chars.
+	unsigned char buildid[64];
 	
+	if (get_build_id(ibfd, buildid) &&
+	   find_debuginfo_file_by_buildid(buildid, debug_filename))
+		return true;
+
 	if (!get_debug_link_info(ibfd, basename, crc32))
 		return false;
 
+	/* Use old method of finding debuginfo file by comparing runtime binary's
+	 * CRC with the CRC we calculate from the debuginfo file's contents.
+	 * NOTE:  This method breaks on systems where "MiniDebugInfo" is used
+	 * since the CRC stored in the runtime binary won't match the compressed
+	 * debuginfo file's CRC.  But in practice, we shouldn't ever run into such
+	 * a scenario since the build-id should always be available.
+	 */
+
 	// Work out the image file's directory prefix
 	string filedir = op_dirname(filepath);
 	// Make sure it starts with /
@@ -313,8 +437,10 @@ bool find_separate_debug_file(bfd * ibfd, string const & filepath_in,
 	string second_try(DEBUGDIR + filedir + basename);
 	string third_try(filedir + basename);
 
-	cverb << vbfd << "looking for debugging file " << basename 
-	      << " with crc32 = " << hex << crc32 << endl;
+	ostringstream message;
+	message << "looking for debugging file " << basename
+	        << " with crc32 = " << hex << crc32 << endl;
+	cverb << vbfd << message.str();
 
 	if (separate_debug_file_exists(first_try, crc32, extra)) 
 		debug_filename = first_try; 
@@ -349,7 +475,8 @@ bool interesting_symbol(asymbol * sym)
 	/* ARM assembler internal mapping symbols aren't interesting */
 	if ((strcmp("$a", sym->name) == 0) ||
 	    (strcmp("$t", sym->name) == 0) ||
-	    (strcmp("$d", sym->name) == 0))
+	    (strcmp("$d", sym->name) == 0) ||
+	    (strcmp("$x", sym->name) == 0))
 		return false;
 
 	// C++ exception stuff
@@ -507,10 +634,14 @@ void bfd_info::translate_debuginfo_syms(asymbol ** dbg_syms, long nr_dbg_syms)
 
 bool bfd_info::get_synth_symbols()
 {
-	extern const bfd_target bfd_elf64_powerpc_vec;
-	extern const bfd_target bfd_elf64_powerpcle_vec;
-	bool is_elf64_powerpc_target = (abfd->xvec == &bfd_elf64_powerpc_vec)
-		|| (abfd->xvec == &bfd_elf64_powerpcle_vec);
+	const char* targname = bfd_get_target(abfd);
+	// Match elf64-powerpc and elf64-powerpc-freebsd, but not
+	// elf64-powerpcle.  elf64-powerpcle is a different ABI without
+	// function descriptors, so we don't need the synthetic
+	// symbols to have function code marked by a symbol.
+	bool is_elf64_powerpc_target = (!strncmp(targname, "elf64-powerpc", 13)
+					&& (targname[13] == 0
+					    || targname[13] == '-'));
 
 	if (!is_elf64_powerpc_target)
 		return false;
@@ -609,8 +740,10 @@ void bfd_info::get_symbols()
 	if (bfd_get_file_flags(abfd) & HAS_SYMS)
 		nr_syms = bfd_get_symtab_upper_bound(abfd);
 
-	cverb << vbfd << "bfd_get_symtab_upper_bound: " << dec
-	      << nr_syms << hex << endl;
+	ostringstream message;
+	message << "bfd_get_symtab_upper_bound: " << dec
+	        << nr_syms << hex << endl;
+	cverb << vbfd << message.str();
 
 	nr_syms /= sizeof(asymbol *);
 
@@ -622,8 +755,10 @@ void bfd_info::get_symbols()
 	} else {
 		syms.reset(new asymbol *[nr_syms]);
 		nr_syms = bfd_canonicalize_symtab(abfd, syms.get());
-		cverb << vbfd << "bfd_canonicalize_symtab: " << dec
-		      << nr_syms << hex << endl;
+		ostringstream message;
+		message << "bfd_canonicalize_symtab: " << dec
+		        << nr_syms << hex << endl;
+		cverb << vbfd << message.str();
 	}
 }
 
diff --git a/libutil++/bfd_support.h b/libutil++/bfd_support.h
index de3abe7..6ff4b25 100644
--- a/libutil++/bfd_support.h
+++ b/libutil++/bfd_support.h
@@ -36,6 +36,13 @@ struct bfd_info {
 	/// return true if BFD is readable
 	bool valid() const { return abfd; }
 
+	/* return true if the BFD is a pseudo file.  A pseudo BFD file is used
+	 * when the kernel symbols are obtained from /proc/kallsyms rather
+	 * than vmlinux.
+	 */
+	bool is_pseudo_bfd() const {
+	return (abfd == ((bfd *) 1)); }
+
 	/// return true if BFD has debug info
 	bool has_debug_info() const;
 
@@ -125,9 +132,6 @@ bfd * open_bfd(std::string const & file);
 /// open the given BFD from the fd
 bfd * fdopen_bfd(std::string const & file, int fd);
 
-/// Return a BFD for an SPU ELF embedded in PPE binary file
-bfd * spu_open_bfd(std::string const name, int fd, uint64_t offset_to_spu_elf);
-
 /// Return true if the symbol is worth looking at
 bool interesting_symbol(asymbol * sym);
 
diff --git a/libutil++/cached_value.h b/libutil++/cached_value.h
index 362e16a..fdc3e88 100644
--- a/libutil++/cached_value.h
+++ b/libutil++/cached_value.h
@@ -20,7 +20,7 @@ template <class T>
 class cached_value
 {
 public:
-	cached_value() : set(false) {}
+	cached_value() : set(false) { value = T(); }
 
 	typedef T value_type;
 
diff --git a/libutil++/child_reader.cpp b/libutil++/child_reader.cpp
index b9545b5..fd7bc5f 100644
--- a/libutil++/child_reader.cpp
+++ b/libutil++/child_reader.cpp
@@ -223,7 +223,7 @@ int child_reader::terminate_process()
 	// we must protect against multiple call
 	if (!is_terminated) {
 		int ret;
-		waitpid(pid, &ret, 0);
+		(void)waitpid(pid, &ret, 0);
 
 		is_terminated = true;
 
diff --git a/libutil++/op_bfd.cpp b/libutil++/op_bfd.cpp
index 7a03d38..389c920 100644
--- a/libutil++/op_bfd.cpp
+++ b/libutil++/op_bfd.cpp
@@ -17,6 +17,7 @@
 #include <cstring>
 
 #include <sys/stat.h>
+#include <unistd.h>
 
 #include <cstdlib>
 
@@ -24,6 +25,8 @@
 #include <iostream>
 #include <iomanip>
 #include <sstream>
+#include <cstdio>
+#include <fstream>
 
 #include "op_bfd.h"
 #include "locate_images.h"
@@ -81,7 +84,7 @@ op_bfd_symbol::op_bfd_symbol(bfd_vma vma, size_t size, string const & name)
 	: bfd_symbol(0), symb_value(vma),
 	  section_filepos(0), section_vma(0),
 	  symb_size(size), symb_name(name),
-	  symb_artificial(true)
+	  symb_hidden(false), symb_weak(false), symb_artificial(true)
 {
 }
 
@@ -104,13 +107,14 @@ op_bfd::op_bfd(string const & fname, string_filter const & symbol_filter,
 	archive_path(extra_images.get_archive_path()),
 	extra_found_images(extra_images),
 	file_size(-1),
-	anon_obj(false)
+	anon_obj(false),
+	vma_adj(0)
 {
-	int fd;
+	fd =  -1;
 	struct stat st;
 	// after creating all symbol it's convenient for user code to access
 	// symbols through a vector. We use an intermediate list to avoid a
-	// O(N²) behavior when we will filter vector element below
+	// O(N^2) behavior when we will filter vector element below
 	symbols_found_t symbols;
 	asection const * sect;
 	string suf = ".jo";
@@ -143,7 +147,6 @@ op_bfd::op_bfd(string const & fname, string_filter const & symbol_filter,
 	file_size = st.st_size;
 
 	ibfd.abfd = fdopen_bfd(image_path, fd);
-
 	if (!ibfd.valid()) {
 		cverb << vbfd << "fdopen_bfd failed for " << image_path << endl;
 		ok = false;
@@ -189,6 +192,14 @@ out_fail:
 
 op_bfd::~op_bfd()
 {
+	if (ibfd.abfd == (bfd * ) 1)
+		/* Kallsyms setup a pseudo ibfd file.  Set to NULL
+		 * to prevent a segmentation fault.
+		 */
+		ibfd.abfd = (bfd * ) NULL;
+
+	if (fd != -1)
+		close(fd);
 }
 
 
@@ -220,7 +231,7 @@ void op_bfd::get_symbols(op_bfd::symbols_found_t & symbols)
 	if (dbfd.valid() && !ibfd.nr_syms)
 		vma_adj = ibfd.abfd->start_address - dbfd.abfd->start_address;
 	else
-	vma_adj= 0;
+		vma_adj= 0;
 
 	size_t i;
 	for (i = 0; i < ibfd.nr_syms; ++i) {
@@ -280,6 +291,118 @@ void op_bfd::get_symbols(op_bfd::symbols_found_t & symbols)
 	}
 }
 
+#define KERN_ADDR_SPACE_START_SYMBOL  "_text"
+#define KERN_ADDR_SPACE_END_SYMBOL    "_etext"
+
+/* Read kernel symbols from an open /proc/kallsyms-style stream: every line is
+ * "<hex address> <type> <name>".  Symbols from _text up to (not including)
+ * _etext are appended to 'symbols' and to syms, with addresses rebased to
+ * _text and each size taken as the distance to the next line's address. */
+void op_bfd::get_kallsym_symbols(symbols_found_t & symbols, ifstream& infile)
+{
+	string name, name_prev;
+	string address_str;
+	std::string line;
+	char type;
+	stringstream iss;
+
+	bfd_vma start = 0, start_prev = 0;
+	unsigned long long length, start_value = 0;
+	bool ignore_symbol = true;
+	bfd_vma base_addr = 0;
+	name_prev = "";
+
+	while ( !infile.eof() ) {
+		getline(infile, line);
+
+		/* Replace the buffer instead of appending to it, so a trailing
+		 * "\t[module]" field cannot leak into the next line's address. */
+		iss.clear();
+		iss.str(line);
+
+		iss >> address_str;
+		iss >> type;
+		iss >> name;
+
+		sscanf(address_str.c_str(), "%llx", &start_value);
+		start = start_value;
+		/* zero when two consecutive symbols share an address */
+		length = start - start_prev;
+
+		if (!ignore_symbol)
+			symbols.push_back(op_bfd_symbol(start_prev - base_addr,
+							length, name_prev));
+		start_prev = start;
+		name_prev = name;
+
+		/* Exact match; a prefix compare would also accept "_" or "_te". */
+		if (name == KERN_ADDR_SPACE_START_SYMBOL) {
+			/* ignore all symbols before the symbol for the
+			 * start of the kernel address space.
+			 */
+			if (start == 0)
+				/* do not have the proper permission to
+				 * read /proc/kallsyms.
+				 */
+				return;
+
+			base_addr = start;
+			ignore_symbol = false;
+		}
+
+		if (name == KERN_ADDR_SPACE_END_SYMBOL)
+			break;
+	}
+
+	/* Add symbols */
+	copy(symbols.begin(), symbols.end(), back_inserter(syms));
+
+	ostringstream msg;
+	msg << "Kallsyms, number of symbols now "
+	    << dec << syms.size() << hex << endl;
+	cverb << vbfd << msg.str();
+}
+
+/*
+ * Constructor used when kernel symbols come from a kallsyms-style text file
+ * (fname) instead of a BFD image: no BFD is opened, ibfd.abfd is set to the
+ * sentinel value 1, and get_kallsym_symbols() is called to read the symbols.
+ */
+op_bfd::op_bfd(string const & fname, extra_images const & extra_images)
+	:
+	filename(fname),
+	archive_path(""),
+	extra_found_images(extra_images),
+	file_size(-1),
+	anon_obj(false),
+	vma_adj(0)
+
+{
+	symbols_found_t symbols;
+	ifstream infile;
+	fd =  -1;
+
+	ibfd.abfd = (bfd * ) NULL;
+
+	/* Technically this is not a bfd file but we need to set ibfd.abfd
+	 * so the abfd.valid() check in profile_t::set_offset() will be true.
+	 * It will be set to 1 so we know we are using kallsyms.  The
+	 * destructor will be looking for ibfd.abfd = 1.
+	 */
+	infile.open(fname.c_str());
+	if (infile) {
+		ibfd.abfd = (bfd * ) 1;
+	} else {
+		cverb << vbfd << "open failed for " << fname << endl;
+		return;
+	}
+
+	/* go read the kallsyms file and put them into symbols */
+	get_kallsym_symbols(symbols, infile);
+
+	infile.close();
+	return;
+}
 
 void op_bfd::add_symbols(op_bfd::symbols_found_t & symbols,
                          string_filter const & symbol_filter)
@@ -346,7 +469,7 @@ bool op_bfd::has_debug_info() const
 	if (debug_info.cached())
 		return debug_info.get();
 
-	if (!ibfd.valid())
+	if (ibfd.is_pseudo_bfd() || !ibfd.valid())
 		return debug_info.reset(false);
 
 	if (ibfd.has_debug_info())
diff --git a/libutil++/op_bfd.h b/libutil++/op_bfd.h
index 6bd7810..c7092b0 100644
--- a/libutil++/op_bfd.h
+++ b/libutil++/op_bfd.h
@@ -111,16 +111,11 @@ public:
 	       bool & ok);
 
 	/**
-	 * This constructor is used when processing an SPU profile
-	 * where the SPU ELF is embedded within the PPE binary.
+	 * This constructor is used when the /proc/kallsyms file is used
+	 * to get the kernel symbols.
 	 */
-	op_bfd(uint64_t spu_offset,
-	       std::string const & filename,
-	       string_filter const & symbol_filter,
-	       extra_images const & extra_images,
-	       bool & ok);
-
-	std::string get_embedding_filename() const { return embedding_filename; }
+	op_bfd(std::string const & filename,
+	       extra_images const & extra_images);
 
 	/// close an opened bfd image and free all related resources
 	~op_bfd();
@@ -229,6 +224,9 @@ private:
 	 */
 	void get_symbols(symbols_found_t & symbols);
 
+	/* functions for reading kallsyms */
+	void get_kallsym_symbols(symbols_found_t & symbols, std::ifstream& infile);
+
 	/**
 	 * Helper function for get_symbols.
 	 * Populates bfd_syms and extracts the "interesting_symbol"s.
@@ -294,12 +292,6 @@ private:
 	// mapping of section names to filepos in the original binary
 	filepos_map_t filepos_map;
 
-	/**
-	 * If spu_offset is non-zero, embedding_filename is the file containing
-	 * the embedded SPU image.
-	 */
-	std::string embedding_filename;
-
 	bool anon_obj;
 
 	/**
@@ -323,6 +315,13 @@ private:
 	 */
         bfd_vma vma_adj;
 
+        /**
+         * The file descriptor for an image file that we pass to fdopen_bfd must be kept
+         * open through the life of the op_bfd to enable proper behavior of certain
+         * BFD functions -- in particular, bfd_find_nearest_line().
+         */
+        int fd;
+
 };
 
 
diff --git a/libutil++/op_spu_bfd.cpp b/libutil++/op_spu_bfd.cpp
deleted file mode 100644
index 42949e7..0000000
--- a/libutil++/op_spu_bfd.cpp
+++ /dev/null
@@ -1,185 +0,0 @@
-/**
- * @file libutil++/op_spu_bfd.cpp
- * Encapsulation of bfd objects for Cell BE SPU
- *
- * @remark Copyright 2007 OProfile authors
- * @remark Read the file COPYING
- *
- * @author Maynard Johnson
- * (C) Copyright IBM Corporation 2007
- */
-
-
-#include <fcntl.h>
-#include <sys/stat.h>
-#include <cstdlib>
-#include <cstring>
-
-#include <iostream>
-#include <cstring>
-#include <cstdlib>
-
-#include "op_bfd.h"
-#include "locate_images.h"
-#include "op_libiberty.h"
-#include "string_filter.h"
-#include "cverb.h"
-
-#define OP_SPU_DYN_FLAG		0x10000000	/* kernel module adds this offset */
-						/* to SPU code it can't find in the map */
-#define OP_SPU_MEMSIZE		0x3ffff		/* Physical memory size on an SPU */
-
-using namespace std;
-
-extern verbose vbfd;
-
-/*
- * This overload of the op_bfd constructor is patterned after the
- * constructor in libutil++/op_bfd.cpp, with the additional processing
- * needed to handle an embedded spu offset.
- */
-op_bfd::op_bfd(uint64_t spu_offset, string const & fname,
-	       string_filter const & symbol_filter, 
-	       extra_images const & extra_images, bool & ok)
-	:
-	archive_path(extra_images.get_archive_path()),
-	extra_found_images(extra_images),
-	file_size(-1),
-	embedding_filename(fname),
-	anon_obj(false)
-{
-	int fd;
-	struct stat st;
-	int notes_remaining;
-	bool spu_note_found = false;
-	size_t sec_size = 0;
-	unsigned int oct_per_byte;
-	asection * note = NULL;
-
-	symbols_found_t symbols;
-	asection const * sect;
-
-	image_error image_ok;
-	string const image_path =
-		extra_images.find_image_path(fname, image_ok, true);
-
-	cverb << vbfd << "op_bfd ctor for " << image_path << endl;
-	if (!ok)
-		goto out_fail;
-
-	fd = open(image_path.c_str(), O_RDONLY);
-	if (fd == -1) {
-		cverb << vbfd << "open failed for " << image_path << endl;
-		ok = false;
-		goto out_fail;
-	}
-
-	if (fstat(fd, &st)) {
-		cverb << vbfd << "stat failed for " << image_path << endl;
-		ok = false;
-		goto out_fail;
-	}
-
-	file_size = st.st_size;
-	ibfd.abfd = spu_open_bfd(image_path, fd, spu_offset);
-
-	if (!ibfd.valid()) {
-		cverb << vbfd << "fdopen_bfd failed for " << image_path << endl;
-		ok = false;
-		goto out_fail;
-	}
-
-	/* For embedded SPU ELF, a note section named '.note.spu_name'
-	 * contains the name of the SPU binary image in the description
-	 * field.
-	 */
-	note = bfd_get_section_by_name(ibfd.abfd, ".note.spu_name");
-	if (!note) {
-		cverb << vbfd << "No .note.spu-name section found" << endl;
-		goto find_sec_code;
-	}
-	cverb << vbfd << "found .note.spu_name section" << endl;
-
-	bfd_byte * sec_contents;
-	oct_per_byte = bfd_octets_per_byte(ibfd.abfd);
-	sec_size = bfd_section_size(ibfd.abfd, note)/oct_per_byte;
-
-	sec_contents = (bfd_byte *) xmalloc(sec_size);
-	if (!bfd_get_section_contents(ibfd.abfd, note, sec_contents,
-				      0, sec_size)) {
-		cverb << vbfd << "bfd_get_section_contents with size "
-		      << sec_size << " returned an error" << endl;
-		ok = false;
-		goto out_fail;
-	}
-	notes_remaining = sec_size;
-	while (notes_remaining && !spu_note_found) {
-		unsigned int  nsize, dsize, type;
-		nsize = *((unsigned int *) sec_contents);
-		dsize = *((unsigned int *) sec_contents +1);
-		type = *((unsigned int *) sec_contents +2);
-		int remainder, desc_start, name_pad_length, desc_pad_length;
-		name_pad_length = desc_pad_length = 0;
-		/* Calculate padding for 4-byte alignment */
-		remainder = nsize % 4;
-		if (remainder != 0)
-			name_pad_length = 4 - remainder;
-		desc_start = 12 + nsize + name_pad_length;
-		if (type != 1) {
-			int note_record_length;
-			if ((remainder = (dsize % 4)) != 0)
-				desc_pad_length = 4 - remainder;
-			note_record_length = 12 + nsize +
-				name_pad_length + dsize + desc_pad_length;
-			notes_remaining -= note_record_length;
-			sec_contents += note_record_length;
-			continue;
-		} else {
-			spu_note_found = true;
-			/* Must memcpy the data from sec_contents to a
-			 * 'char *' first, then stringify it, since
-			 * the type of sec_contents (bfd_byte *) cannot be
-			 * used as input for creating a string.
-			 */
-			char * description = (char *) xmalloc(dsize);
-			memcpy(description, sec_contents + desc_start, dsize);
-			filename = description;
-			free(description);
-		}
-	}
-	free(sec_contents);
-	/* Default to app name for the image name */
-	if (spu_note_found == false)
-		filename = fname;
-
-find_sec_code:
-	for (sect = ibfd.abfd->sections; sect; sect = sect->next) {
-		if (sect->flags & SEC_CODE) {
-			if (filepos_map[sect->name] != 0) {
-				cerr << "Found section \"" << sect->name
-				     << "\" twice for " << get_filename()
-				     << endl;
-				abort();
-			}
-
-			filepos_map[sect->name] = sect->filepos;
-		}
-	}
-
-	get_symbols(symbols);
-
-	/* In some cases the SPU library code generates code stubs on the stack. */
-	/* The kernel module remaps those addresses so add an entry to catch/report them. */
-	symbols.push_back(op_bfd_symbol(OP_SPU_DYN_FLAG, OP_SPU_MEMSIZE,
-			  "__send_to_ppe(stack)"));
-
-out:
-	add_symbols(symbols, symbol_filter);
-	return;
-out_fail:
-	ibfd.close();
-	dbfd.close();
-	file_size = -1;
-	goto out;
-}
-
diff --git a/libutil++/tests/Makefile.in b/libutil++/tests/Makefile.in
index fcb829d..fd92083 100644
--- a/libutil++/tests/Makefile.in
+++ b/libutil++/tests/Makefile.in
@@ -44,7 +44,6 @@ DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
 am__aclocal_m4_deps = $(top_srcdir)/m4/binutils.m4 \
 	$(top_srcdir)/m4/builtinexpect.m4 \
-	$(top_srcdir)/m4/cellspubfdsupport.m4 \
 	$(top_srcdir)/m4/compileroption.m4 \
 	$(top_srcdir)/m4/copyifchange.m4 $(top_srcdir)/m4/docbook.m4 \
 	$(top_srcdir)/m4/extradirs.m4 \
@@ -53,7 +52,7 @@ am__aclocal_m4_deps = $(top_srcdir)/m4/binutils.m4 \
 	$(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
 	$(top_srcdir)/m4/mallocattribute.m4 \
 	$(top_srcdir)/m4/poptconst.m4 \
-	$(top_srcdir)/m4/precompiledheader.m4 $(top_srcdir)/m4/qt.m4 \
+	$(top_srcdir)/m4/precompiledheader.m4 \
 	$(top_srcdir)/m4/sstream.m4 $(top_srcdir)/m4/typedef.m4 \
 	$(top_srcdir)/configure.ac
 am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
@@ -164,7 +163,6 @@ LN_S = @LN_S@
 LTLIBOBJS = @LTLIBOBJS@
 MAKEINFO = @MAKEINFO@
 MKDIR_P = @MKDIR_P@
-MOC = @MOC@
 NM = @NM@
 NMEDIT = @NMEDIT@
 OBJDUMP = @OBJDUMP@
@@ -188,20 +186,13 @@ PFM_LIB = @PFM_LIB@
 PKG_CONFIG = @PKG_CONFIG@
 POPT_LIBS = @POPT_LIBS@
 PTRDIFF_T_TYPE = @PTRDIFF_T_TYPE@
-QT_CFLAGS = @QT_CFLAGS@
-QT_INCLUDES = @QT_INCLUDES@
-QT_LDFLAGS = @QT_LDFLAGS@
-QT_LIB = @QT_LIB@
-QT_LIBS = @QT_LIBS@
-QT_VERSION = @QT_VERSION@
 RANLIB = @RANLIB@
+RT_LIB = @RT_LIB@
 SED = @SED@
 SET_MAKE = @SET_MAKE@
 SHELL = @SHELL@
 SIZE_T_TYPE = @SIZE_T_TYPE@
 STRIP = @STRIP@
-UIC = @UIC@
-UIChelp = @UIChelp@
 VERSION = @VERSION@
 XMKMF = @XMKMF@
 XML_CATALOG = @XML_CATALOG@
diff --git a/libutil++/utility.h b/libutil++/utility.h
index 83c36ca..07c4d19 100644
--- a/libutil++/utility.h
+++ b/libutil++/utility.h
@@ -14,6 +14,8 @@
 
 #include <cstddef>
 
+#define KALL_SYM_FILE "/proc/kallsyms"
+
 /** noncopyable : object of class derived from this class can't be copied
  * and isn't copy-constructible */
 class noncopyable {
diff --git a/libutil/Makefile.in b/libutil/Makefile.in
index dbbdacc..a3a4ce6 100644
--- a/libutil/Makefile.in
+++ b/libutil/Makefile.in
@@ -39,7 +39,6 @@ DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
 am__aclocal_m4_deps = $(top_srcdir)/m4/binutils.m4 \
 	$(top_srcdir)/m4/builtinexpect.m4 \
-	$(top_srcdir)/m4/cellspubfdsupport.m4 \
 	$(top_srcdir)/m4/compileroption.m4 \
 	$(top_srcdir)/m4/copyifchange.m4 $(top_srcdir)/m4/docbook.m4 \
 	$(top_srcdir)/m4/extradirs.m4 \
@@ -48,7 +47,7 @@ am__aclocal_m4_deps = $(top_srcdir)/m4/binutils.m4 \
 	$(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
 	$(top_srcdir)/m4/mallocattribute.m4 \
 	$(top_srcdir)/m4/poptconst.m4 \
-	$(top_srcdir)/m4/precompiledheader.m4 $(top_srcdir)/m4/qt.m4 \
+	$(top_srcdir)/m4/precompiledheader.m4 \
 	$(top_srcdir)/m4/sstream.m4 $(top_srcdir)/m4/typedef.m4 \
 	$(top_srcdir)/configure.ac
 am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
@@ -174,7 +173,6 @@ LN_S = @LN_S@
 LTLIBOBJS = @LTLIBOBJS@
 MAKEINFO = @MAKEINFO@
 MKDIR_P = @MKDIR_P@
-MOC = @MOC@
 NM = @NM@
 NMEDIT = @NMEDIT@
 OBJDUMP = @OBJDUMP@
@@ -198,20 +196,13 @@ PFM_LIB = @PFM_LIB@
 PKG_CONFIG = @PKG_CONFIG@
 POPT_LIBS = @POPT_LIBS@
 PTRDIFF_T_TYPE = @PTRDIFF_T_TYPE@
-QT_CFLAGS = @QT_CFLAGS@
-QT_INCLUDES = @QT_INCLUDES@
-QT_LDFLAGS = @QT_LDFLAGS@
-QT_LIB = @QT_LIB@
-QT_LIBS = @QT_LIBS@
-QT_VERSION = @QT_VERSION@
 RANLIB = @RANLIB@
+RT_LIB = @RT_LIB@
 SED = @SED@
 SET_MAKE = @SET_MAKE@
 SHELL = @SHELL@
 SIZE_T_TYPE = @SIZE_T_TYPE@
 STRIP = @STRIP@
-UIC = @UIC@
-UIChelp = @UIChelp@
 VERSION = @VERSION@
 XMKMF = @XMKMF@
 XML_CATALOG = @XML_CATALOG@
diff --git a/libutil/op_cpufreq.c b/libutil/op_cpufreq.c
index 17cfee4..8678b08 100644
--- a/libutil/op_cpufreq.c
+++ b/libutil/op_cpufreq.c
@@ -110,7 +110,10 @@ static double op_cpu_freq_sys_devices(void)
 		free(line);
 	op_close_file(fp);
 
-	return fval;
+        /* Return the frequency in MHz.  When the frequency is
+         * printed it is assumed to be in units of MHz.
+	 */
+	return fval/1000;
 }
 
 
diff --git a/libutil/op_fileio.c b/libutil/op_fileio.c
index 9b3e21d..4c75865 100644
--- a/libutil/op_fileio.c
+++ b/libutil/op_fileio.c
@@ -91,6 +91,36 @@ void op_write_u64(FILE * fp, u64 val)
 	op_write_file(fp, &val, sizeof(val));
 }
 
+unsigned long op_read_long_from_file(char const * filename, int fatal)
+{
+	FILE * fp;
+	unsigned long value;
+	/* On any failure: exit(EXIT_FAILURE) if 'fatal' is non-zero, else return (unsigned long)-1. */
+	fp = fopen(filename, "r");
+	if (fp == NULL) {
+		if (!fatal)
+			return (unsigned long)-1;
+		fprintf(stderr,
+			"op_read_long_from_file: Failed to open %s, reason %s\n",
+			filename, strerror(errno));
+		exit(EXIT_FAILURE);
+	}
+
+	if (fscanf(fp, "%lu", &value) != 1) {	/* no unsigned long could be parsed */
+		fclose(fp);	/* close before either failure path leaves the function */
+		if (!fatal)
+			return (unsigned long)-1;
+		fprintf(stderr,
+			"op_read_long_from_file: Failed to convert contents of file %s to unsigned long\n",
+			filename);
+		exit(EXIT_FAILURE);
+	}
+
+	fclose(fp);
+
+	return value;
+}
+
 
 u32 op_read_int_from_file(char const * filename, int fatal)
 {
diff --git a/libutil/op_fileio.h b/libutil/op_fileio.h
index 49b126d..7c20127 100644
--- a/libutil/op_fileio.h
+++ b/libutil/op_fileio.h
@@ -50,6 +50,17 @@ FILE * op_open_file(char const * name, char const * mode);
  */
 u32 op_read_int_from_file(char const * filename, int fatal);
 
+/**
+ * op_read_long_from_file - parse an ASCII value from a file into an unsigned long
+ * @param filename  name of file to parse value from
+ * @param fatal  non-zero if any error must be fatal
+ *
+ * Reads an ASCII unsigned long number from the given file. If an error occurs, and
+ * the passed 'fatal' arg is zero, then (unsigned long)-1 is returned; else the
+ * value read in is returned.
+ */
+unsigned long op_read_long_from_file(char const * filename, int fatal);
+
 /**
  * op_close_file - close a file
  * @param fp  file pointer
diff --git a/libutil/tests/Makefile.in b/libutil/tests/Makefile.in
index d106b4a..f5880ca 100644
--- a/libutil/tests/Makefile.in
+++ b/libutil/tests/Makefile.in
@@ -40,7 +40,6 @@ DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
 am__aclocal_m4_deps = $(top_srcdir)/m4/binutils.m4 \
 	$(top_srcdir)/m4/builtinexpect.m4 \
-	$(top_srcdir)/m4/cellspubfdsupport.m4 \
 	$(top_srcdir)/m4/compileroption.m4 \
 	$(top_srcdir)/m4/copyifchange.m4 $(top_srcdir)/m4/docbook.m4 \
 	$(top_srcdir)/m4/extradirs.m4 \
@@ -49,7 +48,7 @@ am__aclocal_m4_deps = $(top_srcdir)/m4/binutils.m4 \
 	$(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
 	$(top_srcdir)/m4/mallocattribute.m4 \
 	$(top_srcdir)/m4/poptconst.m4 \
-	$(top_srcdir)/m4/precompiledheader.m4 $(top_srcdir)/m4/qt.m4 \
+	$(top_srcdir)/m4/precompiledheader.m4 \
 	$(top_srcdir)/m4/sstream.m4 $(top_srcdir)/m4/typedef.m4 \
 	$(top_srcdir)/configure.ac
 am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
@@ -135,7 +134,6 @@ LN_S = @LN_S@
 LTLIBOBJS = @LTLIBOBJS@
 MAKEINFO = @MAKEINFO@
 MKDIR_P = @MKDIR_P@
-MOC = @MOC@
 NM = @NM@
 NMEDIT = @NMEDIT@
 OBJDUMP = @OBJDUMP@
@@ -159,20 +157,13 @@ PFM_LIB = @PFM_LIB@
 PKG_CONFIG = @PKG_CONFIG@
 POPT_LIBS = @POPT_LIBS@
 PTRDIFF_T_TYPE = @PTRDIFF_T_TYPE@
-QT_CFLAGS = @QT_CFLAGS@
-QT_INCLUDES = @QT_INCLUDES@
-QT_LDFLAGS = @QT_LDFLAGS@
-QT_LIB = @QT_LIB@
-QT_LIBS = @QT_LIBS@
-QT_VERSION = @QT_VERSION@
 RANLIB = @RANLIB@
+RT_LIB = @RT_LIB@
 SED = @SED@
 SET_MAKE = @SET_MAKE@
 SHELL = @SHELL@
 SIZE_T_TYPE = @SIZE_T_TYPE@
 STRIP = @STRIP@
-UIC = @UIC@
-UIChelp = @UIChelp@
 VERSION = @VERSION@
 XMKMF = @XMKMF@
 XML_CATALOG = @XML_CATALOG@
diff --git a/m4/Makefile.am b/m4/Makefile.am
index 482fe2a..a6828ba 100644
--- a/m4/Makefile.am
+++ b/m4/Makefile.am
@@ -11,7 +11,6 @@ EXTRA_DIST = \
 	mallocattribute.m4 \
 	poptconst.m4 \
 	precompiledheader.m4 \
-	qt.m4 \
 	resultyn.m4 \
 	sstream.m4 \
 	typedef.m4
diff --git a/m4/Makefile.in b/m4/Makefile.in
index a71171f..974b552 100644
--- a/m4/Makefile.in
+++ b/m4/Makefile.in
@@ -40,7 +40,6 @@ DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
 am__aclocal_m4_deps = $(top_srcdir)/m4/binutils.m4 \
 	$(top_srcdir)/m4/builtinexpect.m4 \
-	$(top_srcdir)/m4/cellspubfdsupport.m4 \
 	$(top_srcdir)/m4/compileroption.m4 \
 	$(top_srcdir)/m4/copyifchange.m4 $(top_srcdir)/m4/docbook.m4 \
 	$(top_srcdir)/m4/extradirs.m4 \
@@ -49,7 +48,7 @@ am__aclocal_m4_deps = $(top_srcdir)/m4/binutils.m4 \
 	$(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
 	$(top_srcdir)/m4/mallocattribute.m4 \
 	$(top_srcdir)/m4/poptconst.m4 \
-	$(top_srcdir)/m4/precompiledheader.m4 $(top_srcdir)/m4/qt.m4 \
+	$(top_srcdir)/m4/precompiledheader.m4 \
 	$(top_srcdir)/m4/sstream.m4 $(top_srcdir)/m4/typedef.m4 \
 	$(top_srcdir)/configure.ac
 am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
@@ -112,7 +111,6 @@ LN_S = @LN_S@
 LTLIBOBJS = @LTLIBOBJS@
 MAKEINFO = @MAKEINFO@
 MKDIR_P = @MKDIR_P@
-MOC = @MOC@
 NM = @NM@
 NMEDIT = @NMEDIT@
 OBJDUMP = @OBJDUMP@
@@ -136,20 +134,13 @@ PFM_LIB = @PFM_LIB@
 PKG_CONFIG = @PKG_CONFIG@
 POPT_LIBS = @POPT_LIBS@
 PTRDIFF_T_TYPE = @PTRDIFF_T_TYPE@
-QT_CFLAGS = @QT_CFLAGS@
-QT_INCLUDES = @QT_INCLUDES@
-QT_LDFLAGS = @QT_LDFLAGS@
-QT_LIB = @QT_LIB@
-QT_LIBS = @QT_LIBS@
-QT_VERSION = @QT_VERSION@
 RANLIB = @RANLIB@
+RT_LIB = @RT_LIB@
 SED = @SED@
 SET_MAKE = @SET_MAKE@
 SHELL = @SHELL@
 SIZE_T_TYPE = @SIZE_T_TYPE@
 STRIP = @STRIP@
-UIC = @UIC@
-UIChelp = @UIChelp@
 VERSION = @VERSION@
 XMKMF = @XMKMF@
 XML_CATALOG = @XML_CATALOG@
@@ -223,7 +214,6 @@ EXTRA_DIST = \
 	mallocattribute.m4 \
 	poptconst.m4 \
 	precompiledheader.m4 \
-	qt.m4 \
 	resultyn.m4 \
 	sstream.m4 \
 	typedef.m4
diff --git a/m4/binutils.m4 b/m4/binutils.m4
index 25fb15a..c50e2f3 100644
--- a/m4/binutils.m4
+++ b/m4/binutils.m4
@@ -22,32 +22,34 @@ dnl Use a different bfd function here so as not to use cached result from above
 
 AC_LANG_PUSH(C)
 # Determine if bfd_get_synthetic_symtab macro is available
-OS="`uname`"
-if test "$OS" = "Linux"; then
-	AC_MSG_CHECKING([whether bfd_get_synthetic_symtab() exists in BFD library])
-	rm -f test-for-synth
-	AC_LANG_CONFTEST(
-		[AC_LANG_PROGRAM([[#include <bfd.h>]],
-			[[asymbol * synthsyms;	bfd * ibfd = 0; 
-			long synth_count = bfd_get_synthetic_symtab(ibfd, 0, 0, 0, 0, &synthsyms);
-			extern const bfd_target bfd_elf64_powerpc_vec;
-			extern const bfd_target bfd_elf64_powerpcle_vec;
-			char * ppc_name = bfd_elf64_powerpc_vec.name;
-			char * ppcle_name = bfd_elf64_powerpcle_vec.name;
-			printf("%s %s\n", ppc_name, ppcle_name);]])
-		])
-	$CC conftest.$ac_ext $CFLAGS $LDFLAGS $LIBS -o  test-for-synth > /dev/null 2>&1
-	if test -f test-for-synth; then
-		echo "yes"
-		SYNTHESIZE_SYMBOLS='1'
-	else
-		echo "no"
-		SYNTHESIZE_SYMBOLS='0'
-	fi
-	AC_DEFINE_UNQUOTED(SYNTHESIZE_SYMBOLS, $SYNTHESIZE_SYMBOLS, [Synthesize special symbols when needed])
-	rm -f test-for-synth*
+AC_MSG_CHECKING([whether bfd_get_synthetic_symtab() exists in BFD library])
+AC_LINK_IFELSE([AC_LANG_PROGRAM([[#include <bfd.h>]
+	[#include <stdio.h>]
+	[static bfd _ibfd;]],
+	[[asymbol * synthsyms;	bfd * ibfd = &_ibfd;
+	long synth_count = bfd_get_synthetic_symtab(ibfd, 0, 0, 0, 0, &synthsyms);
+	extern const bfd_target powerpc_elf64_vec;
+	char *ppc_name = powerpc_elf64_vec.name;
+	printf("%s\n", ppc_name);
+	]])],
+	[AC_MSG_RESULT([yes])
+	SYNTHESIZE_SYMBOLS=2],
+	[AC_LINK_IFELSE([AC_LANG_PROGRAM([[#include <bfd.h>]
+		[#include <stdio.h>]
+		[static bfd _ibfd;]],
+		[[asymbol * synthsyms;	bfd * ibfd = &_ibfd;
+		long synth_count = bfd_get_synthetic_symtab(ibfd, 0, 0, 0, 0, &synthsyms);
+		extern const bfd_target bfd_elf64_powerpc_vec;
+		char *ppc_name = bfd_elf64_powerpc_vec.name;
+		printf("%s\n", ppc_name);
+		]])],
+		[AC_MSG_RESULT([yes])
+		SYNTHESIZE_SYMBOLS=1],
+		[AC_MSG_RESULT([no])
+		SYNTHESIZE_SYMBOLS=0])
+	])
+AC_DEFINE_UNQUOTED(SYNTHESIZE_SYMBOLS, $SYNTHESIZE_SYMBOLS, [Synthesize special symbols when needed])
 
-fi
 AC_LANG_POP(C)
 ]
 )
diff --git a/m4/cellspubfdsupport.m4 b/m4/cellspubfdsupport.m4
deleted file mode 100644
index e04ee87..0000000
--- a/m4/cellspubfdsupport.m4
+++ /dev/null
@@ -1,52 +0,0 @@
-dnl AX_CELL_SPU - check for needed binutils stuff for Cell BE SPU
-AC_DEFUN([AX_CELL_SPU],
-[
-# On Cell BE architecture, OProfile uses bfd_openr_iovec when processing some
-# SPU profiles.  To parse embedded SPU ELF on Cell BE, OProfile requires a
-# version of bfd_openr_iovec that supports the elf32-spu target.
-# This version of the function also has a 7th parameter that's been added.
-# First, we check for existence of the base bfd_openr_iovec.  If it exists,
-# we then use a temporary test program below that passes 7 arguments to
-# bfd_openr_iovec; if it compiles OK, we assume we have the right BFD
-# library to support Cell BE SPU profiling.
-
-AC_LANG_PUSH(C)
-
-AC_CHECK_LIB(bfd, bfd_openr_iovec,
-	[bfd_openr_iovec_exists="yes"],
-	[bfd_openr_iovec_exists="no"]
-)
-
-if test "$bfd_openr_iovec_exists" = "yes"; then
-	AC_MSG_CHECKING([whether bfd_openr_iovec has seven parameters])
-	AC_COMPILE_IFELSE([AC_LANG_PROGRAM([#include <bfd.h>
-	  #include <stdlib.h>
-	],
-	 [[struct bfd *nbfd = bfd_openr_iovec("some-file", "elf32-spu",
-			NULL, NULL, NULL, NULL, NULL);
-	  return 0;
-	]])],
-	[AC_DEFINE([HAVE_BFD_OPENR_IOVEC_WITH_7PARMS],
-		[],
-		[Defined if you have the version of bfd_openr_iovec with 7 parameters])
-	bfd_open_iovec_7="yes"
-	AC_MSG_RESULT([yes])],
-	[AC_MSG_RESULT([no])]
-	)
-fi
-
-AC_LANG_POP(C)
-
-arch="unknown"
-AC_ARG_WITH(target,
-[  --with-target=cell-be   Check BFD support for Cell Broadband Engine SPU profiling], arch=$withval)
-
-if test "$arch" = "cell-be"; then
-        if test "$bfd_open_iovec_7" = "yes"; then
-	        AC_MSG_NOTICE([BFD library has support for Cell Broadband Engine SPU profiling])
-	else
-		AC_ERROR([BFD library does not support elf32-spu target; SPU profiling is unsupported])
-	fi
-fi
-]
-)
diff --git a/m4/kernelversion.m4 b/m4/kernelversion.m4
index db4b805..6655703 100644
--- a/m4/kernelversion.m4
+++ b/m4/kernelversion.m4
@@ -1,7 +1,7 @@
 dnl AX_KERNEL_VERSION(major, minor, level, comparison, action-if-true, action-if-false)
 AC_DEFUN([AX_KERNEL_VERSION], [
 SAVE_CFLAGS=$CFLAGS
-CFLAGS="-I$KINC -D__KERNEL__ -Werror"
+CFLAGS="-I$KINC -Werror"
 AC_TRY_COMPILE( 
   [
   #include <linux/version.h>
diff --git a/m4/qt.m4 b/m4/qt.m4
deleted file mode 100644
index 48fd6e2..0000000
--- a/m4/qt.m4
+++ /dev/null
@@ -1,225 +0,0 @@
-dnl find a binary in the path
-AC_DEFUN([QT_FIND_PATH],
-[
-	AC_MSG_CHECKING([for $1])
-	AC_CACHE_VAL(qt_cv_path_$1,
-	[
-		qt_cv_path_$1="NONE"
-		if test -n "$$2"; then
-			qt_cv_path_$1="$$2";
-		else
-			dirs="$3"
-			qt_save_IFS=$IFS
-			IFS=':'
-			for dir in $PATH; do
-				dirs="$dirs $dir"
-			done
-			IFS=$qt_save_IFS
- 
-			for dir in $dirs; do
-				if test -x "$dir/$1"; then
-					if test -n "$5"; then
-						evalstr="$dir/$1 $5 2>&1 "
-						if eval $evalstr; then
-							qt_cv_path_$1="$dir/$1"
-							break
-						fi
-					else
-						qt_cv_path_$1="$dir/$1"
-						break
-					fi
-				fi
-			done
-		fi
-	])
- 
-	if test -z "$qt_cv_path_$1" || test "$qt_cv_path_$1" = "NONE"; then
-		AC_MSG_RESULT(not found)
-		$4
-	else
-		AC_MSG_RESULT($qt_cv_path_$1)
-		$2=$qt_cv_path_$1
-	fi
-])
-
-dnl Find the uic compiler on the path or in qt_cv_dir
-AC_DEFUN([QT_FIND_UIC],
-[
-	QT_FIND_PATH(uic, ac_uic, $qt_cv_dir/bin)
-	if test -z "$ac_uic" -a "$FATAL" = 1; then
-		AC_MSG_ERROR([uic binary not found in \$PATH or $qt_cv_dir/bin !])
-	fi
-])
- 
-dnl Find the right moc in path/qt_cv_dir
-AC_DEFUN([QT_FIND_MOC],
-[
-	QT_FIND_PATH(moc2, ac_moc2, $qt_cv_dir/bin)
-	QT_FIND_PATH(moc, ac_moc1, $qt_cv_dir/bin)
-
-	if test -n "$ac_moc1" -a -n "$ac_moc2"; then
-		dnl found both. Prefer Qt3's if it exists else moc2
-		$ac_moc1 -v 2>&1 | grep "Qt 3" >/dev/null
-		if test "$?" = 0; then
-			ac_moc=$ac_moc1;
-		else
-			ac_moc=$ac_moc2;
-		fi
-	else
-		if test -n "$ac_moc1"; then
-			ac_moc=$ac_moc1;
-		else
-			ac_moc=$ac_moc2;
-		fi
-	fi
-
-	if test -z "$ac_moc"  -a "$FATAL" = 1; then
-		AC_MSG_ERROR([moc binary not found in \$PATH or $qt_cv_dir/bin !])
-	fi
-])
-
-dnl check a particular libname
-AC_DEFUN([QT_TRY_LINK],
-[
-	SAVE_LIBS="$LIBS"
-	LIBS="$LIBS $1"
-	AC_TRY_LINK([
-	#include <qglobal.h>
-	#include <qstring.h>
-		],
-	[
-	QString s("mangle_failure");
-	#if (QT_VERSION < 221)
-	break_me_(\\\);
-	#endif
-	],
-	qt_cv_libname=$1,
-	)
-	LIBS="$SAVE_LIBS"
-])
- 
-dnl check we can do a compile
-AC_DEFUN([QT_CHECK_COMPILE],
-[
-	AC_MSG_CHECKING([$1 for Qt library name])
- 
-	AC_CACHE_VAL(qt_cv_libname,
-	[
-		AC_LANG_CPLUSPLUS
-		SAVE_CXXFLAGS=$CXXFLAGS
-		CXXFLAGS="$CXXFLAGS $QT_INCLUDES $QT_LDFLAGS" 
-
-		for libname in -lqt-mt -lqt3 -lqt2 -lqt;
-		do
-			QT_TRY_LINK($libname)
-			if test -n "$qt_cv_libname"; then
-				break;
-			fi
-		done
-
-		CXXFLAGS=$SAVE_CXXFLAGS
-	])
-
-	if test -z "$qt_cv_libname"; then
-		AC_MSG_RESULT([failed]) 
-		if test "$FATAL" = 1 ; then
-			AC_MSG_ERROR([Cannot compile a simple Qt executable. Check you have the right \$QTDIR !])
-		fi
-	else
-		AC_MSG_RESULT([$qt_cv_libname])
-	fi
-])
-
-dnl get Qt version we're using
-AC_DEFUN([QT_GET_VERSION],
-[
-	AC_CACHE_CHECK([Qt version],lyx_cv_qtversion,
-	[
-		AC_LANG_CPLUSPLUS
-		SAVE_CPPFLAGS=$CPPFLAGS
-		CPPFLAGS="$CPPFLAGS $QT_INCLUDES"
-
-		cat > conftest.$ac_ext <<EOF
-#line __oline__ "configure"
-#include "confdefs.h"
-#include <qglobal.h>
-"%%%"QT_VERSION_STR"%%%"
-EOF
-		lyx_cv_qtversion=`(eval "$ac_cpp conftest.$ac_ext") 2>&5 | \
-			grep '^"%%%"'  2>/dev/null | \
-			sed -e 's/"%%%"//g' -e 's/"//g'`
-		rm -f conftest.$ac_ext
-		CPPFLAGS=$SAVE_CPPFLAGS
-	])
- 
-	QT_VERSION=$lyx_cv_qtversion
-	AC_SUBST(QT_VERSION)
-])
- 
-dnl start here 
-AC_DEFUN([QT_DO_IT_ALL],
-[
-	dnl Please leave this alone. I use this file in
-	dnl oprofile.
-	FATAL=0
-
-	AC_ARG_WITH(qt-dir, [  --with-qt-dir           where the root of Qt is installed ],
-		[ qt_cv_dir=`eval echo "$withval"/` ])
-	 
-	AC_ARG_WITH(qt-includes, [  --with-qt-includes      where the Qt includes are. ],
-		[ qt_cv_includes=`eval echo "$withval"` ])
- 
-	AC_ARG_WITH(qt-libraries, [  --with-qt-libraries     where the Qt library is installed.],
-		[  qt_cv_libraries=`eval echo "$withval"` ])
-
-	dnl pay attention to $QTDIR unless overridden
-	if test -z "$qt_cv_dir"; then
-		qt_cv_dir=$QTDIR
-	fi
- 
-	dnl derive inc/lib if needed
-	if test -n "$qt_cv_dir"; then
-		if test -z "$qt_cv_includes"; then
-			qt_cv_includes=$qt_cv_dir/include
-		fi
-		if test -z "$qt_cv_libraries"; then
-			qt_cv_libraries=$qt_cv_dir/lib
-		fi
-	fi
-
-	dnl flags for compilation
-	QT_INCLUDES=
-	QT_LDFLAGS=
-	if test -n "$qt_cv_includes"; then
-		QT_INCLUDES="-isystem $qt_cv_includes"
-	fi
-	if test -n "$qt_cv_libraries"; then
-		QT_LDFLAGS="-L$qt_cv_libraries"
-	fi
-	AC_SUBST(QT_INCLUDES)
-	AC_SUBST(QT_LDFLAGS)
- 
-	QT_FIND_MOC
-	MOC=$ac_moc
-	AC_SUBST(MOC)
-	QT_FIND_UIC
-	UIC=$ac_uic
-	AC_SUBST(UIC)
-
-	QT_CHECK_COMPILE(in lib)
-	if test -z "$qt_cv_libname"; then
-		if test -n "$qt_cv_dir"; then
-		dnl Try again using lib64 vs lib
-			qt_cv_libraries=$qt_cv_dir/lib64
-			QT_LDFLAGS="-L$qt_cv_libraries"
-			QT_CHECK_COMPILE(in lib64)
-		fi
-	fi
-
-	QT_LIB=$qt_cv_libname;
-	AC_SUBST(QT_LIB)
-
-	if test -n "$qt_cv_libname"; then
-		QT_GET_VERSION
-	fi
-])
diff --git a/opjitconv/Makefile.am b/opjitconv/Makefile.am
index fe5c00d..fc5585f 100644
--- a/opjitconv/Makefile.am
+++ b/opjitconv/Makefile.am
@@ -1,6 +1,5 @@
 AM_CPPFLAGS = -I ${top_srcdir}/libopagent  \
 	-I ${top_srcdir}/libutil \
-	-I ${top_srcdir}/daemon \
 	@OP_CPPFLAGS@
 
 AM_CFLAGS = @OP_CFLAGS@
diff --git a/opjitconv/Makefile.in b/opjitconv/Makefile.in
index ba4adab..ef38012 100644
--- a/opjitconv/Makefile.in
+++ b/opjitconv/Makefile.in
@@ -40,7 +40,6 @@ DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
 am__aclocal_m4_deps = $(top_srcdir)/m4/binutils.m4 \
 	$(top_srcdir)/m4/builtinexpect.m4 \
-	$(top_srcdir)/m4/cellspubfdsupport.m4 \
 	$(top_srcdir)/m4/compileroption.m4 \
 	$(top_srcdir)/m4/copyifchange.m4 $(top_srcdir)/m4/docbook.m4 \
 	$(top_srcdir)/m4/extradirs.m4 \
@@ -49,7 +48,7 @@ am__aclocal_m4_deps = $(top_srcdir)/m4/binutils.m4 \
 	$(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
 	$(top_srcdir)/m4/mallocattribute.m4 \
 	$(top_srcdir)/m4/poptconst.m4 \
-	$(top_srcdir)/m4/precompiledheader.m4 $(top_srcdir)/m4/qt.m4 \
+	$(top_srcdir)/m4/precompiledheader.m4 \
 	$(top_srcdir)/m4/sstream.m4 $(top_srcdir)/m4/typedef.m4 \
 	$(top_srcdir)/configure.ac
 am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
@@ -134,7 +133,6 @@ LN_S = @LN_S@
 LTLIBOBJS = @LTLIBOBJS@
 MAKEINFO = @MAKEINFO@
 MKDIR_P = @MKDIR_P@
-MOC = @MOC@
 NM = @NM@
 NMEDIT = @NMEDIT@
 OBJDUMP = @OBJDUMP@
@@ -158,20 +156,13 @@ PFM_LIB = @PFM_LIB@
 PKG_CONFIG = @PKG_CONFIG@
 POPT_LIBS = @POPT_LIBS@
 PTRDIFF_T_TYPE = @PTRDIFF_T_TYPE@
-QT_CFLAGS = @QT_CFLAGS@
-QT_INCLUDES = @QT_INCLUDES@
-QT_LDFLAGS = @QT_LDFLAGS@
-QT_LIB = @QT_LIB@
-QT_LIBS = @QT_LIBS@
-QT_VERSION = @QT_VERSION@
 RANLIB = @RANLIB@
+RT_LIB = @RT_LIB@
 SED = @SED@
 SET_MAKE = @SET_MAKE@
 SHELL = @SHELL@
 SIZE_T_TYPE = @SIZE_T_TYPE@
 STRIP = @STRIP@
-UIC = @UIC@
-UIChelp = @UIChelp@
 VERSION = @VERSION@
 XMKMF = @XMKMF@
 XML_CATALOG = @XML_CATALOG@
@@ -237,7 +228,6 @@ top_srcdir = @top_srcdir@
 topdir = @topdir@
 AM_CPPFLAGS = -I ${top_srcdir}/libopagent  \
 	-I ${top_srcdir}/libutil \
-	-I ${top_srcdir}/daemon \
 	@OP_CPPFLAGS@
 
 AM_CFLAGS = @OP_CFLAGS@
diff --git a/opjitconv/conversion.c b/opjitconv/conversion.c
index 111fe9d..add0f95 100644
--- a/opjitconv/conversion.c
+++ b/opjitconv/conversion.c
@@ -39,10 +39,10 @@ static void free_jit_debug_line(void)
 	jitentry_debug_line_list = NULL;
 }
 
-int op_jit_convert(struct op_jitdump_info file_info, char const * elffile,
+int op_jit_convert(struct op_jitdump_info * file_info, char const * elffile,
                    unsigned long long start_time, unsigned long long end_time)
 {
-	void const * jitdump = file_info.dmp_file;
+	void const * jitdump = file_info->dmp_file;
 	int rc= OP_JIT_CONV_OK;
 
 	entry_count = 0;
@@ -53,7 +53,7 @@ int op_jit_convert(struct op_jitdump_info file_info, char const * elffile,
 	jitentry_debug_line_list = NULL;
 	entries_symbols_ascending = entries_address_ascending = NULL;
 
-	if ((rc = parse_all(jitdump, jitdump + file_info.dmp_file_stat.st_size,
+	if ((rc = parse_all(jitdump, jitdump + file_info->dmp_file_stat.st_size,
 	                    end_time)) == OP_JIT_CONV_FAIL)
 		goto out;
 
diff --git a/opjitconv/create_bfd.c b/opjitconv/create_bfd.c
index b90e59f..48db143 100644
--- a/opjitconv/create_bfd.c
+++ b/opjitconv/create_bfd.c
@@ -15,10 +15,10 @@
  */
 
 #include "opjitconv.h"
-#include "opd_printf.h"
 #include "op_libiberty.h"
 
 #include <bfd.h>
+#include <assert.h>
 #include <stdint.h>
 #include <stdio.h>
 
@@ -42,6 +42,9 @@ static int fill_symtab(void)
 	
 	syms = xmalloc(sizeof(asymbol *) * (entry_count+1));
 	syms[entry_count] = NULL;
+	assert(entries_address_ascending[0]->section);
+	// Do this to silence Coverity
+	section = entries_address_ascending[0]->section;
 	for (i = 0; i < entry_count; i++) {
 		e = entries_address_ascending[i];
 		if (e->section)
@@ -83,10 +86,7 @@ asection * create_section(bfd * abfd, char const * section_name,
 		bfd_perror("bfd_make_section");
 		goto error;
 	}
-	if (bfd_set_section_vma(abfd, section, vma) == FALSE) {
-		bfd_perror("bfd_set_section_vma");
-		goto error;
-	}
+	bfd_set_section_vma(abfd, section, vma);
 	if (bfd_set_section_size(abfd, section, size) == FALSE) {
 		bfd_perror("bfd_set_section_size");
 		goto error;
@@ -261,7 +261,7 @@ bfd * open_elf(char const * filename)
 		goto error;
 	}
 	if (bfd_set_arch_mach(abfd, dump_bfd_arch, dump_bfd_mach) == FALSE) {
-		bfd_perror("bfd_set_format");
+		bfd_perror("bfd_set_arch_mach");
 		goto error;
 	}
 	return abfd;
diff --git a/opjitconv/debug_line.c b/opjitconv/debug_line.c
index babb943..a9c0ba6 100644
--- a/opjitconv/debug_line.c
+++ b/opjitconv/debug_line.c
@@ -270,7 +270,7 @@ static void emit_lne_define_filename(struct growable_buffer * b,
 static void emit_lne_set_address(struct growable_buffer * b,
 				 void const * address)
 {
-	emit_extended_opcode(b, DW_LNE_set_address, &address, sizeof(address));
+	emit_extended_opcode(b, DW_LNE_set_address, &address, sizeof(unsigned long));
 }
 
 
diff --git a/opjitconv/jitsymbol.c b/opjitconv/jitsymbol.c
index 1aa6db1..e2b1e66 100644
--- a/opjitconv/jitsymbol.c
+++ b/opjitconv/jitsymbol.c
@@ -15,7 +15,6 @@
  */
 
 #include "opjitconv.h"
-#include "opd_printf.h"
 #include "op_libiberty.h"
 #include "op_types.h"
 
@@ -375,7 +374,7 @@ static int handle_overlap_region(int start_idx, int end_idx)
 	int cnt;
 	char * name;
 	int i, j;
-	unsigned long long totaltime;
+	unsigned long long totaltime, pct;
 
 	if (debug) {
 		for (i = start_idx; i <= end_idx; i++) {
@@ -389,23 +388,28 @@ static int handle_overlap_region(int start_idx, int end_idx)
 		}
 	}
 	idx = select_one(start_idx, end_idx);
+	// This can't happen, but we check anyway, just to silence Coverity
+	if (idx == OP_JIT_CONV_FAIL) {
+		rc = OP_JIT_CONV_FAIL;
+		goto out;
+	}
 	totaltime = eliminate_overlaps(start_idx, end_idx, idx);
 	if (totaltime == ULONG_MAX) {
 		rc = OP_JIT_CONV_FAIL;
 		goto out;
 	}
 	e = entries_address_ascending[idx];
+	pct = (totaltime == 0) ? 100 : (e->life_end - e->life_start) * 100 / totaltime;
 
 	cnt = 1;
-	j = (e->life_end - e->life_start) * 100 / totaltime;
+	j = pct;
 	while ((j = j/10))
 		cnt++;
 
 	// Mark symbol name with a %% to indicate the overlap.
 	cnt += strlen(e->symbol_name) + 2 + 1;
 	name = xmalloc(cnt);
-	snprintf(name, cnt, "%s%%%llu", e->symbol_name,
-		 (e->life_end - e->life_start) * 100 / totaltime);
+	snprintf(name, cnt, "%s%%%llu", e->symbol_name, pct);
 	if (e->sym_name_malloced)
 		free(e->symbol_name);
 	e->symbol_name = name;
diff --git a/opjitconv/opjitconv.c b/opjitconv/opjitconv.c
index b813309..fb38fb3 100644
--- a/opjitconv/opjitconv.c
+++ b/opjitconv/opjitconv.c
@@ -15,15 +15,16 @@
  */
 
 #include "opjitconv.h"
-#include "opd_printf.h"
 #include "op_file.h"
 #include "op_libiberty.h"
 
+#include <getopt.h>
 #include <dirent.h>
 #include <fnmatch.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <limits.h>
+#include <assert.h>
 #include <pwd.h>
 #include <stdint.h>
 #include <stdio.h>
@@ -33,6 +34,7 @@
 #include <sys/types.h>
 #include <unistd.h>
 #include <wait.h>
+#include <sys/file.h>
 
 /*
  * list head.  The linked list is used during parsing (parse_all) to
@@ -74,6 +76,19 @@ int debug;
 int non_root;
 /* indicates we should delete jitdump files owned by the user */
 int delete_jitdumps;
+/* Session directory where sample data is stored */
+char * session_dir;
+
+static struct option long_options [] = { /* getopt_long() table; 'val' matches short_options */
+                                        { "session-dir", required_argument, NULL, 's'},
+                                        { "debug", no_argument, NULL, 'd'},
+                                        { "delete-jitdumps", no_argument, NULL, 'j'},
+                                        { "non-root", no_argument, NULL, 'n'},
+                                        { "help", no_argument, NULL, 'h'},
+                                        { NULL, 0, NULL, 0} /* terminator: every field must be zero */
+};
+const char * short_options = "s:djnh";
+
 LIST_HEAD(jitdump_deletion_candidates);
 
 /*
@@ -143,6 +158,8 @@ static int mmap_jitdump(char const * dumpfile,
 		rc = OP_JIT_CONV_FAIL;
 	}
 out:
+	if (dumpfd != -1)
+		close(dumpfd);
 	return rc;
 }
 
@@ -150,8 +167,12 @@ static char const * find_anon_dir_match(struct list_head * anon_dirs,
 					char const * proc_id)
 {
 	struct list_head * pos;
-	char match_filter[10];
-	snprintf(match_filter, 10, "*/%s.*", proc_id);
+	/* Current PID_MAX_LIMIT (as defined in include/linux/threads.h) is
+	 *         4 * 1024 * 1024 (for 64-bit kernels), i.e. at most 7 digits.
+	 * So the filter needs 2 (prefix) + 7 (proc_id) + 2 (suffix) + 1 (NUL) = 12 bytes.
+	 */
+	char match_filter[12];
+	snprintf(match_filter, 12, "*/%s.*", proc_id);
 	list_for_each(pos, anon_dirs) {
 		struct pathname * anon_dir =
 			list_entry(pos, struct pathname, neighbor);
@@ -191,10 +212,31 @@ out:
  */
 int copy_dumpfile(char const * dumpfile, char * tmp_dumpfile)
 {
+#define OP_JITCONV_USECS_TO_WAIT 1000
+	int file_locked = 0;
+	unsigned int usecs_waited = 0;
 	int rc = OP_JIT_CONV_OK;
-
+	int fd = open(dumpfile, O_RDONLY); /* 2nd arg is the flags word, not a permission mode */
+	if (fd < 0) {
+		perror("opjitconv failed to open JIT dumpfile");
+		return OP_JIT_CONV_FAIL;
+	}
+again:
+	// Need OS-level file locking here since opagent may still be writing to the file.
+	rc = flock(fd, LOCK_EX | LOCK_NB);
+	if (rc) {
+		if (usecs_waited < OP_JITCONV_USECS_TO_WAIT) {
+			usleep(100);
+			usecs_waited += 100;
+			goto again;
+		} else {
+			printf("opjitconv: Unable to obtain lock on %s.\n", dumpfile);
+			rc = OP_JIT_CONV_FAIL;
+			goto out;
+		}
+	}
+	file_locked = 1;
 	sprintf(sys_cmd_buffer, "/bin/cp -p %s %s", dumpfile, tmp_dumpfile);
-
 	if (system(sys_cmd_buffer) != 0) {
 		printf("opjitconv: Calling system() to copy files failed.\n");
 		rc = OP_JIT_CONV_FAIL;
@@ -208,6 +250,11 @@ int copy_dumpfile(char const * dumpfile, char * tmp_dumpfile)
 	}
 	
 out:
+#undef OP_JITCONV_USECS_TO_WAIT
+	if (file_locked)
+		flock(fd, LOCK_UN); /* release the lock while fd is still a valid descriptor */
+	close(fd);
+
 	return rc;
 }
 
@@ -314,10 +361,8 @@ chk_proc_id:
 		goto out;
 	}
 	if (!(anon_dir = find_anon_dir_match(anon_sample_dirs, proc_id))) {
-		/* With the capability of profiling with operf (as well as with
-		 * the legacy oprofile daemon), users will not be using opcontrol
-		 * to remove all jitdump files; instead, opjitconv will remove old
-		 * jitdump files (see _cleanup_jitdumps() for details).  But this cleanup
+		/* When profiling with operf, opjitconv will remove old jitdump
+		 * files (see _cleanup_jitdumps() for details).  But this cleanup
 		 * strategy makes it quite likely that opjitconv will sometimes find
 		 * jitdump files that are not owned by the current user or are in use
 		 * by other operf users, thus, the current profile data would not have
@@ -362,6 +407,7 @@ chk_proc_id:
 		if (jofd < 0)
 			goto create_elf;
 		rc = fstat(jofd, &file_stat);
+		close(jofd);
 		if (rc < 0) {
 			perror("opjitconv:fstat on .jo file");
 			rc = OP_JIT_CONV_FAIL;
@@ -399,7 +445,10 @@ chk_proc_id:
 			goto free_res3;
 		}
 		/* Convert the dump file as the special user 'oprofile'. */
-		rc = op_jit_convert(dmp_info, tmp_elffile, start_time, end_time);
+		rc = op_jit_convert(&dmp_info, tmp_elffile, start_time, end_time);
+		if (rc < 0)
+			goto free_res3;
+
 		/* Set eUID back to the original user. */
 		if (!non_root && seteuid(getuid()) != 0) {
 			perror("opjitconv: seteuid to original user failed");
@@ -518,9 +567,11 @@ static void _add_jitdumps_to_deletion_list(void * all_jitdumps, char const * jit
 		if (fstat(fd, &mystat) < 0) {
 			// Non-fatal error, so just display debug message and continue
 			verbprintf(debug, "opjitconv: cannot fstat jitdump file");
+			close(fd);
 			continue;
 		}
-		if (geteuid() == mystat.st_uid) {
+		close(fd);
+		if (!non_root || geteuid() == mystat.st_uid) {
 			struct jitdump_deletion_candidate * jdc =
 					xmalloc(sizeof(struct jitdump_deletion_candidate));
 			jdc->name = xstrdup(dmpfile->name);
@@ -536,7 +587,7 @@ static int op_process_jit_dumpfiles(char const * session_dir,
 	int rc = OP_JIT_CONV_OK;
 	char jitdumpfile[PATH_MAX + 1];
 	char oprofile_tmp_template[PATH_MAX + 1];
-	char const * jitdump_dir = "/var/lib/oprofile/jitdump/";
+	char const * jitdump_dir = "/tmp/.oprofile/jitdump/";
 
 	LIST_HEAD(jd_fnames);
 	char const * anon_dir_filter = "*/{dep}/{anon:anon}/[0-9]*.*";
@@ -667,7 +718,7 @@ rm_tmp:
 	/* Delete temporary working directory with all its files
 	 * (i.e. dump and ELF file).
 	 */
-	sprintf(sys_cmd_buffer, "/bin/rm -rf %s", tmp_conv_dir);
+	sprintf(sys_cmd_buffer, "/bin/rm -rf '%s'", tmp_conv_dir);
 	if (system(sys_cmd_buffer) != 0) {
 		printf("opjitconv: Removing temporary working directory failed.\n");
 		rc = OP_JIT_CONV_TMPDIR_NOT_REMOVED;
@@ -680,7 +731,7 @@ out:
 static void _cleanup_jitdumps(void)
 {
 	struct list_head * pos1, *pos2;
-	char const * jitdump_dir = "/var/lib/oprofile/jitdump/";
+	char const * jitdump_dir = "/tmp/.oprofile/jitdump/";
 	size_t dir_len = strlen(jitdump_dir);
 	char dmpfile_pathname[dir_len + 20];
 	char proc_fd_dir[PATH_MAX];
@@ -725,7 +776,9 @@ static void _cleanup_jitdumps(void)
 				if (dirent->d_type == DT_LNK) {
 					char buf[1024];
 					char fname[1024];
+					memset(buf, '\0', 1024);
 					memset(fname, '\0', 1024);
+					memset(buf, '\0', 1024);
 					strcpy(fname, proc_fd_dir);
 					strncat(fname, dirent->d_name, 1023 - proc_fd_dir_len);
 					if (readlink(fname, buf, 1023) > 0) {
@@ -738,9 +791,13 @@ static void _cleanup_jitdumps(void)
 					}
 				}
 			}
+			closedir(dir);
+		}
+		if (!do_not_delete) {
+			if (remove(dmpfile_pathname))
+				verbprintf(debug, "Unable to delete %s: %s\n", dmpfile_pathname,
+				           strerror(errno));
 		}
-		if (!do_not_delete)
-			remove(dmpfile_pathname);
 	}
 	list_for_each_safe(pos1, pos2, &jitdump_deletion_candidates) {
 		struct jitdump_deletion_candidate * pname = list_entry(pos1,
@@ -753,55 +810,97 @@ static void _cleanup_jitdumps(void)
 
 }
 
-int main(int argc, char ** argv)
+static void __print_usage(void)
+{
+	fprintf(stderr, "usage: opjitconv [--debug | --non-root | --delete-jitdumps ] --session-dir=<dir> <starttime> <endtime>\n");
+}
+
+static int _process_args(int argc, char * const argv[])
+{
+	int keep_trying = 1;
+	int idx_of_non_options = 0;
+	setenv("POSIXLY_CORRECT", "1", 0);
+	while (keep_trying) {
+		int option_idx = 0;
+		int c = getopt_long(argc, argv, short_options, long_options, &option_idx);
+		switch (c) {
+		case -1:
+			if (optind != argc) {
+				idx_of_non_options = optind;
+			}
+			keep_trying = 0;
+			break;
+		case '?':
+			printf("non-option detected at optind %d\n", optind);
+			keep_trying = 0;
+			idx_of_non_options = -1;
+			break;
+		case 's':
+			session_dir = optarg;
+			break;
+		case 'd':
+			debug = 1;
+			break;
+		case 'n':
+			non_root = 1;
+			break;
+		case 'j':
+			delete_jitdumps = 1;
+			break;
+		case 'h':
+			break;
+		default:
+			break;
+		}
+	}
+	return idx_of_non_options;
+}
+
+int main(int argc, char * const argv[])
 {
 	unsigned long long start_time, end_time;
-	char const * session_dir;
-	int rc = 0;
+	struct stat filestat;
+	int non_options_idx, rc = 0;
+	size_t sessdir_len = 0;
 
 	debug = 0;
-	if (argc > 1 && strcmp(argv[1], "-d") == 0) {
-		debug = 1;
-		argc--;
-		argv++;
-	}
 	non_root = 0;
-	if (argc > 1 && strcmp(argv[1], "--non-root") == 0) {
-		non_root = 1;
-		argc--;
-		argv++;
-	}
-
 	delete_jitdumps = 0;
-	if (argc > 1 && strcmp(argv[1], "--delete-jitdumps") == 0) {
-		delete_jitdumps = 1;
-		argc--;
-		argv++;
-	}
-
-	if (argc != 4) {
-		printf("Usage: opjitconv [-d] <session_dir> <starttime>"
-		       " <endtime>\n");
+	session_dir = NULL;
+	non_options_idx = _process_args(argc, argv);
+	// We need the session_dir and two non-option values passed -- starttime and endtime.
+	if (!session_dir || (non_options_idx != argc - 2)) {
+		__print_usage();
 		fflush(stdout);
 		rc = EXIT_FAILURE;
 		goto out;
 	}
 
-	session_dir = argv[1];
 	/*
-	 * Check for a maximum of 4096 bytes (Linux path name length limit) decremented
-	 * by 16 bytes (will be used later for appending samples sub directory).
+	 * Check for a maximum of 4096 bytes (Linux path name length limit) minus 16 bytes
+	 * (to be used later for appending samples sub directory) minus 1 (for terminator).
 	 * Integer overflows according to the session dir parameter (user controlled)
 	 * are not possible anymore.
 	 */
-	if (strlen(session_dir) > PATH_MAX - 16) {
-		printf("opjitconv: Path name length limit exceeded for session directory: %s\n", session_dir);
+	if ((sessdir_len = strlen(session_dir)) >= (PATH_MAX - 17)) {
+		printf("opjitconv: Path name length limit exceeded for session directory\n");
+		rc = EXIT_FAILURE;
+		goto out;
+	}
+
+	if (stat(session_dir, &filestat)) {
+		perror("stat operation on passed session-dir failed");
+		rc = EXIT_FAILURE;
+		goto out;
+	}
+	if (!S_ISDIR(filestat.st_mode)) {
+		printf("Passed session-dir %s is not a directory\n", session_dir);
 		rc = EXIT_FAILURE;
 		goto out;
 	}
 
-	start_time = atol(argv[2]);
-	end_time = atol(argv[3]);
+	start_time = atol(argv[non_options_idx++]);
+	end_time = atol(argv[non_options_idx]);
 
 	if (start_time > end_time) {
 		rc = EXIT_FAILURE;
diff --git a/opjitconv/opjitconv.h b/opjitconv/opjitconv.h
index f6243c9..ff87d74 100644
--- a/opjitconv/opjitconv.h
+++ b/opjitconv/opjitconv.h
@@ -34,6 +34,14 @@
 #include "op_list.h"
 #include "op_types.h"
 
+#define verbprintf(x, args...) /* printf(args) only when the flag x equals 1 */ \
+        do { \
+                /* This is a bit fragile, but we must catch verbprintf("%s", "") */ \
+                if ((x) == 1) \
+                        printf(args); \
+        } while (0)
+
+
 /* Structure that contains all information
  * for one function entry in the jit dump file.
  * the jit dump file gets mmapped and code and
@@ -99,7 +107,7 @@ int parse_all(void const * start, void const * end,
 	      unsigned long long end_time);
 
 /* conversion.c */
-int op_jit_convert(struct op_jitdump_info file_info, char const * elffile,
+int op_jit_convert(struct op_jitdump_info *file_info, char const * elffile,
                    unsigned long long start_time, unsigned long long end_time);
 
 /* create_bfd.c */
diff --git a/opjitconv/parse_dump.c b/opjitconv/parse_dump.c
index 2625c7c..f50807e 100644
--- a/opjitconv/parse_dump.c
+++ b/opjitconv/parse_dump.c
@@ -16,7 +16,6 @@
 
 #include "opjitconv.h"
 #include "jitdump.h"
-#include "opd_printf.h"
 #include "op_libiberty.h"
 
 #include <string.h>
diff --git a/pe_counting/Makefile.am b/pe_counting/Makefile.am
new file mode 100644
index 0000000..fadd4d9
--- /dev/null
+++ b/pe_counting/Makefile.am
@@ -0,0 +1,28 @@
+LIBS=@LIBERTY_LIBS@ @PFM_LIB@ @RT_LIB@
+if BUILD_FOR_PERF_EVENT
+
+AM_CPPFLAGS = \
+	-I ${top_srcdir}/libutil \
+	-I ${top_srcdir}/libutil++ \
+	-I ${top_srcdir}/libop \
+	-I ${top_srcdir}/libperf_events \
+	-I ${top_srcdir}/libpe_utils \
+	@PERF_EVENT_FLAGS@ \
+	@OP_CPPFLAGS@
+
+ocount_SOURCES = ocount.cpp \
+	ocount_counter.h \
+	ocount_counter.cpp
+
+
+AM_CXXFLAGS = @OP_CXXFLAGS@
+AM_LDFLAGS = @OP_LDFLAGS@
+
+bin_PROGRAMS = ocount
+ocount_LDADD = ../libpe_utils/libpe_utils.a \
+	../libpe_utils/libpe_utils.a \
+	../libop/libop.a \
+	../libutil/libutil.a \
+	../libutil++/libutil++.a
+
+endif
diff --git a/daemon/liblegacy/Makefile.in b/pe_counting/Makefile.in
similarity index 66%
rename from daemon/liblegacy/Makefile.in
rename to pe_counting/Makefile.in
index f16c45b..4d2ddc9 100644
--- a/daemon/liblegacy/Makefile.in
+++ b/pe_counting/Makefile.in
@@ -34,12 +34,12 @@ PRE_UNINSTALL = :
 POST_UNINSTALL = :
 build_triplet = @build@
 host_triplet = @host@
-subdir = daemon/liblegacy
+@BUILD_FOR_PERF_EVENT_TRUE@bin_PROGRAMS = ocount$(EXEEXT)
+subdir = pe_counting
 DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
 am__aclocal_m4_deps = $(top_srcdir)/m4/binutils.m4 \
 	$(top_srcdir)/m4/builtinexpect.m4 \
-	$(top_srcdir)/m4/cellspubfdsupport.m4 \
 	$(top_srcdir)/m4/compileroption.m4 \
 	$(top_srcdir)/m4/copyifchange.m4 $(top_srcdir)/m4/docbook.m4 \
 	$(top_srcdir)/m4/extradirs.m4 \
@@ -48,7 +48,7 @@ am__aclocal_m4_deps = $(top_srcdir)/m4/binutils.m4 \
 	$(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
 	$(top_srcdir)/m4/mallocattribute.m4 \
 	$(top_srcdir)/m4/poptconst.m4 \
-	$(top_srcdir)/m4/precompiledheader.m4 $(top_srcdir)/m4/qt.m4 \
+	$(top_srcdir)/m4/precompiledheader.m4 \
 	$(top_srcdir)/m4/sstream.m4 $(top_srcdir)/m4/typedef.m4 \
 	$(top_srcdir)/configure.ac
 am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
@@ -57,19 +57,32 @@ mkinstalldirs = $(install_sh) -d
 CONFIG_HEADER = $(top_builddir)/config.h
 CONFIG_CLEAN_FILES =
 CONFIG_CLEAN_VPATH_FILES =
-LIBRARIES = $(noinst_LIBRARIES)
-ARFLAGS = cru
-liblegacy_a_AR = $(AR) $(ARFLAGS)
-liblegacy_a_LIBADD =
-am_liblegacy_a_OBJECTS = opd_24_stats.$(OBJEXT) opd_kernel.$(OBJEXT) \
-	opd_proc.$(OBJEXT) opd_image.$(OBJEXT) opd_mapping.$(OBJEXT) \
-	opd_parse_proc.$(OBJEXT) opd_sample_files.$(OBJEXT) \
-	init.$(OBJEXT)
-liblegacy_a_OBJECTS = $(am_liblegacy_a_OBJECTS)
+am__installdirs = "$(DESTDIR)$(bindir)"
+PROGRAMS = $(bin_PROGRAMS)
+am__ocount_SOURCES_DIST = ocount.cpp ocount_counter.h \
+	ocount_counter.cpp
+@BUILD_FOR_PERF_EVENT_TRUE@am_ocount_OBJECTS = ocount.$(OBJEXT) \
+@BUILD_FOR_PERF_EVENT_TRUE@	ocount_counter.$(OBJEXT)
+ocount_OBJECTS = $(am_ocount_OBJECTS)
+@BUILD_FOR_PERF_EVENT_TRUE@ocount_DEPENDENCIES =  \
+@BUILD_FOR_PERF_EVENT_TRUE@	../libpe_utils/libpe_utils.a \
+@BUILD_FOR_PERF_EVENT_TRUE@	../libpe_utils/libpe_utils.a \
+@BUILD_FOR_PERF_EVENT_TRUE@	../libop/libop.a \
+@BUILD_FOR_PERF_EVENT_TRUE@	../libutil/libutil.a \
+@BUILD_FOR_PERF_EVENT_TRUE@	../libutil++/libutil++.a
 DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
 depcomp = $(SHELL) $(top_srcdir)/depcomp
 am__depfiles_maybe = depfiles
 am__mv = mv -f
+CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+	$(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
+LTCXXCOMPILE = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+	--mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+	$(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
+CXXLD = $(CXX)
+CXXLINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+	--mode=link $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
+	$(LDFLAGS) -o $@
 COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
 	$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
 LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
@@ -79,8 +92,8 @@ CCLD = $(CC)
 LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
 	--mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
 	$(LDFLAGS) -o $@
-SOURCES = $(liblegacy_a_SOURCES)
-DIST_SOURCES = $(liblegacy_a_SOURCES)
+SOURCES = $(ocount_SOURCES)
+DIST_SOURCES = $(am__ocount_SOURCES_DIST)
 ETAGS = etags
 CTAGS = ctags
 DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
@@ -128,14 +141,13 @@ LD = @LD@
 LDFLAGS = @LDFLAGS@
 LIBERTY_LIBS = @LIBERTY_LIBS@
 LIBOBJS = @LIBOBJS@
-LIBS = @LIBS@
+LIBS = @LIBERTY_LIBS@ @PFM_LIB@ @RT_LIB@
 LIBTOOL = @LIBTOOL@
 LIPO = @LIPO@
 LN_S = @LN_S@
 LTLIBOBJS = @LTLIBOBJS@
 MAKEINFO = @MAKEINFO@
 MKDIR_P = @MKDIR_P@
-MOC = @MOC@
 NM = @NM@
 NMEDIT = @NMEDIT@
 OBJDUMP = @OBJDUMP@
@@ -159,20 +171,13 @@ PFM_LIB = @PFM_LIB@
 PKG_CONFIG = @PKG_CONFIG@
 POPT_LIBS = @POPT_LIBS@
 PTRDIFF_T_TYPE = @PTRDIFF_T_TYPE@
-QT_CFLAGS = @QT_CFLAGS@
-QT_INCLUDES = @QT_INCLUDES@
-QT_LDFLAGS = @QT_LDFLAGS@
-QT_LIB = @QT_LIB@
-QT_LIBS = @QT_LIBS@
-QT_VERSION = @QT_VERSION@
 RANLIB = @RANLIB@
+RT_LIB = @RT_LIB@
 SED = @SED@
 SET_MAKE = @SET_MAKE@
 SHELL = @SHELL@
 SIZE_T_TYPE = @SIZE_T_TYPE@
 STRIP = @STRIP@
-UIC = @UIC@
-UIChelp = @UIChelp@
 VERSION = @VERSION@
 XMKMF = @XMKMF@
 XML_CATALOG = @XML_CATALOG@
@@ -236,39 +241,31 @@ top_build_prefix = @top_build_prefix@
 top_builddir = @top_builddir@
 top_srcdir = @top_srcdir@
 topdir = @topdir@
-noinst_LIBRARIES = liblegacy.a
-
-# -fno-omit-frame-pointer needed for daemon build: see ChangeLog-2004 02-23
-AM_CFLAGS = @OP_CFLAGS@ -fno-omit-frame-pointer
-AM_CPPFLAGS = \
-	-I ${top_srcdir}/libutil \
-	-I ${top_srcdir}/libop \
-	-I ${top_srcdir}/libdb \
-	-I ${top_srcdir}/daemon \
-	@OP_CPPFLAGS@
-
-liblegacy_a_SOURCES = \
-	opd_24_stats.c \
-	opd_24_stats.h \
-	opd_kernel.c \
-	opd_proc.c \
-	opd_image.c \
-	opd_mapping.c \
-	opd_parse_proc.c \
-	opd_image.h \
-	opd_mapping.h \
-	p_module.h \
-	opd_kernel.h \
-	opd_parse_proc.h \
-	opd_proc.h \
-	opd_sample_files.c \
-	opd_sample_files.h \
-	init.c
+@BUILD_FOR_PERF_EVENT_TRUE@AM_CPPFLAGS = \
+@BUILD_FOR_PERF_EVENT_TRUE@	-I ${top_srcdir}/libutil \
+@BUILD_FOR_PERF_EVENT_TRUE@	-I ${top_srcdir}/libutil++ \
+@BUILD_FOR_PERF_EVENT_TRUE@	-I ${top_srcdir}/libop \
+@BUILD_FOR_PERF_EVENT_TRUE@	-I ${top_srcdir}/libperf_events \
+@BUILD_FOR_PERF_EVENT_TRUE@	-I ${top_srcdir}/libpe_utils \
+@BUILD_FOR_PERF_EVENT_TRUE@	@PERF_EVENT_FLAGS@ \
+@BUILD_FOR_PERF_EVENT_TRUE@	@OP_CPPFLAGS@
+
+@BUILD_FOR_PERF_EVENT_TRUE@ocount_SOURCES = ocount.cpp \
+@BUILD_FOR_PERF_EVENT_TRUE@	ocount_counter.h \
+@BUILD_FOR_PERF_EVENT_TRUE@	ocount_counter.cpp
+
+@BUILD_FOR_PERF_EVENT_TRUE@AM_CXXFLAGS = @OP_CXXFLAGS@
+@BUILD_FOR_PERF_EVENT_TRUE@AM_LDFLAGS = @OP_LDFLAGS@
+@BUILD_FOR_PERF_EVENT_TRUE@ocount_LDADD = ../libpe_utils/libpe_utils.a \
+@BUILD_FOR_PERF_EVENT_TRUE@	../libpe_utils/libpe_utils.a \
+@BUILD_FOR_PERF_EVENT_TRUE@	../libop/libop.a \
+@BUILD_FOR_PERF_EVENT_TRUE@	../libutil/libutil.a \
+@BUILD_FOR_PERF_EVENT_TRUE@	../libutil++/libutil++.a
 
 all: all-am
 
 .SUFFIXES:
-.SUFFIXES: .c .lo .o .obj
+.SUFFIXES: .cpp .lo .o .obj
 $(srcdir)/Makefile.in:  $(srcdir)/Makefile.am  $(am__configure_deps)
 	@for dep in $?; do \
 	  case '$(am__configure_deps)' in \
@@ -278,9 +275,9 @@ $(srcdir)/Makefile.in:  $(srcdir)/Makefile.am  $(am__configure_deps)
 	      exit 1;; \
 	  esac; \
 	done; \
-	echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign daemon/liblegacy/Makefile'; \
+	echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign pe_counting/Makefile'; \
 	$(am__cd) $(top_srcdir) && \
-	  $(AUTOMAKE) --foreign daemon/liblegacy/Makefile
+	  $(AUTOMAKE) --foreign pe_counting/Makefile
 .PRECIOUS: Makefile
 Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
 	@case '$?' in \
@@ -299,13 +296,52 @@ $(top_srcdir)/configure:  $(am__configure_deps)
 $(ACLOCAL_M4):  $(am__aclocal_m4_deps)
 	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
 $(am__aclocal_m4_deps):
-
-clean-noinstLIBRARIES:
-	-test -z "$(noinst_LIBRARIES)" || rm -f $(noinst_LIBRARIES)
-liblegacy.a: $(liblegacy_a_OBJECTS) $(liblegacy_a_DEPENDENCIES) 
-	-rm -f liblegacy.a
-	$(liblegacy_a_AR) liblegacy.a $(liblegacy_a_OBJECTS) $(liblegacy_a_LIBADD)
-	$(RANLIB) liblegacy.a
+install-binPROGRAMS: $(bin_PROGRAMS)
+	@$(NORMAL_INSTALL)
+	test -z "$(bindir)" || $(MKDIR_P) "$(DESTDIR)$(bindir)"
+	@list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \
+	for p in $$list; do echo "$$p $$p"; done | \
+	sed 's/$(EXEEXT)$$//' | \
+	while read p p1; do if test -f $$p || test -f $$p1; \
+	  then echo "$$p"; echo "$$p"; else :; fi; \
+	done | \
+	sed -e 'p;s,.*/,,;n;h' -e 's|.*|.|' \
+	    -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \
+	sed 'N;N;N;s,\n, ,g' | \
+	$(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \
+	  { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \
+	    if ($$2 == $$4) files[d] = files[d] " " $$1; \
+	    else { print "f", $$3 "/" $$4, $$1; } } \
+	  END { for (d in files) print "f", d, files[d] }' | \
+	while read type dir files; do \
+	    if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \
+	    test -z "$$files" || { \
+	    echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(bindir)$$dir'"; \
+	    $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(bindir)$$dir" || exit $$?; \
+	    } \
+	; done
+
+uninstall-binPROGRAMS:
+	@$(NORMAL_UNINSTALL)
+	@list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \
+	files=`for p in $$list; do echo "$$p"; done | \
+	  sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \
+	      -e 's/$$/$(EXEEXT)/' `; \
+	test -n "$$list" || exit 0; \
+	echo " ( cd '$(DESTDIR)$(bindir)' && rm -f" $$files ")"; \
+	cd "$(DESTDIR)$(bindir)" && rm -f $$files
+
+clean-binPROGRAMS:
+	@list='$(bin_PROGRAMS)'; test -n "$$list" || exit 0; \
+	echo " rm -f" $$list; \
+	rm -f $$list || exit $$?; \
+	test -n "$(EXEEXT)" || exit 0; \
+	list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
+	echo " rm -f" $$list; \
+	rm -f $$list
+ocount$(EXEEXT): $(ocount_OBJECTS) $(ocount_DEPENDENCIES) 
+	@rm -f ocount$(EXEEXT)
+	$(CXXLINK) $(ocount_OBJECTS) $(ocount_LDADD) $(LIBS)
 
 mostlyclean-compile:
 	-rm -f *.$(OBJEXT)
@@ -313,35 +349,29 @@ mostlyclean-compile:
 distclean-compile:
 	-rm -f *.tab.c
 
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/init.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/opd_24_stats.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/opd_image.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/opd_kernel.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/opd_mapping.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/opd_parse_proc.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/opd_proc.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/opd_sample_files.Po@am__quote@
-
-.c.o:
-@am__fastdepCC_TRUE@	$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
-@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
-@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@	$(COMPILE) -c $<
-
-.c.obj:
-@am__fastdepCC_TRUE@	$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
-@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
-@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@	$(COMPILE) -c `$(CYGPATH_W) '$<'`
-
-.c.lo:
-@am__fastdepCC_TRUE@	$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
-@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
-@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@	$(LTCOMPILE) -c -o $@ $<
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ocount.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ocount_counter.Po@am__quote@
+
+.cpp.o:
+@am__fastdepCXX_TRUE@	$(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCXX_TRUE@	$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@	source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@	$(CXXCOMPILE) -c -o $@ $<
+
+.cpp.obj:
+@am__fastdepCXX_TRUE@	$(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
+@am__fastdepCXX_TRUE@	$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@	source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@	$(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+.cpp.lo:
+@am__fastdepCXX_TRUE@	$(LTCXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCXX_TRUE@	$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@	source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@	$(LTCXXCOMPILE) -c -o $@ $<
 
 mostlyclean-libtool:
 	-rm -f *.lo
@@ -433,8 +463,11 @@ distdir: $(DISTFILES)
 	done
 check-am: all-am
 check: check-am
-all-am: Makefile $(LIBRARIES)
+all-am: Makefile $(PROGRAMS)
 installdirs:
+	for dir in "$(DESTDIR)$(bindir)"; do \
+	  test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+	done
 install: install-am
 install-exec: install-exec-am
 install-data: install-data-am
@@ -462,8 +495,7 @@ maintainer-clean-generic:
 	@echo "it deletes files that may require special tools to rebuild."
 clean: clean-am
 
-clean-am: clean-generic clean-libtool clean-noinstLIBRARIES \
-	mostlyclean-am
+clean-am: clean-binPROGRAMS clean-generic clean-libtool mostlyclean-am
 
 distclean: distclean-am
 	-rm -rf ./$(DEPDIR)
@@ -489,7 +521,7 @@ install-dvi: install-dvi-am
 
 install-dvi-am:
 
-install-exec-am:
+install-exec-am: install-binPROGRAMS
 
 install-html: install-html-am
 
@@ -529,22 +561,23 @@ ps: ps-am
 
 ps-am:
 
-uninstall-am:
+uninstall-am: uninstall-binPROGRAMS
 
 .MAKE: install-am install-strip
 
-.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
-	clean-libtool clean-noinstLIBRARIES ctags distclean \
-	distclean-compile distclean-generic distclean-libtool \
-	distclean-tags distdir dvi dvi-am html html-am info info-am \
-	install install-am install-data install-data-am install-dvi \
+.PHONY: CTAGS GTAGS all all-am check check-am clean clean-binPROGRAMS \
+	clean-generic clean-libtool ctags distclean distclean-compile \
+	distclean-generic distclean-libtool distclean-tags distdir dvi \
+	dvi-am html html-am info info-am install install-am \
+	install-binPROGRAMS install-data install-data-am install-dvi \
 	install-dvi-am install-exec install-exec-am install-html \
 	install-html-am install-info install-info-am install-man \
 	install-pdf install-pdf-am install-ps install-ps-am \
 	install-strip installcheck installcheck-am installdirs \
 	maintainer-clean maintainer-clean-generic mostlyclean \
 	mostlyclean-compile mostlyclean-generic mostlyclean-libtool \
-	pdf pdf-am ps ps-am tags uninstall uninstall-am
+	pdf pdf-am ps ps-am tags uninstall uninstall-am \
+	uninstall-binPROGRAMS
 
 
 # Tell versions [3.59,3.63) of GNU make to not export all variables.
diff --git a/pe_counting/ocount.cpp b/pe_counting/ocount.cpp
new file mode 100644
index 0000000..07dfd0c
--- /dev/null
+++ b/pe_counting/ocount.cpp
@@ -0,0 +1,929 @@
+/**
+ * @file ocount.cpp
+ * Tool for event counting using the new Linux Performance Events Subsystem.
+ *
+ * @remark Copyright 2013 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * Created on: May 21, 2013
+ * @author Maynard Johnson
+ * (C) Copyright IBM Corp. 2013
+ *
+ */
+
+#include "config.h"
+
+#include <iostream>
+#include <stdexcept>
+#include <fstream>
+#include <vector>
+#include <set>
+
+#include <signal.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <getopt.h>
+#include <sys/time.h>
+#include <math.h>
+
+#include "op_pe_utils.h"
+#include "ocount_counter.h"
+#include "op_cpu_type.h"
+#include "op_cpufreq.h"
+#include "operf_event.h"
+#include "cverb.h"
+#include "op_libiberty.h"
+
+// Globals
+char * app_name = NULL;            // program path that run_app() passes to execvp()
+bool use_cpu_minus_one = false;
+std::vector<operf_event_t> events; // handed to the ocount_record constructor in start_counting()
+op_cpu cpu_type;
+
+#define OCOUNT_MSECS_PER_SEC 1000
+// Current implementation supports a display interval of 100 ms
+#define OCOUNT_DSP_INTVL_MSECS 100
+#define OCOUNT_DSP_INTVLS_PER_SEC (OCOUNT_MSECS_PER_SEC/OCOUNT_DSP_INTVL_MSECS)
+#define OCOUNT_NSECS_PER_MSEC 1000000
+#define OCOUNT_NSECS_PER_DSP_INTVL (OCOUNT_NSECS_PER_MSEC * OCOUNT_DSP_INTVL_MSECS)
+
+static char * app_name_SAVE = NULL; // released with free() in cleanup()
+static char ** app_args = NULL;     // argv for the app; slot 0 is overwritten in run_app(); freed in cleanup()
+static bool app_started;            // set once the parent has written the "go" token to the child
+static bool startApp;               // true when runmode == OP_START_APP (assigned in start_counting())
+static bool stop = false;           // set by op_sig_stop() and by the interval loop in _run()
+static std::ofstream outfile;       // closed in cleanup() when ocount_options::outfile is non-empty
+static pid_t my_uid;                // NOTE(review): declared pid_t although the name suggests a uid -- confirm
+static double cpu_speed;
+static ocount_record * orecord;     // allocated with new in start_counting()
+static pid_t app_PID = -1;          // fork() result for the launched app; -1 until then
+
+using namespace std;
+using namespace op_pe_utils;
+
+
+typedef enum END_CODE {
+	ALL_OK = 0,               // nothing went wrong
+	APP_ABNORMAL_END =  1,    // set by _get_waitpid_status(): waitpid error, non-zero exit, or signal other than SIGKILL
+	PERF_RECORD_ERROR = 2,    // returned by _run() when start_counting() fails or throws runtime_error
+	PERF_READ_ERROR   = 4,
+	PERF_BOTH_ERROR   = 8
+} end_code_t;
+
+namespace ocount_options {
+bool verbose;              // -V / --verbose
+bool system_wide;          // -s / --system-wide
+vector<pid_t> processes;   // -p / --process-list (comma separated)
+vector<pid_t> threads;     // -r / --thread-list (comma separated)
+vector<int> cpus;          // -C / --cpu-list (ranges such as 3-8 are expanded by _parse_cpu_list())
+string outfile;            // -f / --output-file
+bool separate_cpu;         // -c / --separate-cpu
+bool separate_thread;      // -t / --separate-thread
+set<string> evts;          // -e / --events (comma separated)
+bool csv_output;           // -b / --brief-format
+long display_interval;     // first field of -i / --time-interval, in milliseconds; 0 disables interval output
+long num_intervals;        // optional second field of -i, after the ':'
+}
+
+
+static enum op_runmode runmode = OP_MAX_RUNMODE; // _validate_args() treats OP_MAX_RUNMODE as "no run mode selected"
+static string runmode_options[] = { "<command> [command-args]", "--system-wide", "--cpu-list",
+                                    "--process-list", "--thread-list" // indexed by op_runmode value when reporting conflicts
+};
+
+
+struct option long_options [] = // getopt_long() table; each entry maps to the short option in its last field
+{
+ {"verbose", no_argument, NULL, 'V'},
+ {"system-wide", no_argument, NULL, 's'},
+ {"cpu-list", required_argument, NULL, 'C'},
+ {"process-list", required_argument, NULL, 'p'},
+ {"thread-list", required_argument, NULL, 'r'},
+ {"events", required_argument, NULL, 'e'},
+ {"output-file", required_argument, NULL, 'f'},
+ {"separate-cpu", no_argument, NULL, 'c'},
+ {"separate-thread", no_argument, NULL, 't'},
+ {"brief-format", no_argument, NULL, 'b'},
+ {"time-interval", required_argument, NULL, 'i'},
+ {"help", no_argument, NULL, 'h'},
+ {"usage", no_argument, NULL, 'u'},
+ {"version", no_argument, NULL, 'v'},
+ {NULL, 9, NULL, 0} // NOTE(review): terminator; has_arg is 9 here where an all-zero entry is customary -- confirm intent
+};
+
+const char * short_options = "VsC:p:r:e:f:ctbi:huv"; // ':' marks options that take an argument, matching the table above
+
+static void cleanup(void) // release process-wide resources before exiting
+{
+	free(app_name_SAVE);
+	free(app_args);
+	events.clear();
+	if (!ocount_options::outfile.empty()) // an output file name was given with -f
+		outfile.close();
+}
+
+
+// Signal handler for main (parent) process; installed for SIGINT by set_signals_for_parent().
+static void op_sig_stop(int sigval __attribute__((unused)))
+{
+	// Received a signal to quit, so we need to stop the
+	// app being counted.
+	size_t dummy __attribute__ ((__unused__)); // only absorbs the return value of write()
+	stop = true; // ends the "while (!stop)" loops in _run()
+	if (cverb << vdebug) // NOTE(review): stream object used inside a signal handler -- confirm this is safe
+		dummy = write(1, "in op_sig_stop\n", 15); // 15 == length of the message; fd 1 is stdout
+	if (startApp)
+		kill(app_PID, SIGKILL); // ocount forked the app, so it ends the app as well
+}
+
+void set_signals_for_parent(void) // unblock every signal, then route SIGINT to op_sig_stop()
+{
+	struct sigaction act;
+	sigset_t ss;
+
+	sigfillset(&ss);
+	sigprocmask(SIG_UNBLOCK, &ss, NULL); // _run() blocks all signals before calling start_counting()
+
+	act.sa_handler = op_sig_stop;
+	act.sa_flags = 0; // SA_RESTART is not requested; _get_waitpid_status() handles the resulting EINTR
+	sigemptyset(&act.sa_mask);
+	sigaddset(&act.sa_mask, SIGINT); // hold off a second SIGINT while the handler runs
+
+	if (sigaction(SIGINT, &act, NULL)) {
+		perror("ocount: install of SIGINT handler failed: ");
+		exit(EXIT_FAILURE);
+	}
+}
+
+
+static void __print_usage_and_exit(const char * extra_msg) // never returns; always exits with EXIT_FAILURE
+{
+	if (extra_msg) // optional, printed on its own line ahead of the usage text
+		cerr << extra_msg << endl;
+	cerr << "usage: ocount [ options ] [ --system-wide | -p <pids> | -r <tids> | -C <cpus> | [ command [ args ] ] ]" << endl;
+	cerr << "See ocount man page for details." << endl;
+	exit(EXIT_FAILURE);
+}
+
+static string args_to_string(void)
+{
+	// Concatenate app_args[1..] (everything after the program name), adding
+	// one space after every argument; the array ends at its NULL entry.
+	string joined;
+	for (char * const * argp = app_args + 1; *argp != NULL; ++argp) {
+		joined += *argp;
+		joined += ' ';
+	}
+	return joined;
+}
+
+static int app_ready_pipe[2], start_app_pipe[2]; // child->parent "ready" pipe and parent->child "go" pipe
+
+void run_app(void) // executed by the forked child only; ends in execvp() or _exit(), never returns
+{
+	// ASSUMPTION: app_name is a fully-qualified pathname
+	char * app_fname = rindex(app_name, '/') + 1; // text after the last '/'; relies on the assumption above
+	app_args[0] = app_fname;
+
+	string arg_str = args_to_string();
+	cverb << vdebug << "Exec args are: " << app_fname << " " << arg_str << endl;
+	// Fake an exec to warm-up the resolver
+	execvp("", app_args);
+	// signal to the parent that we're ready to exec
+	int startup = 1;
+	if (write(app_ready_pipe[1], &startup, sizeof(startup)) < 0) {
+		perror("Internal error on app_ready_pipe");
+		_exit(EXIT_FAILURE);
+	}
+
+	// wait for parent to tell us to start
+	int startme = 0;
+	if (read(start_app_pipe[0], &startme, sizeof(startme)) == -1) {
+		perror("Internal error in run_app on start_app_pipe");
+		_exit(EXIT_FAILURE);
+	}
+	if (startme != 1) // any token other than 1 means "do not start"
+		_exit(EXIT_SUCCESS);
+
+	cverb << vdebug << "parent says start app " << app_name << endl;
+	execvp(app_name, app_args);
+	cerr <<  "Failed to exec " << app_fname << " " << arg_str << ": " << strerror(errno) << endl; // reached only if execvp() returned
+	/* We don't want any cleanup in the child */
+	_exit(EXIT_FAILURE);
+
+}
+
+bool start_counting(void) // fork the app if requested, then start counters for the selected run mode
+{
+	vector<pid_t> proc_list; // May contain processes or threads; NOTE(review): filled below but not read again here
+
+	// The only process that should return from this function is the process
+	// which invoked it.  Any forked process must do _exit() rather than return().
+
+	startApp = runmode == OP_START_APP;
+
+	if (startApp) {
+		if (pipe(app_ready_pipe) < 0 || pipe(start_app_pipe) < 0) {
+			perror("Internal error: ocount-record could not create pipe");
+			return false;
+		}
+		app_PID = fork();
+		if (app_PID < 0) {
+			perror("Internal error: fork failed");
+			return false;
+		} else if (app_PID == 0) { // child process for exec'ing app
+			run_app();
+		}
+	}
+
+	// parent
+	int startup; // assigned only by the read() below, and only used when startApp is true
+	if (startApp) {
+		if (read(app_ready_pipe[0], &startup, sizeof(startup)) == -1) {
+			perror("Internal error on app_ready_pipe");
+			return false;
+		} else if (startup != 1) {
+			cerr << "app is not ready to start; exiting" << endl;
+			return false;
+		}
+		proc_list.push_back(app_PID);
+	} else if (!ocount_options::threads.empty()) {
+		proc_list = ocount_options::threads;
+	} else if (!ocount_options::processes.empty()) {
+		proc_list = ocount_options::processes;
+	}
+
+	if (startApp) {
+		// Tell app_PID to start the app
+		cverb << vdebug << "telling child to start app" << endl;
+		if (write(start_app_pipe[1], &startup, sizeof(startup)) < 0) { // echoes the 1 received from the child
+			perror("Internal error on start_app_pipe");
+			return false;
+		}
+		app_started = true;
+	}
+
+	orecord = new ocount_record(runmode, events, ocount_options::display_interval ? true : false);
+	bool ret;
+	switch (runmode) {
+	case OP_START_APP:
+		ret = orecord->start_counting_app_process(app_PID);
+		break;
+	case OP_SYSWIDE:
+		ret = orecord->start_counting_syswide();
+		break;
+	case OP_CPULIST:
+		ret = orecord->start_counting_cpulist(ocount_options::cpus);
+		break;
+	case OP_PROCLIST:
+		ret = orecord->start_counting_tasklist(ocount_options::processes, false);
+		break;
+	case OP_THREADLIST:
+		ret = orecord->start_counting_tasklist(ocount_options::threads, true);
+		break;
+	default:
+		ret = false;
+		break;   // impossible to get here, since we validate runmode prior to this point
+	}
+	if (!orecord->get_valid()) {
+		/* If valid is false, it means that one of the "known" errors has
+		 * occurred:
+		 *   - monitored process has already ended
+		 *   - passed PID was invalid
+		 *   - device or resource busy
+		 */
+		cverb << vdebug << "ocount record init failed" << endl;
+		ret = false;
+	}
+
+	return ret; // false on any setup failure above
+}
+
+static void do_results(ostream & out) // write the current counts to out; exits the process on runtime_error
+{
+	try {
+		orecord->output_results(out, ocount_options::separate_cpu | ocount_options::separate_thread,
+		                        ocount_options::csv_output); // 2nd argument is non-zero if either separation flag is set
+	} catch (const runtime_error & e) {
+		cerr << "Caught runtime error from ocount_record::output_results" << endl;
+		cerr << e.what() << endl;
+		cleanup();
+		exit(EXIT_FAILURE);
+	}
+}
+
+end_code_t _get_waitpid_status(int waitpid_status, int wait_rc) // map a waitpid() result to ALL_OK or APP_ABNORMAL_END
+{
+	end_code_t rc = ALL_OK;
+	if (wait_rc < 0) { // waitpid() itself failed; errno is inspected below
+		if (errno == EINTR) {
+			//  Ctrl-C will only kill the monitored app.  See the op_sig_stop signal handler.
+			cverb << vdebug << "Caught ctrl-C.  Killed app process." << endl;
+		} else {
+			cerr << "waitpid for app process failed: " << strerror(errno) << endl;
+			rc = APP_ABNORMAL_END;
+		}
+	} else if (wait_rc) { // a child status was collected
+		if (WIFEXITED(waitpid_status) && (!WEXITSTATUS(waitpid_status))) {
+			cverb << vdebug << "app process ended normally." << endl;
+		} else if (WIFEXITED(waitpid_status)) {
+			cerr << "app process exited with the following status: "
+					<< WEXITSTATUS(waitpid_status) << endl;
+			rc = APP_ABNORMAL_END;
+		}  else if (WIFSIGNALED(waitpid_status)) {
+			if (WTERMSIG(waitpid_status) != SIGKILL) { // SIGKILL is what ocount itself sends, so it is not reported
+				cerr << "app process killed by signal "
+						<< WTERMSIG(waitpid_status) << endl;
+				rc = APP_ABNORMAL_END;
+			}
+		}
+	}
+	return rc; // wait_rc == 0 falls through with ALL_OK
+}
+
+end_code_t _wait_for_app(ostream & out) // wait for the launched app, printing interval results to out if requested
+{
+	int wait_rc;
+	end_code_t rc = ALL_OK;
+	int waitpid_status = 0;
+
+	bool done = false;
+	cverb << vdebug << "going into waitpid on monitored app " << app_PID << endl;
+	if (ocount_options::display_interval) {
+		long number_intervals = ocount_options::num_intervals; // when 0, the pre-decrement below never reaches 0 in practice
+		do {
+			struct timeval mytime;
+			unsigned int countdown, dsp_intvl_in_100ms_units = 0;
+			struct timespec ts_req;
+
+			/* The display_interval is in milliseconds; but at this time, we only allow
+			 * 100ms granularity. */
+			dsp_intvl_in_100ms_units = (int)round(((double)ocount_options::display_interval/
+					OCOUNT_DSP_INTVL_MSECS));
+			if (dsp_intvl_in_100ms_units == 0)
+				dsp_intvl_in_100ms_units = 1; // special case of rounding up; prevent 0 time interval
+			countdown = dsp_intvl_in_100ms_units;
+			cverb << vdebug << "Actual display interval used: " << dsp_intvl_in_100ms_units
+					<< "x100ms" << endl;
+			ts_req.tv_nsec = countdown * OCOUNT_NSECS_PER_DSP_INTVL; // consumed by nanosleep() only on the sub-second leg below
+			ts_req.tv_sec = 0;
+			while (countdown) {
+				/* We want to avoid the scenario where, say, the user requests a
+				 * 10 second time interval and the app being counted ends
+				 * immediately after we call sleep().  If we called sleep() with
+				 * the full time interval, we'd be sleeping unnecessarily for
+				 * 10 seconds.  So, for any time interval of one second or longer,
+				 * we do a one second sleep(), wake up and check if the app is
+				 * still alive.
+				 */
+				if (countdown >= OCOUNT_DSP_INTVLS_PER_SEC) {
+					countdown -= OCOUNT_DSP_INTVLS_PER_SEC;
+					sleep(1);
+					if (countdown && (countdown < OCOUNT_DSP_INTVLS_PER_SEC)) {
+						/* The next time through the loop, we'll be taking the
+						 * 'else' leg of this if-statement -- i.e., we will have
+						 * finished the specified time interval, modulo
+						 * OCOUNT_DSP_INTVLS_PER_SEC. So we set sleep time
+						 * accordingly and force dsp_intvl_in_100ms_units to be
+						 * equal to the current countdown value so the "-=" op
+						 * below makes the countdown finally zero.
+						 */
+						ts_req.tv_nsec = countdown * OCOUNT_NSECS_PER_DSP_INTVL;
+						dsp_intvl_in_100ms_units  = countdown;
+					}
+
+				} else {
+					/* We don't bother keeping track of remaining time, since signals delivered
+					 * to this process will be rare (see nanosleep man page). The real time (in
+					 * the supported granularity of 100 ms) is printed out for each time interval,
+					 * so we leave it to the user to note any time gaps.
+					 */
+					(void)nanosleep(&ts_req, NULL);
+					countdown -= dsp_intvl_in_100ms_units;
+				}
+				if (countdown == 0) { // a full display interval has elapsed: emit timestamp + results
+					if (gettimeofday(&mytime, NULL) < 0) {
+						cleanup();
+						perror("gettimeofday");
+						exit(EXIT_FAILURE);
+					}
+					if (!ocount_options::csv_output)
+						out << endl << "Current time (seconds since epoch): ";
+					else
+						out << endl << "timestamp,";
+					if (dsp_intvl_in_100ms_units % OCOUNT_DSP_INTVLS_PER_SEC) { // not a whole number of seconds: print tenths
+						int tenths_secs = (int)round(((double)mytime.tv_usec/100000));
+						if (tenths_secs == 10) // rounding carried into the next second
+							out << dec << mytime.tv_sec + 1 << "." << "0";
+						else
+							out << dec << mytime.tv_sec << "." << tenths_secs;
+					} else {
+						out << dec << mytime.tv_sec;
+					}
+					do_results(out);
+				}
+				wait_rc = waitpid(app_PID, &waitpid_status, WNOHANG); // non-blocking check whether the app has ended
+				if (wait_rc) {
+					rc = _get_waitpid_status(waitpid_status, wait_rc);
+					done = true;
+					countdown = 0;
+				}
+			}
+			if (--number_intervals == 0) { // requested number of intervals reached: stop the app
+				done = true;
+				kill(app_PID, SIGKILL);
+			}
+		} while (!done);
+	} else {
+		wait_rc = waitpid(app_PID, &waitpid_status, 0); // no interval display: block until the app ends
+		rc = _get_waitpid_status(waitpid_status, wait_rc);
+	}
+	return rc;
+}
+
+static end_code_t _run(ostream & out) // start counting, then wait for the app to end or for SIGINT; results go to out
+{
+	end_code_t rc = ALL_OK;
+
+	// Fork processes with signals blocked.
+	sigset_t ss;
+	sigfillset(&ss);
+	sigprocmask(SIG_BLOCK, &ss, NULL);
+
+	try {
+		if (!start_counting()) {
+			return PERF_RECORD_ERROR;
+		}
+	} catch (const runtime_error & e) {
+		cerr << "Caught runtime error while setting up counters" << endl;
+		cerr << e.what() << endl;
+		return PERF_RECORD_ERROR;
+	}
+	// parent continues here
+	if (startApp)
+		cverb << vdebug << "app " << app_PID << " is running" << endl;
+
+	set_signals_for_parent(); // unblocks signals again and installs the SIGINT handler
+	if (startApp) {
+		rc = _wait_for_app(out);
+	} else {
+		cout << "ocount: Press Ctl-c or 'kill -SIGINT " << getpid() << "' to stop counting" << endl;
+		if (ocount_options::display_interval) {
+			long number_intervals = ocount_options::num_intervals;
+			struct timeval mytime;
+			/* The display_interval is in milliseconds; but at this time, we only allow
+			 * 100ms granularity. */
+			struct timespec ts_req;
+			unsigned int dsp_intvl_in_100ms_units = (int)round(((double)ocount_options::display_interval/
+					OCOUNT_DSP_INTVL_MSECS));
+			if (dsp_intvl_in_100ms_units == 0)
+				dsp_intvl_in_100ms_units = 1; // special case of rounding up; prevent 0 time interval
+			cverb << vdebug << "Actual display interval used: " << dsp_intvl_in_100ms_units
+					<< "x100ms" << endl;
+			// 10 dsp_intvl_in_100ms_units is one second, so we set ts_req accordingly
+			ts_req.tv_sec = dsp_intvl_in_100ms_units/10;
+			ts_req.tv_nsec = (dsp_intvl_in_100ms_units % 10) * OCOUNT_NSECS_PER_DSP_INTVL;
+			while (!stop) {
+				(void)nanosleep(&ts_req, NULL);
+				if (gettimeofday(&mytime, NULL) < 0) {
+					cleanup();
+					perror("gettimeofday");
+					exit(EXIT_FAILURE);
+				}
+				if (!ocount_options::csv_output)
+					out << endl << "Current time (seconds since epoch): ";
+				else
+					out << endl << "timestamp,"; // same CSV tag that _wait_for_app() prints (was "t:")
+				if (dsp_intvl_in_100ms_units % OCOUNT_DSP_INTVLS_PER_SEC) { // not a whole number of seconds: print tenths
+					int tenths_secs = (int)round(((double)mytime.tv_usec/100000));
+					if (tenths_secs == 10) // rounding carried into the next second
+						out << dec << mytime.tv_sec + 1 << "." << "0";
+					else
+						out << dec << mytime.tv_sec << "." << tenths_secs;
+				} else {
+					out << dec << mytime.tv_sec;
+				}
+				do_results(out);
+				if (--number_intervals == 0) // requested number of intervals has been printed
+					stop = true;
+			}
+		} else {
+			while (!stop) // no interval display: idle until op_sig_stop() sets stop
+				sleep(1);
+		}
+	}
+	return rc;
+}
+
+static void _parse_cpu_list(void) // parse optarg ("0,2,4-7") into ocount_options::cpus; bad input prints usage and exits
+{
+	char * comma_sep;
+	char * endptr;
+	// Visit real tokens only: strtok_r() returns NULL for an empty or comma-only list (was passed to strtol()).
+	for (char * aCpu = strtok_r(optarg, ",", &comma_sep); aCpu; aCpu = strtok_r(NULL, ",", &comma_sep)) {
+		int tmp = strtol(aCpu, &endptr, 10);
+		if ((endptr >= aCpu) && (endptr <= (aCpu + strlen(aCpu) - 1))) { // strtol() stopped before the end of the token
+			// Check if user has passed a range of cpu numbers:  e.g., '3-8'
+			char * dash_sep;
+			char * ending_cpu_str, * starting_cpu_str = strtok_r(aCpu, "-", &dash_sep);
+			int starting_cpu, ending_cpu;
+			if (starting_cpu_str) {
+				ending_cpu_str = strtok_r(NULL, "-", &dash_sep);
+				if (!ending_cpu_str) {
+					__print_usage_and_exit("ocount: Invalid cpu range.");
+				}
+				starting_cpu = strtol(starting_cpu_str, &endptr, 10);
+				if ((endptr >= starting_cpu_str) &&
+						(endptr <= (starting_cpu_str + strlen(starting_cpu_str) - 1))) {
+					__print_usage_and_exit("ocount: Invalid numeric value for --cpu-list option.");
+				}
+				ending_cpu = strtol(ending_cpu_str, &endptr, 10);
+				if ((endptr >= ending_cpu_str) &&
+						(endptr <= (ending_cpu_str + strlen(ending_cpu_str) - 1))) {
+					__print_usage_and_exit("ocount: Invalid numeric value for --cpu-list option.");
+				}
+				for (int i = starting_cpu; i < ending_cpu + 1; i++) // range is inclusive at both ends
+					ocount_options::cpus.push_back(i);
+			} else {
+				__print_usage_and_exit("ocount: Invalid numeric value for --cpu-list option.");
+			}
+		} else {
+			ocount_options::cpus.push_back(tmp); // whole token was a number
+		}
+	}
+}
+
+static void _parse_time_interval(void) // parse optarg "interval[:count]" into display_interval / num_intervals
+{
+	char * endptr;
+	char * num_intervals, * interval = strtok(optarg, ":");
+	if (!interval) // strtok() found no token (empty or ':'-only argument); do not hand NULL to strtol()
+		__print_usage_and_exit("ocount: Invalid numeric value for interval_length.");
+	ocount_options::display_interval = strtol(interval, &endptr, 10);
+	if ((*endptr != '\0') || (ocount_options::display_interval < 0)) // trailing junk or negative value
+		__print_usage_and_exit("ocount: Invalid numeric value for interval_length.");
+
+	// User has specified num_intervals: e.g., '-i 5:10'
+	num_intervals = strtok(NULL, ":");
+	if (num_intervals) {
+		ocount_options::num_intervals = strtol(num_intervals, &endptr, 10);
+		if ((*endptr != '\0') || (ocount_options::num_intervals < 0)) // trailing junk or negative value
+			__print_usage_and_exit("ocount: Invalid numeric value for num_intervals.");
+	}
+}
+
+static int _process_ocount_and_app_args(int argc, char * const argv[])
+{	// Returns the argv index of the first non-option argument, 0 if there is none, or -1 on a getopt error.
+	bool keep_trying = true;
+	int idx_of_non_options = 0;
+	setenv("POSIXLY_CORRECT", "1", 0); // third argument 0: an existing value is left alone
+	while (keep_trying) {
+		int option_idx = 0;
+		int c = getopt_long(argc, argv, short_options, long_options, &option_idx);
+		switch (c) {
+		char * endptr;
+		char * event;
+
+		case -1: // getopt_long() has no more options to return
+			if (optind != argc) {
+				idx_of_non_options = optind;
+			}
+			keep_trying = false;
+			break;
+		case '?':
+			cerr << "ocount: non-option detected at optind " << optind << endl;
+			keep_trying = false;
+			idx_of_non_options = -1;
+			break;
+		case 'V':
+			ocount_options::verbose = true;
+			break;
+		case 's':
+			ocount_options::system_wide = true;
+			break;
+		case 'C':
+			_parse_cpu_list();
+			break;
+		case 'p':
+		{
+			// for-loop: an empty or comma-only list yields no token, so strtol() never sees NULL.
+			for (char * aPid = strtok(optarg, ","); aPid; aPid = strtok(NULL, ",")) {
+				ocount_options::processes.push_back(strtol(aPid, &endptr, 10));
+				if ((endptr >= aPid) && (endptr <= (aPid + strlen(aPid) - 1)))
+					__print_usage_and_exit("ocount: Invalid numeric value for --process-list option.");
+			}
+			break;
+		}
+		case 'r':
+		{
+			// for-loop: an empty or comma-only list yields no token, so strtol() never sees NULL.
+			for (char * aTid = strtok(optarg, ","); aTid; aTid = strtok(NULL, ",")) {
+				ocount_options::threads.push_back(strtol(aTid, &endptr, 10));
+				if ((endptr >= aTid) && (endptr <= (aTid + strlen(aTid) - 1)))
+					__print_usage_and_exit("ocount: Invalid numeric value for --thread-list option.");
+			}
+			break;
+		}
+		case 'e':
+			// Insert real tokens only; constructing a string from a NULL token would be invalid.
+			for (event = strtok(optarg, ","); event; event = strtok(NULL, ","))
+				ocount_options::evts.insert(event);
+			// An empty event list leaves evts unchanged.
+			break;
+		case 'f':
+			ocount_options::outfile = optarg;
+			break;
+		case 'c':
+			ocount_options::separate_cpu = true;
+			break;
+		case 't':
+			ocount_options::separate_thread = true;
+			break;
+		case 'b':
+			ocount_options::csv_output = true;
+			break;
+		case 'i':
+			_parse_time_interval();
+			break;
+		case 'h':
+			__print_usage_and_exit(NULL);
+			break;
+		case 'u':
+			__print_usage_and_exit(NULL);
+			break;
+		case 'v':
+			cout << argv[0] << ": " << PACKAGE << " " << VERSION << " compiled on " << __DATE__
+			     << " " << __TIME__ << endl;
+			exit(EXIT_SUCCESS);
+			break;
+		default:
+			__print_usage_and_exit("ocount: unexpected end of arg parsing");
+		}
+	}
+	return idx_of_non_options;
+}
+
+
+/* Scan the run modes in enum order, beginning with 'starting_point', and return the
+ * first one whose ocount option was given; return OP_MAX_RUNMODE if there is none. */
+static enum op_runmode _get_runmode(int starting_point)
+{
+	for (int mode = starting_point; mode < OP_MAX_RUNMODE; mode++) {
+		bool requested;
+		switch (mode) {
+		case OP_SYSWIDE:
+			requested = ocount_options::system_wide;
+			break;
+		case OP_CPULIST:
+			requested = !ocount_options::cpus.empty();
+			break;
+		case OP_PROCLIST:
+			requested = !ocount_options::processes.empty();
+			break;
+		case OP_THREADLIST:
+			requested = !ocount_options::threads.empty();
+			break;
+		// There is no option to check for OP_START_APP; it gets an explicit case
+		// label (next to default) just to silence Coverity.
+		case OP_START_APP:
+		default:
+			requested = false;
+			break;
+		}
+		if (requested)
+			return (enum op_runmode) mode;
+	}
+	return OP_MAX_RUNMODE;
+}
+
+/* Check the parsed ocount options for consistency.  When no application was named on the
+ * command line (runmode is not OP_START_APP), the global 'runmode' is derived here from
+ * the run-mode options.  Every problem found is reported through __print_usage_and_exit()
+ * or exit() -- presumably neither returns -- so a normal return means the options are
+ * acceptable.
+ */
+static void _validate_args(void)
+{
+	if (ocount_options::verbose && !verbose::setup("debug")) {
+		cerr << "unknown --verbose= options\n";
+		__print_usage_and_exit(NULL);
+	}
+
+	if (runmode == OP_START_APP) {
+		// Starting an app excludes every option-selected run mode; report the first one set.
+		enum op_runmode conflicting_mode = _get_runmode(OP_SYSWIDE);
+		if (conflicting_mode != OP_MAX_RUNMODE) {
+			cerr << "Run mode " << runmode_options[OP_START_APP] << " is incompatible with "
+			     << runmode_options[conflicting_mode] << endl;
+			__print_usage_and_exit(NULL);
+		}
+	} else {
+		runmode = _get_runmode(OP_SYSWIDE);
+		if (runmode == OP_MAX_RUNMODE) {
+			__print_usage_and_exit("You must either pass in the name of a command or app to run or specify a run mode");
+		}
+		// A further mode found after the selected one means two run modes were requested.
+		enum op_runmode rm2 = _get_runmode(runmode + 1);
+		if (rm2 != OP_MAX_RUNMODE) {
+			cerr << "Run mode " << runmode_options[rm2] << " is incompatible with "
+			     << runmode_options[runmode] << endl;
+			__print_usage_and_exit(NULL);
+		}
+	}
+
+	// Separating the counts by CPU requires a run mode that counts per CPU.
+	if (ocount_options::separate_cpu && !(ocount_options::system_wide || !ocount_options::cpus.empty())) {
+		cerr << "The --separate-cpu option is only valid with --system-wide or --cpu-list." << endl;
+		__print_usage_and_exit(NULL);
+	}
+
+	// Separating the counts by thread requires a list of processes or threads.
+	if (ocount_options::separate_thread && !(!ocount_options::threads.empty() || !ocount_options::processes.empty())) {
+		cerr << "The --separate-thread option is only valid with --process-list or --thread-list." << endl;
+		__print_usage_and_exit(NULL);
+	}
+
+	// Every CPU in an explicit CPU list must be one of the available CPUs.
+	if (runmode == OP_CPULIST) {
+		int num_cpus = use_cpu_minus_one ? 1 : sysconf(_SC_NPROCESSORS_ONLN);
+		if (num_cpus < 1) {
+			cerr << "System config says number of online CPUs is " << num_cpus << "; cannot continue" << endl;
+			exit(EXIT_FAILURE);
+		}
+
+		set<int> available_cpus = op_pe_utils::op_get_available_cpus(num_cpus);
+		for (size_t k = 0; k < ocount_options::cpus.size(); k++) {
+			if (available_cpus.find(ocount_options::cpus[k]) == available_cpus.end()) {
+				cerr << "Specified CPU " << ocount_options::cpus[k] << " is not valid" << endl;
+				__print_usage_and_exit(NULL);
+			}
+		}
+	}
+}
+/* Parse the command line, settle on the run mode and hand the events to be counted to op_pe_utils. */
+static void process_args(int argc, char * const argv[])
+{
+	int non_options_idx  = _process_ocount_and_app_args(argc, argv);
+	// < 0: usage error; > 0: index in argv of the app to run; 0 (presumably): no app was named
+	if (non_options_idx < 0) {
+		__print_usage_and_exit(NULL);
+	} else if ((non_options_idx) > 0) {
+		runmode = OP_START_APP;
+		app_name = (char *) xmalloc(strlen(argv[non_options_idx]) + 1);
+		strcpy(app_name, argv[non_options_idx]);
+		// Note 1: app_args[0] is placeholder for app_fname (filled in later).
+		// Note 2: app_args[<end>] is set to NULL (required by execvp)
+		if (non_options_idx < (argc -1)) {
+			app_args = (char **) xmalloc((sizeof *app_args) *
+			                             (argc - non_options_idx + 1));
+			for(int i = non_options_idx + 1; i < argc; i++) {
+				app_args[i - non_options_idx] = argv[i];
+			}
+			app_args[argc - non_options_idx] = NULL;
+		} else {
+			app_args = (char **) xmalloc((sizeof *app_args) * 2);
+			app_args[1] = NULL;
+		}
+		if (op_validate_app_name(&app_name, &app_name_SAVE) < 0) {
+			__print_usage_and_exit(NULL);
+		}
+	}
+	_validate_args();
+
+	/*  At this point, 'runmode' holds the counting mode the user requested:
+	 *    - count events in a named app (OP_START_APP)
+	 *    - count events in the whole system or on a list of CPUs
+	 *    - count events for a list of processes or a list of threads
+	 */
+
+	if (ocount_options::evts.empty()) {
+		// Use default event
+		op_pe_utils::op_get_default_event(false);
+	} else  {
+		op_pe_utils::op_process_events_list(ocount_options::evts, false, false);
+	}
+	cverb << vdebug << "Number of events passed is " << events.size() << endl;
+	return;
+}
+
+/* ocount entry point: check that perf_events can be used, parse the arguments, run the
+ * counting session via _run() and print the results to the requested output file or to
+ * stdout (unless a display interval is set, in which case no final display is done). */
+int main(int argc, char * const argv[])
+{
+	int rc;
+	bool get_results = true;
+	int perf_event_paranoid = op_get_sys_value("/proc/sys/kernel/perf_event_paranoid");
+
+	my_uid = geteuid();
+	rc = op_check_perf_events_cap(use_cpu_minus_one);
+	/* Early perf_events kernels required the cpu argument to perf_event_open
+	 * to be '-1' when setting up to monitor a single process if 1) the user is
+	 * not root; and 2) perf_event_paranoid is > 0.  An EACCES error would be
+	 * returned if passing '0' or greater for the cpu arg and the above criteria
+	 * was not met.  Unfortunately, later kernels turned this requirement around
+	 * such that the passed cpu arg must be '0' or greater when the user is not
+	 * root.
+	 *
+	 * We don't really have a good way to check whether we're running on such an
+	 * early kernel except to try the perf_event_open with different values to see
+	 * what works.
+	 */
+	if (rc == EACCES && my_uid != 0 && perf_event_paranoid > 0) {
+		use_cpu_minus_one = true;
+		rc = op_check_perf_events_cap(use_cpu_minus_one);
+	}
+	if (rc == EBUSY)
+		cerr << "Performance monitor unit is busy.  Ensure that no other profilers are running on the system." << endl
+		     << "Note: For example, the obsolete opcontrol profiler (available in earlier oprofile releases)" << endl
+		     << "does not allow other performance tools to run simultaneously. To check for this, look for the" << endl
+		     << "'oprofiled' process using the 'ps' command." << endl;
+	else if (rc == ENOSYS)
+		cerr << "Your kernel does not implement a required syscall"
+		     << " for the ocount program." << endl;
+	else if (rc == ENOENT)
+		cerr << "Your kernel's Performance Events Subsystem does not support"
+		     << " your processor type." << endl;
+	else if (rc)
+		cerr << "Unexpected error running ocount: " << strerror(rc) << endl;
+
+	if (rc)
+		exit(1);
+
+	cpu_type = op_get_cpu_type();
+
+	if (cpu_type == CPU_NO_GOOD) {
+		cerr << "Unable to ascertain cpu type.  Exiting." << endl;
+		cleanup();
+		exit(1);
+	}
+
+	if (cpu_type == CPU_TIMER_INT) {
+		cerr << "CPU type 'timer' was detected, but ocount does not support 'timer' as a cpu type." << endl
+		     << "Ensure the obsolete opcontrol profiler (available in pre-1.0 oprofile releases)" << endl
+		     << "is not running on the system.  To check for this, look for the file" << endl
+		     << "/dev/oprofile/cpu_type; if this file exists, locate the pre-1.0 oprofile" << endl
+		     << "installation, and use its 'opcontrol' command with the --deinit option." << endl;
+		cleanup();
+		exit(1);
+	}
+	cpu_speed = op_cpu_frequency();
+	try {
+		process_args(argc, argv);
+	} catch (const runtime_error & e) {
+		cerr << "Caught runtime error while processing args" << endl;
+		cerr << e.what() << endl;
+		cleanup();
+		exit(EXIT_FAILURE);
+	}
+
+	if ((runmode == OP_SYSWIDE || runmode == OP_CPULIST) && ((my_uid != 0) && (perf_event_paranoid > 0))) {
+		cerr << "To do " << (runmode == OP_SYSWIDE ? "system-wide " : "cpu-list ")
+		     << "event counting, either you must be root or" << endl;
+		cerr << "/proc/sys/kernel/perf_event_paranoid must be set to 0 or -1." << endl;
+		cleanup();
+		exit(1);
+	}
+
+	if (!ocount_options::outfile.empty()) {
+		outfile.open(ocount_options::outfile.c_str());
+		// Fail now rather than run the whole session and silently lose its results.
+		if (!outfile) {
+			cerr << "Unable to open output file " << ocount_options::outfile << endl;
+			cleanup();
+			exit(EXIT_FAILURE);
+		}
+	}
+	ostream & out = !ocount_options::outfile.empty() ? outfile : cout;
+
+	end_code_t run_result;
+	if ((run_result = _run(out))) {
+		get_results = false;
+		if (startApp && app_started && (run_result != APP_ABNORMAL_END)) {
+			cverb << vdebug << "Killing monitored app . . ." << endl;
+			if (kill(app_PID, SIGKILL)) {
+				if (errno == ESRCH)
+					cverb << vdebug
+					      << "Unable to kill monitored app because it has already ended"
+					      << endl;
+				else
+					perror("Attempt to kill monitored app failed.");
+			}
+		}
+		if ((run_result == PERF_RECORD_ERROR) || (run_result == PERF_BOTH_ERROR)) {
+			cerr <<  "Error running ocount" << endl;
+		} else {
+			get_results = true;
+			cverb << vdebug << "WARNING: Results may be incomplete due to abend of monitored app." << endl;
+		}
+	}
+	// We don't do a final display of results if we've been doing it on an interval already.
+	if (get_results && !ocount_options::display_interval)
+		do_results(out);
+
+	cleanup();
+	return 0;
+}
diff --git a/pe_counting/ocount_counter.cpp b/pe_counting/ocount_counter.cpp
new file mode 100644
index 0000000..1573ed4
--- /dev/null
+++ b/pe_counting/ocount_counter.cpp
@@ -0,0 +1,795 @@
+/**
+ * @file ocount_counter.cpp
+ * Functions and classes for ocount tool.
+ *
+ * @remark Copyright 2013 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * Created on: May 22, 2013
+ * @author Maynard Johnson
+ * (C) Copyright IBM Corp. 2013
+ *
+ */
+#include <time.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <dirent.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <signal.h>
+
+#include <iostream>
+#include <sstream>
+#include <stdexcept>
+
+#include "ocount_counter.h"
+#include "op_pe_utils.h"
+#include "operf_event.h"
+#include "cverb.h"
+
+extern verbose vdebug;
+extern bool use_cpu_minus_one;
+extern char * app_name;
+
+using namespace std;
+
+/* Build the qualifier suffix printed after an event name.  ":<unit mask>" is included
+ * only when um_specified is set, and ":<k>:<u>" only when mode_specified is set, where
+ * <k> and <u> are 0 if kernel/user mode is excluded (no_kernel/no_user non-zero) and 1
+ * otherwise.  An empty string is returned when neither was specified.
+ */
+static string print_mask_modes(bool mode_specified,bool um_specified,
+			       int no_kernel, int no_user,
+			       string um_numeric_as_str, string umask_value)
+{
+	string qualifiers;
+
+	if (um_specified) {
+		// Prefer the unit mask name; fall back to its numeric form if the name is empty.
+		const string & mask = umask_value.empty() ? um_numeric_as_str : umask_value;
+		qualifiers += ":";
+		qualifiers += mask;
+	}
+
+	if (mode_specified) {
+		// Kernel flag first, then user flag.
+		const char * kernel_flag = no_kernel ? ":0" : ":1";
+		const char * user_flag = no_user ? ":0" : ":1";
+		qualifiers += kernel_flag;
+		qualifiers += user_flag;
+	}
+
+	return qualifiers;
+}
+
+/* Set up the perf_event_attr for counting 'evt'.  No perf event is opened here, so the
+ * fd, cpu and pid of the new counter are all -1 until perf_event_open() is called. */
+ocount_counter::ocount_counter(operf_event_t & evt,  bool enable_on_exec,
+                               bool inherit)
+	: event(evt), fd(-1), cpu(-1), pid(-1)
+{
+	memset(&attr, 0, sizeof(attr));
+	attr.size = sizeof(attr);
+	attr.config = evt.evt_code;
+#ifdef __s390__
+	attr.type = PERF_TYPE_HARDWARE;
+	if (!evt.no_user && evt.no_kernel)
+		attr.config |= 32;
+#else
+	attr.type = PERF_TYPE_RAW;
+#endif
+	// Ask for the enabled and running times along with the count; they let us tell the
+	// user what percent of time an event was scheduled when the kernel multiplexes.
+	attr.read_format = PERF_FORMAT_TOTAL_TIME_RUNNING | PERF_FORMAT_TOTAL_TIME_ENABLED;
+	attr.exclude_user = evt.no_user;
+	attr.exclude_kernel = evt.no_kernel;
+	attr.exclude_hv = evt.no_hv;
+	attr.exclude_idle = 0;
+	attr.inherit = inherit ? 1 : 0;
+	attr.enable_on_exec = enable_on_exec ? 1 : 0;
+	attr.disabled = attr.enable_on_exec;	// created disabled exactly when it is enabled on exec
+}
+
+ocount_counter::~ocount_counter() {	// empty: nothing is released here, and fd is not closed
+}
+
+#include <stdio.h>
+/* Open a perf event for this counter on (_pid, _cpu) and return its fd; on failure return
+ * OP_PERF_HANDLED_ERROR if the cause was explained to the user here (EBUSY, ESRCH), else -1. */
+int ocount_counter::perf_event_open(pid_t _pid, int _cpu)
+{
+	fd = op_perf_event_open(&attr, _pid, _cpu, -1, 0);
+	if (fd < 0) {
+		const int open_errno = errno;	// capture now: the stream output below may change errno
+		cverb << vdebug << "perf_event_open failed: " << strerror(open_errno) << endl;
+		if (open_errno == EBUSY) {
+			cerr << "The performance monitoring hardware reports EBUSY. Is another profiling tool in use?" << endl
+			     << "On some architectures, tools such as oprofile and perf being used in system-wide "
+			     << "mode can cause this problem." << endl;
+			return OP_PERF_HANDLED_ERROR;
+		} else if (open_errno == ESRCH) {
+			cerr << "!!!! No samples collected !!!" << endl;
+			cerr << "The target program/command ended before profiling was started." << endl;
+			return OP_PERF_HANDLED_ERROR;
+		}
+		cerr << "perf_event_open failed with " << strerror(open_errno) << endl;
+		return -1;
+	}
+	pid = _pid;
+	cpu = _cpu;
+	cverb << vdebug << "perf_event_open returning fd " << fd << endl;
+	return fd;
+}
+
+/* Read this counter's {count, enabled time, running time} triple from its perf event fd
+ * into *count_data.  Returns 0 once all three values have been read, or -1 if read()
+ * fails (errno is then set by read()) or reports end-of-file before the triple is complete. */
+int ocount_counter::read_count_data(ocount_accum_t * count_data)
+{
+	size_t len = 3 * sizeof(u64);
+	char * buf = (char *)count_data;
+	while (len) {
+		ssize_t ret = read(fd, buf, len);
+		// ret == 0 (EOF) must not be passed on as success: *count_data is not filled in.
+		if (ret <= 0)
+			return -1;
+		len -= ret;
+		buf += ret;
+	}
+	return 0;
+}
+
+// Only initializes members; the perf events are set up later by the start_counting_*() functions.
+ocount_record::ocount_record(enum op_runmode _runmode, std::vector<operf_event_t> & _evts,
+                             bool _with_time_interval)
+{
+	evts = _evts;
+	runmode = _runmode;
+	with_time_interval = _with_time_interval;
+	// Everything below starts out as "not set up yet".
+	valid = system_wide = tasks_are_threads = false;
+	app_pid = -1;
+	num_cpus = 0;
+	total_bytes_recorded = 0;
+	start_time = 0ULL;
+}
+
+// Set up counting for process _pid in OP_START_APP mode; false if already set up or in another run mode.
+bool ocount_record::start_counting_app_process(pid_t _pid)
+{
+	const bool wrong_mode = (runmode != OP_START_APP);
+	if (valid)
+		cerr << "ocount internal error: ocount_record already initialized" << endl;
+	else if (wrong_mode)
+		cerr << "ocount internal error: Current run mode " << runmode << " is incompatible with "
+		     "starting app." << endl;
+	if (valid || wrong_mode)
+		return false;
+	app_pid = _pid;
+	setup();
+	return true;
+}
+
+/*
+ * There are separate ocount options for counting events for a set of processes ("--process-list")
+ * or a set of threads ("--thread-list"). This function is used for passing the set of either
+ * processes or threads to ocount_record, along with a boolean argument to indicate whether or not
+ * the set of passed tasks are threads.  If they are threads, we set up perf_event_open to NOT
+ * do "inherit".
+ */
+bool ocount_record::start_counting_tasklist(std::vector<pid_t> _tasks, bool _are_threads)
+{
+	if (valid) {
+		cerr << "ocount internal error: ocount_record already initialized" << endl;
+		return false;
+	}
+
+	// The request is stored before the run mode is checked, so it is kept even on rejection.
+	specified_tasks = _tasks;
+	tasks_are_threads = _are_threads;
+
+	// A thread list is only acceptable in OP_THREADLIST mode, a process list in OP_PROCLIST mode.
+	const enum op_runmode required_mode = tasks_are_threads ? OP_THREADLIST : OP_PROCLIST;
+	const char * option_name = tasks_are_threads ? "--thread-list" : "--process-list";
+	if (runmode != required_mode) {
+		cerr << "ocount internal error: Current run mode " << runmode << " is incompatible with "
+		     << option_name << " option." << endl;
+		return false;
+	}
+
+	setup();
+	// If tasks_to_count is still empty after setup() there is nothing to monitor.
+	if (!tasks_to_count.empty())
+		return true;
+
+	cerr << "No valid tasks to monitor -- quitting." << endl;
+	return false;
+}
+
+// Set up counting on the CPUs in _cpus (OP_CPULIST mode); false if already set up or in another run mode.
+bool ocount_record::start_counting_cpulist(std::vector<int> _cpus)
+{
+	const bool wrong_mode = (runmode != OP_CPULIST);
+	if (valid)
+		cerr << "ocount internal error: ocount_record already initialized" << endl;
+	else if (wrong_mode)
+		cerr << "ocount internal error: Current run mode " << runmode << " is incompatible with "
+		     "--cpu-list option." << endl;
+	if (valid || wrong_mode)
+		return false;
+	specified_cpus = _cpus;
+	setup();
+	return true;
+}
+
+// Set up system-wide counting (OP_SYSWIDE mode); false if already set up or in another run mode.
+bool ocount_record::start_counting_syswide(void)
+{
+	const bool wrong_mode = (runmode != OP_SYSWIDE);
+	if (valid)
+		cerr << "ocount internal error: ocount_record already initialized" << endl;
+	else if (wrong_mode)
+		cerr << "ocount internal error: Current run mode " << runmode << " is incompatible with "
+		     "--system-wide option." << endl;
+	if (valid || wrong_mode)
+		return false;
+	system_wide = true;
+	setup();
+	return true;
+}
+
+/* Open one counter per event for every task in tasks_to_count (cpu argument -1).  Returns
+ * 0 on success or OP_PERF_HANDLED_ERROR if an open failed for a reason that was already
+ * reported to the user; any other open failure is thrown as a runtime_error.
+ */
+int ocount_record::do_counting_per_task(void)
+{
+	for (set<pid_t>::iterator task = tasks_to_count.begin(); task != tasks_to_count.end(); ++task) {
+		const pid_t the_pid = *task;
+		// "inherit" is requested for process lists only, not for thread lists.
+		const bool inherit = are_tasks_processes();
+		cverb << vdebug << "calling perf_event_open for task " << the_pid << endl;
+		for (size_t idx = 0; idx < evts.size(); idx++) {
+			// Each counter gets a zeroed accumulator and a zero previous count.
+			const ocount_accum_t no_counts = {0ULL, 0ULL, 0ULL};
+			accum_counts.push_back(no_counts);
+			prev_accum_counts.push_back(0ULL);
+			ocount_counter op_ctr(evts[idx], false, inherit);
+			const int rc = op_ctr.perf_event_open(the_pid, -1);
+			// A handled error was already explained to the user; just pass it up.
+			if (rc == OP_PERF_HANDLED_ERROR)
+				return rc;
+			if (rc < 0)
+				throw runtime_error("Internal Error.  Perf event setup failed.");
+			perfCounters.push_back(op_ctr);
+		}
+	}
+	return 0;
+}
+
+/* Open one counter per event on every CPU to be counted (pid argument -1): the CPUs in
+ * specified_cpus when the run mode is OP_CPULIST, otherwise every CPU returned by
+ * op_get_available_cpus().  Returns 0 on success or OP_PERF_HANDLED_ERROR if an open
+ * failed for a reason that was already reported to the user; an invalid CPU or any other
+ * open failure is thrown as a runtime_error.
+ */
+int ocount_record::do_counting_per_cpu(void)
+{
+	/* We'll do this sanity check here, but we also do it at the front-end where user
+	 * args are being validated.  If we wait until we get here, the invalid CPU argument
+	 * becomes an ugly thrown exception.
+	 */
+	set<int> available_cpus = op_pe_utils::op_get_available_cpus(num_cpus);
+	if (runmode == OP_CPULIST) {
+		for (size_t k = 0; k < specified_cpus.size(); k++) {
+			if (available_cpus.find(specified_cpus[k]) == available_cpus.end()) {
+				ostringstream err_msg_ostr;
+				err_msg_ostr << "Specified CPU " << specified_cpus[k] << " is not valid";
+				throw runtime_error(err_msg_ostr.str());
+			} else {
+				cpus_to_count.insert(specified_cpus[k]);
+			}
+		}
+	} else {
+		cpus_to_count = available_cpus;
+	}
+
+	// cpus_to_count holds CPU numbers (ints), so it is iterated as a set<int>, not a set<pid_t>.
+	for (set<int>::iterator it = cpus_to_count.begin(); it != cpus_to_count.end(); it++) {
+		int the_cpu = *it;
+		cverb << vdebug << "calling perf_event_open for cpu " << the_cpu << endl;
+		for (unsigned event = 0; event < evts.size(); event++) {
+			// Each counter gets a zeroed accumulator and a zero previous count.
+			ocount_accum_t count_data = {0ULL, 0ULL, 0ULL};
+			accum_counts.push_back(count_data);
+			prev_accum_counts.push_back(0ULL);
+			ocount_counter op_ctr(ocount_counter(evts[event], false, true));
+			int rc = op_ctr.perf_event_open(-1, the_cpu);
+			// A handled error was already explained to the user; just pass it up.
+			if (rc == OP_PERF_HANDLED_ERROR)
+				return rc;
+			if (rc < 0)
+				throw runtime_error("Internal Error.  Perf event setup failed.");
+			perfCounters.push_back(op_ctr);
+		}
+	}
+
+	return 0;
+}
+/* Open the perf event counters for the current run mode.  On success 'valid' is set and start_time recorded; an error already reported to the user leaves 'valid' unset; other errors are thrown as runtime_error. */
+void ocount_record::setup()
+{
+	int rc = 0;
+	string err_msg;
+	// For a task list, first gather process info for the listed tasks via get_process_info().
+	if (!specified_tasks.empty()) {
+		if ((rc = get_process_info(specified_tasks)) < 0) {
+			if (rc == OP_PERF_HANDLED_ERROR)
+				return;
+			else
+				throw runtime_error("Unexpected error in ocount_record setup");
+		}
+	}
+
+	/* To set up to count events for an existing thread group, we need call perf_event_open
+	 * for each thread, and we need to pass cpu=-1 on the syscall.
+	 */
+	use_cpu_minus_one = use_cpu_minus_one ? true : ((system_wide || (runmode == OP_CPULIST)) ? false : true);
+	num_cpus = use_cpu_minus_one ? 1 : sysconf(_SC_NPROCESSORS_ONLN);
+	if (num_cpus < 1) {
+		char int_str[256];
+		sprintf(int_str, "Number of online CPUs is %d; cannot continue", num_cpus);
+		throw runtime_error(int_str);
+	}
+	if (system_wide || (runmode == OP_CPULIST)) {	// counters are opened per CPU
+		rc = do_counting_per_cpu();
+	} else if (!specified_tasks.empty()) {	// counters are opened per listed task
+		rc = do_counting_per_task();
+	} else {	// single app: counters on app_pid, with enable_on_exec and inherit both set
+		cverb << vdebug << "calling perf_event_open for pid " << app_pid << endl;
+		for (unsigned event = 0; event < evts.size(); event++) {
+			ocount_accum_t count_data = {0ULL, 0ULL, 0ULL};
+			accum_counts.push_back(count_data);
+			prev_accum_counts.push_back(0ULL);
+			ocount_counter op_ctr(ocount_counter(evts[event], true, true));
+			if ((rc = op_ctr.perf_event_open(app_pid, -1)) < 0) {
+				err_msg = "Internal Error.  Perf event setup failed.";
+				goto error;
+			} else {
+				rc = 0;
+			}
+			perfCounters.push_back(op_ctr);
+		}
+	}
+	if (!rc) {
+		cverb << vdebug << "perf counter setup complete" << endl;
+		// Set bit to indicate we're set to go.
+		valid = true;
+		// Now that all events are programmed to start counting, init the start time
+		struct timespec tspec;
+		clock_gettime(CLOCK_MONOTONIC, &tspec);
+		start_time = tspec.tv_sec * 1000000000ULL + tspec.tv_nsec;
+
+		return;
+	}
+	// Only reached with a non-zero rc: stay silent for errors already reported, throw for the rest.
+error:
+	if (rc != OP_PERF_HANDLED_ERROR)
+		throw runtime_error(err_msg);
+}
+/* Write the results in short, comma-separated form: one line per counter when use_separation is set, otherwise one line per event.  Throws runtime_error if a counter cannot be read. */
+void ocount_record::output_short_results(ostream & out, bool use_separation, bool scaled)
+{
+	size_t num_iterations = use_separation ? perfCounters.size() : evts.size();	// rows to print
+	out << endl;
+	for (size_t num = 0; num < num_iterations; num++) {
+		ostringstream count_str;
+		ocount_accum_t tmp_accum;
+		double fraction_time_running;
+		string qual_string;
+
+		if (use_separation) {
+			// Separated row: <cpu or pid>,<event><qualifiers>,<count>,<percent>
+			if (cpus_to_count.size()) {
+				out << perfCounters[num].get_cpu();
+			} else {
+				out << perfCounters[num].get_pid();
+			}
+			out << "," << perfCounters[num].get_event_name();
+
+			qual_string =
+			  print_mask_modes(perfCounters[num].get_mode_specified(),
+			                   perfCounters[num].get_um_specified(),
+			                   perfCounters[num].get_no_kernel(),
+			                   perfCounters[num].get_no_user(),
+			                   perfCounters[num].get_um_numeric_val_as_str(),
+			                   perfCounters[num].get_umask_value());
+			out << qual_string;
+			out  << ",";
+
+			errno = 0;	// so that a failed read which leaves errno unset is reported as "unknown error"
+			cverb << vdebug << "Reading counter data for event " << perfCounters[num].get_event_name() << endl;
+			if (perfCounters[num].read_count_data(&tmp_accum) < 0) {
+				string err_msg = "Internal error: read of perfCounter fd failed with ";
+				err_msg += errno ? strerror(errno) : "unknown error";
+				throw runtime_error(err_msg);
+			}
+			fraction_time_running = scaled ? (double)tmp_accum.running_time/tmp_accum.enabled_time : 1;
+			if (with_time_interval) {	// show only the increase since the previous display
+				u64 save_prev = prev_accum_counts[num];
+				prev_accum_counts[num] = tmp_accum.count;
+				tmp_accum.count -= save_prev;
+			}
+			u64 scaled_count = tmp_accum.count ? tmp_accum.count/fraction_time_running : 0;
+			out << dec << scaled_count << ",";
+		} else {	// aggregated row: <event><qualifiers>,<count>,<percent>, taken from accum_counts
+			fraction_time_running = scaled ? (double)accum_counts[num].running_time/accum_counts[num].enabled_time : 1;
+			u64 scaled_count = accum_counts[num].count ? accum_counts[num].count/fraction_time_running : 0;
+			out << perfCounters[num].get_event_name();
+
+			qual_string = print_mask_modes(perfCounters[num].get_mode_specified(),
+			                               perfCounters[num].get_um_specified(),
+			                               perfCounters[num].get_no_kernel(),
+			                               perfCounters[num].get_no_user(),
+			                               perfCounters[num].get_um_numeric_val_as_str(),
+			                               perfCounters[num].get_umask_value());
+			out << qual_string;
+			out << "," << dec << scaled_count << ",";
+		}
+		ostringstream strm_tmp;
+		if (use_separation) {
+			if (!tmp_accum.enabled_time) {
+				out << 0 << endl;	// never enabled: the percentage is printed as 0
+			} else {
+				strm_tmp.precision(2);
+				strm_tmp << fixed << fraction_time_running * 100
+				         << endl;
+				out << strm_tmp.str();
+			}
+		} else {
+			if (!accum_counts[num].enabled_time) {
+				out << "Event not counted" << endl;	// NOTE(review): text here, but 0 in the separated case above
+			} else {
+				strm_tmp.precision(2);
+				strm_tmp << fixed << fraction_time_running * 100
+				         << endl;
+				out << strm_tmp.str();
+			}
+		}
+	}
+}
+
+/* Return the blanks that pad an output column by 'width' characters.  A negative width
+ * (the text was wider than its column) yields a single blank rather than being used,
+ * as the earlier fixed 64-byte strncpy() padding did, as an out-of-range buffer length.
+ */
+static string column_padding(long width)
+{
+	return string((size_t)(width < 0 ? 1 : width), ' ');
+}
+
+/* Print the results as a table: an optional line telling how long events were counted,
+ * a line saying what was counted, the column headings, and then one row per counter
+ * (use_separation) or one row per event (aggregated counts taken from accum_counts).
+ *
+ * evt_name_col_size is the widest "name:unit mask:mode" text among the events, 'scaled'
+ * selects whether counts are scaled by the fraction of time the event was running, and
+ * time_enabled is the elapsed counting time in nanoseconds.
+ *
+ * Throws runtime_error if a counter cannot be read.
+ */
+void ocount_record::output_long_results(ostream & out, bool use_separation,
+                                        int evt_name_col_size, bool scaled,
+                                        u64 time_enabled)
+{
+#define COUNT_COLUMN_WIDTH 25
+#define SEPARATION_ELEMENT_COLUMN_WIDTH 10
+#define MIN_NAME_COLUMN_SPACING 8
+
+	char const * cpu, * task, * scaling;
+	u64 num_seconds_enabled = time_enabled/1000000000;
+	unsigned int num_minutes_enabled = num_seconds_enabled/60;
+	cpu = "CPU";
+	task = "Task ID";
+	scaling = scaled ? "(scaled) " : "(actual) ";
+
+	/* Need to account for any events that will be printing user/kernel
+	 * mode or unit mask names when setting up the columns of the data.
+	 */
+	const long begin_second_col = (long)evt_name_col_size + MIN_NAME_COLUMN_SPACING;
+
+	out << endl;
+	// The counting time is only reported when results are not shown per time interval.
+	if (!with_time_interval) {
+		ostringstream strm;
+		strm << "Events were actively counted for ";
+		if (num_minutes_enabled) {
+			strm << " ";
+			strm << num_minutes_enabled;
+			if (num_minutes_enabled > 1)
+				strm << " minutes and ";
+			else
+				strm << " minute and ";
+			strm << num_seconds_enabled % 60;
+			strm << " seconds.";
+		} else {
+			if (num_seconds_enabled) {
+				// Show 1/10's of seconds
+				strm.precision(1);
+				strm << fixed << (double)time_enabled/1000000000;
+				strm << " seconds.";
+			} else {
+				// Show full nanoseconds
+				strm << time_enabled << " nanoseconds.";
+			}
+		}
+		out << strm.str() << endl;
+	}
+	// Say what the counts below refer to.
+	out << "Event counts " << scaling;
+	if (app_name)
+		out << "for " << app_name << ":";
+	else if (system_wide)
+		out << "for the whole system:";
+	else if (!cpus_to_count.empty())
+		out << "for the specified CPU(s):";
+	else if (tasks_are_threads)
+		out << "for the specified thread(s):";
+	else
+		out << "for the specified process(es):";
+	out << endl;
+
+	// Column headings; the CPU / Task ID column exists only for separated output.
+	out << "\tEvent" << column_padding(begin_second_col - (long)strlen("Event"));
+	if (use_separation) {
+		if (cpus_to_count.size()) {
+			out << cpu << column_padding(SEPARATION_ELEMENT_COLUMN_WIDTH - (long)strlen(cpu));
+		} else {
+			out << task << column_padding(SEPARATION_ELEMENT_COLUMN_WIDTH - (long)strlen(task));
+		}
+	}
+	out << "Count" << column_padding(COUNT_COLUMN_WIDTH - (long)strlen("Count"))
+	    << "% time counted" << endl;
+
+	/* If counting per-cpu or per-thread, I refer generically to cpu or thread values
+	 * as "elements of separation".  We will have one ocount_counter object per element of
+	 * separation per event.  So if we're counting 2 events for 4 processes (or threads),
+	 * we'll have 2x4 (8) ocount_counter objects.
+	 *
+	 * If 'use_separation' is true, then we need to print individual counts for
+	 * each element of separation for each event; otherwise, we print aggregated counts
+	 * for each event.
+	 */
+	size_t num_iterations = use_separation ? perfCounters.size() : evts.size();
+	for (size_t num = 0; num < num_iterations; num++) {
+		double fraction_time_running;
+		string qual_string;
+
+		// First column: the event name followed by its unit mask / mode qualifiers.
+		out << "\t" << perfCounters[num].get_event_name();
+		qual_string = print_mask_modes(perfCounters[num].get_mode_specified(),
+		                               perfCounters[num].get_um_specified(),
+		                               perfCounters[num].get_no_kernel(),
+		                               perfCounters[num].get_no_user(),
+		                               perfCounters[num].get_um_numeric_val_as_str(),
+		                               perfCounters[num].get_umask_value());
+		out << qual_string;
+		out << column_padding(begin_second_col - (long)qual_string.size()
+		                      - (long)perfCounters[num].get_event_name().size());
+
+		ostringstream count_str;
+		ocount_accum_t tmp_accum;
+		if (use_separation) {
+			// Separation column: the CPU or task this counter was opened for.
+			ostringstream separation_element_str;
+			if (cpus_to_count.size()) {
+				separation_element_str << dec << perfCounters[num].get_cpu();
+				out << perfCounters[num].get_cpu();
+			} else {
+				separation_element_str << dec << perfCounters[num].get_pid();
+				out << perfCounters[num].get_pid();
+			}
+			out << column_padding(SEPARATION_ELEMENT_COLUMN_WIDTH - (long)separation_element_str.str().length());
+
+			// Separated output shows a fresh reading of this one counter.
+			errno = 0;
+			cverb << vdebug << "Reading counter data for event " << perfCounters[num].get_event_name() << endl;
+			if (perfCounters[num].read_count_data(&tmp_accum) < 0) {
+				string err_msg = "Internal error: read of perfCounter fd failed with ";
+				err_msg += errno ? strerror(errno) : "unknown error";
+				throw runtime_error(err_msg);
+			}
+			fraction_time_running = scaled ? (double)tmp_accum.running_time/tmp_accum.enabled_time : 1;
+
+			// With a time interval, show only the increase since the previous display.
+			if (with_time_interval) {
+				u64 save_prev = prev_accum_counts[num];
+				prev_accum_counts[num] = tmp_accum.count;
+				tmp_accum.count -= save_prev;
+			}
+			// A zero count is printed as 0 without dividing by the running fraction.
+			u64 scaled_count = tmp_accum.count ? tmp_accum.count/fraction_time_running : 0;
+			count_str << dec << scaled_count;
+		} else {
+			// Aggregated output uses the per-event totals held in accum_counts.
+			fraction_time_running = scaled ? (double)accum_counts[num].running_time/accum_counts[num].enabled_time : 1;
+			u64 scaled_count = accum_counts[num].count ? accum_counts[num].count/fraction_time_running : 0;
+			count_str << dec << scaled_count;
+		}
+		// Insert thousands separators, working from the right-hand end of the number.
+		string count = count_str.str();
+		for (int i = count.size() - 3; i > 0; i-=3) {
+			count.insert(i, 1, ',');
+		}
+		out << count;
+		out << column_padding(COUNT_COLUMN_WIDTH - (long)count.size());
+		// Last column: percentage of the enabled time during which the event was running.
+		ostringstream strm_tmp;
+		if (use_separation) {
+			if (!tmp_accum.enabled_time) {
+				out << "Event not counted" << endl;
+			} else {
+				strm_tmp.precision(2);
+				strm_tmp << fixed << fraction_time_running * 100
+				         << endl;
+				out << strm_tmp.str();
+			}
+		} else {
+			if (!accum_counts[num].enabled_time) {
+				out << "Event not counted" << endl;
+			} else {
+				strm_tmp.precision(2);
+				strm_tmp << fixed << fraction_time_running * 100
+				         << endl;
+				out << strm_tmp.str();
+			}
+		}
+	}
+}
+/* Read every counter, aggregate the counts per event when use_separation is false, decide whether scaling is needed, and print the results in short or long format.  Throws runtime_error if a counter cannot be read. */
+void ocount_record::output_results(ostream & out, bool use_separation, bool short_format)
+{
+#define MODE_FIELD_SIZE  3    /* space for :KU in the output */
+
+	size_t evt_name_col_size = 0;
+	u64 time_enabled = 0ULL;
+	bool scaled = false;
+	bool mode_specified = false;	// NOTE(review): updated in the loop below but not read anywhere in this function
+
+	for (unsigned long evt_num = 0; evt_num < evts.size(); evt_num++) {
+		unsigned int length = 0;
+
+		/* calculate the longest name + unit mask + mode specifier */
+		length = strlen(evts[evt_num].um_name) +
+		  strlen(evts[evt_num].name) + 1; /* for colon */
+
+		if ((strlen(evts[evt_num].um_numeric_val_as_str)
+		     + strlen(evts[evt_num].name)) > length)
+			length = strlen(evts[evt_num].um_numeric_val_as_str) +
+			  strlen(evts[evt_num].name) + 1;  /* for colon */
+
+		if (evts[evt_num].mode_specified)
+			length += MODE_FIELD_SIZE;
+
+		if (length > evt_name_col_size)
+			evt_name_col_size = length;
+
+		mode_specified = mode_specified ||
+			evts[evt_num].mode_specified;
+	}
+
+	if (with_time_interval) {
+		// reset the accum count values
+		for (size_t i = 0; i < evts.size(); i++) {
+			ocount_accum_t accum = accum_counts[i];
+			accum.count = 0ULL;	// only the count is cleared; the two time fields are left as they are
+			accum_counts[i] = accum;
+		}
+	}
+
+	/* We need to inspect all of the count data now to ascertain if scaling
+	 * is required, so we also collect aggregated counts into the accum_counts
+	 * vector (if needed).
+	 */
+	for (unsigned long ocounter = 0; ocounter < perfCounters.size(); ocounter++) {
+		ocount_accum_t tmp_accum;
+		int evt_key = ocounter % evts.size();	// index of the event this counter is matched to
+		errno = 0;
+		cverb << vdebug << "Reading counter data for event " << evts[evt_key].name << endl;
+		if (perfCounters[ocounter].read_count_data(&tmp_accum) < 0) {
+			string err_msg = "Internal error: read of perfCounter fd failed with ";
+			err_msg += errno ? strerror(errno) : "unknown error";
+			throw runtime_error(err_msg);
+		}
+		if (!use_separation) {
+			ocount_accum_t real_accum = accum_counts[evt_key];
+			real_accum.count += tmp_accum.count;
+			real_accum.enabled_time += tmp_accum.enabled_time;
+			real_accum.running_time += tmp_accum.running_time;
+			accum_counts[evt_key] = real_accum;
+		}
+		if (tmp_accum.enabled_time != tmp_accum.running_time) {
+			// Scale only if the counter was not running for more than 1% of its enabled time.
+			if (((double)(tmp_accum.enabled_time - tmp_accum.running_time)/tmp_accum.enabled_time) > 0.01)
+				scaled = true;
+		}
+	}
+
+	if (with_time_interval && !use_separation) {
+		// Turn the running totals into the increase since the previous display.
+		for (size_t i = 0; i < evts.size(); i++) {
+			u64 save_prev = prev_accum_counts[i];
+			ocount_accum_t real_accum = accum_counts[i];
+			prev_accum_counts[i] = real_accum.count;
+			real_accum.count -= save_prev;
+			accum_counts[i] = real_accum;
+		}
+	}
+	struct timespec tspec;
+	clock_gettime(CLOCK_MONOTONIC, &tspec);
+	time_enabled = (tspec.tv_sec * 1000000000ULL + tspec.tv_nsec) - start_time;	// nanoseconds since start_time
+
+
+	if (short_format)
+		output_short_results(out, use_separation, scaled);
+	else
+		output_long_results(out, use_separation, evt_name_col_size,
+				    scaled, time_enabled);
+}
+
+/* Pass 'pid' to add_process() and, when the tasks are processes (as opposed to threads),
+ * also every numeric entry found in /proc/<pid>/task.  Returns 0 on success or -1 if
+ * that directory cannot be opened.
+ */
+int ocount_record::_get_one_process_info(pid_t pid)
+{
+	char fname[PATH_MAX];
+	DIR *tids;
+	struct dirent *entry;
+
+	add_process(pid);
+	if (!are_tasks_processes())
+		return 0;
+	snprintf(fname, sizeof(fname), "/proc/%d/task", pid);
+	tids = opendir(fname);
+	if (tids == NULL) {
+		// process must have exited
+		cverb << vdebug << "Process " << pid << " apparently exited while "
+				<< "process info was being collected"<< endl;
+		return -1;
+	}
+	// readdir() replaces the deprecated readdir_r(); 'tids' is used by this call only.
+	while ((entry = readdir(tids)) != NULL) {
+		char *end;
+		pid = strtol(entry->d_name, &end, 10);
+		if (*end)
+			continue;	// name is not purely numeric, so it is not a task id
+		add_process(pid);
+	}
+	closedir(tids);
+	return 0;
+}
+
+/* Obtain process information for one or more active tasks, where the user has passed
+ * in either a set of processes via the --process-list option or a set of threads via
+ * the --thread-list option.
+ */
+int ocount_record::get_process_info(const vector<pid_t> & _procs)
+{
+	int ret = 0;
+	if (cverb << vdebug)
+		cout << "op_get_process_info" << endl;
+	for (vector<pid_t>::const_iterator task = _procs.begin(); task != _procs.end(); ++task) {
+		errno = 0;
+		if (kill(*task, 0) >= 0) {	// signal 0 only probes whether the task can be signalled
+			ret = _get_one_process_info(*task);
+			if (ret < 0)
+				return ret;
+			continue;
+		}
+		if (errno != EPERM)	// any failure other than EPERM ends the scan without a message
+			return ret;
+		cerr << "You do not have permission to monitor "
+		     << (are_tasks_processes() ? "process " : "thread ") << *task << endl;
+		return OP_PERF_HANDLED_ERROR;
+	}
+	return ret;
+}
diff --git a/pe_counting/ocount_counter.h b/pe_counting/ocount_counter.h
new file mode 100644
index 0000000..2d55f8b
--- /dev/null
+++ b/pe_counting/ocount_counter.h
@@ -0,0 +1,133 @@
+/**
+ * @file ocount_counter.h
+ * Definitions and prototypes for ocount tool.
+ *
+ * @remark Copyright 2013 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * Created on: May 22, 2013
+ * @author Maynard Johnson
+ * (C) Copyright IBM Corp. 2013
+ *
+ */
+
+#ifndef OCOUNT_COUNTER_H_
+#define OCOUNT_COUNTER_H_
+
+#include <linux/perf_event.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+
+#include <vector>
+#include <set>
+#include <string>
+
+#include "operf_event.h"
+
+#define OP_PERF_HANDLED_ERROR -101
+
+enum op_runmode {
+	OP_START_APP,	// presumably "<command> [args]" mode (cf. start_counting_app_process) -- confirm
+	OP_SYSWIDE,	// presumably system-wide counting (cf. start_counting_syswide) -- confirm
+	OP_CPULIST,	// presumably an explicit cpu list (cf. start_counting_cpulist) -- confirm
+	OP_PROCLIST,	// presumably a process list (cf. start_counting_tasklist) -- confirm
+	OP_THREADLIST,	// presumably a thread list (cf. start_counting_tasklist) -- confirm
+	OP_MAX_RUNMODE	// last enumerator, so its value equals the number of run modes above
+};
+
+typedef struct ocount_accum {
+	u64 count;		// accumulated event count
+	u64 enabled_time;	// NOTE(review): likely perf's time_enabled for the counter -- confirm
+	u64 running_time;	// NOTE(review): likely perf's time_running for the counter -- confirm
+} ocount_accum_t;
+
+// Thin wrapper: issues the perf_event_open system call with the given arguments.
+static inline int
+op_perf_event_open(struct perf_event_attr * attr, pid_t pid, int cpu,
+		      int group_fd, unsigned long flags)
+{
+	const long rc = syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
+	return static_cast<int>(rc);
+}
+
+
+class ocount_record;
+// Holds one event's description and perf_event_attr plus an fd, cpu and pid; accessors are const.
+class ocount_counter {
+public:
+	ocount_counter(operf_event_t & evt, bool enable_on_exec,
+	               bool inherit);
+	~ocount_counter();
+	int perf_event_open(pid_t pid, int cpu);
+	int get_cpu(void) const { return cpu; }
+	pid_t get_pid(void) const { return pid; }
+	const std::string get_umask_value(void) const { return event.um_name; }
+	const std::string get_event_name(void) const { return event.name; }
+	std::string get_um_numeric_val_as_str(void) const { return event.um_numeric_val_as_str; }
+	int get_no_user(void) const { return attr.exclude_user; }
+	int get_no_kernel(void) const { return attr.exclude_kernel; }
+	bool get_mode_specified(void) const { return event.mode_specified; }
+	bool get_um_specified(void) const { return event.umask_specified; }
+	int read_count_data(ocount_accum_t * accum);
+
+private:
+	operf_event_t event;
+	struct perf_event_attr attr;
+	int fd;
+	int cpu;
+	pid_t pid;
+};
+
+class ocount_record {
+public:
+	ocount_record(enum op_runmode _runmode, std::vector<operf_event_t> & _evts,
+	              bool _with_time_interval);
+	~ocount_record();
+	bool start_counting_app_process(pid_t _pid);
+	bool start_counting_tasklist(std::vector<pid_t> _tasks, bool _are_threads);
+	bool start_counting_cpulist(std::vector<int> _cpus);
+	bool start_counting_syswide(void);
+	void add_process(pid_t proc) { tasks_to_count.insert(proc); }  // set insert: duplicates collapse
+	void output_results(std::ostream & out, bool use_separation, bool short_format);
+	bool get_valid(void) { return valid; }
+	bool are_tasks_processes(void) { return !tasks_are_threads; }  // true unless tasks_are_threads is set
+
+private:
+	void setup(void);
+	int get_process_info(const std::vector<pid_t> & _procs);  // 0 on success, negative on failure
+	int _get_one_process_info(pid_t pid);  // adds pid (and its /proc/<pid>/task entries) to tasks_to_count
+	int do_counting_per_cpu(void);
+	int do_counting_per_task(void);
+	void output_short_results(std::ostream & out, bool use_separation, bool scaled);
+	void output_long_results(std::ostream & out, bool use_separation,
+                                 int longest_event_name,
+                                 bool scaled, u64 time_enabled);
+
+	enum op_runmode runmode;
+	bool tasks_are_threads;  // presumably taken from start_counting_tasklist's _are_threads -- confirm
+	int num_cpus;
+	pid_t app_pid;
+	std::set<pid_t> tasks_to_count;  // filled through add_process()
+	std::set<int> cpus_to_count;
+	bool system_wide;
+	std::vector<ocount_counter> perfCounters;
+	unsigned int total_bytes_recorded;
+	std::vector<operf_event_t> evts;
+	std::vector<pid_t> specified_tasks;
+	std::vector<int> specified_cpus;
+	std::vector<ocount_accum_t> accum_counts;  // accumulated across threads or cpus; one object per event
+
+	/* The prev_accum_counts vector is used with time intervals for computing count values for just
+	 * the current time interval. The number of elements in this vector depends on the run mode:
+	 *   - For <command> [args] : vector size == evts.size()
+	 *   - For system-wide or cpu list : vector size is number of processors
+	 *   - For process or thread list : vector size is number of tasks
+	 */
+	std::vector<u64> prev_accum_counts;
+	bool valid;  // reported by get_valid()
+	bool with_time_interval;
+	u64 start_time;  // subtracted from a CLOCK_MONOTONIC nanosecond reading to obtain time_enabled
+};
+
+
+#endif /* OCOUNT_COUNTER_H_ */
diff --git a/pe_profiling/Makefile.am b/pe_profiling/Makefile.am
index b27cbc7..8c232c4 100644
--- a/pe_profiling/Makefile.am
+++ b/pe_profiling/Makefile.am
@@ -6,6 +6,7 @@ AM_CPPFLAGS = \
 	-I ${top_srcdir}/libop \
 	-I ${top_srcdir}/libutil++ \
 	-I ${top_srcdir}/libperf_events \
+	-I ${top_srcdir}/libpe_utils \
 	@PERF_EVENT_FLAGS@ \
 	@OP_CPPFLAGS@
 
@@ -15,7 +16,8 @@ AM_CXXFLAGS = @OP_CXXFLAGS@
 AM_LDFLAGS = @OP_LDFLAGS@
 
 bin_PROGRAMS = operf
-operf_LDADD =	../libperf_events/libperf_events.a \
+operf_LDADD = ../libperf_events/libperf_events.a \
+	../libpe_utils/libpe_utils.a \
 	../libutil++/libutil++.a \
 	../libdb/libodb.a \
 	../libop/libop.a \
diff --git a/pe_profiling/Makefile.in b/pe_profiling/Makefile.in
index 98b9c42..9fe39c6 100644
--- a/pe_profiling/Makefile.in
+++ b/pe_profiling/Makefile.in
@@ -40,7 +40,6 @@ DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
 am__aclocal_m4_deps = $(top_srcdir)/m4/binutils.m4 \
 	$(top_srcdir)/m4/builtinexpect.m4 \
-	$(top_srcdir)/m4/cellspubfdsupport.m4 \
 	$(top_srcdir)/m4/compileroption.m4 \
 	$(top_srcdir)/m4/copyifchange.m4 $(top_srcdir)/m4/docbook.m4 \
 	$(top_srcdir)/m4/extradirs.m4 \
@@ -49,7 +48,7 @@ am__aclocal_m4_deps = $(top_srcdir)/m4/binutils.m4 \
 	$(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
 	$(top_srcdir)/m4/mallocattribute.m4 \
 	$(top_srcdir)/m4/poptconst.m4 \
-	$(top_srcdir)/m4/precompiledheader.m4 $(top_srcdir)/m4/qt.m4 \
+	$(top_srcdir)/m4/precompiledheader.m4 \
 	$(top_srcdir)/m4/sstream.m4 $(top_srcdir)/m4/typedef.m4 \
 	$(top_srcdir)/configure.ac
 am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
@@ -65,6 +64,7 @@ am__operf_SOURCES_DIST = operf.cpp
 operf_OBJECTS = $(am_operf_OBJECTS)
 @BUILD_FOR_PERF_EVENT_TRUE@operf_DEPENDENCIES =  \
 @BUILD_FOR_PERF_EVENT_TRUE@	../libperf_events/libperf_events.a \
+@BUILD_FOR_PERF_EVENT_TRUE@	../libpe_utils/libpe_utils.a \
 @BUILD_FOR_PERF_EVENT_TRUE@	../libutil++/libutil++.a \
 @BUILD_FOR_PERF_EVENT_TRUE@	../libdb/libodb.a ../libop/libop.a \
 @BUILD_FOR_PERF_EVENT_TRUE@	../libutil/libutil.a \
@@ -138,7 +138,6 @@ LN_S = @LN_S@
 LTLIBOBJS = @LTLIBOBJS@
 MAKEINFO = @MAKEINFO@
 MKDIR_P = @MKDIR_P@
-MOC = @MOC@
 NM = @NM@
 NMEDIT = @NMEDIT@
 OBJDUMP = @OBJDUMP@
@@ -162,20 +161,13 @@ PFM_LIB = @PFM_LIB@
 PKG_CONFIG = @PKG_CONFIG@
 POPT_LIBS = @POPT_LIBS@
 PTRDIFF_T_TYPE = @PTRDIFF_T_TYPE@
-QT_CFLAGS = @QT_CFLAGS@
-QT_INCLUDES = @QT_INCLUDES@
-QT_LDFLAGS = @QT_LDFLAGS@
-QT_LIB = @QT_LIB@
-QT_LIBS = @QT_LIBS@
-QT_VERSION = @QT_VERSION@
 RANLIB = @RANLIB@
+RT_LIB = @RT_LIB@
 SED = @SED@
 SET_MAKE = @SET_MAKE@
 SHELL = @SHELL@
 SIZE_T_TYPE = @SIZE_T_TYPE@
 STRIP = @STRIP@
-UIC = @UIC@
-UIChelp = @UIChelp@
 VERSION = @VERSION@
 XMKMF = @XMKMF@
 XML_CATALOG = @XML_CATALOG@
@@ -244,6 +236,7 @@ topdir = @topdir@
 @BUILD_FOR_PERF_EVENT_TRUE@	-I ${top_srcdir}/libop \
 @BUILD_FOR_PERF_EVENT_TRUE@	-I ${top_srcdir}/libutil++ \
 @BUILD_FOR_PERF_EVENT_TRUE@	-I ${top_srcdir}/libperf_events \
+@BUILD_FOR_PERF_EVENT_TRUE@	-I ${top_srcdir}/libpe_utils \
 @BUILD_FOR_PERF_EVENT_TRUE@	@PERF_EVENT_FLAGS@ \
 @BUILD_FOR_PERF_EVENT_TRUE@	@OP_CPPFLAGS@
 
@@ -251,6 +244,7 @@ topdir = @topdir@
 @BUILD_FOR_PERF_EVENT_TRUE@AM_CXXFLAGS = @OP_CXXFLAGS@
 @BUILD_FOR_PERF_EVENT_TRUE@AM_LDFLAGS = @OP_LDFLAGS@
 @BUILD_FOR_PERF_EVENT_TRUE@operf_LDADD = ../libperf_events/libperf_events.a \
+@BUILD_FOR_PERF_EVENT_TRUE@	../libpe_utils/libpe_utils.a \
 @BUILD_FOR_PERF_EVENT_TRUE@	../libutil++/libutil++.a \
 @BUILD_FOR_PERF_EVENT_TRUE@	../libdb/libodb.a \
 @BUILD_FOR_PERF_EVENT_TRUE@	../libop/libop.a \
diff --git a/pe_profiling/operf.cpp b/pe_profiling/operf.cpp
index 4ec9ab9..04a25d9 100644
--- a/pe_profiling/operf.cpp
+++ b/pe_profiling/operf.cpp
@@ -11,7 +11,7 @@
  * (C) Copyright IBM Corp. 2011
  *
  * Modified by Maynard Johnson <maynardj@us.ibm.com>
- * (C) Copyright IBM Corporation 2012
+ * (C) Copyright IBM Corporation 2012, 2013, 2014
  *
  */
 
@@ -35,6 +35,7 @@
 #include <getopt.h>
 #include <iostream>
 #include "operf_utils.h"
+#include "op_pe_utils.h"
 #include "op_libiberty.h"
 #include "string_manip.h"
 #include "cverb.h"
@@ -46,8 +47,12 @@
 #include "operf_kernel.h"
 #include "child_reader.h"
 #include "op_get_time.h"
+#include "operf_stats.h"
+#include "op_netburst.h"
+#include "utility.h"
 
 using namespace std;
+using namespace op_pe_utils;
 
 typedef enum END_CODE {
 	ALL_OK = 0,
@@ -59,25 +64,34 @@ typedef enum END_CODE {
 
 // Globals
 char * app_name = NULL;
+bool use_cpu_minus_one = false;
 pid_t app_PID = -1;
 uint64_t kernel_start, kernel_end;
-operf_read operfRead;
 op_cpu cpu_type;
 double cpu_speed;
-char op_samples_current_dir[PATH_MAX];
-uint op_nr_counters;
+uint op_nr_events;
 verbose vmisc("misc");
 uid_t my_uid;
 bool no_vmlinux;
 int kptr_restrict;
 char * start_time_human_readable;
+std::vector<operf_event_t> events;
+operf_read operfRead(events);
+/* With certain operf options, we have to take extra steps to track new threads
+ * and processes that an app may create via pthread_create, fork, etc.  Note that
+ * any such thread or process creation APIs will result in a PERF_RECORD_FORK event,
+ * so we handle these new threads/processes in operf_utils::__handle_fork_event.
+ */
+bool track_new_forks;
+
 
 #define DEFAULT_OPERF_OUTFILE "operf.data"
-#define CALLGRAPH_MIN_COUNT_SCALE 15
+#define KERN_ADDR_SPACE_START_SYMBOL  "_text"
+#define KERN_ADDR_SPACE_END_SYMBOL    "_etext"
 
-static char full_pathname[PATH_MAX];
+static operf_record * operfRecord = NULL;
 static char * app_name_SAVE = NULL;
-static char * app_args = NULL;
+static char ** app_args = NULL;
 static 	pid_t jitconv_pid = -1;
 static bool app_started;
 static pid_t operf_record_pid;
@@ -86,11 +100,23 @@ static string samples_dir;
 static bool startApp;
 static string outputfile;
 static char start_time_str[32];
-static vector<operf_event_t> events;
 static bool jit_conversion_running;
 static void convert_sample_data(void);
 static int sample_data_pipe[2];
-static bool ctl_c = false;
+static int app_ready_pipe[2], start_app_pipe[2], operf_record_ready_pipe[2];
+// The operf_convert_record_write_pipe is used for the convert process to send
+// forked PID data to the record process.
+static int operf_convert_record_write_pipe[2];
+// The operf_record_convert_write_pipe is used for the record process to send
+// data to the convert process in response to the forked PID data.
+static int operf_record_convert_write_pipe[2];
+// The operf_post_profiling_pipe is used by the main process to inform the operf_read_pid
+// that profiling is done.  The operf_read_pid will then print its progress in
+// finishing the conversion.
+static int operf_post_profiling_pipe[2];
+
+bool ctl_c = false;
+bool pipe_closed = false;
 
 
 namespace operf_options {
@@ -104,7 +130,7 @@ string vmlinux;
 bool separate_cpu;
 bool separate_thread;
 bool post_conversion;
-vector<string> evts;
+set<string> evts;
 }
 
 static const char * valid_verbose_vals[] = { "debug", "record", "convert", "misc", "sfile", "arcs", "all"};
@@ -133,6 +159,17 @@ const char * short_options = "V:d:k:gsap:e:ctlhuv";
 
 vector<string> verbose_string;
 
+// Flags events[index] and the global 'throttled'; an out-of-range index is only reported.
+void __set_event_throttled(int index)
+{
+	if (index < 0 || (size_t)index >= events.size()) {
+		cerr << "Unable to determine if throttling occurred for event (invalid index " << index << ")" << endl;
+	} else {
+		throttled = true;
+		events[index].throttled = true;
+	}
+}
+
 static void __print_usage_and_exit(const char * extra_msg)
 {
 	if (extra_msg)
@@ -142,7 +179,7 @@ static void __print_usage_and_exit(const char * extra_msg)
 	exit(EXIT_FAILURE);
 }
 
-
+// Signal handler for main (parent) process.
 static void op_sig_stop(int val __attribute__((unused)))
 {
 	// Received a signal to quit, so we need to stop the
@@ -155,9 +192,14 @@ static void op_sig_stop(int val __attribute__((unused)))
 		kill(app_PID, SIGKILL);
 }
 
+// For child processes to manage a controlled stop after Ctl-C is done
 static void _handle_sigint(int val __attribute__((unused)))
 {
 	size_t dummy __attribute__ ((__unused__));
+	/* Each process (parent and each forked child) will have their own copy of
+	 * the ctl_c variable, so this can be used by each process in managing their
+	 * shutdown procedure.
+	 */
 	ctl_c = true;
 	if (cverb << vdebug)
 		dummy = write(1, "in _handle_sigint\n", 19);
@@ -165,7 +207,7 @@ static void _handle_sigint(int val __attribute__((unused)))
 }
 
 
-void _set_signals_for_record(void)
+void _set_basic_SIGINT_handler_for_child(void)
 {
 	struct sigaction act;
 	sigset_t ss;
@@ -183,7 +225,7 @@ void _set_signals_for_record(void)
 	}
 }
 
-void set_signals(void)
+void set_signals_for_parent(void)
 {
 	struct sigaction act;
 	sigset_t ss;
@@ -202,49 +244,28 @@ void set_signals(void)
 	}
 }
 
-static int app_ready_pipe[2], start_app_pipe[2], operf_record_ready_pipe[2];
+// Joins app_args[1..] (a NULL-terminated argv-style array) into one string,
+// appending a single space after every argument, including the last.
+static string args_to_string(void)
+{
+	string joined;
+	for (char * const * arg = app_args + 1; *arg != NULL; ++arg) {
+		joined.append(*arg);
+		joined += ' ';
+	}
+	return joined;
+}
 
 void run_app(void)
 {
+	// ASSUMPTION: app_name is a fully-qualified pathname
 	char * app_fname = rindex(app_name, '/') + 1;
-	if (!app_fname) {
-		string msg = "Error trying to parse app name ";
-		msg += app_name;
-		__print_usage_and_exit(msg.c_str());
-	}
+	app_args[0] = app_fname;
 
-	vector<string> exec_args_str;
-	if (app_args) {
-		size_t end_pos;
-		string app_args_str = app_args;
-		// Since the strings returned from substr would otherwise be ephemeral, we
-		// need to store them into the exec_args_str vector so we can reference
-		// them later when we call execvp.
-		do {
-			end_pos = app_args_str.find_first_of(' ', 0);
-			if (end_pos != string::npos) {
-				exec_args_str.push_back(app_args_str.substr(0, end_pos));
-				app_args_str = app_args_str.substr(end_pos + 1);
-			} else {
-				exec_args_str.push_back(app_args_str);
-			}
-		} while (end_pos != string::npos);
-	}
-
-	vector<const char *> exec_args;
-	exec_args.push_back(app_fname);
-	vector<string>::iterator it;
-	cverb << vdebug << "Exec args are: " << app_fname << " ";
-	// Now transfer the args from the intermediate exec_args_str container to the
-	// exec_args container that can be passed to execvp.
-	for (it = exec_args_str.begin(); it != exec_args_str.end(); it++) {
-		exec_args.push_back((*it).c_str());
-		cverb << vdebug << (*it).c_str() << " ";
-	}
-	exec_args.push_back((char *) NULL);
-	cverb << vdebug << endl;
+	string arg_str = args_to_string();
+	cverb << vdebug << "Exec args are: " << app_fname << " " << arg_str << endl;
 	// Fake an exec to warm-up the resolver
-	execvp("", ((char * const *)&exec_args[0]));
+	execvp("", app_args);
 	// signal to the parent that we're ready to exec
 	int startup = 1;
 	if (write(app_ready_pipe[1], &startup, sizeof(startup)) < 0) {
@@ -263,8 +284,8 @@ void run_app(void)
 
 	cverb << vdebug << "parent says start app " << app_name << endl;
 	app_started = true;
-	execvp(app_name, ((char * const *)&exec_args[0]));
-	cerr <<  "Failed to exec " << exec_args[0] << ": " << strerror(errno) << endl;
+	execvp(app_name, app_args);
+	cerr <<  "Failed to exec " << app_fname << " " << arg_str << ": " << strerror(errno) << endl;
 	/* We don't want any cleanup in the child */
 	_exit(EXIT_FAILURE);
 
@@ -306,14 +327,24 @@ int start_profiling(void)
 		perror("Internal error: could not create pipe");
 		return -1;
 	}
+	if (pipe2(operf_convert_record_write_pipe, O_NONBLOCK) < 0) {
+		perror("Internal error: could not create pipe");
+		return -1;
+	}
+	if (pipe(operf_record_convert_write_pipe) < 0) {
+		perror("Internal error: could not create pipe");
+		return -1;
+	}
 	operf_record_pid = fork();
 	if (operf_record_pid < 0) {
 		return -1;
 	} else if (operf_record_pid == 0) { // operf-record process
 		int ready = 0;
 		int exit_code = EXIT_SUCCESS;
-		_set_signals_for_record();
+		_set_basic_SIGINT_handler_for_child();
 		close(operf_record_ready_pipe[0]);
+		close(operf_convert_record_write_pipe[1]);
+		close(operf_record_convert_write_pipe[0]);
 		if (!operf_options::post_conversion)
 			close(sample_data_pipe[0]);
 		/*
@@ -339,11 +370,12 @@ int start_profiling(void)
 			} else {
 				outfd = sample_data_pipe[1];
 			}
-			operf_record operfRecord(outfd, operf_options::system_wide, app_PID,
+			operfRecord = new operf_record(outfd, operf_options::system_wide, app_PID,
 			                         (operf_options::pid == app_PID), events, vi,
 			                         operf_options::callgraph,
-			                         operf_options::separate_cpu, operf_options::post_conversion);
-			if (operfRecord.get_valid() == false) {
+			                         operf_options::separate_cpu, operf_options::post_conversion,
+			                         operf_convert_record_write_pipe[0], operf_record_convert_write_pipe[1]);
+			if (operfRecord->get_valid() == false) {
 				/* If valid is false, it means that one of the "known" errors has
 				 * occurred:
 				 *   - profiled process has already ended
@@ -366,10 +398,11 @@ int start_profiling(void)
 			}
 
 			// start recording
-			operfRecord.recordPerfData();
-			cverb << vmisc << "Total bytes recorded from perf events: " << dec
-					<< operfRecord.get_total_bytes_recorded() << endl;
-		} catch (runtime_error re) {
+			operfRecord->recordPerfData();
+			cverb << vdebug << "Total bytes recorded from perf events: " << dec
+					<< operfRecord->get_total_bytes_recorded() << endl;
+			delete operfRecord;
+		} catch (const runtime_error & re) {
 			/* If the user does ctl-c, the operf-record process may get interrupted
 			 * in a system call, causing problems with writes to the sample data pipe.
 			 * So we'll ignore such errors unless the user requests debug info.
@@ -384,6 +417,18 @@ int start_profiling(void)
 		_exit(exit_code);
 
 fail_out:
+		if (operfRecord)
+			try {
+				delete operfRecord;
+			} catch (const runtime_error & re) {
+				// We're already in failure mode here; if we get a runtime_error while
+				// deleting operfRecord, we'll only print it if user requests "-V misc"
+				if (cverb << vmisc) {
+					cerr << "Caught runtime_error: " << re.what() << endl;
+					exit_code = EXIT_FAILURE;
+				}
+			}
+
 		if (!ready){
 			/* ready==0 means we've not yet told parent we're ready,
 			 * but the parent is reading our pipe.  So we tell the
@@ -440,81 +485,47 @@ fail_out:
 	return 0;
 }
 
-static end_code_t _kill_operf_read_pid(end_code_t rc)
+static end_code_t _waitfor_operf_read_pid(end_code_t rc)
 {
-	// Now stop the operf-read process
-	int waitpid_status;
-	struct timeval tv;
-	long long start_time_sec;
-	long long usec_timer;
-	bool keep_trying = true;
-	waitpid_status = 0;
-	gettimeofday(&tv, NULL);
-	start_time_sec = tv.tv_sec;
-	usec_timer = tv.tv_usec;
-	/* We'll initially try the waitpid with WNOHANG once every 100,000 usecs.
-	 * If it hasn't ended within 5 seconds, we'll kill it and do one
-	 * final wait.
-	 */
-	while (keep_trying) {
-		int option = WNOHANG;
-		int wait_rc;
-		gettimeofday(&tv, NULL);
-		if (tv.tv_sec > start_time_sec + 5) {
-			keep_trying = false;
-			option = 0;
-			cerr << "now trying to kill convert pid..." << endl;
+	// Now wait for the operf-read process to finish
+	int wait_rc, waitpid_status, post_processing = 1;
 
-			if (kill(operf_read_pid, SIGUSR1) < 0) {
-				perror("Attempt to stop operf-read process failed");
-				rc = rc ? PERF_BOTH_ERROR : PERF_READ_ERROR;
-				break;
-			}
-		} else {
-			/* If we exceed the 100000 usec interval or if the tv_usec
-			 * value has rolled over to restart at 0, then we reset
-			 * the usec_timer to current tv_usec and try waitpid.
-			 */
-			if ((tv.tv_usec % 1000000) > (usec_timer + 100000)
-					|| (tv.tv_usec < usec_timer))
-				usec_timer = tv.tv_usec;
-			else
-				continue;
+	if (write(operf_post_profiling_pipe[1], &post_processing, sizeof(post_processing)) < 0) {
+		perror("Internal error:  Failed to write to operf_post_profiling_pipe");
+		rc = rc ? PERF_BOTH_ERROR : PERF_READ_ERROR;
+		return rc;
+	}
+	waitpid_status = 0;
+	if ((wait_rc = waitpid(operf_read_pid, &waitpid_status, 0)) < 0) {
+		if (errno != ECHILD) {
+			perror("waitpid for operf-read process failed");
+			rc = rc ? PERF_BOTH_ERROR : PERF_READ_ERROR;
 		}
-		if ((wait_rc = waitpid(operf_read_pid, &waitpid_status, option)) < 0) {
-			keep_trying = false;
-			if (errno != ECHILD) {
-				perror("waitpid for operf-read process failed");
-				rc = rc ? PERF_BOTH_ERROR : PERF_READ_ERROR;
-			}
-		} else if (wait_rc) {
-			if (WIFEXITED(waitpid_status)) {
-				keep_trying = false;
-				if (!WEXITSTATUS(waitpid_status)) {
-					cverb << vdebug << "operf-read process returned OK" << endl;
-				} else if (WIFEXITED(waitpid_status)) {
-					/* If user did ctl-c, operf-record may get spurious errors, like
-					 * broken pipe, etc.  We ignore these unless the user asks for
-					 * debug output.
-					 */
-					if (!ctl_c || cverb << vdebug) {
-						cerr <<  "operf-read process ended abnormally.  Status = "
-						     << WEXITSTATUS(waitpid_status) << endl;
-						rc = rc ? PERF_BOTH_ERROR : PERF_READ_ERROR;
-					}
-				}
-			}  else if (WIFSIGNALED(waitpid_status)) {
-				keep_trying = false;
-				/* If user did ctl-c, operf-record may get spurious errors, like
+	} else if (wait_rc) {
+		if (WIFEXITED(waitpid_status)) {
+			if (!WEXITSTATUS(waitpid_status)) {
+				cverb << vdebug << "operf-read process returned OK" << endl;
+			} else {
+				/* If user did ctl-c, operf-read may get spurious errors, like
 				 * broken pipe, etc.  We ignore these unless the user asks for
 				 * debug output.
 				 */
 				if (!ctl_c || cverb << vdebug) {
-					cerr << "operf-read process killed by signal "
-					     << WTERMSIG(waitpid_status) << endl;
-					rc = PERF_RECORD_ERROR;
+					cerr <<  "operf-read process ended abnormally.  Status = "
+							<< WEXITSTATUS(waitpid_status) << endl;
+					rc = rc ? PERF_BOTH_ERROR : PERF_READ_ERROR;
 				}
 			}
+		}  else if (WIFSIGNALED(waitpid_status)) {
+			/* If user did ctl-c, operf-read may get spurious errors, like
+			 * broken pipe, etc.  We ignore these unless the user asks for
+			 * debug output.
+			 */
+			if (!ctl_c || cverb << vdebug) {
+				cerr << "operf-read process killed by signal "
+						<< WTERMSIG(waitpid_status) << endl;
+				rc = PERF_RECORD_ERROR;
+			}
 		}
 	}
 	return rc;
@@ -526,9 +537,13 @@ static end_code_t _kill_operf_record_pid(void)
 	end_code_t rc = ALL_OK;
 
 	// stop operf-record process
+	errno = 0;
 	if (kill(operf_record_pid, SIGUSR1) < 0) {
-		perror("Attempt to stop operf-record process failed");
-		rc = PERF_RECORD_ERROR;
+		// If operf-record process is already ended, don't consider this an error.
+		if (errno != ESRCH) {
+			perror("Attempt to stop operf-record process failed");
+			rc = PERF_RECORD_ERROR;
+		}
 	} else {
 		if (waitpid(operf_record_pid, &waitpid_status, 0) < 0) {
 			perror("waitpid for operf-record process failed");
@@ -587,29 +602,40 @@ static end_code_t _run(void)
 	/* If we're not doing system wide profiling and no app is started, then
 	 * there's no profile data to convert. So if this condition is NOT true,
 	 * then we'll do the convert.
-	 * Note that if --lazy-connversion is passed, then operf_options::post_conversion
+	 * Note that if --lazy-conversion is passed, then operf_options::post_conversion
 	 * will be set, and we will defer conversion until after the operf-record
 	 * process is done.
 	 */
 	if (!operf_options::post_conversion) {
 		if (!(!app_started && !operf_options::system_wide)) {
 			cverb << vdebug << "Forking read pid" << endl;
+			if (pipe(operf_post_profiling_pipe) < 0) {
+				perror("Internal error: operf-record could not create pipe");
+				_exit(EXIT_FAILURE);
+			}
 			operf_read_pid = fork();
 			if (operf_read_pid < 0) {
 				perror("Internal error: fork failed");
 				_exit(EXIT_FAILURE);
 			} else if (operf_read_pid == 0) { // child process
 				close(sample_data_pipe[1]);
+				close(operf_post_profiling_pipe[1]);
+				_set_basic_SIGINT_handler_for_child();
 				convert_sample_data();
+				_exit(EXIT_SUCCESS);
 			}
 			// parent
 			close(sample_data_pipe[0]);
 			close(sample_data_pipe[1]);
+			close(operf_convert_record_write_pipe[0]);
+			close(operf_convert_record_write_pipe[1]);
+			close(operf_record_convert_write_pipe[0]);
+			close(operf_record_convert_write_pipe[1]);
+			close(operf_post_profiling_pipe[0]);
 		}
 	}
 
-	set_signals();
-	cerr << "operf: Profiler started" << endl;
+	set_signals_for_parent();
 	if (startApp) {
 		/* The user passed in a command or program name to start, so we'll need to do waitpid on that
 		 * process.  However, while that user-requested process is running, it's possible we
@@ -619,37 +645,27 @@ static end_code_t _run(void)
 		 * process, checking their status.  The profiled app may end normally, abnormally, or by way
 		 * of ctrl-C.  The operf-record process should not end here, except abnormally.  The normal
 		 * flow is:
-		 *    1. profiled app ends or is stopped vi ctrl-C
+		 *    1. profiled app ends or is stopped via ctrl-C
 		 *    2. keep_trying is set to false, so we drop out of while loop and proceed to end of function
 		 *    3. call _kill_operf_record_pid and _kill_operf_read_pid
 		 */
-		struct timeval tv;
-		long long usec_timer;
 		bool keep_trying = true;
 		const char * app_process = "profiled app";
 		const char * record_process = "operf-record process";
 		waitpid_status = 0;
-		gettimeofday(&tv, NULL);
-		usec_timer = tv.tv_usec;
 		cverb << vdebug << "going into waitpid on profiled app " << app_PID << endl;
 
-		// We'll try the waitpid with WNOHANG once every 100,000 usecs.
+		// We'll try the waitpid with WNOHANG once every 100 ms (100,000,000 nsecs).
 		while (keep_trying) {
 			pid_t the_pid = app_PID;
 			int wait_rc;
 			const char * the_process = app_process;
-			gettimeofday(&tv, NULL);
-			/* If we exceed the 100000 usec interval or if the tv_usec
-			 * value has rolled over to restart at 0, then we reset
-			 * the usec_timer to current tv_usec and try waitpid.
-			 */
-			if ((tv.tv_usec % 1000000) > (usec_timer + 100000)
-					|| (tv.tv_usec < usec_timer))
-				usec_timer = tv.tv_usec;
-			else
-				continue;
-
 			bool trying_user_app = true;
+			struct timespec ts_req;
+			ts_req.tv_sec = 0;
+			ts_req.tv_nsec = 100000000;
+
+			(void)nanosleep(&ts_req, NULL);
 again:
 			if ((wait_rc = waitpid(the_pid, &waitpid_status, WNOHANG)) < 0) {
 				keep_trying = false;
@@ -722,10 +738,10 @@ again:
 		if (operf_options::post_conversion)
 			rc = _kill_operf_record_pid();
 		else
-			rc = _kill_operf_read_pid(_kill_operf_record_pid());
+			rc = _waitfor_operf_read_pid(_kill_operf_record_pid());
 	} else {
 		if (!operf_options::post_conversion)
-			rc = _kill_operf_read_pid(rc);
+			rc = _waitfor_operf_read_pid(rc);
 	}
 
 	return rc;
@@ -755,11 +771,15 @@ static void _jitconv_complete(int val __attribute__((unused)))
 	if (WIFEXITED(child_status) && (!WEXITSTATUS(child_status))) {
 		cverb << vdebug << "JIT dump processing complete." << endl;
 	} else {
-		 if (WIFSIGNALED(child_status))
-			 cerr << "child received signal " << WTERMSIG(child_status) << endl;
-		 else
+		 if (WIFSIGNALED(child_status)) {
+			 if (ctl_c)
+				 cerr << "JIT conversion stopped by request of user via ctl-c" << endl;
+			 else
+				 cerr << "child received signal " << WTERMSIG(child_status) << endl;
+		 } else {
 			 cerr << "JIT dump processing exited abnormally: "
-			 << WEXITSTATUS(child_status) << endl;
+			      << WEXITSTATUS(child_status) << endl;
+		 }
 	}
 }
 
@@ -779,14 +799,6 @@ static void _set_signals_for_convert(void)
 		perror("operf: install of SIGCHLD handler failed: ");
 		exit(EXIT_FAILURE);
 	}
-
-	act.sa_handler = _handle_sigint;
-	sigemptyset(&act.sa_mask);
-	sigaddset(&act.sa_mask, SIGINT);
-	if (sigaction(SIGINT, &act, NULL)) {
-		perror("operf: install of SIGINT handler failed: ");
-		exit(EXIT_FAILURE);
-	}
 }
 
 static void _do_jitdump_convert()
@@ -796,7 +808,7 @@ static void _do_jitdump_convert()
 	struct timeval tv;
 	char end_time_str[32];
 	char opjitconv_path[PATH_MAX + 1];
-	char * exec_args[8];
+	char * exec_args[9];
 
 	jitconv_pid = fork();
 	switch (jitconv_pid) {
@@ -808,6 +820,7 @@ static void _do_jitdump_convert()
 		const char * debug_option = "-d";
 		const char * non_root_user = "--non-root";
 		const char * delete_jitdumps = "--delete-jitdumps";
+		const char * sess_dir =  "--session-dir";
 		gettimeofday(&tv, NULL);
 		end_time = tv.tv_sec;
 		sprintf(end_time_str, "%llu", end_time);
@@ -819,6 +832,7 @@ static void _do_jitdump_convert()
 		if (my_uid != 0)
 			exec_args[arg_num++] = (char *)non_root_user;
 		exec_args[arg_num++] = (char *)delete_jitdumps;
+		exec_args[arg_num++] = (char *)sess_dir;
 		exec_args[arg_num++] = (char *)operf_options::session_dir.c_str();
 		exec_args[arg_num++] = start_time_str;
 		exec_args[arg_num++] = end_time_str;
@@ -850,18 +864,31 @@ static int __delete_old_previous_sample_data(const char *fpath,
 	}
 }
 
-/* Read perf_events sample data written by the operf-record process
- * through the sample_data_pipe and convert this to oprofile format
- * sample files.
+/* Read perf_events sample data written by the operf-record process through
+ * the sample_data_pipe or file (dependent on 'lazy-conversion' option)
+ * and convert the perf format sample data to oprofile format sample files.
+ *
+ * If not invoked with --lazy-conversion option, this function is executed by
+ * the "operf-read" child process.  When profiling is done (e.g., after the user
+ * does a ctrl-C), the parent executes _waitfor_operf_read_pid, which notifies
+ * the operf-read process via operf_post_profiling_pipe and then waits, with no
+ * timeout, for the conversion to complete.
+ *
+ * But if --lazy-conversion option is used, then it's the parent process that's
+ * running convert_sample_data.  If the user does a ctrl-C during this procedure,
+ * the ctrl-C is handled via op_sig_stop which essentially does nothing to stop
+ * the conversion procedure, which in general is fine.  On the very rare chance
+ * that the procedure gets stuck (hung) somehow, the user will have to do a
+ * 'kill -KILL'.
  */
 static void convert_sample_data(void)
 {
 	int inputfd;
 	string inputfname;
 	int rc = EXIT_SUCCESS;
-	int keep_waiting = 0;
 	string current_sampledir = samples_dir + "/current/";
 	string previous_sampledir = samples_dir + "/previous";
+	string stats_dir = "";
 	current_sampledir.copy(op_samples_current_dir, current_sampledir.length(), 0);
 
 	if (!app_started && !operf_options::system_wide)
@@ -902,7 +929,11 @@ static void convert_sample_data(void)
 		inputfd = sample_data_pipe[0];
 		inputfname = "";
 	}
-	operfRead.init(inputfd, inputfname, current_sampledir, cpu_type, events, operf_options::system_wide);
+	close(operf_record_convert_write_pipe[1]);
+	close(operf_convert_record_write_pipe[0]);
+	operfRead.init(inputfd, inputfname, current_sampledir, cpu_type,
+	               operf_options::system_wide, operf_convert_record_write_pipe[1],
+	               operf_record_convert_write_pipe[0], operf_post_profiling_pipe[0]);
 	if ((rc = operfRead.readPerfHeader()) < 0) {
 		if (rc != OP_PERF_HANDLED_ERROR)
 			cerr << "Error: Cannot create read header info for sample data " << endl;
@@ -912,24 +943,21 @@ static void convert_sample_data(void)
 	cverb << vdebug << "Successfully read header info for sample data " << endl;
 	if (operfRead.is_valid()) {
 		try {
-			int num = operfRead.convertPerfData();
+			unsigned int num = operfRead.convertPerfData();
 			cverb << vdebug << "operf_read: Total bytes received from operf_record process: " << dec << num << endl;
-		} catch (runtime_error e) {
+		} catch (const runtime_error & e) {
 			cerr << "Caught runtime error from operf_read::convertPerfData" << endl;
 			cerr << e.what() << endl;
 			rc = EXIT_FAILURE;
 			goto out;
 		}
 	}
+
 	_set_signals_for_convert();
 	cverb << vdebug << "Calling _do_jitdump_convert" << endl;
 	_do_jitdump_convert();
-	while (jit_conversion_running && (keep_waiting < 2)) {
+	while (jit_conversion_running) {
 		sleep(1);
-		keep_waiting++;
-	}
-	if (jit_conversion_running) {
-		kill(jitconv_pid, SIGKILL);
 	}
 out:
 	if (!operf_options::post_conversion)
@@ -937,329 +965,6 @@ out:
 }
 
 
-static int find_app_file_in_dir(const struct dirent * d)
-{
-	if (!strcmp(d->d_name, app_name))
-		return 1;
-	else
-		return 0;
-}
-
-static int get_PATH_based_pathname(char * path_holder, size_t n)
-{
-	int retval = -1;
-
-	char * real_path = getenv("PATH");
-	char * path = (char *) xstrdup(real_path);
-	char * segment = strtok(path, ":");
-	while (segment) {
-		struct dirent ** namelist;
-		int rc = scandir(segment, &namelist, find_app_file_in_dir, NULL);
-		if (rc < 0) {
-			cerr << app_name << " cannot be found in your PATH." << endl;
-			break;
-		} else if (rc == 1) {
-			size_t applen = strlen(app_name);
-			size_t dirlen = strlen(segment);
-
-			if (applen + dirlen + 2 > n) {
-				cerr << "Path segment " << segment
-				     << " prepended to the passed app name is too long"
-				     << endl;
-				retval = -1;
-				break;
-			}
-			strncpy(path_holder, segment, dirlen);
-			strcat(path_holder, "/");
-			strncat(path_holder, app_name, applen);
-			retval = 0;
-			free(namelist[0]);
-			free(namelist);
-			break;
-		}
-		segment = strtok(NULL, ":");
-	}
-	free(path);
-	return retval;
-}
-int validate_app_name(void)
-{
-	int rc = 0;
-	struct stat filestat;
-	size_t len = strlen(app_name);
-
-	if (len > (size_t) (OP_APPNAME_LEN - 1)) {
-		cerr << "app name longer than max allowed (" << OP_APPNAME_LEN
-		     << " chars)\n";
-		cerr << app_name << endl;
-		rc = -1;
-		goto out;
-	}
-
-	if (index(app_name, '/') == app_name) {
-		// Full pathname of app was specified, starting with "/".
-		strncpy(full_pathname, app_name, len);
-	} else if ((app_name[0] == '.') && (app_name[1] == '/')) {
-		// Passed app is in current directory; e.g., "./myApp"
-		if (getcwd(full_pathname, PATH_MAX) == NULL) {
-			rc = -1;
-			cerr << "getcwd [1] failed when trying to find app name " << app_name << ". Aborting."
-			     << endl;
-			goto out;
-		}
-		strcat(full_pathname, "/");
-		strcat(full_pathname, (app_name + 2));
-	} else if (index(app_name, '/')) {
-		// Passed app is in a subdirectory of cur dir; e.g., "test-stuff/myApp"
-		if (getcwd(full_pathname, PATH_MAX) == NULL) {
-			rc = -1;
-			cerr << "getcwd [2] failed when trying to find app name " << app_name << ". Aborting."
-			     << endl;
-			goto out;
-		}
-		strcat(full_pathname, "/");
-		strcat(full_pathname, app_name);
-	} else {
-		// Pass app name, at this point, MUST be found in PATH
-		rc = get_PATH_based_pathname(full_pathname, PATH_MAX);
-	}
-
-	if (rc) {
-		cerr << "Problem finding app name " << app_name << ". Aborting."
-		     << endl;
-		goto out;
-	}
-	app_name_SAVE = app_name;
-	app_name = full_pathname;
-	if (stat(app_name, &filestat)) {
-		char msg[OP_APPNAME_LEN + 50];
-		snprintf(msg, OP_APPNAME_LEN + 50, "Non-existent app name \"%s\"",
-		         app_name);
-		perror(msg);
-		rc = -1;
-	}
-
-	out: return rc;
-}
-
-static void _get_event_code(operf_event_t * event)
-{
-	FILE * fp;
-	char oprof_event_code[9];
-	string command;
-	u64 base_code, config;
-	base_code = config = 0ULL;
-
-	command = OP_BINDIR;
-	command += "ophelp ";
-	command += event->name;
-
-	fp = popen(command.c_str(), "r");
-	if (fp == NULL) {
-		cerr << "Unable to execute ophelp to get info for event "
-		     << event->name << endl;
-		exit(EXIT_FAILURE);
-	}
-	if (fgets(oprof_event_code, sizeof(oprof_event_code), fp) == NULL) {
-		pclose(fp);
-		cerr << "Unable to find info for event "
-		     << event->name << endl;
-		exit(EXIT_FAILURE);
-	}
-
-	pclose(fp);
-
-	base_code = strtoull(oprof_event_code, (char **) NULL, 10);
-
-
-#if defined(__i386__) || defined(__x86_64__)
-	// Setup EventSelct[11:8] field for AMD
-	const char * vendor_AMD = "AuthenticAMD";
-	if (op_is_cpu_vendor((char *)vendor_AMD)) {
-		config = base_code & 0xF00ULL;
-		config = config << 32;
-	}
-
-	// Setup EventSelct[7:0] field
-	config |= base_code & 0xFFULL;
-
-	// Setup unitmask field
-	if (event->um_name[0]) {
-		char mask[12];
-		char buf[20];
-		if ((snprintf(buf, 20, "%lu", event->count)) == -1) {
-			cerr << "Error parsing event count of " << event->count << endl;
-			exit(EXIT_FAILURE);
-		}
-		command = OP_BINDIR;
-		command += "ophelp ";
-		command += "--extra-mask ";
-		command += event->name;
-		command += ":";
-		command += buf;
-		command += ":";
-		command += event->um_name;
-		fp = popen(command.c_str(), "r");
-		if (fp == NULL) {
-			cerr << "Unable to execute ophelp to get info for event "
-			     << event->name << endl;
-			exit(EXIT_FAILURE);
-		}
-		if (fgets(mask, sizeof(mask), fp) == NULL) {
-			pclose(fp);
-			cerr << "Unable to find unit mask info for " << event->um_name << " for event "
-			     << event->name << endl;
-			exit(EXIT_FAILURE);
-		}
-		pclose(fp);
-		config |= strtoull(mask, (char **) NULL, 10);
-	} else {
-		config |= ((event->evt_um & 0xFFULL) << 8);
-	}
-#else
-	config = base_code;
-#endif
-
-	event->op_evt_code = base_code;
-	event->evt_code = config;
-}
-
-static void _process_events_list(void)
-{
-	string cmd = OP_BINDIR;
-	cmd += "/ophelp --check-events ";
-	for (unsigned int i = 0; i <  operf_options::evts.size(); i++) {
-		FILE * fp;
-		string full_cmd = cmd;
-		string event_spec = operf_options::evts[i];
-		if (operf_options::callgraph) {
-			full_cmd += " --callgraph=1 ";
-		}
-		full_cmd += event_spec;
-		fp = popen(full_cmd.c_str(), "r");
-		if (fp == NULL) {
-			cerr << "Unable to execute ophelp to get info for event "
-			     << event_spec << endl;
-			exit(EXIT_FAILURE);
-		}
-		if (fgetc(fp) == EOF) {
-			pclose(fp);
-			cerr << "Error retrieving info for event "
-			     << event_spec << endl;
-			if (operf_options::callgraph)
-				cerr << "Note: When doing callgraph profiling, the sample count must be"
-				     << endl << "15 times the minimum count value for the event."  << endl;
-			exit(EXIT_FAILURE);
-		}
-		fclose(fp);
-		char * event_str = op_xstrndup(event_spec.c_str(), event_spec.length());
-		operf_event_t event;
-		strncpy(event.name, strtok(event_str, ":"), OP_MAX_EVT_NAME_LEN);
-		event.count = atoi(strtok(NULL, ":"));
-		/* Name and count are required in the event spec in order for
-		 * 'ophelp --check-events' to pass.  But since unit mask and domain
-		 * control bits are optional, we need to ensure the result of strtok
-		 * is valid.
-		 */
-		char * info;
-#define	_OP_UM 1
-#define	_OP_KERNEL 2
-#define	_OP_USER 3
-		int place =  _OP_UM;
-		char * endptr = NULL;
-		event.evt_um = 0ULL;
-		event.no_kernel = 0;
-		event.no_user = 0;
-		memset(event.um_name, '\0', OP_MAX_UM_NAME_LEN);
-		while ((info = strtok(NULL, ":"))) {
-			switch (place) {
-			case _OP_UM:
-				event.evt_um = strtoul(info, &endptr, 0);
-				// If any of the UM part is not a number, then we
-				// consider the entire part a string.
-				if (*endptr) {
-					event.evt_um = 0;
-					strncpy(event.um_name, info, OP_MAX_UM_NAME_LEN);
-				}
-				break;
-			case _OP_KERNEL:
-				if (atoi(info) == 0)
-					event.no_kernel = 1;
-				break;
-			case _OP_USER:
-				if (atoi(info) == 0)
-					event.no_user = 1;
-				break;
-			}
-			place++;
-		}
-		free(event_str);
-		_get_event_code(&event);
-		events.push_back(event);
-	}
-#if (defined(__powerpc__) || defined(__powerpc64__))
-	{
-		/* This section of code is for architectures such as ppc[64] for which
-		 * the oprofile event code needs to be converted to the appropriate event
-		 * code to pass to the perf_event_open syscall.
-		 */
-
-		using namespace OP_perf_utils;
-		if (!op_convert_event_vals(&events)) {
-			cerr << "Unable to convert all oprofile event values to perf_event values" << endl;
-			exit(EXIT_FAILURE);
-		}
-	}
-#endif
-}
-
-static void get_default_event(void)
-{
-	operf_event_t dft_evt;
-	struct op_default_event_descr descr;
-	vector<operf_event_t> tmp_events;
-
-
-	op_default_event(cpu_type, &descr);
-	if (descr.name[0] == '\0') {
-		cerr << "Unable to find default event" << endl;
-		exit(EXIT_FAILURE);
-	}
-
-	memset(&dft_evt, 0, sizeof(dft_evt));
-	if (operf_options::callgraph) {
-		struct op_event * _event;
-		op_events(cpu_type);
-		if ((_event = find_event_by_name(descr.name, 0, 0))) {
-			dft_evt.count = _event->min_count * CALLGRAPH_MIN_COUNT_SCALE;
-		} else {
-			cerr << "Error getting event info for " << descr.name << endl;
-			exit(EXIT_FAILURE);
-		}
-	} else {
-		dft_evt.count = descr.count;
-	}
-	dft_evt.evt_um = descr.um;
-	strncpy(dft_evt.name, descr.name, OP_MAX_EVT_NAME_LEN - 1);
-	_get_event_code(&dft_evt);
-	events.push_back(dft_evt);
-
-#if (defined(__powerpc__) || defined(__powerpc64__))
-	{
-		/* This section of code is for architectures such as ppc[64] for which
-		 * the oprofile event code needs to be converted to the appropriate event
-		 * code to pass to the perf_event_open syscall.
-		 */
-
-		using namespace OP_perf_utils;
-		if (!op_convert_event_vals(&events)) {
-			cerr << "Unable to convert all oprofile event values to perf_event values" << endl;
-			exit(EXIT_FAILURE);
-		}
-	}
-#endif
-}
-
 static void _process_session_dir(void)
 {
 	if (operf_options::session_dir.empty()) {
@@ -1268,6 +973,10 @@ static void _process_session_dir(void)
 		cwd = (char *) xmalloc(PATH_MAX);
 		// set default session dir
 		cwd = getcwd(cwd, PATH_MAX);
+		if (cwd == NULL) {
+			perror("Error calling getcwd");
+			exit(EXIT_FAILURE);
+		}
 		operf_options::session_dir = cwd;
 		operf_options::session_dir +="/oprofile_data";
 		samples_dir = operf_options::session_dir + "/samples";
@@ -1304,6 +1013,7 @@ static void _process_session_dir(void)
 			exit(EXIT_FAILURE);
 		}
 	}
+
 	cverb << vdebug << "Using samples dir " << samples_dir << endl;
 }
 
@@ -1352,6 +1062,107 @@ bool _get_vmlinux_address_info(vector<string> args, string cmp_val, string &str)
 	return found;
 }
 
+/* Validate the kernel address range (hexadecimal text) and, if usable, store
+ * it in kernel_start/kernel_end and create the vmlinux entry for image_name.
+ * Returns false, leaving kernel_start/kernel_end unassigned, on a conversion
+ * error or a zero address; the zero-address case also sets no_vmlinux.
+ */
+static bool _add_kernel_entry(string start_addr_str, string end_addr_str, string image_name)
+{
+	errno = 0;
+	unsigned long long start_addr = strtoull(start_addr_str.c_str(), NULL, 16);
+	if (errno) {
+		int const saved_errno = errno;  // stream output may clobber errno
+		cerr << "Unable to convert kallsyms start address " << start_addr_str
+		     << " to a valid hex value. errno is " << strerror(saved_errno) << endl;
+		return false;
+	}
+
+	errno = 0;
+	unsigned long long end_addr = strtoull(end_addr_str.c_str(), NULL, 16);
+	if (errno) {
+		int const saved_errno = errno;  // stream output may clobber errno
+		cerr << "Unable to convert kallsyms end address " << end_addr_str
+		     << " to a valid hex value. errno is " << strerror(saved_errno) << endl;
+		return false;
+	}
+
+	// A zero address is reported as a kptr_restrict problem, not a range.
+	if ((start_addr == 0) || (end_addr == 0)) {
+		no_vmlinux = true;
+		cerr << "Kernel profiling is not possible with current system "
+		     << "config." << endl
+		     << "Set /proc/sys/kernel/kptr_restrict to 0 to "
+		     << "collect kernel samples." << endl;
+		return false;
+	}
+
+	// Only publish the range once both addresses have been validated.
+	kernel_start = start_addr;
+	kernel_end = end_addr;
+
+	no_vmlinux = false;  // set to false or the operf_get_vmlinux_name() returns "no-vmlinux"
+	string const start_end = start_addr_str + "," + end_addr_str;
+	operf_create_vmlinux(image_name.c_str(), start_end.c_str());
+	return true;
+}
+
+/* Scan KALL_SYM_FILE for the symbols that bound the kernel address range
+ * and register that range via _add_kernel_entry().  Each line is parsed as
+ * "<hex address> <type char> <symbol name>".
+ *
+ * Returns true only when the end symbol was found and _add_kernel_entry()
+ * accepted the range; false if the file cannot be opened, the end symbol is
+ * absent, or the addresses were rejected.
+ */
+static bool _process_kallsyms(void)
+{
+	string const kall_syms_file = KALL_SYM_FILE;
+	string start_addr_str, end_addr_str, line;
+	bool rtn = false;
+
+	ifstream infile(kall_syms_file.c_str());
+	if (!infile) {
+		cerr << "Internal Error: Could not open kallsyms file." << endl;
+		return false;
+	}
+
+	/* Drive the loop from getline() itself so that a failed read at end of
+	 * file is never processed as though it were another line.
+	 */
+	while (getline(infile, line)) {
+		/* A fresh stream per line keeps unread trailing fields from
+		 * being glued onto the front of the next line's address.
+		 */
+		stringstream iss(line);
+		string address_str, name;
+		char type;
+
+		/* Skip blank or malformed lines instead of comparing against
+		 * values left over from an earlier iteration.
+		 */
+		if (!(iss >> address_str >> type >> name))
+			continue;
+
+		/* Require an exact match.  Comparing only strlen(name)
+		 * characters would also accept any prefix of the symbol,
+		 * including an empty name.
+		 */
+		if (name == KERN_ADDR_SPACE_START_SYMBOL)
+			start_addr_str = address_str;
+
+		if (name == KERN_ADDR_SPACE_END_SYMBOL) {
+			/* End of range seen: register it and stop scanning. */
+			end_addr_str = address_str;
+			rtn = _add_kernel_entry(start_addr_str, end_addr_str,
+			                        KALL_SYM_FILE);
+			break;
+		}
+	}
+	infile.close();
+	return rtn;
+}
+
 string _process_vmlinux(string vmlinux_file)
 {
 	vector<string> args;
@@ -1493,7 +1304,7 @@ static int _process_operf_and_app_args(int argc, char * const argv[])
 		case 'e':
 			event = strtok(optarg, ",");
 			do {
-				operf_options::evts.push_back(event);
+				operf_options::evts.insert(event);
 			} while ((event = strtok(NULL, ",")));
 			break;
 		case 'c':
@@ -1535,23 +1346,20 @@ static void process_args(int argc, char * const argv[])
 
 		app_name = (char *) xmalloc(strlen(argv[non_options_idx]) + 1);
 		strcpy(app_name, argv[non_options_idx]);
+		// Note 1: app_args[0] is placeholder for app_fname (filled in later).
+		// Note 2: app_args[<end>] is set to NULL (required by execvp)
 		if (non_options_idx < (argc -1)) {
-			size_t length_all_app_args = 0;
-			non_options_idx++;
-			for(int i = non_options_idx; i < argc; i++)
-				length_all_app_args += strlen(argv[i]) + 1;
-			if (length_all_app_args) {
-				app_args = (char *) xmalloc(length_all_app_args);
-				strcpy(app_args, argv[non_options_idx]);
-				non_options_idx++;
-				while (non_options_idx < argc) {
-					strcat(app_args, " ");
-					strcat(app_args, argv[non_options_idx]);
-					non_options_idx++;
-				}
+			app_args = (char **) xmalloc((sizeof *app_args) *
+			                             (argc - non_options_idx + 1));
+			for(int i = non_options_idx + 1; i < argc; i++) {
+				app_args[i - non_options_idx] = argv[i];
 			}
+			app_args[argc - non_options_idx] = NULL;
+		} else {
+			app_args = (char **) xmalloc((sizeof *app_args) * 2);
+			app_args[1] = NULL;
 		}
-		if (validate_app_name() < 0) {
+		if (op_validate_app_name(&app_name, &app_name_SAVE) < 0) {
 			__print_usage_and_exit(NULL);
 		}
 	} else {  // non_options_idx == 0
@@ -1582,42 +1390,33 @@ static void process_args(int argc, char * const argv[])
 
 	if (operf_options::evts.empty()) {
 		// Use default event
-		get_default_event();
+		op_get_default_event(operf_options::callgraph);
 	} else  {
-		_process_events_list();
+		op_process_events_list(operf_options::evts, true, operf_options::callgraph);
 	}
-	op_nr_counters = events.size();
+	op_nr_events = events.size();
 
 	if (operf_options::vmlinux.empty()) {
-		no_vmlinux = true;
-		operf_create_vmlinux(NULL, NULL);
+		/* get the beginning and end of the kernel addr space */
+		if (!_process_kallsyms()) {
+			/* Do not have permission to read
+			 * kernel addresses from /proc/kallsyms.
+			 */
+			no_vmlinux = true;
+			operf_create_vmlinux(NULL, NULL);
+		}
 	} else {
 		string startEnd = _process_vmlinux(operf_options::vmlinux);
 		operf_create_vmlinux(operf_options::vmlinux.c_str(), startEnd.c_str());
 	}
+	if (operf_options::pid && !operf_options::post_conversion)
+		track_new_forks = true;
+	else
+		track_new_forks = false;
 
 	return;
 }
 
-static int _check_perf_events_cap(void)
-{
-	/* If perf_events syscall is not implemented, the syscall below will fail
-	 * with ENOSYS (38).  If implemented, but the processor type on which this
-	 * program is running is not supported by perf_events, the syscall returns
-	 * ENOENT (2).
-	 */
-	struct perf_event_attr attr;
-	pid_t pid ;
-        memset(&attr, 0, sizeof(attr));
-        attr.size = sizeof(attr);
-        attr.sample_type = PERF_SAMPLE_IP;
-
-	pid = getpid();
-	syscall(__NR_perf_event_open, &attr, pid, 0, -1, 0);
-	return errno;
-
-}
-
 static void _precheck_permissions_to_samplesdir(string sampledir, bool for_current)
 {
 	/* Pre-check to make sure we have permission to remove old sample data
@@ -1648,48 +1447,70 @@ static void _precheck_permissions_to_samplesdir(string sampledir, bool for_curre
 
 }
 
-static int _get_sys_value(const char * filename)
-{
-	char str[10];
-	int _val = -999;
-	FILE * fp = fopen(filename, "r");
-	if (fp == NULL)
-		return _val;
-	if (fgets(str, 9, fp))
-		sscanf(str, "%d", &_val);
-	fclose(fp);
-	return _val;
-}
-
-
 int main(int argc, char * const argv[])
 {
 	int rc;
+	int perf_event_paranoid = op_get_sys_value("/proc/sys/kernel/perf_event_paranoid");
+
+	my_uid = geteuid();
 	throttled = false;
-	if ((rc = _check_perf_events_cap())) {
-		if (rc == EBUSY) {
-			cerr << "Performance monitor unit is busy.  Do 'opcontrol --deinit' and try again." << endl;
-			exit(1);
-		}
-		if (rc == ENOSYS) {
-			cerr << "Your kernel does not implement a required syscall"
-			     << "  for the operf program." << endl;
-		} else if (rc == ENOENT) {
-			cerr << "Your kernel's Performance Events Subsystem does not support"
-			     << " your processor type." << endl;
-		} else {
-			cerr << "Unexpected error running operf: " << strerror(rc) << endl;
+	rc = op_check_perf_events_cap(use_cpu_minus_one);
+	if (rc == EACCES) {
+		/* Early perf_events kernels required the cpu argument to perf_event_open
+		 * to be '-1' when setting up to profile a single process if 1) the user is
+		 * not root; and 2) perf_event_paranoid is > 0.  An EACCES error would be
+		 * returned if passing '0' or greater for the cpu arg and the above criteria
+		 * was not met.  Unfortunately, later kernels turned this requirement around
+		 * such that the passed cpu arg must be '0' or greater when the user is not
+		 * root.
+		 *
+		 * We don't really have a good way to check whether we're running on such an
+		 * early kernel except to try the perf_event_open with different values to see
+		 * what works.
+		 */
+		if (my_uid != 0 && perf_event_paranoid > 0) {
+			use_cpu_minus_one = true;
+			rc = op_check_perf_events_cap(use_cpu_minus_one);
 		}
-		cerr << "Please use the opcontrol command instead of operf." << endl;
-		exit(1);
 	}
+	if (rc == EBUSY) {
+		cerr << "Performance monitor unit is busy.  Ensure that no other profilers are running on the system." << endl
+		     << "Note: For example, the obsolete opcontrol profiler (available in earlier oprofile releases)" << endl
+		     << "does not allow other perforrmance tools to run simultaneously. To check for this, look for the" << endl
+		     << "'oprofiled' process using the 'ps' command." << endl;
+	} else if (rc == ENOSYS) {
+		cerr << "Your kernel does not implement a required syscall"
+		     << " for the operf program." << endl;
+	} else if (rc == ENOENT) {
+		cerr << "Your kernel's Performance Events Subsystem does not support"
+		     << " your processor type." << endl;
+	} else if (rc) {
+		cerr << "Unexpected error running operf: " << strerror(rc) << endl;
+	}
+
+	if (rc)
+		exit(1);
 
 	cpu_type = op_get_cpu_type();
+	if (cpu_type == CPU_NO_GOOD) {
+		cerr << "Unable to ascertain cpu type.  Exiting." << endl;
+		cleanup();
+		exit(1);
+	}
+
+	if (cpu_type == CPU_TIMER_INT) {
+		cerr << "CPU type 'timer' was detected, but operf does not support timer mode." << endl
+		     << "Ensure the obsolete opcontrol profiler (available in earlier oprofile releases)" << endl
+		     << "is not running on the system.  To check for this, look for the file" << endl
+		     << "/dev/oprofile/cpu_type; if this file exists, locate the pre-1.0 oprofile" << endl
+		     << "installation, and use its 'opcontrol' command with the --deinit option." << endl;
+		cleanup();
+		exit(1);
+	}
+
 	cpu_speed = op_cpu_frequency();
 	process_args(argc, argv);
 
-	int perf_event_paranoid = _get_sys_value("/proc/sys/kernel/perf_event_paranoid");
-	my_uid = geteuid();
 	if (operf_options::system_wide && ((my_uid != 0) && (perf_event_paranoid > 0))) {
 		cerr << "To do system-wide profiling, either you must be root or" << endl;
 		cerr << "/proc/sys/kernel/perf_event_paranoid must be set to 0 or -1." << endl;
@@ -1697,12 +1518,6 @@ int main(int argc, char * const argv[])
 		exit(1);
 	}
 
-	if (cpu_type == CPU_NO_GOOD) {
-		cerr << "Unable to ascertain cpu type.  Exiting." << endl;
-		cleanup();
-		exit(1);
-	}
-
 	if (my_uid != 0) {
 		bool for_current = true;
 		string current_sampledir = samples_dir + "/current";
@@ -1713,7 +1528,7 @@ int main(int argc, char * const argv[])
 			_precheck_permissions_to_samplesdir(previous_sampledir, for_current);
 		}
 	}
-	kptr_restrict = _get_sys_value("/proc/sys/kernel/kptr_restrict");
+	kptr_restrict = op_get_sys_value("/proc/sys/kernel/kptr_restrict");
 	end_code_t run_result;
 	if ((run_result = _run())) {
 		if (startApp && app_started && (run_result != APP_ABNORMAL_END)) {
diff --git a/pp/Makefile.in b/pp/Makefile.in
index e576613..2bf59d4 100644
--- a/pp/Makefile.in
+++ b/pp/Makefile.in
@@ -41,7 +41,6 @@ DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
 am__aclocal_m4_deps = $(top_srcdir)/m4/binutils.m4 \
 	$(top_srcdir)/m4/builtinexpect.m4 \
-	$(top_srcdir)/m4/cellspubfdsupport.m4 \
 	$(top_srcdir)/m4/compileroption.m4 \
 	$(top_srcdir)/m4/copyifchange.m4 $(top_srcdir)/m4/docbook.m4 \
 	$(top_srcdir)/m4/extradirs.m4 \
@@ -50,7 +49,7 @@ am__aclocal_m4_deps = $(top_srcdir)/m4/binutils.m4 \
 	$(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
 	$(top_srcdir)/m4/mallocattribute.m4 \
 	$(top_srcdir)/m4/poptconst.m4 \
-	$(top_srcdir)/m4/precompiledheader.m4 $(top_srcdir)/m4/qt.m4 \
+	$(top_srcdir)/m4/precompiledheader.m4 \
 	$(top_srcdir)/m4/sstream.m4 $(top_srcdir)/m4/typedef.m4 \
 	$(top_srcdir)/configure.ac
 am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
@@ -158,7 +157,6 @@ LN_S = @LN_S@
 LTLIBOBJS = @LTLIBOBJS@
 MAKEINFO = @MAKEINFO@
 MKDIR_P = @MKDIR_P@
-MOC = @MOC@
 NM = @NM@
 NMEDIT = @NMEDIT@
 OBJDUMP = @OBJDUMP@
@@ -182,20 +180,13 @@ PFM_LIB = @PFM_LIB@
 PKG_CONFIG = @PKG_CONFIG@
 POPT_LIBS = @POPT_LIBS@
 PTRDIFF_T_TYPE = @PTRDIFF_T_TYPE@
-QT_CFLAGS = @QT_CFLAGS@
-QT_INCLUDES = @QT_INCLUDES@
-QT_LDFLAGS = @QT_LDFLAGS@
-QT_LIB = @QT_LIB@
-QT_LIBS = @QT_LIBS@
-QT_VERSION = @QT_VERSION@
 RANLIB = @RANLIB@
+RT_LIB = @RT_LIB@
 SED = @SED@
 SET_MAKE = @SET_MAKE@
 SHELL = @SHELL@
 SIZE_T_TYPE = @SIZE_T_TYPE@
 STRIP = @STRIP@
-UIC = @UIC@
-UIChelp = @UIChelp@
 VERSION = @VERSION@
 XMKMF = @XMKMF@
 XML_CATALOG = @XML_CATALOG@
diff --git a/pp/common_option.cpp b/pp/common_option.cpp
index eaf5c75..e1499ad 100644
--- a/pp/common_option.cpp
+++ b/pp/common_option.cpp
@@ -187,7 +187,7 @@ options::spec get_options(int argc, char const * argv[])
 		if ((stat(options::session_dir.c_str(), &sb) < 0) ||
 				((sb.st_mode & S_IFMT) != S_IFDIR)) {
 			// Use the standard default session dir instead
-			options::session_dir = "/var/lib/oprofile";
+			options::session_dir = OP_SESSION_DIR_DEFAULT;
 		}
 		session_dir_supplied = 0;
 	} else {
diff --git a/pp/opannotate.cpp b/pp/opannotate.cpp
index 96fab96..77d5392 100644
--- a/pp/opannotate.cpp
+++ b/pp/opannotate.cpp
@@ -466,6 +466,10 @@ void output_objdump_asm(symbol_collection const & symbols,
 	string image =
 		classes.extra_found_images.find_image_path(app_name, error,
 							   true);
+	if (image == "/proc/kallsyms") {
+		cerr << "Cannot annotate pseudo file /proc/kallsyms -- ignoring." << endl;
+		return;
+	}
 
 	// this is only an optimisation, we can either filter output by
 	// directly calling objdump and rely on the symbol filtering or
@@ -839,8 +843,8 @@ int opannotate(options::spec const & spec)
 	}
 
 	if (!debug_info && !options::assembly) {
-		cerr << "opannotate (warning): no debug information available for binary "
-		     << it->image << ", and --assembly not requested\n";
+		cerr << "opannotate (warning): no debug information available for any binary "
+		     << "selected, and --assembly not requested.\n";
 	}
 
 	annotate_source(images);
diff --git a/pp/opannotate_options.cpp b/pp/opannotate_options.cpp
index 04f675a..40f48fc 100644
--- a/pp/opannotate_options.cpp
+++ b/pp/opannotate_options.cpp
@@ -140,6 +140,12 @@ void handle_options(options::spec const & spec)
 		exit(EXIT_FAILURE);
 	}
 
+	if (assembly && (!include_file.empty() || !exclude_file.empty())) {
+		cerr << "--exclude[include]-file options not supported with --assembly" << endl;
+		cerr << "Please see the opannotate man page." << endl;
+		exit(EXIT_FAILURE);
+	}
+
 	options::symbol_filter = string_filter(include_symbols, exclude_symbols);
 
 	options::file_filter = path_filter(include_file, exclude_file);
diff --git a/pp/oparchive.cpp b/pp/oparchive.cpp
index cd63bd1..5b6906d 100644
--- a/pp/oparchive.cpp
+++ b/pp/oparchive.cpp
@@ -19,7 +19,7 @@
 #include <errno.h>
 #include <string.h>
 #include <dirent.h>
-
+#include <fcntl.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <unistd.h>
@@ -54,30 +54,62 @@ void copy_one_file(image_error err, string const & source, string const & dest)
 	}
 }
 
-void copy_stats(string const & session_samples_dir,
-		string const & archive_path)
+static void _copy_operf_stats(string const & archive_stats,
+                              string const & stats_path)
 {
-	DIR * dir;
 	struct dirent * dirent;
-	string stats_path;
-	
-	stats_path = session_samples_dir + "stats/";
+	string throttled_path;
+	throttled_path = stats_path + "/throttled";
+	DIR * dir =  opendir(throttled_path.c_str());
+	while (dir && (dirent = readdir(dir))) {
+		if ((!strcmp(dirent->d_name, ".")) || (!strcmp(dirent->d_name, "..")))
+			continue;
+		string archive_stats_path;
+		string throttled_event = throttled_path + "/" + dirent->d_name;
+		archive_stats_path = archive_stats + "throttled/" + dirent->d_name;
+		if (!options::list_files &&
+		    create_path(archive_stats_path.c_str())) {
+			cerr << "Unable to create directory for "
+			     <<	archive_stats_path << "." << endl;
+			exit (EXIT_FAILURE);
+		}
+		copy_one_file(image_ok, throttled_event, archive_stats_path);
+	}
+	if (dir)
+		closedir(dir);
 
-	if (!(dir = opendir(stats_path.c_str()))) {
-		return;
+	for (int i = 0; i < OPERF_MAX_STATS; i++) {
+		if (i > 0 && i < OPERF_INDEX_OF_FIRST_LOST_STAT)
+			continue;
+		string fname = stats_path + "/" + stats_filenames[i];
+		int fd = open(fname.c_str(), O_RDONLY);
+		if (fd != -1) {
+			string archive_stats_path = archive_stats + stats_filenames[i];
+			if (!options::list_files &&
+			    create_path(archive_stats_path.c_str())) {
+				cerr << "Unable to create directory for "
+				     <<	archive_stats_path << "." << endl;
+				exit (EXIT_FAILURE);
+			}
+			copy_one_file(image_ok, fname, archive_stats_path);
+			close(fd);
+		}
 	}
+}
 
-	string sample_base_dir = session_samples_dir.substr(archive_path.size());
-	string archive_stats = options::outdirectory + sample_base_dir + "stats/";
+static void _copy_legacy_stats(DIR * dir, string const & archive_stats,
+                               string const & stats_path)
+{
+	struct dirent * dirent;
 	string archive_stats_path = archive_stats + "event_lost_overflow";
+
 	if (!options::list_files &&
 	    create_path(archive_stats_path.c_str())) {
 		cerr << "Unable to create directory for "
-		     <<	archive_stats << "." << endl;
+		     <<	archive_stats_path << "." << endl;
 		exit (EXIT_FAILURE);
 	}
-
-	copy_one_file(image_ok, stats_path + "/event_lost_overflow", archive_stats_path);
+	copy_one_file(image_ok, stats_path + "event_lost_overflow", archive_stats_path);
 
 	while ((dirent = readdir(dir))) {
 		int cpu_nr;
@@ -93,10 +125,26 @@ void copy_stats(string const & session_samples_dir,
 			exit (EXIT_FAILURE);
 		}
 		copy_one_file(image_ok, stats_path + path, archive_stats_path);
+	}
+}
 
+void copy_stats(string const & session_samples_dir,
+		string const & archive_path)
+{
+	DIR * dir;
+	string stats_path;
+
+	stats_path = session_samples_dir + "stats/";
+
+	if (!(dir = opendir(stats_path.c_str()))) {
+		return;
 	}
-	closedir(dir);
 
+	string sample_base_dir = session_samples_dir.substr(archive_path.size());
+	string archive_stats = options::outdirectory + sample_base_dir + "stats/";
+	_copy_legacy_stats(dir, archive_stats, stats_path);
+	closedir(dir);
+	_copy_operf_stats(archive_stats, stats_path);
 }
 
 int oparchive(options::spec const & spec)
diff --git a/pp/oparchive_options.cpp b/pp/oparchive_options.cpp
index 18f74f3..e6f2ddc 100644
--- a/pp/oparchive_options.cpp
+++ b/pp/oparchive_options.cpp
@@ -122,9 +122,9 @@ void handle_options(options::spec const & spec)
 		exit(EXIT_FAILURE);
 	}
 
-	if (strncmp(op_session_dir, "/var/lib/oprofile", strlen("/var/lib/oprofile")))
+	if (strncmp(op_session_dir, OP_SESSION_DIR_DEFAULT, strlen(OP_SESSION_DIR_DEFAULT)))
 		cerr << "NOTE: The sample data in this archive is located at " << op_session_dir << endl
-		     << "instead of the standard location of /var/lib/oprofile.  Hence, when using opreport" << endl
+		     << "instead of the standard location of " << OP_SESSION_DIR_DEFAULT << ".  Hence, when using opreport" << endl
 		     << "and other post-processing tools on this archive, you must pass the following option:" << endl
 		     << "\t--session-dir=" << op_session_dir << endl;
 }
diff --git a/pp/opreport.cpp b/pp/opreport.cpp
index 327043c..7ad6190 100644
--- a/pp/opreport.cpp
+++ b/pp/opreport.cpp
@@ -323,9 +323,10 @@ void output_summaries(summary_container const & summaries)
 
 	for (size_t i = 0; i < summaries.apps.size(); ++i) {
 		app_summary const & app = summaries.apps[i];
-
-		if ((app.counts[0] * 100.0) / summaries.total_counts[0]
-		    < options::threshold) {
+		double ratio = (!summaries.total_counts[0]) ? 0
+		                                            : (app.counts[0] * 100.0)/
+		                                              summaries.total_counts[0];
+		if (ratio < options::threshold) {
 			continue;
 		}
 
diff --git a/utils/Makefile.am b/utils/Makefile.am
index d34b060..408dc54 100644
--- a/utils/Makefile.am
+++ b/utils/Makefile.am
@@ -8,7 +8,6 @@ AM_LDFLAGS = @OP_LDFLAGS@
 LIBS=@POPT_LIBS@ @LIBERTY_LIBS@
 
 bin_PROGRAMS = ophelp op-check-perfevents
-dist_bin_SCRIPTS = opcontrol
 
 op_check_perfevents_SOURCES = op_perf_events_checker.c
 op_check_perfevents_CPPFLAGS = ${AM_CFLAGS} @PERF_EVENT_FLAGS@
diff --git a/utils/Makefile.in b/utils/Makefile.in
index 1b83cf7..57b05a3 100644
--- a/utils/Makefile.in
+++ b/utils/Makefile.in
@@ -15,7 +15,6 @@
 
 @SET_MAKE@
 
-
 VPATH = @srcdir@
 pkgdatadir = $(datadir)/@PACKAGE@
 pkgincludedir = $(includedir)/@PACKAGE@
@@ -37,12 +36,10 @@ build_triplet = @build@
 host_triplet = @host@
 bin_PROGRAMS = ophelp$(EXEEXT) op-check-perfevents$(EXEEXT)
 subdir = utils
-DIST_COMMON = $(dist_bin_SCRIPTS) $(srcdir)/Makefile.am \
-	$(srcdir)/Makefile.in
+DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
 am__aclocal_m4_deps = $(top_srcdir)/m4/binutils.m4 \
 	$(top_srcdir)/m4/builtinexpect.m4 \
-	$(top_srcdir)/m4/cellspubfdsupport.m4 \
 	$(top_srcdir)/m4/compileroption.m4 \
 	$(top_srcdir)/m4/copyifchange.m4 $(top_srcdir)/m4/docbook.m4 \
 	$(top_srcdir)/m4/extradirs.m4 \
@@ -51,7 +48,7 @@ am__aclocal_m4_deps = $(top_srcdir)/m4/binutils.m4 \
 	$(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
 	$(top_srcdir)/m4/mallocattribute.m4 \
 	$(top_srcdir)/m4/poptconst.m4 \
-	$(top_srcdir)/m4/precompiledheader.m4 $(top_srcdir)/m4/qt.m4 \
+	$(top_srcdir)/m4/precompiledheader.m4 \
 	$(top_srcdir)/m4/sstream.m4 $(top_srcdir)/m4/typedef.m4 \
 	$(top_srcdir)/configure.ac
 am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
@@ -60,7 +57,7 @@ mkinstalldirs = $(install_sh) -d
 CONFIG_HEADER = $(top_builddir)/config.h
 CONFIG_CLEAN_FILES =
 CONFIG_CLEAN_VPATH_FILES =
-am__installdirs = "$(DESTDIR)$(bindir)" "$(DESTDIR)$(bindir)"
+am__installdirs = "$(DESTDIR)$(bindir)"
 PROGRAMS = $(bin_PROGRAMS)
 am_op_check_perfevents_OBJECTS =  \
 	op_check_perfevents-op_perf_events_checker.$(OBJEXT)
@@ -69,28 +66,6 @@ op_check_perfevents_LDADD = $(LDADD)
 am_ophelp_OBJECTS = ophelp.$(OBJEXT)
 ophelp_OBJECTS = $(am_ophelp_OBJECTS)
 ophelp_DEPENDENCIES = ../libop/libop.a ../libutil/libutil.a
-am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
-am__vpath_adj = case $$p in \
-    $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
-    *) f=$$p;; \
-  esac;
-am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
-am__install_max = 40
-am__nobase_strip_setup = \
-  srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
-am__nobase_strip = \
-  for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
-am__nobase_list = $(am__nobase_strip_setup); \
-  for p in $$list; do echo "$$p $$p"; done | \
-  sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
-  $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
-    if (++n[$$2] == $(am__install_max)) \
-      { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
-    END { for (dir in files) print dir, files[dir] }'
-am__base_list = \
-  sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
-  sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
-SCRIPTS = $(dist_bin_SCRIPTS)
 DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
 depcomp = $(SHELL) $(top_srcdir)/depcomp
 am__depfiles_maybe = depfiles
@@ -160,7 +135,6 @@ LN_S = @LN_S@
 LTLIBOBJS = @LTLIBOBJS@
 MAKEINFO = @MAKEINFO@
 MKDIR_P = @MKDIR_P@
-MOC = @MOC@
 NM = @NM@
 NMEDIT = @NMEDIT@
 OBJDUMP = @OBJDUMP@
@@ -184,20 +158,13 @@ PFM_LIB = @PFM_LIB@
 PKG_CONFIG = @PKG_CONFIG@
 POPT_LIBS = @POPT_LIBS@
 PTRDIFF_T_TYPE = @PTRDIFF_T_TYPE@
-QT_CFLAGS = @QT_CFLAGS@
-QT_INCLUDES = @QT_INCLUDES@
-QT_LDFLAGS = @QT_LDFLAGS@
-QT_LIB = @QT_LIB@
-QT_LIBS = @QT_LIBS@
-QT_VERSION = @QT_VERSION@
 RANLIB = @RANLIB@
+RT_LIB = @RT_LIB@
 SED = @SED@
 SET_MAKE = @SET_MAKE@
 SHELL = @SHELL@
 SIZE_T_TYPE = @SIZE_T_TYPE@
 STRIP = @STRIP@
-UIC = @UIC@
-UIChelp = @UIChelp@
 VERSION = @VERSION@
 XMKMF = @XMKMF@
 XML_CATALOG = @XML_CATALOG@
@@ -267,7 +234,6 @@ AM_CPPFLAGS = -I ${top_srcdir}/libop \
 
 AM_CFLAGS = @OP_CFLAGS@
 AM_LDFLAGS = @OP_LDFLAGS@
-dist_bin_SCRIPTS = opcontrol
 op_check_perfevents_SOURCES = op_perf_events_checker.c
 op_check_perfevents_CPPFLAGS = ${AM_CFLAGS} @PERF_EVENT_FLAGS@
 ophelp_SOURCES = ophelp.c
@@ -355,40 +321,6 @@ op-check-perfevents$(EXEEXT): $(op_check_perfevents_OBJECTS) $(op_check_perfeven
 ophelp$(EXEEXT): $(ophelp_OBJECTS) $(ophelp_DEPENDENCIES) 
 	@rm -f ophelp$(EXEEXT)
 	$(LINK) $(ophelp_OBJECTS) $(ophelp_LDADD) $(LIBS)
-install-dist_binSCRIPTS: $(dist_bin_SCRIPTS)
-	@$(NORMAL_INSTALL)
-	test -z "$(bindir)" || $(MKDIR_P) "$(DESTDIR)$(bindir)"
-	@list='$(dist_bin_SCRIPTS)'; test -n "$(bindir)" || list=; \
-	for p in $$list; do \
-	  if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
-	  if test -f "$$d$$p"; then echo "$$d$$p"; echo "$$p"; else :; fi; \
-	done | \
-	sed -e 'p;s,.*/,,;n' \
-	    -e 'h;s|.*|.|' \
-	    -e 'p;x;s,.*/,,;$(transform)' | sed 'N;N;N;s,\n, ,g' | \
-	$(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1; } \
-	  { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \
-	    if ($$2 == $$4) { files[d] = files[d] " " $$1; \
-	      if (++n[d] == $(am__install_max)) { \
-		print "f", d, files[d]; n[d] = 0; files[d] = "" } } \
-	    else { print "f", d "/" $$4, $$1 } } \
-	  END { for (d in files) print "f", d, files[d] }' | \
-	while read type dir files; do \
-	     if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \
-	     test -z "$$files" || { \
-	       echo " $(INSTALL_SCRIPT) $$files '$(DESTDIR)$(bindir)$$dir'"; \
-	       $(INSTALL_SCRIPT) $$files "$(DESTDIR)$(bindir)$$dir" || exit $$?; \
-	     } \
-	; done
-
-uninstall-dist_binSCRIPTS:
-	@$(NORMAL_UNINSTALL)
-	@list='$(dist_bin_SCRIPTS)'; test -n "$(bindir)" || exit 0; \
-	files=`for p in $$list; do echo "$$p"; done | \
-	       sed -e 's,.*/,,;$(transform)'`; \
-	test -n "$$list" || exit 0; \
-	echo " ( cd '$(DESTDIR)$(bindir)' && rm -f" $$files ")"; \
-	cd "$(DESTDIR)$(bindir)" && rm -f $$files
 
 mostlyclean-compile:
 	-rm -f *.$(OBJEXT)
@@ -524,9 +456,9 @@ distdir: $(DISTFILES)
 	done
 check-am: all-am
 check: check-am
-all-am: Makefile $(PROGRAMS) $(SCRIPTS)
+all-am: Makefile $(PROGRAMS)
 installdirs:
-	for dir in "$(DESTDIR)$(bindir)" "$(DESTDIR)$(bindir)"; do \
+	for dir in "$(DESTDIR)$(bindir)"; do \
 	  test -z "$$dir" || $(MKDIR_P) "$$dir"; \
 	done
 install: install-am
@@ -582,7 +514,7 @@ install-dvi: install-dvi-am
 
 install-dvi-am:
 
-install-exec-am: install-binPROGRAMS install-dist_binSCRIPTS
+install-exec-am: install-binPROGRAMS
 
 install-html: install-html-am
 
@@ -622,7 +554,7 @@ ps: ps-am
 
 ps-am:
 
-uninstall-am: uninstall-binPROGRAMS uninstall-dist_binSCRIPTS
+uninstall-am: uninstall-binPROGRAMS
 
 .MAKE: install-am install-strip
 
@@ -630,16 +562,15 @@ uninstall-am: uninstall-binPROGRAMS uninstall-dist_binSCRIPTS
 	clean-generic clean-libtool ctags distclean distclean-compile \
 	distclean-generic distclean-libtool distclean-tags distdir dvi \
 	dvi-am html html-am info info-am install install-am \
-	install-binPROGRAMS install-data install-data-am \
-	install-dist_binSCRIPTS install-dvi install-dvi-am \
-	install-exec install-exec-am install-html install-html-am \
-	install-info install-info-am install-man install-pdf \
-	install-pdf-am install-ps install-ps-am install-strip \
-	installcheck installcheck-am installdirs maintainer-clean \
-	maintainer-clean-generic mostlyclean mostlyclean-compile \
-	mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
-	tags uninstall uninstall-am uninstall-binPROGRAMS \
-	uninstall-dist_binSCRIPTS
+	install-binPROGRAMS install-data install-data-am install-dvi \
+	install-dvi-am install-exec install-exec-am install-html \
+	install-html-am install-info install-info-am install-man \
+	install-pdf install-pdf-am install-ps install-ps-am \
+	install-strip installcheck installcheck-am installdirs \
+	maintainer-clean maintainer-clean-generic mostlyclean \
+	mostlyclean-compile mostlyclean-generic mostlyclean-libtool \
+	pdf pdf-am ps ps-am tags uninstall uninstall-am \
+	uninstall-binPROGRAMS
 
 
 # Tell versions [3.59,3.63) of GNU make to not export all variables.
diff --git a/utils/op_perf_events_checker.c b/utils/op_perf_events_checker.c
index 519cafa..b771758 100644
--- a/utils/op_perf_events_checker.c
+++ b/utils/op_perf_events_checker.c
@@ -49,8 +49,10 @@ int main(int argc, char **argv)
 	}
 
 #if HAVE_PERF_EVENTS
-	/* If perf_events syscall is not implemented, the syscall below will fail
-	 * with ENOSYS (38).  If implemented, but the processor type on which this
+	/* Even if the perf_event_open syscall is implemented, the architecture may still
+	 * not provide a full implementation of the perf_events subsystem, in which case,
+	 * the syscall below will fail with ENOSYS (38).  If the perf_events subsystem is
+	 * implemented for the architecture, but the processor type on which this
 	 * program is running is not supported by perf_events, the syscall returns
 	 * ENOENT (2).
 	 */
@@ -64,6 +66,9 @@ int main(int argc, char **argv)
 		fprintf(stderr, "perf_event_open syscall returned %s\n", strerror(errno));
 	return errno;
 #else
+	if (_verbose)
+		fprintf(stderr, "perf_events is not available on this system\n");
+
 	return -1;
 #endif
 }
diff --git a/utils/opcontrol b/utils/opcontrol
deleted file mode 100644
index 89849d1..0000000
--- a/utils/opcontrol
+++ /dev/null
@@ -1,2283 +0,0 @@
-#!/bin/sh
-#
-# opcontrol is a script to control OProfile
-# opcontrol --help and opcontrol --list-events have info
-#
-# Copyright 2002
-# Read the file COPYING
-#
-# Authors: John Levon, Philippe Elie, Will Cohen, Jens Wilke, Daniel Hansel
-#
-# Copyright IBM Corporation 2007
-#
-# NOTE: This script should be as shell independent as possible
-
-SYSCTL=do_sysctl
-
-# A replacement function for the sysctl (procps package) utility which is
-# missing on some distribution (e.g. slack 7.0).
-# Handles only the -w option of sysctl.
-do_sysctl()
-{
-	if test "$1" != "-w"; then
-		echo "$0 unknown sysctl option" >&2
-		exit 1
-	fi
-
-	shift
-
-	arg=`echo $1 | awk -F= '{print $1}'`
-	val=`echo $1 | awk -F= '{print $2}'`
-
-	dev_name=`echo $arg | tr . /`
-
-	if test ! -f /proc/sys/$dev_name; then
-		echo "/proc/sys/$dev_name does not exist or is not a regular file" >&2
-		exit 1
-	fi
-	echo $val > /proc/sys/$dev_name
-}
-
-# Helper function to check if oprofile daemon is active.
-# Takes one argument: the "lock file" for the oprofile daemon.
-# The lock file may exist even if the daemon was killed or died in
-# some way.  So we do a kill SIG_DFL to test whether the daemon is
-# truly alive. If the lock file is stale (daemon dead), the kill will
-# not return '0'.
-is_oprofiled_active()
-{
-        [ -f "$1" ] && kill -0 `cat "$1"` 2>/dev/null
-}
-
-# check value is set
-error_if_empty()
-{
-	if test -z "$2"; then
-		echo "No value given for option $1" >&2
-		do_help
-		exit 1
-	fi
-}
-
-# guess_number_base() checks if string is a valid octal(8), hexidecimal(16),
-# or decimal number(10). The value is returned in $?. Returns 0, if string
-# isn't a octal, hexidecimal, or decimal number.
-guess_number_base()
-{
-	case "$1" in
-		0[Xx]*[!0-9a-fA-F]*)	return 0  ;; # Bad hex string
-		0[Xx][0-9a-fA-F]*)	return 16 ;; # Hex
-		*[!0-9]*)		return 0  ;; # Some non-digit char
-		[1-9]*)			return 10 ;; # Base 10
-		0*[89]*)		return 0  ;; # Bad octal string
-		0*)			return 8  ;; # Octal
-	esac
-	return 0
-}
-
-# check value is a valid number
-error_if_not_number()
-{
-	error_if_empty "$1" "$2"
-	guess_number_base "$2"
-	if test "$?" -eq 0 ; then
-		echo "Argument for $1, $2, is not a valid number." >&2
-		exit 1
-	fi
-}
-
-# check value is a base filename
-error_if_not_valid_savename()
-{
-	error_if_empty "$1" "$2"
-	bname=`basename "$2"`
-	if test "$2" !=  "$bname"; then
-		echo "Argument for $1, $2, cannot change directory." >&2
-		exit 1
-	fi
-	case "$2" in
-		# The following catches anything that is not
-		# 0-9, a-z, A-Z, an '-', ':', ',', '.', or '/'
-		*[!-[:alnum:]_:,./]*) 
-			echo "Argument for $1, $2, not allow to have special characters" >&2
-			exit 1;;
-	esac
-}
-
-error_if_invalid_arg()
-{
-	error_if_empty "$1" "$2"
-	case "$2" in
-		# The following catches anything that is not
-		# 0-9, a-z, A-Z, an '-', ':', ',', '.', or '/'
-		*[!-[:alnum:]_:,./]*) 
-			echo "Argument for $1, $2, is not valid argument." >&2
-			exit 1;;
-	esac
-}
-
-# rm_device arguments $1=file_name
-rm_device()
-{
-	if test -c "$1"; then
-		vecho "Removing $1"
-		rm "$1"
-	fi
-}
-
-
-# create_device arguments $1=file_name $2=MAJOR_NR $3=MINOR_NR
-create_device()
-{
-	vecho "Doing mknod $1"
-	mknod "$1" c $2 $3
-	if test "$?" != "0"; then
-		echo "Couldn't mknod $1" >&2
-		exit 1
-	fi
-	chmod 700 "$1"
-}
-
-
-move_and_remove()
-{
-	if test -e $1; then
-		mv $1 $SAMPLES_DIR/.tmp_reset.$$
-		rm -rf $SAMPLES_DIR/.tmp_reset.$$
-	fi
-}
-
-
-# verbose echo
-vecho()
-{
-	if test -n "$VERBOSE"; then
-		echo $@
-	fi
-}
-
-
-is_tool_available()
-{
-	if which $1 > /dev/null 2>&1; then
-		if test -x `which $1`; then
-			return 1
-		fi
-	fi
-
-	return 0
-}
-
-
-# print help message
-do_help()
-{
-    cat >&2 <<EOF
-opcontrol: usage:
-   -l/--list-events list event types and unit masks
-   -?/--help        this message
-   -v/--version     show version
-   --init           loads the oprofile module and oprofilefs
-   --setup          give setup arguments (may be omitted)
-   --status         show configuration
-   --start-daemon   start daemon without starting profiling
-   -s/--start       start data collection
-   -d/--dump        flush the collected profiling data
-   -t/--stop        stop data collection
-   -h/--shutdown    stop data collection and kill daemon
-   -V/--verbose[=all,sfile,arcs,samples,module,misc,ext]
-                    be verbose in the daemon log
-   --reset          clears out data from current session
-   --save=name      save data from current session to session_name
-   --deinit         unload the oprofile module and oprofilefs
-
-   -e/--event=eventspec
-
-      Choose an event. May be specified multiple times. Of the form
-      "default" or "name:count:unitmask:kernel:user", where :
-
-      name:     event name, e.g. CPU_CLK_UNHALTED or RTC_INTERRUPTS
-      count:    reset counter value e.g. 100000
-      unitmask: hardware unit mask e.g. 0x0f
-      kernel:   whether to profile kernel: 0 or 1
-      user:     whether to profile userspace: 0 or 1
-
-   -p/--separate=type,[types]
-
-       Separate profiles as follows :
-
-       none:     no profile separation
-       library:  separate shared library profiles per-application
-       kernel:   same as library, plus kernel profiles
-       thread:   per-thread/process profiles
-       cpu:      per CPU profiles
-       all:      all of the above
-
-   -c/--callgraph=#depth         enable callgraph sample collection with a
-                                 maximum depth. Use '0' to disable callgraph
-                                 profiling.
-   --session-dir=dir             place sample database in dir instead of
-                                 default location (/var/lib/oprofile)
-   -i/--image=name[,names]       list of binaries to profile (default is "all")
-   --vmlinux=file                vmlinux kernel image
-   --no-vmlinux                  no kernel image (vmlinux) available
-   --kernel-range=start,end      kernel range vma address in hexadecimal
-   --buffer-size=num             kernel buffer size in sample units.
-                                 Rules: A non-zero value goes into effect after
-                                 a '--shutdown/start' sequence.  A value of
-                                 zero sets this parameter back to default value
-                                 but does not go into effect until after a
-                                 '--deinit/init' sequence.
-   --buffer-watershed            kernel buffer watershed in sample units (2.6
-                                 kernel). Same rules as defined for
-                                 buffer-size.
-   --cpu-buffer-size=num         per-cpu buffer size in units (2.6 kernel)
-                                 Same rules as defined for buffer-size.
-   --note-table-size             kernel notes buffer size in notes units (2.4
-                                 kernel)
-
-   --xen                         Xen image (for Xen only)
-   --active-domains=<list>       List of domains in profiling session (for Xen)
-                                 (list contains domain ids separated by commas)
-
-  System z specific options
-
-  --s390hwsampbufsize=num        Number of 2MB areas used per CPU for storing sample data.
-EOF
-}
-
-
-# load the module and mount oprofilefs
-load_module_26()
-{
-	grep oprofilefs /proc/filesystems >/dev/null
-	if test "$?" -ne 0; then
-		modprobe oprofile
-		if test "$?" != "0"; then
-			# couldn't load the module
-			return
-		fi
-		grep oprofile /proc/modules >/dev/null
-		if test "$?" != "0"; then
-			# didn't find module
-			return
-		fi
-		grep oprofilefs /proc/filesystems >/dev/null
-		if test "$?" -ne 0; then
-			# filesystem still not around
-			return
-		fi
-	fi
-	mkdir /dev/oprofile >/dev/null 2>&1
-	grep oprofilefs /etc/mtab >/dev/null
-	if test "$?" -ne 0; then
-		mount -t oprofilefs nodev /dev/oprofile >/dev/null
-	fi
-	KERNEL_SUPPORT=yes
-	OPROFILE_AVAILABLE=yes
-}
-
-
-load_module_24()
-{
-	grep oprof /proc/devices >/dev/null
-	if test "$?" -ne 0; then
-		modprobe oprofile
-		if test "$?" != "0"; then
-			# couldn't load a module
-			return
-		fi
-		grep oprofile /proc/modules >/dev/null
-		if test "$?" != "0"; then
-			# didn't find module
-			return
-		fi
-	fi
-	KERNEL_SUPPORT=no
-	OPROFILE_AVAILABLE=yes
-}
-
-
-load_module()
-{
-	OPROFILE_AVAILABLE=no
-	load_module_26
-	if test "$OPROFILE_AVAILABLE" != "yes"; then
-		load_module_24
-	fi
-	if test "$OPROFILE_AVAILABLE" != "yes"; then
-		echo "Kernel doesn't support oprofile" >&2
-		exit 1
-	fi
-}
-
-# setup variables related to path or daemon. Set vars according to following
-# relationship: command-line-option > config-file-settings > defaults.
-# Note that upon entry SESSION_DIR may be set by command-line option.
-do_init_daemon_vars()
-{
-	# load settings from config file, keeping command-line value
-	# of SESSION_DIR if necessary.
-	if test -n "$SESSION_DIR"; then
-		SAVED=$SESSION_DIR
-	fi
-	do_load_setup
-	if test -n "$SAVED"; then
-		SESSION_DIR=$SAVED
-	fi
-
-	# daemon parameters (as in op_config.h).  Note that we preserve
-	# any previous value of SESSION_DIR
-	if test -z "$SESSION_DIR"; then
-		SESSION_DIR="/var/lib/oprofile"
-	fi
-	LOCK_FILE="$SESSION_DIR/lock"
-	SAMPLES_DIR="$SESSION_DIR/samples"
-	LOG_FILE="$SAMPLES_DIR/oprofiled.log"
-	CURRENT_SAMPLES_DIR="$SAMPLES_DIR/current"
-}
-
-
-# pick the appropriate device mount based on kernel
-decide_oprofile_device_mount()
-{
-	if test "$KERNEL_SUPPORT" = "yes"; then
-		MOUNT="/dev/oprofile"
-	else
-		MOUNT="/proc/sys/dev/oprofile"
-	fi
-}
-
-
-# pick the appropriate locations device for oprofile based on kernel
-decide_oprofile_device()
-{
-	if test "$KERNEL_SUPPORT" = "yes"; then
-		DEVICE_FILE="$MOUNT/buffer"
-	else
-		DEVICE_FILE="$SESSION_DIR/opdev"
-		NOTE_DEVICE_FILE="$SESSION_DIR/opnotedev"
-		HASH_MAP_DEVICE_FILE="$SESSION_DIR/ophashmapdev"
-	fi
-}
-
-# initialise parameters
-do_init()
-{
-	# for these three buffer size == 0 means use the default value
-	# hard-coded in op_user.h
-	BUF_SIZE=0
-	BUF_WATERSHED=0
-	CPU_BUF_SIZE=0
-	NOTE_SIZE=0
-	VMLINUX=
-	XENIMAGE="none"
-	VERBOSE=""
-	SEPARATE_LIB=0
-	SEPARATE_KERNEL=0
-	SEPARATE_THREAD=0
-	SEPARATE_CPU=0
-	CALLGRAPH=0
-	IBS_FETCH_EVENTS=""
-	IBS_FETCH_COUNT=0
-	IBS_FETCH_UNITMASK=0
-	IBS_OP_EVENTS=""
-	IBS_OP_COUNT=0
-	IBS_OP_UNITMASK=0
-
-	# System z specific values
-	S390_HW_SAMPLER=0
-	S390_HW_SAMPLER_BUFSIZE=0
-
-	OPROFILED="$OPDIR/oprofiled"
-
-	# location for daemon setup information
-	SETUP_DIR="/root/.oprofile"
-	SETUP_FILE="$SETUP_DIR/daemonrc"
-	SEC_SETUP_FILE="$SETUP_DIR/daemonrc_new"
-
-	# initialize daemon vars
-	decide_oprofile_device_mount
-	CPUTYPE=`cat $MOUNT/cpu_type`
-	OP_COUNTERS=`ls $MOUNT/ | grep "^[0-9]\+\$" | tr "\n" " "`
-	OP_COUNTERS="$OP_COUNTERS `ls $MOUNT/ | grep "^timer\+\$"`"
-	NR_CHOSEN=0
-
-	do_init_daemon_vars
-	decide_oprofile_device
-
-	DEFAULT_EVENT=`$OPHELP --get-default-event`
-
-	IS_TIMER=0
-	IS_PERFMON=0
-	if test "$CPUTYPE" = "timer"; then
-		IS_TIMER=1
-	else
-		case "$CPUTYPE" in
-			s390/*)
-				S390_HW_SAMPLER=1
-				;;
-			ia64/*)
-				IS_PERFMON=$KERNEL_SUPPORT
-				;;
-		esac
-	fi
-
-	# Ignore configured events when running in timer mode.
-	if test "$IS_TIMER" = 1; then
-		NR_CHOSEN=0
-	fi
-}
-
-
-create_dir()
-{
-	if test ! -d "$1"; then
-		mkdir -p "$1"
-		if test "$?" != "0"; then
-			echo "Couldn't mkdir -p $1" >&2
-			exit 1
-		fi
-		chmod 755 "$1"
-	fi
-}
-
-get_event()
-{
-	GOTEVENT=`eval "echo \\$CHOSEN_EVENTS_$1"`
-}
-
-set_event()
-{
-	eval "CHOSEN_EVENTS_$1=$2"
-}
-
-
-# save all the setup related information
-do_save_setup()
-{
-	create_dir "$SETUP_DIR"
-	SAVE_SETUP_FILE="$SETUP_FILE"
-
-# If the daemon is currently running, we want changes to the daemon config
-# stored in the secondary cache file so that 'opcontrol --status' will
-# show actual config data for the running daemon.  The next time the
-# daemon is restarted, we'll reload the config data from this secondary
-# cache file.
-
-	if is_oprofiled_active "$LOCK_FILE"; then
-		SETUP_FILE="$SEC_SETUP_FILE"
-		echo "The profiling daemon is currently active, so changes to the configuration"
-		echo "will be used the next time you restart oprofile after a --shutdown or --deinit."
-	fi
-
-	touch $SETUP_FILE
-	chmod 644 $SETUP_FILE
-	>$SETUP_FILE
-
-	echo "SESSION_DIR=$SESSION_DIR" >>$SETUP_FILE
-
-	if test "$NR_CHOSEN" != "0"; then
-		for f in `seq 0 $((NR_CHOSEN - 1))`; do
-			get_event $f
-			echo "CHOSEN_EVENTS_${f}=$GOTEVENT" >>$SETUP_FILE
-		done
-	fi
-
-	echo "NR_CHOSEN=$NR_CHOSEN" >>$SETUP_FILE
-
-	echo "SEPARATE_LIB=$SEPARATE_LIB" >> $SETUP_FILE
-	echo "SEPARATE_KERNEL=$SEPARATE_KERNEL" >> $SETUP_FILE
-	echo "SEPARATE_THREAD=$SEPARATE_THREAD" >> $SETUP_FILE
-	echo "SEPARATE_CPU=$SEPARATE_CPU" >> $SETUP_FILE
-	echo "VMLINUX=$VMLINUX" >> $SETUP_FILE
-	echo "IMAGE_FILTER=$IMAGE_FILTER" >> $SETUP_FILE
-	# write the actual information to file
-	if test "$BUF_SIZE" != "0"; then
-		echo "BUF_SIZE=$BUF_SIZE" >> $SETUP_FILE
-	fi
-	if test "$BUF_WATERSHED" != "0"; then
-		echo "BUF_WATERSHED=$BUF_WATERSHED" >> $SETUP_FILE
-	fi
-	if test "$KERNEL_SUPPORT" = "yes"; then
-		echo "CPU_BUF_SIZE=$CPU_BUF_SIZE" >> $SETUP_FILE
-	fi
-	if test "$KERNEL_SUPPORT" != "yes"; then
-		echo "NOTE_SIZE=$NOTE_SIZE" >> $SETUP_FILE
-	fi
-	echo "CALLGRAPH=$CALLGRAPH" >> $SETUP_FILE
-	if test "$KERNEL_RANGE"; then
-		echo "KERNEL_RANGE=$KERNEL_RANGE" >> $SETUP_FILE
-	fi
-	echo "XENIMAGE=$XENIMAGE" >> $SETUP_FILE
-	if test "$XEN_RANGE"; then
-		echo "XEN_RANGE=$XEN_RANGE" >> $SETUP_FILE
-	fi
-	if test "$S390_HW_SAMPLER" = "1" -a "$S390_HW_SAMPLER_BUFSIZE" != "0"; then
-		echo "S390_HW_SAMPLER_BUFSIZE=$S390_HW_SAMPLER_BUFSIZE" >> $SETUP_FILE
-	fi
-	SETUP_FILE="$SAVE_SETUP_FILE"
-}
-
-
-# reload all the setup-related information
-do_load_setup()
-{
-# If a secondary setup file exists and the daemon is not running,
-# then we'll move the data from the secondary file to the actual
-# setup file to prepare for daemon startup.
-	if test -z "$SESSION_DIR"; then
-		__TMP_SESSION_DIR="/var/lib/oprofile"
-	else
-		__TMP_SESSION_DIR="$SESSION_DIR"
-	fi
-
-	if test -f "$SEC_SETUP_FILE"; then
-		is_oprofiled_active "$__TMP_SESSION_DIR/lock" \
-		     || mv "$SEC_SETUP_FILE" "$SETUP_FILE"
-	fi
-
-	if test ! -f "$SETUP_FILE"; then return; fi
-
-	while IFS== read -r arg val; do
-		case "$arg" in
-			# The following catches anything that is not
-			# 0-9, a-z, A-Z, or an '_'
-			*[![:alnum:]_]*)
-				echo "Invalid variable \"$arg\" in $SETUP_FILE."
-				exit 1;;
-		esac
-		case "$val" in
-			# The following catches anything that is not
-			# 0-9, a-z, A-Z, an '-', ':', ',', '.', or '/'
-			*[!-[:alnum:]_:,./]*) 
-				echo "Invalid value \"$val\" for $arg in $SETUP_FILE."
-				exit 1;;
-		esac
-		eval "${arg}=${val}"
-	done < $SETUP_FILE
-}
-
-
-check_valid_args()
-{
-	if test -z "$VMLINUX"; then
-		echo "No vmlinux file specified. You must specify the correct vmlinux file, e.g." >&2
-		echo "opcontrol --vmlinux=/path/to/vmlinux" >&2
-		echo "If you do not have a vmlinux file, use " >&2
-		echo "opcontrol --no-vmlinux" >&2
-		echo "Enter opcontrol --help for full options" >&2
-		exit 1
-	fi
-
-	if test -f "$VMLINUX"; then
-		return
-	fi
-
-	if test "$VMLINUX" = "none"; then
-		return
-	fi
-
-	echo "The specified vmlinux file \"$VMLINUX\" doesn't exist." >&2
-	exit 1
-
-# similar check for Xen image
-	if test -f "$XENIMAGE"; then
-		return
-	fi
-
-	if test "$XENIMAGE" = "none"; then
-		return
-	fi
-
-	echo "The specified XenImage file \"$XENIMAGE\" does not exist." >&2
-	exit 1
-}
-
-
-# get start and end points of a file image (linux kernel or xen)
-# get_image_range parameter: $1=type_of_image (linux or xen)
-get_image_range()
-{
-	if test "$1" = "xen"; then
-		if test ! -z "$XEN_RANGE"; then
-			return;
-		fi
-		FILE_IMAGE="$XENIMAGE"
-	else
-		if test ! -z "$KERNEL_RANGE"; then
-			return;
-		fi
-		FILE_IMAGE="$VMLINUX"
-	fi
-
-	if test "$FILE_IMAGE" = "none"; then
-		return;
-	fi
-
-	if is_tool_available objdump; then
-		echo "objdump is not installed on this system, use opcontrol --kernel-range=start,end or opcontrol --xen-range= or install objdump"
-		exit 1
-	fi
-
-	# start at the start of .text, and end at _etext
-	range_info=`objdump -h $FILE_IMAGE 2>/dev/null | grep " .text "`
-	tmp1=`echo $range_info | awk '{print $4}'`
-	tmp2=`objdump -t $FILE_IMAGE 2>/dev/null | grep "_etext$" | awk '{ print $1 }'`
-
-	if test -z "$tmp1" -o -z "$tmp2"; then
-		echo "The specified file $FILE_IMAGE does not seem to be valid" >&2
-		echo "Make sure you are using the non-compressed image file (e.g. vmlinux not vmlinuz)" >&2
-		vecho "found start as \"$tmp1\", end as \"$tmp2\"" >&2
-		exit 1
-	fi
-
-	if test "$1" = "xen"; then
-		XEN_RANGE="`echo $tmp1`,`echo $tmp2`"
-		vecho "XEN_RANGE $XEN_RANGE"
-	else
-		KERNEL_RANGE="`echo $tmp1`,`echo $tmp2`"
-		vecho "KERNEL_RANGE $KERNEL_RANGE"
-	fi
-}
-
-
-# validate --separate= parameters. This function is called with IFS=,
-# so on each argument is splitted
-validate_separate_args()
-{
-	error_if_empty $1 $2	# we need at least one argument
-	local i=1
-	SEPARATE_LIB=0
-	SEPARATE_KERNEL=0
-	SEPARATE_THREAD=0
-	SEPARATE_CPU=0
-	while [ "$i" -lt "$#" ]; do
-		shift
-		case "$1" in
-			lib|library)
-				SEPARATE_LIB=1
-				;;
-			kernel)
-				# first implied by second
-				SEPARATE_LIB=1
-				SEPARATE_KERNEL=1
-				;;
-			thread)
-				SEPARATE_THREAD=1
-				;;
-			cpu)
-				SEPARATE_CPU=1
-				;;
-			all)
-				SEPARATE_LIB=1
-				SEPARATE_KERNEL=1
-				SEPARATE_THREAD=1
-				SEPARATE_CPU=1
-				;;
-			none)
-				SEPARATE_LIB=0
-				SEPARATE_KERNEL=0
-				SEPARATE_THREAD=0
-				SEPARATE_CPU=0
-				;;
-			*)
-				echo "invalid --separate= argument: $1"
-				exit 1
-		esac
-	done
-}
-
-
-# check the counters make sense, and resolve the hardware allocation
-verify_counters()
-{
-	if test "$IS_TIMER" = 1; then
-		if test "$NR_CHOSEN" != 0; then
-			echo "You cannot specify any performance counter events" >&2
-			echo "because OProfile is in timer mode." >&2
-			exit 1
-		fi
-		return
-	fi
-
-	OPHELP_ARGS=
-
-	if test "$NR_CHOSEN" != 0; then
-		for f in `seq 0 $((NR_CHOSEN - 1))`; do
-			get_event $f
-			if test "$GOTEVENT" != ""; then
-				verify_ibs $GOTEVENT
-				OPHELP_ARGS="$OPHELP_ARGS $GOTEVENT"
-			fi
-		done
-
-		if test ! -z "$OPHELP_ARGS" ; then
-			HW_CTRS=`$OPHELP --check-events $OPHELP_ARGS --callgraph=$CALLGRAPH`
-			if test "$?" != 0; then
-				exit 1
-			fi
-		fi
-	fi
-}
-
-
-# setup any needed default value in chosen events
-normalise_events()
-{
-	if test "$NR_CHOSEN" -le 0 || test "$IS_TIMER" = 1; then
-		return
-	fi
-
-	for f in `seq 0 $((NR_CHOSEN - 1))`; do
-		get_event $f
-		if test "$GOTEVENT" != ""; then
-			EVENT=`echo $GOTEVENT | awk -F: '{print $1}'`
-			EVENT_VAL=`$OPHELP $EVENT`
-			if test "$?" != 0; then
-				exit 1
-			fi
-			COUNT=`echo $GOTEVENT | awk -F: '{print $2}'`
-			UNIT_MASK=`echo $GOTEVENT | awk -F: '{print $3}'`
-			KERNEL=`echo $GOTEVENT | awk -F: '{print $4}'`
-			USER=`echo $GOTEVENT | awk -F: '{print $5}'`
-			TMPEVENT="$EVENT:$COUNT:$UNIT_MASK"
-			UNIT_MASK_NAMED="$UNIT_MASK"
-			UNIT_MASK=`$OPHELP --unit-mask $TMPEVENT`
-			if test "$?" != 0; then
-				exit 1
-			fi
-			if test -z "$KERNEL"; then
-				KERNEL=1
-			fi
-			if test -z "$USER"; then
-				USER=1
-			fi
-
-			set_event $f "$EVENT:$COUNT:$UNIT_MASK:$KERNEL:$USER"
-		fi
-	done
-}
-
-
-# get and check specified options
-do_options()
-{
-	EXCLUSIVE_ARGC=0
-	SETUP=no
-	NEED_SETUP=no
-	SEEN_EVENT=0
-
-	# note: default settings have already been loaded
-
-	while [ "$#" -ne 0 ]
-	do
-		arg=`printf %s $1 | awk -F= '{print $1}'`
-		val=`printf %s $1 | awk -F= '{print $2}'`
-		shift
-		if test -z "$val"; then
-			local possibleval=$1
-			printf %s $1 "$possibleval" | grep ^- >/dev/null 2>&1
-			if test "$?" != "0"; then
-				val=$possibleval
-				if [ "$#" -ge 1 ]; then
-					shift
-				fi
-			fi
-		fi
-
-		case "$arg" in
-
-			--init)
-				# this is already done in load_module
-				# because need to know the processor type
-				# and number of registers
-				INIT=yes;
-				EXCLUSIVE_ARGC=`expr $EXCLUSIVE_ARGC + 1`
-				EXCLUSIVE_ARGV="$arg"
-				;;
-
-			--setup)
-				SETUP=yes
-				;;
-
-			--start-daemon)
-				if test "$KERNEL_SUPPORT" != "yes"; then
-					echo "$arg unsupported. use \"--start\"" >&2
-					exit 1
-				fi
-				START_DAEMON=yes
-				EXCLUSIVE_ARGC=`expr $EXCLUSIVE_ARGC + 1`
-				EXCLUSIVE_ARGV="$arg"
-				;;
-
-			-s|--start)
-				START=yes
-				EXCLUSIVE_ARGC=`expr $EXCLUSIVE_ARGC + 1`
-				EXCLUSIVE_ARGV="$arg"
-				;;
-
-			-d|--dump)
-				DUMP=yes
-				ONLY_DUMP=yes
-				EXCLUSIVE_ARGC=`expr $EXCLUSIVE_ARGC + 1`
-				EXCLUSIVE_ARGV="$arg"
-				;;
-
-			-t|--stop)
-				if test "$KERNEL_SUPPORT" != "yes"; then
-					echo "$arg unsupported. use \"--shutdown\"" >&2
-					exit 1
-				fi
-				DUMP=yes
-				STOP=yes
-				EXCLUSIVE_ARGC=`expr $EXCLUSIVE_ARGC + 1`
-				EXCLUSIVE_ARGV="$arg"
-				;;
-
-			-h|--shutdown)
-				DUMP=yes
-				STOP=yes
-				KILL_DAEMON=yes
-				EXCLUSIVE_ARGC=`expr $EXCLUSIVE_ARGC + 1`
-				EXCLUSIVE_ARGV="$arg"
-				;;
-
-			--status)
-				STATUS=yes
-				;;
-
-			--reset)
-				DUMP=yes
-				RESET=yes
-				EXCLUSIVE_ARGC=`expr $EXCLUSIVE_ARGC + 1`
-				EXCLUSIVE_ARGV="$arg"
-				;;
-
-			--save)
-				error_if_not_valid_savename "$arg" "$val"
-				DUMP=yes
-				SAVE_SESSION=yes
-				SAVE_NAME=$val
-				EXCLUSIVE_ARGC=`expr $EXCLUSIVE_ARGC + 1`
-				EXCLUSIVE_ARGV="$arg"
-				;;
-
-			--deinit)
-				DUMP=yes
-				test ! -f "$LOCK_FILE" || {
-					STOP=yes
-					KILL_DAEMON=yes
-				}
-				DEINIT=yes
-				EXCLUSIVE_ARGC=`expr $EXCLUSIVE_ARGC + 1`
-				EXCLUSIVE_ARGV="$arg"
-				;;
-
-			# --setup options
-
-			--session-dir)
-				# already processed
-				;;
-			--buffer-size)
-				error_if_not_number "$arg" "$val"
-				BUF_SIZE=$val
-				DO_SETUP=yes
-				;;
-			--buffer-watershed)
-				if test "$KERNEL_SUPPORT" != "yes"; then
-					echo "$arg unsupported for this kernel version"
-					exit 1
-				fi
-				error_if_not_number "$arg" "$val"
-				BUF_WATERSHED=$val
-				DO_SETUP=yes
-				;;
-			--cpu-buffer-size)
-				if test "$KERNEL_SUPPORT" != "yes"; then
-					echo "$arg unsupported for this kernel version"
-					exit 1
-				fi
-				error_if_not_number "$arg" "$val"
-				CPU_BUF_SIZE=$val
-				DO_SETUP=yes
-				;;
-			-e|--event)
-				error_if_invalid_arg "$arg" "$val"
-				# reset any read-in defaults from daemonrc
-				if test "$SEEN_EVENT" = "0"; then
-					NR_CHOSEN=0
-					SEEN_EVENT=1
-				fi
-				if test "$val" = "default"; then
-					val=$DEFAULT_EVENT
-				fi
-				set_event $NR_CHOSEN "$val"
-				NR_CHOSEN=`expr $NR_CHOSEN + 1`
-				DO_SETUP=yes
-				;;
-			-p|--separate)
-				OLD_IFS=$IFS
-				IFS=,
-				validate_separate_args $arg $val
-				IFS=$OLD_IFS
-				DO_SETUP=yes
-				;;
-			-c|--callgraph)
-				if test ! -f $MOUNT/backtrace_depth; then
-					echo "Call-graph profiling unsupported on this kernel/hardware" >&2
-					exit 1
-				fi
-				error_if_not_number "$arg" "$val"
-				CALLGRAPH=$val
-				DO_SETUP=yes
-				;;
-			--vmlinux)
-				error_if_invalid_arg "$arg" "$val"
-				VMLINUX=$val
-				DO_SETUP=yes
-				;;
-			--no-vmlinux)
-				VMLINUX=none
-				DO_SETUP=yes
-				;;
-			--kernel-range)
-				error_if_invalid_arg "$arg" "$val"
-				KERNEL_RANGE=$val
-				DO_SETUP=yes
-				;;
-			--xen)
-				error_if_invalid_arg "$arg" "$val"
-				XENIMAGE=$val
-				DO_SETUP=yes
-				;;
-			--active-domains)
-				error_if_invalid_arg $arg $val
-				ACTIVE_DOMAINS=$val
-				DO_SETUP=yes
-				;;
-			--note-table-size)
-				if test "$KERNEL_SUPPORT" = "yes"; then
-					echo "\"$arg\" meaningless on this kernel" >&2
-					exit 1
-				else
-					error_if_not_number "$arg" "$val"
-					NOTE_SIZE=$val
-				fi
-				DO_SETUP=yes
-				;;
-			-i|--image)
-				error_if_invalid_arg "$arg" "$val"
-				if test "$val" = "all"; then
-					IMAGE_FILTER=
-				else
-					IMAGE_FILTER=$val
-				fi
-				DO_SETUP=yes
-				;;
-
-			-V|--verbose)
-				if test -z "$val"; then
-					VERBOSE="all"
-				else
-					error_if_invalid_arg "$arg" "$val"
-					VERBOSE=$val
-				fi
-				;;
-
-			-l|--list-events)
-				EXCLUSIVE_ARGC=`expr $EXCLUSIVE_ARGC + 1`
-				EXCLUSIVE_ARGV="$arg"
-				exec $OPHELP
-				;;
-
-			--s390hwsampbufsize)
-				error_if_not_number "$arg" "$val"
-				S390_HW_SAMPLER_BUFSIZE=$val
-				DO_SETUP=yes
-				;;
-
-
-			*)
-				echo "Unknown option \"$arg\". See opcontrol --help" >&2
-				exit 1
-				;;
-		esac
-	done
-
-	normalise_events
-	verify_counters
-
-	# error checking to make sure options make sense
-	if test "$EXCLUSIVE_ARGC" -gt 1; then
-		echo "Option \"$EXCLUSIVE_ARGV\" not valid with other options." >&2
-		exit 1
-	fi
-
-	if test "$SETUP" = "yes" -a "$DO_SETUP" != "yes"; then
-		echo "No options specified for --setup." >&2
-		exit 1
-	fi
-
-	if test -n "$VERBOSE"; then
-		if test "$START" != "yes" -a "$START_DAEMON" != "yes"; then
-			echo "Option --verbose may only be used with --start or --start-daemon" >&2
-			exit 1
-		fi
-	fi
-
-	if test "$DO_SETUP" = "yes"; then
-		SETUP="$DO_SETUP"
-	fi
-
-	if test "$EXCLUSIVE_ARGC" -eq 1 -a "$SETUP" = "yes"; then
-		if test "$EXCLUSIVE_ARGV" != "--start-daemon" -a "$EXCLUSIVE_ARGV" != "--start"; then
-			echo "Option \"--setup\" not valid with \"$EXCLUSIVE_ARGV\"." >&2
-			exit 1
-		fi
-	fi
-
-	vecho "Parameters used:"
-	vecho "SESSION_DIR $SESSION_DIR"
-	vecho "LOCK_FILE   $LOCK_FILE"
-	vecho "SAMPLES_DIR $SAMPLES_DIR"
-	vecho "CURRENT_SAMPLES_DIR $CURRENT_SAMPLES_DIR"
-	vecho "CPUTYPE $CPUTYPE"
-	if test "$BUF_SIZE" != "0"; then
-		vecho "BUF_SIZE $BUF_SIZE"
-	else
-		vecho "BUF_SIZE default value"
-	fi
-	if test "$BUF_WATERSHED" != "0"; then
-		vecho "BUF_WATERSHED $BUF_WATERSHED"
-	else
-		vecho "BUF_WATERSHED default value"
-	fi
-	if test "$KERNEL_SUPPORT" = "yes"; then
-		if test "$CPU_BUF_SIZE" != "0"; then
-			vecho "CPU_BUF_SIZE $CPU_BUF_SIZE"
-		else
-			vecho "CPU_BUF_SIZE default value"
-		fi
-	fi
-
-	vecho "SEPARATE_LIB $SEPARATE_LIB"
-	vecho "SEPARATE_KERNEL $SEPARATE_KERNEL"
-	vecho "SEPARATE_THREAD $SEPARATE_THREAD"
-	vecho "SEPARATE_CPU $SEPARATE_CPU"
-	vecho "CALLGRAPH $CALLGRAPH"
-	vecho "VMLINUX $VMLINUX"
-	vecho "KERNEL_RANGE $KERNEL_RANGE"
-	vecho "XENIMAGE $XENIMAGE"
-	vecho "XEN_RANGE $XEN_RANGE"
-}
-
-
-# stop any existing daemon
-do_stop()
-{
-	if test ! -f "$LOCK_FILE"; then
-		echo "Daemon not running" >&2
-		return
-	fi
-
-	kill -0 `cat $LOCK_FILE` 2>/dev/null
-	if test "$?" -ne 0; then
-		echo "Detected stale lock file. Removing." >&2
-		rm -f "$LOCK_FILE"
-		return
-	fi
-
-	if test $KERNEL_SUPPORT = "yes" \
-	    && test 0 != $(cat /dev/oprofile/enable); then
-		echo "Stopping profiling."
-		echo 0 >/dev/oprofile/enable
-	fi
-	kill -USR2 `cat $LOCK_FILE` 2>/dev/null
-}
-
-
-# kill the daemon process(es)
-do_kill_daemon()
-{
-	if test ! -f "$LOCK_FILE"; then
-		# no error message, do_kill_daemon imply stop and stop already
-		# output "Daemon not running"
-		return
-	fi
-
-	kill -0 `cat $LOCK_FILE` 2>/dev/null
-	if test "$?" -ne 0; then
-		echo "Detected stale lock file. Removing." >&2
-		rm -f "$LOCK_FILE"
-		return
-	fi
-
-	echo "Killing daemon."
-
-	if test $KERNEL_SUPPORT = "yes"; then
-		kill -TERM `cat $LOCK_FILE`
-	else
-		echo 1 >/proc/sys/dev/oprofile/dump_stop
-	fi
-
-	COUNT=0
-	while test -n "`pidof oprofiled`"
-	do
-		sleep 1
-
-		# because oprofiled only sets a variable inside the
-		# signal handler itself, it's possible to miss a
-		# signal just before it goes to sleep waiting for
-		# data from the kernel that never arrives. So we
-		# remind it it needs to die - this works because
-		# the signal will bring oprofiled out of the kernel
-		# back into userspace
-		if test $KERNEL_SUPPORT = "yes"; then
-			pid=`cat $LOCK_FILE 2>/dev/null`
-			kill -TERM "$pid" 2>/dev/null
-		fi
-
-		COUNT=`expr $COUNT + 1`
-
-		# IBS can generate a large number of samples/events.
-		# Therefore, extend the delay before killing
-		if test "$IBS_FETCH_COUNT" != "0" \
-		     -o "$IBS_OP_COUNT" != "0" ; then
-			DELAY_KILL=60
-		else
-			DELAY_KILL=15
-		fi
-		if test "$COUNT" -eq "$DELAY_KILL"; then
-			echo "Daemon stuck shutting down; killing !"
-			kill -9 `cat $LOCK_FILE`
-		fi
-	done
-	sleep 1
-	# already removed unless we forced the kill
-	rm -f "$SESSION_DIR/lock"
-}
-
-
-rm_devices_24()
-{
-	rm_device "$DEVICE_FILE"
-	rm_device "$NOTE_DEVICE_FILE"
-	rm_device "$HASH_MAP_DEVICE_FILE"
-}
-
-
-create_devices_24()
-{
-	MAJOR_NR=`grep oprof /proc/devices | awk '{print $1}'`
-
-	create_device $DEVICE_FILE $MAJOR_NR 0
-	create_device $NOTE_DEVICE_FILE $MAJOR_NR 2
-	create_device $HASH_MAP_DEVICE_FILE $MAJOR_NR 1
-}
-
-# create jitdump directory and remove any old files from
-# a previous run
-prep_jitdump() {
-	local dumpdir=$SESSION_DIR/jitdump
-	test -d $dumpdir || {
-		mkdir -p $dumpdir;
-		chmod 777 $dumpdir;
-		return;
-	}
-	# VMs may already be running when profiling is started, so
-	# remove only dump files that are not in use
-	for I in $dumpdir/*; do
-		test -f $I || continue;
-		local pid=`basename $I .dump`;
-		if test -d /proc/$pid; then
-			local files=`find /proc/$pid/fd -lname $I`;
-			test -n "$files" && continue;
-		fi
-		rm -f $I;
-	done
-}
-
-# setup and start module
-do_setup()
-{
-	create_dir "$SESSION_DIR"
-
-	if test "$KERNEL_SUPPORT" != "yes"; then
-		rm_devices_24
-		create_devices_24
-	fi
-
-	create_dir "$CURRENT_SAMPLES_DIR"
-
-	prep_jitdump;
-}
-
-
-# set a sysctl/oprofilefs parameter
-set_param()
-{
-	if test "$KERNEL_SUPPORT" = "yes"; then
-		echo $2 >$MOUNT/$1
-	else
-		$SYSCTL -w dev.oprofile.$1=$2
-	fi
-}
-
-
-# set a sysctl/oprofilefs counter parameter
-set_ctr_param()
-{
-	# no such thing for perfmon
-	if test "$IS_PERFMON" = "yes"; then
-		return
-	fi
-
-	if test "$KERNEL_SUPPORT" = "yes"; then
-		if test "$1" = "timer" -a "$2" != "enabled"; then
-			# For now everything other than 'enabled' is
-			# unsupported for the `timer' counter.
-			return
-		fi
-		if test -e $MOUNT/$1; then
-			echo $3 >$MOUNT/$1/$2 2>/dev/null
-			if test $? -ne 0; then
-				echo "Value $3 is not accepted by $MOUNT/$1/$2"
-				exit 1
-			fi
-		else
-			echo -n "Error: counter $1 not available"
-			if test -e /proc/sys/kernel/nmi_watchdog; then
-				echo " nmi_watchdog using this resource ? Try:"
-				echo "opcontrol --deinit"
-				echo "echo 0 > /proc/sys/kernel/nmi_watchdog"
-			fi
-			exit 1
-		fi
-	else
-		$SYSCTL -w dev.oprofile.$1.$2=$3
-	fi
-}
-
-
-# returns 1 if $CPUTYPE is a PPC64 variant
-is_non_cell_ppc64_variant()
-{
-	case "$1" in
-	    ppc64/*cell*)
-		return 0
-		;;
-	    ppc64/*)
-		return 1
-		;;
-	    *)
-		return 0
-		;;
-	esac
-}
-
-
-# The arch_event_validate procedure gives the
-# opportunity to validate events and enforce any
-# arch-specific restritions, etc.
-arch_event_validate()
-{
-
-	is_non_cell_ppc64_variant $CPUTYPE
-	if test $? -ne 0 ; then
-		# For PPC64 architectures, the values required to program
-		# MMCRs for the given event are returned along with the event.
-		# Here we use those values to ensure that all chosen events
-		# are from the same group.
-		MMCR0=`echo $EVENT_STR | awk '{print $2}'`
-		MMCR1=`echo $EVENT_STR | awk '{print $3}'`
-		MMCRA=`echo $EVENT_STR | awk '{print $4}'`
-		MMCR0_VAL=`echo $MMCR0 | awk -F: '{print $2}'`
-		MMCR1_VAL=`echo $MMCR1 | awk -F: '{print $2}'`
-		MMCRA_VAL=`echo $MMCRA | awk -F: '{print $2}'`
-
-		## mmcr0, mmcr1, mmcra are for all ppc64 counters
-		# Save first event mmcr settings to compare with additional
-		# events.  All events must have the same mmcrx values i.e. be in
-		# the same group.  Only one event is assigned per counter,
-		# hence there will not be a conflict on the counters
-		if [ "$MMCR0_CK_VAL" = "" ] ; then
-			MMCR0_CK_VAL=$MMCR0_VAL
-			MMCR1_CK_VAL=$MMCR1_VAL
-			MMCRA_CK_VAL=$MMCRA_VAL
-		else
-			# make sure all events are from the same group
-			if test $MMCR0_CK_VAL != $MMCR0_VAL \
-				-o $MMCR1_CK_VAL != $MMCR1_VAL \
-				-o $MMCRA_CK_VAL != $MMCRA_VAL ; then
-				echo "ERROR: The specified events are not from the same group."
-				echo "       Use 'opcontrol --list-events' to see event groupings."
-				exit 1
-			fi
-		fi
-
-		# Check if all user/kernel flags per-counter are matching.
-		if [ "$USER_CK" = "" ] ; then
-			USER_CK=$USER
-			KERNEL_CK=$KERNEL
-		else
-			if test $USER_CK != $USER \
-				-o $KERNEL_CK != $KERNEL ; then
-				echo "ERROR: All kernel/user event flags must match."
-				exit 1
-			fi
-		fi
-	fi
-	if [ "$CPUTYPE" = "ppc64/cell-be" ]; then
-		event_num=`echo $EVENT_STR | awk '{print $1}'`
-		# PPU event and cycle events can be measured at
-		# the same time.  SPU event can not be measured
-		# at the same time as any other event.  Similarly for
-		# SPU Cycles
-
-		# We use EVNT_MSK to track what events have already
-		# been seen.  Valid values are:
-		#    NULL string -  no events seen yet
-		#    1 - PPU CYCLES or PPU Event seen
-		#    2 - SPU CYCLES seen
-		#    3 - SPU EVENT seen
-
-		# check if event is PPU_CYCLES
-		if [ "$event_num" = "1" ]; then
-			if [ "$EVNT_MSK" = "1" ] || [ "$EVNT_MSK" = "" ]; then
-				EVNT_MSK=1
-			else
-				echo "PPU CYCLES not compatible with previously specified event"
-				exit 1
-		fi
-
-		# check if event is SPU_CYCLES
-		elif [ "$event_num" = "2" ]; then
-			if [ "$EVNT_MSK" = "" ]; then
-				EVNT_MSK=2
-			else
-				echo "SPU CYCLES not compatible with any other event"
-				exit 1
-			fi
-
-		# check if event is SPU Event profiling
-		elif [ "$event_num" -ge "4100" ] && [ "$event_num" -le "4163" ] ; then
-			if [ "$EVNT_MSK" = "" ]; then
-				EVNT_MSK=3
-			else
-				echo "SPU event profiling not compatible with any other event"
-				exit 1
-			fi
-
-			# Check to see that the kernel supports SPU event
-			# profiling.  Note, if the file exits it should have
-			# the LSB bit set to 1 indicating SPU event profiling
-			# support. For now, it is sufficient to test that the
-			# file exists.
-			if test ! -f /dev/oprofile/cell_support; then
-				echo "Kernel does not support SPU event profiling"
-				exit 1
-			fi
-
-			# check if event is PPU Event profiling (all other
-			# events are PPU events)
-		else
-			if [ "$EVNT_MSK" = "1" ] || [ "$EVNT_MSK" = "" ]; then
-				EVNT_MSK=1
-			else
-				echo "PPU profiling not compatible with previously specified event"
-				exit 1
-			fi
-		fi
-	fi
-	if test "$S390_HW_SAMPLER" = "1" -a "$EVENT" = "HWSAMPLING"; then
-		if test "$CALLGRAPH" != "0"; then
-			echo "Callgraph sample collection is not supported with " >&2
-			echo "System z hardware sampling.  Please use --callgraph=0 " >&2
-			echo "or enable timer based sampling." >&2
-			exit 1
-		fi
-		echo "$COUNT" > $MOUNT/0/count
-		NEW_COUNT=`cat $MOUNT/0/count`
-		if test "$NEW_COUNT" -lt "$COUNT"; then
-			echo "Warning: Hardware sampling intervals higher than $NEW_COUNT are not supported." >&2
-			echo "Value is set to $NEW_COUNT." >&2
-			COUNT=$NEW_COUNT
-		fi
-		if test "$NEW_COUNT" -gt "$COUNT"; then
-			echo "Warning: Hardware sampling intervals lower than $NEW_COUNT are not supported." >&2
-			echo "Value is set to $NEW_COUNT." >&2
-			COUNT=$NEW_COUNT
-		fi
-	fi
-
-	len=`echo -n $event_num | wc -c`
-	num_chars_in_grpid=`expr $len - 2`
-	GRP_NUM_VAL=`echo | awk '{print substr("'"${event_num}"'",1,"'"${num_chars_in_grpid}"'")}'`
-	if [ "$GRP_NUM_CK_VAL" = "" ] ; then
-		GRP_NUM_CK_VAL=$GRP_NUM_VAL
-	else
-		if test $GRP_NUM_CK_VAL != $GRP_NUM_VAL ; then
-			echo "ERROR: The specified events are not from the same group." >&2
-			echo "       Use 'opcontrol --list-events' to see event groupings." >&2
-			exit 1
-		fi
-	fi
-}
-
-
-do_param_setup()
-{
-	# different names
-	if test $BUF_SIZE != 0; then
-		if test "$KERNEL_SUPPORT" = "yes"; then
-			echo $BUF_SIZE >$MOUNT/buffer_size
-		else
-			$SYSCTL -w dev.oprofile.bufsize=$BUF_SIZE
-		fi
-	fi
-
-	if test $BUF_WATERSHED != 0; then
-		if test "$KERNEL_SUPPORT" = "yes"; then
-			echo $BUF_WATERSHED >$MOUNT/buffer_watershed
-		else
-			echo "buffer-watershed not supported - ignored" >&2
-		fi
-	fi
-
-	if test $CPU_BUF_SIZE != 0; then
-		if test "$KERNEL_SUPPORT" = "yes"; then
-			echo $CPU_BUF_SIZE >$MOUNT/cpu_buffer_size
-		else
-			echo "cpu-buffer-size not supported - ignored" >&2
-		fi
-	fi
-
-	if test -n "$ACTIVE_DOMAINS"; then
-		if test "$KERNEL_SUPPORT" = "yes"; then
-			echo $ACTIVE_DOMAINS >$MOUNT/active_domains
-		else
-			echo "active-domains not supported - ignored" >&2
-		fi
-	fi
-
-	if test $NOTE_SIZE != 0; then
-		set_param notesize $NOTE_SIZE
-	fi
-
-	if test "$KERNEL_SUPPORT" = "yes" -a -f $MOUNT/backtrace_depth; then
-		set_param backtrace_depth $CALLGRAPH
-	elif test "$CALLGRAPH" != "0"; then
-		echo "Call-graph profiling not supported - ignored" >&2
-	fi
-
-	if test "$IS_TIMER" = 1; then
-		return
-	fi
-
-	if test "$S390_HW_SAMPLER" = "1" -a "$S390_HW_SAMPLER_BUFSIZE" != "0"; then
-		echo $S390_HW_SAMPLER_BUFSIZE >$MOUNT/0/hw_sdbt_blocks
-	fi
-
-	# use the default setup if none set
-	if test "$NR_CHOSEN" = 0; then
-		set_event 0 $DEFAULT_EVENT
-		NR_CHOSEN=1
-		HW_CTRS=`$OPHELP --check-events $DEFAULT_EVENT --callgraph=$CALLGRAPH`
-		echo "Using default event: $DEFAULT_EVENT"
-	fi
-
-	# Necessary in this case :
-	# opcontrol ctr0-on ctr1-on then opcontrol ctr0-on
-	for f in $OP_COUNTERS ; do
-		set_ctr_param $f enabled 0
-		set_ctr_param $f event 0
-		set_ctr_param $f count 0
-
-		if test -f $MOUNT/$f/extra ; then
-		    set_ctr_param $f extra 0
-		fi
-	done
-
-	# Check if driver has IBS support
-	if test -d $MOUNT/ibs_fetch; then
-		# Reset driver's IBS fetch setting
-		set_param ibs_fetch/enable 0
-	fi
-	
-	if test -d $MOUNT/ibs_op ; then
-		# Reset driver's IBS op setting
-		set_param ibs_op/enable 0
-	fi	
-
-	verify_counters
-
-	OPROFILED_EVENTS=
-	for f in `seq 0 $((NR_CHOSEN - 1))`; do
-		get_event $f
-		if test "$GOTEVENT" != ""; then
-			EVENT=`echo $GOTEVENT | awk -F: '{print $1}'`
-			EVENT_STR=`$OPHELP $EVENT`
-			EVENT_VAL=`echo $EVENT_STR | awk '{print $1}'`
-			COUNT=`echo $GOTEVENT | awk -F: '{print $2}'`
-			UNIT_MASK=`echo $GOTEVENT | awk -F: '{print $3}'`
-			KERNEL=`echo $GOTEVENT | awk -F: '{print $4}'`
-			USER=`echo $GOTEVENT | awk -F: '{print $5}'`
-			CTR=`echo $HW_CTRS | awk "{print \\$$((f + 1))}"`
-			arch_event_validate
-
-			if test "$EVENT" = "SPU_CYCLES"; then
-				if test "$SEPARATE_KERNEL" = "1"; then
-					SEPARATE_KERNEL=0
-					echo "Ignoring --separate=kernel option with SPU_CYCLES"
-				fi
-				if test "$SEPARATE_LIB" = "0"; then
-					SEPARATE_LIB=1
-					echo "Forcing required option --separate=lib with SPU_CYCLES"
-				fi
-
-				# It is possible for a single application to be
-				# running on all SPUs simultaneously.  Without
-				# SEPARATE_CPU, the resulting sample data would
-				# consist of a single sample file.  If all SPUs
-				# were truly running the same code, the merging
-				# of sample data would be fine.  However, an
-				# application file may have multiple SPU images
-				# embedded within it, resulting in different
-				# code running on different SPUs.  Therefore,
-				# we force SEPARATE_CPU in order to properly
-				# handle this case.
-				if test "$SEPARATE_CPU" = "0"; then
-					SEPARATE_CPU=1
-					echo "Forcing required option --separate=cpu with SPU_CYCLES"
-
-				fi
-			fi
-
-			if [ "$CTR" = "ibs_fetch" -o "$CTR" = "ibs_op" ] ; then
-				# Handle IBS events setup
-				do_param_setup_ibs
-				continue
-			fi
-
-			if test "$EVENT" = "RTC_INTERRUPTS"; then
-				set_param rtc_value $COUNT
-				$SYSCTL -w dev.oprofile.rtc_value=$COUNT
-			else
-				set_ctr_param $CTR enabled 1
-				set_ctr_param $CTR event $EVENT_VAL
-				loop_count=1
-				for i in ${EVENT_STR}; do
-					#Skip first argument of EVENT_STR (event val) since we've already
-					#processed that value.
-					if test "$loop_count" -gt 1; then
-						KEY=`echo $i | awk -F: '{print $1}'`
-						VAL=`echo $i | awk -F: '{print $2}'`
-						set_ctr_param "" $KEY $VAL
-					fi
-					loop_count=$((loop_count+1))
-				done
-				set_ctr_param $CTR count $COUNT
-				set_ctr_param $CTR kernel $KERNEL
-				set_ctr_param $CTR user $USER
-				set_ctr_param $CTR unit_mask $UNIT_MASK
-
-				EXTRA=`$OPHELP --extra-mask $EVENT:$COUNT:$UNIT_MASK_NAMED`
-				if test "$EXTRA" -ne 0 ; then
-					if ! test -f $MOUNT/$CTR/extra ; then
-						echo >&2 "Warning: $GOTEVENT has extra mask, but kernel does not support extra field"
-						echo >&2 "Please update your kernel or use a different event. Will miscount."
-					else
-						set_ctr_param $CTR extra $EXTRA
-					fi
-				fi
-			fi
-			OPROFILED_EVENTS=${OPROFILED_EVENTS}$EVENT:$EVENT_VAL:
-			OPROFILED_EVENTS=${OPROFILED_EVENTS}$CTR:$COUNT:$UNIT_MASK:
-			OPROFILED_EVENTS=${OPROFILED_EVENTS}$KERNEL:$USER,
-		fi
-	done
-
-	# For PPC64 architectures we need to set the enable_kernel and
-	# enable_user flags for enabling/disabling user/kernel domain
-	# profiling. All per-counter user/kernel flags must match.
-	# This condition is checked previously by arch_event_validate.
-	# This statement uses the last event's user/kernel flags to set
-	# /dev/oprofile/enable_kernel and /dev/oprofile/enable_user.
-	is_non_cell_ppc64_variant $CPUTYPE
-	if test $? -ne 0 ; then
-		set_param "enable_kernel" $KERNEL
-		set_param "enable_user" $USER
-	fi
-
-}
-
-
-do_start_daemon()
-{
-
-	$OPDIR/op-check-perfevents
-	if [ "$?" = "0" ]; then
-		echo "ATTENTION: Use of opcontrol is discouraged.  Please see the man page for operf."
-	fi
-
-	if test -f "$LOCK_FILE"; then
-		kill -0 `cat $LOCK_FILE` 2>/dev/null
-		if test "$?" -eq 0; then
-			return;
-		else
-			echo "Detected stale lock file. Removing." >&2
-			rm -f "$LOCK_FILE"
-		fi
-	fi
-
-	do_setup
-	check_valid_args
-	get_image_range "linux"
-	get_image_range "xen"
-	do_param_setup
-
-	OPD_ARGS=" \
-		--session-dir=$SESSION_DIR \
-		--separate-lib=$SEPARATE_LIB \
-		--separate-kernel=$SEPARATE_KERNEL \
-		--separate-thread=$SEPARATE_THREAD \
-		--separate-cpu=$SEPARATE_CPU"
-
-	if test "$IS_TIMER" = 1; then
-		OPD_ARGS="$OPD_ARGS --events="
-	else
-		if ! test -z "$OPROFILED_EVENTS"; then
-			OPD_ARGS="$OPD_ARGS --events=$OPROFILED_EVENTS"
-		fi
-	fi
-
-	if test "$VMLINUX" = "none"; then
-		OPD_ARGS="$OPD_ARGS --no-vmlinux"
-	else
-		OPD_ARGS="$OPD_ARGS --vmlinux=$VMLINUX --kernel-range=$KERNEL_RANGE"
-	fi
-
-	if ! test "$XENIMAGE" = "none"; then
-		OPD_ARGS="$OPD_ARGS --xen-image=$XENIMAGE --xen-range=$XEN_RANGE"
-	fi
-
-	if ! test -z "$IMAGE_FILTER"; then
-		OPD_ARGS="$OPD_ARGS --image=$IMAGE_FILTER"
-	fi
-
-	if test -n "$VERBOSE"; then
-		OPD_ARGS="$OPD_ARGS --verbose=$VERBOSE"
-	fi
-
-	help_start_daemon_with_ibs
-
-	vecho "executing oprofiled $OPD_ARGS"
-
-	$OPROFILED $OPD_ARGS
-
-	COUNT=0
-	while ! test -f "$SESSION_DIR/lock"
-	do
-		sleep 1
-		COUNT=`expr $COUNT + 1`
-		if test "$COUNT" -eq 10; then
-			echo "Couldn't start oprofiled." >&2
-			echo "Check the log file \"$LOG_FILE\" and kernel syslog" >&2
-			exit 1
-		fi
-	done
-
-	echo "Daemon started."
-}
-
-do_start()
-{
-	prep_jitdump;
-	if test "$KERNEL_SUPPORT" = "yes"; then
-		echo 1 >$MOUNT/enable
-	fi
-	kill -USR1 `cat $LOCK_FILE` 2>/dev/null
-	echo "Profiler running."
-}
-
-
-# print status
-do_status()
-{
-	OPROFILED_PID=`cat $SESSION_DIR/lock 2>/dev/null`
-	if test -n "$OPROFILED_PID" -a -d "/proc/$OPROFILED_PID"; then
-		if test "$KERNEL_SUPPORT" = yes \
-		    && test 0 != $(cat /dev/oprofile/enable); then
-			echo "Daemon running: pid $OPROFILED_PID"
-		else
-			echo "Daemon paused: pid $OPROFILED_PID"
-		fi
-	else
-		echo "Daemon not running"
-	fi
-	echo "Session-dir: $SESSION_DIR"
-
-	if test "$NR_CHOSEN" != "0"; then
-		for f in `seq 0 $((NR_CHOSEN - 1))`; do
-			get_event $f
-			echo "Event $f: $GOTEVENT"
-		done
-	fi
-
-	SEPARATE=""
-	if test "$SEPARATE_LIB" = "1"; then
-		SEPARATE="library";
-	fi
-	if test "$SEPARATE_KERNEL" = "1"; then
-		SEPARATE="$SEPARATE kernel";
-	fi
-	if test "$SEPARATE_THREAD" = "1"; then
-		SEPARATE="$SEPARATE thread";
-	fi
-	if test "$SEPARATE_CPU" = "1"; then
-		SEPARATE="$SEPARATE cpu";
-	fi
-
-	if test -z "$SEPARATE"; then
-		SEPARATE=none
-	fi
-
-	echo "Separate options: $SEPARATE"
-	echo "vmlinux file: $VMLINUX"
-
-	if test -z "$IMAGE_FILTER"; then
-		echo "Image filter: none"
-	else
-		echo "Image filter: $IMAGE_FILTER"
-	fi
-
-	echo "Call-graph depth: $CALLGRAPH"
-	if test "$BUF_SIZE" != "0"; then
-		echo "Buffer size: $BUF_SIZE"
-	fi
-	if test "$KERNEL_SUPPORT" != "yes"; then
-		if test "$NOTE_SIZE" != "0"; then
-			echo "Note buffer size: $NOTE_SIZE"
-		fi
-	else
-		if test "$BUF_WATERSHED" != "0"; then
-			echo "CPU buffer watershed: $BUF_WATERSHED"
-		fi
-		if test "$CPU_BUF_SIZE" != "0"; then
-			echo "CPU buffer size: $CPU_BUF_SIZE"
-		fi
-	fi
-	if test "$S390_HW_SAMPLER" = "1"; then
-		echo -n "System z hardware sampling buffer size (in 2MB areas): "
-		if test "$S390_HW_SAMPLER_BUFSIZE" = "0"; then
-			cat $MOUNT/0/hw_sdbt_blocks
-		else
-			echo "$S390_HW_SAMPLER_BUFSIZE"
-		fi
-	fi
-
-	exit 0
-}
-
-
-# do_dump_data
-# returns 0 if successful
-# returns 1 if the daemon is unable to dump data
-# exit 1 if we need to be root to dump
-do_dump_data()
-{
-	# make sure that the daemon is not dead and gone
-	if test -e "$SESSION_DIR/lock"; then
-		OPROFILED_PID=`cat $SESSION_DIR/lock`
-		if test ! -d "/proc/$OPROFILED_PID"; then
-			echo "dump fail: daemon died during last run ?" >&2
-			return 1;
-		fi
-	else
-		return 1;
-	fi
-
-	if test "$KERNEL_SUPPORT" = "yes"; then
-		if ! test -w $MOUNT/dump; then
-			if test `id -u` != "0"; then
-				echo "You must be root to dump with this kernel version"
-				exit 1
-			fi
-		fi
-		# trigger oprofiled to execute opjitconv
-		if test -p "$SESSION_DIR/opd_pipe"; then
-			echo do_jitconv >> $SESSION_DIR/opd_pipe
-		fi
-		rm -f "$SESSION_DIR/complete_dump"
-		echo 1 > $MOUNT/dump
-		# loop until the complete_dump file is created to
-		# signal that the dump has been completed
-		while [ \( ! -e "$SESSION_DIR/complete_dump" \) ]
-		do
-			if test ! -d "/proc/$OPROFILED_PID"; then
-				echo "dump fail: either daemon died during last run or dies during dump" >&2
-				return 1
-			fi
-			sleep 1;
-		done
-	else
-		echo 1 > $MOUNT/dump
-		# HACK !
-		sleep 2
-	fi
-	cp -r /dev/oprofile/stats "$SAMPLES_DIR/current"
-
-	return 0;
-}
-
-
-# do_dump
-# returns 0 if successful
-# exits if unsuccessful
-do_dump()
-{
-	do_dump_data
-	if test $? -ne 0 -a "$ONLY_DUMP" = "yes"; then
-		echo "Unable to complete dump of oprofile data: is the oprofile daemon running?" >& 2
-		exit 1;
-	fi
-	return 0;
-}
-
-# tell daemon to re-open the sample files
-hup_daemon()
-{
-	if test -f "$LOCK_FILE"; then
-		echo -n "Signalling daemon... "
-		kill -HUP `cat $LOCK_FILE`
-		echo "done"
-	fi
-}
-
-
-# move all the sample files to a sample directory
-do_save_session()
-{
-	SAVE_DIR="${SAMPLES_DIR}/${SAVE_NAME}"
-
-	if test -e "$SAVE_DIR"; then
-		echo "session $SAVE_DIR already exists" >&2
-		exit 1
-	fi
-
-	if ! test -e $CURRENT_SAMPLES_DIR; then
-		echo "$CURRENT_SAMPLES_DIR doesn't exist: nothing to save" >&2
-		exit 0
-	fi
-
-	# FIXME: I don't think it's worth checking for empty current directory
-
-	mv $CURRENT_SAMPLES_DIR $SAVE_DIR
-	if test "$?" != "0"; then
-		echo "Couldn't move $CURRENT_SAMPLES_DIR to $SAVE_DIR" >&2
-		exit 1
-	fi
-
-	hup_daemon
-}
-
-
-# remove all the sample files
-do_reset()
-{
-	if test -z "$SAMPLES_DIR"; then
-		echo "opcontrol:do_reset() SAMPLES_DIR is empty!"
-		exit 1;
-	fi
-
-	# daemon use {kern} and {root} subdir, it's not a typo to not use ${}
-	move_and_remove $SAMPLES_DIR/current/{kern}
-	move_and_remove $SAMPLES_DIR/current/{root}
-	move_and_remove $SAMPLES_DIR/current/stats
-
-	# clear temp directory for jitted code
-	prep_jitdump;
-
-	hup_daemon
-}
-
-
-do_deinit()
-{
-	# unmount /dev/oprofile if it is mounted
-	OPROF_FS=`grep /dev/oprofile /etc/mtab`
-	if test -n "$OPROF_FS"; then
-		umount /dev/oprofile
-	fi
-	# unload the oprofile module if it is around
-	OPROF_MOD=`lsmod | grep oprofile`
-	if test -n "$OPROF_MOD"; then
-		echo "Unloading oprofile module" >& 2
-		rmmod oprofile
-	fi
-}
-
-
-# The function that calls the appropriate operations
-do_operations()
-{
-	# INIT always done by load_module to get access to cputype
-	# thus INIT is a noop
-
-	if test "$STATUS" = "yes"; then
-		do_status
-	fi
-
-	if test "$SETUP" = "yes"; then
-		check_valid_args
-		do_save_setup
-	fi
-
-	if test "$START_DAEMON" = "yes"; then
-		do_start_daemon
-	fi
-
-	if test "$START" = "yes"; then
-		do_start_daemon
-		do_start
-	fi
-
-	if test "$DUMP" = "yes"; then
-		do_dump
-	fi
-
-	if test "$SAVE_SESSION" = "yes"; then
-		do_save_session
-	fi
-
-	if test "$STOP" = "yes"; then
-		do_stop
-	fi
-
-	if test "$KILL_DAEMON" = "yes"; then
-		do_kill_daemon
-	fi
-
-	if test "$RESET" = "yes"; then
-		do_reset
-	fi
-
-	if test "$DEINIT" = "yes"; then
-		do_deinit
-	fi
-}
-
-# early check for --version, --help and --session-dir
-check_options_early()
-{
-
-	OPHELP="$OPDIR/ophelp"
-
-	for i in $@; do
-		# added to handle arg=val parameters
-		arg=`printf %s $i | awk -F= '{print $1}'`
-		val=`printf %s $i | awk -F= '{print $2}'`
-		case "$arg" in
-			-\?|--help)
-				do_help
-				exit 0
-				;;
-
-			-v|--version)
-				echo -n "`basename $0`: "
-				$OPHELP --version | cut -d' ' -f2-
-				exit 0
-				;;
-			--session-dir)
-				error_if_invalid_arg "$arg" "$val"
-				SESSION_DIR="$val"
-				DO_SETUP=yes
-				# do not exit early
-				;;
-
-		esac
-	done
-}
-
-
-# determine which module is loaded
-check_version()
-{
-	OPROFILE_AVAILABLE=no
-	grep oprofilefs /etc/mtab >/dev/null
-	if test "$?" -eq 0; then
-		# need to have oprofilefs mounted for this to work on 2.6
-		KERNEL_SUPPORT=yes
-		OPROFILE_AVAILABLE=yes
-		return
-	fi
-	# need to have /proc/oprof available for this to work on 2.4
-	grep oprof /proc/devices >/dev/null
-	if test "$?" -eq 0; then
-		KERNEL_SUPPORT=no
-		OPROFILE_AVAILABLE=yes
-		return
-	fi
-}
-
-# error out if the module is not loaded
-check_oprofile_available()
-{
-	if test "$OPROFILE_AVAILABLE" != "yes"; then
-		echo "Kernel support not available, missing opcontrol --init as root ?"
-		exit 1
-	fi
-}
-
-
-try_reset_sample_file()
-{
-	# special case to avoid loading the module, it works only if the
-	# daemon is not running because --reset imply --dump. Rather to check
-	# if the daemon is running we check if the module is loaded because
-	# we are only trying to avoid its load, if the check fails we fallback
-	# to the normal dump / reset sequence.
-	if test -z "$2" -a "$1" = "--reset"; then
-		check_version
-		if test "$OPROFILE_AVAILABLE" != "yes"; then
-			do_init_daemon_vars
-			do_reset
-			exit 0
-		fi
-	fi
-}
-
-#
-# Begin IBS Specific Functions
-#
-verify_ibs()
-{
-	IBS_EVENT=`echo $1| awk -F: '{print $1}'`
-	IBS_COUNT=`echo $1 | awk -F: '{print $2}'`
-	IBS_MASK=`echo $1 | awk -F: '{print $3}'`
-	
-	IBS_TYPE=`$OPHELP --check-events $1`
-	if test "$?" != "0" ; then
-		exit 1
-	fi
-			
-	if [ "$IBS_TYPE" = "ibs_fetch " ] ; then
-		# Check IBS_COUNT consistency
-		if test "$IBS_FETCH_COUNT" = "0" ; then 
-			IBS_FETCH_COUNT=$IBS_COUNT
-			IBS_FETCH_MASK=$IBS_MASK
-		elif test "$IBS_FETCH_COUNT" != "$IBS_COUNT" ; then
-			echo "ERROR: All IBS Fetch must have the same count."
-			exit 1
-		fi
-
-		# Check IBS_MASK consistency
-		if test "$IBS_FETCH_MASK" != "$IBS_MASK" ; then
-			echo "ERROR: All IBS Fetch must have the same unitmask."
-			exit 1
-		fi
-
-		# Check IBS_FETCH_COUNT within range
-		if test "$IBS_FETCH_COUNT" -gt 1048575 ; then 
-			echo "ERROR: IBS Fetch count is too large."
-			echo "       The maximum IBS-fetch count is 1048575."
-			exit 1
-		fi
-
-	elif [ "$IBS_TYPE" = "ibs_op " ] ; then
-		# Check IBS_COUNT consistency
-		if test "$IBS_OP_COUNT" = "0" ; then 
-			IBS_OP_COUNT=$IBS_COUNT
-			IBS_OP_MASK=$IBS_MASK
-		elif test "$IBS_OP_COUNT" != "$IBS_COUNT" ; then
-			echo "All IBS Op must have the same count."
-			exit 1
-		fi
-
-		# Check IBS_MASK consistency
-		if test "$IBS_OP_MASK" != "$IBS_MASK" ; then
-			echo "All IBS Op must have the same unitmask."
-			exit 1
-		fi
-		
-		# Check IBS_OP_COUNT within range
-		case "$CPUTYPE" in
-			x86-64/family10)
-				if test "$IBS_OP_COUNT" -gt 1048575 ; then 
-					echo "ERROR: IBS Op count is too large."
-					echo "       The maximum IBS-fetch count is 1048575."
-					exit 1
-				fi
-				;;
-
-			x86-64/family12h|\
-			x86-64/family14h|\
-			x86-64/family15h)
-				if test "$IBS_OP_COUNT" -gt 134217727 ; then 
-					echo "ERROR: IBS Op count is too large."
-					echo "       The maximum IBS-Op count is 134217727."
-					exit 1
-				fi
-				;;
-			*)
-		esac
-	fi
-
-	return
-}
-
-
-do_param_setup_ibs()
-{
-	if test "$KERNEL_SUPPORT" != "yes" ; then
-		echo "ERROR: No kernel support for IBS profiling."
-		exit 1	
-	fi
-
-	# Check if driver has IBS support
-	if test ! -d $MOUNT/ibs_fetch -o ! -d $MOUNT/ibs_op ; then
-		echo "ERROR: No kernel support for IBS profiling."
-		exit 1	
-	fi	
-
-	if test `echo $EVENT |  \
-	awk '{ print substr($0, 1, 10)}'` = "IBS_FETCH_" ; then
-		if test "$COUNT" != "0"; then
-			if [ "$IBS_FETCH_EVENTS" = "" ] ; then
-				IBS_FETCH_EVENTS="$EVENT"
-			else
-				IBS_FETCH_EVENTS="$IBS_FETCH_EVENTS,$EVENT"
-			fi
-			IBS_FETCH_COUNT=$COUNT
-			set_param ibs_fetch/max_count $COUNT
-			set_param ibs_fetch/rand_enable 1
-			set_param ibs_fetch/enable 1
-		else
-			set_param ibs_fetch/enable 0
-		fi
-
-	elif test `echo $EVENT |  \
-	awk '{ print substr($0, 1, 7)}'` = "IBS_OP_" ; then
-		if test "$COUNT" != "0"; then
-			if [ "$IBS_OP_EVENTS" = "" ] ; then
-				IBS_OP_EVENTS="$EVENT"
-			else
-				IBS_OP_EVENTS="$IBS_OP_EVENTS,$EVENT"
-			fi
-			IBS_OP_COUNT=$COUNT
-			IBS_OP_UNITMASK=$UNIT_MASK
-
-			set_param ibs_op/max_count $COUNT
-			set_param ibs_op/enable 1
-
-			# NOTE: We default to use dispatched_op if available. 
-			#       Some of the older family10 system does not have
-			#       dispatched_ops feature.
-			#       Dispatched op is enabled by bit 0 of the unitmask
-			IBS_OP_DISPATCHED_OP=$(( IBS_OP_UNITMASK & 0x1 ))
-			if test -f $MOUNT/ibs_op/dispatched_ops ; then
-				set_param ibs_op/dispatched_ops $IBS_OP_DISPATCHED_OP
-			else
-				if test $IBS_OP_DISPATCHED_OP -eq 1 ; then
-					echo "ERROR: IBS Op dispatched ops is not supported."
-					exit 1
-				fi
-			fi
-		
-			# NOTE: BTA is enabled by bit 2 of the unitmask
-			IBS_OP_BTA=$(( IBS_OP_UNITMASK & 0x4 ))
-			if test -f $MOUNT/ibs_op/branch_target; then
-				if [ "$IBS_OP_BTA" = "4" ] ; then
-					set_param ibs_op/branch_target 1
-				else
-					set_param ibs_op/branch_target 0
-				fi
-
-				# TODO: Check if write successful
-			else
-				if test $IBS_OP_BTA -eq 1 ; then
-					echo "ERROR: IBS Op Branch Target Address is not supported."
-					exit 1
-				fi
-			fi
-		else
-			set_param ibs_op/enable 0
-		fi
-	fi
-}
-
-
-help_start_daemon_with_ibs()
-{
-	if test "$IBS_FETCH_COUNT" != "0" -o "$IBS_OP_COUNT" != "0" ; then
-		OPD_ARGS="${OPD_ARGS} --ext-feature=ibs:"
-		if test "$IBS_FETCH_COUNT" != "0"; then
-			OPD_ARGS="${OPD_ARGS}fetch:$IBS_FETCH_EVENTS:$IBS_FETCH_COUNT:$IBS_FETCH_UNITMASK|"
-		fi
-
-		if test "$IBS_OP_COUNT" != "0"; then
-			OPD_ARGS="${OPD_ARGS}op:$IBS_OP_EVENTS:$IBS_OP_COUNT:$IBS_OP_UNITMASK"
-		fi
-	fi
-}
-
-#
-# End IBS Specific Functions
-#
-
-# main
-
-# determine the location of opcontrol and related programs
-if test -z "$OPDIR"; then
-	BINDIR="/usr/bin"
-	OPCONTROL=`$BINDIR/which $0`
-	OPDIR=`$BINDIR/dirname $OPCONTROL`
-fi
-
-PATH=$OPDIR:/usr/local/bin:/usr/local/sbin:/bin:/sbin:/usr/bin:/usr/sbin
-
-check_options_early $@
-
-if test -z "$1"; then
-	do_help
-	exit 0
-fi
-
-if test `id -u` = "0"; then
-	try_reset_sample_file $@
-
-	load_module
-fi
-check_version
-
-# Except --reset, even the few operations allowed as non root needs the
-# kernel support, if we don't error out now the error message will be obscure
-check_oprofile_available
-
-do_init
-if test `id -u` != "0"; then
-    if test -z "$2"; then
-	case "$1" in
-	    --dump|-d)
-		ONLY_DUMP=yes
-		do_dump
-		exit 0;
-		;;
-	    --list-events|-l)
-		exec $OPHELP
-		exit 0;
-		;;
-	    *)
-		echo "Normal users are limited to either '--dump' or '--list-events'." >&2
-		exit 1
-		;;
-	esac
-    else
-	echo "Normal users are limited to either '--dump' or '--list-events'." >&2
-	exit 1
-    fi
-fi
-
-do_options $@
-do_operations
diff --git a/utils/ophelp.c b/utils/ophelp.c
index fb65dcf..e38e417 100644
--- a/utils/ophelp.c
+++ b/utils/ophelp.c
@@ -33,6 +33,7 @@ static op_cpu cpu_type = CPU_NO_GOOD;
 static char * cpu_string;
 static int callgraph_depth;
 static int want_xml;
+static int ignore_count;
 
 static poptContext optcon;
 
@@ -50,17 +51,6 @@ static size_t hweight(size_t mask)
 	return count;
 }
 
-static void do_arch_specific_event_help(struct op_event * event)
-{
-	switch (cpu_type) {
-	case CPU_PPC64_CELL:
-		printf("Group %u :", event->val / 100);
-		break;
-	default:
-		break;
-	}
-}
-
 #define LINE_LEN 99
 
 static void word_wrap(int indent, int *column, char *msg)
@@ -74,8 +64,9 @@ static void word_wrap(int indent, int *column, char *msg)
 		printf("%.*s", wlen, msg);
 		*column += wlen + 1;
 		msg += wlen;
-		msg += strspn(msg, " ");
-		if (*msg)
+		wlen = strspn(msg, " ");
+		msg += wlen;
+		if (wlen != 0)
 			putchar(' ');
 	}
 }
@@ -94,7 +85,6 @@ static void help_for_event(struct op_event * event)
 	size_t nr_counters;
 	char buf[32];
 
-	do_arch_specific_event_help(event);
 	nr_counters = op_get_nr_counters(cpu_type);
 
 	/* Sanity check */
@@ -136,30 +126,29 @@ static void help_for_event(struct op_event * event)
 
 	if (strcmp(event->unit->name, "zero")) {
 
-		printf("\tUnit masks (default 0x%x)\n",
-		       event->unit->default_mask);
+		if (event->unit->default_mask_name) {
+			printf("\tUnit masks (default %s)\n",
+			       event->unit->default_mask_name);
+		} else {
+			printf("\tUnit masks (default 0x%x)\n",
+			       event->unit->default_mask);
+		}
 		printf("\t----------\n");
 
 		for (j = 0; j < event->unit->num; j++) {
 			printf("\t0x%.2x: ",
 			       event->unit->um[j].value);
 			column = 14;
-			word_wrap(14, &column, event->unit->um[j].desc);
-			if (event->unit->um[j].extra) {
-				u32 extra = event->unit->um[j].extra;
-
-				word_wrap(14, &column, " (extra:");
-				if (extra & EXTRA_EDGE)
-					word_wrap(14, &column, " edge");
-				if (extra & EXTRA_INV)
-					word_wrap(14, &column, " inv");
-				if ((extra >> EXTRA_CMASK_SHIFT) & EXTRA_CMASK_MASK) {
-					snprintf(buf, sizeof buf, " cmask=%x",
-						 (extra >> EXTRA_CMASK_SHIFT) & EXTRA_CMASK_MASK);
-					word_wrap(14, &column, buf);
-				}
-				word_wrap(14, &column, ")");
+
+			/* Named mask */
+			if (event->unit->um[j].name) {
+				word_wrap(14, &column, "(name=");
+				word_wrap(14, &column,
+					event->unit->um[j].name);
+				word_wrap(14, &column, ") ");
 			}
+
+			word_wrap(14, &column, event->unit->um[j].desc);
 			putchar('\n');
 		}
 	}
@@ -186,7 +175,12 @@ static void check_event(struct parsed_event * pev,
 
 	op_resolve_unit_mask(pev, NULL);
 
-	ret = op_check_events(0, event->val, pev->unit_mask, cpu_type);
+	// If a named UM is passed, op_resolve_unit_mask will resolve that into a
+	// valid unit mask, so we don't need to call op_check_events.
+	if (pev->unit_mask_name)
+		ret = 0;
+	else
+		ret = op_check_events(0, event->val, pev->unit_mask, cpu_type);
 
 	if (ret & OP_INVALID_UM) {
 		fprintf(stderr, "Invalid unit mask 0x%x for event %s\n",
@@ -197,7 +191,7 @@ static void check_event(struct parsed_event * pev,
 	min_count = event->min_count;
 	if (callgraph_depth)
 		min_count *= callgraph_min_count_scale;
-	if (pev->count < min_count) {
+	if (!ignore_count && pev->count < min_count) {
 		fprintf(stderr, "Count %d for event %s is below the "
 		        "minimum %d\n", pev->count, pev->name, min_count);
 		exit(EXIT_FAILURE);
@@ -213,7 +207,8 @@ static void resolve_events(void)
 	size_t nr_counters = op_get_nr_counters(cpu_type);
 	struct op_event const * selected_events[num_chosen_events];
 
-	count = parse_events(parsed_events, num_chosen_events, chosen_events);
+	count = parse_events(parsed_events, num_chosen_events, chosen_events,
+	                     ignore_count ? 0 : 1);
 
 	for (i = 0; i < count; ++i) {
 	        op_resolve_unit_mask(&parsed_events[i], NULL);
@@ -266,10 +261,7 @@ static void resolve_events(void)
 			else
 				printf("N/A ");
 		else
-			if (strcmp(selected_events[i]->name, TIMER_EVENT_NAME) == 0)
-				printf("timer ");
-			else
-				printf("%d ", (unsigned int) counter_map[i]);
+			printf("%d ", (unsigned int) counter_map[i]);
 	printf("\n");
 
 	free(counter_map);
@@ -280,7 +272,7 @@ static void show_unit_mask(void)
 {
 	size_t count;
 
-	count = parse_events(parsed_events, num_chosen_events, chosen_events);
+	count = parse_events(parsed_events, num_chosen_events, chosen_events, ignore_count ? 0 : 1);
 	if (count > 1) {
 		fprintf(stderr, "More than one event specified.\n");
 		exit(EXIT_FAILURE);
@@ -298,7 +290,7 @@ static void show_extra_mask(void)
 	size_t count;
 	unsigned extra = 0;
 
-	count = parse_events(parsed_events, num_chosen_events, chosen_events);
+	count = parse_events(parsed_events, num_chosen_events, chosen_events, ignore_count ? 0 : 1);
 	if (count > 1) {
 		fprintf(stderr, "More than one event specified.\n");
 		exit(EXIT_FAILURE);
@@ -333,6 +325,8 @@ static struct poptOption options[] = {
 	  "use the given CPU type", "cpu type", },
 	{ "check-events", 'e', POPT_ARG_NONE, &check_events, 0,
 	  "check the given event descriptions for validity", NULL, },
+	{ "ignore-count", 'i', POPT_ARG_NONE, &ignore_count, 0,
+	  "do not validate count value (used by ocount)", NULL},
 	{ "unit-mask", 'u', POPT_ARG_NONE, &unit_mask, 0,
 	  "default unit mask for the given event", NULL, },
 	{ "get-cpu-type", 'r', POPT_ARG_NONE, &get_cpu_type, 0,
@@ -441,8 +435,13 @@ int main(int argc, char const * argv[])
 	}
 
 	if (cpu_type == CPU_TIMER_INT) {
-		if (!check_events)
-			printf("Using timer interrupt.\n");
+		if (!check_events) {
+			printf("CPU type 'timer' was detected, but this is no longer a supported mode for oprofile.\n"
+			       "Ensure the obsolete opcontrol profiler (available in pre-1.0 oprofile releases)\n"
+			       "is not running on the system.  To check for this, look for the file\n"
+			       "/dev/oprofile/cpu_type; if this file exists, locate the pre-1.0 oprofile\n"
+			       "installation, and use its 'opcontrol' command with the --deinit option.\n");
+		}
 		exit(EXIT_SUCCESS);
 	}
 
@@ -522,6 +521,10 @@ int main(int argc, char const * argv[])
 		event_doc =
 			"See BIOS and Kernel Developer's Guide for AMD Family 15h Processors\n";
 		break;
+	case CPU_AMD64_GENERIC:
+		event_doc =
+			"See BIOS and Kernel Developer's Guide for AMD Processors\n";
+		break;
 	case CPU_ATHLON:
 		event_doc =
 			"See AMD Athlon Processor x86 Code Optimization Guide\n"
@@ -537,37 +540,25 @@ int main(int argc, char const * argv[])
 	case CPU_CORE_2:
 	case CPU_CORE_I7:
 	case CPU_NEHALEM:
+	case CPU_HASWELL:
+	case CPU_BROADWELL:
+	case CPU_SILVERMONT:
 	case CPU_WESTMERE:
 	case CPU_SANDYBRIDGE:
 	case CPU_IVYBRIDGE:
 	case CPU_ATOM:
 		event_doc =
 			"See Intel Architecture Developer's Manual Volume 3B, Appendix A and\n"
-			"Intel Architecture Optimization Reference Manual (730795-001)\n\n";
+			"Intel Architecture Optimization Reference Manual\n\n";
 		break;
 
 	case CPU_ARCH_PERFMON:
 		event_doc =
 			"See Intel 64 and IA-32 Architectures Software Developer's Manual\n"
-			"Volume 3B (Document 253669) Chapter 18 for architectural perfmon events\n"
+			"Volume 3B Chapter 18 for architectural perfmon events\n"
 			"This is a limited set of fallback events because oprofile doesn't know your CPU\n";
 		break;
 	
-	case CPU_IA64:
-	case CPU_IA64_1:
-	case CPU_IA64_2:
-		event_doc =
-			"See Intel Itanium Processor Reference Manual\n"
-			"for Software Development (Document 245320-003),\n"
-			"Intel Itanium Processor Reference Manual\n"
-			"for Software Optimization (Document 245473-003),\n"
-			"Intel Itanium 2 Processor Reference Manual\n"
-			"for Software Development and Optimization (Document 251110-001)\n\n";
-		break;
-	case CPU_AXP_EV4:
-	case CPU_AXP_EV5:
-	case CPU_AXP_PCA56:
-	case CPU_AXP_EV6:
 	case CPU_AXP_EV67:
 		event_doc =
 			"See Alpha Architecture Reference Manual\n"
@@ -607,6 +598,12 @@ int main(int argc, char const * argv[])
 			"Scorpion Processor Family Programmer's Reference Manual (PRM)\n";
 		break;
 
+	case CPU_ARM_KRAIT:
+		event_doc =
+			"See ARM Architecture Reference Manual ARMv7-A and ARMv7-R Edition\n"
+			"Krait Processor Family Programmer's Reference Manual (PRM)\n";
+		break;
+
 	case CPU_ARM_V7_CA9:
 		event_doc =
 			"See Cortex-A9 Technical Reference Manual\n"
@@ -631,10 +628,23 @@ int main(int argc, char const * argv[])
 			"Cortex A15 DDI (ARM DDI 0438F, revision r3p1)\n";
 		break;
 
-	case CPU_PPC64_PA6T:
+	case CPU_ARM_V8_APM_XGENE:
+		event_doc =
+			"See ARM Architecture Reference Manual \n"
+			"ARMv8, for ARMv8-A architecture profile\n"
+			"DDI (ARM DDI0487A.a)\n";
+		break;
+
+	case CPU_ARM_V8_CA57:
+		event_doc =
+			"See Cortex-A57 MPCore Technical Reference Manual\n"
+			"Cortex A57 DDI (ARM DDI 0488D, revision r1p1)\n";
+		break;
+
+	case CPU_ARM_V8_CA53:
 		event_doc =
-			"See PA6T Power Implementation Features Book IV\n"
-			"Chapter 7 Performance Counters\n";
+			"See Cortex-A53 MPCore Technical Reference Manual\n"
+			"Cortex A53 DDI (ARM DDI 0500D, revision r0p2)\n";
 		break;
 
 	case CPU_PPC64_POWER4:
@@ -645,16 +655,21 @@ int main(int argc, char const * argv[])
 	case CPU_PPC64_970:
 	case CPU_PPC64_970MP:
 	case CPU_PPC64_POWER7:
-	case CPU_PPC64_IBM_COMPAT_V1:
 		event_doc =
-			"Obtain PowerPC64 processor documentation at:\n"
-			"http://www-306.ibm.com/chips/techlib/techlib.nsf/productfamilies/PowerPC\n";
+			"When using operf, events may be specified without a '_GRP<n>' suffix.\n"
+			"If _GRP<n> (i.e., group number) is not specified, one will be automatically\n"
+			"selected for use by the profiler.  OProfile post-processing tools will\n"
+			"always show real event names that include the group number suffix.\n\n"
+			"Documentation for IBM POWER7 can be obtained at:\n"
+			"http://www.power.org/events/Power7/\n"
+			"No public performance monitoring doc available for older processors.\n";
 		break;
 
-	case CPU_PPC64_CELL:
+	case CPU_PPC64_ARCH_V1:
+	case CPU_PPC64_POWER8:
 		event_doc =
-			"Obtain Cell Broadband Engine documentation at:\n"
-			"http://www-306.ibm.com/chips/techlib/techlib.nsf/products/Cell_Broadband_Engine\n";
+			"This processor type is fully supported with operf.\n"
+			"See Power ISA 2.07 at https://www.power.org/\n\n";
 		break;
 
 	case CPU_MIPS_20K:
@@ -727,6 +742,8 @@ int main(int argc, char const * argv[])
 
 	case CPU_PPC_E500:
 	case CPU_PPC_E500_2:
+	case CPU_PPC_E500MC:
+	case CPU_PPC_E6500:
 		event_doc =
 			"See PowerPC e500 Core Complex Reference Manual\n"
 			"Chapter 7: Performance Monitor\n"
@@ -747,14 +764,6 @@ int main(int argc, char const * argv[])
 			"Downloadable from http://www.freescale.com\n";
 		break;
 
-	case CPU_AVR32:
-		event_doc =
-			"See AVR32 Architecture Manual\n"
-			"Chapter 6: Performance Counters\n"
-			"http://www.atmel.com/dyn/resources/prod_documents/doc32000.pdf\n";
-
-		break;
-
 	case CPU_TILE_TILE64:
 	case CPU_TILE_TILEPRO:
 	case CPU_TILE_TILEGX:
@@ -767,21 +776,19 @@ int main(int argc, char const * argv[])
 
 	case CPU_S390_Z10:
 	case CPU_S390_Z196:
+	case CPU_S390_ZEC12:
 		event_doc = "IBM System z CPU Measurement Facility\n"
-			"http://www-01.ibm.com/support/docview.wss"
-			"?uid=isg26fcd1cc32246f4c8852574ce0044734a\n";
+				"http://www-01.ibm.com/support/docview.wss"
+				"?uid=isg26fcd1cc32246f4c8852574ce0044734a\n";
 		break;
 
-		case CPU_RTC:
-			break;
-
-		// don't use default, if someone add a cpu he wants a compiler warning
-		// if he forgets to handle it here.
-		case CPU_TIMER_INT:
-		case CPU_NO_GOOD:
-		case MAX_CPU_TYPE:
-			printf("%d is not a valid processor type.\n", cpu_type);
-			exit(EXIT_FAILURE);
+	// Deliberately no default case: anyone who adds a CPU type should get a
+	// compiler warning if they forget to handle it here.
+	case CPU_TIMER_INT:
+	case CPU_NO_GOOD:
+	case MAX_CPU_TYPE:
+		printf("%d is not a valid processor type.\n", cpu_type);
+		exit(EXIT_FAILURE);
 	}
 
 	sprintf(title, "oprofile: available events for CPU type \"%s\"\n\n", pretty);
@@ -790,7 +797,7 @@ int main(int argc, char const * argv[])
 	else {
 		printf("%s%s", title, event_doc);
 		printf("For architectures using unit masks, you may be able to specify\n"
-		       "unit masks by name.  See 'opcontrol' or 'operf' man page for more details.\n\n");
+		       "unit masks by name.  See 'operf' or 'ocount' man page for more details.\n\n");
 	}
 
 	list_for_each(pos, events) {
-- 
2.34.1