perf cs-etm: Support for ARM A32/T32 instruction sets in CoreSight trace
author Robert Walker <robert.walker@arm.com>
Mon, 3 Dec 2018 12:18:46 +0000 (12:18 +0000)
committer Arnaldo Carvalho de Melo <acme@redhat.com>
Mon, 17 Dec 2018 17:56:18 +0000 (14:56 -0300)
This patch adds support for generating instruction samples from trace of
AArch32 programs using the A32 and T32 instruction sets.

T32 instructions are either 2 or 4 bytes long, so converting between
addresses and instruction counts requires extra information from the
trace decoder, which in turn requires OpenCSD version 0.10.0 or later.
A check for the OpenCSD library version has been added to the feature
check for OpenCSD.

Signed-off-by: Robert Walker <robert.walker@arm.com>
Reviewed-by: Mathieu Poirier <mathieu.poirier@linaro.org>
Tested-by: Leo Yan <leo.yan@linaro.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: coresight@lists.linaro.org
Cc: linux-arm-kernel@lists.infradead.org
Link: http://lkml.kernel.org/r/1543839526-30348-1-git-send-email-robert.walker@arm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/build/feature/test-libopencsd.c
tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
tools/perf/util/cs-etm.c

index 5ff1246e6194bce9d32f7b380eaf884b4ddc47ab..d68eb4fb40cc4261e6ad857d7ad656be0f6e95d6 100644 (file)
@@ -1,6 +1,14 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <opencsd/c_api/opencsd_c_api.h>
 
+/*
+ * Check OpenCSD library version is sufficient to provide required features
+ */
+#define OCSD_MIN_VER ((0 << 16) | (10 << 8) | (0))
+#if !defined(OCSD_VER_NUM) || (OCSD_VER_NUM < OCSD_MIN_VER)
+#error "OpenCSD >= 0.10.0 is required"
+#endif
+
 int main(void)
 {
        (void)ocsd_get_version();
index 938def6d0bb9878bdf3b773d071e841a8ff0e595..5efb616bd609033c527ac1cc4696de9fe9049626 100644 (file)
@@ -263,9 +263,12 @@ static void cs_etm_decoder__clear_buffer(struct cs_etm_decoder *decoder)
        decoder->tail = 0;
        decoder->packet_count = 0;
        for (i = 0; i < MAX_BUFFER; i++) {
+               decoder->packet_buffer[i].isa = CS_ETM_ISA_UNKNOWN;
                decoder->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR;
                decoder->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR;
+               decoder->packet_buffer[i].instr_count = 0;
                decoder->packet_buffer[i].last_instr_taken_branch = false;
+               decoder->packet_buffer[i].last_instr_size = 0;
                decoder->packet_buffer[i].exc = false;
                decoder->packet_buffer[i].exc_ret = false;
                decoder->packet_buffer[i].cpu = INT_MIN;
@@ -294,11 +297,15 @@ cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder,
        decoder->packet_count++;
 
        decoder->packet_buffer[et].sample_type = sample_type;
+       decoder->packet_buffer[et].isa = CS_ETM_ISA_UNKNOWN;
        decoder->packet_buffer[et].exc = false;
        decoder->packet_buffer[et].exc_ret = false;
        decoder->packet_buffer[et].cpu = *((int *)inode->priv);
        decoder->packet_buffer[et].start_addr = CS_ETM_INVAL_ADDR;
        decoder->packet_buffer[et].end_addr = CS_ETM_INVAL_ADDR;
+       decoder->packet_buffer[et].instr_count = 0;
+       decoder->packet_buffer[et].last_instr_taken_branch = false;
+       decoder->packet_buffer[et].last_instr_size = 0;
 
        if (decoder->packet_count == MAX_BUFFER - 1)
                return OCSD_RESP_WAIT;
@@ -321,8 +328,28 @@ cs_etm_decoder__buffer_range(struct cs_etm_decoder *decoder,
 
        packet = &decoder->packet_buffer[decoder->tail];
 
+       switch (elem->isa) {
+       case ocsd_isa_aarch64:
+               packet->isa = CS_ETM_ISA_A64;
+               break;
+       case ocsd_isa_arm:
+               packet->isa = CS_ETM_ISA_A32;
+               break;
+       case ocsd_isa_thumb2:
+               packet->isa = CS_ETM_ISA_T32;
+               break;
+       case ocsd_isa_tee:
+       case ocsd_isa_jazelle:
+       case ocsd_isa_custom:
+       case ocsd_isa_unknown:
+       default:
+               packet->isa = CS_ETM_ISA_UNKNOWN;
+       }
+
        packet->start_addr = elem->st_addr;
        packet->end_addr = elem->en_addr;
+       packet->instr_count = elem->num_instr_range;
+
        switch (elem->last_i_type) {
        case OCSD_INSTR_BR:
        case OCSD_INSTR_BR_INDIRECT:
@@ -336,6 +363,8 @@ cs_etm_decoder__buffer_range(struct cs_etm_decoder *decoder,
                break;
        }
 
+       packet->last_instr_size = elem->last_instr_sz;
+
        return ret;
 }
 
index 612b5755f742ffc1052e82df925143f4caf93a15..9351bd10d864ee3a80fe50934c763ec034461c9b 100644 (file)
@@ -28,11 +28,21 @@ enum cs_etm_sample_type {
        CS_ETM_TRACE_ON = 1 << 1,
 };
 
+enum cs_etm_isa {
+       CS_ETM_ISA_UNKNOWN,
+       CS_ETM_ISA_A64,
+       CS_ETM_ISA_A32,
+       CS_ETM_ISA_T32,
+};
+
 struct cs_etm_packet {
        enum cs_etm_sample_type sample_type;
+       enum cs_etm_isa isa;
        u64 start_addr;
        u64 end_addr;
+       u32 instr_count;
        u8 last_instr_taken_branch;
+       u8 last_instr_size;
        u8 exc;
        u8 exc_ret;
        int cpu;
index 73430b73570d51f9f2f96131208ac5f2022dee37..48ad217bf0df0ba3474ca6c3e9040c4f06a07977 100644 (file)
 
 #define MAX_TIMESTAMP (~0ULL)
 
-/*
- * A64 instructions are always 4 bytes
- *
- * Only A64 is supported, so can use this constant for converting between
- * addresses and instruction counts, calculting offsets etc
- */
-#define A64_INSTR_SIZE 4
-
 struct cs_etm_auxtrace {
        struct auxtrace auxtrace;
        struct auxtrace_queues queues;
@@ -510,21 +502,17 @@ static inline void cs_etm__reset_last_branch_rb(struct cs_etm_queue *etmq)
        etmq->last_branch_rb->nr = 0;
 }
 
-static inline u64 cs_etm__last_executed_instr(struct cs_etm_packet *packet)
-{
-       /* Returns 0 for the CS_ETM_TRACE_ON packet */
-       if (packet->sample_type == CS_ETM_TRACE_ON)
-               return 0;
+static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq,
+                                        u64 addr) {
+       u8 instrBytes[2];
 
+       cs_etm__mem_access(etmq, addr, ARRAY_SIZE(instrBytes), instrBytes);
        /*
-        * The packet records the execution range with an exclusive end address
-        *
-        * A64 instructions are constant size, so the last executed
-        * instruction is A64_INSTR_SIZE before the end address
-        * Will need to do instruction level decode for T32 instructions as
-        * they can be variable size (not yet supported).
+        * T32 instruction size is indicated by bits[15:11] of the first
+        * 16-bit word of the instruction: 0b11101, 0b11110 and 0b11111
+        * denote a 32-bit instruction.
         */
-       return packet->end_addr - A64_INSTR_SIZE;
+       return ((instrBytes[1] & 0xF8) >= 0xE8) ? 4 : 2;
 }
 
 static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet)
@@ -536,27 +524,32 @@ static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet)
        return packet->start_addr;
 }
 
-static inline u64 cs_etm__instr_count(const struct cs_etm_packet *packet)
+static inline
+u64 cs_etm__last_executed_instr(const struct cs_etm_packet *packet)
 {
-       /*
-        * Only A64 instructions are currently supported, so can get
-        * instruction count by dividing.
-        * Will need to do instruction level decode for T32 instructions as
-        * they can be variable size (not yet supported).
-        */
-       return (packet->end_addr - packet->start_addr) / A64_INSTR_SIZE;
+       /* Returns 0 for the CS_ETM_TRACE_ON packet */
+       if (packet->sample_type == CS_ETM_TRACE_ON)
+               return 0;
+
+       return packet->end_addr - packet->last_instr_size;
 }
 
-static inline u64 cs_etm__instr_addr(const struct cs_etm_packet *packet,
+static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq,
+                                    const struct cs_etm_packet *packet,
                                     u64 offset)
 {
-       /*
-        * Only A64 instructions are currently supported, so can get
-        * instruction address by muliplying.
-        * Will need to do instruction level decode for T32 instructions as
-        * they can be variable size (not yet supported).
-        */
-       return packet->start_addr + offset * A64_INSTR_SIZE;
+       if (packet->isa == CS_ETM_ISA_T32) {
+               u64 addr = packet->start_addr;
+
+               while (offset > 0) {
+                       addr += cs_etm__t32_instr_size(etmq, addr);
+                       offset--;
+               }
+               return addr;
+       }
+
+       /* Assume a 4 byte instruction size (A32/A64) */
+       return packet->start_addr + offset * 4;
 }
 
 static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq)
@@ -888,9 +881,8 @@ static int cs_etm__sample(struct cs_etm_queue *etmq)
        struct cs_etm_auxtrace *etm = etmq->etm;
        struct cs_etm_packet *tmp;
        int ret;
-       u64 instrs_executed;
+       u64 instrs_executed = etmq->packet->instr_count;
 
-       instrs_executed = cs_etm__instr_count(etmq->packet);
        etmq->period_instructions += instrs_executed;
 
        /*
@@ -920,7 +912,7 @@ static int cs_etm__sample(struct cs_etm_queue *etmq)
                 * executed, but PC has not advanced to next instruction)
                 */
                u64 offset = (instrs_executed - instrs_over - 1);
-               u64 addr = cs_etm__instr_addr(etmq->packet, offset);
+               u64 addr = cs_etm__instr_addr(etmq, etmq->packet, offset);
 
                ret = cs_etm__synth_instruction_sample(
                        etmq, addr, etm->instructions_sample_period);