Merge tag 'nds32-for-linus-4.21' of git://git.kernel.org/pub/scm/linux/kernel/git...
authorLinus Torvalds <torvalds@linux-foundation.org>
Sat, 29 Dec 2018 17:37:03 +0000 (09:37 -0800)
committerLinus Torvalds <torvalds@linux-foundation.org>
Sat, 29 Dec 2018 17:37:03 +0000 (09:37 -0800)
Pull nds32 updates from Greentime Hu:

 - Perf support

 - Power management support

 - FPU support

 - Hardware prefetcher support

 - Build error fixes

 - Performance enhancements

* tag 'nds32-for-linus-4.21' of git://git.kernel.org/pub/scm/linux/kernel/git/greentime/linux:
  nds32: support hardware prefetcher
  nds32: Fix the items of hwcap_str ordering issue.
  math-emu/soft-fp.h: (_FP_ROUND_ZERO) cast 0 to void to fix warning
  math-emu/op-2.h: Use statement expressions to prevent negative constant shift
  nds32: support denormalized result through FP emulator
  nds32: Support FP emulation
  nds32: nds32 FPU port
  nds32: Remove duplicated include from pm.c
  nds32: Power management for nds32
  nds32: Add document for NDS32 PMU.
  nds32: Add perf call-graph support.
  nds32: Perf porting
  nds32: Fix bug in bitfield.h
  nds32: Fix gcc 8.0 compiler option incompatible.
  nds32: Fill all TLB entries with kernel image mapping
  nds32: Remove the redundant assignment

65 files changed:
Documentation/devicetree/bindings/perf/nds32v3-pmu.txt [new file with mode: 0644]
arch/nds32/Kconfig
arch/nds32/Kconfig.cpu
arch/nds32/Makefile
arch/nds32/boot/dts/ae3xx.dts
arch/nds32/include/asm/Kbuild
arch/nds32/include/asm/bitfield.h
arch/nds32/include/asm/elf.h
arch/nds32/include/asm/fpu.h [new file with mode: 0644]
arch/nds32/include/asm/fpuemu.h [new file with mode: 0644]
arch/nds32/include/asm/nds32_fpu_inst.h [new file with mode: 0644]
arch/nds32/include/asm/perf_event.h [new file with mode: 0644]
arch/nds32/include/asm/pmu.h [new file with mode: 0644]
arch/nds32/include/asm/processor.h
arch/nds32/include/asm/sfp-machine.h [new file with mode: 0644]
arch/nds32/include/asm/stacktrace.h [new file with mode: 0644]
arch/nds32/include/asm/suspend.h [new file with mode: 0644]
arch/nds32/include/asm/syscalls.h
arch/nds32/include/uapi/asm/auxvec.h
arch/nds32/include/uapi/asm/sigcontext.h
arch/nds32/include/uapi/asm/udftrap.h [new file with mode: 0644]
arch/nds32/include/uapi/asm/unistd.h
arch/nds32/kernel/Makefile
arch/nds32/kernel/ex-entry.S
arch/nds32/kernel/ex-exit.S
arch/nds32/kernel/ex-scall.S
arch/nds32/kernel/fpu.c [new file with mode: 0644]
arch/nds32/kernel/head.S
arch/nds32/kernel/perf_event_cpu.c [new file with mode: 0644]
arch/nds32/kernel/pm.c [new file with mode: 0644]
arch/nds32/kernel/process.c
arch/nds32/kernel/setup.c
arch/nds32/kernel/signal.c
arch/nds32/kernel/sleep.S [new file with mode: 0644]
arch/nds32/kernel/sys_nds32.c
arch/nds32/kernel/traps.c
arch/nds32/math-emu/Makefile [new file with mode: 0644]
arch/nds32/math-emu/faddd.c [new file with mode: 0644]
arch/nds32/math-emu/fadds.c [new file with mode: 0644]
arch/nds32/math-emu/fcmpd.c [new file with mode: 0644]
arch/nds32/math-emu/fcmps.c [new file with mode: 0644]
arch/nds32/math-emu/fd2s.c [new file with mode: 0644]
arch/nds32/math-emu/fdivd.c [new file with mode: 0644]
arch/nds32/math-emu/fdivs.c [new file with mode: 0644]
arch/nds32/math-emu/fmuld.c [new file with mode: 0644]
arch/nds32/math-emu/fmuls.c [new file with mode: 0644]
arch/nds32/math-emu/fnegd.c [new file with mode: 0644]
arch/nds32/math-emu/fnegs.c [new file with mode: 0644]
arch/nds32/math-emu/fpuemu.c [new file with mode: 0644]
arch/nds32/math-emu/fs2d.c [new file with mode: 0644]
arch/nds32/math-emu/fsqrtd.c [new file with mode: 0644]
arch/nds32/math-emu/fsqrts.c [new file with mode: 0644]
arch/nds32/math-emu/fsubd.c [new file with mode: 0644]
arch/nds32/math-emu/fsubs.c [new file with mode: 0644]
arch/nds32/mm/Makefile
arch/nds32/mm/fault.c
drivers/irqchip/irq-ativic32.c
include/math-emu/op-2.h
include/math-emu/soft-fp.h
tools/include/asm/barrier.h
tools/perf/arch/nds32/Build [new file with mode: 0644]
tools/perf/arch/nds32/util/Build [new file with mode: 0644]
tools/perf/arch/nds32/util/header.c [new file with mode: 0644]
tools/perf/pmu-events/arch/nds32/mapfile.csv [new file with mode: 0644]
tools/perf/pmu-events/arch/nds32/n13/atcpmu.json [new file with mode: 0644]

diff --git a/Documentation/devicetree/bindings/perf/nds32v3-pmu.txt b/Documentation/devicetree/bindings/perf/nds32v3-pmu.txt
new file mode 100644 (file)
index 0000000..1bd1578
--- /dev/null
@@ -0,0 +1,17 @@
+* NDS32 Performance Monitor Units
+
+NDS32 cores have a PMU for counting CPU and cache events, such as cache misses.
+The NDS32 PMU representation in the device tree should be done as follows:
+
+Required properties:
+
+- compatible :
+       "andestech,nds32v3-pmu"
+
+- interrupts : The interrupt number for NDS32 PMU is 13.
+
+Example:
+pmu {
+       compatible = "andestech,nds32v3-pmu";
+       interrupts = <13>;
+};
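
For illustration, a minimal sketch (hedged; this is not the driver added by
this series, and the table name is hypothetical) of how a platform driver
would match the compatible string documented above:

	/* needs <linux/mod_devicetable.h> and <linux/of.h> */
	static const struct of_device_id nds32_pmu_of_match[] = {
		{ .compatible = "andestech,nds32v3-pmu" },
		{ /* sentinel */ },
	};
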
index 1af6bba..dda1906 100644 (file)
@@ -28,7 +28,9 @@ config NDS32
        select HANDLE_DOMAIN_IRQ
        select HAVE_ARCH_TRACEHOOK
        select HAVE_DEBUG_KMEMLEAK
+       select HAVE_EXIT_THREAD
        select HAVE_REGS_AND_STACK_ACCESS_API
+       select HAVE_PERF_EVENTS
        select IRQ_DOMAIN
        select LOCKDEP_SUPPORT
        select MODULES_USE_ELF_RELA
@@ -91,3 +93,13 @@ endmenu
 menu "Kernel Features"
 source "kernel/Kconfig.hz"
 endmenu
+
+menu "Power management options"
+config SYS_SUPPORTS_APM_EMULATION
+       bool
+
+config ARCH_SUSPEND_POSSIBLE
+       def_bool y
+
+source "kernel/power/Kconfig"
+endmenu
index b8c8984..f16edf0 100644 (file)
@@ -7,6 +7,40 @@ config CPU_LITTLE_ENDIAN
        bool "Little endian"
        default y
 
+config FPU
+       bool "FPU support"
+       default n
+       help
+         If the FPU ISA is used in user space, say Y here to enable the
+         required kernel support, such as FPU context switching and the
+         FPU exception handler.
+
+         If no FPU ISA is used in user space, say N.
+
+config LAZY_FPU
+       bool "lazy FPU support"
+       depends on FPU
+       default y
+       help
+         Say Y here to enable the lazy FPU scheme. The lazy FPU scheme
+         can enhance system performance by reducing how often the FPU
+         registers need to be saved and restored on context switches.
+
+         For the normal case, say Y.
+
+config SUPPORT_DENORMAL_ARITHMETIC
+       bool "Denormal arithmetic support"
+       depends on FPU
+       default n
+       help
+         Say Y here to enable arithmetic on denormalized numbers. Enabling
+         this feature can improve the precision of computations on tiny
+         numbers. However, the performance loss in floating-point
+         calculations may be significant due to the additional FPU
+         exceptions.
+
+         If precise handling of tiny numbers is not critical, say N to
+         avoid the performance loss.
+
 config HWZOL
        bool "hardware zero overhead loop support"
        depends on CPU_D10 || CPU_D15
@@ -143,6 +177,13 @@ config CACHE_L2
          Say Y here to enable L2 cache if your SoC are integrated with L2CC.
          If unsure, say N.
 
+config HW_PRE
+       bool "Enable hardware prefetcher"
+       default y
+       help
+         Say Y here to enable the hardware prefetcher feature.
+         Only CPUs with CPU_VER.REV >= 0x09 support it.
+
 menu "Memory configuration"
 
 choice
index 9f525ed..0a935c1 100644 (file)
@@ -5,10 +5,14 @@ KBUILD_DEFCONFIG := defconfig
 
 comma = ,
 
+
 ifdef CONFIG_FUNCTION_TRACER
 arch-y += -malways-save-lp -mno-relax
 endif
 
+# Avoid generating FPU instructions
+arch-y  += -mno-ext-fpu-sp -mno-ext-fpu-dp -mfloat-abi=soft
+
 KBUILD_CFLAGS  += $(call cc-option, -mno-sched-prolog-epilog)
 KBUILD_CFLAGS  += -mcmodel=large
 
@@ -26,6 +30,7 @@ export        TEXTADDR
 
 # If we have a machine-specific directory, then include it in the build.
 core-y                         += arch/nds32/kernel/ arch/nds32/mm/
+core-$(CONFIG_FPU)              += arch/nds32/math-emu/
 libs-y                         += arch/nds32/lib/
 
 ifneq '$(CONFIG_NDS32_BUILTIN_DTB)' '""'
index bb39749..16a9f54 100644 (file)
@@ -82,4 +82,9 @@
                        interrupts = <18>;
                };
        };
+
+       pmu {
+               compatible = "andestech,nds32v3-pmu";
+               interrupts = <13>;
+       };
 };
index dbc4e54..f81b633 100644 (file)
@@ -36,6 +36,7 @@ generic-y += kprobes.h
 generic-y += kvm_para.h
 generic-y += limits.h
 generic-y += local.h
+generic-y += local64.h
 generic-y += mm-arch-hooks.h
 generic-y += mman.h
 generic-y += parport.h
index 8e84fc3..7414fcb 100644 (file)
 #define ITYPE_mskSTYPE         ( 0xF  << ITYPE_offSTYPE )
 #define ITYPE_mskCPID          ( 0x3  << ITYPE_offCPID )
 
+/* Additional definitions of ITYPE register for FPU */
+#define FPU_DISABLE_EXCEPTION  (0x1  << ITYPE_offSTYPE)
+#define FPU_EXCEPTION          (0x2  << ITYPE_offSTYPE)
+#define FPU_CPID               0       /* FPU Co-Processor ID is 0 */
+
 #define NDS32_VECTOR_mskNONEXCEPTION   0x78
 #define NDS32_VECTOR_offEXCEPTION      8
 #define NDS32_VECTOR_offINTERRUPT      9
 #define PFM_CTL_offKU1         13      /* Enable user mode event counting for PFMC1 */
 #define PFM_CTL_offKU2         14      /* Enable user mode event counting for PFMC2 */
 #define PFM_CTL_offSEL0                15      /* The event selection for PFMC0 */
-#define PFM_CTL_offSEL1                21      /* The event selection for PFMC1 */
-#define PFM_CTL_offSEL2                27      /* The event selection for PFMC2 */
+#define PFM_CTL_offSEL1                16      /* The event selection for PFMC1 */
+#define PFM_CTL_offSEL2                22      /* The event selection for PFMC2 */
 /* bit 28:31 reserved */
 
 #define PFM_CTL_mskEN0         ( 0x01  << PFM_CTL_offEN0 )
 #define N13MISC_CTL_offRTP     1       /* Disable Return Target Predictor */
 #define N13MISC_CTL_offPTEPF   2       /* Disable HPTWK L2 PTE pefetch */
 #define N13MISC_CTL_offSP_SHADOW_EN    4       /* Enable shadow stack pointers */
+#define MISC_CTL_offHWPRE      11      /* Enable HardWare PREFETCH */
 /* bit 6, 9:31 reserved */
 
 #define N13MISC_CTL_makBTB     ( 0x1  << N13MISC_CTL_offBTB )
 #define N13MISC_CTL_makRTP     ( 0x1  << N13MISC_CTL_offRTP )
 #define N13MISC_CTL_makPTEPF   ( 0x1  << N13MISC_CTL_offPTEPF )
 #define N13MISC_CTL_makSP_SHADOW_EN    ( 0x1  << N13MISC_CTL_offSP_SHADOW_EN )
+#define MISC_CTL_makHWPRE_EN     ( 0x1  << MISC_CTL_offHWPRE )
 
+#ifdef CONFIG_HW_PRE
+#define MISC_init      (N13MISC_CTL_makBTB|N13MISC_CTL_makRTP|N13MISC_CTL_makSP_SHADOW_EN|MISC_CTL_makHWPRE_EN)
+#else
 #define MISC_init      (N13MISC_CTL_makBTB|N13MISC_CTL_makRTP|N13MISC_CTL_makSP_SHADOW_EN)
+#endif
 
 /******************************************************************************
  * PRUSR_ACC_CTL (Privileged Resource User Access Control Registers)
 #define FPCSR_mskDNIT           ( 0x1  << FPCSR_offDNIT )
 #define FPCSR_mskRIT           ( 0x1  << FPCSR_offRIT )
 #define FPCSR_mskALL           (FPCSR_mskIVO | FPCSR_mskDBZ | FPCSR_mskOVF | FPCSR_mskUDF | FPCSR_mskIEX)
+#define FPCSR_mskALLE_NO_UDFE  (FPCSR_mskIVOE | FPCSR_mskDBZE | FPCSR_mskOVFE | FPCSR_mskIEXE)
 #define FPCSR_mskALLE          (FPCSR_mskIVOE | FPCSR_mskDBZE | FPCSR_mskOVFE | FPCSR_mskUDFE | FPCSR_mskIEXE)
 #define FPCSR_mskALLT           (FPCSR_mskIVOT | FPCSR_mskDBZT | FPCSR_mskOVFT | FPCSR_mskUDFT | FPCSR_mskIEXT |FPCSR_mskDNIT | FPCSR_mskRIT)
 
 #define FPCFG_mskIMVER         ( 0x1F  << FPCFG_offIMVER )
 #define FPCFG_mskAVER          ( 0x1F  << FPCFG_offAVER )
 
+/* 8 Single precision or 4 double precision registers are available */
+#define SP8_DP4_reg            0
+/* 16 Single precision or 8 double precision registers are available */
+#define SP16_DP8_reg           1
+/* 32 Single precision or 16 double precision registers are available */
+#define SP32_DP16_reg          2
+/* 32 Single precision or 32 double precision registers are available */
+#define SP32_DP32_reg          3
+
 /******************************************************************************
  * fucpr: FUCOP_CTL (FPU and Coprocessor Enable Control Register)
  *****************************************************************************/
index f5f9cf7..95f3ea2 100644 (file)
@@ -9,6 +9,7 @@
  */
 
 #include <asm/ptrace.h>
+#include <asm/fpu.h>
 
 typedef unsigned long elf_greg_t;
 typedef unsigned long elf_freg_t[3];
@@ -159,8 +160,18 @@ struct elf32_hdr;
 
 #endif
 
+
+#if IS_ENABLED(CONFIG_FPU)
+#define FPU_AUX_ENT    NEW_AUX_ENT(AT_FPUCW, FPCSR_INIT)
+#else
+#define FPU_AUX_ENT    NEW_AUX_ENT(AT_IGNORE, 0)
+#endif
+
 #define ARCH_DLINFO                                            \
 do {                                                           \
+       /* Optional FPU initialization */                       \
+       FPU_AUX_ENT;                                            \
+                                                               \
        NEW_AUX_ENT(AT_SYSINFO_EHDR,                            \
                    (elf_addr_t)current->mm->context.vdso);     \
 } while (0)
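
As a hedged userspace illustration (not part of this patch): a C library or
program could read the FPCSR initialization hint that ARCH_DLINFO publishes
above via the aux vector:

	#include <sys/auxv.h>

	#define AT_FPUCW 18	/* same value as the kernel uapi header */

	unsigned long fpucw = getauxval(AT_FPUCW);	/* FPCSR_INIT, or 0 when ignored */
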
diff --git a/arch/nds32/include/asm/fpu.h b/arch/nds32/include/asm/fpu.h
new file mode 100644 (file)
index 0000000..019f1bc
--- /dev/null
@@ -0,0 +1,126 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2005-2018 Andes Technology Corporation */
+
+#ifndef __ASM_NDS32_FPU_H
+#define __ASM_NDS32_FPU_H
+
+#if IS_ENABLED(CONFIG_FPU)
+#ifndef __ASSEMBLY__
+#include <linux/sched/task_stack.h>
+#include <linux/preempt.h>
+#include <asm/ptrace.h>
+
+extern bool has_fpu;
+
+extern void save_fpu(struct task_struct *__tsk);
+extern void load_fpu(const struct fpu_struct *fpregs);
+extern bool do_fpu_exception(unsigned int subtype, struct pt_regs *regs);
+extern int do_fpuemu(struct pt_regs *regs, struct fpu_struct *fpu);
+
+#define test_tsk_fpu(regs)     (regs->fucop_ctl & FUCOP_CTL_mskCP0EN)
+
+/*
+ * Initially load the FPU with signalling NaNs. This bit pattern
+ * has the property that it represents a signalling NaN regardless
+ * of whether it is interpreted as single or as double precision.
+ */
+
+#define sNAN64    0xFFFFFFFFFFFFFFFFULL
+#define sNAN32    0xFFFFFFFFUL
+
+#if IS_ENABLED(CONFIG_SUPPORT_DENORMAL_ARITHMETIC)
+/*
+ * Denormalized numbers are unsupported by the nds32 FPU, so an operation
+ * is treated as an underflow case when its final result is a denormalized
+ * number. To enhance precision, the underflow exception trap is enabled
+ * by default, and the kernel re-executes the instruction through the FPU
+ * emulator when an underflow exception is taken.
+ */
+#define FPCSR_INIT  FPCSR_mskUDFE
+#else
+#define FPCSR_INIT  0x0UL
+#endif
+
+extern const struct fpu_struct init_fpuregs;
+
+static inline void disable_ptreg_fpu(struct pt_regs *regs)
+{
+       regs->fucop_ctl &= ~FUCOP_CTL_mskCP0EN;
+}
+
+static inline void enable_ptreg_fpu(struct pt_regs *regs)
+{
+       regs->fucop_ctl |= FUCOP_CTL_mskCP0EN;
+}
+
+static inline void enable_fpu(void)
+{
+       unsigned long fucop_ctl;
+
+       fucop_ctl = __nds32__mfsr(NDS32_SR_FUCOP_CTL) | FUCOP_CTL_mskCP0EN;
+       __nds32__mtsr(fucop_ctl, NDS32_SR_FUCOP_CTL);
+       __nds32__isb();
+}
+
+static inline void disable_fpu(void)
+{
+       unsigned long fucop_ctl;
+
+       fucop_ctl = __nds32__mfsr(NDS32_SR_FUCOP_CTL) & ~FUCOP_CTL_mskCP0EN;
+       __nds32__mtsr(fucop_ctl, NDS32_SR_FUCOP_CTL);
+       __nds32__isb();
+}
+
+static inline void lose_fpu(void)
+{
+       preempt_disable();
+#if IS_ENABLED(CONFIG_LAZY_FPU)
+       if (last_task_used_math == current) {
+               last_task_used_math = NULL;
+#else
+       if (test_tsk_fpu(task_pt_regs(current))) {
+#endif
+               save_fpu(current);
+       }
+       disable_ptreg_fpu(task_pt_regs(current));
+       preempt_enable();
+}
+
+static inline void own_fpu(void)
+{
+       preempt_disable();
+#if IS_ENABLED(CONFIG_LAZY_FPU)
+       if (last_task_used_math != current) {
+               if (last_task_used_math != NULL)
+                       save_fpu(last_task_used_math);
+               load_fpu(&current->thread.fpu);
+               last_task_used_math = current;
+       }
+#else
+       if (!test_tsk_fpu(task_pt_regs(current))) {
+               load_fpu(&current->thread.fpu);
+       }
+#endif
+       enable_ptreg_fpu(task_pt_regs(current));
+       preempt_enable();
+}
+
+#if !IS_ENABLED(CONFIG_LAZY_FPU)
+static inline void unlazy_fpu(struct task_struct *tsk)
+{
+       preempt_disable();
+       if (test_tsk_fpu(task_pt_regs(tsk)))
+               save_fpu(tsk);
+       preempt_enable();
+}
+#endif /* !CONFIG_LAZY_FPU */
+static inline void clear_fpu(struct pt_regs *regs)
+{
+       preempt_disable();
+       if (test_tsk_fpu(regs))
+               disable_ptreg_fpu(regs);
+       preempt_enable();
+}
+#endif /* CONFIG_FPU */
+#endif /* __ASSEMBLY__ */
+#endif /* __ASM_NDS32_FPU_H */
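
A hedged usage sketch of the ownership helpers above (the caller is
hypothetical): under the lazy scheme the live FPU registers belong to
last_task_used_math, so code that wants to inspect current's saved context
must write the hardware state back first:

	static void example_sync_fpu_context(void)
	{
		/* Flush the hardware FPU into current->thread.fpu and drop
		 * ownership; the next FPU use will fault and reload it.
		 */
		lose_fpu();

		/* current->thread.fpu is now safe to read or copy. */
	}
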
diff --git a/arch/nds32/include/asm/fpuemu.h b/arch/nds32/include/asm/fpuemu.h
new file mode 100644 (file)
index 0000000..c4bd0c7
--- /dev/null
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2005-2018 Andes Technology Corporation */
+
+#ifndef __ARCH_NDS32_FPUEMU_H
+#define __ARCH_NDS32_FPUEMU_H
+
+/*
+ * single precision
+ */
+
+void fadds(void *ft, void *fa, void *fb);
+void fsubs(void *ft, void *fa, void *fb);
+void fmuls(void *ft, void *fa, void *fb);
+void fdivs(void *ft, void *fa, void *fb);
+void fs2d(void *ft, void *fa);
+void fsqrts(void *ft, void *fa);
+void fnegs(void *ft, void *fa);
+int fcmps(void *ft, void *fa, void *fb, int cop);
+
+/*
+ * double precision
+ */
+void faddd(void *ft, void *fa, void *fb);
+void fsubd(void *ft, void *fa, void *fb);
+void fmuld(void *ft, void *fa, void *fb);
+void fdivd(void *ft, void *fa, void *fb);
+void fsqrtd(void *ft, void *fa);
+void fd2s(void *ft, void *fa);
+void fnegd(void *ft, void *fa);
+int fcmpd(void *ft, void *fa, void *fb, int cop);
+
+#endif /* __ARCH_NDS32_FPUEMU_H */
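
A hedged sketch of the calling convention above (values illustrative): each
void * points at raw IEEE-754 storage, i.e. a 32-bit slot for the
single-precision entry points:

	static void example_soft_add(void)
	{
		float ft, fa = 1.5f, fb = 2.25f;

		fadds(&ft, &fa, &fb);	/* soft-float: ft = 3.75f */
	}
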
diff --git a/arch/nds32/include/asm/nds32_fpu_inst.h b/arch/nds32/include/asm/nds32_fpu_inst.h
new file mode 100644 (file)
index 0000000..1e4b86a
--- /dev/null
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2005-2018 Andes Technology Corporation */
+
+#ifndef __NDS32_FPU_INST_H
+#define __NDS32_FPU_INST_H
+
+#define cop0_op        0x35
+
+/*
+ * COP0 field of opcodes.
+ */
+#define fs1_op 0x0
+#define fs2_op  0x4
+#define fd1_op  0x8
+#define fd2_op  0xc
+
+/*
+ * FS1 opcode.
+ */
+enum fs1 {
+       fadds_op, fsubs_op, fcpynss_op, fcpyss_op,
+       fmadds_op, fmsubs_op, fcmovns_op, fcmovzs_op,
+       fnmadds_op, fnmsubs_op,
+       fmuls_op = 0xc, fdivs_op,
+       fs1_f2op_op = 0xf
+};
+
+/*
+ * FS1/F2OP opcode.
+ */
+enum fs1_f2 {
+       fs2d_op, fsqrts_op,
+       fui2s_op = 0x8, fsi2s_op = 0xc,
+       fs2ui_op = 0x10, fs2ui_z_op = 0x14,
+       fs2si_op = 0x18, fs2si_z_op = 0x1c
+};
+
+/*
+ * FS2 opcode.
+ */
+enum fs2 {
+       fcmpeqs_op, fcmpeqs_e_op, fcmplts_op, fcmplts_e_op,
+       fcmples_op, fcmples_e_op, fcmpuns_op, fcmpuns_e_op
+};
+
+/*
+ * FD1 opcode.
+ */
+enum fd1 {
+       faddd_op, fsubd_op, fcpynsd_op, fcpysd_op,
+       fmaddd_op, fmsubd_op, fcmovnd_op, fcmovzd_op,
+       fnmaddd_op, fnmsubd_op,
+       fmuld_op = 0xc, fdivd_op, fd1_f2op_op = 0xf
+};
+
+/*
+ * FD1/F2OP opcode.
+ */
+enum fd1_f2 {
+       fd2s_op, fsqrtd_op,
+       fui2d_op = 0x8, fsi2d_op = 0xc,
+       fd2ui_op = 0x10, fd2ui_z_op = 0x14,
+       fd2si_op = 0x18, fd2si_z_op = 0x1c
+};
+
+/*
+ * FD2 opcode.
+ */
+enum fd2 {
+       fcmpeqd_op, fcmpeqd_e_op, fcmpltd_op, fcmpltd_e_op,
+       fcmpled_op, fcmpled_e_op, fcmpund_op, fcmpund_e_op
+};
+
+#define NDS32Insn(x) x
+
+#define I_OPCODE_off                   25
+#define NDS32Insn_OPCODE(x)            (NDS32Insn(x) >> I_OPCODE_off)
+
+#define I_OPCODE_offRt                 20
+#define I_OPCODE_mskRt                 (0x1fUL << I_OPCODE_offRt)
+#define NDS32Insn_OPCODE_Rt(x) \
+       ((NDS32Insn(x) & I_OPCODE_mskRt) >> I_OPCODE_offRt)
+
+#define I_OPCODE_offRa                 15
+#define I_OPCODE_mskRa                 (0x1fUL << I_OPCODE_offRa)
+#define NDS32Insn_OPCODE_Ra(x) \
+       ((NDS32Insn(x) & I_OPCODE_mskRa) >> I_OPCODE_offRa)
+
+#define I_OPCODE_offRb                 10
+#define I_OPCODE_mskRb                 (0x1fUL << I_OPCODE_offRb)
+#define NDS32Insn_OPCODE_Rb(x) \
+       ((NDS32Insn(x) & I_OPCODE_mskRb) >> I_OPCODE_offRb)
+
+#define I_OPCODE_offbit1014            10
+#define I_OPCODE_mskbit1014            (0x1fUL << I_OPCODE_offbit1014)
+#define NDS32Insn_OPCODE_BIT1014(x) \
+       ((NDS32Insn(x) & I_OPCODE_mskbit1014) >> I_OPCODE_offbit1014)
+
+#define I_OPCODE_offbit69              6
+#define I_OPCODE_mskbit69              (0xfUL << I_OPCODE_offbit69)
+#define NDS32Insn_OPCODE_BIT69(x) \
+       ((NDS32Insn(x) & I_OPCODE_mskbit69) >> I_OPCODE_offbit69)
+
+#define I_OPCODE_offCOP0               0
+#define I_OPCODE_mskCOP0               (0x3fUL << I_OPCODE_offCOP0)
+#define NDS32Insn_OPCODE_COP0(x) \
+       ((NDS32Insn(x) & I_OPCODE_mskCOP0) >> I_OPCODE_offCOP0)
+
+#endif /* __NDS32_FPU_INST_H */
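
A hedged decode sketch using the accessors above (the dispatch shape is
assumed, not taken from the emulator itself):

	static int example_classify(unsigned long insn)
	{
		if (NDS32Insn_OPCODE(insn) != cop0_op)
			return -1;			/* not a COP0 (FPU) instruction */
		if (NDS32Insn_OPCODE_COP0(insn) == fs1_op)
			return NDS32Insn_OPCODE_BIT69(insn);	/* e.g. fadds_op */
		return -1;
	}
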
diff --git a/arch/nds32/include/asm/perf_event.h b/arch/nds32/include/asm/perf_event.h
new file mode 100644 (file)
index 0000000..fcdff02
--- /dev/null
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2008-2018 Andes Technology Corporation */
+
+#ifndef __ASM_PERF_EVENT_H
+#define __ASM_PERF_EVENT_H
+
+/*
+ * This file is required by perf;
+ * please refer to tools/perf/design.txt for more details.
+ */
+struct pt_regs;
+unsigned long perf_instruction_pointer(struct pt_regs *regs);
+unsigned long perf_misc_flags(struct pt_regs *regs);
+#define perf_misc_flags(regs)   perf_misc_flags(regs)
+
+#endif
diff --git a/arch/nds32/include/asm/pmu.h b/arch/nds32/include/asm/pmu.h
new file mode 100644 (file)
index 0000000..e1ac0b0
--- /dev/null
@@ -0,0 +1,386 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2008-2018 Andes Technology Corporation */
+
+#ifndef __ASM_PMU_H
+#define __ASM_PMU_H
+
+#include <linux/interrupt.h>
+#include <linux/perf_event.h>
+#include <asm/unistd.h>
+#include <asm/bitfield.h>
+
+/* Has special meaning for perf core implementation */
+#define HW_OP_UNSUPPORTED              0x0
+#define C(_x)                          PERF_COUNT_HW_CACHE_##_x
+#define CACHE_OP_UNSUPPORTED           0x0
+
+/* Enough for both software and hardware defined events */
+#define SOFTWARE_EVENT_MASK            0xFF
+
+#define PFM_OFFSET_MAGIC_0             2       /* DO NOT START FROM 0 */
+#define PFM_OFFSET_MAGIC_1             (PFM_OFFSET_MAGIC_0 + 36)
+#define PFM_OFFSET_MAGIC_2             (PFM_OFFSET_MAGIC_1 + 36)
+
+enum { PFMC0, PFMC1, PFMC2, MAX_COUNTERS };
+
+u32 PFM_CTL_OVF[3] = { PFM_CTL_mskOVF0, PFM_CTL_mskOVF1,
+                      PFM_CTL_mskOVF2 };
+u32 PFM_CTL_EN[3] = { PFM_CTL_mskEN0, PFM_CTL_mskEN1,
+                     PFM_CTL_mskEN2 };
+u32 PFM_CTL_OFFSEL[3] = { PFM_CTL_offSEL0, PFM_CTL_offSEL1,
+                         PFM_CTL_offSEL2 };
+u32 PFM_CTL_IE[3] = { PFM_CTL_mskIE0, PFM_CTL_mskIE1, PFM_CTL_mskIE2 };
+u32 PFM_CTL_KS[3] = { PFM_CTL_mskKS0, PFM_CTL_mskKS1, PFM_CTL_mskKS2 };
+u32 PFM_CTL_KU[3] = { PFM_CTL_mskKU0, PFM_CTL_mskKU1, PFM_CTL_mskKU2 };
+u32 PFM_CTL_SEL[3] = { PFM_CTL_mskSEL0, PFM_CTL_mskSEL1, PFM_CTL_mskSEL2 };
+/*
+ * Perf Events' indices
+ */
+#define NDS32_IDX_CYCLE_COUNTER                        0
+#define NDS32_IDX_COUNTER0                     1
+#define NDS32_IDX_COUNTER1                     2
+
+/* The events for a given PMU register set. */
+struct pmu_hw_events {
+       /*
+        * The events that are active on the PMU for the given index.
+        */
+       struct perf_event *events[MAX_COUNTERS];
+
+       /*
+        * A 1 bit for an index indicates that the counter is being used for
+        * an event. A 0 means that the counter can be used.
+        */
+       unsigned long used_mask[BITS_TO_LONGS(MAX_COUNTERS)];
+
+       /*
+        * Hardware lock to serialize accesses to PMU registers. Needed for the
+        * read/modify/write sequences.
+        */
+       raw_spinlock_t pmu_lock;
+};
+
+struct nds32_pmu {
+       struct pmu pmu;
+       cpumask_t active_irqs;
+       char *name;
+       irqreturn_t (*handle_irq)(int irq_num, void *dev);
+       void (*enable)(struct perf_event *event);
+       void (*disable)(struct perf_event *event);
+       int (*get_event_idx)(struct pmu_hw_events *hw_events,
+                            struct perf_event *event);
+       int (*set_event_filter)(struct hw_perf_event *evt,
+                               struct perf_event_attr *attr);
+       u32 (*read_counter)(struct perf_event *event);
+       void (*write_counter)(struct perf_event *event, u32 val);
+       void (*start)(struct nds32_pmu *nds32_pmu);
+       void (*stop)(struct nds32_pmu *nds32_pmu);
+       void (*reset)(void *data);
+       int (*request_irq)(struct nds32_pmu *nds32_pmu, irq_handler_t handler);
+       void (*free_irq)(struct nds32_pmu *nds32_pmu);
+       int (*map_event)(struct perf_event *event);
+       int num_events;
+       atomic_t active_events;
+       u64 max_period;
+       struct platform_device *plat_device;
+       struct pmu_hw_events *(*get_hw_events)(void);
+};
+
+#define to_nds32_pmu(p)                        (container_of(p, struct nds32_pmu, pmu))
+
+int nds32_pmu_register(struct nds32_pmu *nds32_pmu, int type);
+
+u64 nds32_pmu_event_update(struct perf_event *event);
+
+int nds32_pmu_event_set_period(struct perf_event *event);
+
+/*
+ * Common NDS32 SPAv3 event types
+ *
+ * Note: An implementation may not be able to count all of these events
+ * but the encodings are considered to be `reserved' in the case that
+ * they are not available.
+ *
+ * SEL_TOTAL_CYCLES is given an offset because zero is defined as the
+ * NOT_SUPPORTED event mapping in generic perf code.
+ * The event writing implementation needs to handle this offset.
+ */
+enum spav3_counter_0_perf_types {
+       SPAV3_0_SEL_BASE = -1 + PFM_OFFSET_MAGIC_0,     /* counting symbol */
+       SPAV3_0_SEL_TOTAL_CYCLES = 0 + PFM_OFFSET_MAGIC_0,
+       SPAV3_0_SEL_COMPLETED_INSTRUCTION = 1 + PFM_OFFSET_MAGIC_0,
+       SPAV3_0_SEL_LAST        /* counting symbol */
+};
+
+enum spav3_counter_1_perf_types {
+       SPAV3_1_SEL_BASE = -1 + PFM_OFFSET_MAGIC_1,     /* counting symbol */
+       SPAV3_1_SEL_TOTAL_CYCLES = 0 + PFM_OFFSET_MAGIC_1,
+       SPAV3_1_SEL_COMPLETED_INSTRUCTION = 1 + PFM_OFFSET_MAGIC_1,
+       SPAV3_1_SEL_CONDITIONAL_BRANCH = 2 + PFM_OFFSET_MAGIC_1,
+       SPAV3_1_SEL_TAKEN_CONDITIONAL_BRANCH = 3 + PFM_OFFSET_MAGIC_1,
+       SPAV3_1_SEL_PREFETCH_INSTRUCTION = 4 + PFM_OFFSET_MAGIC_1,
+       SPAV3_1_SEL_RET_INST = 5 + PFM_OFFSET_MAGIC_1,
+       SPAV3_1_SEL_JR_INST = 6 + PFM_OFFSET_MAGIC_1,
+       SPAV3_1_SEL_JAL_JRAL_INST = 7 + PFM_OFFSET_MAGIC_1,
+       SPAV3_1_SEL_NOP_INST = 8 + PFM_OFFSET_MAGIC_1,
+       SPAV3_1_SEL_SCW_INST = 9 + PFM_OFFSET_MAGIC_1,
+       SPAV3_1_SEL_ISB_DSB_INST = 10 + PFM_OFFSET_MAGIC_1,
+       SPAV3_1_SEL_CCTL_INST = 11 + PFM_OFFSET_MAGIC_1,
+       SPAV3_1_SEL_TAKEN_INTERRUPTS = 12 + PFM_OFFSET_MAGIC_1,
+       SPAV3_1_SEL_LOADS_COMPLETED = 13 + PFM_OFFSET_MAGIC_1,
+       SPAV3_1_SEL_UITLB_ACCESS = 14 + PFM_OFFSET_MAGIC_1,
+       SPAV3_1_SEL_UDTLB_ACCESS = 15 + PFM_OFFSET_MAGIC_1,
+       SPAV3_1_SEL_MTLB_ACCESS = 16 + PFM_OFFSET_MAGIC_1,
+       SPAV3_1_SEL_CODE_CACHE_ACCESS = 17 + PFM_OFFSET_MAGIC_1,
+       SPAV3_1_SEL_DATA_DEPENDENCY_STALL_CYCLES = 18 + PFM_OFFSET_MAGIC_1,
+       SPAV3_1_SEL_DATA_CACHE_MISS_STALL_CYCLES = 19 + PFM_OFFSET_MAGIC_1,
+       SPAV3_1_SEL_DATA_CACHE_ACCESS = 20 + PFM_OFFSET_MAGIC_1,
+       SPAV3_1_SEL_DATA_CACHE_MISS = 21 + PFM_OFFSET_MAGIC_1,
+       SPAV3_1_SEL_LOAD_DATA_CACHE_ACCESS = 22 + PFM_OFFSET_MAGIC_1,
+       SPAV3_1_SEL_STORE_DATA_CACHE_ACCESS = 23 + PFM_OFFSET_MAGIC_1,
+       SPAV3_1_SEL_ILM_ACCESS = 24 + PFM_OFFSET_MAGIC_1,
+       SPAV3_1_SEL_LSU_BIU_CYCLES = 25 + PFM_OFFSET_MAGIC_1,
+       SPAV3_1_SEL_HPTWK_BIU_CYCLES = 26 + PFM_OFFSET_MAGIC_1,
+       SPAV3_1_SEL_DMA_BIU_CYCLES = 27 + PFM_OFFSET_MAGIC_1,
+       SPAV3_1_SEL_CODE_CACHE_FILL_BIU_CYCLES = 28 + PFM_OFFSET_MAGIC_1,
+       SPAV3_1_SEL_LEGAL_UNALIGN_DCACHE_ACCESS = 29 + PFM_OFFSET_MAGIC_1,
+       SPAV3_1_SEL_PUSH25 = 30 + PFM_OFFSET_MAGIC_1,
+       SPAV3_1_SEL_SYSCALLS_INST = 31 + PFM_OFFSET_MAGIC_1,
+       SPAV3_1_SEL_LAST        /* counting symbol */
+};
+
+enum spav3_counter_2_perf_types {
+       SPAV3_2_SEL_BASE = -1 + PFM_OFFSET_MAGIC_2,     /* counting symbol */
+       SPAV3_2_SEL_TOTAL_CYCLES = 0 + PFM_OFFSET_MAGIC_2,
+       SPAV3_2_SEL_COMPLETED_INSTRUCTION = 1 + PFM_OFFSET_MAGIC_2,
+       SPAV3_2_SEL_CONDITIONAL_BRANCH_MISPREDICT = 2 + PFM_OFFSET_MAGIC_2,
+       SPAV3_2_SEL_TAKEN_CONDITIONAL_BRANCH_MISPREDICT =
+           3 + PFM_OFFSET_MAGIC_2,
+       SPAV3_2_SEL_PREFETCH_INSTRUCTION_CACHE_HIT = 4 + PFM_OFFSET_MAGIC_2,
+       SPAV3_1_SEL_RET_MISPREDICT = 5 + PFM_OFFSET_MAGIC_2,
+       SPAV3_1_SEL_IMMEDIATE_J_INST = 6 + PFM_OFFSET_MAGIC_2,
+       SPAV3_1_SEL_MULTIPLY_INST = 7 + PFM_OFFSET_MAGIC_2,
+       SPAV3_1_SEL_16_BIT_INST = 8 + PFM_OFFSET_MAGIC_2,
+       SPAV3_1_SEL_FAILED_SCW_INST = 9 + PFM_OFFSET_MAGIC_2,
+       SPAV3_1_SEL_LD_AFTER_ST_CONFLICT_REPLAYS = 10 + PFM_OFFSET_MAGIC_2,
+       SPAV3_1_SEL_TAKEN_EXCEPTIONS = 12 + PFM_OFFSET_MAGIC_2,
+       SPAV3_1_SEL_STORES_COMPLETED = 13 + PFM_OFFSET_MAGIC_2,
+       SPAV3_2_SEL_UITLB_MISS = 14 + PFM_OFFSET_MAGIC_2,
+       SPAV3_2_SEL_UDTLB_MISS = 15 + PFM_OFFSET_MAGIC_2,
+       SPAV3_2_SEL_MTLB_MISS = 16 + PFM_OFFSET_MAGIC_2,
+       SPAV3_2_SEL_CODE_CACHE_MISS = 17 + PFM_OFFSET_MAGIC_2,
+       SPAV3_1_SEL_EMPTY_INST_QUEUE_STALL_CYCLES = 18 + PFM_OFFSET_MAGIC_2,
+       SPAV3_1_SEL_DATA_WRITE_BACK = 19 + PFM_OFFSET_MAGIC_2,
+       SPAV3_2_SEL_DATA_CACHE_MISS = 21 + PFM_OFFSET_MAGIC_2,
+       SPAV3_2_SEL_LOAD_DATA_CACHE_MISS = 22 + PFM_OFFSET_MAGIC_2,
+       SPAV3_2_SEL_STORE_DATA_CACHE_MISS = 23 + PFM_OFFSET_MAGIC_2,
+       SPAV3_1_SEL_DLM_ACCESS = 24 + PFM_OFFSET_MAGIC_2,
+       SPAV3_1_SEL_LSU_BIU_REQUEST = 25 + PFM_OFFSET_MAGIC_2,
+       SPAV3_1_SEL_HPTWK_BIU_REQUEST = 26 + PFM_OFFSET_MAGIC_2,
+       SPAV3_1_SEL_DMA_BIU_REQUEST = 27 + PFM_OFFSET_MAGIC_2,
+       SPAV3_1_SEL_CODE_CACHE_FILL_BIU_REQUEST = 28 + PFM_OFFSET_MAGIC_2,
+       SPAV3_1_SEL_EXTERNAL_EVENTS = 29 + PFM_OFFSET_MAGIC_2,
+       SPAV3_1_SEL_POP25 = 30 + PFM_OFFSET_MAGIC_2,
+       SPAV3_2_SEL_LAST        /* counting symbol */
+};
+
+/* Get converted event counter index */
+static inline int get_converted_event_idx(unsigned long event)
+{
+       int idx;
+
+       if ((event) > SPAV3_0_SEL_BASE && event < SPAV3_0_SEL_LAST) {
+               idx = 0;
+       } else if ((event) > SPAV3_1_SEL_BASE && event < SPAV3_1_SEL_LAST) {
+               idx = 1;
+       } else if ((event) > SPAV3_2_SEL_BASE && event < SPAV3_2_SEL_LAST) {
+               idx = 2;
+       } else {
+               pr_err("GET_CONVERTED_EVENT_IDX PFM counter range error\n");
+               return -EPERM;
+       }
+
+       return idx;
+}
+
+/* Get converted hardware event number */
+static inline u32 get_converted_evet_hw_num(u32 event)
+{
+       if (event > SPAV3_0_SEL_BASE && event < SPAV3_0_SEL_LAST)
+               event -= PFM_OFFSET_MAGIC_0;
+       else if (event > SPAV3_1_SEL_BASE && event < SPAV3_1_SEL_LAST)
+               event -= PFM_OFFSET_MAGIC_1;
+       else if (event > SPAV3_2_SEL_BASE && event < SPAV3_2_SEL_LAST)
+               event -= PFM_OFFSET_MAGIC_2;
+       else if (event != 0)
+               pr_err("GET_CONVERTED_EVENT_HW_NUM PFM counter range error\n");
+
+       return event;
+}
+
+/*
+ * NDS32 HW events mapping
+ *
+ * The hardware events that we support. We do support cache operations but
+ * we have harvard caches and no way to combine instruction and data
+ * accesses/misses in hardware.
+ */
+static const unsigned int nds32_pfm_perf_map[PERF_COUNT_HW_MAX] = {
+       [PERF_COUNT_HW_CPU_CYCLES] = SPAV3_0_SEL_TOTAL_CYCLES,
+       [PERF_COUNT_HW_INSTRUCTIONS] = SPAV3_1_SEL_COMPLETED_INSTRUCTION,
+       [PERF_COUNT_HW_CACHE_REFERENCES] = SPAV3_1_SEL_DATA_CACHE_ACCESS,
+       [PERF_COUNT_HW_CACHE_MISSES] = SPAV3_2_SEL_DATA_CACHE_MISS,
+       [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = HW_OP_UNSUPPORTED,
+       [PERF_COUNT_HW_BRANCH_MISSES] = HW_OP_UNSUPPORTED,
+       [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
+       [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = HW_OP_UNSUPPORTED,
+       [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = HW_OP_UNSUPPORTED,
+       [PERF_COUNT_HW_REF_CPU_CYCLES] = HW_OP_UNSUPPORTED
+};
+
+static const unsigned int nds32_pfm_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+       [PERF_COUNT_HW_CACHE_OP_MAX]
+       [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+       [C(L1D)] = {
+                   [C(OP_READ)] = {
+                                   [C(RESULT_ACCESS)] =
+                                   SPAV3_1_SEL_LOAD_DATA_CACHE_ACCESS,
+                                   [C(RESULT_MISS)] =
+                                   SPAV3_2_SEL_LOAD_DATA_CACHE_MISS,
+                                   },
+                   [C(OP_WRITE)] = {
+                                    [C(RESULT_ACCESS)] =
+                                    SPAV3_1_SEL_STORE_DATA_CACHE_ACCESS,
+                                    [C(RESULT_MISS)] =
+                                    SPAV3_2_SEL_STORE_DATA_CACHE_MISS,
+                                    },
+                   [C(OP_PREFETCH)] = {
+                                       [C(RESULT_ACCESS)] =
+                                               CACHE_OP_UNSUPPORTED,
+                                       [C(RESULT_MISS)] =
+                                               CACHE_OP_UNSUPPORTED,
+                                       },
+                   },
+       [C(L1I)] = {
+                   [C(OP_READ)] = {
+                                   [C(RESULT_ACCESS)] =
+                                   SPAV3_1_SEL_CODE_CACHE_ACCESS,
+                                   [C(RESULT_MISS)] =
+                                   SPAV3_2_SEL_CODE_CACHE_MISS,
+                                   },
+                   [C(OP_WRITE)] = {
+                                    [C(RESULT_ACCESS)] =
+                                    SPAV3_1_SEL_CODE_CACHE_ACCESS,
+                                    [C(RESULT_MISS)] =
+                                    SPAV3_2_SEL_CODE_CACHE_MISS,
+                                    },
+                   [C(OP_PREFETCH)] = {
+                                       [C(RESULT_ACCESS)] =
+                                       CACHE_OP_UNSUPPORTED,
+                                       [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+                                       },
+                   },
+       /* TODO: L2CC */
+       [C(LL)] = {
+                  [C(OP_READ)] = {
+                                  [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+                                  [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+                                  },
+                  [C(OP_WRITE)] = {
+                                   [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+                                   [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+                                   },
+                  [C(OP_PREFETCH)] = {
+                                      [C(RESULT_ACCESS)] =
+                                      CACHE_OP_UNSUPPORTED,
+                                      [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+                                      },
+                  },
+       /* The NDS32 PMU does not count TLB read/write hits/misses
+        * separately; it can only count combined accesses/misses, which
+        * mix reads and writes. Therefore, only the READ counters use
+        * these events. We do as much as we can.
+        */
+       [C(DTLB)] = {
+                    [C(OP_READ)] = {
+                                    [C(RESULT_ACCESS)] =
+                                       SPAV3_1_SEL_UDTLB_ACCESS,
+                                    [C(RESULT_MISS)] =
+                                       SPAV3_2_SEL_UDTLB_MISS,
+                                    },
+                    [C(OP_WRITE)] = {
+                                     [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+                                     [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+                                     },
+                    [C(OP_PREFETCH)] = {
+                                        [C(RESULT_ACCESS)] =
+                                        CACHE_OP_UNSUPPORTED,
+                                        [C(RESULT_MISS)] =
+                                        CACHE_OP_UNSUPPORTED,
+                                        },
+                    },
+       [C(ITLB)] = {
+                    [C(OP_READ)] = {
+                                    [C(RESULT_ACCESS)] =
+                                       SPAV3_1_SEL_UITLB_ACCESS,
+                                    [C(RESULT_MISS)] =
+                                       SPAV3_2_SEL_UITLB_MISS,
+                                    },
+                    [C(OP_WRITE)] = {
+                                     [C(RESULT_ACCESS)] =
+                                       CACHE_OP_UNSUPPORTED,
+                                     [C(RESULT_MISS)] =
+                                       CACHE_OP_UNSUPPORTED,
+                                     },
+                    [C(OP_PREFETCH)] = {
+                                        [C(RESULT_ACCESS)] =
+                                               CACHE_OP_UNSUPPORTED,
+                                        [C(RESULT_MISS)] =
+                                               CACHE_OP_UNSUPPORTED,
+                                        },
+                    },
+       [C(BPU)] = {            /* BPU: branch prediction unit; not counted */
+                   [C(OP_READ)] = {
+                                   [C(RESULT_ACCESS)] =
+                                       CACHE_OP_UNSUPPORTED,
+                                   [C(RESULT_MISS)] =
+                                       CACHE_OP_UNSUPPORTED,
+                                   },
+                   [C(OP_WRITE)] = {
+                                    [C(RESULT_ACCESS)] =
+                                       CACHE_OP_UNSUPPORTED,
+                                    [C(RESULT_MISS)] =
+                                       CACHE_OP_UNSUPPORTED,
+                                    },
+                   [C(OP_PREFETCH)] = {
+                                       [C(RESULT_ACCESS)] =
+                                               CACHE_OP_UNSUPPORTED,
+                                       [C(RESULT_MISS)] =
+                                               CACHE_OP_UNSUPPORTED,
+                                       },
+                   },
+       [C(NODE)] = {           /* NODE: NUMA node accesses; not counted */
+                    [C(OP_READ)] = {
+                                    [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+                                    [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+                                    },
+                    [C(OP_WRITE)] = {
+                                     [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+                                     [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+                                     },
+                    [C(OP_PREFETCH)] = {
+                                        [C(RESULT_ACCESS)] =
+                                               CACHE_OP_UNSUPPORTED,
+                                        [C(RESULT_MISS)] =
+                                               CACHE_OP_UNSUPPORTED,
+                                        },
+                    },
+};
+
+int nds32_pmu_map_event(struct perf_event *event,
+                       const unsigned int (*event_map)[PERF_COUNT_HW_MAX],
+                       const unsigned int (*cache_map)[PERF_COUNT_HW_CACHE_MAX]
+                       [PERF_COUNT_HW_CACHE_OP_MAX]
+                       [PERF_COUNT_HW_CACHE_RESULT_MAX], u32 raw_event_mask);
+
+#endif /* __ASM_PMU_H */
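
A hedged round-trip example of the magic-offset encoding above: the offset
marks which PFM counter bank an event belongs to, and the helpers strip it
again before the raw select value is programmed into PFM_CTL:

	static void example_event_encoding(void)
	{
		unsigned long ev = SPAV3_1_SEL_DATA_CACHE_ACCESS;
		int idx = get_converted_event_idx(ev);		/* 1: only PFMC1 counts it */
		u32 raw = get_converted_evet_hw_num(ev);	/* 20: raw SEL1 value */

		(void)idx;
		(void)raw;
	}
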
index c2660f5..72024f8 100644 (file)
@@ -35,6 +35,8 @@ struct thread_struct {
        unsigned long address;
        unsigned long trap_no;
        unsigned long error_code;
+
+       struct fpu_struct fpu;
 };
 
 #define INIT_THREAD  { }
@@ -72,6 +74,11 @@ struct task_struct;
 
 /* Free all resources held by a thread. */
 #define release_thread(thread) do { } while(0)
+#if IS_ENABLED(CONFIG_FPU)
+#if IS_ENABLED(CONFIG_LAZY_FPU)
+extern struct task_struct *last_task_used_math;
+#endif
+#endif
 
 /* Prepare to copy thread state - unlazy all lazy status */
 #define prepare_to_copy(tsk)   do { } while (0)
diff --git a/arch/nds32/include/asm/sfp-machine.h b/arch/nds32/include/asm/sfp-machine.h
new file mode 100644 (file)
index 0000000..b1a5caa
--- /dev/null
@@ -0,0 +1,158 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2005-2018 Andes Technology Corporation */
+
+#include <asm/bitfield.h>
+
+#define _FP_W_TYPE_SIZE                32
+#define _FP_W_TYPE             unsigned long
+#define _FP_WS_TYPE            signed long
+#define _FP_I_TYPE             long
+
+#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
+#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
+#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))
+
+#define _FP_MUL_MEAT_S(R, X, Y)                                \
+       _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_S, R, X, Y, umul_ppmm)
+#define _FP_MUL_MEAT_D(R, X, Y)                                \
+       _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_D, R, X, Y, umul_ppmm)
+#define _FP_MUL_MEAT_Q(R, X, Y)                                \
+       _FP_MUL_MEAT_4_wide(_FP_WFRACBITS_Q, R, X, Y, umul_ppmm)
+
+#define _FP_MUL_MEAT_DW_S(R, X, Y)                     \
+       _FP_MUL_MEAT_DW_1_wide(_FP_WFRACBITS_S, R, X, Y, umul_ppmm)
+#define _FP_MUL_MEAT_DW_D(R, X, Y)                     \
+       _FP_MUL_MEAT_DW_2_wide(_FP_WFRACBITS_D, R, X, Y, umul_ppmm)
+
+#define _FP_DIV_MEAT_S(R, X, Y)        _FP_DIV_MEAT_1_udiv_norm(S, R, X, Y)
+#define _FP_DIV_MEAT_D(R, X, Y)        _FP_DIV_MEAT_2_udiv(D, R, X, Y)
+
+#define _FP_NANFRAC_S          ((_FP_QNANBIT_S << 1) - 1)
+#define _FP_NANFRAC_D          ((_FP_QNANBIT_D << 1) - 1), -1
+#define _FP_NANFRAC_Q          ((_FP_QNANBIT_Q << 1) - 1), -1, -1, -1
+#define _FP_NANSIGN_S          0
+#define _FP_NANSIGN_D          0
+#define _FP_NANSIGN_Q          0
+
+#define _FP_KEEPNANFRACP 1
+#define _FP_QNANNEGATEDP 0
+
+#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP)                     \
+do {                                                           \
+       if ((_FP_FRAC_HIGH_RAW_##fs(X) & _FP_QNANBIT_##fs)      \
+         && !(_FP_FRAC_HIGH_RAW_##fs(Y) & _FP_QNANBIT_##fs)) { \
+               R##_s = Y##_s;                                  \
+               _FP_FRAC_COPY_##wc(R, Y);                       \
+       } else {                                                \
+               R##_s = X##_s;                                  \
+               _FP_FRAC_COPY_##wc(R, X);                       \
+       }                                                       \
+       R##_c = FP_CLS_NAN;                                     \
+} while (0)
+
+#define __FPU_FPCSR    (current->thread.fpu.fpcsr)
+
+/* Obtain the current rounding mode. */
+#define FP_ROUNDMODE                    \
+({                                      \
+       __FPU_FPCSR & FPCSR_mskRM;      \
+})
+
+#define FP_RND_NEAREST         0
+#define FP_RND_PINF            1
+#define FP_RND_MINF            2
+#define FP_RND_ZERO            3
+
+#define FP_EX_INVALID          FPCSR_mskIVO
+#define FP_EX_DIVZERO          FPCSR_mskDBZ
+#define FP_EX_OVERFLOW         FPCSR_mskOVF
+#define FP_EX_UNDERFLOW                FPCSR_mskUDF
+#define FP_EX_INEXACT          FPCSR_mskIEX
+
+#define SF_CEQ 2
+#define SF_CLT 1
+#define SF_CGT 3
+#define SF_CUN 4
+
+#include <asm/byteorder.h>
+
+#ifdef __BIG_ENDIAN__
+#define __BYTE_ORDER __BIG_ENDIAN
+#define __LITTLE_ENDIAN 0
+#else
+#define __BYTE_ORDER __LITTLE_ENDIAN
+#define __BIG_ENDIAN 0
+#endif
+
+#define abort() do { } while (0)
+#define umul_ppmm(w1, w0, u, v)                                                \
+do {                                                                   \
+       UWtype __x0, __x1, __x2, __x3;                                  \
+       UHWtype __ul, __vl, __uh, __vh;                                 \
+                                                                       \
+       __ul = __ll_lowpart(u);                                         \
+       __uh = __ll_highpart(u);                                        \
+       __vl = __ll_lowpart(v);                                         \
+       __vh = __ll_highpart(v);                                        \
+                                                                       \
+       __x0 = (UWtype) __ul * __vl;                                    \
+       __x1 = (UWtype) __ul * __vh;                                    \
+       __x2 = (UWtype) __uh * __vl;                                    \
+       __x3 = (UWtype) __uh * __vh;                                    \
+                                                                       \
+       __x1 += __ll_highpart(__x0);                                    \
+       __x1 += __x2;                                                   \
+       if (__x1 < __x2)                                                \
+               __x3 += __ll_B;                                         \
+                                                                       \
+       (w1) = __x3 + __ll_highpart(__x1);                              \
+       (w0) = __ll_lowpart(__x1) * __ll_B + __ll_lowpart(__x0);        \
+} while (0)
+
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+do { \
+       UWtype __x; \
+       __x = (al) + (bl); \
+       (sh) = (ah) + (bh) + (__x < (al)); \
+       (sl) = __x; \
+} while (0)
+
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+do { \
+       UWtype __x; \
+       __x = (al) - (bl); \
+       (sh) = (ah) - (bh) - (__x > (al)); \
+       (sl) = __x; \
+} while (0)
+
+#define udiv_qrnnd(q, r, n1, n0, d)                            \
+do {                                                           \
+       UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m;         \
+       __d1 = __ll_highpart(d);                                \
+       __d0 = __ll_lowpart(d);                                 \
+                                                               \
+       __r1 = (n1) % __d1;                                     \
+       __q1 = (n1) / __d1;                                     \
+       __m = (UWtype) __q1 * __d0;                             \
+       __r1 = __r1 * __ll_B | __ll_highpart(n0);               \
+       if (__r1 < __m) {                                       \
+               __q1--, __r1 += (d);                            \
+               if (__r1 >= (d))                                \
+                       if (__r1 < __m)                         \
+                               __q1--, __r1 += (d);            \
+       }                                                       \
+       __r1 -= __m;                                            \
+       __r0 = __r1 % __d1;                                     \
+       __q0 = __r1 / __d1;                                     \
+       __m = (UWtype) __q0 * __d0;                             \
+       __r0 = __r0 * __ll_B | __ll_lowpart(n0);                \
+       if (__r0 < __m) {                                       \
+               __q0--, __r0 += (d);                            \
+               if (__r0 >= (d))                                \
+                       if (__r0 < __m)                         \
+                               __q0--, __r0 += (d);            \
+       }                                                       \
+       __r0 -= __m;                                            \
+       (q) = (UWtype) __q1 * __ll_B | __q0;                    \
+       (r) = __r0;                                             \
+} while (0)
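
A hedged worked example of umul_ppmm() above, which assembles a 64-bit
product from 16-bit half-words (UWtype comes from the soft-fp machinery):

	static void example_umul(void)
	{
		UWtype hi, lo;

		/* u = 0x00010001, v = 0x00000003:
		 *   __ul = 1, __uh = 1, __vl = 3, __vh = 0
		 *   __x0 = 3, __x1 = 0 + 3, __x2 = 3, __x3 = 0
		 * so hi == 0x00000000 and lo == 0x00030003.
		 */
		umul_ppmm(hi, lo, 0x00010001UL, 0x00000003UL);
	}
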
diff --git a/arch/nds32/include/asm/stacktrace.h b/arch/nds32/include/asm/stacktrace.h
new file mode 100644 (file)
index 0000000..6bf7c77
--- /dev/null
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2008-2018 Andes Technology Corporation */
+
+#ifndef __ASM_STACKTRACE_H
+#define __ASM_STACKTRACE_H
+
+/* Kernel callchain */
+struct stackframe {
+       unsigned long fp;
+       unsigned long sp;
+       unsigned long lp;
+};
+
+/*
+ * struct frame_tail: User callchain
+ * IMPORTANT:
+ * This struct is used for call-stack walking;
+ * the order and types of its fields matter.
+ * Do not use an array; each field stores sizeof(pointer) bytes.
+ *
+ * For details, refer to arch/arm/kernel/perf_event.c
+ */
+struct frame_tail {
+       unsigned long stack_fp;
+       unsigned long stack_lp;
+};
+
+/* For User callchain with optimize for size */
+struct frame_tail_opt_size {
+       unsigned long stack_r6;
+       unsigned long stack_fp;
+       unsigned long stack_gp;
+       unsigned long stack_lp;
+};
+
+extern void
+get_real_ret_addr(unsigned long *addr, struct task_struct *tsk, int *graph);
+
+#endif /* __ASM_STACKTRACE_H */
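
A hedged sketch of the user call-chain walk that the frame_tail layout above
enables (modeled on the ARM code it references; the helper name is
hypothetical, and <linux/uaccess.h> is assumed):

	static struct frame_tail __user *
	example_user_backtrace(struct frame_tail __user *tail)
	{
		struct frame_tail buf;

		if (copy_from_user(&buf, tail, sizeof(buf)))
			return NULL;
		/* record buf.stack_lp as one call-chain entry here */
		return (struct frame_tail __user *)buf.stack_fp;
	}
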
diff --git a/arch/nds32/include/asm/suspend.h b/arch/nds32/include/asm/suspend.h
new file mode 100644 (file)
index 0000000..6ed2418
--- /dev/null
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+// Copyright (C) 2008-2017 Andes Technology Corporation
+
+#ifndef __ASM_NDS32_SUSPEND_H
+#define __ASM_NDS32_SUSPEND_H
+
+extern void suspend2ram(void);
+extern void cpu_resume(void);
+extern unsigned long wake_mask;
+
+#endif
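
A hedged sketch (pm.c itself is not shown in this excerpt) of how
suspend2ram() above would typically be wired into the generic suspend
framework via <linux/suspend.h>:

	static int example_suspend_enter(suspend_state_t state)
	{
		if (state == PM_SUSPEND_MEM)
			suspend2ram();
		return 0;
	}

	static const struct platform_suspend_ops example_suspend_ops = {
		.valid = suspend_valid_only_mem,
		.enter = example_suspend_enter,
	};
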
index 78778ec..da32101 100644 (file)
@@ -7,6 +7,7 @@
 asmlinkage long sys_cacheflush(unsigned long addr, unsigned long len, unsigned int op);
 asmlinkage long sys_fadvise64_64_wrapper(int fd, int advice, loff_t offset, loff_t len);
 asmlinkage long sys_rt_sigreturn_wrapper(void);
+asmlinkage long sys_udftrap(int option);
 
 #include <asm-generic/syscalls.h>
 
index 56043ce..2d3213f 100644 (file)
@@ -4,6 +4,13 @@
 #ifndef __ASM_AUXVEC_H
 #define __ASM_AUXVEC_H
 
+/*
+ * This entry gives some information about the FPU initialization
+ * performed by the kernel.
+ */
+#define AT_FPUCW       18      /* Used FPU control word.  */
+
+
 /* VDSO location */
 #define AT_SYSINFO_EHDR        33
 
index 00567b2..58afc41 100644 (file)
@@ -9,6 +9,19 @@
  * before the signal handler was invoked.  Note: only add new entries
  * to the end of the structure.
  */
+struct fpu_struct {
+       unsigned long long fd_regs[32];
+       unsigned long fpcsr;
+       /*
+        * UDF_trap indicates whether the underflow trap is enabled or
+        * not. When UDF_trap == 1, the process will be trapped and then
+        * receive a SIGFPE signal when it encounters an underflow
+        * exception. UDF_trap is only modified through the setfputrap
+        * syscall, so it need not be saved to or loaded from the context
+        * on each context switch.
+        */
+       unsigned long UDF_trap;
+};
 
 struct zol_struct {
        unsigned long nds32_lc; /* $LC */
@@ -54,6 +67,7 @@ struct sigcontext {
        unsigned long fault_address;
        unsigned long used_math_flag;
        /* FPU Registers */
+       struct fpu_struct fpu;
        struct zol_struct zol;
 };
 
diff --git a/arch/nds32/include/uapi/asm/udftrap.h b/arch/nds32/include/uapi/asm/udftrap.h
new file mode 100644 (file)
index 0000000..433f79d
--- /dev/null
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2005-2018 Andes Technology Corporation */
+#ifndef        _ASM_SETFPUTRAP
+#define        _ASM_SETFPUTRAP
+
+/*
+ * Options for setfputrap system call
+ */
+#define        DISABLE_UDFTRAP 0       /* disable underflow exception trap */
+#define        ENABLE_UDFTRAP  1       /* enable underflow exception trap */
+#define        GET_UDFTRAP     2       /* get underflow exception trap status only */
+
+#endif /* _ASM_SETFPUTRAP */
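
A hedged userspace sketch of the options above, invoking the new
arch-specific syscall directly (error handling omitted):

	#include <unistd.h>
	#include <sys/syscall.h>

	void example_enable_udftrap(void)
	{
		long old = syscall(__NR_udftrap, GET_UDFTRAP);

		syscall(__NR_udftrap, ENABLE_UDFTRAP);	/* SIGFPE on underflow */
		(void)old;
	}
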
index 603e826..c2c3a3e 100644 (file)
@@ -9,4 +9,6 @@
 
 /* Additional NDS32 specific syscalls. */
 #define __NR_cacheflush                (__NR_arch_specific_syscall)
+#define __NR_udftrap           (__NR_arch_specific_syscall + 1)
 __SYSCALL(__NR_cacheflush, sys_cacheflush)
+__SYSCALL(__NR_udftrap, sys_udftrap)
index 27cded3..a1a1d61 100644 (file)
@@ -4,7 +4,6 @@
 
 CPPFLAGS_vmlinux.lds   := -DTEXTADDR=$(TEXTADDR)
 AFLAGS_head.o          := -DTEXTADDR=$(TEXTADDR)
-
 # Object file lists.
 
 obj-y                  := ex-entry.o ex-exit.o ex-scall.o irq.o \
@@ -14,11 +13,15 @@ obj-y                       := ex-entry.o ex-exit.o ex-scall.o irq.o \
 
 obj-$(CONFIG_MODULES)          += nds32_ksyms.o module.o
 obj-$(CONFIG_STACKTRACE)       += stacktrace.o
+obj-$(CONFIG_FPU)              += fpu.o
 obj-$(CONFIG_OF)               += devtree.o
 obj-$(CONFIG_CACHE_L2)         += atl2c.o
-
+obj-$(CONFIG_PERF_EVENTS)      += perf_event_cpu.o
+obj-$(CONFIG_PM)               += pm.o sleep.o
 extra-y := head.o vmlinux.lds
 
+CFLAGS_fpu.o += -mext-fpu-sp -mext-fpu-dp
+
 
 obj-y                          += vdso/
 
index 21a1440..107d98a 100644 (file)
@@ -7,6 +7,7 @@
 #include <asm/errno.h>
 #include <asm/asm-offsets.h>
 #include <asm/page.h>
+#include <asm/fpu.h>
 
 #ifdef CONFIG_HWZOL
        .macro push_zol
        mfusr   $r16, $LC
        .endm
 #endif
+       .macro  skip_save_fucop_ctl
+#if defined(CONFIG_FPU)
+skip_fucop_ctl:
+       smw.adm $p0, [$sp], $p0, #0x1
+       j fucop_ctl_done
+#endif
+       .endm
 
        .macro  save_user_regs
-
+#if defined(CONFIG_FPU)
+       sethi   $p0, hi20(has_fpu)
+       lbsi    $p0, [$p0+lo12(has_fpu)]
+       beqz    $p0, skip_fucop_ctl
+       mfsr    $p0, $FUCOP_CTL
+       smw.adm $p0, [$sp], $p0, #0x1
+       bclr    $p0, $p0, #FUCOP_CTL_offCP0EN
+       mtsr    $p0, $FUCOP_CTL
+fucop_ctl_done:
+       /* move $SP to the bottom of pt_regs */
+       addi    $sp, $sp, -FUCOP_CTL_OFFSET
+#else
        smw.adm $sp, [$sp], $sp, #0x1
        /* move $SP to the bottom of pt_regs */
        addi    $sp, $sp, -OSP_OFFSET
+#endif
 
        /* push $r0 ~ $r25 */
        smw.bim $r0, [$sp], $r25
@@ -79,6 +99,7 @@ exception_handlers:
        .long   eh_syscall              !Syscall
        .long   asm_do_IRQ              !IRQ
 
+       skip_save_fucop_ctl
 common_exception_handler:
        save_user_regs
        mfsr    $p0, $ITYPE
@@ -103,7 +124,6 @@ common_exception_handler:
        mtsr    $r21, $PSW
        dsb
        jr      $p1
-
        /* syscall */
 1:
        addi    $p1, $p0, #-NDS32_VECTOR_offEXCEPTION
index f00af92..97ba15c 100644 (file)
@@ -8,6 +8,7 @@
 #include <asm/asm-offsets.h>
 #include <asm/thread_info.h>
 #include <asm/current.h>
+#include <asm/fpu.h>
 
 
 
        .macro  restore_user_regs_first
        setgie.d
        isb
-
+#if defined(CONFIG_FPU)
+       addi    $sp, $sp, OSP_OFFSET
+       lmw.adm $r12, [$sp], $r25, #0x0
+       sethi   $p0, hi20(has_fpu)
+       lbsi    $p0, [$p0+lo12(has_fpu)]
+       beqz    $p0, 2f
+       mtsr    $r25, $FUCOP_CTL
+2:
+#else
        addi    $sp, $sp, FUCOP_CTL_OFFSET
-
        lmw.adm $r12, [$sp], $r24, #0x0
+#endif
        mtsr    $r12, $SP_USR
        mtsr    $r13, $IPC
 #ifdef CONFIG_HWZOL
index 36aa87e..270050f 100644 (file)
@@ -19,11 +19,13 @@ ENTRY(__switch_to)
 
        la      $p0, __entry_task
        sw      $r1, [$p0]
-       move    $p1, $r0
-       addi    $p1, $p1, #THREAD_CPU_CONTEXT
+       addi    $p1, $r0, #THREAD_CPU_CONTEXT
        smw.bi  $r6, [$p1], $r14, #0xb          ! push r6~r14, fp, lp, sp
        move    $r25, $r1
-       addi    $r1, $r1, #THREAD_CPU_CONTEXT
+#if defined(CONFIG_FPU)
+       call    _switch_fpu
+#endif
+       addi    $r1, $r25, #THREAD_CPU_CONTEXT
        lmw.bi  $r6, [$r1], $r14, #0xb          ! pop r6~r14, fp, lp, sp
        ret
 
diff --git a/arch/nds32/kernel/fpu.c b/arch/nds32/kernel/fpu.c
new file mode 100644 (file)
index 0000000..fddd40c
--- /dev/null
@@ -0,0 +1,269 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2005-2018 Andes Technology Corporation
+
+#include <linux/sched.h>
+#include <linux/signal.h>
+#include <linux/sched/signal.h>
+#include <asm/processor.h>
+#include <asm/user.h>
+#include <asm/io.h>
+#include <asm/bitfield.h>
+#include <asm/fpu.h>
+
+const struct fpu_struct init_fpuregs = {
+       .fd_regs = {[0 ... 31] = sNAN64},
+       .fpcsr = FPCSR_INIT,
+#if IS_ENABLED(CONFIG_SUPPORT_DENORMAL_ARITHMETIC)
+       .UDF_trap = 0
+#endif
+};
+
+void save_fpu(struct task_struct *tsk)
+{
+       unsigned int fpcfg, fpcsr;
+
+       enable_fpu();
+       fpcfg = ((__nds32__fmfcfg() & FPCFG_mskFREG) >> FPCFG_offFREG);
+       switch (fpcfg) {
+       case SP32_DP32_reg:
+               asm volatile ("fsdi $fd31, [%0+0xf8]\n\t"
+                             "fsdi $fd30, [%0+0xf0]\n\t"
+                             "fsdi $fd29, [%0+0xe8]\n\t"
+                             "fsdi $fd28, [%0+0xe0]\n\t"
+                             "fsdi $fd27, [%0+0xd8]\n\t"
+                             "fsdi $fd26, [%0+0xd0]\n\t"
+                             "fsdi $fd25, [%0+0xc8]\n\t"
+                             "fsdi $fd24, [%0+0xc0]\n\t"
+                             "fsdi $fd23, [%0+0xb8]\n\t"
+                             "fsdi $fd22, [%0+0xb0]\n\t"
+                             "fsdi $fd21, [%0+0xa8]\n\t"
+                             "fsdi $fd20, [%0+0xa0]\n\t"
+                             "fsdi $fd19, [%0+0x98]\n\t"
+                             "fsdi $fd18, [%0+0x90]\n\t"
+                             "fsdi $fd17, [%0+0x88]\n\t"
+                             "fsdi $fd16, [%0+0x80]\n\t"
+                             : /* no output */
+                             : "r" (&tsk->thread.fpu)
+                             : "memory");
+               /* fall through */
+       case SP32_DP16_reg:
+               asm volatile ("fsdi $fd15, [%0+0x78]\n\t"
+                             "fsdi $fd14, [%0+0x70]\n\t"
+                             "fsdi $fd13, [%0+0x68]\n\t"
+                             "fsdi $fd12, [%0+0x60]\n\t"
+                             "fsdi $fd11, [%0+0x58]\n\t"
+                             "fsdi $fd10, [%0+0x50]\n\t"
+                             "fsdi $fd9,  [%0+0x48]\n\t"
+                             "fsdi $fd8,  [%0+0x40]\n\t"
+                             : /* no output */
+                             : "r" (&tsk->thread.fpu)
+                             : "memory");
+               /* fall through */
+       case SP16_DP8_reg:
+               asm volatile ("fsdi $fd7,  [%0+0x38]\n\t"
+                             "fsdi $fd6,  [%0+0x30]\n\t"
+                             "fsdi $fd5,  [%0+0x28]\n\t"
+                             "fsdi $fd4,  [%0+0x20]\n\t"
+                             : /* no output */
+                             : "r" (&tsk->thread.fpu)
+                             : "memory");
+               /* fall through */
+       case SP8_DP4_reg:
+               asm volatile ("fsdi $fd3,  [%1+0x18]\n\t"
+                             "fsdi $fd2,  [%1+0x10]\n\t"
+                             "fsdi $fd1,  [%1+0x8]\n\t"
+                             "fsdi $fd0,  [%1+0x0]\n\t"
+                             "fmfcsr   %0\n\t"
+                             "swi  %0, [%1+0x100]\n\t"
+                             : "=&r" (fpcsr)
+                             : "r"(&tsk->thread.fpu)
+                             : "memory");
+       }
+       disable_fpu();
+}
+
+void load_fpu(const struct fpu_struct *fpregs)
+{
+       unsigned int fpcfg, fpcsr;
+
+       enable_fpu();
+       fpcfg = ((__nds32__fmfcfg() & FPCFG_mskFREG) >> FPCFG_offFREG);
+       switch (fpcfg) {
+       case SP32_DP32_reg:
+               asm volatile ("fldi $fd31, [%0+0xf8]\n\t"
+                             "fldi $fd30, [%0+0xf0]\n\t"
+                             "fldi $fd29, [%0+0xe8]\n\t"
+                             "fldi $fd28, [%0+0xe0]\n\t"
+                             "fldi $fd27, [%0+0xd8]\n\t"
+                             "fldi $fd26, [%0+0xd0]\n\t"
+                             "fldi $fd25, [%0+0xc8]\n\t"
+                             "fldi $fd24, [%0+0xc0]\n\t"
+                             "fldi $fd23, [%0+0xb8]\n\t"
+                             "fldi $fd22, [%0+0xb0]\n\t"
+                             "fldi $fd21, [%0+0xa8]\n\t"
+                             "fldi $fd20, [%0+0xa0]\n\t"
+                             "fldi $fd19, [%0+0x98]\n\t"
+                             "fldi $fd18, [%0+0x90]\n\t"
+                             "fldi $fd17, [%0+0x88]\n\t"
+                             "fldi $fd16, [%0+0x80]\n\t"
+                             : /* no output */
+                             : "r" (fpregs));
+               /* fall through */
+       case SP32_DP16_reg:
+               asm volatile ("fldi $fd15, [%0+0x78]\n\t"
+                             "fldi $fd14, [%0+0x70]\n\t"
+                             "fldi $fd13, [%0+0x68]\n\t"
+                             "fldi $fd12, [%0+0x60]\n\t"
+                             "fldi $fd11, [%0+0x58]\n\t"
+                             "fldi $fd10, [%0+0x50]\n\t"
+                             "fldi $fd9,  [%0+0x48]\n\t"
+                             "fldi $fd8,  [%0+0x40]\n\t"
+                             : /* no output */
+                             : "r" (fpregs));
+               /* fall through */
+       case SP16_DP8_reg:
+               asm volatile ("fldi $fd7,  [%0+0x38]\n\t"
+                             "fldi $fd6,  [%0+0x30]\n\t"
+                             "fldi $fd5,  [%0+0x28]\n\t"
+                             "fldi $fd4,  [%0+0x20]\n\t"
+                             : /* no output */
+                             : "r" (fpregs));
+               /* fall through */
+       case SP8_DP4_reg:
+               asm volatile ("fldi $fd3,  [%1+0x18]\n\t"
+                             "fldi $fd2,  [%1+0x10]\n\t"
+                             "fldi $fd1,  [%1+0x8]\n\t"
+                             "fldi $fd0,  [%1+0x0]\n\t"
+                             "lwi  %0, [%1+0x100]\n\t"
+                             "fmtcsr   %0\n\t":"=&r" (fpcsr)
+                             : "r"(fpregs));
+       }
+       disable_fpu();
+}
+
+void store_fpu_for_suspend(void)
+{
+#ifdef CONFIG_LAZY_FPU
+       if (last_task_used_math != NULL)
+               save_fpu(last_task_used_math);
+       last_task_used_math = NULL;
+#else
+       if (!used_math())
+               return;
+       unlazy_fpu(current);
+#endif
+       clear_fpu(task_pt_regs(current));
+}
+
+inline void do_fpu_context_switch(struct pt_regs *regs)
+{
+       /* Enable the FPU for this task. */
+
+       if (!user_mode(regs)) {
+               pr_err("BUG: FPU is used in kernel mode.\n");
+               BUG();
+               return;
+       }
+
+       enable_ptreg_fpu(regs);
+#ifdef CONFIG_LAZY_FPU /* Lazy FPU is used */
+       if (last_task_used_math == current)
+               return;
+       if (last_task_used_math != NULL)
+               /* Another process owns the FPU state; save it away */
+               save_fpu(last_task_used_math);
+       last_task_used_math = current;
+#endif
+       if (used_math()) {
+               load_fpu(&current->thread.fpu);
+       } else {
+               /* First time FPU user.  */
+               load_fpu(&init_fpuregs);
+#if IS_ENABLED(CONFIG_SUPPORT_DENORMAL_ARITHMETIC)
+               current->thread.fpu.UDF_trap = init_fpuregs.UDF_trap;
+#endif
+               set_used_math();
+       }
+
+}
+
+inline void fill_sigfpe_signo(unsigned int fpcsr, int *signo)
+{
+       if (fpcsr & FPCSR_mskOVFT)
+               *signo = FPE_FLTOVF;
+#ifndef CONFIG_SUPPORT_DENORMAL_ARITHMETIC
+       else if (fpcsr & FPCSR_mskUDFT)
+               *signo = FPE_FLTUND;
+#endif
+       else if (fpcsr & FPCSR_mskIVOT)
+               *signo = FPE_FLTINV;
+       else if (fpcsr & FPCSR_mskDBZT)
+               *signo = FPE_FLTDIV;
+       else if (fpcsr & FPCSR_mskIEXT)
+               *signo = FPE_FLTRES;
+}
+
+inline void handle_fpu_exception(struct pt_regs *regs)
+{
+       unsigned int fpcsr;
+       int si_code = 0, si_signo = SIGFPE;
+#if IS_ENABLED(CONFIG_SUPPORT_DENORMAL_ARITHMETIC)
+       unsigned long redo_except = FPCSR_mskDNIT|FPCSR_mskUDFT;
+#else
+       unsigned long redo_except = FPCSR_mskDNIT;
+#endif
+
+       lose_fpu();
+       fpcsr = current->thread.fpu.fpcsr;
+
+       if (fpcsr & redo_except) {
+#if IS_ENABLED(CONFIG_SUPPORT_DENORMAL_ARITHMETIC)
+               if (fpcsr & FPCSR_mskUDFT)
+                       current->thread.fpu.fpcsr &= ~FPCSR_mskIEX;
+#endif
+               si_signo = do_fpuemu(regs, &current->thread.fpu);
+               fpcsr = current->thread.fpu.fpcsr;
+               if (!si_signo)
+                       goto done;
+       } else if (fpcsr & FPCSR_mskRIT) {
+               if (!user_mode(regs))
+                       do_exit(SIGILL);
+               si_signo = SIGILL;
+       }
+
+       switch (si_signo) {
+       case SIGFPE:
+               fill_sigfpe_signo(fpcsr, &si_code);
+               break;
+       case SIGILL:
+               show_regs(regs);
+               si_code = ILL_COPROC;
+               break;
+       case SIGBUS:
+               si_code = BUS_ADRERR;
+               break;
+       default:
+               break;
+       }
+
+       force_sig_fault(si_signo, si_code,
+                       (void __user *)instruction_pointer(regs), current);
+done:
+       own_fpu();
+}
+
+bool do_fpu_exception(unsigned int subtype, struct pt_regs *regs)
+{
+       int done = true;
+       /* Coprocessor disabled exception */
+       if (subtype == FPU_DISABLE_EXCEPTION) {
+               preempt_disable();
+               do_fpu_context_switch(regs);
+               preempt_enable();
+       }
+       /* Coprocessor exception such as underflow and overflow */
+       else if (subtype == FPU_EXCEPTION)
+               handle_fpu_exception(regs);
+       else
+               done = false;
+       return done;
+}
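
Editor's note (not part of the patch): the si_code chosen by fill_sigfpe_signo()
above is visible to user space through an SA_SIGINFO handler. A minimal sketch,
assuming the relevant trap bits are unmasked in FPCSR first (feenableexcept()
is a glibc extension and may not exist on every libc):

    #include <signal.h>
    #include <stdio.h>
    #include <stdlib.h>

    static void fpe_handler(int sig, siginfo_t *info, void *ctx)
    {
            /* si_code is FPE_FLTDIV, FPE_FLTOVF, ... as set above */
            printf("caught SIGFPE, si_code=%d\n", info->si_code);
            exit(0);
    }

    int main(void)
    {
            struct sigaction sa = {
                    .sa_sigaction = fpe_handler,
                    .sa_flags = SA_SIGINFO,
            };
            volatile double zero = 0.0;

            sigaction(SIGFPE, &sa, NULL);
            /* traps only if divide-by-zero exceptions are unmasked */
            printf("%f\n", 1.0 / zero);
            return 0;
    }
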
index c5fdae1..db64b78 100644 (file)
@@ -123,21 +123,12 @@ _image_size_check:
         andi    $r0, $r0, MMU_CFG_mskTBS
         srli    $r6, $r6, MMU_CFG_offTBW
         srli    $r0, $r0, MMU_CFG_offTBS
-        /*
-         * we just map the kernel to the maximum way - 1 of tlb
-         * reserver one way for UART VA mapping
-         * it will cause page fault if UART mapping cover the kernel mapping
-         *
-         * direct mapping is not supported now.
-         */
-        li      $r2, 't'
-        beqz    $r6, __error                 ! MMU_CFG.TBW = 0 is direct mappin
+       addi    $r6, $r6, #0x1               ! MMU_CFG.TBW value -> meaning
         addi    $r0, $r0, #0x2               ! MMU_CFG.TBS value -> meaning
         sll     $r0, $r6, $r0                ! entries = k-way * n-set
         mul     $r6, $r0, $r5                ! max size = entries * page size
         /* check kernel image size */
         la      $r3, (_end - PAGE_OFFSET)
-        li      $r2, 's'
         bgt     $r3, $r6, __error
 
        li      $r2, #(PHYS_OFFSET + TLB_DATA_kernel_text_attr)
@@ -160,7 +151,7 @@ _tlb:
 #endif
        mtsr    $r3, $TLB_MISC
 
-       mfsr    $r0, $MISC_CTL      ! Enable BTB and RTP and shadow sp
+       mfsr    $r0, $MISC_CTL      ! Enable BTB, RTP, shadow sp, and HW_PRE
        ori     $r0, $r0, #MISC_init
        mtsr    $r0, $MISC_CTL
 
diff --git a/arch/nds32/kernel/perf_event_cpu.c b/arch/nds32/kernel/perf_event_cpu.c
new file mode 100644 (file)
index 0000000..5e00ce5
--- /dev/null
@@ -0,0 +1,1522 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2008-2017 Andes Technology Corporation
+ *
+ * Reference ARMv7: Jean Pihet <jpihet@mvista.com>
+ * 2010 (c) MontaVista Software, LLC.
+ */
+
+#include <linux/perf_event.h>
+#include <linux/bitmap.h>
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/pm_runtime.h>
+#include <linux/ftrace.h>
+#include <linux/uaccess.h>
+#include <linux/sched/clock.h>
+#include <linux/percpu-defs.h>
+
+#include <asm/pmu.h>
+#include <asm/irq_regs.h>
+#include <asm/nds32.h>
+#include <asm/stacktrace.h>
+#include <asm/perf_event.h>
+#include <nds32_intrinsic.h>
+
+/* Set at runtime when we know what CPU type we are. */
+static struct nds32_pmu *cpu_pmu;
+
+static DEFINE_PER_CPU(struct pmu_hw_events, cpu_hw_events);
+static void nds32_pmu_start(struct nds32_pmu *cpu_pmu);
+static void nds32_pmu_stop(struct nds32_pmu *cpu_pmu);
+static struct platform_device_id cpu_pmu_plat_device_ids[] = {
+       {.name = "nds32-pfm"},
+       {},
+};
+
+static int nds32_pmu_map_cache_event(const unsigned int (*cache_map)
+                                 [PERF_COUNT_HW_CACHE_MAX]
+                                 [PERF_COUNT_HW_CACHE_OP_MAX]
+                                 [PERF_COUNT_HW_CACHE_RESULT_MAX], u64 config)
+{
+       unsigned int cache_type, cache_op, cache_result, ret;
+
+       cache_type = (config >> 0) & 0xff;
+       if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
+               return -EINVAL;
+
+       cache_op = (config >> 8) & 0xff;
+       if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
+               return -EINVAL;
+
+       cache_result = (config >> 16) & 0xff;
+       if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
+               return -EINVAL;
+
+       ret = (int)(*cache_map)[cache_type][cache_op][cache_result];
+
+       if (ret == CACHE_OP_UNSUPPORTED)
+               return -ENOENT;
+
+       return ret;
+}
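
Editor's note (not part of the patch): the three bytes decoded above are the
standard perf cache-event encoding; for example, L1D read misses would be
assembled as:

    u64 config = (PERF_COUNT_HW_CACHE_L1D         <<  0) |
                 (PERF_COUNT_HW_CACHE_OP_READ     <<  8) |
                 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16);
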
+
+static int
+nds32_pmu_map_hw_event(const unsigned int (*event_map)[PERF_COUNT_HW_MAX],
+                      u64 config)
+{
+       int mapping;
+
+       if (config >= PERF_COUNT_HW_MAX)
+               return -ENOENT;
+
+       mapping = (*event_map)[config];
+       return mapping == HW_OP_UNSUPPORTED ? -ENOENT : mapping;
+}
+
+static int nds32_pmu_map_raw_event(u32 raw_event_mask, u64 config)
+{
+       int ev_type = (int)(config & raw_event_mask);
+       int idx = config >> 8;
+
+       switch (idx) {
+       case 0:
+               ev_type = PFM_OFFSET_MAGIC_0 + ev_type;
+               if (ev_type >= SPAV3_0_SEL_LAST || ev_type <= SPAV3_0_SEL_BASE)
+                       return -ENOENT;
+               break;
+       case 1:
+               ev_type = PFM_OFFSET_MAGIC_1 + ev_type;
+               if (ev_type >= SPAV3_1_SEL_LAST || ev_type <= SPAV3_1_SEL_BASE)
+                       return -ENOENT;
+               break;
+       case 2:
+               ev_type = PFM_OFFSET_MAGIC_2 + ev_type;
+               if (ev_type >= SPAV3_2_SEL_LAST || ev_type <= SPAV3_2_SEL_BASE)
+                       return -ENOENT;
+               break;
+       default:
+               return -ENOENT;
+       }
+
+       return ev_type;
+}
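
Editor's note: under this encoding, the low byte of a raw perf config selects
the event within a counter's event space and the byte above it selects one of
the three counters. A sketch, assuming SOFTWARE_EVENT_MASK covers the low
8 bits:

    u64 config = (1 << 8) | 0x02;   /* counter 1, event select 0x02 */
    /* nds32_pmu_map_raw_event() yields PFM_OFFSET_MAGIC_1 + 0x02,
     * provided that falls between SPAV3_1_SEL_BASE and SPAV3_1_SEL_LAST;
     * from user space this would be "perf record -e r102".
     */
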
+
+int
+nds32_pmu_map_event(struct perf_event *event,
+                   const unsigned int (*event_map)[PERF_COUNT_HW_MAX],
+                   const unsigned int (*cache_map)
+                   [PERF_COUNT_HW_CACHE_MAX]
+                   [PERF_COUNT_HW_CACHE_OP_MAX]
+                   [PERF_COUNT_HW_CACHE_RESULT_MAX], u32 raw_event_mask)
+{
+       u64 config = event->attr.config;
+
+       switch (event->attr.type) {
+       case PERF_TYPE_HARDWARE:
+               return nds32_pmu_map_hw_event(event_map, config);
+       case PERF_TYPE_HW_CACHE:
+               return nds32_pmu_map_cache_event(cache_map, config);
+       case PERF_TYPE_RAW:
+               return nds32_pmu_map_raw_event(raw_event_mask, config);
+       }
+
+       return -ENOENT;
+}
+
+static int nds32_spav3_map_event(struct perf_event *event)
+{
+       return nds32_pmu_map_event(event, &nds32_pfm_perf_map,
+                               &nds32_pfm_perf_cache_map, SOFTWARE_EVENT_MASK);
+}
+
+static inline u32 nds32_pfm_getreset_flags(void)
+{
+       /* Read overflow status */
+       u32 val = __nds32__mfsr(NDS32_SR_PFM_CTL);
+       u32 old_val = val;
+
+       /* Write overflow bit to clear status, and others keep it 0 */
+       u32 ov_flag = PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2];
+
+       __nds32__mtsr(val | ov_flag, NDS32_SR_PFM_CTL);
+
+       return old_val;
+}
+
+static inline int nds32_pfm_has_overflowed(u32 pfm)
+{
+       u32 ov_flag = PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2];
+
+       return pfm & ov_flag;
+}
+
+static inline int nds32_pfm_counter_has_overflowed(u32 pfm, int idx)
+{
+       u32 mask = 0;
+
+       switch (idx) {
+       case 0:
+               mask = PFM_CTL_OVF[0];
+               break;
+       case 1:
+               mask = PFM_CTL_OVF[1];
+               break;
+       case 2:
+               mask = PFM_CTL_OVF[2];
+               break;
+       default:
+               pr_err("%s index wrong\n", __func__);
+               break;
+       }
+       return pfm & mask;
+}
+
+/*
+ * Set the next IRQ period, based on the hwc->period_left value.
+ * To be called with the event disabled in hw:
+ */
+int nds32_pmu_event_set_period(struct perf_event *event)
+{
+       struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
+       struct hw_perf_event *hwc = &event->hw;
+       s64 left = local64_read(&hwc->period_left);
+       s64 period = hwc->sample_period;
+       int ret = 0;
+
+       /* The period may have been changed by PERF_EVENT_IOC_PERIOD */
+       if (unlikely(period != hwc->last_period))
+               left = period - (hwc->last_period - left);
+
+       if (unlikely(left <= -period)) {
+               left = period;
+               local64_set(&hwc->period_left, left);
+               hwc->last_period = period;
+               ret = 1;
+       }
+
+       if (unlikely(left <= 0)) {
+               left += period;
+               local64_set(&hwc->period_left, left);
+               hwc->last_period = period;
+               ret = 1;
+       }
+
+       if (left > (s64)nds32_pmu->max_period)
+               left = nds32_pmu->max_period;
+
+       /*
+        * The hw event starts counting from this event offset,
+        * mark it to be able to extract future "deltas":
+        */
+       local64_set(&hwc->prev_count, (u64)(-left));
+
+       nds32_pmu->write_counter(event, (u64)(-left) & nds32_pmu->max_period);
+
+       perf_event_update_userpage(event);
+
+       return ret;
+}
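
Editor's note (not from the patch), a worked example of the negative-offset
trick: with max_period = 0xFFFFFFFF and a remaining period of 1000, the
counter is seeded so that it overflows after exactly 1000 events:

    s64 left = 1000;
    u64 seed = (u64)(-left) & 0xFFFFFFFF;   /* 0xFFFFFC18 */
    /* prev_count = -1000, so the delta computed by
     * nds32_pmu_event_update() after the overflow IRQ comes out as
     * the number of events since this reprogramming.
     */
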
+
+static irqreturn_t nds32_pmu_handle_irq(int irq_num, void *dev)
+{
+       u32 pfm;
+       struct perf_sample_data data;
+       struct nds32_pmu *cpu_pmu = (struct nds32_pmu *)dev;
+       struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events();
+       struct pt_regs *regs;
+       int idx;
+       /*
+        * Get and reset the IRQ flags
+        */
+       pfm = nds32_pfm_getreset_flags();
+
+       /*
+        * Did an overflow occur?
+        */
+       if (!nds32_pfm_has_overflowed(pfm))
+               return IRQ_NONE;
+
+       /*
+        * Handle the counter(s) overflow(s)
+        */
+       regs = get_irq_regs();
+
+       nds32_pmu_stop(cpu_pmu);
+       for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
+               struct perf_event *event = cpuc->events[idx];
+               struct hw_perf_event *hwc;
+
+               /* Ignore if we don't have an event. */
+               if (!event)
+                       continue;
+
+               /*
+                * We have a single interrupt for all counters. Check that
+                * each counter has overflowed before we process it.
+                */
+               if (!nds32_pfm_counter_has_overflowed(pfm, idx))
+                       continue;
+
+               hwc = &event->hw;
+               nds32_pmu_event_update(event);
+               perf_sample_data_init(&data, 0, hwc->last_period);
+               if (!nds32_pmu_event_set_period(event))
+                       continue;
+
+               if (perf_event_overflow(event, &data, regs))
+                       cpu_pmu->disable(event);
+       }
+       nds32_pmu_start(cpu_pmu);
+       /*
+        * Handle the pending perf events.
+        *
+        * Note: this call *must* be run with interrupts disabled. For
+        * platforms that can have the PMU interrupts raised as an NMI, this
+        * will not work.
+        */
+       irq_work_run();
+
+       return IRQ_HANDLED;
+}
+
+static inline int nds32_pfm_counter_valid(struct nds32_pmu *cpu_pmu, int idx)
+{
+       return ((idx >= 0) && (idx < cpu_pmu->num_events));
+}
+
+static inline int nds32_pfm_disable_counter(int idx)
+{
+       unsigned int val = __nds32__mfsr(NDS32_SR_PFM_CTL);
+       u32 mask = 0;
+
+       mask = PFM_CTL_EN[idx];
+       val &= ~mask;
+       val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
+       __nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
+       return idx;
+}
+
+/*
+ * Add an event filter to a given event.
+ */
+static int nds32_pmu_set_event_filter(struct hw_perf_event *event,
+                                     struct perf_event_attr *attr)
+{
+       unsigned long config_base = 0;
+       int idx = event->idx;
+       unsigned long no_kernel_tracing = 0;
+       unsigned long no_user_tracing = 0;
+       /* If index is -1, do not do anything */
+       if (idx == -1)
+               return 0;
+
+       no_kernel_tracing = PFM_CTL_KS[idx];
+       no_user_tracing = PFM_CTL_KU[idx];
+       /*
+        * Default: enable both kernel and user mode tracing.
+        */
+       if (attr->exclude_user)
+               config_base |= no_user_tracing;
+
+       if (attr->exclude_kernel)
+               config_base |= no_kernel_tracing;
+
+       /*
+        * Install the filter into config_base as this is used to
+        * construct the event type.
+        */
+       event->config_base |= config_base;
+       return 0;
+}
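
Editor's note (not part of the patch): these filter bits are driven by the
standard perf_event_attr exclusion flags, so counting kernel-only events would
look roughly like:

    struct perf_event_attr attr = {
            .type = PERF_TYPE_HARDWARE,
            .config = PERF_COUNT_HW_CPU_CYCLES,
            .exclude_user = 1,   /* sets the PFM_CTL_KU bit for the counter */
    };
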
+
+static inline void nds32_pfm_write_evtsel(int idx, u32 evnum)
+{
+       u32 offset = 0;
+       u32 ori_val = __nds32__mfsr(NDS32_SR_PFM_CTL);
+       u32 ev_mask = 0;
+       u32 no_kernel_mask = 0;
+       u32 no_user_mask = 0;
+       u32 val;
+
+       offset = PFM_CTL_OFFSEL[idx];
+       /* Clear previous mode selection, and write new one */
+       no_kernel_mask = PFM_CTL_KS[idx];
+       no_user_mask = PFM_CTL_KU[idx];
+       ori_val &= ~no_kernel_mask;
+       ori_val &= ~no_user_mask;
+       if (evnum & no_kernel_mask)
+               ori_val |= no_kernel_mask;
+
+       if (evnum & no_user_mask)
+               ori_val |= no_user_mask;
+
+       /* Clear previous event selection */
+       ev_mask = PFM_CTL_SEL[idx];
+       ori_val &= ~ev_mask;
+       evnum &= SOFTWARE_EVENT_MASK;
+
+       /* undo the linear mapping */
+       evnum = get_converted_evet_hw_num(evnum);
+       val = ori_val | (evnum << offset);
+       val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
+       __nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
+}
+
+static inline int nds32_pfm_enable_counter(int idx)
+{
+       unsigned int val = __nds32__mfsr(NDS32_SR_PFM_CTL);
+       u32 mask = 0;
+
+       mask = PFM_CTL_EN[idx];
+       val |= mask;
+       val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
+       __nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
+       return idx;
+}
+
+static inline int nds32_pfm_enable_intens(int idx)
+{
+       unsigned int val = __nds32__mfsr(NDS32_SR_PFM_CTL);
+       u32 mask = 0;
+
+       mask = PFM_CTL_IE[idx];
+       val |= mask;
+       val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
+       __nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
+       return idx;
+}
+
+static inline int nds32_pfm_disable_intens(int idx)
+{
+       unsigned int val = __nds32__mfsr(NDS32_SR_PFM_CTL);
+       u32 mask = 0;
+
+       mask = PFM_CTL_IE[idx];
+       val &= ~mask;
+       val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
+       __nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
+       return idx;
+}
+
+static int event_requires_mode_exclusion(struct perf_event_attr *attr)
+{
+       /* NDS32 does not support any other exclusion modes */
+       return attr->exclude_user || attr->exclude_kernel;
+}
+
+static void nds32_pmu_enable_event(struct perf_event *event)
+{
+       unsigned long flags;
+       unsigned int evnum = 0;
+       struct hw_perf_event *hwc = &event->hw;
+       struct nds32_pmu *cpu_pmu = to_nds32_pmu(event->pmu);
+       struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+       int idx = hwc->idx;
+
+       if (!nds32_pfm_counter_valid(cpu_pmu, idx)) {
+               pr_err("CPU enabling wrong pfm counter IRQ enable\n");
+               return;
+       }
+
+       /*
+        * Enable counter and interrupt, and set the counter to count
+        * the event that we're interested in.
+        */
+       raw_spin_lock_irqsave(&events->pmu_lock, flags);
+
+       /*
+        * Disable counter
+        */
+       nds32_pfm_disable_counter(idx);
+
+       /*
+        * Check whether we need to exclude the counter from certain modes.
+        */
+       if ((!cpu_pmu->set_event_filter ||
+            cpu_pmu->set_event_filter(hwc, &event->attr)) &&
+            event_requires_mode_exclusion(&event->attr)) {
+               pr_notice
+               ("NDS32 performance counters do not support mode exclusion\n");
+               hwc->config_base = 0;
+       }
+       /* Write event */
+       evnum = hwc->config_base;
+       nds32_pfm_write_evtsel(idx, evnum);
+
+       /*
+        * Enable interrupt for this counter
+        */
+       nds32_pfm_enable_intens(idx);
+
+       /*
+        * Enable counter
+        */
+       nds32_pfm_enable_counter(idx);
+
+       raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static void nds32_pmu_disable_event(struct perf_event *event)
+{
+       unsigned long flags;
+       struct hw_perf_event *hwc = &event->hw;
+       struct nds32_pmu *cpu_pmu = to_nds32_pmu(event->pmu);
+       struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+       int idx = hwc->idx;
+
+       if (!nds32_pfm_counter_valid(cpu_pmu, idx)) {
+               pr_err("CPU disabling wrong pfm counter IRQ enable %d\n", idx);
+               return;
+       }
+
+       /*
+        * Disable counter and interrupt
+        */
+       raw_spin_lock_irqsave(&events->pmu_lock, flags);
+
+       /*
+        * Disable counter
+        */
+       nds32_pfm_disable_counter(idx);
+
+       /*
+        * Disable interrupt for this counter
+        */
+       nds32_pfm_disable_intens(idx);
+
+       raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static inline u32 nds32_pmu_read_counter(struct perf_event *event)
+{
+       struct nds32_pmu *cpu_pmu = to_nds32_pmu(event->pmu);
+       struct hw_perf_event *hwc = &event->hw;
+       int idx = hwc->idx;
+       u32 count = 0;
+
+       if (!nds32_pfm_counter_valid(cpu_pmu, idx)) {
+               pr_err("CPU reading wrong counter %d\n", idx);
+       } else {
+               switch (idx) {
+               case PFMC0:
+                       count = __nds32__mfsr(NDS32_SR_PFMC0);
+                       break;
+               case PFMC1:
+                       count = __nds32__mfsr(NDS32_SR_PFMC1);
+                       break;
+               case PFMC2:
+                       count = __nds32__mfsr(NDS32_SR_PFMC2);
+                       break;
+               default:
+                       pr_err
+                           ("%s: CPU has no performance counters %d\n",
+                            __func__, idx);
+               }
+       }
+       return count;
+}
+
+static inline void nds32_pmu_write_counter(struct perf_event *event, u32 value)
+{
+       struct nds32_pmu *cpu_pmu = to_nds32_pmu(event->pmu);
+       struct hw_perf_event *hwc = &event->hw;
+       int idx = hwc->idx;
+
+       if (!nds32_pfm_counter_valid(cpu_pmu, idx)) {
+               pr_err("CPU writing wrong counter %d\n", idx);
+       } else {
+               switch (idx) {
+               case PFMC0:
+                       __nds32__mtsr_isb(value, NDS32_SR_PFMC0);
+                       break;
+               case PFMC1:
+                       __nds32__mtsr_isb(value, NDS32_SR_PFMC1);
+                       break;
+               case PFMC2:
+                       __nds32__mtsr_isb(value, NDS32_SR_PFMC2);
+                       break;
+               default:
+                       pr_err
+                           ("%s: CPU has no performance counters %d\n",
+                            __func__, idx);
+               }
+       }
+}
+
+static int nds32_pmu_get_event_idx(struct pmu_hw_events *cpuc,
+                                  struct perf_event *event)
+{
+       int idx;
+       struct hw_perf_event *hwc = &event->hw;
+       /*
+        * The current implementation maps cycles, instruction count and
+        * cache-miss events to specific counters.
+        * However, more than one of the 3 counters can count each of
+        * these events.
+        *
+        * SOFTWARE_EVENT_MASK is the mask for extracting the event number.
+        * This is defined by Jia-Rung; the policies can be changed, but
+        * must not exceed 8 bits, which is hardware specific.
+        * The last number is SPAV3_2_SEL_LAST.
+        */
+       unsigned long evtype = hwc->config_base & SOFTWARE_EVENT_MASK;
+
+       idx = get_converted_event_idx(evtype);
+       /*
+        * Try to get the counter for the corresponding event
+        */
+       if (evtype == SPAV3_0_SEL_TOTAL_CYCLES) {
+               if (!test_and_set_bit(idx, cpuc->used_mask))
+                       return idx;
+               if (!test_and_set_bit(NDS32_IDX_COUNTER0, cpuc->used_mask))
+                       return NDS32_IDX_COUNTER0;
+               if (!test_and_set_bit(NDS32_IDX_COUNTER1, cpuc->used_mask))
+                       return NDS32_IDX_COUNTER1;
+       } else if (evtype == SPAV3_1_SEL_COMPLETED_INSTRUCTION) {
+               if (!test_and_set_bit(idx, cpuc->used_mask))
+                       return idx;
+               else if (!test_and_set_bit(NDS32_IDX_COUNTER1, cpuc->used_mask))
+                       return NDS32_IDX_COUNTER1;
+               else if (!test_and_set_bit
+                        (NDS32_IDX_CYCLE_COUNTER, cpuc->used_mask))
+                       return NDS32_IDX_CYCLE_COUNTER;
+       } else {
+               if (!test_and_set_bit(idx, cpuc->used_mask))
+                       return idx;
+       }
+       return -EAGAIN;
+}
+
+static void nds32_pmu_start(struct nds32_pmu *cpu_pmu)
+{
+       unsigned long flags;
+       unsigned int val;
+       struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+
+       raw_spin_lock_irqsave(&events->pmu_lock, flags);
+
+       /* Enable all counters; the NDS32 PFM has 3 counters */
+       val = __nds32__mfsr(NDS32_SR_PFM_CTL);
+       val |= (PFM_CTL_EN[0] | PFM_CTL_EN[1] | PFM_CTL_EN[2]);
+       val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
+       __nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
+
+       raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static void nds32_pmu_stop(struct nds32_pmu *cpu_pmu)
+{
+       unsigned long flags;
+       unsigned int val;
+       struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+
+       raw_spin_lock_irqsave(&events->pmu_lock, flags);
+
+       /* Disable all counters; the NDS32 PFM has 3 counters */
+       val = __nds32__mfsr(NDS32_SR_PFM_CTL);
+       val &= ~(PFM_CTL_EN[0] | PFM_CTL_EN[1] | PFM_CTL_EN[2]);
+       val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
+       __nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
+
+       raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static void nds32_pmu_reset(void *info)
+{
+       u32 val = 0;
+
+       val |= (PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
+       __nds32__mtsr(val, NDS32_SR_PFM_CTL);
+       __nds32__mtsr(0, NDS32_SR_PFM_CTL);
+       __nds32__mtsr(0, NDS32_SR_PFMC0);
+       __nds32__mtsr(0, NDS32_SR_PFMC1);
+       __nds32__mtsr(0, NDS32_SR_PFMC2);
+}
+
+static void nds32_pmu_init(struct nds32_pmu *cpu_pmu)
+{
+       cpu_pmu->handle_irq = nds32_pmu_handle_irq;
+       cpu_pmu->enable = nds32_pmu_enable_event;
+       cpu_pmu->disable = nds32_pmu_disable_event;
+       cpu_pmu->read_counter = nds32_pmu_read_counter;
+       cpu_pmu->write_counter = nds32_pmu_write_counter;
+       cpu_pmu->get_event_idx = nds32_pmu_get_event_idx;
+       cpu_pmu->start = nds32_pmu_start;
+       cpu_pmu->stop = nds32_pmu_stop;
+       cpu_pmu->reset = nds32_pmu_reset;
+       cpu_pmu->max_period = 0xFFFFFFFF;       /* Maximum counts */
+}
+
+static u32 nds32_read_num_pfm_events(void)
+{
+       /* The NDS32 SPAv3 PMU supports 3 counters */
+       return 3;
+}
+
+static int device_pmu_init(struct nds32_pmu *cpu_pmu)
+{
+       nds32_pmu_init(cpu_pmu);
+       /*
+        * This name should be a device-specific name; pick whatever you like :)
+        * I think "PMU" will be a good generic name.
+        */
+       cpu_pmu->name = "nds32v3-pmu";
+       cpu_pmu->map_event = nds32_spav3_map_event;
+       cpu_pmu->num_events = nds32_read_num_pfm_events();
+       cpu_pmu->set_event_filter = nds32_pmu_set_event_filter;
+       return 0;
+}
+
+/*
+ * CPU PMU identification and probing.
+ */
+static int probe_current_pmu(struct nds32_pmu *pmu)
+{
+       int ret;
+
+       get_cpu();
+       ret = -ENODEV;
+       /*
+        * If there are various CPU types, each with its own PMU,
+        * initialize with the corresponding one
+        */
+       device_pmu_init(pmu);
+       put_cpu();
+       return ret;
+}
+
+static void nds32_pmu_enable(struct pmu *pmu)
+{
+       struct nds32_pmu *nds32_pmu = to_nds32_pmu(pmu);
+       struct pmu_hw_events *hw_events = nds32_pmu->get_hw_events();
+       int enabled = bitmap_weight(hw_events->used_mask,
+                                   nds32_pmu->num_events);
+
+       if (enabled)
+               nds32_pmu->start(nds32_pmu);
+}
+
+static void nds32_pmu_disable(struct pmu *pmu)
+{
+       struct nds32_pmu *nds32_pmu = to_nds32_pmu(pmu);
+
+       nds32_pmu->stop(nds32_pmu);
+}
+
+static void nds32_pmu_release_hardware(struct nds32_pmu *nds32_pmu)
+{
+       nds32_pmu->free_irq(nds32_pmu);
+       pm_runtime_put_sync(&nds32_pmu->plat_device->dev);
+}
+
+static irqreturn_t nds32_pmu_dispatch_irq(int irq, void *dev)
+{
+       struct nds32_pmu *nds32_pmu = (struct nds32_pmu *)dev;
+       int ret;
+       u64 start_clock, finish_clock;
+
+       start_clock = local_clock();
+       ret = nds32_pmu->handle_irq(irq, dev);
+       finish_clock = local_clock();
+
+       perf_sample_event_took(finish_clock - start_clock);
+       return ret;
+}
+
+static int nds32_pmu_reserve_hardware(struct nds32_pmu *nds32_pmu)
+{
+       int err;
+       struct platform_device *pmu_device = nds32_pmu->plat_device;
+
+       if (!pmu_device)
+               return -ENODEV;
+
+       pm_runtime_get_sync(&pmu_device->dev);
+       err = nds32_pmu->request_irq(nds32_pmu, nds32_pmu_dispatch_irq);
+       if (err) {
+               nds32_pmu_release_hardware(nds32_pmu);
+               return err;
+       }
+
+       return 0;
+}
+
+static int
+validate_event(struct pmu *pmu, struct pmu_hw_events *hw_events,
+              struct perf_event *event)
+{
+       struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
+
+       if (is_software_event(event))
+               return 1;
+
+       if (event->pmu != pmu)
+               return 0;
+
+       if (event->state < PERF_EVENT_STATE_OFF)
+               return 1;
+
+       if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec)
+               return 1;
+
+       return nds32_pmu->get_event_idx(hw_events, event) >= 0;
+}
+
+static int validate_group(struct perf_event *event)
+{
+       struct perf_event *sibling, *leader = event->group_leader;
+       struct pmu_hw_events fake_pmu;
+       /*
+        * Initialize the fake PMU. We only need to populate the
+        * used_mask for the purposes of validation.
+        */
+       memset(&fake_pmu.used_mask, 0, sizeof(fake_pmu.used_mask));
+
+       if (!validate_event(event->pmu, &fake_pmu, leader))
+               return -EINVAL;
+
+       for_each_sibling_event(sibling, leader) {
+               if (!validate_event(event->pmu, &fake_pmu, sibling))
+                       return -EINVAL;
+       }
+
+       if (!validate_event(event->pmu, &fake_pmu, event))
+               return -EINVAL;
+
+       return 0;
+}
+
+static int __hw_perf_event_init(struct perf_event *event)
+{
+       struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
+       struct hw_perf_event *hwc = &event->hw;
+       int mapping;
+
+       mapping = nds32_pmu->map_event(event);
+
+       if (mapping < 0) {
+               pr_debug("event %x:%llx not supported\n", event->attr.type,
+                        event->attr.config);
+               return mapping;
+       }
+
+       /*
+        * We don't assign an index until we actually place the event onto
+        * hardware. Use -1 to signify that we haven't decided where to put it
+        * yet. For SMP systems, each core has its own PMU so we can't do any
+        * clever allocation or constraints checking at this point.
+        */
+       hwc->idx = -1;
+       hwc->config_base = 0;
+       hwc->config = 0;
+       hwc->event_base = 0;
+
+       /*
+        * Check whether we need to exclude the counter from certain modes.
+        */
+       if ((!nds32_pmu->set_event_filter ||
+            nds32_pmu->set_event_filter(hwc, &event->attr)) &&
+           event_requires_mode_exclusion(&event->attr)) {
+               pr_debug
+                       ("NDS performance counters do not support mode exclusion\n");
+               return -EOPNOTSUPP;
+       }
+
+       /*
+        * Store the event encoding into the config_base field.
+        */
+       hwc->config_base |= (unsigned long)mapping;
+
+       if (!hwc->sample_period) {
+               /*
+                * For non-sampling runs, limit the sample_period to half
+                * of the counter width. That way, the new counter value
+                * is far less likely to overtake the previous one unless
+                * you have some serious IRQ latency issues.
+                */
+               hwc->sample_period = nds32_pmu->max_period >> 1;
+               hwc->last_period = hwc->sample_period;
+               local64_set(&hwc->period_left, hwc->sample_period);
+       }
+
+       if (event->group_leader != event) {
+               if (validate_group(event) != 0)
+                       return -EINVAL;
+       }
+
+       return 0;
+}
+
+static int nds32_pmu_event_init(struct perf_event *event)
+{
+       struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
+       int err = 0;
+       atomic_t *active_events = &nds32_pmu->active_events;
+
+       /* does not support taken branch sampling */
+       if (has_branch_stack(event))
+               return -EOPNOTSUPP;
+
+       if (nds32_pmu->map_event(event) == -ENOENT)
+               return -ENOENT;
+
+       if (!atomic_inc_not_zero(active_events)) {
+               if (atomic_read(active_events) == 0) {
+                       /* Register irq handler */
+                       err = nds32_pmu_reserve_hardware(nds32_pmu);
+               }
+
+               if (!err)
+                       atomic_inc(active_events);
+       }
+
+       if (err)
+               return err;
+
+       err = __hw_perf_event_init(event);
+
+       return err;
+}
+
+static void nds32_start(struct perf_event *event, int flags)
+{
+       struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
+       struct hw_perf_event *hwc = &event->hw;
+       /*
+        * NDS pmu always has to reprogram the period, so ignore
+        * PERF_EF_RELOAD, see the comment below.
+        */
+       if (flags & PERF_EF_RELOAD)
+               WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+
+       hwc->state = 0;
+       /* Set the period for the event. */
+       nds32_pmu_event_set_period(event);
+
+       nds32_pmu->enable(event);
+}
+
+static int nds32_pmu_add(struct perf_event *event, int flags)
+{
+       struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
+       struct pmu_hw_events *hw_events = nds32_pmu->get_hw_events();
+       struct hw_perf_event *hwc = &event->hw;
+       int idx;
+       int err = 0;
+
+       perf_pmu_disable(event->pmu);
+
+       /* If we don't have a space for the counter then finish early. */
+       idx = nds32_pmu->get_event_idx(hw_events, event);
+       if (idx < 0) {
+               err = idx;
+               goto out;
+       }
+
+       /*
+        * If there is an event in the counter we are going to use then make
+        * sure it is disabled.
+        */
+       event->hw.idx = idx;
+       nds32_pmu->disable(event);
+       hw_events->events[idx] = event;
+
+       hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+       if (flags & PERF_EF_START)
+               nds32_start(event, PERF_EF_RELOAD);
+
+       /* Propagate our changes to the userspace mapping. */
+       perf_event_update_userpage(event);
+
+out:
+       perf_pmu_enable(event->pmu);
+       return err;
+}
+
+u64 nds32_pmu_event_update(struct perf_event *event)
+{
+       struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
+       struct hw_perf_event *hwc = &event->hw;
+       u64 delta, prev_raw_count, new_raw_count;
+
+again:
+       prev_raw_count = local64_read(&hwc->prev_count);
+       new_raw_count = nds32_pmu->read_counter(event);
+
+       if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+                           new_raw_count) != prev_raw_count) {
+               goto again;
+       }
+       /*
+        * Whether the counter overflowed or not, this "unsigned
+        * subtraction" always yields the correct delta
+        */
+       delta = (new_raw_count - prev_raw_count) & nds32_pmu->max_period;
+
+       local64_add(delta, &event->count);
+       local64_sub(delta, &hwc->period_left);
+
+       return new_raw_count;
+}
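
Editor's note, an example of why the masking yields the right delta even
across a counter wrap (with max_period = 0xFFFFFFFF):

    u64 prev = 0xFFFFFFF0, cur = 0x00000010;
    u64 delta = (cur - prev) & 0xFFFFFFFF;   /* == 0x20: 32 events */
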
+
+static void nds32_stop(struct perf_event *event, int flags)
+{
+       struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
+       struct hw_perf_event *hwc = &event->hw;
+       /*
+        * NDS pmu always has to update the counter, so ignore
+        * PERF_EF_UPDATE, see comments in nds32_start().
+        */
+       if (!(hwc->state & PERF_HES_STOPPED)) {
+               nds32_pmu->disable(event);
+               nds32_pmu_event_update(event);
+               hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+       }
+}
+
+static void nds32_pmu_del(struct perf_event *event, int flags)
+{
+       struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
+       struct pmu_hw_events *hw_events = nds32_pmu->get_hw_events();
+       struct hw_perf_event *hwc = &event->hw;
+       int idx = hwc->idx;
+
+       nds32_stop(event, PERF_EF_UPDATE);
+       hw_events->events[idx] = NULL;
+       clear_bit(idx, hw_events->used_mask);
+
+       perf_event_update_userpage(event);
+}
+
+static void nds32_pmu_read(struct perf_event *event)
+{
+       nds32_pmu_event_update(event);
+}
+
+/* Please refer to SPAv3 for more hardware-specific details */
+PMU_FORMAT_ATTR(event, "config:0-63");
+
+static struct attribute *nds32_arch_formats_attr[] = {
+       &format_attr_event.attr,
+       NULL,
+};
+
+static struct attribute_group nds32_pmu_format_group = {
+       .name = "format",
+       .attrs = nds32_arch_formats_attr,
+};
+
+static ssize_t nds32_pmu_cpumask_show(struct device *dev,
+                                     struct device_attribute *attr,
+                                     char *buf)
+{
+       return 0;
+}
+
+static DEVICE_ATTR(cpus, 0444, nds32_pmu_cpumask_show, NULL);
+
+static struct attribute *nds32_pmu_common_attrs[] = {
+       &dev_attr_cpus.attr,
+       NULL,
+};
+
+static struct attribute_group nds32_pmu_common_group = {
+       .attrs = nds32_pmu_common_attrs,
+};
+
+static const struct attribute_group *nds32_pmu_attr_groups[] = {
+       &nds32_pmu_format_group,
+       &nds32_pmu_common_group,
+       NULL,
+};
+
+static void nds32_init(struct nds32_pmu *nds32_pmu)
+{
+       atomic_set(&nds32_pmu->active_events, 0);
+
+       nds32_pmu->pmu = (struct pmu) {
+               .pmu_enable = nds32_pmu_enable,
+               .pmu_disable = nds32_pmu_disable,
+               .attr_groups = nds32_pmu_attr_groups,
+               .event_init = nds32_pmu_event_init,
+               .add = nds32_pmu_add,
+               .del = nds32_pmu_del,
+               .start = nds32_start,
+               .stop = nds32_stop,
+               .read = nds32_pmu_read,
+       };
+}
+
+int nds32_pmu_register(struct nds32_pmu *nds32_pmu, int type)
+{
+       nds32_init(nds32_pmu);
+       pm_runtime_enable(&nds32_pmu->plat_device->dev);
+       pr_info("enabled with %s PMU driver, %d counters available\n",
+               nds32_pmu->name, nds32_pmu->num_events);
+       return perf_pmu_register(&nds32_pmu->pmu, nds32_pmu->name, type);
+}
+
+static struct pmu_hw_events *cpu_pmu_get_cpu_events(void)
+{
+       return this_cpu_ptr(&cpu_hw_events);
+}
+
+static int cpu_pmu_request_irq(struct nds32_pmu *cpu_pmu, irq_handler_t handler)
+{
+       int err, irq, irqs;
+       struct platform_device *pmu_device = cpu_pmu->plat_device;
+
+       if (!pmu_device)
+               return -ENODEV;
+
+       irqs = min(pmu_device->num_resources, num_possible_cpus());
+       if (irqs < 1) {
+               pr_err("no irqs for PMUs defined\n");
+               return -ENODEV;
+       }
+
+       irq = platform_get_irq(pmu_device, 0);
+       err = request_irq(irq, handler, IRQF_NOBALANCING, "nds32-pfm",
+                         cpu_pmu);
+       if (err) {
+               pr_err("unable to request IRQ%d for NDS PMU counters\n",
+                      irq);
+               return err;
+       }
+       return 0;
+}
+
+static void cpu_pmu_free_irq(struct nds32_pmu *cpu_pmu)
+{
+       int irq;
+       struct platform_device *pmu_device = cpu_pmu->plat_device;
+
+       irq = platform_get_irq(pmu_device, 0);
+       if (irq >= 0)
+               free_irq(irq, cpu_pmu);
+}
+
+static void cpu_pmu_init(struct nds32_pmu *cpu_pmu)
+{
+       int cpu;
+
+       for_each_possible_cpu(cpu) {
+               struct pmu_hw_events *events = &per_cpu(cpu_hw_events, cpu);
+
+               raw_spin_lock_init(&events->pmu_lock);
+       }
+
+       cpu_pmu->get_hw_events = cpu_pmu_get_cpu_events;
+       cpu_pmu->request_irq = cpu_pmu_request_irq;
+       cpu_pmu->free_irq = cpu_pmu_free_irq;
+
+       /* Ensure the PMU has sane values out of reset. */
+       if (cpu_pmu->reset)
+               on_each_cpu(cpu_pmu->reset, cpu_pmu, 1);
+}
+
+static const struct of_device_id cpu_pmu_of_device_ids[] = {
+       {.compatible = "andestech,nds32v3-pmu",
+        .data = device_pmu_init},
+       {},
+};
+
+static int cpu_pmu_device_probe(struct platform_device *pdev)
+{
+       const struct of_device_id *of_id;
+       int (*init_fn)(struct nds32_pmu *nds32_pmu);
+       struct device_node *node = pdev->dev.of_node;
+       struct nds32_pmu *pmu;
+       int ret = -ENODEV;
+
+       if (cpu_pmu) {
+               pr_notice("[perf] attempt to register multiple PMU devices!\n");
+               return -ENOSPC;
+       }
+
+       pmu = kzalloc(sizeof(*pmu), GFP_KERNEL);
+       if (!pmu)
+               return -ENOMEM;
+
+       of_id = of_match_node(cpu_pmu_of_device_ids, pdev->dev.of_node);
+       if (node && of_id) {
+               init_fn = of_id->data;
+               ret = init_fn(pmu);
+       } else {
+               ret = probe_current_pmu(pmu);
+       }
+
+       if (ret) {
+               pr_notice("[perf] failed to probe PMU!\n");
+               goto out_free;
+       }
+
+       cpu_pmu = pmu;
+       cpu_pmu->plat_device = pdev;
+       cpu_pmu_init(cpu_pmu);
+       ret = nds32_pmu_register(cpu_pmu, PERF_TYPE_RAW);
+
+       if (!ret)
+               return 0;
+
+out_free:
+       pr_notice("[perf] failed to register PMU devices!\n");
+       kfree(pmu);
+       return ret;
+}
+
+static struct platform_driver cpu_pmu_driver = {
+       .driver = {
+                  .name = "nds32-pfm",
+                  .of_match_table = cpu_pmu_of_device_ids,
+                  },
+       .probe = cpu_pmu_device_probe,
+       .id_table = cpu_pmu_plat_device_ids,
+};
+
+static int __init register_pmu_driver(void)
+{
+       int err = 0;
+
+       err = platform_driver_register(&cpu_pmu_driver);
+       if (err)
+               pr_notice("[perf] PMU initialization failed\n");
+       else
+               pr_notice("[perf] PMU initialization done\n");
+
+       return err;
+}
+
+device_initcall(register_pmu_driver);
+
+/*
+ * References: arch/nds32/kernel/traps.c:__dump()
+ * You will need to know the NDS ABI first.
+ */
+static int unwind_frame_kernel(struct stackframe *frame)
+{
+       int graph = 0;
+#ifdef CONFIG_FRAME_POINTER
+       /* 0x3 means misalignment */
+       if (!kstack_end((void *)frame->fp) &&
+           !((unsigned long)frame->fp & 0x3) &&
+           ((unsigned long)frame->fp >= TASK_SIZE)) {
+               /*
+                *      The array index is based on the ABI, the below graph
+                *      illustrate the reasons.
+                *      Function call procedure: "smw" and "lmw" will always
+                *      update SP and FP for you automatically.
+                *
+                *      Stack                                 Relative Address
+                *      |  |                                          0
+                *      ----
+                *      |LP| <-- SP(before smw)  <-- FP(after smw)   -1
+                *      ----
+                *      |FP|                                         -2
+                *      ----
+                *      |  | <-- SP(after smw)                       -3
+                */
+               frame->lp = ((unsigned long *)frame->fp)[-1];
+               frame->fp = ((unsigned long *)frame->fp)[FP_OFFSET];
+               /* make sure CONFIG_FUNCTION_GRAPH_TRACER is turned on */
+               if (__kernel_text_address(frame->lp))
+                       frame->lp = ftrace_graph_ret_addr
+                                               (NULL, &graph, frame->lp, NULL);
+
+               return 0;
+       } else {
+               return -EPERM;
+       }
+#else
+       /*
+        * You can refer to arch/nds32/kernel/traps.c:__dump()
+        * Treat "sp" as "fp", but the "sp" is one frame ahead of "fp".
+        * And, the "sp" is not always correct.
+        *
+        *   Stack                                 Relative Address
+        *   |  |                                          0
+        *   ----
+        *   |LP| <-- SP(before smw)                      -1
+        *   ----
+        *   |  | <-- SP(after smw)                       -2
+        *   ----
+        */
+       if (!kstack_end((void *)frame->sp)) {
+               frame->lp = ((unsigned long *)frame->sp)[1];
+               /* TODO: how should we handle the case where the first
+                * "sp" value is not correct?
+                */
+               if (__kernel_text_address(frame->lp))
+                       frame->lp = ftrace_graph_ret_addr
+                                               (NULL, &graph, frame->lp, NULL);
+
+               frame->sp = ((unsigned long *)frame->sp) + 1;
+
+               return 0;
+       } else {
+               return -EPERM;
+       }
+#endif
+}
+
+static void notrace
+walk_stackframe(struct stackframe *frame,
+               int (*fn_record)(struct stackframe *, void *),
+               void *data)
+{
+       while (1) {
+               int ret;
+
+               if (fn_record(frame, data))
+                       break;
+
+               ret = unwind_frame_kernel(frame);
+               if (ret < 0)
+                       break;
+       }
+}
+
+/*
+ * Gets called by walk_stackframe() for every stackframe. This will be called
+ * whilst unwinding the stackframe and is like a subroutine return so we use
+ * the PC.
+ */
+static int callchain_trace(struct stackframe *fr, void *data)
+{
+       struct perf_callchain_entry_ctx *entry = data;
+
+       perf_callchain_store(entry, fr->lp);
+       return 0;
+}
+
+/*
+ * Get the return address for a single stackframe and return a pointer to the
+ * next frame tail.
+ */
+static unsigned long
+user_backtrace(struct perf_callchain_entry_ctx *entry, unsigned long fp)
+{
+       struct frame_tail buftail;
+       unsigned long lp = 0;
+       unsigned long *user_frame_tail =
+               (unsigned long *)(fp - (unsigned long)sizeof(buftail));
+
+       /* Check accessibility of one struct frame_tail beyond */
+       if (!access_ok(VERIFY_READ, user_frame_tail, sizeof(buftail)))
+               return 0;
+       if (__copy_from_user_inatomic
+               (&buftail, user_frame_tail, sizeof(buftail)))
+               return 0;
+
+       /*
+        * Refer to unwind_frame_kernel() for an illustration of this layout
+        */
+       lp = buftail.stack_lp;  /* ((unsigned long *)fp)[-1] */
+       fp = buftail.stack_fp;  /* ((unsigned long *)fp)[FP_OFFSET] */
+       perf_callchain_store(entry, lp);
+       return fp;
+}
+
+static unsigned long
+user_backtrace_opt_size(struct perf_callchain_entry_ctx *entry,
+                       unsigned long fp)
+{
+       struct frame_tail_opt_size buftail;
+       unsigned long lp = 0;
+
+       unsigned long *user_frame_tail =
+               (unsigned long *)(fp - (unsigned long)sizeof(buftail));
+
+       /* Check accessibility of one struct frame_tail beyond */
+       if (!access_ok(VERIFY_READ, user_frame_tail, sizeof(buftail)))
+               return 0;
+       if (__copy_from_user_inatomic
+               (&buftail, user_frame_tail, sizeof(buftail)))
+               return 0;
+
+       /*
+        * Refer to unwind_frame_kernel() for an illustration of this layout
+        */
+       lp = buftail.stack_lp;  /* ((unsigned long *)fp)[-1] */
+       fp = buftail.stack_fp;  /* ((unsigned long *)fp)[FP_OFFSET] */
+
+       perf_callchain_store(entry, lp);
+       return fp;
+}
+
+/*
+ * This will be called when the target is in user mode
+ * This function will only be called when we use
+ * "PERF_SAMPLE_CALLCHAIN" in
+ * kernel/events/core.c:perf_prepare_sample()
+ *
+ * How to trigger perf_callchain_[user/kernel] :
+ * $ perf record -e cpu-clock --call-graph fp ./program
+ * $ perf report --call-graph
+ */
+unsigned long leaf_fp;
+void
+perf_callchain_user(struct perf_callchain_entry_ctx *entry,
+                   struct pt_regs *regs)
+{
+       unsigned long fp = 0;
+       unsigned long gp = 0;
+       unsigned long lp = 0;
+       unsigned long sp = 0;
+       unsigned long *user_frame_tail;
+
+       leaf_fp = 0;
+
+       if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+               /* We don't support guest os callchain now */
+               return;
+       }
+
+       perf_callchain_store(entry, regs->ipc);
+       fp = regs->fp;
+       gp = regs->gp;
+       lp = regs->lp;
+       sp = regs->sp;
+       if (entry->nr < PERF_MAX_STACK_DEPTH &&
+           (unsigned long)fp && !((unsigned long)fp & 0x7) && fp > sp) {
+               user_frame_tail =
+                       (unsigned long *)(fp - (unsigned long)sizeof(fp));
+
+               if (!access_ok(VERIFY_READ, user_frame_tail, sizeof(fp)))
+                       return;
+
+               if (__copy_from_user_inatomic
+                       (&leaf_fp, user_frame_tail, sizeof(fp)))
+                       return;
+
+               if (leaf_fp == lp) {
+                       /*
+                        * Maybe this is a non-leaf function
+                        * built with optimize-for-size, or
+                        * maybe this is a function built
+                        * without optimize-for-size
+                        */
+                       struct frame_tail buftail;
+
+                       user_frame_tail =
+                               (unsigned long *)(fp -
+                                       (unsigned long)sizeof(buftail));
+
+                       if (!access_ok
+                               (VERIFY_READ, user_frame_tail, sizeof(buftail)))
+                               return;
+
+                       if (__copy_from_user_inatomic
+                               (&buftail, user_frame_tail, sizeof(buftail)))
+                               return;
+
+                       if (buftail.stack_fp == gp) {
+                               /* non-leaf function built with
+                                * optimize-for-size
+                                */
+                               struct frame_tail_opt_size buftail_opt_size;
+
+                               user_frame_tail =
+                                       (unsigned long *)(fp - (unsigned long)
+                                               sizeof(buftail_opt_size));
+
+                               if (!access_ok(VERIFY_READ, user_frame_tail,
+                                              sizeof(buftail_opt_size)))
+                                       return;
+
+                               if (__copy_from_user_inatomic
+                                  (&buftail_opt_size, user_frame_tail,
+                                  sizeof(buftail_opt_size)))
+                                       return;
+
+                               perf_callchain_store(entry, lp);
+                               fp = buftail_opt_size.stack_fp;
+
+                               while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
+                                      (unsigned long)fp &&
+                                               !((unsigned long)fp & 0x7) &&
+                                               fp > sp) {
+                                       sp = fp;
+                                       fp = user_backtrace_opt_size(entry, fp);
+                               }
+
+                       } else {
+                               /* non-leaf function compiled
+                                * without optimize-for-size
+                                */
+                               fp = buftail.stack_fp;
+                               perf_callchain_store(entry, lp);
+                               while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
+                                      (unsigned long)fp &&
+                                               !((unsigned long)fp & 0x7) &&
+                                               fp > sp) {
+                                       sp = fp;
+                                       fp = user_backtrace(entry, fp);
+                               }
+                       }
+               } else {
+                       /* this is a leaf function */
+                       fp = leaf_fp;
+                       perf_callchain_store(entry, lp);
+
+                       /* walk the callchain of the previous functions */
+                       while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
+                              (unsigned long)fp &&
+                                  !((unsigned long)fp & 0x7) && fp > sp) {
+                               sp = fp;
+                               fp = user_backtrace(entry, fp);
+                       }
+               }
+               return;
+       }
+}
+
+/* This will be called when the target is in kernel mode */
+void
+perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
+                     struct pt_regs *regs)
+{
+       struct stackframe fr;
+
+       if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+               /* We don't support guest OS callchains yet */
+               return;
+       }
+       fr.fp = regs->fp;
+       fr.lp = regs->lp;
+       fr.sp = regs->sp;
+       walk_stackframe(&fr, callchain_trace, entry);
+}
+
+unsigned long perf_instruction_pointer(struct pt_regs *regs)
+{
+       /* NDS32 does not support virtualization */
+       if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
+               return perf_guest_cbs->get_guest_ip();
+
+       return instruction_pointer(regs);
+}
+
+unsigned long perf_misc_flags(struct pt_regs *regs)
+{
+       int misc = 0;
+
+       /* NDS32 does not support virtualization */
+       if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+               if (perf_guest_cbs->is_user_mode())
+                       misc |= PERF_RECORD_MISC_GUEST_USER;
+               else
+                       misc |= PERF_RECORD_MISC_GUEST_KERNEL;
+       } else {
+               if (user_mode(regs))
+                       misc |= PERF_RECORD_MISC_USER;
+               else
+                       misc |= PERF_RECORD_MISC_KERNEL;
+       }
+
+       return misc;
+}
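
A quick way to exercise the user-space unwinder above is a small program
with a deliberate call chain. A minimal sketch (the file name and flags are
illustrative; frame pointers are required because the walker follows $fp):

	/* callchain-test.c
	 * build: gcc -O0 -fno-omit-frame-pointer -o callchain-test callchain-test.c
	 */
	#include <stdio.h>

	static unsigned long leaf(unsigned long n) { return n * n; }
	static unsigned long middle(unsigned long n) { return leaf(n) + 1; }

	int main(void)
	{
		unsigned long i, sum = 0;

		for (i = 0; i < 100000000; i++)
			sum += middle(i);
		printf("%lu\n", sum);
		return 0;
	}

	$ perf record -e cpu-clock --call-graph fp ./callchain-test
	$ perf report --call-graph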
diff --git a/arch/nds32/kernel/pm.c b/arch/nds32/kernel/pm.c
new file mode 100644 (file)
index 0000000..ffa8040
--- /dev/null
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2008-2017 Andes Technology Corporation
+
+#include <linux/init.h>
+#include <linux/suspend.h>
+#include <linux/device.h>
+#include <linux/printk.h>
+#include <asm/suspend.h>
+#include <nds32_intrinsic.h>
+
+unsigned int resume_addr;
+unsigned int *phy_addr_sp_tmp;
+
+static void nds32_suspend2ram(void)
+{
+       pgd_t *pgdv;
+       pud_t *pudv;
+       pmd_t *pmdv;
+       pte_t *ptev;
+
+       pgdv = (pgd_t *)__va((__nds32__mfsr(NDS32_SR_L1_PPTB) &
+               L1_PPTB_mskBASE)) + pgd_index((unsigned int)cpu_resume);
+
+       pudv = pud_offset(pgdv, (unsigned int)cpu_resume);
+       pmdv = pmd_offset(pudv, (unsigned int)cpu_resume);
+       ptev = pte_offset_map(pmdv, (unsigned int)cpu_resume);
+
+       resume_addr = ((*ptev) & TLB_DATA_mskPPN)
+                       | ((unsigned int)cpu_resume & 0x00000fff);
+
+       suspend2ram();
+}
+
+static void nds32_suspend_cpu(void)
+{
+       while (!(__nds32__mfsr(NDS32_SR_INT_PEND) & wake_mask))
+               __asm__ volatile ("standby no_wake_grant\n\t");
+}
+
+static int nds32_pm_valid(suspend_state_t state)
+{
+       switch (state) {
+       case PM_SUSPEND_ON:
+       case PM_SUSPEND_STANDBY:
+       case PM_SUSPEND_MEM:
+               return 1;
+       default:
+               return 0;
+       }
+}
+
+static int nds32_pm_enter(suspend_state_t state)
+{
+       pr_debug("%s:state:%d\n", __func__, state);
+       switch (state) {
+       case PM_SUSPEND_STANDBY:
+               nds32_suspend_cpu();
+               return 0;
+       case PM_SUSPEND_MEM:
+               nds32_suspend2ram();
+               return 0;
+       default:
+               return -EINVAL;
+       }
+}
+
+static const struct platform_suspend_ops nds32_pm_ops = {
+       .valid = nds32_pm_valid,
+       .enter = nds32_pm_enter,
+};
+
+static int __init nds32_pm_init(void)
+{
+       pr_debug("Enter %s\n", __func__);
+       suspend_set_ops(&nds32_pm_ops);
+       return 0;
+}
+late_initcall(nds32_pm_init);
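
Both sleep states accepted by nds32_pm_valid() are reached through the
standard sysfs interface, so no new user ABI is involved:

	# echo standby > /sys/power/state    (ends up in nds32_suspend_cpu)
	# echo mem > /sys/power/state        (ends up in nds32_suspend2ram)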
index 65fda98..ab7ab46 100644 (file)
@@ -9,15 +9,16 @@
 #include <linux/uaccess.h>
 #include <asm/elf.h>
 #include <asm/proc-fns.h>
+#include <asm/fpu.h>
 #include <linux/ptrace.h>
 #include <linux/reboot.h>
 
-extern void setup_mm_for_reboot(char mode);
-#ifdef CONFIG_PROC_FS
-struct proc_dir_entry *proc_dir_cpu;
-EXPORT_SYMBOL(proc_dir_cpu);
+#if IS_ENABLED(CONFIG_LAZY_FPU)
+struct task_struct *last_task_used_math;
 #endif
 
+extern void setup_mm_for_reboot(char mode);
+
 extern inline void arch_reset(char mode)
 {
        if (mode == 's') {
@@ -125,15 +126,31 @@ void show_regs(struct pt_regs *regs)
 
 EXPORT_SYMBOL(show_regs);
 
+void exit_thread(struct task_struct *tsk)
+{
+#if defined(CONFIG_FPU) && defined(CONFIG_LAZY_FPU)
+       if (last_task_used_math == tsk)
+               last_task_used_math = NULL;
+#endif
+}
+
 void flush_thread(void)
 {
+#if defined(CONFIG_FPU)
+       clear_fpu(task_pt_regs(current));
+       clear_used_math();
+# ifdef CONFIG_LAZY_FPU
+       if (last_task_used_math == current)
+               last_task_used_math = NULL;
+# endif
+#endif
 }
 
 DEFINE_PER_CPU(struct task_struct *, __entry_task);
 
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
 int copy_thread(unsigned long clone_flags, unsigned long stack_start,
-           unsigned long stk_sz, struct task_struct *p)
+               unsigned long stk_sz, struct task_struct *p)
 {
        struct pt_regs *childregs = task_pt_regs(p);
 
@@ -159,6 +176,22 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
        p->thread.cpu_context.pc = (unsigned long)ret_from_fork;
        p->thread.cpu_context.sp = (unsigned long)childregs;
 
+#if IS_ENABLED(CONFIG_FPU)
+       if (used_math()) {
+# if !IS_ENABLED(CONFIG_LAZY_FPU)
+               unlazy_fpu(current);
+# else
+               preempt_disable();
+               if (last_task_used_math == current)
+                       save_fpu(current);
+               preempt_enable();
+# endif
+               p->thread.fpu = current->thread.fpu;
+               clear_fpu(task_pt_regs(p));
+               set_stopped_child_used_math(p);
+       }
+#endif
+
 #ifdef CONFIG_HWZOL
        childregs->lb = 0;
        childregs->le = 0;
@@ -168,12 +201,33 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
        return 0;
 }
 
+#if IS_ENABLED(CONFIG_FPU)
+struct task_struct *_switch_fpu(struct task_struct *prev, struct task_struct *next)
+{
+#if !IS_ENABLED(CONFIG_LAZY_FPU)
+       unlazy_fpu(prev);
+#endif
+       if (!(next->flags & PF_KTHREAD))
+               clear_fpu(task_pt_regs(next));
+       return prev;
+}
+#endif
+
 /*
  * fill in the fpe structure for a core dump...
  */
 int dump_fpu(struct pt_regs *regs, elf_fpregset_t * fpu)
 {
        int fpvalid = 0;
+#if IS_ENABLED(CONFIG_FPU)
+       struct task_struct *tsk = current;
+
+       fpvalid = tsk_used_math(tsk);
+       if (fpvalid) {
+               lose_fpu();
+               memcpy(fpu, &tsk->thread.fpu, sizeof(*fpu));
+       }
+#endif
        return fpvalid;
 }
 
index eacc790..31d29d9 100644 (file)
@@ -15,6 +15,7 @@
 #include <asm/proc-fns.h>
 #include <asm/cache_info.h>
 #include <asm/elf.h>
+#include <asm/fpu.h>
 #include <nds32_intrinsic.h>
 
 #define HWCAP_MFUSR_PC         0x000001
 #define HWCAP_FPU_DP           0x040000
 #define HWCAP_V2               0x080000
 #define HWCAP_DX_REGS          0x100000
+#define HWCAP_HWPRE            0x200000
 
 unsigned long cpu_id, cpu_rev, cpu_cfgid;
+bool has_fpu = false;
 char cpu_series;
 char *endianness = NULL;
 
@@ -70,8 +73,10 @@ static const char *hwcap_str[] = {
        "div",
        "mac",
        "l2c",
-       "dx_regs",
+       "fpu_dp",
        "v2",
+       "dx_regs",
+       "hw_pre",
        NULL,
 };
 
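The reordering above is the actual fix: hwcap_str is indexed by bit
position, i.e. entry i names bit i of elf_hwcap, so fpu_dp (0x040000)
must come before v2 (0x080000) and dx_regs (0x100000), with hw_pre
(0x200000) last. A sketch of the consumer, the /proc/cpuinfo printing
loop (the local names here are assumptions):

	int i;

	for (i = 0; hwcap_str[i]; i++)
		if (elf_hwcap & (1 << i))
			seq_printf(m, "%s ", hwcap_str[i]);
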
@@ -136,6 +141,11 @@ static void __init dump_cpu_info(int cpu)
                    (aliasing_num - 1) << PAGE_SHIFT;
        }
 #endif
+#ifdef CONFIG_FPU
+       /* Disable the FPU now; it is re-enabled on first use. */
+       if (has_fpu)
+               disable_fpu();
+#endif
 }
 
 static void __init setup_cpuinfo(void)
@@ -180,9 +190,10 @@ static void __init setup_cpuinfo(void)
        if (cpu_cfgid & 0x0004)
                elf_hwcap |= HWCAP_EXT2;
 
-       if (cpu_cfgid & 0x0008)
+       if (cpu_cfgid & 0x0008) {
                elf_hwcap |= HWCAP_FPU;
-
+               has_fpu = true;
+       }
        if (cpu_cfgid & 0x0010)
                elf_hwcap |= HWCAP_STRING;
 
@@ -212,6 +223,11 @@ static void __init setup_cpuinfo(void)
        if (__nds32__mfsr(NDS32_SR_MSC_CFG) & MSC_CFG_mskL2C)
                elf_hwcap |= HWCAP_L2C;
 
+#ifdef CONFIG_HW_PRE
+       if (__nds32__mfsr(NDS32_SR_MISC_CTL) & MISC_CTL_makHWPRE_EN)
+               elf_hwcap |= HWCAP_HWPRE;
+#endif
+
        tmp = __nds32__mfsr(NDS32_SR_CACHE_CTL);
        if (!IS_ENABLED(CONFIG_CPU_DCACHE_DISABLE))
                tmp |= CACHE_CTL_mskDC_EN;
index 5d01f6e..5b5be08 100644 (file)
@@ -12,6 +12,7 @@
 #include <asm/cacheflush.h>
 #include <asm/ucontext.h>
 #include <asm/unistd.h>
+#include <asm/fpu.h>
 
 #include <asm/ptrace.h>
 #include <asm/vdso.h>
@@ -20,6 +21,60 @@ struct rt_sigframe {
        struct siginfo info;
        struct ucontext uc;
 };
+#if IS_ENABLED(CONFIG_FPU)
+static inline int restore_sigcontext_fpu(struct pt_regs *regs,
+                                        struct sigcontext __user *sc)
+{
+       struct task_struct *tsk = current;
+       unsigned long used_math_flag;
+       int ret = 0;
+
+       clear_used_math();
+       __get_user_error(used_math_flag, &sc->used_math_flag, ret);
+
+       if (!used_math_flag)
+               return 0;
+       set_used_math();
+
+#if IS_ENABLED(CONFIG_LAZY_FPU)
+       preempt_disable();
+       if (current == last_task_used_math) {
+               last_task_used_math = NULL;
+               disable_ptreg_fpu(regs);
+       }
+       preempt_enable();
+#else
+       clear_fpu(regs);
+#endif
+
+       return __copy_from_user(&tsk->thread.fpu, &sc->fpu,
+                               sizeof(struct fpu_struct));
+}
+
+static inline int setup_sigcontext_fpu(struct pt_regs *regs,
+                                      struct sigcontext __user *sc)
+{
+       struct task_struct *tsk = current;
+       int ret = 0;
+
+       __put_user_error(used_math(), &sc->used_math_flag, ret);
+
+       if (!used_math())
+               return ret;
+
+       preempt_disable();
+#if IS_ENABLED(CONFIG_LAZY_FPU)
+       if (last_task_used_math == tsk)
+               save_fpu(last_task_used_math);
+#else
+       unlazy_fpu(tsk);
+#endif
+       ret = __copy_to_user(&sc->fpu, &tsk->thread.fpu,
+                            sizeof(struct fpu_struct));
+       preempt_enable();
+       return ret;
+}
+#endif
 
 static int restore_sigframe(struct pt_regs *regs,
                            struct rt_sigframe __user * sf)
@@ -69,7 +124,9 @@ static int restore_sigframe(struct pt_regs *regs,
        __get_user_error(regs->le, &sf->uc.uc_mcontext.zol.nds32_le, err);
        __get_user_error(regs->lb, &sf->uc.uc_mcontext.zol.nds32_lb, err);
 #endif
-
+#if IS_ENABLED(CONFIG_FPU)
+       err |= restore_sigcontext_fpu(regs, &sf->uc.uc_mcontext);
+#endif
        /*
         * Avoid sys_rt_sigreturn() restarting.
         */
@@ -153,6 +210,9 @@ setup_sigframe(struct rt_sigframe __user * sf, struct pt_regs *regs,
        __put_user_error(regs->le, &sf->uc.uc_mcontext.zol.nds32_le, err);
        __put_user_error(regs->lb, &sf->uc.uc_mcontext.zol.nds32_lb, err);
 #endif
+#if IS_ENABLED(CONFIG_FPU)
+       err |= setup_sigcontext_fpu(regs, &sf->uc.uc_mcontext);
+#endif
 
        __put_user_error(current->thread.trap_no, &sf->uc.uc_mcontext.trap_no,
                         err);
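
Since the FPU image now travels in the sigcontext, a user-space signal
handler can see whether FP state was live when the signal hit. A minimal
sketch, assuming the used_math_flag field this series adds to the uapi
sigcontext is reachable through uc_mcontext:

	#include <signal.h>
	#include <unistd.h>
	#include <ucontext.h>

	static void fpe_handler(int sig, siginfo_t *info, void *uctx)
	{
		static const char msg[] = "FPU state captured in sigcontext\n";
		ucontext_t *uc = uctx;

		if (uc->uc_mcontext.used_math_flag)
			write(2, msg, sizeof(msg) - 1);
	}

	int main(void)
	{
		struct sigaction sa = {
			.sa_sigaction = fpe_handler,
			.sa_flags = SA_SIGINFO,
		};

		sigaction(SIGFPE, &sa, NULL);
		/* ... FP work that may raise SIGFPE ... */
		return 0;
	}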
diff --git a/arch/nds32/kernel/sleep.S b/arch/nds32/kernel/sleep.S
new file mode 100644 (file)
index 0000000..ca4e61f
--- /dev/null
@@ -0,0 +1,131 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2017 Andes Technology Corporation */
+
+#include <asm/memory.h>
+
+.data
+.global sp_tmp
+sp_tmp:
+.long 0
+
+.text
+.globl suspend2ram
+.globl cpu_resume
+
+suspend2ram:
+       pushm   $r0, $r31
+#if defined(CONFIG_HWZOL)
+       mfusr   $r0, $lc
+       mfusr   $r1, $le
+       mfusr   $r2, $lb
+#endif
+       mfsr    $r3, $mr0
+       mfsr    $r4, $mr1
+       mfsr    $r5, $mr4
+       mfsr    $r6, $mr6
+       mfsr    $r7, $mr7
+       mfsr    $r8, $mr8
+       mfsr    $r9, $ir0
+       mfsr    $r10, $ir1
+       mfsr    $r11, $ir2
+       mfsr    $r12, $ir3
+       mfsr    $r13, $ir9
+       mfsr    $r14, $ir10
+       mfsr    $r15, $ir12
+       mfsr    $r16, $ir13
+       mfsr    $r17, $ir14
+       mfsr    $r18, $ir15
+       pushm   $r0, $r19
+#if defined(CONFIG_FPU)
+       jal     store_fpu_for_suspend
+#endif
+       tlbop   FlushAll
+       isb
+
+       // transfer $sp from va to pa
+       sethi   $r0, hi20(PAGE_OFFSET)
+       ori     $r0, $r0, lo12(PAGE_OFFSET)
+       movi    $r2, PHYS_OFFSET
+       sub     $r1, $sp, $r0
+       add     $r2, $r1, $r2
+
+       // store pa($sp) to sp_tmp
+       sethi   $r1, hi20(sp_tmp)
+       swi     $r2, [$r1 + lo12(sp_tmp)]
+
+       pushm   $r16, $r25
+       pushm   $r29, $r30
+#ifdef CONFIG_CACHE_L2
+       jal     dcache_wb_all_level
+#else
+       jal     cpu_dcache_wb_all
+#endif
+       popm    $r29, $r30
+       popm    $r16, $r25
+
+       // get wake_mask and loop in standby
+       la      $r1, wake_mask
+       lwi     $r1, [$r1]
+self_loop:
+       standby wake_grant
+       mfsr    $r2, $ir15
+       and     $r2, $r1, $r2
+       beqz    $r2, self_loop
+
+       // set ipc to resume address
+       la      $r1, resume_addr
+       lwi     $r1, [$r1]
+       mtsr    $r1, $ipc
+       isb
+
+       // reset psw, turn off the address translation
+       li      $r2, 0x7000a
+       mtsr    $r2, $ipsw
+       isb
+
+       iret
+cpu_resume:
+       // translate the address of sp_tmp variable to pa
+       la      $r1, sp_tmp
+       sethi   $r0, hi20(PAGE_OFFSET)
+       ori     $r0, $r0, lo12(PAGE_OFFSET)
+       movi    $r2, PHYS_OFFSET
+       sub     $r1, $r1, $r0
+       add     $r1, $r1, $r2
+
+       // access the sp_tmp to get stack pointer
+       lwi     $sp, [$r1]
+
+       popm    $r0, $r19
+#if defined(CONFIG_HWZOL)
+       mtusr   $r0, $lb
+       mtusr   $r1, $lc
+       mtusr   $r2, $le
+#endif
+       mtsr    $r3, $mr0
+       mtsr    $r4, $mr1
+       mtsr    $r5, $mr4
+       mtsr    $r6, $mr6
+       mtsr    $r7, $mr7
+       mtsr    $r8, $mr8
+       // set original psw to ipsw
+       mtsr    $r9, $ir1
+
+       mtsr    $r11, $ir2
+       mtsr    $r12, $ir3
+
+       // set ipc to RR
+       la      $r13, RR
+       mtsr    $r13, $ir9
+
+       mtsr    $r14, $ir10
+       mtsr    $r15, $ir12
+       mtsr    $r16, $ir13
+       mtsr    $r17, $ir14
+       mtsr    $r18, $ir15
+       popm    $r0, $r31
+
+       isb
+       iret
+RR:
+       ret
index 9de93ab..0835277 100644 (file)
@@ -6,6 +6,8 @@
 
 #include <asm/cachectl.h>
 #include <asm/proc-fns.h>
+#include <asm/udftrap.h>
+#include <asm/fpu.h>
 
 SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len,
               unsigned long, prot, unsigned long, flags,
@@ -48,3 +50,33 @@ SYSCALL_DEFINE3(cacheflush, unsigned int, start, unsigned int, end, int, cache)
 
        return 0;
 }
+
+SYSCALL_DEFINE1(udftrap, int, option)
+{
+#if IS_ENABLED(CONFIG_SUPPORT_DENORMAL_ARITHMETIC)
+       int old_udftrap;
+
+       if (!used_math()) {
+               load_fpu(&init_fpuregs);
+               current->thread.fpu.UDF_trap = init_fpuregs.UDF_trap;
+               set_used_math();
+       }
+
+       old_udftrap = current->thread.fpu.UDF_trap;
+       switch (option) {
+       case DISABLE_UDFTRAP:
+               current->thread.fpu.UDF_trap = 0;
+               break;
+       case ENABLE_UDFTRAP:
+               current->thread.fpu.UDF_trap = FPCSR_mskUDFE;
+               break;
+       case GET_UDFTRAP:
+               break;
+       default:
+               return -EINVAL;
+       }
+       return old_udftrap;
+#else
+       return -ENOTSUPP;
+#endif
+}
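
User space reaches the new syscall through syscall(2). A minimal sketch,
assuming the __NR_udftrap number exported by this series' uapi unistd.h;
the option values mirror <asm/udftrap.h> and are illustrative:

	#include <unistd.h>
	#include <sys/syscall.h>
	#include <stdio.h>

	/* Illustrative copies; the authoritative values live in asm/udftrap.h. */
	#define DISABLE_UDFTRAP 0
	#define ENABLE_UDFTRAP  1
	#define GET_UDFTRAP     2

	int main(void)
	{
		long old = syscall(__NR_udftrap, ENABLE_UDFTRAP);

		if (old < 0)
			perror("udftrap");
		/* FP underflow is now trapped instead of being flushed silently. */
		return 0;
	}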
index 1496aab..5aa7c17 100644 (file)
@@ -12,6 +12,7 @@
 
 #include <asm/proc-fns.h>
 #include <asm/unistd.h>
+#include <asm/fpu.h>
 
 #include <linux/ptrace.h>
 #include <nds32_intrinsic.h>
@@ -357,6 +358,21 @@ void do_dispatch_general(unsigned long entry, unsigned long addr,
        } else if (type == ETYPE_RESERVED_INSTRUCTION) {
                /* Reserved instruction */
                do_revinsn(regs);
+       } else if (type == ETYPE_COPROCESSOR) {
+               /* Coprocessor */
+#if IS_ENABLED(CONFIG_FPU)
+               unsigned int fucop_exist = __nds32__mfsr(NDS32_SR_FUCOP_EXIST);
+               unsigned int cpid = ((itype & ITYPE_mskCPID) >> ITYPE_offCPID);
+
+               if ((cpid == FPU_CPID) &&
+                   (fucop_exist & FUCOP_EXIST_mskCP0ISFPU)) {
+                       unsigned int subtype = (itype & ITYPE_mskSTYPE);
+
+                       if (do_fpu_exception(subtype, regs))
+                               return;
+               }
+#endif
+               unhandled_exceptions(entry, addr, type, regs);
        } else if (type == ETYPE_TRAP && swid == SWID_RAISE_INTERRUPT_LEVEL) {
                /* trap, used on v3 EDM target debugging workaround */
                /*
diff --git a/arch/nds32/math-emu/Makefile b/arch/nds32/math-emu/Makefile
new file mode 100644 (file)
index 0000000..947fe0c
--- /dev/null
@@ -0,0 +1,7 @@
+#
+# Makefile for the Linux/nds32 kernel FPU emulation.
+#
+
+obj-y  := fpuemu.o \
+          fdivd.o fmuld.o fsubd.o faddd.o fs2d.o fsqrtd.o fcmpd.o fnegs.o \
+          fdivs.o fmuls.o fsubs.o fadds.o fd2s.o fsqrts.o fcmps.o fnegd.o
diff --git a/arch/nds32/math-emu/faddd.c b/arch/nds32/math-emu/faddd.c
new file mode 100644 (file)
index 0000000..f7fd4e3
--- /dev/null
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2005-2018 Andes Technology Corporation
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/double.h>
+void faddd(void *ft, void *fa, void *fb)
+{
+       FP_DECL_D(A);
+       FP_DECL_D(B);
+       FP_DECL_D(R);
+       FP_DECL_EX;
+
+       FP_UNPACK_DP(A, fa);
+       FP_UNPACK_DP(B, fb);
+
+       FP_ADD_D(R, A, B);
+
+       FP_PACK_DP(ft, R);
+
+       __FPU_FPCSR |= FP_CUR_EXCEPTIONS;
+
+}
diff --git a/arch/nds32/math-emu/fadds.c b/arch/nds32/math-emu/fadds.c
new file mode 100644 (file)
index 0000000..f5af6ca
--- /dev/null
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2005-2018 Andes Technology Corporation
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/single.h>
+void fadds(void *ft, void *fa, void *fb)
+{
+       FP_DECL_S(A);
+       FP_DECL_S(B);
+       FP_DECL_S(R);
+       FP_DECL_EX;
+
+       FP_UNPACK_SP(A, fa);
+       FP_UNPACK_SP(B, fb);
+
+       FP_ADD_S(R, A, B);
+
+       FP_PACK_SP(ft, R);
+
+       __FPU_FPCSR |= FP_CUR_EXCEPTIONS;
+
+}
diff --git a/arch/nds32/math-emu/fcmpd.c b/arch/nds32/math-emu/fcmpd.c
new file mode 100644 (file)
index 0000000..0ea225a
--- /dev/null
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2005-2018 Andes Technology Corporation
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/double.h>
+int fcmpd(void *ft, void *fa, void *fb, int cmpop)
+{
+       FP_DECL_D(A);
+       FP_DECL_D(B);
+       FP_DECL_EX;
+       long cmp;
+
+       FP_UNPACK_DP(A, fa);
+       FP_UNPACK_DP(B, fb);
+
+       FP_CMP_D(cmp, A, B, SF_CUN);
+       cmp += 2;
+       if (cmp == SF_CGT)
+               *(long *)ft = 0;
+       else
+               *(long *)ft = (cmp & cmpop) ? 1 : 0;
+
+       return 0;
+}
diff --git a/arch/nds32/math-emu/fcmps.c b/arch/nds32/math-emu/fcmps.c
new file mode 100644 (file)
index 0000000..6814807
--- /dev/null
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2005-2018 Andes Technology Corporation
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/single.h>
+int fcmps(void *ft, void *fa, void *fb, int cmpop)
+{
+       FP_DECL_S(A);
+       FP_DECL_S(B);
+       FP_DECL_EX;
+       long cmp;
+
+       FP_UNPACK_SP(A, fa);
+       FP_UNPACK_SP(B, fb);
+
+       FP_CMP_S(cmp, A, B, SF_CUN);
+       cmp += 2;
+       if (cmp == SF_CGT)
+               *(int *)ft = 0x0;
+       else
+               *(int *)ft = (cmp & cmpop) ? 0x1 : 0x0;
+
+       return 0;
+}
diff --git a/arch/nds32/math-emu/fd2s.c b/arch/nds32/math-emu/fd2s.c
new file mode 100644 (file)
index 0000000..1328371
--- /dev/null
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2005-2018 Andes Technology Corporation
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/double.h>
+#include <math-emu/single.h>
+#include <math-emu/soft-fp.h>
+void fd2s(void *ft, void *fa)
+{
+       FP_DECL_D(A);
+       FP_DECL_S(R);
+       FP_DECL_EX;
+
+       FP_UNPACK_DP(A, fa);
+
+       FP_CONV(S, D, 1, 2, R, A);
+
+       FP_PACK_SP(ft, R);
+
+       __FPU_FPCSR |= FP_CUR_EXCEPTIONS;
+}
diff --git a/arch/nds32/math-emu/fdivd.c b/arch/nds32/math-emu/fdivd.c
new file mode 100644 (file)
index 0000000..458e7e9
--- /dev/null
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2005-2018 Andes Technology Corporation
+
+#include <linux/uaccess.h>
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/double.h>
+
+void fdivd(void *ft, void *fa, void *fb)
+{
+       FP_DECL_D(A);
+       FP_DECL_D(B);
+       FP_DECL_D(R);
+       FP_DECL_EX;
+
+       FP_UNPACK_DP(A, fa);
+       FP_UNPACK_DP(B, fb);
+
+       if (B_c == FP_CLS_ZERO && A_c != FP_CLS_ZERO)
+               FP_SET_EXCEPTION(FP_EX_DIVZERO);
+
+       FP_DIV_D(R, A, B);
+
+       FP_PACK_DP(ft, R);
+
+       __FPU_FPCSR |= FP_CUR_EXCEPTIONS;
+}
diff --git a/arch/nds32/math-emu/fdivs.c b/arch/nds32/math-emu/fdivs.c
new file mode 100644 (file)
index 0000000..c7d2021
--- /dev/null
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2005-2018 Andes Technology Corporation
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/single.h>
+void fdivs(void *ft, void *fa, void *fb)
+{
+       FP_DECL_S(A);
+       FP_DECL_S(B);
+       FP_DECL_S(R);
+       FP_DECL_EX;
+
+       FP_UNPACK_SP(A, fa);
+       FP_UNPACK_SP(B, fb);
+
+       if (B_c == FP_CLS_ZERO && A_c != FP_CLS_ZERO)
+               FP_SET_EXCEPTION(FP_EX_DIVZERO);
+
+       FP_DIV_S(R, A, B);
+
+       FP_PACK_SP(ft, R);
+
+       __FPU_FPCSR |= FP_CUR_EXCEPTIONS;
+}
diff --git a/arch/nds32/math-emu/fmuld.c b/arch/nds32/math-emu/fmuld.c
new file mode 100644 (file)
index 0000000..f3c77a4
--- /dev/null
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2005-2018 Andes Technology Corporation
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/double.h>
+void fmuld(void *ft, void *fa, void *fb)
+{
+       FP_DECL_D(A);
+       FP_DECL_D(B);
+       FP_DECL_D(R);
+       FP_DECL_EX;
+
+       FP_UNPACK_DP(A, fa);
+       FP_UNPACK_DP(B, fb);
+
+       FP_MUL_D(R, A, B);
+
+       FP_PACK_DP(ft, R);
+
+       __FPU_FPCSR |= FP_CUR_EXCEPTIONS;
+}
diff --git a/arch/nds32/math-emu/fmuls.c b/arch/nds32/math-emu/fmuls.c
new file mode 100644 (file)
index 0000000..cf150df
--- /dev/null
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2005-2018 Andes Technology Corporation
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/single.h>
+void fmuls(void *ft, void *fa, void *fb)
+{
+       FP_DECL_S(A);
+       FP_DECL_S(B);
+       FP_DECL_S(R);
+       FP_DECL_EX;
+
+       FP_UNPACK_SP(A, fa);
+       FP_UNPACK_SP(B, fb);
+
+       FP_MUL_S(R, A, B);
+
+       FP_PACK_SP(ft, R);
+
+       __FPU_FPCSR |= FP_CUR_EXCEPTIONS;
+}
diff --git a/arch/nds32/math-emu/fnegd.c b/arch/nds32/math-emu/fnegd.c
new file mode 100644 (file)
index 0000000..de7ea6a
--- /dev/null
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2005-2018 Andes Technology Corporation
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/double.h>
+void fnegd(void *ft, void *fa)
+{
+       FP_DECL_D(A);
+       FP_DECL_D(R);
+       FP_DECL_EX;
+
+       FP_UNPACK_DP(A, fa);
+
+       FP_NEG_D(R, A);
+
+       FP_PACK_DP(ft, R);
+
+       __FPU_FPCSR |= FP_CUR_EXCEPTIONS;
+}
diff --git a/arch/nds32/math-emu/fnegs.c b/arch/nds32/math-emu/fnegs.c
new file mode 100644 (file)
index 0000000..07270b3
--- /dev/null
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2005-2018 Andes Technology Corporation
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/single.h>
+void fnegs(void *ft, void *fa)
+{
+       FP_DECL_S(A);
+       FP_DECL_S(R);
+       FP_DECL_EX;
+
+       FP_UNPACK_SP(A, fa);
+
+       FP_NEG_S(R, A);
+
+       FP_PACK_SP(ft, R);
+
+       __FPU_FPCSR |= FP_CUR_EXCEPTIONS;
+}
diff --git a/arch/nds32/math-emu/fpuemu.c b/arch/nds32/math-emu/fpuemu.c
new file mode 100644 (file)
index 0000000..75cf164
--- /dev/null
@@ -0,0 +1,357 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2005-2018 Andes Technology Corporation
+
+#include <asm/bitfield.h>
+#include <asm/uaccess.h>
+#include <asm/sfp-machine.h>
+#include <asm/fpuemu.h>
+#include <asm/nds32_fpu_inst.h>
+
+#define DPFROMREG(dp, x) ((dp) = (void *)((unsigned long *)fpu_reg + 2 * (x)))
+#ifdef __NDS32_EL__
+#define SPFROMREG(sp, x)\
+       ((sp) = (void *)((unsigned long *)fpu_reg + ((x) ^ 1)))
+#else
+#define SPFROMREG(sp, x) ((sp) = (void *)((unsigned long *)fpu_reg + (x)))
+#endif
+
+#define DEF3OP(name, p, f1, f2) \
+void fpemu_##name##p(void *ft, void *fa, void *fb) \
+{ \
+       f1(fa, fa, fb); \
+       f2(ft, ft, fa); \
+}
+
+#define DEF3OPNEG(name, p, f1, f2, f3) \
+void fpemu_##name##p(void *ft, void *fa, void *fb) \
+{ \
+       f1(fa, fa, fb); \
+       f2(ft, ft, fa); \
+       f3(ft, ft); \
+}
+DEF3OP(fmadd, s, fmuls, fadds);
+DEF3OP(fmsub, s, fmuls, fsubs);
+DEF3OP(fmadd, d, fmuld, faddd);
+DEF3OP(fmsub, d, fmuld, fsubd);
+DEF3OPNEG(fnmadd, s, fmuls, fadds, fnegs);
+DEF3OPNEG(fnmsub, s, fmuls, fsubs, fnegs);
+DEF3OPNEG(fnmadd, d, fmuld, faddd, fnegd);
+DEF3OPNEG(fnmsub, d, fmuld, fsubd, fnegd);
+
+static const unsigned char cmptab[8] = {
+       SF_CEQ,
+       SF_CEQ,
+       SF_CLT,
+       SF_CLT,
+       SF_CLT | SF_CEQ,
+       SF_CLT | SF_CEQ,
+       SF_CUN,
+       SF_CUN
+};
+
+enum ARGTYPE {
+       S1S = 1,
+       S2S,
+       S1D,
+       CS,
+       D1D,
+       D2D,
+       D1S,
+       CD
+};
+union func_t {
+       void (*t)(void *ft, void *fa, void *fb);
+       void (*b)(void *ft, void *fa);
+};
+/*
+ * Emulate a single FPU arithmetic instruction.
+ */
+static int fpu_emu(struct fpu_struct *fpu_reg, unsigned long insn)
+{
+       int rfmt;               /* resulting format */
+       union func_t func;
+       int ftype = 0;
+
+       switch (rfmt = NDS32Insn_OPCODE_COP0(insn)) {
+       case fs1_op:{
+                       switch (NDS32Insn_OPCODE_BIT69(insn)) {
+                       case fadds_op:
+                               func.t = fadds;
+                               ftype = S2S;
+                               break;
+                       case fsubs_op:
+                               func.t = fsubs;
+                               ftype = S2S;
+                               break;
+                       case fmadds_op:
+                               func.t = fpemu_fmadds;
+                               ftype = S2S;
+                               break;
+                       case fmsubs_op:
+                               func.t = fpemu_fmsubs;
+                               ftype = S2S;
+                               break;
+                       case fnmadds_op:
+                               func.t = fpemu_fnmadds;
+                               ftype = S2S;
+                               break;
+                       case fnmsubs_op:
+                               func.t = fpemu_fnmsubs;
+                               ftype = S2S;
+                               break;
+                       case fmuls_op:
+                               func.t = fmuls;
+                               ftype = S2S;
+                               break;
+                       case fdivs_op:
+                               func.t = fdivs;
+                               ftype = S2S;
+                               break;
+                       case fs1_f2op_op:
+                               switch (NDS32Insn_OPCODE_BIT1014(insn)) {
+                               case fs2d_op:
+                                       func.b = fs2d;
+                                       ftype = S1D;
+                                       break;
+                               case fsqrts_op:
+                                       func.b = fsqrts;
+                                       ftype = S1S;
+                                       break;
+                               default:
+                                       return SIGILL;
+                               }
+                               break;
+                       default:
+                               return SIGILL;
+                       }
+                       break;
+               }
+       case fs2_op:
+               switch (NDS32Insn_OPCODE_BIT69(insn)) {
+               case fcmpeqs_op:
+               case fcmpeqs_e_op:
+               case fcmplts_op:
+               case fcmplts_e_op:
+               case fcmples_op:
+               case fcmples_e_op:
+               case fcmpuns_op:
+               case fcmpuns_e_op:
+                       ftype = CS;
+                       break;
+               default:
+                       return SIGILL;
+               }
+               break;
+       case fd1_op:{
+                       switch (NDS32Insn_OPCODE_BIT69(insn)) {
+                       case faddd_op:
+                               func.t = faddd;
+                               ftype = D2D;
+                               break;
+                       case fsubd_op:
+                               func.t = fsubd;
+                               ftype = D2D;
+                               break;
+                       case fmaddd_op:
+                               func.t = fpemu_fmaddd;
+                               ftype = D2D;
+                               break;
+                       case fmsubd_op:
+                               func.t = fpemu_fmsubd;
+                               ftype = D2D;
+                               break;
+                       case fnmaddd_op:
+                               func.t = fpemu_fnmaddd;
+                               ftype = D2D;
+                               break;
+                       case fnmsubd_op:
+                               func.t = fpemu_fnmsubd;
+                               ftype = D2D;
+                               break;
+                       case fmuld_op:
+                               func.t = fmuld;
+                               ftype = D2D;
+                               break;
+                       case fdivd_op:
+                               func.t = fdivd;
+                               ftype = D2D;
+                               break;
+                       case fd1_f2op_op:
+                               switch (NDS32Insn_OPCODE_BIT1014(insn)) {
+                               case fd2s_op:
+                                       func.b = fd2s;
+                                       ftype = D1S;
+                                       break;
+                               case fsqrtd_op:
+                                       func.b = fsqrtd;
+                                       ftype = D1D;
+                                       break;
+                               default:
+                                       return SIGILL;
+                               }
+                               break;
+                       default:
+                               return SIGILL;
+
+                       }
+                       break;
+               }
+
+       case fd2_op:
+               switch (NDS32Insn_OPCODE_BIT69(insn)) {
+               case fcmpeqd_op:
+               case fcmpeqd_e_op:
+               case fcmpltd_op:
+               case fcmpltd_e_op:
+               case fcmpled_op:
+               case fcmpled_e_op:
+               case fcmpund_op:
+               case fcmpund_e_op:
+                       ftype = CD;
+                       break;
+               default:
+                       return SIGILL;
+               }
+               break;
+
+       default:
+               return SIGILL;
+       }
+
+       switch (ftype) {
+       case S1S:{
+                       void *ft, *fa;
+
+                       SPFROMREG(ft, NDS32Insn_OPCODE_Rt(insn));
+                       SPFROMREG(fa, NDS32Insn_OPCODE_Ra(insn));
+                       func.b(ft, fa);
+                       break;
+               }
+       case S2S:{
+                       void *ft, *fa, *fb;
+
+                       SPFROMREG(ft, NDS32Insn_OPCODE_Rt(insn));
+                       SPFROMREG(fa, NDS32Insn_OPCODE_Ra(insn));
+                       SPFROMREG(fb, NDS32Insn_OPCODE_Rb(insn));
+                       func.t(ft, fa, fb);
+                       break;
+               }
+       case S1D:{
+                       void *ft, *fa;
+
+                       DPFROMREG(ft, NDS32Insn_OPCODE_Rt(insn));
+                       SPFROMREG(fa, NDS32Insn_OPCODE_Ra(insn));
+                       func.b(ft, fa);
+                       break;
+               }
+       case CS:{
+                       unsigned int cmpop = NDS32Insn_OPCODE_BIT69(insn);
+                       void *ft, *fa, *fb;
+
+                       SPFROMREG(ft, NDS32Insn_OPCODE_Rt(insn));
+                       SPFROMREG(fa, NDS32Insn_OPCODE_Ra(insn));
+                       SPFROMREG(fb, NDS32Insn_OPCODE_Rb(insn));
+                       if (cmpop < 0x8) {
+                               cmpop = cmptab[cmpop];
+                               fcmps(ft, fa, fb, cmpop);
+                       } else
+                               return SIGILL;
+                       break;
+               }
+       case D1D:{
+                       void *ft, *fa;
+
+                       DPFROMREG(ft, NDS32Insn_OPCODE_Rt(insn));
+                       DPFROMREG(fa, NDS32Insn_OPCODE_Ra(insn));
+                       func.b(ft, fa);
+                       break;
+               }
+       case D2D:{
+                       void *ft, *fa, *fb;
+
+                       DPFROMREG(ft, NDS32Insn_OPCODE_Rt(insn));
+                       DPFROMREG(fa, NDS32Insn_OPCODE_Ra(insn));
+                       DPFROMREG(fb, NDS32Insn_OPCODE_Rb(insn));
+                       func.t(ft, fa, fb);
+                       break;
+               }
+       case D1S:{
+                       void *ft, *fa;
+
+                       SPFROMREG(ft, NDS32Insn_OPCODE_Rt(insn));
+                       DPFROMREG(fa, NDS32Insn_OPCODE_Ra(insn));
+                       func.b(ft, fa);
+                       break;
+               }
+       case CD:{
+                       unsigned int cmpop = NDS32Insn_OPCODE_BIT69(insn);
+                       void *ft, *fa, *fb;
+
+                       SPFROMREG(ft, NDS32Insn_OPCODE_Rt(insn));
+                       DPFROMREG(fa, NDS32Insn_OPCODE_Ra(insn));
+                       DPFROMREG(fb, NDS32Insn_OPCODE_Rb(insn));
+                       if (cmpop < 0x8) {
+                               cmpop = cmptab[cmpop];
+                               fcmpd(ft, fa, fb, cmpop);
+                       } else
+                               return SIGILL;
+                       break;
+               }
+       default:
+               return SIGILL;
+       }
+
+       /*
+        * If an exception is required, generate a tidy SIGFPE exception.
+        */
+#if IS_ENABLED(CONFIG_SUPPORT_DENORMAL_ARITHMETIC)
+       if (((fpu_reg->fpcsr << 5) & fpu_reg->fpcsr & FPCSR_mskALLE_NO_UDFE) ||
+           ((fpu_reg->fpcsr & FPCSR_mskUDF) && (fpu_reg->UDF_trap)))
+#else
+       if ((fpu_reg->fpcsr << 5) & fpu_reg->fpcsr & FPCSR_mskALLE)
+#endif
+               return SIGFPE;
+       return 0;
+}
+
+
+int do_fpuemu(struct pt_regs *regs, struct fpu_struct *fpu)
+{
+       unsigned long insn = 0, addr = regs->ipc;
+       unsigned long emulpc, contpc;
+       unsigned char *pc = (void *)&insn;
+       char c;
+       int i = 0, ret;
+
+       for (i = 0; i < 4; i++) {
+               if (__get_user(c, (unsigned char *)addr++))
+                       return SIGBUS;
+               *pc++ = c;
+       }
+
+       insn = be32_to_cpu(insn);
+
+       emulpc = regs->ipc;
+       contpc = regs->ipc + 4;
+
+       if (NDS32Insn_OPCODE(insn) != cop0_op)
+               return SIGILL;
+       switch (NDS32Insn_OPCODE_COP0(insn)) {
+       case fs1_op:
+       case fs2_op:
+       case fd1_op:
+       case fd2_op:
+               {
+                       /* a real fpu computation instruction */
+                       ret = fpu_emu(fpu, insn);
+                       if (!ret)
+                               regs->ipc = contpc;
+               }
+               break;
+
+       default:
+               return SIGILL;
+       }
+
+       return ret;
+}
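
To make the macro layer above concrete, DEF3OP(fmadd, s, fmuls, fadds)
expands to the following: the three-operand fused ops are chained from the
two-operand primitives, with the first source doubling as scratch space:

	void fpemu_fmadds(void *ft, void *fa, void *fb)
	{
		fmuls(fa, fa, fb);      /* fa <- fa * fb */
		fadds(ft, ft, fa);      /* ft <- ft + (fa * fb) */
	}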
diff --git a/arch/nds32/math-emu/fs2d.c b/arch/nds32/math-emu/fs2d.c
new file mode 100644 (file)
index 0000000..0e8db90
--- /dev/null
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2005-2018 Andes Technology Corporation
+
+#include <linux/uaccess.h>
+#include <asm/sfp-machine.h>
+#include <math-emu/double.h>
+#include <math-emu/single.h>
+#include <math-emu/soft-fp.h>
+
+void fs2d(void *ft, void *fa)
+{
+       FP_DECL_S(A);
+       FP_DECL_D(R);
+       FP_DECL_EX;
+
+       FP_UNPACK_SP(A, fa);
+
+       FP_CONV(D, S, 2, 1, R, A);
+
+       FP_PACK_DP(ft, R);
+
+       __FPU_FPCSR |= FP_CUR_EXCEPTIONS;
+}
diff --git a/arch/nds32/math-emu/fsqrtd.c b/arch/nds32/math-emu/fsqrtd.c
new file mode 100644 (file)
index 0000000..c3a8dbd
--- /dev/null
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2005-2018 Andes Technology Corporation
+
+#include <linux/uaccess.h>
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/double.h>
+void fsqrtd(void *ft, void *fa)
+{
+       FP_DECL_D(A);
+       FP_DECL_D(R);
+       FP_DECL_EX;
+
+       FP_UNPACK_DP(A, fa);
+
+       FP_SQRT_D(R, A);
+
+       FP_PACK_DP(ft, R);
+
+       __FPU_FPCSR |= FP_CUR_EXCEPTIONS;
+}
diff --git a/arch/nds32/math-emu/fsqrts.c b/arch/nds32/math-emu/fsqrts.c
new file mode 100644 (file)
index 0000000..4c6f94b
--- /dev/null
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2005-2018 Andes Technology Corporation
+
+#include <linux/uaccess.h>
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/single.h>
+void fsqrts(void *ft, void *fa)
+{
+       FP_DECL_S(A);
+       FP_DECL_S(R);
+       FP_DECL_EX;
+
+       FP_UNPACK_SP(A, fa);
+
+       FP_SQRT_S(R, A);
+
+       FP_PACK_SP(ft, R);
+
+       __FPU_FPCSR |= FP_CUR_EXCEPTIONS;
+}
diff --git a/arch/nds32/math-emu/fsubd.c b/arch/nds32/math-emu/fsubd.c
new file mode 100644 (file)
index 0000000..81b6a0d
--- /dev/null
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2005-2018 Andes Technology Corporation
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/double.h>
+void fsubd(void *ft, void *fa, void *fb)
+{
+
+       FP_DECL_D(A);
+       FP_DECL_D(B);
+       FP_DECL_D(R);
+       FP_DECL_EX;
+
+       FP_UNPACK_DP(A, fa);
+       FP_UNPACK_DP(B, fb);
+
+       if (B_c != FP_CLS_NAN)
+               B_s ^= 1;
+
+       FP_ADD_D(R, A, B);
+
+       FP_PACK_DP(ft, R);
+
+       __FPU_FPCSR |= FP_CUR_EXCEPTIONS;
+}
diff --git a/arch/nds32/math-emu/fsubs.c b/arch/nds32/math-emu/fsubs.c
new file mode 100644 (file)
index 0000000..61ddd97
--- /dev/null
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2005-2018 Andes Technology Corporation
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/single.h>
+void fsubs(void *ft, void *fa, void *fb)
+{
+
+       FP_DECL_S(A);
+       FP_DECL_S(B);
+       FP_DECL_S(R);
+       FP_DECL_EX;
+
+       FP_UNPACK_SP(A, fa);
+       FP_UNPACK_SP(B, fb);
+
+       if (B_c != FP_CLS_NAN)
+               B_s ^= 1;
+
+       FP_ADD_S(R, A, B);
+
+       FP_PACK_SP(ft, R);
+
+       __FPU_FPCSR |= FP_CUR_EXCEPTIONS;
+}
index 6b68558..7c5c15a 100644 (file)
@@ -4,4 +4,8 @@ obj-y                           := extable.o tlb.o \
 
 obj-$(CONFIG_ALIGNMENT_TRAP)   += alignment.o
 obj-$(CONFIG_HIGHMEM)           += highmem.o
-CFLAGS_proc-n13.o              += -fomit-frame-pointer
+
+ifdef CONFIG_FUNCTION_TRACER
+CFLAGS_REMOVE_proc.o     = $(CC_FLAGS_FTRACE)
+endif
+CFLAGS_proc.o              += -fomit-frame-pointer
index b740534..68d5f2a 100644 (file)
@@ -9,6 +9,7 @@
 #include <linux/init.h>
 #include <linux/hardirq.h>
 #include <linux/uaccess.h>
+#include <linux/perf_event.h>
 
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
@@ -169,8 +170,6 @@ good_area:
                        mask = VM_EXEC;
                else {
                        mask = VM_READ | VM_WRITE;
-                       if (vma->vm_flags & VM_WRITE)
-                               flags |= FAULT_FLAG_WRITE;
                }
        } else if (entry == ENTRY_TLB_MISC) {
                switch (error_code & ITYPE_mskETYPE) {
@@ -231,11 +230,17 @@ good_area:
         * attempt. If we go through a retry, it is extremely likely that the
         * page will be found in page cache at that point.
         */
+       perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
        if (flags & FAULT_FLAG_ALLOW_RETRY) {
-               if (fault & VM_FAULT_MAJOR)
+               if (fault & VM_FAULT_MAJOR) {
                        tsk->maj_flt++;
-               else
+                       perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ,
+                                     1, regs, addr);
+               } else {
                        tsk->min_flt++;
+                       perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN,
+                                     1, regs, addr);
+               }
                if (fault & VM_FAULT_RETRY) {
                        flags &= ~FAULT_FLAG_ALLOW_RETRY;
                        flags |= FAULT_FLAG_TRIED;
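
With these hooks in place the generic software events are usable on nds32,
for example:

	$ perf stat -e page-faults,minor-faults,major-faults ./program

which counts the PERF_COUNT_SW_PAGE_FAULTS* events emitted above.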
index f69a858..85cf6e0 100644 (file)
@@ -10,6 +10,8 @@
 #include <linux/irqchip.h>
 #include <nds32_intrinsic.h>
 
+unsigned long wake_mask;
+
 static void ativic32_ack_irq(struct irq_data *data)
 {
        __nds32__mtsr_dsb(BIT(data->hwirq), NDS32_SR_INT_PEND2);
@@ -27,11 +29,40 @@ static void ativic32_unmask_irq(struct irq_data *data)
        __nds32__mtsr_dsb(int_mask2 | (BIT(data->hwirq)), NDS32_SR_INT_MASK2);
 }
 
+static int nointc_set_wake(struct irq_data *data, unsigned int on)
+{
+       unsigned long int_mask = __nds32__mfsr(NDS32_SR_INT_MASK);
+       static unsigned long irq_orig_bit;
+       u32 bit = 1 << data->hwirq;
+
+       if (on) {
+               if (int_mask & bit)
+                       __assign_bit(data->hwirq, &irq_orig_bit, true);
+               else
+                       __assign_bit(data->hwirq, &irq_orig_bit, false);
+
+               __assign_bit(data->hwirq, &int_mask, true);
+               __assign_bit(data->hwirq, &wake_mask, true);
+
+       } else {
+               if (!(irq_orig_bit & bit))
+                       __assign_bit(data->hwirq, &int_mask, false);
+
+               __assign_bit(data->hwirq, &wake_mask, false);
+               __assign_bit(data->hwirq, &irq_orig_bit, false);
+       }
+
+       __nds32__mtsr_dsb(int_mask, NDS32_SR_INT_MASK);
+
+       return 0;
+}
+
 static struct irq_chip ativic32_chip = {
        .name = "ativic32",
        .irq_ack = ativic32_ack_irq,
        .irq_mask = ativic32_mask_irq,
        .irq_unmask = ativic32_unmask_irq,
+       .irq_set_wake = nointc_set_wake,
 };
 
 static unsigned int __initdata nivic_map[6] = { 6, 2, 10, 16, 24, 32 };
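
nointc_set_wake() is never called directly; it is reached through the
genirq wake machinery when a driver marks its interrupt as a wakeup
source. A hedged sketch of such a driver's suspend hook (the foo_dev
structure and names are hypothetical):

	#include <linux/device.h>
	#include <linux/interrupt.h>
	#include <linux/pm_wakeup.h>

	struct foo_dev {
		int irq;
	};

	static int foo_suspend(struct device *dev)
	{
		struct foo_dev *foo = dev_get_drvdata(dev);

		if (device_may_wakeup(dev))
			enable_irq_wake(foo->irq); /* -> ativic32_chip.irq_set_wake */
		return 0;
	}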
index 4f26ecc..13a374f 100644 (file)
 #define _FP_FRAC_HIGH_2(X)     (X##_f1)
 #define _FP_FRAC_LOW_2(X)      (X##_f0)
 #define _FP_FRAC_WORD_2(X,w)   (X##_f##w)
+#define _FP_FRAC_SLL_2(X, N) (                                                \
+       (void) (((N) < _FP_W_TYPE_SIZE)                                        \
+         ? ({                                                                 \
+               if (__builtin_constant_p(N) && (N) == 1) {                     \
+                       X##_f1 = X##_f1 + X##_f1 +                             \
+                               (((_FP_WS_TYPE) (X##_f0)) < 0);                \
+                       X##_f0 += X##_f0;                                      \
+               } else {                                                       \
+                       X##_f1 = X##_f1 << (N) | X##_f0 >>                     \
+                                               (_FP_W_TYPE_SIZE - (N));       \
+                       X##_f0 <<= (N);                                        \
+               }                                                              \
+               0;                                                             \
+           })                                                                 \
+         : ({                                                                 \
+             X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE);                      \
+             X##_f0 = 0;                                                      \
+         })))
+
+
+#define _FP_FRAC_SRL_2(X, N) (                                                \
+       (void) (((N) < _FP_W_TYPE_SIZE)                                        \
+         ? ({                                                                 \
+             X##_f0 = X##_f0 >> (N) | X##_f1 << (_FP_W_TYPE_SIZE - (N));      \
+             X##_f1 >>= (N);                                                  \
+           })                                                                 \
+         : ({                                                                 \
+             X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE);                      \
+             X##_f1 = 0;                                                      \
+           })))
 
-#define _FP_FRAC_SLL_2(X,N)                                            \
-  do {                                                                 \
-    if ((N) < _FP_W_TYPE_SIZE)                                         \
-      {                                                                        \
-       if (__builtin_constant_p(N) && (N) == 1)                        \
-         {                                                             \
-           X##_f1 = X##_f1 + X##_f1 + (((_FP_WS_TYPE)(X##_f0)) < 0);   \
-           X##_f0 += X##_f0;                                           \
-         }                                                             \
-       else                                                            \
-         {                                                             \
-           X##_f1 = X##_f1 << (N) | X##_f0 >> (_FP_W_TYPE_SIZE - (N)); \
-           X##_f0 <<= (N);                                             \
-         }                                                             \
-      }                                                                        \
-    else                                                               \
-      {                                                                        \
-       X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE);                     \
-       X##_f0 = 0;                                                     \
-      }                                                                        \
-  } while (0)
-
-#define _FP_FRAC_SRL_2(X,N)                                            \
-  do {                                                                 \
-    if ((N) < _FP_W_TYPE_SIZE)                                         \
-      {                                                                        \
-       X##_f0 = X##_f0 >> (N) | X##_f1 << (_FP_W_TYPE_SIZE - (N));     \
-       X##_f1 >>= (N);                                                 \
-      }                                                                        \
-    else                                                               \
-      {                                                                        \
-       X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE);                     \
-       X##_f1 = 0;                                                     \
-      }                                                                        \
-  } while (0)
 
 /* Right shift with sticky-lsb.  */
-#define _FP_FRAC_SRS_2(X,N,sz)                                         \
-  do {                                                                 \
-    if ((N) < _FP_W_TYPE_SIZE)                                         \
-      {                                                                        \
-       X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N) |   \
-                 (__builtin_constant_p(N) && (N) == 1                  \
-                  ? X##_f0 & 1                                         \
-                  : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0));        \
-       X##_f1 >>= (N);                                                 \
-      }                                                                        \
-    else                                                               \
-      {                                                                        \
-       X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE) |                   \
-               (((X##_f1 << (2*_FP_W_TYPE_SIZE - (N))) | X##_f0) != 0)); \
-       X##_f1 = 0;                                                     \
-      }                                                                        \
-  } while (0)
+#define _FP_FRAC_SRS_2(X, N, sz) (                                            \
+       (void) (((N) < _FP_W_TYPE_SIZE)                                        \
+         ? ({                                                                 \
+             X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N)      \
+                       | (__builtin_constant_p(N) && (N) == 1                 \
+                          ? X##_f0 & 1                                        \
+                          : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0));       \
+               X##_f1 >>= (N);                                                \
+           })                                                                 \
+         : ({                                                                 \
+             X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE)                      \
+                       | ((((N) == _FP_W_TYPE_SIZE                            \
+                            ? 0                                               \
+                            : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N))))          \
+                           | X##_f0) != 0));                                  \
+             X##_f1 = 0;                                                      \
+           })))
 
 #define _FP_FRAC_ADDI_2(X,I)   \
   __FP_FRAC_ADDI_2(X##_f1, X##_f0, I)
index 3f284bc..5650c16 100644 (file)
@@ -138,7 +138,7 @@ do {                                                        \
       _FP_FRAC_ADDI_##wc(X, _FP_WORK_ROUND);           \
 } while (0)
 
-#define _FP_ROUND_ZERO(wc, X)          0
+#define _FP_ROUND_ZERO(wc, X)          (void)0
 
 #define _FP_ROUND_PINF(wc, X)                          \
 do {                                                   \
index 8d378c5..dc696c1 100644 (file)
@@ -24,6 +24,8 @@
 #include "../../arch/ia64/include/asm/barrier.h"
 #elif defined(__xtensa__)
 #include "../../arch/xtensa/include/asm/barrier.h"
+#elif defined(__nds32__)
+#include "../../arch/nds32/include/asm/barrier.h"
 #else
 #include <asm-generic/barrier.h>
 #endif
diff --git a/tools/perf/arch/nds32/Build b/tools/perf/arch/nds32/Build
new file mode 100644 (file)
index 0000000..54afe4a
--- /dev/null
@@ -0,0 +1 @@
+libperf-y += util/
diff --git a/tools/perf/arch/nds32/util/Build b/tools/perf/arch/nds32/util/Build
new file mode 100644 (file)
index 0000000..ca623bb
--- /dev/null
@@ -0,0 +1 @@
+libperf-y += header.o
diff --git a/tools/perf/arch/nds32/util/header.c b/tools/perf/arch/nds32/util/header.c
new file mode 100644 (file)
index 0000000..ef9dbdb
--- /dev/null
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2005-2017 Andes Technology Corporation
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <api/fs/fs.h>
+#include "header.h"
+
+#define STR_LEN 1024
+
+char *get_cpuid_str(struct perf_pmu *pmu)
+{
+       /* On nds32 there is only one CPU */
+       char *buf = NULL;
+       struct cpu_map *cpus;
+       const char *sysfs = sysfs__mountpoint();
+
+       if (!sysfs || !pmu || !pmu->cpus)
+               return NULL;
+
+       buf = malloc(STR_LEN);
+       if (!buf)
+               return NULL;
+
+       cpus = cpu_map__get(pmu->cpus);
+       sprintf(buf, "0x%x", cpus->nr - 1);
+       cpu_map__put(cpus);
+       return buf;
+}
diff --git a/tools/perf/pmu-events/arch/nds32/mapfile.csv b/tools/perf/pmu-events/arch/nds32/mapfile.csv
new file mode 100644 (file)
index 0000000..efb395f
--- /dev/null
@@ -0,0 +1,15 @@
+# Format:
+#      MIDR,Version,JSON/file/pathname,Type
+#
+# where
+#      MIDR    Processor version
+#              Variant[23:20] and Revision [3:0] should be zero.
+#      Version could be used to track the version of the JSON file
+#              but is currently unused.
+#      JSON/file/pathname is the path to the JSON file, relative
+#              to tools/perf/pmu-events/arch/nds32/.
+#      Type is core, uncore, etc.
+#
+#
+#Family-model,Version,Filename,EventType
+0x0,v3,n13,core
diff --git a/tools/perf/pmu-events/arch/nds32/n13/atcpmu.json b/tools/perf/pmu-events/arch/nds32/n13/atcpmu.json
new file mode 100644 (file)
index 0000000..5347350
--- /dev/null
@@ -0,0 +1,290 @@
+[
+  {
+       "PublicDescription": "Conditional branch",
+    "EventCode": "0x102",
+    "EventName": "cond_br",
+    "BriefDescription": "V3 Conditional branch"
+  },
+  {
+       "PublicDescription": "Taken conditional branches",
+    "EventCode": "0x103",
+    "EventName": "taken_cond_br",
+    "BriefDescription": "V3 Taken Conditional branch"
+  },
+  {
+       "PublicDescription": "Prefetch Instruction",
+    "EventCode": "0x104",
+    "EventName": "prefetch_inst",
+    "BriefDescription": "V3 Prefetch Instruction"
+  },
+  {
+       "PublicDescription": "RET Inst",
+    "EventCode": "0x105",
+    "EventName": "ret_inst",
+    "BriefDescription": "V3 RET Inst"
+  },
+  {
+       "PublicDescription": "JR(non-RET) instructions",
+    "EventCode": "0x106",
+    "EventName": "jr_inst",
+    "BriefDescription": "V3 JR(non-RET) instructions"
+  },
+  {
+       "PublicDescription": "JAL/JRAL instructions",
+    "EventCode": "0x107",
+    "EventName": "jal_jral_inst",
+    "BriefDescription": "V3 JAL/JRAL instructions"
+  },
+  {
+       "PublicDescription": "NOP instructions",
+    "EventCode": "0x108",
+    "EventName": "nop_inst",
+    "BriefDescription": "V3 NOP instructions"
+  },
+  {
+       "PublicDescription": "SCW instructions",
+    "EventCode": "0x109",
+    "EventName": "scw_inst",
+    "BriefDescription": "V3 SCW instructions"
+  },
+  {
+       "PublicDescription": "ISB/DSB instructions",
+    "EventCode": "0x10a",
+    "EventName": "isb_dsb_inst",
+    "BriefDescription": "V3 ISB/DSB instructions"
+  },
+  {
+       "PublicDescription": "CCTL instructions",
+    "EventCode": "0x10b",
+    "EventName": "cctl_inst",
+    "BriefDescription": "V3 CCTL instructions"
+  },
+  {
+       "PublicDescription": "Taken Interrupts",
+    "EventCode": "0x10c",
+    "EventName": "taken_interrupts",
+    "BriefDescription": "V3 Taken Interrupts"
+  },
+  {
+       "PublicDescription": "Loads Completed",
+    "EventCode": "0x10d",
+    "EventName": "load_completed",
+    "BriefDescription": "V3 Loads Completed"
+  },
+  {
+       "PublicDescription": "uITLB accesses",
+    "EventCode": "0x10e",
+    "EventName": "uitlb_access",
+    "BriefDescription": "V3 uITLB accesses"
+  },
+  {
+       "PublicDescription": "uDTLB accesses",
+    "EventCode": "0x10f",
+    "EventName": "udtlb_access",
+    "BriefDescription": "V3 uDTLB accesses"
+  },
+  {
+       "PublicDescription": "MTLB accesses",
+    "EventCode": "0x110",
+    "EventName": "mtlb_access",
+    "BriefDescription": "V3 MTLB accesses"
+  },
+  {
+       "PublicDescription": "DATA_DEPENDENCY_STALL_CYCLES",
+    "EventCode": "0x112",
+    "EventName": "data_dependency_stall",
+    "BriefDescription": "V3 DATA_DEPENDENCY_STALL_CYCLES"
+  },
+  {
+       "PublicDescription": "DATA_CACHE_MISS_STALL_CYCLES",
+    "EventCode": "0x113",
+    "EventName": "dcache_miss_stall",
+    "BriefDescription": "V3 DATA_CACHE_MISS_STALL_CYCLES"
+  },
+  {
+       "PublicDescription": "ILM access",
+    "EventCode": "0x118",
+    "EventName": "ilm_access",
+    "BriefDescription": "V3 ILM accesses"
+  },
+  {
+       "PublicDescription": "LSU BIU CYCLES",
+    "EventCode": "0x119",
+    "EventName": "lsu_biu_cycles",
+    "BriefDescription": "V3 LSU BIU CYCLES"
+  },
+  {
+       "PublicDescription": "HPTWK BIU CYCLES",
+    "EventCode": "0x11a",
+    "EventName": "hptwk_biu_cycles",
+    "BriefDescription": "V3 HPTWK BIU CYCLES"
+  },
+  {
+       "PublicDescription": "DMA BIU CYCLES",
+    "EventCode": "0x11b",
+    "EventName": "dma_biu_cycles",
+    "BriefDescription": "V3 DMA BIU CYCLES"
+  },
+  {
+       "PublicDescription": "CODE CACHE FILL BIU CYCLES",
+    "EventCode": "0x11c",
+    "EventName": "icache_fill_biu_cycles",
+    "BriefDescription": "V3 CODE CACHE FILL BIU CYCLES"
+  },
+  {
+       "PublicDescription": "LEGAL UNALIGNED DCACHE ACCESS",
+    "EventCode": "0x11d",
+    "EventName": "legal_unaligned_dcache_access",
+    "BriefDescription": "V3 LEGAL UNALIGNED DCACHE ACCESS"
+  },
+  {
+       "PublicDescription": "PUSH25 instructions",
+    "EventCode": "0x11e",
+    "EventName": "push25_inst",
+    "BriefDescription": "V3 PUSH25 instructions"
+  },
+  {
+       "PublicDescription": "SYSCALL instructions",
+    "EventCode": "0x11f",
+    "EventName": "syscall_inst",
+    "BriefDescription": "V3 SYSCALL instructions"
+  },
+  {
+       "PublicDescription": "conditional branch miss",
+    "EventCode": "0x202",
+    "EventName": "cond_br_miss",
+    "BriefDescription": "V3 conditional branch miss"
+  },
+  {
+       "PublicDescription": "taken conditional branch miss",
+    "EventCode": "0x203",
+    "EventName": "taken_cond_br_miss",
+    "BriefDescription": "V3 taken conditional branch miss"
+  },
+  {
+       "PublicDescription": "Prefetch Instructions with cache hit",
+    "EventCode": "0x204",
+    "EventName": "prefetch_icache_hit",
+    "BriefDescription": "V3 Prefetch Instructions with cache hit"
+  },
+  {
+       "PublicDescription": "RET mispredict",
+    "EventCode": "0x205",
+    "EventName": "ret_mispredict",
+    "BriefDescription": "V3 RET mispredict"
+  },
+  {
+       "PublicDescription": "Immediate J instructions",
+    "EventCode": "0x206",
+    "EventName": "imm_j_inst",
+    "BriefDescription": "V3 Immediate J instructions"
+  },
+  {
+       "PublicDescription": "Multiply instructions",
+    "EventCode": "0x207",
+    "EventName": "mul_inst",
+    "BriefDescription": "V3 Multiply instructions"
+  },
+  {
+       "PublicDescription": "16-bit instructions",
+    "EventCode": "0x208",
+    "EventName": "sixteen_bits_inst",
+    "BriefDescription": "V3 16-bit instructions"
+  },
+  {
+       "PublicDescription": "Failed SCW instructions",
+    "EventCode": "0x209",
+    "EventName": "fail_scw_inst",
+    "BriefDescription": "V3 Failed SCW instructions"
+  },
+  {
+       "PublicDescription": "ld-after-st conflict replays",
+    "EventCode": "0x20a",
+    "EventName": "ld_af_st_conflict",
+    "BriefDescription": "V3 ld-after-st conflict replays"
+  },
+  {
+       "PublicDescription": "Exception taken",
+    "EventCode": "0x20c",
+    "EventName": "exception_taken",
+    "BriefDescription": "V3 Exception taken"
+  },
+  {
+       "PublicDescription": "Stores completed",
+    "EventCode": "0x20d",
+    "EventName": "store_completed",
+    "BriefDescription": "V3 Stores completed"
+  },
+  {
+       "PublicDescription": "uITLB miss",
+    "EventCode": "0x20e",
+    "EventName": "uitlb_miss",
+    "BriefDescription": "V3 uITLB miss"
+  },
+  {
+       "PublicDescription": "uDTLB miss",
+    "EventCode": "0x20f",
+    "EventName": "udtlb_miss",
+    "BriefDescription": "V3 uDTLB miss"
+  },
+  {
+       "PublicDescription": "MTLB miss",
+    "EventCode": "0x210",
+    "EventName": "mtlb_miss",
+    "BriefDescription": "V3 MTLB miss"
+  },
+  {
+       "PublicDescription": "Empty instruction queue stall cycles",
+    "EventCode": "0x212",
+    "EventName": "empty_inst_q_stall",
+    "BriefDescription": "V3 Empty instruction queue stall cycles"
+  },
+  {
+       "PublicDescription": "Data write back",
+    "EventCode": "0x213",
+    "EventName": "data_wb",
+    "BriefDescription": "V3 Data write back"
+  },
+  {
+       "PublicDescription": "DLM access",
+    "EventCode": "0x218",
+    "EventName": "dlm_access",
+    "BriefDescription": "V3 DLM access"
+  },
+  {
+       "PublicDescription": "LSU BIU request",
+    "EventCode": "0x219",
+    "EventName": "lsu_biu_req",
+    "BriefDescription": "V3 LSU BIU request"
+  },
+  {
+       "PublicDescription": "HPTWK BIU request",
+    "EventCode": "0x21a",
+    "EventName": "hptwk_biu_req",
+    "BriefDescription": "V3 HPTWK BIU request"
+  },
+  {
+       "PublicDescription": "DMA BIU request",
+    "EventCode": "0x21b",
+    "EventName": "dma_biu_req",
+    "BriefDescription": "V3 DMA BIU request"
+  },
+  {
+       "PublicDescription": "Icache fill BIU request",
+    "EventCode": "0x21c",
+    "EventName": "icache_fill_biu_req",
+    "BriefDescription": "V3 Icache fill BIU request"
+  },
+  {
+       "PublicDescription": "External events",
+    "EventCode": "0x21d",
+    "EventName": "external_events",
+    "BriefDescription": "V3 External events"
+  },
+  {
+       "PublicDescription": "POP25 instructions",
+    "EventCode": "0x21e",
+    "EventName": "pop25_inst",
+    "BriefDescription": "V3 POP25 instructions"
+  }
+]
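
Each object above becomes one generated C record, making the events addressable by name (for example, perf stat -e cond_br) as well as by raw code (perf stat -e r102). A hedged sketch of what jevents emits for the first entry, assuming the name/event/desc fields of perf's struct pmu_event:

/* Assumed field subset of perf's struct pmu_event; illustration only. */
struct pmu_event {
	const char *name;
	const char *event;
	const char *desc;
	const char *long_desc;
};

static const struct pmu_event nds32_n13_events[] = {
	{
		.name = "cond_br",
		.event = "event=0x102",	/* assumption: EventCode becomes event=... */
		.desc = "V3 Conditional branch",
		.long_desc = "Conditional branch",
	},
	/* ... one record per JSON object above ... */
};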