Merge tag 'powerpc-5.12-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc...
author    Linus Torvalds <torvalds@linux-foundation.org>
          Mon, 22 Feb 2021 22:34:00 +0000 (14:34 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
          Mon, 22 Feb 2021 22:34:00 +0000 (14:34 -0800)
Pull powerpc updates from Michael Ellerman:

 - A large series adding wrappers for our interrupt handlers, so that
   irq/nmi/user tracking can be isolated in the wrappers rather than
   spread in each handler.
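   (A conceptual sketch of this wrapper pattern follows, after this list.)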

 - Conversion of the 32-bit syscall handling into C.

 - A series from Nick to streamline our TLB flushing when using the
   Radix MMU.

 - Switch to using queued spinlocks by default for 64-bit server CPUs.

 - A rework of our PCI probing so that it happens later in boot, when
   more generic infrastructure is available.

 - Two small fixes to allow 32-bit little-endian processes to run on
   64-bit kernels.

 - Other smaller features, fixes & cleanups.
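
To illustrate the wrapper idea from the first item (a conceptual sketch
only, not the kernel's actual macros; the real ones live in the new
arch/powerpc/include/asm/interrupt.h added by that series, and one of
them, DEFINE_INTERRUPT_HANDLER_ASYNC, can be seen replacing the
open-coded do_IRQ() prototype in the diff below): a wrapper macro
performs the common irq/nmi/user-context bookkeeping around a handler
body, so each handler only supplies its own logic. The
sketch_interrupt_enter()/_exit() helpers below are placeholders standing
in for that bookkeeping.

    /*
     * Stand-alone sketch: stub type and no-op helpers so it compiles by
     * itself. Not the powerpc implementation.
     */
    struct pt_regs { unsigned long trap; };

    /* The common entry/exit bookkeeping would live here, in one place. */
    static void sketch_interrupt_enter(struct pt_regs *regs) { (void)regs; }
    static void sketch_interrupt_exit(struct pt_regs *regs)  { (void)regs; }

    #define DEFINE_SKETCH_HANDLER(func)                         \
            static void ____##func(struct pt_regs *regs);       \
            void func(struct pt_regs *regs)                      \
            {                                                    \
                    sketch_interrupt_enter(regs);                \
                    ____##func(regs);                            \
                    sketch_interrupt_exit(regs);                 \
            }                                                    \
            static void ____##func(struct pt_regs *regs)

    /* A handler body then contains only its own work: */
    DEFINE_SKETCH_HANDLER(example_interrupt)
    {
            (void)regs;     /* handler-specific logic goes here */
    }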

Thanks to: Alexey Kardashevskiy, Ananth N Mavinakayanahalli, Aneesh
Kumar K.V, Athira Rajeev, Bhaskar Chowdhury, Cédric Le Goater, Chengyang
Fan, Christophe Leroy, Christopher M. Riedl, Fabiano Rosas, Florian
Fainelli, Frederic Barrat, Ganesh Goudar, Hari Bathini, Jiapeng Chong,
Joseph J Allen, Kajol Jain, Markus Elfring, Michal Suchanek, Nathan
Lynch, Naveen N. Rao, Nicholas Piggin, Oliver O'Halloran, Pingfan Liu,
Po-Hsu Lin, Qian Cai, Ram Pai, Randy Dunlap, Sandipan Das, Stephen
Rothwell, Tyrel Datwyler, Will Springer, Yury Norov, and Zheng Yongjun.

* tag 'powerpc-5.12-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (188 commits)
  powerpc/perf: Adds support for programming of Thresholding in P10
  powerpc/pci: Remove unimplemented prototypes
  powerpc/uaccess: Merge raw_copy_to_user_allowed() into raw_copy_to_user()
  powerpc/uaccess: Merge __put_user_size_allowed() into __put_user_size()
  powerpc/uaccess: get rid of small constant size cases in raw_copy_{to,from}_user()
  powerpc/64: Fix stack trace not displaying final frame
  powerpc/time: Remove get_tbl()
  powerpc/time: Avoid using get_tbl()
  spi: mpc52xx: Avoid using get_tbl()
  powerpc/syscall: Avoid storing 'current' in another pointer
  powerpc/32: Handle bookE debugging in C in syscall entry/exit
  powerpc/syscall: Do not check unsupported scv vector on PPC32
  powerpc/32: Remove the counter in global_dbcr0
  powerpc/32: Remove verification of MSR_PR on syscall in the ASM entry
  powerpc/syscall: implement system call entry/exit logic in C for PPC32
  powerpc/32: Always save non volatile GPRs at syscall entry
  powerpc/syscall: Change condition to check MSR_RI
  powerpc/syscall: Save r3 in regs->orig_r3
  powerpc/syscall: Use is_compat_task()
  powerpc/syscall: Make interrupt.c buildable on PPC32
  ...

17 files changed:
arch/powerpc/Kconfig
arch/powerpc/configs/44x/akebono_defconfig
arch/powerpc/include/asm/book3s/64/kup.h
arch/powerpc/include/asm/kvm_ppc.h
arch/powerpc/kernel/Makefile
arch/powerpc/kernel/asm-offsets.c
arch/powerpc/kernel/entry_64.S
arch/powerpc/kernel/exceptions-64s.S
arch/powerpc/kernel/head_book3s_32.S
arch/powerpc/kernel/irq.c
arch/powerpc/kvm/book3s_hv.c
arch/powerpc/kvm/book3s_hv_builtin.c
arch/powerpc/kvm/booke.c
arch/powerpc/kvm/powerpc.c
arch/powerpc/lib/sstep.c
arch/powerpc/perf/core-book3s.c
drivers/spi/spi-mpc52xx.c

diff --combined arch/powerpc/Kconfig
@@@ -196,7 -196,6 +196,6 @@@ config PP
        select HAVE_STACKPROTECTOR              if PPC64 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r13)
        select HAVE_STACKPROTECTOR              if PPC32 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r2)
        select HAVE_CONTEXT_TRACKING            if PPC64
-       select HAVE_TIF_NOHZ                    if PPC64
        select HAVE_DEBUG_KMEMLEAK
        select HAVE_DEBUG_STACKOVERFLOW
        select HAVE_DYNAMIC_FTRACE
        select HAVE_MOD_ARCH_SPECIFIC
        select HAVE_NMI                         if PERF_EVENTS || (PPC64 && PPC_BOOK3S)
        select HAVE_HARDLOCKUP_DETECTOR_ARCH    if (PPC64 && PPC_BOOK3S)
 -      select HAVE_OPROFILE
        select HAVE_OPTPROBES                   if PPC64
        select HAVE_PERF_EVENTS
        select HAVE_PERF_EVENTS_NMI             if PPC64
@@@ -281,6 -281,7 +280,6 @@@ config COMPA
        bool "Enable support for 32bit binaries"
        depends on PPC64
        default y if !CPU_LITTLE_ENDIAN
 -      select COMPAT_BINFMT_ELF
        select ARCH_WANT_OLD_COMPAT_IPC
        select COMPAT_OLD_SIGACTION
  
@@@ -503,18 -504,14 +502,14 @@@ config HOTPLUG_CP
          Say N if you are unsure.
  
  config PPC_QUEUED_SPINLOCKS
-       bool "Queued spinlocks"
+       bool "Queued spinlocks" if EXPERT
        depends on SMP
+       default PPC_BOOK3S_64
        help
          Say Y here to use queued spinlocks which give better scalability and
          fairness on large SMP and NUMA systems without harming single threaded
          performance.
  
-         This option is currently experimental, the code is more complex and
-         less tested so it defaults to "N" for the moment.
-         If unsure, say "N".
  config ARCH_CPU_PROBE_RELEASE
        def_bool y
        depends on HOTPLUG_CPU
@@@ -718,18 -715,6 +713,6 @@@ config ARCH_MEMORY_PROB
        def_bool y
        depends on MEMORY_HOTPLUG
  
- config STDBINUTILS
-       bool "Using standard binutils settings"
-       depends on 44x
-       default y
-       help
-         Turning this option off allows you to select 256KB PAGE_SIZE on 44x.
-         Note, that kernel will be able to run only those applications,
-         which had been compiled using binutils later than 2.17.50.0.3 with
-         '-zmax-page-size' set to 256K (the default is 64K). Or, if using
-         the older binutils, you can patch them with a trivial patch, which
-         changes the ELF_MAXPAGESIZE definition from 0x10000 to 0x40000.
  choice
        prompt "Page size"
        default PPC_4K_PAGES
@@@ -769,17 -754,15 +752,15 @@@ config PPC_64K_PAGE
        select HAVE_ARCH_SOFT_DIRTY if PPC_BOOK3S_64
  
  config PPC_256K_PAGES
-       bool "256k page size"
-       depends on 44x && !STDBINUTILS
+       bool "256k page size (Requires non-standard binutils settings)"
+       depends on 44x && !PPC_47x
        help
          Make the page size 256k.
  
-         As the ELF standard only requires alignment to support page
-         sizes up to 64k, you will need to compile all of your user
-         space applications with a non-standard binutils settings
-         (see the STDBINUTILS description for details).
-         Say N unless you know what you are doing.
+         The kernel will only be able to run applications that have been
+         compiled with '-zmax-page-size' set to 256K (the default is 64K) using
+         binutils later than 2.17.50.0.3, or by patching the ELF_MAXPAGESIZE
+         definition from 0x10000 to 0x40000 in older versions.
  
  endchoice
  
@@@ -8,6 -8,7 +8,6 @@@ CONFIG_EXPERT=
  CONFIG_KALLSYMS_ALL=y
  # CONFIG_SLUB_CPU_PARTIAL is not set
  CONFIG_PROFILING=y
 -CONFIG_OPROFILE=y
  CONFIG_MODULES=y
  CONFIG_MODULE_UNLOAD=y
  # CONFIG_BLK_DEV_BSG is not set
@@@ -20,6 -21,7 +20,7 @@@ CONFIG_IRQ_ALL_CPUS=
  # CONFIG_COMPACTION is not set
  # CONFIG_SUSPEND is not set
  CONFIG_NET=y
+ CONFIG_NETDEVICES=y
  CONFIG_PACKET=y
  CONFIG_UNIX=y
  CONFIG_INET=y
@@@ -40,7 -42,9 +41,9 @@@ CONFIG_BLK_DEV_RAM_SIZE=3500
  # CONFIG_SCSI_PROC_FS is not set
  CONFIG_BLK_DEV_SD=y
  # CONFIG_SCSI_LOWLEVEL is not set
+ CONFIG_ATA=y
  # CONFIG_SATA_PMP is not set
+ CONFIG_SATA_AHCI_PLATFORM=y
  # CONFIG_ATA_SFF is not set
  # CONFIG_NET_VENDOR_3COM is not set
  # CONFIG_NET_VENDOR_ADAPTEC is not set
@@@ -97,6 -101,8 +100,8 @@@ CONFIG_USB_OHCI_HCD=
  # CONFIG_USB_OHCI_HCD_PCI is not set
  CONFIG_USB_STORAGE=y
  CONFIG_MMC=y
+ CONFIG_MMC_SDHCI=y
+ CONFIG_MMC_SDHCI_PLTFM=y
  CONFIG_RTC_CLASS=y
  CONFIG_RTC_DRV_M41T80=y
  CONFIG_EXT2_FS=y
@@@ -199,31 -199,25 +199,31 @@@ DECLARE_STATIC_KEY_FALSE(uaccess_flush_
  
  #ifdef CONFIG_PPC_PKEY
  
 +extern u64 __ro_after_init default_uamor;
 +extern u64 __ro_after_init default_amr;
 +extern u64 __ro_after_init default_iamr;
 +
  #include <asm/mmu.h>
  #include <asm/ptrace.h>
  
 -/*
 - * For kernel thread that doesn't have thread.regs return
 - * default AMR/IAMR values.
 +/* usage of kthread_use_mm() should inherit the
 + * AMR value of the operating address space. But, the AMR value is
 + * thread-specific and we inherit the address space and not thread
 + * access restrictions. Because of this ignore AMR value when accessing
 + * userspace via kernel thread.
   */
  static inline u64 current_thread_amr(void)
  {
        if (current->thread.regs)
                return current->thread.regs->amr;
 -      return AMR_KUAP_BLOCKED;
 +      return default_amr;
  }
  
  static inline u64 current_thread_iamr(void)
  {
        if (current->thread.regs)
                return current->thread.regs->iamr;
 -      return AMR_KUEP_BLOCKED;
 +      return default_iamr;
  }
  #endif /* CONFIG_PPC_PKEY */
  
@@@ -339,7 -333,7 +339,7 @@@ static inline unsigned long get_kuap(vo
         * This has no effect in terms of actually blocking things on hash,
         * so it doesn't break anything.
         */
-       if (!early_mmu_has_feature(MMU_FTR_BOOK3S_KUAP))
+       if (!mmu_has_feature(MMU_FTR_BOOK3S_KUAP))
                return AMR_KUAP_BLOCKED;
  
        return mfspr(SPRN_AMR);
  
  static inline void set_kuap(unsigned long value)
  {
-       if (!early_mmu_has_feature(MMU_FTR_BOOK3S_KUAP))
+       if (!mmu_has_feature(MMU_FTR_BOOK3S_KUAP))
                return;
  
        /*
@@@ -314,8 -314,6 +314,8 @@@ struct kvmppc_ops 
                              int size);
        int (*enable_svm)(struct kvm *kvm);
        int (*svm_off)(struct kvm *kvm);
 +      int (*enable_dawr1)(struct kvm *kvm);
 +      bool (*hash_v3_possible)(void);
  };
  
  extern struct kvmppc_ops *kvmppc_hv_ops;
@@@ -629,9 -627,9 +629,9 @@@ extern int h_ipi_redirect
  static inline struct kvmppc_passthru_irqmap *kvmppc_get_passthru_irqmap(
                                struct kvm *kvm)
        { return NULL; }
- static inline void kvmppc_alloc_host_rm_ops(void) {};
- static inline void kvmppc_free_host_rm_ops(void) {};
- static inline void kvmppc_free_pimap(struct kvm *kvm) {};
+ static inline void kvmppc_alloc_host_rm_ops(void) {}
+ static inline void kvmppc_free_host_rm_ops(void) {}
+ static inline void kvmppc_free_pimap(struct kvm *kvm) {}
  static inline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall)
        { return 0; }
  static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
@@@ -883,9 -881,9 +883,9 @@@ static inline void kvmppc_mmu_flush_ica
  
        /* Clear i-cache for new pages */
        page = pfn_to_page(pfn);
-       if (!test_bit(PG_arch_1, &page->flags)) {
+       if (!test_bit(PG_dcache_clean, &page->flags)) {
                flush_dcache_icache_page(page);
-               set_bit(PG_arch_1, &page->flags);
+               set_bit(PG_dcache_clean, &page->flags);
        }
  }
  
@@@ -46,12 -46,12 +46,12 @@@ obj-y                              := cputable.o syscalls.o 
                                   prom.o traps.o setup-common.o \
                                   udbg.o misc.o io.o misc_$(BITS).o \
                                   of_platform.o prom_parse.o firmware.o \
-                                  hw_breakpoint_constraints.o
+                                  hw_breakpoint_constraints.o interrupt.o
  obj-y                         += ptrace/
  obj-$(CONFIG_PPC64)           += setup_64.o \
-                                  paca.o nvram_64.o note.o syscall_64.o
+                                  paca.o nvram_64.o note.o
  obj-$(CONFIG_COMPAT)          += sys_ppc32.o signal_32.o
 -obj-$(CONFIG_VDSO32)          += vdso32/
 +obj-$(CONFIG_VDSO32)          += vdso32_wrapper.o
  obj-$(CONFIG_PPC_WATCHDOG)    += watchdog.o
  obj-$(CONFIG_HAVE_HW_BREAKPOINT)      += hw_breakpoint.o
  obj-$(CONFIG_PPC_DAWR)                += dawr.o
@@@ -60,7 -60,7 +60,7 @@@ obj-$(CONFIG_PPC_BOOK3S_64)   += cpu_setu
  obj-$(CONFIG_PPC_BOOK3S_64)   += mce.o mce_power.o
  obj-$(CONFIG_PPC_BOOK3E_64)   += exceptions-64e.o idle_book3e.o
  obj-$(CONFIG_PPC_BARRIER_NOSPEC) += security.o
 -obj-$(CONFIG_PPC64)           += vdso64/
 +obj-$(CONFIG_PPC64)           += vdso64_wrapper.o
  obj-$(CONFIG_ALTIVEC)         += vecemu.o
  obj-$(CONFIG_PPC_BOOK3S_IDLE) += idle_book3s.o
  procfs-y                      := proc_powerpc.o
@@@ -255,7 -255,6 +255,6 @@@ int main(void
  #endif /* CONFIG_PPC_MM_SLICES */
        OFFSET(PACA_EXGEN, paca_struct, exgen);
        OFFSET(PACA_EXMC, paca_struct, exmc);
-       OFFSET(PACA_EXSLB, paca_struct, exslb);
        OFFSET(PACA_EXNMI, paca_struct, exnmi);
  #ifdef CONFIG_PPC_PSERIES
        OFFSET(PACALPPACAPTR, paca_struct, lppaca_ptr);
  
        /* Interrupt register frame */
        DEFINE(INT_FRAME_SIZE, STACK_INT_FRAME_SIZE);
-       DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs));
+       DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_WITH_PT_REGS);
        STACK_PT_REGS_OFFSET(GPR0, gpr[0]);
        STACK_PT_REGS_OFFSET(GPR1, gpr[1]);
        STACK_PT_REGS_OFFSET(GPR2, gpr[2]);
        OFFSET(VCPU_CTRL, kvm_vcpu, arch.ctrl);
        OFFSET(VCPU_DABR, kvm_vcpu, arch.dabr);
        OFFSET(VCPU_DABRX, kvm_vcpu, arch.dabrx);
 -      OFFSET(VCPU_DAWR, kvm_vcpu, arch.dawr);
 -      OFFSET(VCPU_DAWRX, kvm_vcpu, arch.dawrx);
 +      OFFSET(VCPU_DAWR0, kvm_vcpu, arch.dawr0);
 +      OFFSET(VCPU_DAWRX0, kvm_vcpu, arch.dawrx0);
 +      OFFSET(VCPU_DAWR1, kvm_vcpu, arch.dawr1);
 +      OFFSET(VCPU_DAWRX1, kvm_vcpu, arch.dawrx1);
        OFFSET(VCPU_CIABR, kvm_vcpu, arch.ciabr);
        OFFSET(VCPU_HFLAGS, kvm_vcpu, arch.hflags);
        OFFSET(VCPU_DEC, kvm_vcpu, arch.dec);
        HSTATE_FIELD(HSTATE_SAVED_XIRR, saved_xirr);
        HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi);
        HSTATE_FIELD(HSTATE_PTID, ptid);
 -      HSTATE_FIELD(HSTATE_TID, tid);
        HSTATE_FIELD(HSTATE_FAKE_SUSPEND, fake_suspend);
        HSTATE_FIELD(HSTATE_MMCR0, host_mmcr[0]);
        HSTATE_FIELD(HSTATE_MMCR1, host_mmcr[1]);
        OFFSET(KVM_SPLIT_LDBAR, kvm_split_mode, ldbar);
        OFFSET(KVM_SPLIT_DO_NAP, kvm_split_mode, do_nap);
        OFFSET(KVM_SPLIT_NAPPED, kvm_split_mode, napped);
 -      OFFSET(KVM_SPLIT_DO_SET, kvm_split_mode, do_set);
 -      OFFSET(KVM_SPLIT_DO_RESTORE, kvm_split_mode, do_restore);
  #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
  
  #ifdef CONFIG_PPC_BOOK3S_64
@@@ -75,7 -75,7 +75,7 @@@ BEGIN_FTR_SECTIO
        bne     .Ltabort_syscall
  END_FTR_SECTION_IFSET(CPU_FTR_TM)
  #endif
 -      INTERRUPT_TO_KERNEL
 +      SCV_INTERRUPT_TO_KERNEL
        mr      r10,r1
        ld      r1,PACAKSAVE(r13)
        std     r10,0(r1)
        li      r11,\trapnr
        std     r11,_TRAP(r1)
        std     r12,_CCR(r1)
-       std     r3,ORIG_GPR3(r1)
        addi    r10,r1,STACK_FRAME_OVERHEAD
        ld      r11,exception_marker@toc(r2)
        std     r11,-16(r10)            /* "regshere" marker */
@@@ -226,6 -225,12 +225,12 @@@ _ASM_NOKPROBE_SYMBOL(system_call_vector
  #endif
  
        .balign IFETCH_ALIGN_BYTES
+       .globl system_call_common_real
+ system_call_common_real:
+       ld      r10,PACAKMSR(r13)       /* get MSR value for kernel */
+       mtmsrd  r10
+       .balign IFETCH_ALIGN_BYTES
        .globl system_call_common
  system_call_common:
  _ASM_NOKPROBE_SYMBOL(system_call_common)
@@@ -278,7 -283,6 +283,6 @@@ END_BTB_FLUSH_SECTIO
        std     r10,_LINK(r1)
        std     r11,_TRAP(r1)
        std     r12,_CCR(r1)
-       std     r3,ORIG_GPR3(r1)
        addi    r10,r1,STACK_FRAME_OVERHEAD
        ld      r11,exception_marker@toc(r2)
        std     r11,-16(r10)            /* "regshere" marker */
@@@ -139,7 -139,6 +139,6 @@@ name
  #define IKVM_VIRT     .L_IKVM_VIRT_\name\()   /* Virt entry tests KVM */
  #define ISTACK                .L_ISTACK_\name\()      /* Set regular kernel stack */
  #define __ISTACK(name)        .L_ISTACK_ ## name
- #define IRECONCILE    .L_IRECONCILE_\name\()  /* Do RECONCILE_IRQ_STATE */
  #define IKUAP         .L_IKUAP_\name\()       /* Do KUAP lock */
  
  #define INT_DEFINE_BEGIN(n)                                           \
@@@ -203,9 -202,6 +202,6 @@@ do_define_int 
        .ifndef ISTACK
                ISTACK=1
        .endif
-       .ifndef IRECONCILE
-               IRECONCILE=1
-       .endif
        .ifndef IKUAP
                IKUAP=1
        .endif
@@@ -581,7 -577,6 +577,6 @@@ DEFINE_FIXED_SYMBOL(\name\()_common_rea
        kuap_save_amr_and_lock r9, r10, cr1, cr0
        .endif
        beq     101f                    /* if from kernel mode          */
-       ACCOUNT_CPU_USER_ENTRY(r13, r9, r10)
  BEGIN_FTR_SECTION
        ld      r9,IAREA+EX_PPR(r13)    /* Read PPR from paca           */
        std     r9,_PPR(r1)
@@@ -649,14 -644,6 +644,6 @@@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR
        ld      r11,exception_marker@toc(r2)
        std     r10,RESULT(r1)          /* clear regs->result           */
        std     r11,STACK_FRAME_OVERHEAD-16(r1) /* mark the frame       */
-       .if ISTACK
-       ACCOUNT_STOLEN_TIME
-       .endif
-       .if IRECONCILE
-       RECONCILE_IRQ_STATE(r10, r11)
-       .endif
  .endm
  
  /*
        ld      r1,GPR1(r1)
  .endm
  
- #define RUNLATCH_ON                           \
- BEGIN_FTR_SECTION                             \
-       ld      r3, PACA_THREAD_INFO(r13);      \
-       ld      r4,TI_LOCAL_FLAGS(r3);          \
-       andi.   r0,r4,_TLF_RUNLATCH;            \
-       beql    ppc64_runlatch_on_trampoline;   \
- END_FTR_SECTION_IFSET(CPU_FTR_CTRL)
  /*
   * When the idle code in power4_idle puts the CPU into NAP mode,
   * it has to do so in a loop, and relies on the external interrupt
@@@ -935,7 -914,6 +914,6 @@@ INT_DEFINE_BEGIN(system_reset
         */
        ISET_RI=0
        ISTACK=0
-       IRECONCILE=0
        IKVM_REAL=1
  INT_DEFINE_END(system_reset)
  
@@@ -1022,20 -1000,6 +1000,6 @@@ EXC_COMMON_BEGIN(system_reset_common
        ld      r1,PACA_NMI_EMERG_SP(r13)
        subi    r1,r1,INT_FRAME_SIZE
        __GEN_COMMON_BODY system_reset
-       /*
-        * Set IRQS_ALL_DISABLED unconditionally so irqs_disabled() does
-        * the right thing. We do not want to reconcile because that goes
-        * through irq tracing which we don't want in NMI.
-        *
-        * Save PACAIRQHAPPENED to RESULT (otherwise unused), and set HARD_DIS
-        * as we are running with MSR[EE]=0.
-        */
-       li      r10,IRQS_ALL_DISABLED
-       stb     r10,PACAIRQSOFTMASK(r13)
-       lbz     r10,PACAIRQHAPPENED(r13)
-       std     r10,RESULT(r1)
-       ori     r10,r10,PACA_IRQ_HARD_DIS
-       stb     r10,PACAIRQHAPPENED(r13)
  
        addi    r3,r1,STACK_FRAME_OVERHEAD
        bl      system_reset_exception
        subi    r10,r10,1
        sth     r10,PACA_IN_NMI(r13)
  
-       /*
-        * Restore soft mask settings.
-        */
-       ld      r10,RESULT(r1)
-       stb     r10,PACAIRQHAPPENED(r13)
-       ld      r10,SOFTE(r1)
-       stb     r10,PACAIRQSOFTMASK(r13)
        kuap_kernel_restore r9, r10
        EXCEPTION_RESTORE_REGS
        RFI_TO_USER_OR_KERNEL
@@@ -1123,7 -1079,6 +1079,6 @@@ INT_DEFINE_BEGIN(machine_check_early
        ISTACK=0
        IDAR=1
        IDSISR=1
-       IRECONCILE=0
        IKUAP=0 /* We don't touch AMR here, we never go to virtual mode */
  INT_DEFINE_END(machine_check_early)
  
@@@ -1205,30 -1160,11 +1160,11 @@@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE
        li      r10,MSR_RI
        mtmsrd  r10,1
  
-       /*
-        * Set IRQS_ALL_DISABLED and save PACAIRQHAPPENED (see
-        * system_reset_common)
-        */
-       li      r10,IRQS_ALL_DISABLED
-       stb     r10,PACAIRQSOFTMASK(r13)
-       lbz     r10,PACAIRQHAPPENED(r13)
-       std     r10,RESULT(r1)
-       ori     r10,r10,PACA_IRQ_HARD_DIS
-       stb     r10,PACAIRQHAPPENED(r13)
        addi    r3,r1,STACK_FRAME_OVERHEAD
        bl      machine_check_early
        std     r3,RESULT(r1)   /* Save result */
        ld      r12,_MSR(r1)
  
-       /*
-        * Restore soft mask settings.
-        */
-       ld      r10,RESULT(r1)
-       stb     r10,PACAIRQHAPPENED(r13)
-       ld      r10,SOFTE(r1)
-       stb     r10,PACAIRQSOFTMASK(r13)
  #ifdef CONFIG_PPC_P7_NAP
        /*
         * Check if thread was in power saving mode. We come here when any
@@@ -1401,14 -1337,15 +1337,15 @@@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE
   *
   * Handling:
   * - Hash MMU
-  *   Go to do_hash_page first to see if the HPT can be filled from an entry in
-  *   the Linux page table. Hash faults can hit in kernel mode in a fairly
+  *   Go to do_hash_fault, which attempts to fill the HPT from an entry in the
+  *   Linux page table. Hash faults can hit in kernel mode in a fairly
   *   arbitrary state (e.g., interrupts disabled, locks held) when accessing
   *   "non-bolted" regions, e.g., vmalloc space. However these should always be
-  *   backed by Linux page tables.
+  *   backed by Linux page table entries.
   *
-  *   If none is found, do a Linux page fault. Linux page faults can happen in
-  *   kernel mode due to user copy operations of course.
+  *   If no entry is found the Linux page fault handler is invoked (by
+  *   do_hash_fault). Linux page faults can happen in kernel mode due to user
+  *   copy operations of course.
   *
   *   KVM: The KVM HDSI handler may perform a load with MSR[DR]=1 in guest
   *   MMU context, which may cause a DSI in the host, which must go to the
@@@ -1437,15 -1374,24 +1374,24 @@@ EXC_VIRT_BEGIN(data_access, 0x4300, 0x8
  EXC_VIRT_END(data_access, 0x4300, 0x80)
  EXC_COMMON_BEGIN(data_access_common)
        GEN_COMMON data_access
-       ld      r4,_DAR(r1)
-       ld      r5,_DSISR(r1)
+       ld      r4,_DSISR(r1)
+       addi    r3,r1,STACK_FRAME_OVERHEAD
+       andis.  r0,r4,DSISR_DABRMATCH@h
+       bne-    1f
  BEGIN_MMU_FTR_SECTION
-       ld      r6,_MSR(r1)
-       li      r3,0x300
-       b       do_hash_page            /* Try to handle as hpte fault */
+       bl      do_hash_fault
  MMU_FTR_SECTION_ELSE
-       b       handle_page_fault
+       bl      do_page_fault
  ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
+       b       interrupt_return
+ 1:    bl      do_break
+       /*
+        * do_break() may have changed the NV GPRS while handling a breakpoint.
+        * If so, we need to restore them with their updated values.
+        */
+       REST_NVGPRS(r1)
+       b       interrupt_return
  
        GEN_KVM data_access
  
   *   on user-handler data structures.
   *
   *   KVM: Same as 0x300, DSLB must test for KVM guest.
-  *
-  * A dedicated save area EXSLB is used (XXX: but it actually need not be
-  * these days, we could use EXGEN).
   */
  INT_DEFINE_BEGIN(data_access_slb)
        IVEC=0x380
-       IAREA=PACA_EXSLB
-       IRECONCILE=0
        IDAR=1
        IKVM_SKIP=1
        IKVM_REAL=1
@@@ -1487,10 -1428,9 +1428,9 @@@ EXC_VIRT_BEGIN(data_access_slb, 0x4380
  EXC_VIRT_END(data_access_slb, 0x4380, 0x80)
  EXC_COMMON_BEGIN(data_access_slb_common)
        GEN_COMMON data_access_slb
-       ld      r4,_DAR(r1)
-       addi    r3,r1,STACK_FRAME_OVERHEAD
  BEGIN_MMU_FTR_SECTION
        /* HPT case, do SLB fault */
+       addi    r3,r1,STACK_FRAME_OVERHEAD
        bl      do_slb_fault
        cmpdi   r3,0
        bne-    1f
@@@ -1501,9 -1441,6 +1441,6 @@@ MMU_FTR_SECTION_ELS
        li      r3,-EFAULT
  ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
        std     r3,RESULT(r1)
-       RECONCILE_IRQ_STATE(r10, r11)
-       ld      r4,_DAR(r1)
-       ld      r5,RESULT(r1)
        addi    r3,r1,STACK_FRAME_OVERHEAD
        bl      do_bad_slb_fault
        b       interrupt_return
@@@ -1538,15 -1475,13 +1475,13 @@@ EXC_VIRT_BEGIN(instruction_access, 0x44
  EXC_VIRT_END(instruction_access, 0x4400, 0x80)
  EXC_COMMON_BEGIN(instruction_access_common)
        GEN_COMMON instruction_access
-       ld      r4,_DAR(r1)
-       ld      r5,_DSISR(r1)
+       addi    r3,r1,STACK_FRAME_OVERHEAD
  BEGIN_MMU_FTR_SECTION
-       ld      r6,_MSR(r1)
-       li      r3,0x400
-       b       do_hash_page            /* Try to handle as hpte fault */
+       bl      do_hash_fault
  MMU_FTR_SECTION_ELSE
-       b       handle_page_fault
+       bl      do_page_fault
  ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
+       b       interrupt_return
  
        GEN_KVM instruction_access
  
   */
  INT_DEFINE_BEGIN(instruction_access_slb)
        IVEC=0x480
-       IAREA=PACA_EXSLB
-       IRECONCILE=0
        IISIDE=1
        IDAR=1
  #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
@@@ -1579,10 -1512,9 +1512,9 @@@ EXC_VIRT_BEGIN(instruction_access_slb, 
  EXC_VIRT_END(instruction_access_slb, 0x4480, 0x80)
  EXC_COMMON_BEGIN(instruction_access_slb_common)
        GEN_COMMON instruction_access_slb
-       ld      r4,_DAR(r1)
-       addi    r3,r1,STACK_FRAME_OVERHEAD
  BEGIN_MMU_FTR_SECTION
        /* HPT case, do SLB fault */
+       addi    r3,r1,STACK_FRAME_OVERHEAD
        bl      do_slb_fault
        cmpdi   r3,0
        bne-    1f
@@@ -1593,9 -1525,6 +1525,6 @@@ MMU_FTR_SECTION_ELS
        li      r3,-EFAULT
  ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
        std     r3,RESULT(r1)
-       RECONCILE_IRQ_STATE(r10, r11)
-       ld      r4,_DAR(r1)
-       ld      r5,RESULT(r1)
        addi    r3,r1,STACK_FRAME_OVERHEAD
        bl      do_bad_slb_fault
        b       interrupt_return
@@@ -1643,7 -1572,6 +1572,6 @@@ EXC_VIRT_END(hardware_interrupt, 0x4500
  EXC_COMMON_BEGIN(hardware_interrupt_common)
        GEN_COMMON hardware_interrupt
        FINISH_NAP
-       RUNLATCH_ON
        addi    r3,r1,STACK_FRAME_OVERHEAD
        bl      do_IRQ
        b       interrupt_return
@@@ -1697,6 -1625,51 +1625,51 @@@ INT_DEFINE_BEGIN(program_check
  INT_DEFINE_END(program_check)
  
  EXC_REAL_BEGIN(program_check, 0x700, 0x100)
+ #ifdef CONFIG_CPU_LITTLE_ENDIAN
+       /*
+        * There's a short window during boot where although the kernel is
+        * running little endian, any exceptions will cause the CPU to switch
+        * back to big endian. For example a WARN() boils down to a trap
+        * instruction, which will cause a program check, and we end up here but
+        * with the CPU in big endian mode. The first instruction of the program
+        * check handler (in GEN_INT_ENTRY below) is an mtsprg, which when
+        * executed in the wrong endian is an lhzu with a ~3GB displacement from
+        * r3. The content of r3 is random, so that is a load from some random
+        * location, and depending on the system can easily lead to a checkstop,
+        * or an infinitely recursive page fault.
+        *
+        * So to handle that case we have a trampoline here that can detect we
+        * are in the wrong endian and flip us back to the correct endian. We
+        * can't flip MSR[LE] using mtmsr, so we have to use rfid. That requires
+        * backing up SRR0/1 as well as a GPR. To do that we use SPRG0/2/3, as
+        * SPRG1 is already used for the paca. SPRG3 is user readable, but this
+        * trampoline is only active very early in boot, and SPRG3 will be
+        * reinitialised in vdso_getcpu_init() before userspace starts.
+        */
+ BEGIN_FTR_SECTION
+       tdi   0,0,0x48    // Trap never, or in reverse endian: b . + 8
+       b     1f          // Skip trampoline if endian is correct
+       .long 0xa643707d  // mtsprg  0, r11      Backup r11
+       .long 0xa6027a7d  // mfsrr0  r11
+       .long 0xa643727d  // mtsprg  2, r11      Backup SRR0 in SPRG2
+       .long 0xa6027b7d  // mfsrr1  r11
+       .long 0xa643737d  // mtsprg  3, r11      Backup SRR1 in SPRG3
+       .long 0xa600607d  // mfmsr   r11
+       .long 0x01006b69  // xori    r11, r11, 1 Invert MSR[LE]
+       .long 0xa6037b7d  // mtsrr1  r11
+       .long 0x34076039  // li      r11, 0x734
+       .long 0xa6037a7d  // mtsrr0  r11
+       .long 0x2400004c  // rfid
+       mfsprg r11, 3
+       mtsrr1 r11        // Restore SRR1
+       mfsprg r11, 2
+       mtsrr0 r11        // Restore SRR0
+       mfsprg r11, 0     // Restore r11
+ 1:
+ END_FTR_SECTION(0, 1)     // nop out after boot
+ #endif /* CONFIG_CPU_LITTLE_ENDIAN */
        GEN_INT_ENTRY program_check, virt=0
  EXC_REAL_END(program_check, 0x700, 0x100)
  EXC_VIRT_BEGIN(program_check, 0x4700, 0x100)
@@@ -1755,7 -1728,6 +1728,6 @@@ EXC_COMMON_BEGIN(program_check_common
   */
  INT_DEFINE_BEGIN(fp_unavailable)
        IVEC=0x800
-       IRECONCILE=0
  #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
        IKVM_REAL=1
  #endif
@@@ -1770,7 -1742,6 +1742,6 @@@ EXC_VIRT_END(fp_unavailable, 0x4800, 0x
  EXC_COMMON_BEGIN(fp_unavailable_common)
        GEN_COMMON fp_unavailable
        bne     1f                      /* if from user, just load it up */
-       RECONCILE_IRQ_STATE(r10, r11)
        addi    r3,r1,STACK_FRAME_OVERHEAD
        bl      kernel_fp_unavailable_exception
  0:    trap
@@@ -1789,7 -1760,6 +1760,6 @@@ END_FTR_SECTION_IFSET(CPU_FTR_TM
        b       fast_interrupt_return
  #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
  2:    /* User process was in a transaction */
-       RECONCILE_IRQ_STATE(r10, r11)
        addi    r3,r1,STACK_FRAME_OVERHEAD
        bl      fp_unavailable_tm
        b       interrupt_return
@@@ -1832,7 -1802,6 +1802,6 @@@ EXC_VIRT_END(decrementer, 0x4900, 0x80
  EXC_COMMON_BEGIN(decrementer_common)
        GEN_COMMON decrementer
        FINISH_NAP
-       RUNLATCH_ON
        addi    r3,r1,STACK_FRAME_OVERHEAD
        bl      timer_interrupt
        b       interrupt_return
@@@ -1854,7 -1823,6 +1823,6 @@@ INT_DEFINE_BEGIN(hdecrementer
        IVEC=0x980
        IHSRR=1
        ISTACK=0
-       IRECONCILE=0
        IKVM_REAL=1
        IKVM_VIRT=1
  INT_DEFINE_END(hdecrementer)
@@@ -1919,12 -1887,11 +1887,11 @@@ EXC_VIRT_END(doorbell_super, 0x4a00, 0x
  EXC_COMMON_BEGIN(doorbell_super_common)
        GEN_COMMON doorbell_super
        FINISH_NAP
-       RUNLATCH_ON
        addi    r3,r1,STACK_FRAME_OVERHEAD
  #ifdef CONFIG_PPC_DOORBELL
        bl      doorbell_exception
  #else
-       bl      unknown_exception
+       bl      unknown_async_exception
  #endif
        b       interrupt_return
  
@@@ -2001,12 -1968,9 +1968,9 @@@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE
        HMT_MEDIUM
  
        .if ! \virt
-       __LOAD_HANDLER(r10, system_call_common)
-       mtspr   SPRN_SRR0,r10
-       ld      r10,PACAKMSR(r13)
-       mtspr   SPRN_SRR1,r10
-       RFI_TO_KERNEL
-       b       .       /* prevent speculative execution */
+       __LOAD_HANDLER(r10, system_call_common_real)
+       mtctr   r10
+       bctr
        .else
        li      r10,MSR_RI
        mtmsrd  r10,1                   /* Set RI (EE=0) */
@@@ -2137,9 -2101,7 +2101,7 @@@ EXC_COMMON_BEGIN(h_data_storage_common
        GEN_COMMON h_data_storage
        addi    r3,r1,STACK_FRAME_OVERHEAD
  BEGIN_MMU_FTR_SECTION
-       ld      r4,_DAR(r1)
-       li      r5,SIGSEGV
-       bl      bad_page_fault
+       bl      do_bad_page_fault_segv
  MMU_FTR_SECTION_ELSE
        bl      unknown_exception
  ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_TYPE_RADIX)
@@@ -2230,7 -2192,6 +2192,6 @@@ INT_DEFINE_BEGIN(hmi_exception_early
        IHSRR=1
        IREALMODE_COMMON=1
        ISTACK=0
-       IRECONCILE=0
        IKUAP=0 /* We don't touch AMR here, we never go to virtual mode */
        IKVM_REAL=1
  INT_DEFINE_END(hmi_exception_early)
@@@ -2277,7 -2238,6 +2238,6 @@@ EXC_COMMON_BEGIN(hmi_exception_early_co
  EXC_COMMON_BEGIN(hmi_exception_common)
        GEN_COMMON hmi_exception
        FINISH_NAP
-       RUNLATCH_ON
        addi    r3,r1,STACK_FRAME_OVERHEAD
        bl      handle_hmi_exception
        b       interrupt_return
@@@ -2307,12 -2267,11 +2267,11 @@@ EXC_VIRT_END(h_doorbell, 0x4e80, 0x20
  EXC_COMMON_BEGIN(h_doorbell_common)
        GEN_COMMON h_doorbell
        FINISH_NAP
-       RUNLATCH_ON
        addi    r3,r1,STACK_FRAME_OVERHEAD
  #ifdef CONFIG_PPC_DOORBELL
        bl      doorbell_exception
  #else
-       bl      unknown_exception
+       bl      unknown_async_exception
  #endif
        b       interrupt_return
  
@@@ -2341,7 -2300,6 +2300,6 @@@ EXC_VIRT_END(h_virt_irq, 0x4ea0, 0x20
  EXC_COMMON_BEGIN(h_virt_irq_common)
        GEN_COMMON h_virt_irq
        FINISH_NAP
-       RUNLATCH_ON
        addi    r3,r1,STACK_FRAME_OVERHEAD
        bl      do_IRQ
        b       interrupt_return
@@@ -2388,7 -2346,6 +2346,6 @@@ EXC_VIRT_END(performance_monitor, 0x4f0
  EXC_COMMON_BEGIN(performance_monitor_common)
        GEN_COMMON performance_monitor
        FINISH_NAP
-       RUNLATCH_ON
        addi    r3,r1,STACK_FRAME_OVERHEAD
        bl      performance_monitor_exception
        b       interrupt_return
   */
  INT_DEFINE_BEGIN(altivec_unavailable)
        IVEC=0xf20
-       IRECONCILE=0
  #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
        IKVM_REAL=1
  #endif
@@@ -2434,7 -2390,6 +2390,6 @@@ BEGIN_FTR_SECTIO
        b       fast_interrupt_return
  #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
  2:    /* User process was in a transaction */
-       RECONCILE_IRQ_STATE(r10, r11)
        addi    r3,r1,STACK_FRAME_OVERHEAD
        bl      altivec_unavailable_tm
        b       interrupt_return
  1:
  END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
  #endif
-       RECONCILE_IRQ_STATE(r10, r11)
        addi    r3,r1,STACK_FRAME_OVERHEAD
        bl      altivec_unavailable_exception
        b       interrupt_return
   */
  INT_DEFINE_BEGIN(vsx_unavailable)
        IVEC=0xf40
-       IRECONCILE=0
  #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
        IKVM_REAL=1
  #endif
@@@ -2487,7 -2440,6 +2440,6 @@@ BEGIN_FTR_SECTIO
        b       load_up_vsx
  #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
  2:    /* User process was in a transaction */
-       RECONCILE_IRQ_STATE(r10, r11)
        addi    r3,r1,STACK_FRAME_OVERHEAD
        bl      vsx_unavailable_tm
        b       interrupt_return
  1:
  END_FTR_SECTION_IFSET(CPU_FTR_VSX)
  #endif
-       RECONCILE_IRQ_STATE(r10, r11)
        addi    r3,r1,STACK_FRAME_OVERHEAD
        bl      vsx_unavailable_exception
        b       interrupt_return
@@@ -2830,7 -2781,6 +2781,6 @@@ EXC_VIRT_NONE(0x5800, 0x100
  INT_DEFINE_BEGIN(soft_nmi)
        IVEC=0x900
        ISTACK=0
-       IRECONCILE=0    /* Soft-NMI may fire under local_irq_disable */
  INT_DEFINE_END(soft_nmi)
  
  /*
@@@ -2849,17 -2799,6 +2799,6 @@@ EXC_COMMON_BEGIN(soft_nmi_common
        subi    r1,r1,INT_FRAME_SIZE
        __GEN_COMMON_BODY soft_nmi
  
-       /*
-        * Set IRQS_ALL_DISABLED and save PACAIRQHAPPENED (see
-        * system_reset_common)
-        */
-       li      r10,IRQS_ALL_DISABLED
-       stb     r10,PACAIRQSOFTMASK(r13)
-       lbz     r10,PACAIRQHAPPENED(r13)
-       std     r10,RESULT(r1)
-       ori     r10,r10,PACA_IRQ_HARD_DIS
-       stb     r10,PACAIRQHAPPENED(r13)
        addi    r3,r1,STACK_FRAME_OVERHEAD
        bl      soft_nmi_interrupt
  
        li      r9,0
        mtmsrd  r9,1
  
-       /*
-        * Restore soft mask settings.
-        */
-       ld      r10,RESULT(r1)
-       stb     r10,PACAIRQHAPPENED(r13)
-       ld      r10,SOFTE(r1)
-       stb     r10,PACAIRQSOFTMASK(r13)
        kuap_kernel_restore r9, r10
        EXCEPTION_RESTORE_REGS hsrr=0
        RFI_TO_KERNEL
@@@ -2993,25 -2924,6 +2924,25 @@@ TRAMP_REAL_BEGIN(entry_flush_fallback
        ld      r11,PACA_EXRFI+EX_R11(r13)
        blr
  
 +/*
 + * The SCV entry flush happens with interrupts enabled, so it must disable
 + * to prevent EXRFI being clobbered by NMIs (e.g., soft_nmi_common). r10
 + * (containing LR) does not need to be preserved here because scv entry
 + * puts 0 in the pt_regs, CTR can be clobbered for the same reason.
 + */
 +TRAMP_REAL_BEGIN(scv_entry_flush_fallback)
 +      li      r10,0
 +      mtmsrd  r10,1
 +      lbz     r10,PACAIRQHAPPENED(r13)
 +      ori     r10,r10,PACA_IRQ_HARD_DIS
 +      stb     r10,PACAIRQHAPPENED(r13)
 +      std     r11,PACA_EXRFI+EX_R11(r13)
 +      L1D_DISPLACEMENT_FLUSH
 +      ld      r11,PACA_EXRFI+EX_R11(r13)
 +      li      r10,MSR_RI
 +      mtmsrd  r10,1
 +      blr
 +
  TRAMP_REAL_BEGIN(rfi_flush_fallback)
        SET_SCRATCH0(r13);
        GET_PACA(r13);
@@@ -3148,9 -3060,6 +3079,6 @@@ kvmppc_skip_Hinterrupt
         * come here.
         */
  
- EXC_COMMON_BEGIN(ppc64_runlatch_on_trampoline)
-       b       __ppc64_runlatch_on
  USE_FIXED_SECTION(virt_trampolines)
        /*
         * All code below __end_interrupts is treated as soft-masked. If
@@@ -3221,99 -3130,3 +3149,3 @@@ disable_machine_check
        RFI_TO_KERNEL
  1:    mtlr    r0
        blr
- /*
-  * Hash table stuff
-  */
-       .balign IFETCH_ALIGN_BYTES
- do_hash_page:
- #ifdef CONFIG_PPC_BOOK3S_64
-       lis     r0,(DSISR_BAD_FAULT_64S | DSISR_DABRMATCH | DSISR_KEYFAULT)@h
-       ori     r0,r0,DSISR_BAD_FAULT_64S@l
-       and.    r0,r5,r0                /* weird error? */
-       bne-    handle_page_fault       /* if not, try to insert a HPTE */
-       /*
-        * If we are in an "NMI" (e.g., an interrupt when soft-disabled), then
-        * don't call hash_page, just fail the fault. This is required to
-        * prevent re-entrancy problems in the hash code, namely perf
-        * interrupts hitting while something holds H_PAGE_BUSY, and taking a
-        * hash fault. See the comment in hash_preload().
-        */
-       ld      r11, PACA_THREAD_INFO(r13)
-       lwz     r0,TI_PREEMPT(r11)
-       andis.  r0,r0,NMI_MASK@h
-       bne     77f
-       /*
-        * r3 contains the trap number
-        * r4 contains the faulting address
-        * r5 contains dsisr
-        * r6 msr
-        *
-        * at return r3 = 0 for success, 1 for page fault, negative for error
-        */
-       bl      __hash_page             /* build HPTE if possible */
-         cmpdi r3,0                    /* see if __hash_page succeeded */
-       /* Success */
-       beq     interrupt_return        /* Return from exception on success */
-       /* Error */
-       blt-    13f
-       /* Reload DAR/DSISR into r4/r5 for the DABR check below */
-       ld      r4,_DAR(r1)
-       ld      r5,_DSISR(r1)
- #endif /* CONFIG_PPC_BOOK3S_64 */
- /* Here we have a page fault that hash_page can't handle. */
- handle_page_fault:
- 11:   andis.  r0,r5,DSISR_DABRMATCH@h
-       bne-    handle_dabr_fault
-       addi    r3,r1,STACK_FRAME_OVERHEAD
-       bl      do_page_fault
-       cmpdi   r3,0
-       beq+    interrupt_return
-       mr      r5,r3
-       addi    r3,r1,STACK_FRAME_OVERHEAD
-       ld      r4,_DAR(r1)
-       bl      __bad_page_fault
-       b       interrupt_return
- /* We have a data breakpoint exception - handle it */
- handle_dabr_fault:
-       ld      r4,_DAR(r1)
-       ld      r5,_DSISR(r1)
-       addi    r3,r1,STACK_FRAME_OVERHEAD
-       bl      do_break
-       /*
-        * do_break() may have changed the NV GPRS while handling a breakpoint.
-        * If so, we need to restore them with their updated values.
-        */
-       REST_NVGPRS(r1)
-       b       interrupt_return
- #ifdef CONFIG_PPC_BOOK3S_64
- /* We have a page fault that hash_page could handle but HV refused
-  * the PTE insertion
-  */
- 13:   mr      r5,r3
-       addi    r3,r1,STACK_FRAME_OVERHEAD
-       ld      r4,_DAR(r1)
-       bl      low_hash_fault
-       b       interrupt_return
- #endif
- /*
-  * We come here as a result of a DSI at a point where we don't want
-  * to call hash_page, such as when we are accessing memory (possibly
-  * user memory) inside a PMU interrupt that occurred while interrupts
-  * were soft-disabled.  We want to invoke the exception handler for
-  * the access, or panic if there isn't a handler.
-  */
- 77:   addi    r3,r1,STACK_FRAME_OVERHEAD
-       li      r5,SIGSEGV
-       bl      bad_page_fault
-       b       interrupt_return
@@@ -238,8 -238,8 +238,8 @@@ __secondary_hold_acknowledge
  
  /* System reset */
/* core99 pmac starts the secondary here by changing the vector, and
-    putting it back to what it was (unknown_exception) when done.  */
-       EXCEPTION(0x100, Reset, unknown_exception, EXC_XFER_STD)
+    putting it back to what it was (unknown_async_exception) when done.  */
+       EXCEPTION(0x100, Reset, unknown_async_exception, EXC_XFER_STD)
  
  /* Machine check */
  /*
  MachineCheck:
        EXCEPTION_PROLOG_0
  #ifdef CONFIG_PPC_CHRP
 +#ifdef CONFIG_VMAP_STACK
 +      mtspr   SPRN_SPRG_SCRATCH2,r1
 +      mfspr   r1, SPRN_SPRG_THREAD
 +      lwz     r1, RTAS_SP(r1)
 +      cmpwi   cr1, r1, 0
 +      bne     cr1, 7f
 +      mfspr   r1, SPRN_SPRG_SCRATCH2
 +#else
        mfspr   r11, SPRN_SPRG_THREAD
        lwz     r11, RTAS_SP(r11)
        cmpwi   cr1, r11, 0
        bne     cr1, 7f
 +#endif
  #endif /* CONFIG_PPC_CHRP */
        EXCEPTION_PROLOG_1 for_rtas=1
  7:    EXCEPTION_PROLOG_2
        addi    r3,r1,STACK_FRAME_OVERHEAD
  #ifdef CONFIG_PPC_CHRP
- #ifdef CONFIG_VMAP_STACK
-       mfspr   r4, SPRN_SPRG_THREAD
-       tovirt(r4, r4)
-       lwz     r4, RTAS_SP(r4)
-       cmpwi   cr1, r4, 0
- #endif
        beq     cr1, machine_check_tramp
        twi     31, 0, 0
  #else
        DO_KVM  0x300
  DataAccess:
  #ifdef CONFIG_VMAP_STACK
+ #ifdef CONFIG_PPC_BOOK3S_604
  BEGIN_MMU_FTR_SECTION
        mtspr   SPRN_SPRG_SCRATCH2,r10
        mfspr   r10, SPRN_SPRG_THREAD
  MMU_FTR_SECTION_ELSE
        b       1f
  ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE)
+ #endif
  1:    EXCEPTION_PROLOG_0 handle_dar_dsisr=1
        EXCEPTION_PROLOG_1
        b       handle_page_fault_tramp_1
  #else /* CONFIG_VMAP_STACK */
        EXCEPTION_PROLOG handle_dar_dsisr=1
        get_and_save_dar_dsisr_on_stack r4, r5, r11
+ #ifdef CONFIG_PPC_BOOK3S_604
  BEGIN_MMU_FTR_SECTION
        andis.  r0, r5, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH)@h
        bne     handle_page_fault_tramp_2       /* if not, try to put a PTE */
        bl      hash_page
        b       handle_page_fault_tramp_1
  MMU_FTR_SECTION_ELSE
+ #endif
        b       handle_page_fault_tramp_2
+ #ifdef CONFIG_PPC_BOOK3S_604
  ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE)
+ #endif
  #endif        /* CONFIG_VMAP_STACK */
  
  /* Instruction access exception. */
@@@ -341,12 -332,14 +341,14 @@@ InstructionAccess
        mfspr   r11, SPRN_SRR1          /* check whether user or kernel */
        stw     r11, SRR1(r10)
        mfcr    r10
+ #ifdef CONFIG_PPC_BOOK3S_604
  BEGIN_MMU_FTR_SECTION
        andis.  r11, r11, SRR1_ISI_NOPT@h       /* no pte found? */
        bne     hash_page_isi
  .Lhash_page_isi_cont:
        mfspr   r11, SPRN_SRR1          /* check whether user or kernel */
  END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
+ #endif
        andi.   r11, r11, MSR_PR
  
        EXCEPTION_PROLOG_1
        beq     1f                      /* if so, try to put a PTE */
        li      r3,0                    /* into the hash table */
        mr      r4,r12                  /* SRR0 is fault address */
+ #ifdef CONFIG_PPC_BOOK3S_604
  BEGIN_MMU_FTR_SECTION
        bl      hash_page
  END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
+ #endif
  #endif        /* CONFIG_VMAP_STACK */
- 1:    mr      r4,r12
        andis.  r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */
-       stw     r4, _DAR(r11)
+       stw     r5, _DSISR(r11)
+       stw     r12, _DAR(r11)
        EXC_XFER_LITE(0x400, handle_page_fault)
  
  /* External interrupt */
@@@ -640,7 -635,7 +644,7 @@@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_
  #endif
  
  #ifndef CONFIG_TAU_INT
- #define TAUException  unknown_exception
+ #define TAUException  unknown_async_exception
  #endif
  
        EXCEPTION(0x1300, Trap_13, instruction_breakpoint_exception, EXC_XFER_STD)
@@@ -685,13 -680,16 +689,16 @@@ handle_page_fault_tramp_1
  #ifdef CONFIG_VMAP_STACK
        EXCEPTION_PROLOG_2 handle_dar_dsisr=1
  #endif
-       lwz     r4, _DAR(r11)
        lwz     r5, _DSISR(r11)
        /* fall through */
  handle_page_fault_tramp_2:
+       andis.  r0, r5, DSISR_DABRMATCH@h
+       bne-    1f
        EXC_XFER_LITE(0x300, handle_page_fault)
+ 1:    EXC_XFER_STD(0x300, do_break)
  
  #ifdef CONFIG_VMAP_STACK
+ #ifdef CONFIG_PPC_BOOK3S_604
  .macro save_regs_thread               thread
        stw     r0, THR0(\thread)
        stw     r3, THR3(\thread)
@@@ -763,6 -761,7 +770,7 @@@ fast_hash_page_return
        mfspr   r11, SPRN_SPRG_SCRATCH1
        mfspr   r10, SPRN_SPRG_SCRATCH0
        rfi
+ #endif /* CONFIG_PPC_BOOK3S_604 */
  
  stack_overflow:
        vmap_stack_overflow_exception
@@@ -54,6 -54,7 +54,7 @@@
  #include <linux/pgtable.h>
  
  #include <linux/uaccess.h>
+ #include <asm/interrupt.h>
  #include <asm/io.h>
  #include <asm/irq.h>
  #include <asm/cache.h>
@@@ -180,18 -181,13 +181,18 @@@ void notrace restore_interrupts(void
  
  void replay_soft_interrupts(void)
  {
 +      struct pt_regs regs;
 +
        /*
 -       * We use local_paca rather than get_paca() to avoid all
 -       * the debug_smp_processor_id() business in this low level
 -       * function
 +       * Be careful here, calling these interrupt handlers can cause
 +       * softirqs to be raised, which they may run when calling irq_exit,
 +       * which will cause local_irq_enable() to be run, which can then
 +       * recurse into this function. Don't keep any state across
 +       * interrupt handler calls which may change underneath us.
 +       *
 +       * We use local_paca rather than get_paca() to avoid all the
 +       * debug_smp_processor_id() business in this low level function.
         */
 -      unsigned char happened = local_paca->irq_happened;
 -      struct pt_regs regs;
  
        ppc_save_regs(&regs);
        regs.softe = IRQS_ENABLED;
@@@ -214,7 -210,7 +215,7 @@@ again
         * This is a higher priority interrupt than the others, so
         * replay it first.
         */
 -      if (IS_ENABLED(CONFIG_PPC_BOOK3S) && (happened & PACA_IRQ_HMI)) {
 +      if (IS_ENABLED(CONFIG_PPC_BOOK3S) && (local_paca->irq_happened & PACA_IRQ_HMI)) {
                local_paca->irq_happened &= ~PACA_IRQ_HMI;
                regs.trap = 0xe60;
                handle_hmi_exception(&regs);
                        hard_irq_disable();
        }
  
 -      if (happened & PACA_IRQ_DEC) {
 +      if (local_paca->irq_happened & PACA_IRQ_DEC) {
                local_paca->irq_happened &= ~PACA_IRQ_DEC;
                regs.trap = 0x900;
                timer_interrupt(&regs);
                        hard_irq_disable();
        }
  
 -      if (happened & PACA_IRQ_EE) {
 +      if (local_paca->irq_happened & PACA_IRQ_EE) {
                local_paca->irq_happened &= ~PACA_IRQ_EE;
                regs.trap = 0x500;
                do_IRQ(&regs);
                        hard_irq_disable();
        }
  
 -      if (IS_ENABLED(CONFIG_PPC_DOORBELL) && (happened & PACA_IRQ_DBELL)) {
 +      if (IS_ENABLED(CONFIG_PPC_DOORBELL) && (local_paca->irq_happened & PACA_IRQ_DBELL)) {
                local_paca->irq_happened &= ~PACA_IRQ_DBELL;
                if (IS_ENABLED(CONFIG_PPC_BOOK3E))
                        regs.trap = 0x280;
        }
  
        /* Book3E does not support soft-masking PMI interrupts */
 -      if (IS_ENABLED(CONFIG_PPC_BOOK3S) && (happened & PACA_IRQ_PMI)) {
 +      if (IS_ENABLED(CONFIG_PPC_BOOK3S) && (local_paca->irq_happened & PACA_IRQ_PMI)) {
                local_paca->irq_happened &= ~PACA_IRQ_PMI;
                regs.trap = 0xf00;
                performance_monitor_exception(&regs);
                        hard_irq_disable();
        }
  
 -      happened = local_paca->irq_happened;
 -      if (happened & ~PACA_IRQ_HARD_DIS) {
 +      if (local_paca->irq_happened & ~PACA_IRQ_HARD_DIS) {
                /*
                 * We are responding to the next interrupt, so interrupt-off
                 * latencies should be reset here.
        }
  }
  
+ #if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_KUAP)
+ static inline void replay_soft_interrupts_irqrestore(void)
+ {
+       unsigned long kuap_state = get_kuap();
+       /*
+        * Check if anything calls local_irq_enable/restore() when KUAP is
+        * disabled (user access enabled). We handle that case here by saving
+        * and re-locking AMR but we shouldn't get here in the first place,
+        * hence the warning.
+        */
+       kuap_check_amr();
+       if (kuap_state != AMR_KUAP_BLOCKED)
+               set_kuap(AMR_KUAP_BLOCKED);
+       replay_soft_interrupts();
+       if (kuap_state != AMR_KUAP_BLOCKED)
+               set_kuap(kuap_state);
+ }
+ #else
+ #define replay_soft_interrupts_irqrestore() replay_soft_interrupts()
+ #endif
  notrace void arch_local_irq_restore(unsigned long mask)
  {
        unsigned char irq_happened;
        irq_soft_mask_set(IRQS_ALL_DISABLED);
        trace_hardirqs_off();
  
-       replay_soft_interrupts();
+       replay_soft_interrupts_irqrestore();
        local_paca->irq_happened = 0;
  
        trace_hardirqs_on();
@@@ -644,8 -666,6 +670,6 @@@ void __do_irq(struct pt_regs *regs
  {
        unsigned int irq;
  
-       irq_enter();
        trace_irq_entry(regs);
  
        /*
                generic_handle_irq(irq);
  
        trace_irq_exit(regs);
-       irq_exit();
  }
  
- void do_IRQ(struct pt_regs *regs)
+ DEFINE_INTERRUPT_HANDLER_ASYNC(do_IRQ)
  {
        struct pt_regs *old_regs = set_irq_regs(regs);
        void *cursp, *irqsp, *sirqsp;
@@@ -53,6 -53,7 +53,7 @@@
  #include <asm/cputable.h>
  #include <asm/cacheflush.h>
  #include <linux/uaccess.h>
+ #include <asm/interrupt.h>
  #include <asm/io.h>
  #include <asm/kvm_ppc.h>
  #include <asm/kvm_book3s.h>
@@@ -134,7 -135,7 +135,7 @@@ static inline bool nesting_enabled(stru
  }
  
  /* If set, the threads on each CPU core have to be in the same MMU mode */
 -static bool no_mixing_hpt_and_radix;
 +static bool no_mixing_hpt_and_radix __read_mostly;
  
  static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
  
@@@ -782,24 -783,8 +783,24 @@@ static int kvmppc_h_set_mode(struct kvm
                        return H_UNSUPPORTED_FLAG_START;
                if (value2 & DABRX_HYP)
                        return H_P4;
 -              vcpu->arch.dawr  = value1;
 -              vcpu->arch.dawrx = value2;
 +              vcpu->arch.dawr0  = value1;
 +              vcpu->arch.dawrx0 = value2;
 +              return H_SUCCESS;
 +      case H_SET_MODE_RESOURCE_SET_DAWR1:
 +              if (!kvmppc_power8_compatible(vcpu))
 +                      return H_P2;
 +              if (!ppc_breakpoint_available())
 +                      return H_P2;
 +              if (!cpu_has_feature(CPU_FTR_DAWR1))
 +                      return H_P2;
 +              if (!vcpu->kvm->arch.dawr1_enabled)
 +                      return H_FUNCTION;
 +              if (mflags)
 +                      return H_UNSUPPORTED_FLAG_START;
 +              if (value2 & DABRX_HYP)
 +                      return H_P4;
 +              vcpu->arch.dawr1  = value1;
 +              vcpu->arch.dawrx1 = value2;
                return H_SUCCESS;
        case H_SET_MODE_RESOURCE_ADDR_TRANS_MODE:
                /* KVM does not support mflags=2 (AIL=2) */
@@@ -1775,16 -1760,10 +1776,16 @@@ static int kvmppc_get_one_reg_hv(struc
                *val = get_reg_val(id, vcpu->arch.vcore->vtb);
                break;
        case KVM_REG_PPC_DAWR:
 -              *val = get_reg_val(id, vcpu->arch.dawr);
 +              *val = get_reg_val(id, vcpu->arch.dawr0);
                break;
        case KVM_REG_PPC_DAWRX:
 -              *val = get_reg_val(id, vcpu->arch.dawrx);
 +              *val = get_reg_val(id, vcpu->arch.dawrx0);
 +              break;
 +      case KVM_REG_PPC_DAWR1:
 +              *val = get_reg_val(id, vcpu->arch.dawr1);
 +              break;
 +      case KVM_REG_PPC_DAWRX1:
 +              *val = get_reg_val(id, vcpu->arch.dawrx1);
                break;
        case KVM_REG_PPC_CIABR:
                *val = get_reg_val(id, vcpu->arch.ciabr);
@@@ -2013,16 -1992,10 +2014,16 @@@ static int kvmppc_set_one_reg_hv(struc
                vcpu->arch.vcore->vtb = set_reg_val(id, *val);
                break;
        case KVM_REG_PPC_DAWR:
 -              vcpu->arch.dawr = set_reg_val(id, *val);
 +              vcpu->arch.dawr0 = set_reg_val(id, *val);
                break;
        case KVM_REG_PPC_DAWRX:
 -              vcpu->arch.dawrx = set_reg_val(id, *val) & ~DAWRX_HYP;
 +              vcpu->arch.dawrx0 = set_reg_val(id, *val) & ~DAWRX_HYP;
 +              break;
 +      case KVM_REG_PPC_DAWR1:
 +              vcpu->arch.dawr1 = set_reg_val(id, *val);
 +              break;
 +      case KVM_REG_PPC_DAWRX1:
 +              vcpu->arch.dawrx1 = set_reg_val(id, *val) & ~DAWRX_HYP;
                break;
        case KVM_REG_PPC_CIABR:
                vcpu->arch.ciabr = set_reg_val(id, *val);
@@@ -2890,6 -2863,11 +2891,6 @@@ static bool can_dynamic_split(struct kv
        if (one_vm_per_core && vc->kvm != cip->vc[0]->kvm)
                return false;
  
 -      /* Some POWER9 chips require all threads to be in the same MMU mode */
 -      if (no_mixing_hpt_and_radix &&
 -          kvm_is_radix(vc->kvm) != kvm_is_radix(cip->vc[0]->kvm))
 -              return false;
 -
        if (n_threads < cip->max_subcore_threads)
                n_threads = cip->max_subcore_threads;
        if (!subcore_config_ok(cip->n_subcores + 1, n_threads))
@@@ -2928,9 -2906,6 +2929,9 @@@ static void prepare_threads(struct kvmp
        for_each_runnable_thread(i, vcpu, vc) {
                if (signal_pending(vcpu->arch.run_task))
                        vcpu->arch.ret = -EINTR;
 +              else if (no_mixing_hpt_and_radix &&
 +                       kvm_is_radix(vc->kvm) != radix_enabled())
 +                      vcpu->arch.ret = -EINVAL;
                else if (vcpu->arch.vpa.update_pending ||
                         vcpu->arch.slb_shadow.update_pending ||
                         vcpu->arch.dtl.update_pending)
@@@ -3136,6 -3111,7 +3137,6 @@@ static noinline void kvmppc_run_core(st
        int controlled_threads;
        int trap;
        bool is_power8;
 -      bool hpt_on_radix;
  
        /*
         * Remove from the list any threads that have a signal pending
         * this is a HPT guest on a radix host machine where the
         * CPU threads may not be in different MMU modes.
         */
 -      hpt_on_radix = no_mixing_hpt_and_radix && radix_enabled() &&
 -              !kvm_is_radix(vc->kvm);
 -      if (((controlled_threads > 1) &&
 -           ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) ||
 -          (hpt_on_radix && vc->kvm->arch.threads_indep)) {
 +      if ((controlled_threads > 1) &&
 +          ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) {
                for_each_runnable_thread(i, vcpu, vc) {
                        vcpu->arch.ret = -EBUSY;
                        kvmppc_remove_runnable(vc, vcpu);
        is_power8 = cpu_has_feature(CPU_FTR_ARCH_207S)
                && !cpu_has_feature(CPU_FTR_ARCH_300);
  
 -      if (split > 1 || hpt_on_radix) {
 +      if (split > 1) {
                sip = &split_info;
                memset(&split_info, 0, sizeof(split_info));
                for (sub = 0; sub < core_info.n_subcores; ++sub)
                        split_info.subcore_size = subcore_size;
                } else {
                        split_info.subcore_size = 1;
 -                      if (hpt_on_radix) {
 -                              /* Use the split_info for LPCR/LPIDR changes */
 -                              split_info.lpcr_req = vc->lpcr;
 -                              split_info.lpidr_req = vc->kvm->arch.lpid;
 -                              split_info.host_lpcr = vc->kvm->arch.host_lpcr;
 -                              split_info.do_set = 1;
 -                      }
                }
  
                /* order writes to split_info before kvm_split_mode pointer */
        for (thr = 0; thr < controlled_threads; ++thr) {
                struct paca_struct *paca = paca_ptrs[pcpu + thr];
  
 -              paca->kvm_hstate.tid = thr;
                paca->kvm_hstate.napping = 0;
                paca->kvm_hstate.kvm_split_mode = sip;
        }
         * When doing micro-threading, poke the inactive threads as well.
         * This gets them to the nap instruction after kvm_do_nap,
         * which reduces the time taken to unsplit later.
 -       * For POWER9 HPT guest on radix host, we need all the secondary
 -       * threads woken up so they can do the LPCR/LPIDR change.
         */
 -      if (cmd_bit || hpt_on_radix) {
 +      if (cmd_bit) {
                split_info.do_nap = 1;  /* ask secondaries to nap when done */
                for (thr = 1; thr < threads_per_subcore; ++thr)
                        if (!(active & (1 << thr)))
                        cpu_relax();
                        ++loops;
                }
 -      } else if (hpt_on_radix) {
 -              /* Wait for all threads to have seen final sync */
 -              for (thr = 1; thr < controlled_threads; ++thr) {
 -                      struct paca_struct *paca = paca_ptrs[pcpu + thr];
 -
 -                      while (paca->kvm_hstate.kvm_split_mode) {
 -                              HMT_low();
 -                              barrier();
 -                      }
 -                      HMT_medium();
 -              }
 +              split_info.do_nap = 0;
        }
 -      split_info.do_nap = 0;
  
        kvmppc_set_host_core(pcpu);
  
+       guest_exit_irqoff();
        local_irq_enable();
-       guest_exit();
  
        /* Let secondaries go back to the offline loop */
        for (i = 0; i < controlled_threads; ++i) {
@@@ -3450,17 -3451,10 +3452,17 @@@ static int kvmhv_load_hv_regs_and_go(st
        int trap;
        unsigned long host_hfscr = mfspr(SPRN_HFSCR);
        unsigned long host_ciabr = mfspr(SPRN_CIABR);
 -      unsigned long host_dawr = mfspr(SPRN_DAWR0);
 -      unsigned long host_dawrx = mfspr(SPRN_DAWRX0);
 +      unsigned long host_dawr0 = mfspr(SPRN_DAWR0);
 +      unsigned long host_dawrx0 = mfspr(SPRN_DAWRX0);
        unsigned long host_psscr = mfspr(SPRN_PSSCR);
        unsigned long host_pidr = mfspr(SPRN_PID);
 +      unsigned long host_dawr1 = 0;
 +      unsigned long host_dawrx1 = 0;
 +
 +      if (cpu_has_feature(CPU_FTR_DAWR1)) {
 +              host_dawr1 = mfspr(SPRN_DAWR1);
 +              host_dawrx1 = mfspr(SPRN_DAWRX1);
 +      }
  
        /*
         * P8 and P9 suppress the HDEC exception when LPCR[HDICE] = 0,
        mtspr(SPRN_SPURR, vcpu->arch.spurr);
  
        if (dawr_enabled()) {
 -              mtspr(SPRN_DAWR0, vcpu->arch.dawr);
 -              mtspr(SPRN_DAWRX0, vcpu->arch.dawrx);
 +              mtspr(SPRN_DAWR0, vcpu->arch.dawr0);
 +              mtspr(SPRN_DAWRX0, vcpu->arch.dawrx0);
 +              if (cpu_has_feature(CPU_FTR_DAWR1)) {
 +                      mtspr(SPRN_DAWR1, vcpu->arch.dawr1);
 +                      mtspr(SPRN_DAWRX1, vcpu->arch.dawrx1);
 +              }
        }
        mtspr(SPRN_CIABR, vcpu->arch.ciabr);
        mtspr(SPRN_IC, vcpu->arch.ic);
              (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
        mtspr(SPRN_HFSCR, host_hfscr);
        mtspr(SPRN_CIABR, host_ciabr);
 -      mtspr(SPRN_DAWR0, host_dawr);
 -      mtspr(SPRN_DAWRX0, host_dawrx);
 +      mtspr(SPRN_DAWR0, host_dawr0);
 +      mtspr(SPRN_DAWRX0, host_dawrx0);
 +      if (cpu_has_feature(CPU_FTR_DAWR1)) {
 +              mtspr(SPRN_DAWR1, host_dawr1);
 +              mtspr(SPRN_DAWRX1, host_dawrx1);
 +      }
        mtspr(SPRN_PID, host_pidr);
  
        /*
@@@ -3611,7 -3597,6 +3613,7 @@@ static int kvmhv_p9_guest_entry(struct 
        unsigned long host_tidr = mfspr(SPRN_TIDR);
        unsigned long host_iamr = mfspr(SPRN_IAMR);
        unsigned long host_amr = mfspr(SPRN_AMR);
 +      unsigned long host_fscr = mfspr(SPRN_FSCR);
        s64 dec;
        u64 tb;
        int trap, save_pmu;
        if (host_amr != vcpu->arch.amr)
                mtspr(SPRN_AMR, host_amr);
  
 +      if (host_fscr != vcpu->arch.fscr)
 +              mtspr(SPRN_FSCR, host_fscr);
 +
        msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX);
        store_fp_state(&vcpu->arch.fp);
  #ifdef CONFIG_ALTIVEC
@@@ -4193,6 -4175,7 +4195,6 @@@ int kvmhv_run_single_vcpu(struct kvm_vc
  
        kvmppc_clear_host_core(pcpu);
  
 -      local_paca->kvm_hstate.tid = 0;
        local_paca->kvm_hstate.napping = 0;
        local_paca->kvm_hstate.kvm_split_mode = NULL;
        kvmppc_start_thread(vcpu, vc);
  
        kvmppc_set_host_core(pcpu);
  
+       guest_exit_irqoff();
        local_irq_enable();
-       guest_exit();
  
        cpumask_clear_cpu(pcpu, &kvm->arch.cpu_in_guest);
  
@@@ -4377,11 -4361,15 +4380,11 @@@ static int kvmppc_vcpu_run_hv(struct kv
  
        do {
                /*
 -               * The early POWER9 chips that can't mix radix and HPT threads
 -               * on the same core also need the workaround for the problem
 -               * where the TLB would prefetch entries in the guest exit path
 -               * for radix guests using the guest PIDR value and LPID 0.
 -               * The workaround is in the old path (kvmppc_run_vcpu())
 -               * but not the new path (kvmhv_run_single_vcpu()).
 +               * The TLB prefetch bug fixup is only in the kvmppc_run_vcpu
 +               * path, which also handles hash and dependent threads mode.
                 */
                if (kvm->arch.threads_indep && kvm_is_radix(kvm) &&
 -                  !no_mixing_hpt_and_radix)
 +                  !cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
                        r = kvmhv_run_single_vcpu(vcpu, ~(u64)0,
                                                  vcpu->arch.vcore->lpcr);
                else
        return ret;
  }
  
 +static int kvmhv_enable_dawr1(struct kvm *kvm)
 +{
 +      if (!cpu_has_feature(CPU_FTR_DAWR1))
 +              return -ENODEV;
 +
 +      /* kvm == NULL means the caller is testing if the capability exists */
 +      if (kvm)
 +              kvm->arch.dawr1_enabled = true;
 +      return 0;
 +}
 +
 +static bool kvmppc_hash_v3_possible(void)
 +{
 +      if (radix_enabled() && no_mixing_hpt_and_radix)
 +              return false;
 +
 +      return cpu_has_feature(CPU_FTR_ARCH_300) &&
 +              cpu_has_feature(CPU_FTR_HVMODE);
 +}
 +
  static struct kvmppc_ops kvm_ops_hv = {
        .get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv,
        .set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv,
        .store_to_eaddr = kvmhv_store_to_eaddr,
        .enable_svm = kvmhv_enable_svm,
        .svm_off = kvmhv_svm_off,
 +      .enable_dawr1 = kvmhv_enable_dawr1,
 +      .hash_v3_possible = kvmppc_hash_v3_possible,
  };
  
  static int kvm_init_subcore_bitmap(void)
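A note on the two hooks registered above: kvmppc_hash_v3_possible() reports hash-MMU guest support as unavailable when the host itself runs radix and the hardware cannot mix MMU modes, and kvmhv_enable_dawr1() lets an individual VM opt in to the second data address watchpoint when the CPU advertises CPU_FTR_DAWR1. As a hedged illustration of the intended consumer (not part of this series; error handling trimmed; assumes uapi headers that define KVM_CAP_PPC_DAWR1), a VMM would probe and enable the capability on its VM file descriptor roughly like this:

	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	static int vm_enable_dawr1(int vm_fd)
	{
		struct kvm_enable_cap cap = { .cap = KVM_CAP_PPC_DAWR1 };

		/* reported only when KVM HV is active and the CPU has DAWR1 */
		if (ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_PPC_DAWR1) != 1)
			return -1;

		/* flips kvm->arch.dawr1_enabled, after which guest entry also
		 * context-switches DAWR1/DAWRX1 */
		return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
	}

Only after this succeeds would the VMM advertise a second watchpoint to its guest.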
diff --combined arch/powerpc/kvm/book3s_hv_builtin.c
@@@ -17,6 -17,7 +17,7 @@@
  
  #include <asm/asm-prototypes.h>
  #include <asm/cputable.h>
+ #include <asm/interrupt.h>
  #include <asm/kvm_ppc.h>
  #include <asm/kvm_book3s.h>
  #include <asm/archrandom.h>
@@@ -277,7 -278,8 +278,7 @@@ void kvmhv_commence_exit(int trap
        struct kvmppc_vcore *vc = local_paca->kvm_hstate.kvm_vcore;
        int ptid = local_paca->kvm_hstate.ptid;
        struct kvm_split_mode *sip = local_paca->kvm_hstate.kvm_split_mode;
 -      int me, ee, i, t;
 -      int cpu0;
 +      int me, ee, i;
  
        /* Set our bit in the threads-exiting-guest map in the 0xff00
           bits of vcore->entry_exit_map */
                if ((ee >> 8) == 0)
                        kvmhv_interrupt_vcore(vc, ee);
        }
 -
 -      /*
 -       * On POWER9 when running a HPT guest on a radix host (sip != NULL),
 -       * we have to interrupt inactive CPU threads to get them to
 -       * restore the host LPCR value.
 -       */
 -      if (sip->lpcr_req) {
 -              if (cmpxchg(&sip->do_restore, 0, 1) == 0) {
 -                      vc = local_paca->kvm_hstate.kvm_vcore;
 -                      cpu0 = vc->pcpu + ptid - local_paca->kvm_hstate.tid;
 -                      for (t = 1; t < threads_per_core; ++t) {
 -                              if (sip->napped[t])
 -                                      kvmhv_rm_send_ipi(cpu0 + t);
 -                      }
 -              }
 -      }
  }
  
  struct kvmppc_host_rm_ops *kvmppc_host_rm_ops_hv;
@@@ -650,6 -668,95 +651,6 @@@ void kvmppc_bad_interrupt(struct pt_reg
        panic("Bad KVM trap");
  }
  
 -/*
 - * Functions used to switch LPCR HR and UPRT bits on all threads
 - * when entering and exiting HPT guests on a radix host.
 - */
 -
 -#define PHASE_REALMODE                1       /* in real mode */
 -#define PHASE_SET_LPCR                2       /* have set LPCR */
 -#define PHASE_OUT_OF_GUEST    4       /* have finished executing in guest */
 -#define PHASE_RESET_LPCR      8       /* have reset LPCR to host value */
 -
 -#define ALL(p)                (((p) << 24) | ((p) << 16) | ((p) << 8) | (p))
 -
 -static void wait_for_sync(struct kvm_split_mode *sip, int phase)
 -{
 -      int thr = local_paca->kvm_hstate.tid;
 -
 -      sip->lpcr_sync.phase[thr] |= phase;
 -      phase = ALL(phase);
 -      while ((sip->lpcr_sync.allphases & phase) != phase) {
 -              HMT_low();
 -              barrier();
 -      }
 -      HMT_medium();
 -}
 -
 -void kvmhv_p9_set_lpcr(struct kvm_split_mode *sip)
 -{
 -      int num_sets;
 -      unsigned long rb, set;
 -
 -      /* wait for every other thread to get to real mode */
 -      wait_for_sync(sip, PHASE_REALMODE);
 -
 -      /* Set LPCR and LPIDR */
 -      mtspr(SPRN_LPCR, sip->lpcr_req);
 -      mtspr(SPRN_LPID, sip->lpidr_req);
 -      isync();
 -
 -      /*
 -       * P10 will flush all the congruence class with a single tlbiel
 -       */
 -      if (cpu_has_feature(CPU_FTR_ARCH_31))
 -              num_sets =  1;
 -      else
 -              num_sets = POWER9_TLB_SETS_RADIX;
 -
 -      /* Invalidate the TLB on thread 0 */
 -      if (local_paca->kvm_hstate.tid == 0) {
 -              sip->do_set = 0;
 -              asm volatile("ptesync" : : : "memory");
 -              for (set = 0; set < num_sets; ++set) {
 -                      rb = TLBIEL_INVAL_SET_LPID +
 -                              (set << TLBIEL_INVAL_SET_SHIFT);
 -                      asm volatile(PPC_TLBIEL(%0, %1, 0, 0, 0) : :
 -                                   "r" (rb), "r" (0));
 -              }
 -              asm volatile("ptesync" : : : "memory");
 -      }
 -
 -      /* indicate that we have done so and wait for others */
 -      wait_for_sync(sip, PHASE_SET_LPCR);
 -      /* order read of sip->lpcr_sync.allphases vs. sip->do_set */
 -      smp_rmb();
 -}
 -
 -/*
 - * Called when a thread that has been in the guest needs
 - * to reload the host LPCR value - but only on POWER9 when
 - * running a HPT guest on a radix host.
 - */
 -void kvmhv_p9_restore_lpcr(struct kvm_split_mode *sip)
 -{
 -      /* we're out of the guest... */
 -      wait_for_sync(sip, PHASE_OUT_OF_GUEST);
 -
 -      mtspr(SPRN_LPID, 0);
 -      mtspr(SPRN_LPCR, sip->host_lpcr);
 -      isync();
 -
 -      if (local_paca->kvm_hstate.tid == 0) {
 -              sip->do_restore = 0;
 -              smp_wmb();      /* order store of do_restore vs. phase */
 -      }
 -
 -      wait_for_sync(sip, PHASE_RESET_LPCR);
 -      smp_mb();
 -      local_paca->kvm_hstate.kvm_split_mode = NULL;
 -}
 -
  static void kvmppc_end_cede(struct kvm_vcpu *vcpu)
  {
        vcpu->arch.ceded = 0;
diff --combined arch/powerpc/kvm/booke.c
@@@ -20,6 -20,7 +20,7 @@@
  
  #include <asm/cputable.h>
  #include <linux/uaccess.h>
+ #include <asm/interrupt.h>
  #include <asm/kvm_ppc.h>
  #include <asm/cacheflush.h>
  #include <asm/dbell.h>
@@@ -698,7 -699,7 +699,7 @@@ int kvmppc_core_prepare_to_enter(struc
  
                kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS);
                r = 1;
 -      };
 +      }
  
        return r;
  }
diff --combined arch/powerpc/kvm/powerpc.c
@@@ -611,8 -611,8 +611,8 @@@ int kvm_vm_ioctl_check_extension(struc
                r = !!(hv_enabled && radix_enabled());
                break;
        case KVM_CAP_PPC_MMU_HASH_V3:
 -              r = !!(hv_enabled && cpu_has_feature(CPU_FTR_ARCH_300) &&
 -                     cpu_has_feature(CPU_FTR_HVMODE));
 +              r = !!(hv_enabled && kvmppc_hv_ops->hash_v3_possible &&
 +                     kvmppc_hv_ops->hash_v3_possible());
                break;
        case KVM_CAP_PPC_NESTED_HV:
                r = !!(hv_enabled && kvmppc_hv_ops->enable_nested &&
                r = hv_enabled && kvmppc_hv_ops->enable_svm &&
                        !kvmppc_hv_ops->enable_svm(NULL);
                break;
 +      case KVM_CAP_PPC_DAWR1:
 +              r = !!(hv_enabled && kvmppc_hv_ops->enable_dawr1 &&
 +                     !kvmppc_hv_ops->enable_dawr1(NULL));
 +              break;
  #endif
        default:
                r = 0;
@@@ -1522,7 -1518,7 +1522,7 @@@ int kvmppc_handle_vmx_load(struct kvm_v
        return emulated;
  }
  
- int kvmppc_get_vmx_dword(struct kvm_vcpu *vcpu, int index, u64 *val)
+ static int kvmppc_get_vmx_dword(struct kvm_vcpu *vcpu, int index, u64 *val)
  {
        union kvmppc_one_reg reg;
        int vmx_offset = 0;
        return result;
  }
  
- int kvmppc_get_vmx_word(struct kvm_vcpu *vcpu, int index, u64 *val)
+ static int kvmppc_get_vmx_word(struct kvm_vcpu *vcpu, int index, u64 *val)
  {
        union kvmppc_one_reg reg;
        int vmx_offset = 0;
        return result;
  }
  
- int kvmppc_get_vmx_hword(struct kvm_vcpu *vcpu, int index, u64 *val)
+ static int kvmppc_get_vmx_hword(struct kvm_vcpu *vcpu, int index, u64 *val)
  {
        union kvmppc_one_reg reg;
        int vmx_offset = 0;
        return result;
  }
  
- int kvmppc_get_vmx_byte(struct kvm_vcpu *vcpu, int index, u64 *val)
+ static int kvmppc_get_vmx_byte(struct kvm_vcpu *vcpu, int index, u64 *val)
  {
        union kvmppc_one_reg reg;
        int vmx_offset = 0;
@@@ -2191,12 -2187,6 +2191,12 @@@ int kvm_vm_ioctl_enable_cap(struct kvm 
                        break;
                r = kvm->arch.kvm_ops->enable_svm(kvm);
                break;
 +      case KVM_CAP_PPC_DAWR1:
 +              r = -EINVAL;
 +              if (!is_kvmppc_hv_enabled(kvm) || !kvm->arch.kvm_ops->enable_dawr1)
 +                      break;
 +              r = kvm->arch.kvm_ops->enable_dawr1(kvm);
 +              break;
  #endif
        default:
                r = -EINVAL;
diff --combined arch/powerpc/lib/sstep.c
@@@ -818,15 -818,13 +818,15 @@@ void emulate_vsx_store(struct instructi
                        break;
                if (rev) {
                        /* reverse 32 bytes */
 -                      buf.d[0] = byterev_8(reg->d[3]);
 -                      buf.d[1] = byterev_8(reg->d[2]);
 -                      buf.d[2] = byterev_8(reg->d[1]);
 -                      buf.d[3] = byterev_8(reg->d[0]);
 -                      reg = &buf;
 +                      union vsx_reg buf32[2];
 +                      buf32[0].d[0] = byterev_8(reg[1].d[1]);
 +                      buf32[0].d[1] = byterev_8(reg[1].d[0]);
 +                      buf32[1].d[0] = byterev_8(reg[0].d[1]);
 +                      buf32[1].d[1] = byterev_8(reg[0].d[0]);
 +                      memcpy(mem, buf32, size);
 +              } else {
 +                      memcpy(mem, reg, size);
                }
 -              memcpy(mem, reg, size);
                break;
        case 16:
                /* stxv, stxvx, stxvl, stxvll */
@@@ -1306,9 -1304,11 +1306,11 @@@ int analyse_instr(struct instruction_o
                if ((word & 0xfe2) == 2)
                        op->type = SYSCALL;
                else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) &&
-                               (word & 0xfe3) == 1)
+                               (word & 0xfe3) == 1) {  /* scv */
                        op->type = SYSCALL_VECTORED_0;
-               else
+                       if (!cpu_has_feature(CPU_FTR_ARCH_300))
+                               goto unknown_opcode;
+               } else
                        op->type = UNKNOWN;
                return 0;
  #endif
  #ifdef __powerpc64__
        case 1:
                if (!cpu_has_feature(CPU_FTR_ARCH_31))
-                       return -1;
+                       goto unknown_opcode;
  
                prefix_r = GET_PREFIX_R(word);
                ra = GET_PREFIX_RA(suffix);
  
  #ifdef __powerpc64__
        case 4:
+               /*
+                * There are very many instructions with this primary opcode
+                * introduced in the ISA as early as v2.03. However, the ones
+                * we currently emulate were all introduced with ISA 3.0
+                */
                if (!cpu_has_feature(CPU_FTR_ARCH_300))
-                       return -1;
+                       goto unknown_opcode;
  
                switch (word & 0x3f) {
                case 48:        /* maddhd */
                 * There are other instructions from ISA 3.0 with the same
                 * primary opcode which do not have emulation support yet.
                 */
-               return -1;
+               goto unknown_opcode;
  #endif
  
        case 7:         /* mulli */
        case 19:
                if (((word >> 1) & 0x1f) == 2) {
                        /* addpcis */
+                       if (!cpu_has_feature(CPU_FTR_ARCH_300))
+                               goto unknown_opcode;
                        imm = (short) (word & 0xffc1);  /* d0 + d2 fields */
                        imm |= (word >> 15) & 0x3e;     /* d1 field */
                        op->val = regs->nip + (imm << 16) + 4;
  #ifdef __powerpc64__
                case 265:       /* modud */
                        if (!cpu_has_feature(CPU_FTR_ARCH_300))
-                               return -1;
+                               goto unknown_opcode;
                        op->val = regs->gpr[ra] % regs->gpr[rb];
                        goto compute_done;
  #endif
  
                case 267:       /* moduw */
                        if (!cpu_has_feature(CPU_FTR_ARCH_300))
-                               return -1;
+                               goto unknown_opcode;
                        op->val = (unsigned int) regs->gpr[ra] %
                                (unsigned int) regs->gpr[rb];
                        goto compute_done;
  #endif
                case 755:       /* darn */
                        if (!cpu_has_feature(CPU_FTR_ARCH_300))
-                               return -1;
+                               goto unknown_opcode;
                        switch (ra & 0x3) {
                        case 0:
                                /* 32-bit conditioned */
                                goto compute_done;
                        }
  
-                       return -1;
+                       goto unknown_opcode;
  #ifdef __powerpc64__
                case 777:       /* modsd */
                        if (!cpu_has_feature(CPU_FTR_ARCH_300))
-                               return -1;
+                               goto unknown_opcode;
                        op->val = (long int) regs->gpr[ra] %
                                (long int) regs->gpr[rb];
                        goto compute_done;
  #endif
                case 779:       /* modsw */
                        if (!cpu_has_feature(CPU_FTR_ARCH_300))
-                               return -1;
+                               goto unknown_opcode;
                        op->val = (int) regs->gpr[ra] %
                                (int) regs->gpr[rb];
                        goto compute_done;
  #endif
                case 538:       /* cnttzw */
                        if (!cpu_has_feature(CPU_FTR_ARCH_300))
-                               return -1;
+                               goto unknown_opcode;
                        val = (unsigned int) regs->gpr[rd];
                        op->val = (val ? __builtin_ctz(val) : 32);
                        goto logical_done;
  #ifdef __powerpc64__
                case 570:       /* cnttzd */
                        if (!cpu_has_feature(CPU_FTR_ARCH_300))
-                               return -1;
+                               goto unknown_opcode;
                        val = regs->gpr[rd];
                        op->val = (val ? __builtin_ctzl(val) : 64);
                        goto logical_done;
                case 890:       /* extswsli with sh_5 = 0 */
                case 891:       /* extswsli with sh_5 = 1 */
                        if (!cpu_has_feature(CPU_FTR_ARCH_300))
-                               return -1;
+                               goto unknown_opcode;
                        op->type = COMPUTE + SETREG;
                        sh = rb | ((word & 2) << 4);
                        val = (signed int) regs->gpr[rd];
                        break;
  
                case 268:       /* lxvx */
+                       if (!cpu_has_feature(CPU_FTR_ARCH_300))
+                               goto unknown_opcode;
                        op->reg = rd | ((word & 1) << 5);
                        op->type = MKOP(LOAD_VSX, 0, 16);
                        op->element_size = 16;
                case 269:       /* lxvl */
                case 301: {     /* lxvll */
                        int nb;
+                       if (!cpu_has_feature(CPU_FTR_ARCH_300))
+                               goto unknown_opcode;
                        op->reg = rd | ((word & 1) << 5);
                        op->ea = ra ? regs->gpr[ra] : 0;
                        nb = regs->gpr[rb] & 0xff;
  
                case 333:       /* lxvpx */
                        if (!cpu_has_feature(CPU_FTR_ARCH_31))
-                               return -1;
+                               goto unknown_opcode;
                        op->reg = VSX_REGISTER_XTP(rd);
                        op->type = MKOP(LOAD_VSX, 0, 32);
                        op->element_size = 32;
                        break;
  
                case 364:       /* lxvwsx */
+                       if (!cpu_has_feature(CPU_FTR_ARCH_300))
+                               goto unknown_opcode;
                        op->reg = rd | ((word & 1) << 5);
                        op->type = MKOP(LOAD_VSX, 0, 4);
                        op->element_size = 4;
                        break;
  
                case 396:       /* stxvx */
+                       if (!cpu_has_feature(CPU_FTR_ARCH_300))
+                               goto unknown_opcode;
                        op->reg = rd | ((word & 1) << 5);
                        op->type = MKOP(STORE_VSX, 0, 16);
                        op->element_size = 16;
                case 397:       /* stxvl */
                case 429: {     /* stxvll */
                        int nb;
+                       if (!cpu_has_feature(CPU_FTR_ARCH_300))
+                               goto unknown_opcode;
                        op->reg = rd | ((word & 1) << 5);
                        op->ea = ra ? regs->gpr[ra] : 0;
                        nb = regs->gpr[rb] & 0xff;
                }
                case 461:       /* stxvpx */
                        if (!cpu_has_feature(CPU_FTR_ARCH_31))
-                               return -1;
+                               goto unknown_opcode;
                        op->reg = VSX_REGISTER_XTP(rd);
                        op->type = MKOP(STORE_VSX, 0, 32);
                        op->element_size = 32;
                        break;
  
                case 781:       /* lxsibzx */
+                       if (!cpu_has_feature(CPU_FTR_ARCH_300))
+                               goto unknown_opcode;
                        op->reg = rd | ((word & 1) << 5);
                        op->type = MKOP(LOAD_VSX, 0, 1);
                        op->element_size = 8;
                        break;
  
                case 812:       /* lxvh8x */
+                       if (!cpu_has_feature(CPU_FTR_ARCH_300))
+                               goto unknown_opcode;
                        op->reg = rd | ((word & 1) << 5);
                        op->type = MKOP(LOAD_VSX, 0, 16);
                        op->element_size = 2;
                        break;
  
                case 813:       /* lxsihzx */
+                       if (!cpu_has_feature(CPU_FTR_ARCH_300))
+                               goto unknown_opcode;
                        op->reg = rd | ((word & 1) << 5);
                        op->type = MKOP(LOAD_VSX, 0, 2);
                        op->element_size = 8;
                        break;
  
                case 876:       /* lxvb16x */
+                       if (!cpu_has_feature(CPU_FTR_ARCH_300))
+                               goto unknown_opcode;
                        op->reg = rd | ((word & 1) << 5);
                        op->type = MKOP(LOAD_VSX, 0, 16);
                        op->element_size = 1;
                        break;
  
                case 909:       /* stxsibx */
+                       if (!cpu_has_feature(CPU_FTR_ARCH_300))
+                               goto unknown_opcode;
                        op->reg = rd | ((word & 1) << 5);
                        op->type = MKOP(STORE_VSX, 0, 1);
                        op->element_size = 8;
                        break;
  
                case 940:       /* stxvh8x */
+                       if (!cpu_has_feature(CPU_FTR_ARCH_300))
+                               goto unknown_opcode;
                        op->reg = rd | ((word & 1) << 5);
                        op->type = MKOP(STORE_VSX, 0, 16);
                        op->element_size = 2;
                        break;
  
                case 941:       /* stxsihx */
+                       if (!cpu_has_feature(CPU_FTR_ARCH_300))
+                               goto unknown_opcode;
                        op->reg = rd | ((word & 1) << 5);
                        op->type = MKOP(STORE_VSX, 0, 2);
                        op->element_size = 8;
                        break;
  
                case 1004:      /* stxvb16x */
+                       if (!cpu_has_feature(CPU_FTR_ARCH_300))
+                               goto unknown_opcode;
                        op->reg = rd | ((word & 1) << 5);
                        op->type = MKOP(STORE_VSX, 0, 16);
                        op->element_size = 1;
                        op->type = MKOP(LOAD_FP, 0, 16);
                        break;
                case 2:         /* lxsd */
+                       if (!cpu_has_feature(CPU_FTR_ARCH_300))
+                               goto unknown_opcode;
                        op->reg = rd + 32;
                        op->type = MKOP(LOAD_VSX, 0, 8);
                        op->element_size = 8;
                        op->vsx_flags = VSX_CHECK_VEC;
                        break;
                case 3:         /* lxssp */
+                       if (!cpu_has_feature(CPU_FTR_ARCH_300))
+                               goto unknown_opcode;
                        op->reg = rd + 32;
                        op->type = MKOP(LOAD_VSX, 0, 4);
                        op->element_size = 8;
  #ifdef CONFIG_VSX
        case 6:
                if (!cpu_has_feature(CPU_FTR_ARCH_31))
-                       return -1;
+                       goto unknown_opcode;
                op->ea = dqform_ea(word, regs);
                op->reg = VSX_REGISTER_XTP(rd);
                op->element_size = 32;
                        break;
  
                case 1:         /* lxv */
+                       if (!cpu_has_feature(CPU_FTR_ARCH_300))
+                               goto unknown_opcode;
                        op->ea = dqform_ea(word, regs);
                        if (word & 8)
                                op->reg = rd + 32;
  
                case 2:         /* stxsd with LSB of DS field = 0 */
                case 6:         /* stxsd with LSB of DS field = 1 */
+                       if (!cpu_has_feature(CPU_FTR_ARCH_300))
+                               goto unknown_opcode;
                        op->ea = dsform_ea(word, regs);
                        op->reg = rd + 32;
                        op->type = MKOP(STORE_VSX, 0, 8);
  
                case 3:         /* stxssp with LSB of DS field = 0 */
                case 7:         /* stxssp with LSB of DS field = 1 */
+                       if (!cpu_has_feature(CPU_FTR_ARCH_300))
+                               goto unknown_opcode;
                        op->ea = dsform_ea(word, regs);
                        op->reg = rd + 32;
                        op->type = MKOP(STORE_VSX, 0, 4);
                        break;
  
                case 5:         /* stxv */
+                       if (!cpu_has_feature(CPU_FTR_ARCH_300))
+                               goto unknown_opcode;
                        op->ea = dqform_ea(word, regs);
                        if (word & 8)
                                op->reg = rd + 32;
                break;
        case 1: /* Prefixed instructions */
                if (!cpu_has_feature(CPU_FTR_ARCH_31))
-                       return -1;
+                       goto unknown_opcode;
  
                prefix_r = GET_PREFIX_R(word);
                ra = GET_PREFIX_RA(suffix);
  
        }
  
+       if (OP_IS_LOAD_STORE(op->type) && (op->type & UPDATE)) {
+               switch (GETTYPE(op->type)) {
+               case LOAD:
+                       if (ra == rd)
+                               goto unknown_opcode;
+                       fallthrough;
+               case STORE:
+               case LOAD_FP:
+               case STORE_FP:
+                       if (ra == 0)
+                               goto unknown_opcode;
+               }
+       }
  #ifdef CONFIG_VSX
        if ((GETTYPE(op->type) == LOAD_VSX ||
             GETTYPE(op->type) == STORE_VSX) &&
  
        return 0;
  
+  unknown_opcode:
+       op->type = UNKNOWN;
+       return 0;
   logical_done:
        if (word & 1)
                set_cr0(regs, op);
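The unknown_opcode label added above changes how ISA-version gating is reported by the decoder: an instruction that exists in the architecture but not on the running CPU, or an invalid update-form load/store (ra == 0, or ra == rd for loads), now comes back with a 0 return and op->type set to UNKNOWN instead of an early -1 return. A hedged caller-side sketch of what that means for users of analyse_instr() (illustrative helper, not taken from this patch):

	#include <linux/types.h>
	#include <asm/inst.h>
	#include <asm/ptrace.h>
	#include <asm/sstep.h>

	/* true when the decoder produced an operation the caller may act on */
	static bool decode_for_step(struct pt_regs *regs, struct ppc_inst instr,
				    struct instruction_op *op)
	{
		if (analyse_instr(op, regs, instr) < 0)
			return false;
		/* "valid in the ISA but unsupported here" now lands in this
		 * bucket rather than in the error return above */
		return GETTYPE(op->type) != UNKNOWN;
	}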
diff --combined arch/powerpc/perf/core-book3s.c
@@@ -54,6 -54,9 +54,9 @@@ struct cpu_hw_events
        struct  perf_branch_stack       bhrb_stack;
        struct  perf_branch_entry       bhrb_entries[BHRB_MAX_ENTRIES];
        u64                             ic_init;
+       /* Store the PMC values */
+       unsigned long pmcs[MAX_HWEVENTS];
  };
  
  static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
@@@ -110,10 -113,6 +113,6 @@@ static inline void perf_read_regs(struc
  {
        regs->result = 0;
  }
- static inline int perf_intr_is_nmi(struct pt_regs *regs)
- {
-       return 0;
- }
  
  static inline int siar_valid(struct pt_regs *regs)
  {
@@@ -147,6 -146,17 +146,17 @@@ bool is_sier_available(void
        return false;
  }
  
+ /*
+  * Return PMC value corresponding to the
+  * index passed.
+  */
+ unsigned long get_pmcs_ext_regs(int idx)
+ {
+       struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
+       return cpuhw->pmcs[idx];
+ }
  static bool regs_use_siar(struct pt_regs *regs)
  {
        /*
@@@ -354,15 -364,6 +364,6 @@@ static inline void perf_read_regs(struc
  }
  
  /*
-  * If interrupts were soft-disabled when a PMU interrupt occurs, treat
-  * it as an NMI.
-  */
- static inline int perf_intr_is_nmi(struct pt_regs *regs)
- {
-       return (regs->softe & IRQS_DISABLED);
- }
- /*
   * On processors like P7+ that have the SIAR-Valid bit, marked instructions
   * must be sampled only if the SIAR-valid bit is set.
   *
@@@ -915,7 -916,7 +916,7 @@@ void perf_event_print_debug(void
   */
  static int power_check_constraints(struct cpu_hw_events *cpuhw,
                                   u64 event_id[], unsigned int cflags[],
-                                  int n_ev)
+                                  int n_ev, struct perf_event **event)
  {
        unsigned long mask, value, nv;
        unsigned long smasks[MAX_HWEVENTS], svalues[MAX_HWEVENTS];
                        event_id[i] = cpuhw->alternatives[i][0];
                }
                if (ppmu->get_constraint(event_id[i], &cpuhw->amasks[i][0],
-                                        &cpuhw->avalues[i][0]))
+                                        &cpuhw->avalues[i][0], event[i]->attr.config1))
                        return -1;
        }
        value = mask = 0;
                for (j = 1; j < n_alt[i]; ++j)
                        ppmu->get_constraint(cpuhw->alternatives[i][j],
                                             &cpuhw->amasks[i][j],
-                                            &cpuhw->avalues[i][j]);
+                                            &cpuhw->avalues[i][j],
+                                            event[i]->attr.config1);
        }
  
        /* enumerate all possibilities and see if any will work */
@@@ -1391,7 -1393,7 +1393,7 @@@ static void power_pmu_enable(struct pm
        memset(&cpuhw->mmcr, 0, sizeof(cpuhw->mmcr));
  
        if (ppmu->compute_mmcr(cpuhw->events, cpuhw->n_events, hwc_index,
-                              &cpuhw->mmcr, cpuhw->event)) {
+                              &cpuhw->mmcr, cpuhw->event, ppmu->flags)) {
                /* shouldn't ever get here */
                printk(KERN_ERR "oops compute_mmcr failed\n");
                goto out;
@@@ -1579,7 -1581,7 +1581,7 @@@ static int power_pmu_add(struct perf_ev
  
        if (check_excludes(cpuhw->event, cpuhw->flags, n0, 1))
                goto out;
-       if (power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n0 + 1))
+       if (power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n0 + 1, cpuhw->event))
                goto out;
        event->hw.config = cpuhw->events[n0];
  
@@@ -1789,7 -1791,7 +1791,7 @@@ static int power_pmu_commit_txn(struct 
        n = cpuhw->n_events;
        if (check_excludes(cpuhw->event, cpuhw->flags, 0, n))
                return -EAGAIN;
-       i = power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n);
+       i = power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n, cpuhw->event);
        if (i < 0)
                return -EAGAIN;
  
@@@ -2027,7 -2029,7 +2029,7 @@@ static int power_pmu_event_init(struct 
        local_irq_save(irq_flags);
        cpuhw = this_cpu_ptr(&cpu_hw_events);
  
-       err = power_check_constraints(cpuhw, events, cflags, n + 1);
+       err = power_check_constraints(cpuhw, events, cflags, n + 1, ctrs);
  
        if (has_branch_stack(event)) {
                u64 bhrb_filter = -1;
@@@ -2149,7 -2151,17 +2151,17 @@@ static void record_and_restart(struct p
                        left += period;
                        if (left <= 0)
                                left = period;
-                       record = siar_valid(regs);
+                       /*
+                        * If address is not requested in the sample via
+                        * PERF_SAMPLE_IP, just record that sample irrespective
+                        * of SIAR valid check.
+                        */
+                       if (event->attr.sample_type & PERF_SAMPLE_IP)
+                               record = siar_valid(regs);
+                       else
+                               record = 1;
                        event->hw.last_period = event->hw.sample_period;
                }
                if (left < 0x80000000LL)
         * MMCR2. Check attr.exclude_kernel and address to drop the sample in
         * these cases.
         */
-       if (event->attr.exclude_kernel && record)
-               if (is_kernel_addr(mfspr(SPRN_SIAR)))
-                       record = 0;
+       if (event->attr.exclude_kernel &&
+           (event->attr.sample_type & PERF_SAMPLE_IP) &&
+           is_kernel_addr(mfspr(SPRN_SIAR)))
+               record = 0;
  
        /*
         * Finally record data if requested.
  
                if (event->attr.sample_type & PERF_SAMPLE_WEIGHT &&
                                                ppmu->get_mem_weight)
 -                      ppmu->get_mem_weight(&data.weight);
 +                      ppmu->get_mem_weight(&data.weight.full);
  
                if (perf_event_overflow(event, &data, regs))
                        power_pmu_stop(event, 0);
@@@ -2277,9 -2290,7 +2290,7 @@@ static void __perf_event_interrupt(stru
        int i, j;
        struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
        struct perf_event *event;
-       unsigned long val[8];
        int found, active;
-       int nmi;
  
        if (cpuhw->n_limited)
                freeze_limited_counters(cpuhw, mfspr(SPRN_PMC5),
  
        perf_read_regs(regs);
  
-       /*
-        * If perf interrupts hit in a local_irq_disable (soft-masked) region,
-        * we consider them as NMIs. This is required to prevent hash faults on
-        * user addresses when reading callchains. See the NMI test in
-        * do_hash_page.
-        */
-       nmi = perf_intr_is_nmi(regs);
-       if (nmi)
-               nmi_enter();
-       else
-               irq_enter();
        /* Read all the PMCs since we'll need them a bunch of times */
        for (i = 0; i < ppmu->n_counter; ++i)
-               val[i] = read_pmc(i + 1);
+               cpuhw->pmcs[i] = read_pmc(i + 1);
  
        /* Try to find what caused the IRQ */
        found = 0;
        for (i = 0; i < ppmu->n_counter; ++i) {
-               if (!pmc_overflow(val[i]))
+               if (!pmc_overflow(cpuhw->pmcs[i]))
                        continue;
                if (is_limited_pmc(i + 1))
                        continue; /* these won't generate IRQs */
                        event = cpuhw->event[j];
                        if (event->hw.idx == (i + 1)) {
                                active = 1;
-                               record_and_restart(event, val[i], regs);
+                               record_and_restart(event, cpuhw->pmcs[i], regs);
                                break;
                        }
                }
                        event = cpuhw->event[i];
                        if (!event->hw.idx || is_limited_pmc(event->hw.idx))
                                continue;
-                       if (pmc_overflow_power7(val[event->hw.idx - 1])) {
+                       if (pmc_overflow_power7(cpuhw->pmcs[event->hw.idx - 1])) {
                                /* event has overflowed in a buggy way*/
                                found = 1;
                                record_and_restart(event,
-                                                  val[event->hw.idx - 1],
+                                                  cpuhw->pmcs[event->hw.idx - 1],
                                                   regs);
                        }
                }
        }
-       if (!found && !nmi && printk_ratelimit())
-               printk(KERN_WARNING "Can't find PMC that caused IRQ\n");
+       if (unlikely(!found) && !arch_irq_disabled_regs(regs))
+               printk_ratelimited(KERN_WARNING "Can't find PMC that caused IRQ\n");
  
        /*
         * Reset MMCR0 to its normal value.  This will set PMXE and
         */
        write_mmcr0(cpuhw, cpuhw->mmcr.mmcr0);
  
-       if (nmi)
-               nmi_exit();
-       else
-               irq_exit();
+       /* Clear the cpuhw->pmcs */
+       memset(&cpuhw->pmcs, 0, sizeof(cpuhw->pmcs));
  }
  
  static void perf_event_interrupt(struct pt_regs *regs)
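Two behavioural notes on the perf changes above: the handler no longer does its own nmi_enter()/irq_enter() bookkeeping because that accounting has moved out of this handler into the interrupt entry path, and a sample is only gated on (and filtered by exclude_kernel against) a valid SIAR when it actually requests an instruction pointer. A hedged userspace sketch of an event that benefits from the latter, with illustrative values:

	#include <string.h>
	#include <linux/perf_event.h>

	/* No PERF_SAMPLE_IP requested, so on this kernel such samples are
	 * kept even when the PMU cannot supply a valid SIAR for them. */
	static void init_cycles_attr(struct perf_event_attr *attr)
	{
		memset(attr, 0, sizeof(*attr));
		attr->size = sizeof(*attr);
		attr->type = PERF_TYPE_HARDWARE;
		attr->config = PERF_COUNT_HW_CPU_CYCLES;
		attr->sample_period = 100000;
		attr->sample_type = PERF_SAMPLE_TIME | PERF_SAMPLE_CPU;
	}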
diff --combined drivers/spi/spi-mpc52xx.c
@@@ -120,7 -120,7 +120,7 @@@ static void mpc52xx_spi_start_transfer(
        ms->cs_change = ms->transfer->cs_change;
  
        /* Write out the first byte */
-       ms->wcol_tx_timestamp = get_tbl();
+       ms->wcol_tx_timestamp = mftb();
        if (ms->tx_buf)
                out_8(ms->regs + SPI_DATA, *ms->tx_buf++);
        else
@@@ -221,8 -221,8 +221,8 @@@ static int mpc52xx_spi_fsmstate_transfe
                 * but it can also be worked around simply by retrying the
                 * transfer which is what we do here. */
                ms->wcol_count++;
-               ms->wcol_ticks += get_tbl() - ms->wcol_tx_timestamp;
-               ms->wcol_tx_timestamp = get_tbl();
+               ms->wcol_ticks += mftb() - ms->wcol_tx_timestamp;
+               ms->wcol_tx_timestamp = mftb();
                data = 0;
                if (ms->tx_buf)
                        data = *(ms->tx_buf - 1);
        /* Is the transfer complete? */
        ms->len--;
        if (ms->len == 0) {
-               ms->timestamp = get_tbl();
+               ms->timestamp = mftb();
 -              ms->timestamp += ms->transfer->delay_usecs * tb_ticks_per_usec;
 +              if (ms->transfer->delay.unit == SPI_DELAY_UNIT_USECS)
 +                      ms->timestamp += ms->transfer->delay.value *
 +                                       tb_ticks_per_usec;
                ms->state = mpc52xx_spi_fsmstate_wait;
                return FSM_CONTINUE;
        }
  
        /* Write out the next byte */
-       ms->wcol_tx_timestamp = get_tbl();
+       ms->wcol_tx_timestamp = mftb();
        if (ms->tx_buf)
                out_8(ms->regs + SPI_DATA, *ms->tx_buf++);
        else
@@@ -278,7 -276,7 +278,7 @@@ mpc52xx_spi_fsmstate_wait(int irq, stru
                dev_err(&ms->master->dev, "spurious irq, status=0x%.2x\n",
                        status);
  
-       if (((int)get_tbl()) - ms->timestamp < 0)
+       if (((int)mftb()) - ms->timestamp < 0)
                return FSM_POLL;
  
        ms->message->actual_length += ms->transfer->len;
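Besides replacing get_tbl() with the generic mftb() accessor, the changes above make the driver honour the per-transfer struct spi_delay instead of the removed delay_usecs field; only the microsecond unit is converted to timebase ticks here. A hedged sketch of the client side, with an assumed buffer and illustrative values:

	#include <linux/types.h>
	#include <linux/string.h>
	#include <linux/spi/spi.h>

	/* A client wanting a 10 us pause after a transfer now fills in
	 * spi_transfer.delay rather than the old delay_usecs field. */
	static void fill_xfer(struct spi_transfer *t, void *buf, size_t len)
	{
		memset(t, 0, sizeof(*t));
		t->tx_buf = buf;
		t->len = len;
		t->delay.value = 10;
		t->delay.unit = SPI_DELAY_UNIT_USECS;
	}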