Merge tag 'powerpc-6.6-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc...
author     Linus Torvalds <torvalds@linux-foundation.org>
           Thu, 31 Aug 2023 19:43:10 +0000 (12:43 -0700)
committer  Linus Torvalds <torvalds@linux-foundation.org>
           Thu, 31 Aug 2023 19:43:10 +0000 (12:43 -0700)
Pull powerpc updates from Michael Ellerman:

 - Add HOTPLUG_SMT support (/sys/devices/system/cpu/smt) and honour the
   configured SMT state when hotplugging CPUs into the system (see the
   usage sketch after this list)

 - Combine final TLB flush and lazy TLB mm shootdown IPIs when using the
   Radix MMU to avoid a broadcast TLBIE flush on exit

 - Drop the exclusion between ptrace/perf watchpoints, and drop the now
   unused associated arch hooks

 - Add support for the "nohlt" command line option to disable CPU idle

 - Add support for -fpatchable-function-entry for ftrace, with GCC >=
   13.1

 - Rework memory block size determination, and support 256MB size on
   systems with GPUs that have hotpluggable memory

 - Various other small features and fixes
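
The first item is the most directly user-visible change: with HOTPLUG_SMT
enabled, powerpc now exposes the generic SMT control files under
/sys/devices/system/cpu/smt.  Below is a minimal illustrative sketch (not
part of this merge) of how userspace might query and change that state; it
relies only on the generic "control" and "active" attributes of the
HOTPLUG_SMT ABI, and the helper itself (smt_state.c) is hypothetical.

    /* smt_state.c - query (and optionally set) the SMT control state.
     *
     * Illustration only; uses the generic HOTPLUG_SMT sysfs attributes
     * documented in Documentation/ABI/testing/sysfs-devices-system-cpu.
     */
    #include <stdio.h>
    #include <string.h>

    #define SMT_DIR "/sys/devices/system/cpu/smt"

    static int read_attr(const char *name, char *buf, size_t len)
    {
        char path[256];
        FILE *f;

        snprintf(path, sizeof(path), SMT_DIR "/%s", name);
        f = fopen(path, "r");
        if (!f)
            return -1;
        if (!fgets(buf, len, f))
            buf[0] = '\0';
        fclose(f);
        buf[strcspn(buf, "\n")] = '\0';   /* strip trailing newline */
        return 0;
    }

    int main(int argc, char **argv)
    {
        char val[64];

        if (read_attr("control", val, sizeof(val)) == 0)
            printf("smt/control: %s\n", val);  /* e.g. on, off, forceoff */
        if (read_attr("active", val, sizeof(val)) == 0)
            printf("smt/active:  %s\n", val);  /* 1 if any sibling online */

        /*
         * Optionally request a new state, e.g. "./smt_state off" (root
         * only).  With SMT_NUM_THREADS_DYNAMIC (selected by powerpc in
         * this merge) a thread count such as "2" may also be accepted.
         */
        if (argc > 1) {
            FILE *f = fopen(SMT_DIR "/control", "w");

            if (!f) {
                perror("smt/control");
                return 1;
            }
            fprintf(f, "%s\n", argv[1]);
            fclose(f);
        }
        return 0;
    }

The initial state can also be chosen at boot time via the nosmt parameter,
whose documentation the hunks below extend to cover PPC.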

Thanks to Andrew Donnellan, Aneesh Kumar K.V, Arnd Bergmann, Athira
Rajeev, Benjamin Gray, Christophe Leroy, Frederic Barrat, Gautam
Menghani, Geoff Levand, Hari Bathini, Immad Mir, Jialin Zhang, Joel
Stanley, Jordan Niethe, Justin Stitt, Kajol Jain, Kees Cook, Krzysztof
Kozlowski, Laurent Dufour, Liang He, Linus Walleij, Mahesh Salgaonkar,
Masahiro Yamada, Michal Suchanek, Nageswara R Sastry, Nathan Chancellor,
Nathan Lynch, Naveen N Rao, Nicholas Piggin, Nick Desaulniers, Omar
Sandoval, Randy Dunlap, Reza Arbab, Rob Herring, Russell Currey, Sourabh
Jain, Thomas Gleixner, Trevor Woerner, Uwe Kleine-König, Vaibhav Jain,
Xiongfeng Wang, Yuan Tan, Zhang Rui, and Zheng Zengkai.

* tag 'powerpc-6.6-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (135 commits)
  macintosh/ams: linux/platform_device.h is needed
  powerpc/xmon: Reapply "Relax frame size for clang"
  powerpc/mm/book3s64: Use 256M as the upper limit with coherent device memory attached
  powerpc/mm/book3s64: Fix build error with SPARSEMEM disabled
  powerpc/iommu: Fix notifiers being shared by PCI and VIO buses
  powerpc/mpc5xxx: Add missing fwnode_handle_put()
  powerpc/config: Disable SLAB_DEBUG_ON in skiroot
  powerpc/pseries: Remove unused hcall tracing instruction
  powerpc/pseries: Fix hcall tracepoints with JUMP_LABEL=n
  powerpc: dts: add missing space before {
  powerpc/eeh: Use pci_dev_id() to simplify the code
  powerpc/64s: Move CPU -mtune options into Kconfig
  powerpc/powermac: Fix unused function warning
  powerpc/pseries: Rework lppaca_shared_proc() to avoid DEBUG_PREEMPT
  powerpc: Don't include lppaca.h in paca.h
  powerpc/pseries: Move hcall_vphn() prototype into vphn.h
  powerpc/pseries: Move VPHN constants into vphn.h
  cxl: Drop unused detach_spa()
  powerpc: Drop zalloc_maybe_bootmem()
  powerpc/powernv: Use struct opal_prd_msg in more places
  ...

27 files changed:
Documentation/admin-guide/kernel-parameters.txt
arch/powerpc/Kconfig
arch/powerpc/configs/pmac32_defconfig
arch/powerpc/configs/ppc64_defconfig
arch/powerpc/configs/ppc6xx_defconfig
arch/powerpc/crypto/Kconfig
arch/powerpc/include/asm/book3s/32/pgtable.h
arch/powerpc/include/asm/nohash/32/pgtable.h
arch/powerpc/include/asm/nohash/64/pgtable.h
arch/powerpc/include/asm/pgtable.h
arch/powerpc/include/asm/processor.h
arch/powerpc/kernel/head_64.S
arch/powerpc/kernel/trace/ftrace_entry.S
arch/powerpc/mm/book3s64/pgtable.c
arch/powerpc/mm/book3s64/radix_pgtable.c
arch/powerpc/mm/book3s64/radix_tlb.c
arch/powerpc/mm/init_64.c
arch/powerpc/platforms/8xx/adder875.c
arch/powerpc/platforms/8xx/mpc885ads_setup.c
arch/powerpc/platforms/8xx/tqm8xx_setup.c
arch/powerpc/platforms/Kconfig.cputype
arch/powerpc/platforms/pseries/hotplug-memory.c
arch/powerpc/sysdev/fsl_soc.c
arch/powerpc/xmon/xmon.c
drivers/macintosh/ams/ams.h
drivers/net/ethernet/freescale/fs_enet/fs_enet.h
drivers/net/ethernet/freescale/fs_enet/mac-fcc.c

        arm64.nobti     [ARM64] Unconditionally disable Branch Target
                        Identification support
  
 -      arm64.nopauth   [ARM64] Unconditionally disable Pointer Authentication
 -                      support
 +      arm64.nomops    [ARM64] Unconditionally disable Memory Copy and Memory
 +                      Set instructions support
  
        arm64.nomte     [ARM64] Unconditionally disable Memory Tagging Extension
                        support
  
 -      arm64.nosve     [ARM64] Unconditionally disable Scalable Vector
 -                      Extension support
 +      arm64.nopauth   [ARM64] Unconditionally disable Pointer Authentication
 +                      support
  
        arm64.nosme     [ARM64] Unconditionally disable Scalable Matrix
                        Extension support
  
 -      arm64.nomops    [ARM64] Unconditionally disable Memory Copy and Memory
 -                      Set instructions support
 +      arm64.nosve     [ARM64] Unconditionally disable Scalable Vector
 +                      Extension support
  
        ataflop=        [HW,M68k]
  
                        others).
  
        ccw_timeout_log [S390]
 -                      See Documentation/s390/common_io.rst for details.
 +                      See Documentation/arch/s390/common_io.rst for details.
  
        cgroup_disable= [KNL] Disable a particular controller or optional feature
                        Format: {name of the controller(s) or feature(s) to disable}
                        Setting checkreqprot to 1 is deprecated.
  
        cio_ignore=     [S390]
 -                      See Documentation/s390/common_io.rst for details.
 +                      See Documentation/arch/s390/common_io.rst for details.
  
        clearcpuid=X[,X...] [X86]
                        Disable CPUID feature X for the kernel. See
                        kernel/dma/contiguous.c
  
        cma_pernuma=nn[MG]
 -                      [ARM64,KNL,CMA]
 +                      [KNL,CMA]
                        Sets the size of kernel per-numa memory area for
                        contiguous memory allocations. A value of 0 disables
                        per-numa CMA altogether. And If this option is not
                        which is located in node nid, if the allocation fails,
                        they will fallback to the global default memory area.
  
 +      numa_cma=<node>:nn[MG][,<node>:nn[MG]]
 +                      [KNL,CMA]
 +                      Sets the size of kernel numa memory area for
 +                      contiguous memory allocations. It will reserve CMA
 +                      area for the specified node.
 +
 +                      With numa CMA enabled, DMA users on node nid will
 +                      first try to allocate buffer from the numa area
 +                      which is located in node nid, if the allocation fails,
 +                      they will fallback to the global default memory area.
 +
        cmo_free_hint=  [PPC] Format: { yes | no }
                        Specify whether pages are marked as being inactive
                        when they are freed.  This is used in CMO environments
                        Format: off | on
                        default: on
  
 +      gather_data_sampling=
 +                      [X86,INTEL] Control the Gather Data Sampling (GDS)
 +                      mitigation.
 +
 +                      Gather Data Sampling is a hardware vulnerability which
 +                      allows unprivileged speculative access to data which was
 +                      previously stored in vector registers.
 +
 +                      This issue is mitigated by default in updated microcode.
 +                      The mitigation may have a performance impact but can be
 +                      disabled. On systems without the microcode mitigation
 +                      disabling AVX serves as a mitigation.
 +
 +                      force:  Disable AVX to mitigate systems without
 +                              microcode mitigation. No effect if the microcode
 +                              mitigation is present. Known to cause crashes in
 +                              userspace with buggy AVX enumeration.
 +
 +                      off:    Disable GDS mitigation.
 +
        gcov_persist=   [GCOV] When non-zero (default), profiling data for
                        kernel modules is saved and remains accessible via
                        debugfs, even when the module is unloaded/reloaded.
  
        kvm-intel.flexpriority=
                        [KVM,Intel] Control KVM's use of FlexPriority feature
 -                      (TPR shadow). Default is 1 (enabled). Disalbe by KVM if
 +                      (TPR shadow). Default is 1 (enabled). Disable by KVM if
                        hardware lacks support for it.
  
        kvm-intel.nested=
        locktorture.torture_type= [KNL]
                        Specify the locking implementation to test.
  
 +      locktorture.writer_fifo= [KNL]
 +                      Run the write-side locktorture kthreads at
 +                      sched_set_fifo() real-time priority.
 +
        locktorture.verbose= [KNL]
                        Enable additional printk() statements.
  
                        [KNL,SH] Allow user to override the default size for
                        per-device physically contiguous DMA buffers.
  
 -      memhp_default_state=online/offline
 +      memhp_default_state=online/offline/online_kernel/online_movable
                        [KNL] Set the initial state for the memory hotplug
                        onlining policy. If not specified, the default value is
                        set according to the
                                Disable all optional CPU mitigations.  This
                                improves system performance, but it may also
                                expose users to several CPU vulnerabilities.
 -                              Equivalent to: nopti [X86,PPC]
 -                                             if nokaslr then kpti=0 [ARM64]
 -                                             nospectre_v1 [X86,PPC]
 -                                             nobp=0 [S390]
 -                                             nospectre_v2 [X86,PPC,S390,ARM64]
 -                                             spectre_v2_user=off [X86]
 -                                             spec_store_bypass_disable=off [X86,PPC]
 -                                             ssbd=force-off [ARM64]
 -                                             nospectre_bhb [ARM64]
 +                              Equivalent to: if nokaslr then kpti=0 [ARM64]
 +                                             gather_data_sampling=off [X86]
 +                                             kvm.nx_huge_pages=off [X86]
                                               l1tf=off [X86]
                                               mds=off [X86]
 -                                             tsx_async_abort=off [X86]
 -                                             kvm.nx_huge_pages=off [X86]
 -                                             srbds=off [X86,INTEL]
 +                                             mmio_stale_data=off [X86]
                                               no_entry_flush [PPC]
                                               no_uaccess_flush [PPC]
 -                                             mmio_stale_data=off [X86]
 +                                             nobp=0 [S390]
 +                                             nopti [X86,PPC]
 +                                             nospectre_bhb [ARM64]
 +                                             nospectre_v1 [X86,PPC]
 +                                             nospectre_v2 [X86,PPC,S390,ARM64]
                                               retbleed=off [X86]
 +                                             spec_store_bypass_disable=off [X86,PPC]
 +                                             spectre_v2_user=off [X86]
 +                                             srbds=off [X86,INTEL]
 +                                             ssbd=force-off [ARM64]
 +                                             tsx_async_abort=off [X86]
  
                                Exceptions:
                                               This does not have any effect on
  
        nohibernate     [HIBERNATION] Disable hibernation and resume.
  
-       nohlt           [ARM,ARM64,MICROBLAZE,MIPS,SH] Forces the kernel to
+       nohlt           [ARM,ARM64,MICROBLAZE,MIPS,PPC,SH] Forces the kernel to
                        busy wait in do_idle() and not use the arch_cpu_idle()
                        implementation; requires CONFIG_GENERIC_IDLE_POLL_SETUP
                        to be effective. This is useful on platforms where the
        nosmp           [SMP] Tells an SMP kernel to act as a UP kernel,
                        and disable the IO APIC.  legacy for "maxcpus=0".
  
-       nosmt           [KNL,MIPS,S390] Disable symmetric multithreading (SMT).
+       nosmt           [KNL,MIPS,PPC,S390] Disable symmetric multithreading (SMT).
                        Equivalent to smt=1.
  
-                       [KNL,X86] Disable symmetric multithreading (SMT).
+                       [KNL,X86,PPC] Disable symmetric multithreading (SMT).
                        nosmt=force: Force disable SMT, cannot be undone
                                     via the sysfs control file.
  
                        timeout < 0: reboot immediately
                        Format: <timeout>
  
 -      panic_print=    Bitmask for printing system info when panic happens.
 -                      User can chose combination of the following bits:
 -                      bit 0: print all tasks info
 -                      bit 1: print system memory info
 -                      bit 2: print timer info
 -                      bit 3: print locks info if CONFIG_LOCKDEP is on
 -                      bit 4: print ftrace buffer
 -                      bit 5: print all printk messages in buffer
 -                      bit 6: print all CPUs backtrace (if available in the arch)
 -                      *Be aware* that this option may print a _lot_ of lines,
 -                      so there are risks of losing older messages in the log.
 -                      Use this option carefully, maybe worth to setup a
 -                      bigger log buffer with "log_buf_len" along with this.
 -
        panic_on_taint= Bitmask for conditionally calling panic() in add_taint()
                        Format: <hex>[,nousertaint]
                        Hexadecimal bitmask representing the set of TAINT flags
        panic_on_warn=1 panic() instead of WARN().  Useful to cause kdump
                        on a WARN().
  
 +      panic_print=    Bitmask for printing system info when panic happens.
 +                      User can chose combination of the following bits:
 +                      bit 0: print all tasks info
 +                      bit 1: print system memory info
 +                      bit 2: print timer info
 +                      bit 3: print locks info if CONFIG_LOCKDEP is on
 +                      bit 4: print ftrace buffer
 +                      bit 5: print all printk messages in buffer
 +                      bit 6: print all CPUs backtrace (if available in the arch)
 +                      *Be aware* that this option may print a _lot_ of lines,
 +                      so there are risks of losing older messages in the log.
 +                      Use this option carefully, maybe worth to setup a
 +                      bigger log buffer with "log_buf_len" along with this.
 +
        parkbd.port=    [HW] Parallel port number the keyboard adapter is
                        connected to, default is 0.
                        Format: <parport#>
                        mode 0, bit 1 is for mode 1, and so on.  Mode 0 only
                        allowed by default.
  
 -      pause_on_oops=
 +      pause_on_oops=<int>
                        Halt all CPUs after the first oops has been printed for
                        the specified number of seconds.  This is to be used if
                        your oopses keep scrolling off the screen.
                        test until boot completes in order to avoid
                        interference.
  
 +      rcuscale.kfree_by_call_rcu= [KNL]
 +                      In kernels built with CONFIG_RCU_LAZY=y, test
 +                      call_rcu() instead of kfree_rcu().
 +
 +      rcuscale.kfree_mult= [KNL]
 +                      Instead of allocating an object of size kfree_obj,
 +                      allocate one of kfree_mult * sizeof(kfree_obj).
 +                      Defaults to 1.
 +
        rcuscale.kfree_rcu_test= [KNL]
                        Set to measure performance of kfree_rcu() flooding.
  
                        Number of loops doing rcuscale.kfree_alloc_num number
                        of allocations and frees.
  
 +      rcuscale.minruntime= [KNL]
 +                      Set the minimum test run time in seconds.  This
 +                      does not affect the data-collection interval,
 +                      but instead allows better measurement of things
 +                      like CPU consumption.
 +
        rcuscale.nreaders= [KNL]
                        Set number of RCU readers.  The value -1 selects
                        N, where N is the number of CPUs.  A value
                        the same as for rcuscale.nreaders.
                        N, where N is the number of CPUs
  
 -      rcuscale.perf_type= [KNL]
 +      rcuscale.scale_type= [KNL]
                        Specify the RCU implementation to test.
  
        rcuscale.shutdown= [KNL]
                        in microseconds.  The default of zero says
                        no holdoff.
  
 +      rcuscale.writer_holdoff_jiffies= [KNL]
 +                      Additional write-side holdoff between grace
 +                      periods, but in jiffies.  The default of zero
 +                      says no holdoff.
 +
        rcutorture.fqs_duration= [KNL]
                        Set duration of force_quiescent_state bursts
                        in microseconds.
                        number avoids disturbing real-time workloads,
                        but lengthens grace periods.
  
 +      rcupdate.rcu_task_lazy_lim= [KNL]
 +                      Number of callbacks on a given CPU that will
 +                      cancel laziness on that CPU.  Use -1 to disable
 +                      cancellation of laziness, but be advised that
 +                      doing so increases the danger of OOM due to
 +                      callback flooding.
 +
        rcupdate.rcu_task_stall_info= [KNL]
                        Set initial timeout in jiffies for RCU task stall
                        informational messages, which give some indication
                        A change in value does not take effect until
                        the beginning of the next grace period.
  
 +      rcupdate.rcu_tasks_lazy_ms= [KNL]
 +                      Set timeout in milliseconds RCU Tasks asynchronous
 +                      callback batching for call_rcu_tasks().
 +                      A negative value will take the default.  A value
 +                      of zero will disable batching.  Batching is
 +                      always disabled for synchronize_rcu_tasks().
 +
 +      rcupdate.rcu_tasks_rude_lazy_ms= [KNL]
 +                      Set timeout in milliseconds RCU Tasks
 +                      Rude asynchronous callback batching for
 +                      call_rcu_tasks_rude().  A negative value
 +                      will take the default.  A value of zero will
 +                      disable batching.  Batching is always disabled
 +                      for synchronize_rcu_tasks_rude().
 +
 +      rcupdate.rcu_tasks_trace_lazy_ms= [KNL]
 +                      Set timeout in milliseconds RCU Tasks
 +                      Trace asynchronous callback batching for
 +                      call_rcu_tasks_trace().  A negative value
 +                      will take the default.  A value of zero will
 +                      disable batching.  Batching is always disabled
 +                      for synchronize_rcu_tasks_trace().
 +
        rcupdate.rcu_self_test= [KNL]
                        Run the RCU early boot self tests
  
                        Useful for devices that are detected asynchronously
                        (e.g. USB and MMC devices).
  
 +      rootwait=       [KNL] Maximum time (in seconds) to wait for root device
 +                      to show up before attempting to mount the root
 +                      filesystem.
 +
        rproc_mem=nn[KMG][@address]
                        [KNL,ARM,CMA] Remoteproc physical memory block.
                        Memory area to be used by remote processor image,
                        Not specifying this option is equivalent to
                        spectre_v2_user=auto.
  
 +      spec_rstack_overflow=
 +                      [X86] Control RAS overflow mitigation on AMD Zen CPUs
 +
 +                      off             - Disable mitigation
 +                      microcode       - Enable microcode mitigation only
 +                      safe-ret        - Enable sw-only safe RET mitigation (default)
 +                      ibpb            - Enable mitigation by issuing IBPB on
 +                                        kernel entry
 +                      ibpb-vmexit     - Issue IBPB only on VMEXIT
 +                                        (cloud-specific mitigation)
 +
        spec_store_bypass_disable=
                        [HW] Control Speculative Store Bypass (SSB) Disable mitigation
                        (Speculative Store Bypass vulnerability)
                        -1: disable all critical trip points in all thermal zones
                        <degrees C>: override all critical trip points
  
 -      thermal.nocrt=  [HW,ACPI]
 -                      Set to disable actions on ACPI thermal zone
 -                      critical and hot trip points.
 -
        thermal.off=    [HW,ACPI]
                        1: disable ACPI thermal control
  
                        This will guarantee that all the other pcrs
                        are saved.
  
 +      tpm_tis.interrupts= [HW,TPM]
 +                      Enable interrupts for the MMIO based physical layer
 +                      for the FIFO interface. By default it is set to false
 +                      (0). For more information about TPM hardware interfaces
 +                      defined by Trusted Computing Group (TCG) see
 +                      https://trustedcomputinggroup.org/resource/pc-client-platform-tpm-profile-ptp-specification/
 +
        tp_printk       [FTRACE]
                        Have the tracepoints sent to printk as well as the
                        tracing ring buffer. This is useful for early boot up
diff --combined arch/powerpc/Kconfig
@@@ -157,7 -157,6 +157,7 @@@ config PP
        select ARCH_HAS_UBSAN_SANITIZE_ALL
        select ARCH_HAVE_NMI_SAFE_CMPXCHG
        select ARCH_KEEP_MEMBLOCK
 +      select ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE if PPC_RADIX_MMU
        select ARCH_MIGHT_HAVE_PC_PARPORT
        select ARCH_MIGHT_HAVE_PC_SERIO
        select ARCH_OPTIONAL_KERNEL_RWX         if ARCH_HAS_STRICT_KERNEL_RWX
        select ARCH_WANT_IPC_PARSE_VERSION
        select ARCH_WANT_IRQS_OFF_ACTIVATE_MM
        select ARCH_WANT_LD_ORPHAN_WARN
 +      select ARCH_WANT_OPTIMIZE_DAX_VMEMMAP   if PPC_RADIX_MMU
        select ARCH_WANTS_MODULES_DATA_IN_VMALLOC       if PPC_BOOK3S_32 || PPC_8xx
        select ARCH_WEAK_RELEASE_ACQUIRE
        select BINFMT_ELF
        select DYNAMIC_FTRACE                   if FUNCTION_TRACER
        select EDAC_ATOMIC_SCRUB
        select EDAC_SUPPORT
+       select FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY if ARCH_USING_PATCHABLE_FUNCTION_ENTRY
        select GENERIC_ATOMIC64                 if PPC32
        select GENERIC_CLOCKEVENTS_BROADCAST    if SMP
        select GENERIC_CMOS_UPDATE
        select GENERIC_CPU_VULNERABILITIES      if PPC_BARRIER_NOSPEC
        select GENERIC_EARLY_IOREMAP
        select GENERIC_GETTIMEOFDAY
+       select GENERIC_IDLE_POLL_SETUP
 +      select GENERIC_IOREMAP
        select GENERIC_IRQ_SHOW
        select GENERIC_IRQ_SHOW_LEVEL
        select GENERIC_PCI_IOMAP                if PCI
        select HAVE_DEBUG_KMEMLEAK
        select HAVE_DEBUG_STACKOVERFLOW
        select HAVE_DYNAMIC_FTRACE
-       select HAVE_DYNAMIC_FTRACE_WITH_ARGS    if MPROFILE_KERNEL || PPC32
-       select HAVE_DYNAMIC_FTRACE_WITH_REGS    if MPROFILE_KERNEL || PPC32
+       select HAVE_DYNAMIC_FTRACE_WITH_ARGS    if ARCH_USING_PATCHABLE_FUNCTION_ENTRY || MPROFILE_KERNEL || PPC32
+       select HAVE_DYNAMIC_FTRACE_WITH_REGS    if ARCH_USING_PATCHABLE_FUNCTION_ENTRY || MPROFILE_KERNEL || PPC32
        select HAVE_EBPF_JIT
        select HAVE_EFFICIENT_UNALIGNED_ACCESS
        select HAVE_FAST_GUP
        select HAVE_MOD_ARCH_SPECIFIC
        select HAVE_NMI                         if PERF_EVENTS || (PPC64 && PPC_BOOK3S)
        select HAVE_OPTPROBES
-       select HAVE_OBJTOOL                     if PPC32 || MPROFILE_KERNEL
+       select HAVE_OBJTOOL                     if ARCH_USING_PATCHABLE_FUNCTION_ENTRY || MPROFILE_KERNEL || PPC32
        select HAVE_OBJTOOL_MCOUNT              if HAVE_OBJTOOL
        select HAVE_PERF_EVENTS
        select HAVE_PERF_EVENTS_NMI             if PPC64
        select HAVE_SYSCALL_TRACEPOINTS
        select HAVE_VIRT_CPU_ACCOUNTING
        select HAVE_VIRT_CPU_ACCOUNTING_GEN
+       select HOTPLUG_SMT                      if HOTPLUG_CPU
+       select SMT_NUM_THREADS_DYNAMIC
        select HUGETLB_PAGE_SIZE_VARIABLE       if PPC_BOOK3S_64 && HUGETLB_PAGE
        select IOMMU_HELPER                     if PPC64
        select IRQ_DOMAIN
@@@ -554,6 -555,13 +558,13 @@@ config MPROFILE_KERNE
        def_bool $(success,$(srctree)/arch/powerpc/tools/gcc-check-mprofile-kernel.sh $(CC) -mlittle-endian) if CPU_LITTLE_ENDIAN
        def_bool $(success,$(srctree)/arch/powerpc/tools/gcc-check-mprofile-kernel.sh $(CC) -mbig-endian) if CPU_BIG_ENDIAN
  
+ config ARCH_USING_PATCHABLE_FUNCTION_ENTRY
+       depends on FUNCTION_TRACER && (PPC32 || PPC64_ELF_ABI_V2)
+       depends on $(cc-option,-fpatchable-function-entry=2)
+       def_bool y if PPC32
+       def_bool $(success,$(srctree)/arch/powerpc/tools/gcc-check-fpatchable-function-entry.sh $(CC) -mlittle-endian) if PPC64 && CPU_LITTLE_ENDIAN
+       def_bool $(success,$(srctree)/arch/powerpc/tools/gcc-check-fpatchable-function-entry.sh $(CC) -mbig-endian) if PPC64 && CPU_BIG_ENDIAN
  config HOTPLUG_CPU
        bool "Support for enabling/disabling CPUs"
        depends on SMP && (PPC_PSERIES || \
@@@ -592,21 -600,41 +603,21 @@@ config PPC64_SUPPORTS_MEMORY_FAILUR
        default "y" if PPC_POWERNV
        select ARCH_SUPPORTS_MEMORY_FAILURE
  
 -config KEXEC
 -      bool "kexec system call"
 -      depends on PPC_BOOK3S || PPC_E500 || (44x && !SMP)
 -      select KEXEC_CORE
 -      help
 -        kexec is a system call that implements the ability to shutdown your
 -        current kernel, and to start another kernel.  It is like a reboot
 -        but it is independent of the system firmware.   And like a reboot
 -        you can start any kernel with it, not just Linux.
 -
 -        The name comes from the similarity to the exec system call.
 -
 -        It is an ongoing process to be certain the hardware in a machine
 -        is properly shutdown, so do not be surprised if this code does not
 -        initially work for you.  As of this writing the exact hardware
 -        interface is strongly in flux, so no good recommendation can be
 -        made.
 -
 -config KEXEC_FILE
 -      bool "kexec file based system call"
 -      select KEXEC_CORE
 -      select HAVE_IMA_KEXEC if IMA
 -      select KEXEC_ELF
 -      depends on PPC64
 -      depends on CRYPTO=y
 -      depends on CRYPTO_SHA256=y
 -      help
 -        This is a new version of the kexec system call. This call is
 -        file based and takes in file descriptors as system call arguments
 -        for kernel and initramfs as opposed to a list of segments as is the
 -        case for the older kexec call.
 +config ARCH_SUPPORTS_KEXEC
 +      def_bool PPC_BOOK3S || PPC_E500 || (44x && !SMP)
 +
 +config ARCH_SUPPORTS_KEXEC_FILE
 +      def_bool PPC64 && CRYPTO=y && CRYPTO_SHA256=y
  
 -config ARCH_HAS_KEXEC_PURGATORY
 +config ARCH_SUPPORTS_KEXEC_PURGATORY
        def_bool KEXEC_FILE
  
 +config ARCH_SELECTS_KEXEC_FILE
 +      def_bool y
 +      depends on KEXEC_FILE
 +      select KEXEC_ELF
 +      select HAVE_IMA_KEXEC if IMA
 +
  config PPC64_BIG_ENDIAN_ELF_ABI_V2
        # Option is available to BFD, but LLD does not support ELFv1 so this is
        # always true there.
@@@ -666,13 -694,14 +677,13 @@@ config RELOCATABLE_TES
          loaded at, which tends to be non-zero and therefore test the
          relocation code.
  
 -config CRASH_DUMP
 -      bool "Build a dump capture kernel"
 -      depends on PPC64 || PPC_BOOK3S_32 || PPC_85xx || (44x && !SMP)
 +config ARCH_SUPPORTS_CRASH_DUMP
 +      def_bool PPC64 || PPC_BOOK3S_32 || PPC_85xx || (44x && !SMP)
 +
 +config ARCH_SELECTS_CRASH_DUMP
 +      def_bool y
 +      depends on CRASH_DUMP
        select RELOCATABLE if PPC64 || 44x || PPC_85xx
 -      help
 -        Build a kernel suitable for use as a dump capture kernel.
 -        The same kernel binary can be used as production kernel and dump
 -        capture kernel.
  
  config FA_DUMP
        bool "Firmware-assisted dump"
@@@ -1126,12 -1155,6 +1137,6 @@@ config FSL_GT
        help
          Freescale General-purpose Timers support
  
- config PCI_8260
-       bool
-       depends on PCI && 8260
-       select PPC_INDIRECT_PCI
-       default y
  config FSL_RIO
        bool "Freescale Embedded SRIO Controller support"
        depends on RAPIDIO = y && HAVE_RAPIDIO
@@@ -176,8 -176,9 +176,9 @@@ CONFIG_MOUSE_APPLETOUCH=
  # CONFIG_SERIO_I8042 is not set
  # CONFIG_SERIO_SERPORT is not set
  CONFIG_SERIAL_8250=m
- CONFIG_SERIAL_PMACZILOG=m
+ CONFIG_SERIAL_PMACZILOG=y
  CONFIG_SERIAL_PMACZILOG_TTYS=y
+ CONFIG_SERIAL_PMACZILOG_CONSOLE=y
  CONFIG_NVRAM=y
  CONFIG_I2C_CHARDEV=m
  CONFIG_APM_POWER=y
@@@ -254,7 -255,7 +255,7 @@@ CONFIG_LEDS_TRIGGER_DEFAULT_ON=
  CONFIG_EXT2_FS=y
  CONFIG_EXT4_FS=y
  CONFIG_EXT4_FS_POSIX_ACL=y
 -CONFIG_AUTOFS4_FS=m
 +CONFIG_AUTOFS_FS=m
  CONFIG_FUSE_FS=m
  CONFIG_ISO9660_FS=y
  CONFIG_JOLIET=y
@@@ -327,7 -327,7 +327,7 @@@ CONFIG_BTRFS_FS=
  CONFIG_BTRFS_FS_POSIX_ACL=y
  CONFIG_NILFS2_FS=m
  CONFIG_FS_DAX=y
 -CONFIG_AUTOFS4_FS=m
 +CONFIG_AUTOFS_FS=m
  CONFIG_FUSE_FS=m
  CONFIG_OVERLAY_FS=m
  CONFIG_ISO9660_FS=y
@@@ -390,8 -390,11 +390,11 @@@ CONFIG_CRYPTO_SHA256=
  CONFIG_CRYPTO_WP512=m
  CONFIG_CRYPTO_LZO=m
  CONFIG_CRYPTO_CRC32C_VPMSUM=m
+ CONFIG_CRYPTO_CRCT10DIF_VPMSUM=m
+ CONFIG_CRYPTO_VPMSUM_TESTER=m
  CONFIG_CRYPTO_MD5_PPC=m
  CONFIG_CRYPTO_SHA1_PPC=m
+ CONFIG_CRYPTO_AES_GCM_P10=m
  CONFIG_CRYPTO_DEV_NX=y
  CONFIG_CRYPTO_DEV_NX_ENCRYPT=m
  CONFIG_CRYPTO_DEV_VMX=y
@@@ -183,7 -183,6 +183,6 @@@ CONFIG_IP_NF_MATCH_TTL=
  CONFIG_IP_NF_FILTER=m
  CONFIG_IP_NF_TARGET_REJECT=m
  CONFIG_IP_NF_MANGLE=m
- CONFIG_IP_NF_TARGET_CLUSTERIP=m
  CONFIG_IP_NF_TARGET_ECN=m
  CONFIG_IP_NF_TARGET_TTL=m
  CONFIG_IP_NF_RAW=m
@@@ -969,7 -968,7 +968,7 @@@ CONFIG_XFS_POSIX_ACL=
  CONFIG_GFS2_FS=m
  CONFIG_FS_DAX=y
  CONFIG_QUOTA_NETLINK_INTERFACE=y
 -CONFIG_AUTOFS4_FS=m
 +CONFIG_AUTOFS_FS=m
  CONFIG_FUSE_FS=m
  CONFIG_ISO9660_FS=y
  CONFIG_JOLIET=y
@@@ -100,7 -100,7 +100,7 @@@ config CRYPTO_AES_GCM_P1
        select CRYPTO_LIB_AES
        select CRYPTO_ALGAPI
        select CRYPTO_AEAD
-       default m
+       select CRYPTO_SKCIPHER
        help
          AEAD cipher: AES cipher algorithms (FIPS-197)
          GCM (Galois/Counter Mode) authenticated encryption mode (NIST SP800-38D)
          Support for cryptographic acceleration instructions on Power10 or
          later CPU. This module supports stitched acceleration for AES/GCM.
  
 +config CRYPTO_CHACHA20_P10
 +      tristate "Ciphers: ChaCha20, XChacha20, XChacha12 (P10 or later)"
 +      depends on PPC64 && CPU_LITTLE_ENDIAN
 +      select CRYPTO_SKCIPHER
 +      select CRYPTO_LIB_CHACHA_GENERIC
 +      select CRYPTO_ARCH_HAVE_LIB_CHACHA
 +      help
 +        Length-preserving ciphers: ChaCha20, XChaCha20, and XChaCha12
 +        stream cipher algorithms
 +
 +        Architecture: PowerPC64
 +        - Power10 or later
 +        - Little-endian
 +
 +config CRYPTO_POLY1305_P10
 +      tristate "Hash functions: Poly1305 (P10 or later)"
 +      depends on PPC64 && CPU_LITTLE_ENDIAN
 +      select CRYPTO_HASH
 +      select CRYPTO_LIB_POLY1305_GENERIC
 +      help
 +        Poly1305 authenticator algorithm (RFC7539)
 +
 +        Architecture: PowerPC64
 +        - Power10 or later
 +        - Little-endian
 +
  endmenu
@@@ -462,6 -462,11 +462,6 @@@ static inline pte_t pfn_pte(unsigned lo
                     pgprot_val(pgprot));
  }
  
 -static inline unsigned long pte_pfn(pte_t pte)
 -{
 -      return pte_val(pte) >> PTE_RPN_SHIFT;
 -}
 -
  /* Generic modifiers for PTE bits */
  static inline pte_t pte_wrprotect(pte_t pte)
  {
@@@ -493,7 -498,7 +493,7 @@@ static inline pte_t pte_mkpte(pte_t pte
        return pte;
  }
  
 -static inline pte_t pte_mkwrite(pte_t pte)
 +static inline pte_t pte_mkwrite_novma(pte_t pte)
  {
        return __pte(pte_val(pte) | _PAGE_RW);
  }
@@@ -536,58 -541,43 +536,43 @@@ static inline pte_t pte_modify(pte_t pt
  
  
  /* This low level function performs the actual PTE insertion
-  * Setting the PTE depends on the MMU type and other factors. It's
-  * an horrible mess that I'm not going to try to clean up now but
-  * I'm keeping it in one place rather than spread around
+  * Setting the PTE depends on the MMU type and other factors.
+  *
+  * First case is 32-bit in UP mode with 32-bit PTEs, we need to preserve
+  * the _PAGE_HASHPTE bit since we may not have invalidated the previous
+  * translation in the hash yet (done in a subsequent flush_tlb_xxx())
+  * and see we need to keep track that this PTE needs invalidating.
+  *
+  * Second case is 32-bit with 64-bit PTE.  In this case, we
+  * can just store as long as we do the two halves in the right order
+  * with a barrier in between. This is possible because we take care,
+  * in the hash code, to pre-invalidate if the PTE was already hashed,
+  * which synchronizes us with any concurrent invalidation.
+  * In the percpu case, we fallback to the simple update preserving
+  * the hash bits (ie, same as the non-SMP case).
+  *
+  * Third case is 32-bit in SMP mode with 32-bit PTEs. We use the
+  * helper pte_update() which does an atomic update. We need to do that
+  * because a concurrent invalidation can clear _PAGE_HASHPTE. If it's a
+  * per-CPU PTE such as a kmap_atomic, we also do a simple update preserving
+  * the hash bits instead.
   */
  static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
                                pte_t *ptep, pte_t pte, int percpu)
  {
- #if defined(CONFIG_SMP) && !defined(CONFIG_PTE_64BIT)
-       /* First case is 32-bit Hash MMU in SMP mode with 32-bit PTEs. We use the
-        * helper pte_update() which does an atomic update. We need to do that
-        * because a concurrent invalidation can clear _PAGE_HASHPTE. If it's a
-        * per-CPU PTE such as a kmap_atomic, we do a simple update preserving
-        * the hash bits instead (ie, same as the non-SMP case)
-        */
-       if (percpu)
-               *ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE)
-                             | (pte_val(pte) & ~_PAGE_HASHPTE));
-       else
-               pte_update(mm, addr, ptep, ~_PAGE_HASHPTE, pte_val(pte), 0);
+       if ((!IS_ENABLED(CONFIG_SMP) && !IS_ENABLED(CONFIG_PTE_64BIT)) || percpu) {
+               *ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE) |
+                             (pte_val(pte) & ~_PAGE_HASHPTE));
+       } else if (IS_ENABLED(CONFIG_PTE_64BIT)) {
+               if (pte_val(*ptep) & _PAGE_HASHPTE)
+                       flush_hash_entry(mm, ptep, addr);
  
- #elif defined(CONFIG_PTE_64BIT)
-       /* Second case is 32-bit with 64-bit PTE.  In this case, we
-        * can just store as long as we do the two halves in the right order
-        * with a barrier in between. This is possible because we take care,
-        * in the hash code, to pre-invalidate if the PTE was already hashed,
-        * which synchronizes us with any concurrent invalidation.
-        * In the percpu case, we also fallback to the simple update preserving
-        * the hash bits
-        */
-       if (percpu) {
-               *ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE)
-                             | (pte_val(pte) & ~_PAGE_HASHPTE));
-               return;
+               asm volatile("stw%X0 %2,%0; eieio; stw%X1 %L2,%1" :
+                            "=m" (*ptep), "=m" (*((unsigned char *)ptep+4)) :
+                            "r" (pte) : "memory");
+       } else {
+               pte_update(mm, addr, ptep, ~_PAGE_HASHPTE, pte_val(pte), 0);
        }
-       if (pte_val(*ptep) & _PAGE_HASHPTE)
-               flush_hash_entry(mm, ptep, addr);
-       __asm__ __volatile__("\
-               stw%X0 %2,%0\n\
-               eieio\n\
-               stw%X1 %L2,%1"
-       : "=m" (*ptep), "=m" (*((unsigned char *)ptep+4))
-       : "r" (pte) : "memory");
- #else
-       /* Third case is 32-bit hash table in UP mode, we need to preserve
-        * the _PAGE_HASHPTE bit since we may not have invalidated the previous
-        * translation in the hash yet (done in a subsequent flush_tlb_xxx())
-        * and see we need to keep track that this PTE needs invalidating
-        */
-       *ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE)
-                     | (pte_val(pte) & ~_PAGE_HASHPTE));
- #endif
  }
  
  /*
@@@ -170,8 -170,8 +170,8 @@@ void unmap_kernel_page(unsigned long va
  #define pte_clear(mm, addr, ptep) \
        do { pte_update(mm, addr, ptep, ~0, 0, 0); } while (0)
  
 -#ifndef pte_mkwrite
 -static inline pte_t pte_mkwrite(pte_t pte)
 +#ifndef pte_mkwrite_novma
 +static inline pte_t pte_mkwrite_novma(pte_t pte)
  {
        return __pte(pte_val(pte) | _PAGE_RW);
  }
@@@ -355,7 -355,7 +355,7 @@@ static inline int pte_young(pte_t pte
  #define pmd_pfn(pmd)          (pmd_val(pmd) >> PAGE_SHIFT)
  #else
  #define pmd_page_vaddr(pmd)   \
-       ((unsigned long)(pmd_val(pmd) & ~(PTE_TABLE_SIZE - 1)))
+       ((const void *)(pmd_val(pmd) & ~(PTE_TABLE_SIZE - 1)))
  #define pmd_pfn(pmd)          (__pa(pmd_val(pmd)) >> PAGE_SHIFT)
  #endif
  
@@@ -85,7 -85,7 +85,7 @@@
  #ifndef __ASSEMBLY__
  /* pte_clear moved to later in this file */
  
 -static inline pte_t pte_mkwrite(pte_t pte)
 +static inline pte_t pte_mkwrite_novma(pte_t pte)
  {
        return __pte(pte_val(pte) | _PAGE_RW);
  }
@@@ -127,7 -127,7 +127,7 @@@ static inline pte_t pmd_pte(pmd_t pmd
  #define       pmd_bad(pmd)            (!is_kernel_addr(pmd_val(pmd)) \
                                 || (pmd_val(pmd) & PMD_BAD_BITS))
  #define       pmd_present(pmd)        (!pmd_none(pmd))
- #define pmd_page_vaddr(pmd)   (pmd_val(pmd) & ~PMD_MASKED_BITS)
+ #define pmd_page_vaddr(pmd)   ((const void *)(pmd_val(pmd) & ~PMD_MASKED_BITS))
  extern struct page *pmd_page(pmd_t pmd);
  #define pmd_pfn(pmd)          (page_to_pfn(pmd_page(pmd)))
  
@@@ -41,12 -41,6 +41,12 @@@ struct mm_struct
  
  #ifndef __ASSEMBLY__
  
 +void set_ptes(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
 +              pte_t pte, unsigned int nr);
 +#define set_ptes set_ptes
 +#define update_mmu_cache(vma, addr, ptep) \
 +      update_mmu_cache_range(NULL, vma, addr, ptep, 1)
 +
  #ifndef MAX_PTRS_PER_PGD
  #define MAX_PTRS_PER_PGD PTRS_PER_PGD
  #endif
  /* Keep these as a macros to avoid include dependency mess */
  #define pte_page(x)           pfn_to_page(pte_pfn(x))
  #define mk_pte(page, pgprot)  pfn_pte(page_to_pfn(page), (pgprot))
 +
 +static inline unsigned long pte_pfn(pte_t pte)
 +{
 +      return (pte_val(pte) & PTE_RPN_MASK) >> PTE_RPN_SHIFT;
 +}
 +
  /*
   * Select all bits except the pfn
   */
@@@ -72,9 -60,9 +72,9 @@@ static inline pgprot_t pte_pgprot(pte_
  }
  
  #ifndef pmd_page_vaddr
- static inline unsigned long pmd_page_vaddr(pmd_t pmd)
+ static inline const void *pmd_page_vaddr(pmd_t pmd)
  {
-       return ((unsigned long)__va(pmd_val(pmd) & ~PMD_MASKED_BITS));
+       return __va(pmd_val(pmd) & ~PMD_MASKED_BITS);
  }
  #define pmd_page_vaddr pmd_page_vaddr
  #endif
@@@ -170,30 -158,13 +170,30 @@@ static inline pgtable_t pmd_pgtable(pmd
  }
  
  #ifdef CONFIG_PPC64
 -#define is_ioremap_addr is_ioremap_addr
 -static inline bool is_ioremap_addr(const void *x)
 +int __meminit vmemmap_populated(unsigned long vmemmap_addr, int vmemmap_map_size);
 +bool altmap_cross_boundary(struct vmem_altmap *altmap, unsigned long start,
 +                         unsigned long page_size);
 +/*
 + * mm/memory_hotplug.c:mhp_supports_memmap_on_memory goes into details
 + * some of the restrictions. We don't check for PMD_SIZE because our
 + * vmemmap allocation code can fallback correctly. The pageblock
 + * alignment requirement is met using altmap->reserve blocks.
 + */
 +#define arch_supports_memmap_on_memory arch_supports_memmap_on_memory
 +static inline bool arch_supports_memmap_on_memory(unsigned long vmemmap_size)
  {
 -      unsigned long addr = (unsigned long)x;
 -
 -      return addr >= IOREMAP_BASE && addr < IOREMAP_END;
 +      if (!radix_enabled())
 +              return false;
 +      /*
 +       * With 4K page size and 2M PMD_SIZE, we can align
 +       * things better with memory block size value
 +       * starting from 128MB. Hence align things with PMD_SIZE.
 +       */
 +      if (IS_ENABLED(CONFIG_PPC_4K_PAGES))
 +              return IS_ALIGNED(vmemmap_size, PMD_SIZE);
 +      return true;
  }
 +
  #endif /* CONFIG_PPC64 */
  
  #endif /* __ASSEMBLY__ */
@@@ -172,11 -172,6 +172,6 @@@ struct thread_struct 
        unsigned int    align_ctl;      /* alignment handling control */
  #ifdef CONFIG_HAVE_HW_BREAKPOINT
        struct perf_event *ptrace_bps[HBP_NUM_MAX];
-       /*
-        * Helps identify source of single-step exception and subsequent
-        * hw-breakpoint enablement
-        */
-       struct perf_event *last_hit_ubp[HBP_NUM_MAX];
  #endif /* CONFIG_HAVE_HW_BREAKPOINT */
        struct arch_hw_breakpoint hw_brk[HBP_NUM_MAX]; /* hardware breakpoint info */
        unsigned long   trap_nr;        /* last trap # on this thread */
@@@ -393,6 -388,7 +388,6 @@@ int validate_sp_size(unsigned long sp, 
   */
  #define ARCH_HAS_PREFETCH
  #define ARCH_HAS_PREFETCHW
 -#define ARCH_HAS_SPINLOCK_PREFETCH
  
  static inline void prefetch(const void *x)
  {
@@@ -410,6 -406,8 +405,6 @@@ static inline void prefetchw(const voi
        __asm__ __volatile__ ("dcbtst 0,%0" : : "r" (x));
  }
  
 -#define spin_lock_prefetch(x) prefetchw(x)
 -
  /* asm stubs */
  extern unsigned long isa300_idle_stop_noloss(unsigned long psscr_val);
  extern unsigned long isa300_idle_stop_mayloss(unsigned long psscr_val);
@@@ -40,7 -40,6 +40,6 @@@
  #include <asm/hw_irq.h>
  #include <asm/cputhreads.h>
  #include <asm/ppc-opcode.h>
- #include <asm/export.h>
  #include <asm/feature-fixups.h>
  #ifdef CONFIG_PPC_BOOK3S
  #include <asm/exception-64s.h>
@@@ -375,7 -374,8 +374,7 @@@ _GLOBAL(generic_secondary_smp_init
        beq     20f
  
        /* start the specified thread */
 -      LOAD_REG_ADDR(r5, fsl_secondary_thread_init)
 -      ld      r4, 0(r5)
 +      LOAD_REG_ADDR(r5, DOTSYM(fsl_secondary_thread_init))
        bl      book3e_start_thread
  
        /* stop the current thread */
@@@ -3,12 -3,12 +3,12 @@@
   * Split from ftrace_64.S
   */
  
+ #include <linux/export.h>
  #include <linux/magic.h>
  #include <asm/ppc_asm.h>
  #include <asm/asm-offsets.h>
  #include <asm/ftrace.h>
  #include <asm/ppc-opcode.h>
- #include <asm/export.h>
  #include <asm/thread_info.h>
  #include <asm/bug.h>
  #include <asm/ptrace.h>
@@@ -33,9 -33,6 +33,9 @@@
   * and then arrange for the ftrace function to be called.
   */
  .macro        ftrace_regs_entry allregs
 +      /* Create a minimal stack frame for representing B */
 +      PPC_STLU        r1, -STACK_FRAME_MIN_SIZE(r1)
 +
        /* Create our stack frame + pt_regs */
        PPC_STLU        r1,-SWITCH_FRAME_SIZE(r1)
  
@@@ -45,7 -42,7 +45,7 @@@
  
  #ifdef CONFIG_PPC64
        /* Save the original return address in A's stack frame */
 -      std     r0, LRSAVE+SWITCH_FRAME_SIZE(r1)
 +      std     r0, LRSAVE+SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE(r1)
        /* Ok to continue? */
        lbz     r3, PACA_FTRACE_ENABLED(r13)
        cmpdi   r3, 0
@@@ -80,8 -77,6 +80,8 @@@
        mflr    r7
        /* Save it as pt_regs->nip */
        PPC_STL r7, _NIP(r1)
 +      /* Also save it in B's stackframe header for proper unwind */
 +      PPC_STL r7, LRSAVE+SWITCH_FRAME_SIZE(r1)
        /* Save the read LR in pt_regs->link */
        PPC_STL r0, _LINK(r1)
  
  #endif
  
        /* Pop our stack frame */
 -      addi r1, r1, SWITCH_FRAME_SIZE
 +      addi r1, r1, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE
  
  #ifdef CONFIG_LIVEPATCH_64
          /* Based on the cmpd above, if the NIP was altered handle livepatch */
@@@ -254,3 -249,70 +254,70 @@@ livepatch_handler
        /* Return to original caller of live patched function */
        blr
  #endif /* CONFIG_LIVEPATCH */
+ #ifndef CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY
+ _GLOBAL(mcount)
+ _GLOBAL(_mcount)
+ EXPORT_SYMBOL(_mcount)
+       mflr    r12
+       mtctr   r12
+       mtlr    r0
+       bctr
+ #endif
+ #ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ _GLOBAL(return_to_handler)
+       /* need to save return values */
+ #ifdef CONFIG_PPC64
+       std     r4,  -32(r1)
+       std     r3,  -24(r1)
+       /* save TOC */
+       std     r2,  -16(r1)
+       std     r31, -8(r1)
+       mr      r31, r1
+       stdu    r1, -112(r1)
+       /*
+        * We might be called from a module.
+        * Switch to our TOC to run inside the core kernel.
+        */
+       LOAD_PACA_TOC()
+ #else
+       stwu    r1, -16(r1)
+       stw     r3, 8(r1)
+       stw     r4, 12(r1)
+ #endif
+       bl      ftrace_return_to_handler
+       nop
+       /* return value has real return address */
+       mtlr    r3
+ #ifdef CONFIG_PPC64
+       ld      r1, 0(r1)
+       ld      r4,  -32(r1)
+       ld      r3,  -24(r1)
+       ld      r2,  -16(r1)
+       ld      r31, -8(r1)
+ #else
+       lwz     r3, 8(r1)
+       lwz     r4, 12(r1)
+       addi    r1, r1, 16
+ #endif
+       /* Jump back to real return address */
+       blr
+ #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+ .pushsection ".tramp.ftrace.text","aw",@progbits;
+ .globl ftrace_tramp_text
+ ftrace_tramp_text:
+       .space 32
+ .popsection
+ .pushsection ".tramp.ftrace.init","aw",@progbits;
+ .globl ftrace_tramp_init
+ ftrace_tramp_init:
+       .space 32
+ .popsection
@@@ -9,6 -9,7 +9,7 @@@
  #include <linux/memremap.h>
  #include <linux/pkeys.h>
  #include <linux/debugfs.h>
+ #include <linux/proc_fs.h>
  #include <misc/cxl-base.h>
  
  #include <asm/pgalloc.h>
@@@ -64,39 -65,11 +65,39 @@@ int pmdp_set_access_flags(struct vm_are
        return changed;
  }
  
 +int pudp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
 +                        pud_t *pudp, pud_t entry, int dirty)
 +{
 +      int changed;
 +#ifdef CONFIG_DEBUG_VM
 +      WARN_ON(!pud_devmap(*pudp));
 +      assert_spin_locked(pud_lockptr(vma->vm_mm, pudp));
 +#endif
 +      changed = !pud_same(*(pudp), entry);
 +      if (changed) {
 +              /*
 +               * We can use MMU_PAGE_1G here, because only radix
 +               * path look at the psize.
 +               */
 +              __ptep_set_access_flags(vma, pudp_ptep(pudp),
 +                                      pud_pte(entry), address, MMU_PAGE_1G);
 +      }
 +      return changed;
 +}
 +
 +
  int pmdp_test_and_clear_young(struct vm_area_struct *vma,
                              unsigned long address, pmd_t *pmdp)
  {
        return __pmdp_test_and_clear_young(vma->vm_mm, address, pmdp);
  }
 +
 +int pudp_test_and_clear_young(struct vm_area_struct *vma,
 +                            unsigned long address, pud_t *pudp)
 +{
 +      return __pudp_test_and_clear_young(vma->vm_mm, address, pudp);
 +}
 +
  /*
   * set a new huge pmd. We should not be called for updating
   * an existing pmd entry. That should go via pmd_hugepage_update.
@@@ -118,23 -91,6 +119,23 @@@ void set_pmd_at(struct mm_struct *mm, u
        return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd));
  }
  
 +void set_pud_at(struct mm_struct *mm, unsigned long addr,
 +              pud_t *pudp, pud_t pud)
 +{
 +#ifdef CONFIG_DEBUG_VM
 +      /*
 +       * Make sure hardware valid bit is not set. We don't do
 +       * tlb flush for this update.
 +       */
 +
 +      WARN_ON(pte_hw_valid(pud_pte(*pudp)));
 +      assert_spin_locked(pud_lockptr(mm, pudp));
 +      WARN_ON(!(pud_large(pud)));
 +#endif
 +      trace_hugepage_set_pud(addr, pud_val(pud));
 +      return set_pte_at(mm, addr, pudp_ptep(pudp), pud_pte(pud));
 +}
 +
  static void do_serialize(void *arg)
  {
        /* We've taken the IPI, so try to trim the mask while here */
@@@ -192,35 -148,11 +193,35 @@@ pmd_t pmdp_huge_get_and_clear_full(stru
        return pmd;
  }
  
 +pud_t pudp_huge_get_and_clear_full(struct vm_area_struct *vma,
 +                                 unsigned long addr, pud_t *pudp, int full)
 +{
 +      pud_t pud;
 +
 +      VM_BUG_ON(addr & ~HPAGE_PMD_MASK);
 +      VM_BUG_ON((pud_present(*pudp) && !pud_devmap(*pudp)) ||
 +                !pud_present(*pudp));
 +      pud = pudp_huge_get_and_clear(vma->vm_mm, addr, pudp);
 +      /*
 +       * if it not a fullmm flush, then we can possibly end up converting
 +       * this PMD pte entry to a regular level 0 PTE by a parallel page fault.
 +       * Make sure we flush the tlb in this case.
 +       */
 +      if (!full)
 +              flush_pud_tlb_range(vma, addr, addr + HPAGE_PUD_SIZE);
 +      return pud;
 +}
 +
  static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot)
  {
        return __pmd(pmd_val(pmd) | pgprot_val(pgprot));
  }
  
 +static pud_t pud_set_protbits(pud_t pud, pgprot_t pgprot)
 +{
 +      return __pud(pud_val(pud) | pgprot_val(pgprot));
 +}
 +
  /*
   * At some point we should be able to get rid of
   * pmd_mkhuge() and mk_huge_pmd() when we update all the
@@@ -235,15 -167,6 +236,15 @@@ pmd_t pfn_pmd(unsigned long pfn, pgprot
        return __pmd_mkhuge(pmd_set_protbits(__pmd(pmdv), pgprot));
  }
  
 +pud_t pfn_pud(unsigned long pfn, pgprot_t pgprot)
 +{
 +      unsigned long pudv;
 +
 +      pudv = (pfn << PAGE_SHIFT) & PTE_RPN_MASK;
 +
 +      return __pud_mkhuge(pud_set_protbits(__pud(pudv), pgprot));
 +}
 +
  pmd_t mk_pmd(struct page *page, pgprot_t pgprot)
  {
        return pfn_pmd(page_to_pfn(page), pgprot);
@@@ -384,22 -307,22 +385,22 @@@ static pmd_t *get_pmd_from_cache(struc
  static pmd_t *__alloc_for_pmdcache(struct mm_struct *mm)
  {
        void *ret = NULL;
 -      struct page *page;
 +      struct ptdesc *ptdesc;
        gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO;
  
        if (mm == &init_mm)
                gfp &= ~__GFP_ACCOUNT;
 -      page = alloc_page(gfp);
 -      if (!page)
 +      ptdesc = pagetable_alloc(gfp, 0);
 +      if (!ptdesc)
                return NULL;
 -      if (!pgtable_pmd_page_ctor(page)) {
 -              __free_pages(page, 0);
 +      if (!pagetable_pmd_ctor(ptdesc)) {
 +              pagetable_free(ptdesc);
                return NULL;
        }
  
 -      atomic_set(&page->pt_frag_refcount, 1);
 +      atomic_set(&ptdesc->pt_frag_refcount, 1);
  
 -      ret = page_address(page);
 +      ret = ptdesc_address(ptdesc);
        /*
         * if we support only one fragment just return the
         * allocated page.
  
        spin_lock(&mm->page_table_lock);
        /*
 -       * If we find pgtable_page set, we return
 +       * If we find ptdesc_page set, we return
         * the allocated page with single fragment
         * count.
         */
        if (likely(!mm->context.pmd_frag)) {
 -              atomic_set(&page->pt_frag_refcount, PMD_FRAG_NR);
 +              atomic_set(&ptdesc->pt_frag_refcount, PMD_FRAG_NR);
                mm->context.pmd_frag = ret + PMD_FRAG_SIZE;
        }
        spin_unlock(&mm->page_table_lock);
@@@ -435,15 -358,15 +436,15 @@@ pmd_t *pmd_fragment_alloc(struct mm_str
  
  void pmd_fragment_free(unsigned long *pmd)
  {
 -      struct page *page = virt_to_page(pmd);
 +      struct ptdesc *ptdesc = virt_to_ptdesc(pmd);
  
 -      if (PageReserved(page))
 -              return free_reserved_page(page);
 +      if (pagetable_is_reserved(ptdesc))
 +              return free_reserved_ptdesc(ptdesc);
  
 -      BUG_ON(atomic_read(&page->pt_frag_refcount) <= 0);
 -      if (atomic_dec_and_test(&page->pt_frag_refcount)) {
 -              pgtable_pmd_page_dtor(page);
 -              __free_page(page);
 +      BUG_ON(atomic_read(&ptdesc->pt_frag_refcount) <= 0);
 +      if (atomic_dec_and_test(&ptdesc->pt_frag_refcount)) {
 +              pagetable_pmd_dtor(ptdesc);
 +              pagetable_free(ptdesc);
        }
  }
  
@@@ -37,7 -37,6 +37,6 @@@
  #include <mm/mmu_decl.h>
  
  unsigned int mmu_base_pid;
- unsigned long radix_mem_block_size __ro_after_init;
  
  static __ref void *early_alloc_pgtable(unsigned long size, int nid,
                        unsigned long region_start, unsigned long region_end)
@@@ -300,7 -299,7 +299,7 @@@ static int __meminit create_physical_ma
        bool prev_exec, exec = false;
        pgprot_t prot;
        int psize;
-       unsigned long max_mapping_size = radix_mem_block_size;
+       unsigned long max_mapping_size = memory_block_size;
  
        if (debug_pagealloc_enabled_or_kfence())
                max_mapping_size = PAGE_SIZE;
@@@ -502,58 -501,6 +501,6 @@@ static int __init radix_dt_scan_page_si
        return 1;
  }
  
- #ifdef CONFIG_MEMORY_HOTPLUG
- static int __init probe_memory_block_size(unsigned long node, const char *uname, int
-                                         depth, void *data)
- {
-       unsigned long *mem_block_size = (unsigned long *)data;
-       const __be32 *prop;
-       int len;
-       if (depth != 1)
-               return 0;
-       if (strcmp(uname, "ibm,dynamic-reconfiguration-memory"))
-               return 0;
-       prop = of_get_flat_dt_prop(node, "ibm,lmb-size", &len);
-       if (!prop || len < dt_root_size_cells * sizeof(__be32))
-               /*
-                * Nothing in the device tree
-                */
-               *mem_block_size = MIN_MEMORY_BLOCK_SIZE;
-       else
-               *mem_block_size = of_read_number(prop, dt_root_size_cells);
-       return 1;
- }
- static unsigned long __init radix_memory_block_size(void)
- {
-       unsigned long mem_block_size = MIN_MEMORY_BLOCK_SIZE;
-       /*
-        * OPAL firmware feature is set by now. Hence we are ok
-        * to test OPAL feature.
-        */
-       if (firmware_has_feature(FW_FEATURE_OPAL))
-               mem_block_size = 1UL * 1024 * 1024 * 1024;
-       else
-               of_scan_flat_dt(probe_memory_block_size, &mem_block_size);
-       return mem_block_size;
- }
- #else   /* CONFIG_MEMORY_HOTPLUG */
- static unsigned long __init radix_memory_block_size(void)
- {
-       return 1UL * 1024 * 1024 * 1024;
- }
- #endif /* CONFIG_MEMORY_HOTPLUG */
  void __init radix__early_init_devtree(void)
  {
        int rc;
                mmu_psize_defs[MMU_PAGE_64K].h_rpt_pgsize =
                        psize_to_rpti_pgsize(MMU_PAGE_64K);
        }
-       /*
-        * Max mapping size used when mapping pages. We don't use
-        * ppc_md.memory_block_size() here because this get called
-        * early and we don't have machine probe called yet. Also
-        * the pseries implementation only check for ibm,lmb-size.
-        * All hypervisor supporting radix do expose that device
-        * tree node.
-        */
-       radix_mem_block_size = radix_memory_block_size();
        return;
  }
  
@@@ -601,6 -538,17 +538,6 @@@ void __init radix__early_init_mmu(void
  #else
        mmu_virtual_psize = MMU_PAGE_4K;
  #endif
 -
 -#ifdef CONFIG_SPARSEMEM_VMEMMAP
 -      /* vmemmap mapping */
 -      if (mmu_psize_defs[MMU_PAGE_2M].shift) {
 -              /*
 -               * map vmemmap using 2M if available
 -               */
 -              mmu_vmemmap_psize = MMU_PAGE_2M;
 -      } else
 -              mmu_vmemmap_psize = mmu_virtual_psize;
 -#endif
  #endif
        /*
         * initialize page table size
@@@ -733,58 -681,8 +670,58 @@@ static void free_pud_table(pud_t *pud_s
        p4d_clear(p4d);
  }
  
 -static void remove_pte_table(pte_t *pte_start, unsigned long addr,
 -                           unsigned long end, bool direct)
 +#ifdef CONFIG_SPARSEMEM_VMEMMAP
 +static bool __meminit vmemmap_pmd_is_unused(unsigned long addr, unsigned long end)
 +{
 +      unsigned long start = ALIGN_DOWN(addr, PMD_SIZE);
 +
 +      return !vmemmap_populated(start, PMD_SIZE);
 +}
 +
 +static bool __meminit vmemmap_page_is_unused(unsigned long addr, unsigned long end)
 +{
 +      unsigned long start = ALIGN_DOWN(addr, PAGE_SIZE);
 +
 +      return !vmemmap_populated(start, PAGE_SIZE);
 +
 +}
 +#endif
 +
 +static void __meminit free_vmemmap_pages(struct page *page,
 +                                       struct vmem_altmap *altmap,
 +                                       int order)
 +{
 +      unsigned int nr_pages = 1 << order;
 +
 +      if (altmap) {
 +              unsigned long alt_start, alt_end;
 +              unsigned long base_pfn = page_to_pfn(page);
 +
 +              /*
 +               * With 2M vmemmap mapping we can have things set up
 +               * such that even though an altmap is specified we never
 +               * use the altmap.
 +               */
 +              alt_start = altmap->base_pfn;
 +              alt_end = altmap->base_pfn + altmap->reserve + altmap->free;
 +
 +              if (base_pfn >= alt_start && base_pfn < alt_end) {
 +                      vmem_altmap_free(altmap, nr_pages);
 +                      return;
 +              }
 +      }
 +
 +      if (PageReserved(page)) {
 +              /* allocated from memblock */
 +              while (nr_pages--)
 +                      free_reserved_page(page++);
 +      } else
 +              free_pages((unsigned long)page_address(page), order);
 +}
 +
 +static void __meminit remove_pte_table(pte_t *pte_start, unsigned long addr,
 +                                     unsigned long end, bool direct,
 +                                     struct vmem_altmap *altmap)
  {
        unsigned long next, pages = 0;
        pte_t *pte;
                if (!pte_present(*pte))
                        continue;
  
 -              if (!PAGE_ALIGNED(addr) || !PAGE_ALIGNED(next)) {
 -                      /*
 -                       * The vmemmap_free() and remove_section_mapping()
 -                       * codepaths call us with aligned addresses.
 -                       */
 -                      WARN_ONCE(1, "%s: unaligned range\n", __func__);
 -                      continue;
 +              if (PAGE_ALIGNED(addr) && PAGE_ALIGNED(next)) {
 +                      if (!direct)
 +                              free_vmemmap_pages(pte_page(*pte), altmap, 0);
 +                      pte_clear(&init_mm, addr, pte);
 +                      pages++;
                }
 -
 -              pte_clear(&init_mm, addr, pte);
 -              pages++;
 +#ifdef CONFIG_SPARSEMEM_VMEMMAP
 +              else if (!direct && vmemmap_page_is_unused(addr, next)) {
 +                      free_vmemmap_pages(pte_page(*pte), altmap, 0);
 +                      pte_clear(&init_mm, addr, pte);
 +              }
 +#endif
        }
        if (direct)
                update_page_count(mmu_virtual_psize, -pages);
  }
  
  static void __meminit remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
 -                                     unsigned long end, bool direct)
 +                                     unsigned long end, bool direct,
 +                                     struct vmem_altmap *altmap)
  {
        unsigned long next, pages = 0;
        pte_t *pte_base;
                        continue;
  
                if (pmd_is_leaf(*pmd)) {
 -                      if (!IS_ALIGNED(addr, PMD_SIZE) ||
 -                          !IS_ALIGNED(next, PMD_SIZE)) {
 -                              WARN_ONCE(1, "%s: unaligned range\n", __func__);
 -                              continue;
 +                      if (IS_ALIGNED(addr, PMD_SIZE) &&
 +                          IS_ALIGNED(next, PMD_SIZE)) {
 +                              if (!direct)
 +                                      free_vmemmap_pages(pmd_page(*pmd), altmap, get_order(PMD_SIZE));
 +                              pte_clear(&init_mm, addr, (pte_t *)pmd);
 +                              pages++;
                        }
 -                      pte_clear(&init_mm, addr, (pte_t *)pmd);
 -                      pages++;
 +#ifdef CONFIG_SPARSEMEM_VMEMMAP
 +                      else if (!direct && vmemmap_pmd_is_unused(addr, next)) {
 +                              free_vmemmap_pages(pmd_page(*pmd), altmap, get_order(PMD_SIZE));
 +                              pte_clear(&init_mm, addr, (pte_t *)pmd);
 +                      }
 +#endif
                        continue;
                }
  
                pte_base = (pte_t *)pmd_page_vaddr(*pmd);
 -              remove_pte_table(pte_base, addr, next, direct);
 +              remove_pte_table(pte_base, addr, next, direct, altmap);
                free_pte_table(pte_base, pmd);
        }
        if (direct)
  }
  
  static void __meminit remove_pud_table(pud_t *pud_start, unsigned long addr,
 -                                     unsigned long end, bool direct)
 +                                     unsigned long end, bool direct,
 +                                     struct vmem_altmap *altmap)
  {
        unsigned long next, pages = 0;
        pmd_t *pmd_base;
                }
  
                pmd_base = pud_pgtable(*pud);
 -              remove_pmd_table(pmd_base, addr, next, direct);
 +              remove_pmd_table(pmd_base, addr, next, direct, altmap);
                free_pmd_table(pmd_base, pud);
        }
        if (direct)
                update_page_count(MMU_PAGE_1G, -pages);
  }
  
 -static void __meminit remove_pagetable(unsigned long start, unsigned long end,
 -                                     bool direct)
 +static void __meminit
 +remove_pagetable(unsigned long start, unsigned long end, bool direct,
 +               struct vmem_altmap *altmap)
  {
        unsigned long addr, next;
        pud_t *pud_base;
                }
  
                pud_base = p4d_pgtable(*p4d);
 -              remove_pud_table(pud_base, addr, next, direct);
 +              remove_pud_table(pud_base, addr, next, direct, altmap);
                free_pud_table(pud_base, p4d);
        }
  
@@@ -943,7 -831,7 +880,7 @@@ int __meminit radix__create_section_map
  
  int __meminit radix__remove_section_mapping(unsigned long start, unsigned long end)
  {
 -      remove_pagetable(start, end, true);
 +      remove_pagetable(start, end, true, NULL);
        return 0;
  }
  #endif /* CONFIG_MEMORY_HOTPLUG */
@@@ -975,429 -863,10 +912,429 @@@ int __meminit radix__vmemmap_create_map
        return 0;
  }
  
 +
 +bool vmemmap_can_optimize(struct vmem_altmap *altmap, struct dev_pagemap *pgmap)
 +{
 +      if (radix_enabled())
 +              return __vmemmap_can_optimize(altmap, pgmap);
 +
 +      return false;
 +}
 +
 +int __meminit vmemmap_check_pmd(pmd_t *pmdp, int node,
 +                              unsigned long addr, unsigned long next)
 +{
 +      int large = pmd_large(*pmdp);
 +
 +      if (large)
 +              vmemmap_verify(pmdp_ptep(pmdp), node, addr, next);
 +
 +      return large;
 +}
 +
 +void __meminit vmemmap_set_pmd(pmd_t *pmdp, void *p, int node,
 +                             unsigned long addr, unsigned long next)
 +{
 +      pte_t entry;
 +      pte_t *ptep = pmdp_ptep(pmdp);
 +
 +      VM_BUG_ON(!IS_ALIGNED(addr, PMD_SIZE));
 +      entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL);
 +      set_pte_at(&init_mm, addr, ptep, entry);
 +      asm volatile("ptesync": : :"memory");
 +
 +      vmemmap_verify(ptep, node, addr, next);
 +}
 +
 +static pte_t * __meminit radix__vmemmap_pte_populate(pmd_t *pmdp, unsigned long addr,
 +                                                   int node,
 +                                                   struct vmem_altmap *altmap,
 +                                                   struct page *reuse)
 +{
 +      pte_t *pte = pte_offset_kernel(pmdp, addr);
 +
 +      if (pte_none(*pte)) {
 +              pte_t entry;
 +              void *p;
 +
 +              if (!reuse) {
 +                      /*
 +                       * make sure we don't create altmap mappings
 +                       * covering things outside the device.
 +                       */
 +                      if (altmap && altmap_cross_boundary(altmap, addr, PAGE_SIZE))
 +                              altmap = NULL;
 +
 +                      p = vmemmap_alloc_block_buf(PAGE_SIZE, node, altmap);
 +                      if (!p && altmap)
 +                              p = vmemmap_alloc_block_buf(PAGE_SIZE, node, NULL);
 +                      if (!p)
 +                              return NULL;
 +                      pr_debug("PAGE_SIZE vmemmap mapping\n");
 +              } else {
 +                      /*
 +                       * When a PTE/PMD entry is freed from the init_mm
 +                       * there's a free_pages() call to this page allocated
 +                       * above. Thus this get_page() is paired with the
 +                       * put_page_testzero() on the freeing path.
 +                       * This can only be called by certain ZONE_DEVICE paths,
 +                       * and through vmemmap_populate_compound_pages() when
 +                       * slab is available.
 +                       */
 +                      get_page(reuse);
 +                      p = page_to_virt(reuse);
 +                      pr_debug("Tail page reuse vmemmap mapping\n");
 +              }
 +
 +              VM_BUG_ON(!PAGE_ALIGNED(addr));
 +              entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL);
 +              set_pte_at(&init_mm, addr, pte, entry);
 +              asm volatile("ptesync": : :"memory");
 +      }
 +      return pte;
 +}
 +
 +static inline pud_t *vmemmap_pud_alloc(p4d_t *p4dp, int node,
 +                                     unsigned long address)
 +{
 +      pud_t *pud;
 +
 +      /* To keep it simple, all early vmemmap mappings are done at PAGE_SIZE */
 +      if (unlikely(p4d_none(*p4dp))) {
 +              if (unlikely(!slab_is_available())) {
 +                      pud = early_alloc_pgtable(PAGE_SIZE, node, 0, 0);
 +                      p4d_populate(&init_mm, p4dp, pud);
 +                      /* go to the pud_offset */
 +              } else
 +                      return pud_alloc(&init_mm, p4dp, address);
 +      }
 +      return pud_offset(p4dp, address);
 +}
 +
 +static inline pmd_t *vmemmap_pmd_alloc(pud_t *pudp, int node,
 +                                     unsigned long address)
 +{
 +      pmd_t *pmd;
 +
 +      /* To keep it simple, all early vmemmap mappings are done at PAGE_SIZE */
 +      if (unlikely(pud_none(*pudp))) {
 +              if (unlikely(!slab_is_available())) {
 +                      pmd = early_alloc_pgtable(PAGE_SIZE, node, 0, 0);
 +                      pud_populate(&init_mm, pudp, pmd);
 +              } else
 +                      return pmd_alloc(&init_mm, pudp, address);
 +      }
 +      return pmd_offset(pudp, address);
 +}
 +
 +static inline pte_t *vmemmap_pte_alloc(pmd_t *pmdp, int node,
 +                                     unsigned long address)
 +{
 +      pte_t *pte;
 +
 +      /* To keep it simple, all early vmemmap mappings are done at PAGE_SIZE */
 +      if (unlikely(pmd_none(*pmdp))) {
 +              if (unlikely(!slab_is_available())) {
 +                      pte = early_alloc_pgtable(PAGE_SIZE, node, 0, 0);
 +                      pmd_populate(&init_mm, pmdp, pte);
 +              } else
 +                      return pte_alloc_kernel(pmdp, address);
 +      }
 +      return pte_offset_kernel(pmdp, address);
 +}
 +
 +
 +
 +int __meminit radix__vmemmap_populate(unsigned long start, unsigned long end, int node,
 +                                    struct vmem_altmap *altmap)
 +{
 +      unsigned long addr;
 +      unsigned long next;
 +      pgd_t *pgd;
 +      p4d_t *p4d;
 +      pud_t *pud;
 +      pmd_t *pmd;
 +      pte_t *pte;
 +
 +      for (addr = start; addr < end; addr = next) {
 +              next = pmd_addr_end(addr, end);
 +
 +              pgd = pgd_offset_k(addr);
 +              p4d = p4d_offset(pgd, addr);
 +              pud = vmemmap_pud_alloc(p4d, node, addr);
 +              if (!pud)
 +                      return -ENOMEM;
 +              pmd = vmemmap_pmd_alloc(pud, node, addr);
 +              if (!pmd)
 +                      return -ENOMEM;
 +
 +              if (pmd_none(READ_ONCE(*pmd))) {
 +                      void *p;
 +
 +                      /*
 +                       * Keep it simple by checking addr for PMD_SIZE alignment
 +                       * and verifying the device boundary condition.
 +                       * For us to use a pmd mapping, both addr and pfn should
 +                       * be aligned. We skip if addr is not aligned and for
 +                       * the pfn we hope there is extra area in the altmap that
 +                       * can help to find an aligned block. This can result
 +                       * in altmap block allocation failures, in which case
 +                       * we fall back to RAM for the vmemmap allocation.
 +                       */
 +                      if (altmap && (!IS_ALIGNED(addr, PMD_SIZE) ||
 +                                     altmap_cross_boundary(altmap, addr, PMD_SIZE))) {
 +                              /*
 +                               * make sure we don't create altmap mappings
 +                               * covering things outside the device.
 +                               */
 +                              goto base_mapping;
 +                      }
 +
 +                      p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap);
 +                      if (p) {
 +                              vmemmap_set_pmd(pmd, p, node, addr, next);
 +                              pr_debug("PMD_SIZE vmemmap mapping\n");
 +                              continue;
 +                      } else if (altmap) {
 +                              /*
 +                               * A vmemmap block allocation can fail due to
 +                               * alignment requirements when we try to align
 +                               * things aggressively, thereby running out of
 +                               * space. Try base mapping on failure.
 +                               */
 +                              goto base_mapping;
 +                      }
 +              } else if (vmemmap_check_pmd(pmd, node, addr, next)) {
 +                      /*
 +                       * If a huge mapping exists due to an early call to
 +                       * vmemmap_populate(), let's try to use that.
 +                       */
 +                      continue;
 +              }
 +base_mapping:
 +              /*
 +               * Not able to allocate higher order memory to back the memmap,
 +               * or we found a pointer to a pte page. Allocate base page
 +               * size vmemmap.
 +               */
 +              pte = vmemmap_pte_alloc(pmd, node, addr);
 +              if (!pte)
 +                      return -ENOMEM;
 +
 +              pte = radix__vmemmap_pte_populate(pmd, addr, node, altmap, NULL);
 +              if (!pte)
 +                      return -ENOMEM;
 +
 +              vmemmap_verify(pte, node, addr, addr + PAGE_SIZE);
 +              next = addr + PAGE_SIZE;
 +      }
 +      return 0;
 +}
 +
 +static pte_t * __meminit radix__vmemmap_populate_address(unsigned long addr, int node,
 +                                                       struct vmem_altmap *altmap,
 +                                                       struct page *reuse)
 +{
 +      pgd_t *pgd;
 +      p4d_t *p4d;
 +      pud_t *pud;
 +      pmd_t *pmd;
 +      pte_t *pte;
 +
 +      pgd = pgd_offset_k(addr);
 +      p4d = p4d_offset(pgd, addr);
 +      pud = vmemmap_pud_alloc(p4d, node, addr);
 +      if (!pud)
 +              return NULL;
 +      pmd = vmemmap_pmd_alloc(pud, node, addr);
 +      if (!pmd)
 +              return NULL;
 +      if (pmd_leaf(*pmd))
 +              /*
 +               * The second page is mapped as a hugepage due to a nearby request.
 +               * Force our mapping to page size without deduplication
 +               */
 +              return NULL;
 +      pte = vmemmap_pte_alloc(pmd, node, addr);
 +      if (!pte)
 +              return NULL;
 +      radix__vmemmap_pte_populate(pmd, addr, node, NULL, NULL);
 +      vmemmap_verify(pte, node, addr, addr + PAGE_SIZE);
 +
 +      return pte;
 +}
 +
 +static pte_t * __meminit vmemmap_compound_tail_page(unsigned long addr,
 +                                                  unsigned long pfn_offset, int node)
 +{
 +      pgd_t *pgd;
 +      p4d_t *p4d;
 +      pud_t *pud;
 +      pmd_t *pmd;
 +      pte_t *pte;
 +      unsigned long map_addr;
 +
 +      /* the second vmemmap page which we use for duplication */
 +      map_addr = addr - pfn_offset * sizeof(struct page) + PAGE_SIZE;
 +      pgd = pgd_offset_k(map_addr);
 +      p4d = p4d_offset(pgd, map_addr);
 +      pud = vmemmap_pud_alloc(p4d, node, map_addr);
 +      if (!pud)
 +              return NULL;
 +      pmd = vmemmap_pmd_alloc(pud, node, map_addr);
 +      if (!pmd)
 +              return NULL;
 +      if (pmd_leaf(*pmd))
 +              /*
 +               * The second page is mapped as a hugepage due to a nearby request.
 +               * Force our mapping to page size without deduplication
 +               */
 +              return NULL;
 +      pte = vmemmap_pte_alloc(pmd, node, map_addr);
 +      if (!pte)
 +              return NULL;
 +      /*
 +       * Check if there exists a mapping to the left
 +       */
 +      if (pte_none(*pte)) {
 +              /*
 +               * Populate the head page vmemmap page.
 +               * It can fall in a different pmd, hence
 +               * vmemmap_populate_address()
 +               */
 +              pte = radix__vmemmap_populate_address(map_addr - PAGE_SIZE, node, NULL, NULL);
 +              if (!pte)
 +                      return NULL;
 +              /*
 +               * Populate the tail pages vmemmap page
 +               */
 +              pte = radix__vmemmap_pte_populate(pmd, map_addr, node, NULL, NULL);
 +              if (!pte)
 +                      return NULL;
 +              vmemmap_verify(pte, node, map_addr, map_addr + PAGE_SIZE);
 +              return pte;
 +      }
 +      return pte;
 +}
 +
 +int __meminit vmemmap_populate_compound_pages(unsigned long start_pfn,
 +                                            unsigned long start,
 +                                            unsigned long end, int node,
 +                                            struct dev_pagemap *pgmap)
 +{
 +      /*
 +       * We want to map things with base page size mappings so that
 +       * we can save space in the vmemmap. We could have a huge mapping
 +       * covering both edges.
 +       */
 +      unsigned long addr;
 +      unsigned long addr_pfn = start_pfn;
 +      unsigned long next;
 +      pgd_t *pgd;
 +      p4d_t *p4d;
 +      pud_t *pud;
 +      pmd_t *pmd;
 +      pte_t *pte;
 +
 +      for (addr = start; addr < end; addr = next) {
 +
 +              pgd = pgd_offset_k(addr);
 +              p4d = p4d_offset(pgd, addr);
 +              pud = vmemmap_pud_alloc(p4d, node, addr);
 +              if (!pud)
 +                      return -ENOMEM;
 +              pmd = vmemmap_pmd_alloc(pud, node, addr);
 +              if (!pmd)
 +                      return -ENOMEM;
 +
 +              if (pmd_leaf(READ_ONCE(*pmd))) {
 +                      /* existing huge mapping. Skip the range */
 +                      addr_pfn += (PMD_SIZE >> PAGE_SHIFT);
 +                      next = pmd_addr_end(addr, end);
 +                      continue;
 +              }
 +              pte = vmemmap_pte_alloc(pmd, node, addr);
 +              if (!pte)
 +                      return -ENOMEM;
 +              if (!pte_none(*pte)) {
 +                      /*
 +                       * This could be because we already have a compound
 +                       * page whose VMEMMAP_RESERVE_NR pages were mapped and
 +                       * this request falls within those pages.
 +                       */
 +                      addr_pfn += 1;
 +                      next = addr + PAGE_SIZE;
 +                      continue;
 +              } else {
 +                      unsigned long nr_pages = pgmap_vmemmap_nr(pgmap);
 +                      unsigned long pfn_offset = addr_pfn - ALIGN_DOWN(addr_pfn, nr_pages);
 +                      pte_t *tail_page_pte;
 +
 +                      /*
 +                       * If the address is aligned to the huge page size, it is
 +                       * the head mapping.
 +                       */
 +                      if (pfn_offset == 0) {
 +                              /* Populate the head page vmemmap page */
 +                              pte = radix__vmemmap_pte_populate(pmd, addr, node, NULL, NULL);
 +                              if (!pte)
 +                                      return -ENOMEM;
 +                              vmemmap_verify(pte, node, addr, addr + PAGE_SIZE);
 +
 +                              /*
 +                               * Populate the tail pages vmemmap page
 +                               * It can fall in a different pmd, hence
 +                               * vmemmap_populate_address()
 +                               */
 +                              pte = radix__vmemmap_populate_address(addr + PAGE_SIZE, node, NULL, NULL);
 +                              if (!pte)
 +                                      return -ENOMEM;
 +
 +                              addr_pfn += 2;
 +                              next = addr + 2 * PAGE_SIZE;
 +                              continue;
 +                      }
 +                      /*
 +                       * Get the 2nd mapping details.
 +                       * Also create it if it doesn't exist.
 +                       */
 +                      tail_page_pte = vmemmap_compound_tail_page(addr, pfn_offset, node);
 +                      if (!tail_page_pte) {
 +
 +                              pte = radix__vmemmap_pte_populate(pmd, addr, node, NULL, NULL);
 +                              if (!pte)
 +                                      return -ENOMEM;
 +                              vmemmap_verify(pte, node, addr, addr + PAGE_SIZE);
 +
 +                              addr_pfn += 1;
 +                              next = addr + PAGE_SIZE;
 +                              continue;
 +                      }
 +
 +                      pte = radix__vmemmap_pte_populate(pmd, addr, node, NULL, pte_page(*tail_page_pte));
 +                      if (!pte)
 +                              return -ENOMEM;
 +                      vmemmap_verify(pte, node, addr, addr + PAGE_SIZE);
 +
 +                      addr_pfn += 1;
 +                      next = addr + PAGE_SIZE;
 +                      continue;
 +              }
 +      }
 +      return 0;
 +}
 +
 +
  #ifdef CONFIG_MEMORY_HOTPLUG
  void __meminit radix__vmemmap_remove_mapping(unsigned long start, unsigned long page_size)
  {
 -      remove_pagetable(start, start + page_size, false);
 +      remove_pagetable(start, start + page_size, true, NULL);
 +}
 +
 +void __ref radix__vmemmap_free(unsigned long start, unsigned long end,
 +                             struct vmem_altmap *altmap)
 +{
 +      remove_pagetable(start, end, false, altmap);
  }
  #endif
  #endif
@@@ -1430,24 -899,7 +1367,24 @@@ unsigned long radix__pmd_hugepage_updat
  #endif
  
        old = radix__pte_update(mm, addr, pmdp_ptep(pmdp), clr, set, 1);
 -      trace_hugepage_update(addr, old, clr, set);
 +      trace_hugepage_update_pmd(addr, old, clr, set);
 +
 +      return old;
 +}
 +
 +unsigned long radix__pud_hugepage_update(struct mm_struct *mm, unsigned long addr,
 +                                       pud_t *pudp, unsigned long clr,
 +                                       unsigned long set)
 +{
 +      unsigned long old;
 +
 +#ifdef CONFIG_DEBUG_VM
 +      WARN_ON(!pud_devmap(*pudp));
 +      assert_spin_locked(pud_lockptr(mm, pudp));
 +#endif
 +
 +      old = radix__pte_update(mm, addr, pudp_ptep(pudp), clr, set, 1);
 +      trace_hugepage_update_pud(addr, old, clr, set);
  
        return old;
  }
@@@ -1528,17 -980,6 +1465,17 @@@ pmd_t radix__pmdp_huge_get_and_clear(st
        return old_pmd;
  }
  
 +pud_t radix__pudp_huge_get_and_clear(struct mm_struct *mm,
 +                                   unsigned long addr, pud_t *pudp)
 +{
 +      pud_t old_pud;
 +      unsigned long old;
 +
 +      old = radix__pud_hugepage_update(mm, addr, pudp, ~0UL, 0);
 +      old_pud = __pud(old);
 +      return old_pud;
 +}
 +
  #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
  
  void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep,
@@@ -127,21 -127,6 +127,6 @@@ static __always_inline void __tlbie_pid
        trace_tlbie(0, 0, rb, rs, ric, prs, r);
  }
  
- static __always_inline void __tlbie_pid_lpid(unsigned long pid,
-                                            unsigned long lpid,
-                                            unsigned long ric)
- {
-       unsigned long rb, rs, prs, r;
-       rb = PPC_BIT(53); /* IS = 1 */
-       rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31)));
-       prs = 1; /* process scoped */
-       r = 1;   /* radix format */
-       asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
-                    : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
-       trace_tlbie(0, 0, rb, rs, ric, prs, r);
- }
  static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric)
  {
        unsigned long rb,rs,prs,r;
@@@ -202,23 -187,6 +187,6 @@@ static __always_inline void __tlbie_va(
        trace_tlbie(0, 0, rb, rs, ric, prs, r);
  }
  
- static __always_inline void __tlbie_va_lpid(unsigned long va, unsigned long pid,
-                                           unsigned long lpid,
-                                           unsigned long ap, unsigned long ric)
- {
-       unsigned long rb, rs, prs, r;
-       rb = va & ~(PPC_BITMASK(52, 63));
-       rb |= ap << PPC_BITLSHIFT(58);
-       rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31)));
-       prs = 1; /* process scoped */
-       r = 1;   /* radix format */
-       asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
-                    : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
-       trace_tlbie(0, 0, rb, rs, ric, prs, r);
- }
  static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid,
                                            unsigned long ap, unsigned long ric)
  {
@@@ -264,22 -232,6 +232,6 @@@ static inline void fixup_tlbie_va_range
        }
  }
  
- static inline void fixup_tlbie_va_range_lpid(unsigned long va,
-                                            unsigned long pid,
-                                            unsigned long lpid,
-                                            unsigned long ap)
- {
-       if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
-               asm volatile("ptesync" : : : "memory");
-               __tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB);
-       }
-       if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
-               asm volatile("ptesync" : : : "memory");
-               __tlbie_va_lpid(va, pid, lpid, ap, RIC_FLUSH_TLB);
-       }
- }
  static inline void fixup_tlbie_pid(unsigned long pid)
  {
        /*
        }
  }
  
- static inline void fixup_tlbie_pid_lpid(unsigned long pid, unsigned long lpid)
- {
-       /*
-        * We can use any address for the invalidation, pick one which is
-        * probably unused as an optimisation.
-        */
-       unsigned long va = ((1UL << 52) - 1);
-       if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
-               asm volatile("ptesync" : : : "memory");
-               __tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB);
-       }
-       if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
-               asm volatile("ptesync" : : : "memory");
-               __tlbie_va_lpid(va, pid, lpid, mmu_get_ap(MMU_PAGE_64K),
-                               RIC_FLUSH_TLB);
-       }
- }
  static inline void fixup_tlbie_lpid_va(unsigned long va, unsigned long lpid,
                                       unsigned long ap)
  {
@@@ -416,31 -348,6 +348,6 @@@ static inline void _tlbie_pid(unsigned 
        asm volatile("eieio; tlbsync; ptesync": : :"memory");
  }
  
- static inline void _tlbie_pid_lpid(unsigned long pid, unsigned long lpid,
-                                  unsigned long ric)
- {
-       asm volatile("ptesync" : : : "memory");
-       /*
-        * Workaround the fact that the "ric" argument to __tlbie_pid
-        * must be a compile-time contraint to match the "i" constraint
-        * in the asm statement.
-        */
-       switch (ric) {
-       case RIC_FLUSH_TLB:
-               __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);
-               fixup_tlbie_pid_lpid(pid, lpid);
-               break;
-       case RIC_FLUSH_PWC:
-               __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
-               break;
-       case RIC_FLUSH_ALL:
-       default:
-               __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL);
-               fixup_tlbie_pid_lpid(pid, lpid);
-       }
-       asm volatile("eieio; tlbsync; ptesync" : : : "memory");
- }
  struct tlbiel_pid {
        unsigned long pid;
        unsigned long ric;
@@@ -566,20 -473,6 +473,6 @@@ static inline void __tlbie_va_range(uns
        fixup_tlbie_va_range(addr - page_size, pid, ap);
  }
  
- static inline void __tlbie_va_range_lpid(unsigned long start, unsigned long end,
-                                        unsigned long pid, unsigned long lpid,
-                                        unsigned long page_size,
-                                        unsigned long psize)
- {
-       unsigned long addr;
-       unsigned long ap = mmu_get_ap(psize);
-       for (addr = start; addr < end; addr += page_size)
-               __tlbie_va_lpid(addr, pid, lpid, ap, RIC_FLUSH_TLB);
-       fixup_tlbie_va_range_lpid(addr - page_size, pid, lpid, ap);
- }
  static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
                                      unsigned long psize, unsigned long ric)
  {
@@@ -660,18 -553,6 +553,6 @@@ static inline void _tlbie_va_range(unsi
        asm volatile("eieio; tlbsync; ptesync": : :"memory");
  }
  
- static inline void _tlbie_va_range_lpid(unsigned long start, unsigned long end,
-                                       unsigned long pid, unsigned long lpid,
-                                       unsigned long page_size,
-                                       unsigned long psize, bool also_pwc)
- {
-       asm volatile("ptesync" : : : "memory");
-       if (also_pwc)
-               __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
-       __tlbie_va_range_lpid(start, end, pid, lpid, page_size, psize);
-       asm volatile("eieio; tlbsync; ptesync" : : : "memory");
- }
  static inline void _tlbiel_va_range_multicast(struct mm_struct *mm,
                                unsigned long start, unsigned long end,
                                unsigned long pid, unsigned long page_size,
@@@ -820,7 -701,7 +701,7 @@@ void exit_lazy_flush_tlb(struct mm_stru
         * that's what the caller expects.
         */
        if (cpumask_test_cpu(cpu, mm_cpumask(mm))) {
-               atomic_dec(&mm->context.active_cpus);
+               dec_mm_active_cpus(mm);
                cpumask_clear_cpu(cpu, mm_cpumask(mm));
                always_flush = true;
        }
@@@ -987,7 -868,6 +868,7 @@@ void radix__flush_tlb_mm(struct mm_stru
                }
        }
        preempt_enable();
 +      mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
  }
  EXPORT_SYMBOL(radix__flush_tlb_mm);
  
@@@ -1021,7 -901,6 +902,7 @@@ static void __flush_all_mm(struct mm_st
                        _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL);
        }
        preempt_enable();
 +      mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
  }
  
  void radix__flush_all_mm(struct mm_struct *mm)
@@@ -1230,7 -1109,6 +1111,7 @@@ static inline void __radix__flush_tlb_r
        }
  out:
        preempt_enable();
 +      mmu_notifier_arch_invalidate_secondary_tlbs(mm, start, end);
  }
  
  void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
@@@ -1316,7 -1194,35 +1197,35 @@@ void radix__tlb_flush(struct mmu_gathe
         * See the comment for radix in arch_exit_mmap().
         */
        if (tlb->fullmm) {
-               __flush_all_mm(mm, true);
+               if (IS_ENABLED(CONFIG_MMU_LAZY_TLB_SHOOTDOWN)) {
+                       /*
+                        * Shootdown based lazy tlb mm refcounting means we
+                        * have to IPI everyone in the mm_cpumask anyway soon
+                        * when the mm goes away, so might as well do it as
+                        * part of the final flush now.
+                        *
+                        * If lazy shootdown was improved to reduce IPIs (e.g.,
+                        * by batching), then it may end up being better to use
+                        * tlbies here instead.
+                        */
+                       preempt_disable();
+                       smp_mb(); /* see radix__flush_tlb_mm */
+                       exit_flush_lazy_tlbs(mm);
+                       _tlbiel_pid(mm->context.id, RIC_FLUSH_ALL);
+                       /*
+                        * It should not be possible to have coprocessors still
+                        * attached here.
+                        */
+                       if (WARN_ON_ONCE(atomic_read(&mm->context.copros) > 0))
+                               __flush_all_mm(mm, true);
+                       preempt_enable();
+               } else {
+                       __flush_all_mm(mm, true);
+               }
        } else if ( (psize = radix_get_mmu_psize(page_size)) == -1) {
                if (!tlb->freed_tables)
                        radix__flush_tlb_mm(mm);
@@@ -1395,7 -1301,6 +1304,7 @@@ static void __radix__flush_tlb_range_ps
        }
  out:
        preempt_enable();
 +      mmu_notifier_arch_invalidate_secondary_tlbs(mm, start, end);
  }
  
  void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
@@@ -1465,13 -1370,6 +1374,13 @@@ void radix__flush_pmd_tlb_range(struct 
  }
  EXPORT_SYMBOL(radix__flush_pmd_tlb_range);
  
 +void radix__flush_pud_tlb_range(struct vm_area_struct *vma,
 +                              unsigned long start, unsigned long end)
 +{
 +      radix__flush_tlb_range_psize(vma->vm_mm, start, end, MMU_PAGE_1G);
 +}
 +EXPORT_SYMBOL(radix__flush_pud_tlb_range);
 +
  void radix__flush_tlb_all(void)
  {
        unsigned long rb,prs,r,rs;
  }
  
  #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ static __always_inline void __tlbie_pid_lpid(unsigned long pid,
+                                            unsigned long lpid,
+                                            unsigned long ric)
+ {
+       unsigned long rb, rs, prs, r;
+       rb = PPC_BIT(53); /* IS = 1 */
+       rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31)));
+       prs = 1; /* process scoped */
+       r = 1;   /* radix format */
+       asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+                    : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+       trace_tlbie(0, 0, rb, rs, ric, prs, r);
+ }
+ static __always_inline void __tlbie_va_lpid(unsigned long va, unsigned long pid,
+                                           unsigned long lpid,
+                                           unsigned long ap, unsigned long ric)
+ {
+       unsigned long rb, rs, prs, r;
+       rb = va & ~(PPC_BITMASK(52, 63));
+       rb |= ap << PPC_BITLSHIFT(58);
+       rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31)));
+       prs = 1; /* process scoped */
+       r = 1;   /* radix format */
+       asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+                    : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+       trace_tlbie(0, 0, rb, rs, ric, prs, r);
+ }
+ static inline void fixup_tlbie_pid_lpid(unsigned long pid, unsigned long lpid)
+ {
+       /*
+        * We can use any address for the invalidation, pick one which is
+        * probably unused as an optimisation.
+        */
+       unsigned long va = ((1UL << 52) - 1);
+       if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+               asm volatile("ptesync" : : : "memory");
+               __tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB);
+       }
+       if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+               asm volatile("ptesync" : : : "memory");
+               __tlbie_va_lpid(va, pid, lpid, mmu_get_ap(MMU_PAGE_64K),
+                               RIC_FLUSH_TLB);
+       }
+ }
+ static inline void _tlbie_pid_lpid(unsigned long pid, unsigned long lpid,
+                                  unsigned long ric)
+ {
+       asm volatile("ptesync" : : : "memory");
+       /*
+        * Work around the fact that the "ric" argument to __tlbie_pid
+        * must be a compile-time constraint to match the "i" constraint
+        * in the asm statement.
+        */
+       switch (ric) {
+       case RIC_FLUSH_TLB:
+               __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);
+               fixup_tlbie_pid_lpid(pid, lpid);
+               break;
+       case RIC_FLUSH_PWC:
+               __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
+               break;
+       case RIC_FLUSH_ALL:
+       default:
+               __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL);
+               fixup_tlbie_pid_lpid(pid, lpid);
+       }
+       asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+ }
+ static inline void fixup_tlbie_va_range_lpid(unsigned long va,
+                                            unsigned long pid,
+                                            unsigned long lpid,
+                                            unsigned long ap)
+ {
+       if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+               asm volatile("ptesync" : : : "memory");
+               __tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB);
+       }
+       if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+               asm volatile("ptesync" : : : "memory");
+               __tlbie_va_lpid(va, pid, lpid, ap, RIC_FLUSH_TLB);
+       }
+ }
+ static inline void __tlbie_va_range_lpid(unsigned long start, unsigned long end,
+                                        unsigned long pid, unsigned long lpid,
+                                        unsigned long page_size,
+                                        unsigned long psize)
+ {
+       unsigned long addr;
+       unsigned long ap = mmu_get_ap(psize);
+       for (addr = start; addr < end; addr += page_size)
+               __tlbie_va_lpid(addr, pid, lpid, ap, RIC_FLUSH_TLB);
+       fixup_tlbie_va_range_lpid(addr - page_size, pid, lpid, ap);
+ }
+ static inline void _tlbie_va_range_lpid(unsigned long start, unsigned long end,
+                                       unsigned long pid, unsigned long lpid,
+                                       unsigned long page_size,
+                                       unsigned long psize, bool also_pwc)
+ {
+       asm volatile("ptesync" : : : "memory");
+       if (also_pwc)
+               __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
+       __tlbie_va_range_lpid(start, end, pid, lpid, page_size, psize);
+       asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+ }
  /*
   * Performs process-scoped invalidations for a given LPID
   * as part of H_RPT_INVALIDATE hcall.
@@@ -40,6 -40,7 +40,7 @@@
  #include <linux/of_fdt.h>
  #include <linux/libfdt.h>
  #include <linux/memremap.h>
+ #include <linux/memory.h>
  
  #include <asm/pgalloc.h>
  #include <asm/page.h>
@@@ -92,7 -93,7 +93,7 @@@ static struct page * __meminit vmemmap_
   * a page table lookup here because with the hash translation we don't keep
   * vmemmap details in linux page table.
   */
 -static int __meminit vmemmap_populated(unsigned long vmemmap_addr, int vmemmap_map_size)
 +int __meminit vmemmap_populated(unsigned long vmemmap_addr, int vmemmap_map_size)
  {
        struct page *start;
        unsigned long vmemmap_end = vmemmap_addr + vmemmap_map_size;
@@@ -183,8 -184,8 +184,8 @@@ static __meminit int vmemmap_list_popul
        return 0;
  }
  
 -static bool altmap_cross_boundary(struct vmem_altmap *altmap, unsigned long start,
 -                              unsigned long page_size)
 +bool altmap_cross_boundary(struct vmem_altmap *altmap, unsigned long start,
 +                         unsigned long page_size)
  {
        unsigned long nr_pfn = page_size / sizeof(struct page);
        unsigned long start_pfn = page_to_pfn((struct page *)start);
        return false;
  }
  
 -int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
 -              struct vmem_altmap *altmap)
 +static int __meminit __vmemmap_populate(unsigned long start, unsigned long end, int node,
 +                                      struct vmem_altmap *altmap)
  {
        bool altmap_alloc;
        unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift;
        return 0;
  }
  
 +int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
 +                             struct vmem_altmap *altmap)
 +{
 +
 +#ifdef CONFIG_PPC_BOOK3S_64
 +      if (radix_enabled())
 +              return radix__vmemmap_populate(start, end, node, altmap);
 +#endif
 +
 +      return __vmemmap_populate(start, end, node, altmap);
 +}
 +
  #ifdef CONFIG_MEMORY_HOTPLUG
  static unsigned long vmemmap_list_free(unsigned long start)
  {
        return vmem_back->phys;
  }
  
 -void __ref vmemmap_free(unsigned long start, unsigned long end,
 -              struct vmem_altmap *altmap)
 +static void __ref __vmemmap_free(unsigned long start, unsigned long end,
 +                               struct vmem_altmap *altmap)
  {
        unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift;
        unsigned long page_order = get_order(page_size);
        start = ALIGN_DOWN(start, page_size);
        if (altmap) {
                alt_start = altmap->base_pfn;
 -              alt_end = altmap->base_pfn + altmap->reserve +
 -                        altmap->free + altmap->alloc + altmap->align;
 +              alt_end = altmap->base_pfn + altmap->reserve + altmap->free;
        }
  
        pr_debug("vmemmap_free %lx...%lx\n", start, end);
                vmemmap_remove_mapping(start, page_size);
        }
  }
 +
 +void __ref vmemmap_free(unsigned long start, unsigned long end,
 +                      struct vmem_altmap *altmap)
 +{
 +#ifdef CONFIG_PPC_BOOK3S_64
 +      if (radix_enabled())
 +              return radix__vmemmap_free(start, end, altmap);
 +#endif
 +      return __vmemmap_free(start, end, altmap);
 +}
 +
  #endif
  void register_page_bootmem_memmap(unsigned long section_nr,
                                  struct page *start_page, unsigned long size)
@@@ -493,6 -472,130 +494,130 @@@ static int __init dt_scan_mmu_pid_width
        return 1;
  }
  
+ /*
+  * Outside hotplug the kernel uses this value to map the kernel direct map
+  * with radix. To be compatible with older kernels, let's keep this value
+  * as 16M which is also SECTION_SIZE with SPARSEMEM. We can ideally map
+  * things with 1GB size in the case where we don't support hotplug.
+  */
+ #ifndef CONFIG_MEMORY_HOTPLUG
+ #define DEFAULT_MEMORY_BLOCK_SIZE     SZ_16M
+ #else
+ #define DEFAULT_MEMORY_BLOCK_SIZE     MIN_MEMORY_BLOCK_SIZE
+ #endif
+ static void update_memory_block_size(unsigned long *block_size, unsigned long mem_size)
+ {
+       unsigned long min_memory_block_size = DEFAULT_MEMORY_BLOCK_SIZE;
+       for (; *block_size > min_memory_block_size; *block_size >>= 2) {
+               if ((mem_size & *block_size) == 0)
+                       break;
+       }
+ }
+ static int __init probe_memory_block_size(unsigned long node, const char *uname, int
+                                         depth, void *data)
+ {
+       const char *type;
+       unsigned long *block_size = (unsigned long *)data;
+       const __be32 *reg, *endp;
+       int l;
+       if (depth != 1)
+               return 0;
+       /*
+        * If we have dynamic-reconfiguration-memory node, use the
+        * lmb value.
+        */
+       if (strcmp(uname, "ibm,dynamic-reconfiguration-memory") == 0) {
+               const __be32 *prop;
+               prop = of_get_flat_dt_prop(node, "ibm,lmb-size", &l);
+               if (!prop || l < dt_root_size_cells * sizeof(__be32))
+                       /*
+                        * Nothing in the device tree
+                        */
+                       *block_size = DEFAULT_MEMORY_BLOCK_SIZE;
+               else
+                       *block_size = of_read_number(prop, dt_root_size_cells);
+               /*
+                * We have found the final value. Don't probe further.
+                */
+               return 1;
+       }
+       /*
+        * Find all the device tree nodes of memory type and make sure
+        * the area can be mapped using the memory block size value
+        * we end up using. We start with 1G value and keep reducing
+        * we end up using. We start with a 1G value and keep reducing
+        * it such that we can map the entire area using memory_block_size.
+        * This will be used on powernv and older pseries that don't
+        * have the ibm,lmb-size node.
+        * For example, with P5 we can end up with
+        * memory@0 -> 128MB
+        * memory@128M -> 64M
+        * This will end up using a 64MB memory block size value.
+       type = of_get_flat_dt_prop(node, "device_type", NULL);
+       if (type == NULL || strcmp(type, "memory") != 0)
+               return 0;
+       reg = of_get_flat_dt_prop(node, "linux,usable-memory", &l);
+       if (!reg)
+               reg = of_get_flat_dt_prop(node, "reg", &l);
+       if (!reg)
+               return 0;
+       endp = reg + (l / sizeof(__be32));
+       while ((endp - reg) >= (dt_root_addr_cells + dt_root_size_cells)) {
+               const char *compatible;
+               u64 size;
+               dt_mem_next_cell(dt_root_addr_cells, &reg);
+               size = dt_mem_next_cell(dt_root_size_cells, &reg);
+               if (size) {
+                       update_memory_block_size(block_size, size);
+                       continue;
+               }
+               /*
+                * ibm,coherent-device-memory with linux,usable-memory = 0
+               * Force 256MiB block size. Workaround for GPUs on P9 PowerNV.
+               * linux,usable-memory == 0 implies driver-managed memory and
+               * we can't use a large memory block size due to hotplug/unplug
+               * limitations.
+                */
+               compatible = of_get_flat_dt_prop(node, "compatible", NULL);
+               if (compatible && !strcmp(compatible, "ibm,coherent-device-memory")) {
+                       if (*block_size > SZ_256M)
+                               *block_size = SZ_256M;
+                       /*
+                        * We keep 256M as the upper limit with GPU present.
+                        */
+                       return 0;
+               }
+       }
+       /* continue looking for other memory device types */
+       return 0;
+ }
+ /*
+  * Start with a 1G memory block size. Early init will
+  * fix this up with the correct value.
+  */
+ unsigned long memory_block_size __ro_after_init = 1UL << 30;
+ static void __init early_init_memory_block_size(void)
+ {
+       /*
+        * We need to probe memory_block_size early so that
+        * radix__early_init_mmu() can use this as the limit for
+        * the mapping page size.
+        */
+       of_scan_flat_dt(probe_memory_block_size, &memory_block_size);
+ }
  void __init mmu_early_init_devtree(void)
  {
        bool hvmode = !!(mfmsr() & MSR_HV);
        if (!hvmode)
                early_check_vec5();
  
+       early_init_memory_block_size();
        if (early_radix_enabled()) {
                radix__early_init_devtree();
  
@@@ -7,12 -7,13 +7,12 @@@
   */
  
  #include <linux/init.h>
 -#include <linux/fs_enet_pd.h>
  #include <linux/of_platform.h>
  
  #include <asm/time.h>
  #include <asm/machdep.h>
  #include <asm/cpm1.h>
- #include <asm/fs_pd.h>
+ #include <asm/8xx_immap.h>
  #include <asm/udbg.h>
  
  #include "mpc8xx.h"
@@@ -21,6 -21,7 +21,6 @@@
  #include <linux/device.h>
  #include <linux/delay.h>
  
 -#include <linux/fs_enet_pd.h>
  #include <linux/fs_uart_pd.h>
  #include <linux/fsl_devices.h>
  #include <linux/mii.h>
@@@ -36,7 -37,6 +36,6 @@@
  #include <asm/time.h>
  #include <asm/8xx_immap.h>
  #include <asm/cpm1.h>
- #include <asm/fs_pd.h>
  #include <asm/udbg.h>
  
  #include "mpc885ads.h"
@@@ -24,6 -24,7 +24,6 @@@
  #include <linux/device.h>
  #include <linux/delay.h>
  
 -#include <linux/fs_enet_pd.h>
  #include <linux/fs_uart_pd.h>
  #include <linux/fsl_devices.h>
  #include <linux/mii.h>
@@@ -38,7 -39,6 +38,6 @@@
  #include <asm/time.h>
  #include <asm/8xx_immap.h>
  #include <asm/cpm1.h>
- #include <asm/fs_pd.h>
  #include <asm/udbg.h>
  
  #include "mpc8xx.h"
@@@ -94,7 -94,6 +94,7 @@@ config PPC_BOOK3S_6
        select PPC_FPU
        select PPC_HAVE_PMU_SUPPORT
        select HAVE_ARCH_TRANSPARENT_HUGEPAGE
 +      select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
        select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION
        select ARCH_ENABLE_SPLIT_PMD_PTLOCK
        select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
@@@ -276,6 -275,13 +276,13 @@@ config TARGET_CP
        default "e500mc" if E500MC_CPU
        default "powerpc" if POWERPC_CPU
  
+ config TUNE_CPU
+       string
+       depends on POWERPC64_CPU
+       default "-mtune=power10" if $(cc-option,-mtune=power10)
+       default "-mtune=power9"  if $(cc-option,-mtune=power9)
+       default "-mtune=power8"  if $(cc-option,-mtune=power8)
  config PPC_BOOK3S
        def_bool y
        depends on PPC_BOOK3S_32 || PPC_BOOK3S_64
  #include <asm/drmem.h>
  #include "pseries.h"
  
- unsigned long pseries_memory_block_size(void)
- {
-       struct device_node *np;
-       u64 memblock_size = MIN_MEMORY_BLOCK_SIZE;
-       struct resource r;
-       np = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
-       if (np) {
-               int len;
-               int size_cells;
-               const __be32 *prop;
-               size_cells = of_n_size_cells(np);
-               prop = of_get_property(np, "ibm,lmb-size", &len);
-               if (prop && len >= size_cells * sizeof(__be32))
-                       memblock_size = of_read_number(prop, size_cells);
-               of_node_put(np);
-       } else  if (machine_is(pseries)) {
-               /* This fallback really only applies to pseries */
-               unsigned int memzero_size = 0;
-               np = of_find_node_by_path("/memory@0");
-               if (np) {
-                       if (!of_address_to_resource(np, 0, &r))
-                               memzero_size = resource_size(&r);
-                       of_node_put(np);
-               }
-               if (memzero_size) {
-                       /* We now know the size of memory@0, use this to find
-                        * the first memoryblock and get its size.
-                        */
-                       char buf[64];
-                       sprintf(buf, "/memory@%x", memzero_size);
-                       np = of_find_node_by_path(buf);
-                       if (np) {
-                               if (!of_address_to_resource(np, 0, &r))
-                                       memblock_size = resource_size(&r);
-                               of_node_put(np);
-                       }
-               }
-       }
-       return memblock_size;
- }
  static void dlpar_free_property(struct property *prop)
  {
        kfree(prop->name);
@@@ -283,7 -235,7 +235,7 @@@ static int dlpar_offline_lmb(struct drm
  
  static int pseries_remove_memblock(unsigned long base, unsigned long memblock_size)
  {
-       unsigned long block_sz, start_pfn;
+       unsigned long start_pfn;
        int sections_per_block;
        int i;
  
        if (!pfn_valid(start_pfn))
                goto out;
  
-       block_sz = pseries_memory_block_size();
-       sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE;
+       sections_per_block = memory_block_size / MIN_MEMORY_BLOCK_SIZE;
  
        for (i = 0; i < sections_per_block; i++) {
                __remove_memory(base, MIN_MEMORY_BLOCK_SIZE);
@@@ -354,7 -305,6 +305,6 @@@ static int dlpar_add_lmb(struct drmem_l
  static int dlpar_remove_lmb(struct drmem_lmb *lmb)
  {
        struct memory_block *mem_block;
-       unsigned long block_sz;
        int rc;
  
        if (!lmb_is_removable(lmb))
                return rc;
        }
  
-       block_sz = pseries_memory_block_size();
-       __remove_memory(lmb->base_addr, block_sz);
+       __remove_memory(lmb->base_addr, memory_block_size);
        put_device(&mem_block->dev);
  
        /* Update memory regions for memory remove */
-       memblock_remove(lmb->base_addr, block_sz);
+       memblock_remove(lmb->base_addr, memory_block_size);
  
        invalidate_lmb_associativity_index(lmb);
        lmb->flags &= ~DRCONF_MEM_ASSIGNED;
@@@ -637,7 -585,7 +585,7 @@@ static int dlpar_add_lmb(struct drmem_l
                nid = first_online_node;
  
        /* Add the memory */
 -      rc = __add_memory(nid, lmb->base_addr, block_sz, MHP_NONE);
 +      rc = __add_memory(nid, lmb->base_addr, block_sz, MHP_MEMMAP_ON_MEMORY);
        if (rc) {
                invalidate_lmb_associativity_index(lmb);
                return rc;
  #include <linux/device.h>
  #include <linux/platform_device.h>
  #include <linux/of.h>
- #include <linux/of_platform.h>
  #include <linux/phy.h>
  #include <linux/spi/spi.h>
  #include <linux/fsl_devices.h>
 -#include <linux/fs_enet_pd.h>
  #include <linux/fs_uart_pd.h>
  #include <linux/reboot.h>
  
@@@ -36,6 -36,8 +35,6 @@@
  #include <asm/cpm2.h>
  #include <asm/fsl_hcalls.h>   /* For the Freescale hypervisor */
  
 -extern void init_fcc_ioports(struct fs_platform_info*);
 -extern void init_fec_ioports(struct fs_platform_info*);
  extern void init_smc_ioports(struct fs_uart_platform_info*);
  static phys_addr_t immrbase = -1;
  
diff --combined arch/powerpc/xmon/xmon.c
@@@ -58,6 -58,7 +58,7 @@@
  #ifdef CONFIG_PPC64
  #include <asm/hvcall.h>
  #include <asm/paca.h>
+ #include <asm/lppaca.h>
  #endif
  
  #include "nonstdio.h"
@@@ -1084,7 -1085,7 +1085,7 @@@ cmds(struct pt_regs *excp
                                memzcan();
                                break;
                        case 'i':
 -                              show_mem(0, NULL);
 +                              show_mem();
                                break;
                        default:
                                termch = cmd;
@@@ -3303,7 -3304,7 +3304,7 @@@ static void show_pte(unsigned long addr
  {
        unsigned long tskv = 0;
        struct task_struct *volatile tsk = NULL;
-       struct mm_struct *mm;
+       struct mm_struct *volatile mm;
        pgd_t *pgdp;
        p4d_t *p4dp;
        pud_t *pudp;
@@@ -3828,9 -3829,9 +3829,9 @@@ static void dump_tlb_44x(void
  #ifdef CONFIG_PPC_BOOK3E_64
  static void dump_tlb_book3e(void)
  {
-       u32 mmucfg, pidmask, lpidmask;
+       u32 mmucfg;
        u64 ramask;
-       int i, tlb, ntlbs, pidsz, lpidsz, rasz, lrat = 0;
+       int i, tlb, ntlbs, pidsz, lpidsz, rasz;
        int mmu_version;
        static const char *pgsz_names[] = {
                "  1K",
        pidsz = ((mmucfg >> 6) & 0x1f) + 1;
        lpidsz = (mmucfg >> 24) & 0xf;
        rasz = (mmucfg >> 16) & 0x7f;
-       if ((mmu_version > 1) && (mmucfg & 0x10000))
-               lrat = 1;
        printf("Book3E MMU MAV=%d.0,%d TLBs,%d-bit PID,%d-bit LPID,%d-bit RA\n",
               mmu_version, ntlbs, pidsz, lpidsz, rasz);
-       pidmask = (1ul << pidsz) - 1;
-       lpidmask = (1ul << lpidsz) - 1;
        ramask = (1ull << rasz) - 1;
  
        for (tlb = 0; tlb < ntlbs; tlb++) {
@@@ -6,8 -6,10 +6,9 @@@
  #include <linux/input.h>
  #include <linux/kthread.h>
  #include <linux/mutex.h>
+ #include <linux/platform_device.h>
  #include <linux/spinlock.h>
  #include <linux/types.h>
 -#include <linux/of_device.h>
  
  enum ams_irq {
        AMS_IRQ_FREEFALL = 0x01,
@@@ -2,7 -2,6 +2,7 @@@
  #ifndef FS_ENET_H
  #define FS_ENET_H
  
 +#include <linux/clk.h>
  #include <linux/mii.h>
  #include <linux/netdevice.h>
  #include <linux/types.h>
@@@ -10,8 -9,8 +10,6 @@@
  #include <linux/phy.h>
  #include <linux/dma-mapping.h>
  
- #include <asm/fs_pd.h>
 -#include <linux/fs_enet_pd.h>
--
  #ifdef CONFIG_CPM1
  #include <asm/cpm1.h>
  #endif
@@@ -118,23 -117,6 +116,23 @@@ struct phy_info 
  #define ENET_RX_ALIGN  16
  #define ENET_RX_FRSIZE L1_CACHE_ALIGN(PKT_MAXBUF_SIZE + ENET_RX_ALIGN - 1)
  
 +struct fs_platform_info {
 +      /* device specific information */
 +      u32 cp_command;         /* CPM page/sblock/mcn */
 +
 +      u32 dpram_offset;
 +
 +      struct device_node *phy_node;
 +
 +      int rx_ring, tx_ring;   /* number of buffers on rx      */
 +      int rx_copybreak;       /* limit we copy small frames   */
 +      int napi_weight;        /* NAPI weight                  */
 +
 +      int use_rmii;           /* use RMII mode                */
 +
 +      struct clk *clk_per;    /* 'per' clock for register access */
 +};
 +
  struct fs_enet_private {
        struct napi_struct napi;
        struct device *dev;     /* pointer back to the device (must be initialized first) */
@@@ -209,6 -191,11 +207,6 @@@ void fs_cleanup_bds(struct net_device *
  #define PFX DRV_MODULE_NAME   ": "
  
  /***************************************************************************/
 -
 -int fs_enet_platform_init(void);
 -void fs_enet_platform_cleanup(void);
 -
 -/***************************************************************************/
  /* buffer descriptor access macros */
  
  /* access macros */
  #include <linux/platform_device.h>
  #include <linux/phy.h>
  #include <linux/of_address.h>
 -#include <linux/of_device.h>
  #include <linux/of_irq.h>
  #include <linux/gfp.h>
  #include <linux/pgtable.h>
  
  #include <asm/immap_cpm2.h>
- #include <asm/mpc8260.h>
  #include <asm/cpm2.h>
  
  #include <asm/irq.h>
@@@ -105,7 -105,7 +104,7 @@@ static int do_pd_setup(struct fs_enet_p
                goto out_ep;
  
        fep->fcc.mem = (void __iomem *)cpm2_immr;
 -      fpi->dpram_offset = cpm_dpalloc(128, 32);
 +      fpi->dpram_offset = cpm_muram_alloc(128, 32);
        if (IS_ERR_VALUE(fpi->dpram_offset)) {
                ret = fpi->dpram_offset;
                goto out_fcccp;
@@@ -547,7 -547,7 +546,7 @@@ static void tx_restart(struct net_devic
        }
        /* Now update the TBPTR and dirty flag to the current buffer */
        W32(ep, fen_genfcc.fcc_tbptr,
 -              (uint) (((void *)recheck_bd - fep->ring_base) +
 +              (uint)(((void __iomem *)recheck_bd - fep->ring_base) +
                fep->ring_mem_addr));
        fep->dirty_tx = recheck_bd;