Merge tag 'acpi-6.6-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael...
authorLinus Torvalds <torvalds@linux-foundation.org>
Sat, 30 Sep 2023 16:59:37 +0000 (09:59 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Sat, 30 Sep 2023 16:59:37 +0000 (09:59 -0700)
Pull ACPI fix from Rafael Wysocki:
 "Fix a possible NULL pointer dereference in the error path of
  acpi_video_bus_add() resulting from recent changes (Dinghao Liu)"

* tag 'acpi-6.6-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm:
  ACPI: video: Fix NULL pointer dereference in acpi_video_bus_add()

134 files changed:
Documentation/devicetree/bindings/ata/pata-common.yaml
Documentation/devicetree/bindings/clock/renesas,5p35023.yaml
Documentation/devicetree/bindings/spi/fsl-imx-cspi.yaml
Documentation/filesystems/porting.rst
MAINTAINERS
arch/loongarch/include/asm/elf.h
arch/loongarch/kernel/module.c
arch/loongarch/kernel/numa.c
arch/mips/alchemy/devboards/db1000.c
arch/mips/alchemy/devboards/db1200.c
arch/mips/alchemy/devboards/db1300.c
arch/powerpc/kernel/stacktrace.c
arch/xtensa/boot/Makefile
arch/xtensa/boot/lib/zmem.c
arch/xtensa/include/asm/core.h
arch/xtensa/include/asm/hw_breakpoint.h
arch/xtensa/include/asm/processor.h
arch/xtensa/include/asm/ptrace.h
arch/xtensa/include/asm/smp.h
arch/xtensa/include/asm/tlb.h
arch/xtensa/kernel/hw_breakpoint.c
arch/xtensa/kernel/irq.c
arch/xtensa/kernel/ptrace.c
arch/xtensa/kernel/signal.c
arch/xtensa/kernel/smp.c
arch/xtensa/kernel/stacktrace.c
arch/xtensa/kernel/traps.c
arch/xtensa/lib/umulsidi3.S
arch/xtensa/mm/fault.c
arch/xtensa/mm/tlb.c
arch/xtensa/platforms/iss/network.c
block/blk-rq-qos.c
block/disk-events.c
drivers/accel/ivpu/ivpu_drv.c
drivers/accel/ivpu/ivpu_drv.h
drivers/accel/ivpu/ivpu_fw.c
drivers/accel/ivpu/ivpu_gem.h
drivers/accel/ivpu/ivpu_hw_40xx.c
drivers/accel/ivpu/ivpu_hw_40xx_reg.h
drivers/accel/ivpu/ivpu_ipc.c
drivers/ata/libata-core.c
drivers/ata/libata-eh.c
drivers/ata/libata-scsi.c
drivers/ata/libata-transport.c
drivers/ata/libata.h
drivers/block/rbd.c
drivers/clk/clk-si521xx.c
drivers/clk/clk-versaclock3.c
drivers/clk/sprd/ums512-clk.c
drivers/clk/tegra/clk-bpmp.c
drivers/firewire/sbp2.c
drivers/gpio/gpio-pmic-eic-sprd.c
drivers/gpio/gpio-timberdale.c
drivers/gpu/drm/i915/gem/i915_gem_shmem.c
drivers/gpu/drm/i915/gt/intel_ggtt.c
drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
drivers/irqchip/irq-xtensa-mx.c
drivers/power/reset/Kconfig
drivers/power/reset/pwr-mlxbf.c
drivers/power/reset/vexpress-poweroff.c
drivers/power/supply/Kconfig
drivers/power/supply/ab8500_btemp.c
drivers/power/supply/ab8500_chargalg.c
drivers/power/supply/mt6370-charger.c
drivers/power/supply/power_supply_sysfs.c
drivers/power/supply/rk817_charger.c
drivers/power/supply/rt9467-charger.c
drivers/power/supply/ucs1002_power.c
drivers/scsi/scsi.c
drivers/scsi/scsi_scan.c
drivers/scsi/sd.c
drivers/scsi/sd.h
drivers/spi/spi-cs42l43.c
drivers/spi/spi-gxp.c
drivers/vfio/mdev/mdev_sysfs.c
drivers/vfio/pci/pds/Kconfig
drivers/vfio/pci/pds/vfio_dev.c
fs/aio.c
fs/btrfs/delayed-ref.c
fs/btrfs/delayed-ref.h
fs/btrfs/extent-tree.c
fs/btrfs/extent_io.c
fs/btrfs/super.c
fs/btrfs/transaction.c
fs/btrfs/tree-log.c
fs/btrfs/volumes.c
fs/ceph/crypto.c
fs/fs-writeback.c
fs/libfs.c
fs/nfsd/nfs4xdr.c
fs/ntfs3/super.c
fs/overlayfs/copy_up.c
fs/overlayfs/file.c
fs/pipe.c
fs/reiserfs/reiserfs.h
fs/smb/client/fs_context.c
fs/smb/server/connection.c
fs/smb/server/server.c
fs/smb/server/smb2misc.c
fs/xfs/xfs_export.c
fs/xfs/xfs_inode.c
fs/xfs/xfs_itable.c
fs/xfs/xfs_qm.c
include/linux/ceph/ceph_fs.h
include/linux/libata.h
include/scsi/scsi.h
include/scsi/scsi_device.h
include/scsi/scsi_host.h
io_uring/fs.c
kernel/workqueue.c
mm/slab_common.c
tools/arch/x86/include/asm/msr-index.h
tools/include/uapi/asm-generic/unistd.h
tools/include/uapi/drm/drm.h
tools/include/uapi/linux/seccomp.h [new file with mode: 0644]
tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
tools/perf/arch/s390/entry/syscalls/syscall.tbl
tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
tools/perf/bench/sched-seccomp-notify.c
tools/perf/check-headers.sh
tools/perf/pmu-events/jevents.py
tools/perf/pmu-events/metric.py
tools/perf/util/bpf-prologue.c [deleted file]
tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
tools/perf/util/hashmap.h
tools/perf/util/pmu.c
tools/testing/selftests/powerpc/Makefile
tools/testing/selftests/powerpc/pmu/Makefile
tools/testing/selftests/user_events/abi_test.c
tools/testing/selftests/user_events/dyn_test.c
tools/testing/selftests/user_events/ftrace_test.c
tools/testing/selftests/user_events/perf_test.c
tools/testing/selftests/user_events/user_events_selftests.h

index 337ddf1..4e867dd 100644 (file)
@@ -38,6 +38,7 @@ patternProperties:
       ID number 0 and the slave drive will have ID number 1. The PATA port
       nodes will be named "ide-port".
     type: object
+    additionalProperties: false
 
     properties:
       reg:
index 839648e..42b6f80 100644 (file)
@@ -37,6 +37,9 @@ properties:
     maxItems: 1
 
   '#clock-cells':
+    description:
+      The index in the assigned-clocks is mapped to the output clock as below
+      0 - REF, 1 - SE1, 2 - SE2, 3 - SE3, 4 - DIFF1, 5 - DIFF2.
     const: 1
 
   clocks:
@@ -68,7 +71,7 @@ examples:
             reg = <0x68>;
             #clock-cells = <1>;
 
-            clocks = <&x1_x2>;
+            clocks = <&x1>;
 
             renesas,settings = [
                 80 00 11 19 4c 02 23 7f 83 19 08 a9 5f 25 24 bf
@@ -79,8 +82,8 @@ examples:
             assigned-clocks = <&versa3 0>, <&versa3 1>,
                               <&versa3 2>, <&versa3 3>,
                               <&versa3 4>, <&versa3 5>;
-            assigned-clock-rates = <12288000>, <25000000>,
-                                   <12000000>, <11289600>,
-                                   <11289600>, <24000000>;
+            assigned-clock-rates = <24000000>, <11289600>,
+                                   <11289600>, <12000000>,
+                                   <25000000>, <12288000>;
         };
     };
index 2f593c7..14cac0e 100644 (file)
@@ -23,6 +23,13 @@ properties:
       - const: fsl,imx51-ecspi
       - const: fsl,imx53-ecspi
       - items:
+          - enum:
+              - fsl,imx25-cspi
+              - fsl,imx50-cspi
+              - fsl,imx51-cspi
+              - fsl,imx53-cspi
+          - const: fsl,imx35-cspi
+      - items:
           - const: fsl,imx8mp-ecspi
           - const: fsl,imx6ul-ecspi
       - items:
index deac4e9..4d05b98 100644 (file)
@@ -949,3 +949,99 @@ mmap_lock held.  All in-tree users have been audited and do not seem to
 depend on the mmap_lock being held, but out of tree users should verify
 for themselves.  If they do need it, they can return VM_FAULT_RETRY to
 be called with the mmap_lock held.
+
+---
+
+**mandatory**
+
+The order of opening block devices and matching or creating superblocks has
+changed.
+
+The old logic opened block devices first and then tried to find a
+suitable superblock to reuse based on the block device pointer.
+
+The new logic tries to find a suitable superblock first based on the device
+number, and opens the block device afterwards.
+
+Since opening block devices cannot happen under s_umount because of lock
+ordering requirements, s_umount is now dropped while opening block devices and
+reacquired before calling fill_super().
+
+In the old logic concurrent mounters would find the superblock on the list of
+superblocks for the filesystem type. Since the first opener of the block device
+would hold s_umount they would wait until the superblock became either born or
+was discarded due to initialization failure.
+
+Since the new logic drops s_umount, concurrent mounters could grab s_umount and
+would spin. Instead they are now made to wait using an explicit wait-wake
+mechanism without having to hold s_umount.
+
+---
+
+**mandatory**
+
+The holder of a block device is now the superblock.
+
+The holder of a block device used to be the file_system_type which wasn't
+particularly useful. It wasn't possible to go from block device to owning
+superblock without matching on the device pointer stored in the superblock.
+This mechanism would only work for a single device so the block layer couldn't
+find the owning superblock of any additional devices.
+
+In the old mechanism reusing or creating a superblock for a racing mount(2) and
+umount(2) relied on the file_system_type as the holder. This was severely
+underdocumented however:
+
+(1) Any concurrent mounter that managed to grab an active reference on an
+    existing superblock was made to wait until the superblock either became
+    ready or until the superblock was removed from the list of superblocks of
+    the filesystem type. If the superblock is ready the caller would simply
+    reuse it.
+
+(2) If the mounter came after deactivate_locked_super() but before
+    the superblock had been removed from the list of superblocks of the
+    filesystem type the mounter would wait until the superblock was shutdown,
+    reuse the block device and allocate a new superblock.
+
+(3) If the mounter came after deactivate_locked_super() and after
+    the superblock had been removed from the list of superblocks of the
+    filesystem type the mounter would reuse the block device and allocate a new
+    superblock (the bd_holder pointer may still be set to the filesystem type).
+
+Because the holder of the block device was the file_system_type any concurrent
+mounter could open the block devices of any superblock of the same
+file_system_type without risking seeing EBUSY because the block device was
+still in use by another superblock.
+
+Making the superblock the owner of the block device changes this as the holder
+is now a unique superblock and thus block devices associated with it cannot be
+reused by concurrent mounters. So a concurrent mounter in (2) could suddenly
+see EBUSY when trying to open a block device whose holder was a different
+superblock.
+
+The new logic thus waits until the superblock and the devices are shutdown in
+->kill_sb(). Removal of the superblock from the list of superblocks of the
+filesystem type is now moved to a later point when the devices are closed:
+
+(1) Any concurrent mounter managing to grab an active reference on an existing
+    superblock is made to wait until the superblock is either ready or until
+    the superblock and all devices are shutdown in ->kill_sb(). If the
+    superblock is ready the caller will simply reuse it.
+
+(2) If the mounter comes after deactivate_locked_super() but before
+    the superblock has been removed from the list of superblocks of the
+    filesystem type the mounter is made to wait until the superblock and the
+    devices are shut down in ->kill_sb() and the superblock is removed from the
+    list of superblocks of the filesystem type. The mounter will allocate a new
+    superblock and grab ownership of the block device (the bd_holder pointer of
+    the block device will be set to the newly allocated superblock).
+
+(3) This case is now collapsed into (2) as the superblock is left on the list
+    of superblocks of the filesystem type until all devices are shutdown in
+    ->kill_sb(). In other words, if the superblock isn't on the list of
+    superblocks of the filesystem type anymore then it has given up ownership of
+    all associated block devices (the bd_holder pointer is NULL).
+
+As this is a VFS level change it has no practical consequences for filesystems
+other than that all of them must use one of the provided kill_litter_super(),
+kill_anon_super(), or kill_block_super() helpers.
index b199956..67ce91c 100644 (file)
@@ -6647,6 +6647,7 @@ F:        drivers/gpu/drm/panel/panel-novatek-nt36672a.c
 DRM DRIVER FOR NVIDIA GEFORCE/QUADRO GPUS
 M:     Karol Herbst <kherbst@redhat.com>
 M:     Lyude Paul <lyude@redhat.com>
+M:     Danilo Krummrich <dakr@redhat.com>
 L:     dri-devel@lists.freedesktop.org
 L:     nouveau@lists.freedesktop.org
 S:     Supported
index 7af0ceb..b9a4ab5 100644 (file)
 #define R_LARCH_TLS_GD_HI20                    98
 #define R_LARCH_32_PCREL                       99
 #define R_LARCH_RELAX                          100
+#define R_LARCH_DELETE                         101
+#define R_LARCH_ALIGN                          102
+#define R_LARCH_PCREL20_S2                     103
+#define R_LARCH_CFA                            104
+#define R_LARCH_ADD6                           105
+#define R_LARCH_SUB6                           106
+#define R_LARCH_ADD_ULEB128                    107
+#define R_LARCH_SUB_ULEB128                    108
+#define R_LARCH_64_PCREL                       109
 
 #ifndef ELF_ARCH
 
index b8b8608..b13b285 100644 (file)
@@ -367,6 +367,24 @@ static int apply_r_larch_got_pc(struct module *mod,
        return apply_r_larch_pcala(mod, location, got, rela_stack, rela_stack_top, type);
 }
 
+static int apply_r_larch_32_pcrel(struct module *mod, u32 *location, Elf_Addr v,
+                                 s64 *rela_stack, size_t *rela_stack_top, unsigned int type)
+{
+       ptrdiff_t offset = (void *)v - (void *)location;
+
+       *(u32 *)location = offset;
+       return 0;
+}
+
+static int apply_r_larch_64_pcrel(struct module *mod, u32 *location, Elf_Addr v,
+                                 s64 *rela_stack, size_t *rela_stack_top, unsigned int type)
+{
+       ptrdiff_t offset = (void *)v - (void *)location;
+
+       *(u64 *)location = offset;
+       return 0;
+}
+
 /*
  * reloc_handlers_rela() - Apply a particular relocation to a module
  * @mod: the module to apply the reloc to
@@ -382,7 +400,7 @@ typedef int (*reloc_rela_handler)(struct module *mod, u32 *location, Elf_Addr v,
 
 /* The handlers for known reloc types */
 static reloc_rela_handler reloc_rela_handlers[] = {
-       [R_LARCH_NONE ... R_LARCH_RELAX]                     = apply_r_larch_error,
+       [R_LARCH_NONE ... R_LARCH_64_PCREL]                  = apply_r_larch_error,
 
        [R_LARCH_NONE]                                       = apply_r_larch_none,
        [R_LARCH_32]                                         = apply_r_larch_32,
@@ -396,6 +414,8 @@ static reloc_rela_handler reloc_rela_handlers[] = {
        [R_LARCH_SOP_POP_32_S_10_5 ... R_LARCH_SOP_POP_32_U] = apply_r_larch_sop_imm_field,
        [R_LARCH_ADD32 ... R_LARCH_SUB64]                    = apply_r_larch_add_sub,
        [R_LARCH_PCALA_HI20...R_LARCH_PCALA64_HI12]          = apply_r_larch_pcala,
+       [R_LARCH_32_PCREL]                                   = apply_r_larch_32_pcrel,
+       [R_LARCH_64_PCREL]                                   = apply_r_larch_64_pcrel,
 };
 
 int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
index c7d33c4..6e65ff1 100644 (file)
@@ -436,7 +436,7 @@ void __init paging_init(void)
 
 void __init mem_init(void)
 {
-       high_memory = (void *) __va(get_num_physpages() << PAGE_SHIFT);
+       high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT);
        memblock_free_all();
 }
 
index 012da04..7b9f91d 100644 (file)
@@ -164,6 +164,7 @@ static struct platform_device db1x00_audio_dev = {
 
 /******************************************************************************/
 
+#ifdef CONFIG_MMC_AU1X
 static irqreturn_t db1100_mmc_cd(int irq, void *ptr)
 {
        mmc_detect_change(ptr, msecs_to_jiffies(500));
@@ -369,6 +370,7 @@ static struct platform_device db1100_mmc1_dev = {
        .num_resources  = ARRAY_SIZE(au1100_mmc1_res),
        .resource       = au1100_mmc1_res,
 };
+#endif /* CONFIG_MMC_AU1X */
 
 /******************************************************************************/
 
@@ -440,8 +442,10 @@ static struct platform_device *db1x00_devs[] = {
 
 static struct platform_device *db1100_devs[] = {
        &au1100_lcd_device,
+#ifdef CONFIG_MMC_AU1X
        &db1100_mmc0_dev,
        &db1100_mmc1_dev,
+#endif
 };
 
 int __init db1000_dev_setup(void)
index 76080c7..f521874 100644 (file)
@@ -326,6 +326,7 @@ static struct platform_device db1200_ide_dev = {
 
 /**********************************************************************/
 
+#ifdef CONFIG_MMC_AU1X
 /* SD carddetects:  they're supposed to be edge-triggered, but ack
  * doesn't seem to work (CPLD Rev 2).  Instead, the screaming one
  * is disabled and its counterpart enabled.  The 200ms timeout is
@@ -584,6 +585,7 @@ static struct platform_device pb1200_mmc1_dev = {
        .num_resources  = ARRAY_SIZE(au1200_mmc1_res),
        .resource       = au1200_mmc1_res,
 };
+#endif /* CONFIG_MMC_AU1X */
 
 /**********************************************************************/
 
@@ -751,7 +753,9 @@ static struct platform_device db1200_audiodma_dev = {
 static struct platform_device *db1200_devs[] __initdata = {
        NULL,           /* PSC0, selected by S6.8 */
        &db1200_ide_dev,
+#ifdef CONFIG_MMC_AU1X
        &db1200_mmc0_dev,
+#endif
        &au1200_lcd_dev,
        &db1200_eth_dev,
        &db1200_nand_dev,
@@ -762,7 +766,9 @@ static struct platform_device *db1200_devs[] __initdata = {
 };
 
 static struct platform_device *pb1200_devs[] __initdata = {
+#ifdef CONFIG_MMC_AU1X
        &pb1200_mmc1_dev,
+#endif
 };
 
 /* Some peripheral base addresses differ on the PB1200 */
index ff61901..d377e04 100644 (file)
@@ -450,6 +450,7 @@ static struct platform_device db1300_ide_dev = {
 
 /**********************************************************************/
 
+#ifdef CONFIG_MMC_AU1X
 static irqreturn_t db1300_mmc_cd(int irq, void *ptr)
 {
        disable_irq_nosync(irq);
@@ -632,6 +633,7 @@ static struct platform_device db1300_sd0_dev = {
        .resource       = au1300_sd0_res,
        .num_resources  = ARRAY_SIZE(au1300_sd0_res),
 };
+#endif /* CONFIG_MMC_AU1X */
 
 /**********************************************************************/
 
@@ -767,8 +769,10 @@ static struct platform_device *db1300_dev[] __initdata = {
        &db1300_5waysw_dev,
        &db1300_nand_dev,
        &db1300_ide_dev,
+#ifdef CONFIG_MMC_AU1X
        &db1300_sd0_dev,
        &db1300_sd1_dev,
+#endif
        &db1300_lcd_dev,
        &db1300_ac97_dev,
        &db1300_i2s_dev,
index b15f15d..e6a958a 100644 (file)
@@ -73,29 +73,12 @@ int __no_sanitize_address arch_stack_walk_reliable(stack_trace_consume_fn consum
        bool firstframe;
 
        stack_end = stack_page + THREAD_SIZE;
-       if (!is_idle_task(task)) {
-               /*
-                * For user tasks, this is the SP value loaded on
-                * kernel entry, see "PACAKSAVE(r13)" in _switch() and
-                * system_call_common().
-                *
-                * Likewise for non-swapper kernel threads,
-                * this also happens to be the top of the stack
-                * as setup by copy_thread().
-                *
-                * Note that stack backlinks are not properly setup by
-                * copy_thread() and thus, a forked task() will have
-                * an unreliable stack trace until it's been
-                * _switch()'ed to for the first time.
-                */
-               stack_end -= STACK_USER_INT_FRAME_SIZE;
-       } else {
-               /*
-                * idle tasks have a custom stack layout,
-                * c.f. cpu_idle_thread_init().
-                */
+
+       // See copy_thread() for details.
+       if (task->flags & PF_KTHREAD)
                stack_end -= STACK_FRAME_MIN_SIZE;
-       }
+       else
+               stack_end -= STACK_USER_INT_FRAME_SIZE;
 
        if (task == current)
                sp = current_stack_frame();
index a65b7a9..d8b0fad 100644 (file)
@@ -9,8 +9,7 @@
 
 
 # KBUILD_CFLAGS used when building rest of boot (takes effect recursively)
-KBUILD_CFLAGS  += -fno-builtin -Iarch/$(ARCH)/boot/include
-HOSTFLAGS      += -Iarch/$(ARCH)/boot/include
+KBUILD_CFLAGS  += -fno-builtin
 
 subdir-y       := lib
 targets                += vmlinux.bin vmlinux.bin.gz
index e3ecd74..b891893 100644 (file)
@@ -4,13 +4,14 @@
 /* bits taken from ppc */
 
 extern void *avail_ram, *end_avail;
+void gunzip(void *dst, int dstlen, unsigned char *src, int *lenp);
 
-void exit (void)
+static void exit(void)
 {
   for (;;);
 }
 
-void *zalloc(unsigned size)
+static void *zalloc(unsigned int size)
 {
         void *p = avail_ram;
 
index 3f5ffae..6f02f6f 100644 (file)
@@ -6,6 +6,10 @@
 
 #include <variant/core.h>
 
+#ifndef XCHAL_HAVE_DIV32
+#define XCHAL_HAVE_DIV32 0
+#endif
+
 #ifndef XCHAL_HAVE_EXCLUSIVE
 #define XCHAL_HAVE_EXCLUSIVE 0
 #endif
index 9f119c1..9ec86f4 100644 (file)
@@ -48,6 +48,7 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp);
 void hw_breakpoint_pmu_read(struct perf_event *bp);
 int check_hw_breakpoint(struct pt_regs *regs);
 void clear_ptrace_hw_breakpoint(struct task_struct *tsk);
+void restore_dbreak(void);
 
 #else
 
index a6d09fe..d008a15 100644 (file)
@@ -14,6 +14,8 @@
 
 #include <linux/compiler.h>
 #include <linux/stringify.h>
+
+#include <asm/bootparam.h>
 #include <asm/ptrace.h>
 #include <asm/types.h>
 #include <asm/regs.h>
@@ -217,6 +219,9 @@ struct mm_struct;
 
 extern unsigned long __get_wchan(struct task_struct *p);
 
+void init_arch(bp_tag_t *bp_start);
+void do_notify_resume(struct pt_regs *regs);
+
 #define KSTK_EIP(tsk)          (task_pt_regs(tsk)->pc)
 #define KSTK_ESP(tsk)          (task_pt_regs(tsk)->areg[1])
 
index 308f209..a270467 100644 (file)
@@ -106,6 +106,9 @@ static inline unsigned long regs_return_value(struct pt_regs *regs)
        return regs->areg[2];
 }
 
+int do_syscall_trace_enter(struct pt_regs *regs);
+void do_syscall_trace_leave(struct pt_regs *regs);
+
 #else  /* __ASSEMBLY__ */
 
 # include <asm/asm-offsets.h>
index 5dc5bf8..e446e6f 100644 (file)
@@ -23,6 +23,7 @@ struct cpumask;
 void arch_send_call_function_ipi_mask(const struct cpumask *mask);
 void arch_send_call_function_single_ipi(int cpu);
 
+void secondary_start_kernel(void);
 void smp_init_cpus(void);
 void secondary_init_irq(void);
 void ipi_init(void);
index 5088993..8c3ceb4 100644 (file)
@@ -18,4 +18,6 @@
 
 #define __pte_free_tlb(tlb, pte, address)      pte_free((tlb)->mm, pte)
 
+void check_tlb_sanity(void);
+
 #endif /* _XTENSA_TLB_H */
index 285fb29..1eeecd5 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/percpu.h>
 #include <linux/perf_event.h>
 #include <asm/core.h>
+#include <asm/hw_breakpoint.h>
 
 /* Breakpoint currently in use for each IBREAKA. */
 static DEFINE_PER_CPU(struct perf_event *, bp_on_reg[XCHAL_NUM_IBREAK]);
index 42f1060..b1e410f 100644 (file)
@@ -28,6 +28,7 @@
 #include <asm/mxregs.h>
 #include <linux/uaccess.h>
 #include <asm/platform.h>
+#include <asm/traps.h>
 
 DECLARE_PER_CPU(unsigned long, nmi_count);
 
index f294771..9056cd1 100644 (file)
@@ -541,7 +541,6 @@ long arch_ptrace(struct task_struct *child, long request,
        return ret;
 }
 
-void do_syscall_trace_leave(struct pt_regs *regs);
 int do_syscall_trace_enter(struct pt_regs *regs)
 {
        if (regs->syscall == NO_SYSCALL)
index 5c01d7e..81f0b10 100644 (file)
@@ -26,6 +26,8 @@
 #include <linux/uaccess.h>
 #include <asm/cacheflush.h>
 #include <asm/coprocessor.h>
+#include <asm/processor.h>
+#include <asm/syscall.h>
 #include <asm/unistd.h>
 
 extern struct task_struct *coproc_owners[];
index 07dd6ba..94a23f1 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/irq.h>
 #include <linux/kdebug.h>
 #include <linux/module.h>
+#include <linux/profile.h>
 #include <linux/sched/mm.h>
 #include <linux/sched/hotplug.h>
 #include <linux/sched/task_stack.h>
index f643ea5..831ffb6 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/sched.h>
 #include <linux/stacktrace.h>
 
+#include <asm/ftrace.h>
 #include <asm/stacktrace.h>
 #include <asm/traps.h>
 #include <linux/uaccess.h>
index 427c125..38092d2 100644 (file)
@@ -23,6 +23,7 @@
  * for more details.
  */
 
+#include <linux/cpu.h>
 #include <linux/kernel.h>
 #include <linux/sched/signal.h>
 #include <linux/sched/debug.h>
index 8c7a94a..5da501b 100644 (file)
@@ -3,7 +3,9 @@
 #include <asm/asmmacro.h>
 #include <asm/core.h>
 
-#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
+#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32 || XCHAL_HAVE_MAC16
+#define XCHAL_NO_MUL 0
+#else
 #define XCHAL_NO_MUL 1
 #endif
 
index d1eb8d6..16e11b6 100644 (file)
@@ -20,6 +20,7 @@
 #include <asm/mmu_context.h>
 #include <asm/cacheflush.h>
 #include <asm/hardirq.h>
+#include <asm/traps.h>
 
 void bad_page_fault(struct pt_regs*, unsigned long, int);
 
index 0a11fc5..4f974b7 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/mm.h>
 #include <asm/processor.h>
 #include <asm/mmu_context.h>
+#include <asm/tlb.h>
 #include <asm/tlbflush.h>
 #include <asm/cacheflush.h>
 
index 85c82cd..e89f27f 100644 (file)
@@ -201,7 +201,7 @@ static int tuntap_write(struct iss_net_private *lp, struct sk_buff **skb)
        return simc_write(lp->tp.info.tuntap.fd, (*skb)->data, (*skb)->len);
 }
 
-unsigned short tuntap_protocol(struct sk_buff *skb)
+static unsigned short tuntap_protocol(struct sk_buff *skb)
 {
        return eth_type_trans(skb, skb->dev);
 }
@@ -441,7 +441,7 @@ static int iss_net_change_mtu(struct net_device *dev, int new_mtu)
        return -EINVAL;
 }
 
-void iss_net_user_timer_expire(struct timer_list *unused)
+static void iss_net_user_timer_expire(struct timer_list *unused)
 {
 }
 
index 167be74..dd7310c 100644 (file)
@@ -270,7 +270,7 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data,
                        finish_wait(&rqw->wait, &data.wq);
 
                        /*
-                        * We raced with wbt_wake_function() getting a token,
+                        * We raced with rq_qos_wake_function() getting a token,
                         * which means we now have two. Put our local token
                         * and wake anyone else potentially waiting for one.
                         */
index 422db82..13c3372 100644 (file)
@@ -290,7 +290,6 @@ EXPORT_SYMBOL(disk_check_media_change);
 /**
  * disk_force_media_change - force a media change event
  * @disk: the disk which will raise the event
- * @events: the events to raise
  *
  * Should be called when the media changes for @disk.  Generates a uevent
  * and attempts to free all dentries and inodes and invalidates all block
index ba79f39..467a602 100644 (file)
@@ -327,7 +327,7 @@ static int ivpu_wait_for_ready(struct ivpu_device *vdev)
        }
 
        if (!ret)
-               ivpu_info(vdev, "VPU ready message received successfully\n");
+               ivpu_dbg(vdev, PM, "VPU ready message received successfully\n");
        else
                ivpu_hw_diagnose_failure(vdev);
 
@@ -634,6 +634,7 @@ static void ivpu_dev_fini(struct ivpu_device *vdev)
 
 static struct pci_device_id ivpu_pci_ids[] = {
        { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_MTL) },
+       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_ARL) },
        { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_LNL) },
        { }
 };
index 9e8c075..03b3d65 100644 (file)
@@ -23,6 +23,7 @@
 #define DRIVER_DATE "20230117"
 
 #define PCI_DEVICE_ID_MTL   0x7d1d
+#define PCI_DEVICE_ID_ARL   0xad1d
 #define PCI_DEVICE_ID_LNL   0x643e
 
 #define IVPU_HW_37XX   37
@@ -165,6 +166,7 @@ static inline int ivpu_hw_gen(struct ivpu_device *vdev)
 {
        switch (ivpu_device_id(vdev)) {
        case PCI_DEVICE_ID_MTL:
+       case PCI_DEVICE_ID_ARL:
                return IVPU_HW_37XX;
        case PCI_DEVICE_ID_LNL:
                return IVPU_HW_40XX;
index 9827ea4..0191cf8 100644 (file)
@@ -220,7 +220,8 @@ static int ivpu_fw_mem_init(struct ivpu_device *vdev)
        if (ret)
                return ret;
 
-       fw->mem = ivpu_bo_alloc_internal(vdev, fw->runtime_addr, fw->runtime_size, DRM_IVPU_BO_WC);
+       fw->mem = ivpu_bo_alloc_internal(vdev, fw->runtime_addr, fw->runtime_size,
+                                        DRM_IVPU_BO_CACHED | DRM_IVPU_BO_NOSNOOP);
        if (!fw->mem) {
                ivpu_err(vdev, "Failed to allocate firmware runtime memory\n");
                return -ENOMEM;
@@ -330,7 +331,7 @@ int ivpu_fw_load(struct ivpu_device *vdev)
                memset(start, 0, size);
        }
 
-       wmb(); /* Flush WC buffers after writing fw->mem */
+       clflush_cache_range(fw->mem->kvaddr, fw->mem->base.size);
 
        return 0;
 }
@@ -432,6 +433,7 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params
        if (!ivpu_fw_is_cold_boot(vdev)) {
                boot_params->save_restore_ret_address = 0;
                vdev->pm->is_warmboot = true;
+               clflush_cache_range(vdev->fw->mem->kvaddr, SZ_4K);
                return;
        }
 
@@ -493,7 +495,7 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params
        boot_params->punit_telemetry_sram_size = ivpu_hw_reg_telemetry_size_get(vdev);
        boot_params->vpu_telemetry_enable = ivpu_hw_reg_telemetry_enable_get(vdev);
 
-       wmb(); /* Flush WC buffers after writing bootparams */
+       clflush_cache_range(vdev->fw->mem->kvaddr, SZ_4K);
 
        ivpu_fw_boot_params_print(vdev, boot_params);
 }
index 6b0ceda..f413058 100644 (file)
@@ -8,6 +8,8 @@
 #include <drm/drm_gem.h>
 #include <drm/drm_mm.h>
 
+#define DRM_IVPU_BO_NOSNOOP       0x10000000
+
 struct dma_buf;
 struct ivpu_bo_ops;
 struct ivpu_file_priv;
@@ -83,6 +85,9 @@ static inline u32 ivpu_bo_cache_mode(struct ivpu_bo *bo)
 
 static inline bool ivpu_bo_is_snooped(struct ivpu_bo *bo)
 {
+       if (bo->flags & DRM_IVPU_BO_NOSNOOP)
+               return false;
+
        return ivpu_bo_cache_mode(bo) == DRM_IVPU_BO_CACHED;
 }
 
index 00c5dbb..8bdb59a 100644 (file)
@@ -57,8 +57,7 @@
 
 #define ICB_0_1_IRQ_MASK ((((u64)ICB_1_IRQ_MASK) << 32) | ICB_0_IRQ_MASK)
 
-#define BUTTRESS_IRQ_MASK ((REG_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, FREQ_CHANGE)) | \
-                          (REG_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, ATS_ERR)) | \
+#define BUTTRESS_IRQ_MASK ((REG_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, ATS_ERR)) | \
                           (REG_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, CFI0_ERR)) | \
                           (REG_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, CFI1_ERR)) | \
                           (REG_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, IMR0_ERR)) | \
@@ -196,6 +195,14 @@ static int ivpu_pll_wait_for_status_ready(struct ivpu_device *vdev)
        return REGB_POLL_FLD(VPU_40XX_BUTTRESS_VPU_STATUS, READY, 1, PLL_TIMEOUT_US);
 }
 
+static int ivpu_wait_for_clock_own_resource_ack(struct ivpu_device *vdev)
+{
+       if (ivpu_is_simics(vdev))
+               return 0;
+
+       return REGB_POLL_FLD(VPU_40XX_BUTTRESS_VPU_STATUS, CLOCK_RESOURCE_OWN_ACK, 1, TIMEOUT_US);
+}
+
 static void ivpu_pll_init_frequency_ratios(struct ivpu_device *vdev)
 {
        struct ivpu_hw_info *hw = vdev->hw;
@@ -556,6 +563,12 @@ static int ivpu_boot_pwr_domain_enable(struct ivpu_device *vdev)
 {
        int ret;
 
+       ret = ivpu_wait_for_clock_own_resource_ack(vdev);
+       if (ret) {
+               ivpu_err(vdev, "Timed out waiting for clock own resource ACK\n");
+               return ret;
+       }
+
        ivpu_boot_pwr_island_trickle_drive(vdev, true);
        ivpu_boot_pwr_island_drive(vdev, true);
 
@@ -1046,9 +1059,6 @@ static irqreturn_t ivpu_hw_40xx_irqb_handler(struct ivpu_device *vdev, int irq)
        if (status == 0)
                return IRQ_NONE;
 
-       /* Disable global interrupt before handling local buttress interrupts */
-       REGB_WR32(VPU_40XX_BUTTRESS_GLOBAL_INT_MASK, 0x1);
-
        if (REG_TEST_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, FREQ_CHANGE, status))
                ivpu_dbg(vdev, IRQ, "FREQ_CHANGE");
 
@@ -1096,9 +1106,6 @@ static irqreturn_t ivpu_hw_40xx_irqb_handler(struct ivpu_device *vdev, int irq)
        /* This must be done after interrupts are cleared at the source. */
        REGB_WR32(VPU_40XX_BUTTRESS_INTERRUPT_STAT, status);
 
-       /* Re-enable global interrupt */
-       REGB_WR32(VPU_40XX_BUTTRESS_GLOBAL_INT_MASK, 0x0);
-
        if (schedule_recovery)
                ivpu_pm_schedule_recovery(vdev);
 
@@ -1110,9 +1117,14 @@ static irqreturn_t ivpu_hw_40xx_irq_handler(int irq, void *ptr)
        struct ivpu_device *vdev = ptr;
        irqreturn_t ret = IRQ_NONE;
 
+       REGB_WR32(VPU_40XX_BUTTRESS_GLOBAL_INT_MASK, 0x1);
+
        ret |= ivpu_hw_40xx_irqv_handler(vdev, irq);
        ret |= ivpu_hw_40xx_irqb_handler(vdev, irq);
 
+       /* Re-enable global interrupts to re-trigger MSI for pending interrupts */
+       REGB_WR32(VPU_40XX_BUTTRESS_GLOBAL_INT_MASK, 0x0);
+
        if (ret & IRQ_WAKE_THREAD)
                return IRQ_WAKE_THREAD;
 
index 5139cfe..ff4a5d4 100644 (file)
@@ -70,6 +70,8 @@
 #define VPU_40XX_BUTTRESS_VPU_STATUS_READY_MASK                                BIT_MASK(0)
 #define VPU_40XX_BUTTRESS_VPU_STATUS_IDLE_MASK                         BIT_MASK(1)
 #define VPU_40XX_BUTTRESS_VPU_STATUS_DUP_IDLE_MASK                     BIT_MASK(2)
+#define VPU_40XX_BUTTRESS_VPU_STATUS_CLOCK_RESOURCE_OWN_ACK_MASK       BIT_MASK(6)
+#define VPU_40XX_BUTTRESS_VPU_STATUS_POWER_RESOURCE_OWN_ACK_MASK       BIT_MASK(7)
 #define VPU_40XX_BUTTRESS_VPU_STATUS_PERF_CLK_MASK                     BIT_MASK(11)
 #define VPU_40XX_BUTTRESS_VPU_STATUS_DISABLE_CLK_RELINQUISH_MASK        BIT_MASK(12)
 
index fa0af59..295c0d7 100644 (file)
@@ -209,10 +209,10 @@ int ivpu_ipc_receive(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
        struct ivpu_ipc_rx_msg *rx_msg;
        int wait_ret, ret = 0;
 
-       wait_ret = wait_event_interruptible_timeout(cons->rx_msg_wq,
-                                                   (IS_KTHREAD() && kthread_should_stop()) ||
-                                                   !list_empty(&cons->rx_msg_list),
-                                                   msecs_to_jiffies(timeout_ms));
+       wait_ret = wait_event_timeout(cons->rx_msg_wq,
+                                     (IS_KTHREAD() && kthread_should_stop()) ||
+                                     !list_empty(&cons->rx_msg_list),
+                                     msecs_to_jiffies(timeout_ms));
 
        if (IS_KTHREAD() && kthread_should_stop())
                return -EINTR;
@@ -220,9 +220,6 @@ int ivpu_ipc_receive(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
        if (wait_ret == 0)
                return -ETIMEDOUT;
 
-       if (wait_ret < 0)
-               return -ERESTARTSYS;
-
        spin_lock_irq(&cons->rx_msg_lock);
        rx_msg = list_first_entry_or_null(&cons->rx_msg_list, struct ivpu_ipc_rx_msg, link);
        if (!rx_msg) {
index 0072e0f..d8cc1e2 100644 (file)
@@ -1973,6 +1973,96 @@ retry:
 }
 
 /**
+ *     ata_dev_power_set_standby - Set a device power mode to standby
+ *     @dev: target device
+ *
+ *     Issue a STANDBY IMMEDIATE command to set a device power mode to standby.
+ *     For an HDD device, this spins down the disks.
+ *
+ *     LOCKING:
+ *     Kernel thread context (may sleep).
+ */
+void ata_dev_power_set_standby(struct ata_device *dev)
+{
+       unsigned long ap_flags = dev->link->ap->flags;
+       struct ata_taskfile tf;
+       unsigned int err_mask;
+
+       /* Issue STANDBY IMMEDIATE command only if supported by the device */
+       if (dev->class != ATA_DEV_ATA && dev->class != ATA_DEV_ZAC)
+               return;
+
+       /*
+        * Some odd clown BIOSes issue spindown on power off (ACPI S4 or S5)
+        * causing some drives to spin up and down again. For these, do nothing
+        * if we are being called on shutdown.
+        */
+       if ((ap_flags & ATA_FLAG_NO_POWEROFF_SPINDOWN) &&
+           system_state == SYSTEM_POWER_OFF)
+               return;
+
+       if ((ap_flags & ATA_FLAG_NO_HIBERNATE_SPINDOWN) &&
+           system_entering_hibernation())
+               return;
+
+       ata_tf_init(dev, &tf);
+       tf.flags |= ATA_TFLAG_DEVICE | ATA_TFLAG_ISADDR;
+       tf.protocol = ATA_PROT_NODATA;
+       tf.command = ATA_CMD_STANDBYNOW1;
+
+       ata_dev_notice(dev, "Entering standby power mode\n");
+
+       err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0);
+       if (err_mask)
+               ata_dev_err(dev, "STANDBY IMMEDIATE failed (err_mask=0x%x)\n",
+                           err_mask);
+}
+
+/**
+ *     ata_dev_power_set_active -  Set a device power mode to active
+ *     @dev: target device
+ *
+ *     Issue a VERIFY command to ensure that the device is in the
+ *     active power mode. For a spun-down HDD (standby or idle power mode),
+ *     the VERIFY command will complete after the disk spins up.
+ *
+ *     LOCKING:
+ *     Kernel thread context (may sleep).
+ */
+void ata_dev_power_set_active(struct ata_device *dev)
+{
+       struct ata_taskfile tf;
+       unsigned int err_mask;
+
+       /*
+        * Issue READ VERIFY SECTORS command for 1 sector at lba=0 only
+        * if supported by the device.
+        */
+       if (dev->class != ATA_DEV_ATA && dev->class != ATA_DEV_ZAC)
+               return;
+
+       ata_tf_init(dev, &tf);
+       tf.flags |= ATA_TFLAG_DEVICE | ATA_TFLAG_ISADDR;
+       tf.protocol = ATA_PROT_NODATA;
+       tf.command = ATA_CMD_VERIFY;
+       tf.nsect = 1;
+       if (dev->flags & ATA_DFLAG_LBA) {
+               tf.flags |= ATA_TFLAG_LBA;
+               tf.device |= ATA_LBA;
+       } else {
+               /* CHS */
+               tf.lbal = 0x1; /* sect */
+       }
+
+       ata_dev_notice(dev, "Entering active power mode\n");
+
+       err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0);
+       if (err_mask)
+               ata_dev_err(dev, "VERIFY failed (err_mask=0x%x)\n",
+                           err_mask);
+}
+
+/**
  *     ata_read_log_page - read a specific log page
  *     @dev: target device
  *     @log: log to read
@@ -2529,7 +2619,7 @@ static int ata_dev_config_lba(struct ata_device *dev)
 {
        const u16 *id = dev->id;
        const char *lba_desc;
-       char ncq_desc[24];
+       char ncq_desc[32];
        int ret;
 
        dev->flags |= ATA_DFLAG_LBA;
@@ -5037,17 +5127,19 @@ static void ata_port_request_pm(struct ata_port *ap, pm_message_t mesg,
        struct ata_link *link;
        unsigned long flags;
 
-       /* Previous resume operation might still be in
-        * progress.  Wait for PM_PENDING to clear.
+       spin_lock_irqsave(ap->lock, flags);
+
+       /*
+        * A previous PM operation might still be in progress. Wait for
+        * ATA_PFLAG_PM_PENDING to clear.
         */
        if (ap->pflags & ATA_PFLAG_PM_PENDING) {
+               spin_unlock_irqrestore(ap->lock, flags);
                ata_port_wait_eh(ap);
-               WARN_ON(ap->pflags & ATA_PFLAG_PM_PENDING);
+               spin_lock_irqsave(ap->lock, flags);
        }
 
-       /* request PM ops to EH */
-       spin_lock_irqsave(ap->lock, flags);
-
+       /* Request PM operation to EH */
        ap->pm_mesg = mesg;
        ap->pflags |= ATA_PFLAG_PM_PENDING;
        ata_for_each_link(link, ap, HOST_FIRST) {
@@ -5059,10 +5151,8 @@ static void ata_port_request_pm(struct ata_port *ap, pm_message_t mesg,
 
        spin_unlock_irqrestore(ap->lock, flags);
 
-       if (!async) {
+       if (!async)
                ata_port_wait_eh(ap);
-               WARN_ON(ap->pflags & ATA_PFLAG_PM_PENDING);
-       }
 }
 
 /*
@@ -5078,11 +5168,27 @@ static const unsigned int ata_port_suspend_ehi = ATA_EHI_QUIET
 
 static void ata_port_suspend(struct ata_port *ap, pm_message_t mesg)
 {
+       /*
+        * We are about to suspend the port, so we do not care about
+        * scsi_rescan_device() calls scheduled by previous resume operations.
+        * The next resume will schedule the rescan again. So cancel any rescan
+        * that is not done yet.
+        */
+       cancel_delayed_work_sync(&ap->scsi_rescan_task);
+
        ata_port_request_pm(ap, mesg, 0, ata_port_suspend_ehi, false);
 }
 
 static void ata_port_suspend_async(struct ata_port *ap, pm_message_t mesg)
 {
+       /*
+        * We are about to suspend the port, so we do not care about
+        * scsi_rescan_device() calls scheduled by previous resume operations.
+        * The next resume will schedule the rescan again. So cancel any rescan
+        * that is not done yet.
+        */
+       cancel_delayed_work_sync(&ap->scsi_rescan_task);
+
        ata_port_request_pm(ap, mesg, 0, ata_port_suspend_ehi, true);
 }
 
@@ -5229,7 +5335,7 @@ EXPORT_SYMBOL_GPL(ata_host_resume);
 #endif
 
 const struct device_type ata_port_type = {
-       .name = "ata_port",
+       .name = ATA_PORT_TYPE_NAME,
 #ifdef CONFIG_PM
        .pm = &ata_port_pm_ops,
 #endif
@@ -5948,11 +6054,30 @@ static void ata_port_detach(struct ata_port *ap)
        struct ata_link *link;
        struct ata_device *dev;
 
-       /* tell EH we're leaving & flush EH */
+       /* Wait for any ongoing EH */
+       ata_port_wait_eh(ap);
+
+       mutex_lock(&ap->scsi_scan_mutex);
        spin_lock_irqsave(ap->lock, flags);
+
+       /* Remove scsi devices */
+       ata_for_each_link(link, ap, HOST_FIRST) {
+               ata_for_each_dev(dev, link, ALL) {
+                       if (dev->sdev) {
+                               spin_unlock_irqrestore(ap->lock, flags);
+                               scsi_remove_device(dev->sdev);
+                               spin_lock_irqsave(ap->lock, flags);
+                               dev->sdev = NULL;
+                       }
+               }
+       }
+
+       /* Tell EH to disable all devices */
        ap->pflags |= ATA_PFLAG_UNLOADING;
        ata_port_schedule_eh(ap);
+
        spin_unlock_irqrestore(ap->lock, flags);
+       mutex_unlock(&ap->scsi_scan_mutex);
 
        /* wait till EH commits suicide */
        ata_port_wait_eh(ap);
index 4cf4f57..5686353 100644 (file)
@@ -147,6 +147,8 @@ ata_eh_cmd_timeout_table[ATA_EH_CMD_TIMEOUT_TABLE_SIZE] = {
          .timeouts = ata_eh_other_timeouts, },
        { .commands = CMDS(ATA_CMD_FLUSH, ATA_CMD_FLUSH_EXT),
          .timeouts = ata_eh_flush_timeouts },
+       { .commands = CMDS(ATA_CMD_VERIFY),
+         .timeouts = ata_eh_reset_timeouts },
 };
 #undef CMDS
 
@@ -498,7 +500,19 @@ static void ata_eh_unload(struct ata_port *ap)
        struct ata_device *dev;
        unsigned long flags;
 
-       /* Restore SControl IPM and SPD for the next driver and
+       /*
+        * Unless we are restarting, transition all enabled devices to
+        * standby power mode.
+        */
+       if (system_state != SYSTEM_RESTART) {
+               ata_for_each_link(link, ap, PMP_FIRST) {
+                       ata_for_each_dev(dev, link, ENABLED)
+                               ata_dev_power_set_standby(dev);
+               }
+       }
+
+       /*
+        * Restore SControl IPM and SPD for the next driver and
         * disable attached devices.
         */
        ata_for_each_link(link, ap, PMP_FIRST) {
@@ -684,6 +698,10 @@ void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap)
                        ehc->saved_xfer_mode[devno] = dev->xfer_mode;
                        if (ata_ncq_enabled(dev))
                                ehc->saved_ncq_enabled |= 1 << devno;
+
+                       /* If we are resuming, wake up the device */
+                       if (ap->pflags & ATA_PFLAG_RESUMING)
+                               ehc->i.dev_action[devno] |= ATA_EH_SET_ACTIVE;
                }
        }
 
@@ -743,6 +761,8 @@ void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap)
        /* clean up */
        spin_lock_irqsave(ap->lock, flags);
 
+       ap->pflags &= ~ATA_PFLAG_RESUMING;
+
        if (ap->pflags & ATA_PFLAG_LOADING)
                ap->pflags &= ~ATA_PFLAG_LOADING;
        else if ((ap->pflags & ATA_PFLAG_SCSI_HOTPLUG) &&
@@ -1218,6 +1238,13 @@ void ata_eh_detach_dev(struct ata_device *dev)
        struct ata_eh_context *ehc = &link->eh_context;
        unsigned long flags;
 
+       /*
+        * If the device is still enabled, transition it to standby power mode
+        * (i.e. spin down HDDs).
+        */
+       if (ata_dev_enabled(dev))
+               ata_dev_power_set_standby(dev);
+
        ata_dev_disable(dev);
 
        spin_lock_irqsave(ap->lock, flags);
@@ -2305,7 +2332,7 @@ static void ata_eh_link_report(struct ata_link *link)
        struct ata_eh_context *ehc = &link->eh_context;
        struct ata_queued_cmd *qc;
        const char *frozen, *desc;
-       char tries_buf[6] = "";
+       char tries_buf[16] = "";
        int tag, nr_failed = 0;
 
        if (ehc->i.flags & ATA_EHI_QUIET)
@@ -3016,6 +3043,15 @@ static int ata_eh_revalidate_and_attach(struct ata_link *link,
                if (ehc->i.flags & ATA_EHI_DID_RESET)
                        readid_flags |= ATA_READID_POSTRESET;
 
+               /*
+                * When resuming, before executing any command, make sure to
+                * transition the device to the active power mode.
+                */
+               if ((action & ATA_EH_SET_ACTIVE) && ata_dev_enabled(dev)) {
+                       ata_dev_power_set_active(dev);
+                       ata_eh_done(link, dev, ATA_EH_SET_ACTIVE);
+               }
+
                if ((action & ATA_EH_REVALIDATE) && ata_dev_enabled(dev)) {
                        WARN_ON(dev->class == ATA_DEV_PMP);
 
@@ -3989,6 +4025,7 @@ static void ata_eh_handle_port_suspend(struct ata_port *ap)
        unsigned long flags;
        int rc = 0;
        struct ata_device *dev;
+       struct ata_link *link;
 
        /* are we suspending? */
        spin_lock_irqsave(ap->lock, flags);
@@ -4001,6 +4038,12 @@ static void ata_eh_handle_port_suspend(struct ata_port *ap)
 
        WARN_ON(ap->pflags & ATA_PFLAG_SUSPENDED);
 
+       /* Set all devices attached to the port in standby mode */
+       ata_for_each_link(link, ap, HOST_FIRST) {
+               ata_for_each_dev(dev, link, ENABLED)
+                       ata_dev_power_set_standby(dev);
+       }
+
        /*
         * If we have a ZPODD attached, check its zero
         * power ready status before the port is frozen.
@@ -4083,6 +4126,7 @@ static void ata_eh_handle_port_resume(struct ata_port *ap)
        /* update the flags */
        spin_lock_irqsave(ap->lock, flags);
        ap->pflags &= ~(ATA_PFLAG_PM_PENDING | ATA_PFLAG_SUSPENDED);
+       ap->pflags |= ATA_PFLAG_RESUMING;
        spin_unlock_irqrestore(ap->lock, flags);
 }
 #endif /* CONFIG_PM */
index d3f28b8..a371b49 100644 (file)
@@ -1050,14 +1050,13 @@ int ata_scsi_dev_config(struct scsi_device *sdev, struct ata_device *dev)
                }
        } else {
                sdev->sector_size = ata_id_logical_sector_size(dev->id);
+
                /*
-                * Stop the drive on suspend but do not issue START STOP UNIT
-                * on resume as this is not necessary and may fail: the device
-                * will be woken up by ata_port_pm_resume() with a port reset
-                * and device revalidation.
+                * Ask the sd driver to issue START STOP UNIT on runtime suspend
+                * and resume only. For system level suspend/resume, device
+                * power state is handled directly by libata EH.
                 */
-               sdev->manage_start_stop = 1;
-               sdev->no_start_on_resume = 1;
+               sdev->manage_runtime_start_stop = true;
        }
 
        /*
@@ -1090,6 +1089,42 @@ int ata_scsi_dev_config(struct scsi_device *sdev, struct ata_device *dev)
 }
 
 /**
+ *     ata_scsi_slave_alloc - Early setup of SCSI device
+ *     @sdev: SCSI device to examine
+ *
+ *     This is called from scsi_alloc_sdev() when the scsi device
+ *     associated with an ATA device is scanned on a port.
+ *
+ *     LOCKING:
+ *     Defined by SCSI layer.  We don't really care.
+ */
+
+int ata_scsi_slave_alloc(struct scsi_device *sdev)
+{
+       struct ata_port *ap = ata_shost_to_port(sdev->host);
+       struct device_link *link;
+
+       ata_scsi_sdev_config(sdev);
+
+       /*
+        * Create a link from the ata_port device to the scsi device to ensure
+        * that PM does suspend/resume in the correct order: the scsi device is
+        * consumer (child) and the ata port the supplier (parent).
+        */
+       link = device_link_add(&sdev->sdev_gendev, &ap->tdev,
+                              DL_FLAG_STATELESS |
+                              DL_FLAG_PM_RUNTIME | DL_FLAG_RPM_ACTIVE);
+       if (!link) {
+               ata_port_err(ap, "Failed to create link to scsi device %s\n",
+                            dev_name(&sdev->sdev_gendev));
+               return -ENODEV;
+       }
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(ata_scsi_slave_alloc);
+
+/**
  *     ata_scsi_slave_config - Set SCSI device attributes
  *     @sdev: SCSI device to examine
  *
@@ -1105,14 +1140,11 @@ int ata_scsi_slave_config(struct scsi_device *sdev)
 {
        struct ata_port *ap = ata_shost_to_port(sdev->host);
        struct ata_device *dev = __ata_scsi_find_dev(ap, sdev);
-       int rc = 0;
-
-       ata_scsi_sdev_config(sdev);
 
        if (dev)
-               rc = ata_scsi_dev_config(sdev, dev);
+               return ata_scsi_dev_config(sdev, dev);
 
-       return rc;
+       return 0;
 }
 EXPORT_SYMBOL_GPL(ata_scsi_slave_config);
 
@@ -1136,6 +1168,8 @@ void ata_scsi_slave_destroy(struct scsi_device *sdev)
        unsigned long flags;
        struct ata_device *dev;
 
+       device_link_remove(&sdev->sdev_gendev, &ap->tdev);
+
        spin_lock_irqsave(ap->lock, flags);
        dev = __ata_scsi_find_dev(ap, sdev);
        if (dev && dev->sdev) {
@@ -1195,7 +1229,7 @@ static unsigned int ata_scsi_start_stop_xlat(struct ata_queued_cmd *qc)
        }
 
        if (cdb[4] & 0x1) {
-               tf->nsect = 1;  /* 1 sector, lba=0 */
+               tf->nsect = 1;  /* 1 sector, lba=0 */
 
                if (qc->dev->flags & ATA_DFLAG_LBA) {
                        tf->flags |= ATA_TFLAG_LBA;
@@ -1211,7 +1245,7 @@ static unsigned int ata_scsi_start_stop_xlat(struct ata_queued_cmd *qc)
                        tf->lbah = 0x0; /* cyl high */
                }
 
-               tf->command = ATA_CMD_VERIFY;   /* READ VERIFY */
+               tf->command = ATA_CMD_VERIFY;   /* READ VERIFY */
        } else {
                /* Some odd clown BIOSen issue spindown on power off (ACPI S4
                 * or S5) causing some drives to spin up and down again.
@@ -1221,7 +1255,7 @@ static unsigned int ata_scsi_start_stop_xlat(struct ata_queued_cmd *qc)
                        goto skip;
 
                if ((qc->ap->flags & ATA_FLAG_NO_HIBERNATE_SPINDOWN) &&
-                    system_entering_hibernation())
+                   system_entering_hibernation())
                        goto skip;
 
                /* Issue ATA STANDBY IMMEDIATE command */
@@ -1835,6 +1869,9 @@ static unsigned int ata_scsiop_inq_std(struct ata_scsi_args *args, u8 *rbuf)
                hdr[2] = 0x7; /* claim SPC-5 version compatibility */
        }
 
+       if (args->dev->flags & ATA_DFLAG_CDL)
+               hdr[2] = 0xd; /* claim SPC-6 version compatibility */
+
        memcpy(rbuf, hdr, sizeof(hdr));
        memcpy(&rbuf[8], "ATA     ", 8);
        ata_id_string(args->id, &rbuf[16], ATA_ID_PROD, 16);
@@ -4312,7 +4349,7 @@ void ata_scsi_simulate(struct ata_device *dev, struct scsi_cmnd *cmd)
                break;
 
        case MAINTENANCE_IN:
-               if (scsicmd[1] == MI_REPORT_SUPPORTED_OPERATION_CODES)
+               if ((scsicmd[1] & 0x1f) == MI_REPORT_SUPPORTED_OPERATION_CODES)
                        ata_scsi_rbuf_fill(&args, ata_scsiop_maint_in);
                else
                        ata_scsi_set_invalid_field(dev, cmd, 1, 0xff);
@@ -4722,7 +4759,7 @@ void ata_scsi_dev_rescan(struct work_struct *work)
        struct ata_link *link;
        struct ata_device *dev;
        unsigned long flags;
-       bool delay_rescan = false;
+       int ret = 0;
 
        mutex_lock(&ap->scsi_scan_mutex);
        spin_lock_irqsave(ap->lock, flags);
@@ -4731,37 +4768,34 @@ void ata_scsi_dev_rescan(struct work_struct *work)
                ata_for_each_dev(dev, link, ENABLED) {
                        struct scsi_device *sdev = dev->sdev;
 
+                       /*
+                        * If the port was suspended before this was scheduled,
+                        * bail out.
+                        */
+                       if (ap->pflags & ATA_PFLAG_SUSPENDED)
+                               goto unlock;
+
                        if (!sdev)
                                continue;
                        if (scsi_device_get(sdev))
                                continue;
 
-                       /*
-                        * If the rescan work was scheduled because of a resume
-                        * event, the port is already fully resumed, but the
-                        * SCSI device may not yet be fully resumed. In such
-                        * case, executing scsi_rescan_device() may cause a
-                        * deadlock with the PM code on device_lock(). Prevent
-                        * this by giving up and retrying rescan after a short
-                        * delay.
-                        */
-                       delay_rescan = sdev->sdev_gendev.power.is_suspended;
-                       if (delay_rescan) {
-                               scsi_device_put(sdev);
-                               break;
-                       }
-
                        spin_unlock_irqrestore(ap->lock, flags);
-                       scsi_rescan_device(sdev);
+                       ret = scsi_rescan_device(sdev);
                        scsi_device_put(sdev);
                        spin_lock_irqsave(ap->lock, flags);
+
+                       if (ret)
+                               goto unlock;
                }
        }
 
+unlock:
        spin_unlock_irqrestore(ap->lock, flags);
        mutex_unlock(&ap->scsi_scan_mutex);
 
-       if (delay_rescan)
+       /* Reschedule with a delay if scsi_rescan_device() returned an error */
+       if (ret)
                schedule_delayed_work(&ap->scsi_rescan_task,
                                      msecs_to_jiffies(5));
 }
index e4fb9d1..3e49a87 100644 (file)
@@ -266,6 +266,10 @@ void ata_tport_delete(struct ata_port *ap)
        put_device(dev);
 }
 
+static const struct device_type ata_port_sas_type = {
+       .name = ATA_PORT_TYPE_NAME,
+};
+
 /** ata_tport_add - initialize a transport ATA port structure
  *
  * @parent:    parent device
@@ -283,7 +287,10 @@ int ata_tport_add(struct device *parent,
        struct device *dev = &ap->tdev;
 
        device_initialize(dev);
-       dev->type = &ata_port_type;
+       if (ap->flags & ATA_FLAG_SAS_HOST)
+               dev->type = &ata_port_sas_type;
+       else
+               dev->type = &ata_port_type;
 
        dev->parent = parent;
        ata_host_get(ap->host);
index 6e7d352..05ac80d 100644 (file)
@@ -30,6 +30,8 @@ enum {
        ATA_DNXFER_QUIET        = (1 << 31),
 };
 
+#define ATA_PORT_TYPE_NAME     "ata_port"
+
 extern atomic_t ata_print_id;
 extern int atapi_passthru16;
 extern int libata_fua;
@@ -60,6 +62,8 @@ extern int ata_dev_reread_id(struct ata_device *dev, unsigned int readid_flags);
 extern int ata_dev_revalidate(struct ata_device *dev, unsigned int new_class,
                              unsigned int readid_flags);
 extern int ata_dev_configure(struct ata_device *dev);
+extern void ata_dev_power_set_standby(struct ata_device *dev);
+extern void ata_dev_power_set_active(struct ata_device *dev);
 extern int sata_down_spd_limit(struct ata_link *link, u32 spd_limit);
 extern int ata_down_xfermask_limit(struct ata_device *dev, unsigned int sel);
 extern unsigned int ata_dev_set_feature(struct ata_device *dev,
index 3de11f0..a999b69 100644 (file)
@@ -632,9 +632,8 @@ void rbd_warn(struct rbd_device *rbd_dev, const char *fmt, ...)
 static void rbd_dev_remove_parent(struct rbd_device *rbd_dev);
 
 static int rbd_dev_refresh(struct rbd_device *rbd_dev);
-static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev);
-static int rbd_dev_header_info(struct rbd_device *rbd_dev);
-static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev);
+static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev,
+                                    struct rbd_image_header *header);
 static const char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev,
                                        u64 snap_id);
 static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id,
@@ -995,15 +994,24 @@ static void rbd_init_layout(struct rbd_device *rbd_dev)
        RCU_INIT_POINTER(rbd_dev->layout.pool_ns, NULL);
 }
 
+static void rbd_image_header_cleanup(struct rbd_image_header *header)
+{
+       kfree(header->object_prefix);
+       ceph_put_snap_context(header->snapc);
+       kfree(header->snap_sizes);
+       kfree(header->snap_names);
+
+       memset(header, 0, sizeof(*header));
+}
+
 /*
  * Fill an rbd image header with information from the given format 1
  * on-disk header.
  */
-static int rbd_header_from_disk(struct rbd_device *rbd_dev,
-                                struct rbd_image_header_ondisk *ondisk)
+static int rbd_header_from_disk(struct rbd_image_header *header,
+                               struct rbd_image_header_ondisk *ondisk,
+                               bool first_time)
 {
-       struct rbd_image_header *header = &rbd_dev->header;
-       bool first_time = header->object_prefix == NULL;
        struct ceph_snap_context *snapc;
        char *object_prefix = NULL;
        char *snap_names = NULL;
@@ -1070,11 +1078,6 @@ static int rbd_header_from_disk(struct rbd_device *rbd_dev,
        if (first_time) {
                header->object_prefix = object_prefix;
                header->obj_order = ondisk->options.order;
-               rbd_init_layout(rbd_dev);
-       } else {
-               ceph_put_snap_context(header->snapc);
-               kfree(header->snap_names);
-               kfree(header->snap_sizes);
        }
 
        /* The remaining fields always get updated (when we refresh) */
@@ -4859,7 +4862,9 @@ out_req:
  * return, the rbd_dev->header field will contain up-to-date
  * information about the image.
  */
-static int rbd_dev_v1_header_info(struct rbd_device *rbd_dev)
+static int rbd_dev_v1_header_info(struct rbd_device *rbd_dev,
+                                 struct rbd_image_header *header,
+                                 bool first_time)
 {
        struct rbd_image_header_ondisk *ondisk = NULL;
        u32 snap_count = 0;
@@ -4907,7 +4912,7 @@ static int rbd_dev_v1_header_info(struct rbd_device *rbd_dev)
                snap_count = le32_to_cpu(ondisk->snap_count);
        } while (snap_count != want_count);
 
-       ret = rbd_header_from_disk(rbd_dev, ondisk);
+       ret = rbd_header_from_disk(header, ondisk, first_time);
 out:
        kfree(ondisk);
 
@@ -4931,39 +4936,6 @@ static void rbd_dev_update_size(struct rbd_device *rbd_dev)
        }
 }
 
-static int rbd_dev_refresh(struct rbd_device *rbd_dev)
-{
-       u64 mapping_size;
-       int ret;
-
-       down_write(&rbd_dev->header_rwsem);
-       mapping_size = rbd_dev->mapping.size;
-
-       ret = rbd_dev_header_info(rbd_dev);
-       if (ret)
-               goto out;
-
-       /*
-        * If there is a parent, see if it has disappeared due to the
-        * mapped image getting flattened.
-        */
-       if (rbd_dev->parent) {
-               ret = rbd_dev_v2_parent_info(rbd_dev);
-               if (ret)
-                       goto out;
-       }
-
-       rbd_assert(!rbd_is_snap(rbd_dev));
-       rbd_dev->mapping.size = rbd_dev->header.image_size;
-
-out:
-       up_write(&rbd_dev->header_rwsem);
-       if (!ret && mapping_size != rbd_dev->mapping.size)
-               rbd_dev_update_size(rbd_dev);
-
-       return ret;
-}
-
 static const struct blk_mq_ops rbd_mq_ops = {
        .queue_rq       = rbd_queue_rq,
 };
@@ -5503,17 +5475,12 @@ static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id,
        return 0;
 }
 
-static int rbd_dev_v2_image_size(struct rbd_device *rbd_dev)
-{
-       return _rbd_dev_v2_snap_size(rbd_dev, CEPH_NOSNAP,
-                                       &rbd_dev->header.obj_order,
-                                       &rbd_dev->header.image_size);
-}
-
-static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev)
+static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev,
+                                   char **pobject_prefix)
 {
        size_t size;
        void *reply_buf;
+       char *object_prefix;
        int ret;
        void *p;
 
@@ -5531,16 +5498,16 @@ static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev)
                goto out;
 
        p = reply_buf;
-       rbd_dev->header.object_prefix = ceph_extract_encoded_string(&p,
-                                               p + ret, NULL, GFP_NOIO);
+       object_prefix = ceph_extract_encoded_string(&p, p + ret, NULL,
+                                                   GFP_NOIO);
+       if (IS_ERR(object_prefix)) {
+               ret = PTR_ERR(object_prefix);
+               goto out;
+       }
        ret = 0;
 
-       if (IS_ERR(rbd_dev->header.object_prefix)) {
-               ret = PTR_ERR(rbd_dev->header.object_prefix);
-               rbd_dev->header.object_prefix = NULL;
-       } else {
-               dout("  object_prefix = %s\n", rbd_dev->header.object_prefix);
-       }
+       *pobject_prefix = object_prefix;
+       dout("  object_prefix = %s\n", object_prefix);
 out:
        kfree(reply_buf);
 
@@ -5591,13 +5558,6 @@ static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id,
        return 0;
 }
 
-static int rbd_dev_v2_features(struct rbd_device *rbd_dev)
-{
-       return _rbd_dev_v2_snap_features(rbd_dev, CEPH_NOSNAP,
-                                        rbd_is_ro(rbd_dev),
-                                        &rbd_dev->header.features);
-}
-
 /*
  * These are generic image flags, but since they are used only for
  * object map, store them in rbd_dev->object_map_flags.
@@ -5634,6 +5594,14 @@ struct parent_image_info {
        u64             overlap;
 };
 
+static void rbd_parent_info_cleanup(struct parent_image_info *pii)
+{
+       kfree(pii->pool_ns);
+       kfree(pii->image_id);
+
+       memset(pii, 0, sizeof(*pii));
+}
+
 /*
  * The caller is responsible for @pii.
  */
@@ -5703,6 +5671,9 @@ static int __get_parent_info(struct rbd_device *rbd_dev,
        if (pii->has_overlap)
                ceph_decode_64_safe(&p, end, pii->overlap, e_inval);
 
+       dout("%s pool_id %llu pool_ns %s image_id %s snap_id %llu has_overlap %d overlap %llu\n",
+            __func__, pii->pool_id, pii->pool_ns, pii->image_id, pii->snap_id,
+            pii->has_overlap, pii->overlap);
        return 0;
 
 e_inval:
@@ -5741,14 +5712,17 @@ static int __get_parent_info_legacy(struct rbd_device *rbd_dev,
        pii->has_overlap = true;
        ceph_decode_64_safe(&p, end, pii->overlap, e_inval);
 
+       dout("%s pool_id %llu pool_ns %s image_id %s snap_id %llu has_overlap %d overlap %llu\n",
+            __func__, pii->pool_id, pii->pool_ns, pii->image_id, pii->snap_id,
+            pii->has_overlap, pii->overlap);
        return 0;
 
 e_inval:
        return -EINVAL;
 }
 
-static int get_parent_info(struct rbd_device *rbd_dev,
-                          struct parent_image_info *pii)
+static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev,
+                                 struct parent_image_info *pii)
 {
        struct page *req_page, *reply_page;
        void *p;
@@ -5776,7 +5750,7 @@ static int get_parent_info(struct rbd_device *rbd_dev,
        return ret;
 }
 
-static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
+static int rbd_dev_setup_parent(struct rbd_device *rbd_dev)
 {
        struct rbd_spec *parent_spec;
        struct parent_image_info pii = { 0 };
@@ -5786,37 +5760,12 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
        if (!parent_spec)
                return -ENOMEM;
 
-       ret = get_parent_info(rbd_dev, &pii);
+       ret = rbd_dev_v2_parent_info(rbd_dev, &pii);
        if (ret)
                goto out_err;
 
-       dout("%s pool_id %llu pool_ns %s image_id %s snap_id %llu has_overlap %d overlap %llu\n",
-            __func__, pii.pool_id, pii.pool_ns, pii.image_id, pii.snap_id,
-            pii.has_overlap, pii.overlap);
-
-       if (pii.pool_id == CEPH_NOPOOL || !pii.has_overlap) {
-               /*
-                * Either the parent never existed, or we have
-                * record of it but the image got flattened so it no
-                * longer has a parent.  When the parent of a
-                * layered image disappears we immediately set the
-                * overlap to 0.  The effect of this is that all new
-                * requests will be treated as if the image had no
-                * parent.
-                *
-                * If !pii.has_overlap, the parent image spec is not
-                * applicable.  It's there to avoid duplication in each
-                * snapshot record.
-                */
-               if (rbd_dev->parent_overlap) {
-                       rbd_dev->parent_overlap = 0;
-                       rbd_dev_parent_put(rbd_dev);
-                       pr_info("%s: clone image has been flattened\n",
-                               rbd_dev->disk->disk_name);
-               }
-
+       if (pii.pool_id == CEPH_NOPOOL || !pii.has_overlap)
                goto out;       /* No parent?  No problem. */
-       }
 
        /* The ceph file layout needs to fit pool id in 32 bits */
 
@@ -5828,58 +5777,46 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
        }
 
        /*
-        * The parent won't change (except when the clone is
-        * flattened, already handled that).  So we only need to
-        * record the parent spec we have not already done so.
+        * The parent won't change except when the clone is flattened,
+        * so we only need to record the parent image spec once.
         */
-       if (!rbd_dev->parent_spec) {
-               parent_spec->pool_id = pii.pool_id;
-               if (pii.pool_ns && *pii.pool_ns) {
-                       parent_spec->pool_ns = pii.pool_ns;
-                       pii.pool_ns = NULL;
-               }
-               parent_spec->image_id = pii.image_id;
-               pii.image_id = NULL;
-               parent_spec->snap_id = pii.snap_id;
-
-               rbd_dev->parent_spec = parent_spec;
-               parent_spec = NULL;     /* rbd_dev now owns this */
+       parent_spec->pool_id = pii.pool_id;
+       if (pii.pool_ns && *pii.pool_ns) {
+               parent_spec->pool_ns = pii.pool_ns;
+               pii.pool_ns = NULL;
        }
+       parent_spec->image_id = pii.image_id;
+       pii.image_id = NULL;
+       parent_spec->snap_id = pii.snap_id;
+
+       rbd_assert(!rbd_dev->parent_spec);
+       rbd_dev->parent_spec = parent_spec;
+       parent_spec = NULL;     /* rbd_dev now owns this */
 
        /*
-        * We always update the parent overlap.  If it's zero we issue
-        * a warning, as we will proceed as if there was no parent.
+        * Record the parent overlap.  If it's zero, issue a warning as
+        * we will proceed as if there is no parent.
         */
-       if (!pii.overlap) {
-               if (parent_spec) {
-                       /* refresh, careful to warn just once */
-                       if (rbd_dev->parent_overlap)
-                               rbd_warn(rbd_dev,
-                                   "clone now standalone (overlap became 0)");
-               } else {
-                       /* initial probe */
-                       rbd_warn(rbd_dev, "clone is standalone (overlap 0)");
-               }
-       }
+       if (!pii.overlap)
+               rbd_warn(rbd_dev, "clone is standalone (overlap 0)");
        rbd_dev->parent_overlap = pii.overlap;
 
 out:
        ret = 0;
 out_err:
-       kfree(pii.pool_ns);
-       kfree(pii.image_id);
+       rbd_parent_info_cleanup(&pii);
        rbd_spec_put(parent_spec);
        return ret;
 }
 
-static int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev)
+static int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev,
+                                   u64 *stripe_unit, u64 *stripe_count)
 {
        struct {
                __le64 stripe_unit;
                __le64 stripe_count;
        } __attribute__ ((packed)) striping_info_buf = { 0 };
        size_t size = sizeof (striping_info_buf);
-       void *p;
        int ret;
 
        ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid,
@@ -5891,27 +5828,33 @@ static int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev)
        if (ret < size)
                return -ERANGE;
 
-       p = &striping_info_buf;
-       rbd_dev->header.stripe_unit = ceph_decode_64(&p);
-       rbd_dev->header.stripe_count = ceph_decode_64(&p);
+       *stripe_unit = le64_to_cpu(striping_info_buf.stripe_unit);
+       *stripe_count = le64_to_cpu(striping_info_buf.stripe_count);
+       dout("  stripe_unit = %llu stripe_count = %llu\n", *stripe_unit,
+            *stripe_count);
+
        return 0;
 }
 
-static int rbd_dev_v2_data_pool(struct rbd_device *rbd_dev)
+static int rbd_dev_v2_data_pool(struct rbd_device *rbd_dev, s64 *data_pool_id)
 {
-       __le64 data_pool_id;
+       __le64 data_pool_buf;
        int ret;
 
        ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid,
                                  &rbd_dev->header_oloc, "get_data_pool",
-                                 NULL, 0, &data_pool_id, sizeof(data_pool_id));
+                                 NULL, 0, &data_pool_buf,
+                                 sizeof(data_pool_buf));
+       dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
        if (ret < 0)
                return ret;
-       if (ret < sizeof(data_pool_id))
+       if (ret < sizeof(data_pool_buf))
                return -EBADMSG;
 
-       rbd_dev->header.data_pool_id = le64_to_cpu(data_pool_id);
-       WARN_ON(rbd_dev->header.data_pool_id == CEPH_NOPOOL);
+       *data_pool_id = le64_to_cpu(data_pool_buf);
+       dout("  data_pool_id = %lld\n", *data_pool_id);
+       WARN_ON(*data_pool_id == CEPH_NOPOOL);
+
        return 0;
 }
 
@@ -6103,7 +6046,8 @@ out_err:
        return ret;
 }
 
-static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev)
+static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev,
+                                  struct ceph_snap_context **psnapc)
 {
        size_t size;
        int ret;
@@ -6164,9 +6108,7 @@ static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev)
        for (i = 0; i < snap_count; i++)
                snapc->snaps[i] = ceph_decode_64(&p);
 
-       ceph_put_snap_context(rbd_dev->header.snapc);
-       rbd_dev->header.snapc = snapc;
-
+       *psnapc = snapc;
        dout("  snap context seq = %llu, snap_count = %u\n",
                (unsigned long long)seq, (unsigned int)snap_count);
 out:
@@ -6215,38 +6157,42 @@ out:
        return snap_name;
 }
 
-static int rbd_dev_v2_header_info(struct rbd_device *rbd_dev)
+static int rbd_dev_v2_header_info(struct rbd_device *rbd_dev,
+                                 struct rbd_image_header *header,
+                                 bool first_time)
 {
-       bool first_time = rbd_dev->header.object_prefix == NULL;
        int ret;
 
-       ret = rbd_dev_v2_image_size(rbd_dev);
+       ret = _rbd_dev_v2_snap_size(rbd_dev, CEPH_NOSNAP,
+                                   first_time ? &header->obj_order : NULL,
+                                   &header->image_size);
        if (ret)
                return ret;
 
        if (first_time) {
-               ret = rbd_dev_v2_header_onetime(rbd_dev);
+               ret = rbd_dev_v2_header_onetime(rbd_dev, header);
                if (ret)
                        return ret;
        }
 
-       ret = rbd_dev_v2_snap_context(rbd_dev);
-       if (ret && first_time) {
-               kfree(rbd_dev->header.object_prefix);
-               rbd_dev->header.object_prefix = NULL;
-       }
+       ret = rbd_dev_v2_snap_context(rbd_dev, &header->snapc);
+       if (ret)
+               return ret;
 
-       return ret;
+       return 0;
 }
 
-static int rbd_dev_header_info(struct rbd_device *rbd_dev)
+static int rbd_dev_header_info(struct rbd_device *rbd_dev,
+                              struct rbd_image_header *header,
+                              bool first_time)
 {
        rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
+       rbd_assert(!header->object_prefix && !header->snapc);
 
        if (rbd_dev->image_format == 1)
-               return rbd_dev_v1_header_info(rbd_dev);
+               return rbd_dev_v1_header_info(rbd_dev, header, first_time);
 
-       return rbd_dev_v2_header_info(rbd_dev);
+       return rbd_dev_v2_header_info(rbd_dev, header, first_time);
 }
 
 /*
@@ -6734,60 +6680,49 @@ out:
  */
 static void rbd_dev_unprobe(struct rbd_device *rbd_dev)
 {
-       struct rbd_image_header *header;
-
        rbd_dev_parent_put(rbd_dev);
        rbd_object_map_free(rbd_dev);
        rbd_dev_mapping_clear(rbd_dev);
 
        /* Free dynamic fields from the header, then zero it out */
 
-       header = &rbd_dev->header;
-       ceph_put_snap_context(header->snapc);
-       kfree(header->snap_sizes);
-       kfree(header->snap_names);
-       kfree(header->object_prefix);
-       memset(header, 0, sizeof (*header));
+       rbd_image_header_cleanup(&rbd_dev->header);
 }
 
-static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev)
+static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev,
+                                    struct rbd_image_header *header)
 {
        int ret;
 
-       ret = rbd_dev_v2_object_prefix(rbd_dev);
+       ret = rbd_dev_v2_object_prefix(rbd_dev, &header->object_prefix);
        if (ret)
-               goto out_err;
+               return ret;
 
        /*
         * Get the and check features for the image.  Currently the
         * features are assumed to never change.
         */
-       ret = rbd_dev_v2_features(rbd_dev);
+       ret = _rbd_dev_v2_snap_features(rbd_dev, CEPH_NOSNAP,
+                                       rbd_is_ro(rbd_dev), &header->features);
        if (ret)
-               goto out_err;
+               return ret;
 
        /* If the image supports fancy striping, get its parameters */
 
-       if (rbd_dev->header.features & RBD_FEATURE_STRIPINGV2) {
-               ret = rbd_dev_v2_striping_info(rbd_dev);
-               if (ret < 0)
-                       goto out_err;
+       if (header->features & RBD_FEATURE_STRIPINGV2) {
+               ret = rbd_dev_v2_striping_info(rbd_dev, &header->stripe_unit,
+                                              &header->stripe_count);
+               if (ret)
+                       return ret;
        }
 
-       if (rbd_dev->header.features & RBD_FEATURE_DATA_POOL) {
-               ret = rbd_dev_v2_data_pool(rbd_dev);
+       if (header->features & RBD_FEATURE_DATA_POOL) {
+               ret = rbd_dev_v2_data_pool(rbd_dev, &header->data_pool_id);
                if (ret)
-                       goto out_err;
+                       return ret;
        }
 
-       rbd_init_layout(rbd_dev);
        return 0;
-
-out_err:
-       rbd_dev->header.features = 0;
-       kfree(rbd_dev->header.object_prefix);
-       rbd_dev->header.object_prefix = NULL;
-       return ret;
 }
 
 /*
@@ -6982,13 +6917,15 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
        if (!depth)
                down_write(&rbd_dev->header_rwsem);
 
-       ret = rbd_dev_header_info(rbd_dev);
+       ret = rbd_dev_header_info(rbd_dev, &rbd_dev->header, true);
        if (ret) {
                if (ret == -ENOENT && !need_watch)
                        rbd_print_dne(rbd_dev, false);
                goto err_out_probe;
        }
 
+       rbd_init_layout(rbd_dev);
+
        /*
         * If this image is the one being mapped, we have pool name and
         * id, image name and id, and snap name - need to fill snap id.
@@ -7017,7 +6954,7 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
        }
 
        if (rbd_dev->header.features & RBD_FEATURE_LAYERING) {
-               ret = rbd_dev_v2_parent_info(rbd_dev);
+               ret = rbd_dev_setup_parent(rbd_dev);
                if (ret)
                        goto err_out_probe;
        }
@@ -7043,6 +6980,107 @@ err_out_format:
        return ret;
 }
 
+static void rbd_dev_update_header(struct rbd_device *rbd_dev,
+                                 struct rbd_image_header *header)
+{
+       rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
+       rbd_assert(rbd_dev->header.object_prefix); /* !first_time */
+
+       if (rbd_dev->header.image_size != header->image_size) {
+               rbd_dev->header.image_size = header->image_size;
+
+               if (!rbd_is_snap(rbd_dev)) {
+                       rbd_dev->mapping.size = header->image_size;
+                       rbd_dev_update_size(rbd_dev);
+               }
+       }
+
+       ceph_put_snap_context(rbd_dev->header.snapc);
+       rbd_dev->header.snapc = header->snapc;
+       header->snapc = NULL;
+
+       if (rbd_dev->image_format == 1) {
+               kfree(rbd_dev->header.snap_names);
+               rbd_dev->header.snap_names = header->snap_names;
+               header->snap_names = NULL;
+
+               kfree(rbd_dev->header.snap_sizes);
+               rbd_dev->header.snap_sizes = header->snap_sizes;
+               header->snap_sizes = NULL;
+       }
+}
+
+static void rbd_dev_update_parent(struct rbd_device *rbd_dev,
+                                 struct parent_image_info *pii)
+{
+       if (pii->pool_id == CEPH_NOPOOL || !pii->has_overlap) {
+               /*
+                * Either the parent never existed, or we have
+                * record of it but the image got flattened so it no
+                * longer has a parent.  When the parent of a
+                * layered image disappears we immediately set the
+                * overlap to 0.  The effect of this is that all new
+                * requests will be treated as if the image had no
+                * parent.
+                *
+                * If !pii.has_overlap, the parent image spec is not
+                * applicable.  It's there to avoid duplication in each
+                * snapshot record.
+                */
+               if (rbd_dev->parent_overlap) {
+                       rbd_dev->parent_overlap = 0;
+                       rbd_dev_parent_put(rbd_dev);
+                       pr_info("%s: clone has been flattened\n",
+                               rbd_dev->disk->disk_name);
+               }
+       } else {
+               rbd_assert(rbd_dev->parent_spec);
+
+               /*
+                * Update the parent overlap.  If it became zero, issue
+                * a warning as we will proceed as if there is no parent.
+                */
+               if (!pii->overlap && rbd_dev->parent_overlap)
+                       rbd_warn(rbd_dev,
+                                "clone has become standalone (overlap 0)");
+               rbd_dev->parent_overlap = pii->overlap;
+       }
+}
+
+static int rbd_dev_refresh(struct rbd_device *rbd_dev)
+{
+       struct rbd_image_header header = { 0 };
+       struct parent_image_info pii = { 0 };
+       int ret;
+
+       dout("%s rbd_dev %p\n", __func__, rbd_dev);
+
+       ret = rbd_dev_header_info(rbd_dev, &header, false);
+       if (ret)
+               goto out;
+
+       /*
+        * If there is a parent, see if it has disappeared due to the
+        * mapped image getting flattened.
+        */
+       if (rbd_dev->parent) {
+               ret = rbd_dev_v2_parent_info(rbd_dev, &pii);
+               if (ret)
+                       goto out;
+       }
+
+       down_write(&rbd_dev->header_rwsem);
+       rbd_dev_update_header(rbd_dev, &header);
+       if (rbd_dev->parent)
+               rbd_dev_update_parent(rbd_dev, &pii);
+       up_write(&rbd_dev->header_rwsem);
+
+out:
+       rbd_parent_info_cleanup(&pii);
+       rbd_image_header_cleanup(&header);
+       return ret;
+}
+
 static ssize_t do_rbd_add(const char *buf, size_t count)
 {
        struct rbd_device *rbd_dev = NULL;
index 4eaf1b5..ef4ba46 100644 (file)
@@ -96,7 +96,7 @@ static int si521xx_regmap_i2c_write(void *context, unsigned int reg,
                                    unsigned int val)
 {
        struct i2c_client *i2c = context;
-       const u8 data[3] = { reg, 1, val };
+       const u8 data[2] = { reg, val };
        const int count = ARRAY_SIZE(data);
        int ret;
 
@@ -146,7 +146,7 @@ static int si521xx_regmap_i2c_read(void *context, unsigned int reg,
 static const struct regmap_config si521xx_regmap_config = {
        .reg_bits = 8,
        .val_bits = 8,
-       .cache_type = REGCACHE_NONE,
+       .cache_type = REGCACHE_FLAT,
        .max_register = SI521XX_REG_DA,
        .rd_table = &si521xx_readable_table,
        .wr_table = &si521xx_writeable_table,
@@ -281,9 +281,10 @@ static int si521xx_probe(struct i2c_client *client)
 {
        const u16 chip_info = (u16)(uintptr_t)device_get_match_data(&client->dev);
        const struct clk_parent_data clk_parent_data = { .index = 0 };
-       struct si521xx *si;
+       const u8 data[3] = { SI521XX_REG_BC, 1, 1 };
        unsigned char name[6] = "DIFF0";
        struct clk_init_data init = {};
+       struct si521xx *si;
        int i, ret;
 
        if (!chip_info)
@@ -308,7 +309,7 @@ static int si521xx_probe(struct i2c_client *client)
                                     "Failed to allocate register map\n");
 
        /* Always read back 1 Byte via I2C */
-       ret = regmap_write(si->regmap, SI521XX_REG_BC, 1);
+       ret = i2c_master_send(client, data, ARRAY_SIZE(data));
        if (ret < 0)
                return ret;
 
index 7ab2447..3d7de35 100644 (file)
@@ -118,21 +118,21 @@ enum vc3_div {
        VC3_DIV5,
 };
 
-enum vc3_clk_mux {
-       VC3_DIFF2_MUX,
-       VC3_DIFF1_MUX,
-       VC3_SE3_MUX,
-       VC3_SE2_MUX,
-       VC3_SE1_MUX,
-};
-
 enum vc3_clk {
-       VC3_DIFF2,
-       VC3_DIFF1,
-       VC3_SE3,
-       VC3_SE2,
-       VC3_SE1,
        VC3_REF,
+       VC3_SE1,
+       VC3_SE2,
+       VC3_SE3,
+       VC3_DIFF1,
+       VC3_DIFF2,
+};
+
+enum vc3_clk_mux {
+       VC3_SE1_MUX = VC3_SE1 - 1,
+       VC3_SE2_MUX = VC3_SE2 - 1,
+       VC3_SE3_MUX = VC3_SE3 - 1,
+       VC3_DIFF1_MUX = VC3_DIFF1 - 1,
+       VC3_DIFF2_MUX = VC3_DIFF2 - 1,
 };
 
 struct vc3_clk_data {
@@ -401,11 +401,10 @@ static long vc3_pll_round_rate(struct clk_hw *hw, unsigned long rate,
                /* Determine best fractional part, which is 16 bit wide */
                div_frc = rate % *parent_rate;
                div_frc *= BIT(16) - 1;
-               do_div(div_frc, *parent_rate);
 
-               vc3->div_frc = (u32)div_frc;
+               vc3->div_frc = min_t(u64, div64_ul(div_frc, *parent_rate), U16_MAX);
                rate = (*parent_rate *
-                       (vc3->div_int * VC3_2_POW_16 + div_frc) / VC3_2_POW_16);
+                       (vc3->div_int * VC3_2_POW_16 + vc3->div_frc) / VC3_2_POW_16);
        } else {
                rate = *parent_rate * vc3->div_int;
        }
@@ -897,33 +896,33 @@ static struct vc3_hw_data clk_div[] = {
 };
 
 static struct vc3_hw_data clk_mux[] = {
-       [VC3_DIFF2_MUX] = {
+       [VC3_SE1_MUX] = {
                .data = &(struct vc3_clk_data) {
-                       .offs = VC3_DIFF2_CTRL_REG,
-                       .bitmsk = VC3_DIFF2_CTRL_REG_DIFF2_CLK_SEL
+                       .offs = VC3_SE1_DIV4_CTRL,
+                       .bitmsk = VC3_SE1_DIV4_CTRL_SE1_CLK_SEL
                },
                .hw.init = &(struct clk_init_data){
-                       .name = "diff2_mux",
+                       .name = "se1_mux",
                        .ops = &vc3_clk_mux_ops,
                        .parent_hws = (const struct clk_hw *[]) {
-                               &clk_div[VC3_DIV1].hw,
-                               &clk_div[VC3_DIV3].hw
+                               &clk_div[VC3_DIV5].hw,
+                               &clk_div[VC3_DIV4].hw
                        },
                        .num_parents = 2,
                        .flags = CLK_SET_RATE_PARENT
                }
        },
-       [VC3_DIFF1_MUX] = {
+       [VC3_SE2_MUX] = {
                .data = &(struct vc3_clk_data) {
-                       .offs = VC3_DIFF1_CTRL_REG,
-                       .bitmsk = VC3_DIFF1_CTRL_REG_DIFF1_CLK_SEL
+                       .offs = VC3_SE2_CTRL_REG0,
+                       .bitmsk = VC3_SE2_CTRL_REG0_SE2_CLK_SEL
                },
                .hw.init = &(struct clk_init_data){
-                       .name = "diff1_mux",
+                       .name = "se2_mux",
                        .ops = &vc3_clk_mux_ops,
                        .parent_hws = (const struct clk_hw *[]) {
-                               &clk_div[VC3_DIV1].hw,
-                               &clk_div[VC3_DIV3].hw
+                               &clk_div[VC3_DIV5].hw,
+                               &clk_div[VC3_DIV4].hw
                        },
                        .num_parents = 2,
                        .flags = CLK_SET_RATE_PARENT
@@ -945,33 +944,33 @@ static struct vc3_hw_data clk_mux[] = {
                        .flags = CLK_SET_RATE_PARENT
                }
        },
-       [VC3_SE2_MUX] = {
+       [VC3_DIFF1_MUX] = {
                .data = &(struct vc3_clk_data) {
-                       .offs = VC3_SE2_CTRL_REG0,
-                       .bitmsk = VC3_SE2_CTRL_REG0_SE2_CLK_SEL
+                       .offs = VC3_DIFF1_CTRL_REG,
+                       .bitmsk = VC3_DIFF1_CTRL_REG_DIFF1_CLK_SEL
                },
                .hw.init = &(struct clk_init_data){
-                       .name = "se2_mux",
+                       .name = "diff1_mux",
                        .ops = &vc3_clk_mux_ops,
                        .parent_hws = (const struct clk_hw *[]) {
-                               &clk_div[VC3_DIV5].hw,
-                               &clk_div[VC3_DIV4].hw
+                               &clk_div[VC3_DIV1].hw,
+                               &clk_div[VC3_DIV3].hw
                        },
                        .num_parents = 2,
                        .flags = CLK_SET_RATE_PARENT
                }
        },
-       [VC3_SE1_MUX] = {
+       [VC3_DIFF2_MUX] = {
                .data = &(struct vc3_clk_data) {
-                       .offs = VC3_SE1_DIV4_CTRL,
-                       .bitmsk = VC3_SE1_DIV4_CTRL_SE1_CLK_SEL
+                       .offs = VC3_DIFF2_CTRL_REG,
+                       .bitmsk = VC3_DIFF2_CTRL_REG_DIFF2_CLK_SEL
                },
                .hw.init = &(struct clk_init_data){
-                       .name = "se1_mux",
+                       .name = "diff2_mux",
                        .ops = &vc3_clk_mux_ops,
                        .parent_hws = (const struct clk_hw *[]) {
-                               &clk_div[VC3_DIV5].hw,
-                               &clk_div[VC3_DIV4].hw
+                               &clk_div[VC3_DIV1].hw,
+                               &clk_div[VC3_DIV3].hw
                        },
                        .num_parents = 2,
                        .flags = CLK_SET_RATE_PARENT
@@ -1110,7 +1109,7 @@ static int vc3_probe(struct i2c_client *client)
                                name, 0, CLK_SET_RATE_PARENT, 1, 1);
                else
                        clk_out[i] = devm_clk_hw_register_fixed_factor_parent_hw(dev,
-                               name, &clk_mux[i].hw, CLK_SET_RATE_PARENT, 1, 1);
+                               name, &clk_mux[i - 1].hw, CLK_SET_RATE_PARENT, 1, 1);
 
                if (IS_ERR(clk_out[i]))
                        return PTR_ERR(clk_out[i]);
index 8f4441d..9384ecc 100644 (file)
@@ -800,7 +800,7 @@ static SPRD_MUX_CLK_DATA(uart1_clk, "uart1-clk", uart_parents,
                         0x250, 0, 3, UMS512_MUX_FLAG);
 
 static const struct clk_parent_data thm_parents[] = {
-       { .fw_name = "ext-32m" },
+       { .fw_name = "ext-32k" },
        { .hw = &clk_250k.hw  },
 };
 static SPRD_MUX_CLK_DATA(thm0_clk, "thm0-clk", thm_parents,
index a9f3fb4..7bfba0a 100644 (file)
@@ -159,7 +159,7 @@ static unsigned long tegra_bpmp_clk_recalc_rate(struct clk_hw *hw,
 
        err = tegra_bpmp_clk_transfer(clk->bpmp, &msg);
        if (err < 0)
-               return err;
+               return 0;
 
        return response.rate;
 }
index 26db5b8..749868b 100644 (file)
@@ -81,7 +81,8 @@ MODULE_PARM_DESC(exclusive_login, "Exclusive login to sbp2 device "
  *
  * - power condition
  *   Set the power condition field in the START STOP UNIT commands sent by
- *   sd_mod on suspend, resume, and shutdown (if manage_start_stop is on).
+ *   sd_mod on suspend, resume, and shutdown (if manage_system_start_stop or
+ *   manage_runtime_start_stop is on).
  *   Some disks need this to spin down or to resume properly.
  *
  * - override internal blacklist
@@ -1517,8 +1518,10 @@ static int sbp2_scsi_slave_configure(struct scsi_device *sdev)
 
        sdev->use_10_for_rw = 1;
 
-       if (sbp2_param_exclusive_login)
-               sdev->manage_start_stop = 1;
+       if (sbp2_param_exclusive_login) {
+               sdev->manage_system_start_stop = true;
+               sdev->manage_runtime_start_stop = true;
+       }
 
        if (sdev->type == TYPE_ROM)
                sdev->use_10_for_ms = 1;
index 2b9b7be..01c0fd0 100644 (file)
@@ -352,6 +352,7 @@ static int sprd_pmic_eic_probe(struct platform_device *pdev)
        pmic_eic->chip.set_config = sprd_pmic_eic_set_config;
        pmic_eic->chip.set = sprd_pmic_eic_set;
        pmic_eic->chip.get = sprd_pmic_eic_get;
+       pmic_eic->chip.can_sleep = true;
 
        irq = &pmic_eic->chip.irq;
        gpio_irq_chip_set_chip(irq, &pmic_eic_irq_chip);
index bbd9e91..fad9797 100644 (file)
@@ -43,9 +43,10 @@ static int timbgpio_update_bit(struct gpio_chip *gpio, unsigned index,
        unsigned offset, bool enabled)
 {
        struct timbgpio *tgpio = gpiochip_get_data(gpio);
+       unsigned long flags;
        u32 reg;
 
-       spin_lock(&tgpio->lock);
+       spin_lock_irqsave(&tgpio->lock, flags);
        reg = ioread32(tgpio->membase + offset);
 
        if (enabled)
@@ -54,7 +55,7 @@ static int timbgpio_update_bit(struct gpio_chip *gpio, unsigned index,
                reg &= ~(1 << index);
 
        iowrite32(reg, tgpio->membase + offset);
-       spin_unlock(&tgpio->lock);
+       spin_unlock_irqrestore(&tgpio->lock, flags);
 
        return 0;
 }
index 8f1633c..73a4a4e 100644 (file)
@@ -100,6 +100,7 @@ int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st,
        st->nents = 0;
        for (i = 0; i < page_count; i++) {
                struct folio *folio;
+               unsigned long nr_pages;
                const unsigned int shrink[] = {
                        I915_SHRINK_BOUND | I915_SHRINK_UNBOUND,
                        0,
@@ -150,6 +151,8 @@ int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st,
                        }
                } while (1);
 
+               nr_pages = min_t(unsigned long,
+                               folio_nr_pages(folio), page_count - i);
                if (!i ||
                    sg->length >= max_segment ||
                    folio_pfn(folio) != next_pfn) {
@@ -157,13 +160,13 @@ int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st,
                                sg = sg_next(sg);
 
                        st->nents++;
-                       sg_set_folio(sg, folio, folio_size(folio), 0);
+                       sg_set_folio(sg, folio, nr_pages * PAGE_SIZE, 0);
                } else {
                        /* XXX: could overflow? */
-                       sg->length += folio_size(folio);
+                       sg->length += nr_pages * PAGE_SIZE;
                }
-               next_pfn = folio_pfn(folio) + folio_nr_pages(folio);
-               i += folio_nr_pages(folio) - 1;
+               next_pfn = folio_pfn(folio) + nr_pages;
+               i += nr_pages - 1;
 
                /* Check that the i965g/gm workaround works. */
                GEM_BUG_ON(gfp & __GFP_DMA32 && next_pfn >= 0x00100000UL);
index dd0ed94..da21f27 100644 (file)
@@ -511,20 +511,31 @@ void intel_ggtt_unbind_vma(struct i915_address_space *vm,
        vm->clear_range(vm, vma_res->start, vma_res->vma_size);
 }
 
+/*
+ * Reserve the top of the GuC address space for firmware images. Addresses
+ * beyond GUC_GGTT_TOP in the GuC address space are inaccessible by GuC,
+ * which makes for a suitable range to hold GuC/HuC firmware images if the
+ * size of the GGTT is 4G. However, on a 32-bit platform the size of the GGTT
+ * is limited to 2G, which is less than GUC_GGTT_TOP, but we reserve a chunk
+ * of the same size anyway, which is far more than needed, to keep the logic
+ * in uc_fw_ggtt_offset() simple.
+ */
+#define GUC_TOP_RESERVE_SIZE (SZ_4G - GUC_GGTT_TOP)
+
 static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt)
 {
-       u64 size;
+       u64 offset;
        int ret;
 
        if (!intel_uc_uses_guc(&ggtt->vm.gt->uc))
                return 0;
 
-       GEM_BUG_ON(ggtt->vm.total <= GUC_GGTT_TOP);
-       size = ggtt->vm.total - GUC_GGTT_TOP;
+       GEM_BUG_ON(ggtt->vm.total <= GUC_TOP_RESERVE_SIZE);
+       offset = ggtt->vm.total - GUC_TOP_RESERVE_SIZE;
 
-       ret = i915_gem_gtt_reserve(&ggtt->vm, NULL, &ggtt->uc_fw, size,
-                                  GUC_GGTT_TOP, I915_COLOR_UNEVICTABLE,
-                                  PIN_NOEVICT);
+       ret = i915_gem_gtt_reserve(&ggtt->vm, NULL, &ggtt->uc_fw,
+                                  GUC_TOP_RESERVE_SIZE, offset,
+                                  I915_COLOR_UNEVICTABLE, PIN_NOEVICT);
        if (ret)
                drm_dbg(&ggtt->vm.i915->drm,
                        "Failed to reserve top of GGTT for GuC\n");
index b5b7f2f..dc7b40e 100644 (file)
@@ -1433,6 +1433,36 @@ static void guc_timestamp_ping(struct work_struct *wrk)
        int srcu, ret;
 
        /*
+        * Ideally the busyness worker should take a gt pm wakeref because the
+        * worker only needs to be active while gt is awake. However, the
+        * gt_park path cancels the worker synchronously and this complicates
+        * the flow if the worker is also running at the same time. The cancel
+        * waits for the worker and when the worker releases the wakeref, that
+        * would call gt_park and would lead to a deadlock.
+        *
+        * The resolution is to take the global pm wakeref if runtime pm is
+        * already active. If not, we don't need to update the busyness stats as
+        * the stats would already be updated when the gt was parked.
+        *
+        * Note:
+        * - We do not requeue the worker if we cannot take a reference to runtime
+        *   pm since intel_guc_busyness_unpark would requeue the worker in the
+        *   resume path.
+        *
+        * - If the gt was parked longer than time taken for GT timestamp to roll
+        *   over, we ignore those rollovers since we don't care about tracking
+        *   the exact GT time. We only care about roll overs when the gt is
+        *   active and running workloads.
+        *
+        * - There is a window of time between gt_park and runtime suspend,
+        *   where the worker may run. This is acceptable since the worker will
+        *   not find any new data to update busyness.
+        */
+       wakeref = intel_runtime_pm_get_if_active(&gt->i915->runtime_pm);
+       if (!wakeref)
+               return;
+
+       /*
         * Synchronize with gt reset to make sure the worker does not
         * corrupt the engine/guc stats. NB: can't actually block waiting
         * for a reset to complete as the reset requires flushing out
@@ -1440,10 +1470,9 @@ static void guc_timestamp_ping(struct work_struct *wrk)
         */
        ret = intel_gt_reset_trylock(gt, &srcu);
        if (ret)
-               return;
+               goto err_trylock;
 
-       with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref)
-               __update_guc_busyness_stats(guc);
+       __update_guc_busyness_stats(guc);
 
        /* adjust context stats for overflow */
        xa_for_each(&guc->context_lookup, index, ce)
@@ -1452,6 +1481,9 @@ static void guc_timestamp_ping(struct work_struct *wrk)
        intel_gt_reset_unlock(gt, srcu);
 
        guc_enable_busyness_worker(guc);
+
+err_trylock:
+       intel_runtime_pm_put(&gt->i915->runtime_pm, wakeref);
 }
 
 static int guc_action_enable_usage_stats(struct intel_guc *guc)
index 8c581c9..7f314e5 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/irqdomain.h>
 #include <linux/irq.h>
 #include <linux/irqchip.h>
+#include <linux/irqchip/xtensa-mx.h>
 #include <linux/of.h>
 
 #include <asm/mxregs.h>
index 59e1ebb..411e00b 100644 (file)
@@ -300,7 +300,7 @@ config NVMEM_REBOOT_MODE
 
 config POWER_MLXBF
        tristate "Mellanox BlueField power handling driver"
-       depends on (GPIO_MLXBF2 && ACPI)
+       depends on (GPIO_MLXBF2 || GPIO_MLXBF3) && ACPI
        help
          This driver supports reset or low power mode handling for Mellanox BlueField.
 
index 12dedf8..de35d24 100644 (file)
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0-only or BSD-3-Clause
+// SPDX-License-Identifier: GPL-2.0-only OR BSD-3-Clause
 
 /*
  *  Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES.
index 447ffda..17064d7 100644 (file)
@@ -121,7 +121,7 @@ static int vexpress_reset_probe(struct platform_device *pdev)
                return PTR_ERR(regmap);
        dev_set_drvdata(&pdev->dev, regmap);
 
-       switch ((enum vexpress_reset_func)match->data) {
+       switch ((uintptr_t)match->data) {
        case FUNC_SHUTDOWN:
                vexpress_power_off_device = &pdev->dev;
                pm_power_off = vexpress_power_off;
index 663a1c4..a61bb12 100644 (file)
@@ -769,6 +769,7 @@ config BATTERY_RT5033
 config CHARGER_RT5033
        tristate "RT5033 battery charger support"
        depends on MFD_RT5033
+       depends on EXTCON || !EXTCON
        help
          This adds support for battery charger in Richtek RT5033 PMIC.
          The device supports pre-charge mode, fast charge mode and
index 6f83e99..ce36d6c 100644 (file)
@@ -115,7 +115,6 @@ struct ab8500_btemp {
 static enum power_supply_property ab8500_btemp_props[] = {
        POWER_SUPPLY_PROP_PRESENT,
        POWER_SUPPLY_PROP_ONLINE,
-       POWER_SUPPLY_PROP_TECHNOLOGY,
        POWER_SUPPLY_PROP_TEMP,
 };
 
@@ -532,12 +531,6 @@ static int ab8500_btemp_get_property(struct power_supply *psy,
                else
                        val->intval = 1;
                break;
-       case POWER_SUPPLY_PROP_TECHNOLOGY:
-               if (di->bm->bi)
-                       val->intval = di->bm->bi->technology;
-               else
-                       val->intval = POWER_SUPPLY_TECHNOLOGY_UNKNOWN;
-               break;
        case POWER_SUPPLY_PROP_TEMP:
                val->intval = ab8500_btemp_get_temp(di);
                break;
@@ -662,7 +655,7 @@ static char *supply_interface[] = {
 
 static const struct power_supply_desc ab8500_btemp_desc = {
        .name                   = "ab8500_btemp",
-       .type                   = POWER_SUPPLY_TYPE_BATTERY,
+       .type                   = POWER_SUPPLY_TYPE_UNKNOWN,
        .properties             = ab8500_btemp_props,
        .num_properties         = ARRAY_SIZE(ab8500_btemp_props),
        .get_property           = ab8500_btemp_get_property,
index ea4ad61..2205ea0 100644 (file)
@@ -1720,7 +1720,7 @@ static char *supply_interface[] = {
 
 static const struct power_supply_desc ab8500_chargalg_desc = {
        .name                   = "ab8500_chargalg",
-       .type                   = POWER_SUPPLY_TYPE_BATTERY,
+       .type                   = POWER_SUPPLY_TYPE_UNKNOWN,
        .properties             = ab8500_chargalg_props,
        .num_properties         = ARRAY_SIZE(ab8500_chargalg_props),
        .get_property           = ab8500_chargalg_get_property,
index f27dae5..a9641bd 100644 (file)
@@ -324,7 +324,7 @@ static int mt6370_chg_toggle_cfo(struct mt6370_priv *priv)
 
        if (fl_strobe) {
                dev_err(priv->dev, "Flash led is still in strobe mode\n");
-               return ret;
+               return -EINVAL;
        }
 
        /* cfo off */
index 06e5b6b..d483a81 100644 (file)
@@ -482,6 +482,13 @@ int power_supply_uevent(const struct device *dev, struct kobj_uevent_env *env)
        if (ret)
                return ret;
 
+       /*
+        * Kernel generates KOBJ_REMOVE uevent in device removal path, after
+        * resources have been freed. Exit early to avoid use-after-free.
+        */
+       if (psy->removing)
+               return 0;
+
        prop_buf = (char *)get_zeroed_page(GFP_KERNEL);
        if (!prop_buf)
                return -ENOMEM;
index 8328bce..f64daf5 100644 (file)
@@ -1045,6 +1045,13 @@ static void rk817_charging_monitor(struct work_struct *work)
        queue_delayed_work(system_wq, &charger->work, msecs_to_jiffies(8000));
 }
 
+static void rk817_cleanup_node(void *data)
+{
+       struct device_node *node = data;
+
+       of_node_put(node);
+}
+
 static int rk817_charger_probe(struct platform_device *pdev)
 {
        struct rk808 *rk808 = dev_get_drvdata(pdev->dev.parent);
@@ -1061,11 +1068,13 @@ static int rk817_charger_probe(struct platform_device *pdev)
        if (!node)
                return -ENODEV;
 
+       ret = devm_add_action_or_reset(&pdev->dev, rk817_cleanup_node, node);
+       if (ret)
+               return ret;
+
        charger = devm_kzalloc(&pdev->dev, sizeof(*charger), GFP_KERNEL);
-       if (!charger) {
-               of_node_put(node);
+       if (!charger)
                return -ENOMEM;
-       }
 
        charger->rk808 = rk808;
 
@@ -1211,3 +1220,4 @@ MODULE_DESCRIPTION("Battery power supply driver for RK817 PMIC");
 MODULE_AUTHOR("Maya Matuszczyk <maccraft123mc@gmail.com>");
 MODULE_AUTHOR("Chris Morgan <macromorgan@hotmail.com>");
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:rk817-charger");
index 683adb1..fdfdc83 100644 (file)
@@ -598,8 +598,8 @@ static int rt9467_run_aicl(struct rt9467_chg_data *data)
 
        reinit_completion(&data->aicl_done);
        ret = wait_for_completion_timeout(&data->aicl_done, msecs_to_jiffies(3500));
-       if (ret)
-               return ret;
+       if (ret == 0)
+               return -ETIMEDOUT;
 
        ret = rt9467_get_value_from_ranges(data, F_IAICR, RT9467_RANGE_IAICR, &aicr_get);
        if (ret) {
index 954feba..7970843 100644 (file)
@@ -384,7 +384,8 @@ static int ucs1002_get_property(struct power_supply *psy,
        case POWER_SUPPLY_PROP_USB_TYPE:
                return ucs1002_get_usb_type(info, val);
        case POWER_SUPPLY_PROP_HEALTH:
-               return val->intval = info->health;
+               val->intval = info->health;
+               return 0;
        case POWER_SUPPLY_PROP_PRESENT:
                val->intval = info->present;
                return 0;
index d0911bc..89367c4 100644 (file)
@@ -613,6 +613,17 @@ void scsi_cdl_check(struct scsi_device *sdev)
        bool cdl_supported;
        unsigned char *buf;
 
+       /*
+        * Support for CDL was defined in SPC-5. Ignore devices reporting a
+        * lower SPC version. This also avoids problems with old drives choking
+        * on MAINTENANCE_IN / MI_REPORT_SUPPORTED_OPERATION_CODES with a
+        * service action specified, as done in scsi_cdl_check_cmd().
+        */
+       if (sdev->scsi_level < SCSI_SPC_5) {
+               sdev->cdl_supported = 0;
+               return;
+       }
+
        buf = kmalloc(SCSI_CDL_CHECK_BUF_LEN, GFP_KERNEL);
        if (!buf) {
                sdev->cdl_supported = 0;
index 52014b2..902655d 100644 (file)
@@ -822,7 +822,7 @@ static int scsi_probe_lun(struct scsi_device *sdev, unsigned char *inq_result,
         * device is attached at LUN 0 (SCSI_SCAN_TARGET_PRESENT) so
         * non-zero LUNs can be scanned.
         */
-       sdev->scsi_level = inq_result[2] & 0x07;
+       sdev->scsi_level = inq_result[2] & 0x0f;
        if (sdev->scsi_level >= 2 ||
            (sdev->scsi_level == 1 && (inq_result[3] & 0x0f) == 1))
                sdev->scsi_level++;
@@ -1619,12 +1619,24 @@ int scsi_add_device(struct Scsi_Host *host, uint channel,
 }
 EXPORT_SYMBOL(scsi_add_device);
 
-void scsi_rescan_device(struct scsi_device *sdev)
+int scsi_rescan_device(struct scsi_device *sdev)
 {
        struct device *dev = &sdev->sdev_gendev;
+       int ret = 0;
 
        device_lock(dev);
 
+       /*
+        * Bail out if the device is not running. Otherwise, the rescan may
+        * block waiting for commands to be executed, with us holding the
+        * device lock. This can result in a potential deadlock in the power
+        * management core code when system resume is on-going.
+        */
+       if (sdev->sdev_state != SDEV_RUNNING) {
+               ret = -EWOULDBLOCK;
+               goto unlock;
+       }
+
        scsi_attach_vpd(sdev);
        scsi_cdl_check(sdev);
 
@@ -1638,7 +1650,11 @@ void scsi_rescan_device(struct scsi_device *sdev)
                        drv->rescan(dev);
                module_put(dev->driver->owner);
        }
+
+unlock:
        device_unlock(dev);
+
+       return ret;
 }
 EXPORT_SYMBOL(scsi_rescan_device);
 
index c92a317..83b6a3f 100644 (file)
@@ -201,18 +201,32 @@ cache_type_store(struct device *dev, struct device_attribute *attr,
 }
 
 static ssize_t
-manage_start_stop_show(struct device *dev, struct device_attribute *attr,
-                      char *buf)
+manage_start_stop_show(struct device *dev,
+                      struct device_attribute *attr, char *buf)
 {
        struct scsi_disk *sdkp = to_scsi_disk(dev);
        struct scsi_device *sdp = sdkp->device;
 
-       return sprintf(buf, "%u\n", sdp->manage_start_stop);
+       return sysfs_emit(buf, "%u\n",
+                         sdp->manage_system_start_stop &&
+                         sdp->manage_runtime_start_stop);
 }
+static DEVICE_ATTR_RO(manage_start_stop);
 
 static ssize_t
-manage_start_stop_store(struct device *dev, struct device_attribute *attr,
-                       const char *buf, size_t count)
+manage_system_start_stop_show(struct device *dev,
+                             struct device_attribute *attr, char *buf)
+{
+       struct scsi_disk *sdkp = to_scsi_disk(dev);
+       struct scsi_device *sdp = sdkp->device;
+
+       return sysfs_emit(buf, "%u\n", sdp->manage_system_start_stop);
+}
+
+static ssize_t
+manage_system_start_stop_store(struct device *dev,
+                              struct device_attribute *attr,
+                              const char *buf, size_t count)
 {
        struct scsi_disk *sdkp = to_scsi_disk(dev);
        struct scsi_device *sdp = sdkp->device;
@@ -224,11 +238,42 @@ manage_start_stop_store(struct device *dev, struct device_attribute *attr,
        if (kstrtobool(buf, &v))
                return -EINVAL;
 
-       sdp->manage_start_stop = v;
+       sdp->manage_system_start_stop = v;
 
        return count;
 }
-static DEVICE_ATTR_RW(manage_start_stop);
+static DEVICE_ATTR_RW(manage_system_start_stop);
+
+static ssize_t
+manage_runtime_start_stop_show(struct device *dev,
+                              struct device_attribute *attr, char *buf)
+{
+       struct scsi_disk *sdkp = to_scsi_disk(dev);
+       struct scsi_device *sdp = sdkp->device;
+
+       return sysfs_emit(buf, "%u\n", sdp->manage_runtime_start_stop);
+}
+
+static ssize_t
+manage_runtime_start_stop_store(struct device *dev,
+                               struct device_attribute *attr,
+                               const char *buf, size_t count)
+{
+       struct scsi_disk *sdkp = to_scsi_disk(dev);
+       struct scsi_device *sdp = sdkp->device;
+       bool v;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EACCES;
+
+       if (kstrtobool(buf, &v))
+               return -EINVAL;
+
+       sdp->manage_runtime_start_stop = v;
+
+       return count;
+}
+static DEVICE_ATTR_RW(manage_runtime_start_stop);
 
 static ssize_t
 allow_restart_show(struct device *dev, struct device_attribute *attr, char *buf)
@@ -560,6 +605,8 @@ static struct attribute *sd_disk_attrs[] = {
        &dev_attr_FUA.attr,
        &dev_attr_allow_restart.attr,
        &dev_attr_manage_start_stop.attr,
+       &dev_attr_manage_system_start_stop.attr,
+       &dev_attr_manage_runtime_start_stop.attr,
        &dev_attr_protection_type.attr,
        &dev_attr_protection_mode.attr,
        &dev_attr_app_tag_own.attr,
@@ -3694,7 +3741,8 @@ static int sd_remove(struct device *dev)
 
        device_del(&sdkp->disk_dev);
        del_gendisk(sdkp->disk);
-       sd_shutdown(dev);
+       if (!sdkp->suspended)
+               sd_shutdown(dev);
 
        put_disk(sdkp->disk);
        return 0;
@@ -3771,13 +3819,20 @@ static void sd_shutdown(struct device *dev)
                sd_sync_cache(sdkp, NULL);
        }
 
-       if (system_state != SYSTEM_RESTART && sdkp->device->manage_start_stop) {
+       if (system_state != SYSTEM_RESTART &&
+           sdkp->device->manage_system_start_stop) {
                sd_printk(KERN_NOTICE, sdkp, "Stopping disk\n");
                sd_start_stop_device(sdkp, 0);
        }
 }
 
-static int sd_suspend_common(struct device *dev, bool ignore_stop_errors)
+static inline bool sd_do_start_stop(struct scsi_device *sdev, bool runtime)
+{
+       return (sdev->manage_system_start_stop && !runtime) ||
+               (sdev->manage_runtime_start_stop && runtime);
+}
+
+static int sd_suspend_common(struct device *dev, bool runtime)
 {
        struct scsi_disk *sdkp = dev_get_drvdata(dev);
        struct scsi_sense_hdr sshdr;
@@ -3809,15 +3864,18 @@ static int sd_suspend_common(struct device *dev, bool ignore_stop_errors)
                }
        }
 
-       if (sdkp->device->manage_start_stop) {
+       if (sd_do_start_stop(sdkp->device, runtime)) {
                if (!sdkp->device->silence_suspend)
                        sd_printk(KERN_NOTICE, sdkp, "Stopping disk\n");
                /* an error is not worth aborting a system sleep */
                ret = sd_start_stop_device(sdkp, 0);
-               if (ignore_stop_errors)
+               if (!runtime)
                        ret = 0;
        }
 
+       if (!ret)
+               sdkp->suspended = true;
+
        return ret;
 }
 
@@ -3826,15 +3884,15 @@ static int sd_suspend_system(struct device *dev)
        if (pm_runtime_suspended(dev))
                return 0;
 
-       return sd_suspend_common(dev, true);
+       return sd_suspend_common(dev, false);
 }
 
 static int sd_suspend_runtime(struct device *dev)
 {
-       return sd_suspend_common(dev, false);
+       return sd_suspend_common(dev, true);
 }
 
-static int sd_resume(struct device *dev)
+static int sd_resume(struct device *dev, bool runtime)
 {
        struct scsi_disk *sdkp = dev_get_drvdata(dev);
        int ret = 0;
@@ -3842,16 +3900,21 @@ static int sd_resume(struct device *dev)
        if (!sdkp)      /* E.g.: runtime resume at the start of sd_probe() */
                return 0;
 
-       if (!sdkp->device->manage_start_stop)
+       if (!sd_do_start_stop(sdkp->device, runtime)) {
+               sdkp->suspended = false;
                return 0;
+       }
 
        if (!sdkp->device->no_start_on_resume) {
                sd_printk(KERN_NOTICE, sdkp, "Starting disk\n");
                ret = sd_start_stop_device(sdkp, 1);
        }
 
-       if (!ret)
+       if (!ret) {
                opal_unlock_from_suspend(sdkp->opal_dev);
+               sdkp->suspended = false;
+       }
+
        return ret;
 }
 
@@ -3860,7 +3923,7 @@ static int sd_resume_system(struct device *dev)
        if (pm_runtime_suspended(dev))
                return 0;
 
-       return sd_resume(dev);
+       return sd_resume(dev, false);
 }
 
 static int sd_resume_runtime(struct device *dev)
@@ -3887,7 +3950,7 @@ static int sd_resume_runtime(struct device *dev)
                                  "Failed to clear sense data\n");
        }
 
-       return sd_resume(dev);
+       return sd_resume(dev, true);
 }
 
 static const struct dev_pm_ops sd_pm_ops = {
index 5eea762..409dda5 100644 (file)
@@ -131,6 +131,7 @@ struct scsi_disk {
        u8              provisioning_mode;
        u8              zeroing_mode;
        u8              nr_actuators;           /* Number of actuators */
+       bool            suspended;      /* Disk is suspended (stopped) */
        unsigned        ATO : 1;        /* state of disk ATO bit */
        unsigned        cache_override : 1; /* temp override of WCE,RCD */
        unsigned        WCE : 1;        /* state of disk WCE bit */
index 453a9b3..d239fc5 100644 (file)
@@ -256,7 +256,6 @@ static int cs42l43_spi_probe(struct platform_device *pdev)
 
        ret = devm_spi_register_controller(priv->dev, priv->ctlr);
        if (ret) {
-               pm_runtime_disable(priv->dev);
                dev_err(priv->dev, "Failed to register SPI controller: %d\n", ret);
        }
 
index fd2fac2..3aff5a1 100644 (file)
@@ -194,7 +194,7 @@ static ssize_t gxp_spi_write(struct gxp_spi_chip *chip, const struct spi_mem_op
                return ret;
        }
 
-       return write_len;
+       return 0;
 }
 
 static int do_gxp_exec_mem_op(struct spi_mem *mem, const struct spi_mem_op *op)
index e449063..9d2738e 100644 (file)
@@ -233,7 +233,8 @@ int parent_create_sysfs_files(struct mdev_parent *parent)
 out_err:
        while (--i >= 0)
                mdev_type_remove(parent->types[i]);
-       return 0;
+       kset_unregister(parent->mdev_types_kset);
+       return ret;
 }
 
 static ssize_t remove_store(struct device *dev, struct device_attribute *attr,
index 407b3fd..6eceef7 100644 (file)
@@ -3,7 +3,7 @@
 
 config PDS_VFIO_PCI
        tristate "VFIO support for PDS PCI devices"
-       depends on PDS_CORE
+       depends on PDS_CORE && PCI_IOV
        select VFIO_PCI_CORE
        help
          This provides generic PCI support for PDS devices using the VFIO
index b46174f..649b18e 100644 (file)
@@ -162,7 +162,7 @@ static int pds_vfio_init_device(struct vfio_device *vdev)
        pci_id = PCI_DEVID(pdev->bus->number, pdev->devfn);
        dev_dbg(&pdev->dev,
                "%s: PF %#04x VF %#04x vf_id %d domain %d pds_vfio %p\n",
-               __func__, pci_dev_id(pdev->physfn), pci_id, vf_id,
+               __func__, pci_dev_id(pci_physfn(pdev)), pci_id, vf_id,
                pci_domain_nr(pdev->bus), pds_vfio);
 
        return 0;
index a4c2a6b..f8589ca 100644 (file)
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -80,7 +80,7 @@ struct aio_ring {
 struct kioctx_table {
        struct rcu_head         rcu;
        unsigned                nr;
-       struct kioctx __rcu     *table[];
+       struct kioctx __rcu     *table[] __counted_by(nr);
 };
 
 struct kioctx_cpu {
index 6a13cf0..9fe4ccc 100644 (file)
@@ -103,24 +103,17 @@ void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans)
  * Transfer bytes to our delayed refs rsv.
  *
  * @fs_info:   the filesystem
- * @src:       source block rsv to transfer from
  * @num_bytes: number of bytes to transfer
  *
- * This transfers up to the num_bytes amount from the src rsv to the
+ * This transfers up to the num_bytes amount, previously reserved, to the
  * delayed_refs_rsv.  Any extra bytes are returned to the space info.
  */
 void btrfs_migrate_to_delayed_refs_rsv(struct btrfs_fs_info *fs_info,
-                                      struct btrfs_block_rsv *src,
                                       u64 num_bytes)
 {
        struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
        u64 to_free = 0;
 
-       spin_lock(&src->lock);
-       src->reserved -= num_bytes;
-       src->size -= num_bytes;
-       spin_unlock(&src->lock);
-
        spin_lock(&delayed_refs_rsv->lock);
        if (delayed_refs_rsv->size > delayed_refs_rsv->reserved) {
                u64 delta = delayed_refs_rsv->size -
@@ -163,6 +156,8 @@ int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info,
        struct btrfs_block_rsv *block_rsv = &fs_info->delayed_refs_rsv;
        u64 limit = btrfs_calc_delayed_ref_bytes(fs_info, 1);
        u64 num_bytes = 0;
+       u64 refilled_bytes;
+       u64 to_free;
        int ret = -ENOSPC;
 
        spin_lock(&block_rsv->lock);
@@ -178,9 +173,38 @@ int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info,
        ret = btrfs_reserve_metadata_bytes(fs_info, block_rsv, num_bytes, flush);
        if (ret)
                return ret;
-       btrfs_block_rsv_add_bytes(block_rsv, num_bytes, false);
-       trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
-                                     0, num_bytes, 1);
+
+       /*
+        * We may have raced with someone else, so check again if the block
+        * reserve is still not full and release any excess space.
+        */
+       spin_lock(&block_rsv->lock);
+       if (block_rsv->reserved < block_rsv->size) {
+               u64 needed = block_rsv->size - block_rsv->reserved;
+
+               if (num_bytes >= needed) {
+                       block_rsv->reserved += needed;
+                       block_rsv->full = true;
+                       to_free = num_bytes - needed;
+                       refilled_bytes = needed;
+               } else {
+                       block_rsv->reserved += num_bytes;
+                       to_free = 0;
+                       refilled_bytes = num_bytes;
+               }
+       } else {
+               to_free = num_bytes;
+               refilled_bytes = 0;
+       }
+       spin_unlock(&block_rsv->lock);
+
+       if (to_free > 0)
+               btrfs_space_info_free_bytes_may_use(fs_info, block_rsv->space_info,
+                                                   to_free);
+
+       if (refilled_bytes > 0)
+               trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv", 0,
+                                             refilled_bytes, 1);
        return 0;
 }
 
index b8e14b0..fd9bf2b 100644 (file)
@@ -407,7 +407,6 @@ void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans);
 int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info,
                                  enum btrfs_reserve_flush_enum flush);
 void btrfs_migrate_to_delayed_refs_rsv(struct btrfs_fs_info *fs_info,
-                                      struct btrfs_block_rsv *src,
                                       u64 num_bytes);
 bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info);
 
index f356f08..fc313fc 100644 (file)
@@ -1514,15 +1514,14 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
        btrfs_release_path(path);
 
        /* now insert the actual backref */
-       if (owner < BTRFS_FIRST_FREE_OBJECTID) {
-               BUG_ON(refs_to_add != 1);
+       if (owner < BTRFS_FIRST_FREE_OBJECTID)
                ret = insert_tree_block_ref(trans, path, bytenr, parent,
                                            root_objectid);
-       } else {
+       else
                ret = insert_extent_data_ref(trans, path, bytenr, parent,
                                             root_objectid, owner, offset,
                                             refs_to_add);
-       }
+
        if (ret)
                btrfs_abort_transaction(trans, ret);
 out:
@@ -1656,7 +1655,10 @@ again:
                                goto again;
                        }
                } else {
-                       err = -EIO;
+                       err = -EUCLEAN;
+                       btrfs_err(fs_info,
+                 "missing extent item for extent %llu num_bytes %llu level %d",
+                                 head->bytenr, head->num_bytes, extent_op->level);
                        goto out;
                }
        }
@@ -1699,12 +1701,12 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
                parent = ref->parent;
        ref_root = ref->root;
 
-       if (node->ref_mod != 1) {
+       if (unlikely(node->ref_mod != 1)) {
                btrfs_err(trans->fs_info,
-       "btree block(%llu) has %d references rather than 1: action %d ref_root %llu parent %llu",
+       "btree block %llu has %d references rather than 1: action %d ref_root %llu parent %llu",
                          node->bytenr, node->ref_mod, node->action, ref_root,
                          parent);
-               return -EIO;
+               return -EUCLEAN;
        }
        if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
                BUG_ON(!extent_op || !extent_op->update_flags);
index 6954ae7..caccd03 100644 (file)
@@ -3995,8 +3995,14 @@ void read_extent_buffer(const struct extent_buffer *eb, void *dstv,
        char *dst = (char *)dstv;
        unsigned long i = get_eb_page_index(start);
 
-       if (check_eb_range(eb, start, len))
+       if (check_eb_range(eb, start, len)) {
+               /*
+                * Invalid range hit, reset the memory, so callers won't get
+                * some random garbage for their uninitialized memory.
+                */
+               memset(dstv, 0, len);
                return;
+       }
 
        offset = get_eb_offset_in_page(eb, start);
 
index cffdd6f..1a093ec 100644 (file)
@@ -2117,7 +2117,7 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
         * calculated f_bavail.
         */
        if (!mixed && block_rsv->space_info->full &&
-           total_free_meta - thresh < block_rsv->size)
+           (total_free_meta < thresh || total_free_meta - thresh < block_rsv->size))
                buf->f_bavail = 0;
 
        buf->f_type = BTRFS_SUPER_MAGIC;
index 0bf42dc..c780d37 100644 (file)
@@ -631,14 +631,14 @@ start_transaction(struct btrfs_root *root, unsigned int num_items,
                        reloc_reserved = true;
                }
 
-               ret = btrfs_block_rsv_add(fs_info, rsv, num_bytes, flush);
+               ret = btrfs_reserve_metadata_bytes(fs_info, rsv, num_bytes, flush);
                if (ret)
                        goto reserve_fail;
                if (delayed_refs_bytes) {
-                       btrfs_migrate_to_delayed_refs_rsv(fs_info, rsv,
-                                                         delayed_refs_bytes);
+                       btrfs_migrate_to_delayed_refs_rsv(fs_info, delayed_refs_bytes);
                        num_bytes -= delayed_refs_bytes;
                }
+               btrfs_block_rsv_add_bytes(rsv, num_bytes, true);
 
                if (rsv->space_info->force_alloc)
                        do_chunk_alloc = true;
index d1e46b8..cbb17b5 100644 (file)
@@ -4722,7 +4722,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans,
        struct extent_buffer *leaf;
        int slot;
        int ins_nr = 0;
-       int start_slot;
+       int start_slot = 0;
        int ret;
 
        if (!(inode->flags & BTRFS_INODE_PREALLOC))
index 9621455..5a5a8d4 100644 (file)
@@ -1594,7 +1594,7 @@ static int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
        u64 search_start;
        u64 hole_size;
        u64 max_hole_start;
-       u64 max_hole_size;
+       u64 max_hole_size = 0;
        u64 extent_end;
        u64 search_end = device->total_bytes;
        int ret;
@@ -1602,17 +1602,16 @@ static int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
        struct extent_buffer *l;
 
        search_start = dev_extent_search_start(device);
+       max_hole_start = search_start;
 
        WARN_ON(device->zone_info &&
                !IS_ALIGNED(num_bytes, device->zone_info->zone_size));
 
        path = btrfs_alloc_path();
-       if (!path)
-               return -ENOMEM;
-
-       max_hole_start = search_start;
-       max_hole_size = 0;
-
+       if (!path) {
+               ret = -ENOMEM;
+               goto out;
+       }
 again:
        if (search_start >= search_end ||
                test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
index e4d5cd5..e1f31b8 100644 (file)
@@ -249,11 +249,9 @@ static struct inode *parse_longname(const struct inode *parent,
        if (!dir) {
                /* This can happen if we're not mounting cephfs on the root */
                dir = ceph_get_inode(parent->i_sb, vino, NULL);
-               if (!dir)
-                       dir = ERR_PTR(-ENOENT);
+               if (IS_ERR(dir))
+                       dout("Can't find inode %s (%s)\n", inode_number, name);
        }
-       if (IS_ERR(dir))
-               dout("Can't find inode %s (%s)\n", inode_number, name);
 
 out:
        kfree(inode_number);
index 969ce99..c1af01b 100644 (file)
@@ -1535,10 +1535,15 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
 
        if (wbc->pages_skipped) {
                /*
-                * writeback is not making progress due to locked
-                * buffers. Skip this inode for now.
+                * Writeback is not making progress due to locked buffers.
+                * Skip this inode for now. Although having skipped pages
+                * is odd for clean inodes, it can happen for some
+                * filesystems so handle that gracefully.
                 */
-               redirty_tail_locked(inode, wb);
+               if (inode->i_state & I_DIRTY_ALL)
+                       redirty_tail_locked(inode, wb);
+               else
+                       inode_cgwb_move_to_attached(inode, wb);
                return;
        }
 
index a4eb127..37f2d34 100644 (file)
@@ -1903,6 +1903,7 @@ ssize_t direct_write_fallback(struct kiocb *iocb, struct iov_iter *iter,
                 * We don't know how much we wrote, so just return the number of
                 * bytes which were direct-written
                 */
+               iocb->ki_pos -= buffered_written;
                if (direct_written)
                        return direct_written;
                return err;
index 2e40c74..92c7dde 100644 (file)
@@ -4113,6 +4113,7 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
                                 struct file *file, unsigned long maxcount)
 {
        struct xdr_stream *xdr = resp->xdr;
+       unsigned int base = xdr->buf->page_len & ~PAGE_MASK;
        unsigned int starting_len = xdr->buf->len;
        __be32 zero = xdr_zero;
        __be32 nfserr;
@@ -4121,8 +4122,7 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
                return nfserr_resource;
 
        nfserr = nfsd_iter_read(resp->rqstp, read->rd_fhp, file,
-                               read->rd_offset, &maxcount,
-                               xdr->buf->page_len & ~PAGE_MASK,
+                               read->rd_offset, &maxcount, base,
                                &read->rd_eof);
        read->rd_length = maxcount;
        if (nfserr)
index cfec5e0..5661a36 100644 (file)
@@ -1562,6 +1562,7 @@ load_root:
 put_inode_out:
        iput(inode);
 out:
+       ntfs3_put_sbi(sbi);
        kfree(boot2);
        return err;
 }
index d1761ec..ada3fcc 100644 (file)
@@ -337,7 +337,7 @@ static int ovl_set_timestamps(struct ovl_fs *ofs, struct dentry *upperdentry,
 {
        struct iattr attr = {
                .ia_valid =
-                    ATTR_ATIME | ATTR_MTIME | ATTR_ATIME_SET | ATTR_MTIME_SET,
+                    ATTR_ATIME | ATTR_MTIME | ATTR_ATIME_SET | ATTR_MTIME_SET | ATTR_CTIME,
                .ia_atime = stat->atime,
                .ia_mtime = stat->mtime,
        };
index 4193633..693971d 100644 (file)
@@ -391,6 +391,12 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
        if (!ovl_should_sync(OVL_FS(inode->i_sb)))
                ifl &= ~(IOCB_DSYNC | IOCB_SYNC);
 
+       /*
+        * Overlayfs doesn't support deferred completions, don't copy
+        * this property in case it is set by the issuer.
+        */
+       ifl &= ~IOCB_DIO_CALLER_COMP;
+
        old_cred = ovl_override_creds(file_inode(file)->i_sb);
        if (is_sync_kiocb(iocb)) {
                file_start_write(real.file);
index 6c1a9b1..1391901 100644 (file)
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -537,7 +537,6 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
                                break;
                        }
                        ret += copied;
-                       buf->offset = 0;
                        buf->len = copied;
 
                        if (!iov_iter_count(from))
index b817494..7d12b8c 100644 (file)
@@ -2699,7 +2699,7 @@ struct reiserfs_iget_args {
 #define get_journal_desc_magic(bh) (bh->b_data + bh->b_size - 12)
 
 #define journal_trans_half(blocksize) \
-       ((blocksize - sizeof (struct reiserfs_journal_desc) + sizeof (__u32) - 12) / sizeof (__u32))
+       ((blocksize - sizeof(struct reiserfs_journal_desc) - 12) / sizeof(__u32))
 
 /* journal.c see journal.c for all the comments here */
 
@@ -2711,7 +2711,7 @@ struct reiserfs_journal_desc {
        __le32 j_len;
 
        __le32 j_mount_id;      /* mount id of this trans */
-       __le32 j_realblock[1];  /* real locations for each block */
+       __le32 j_realblock[];   /* real locations for each block */
 };
 
 #define get_desc_trans_id(d)   le32_to_cpu((d)->j_trans_id)
@@ -2726,7 +2726,7 @@ struct reiserfs_journal_desc {
 struct reiserfs_journal_commit {
        __le32 j_trans_id;      /* must match j_trans_id from the desc block */
        __le32 j_len;           /* ditto */
-       __le32 j_realblock[1];  /* real locations for each block */
+       __le32 j_realblock[];   /* real locations for each block */
 };
 
 #define get_commit_trans_id(c) le32_to_cpu((c)->j_trans_id)
index e45ce31..a3493da 100644 (file)
@@ -1541,6 +1541,7 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
 
  cifs_parse_mount_err:
        kfree_sensitive(ctx->password);
+       ctx->password = NULL;
        return -EINVAL;
 }
 
index 0d990c2..db7fa70 100644 (file)
@@ -197,6 +197,9 @@ int ksmbd_conn_write(struct ksmbd_work *work)
        if (work->send_no_response)
                return 0;
 
+       if (!work->iov_idx)
+               return -EINVAL;
+
        ksmbd_conn_lock(conn);
        sent = conn->transport->ops->writev(conn->transport, work->iov,
                        work->iov_cnt,
index 5ab2f52..32347fe 100644 (file)
@@ -115,8 +115,10 @@ static int __process_request(struct ksmbd_work *work, struct ksmbd_conn *conn,
        if (check_conn_state(work))
                return SERVER_HANDLER_CONTINUE;
 
-       if (ksmbd_verify_smb_message(work))
+       if (ksmbd_verify_smb_message(work)) {
+               conn->ops->set_rsp_status(work, STATUS_INVALID_PARAMETER);
                return SERVER_HANDLER_ABORT;
+       }
 
        command = conn->ops->get_cmd_val(work);
        *cmd = command;
index e881df1..23bd3d1 100644 (file)
@@ -440,10 +440,8 @@ int ksmbd_smb2_check_message(struct ksmbd_work *work)
 
 validate_credit:
        if ((work->conn->vals->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU) &&
-           smb2_validate_credit_charge(work->conn, hdr)) {
-               work->conn->ops->set_rsp_status(work, STATUS_INVALID_PARAMETER);
+           smb2_validate_credit_charge(work->conn, hdr))
                return 1;
-       }
 
        return 0;
 }
index f71ea78..7cd09c3 100644 (file)
@@ -146,10 +146,18 @@ xfs_nfs_get_inode(
                return ERR_PTR(error);
        }
 
-       error = xfs_inode_reload_unlinked(ip);
-       if (error) {
-               xfs_irele(ip);
-               return ERR_PTR(error);
+       /*
+        * Reload the incore unlinked list to avoid failure in inodegc.
+        * Use an unlocked check here because unrecovered unlinked inodes
+        * should be somewhat rare.
+        */
+       if (xfs_inode_unlinked_incomplete(ip)) {
+               error = xfs_inode_reload_unlinked(ip);
+               if (error) {
+                       xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+                       xfs_irele(ip);
+                       return ERR_PTR(error);
+               }
        }
 
        if (VFS_I(ip)->i_generation != generation) {
index f94f7b3..4d55f58 100644 (file)
@@ -1743,6 +1743,14 @@ xfs_inactive(
                truncate = 1;
 
        if (xfs_iflags_test(ip, XFS_IQUOTAUNCHECKED)) {
+               /*
+                * If this inode is being inactivated during a quotacheck and
+                * has not yet been scanned by quotacheck, we /must/ remove
+                * the dquots from the inode before inactivation changes the
+                * block and inode counts.  Most probably this is a result of
+                * reloading the incore iunlinked list to purge unrecovered
+                * unlinked inodes.
+                */
                xfs_qm_dqdetach(ip);
        } else {
                error = xfs_qm_dqattach(ip);
@@ -3641,6 +3649,16 @@ xfs_inode_reload_unlinked_bucket(
        if (error)
                return error;
 
+       /*
+        * We've taken ILOCK_SHARED and the AGI buffer lock to stabilize the
+        * incore unlinked list pointers for this inode.  Check once more to
+        * see if we raced with anyone else to reload the unlinked list.
+        */
+       if (!xfs_inode_unlinked_incomplete(ip)) {
+               foundit = true;
+               goto out_agibp;
+       }
+
        bucket = agino % XFS_AGI_UNLINKED_BUCKETS;
        agi = agibp->b_addr;
 
@@ -3655,25 +3673,27 @@ xfs_inode_reload_unlinked_bucket(
        while (next_agino != NULLAGINO) {
                struct xfs_inode        *next_ip = NULL;
 
+               /* Found this caller's inode, set its backlink. */
                if (next_agino == agino) {
-                       /* Found this inode, set its backlink. */
                        next_ip = ip;
                        next_ip->i_prev_unlinked = prev_agino;
                        foundit = true;
+                       goto next_inode;
                }
-               if (!next_ip) {
-                       /* Inode already in memory. */
-                       next_ip = xfs_iunlink_lookup(pag, next_agino);
-               }
-               if (!next_ip) {
-                       /* Inode not in memory, reload. */
-                       error = xfs_iunlink_reload_next(tp, agibp, prev_agino,
-                                       next_agino);
-                       if (error)
-                               break;
 
-                       next_ip = xfs_iunlink_lookup(pag, next_agino);
-               }
+               /* Try in-memory lookup first. */
+               next_ip = xfs_iunlink_lookup(pag, next_agino);
+               if (next_ip)
+                       goto next_inode;
+
+               /* Inode not in memory, try reloading it. */
+               error = xfs_iunlink_reload_next(tp, agibp, prev_agino,
+                               next_agino);
+               if (error)
+                       break;
+
+               /* Grab the reloaded inode. */
+               next_ip = xfs_iunlink_lookup(pag, next_agino);
                if (!next_ip) {
                        /* No incore inode at all?  We reloaded it... */
                        ASSERT(next_ip != NULL);
@@ -3681,10 +3701,12 @@ xfs_inode_reload_unlinked_bucket(
                        break;
                }
 
+next_inode:
                prev_agino = next_agino;
                next_agino = next_ip->i_next_unlinked;
        }
 
+out_agibp:
        xfs_trans_brelse(tp, agibp);
        /* Should have found this inode somewhere in the iunlinked bucket. */
        if (!error && !foundit)
index ccf0c4f..f5377ba 100644 (file)
@@ -80,10 +80,12 @@ xfs_bulkstat_one_int(
        if (error)
                goto out;
 
+       /* Reload the incore unlinked list to avoid failure in inodegc. */
        if (xfs_inode_unlinked_incomplete(ip)) {
                error = xfs_inode_reload_unlinked_bucket(tp, ip);
                if (error) {
                        xfs_iunlock(ip, XFS_ILOCK_SHARED);
+                       xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
                        xfs_irele(ip);
                        return error;
                }
index 7256090..086e78a 100644 (file)
@@ -1160,9 +1160,18 @@ xfs_qm_dqusage_adjust(
        if (error)
                return error;
 
-       error = xfs_inode_reload_unlinked(ip);
-       if (error)
-               goto error0;
+       /*
+        * Reload the incore unlinked list to avoid failure in inodegc.
+        * Use an unlocked check here because unrecovered unlinked inodes
+        * should be somewhat rare.
+        */
+       if (xfs_inode_unlinked_incomplete(ip)) {
+               error = xfs_inode_reload_unlinked(ip);
+               if (error) {
+                       xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+                       goto error0;
+               }
+       }
 
        ASSERT(ip->i_delayed_blks == 0);
 
index 5f2301e..f3b3593 100644 (file)
@@ -467,19 +467,17 @@ union ceph_mds_request_args {
 } __attribute__ ((packed));
 
 union ceph_mds_request_args_ext {
-       union {
-               union ceph_mds_request_args old;
-               struct {
-                       __le32 mode;
-                       __le32 uid;
-                       __le32 gid;
-                       struct ceph_timespec mtime;
-                       struct ceph_timespec atime;
-                       __le64 size, old_size;       /* old_size needed by truncate */
-                       __le32 mask;                 /* CEPH_SETATTR_* */
-                       struct ceph_timespec btime;
-               } __attribute__ ((packed)) setattr_ext;
-       };
+       union ceph_mds_request_args old;
+       struct {
+               __le32 mode;
+               __le32 uid;
+               __le32 gid;
+               struct ceph_timespec mtime;
+               struct ceph_timespec atime;
+               __le64 size, old_size;       /* old_size needed by truncate */
+               __le32 mask;                 /* CEPH_SETATTR_* */
+               struct ceph_timespec btime;
+       } __attribute__ ((packed)) setattr_ext;
 };
 
 #define CEPH_MDS_FLAG_REPLAY           1 /* this is a replayed op */
index bf4913f..2a7d2af 100644 (file)
@@ -192,6 +192,7 @@ enum {
        ATA_PFLAG_UNLOADING     = (1 << 9), /* driver is being unloaded */
        ATA_PFLAG_UNLOADED      = (1 << 10), /* driver is unloaded */
 
+       ATA_PFLAG_RESUMING      = (1 << 16),  /* port is being resumed */
        ATA_PFLAG_SUSPENDED     = (1 << 17), /* port is suspended (power) */
        ATA_PFLAG_PM_PENDING    = (1 << 18), /* PM operation pending */
        ATA_PFLAG_INIT_GTM_VALID = (1 << 19), /* initial gtm data valid */
@@ -259,7 +260,7 @@ enum {
         * advised to wait only for the following duration before
         * doing SRST.
         */
-       ATA_TMOUT_PMP_SRST_WAIT = 5000,
+       ATA_TMOUT_PMP_SRST_WAIT = 10000,
 
        /* When the LPM policy is set to ATA_LPM_MAX_POWER, there might
         * be a spurious PHY event, so ignore the first PHY event that
@@ -318,9 +319,10 @@ enum {
        ATA_EH_ENABLE_LINK      = (1 << 3),
        ATA_EH_PARK             = (1 << 5), /* unload heads and stop I/O */
        ATA_EH_GET_SUCCESS_SENSE = (1 << 6), /* Get sense data for successful cmd */
+       ATA_EH_SET_ACTIVE       = (1 << 7), /* Set a device to active power mode */
 
        ATA_EH_PERDEV_MASK      = ATA_EH_REVALIDATE | ATA_EH_PARK |
-                                 ATA_EH_GET_SUCCESS_SENSE,
+                                 ATA_EH_GET_SUCCESS_SENSE | ATA_EH_SET_ACTIVE,
        ATA_EH_ALL_ACTIONS      = ATA_EH_REVALIDATE | ATA_EH_RESET |
                                  ATA_EH_ENABLE_LINK,
 
@@ -357,7 +359,7 @@ enum {
        /* This should match the actual table size of
         * ata_eh_cmd_timeout_table in libata-eh.c.
         */
-       ATA_EH_CMD_TIMEOUT_TABLE_SIZE = 7,
+       ATA_EH_CMD_TIMEOUT_TABLE_SIZE = 8,
 
        /* Horkage types. May be set by libata or controller on drives
           (some horkage may be drive/controller pair dependent */
@@ -1148,6 +1150,7 @@ extern int ata_std_bios_param(struct scsi_device *sdev,
                              struct block_device *bdev,
                              sector_t capacity, int geom[]);
 extern void ata_scsi_unlock_native_capacity(struct scsi_device *sdev);
+extern int ata_scsi_slave_alloc(struct scsi_device *sdev);
 extern int ata_scsi_slave_config(struct scsi_device *sdev);
 extern void ata_scsi_slave_destroy(struct scsi_device *sdev);
 extern int ata_scsi_change_queue_depth(struct scsi_device *sdev,
@@ -1396,6 +1399,7 @@ extern const struct attribute_group *ata_common_sdev_groups[];
        .this_id                = ATA_SHT_THIS_ID,              \
        .emulated               = ATA_SHT_EMULATED,             \
        .proc_name              = drv_name,                     \
+       .slave_alloc            = ata_scsi_slave_alloc,         \
        .slave_destroy          = ata_scsi_slave_destroy,       \
        .bios_param             = ata_std_bios_param,           \
        .unlock_native_capacity = ata_scsi_unlock_native_capacity,\
index ec09359..4498f84 100644 (file)
@@ -157,6 +157,9 @@ enum scsi_disposition {
 #define SCSI_3          4        /* SPC */
 #define SCSI_SPC_2      5
 #define SCSI_SPC_3      6
+#define SCSI_SPC_4     7
+#define SCSI_SPC_5     8
+#define SCSI_SPC_6     14
 
 /*
  * INQ PERIPHERAL QUALIFIERS
index b9230b6..fd41fda 100644 (file)
@@ -161,6 +161,10 @@ struct scsi_device {
                                 * pass settings from slave_alloc to scsi
                                 * core. */
        unsigned int eh_timeout; /* Error handling timeout */
+
+       bool manage_system_start_stop; /* Let HLD (sd) manage system start/stop */
+       bool manage_runtime_start_stop; /* Let HLD (sd) manage runtime start/stop */
+
        unsigned removable:1;
        unsigned changed:1;     /* Data invalid due to media change */
        unsigned busy:1;        /* Used to prevent races */
@@ -193,7 +197,6 @@ struct scsi_device {
        unsigned use_192_bytes_for_3f:1; /* ask for 192 bytes from page 0x3f */
        unsigned no_start_on_add:1;     /* do not issue start on add */
        unsigned allow_restart:1; /* issue START_UNIT in error handler */
-       unsigned manage_start_stop:1;   /* Let HLD (sd) manage start/stop */
        unsigned no_start_on_resume:1; /* Do not issue START_STOP_UNIT on resume */
        unsigned start_stop_pwr_cond:1; /* Set power cond. in START_STOP_UNIT */
        unsigned no_uld_attach:1; /* disable connecting to upper level drivers */
index 49f768d..4c2dc81 100644 (file)
@@ -764,7 +764,7 @@ scsi_template_proc_dir(const struct scsi_host_template *sht);
 #define scsi_template_proc_dir(sht) NULL
 #endif
 extern void scsi_scan_host(struct Scsi_Host *);
-extern void scsi_rescan_device(struct scsi_device *);
+extern int scsi_rescan_device(struct scsi_device *sdev);
 extern void scsi_remove_host(struct Scsi_Host *);
 extern struct Scsi_Host *scsi_host_get(struct Scsi_Host *);
 extern int scsi_host_busy(struct Scsi_Host *shost);
index f6a69a5..08e3b17 100644 (file)
@@ -243,7 +243,7 @@ int io_linkat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
        struct io_link *lnk = io_kiocb_to_cmd(req, struct io_link);
        const char __user *oldf, *newf;
 
-       if (sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in)
+       if (sqe->buf_index || sqe->splice_fd_in)
                return -EINVAL;
        if (unlikely(req->flags & REQ_F_FIXED_FILE))
                return -EBADF;
index c85825e..b9f053a 100644 (file)
@@ -6535,9 +6535,6 @@ void __init workqueue_init_early(void)
 
        BUG_ON(!zalloc_cpumask_var_node(&pt->pod_cpus[0], GFP_KERNEL, NUMA_NO_NODE));
 
-       wq_update_pod_attrs_buf = alloc_workqueue_attrs();
-       BUG_ON(!wq_update_pod_attrs_buf);
-
        pt->nr_pods = 1;
        cpumask_copy(pt->pod_cpus[0], cpu_possible_mask);
        pt->pod_node[0] = NUMA_NO_NODE;
@@ -6605,13 +6602,13 @@ static void __init wq_cpu_intensive_thresh_init(void)
        unsigned long thresh;
        unsigned long bogo;
 
+       pwq_release_worker = kthread_create_worker(0, "pool_workqueue_release");
+       BUG_ON(IS_ERR(pwq_release_worker));
+
        /* if the user set it to a specific value, keep it */
        if (wq_cpu_intensive_thresh_us != ULONG_MAX)
                return;
 
-       pwq_release_worker = kthread_create_worker(0, "pool_workqueue_release");
-       BUG_ON(IS_ERR(pwq_release_worker));
-
        /*
         * The default of 10ms is derived from the fact that most modern (as of
         * 2023) processors can do a lot in 10ms and that it's just below what
index cd71f95..8fda308 100644 (file)
@@ -479,7 +479,7 @@ void slab_kmem_cache_release(struct kmem_cache *s)
 
 void kmem_cache_destroy(struct kmem_cache *s)
 {
-       int refcnt;
+       int err = -EBUSY;
        bool rcu_set;
 
        if (unlikely(!s) || !kasan_check_byte(s))
@@ -490,17 +490,17 @@ void kmem_cache_destroy(struct kmem_cache *s)
 
        rcu_set = s->flags & SLAB_TYPESAFE_BY_RCU;
 
-       refcnt = --s->refcount;
-       if (refcnt)
+       s->refcount--;
+       if (s->refcount)
                goto out_unlock;
 
-       WARN(shutdown_cache(s),
-            "%s %s: Slab cache still has objects when called from %pS",
+       err = shutdown_cache(s);
+       WARN(err, "%s %s: Slab cache still has objects when called from %pS",
             __func__, s->name, (void *)_RET_IP_);
 out_unlock:
        mutex_unlock(&slab_mutex);
        cpus_read_unlock();
-       if (!refcnt && !rcu_set)
+       if (!err && !rcu_set)
                kmem_cache_release(s);
 }
 EXPORT_SYMBOL(kmem_cache_destroy);
@@ -745,24 +745,24 @@ struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags, unsigned long caller)
 
 size_t kmalloc_size_roundup(size_t size)
 {
-       struct kmem_cache *c;
+       if (size && size <= KMALLOC_MAX_CACHE_SIZE) {
+               /*
+                * The flags don't matter since size_index is common to all.
+                * Neither does the caller for just getting ->object_size.
+                */
+               return kmalloc_slab(size, GFP_KERNEL, 0)->object_size;
+       }
 
-       /* Short-circuit the 0 size case. */
-       if (unlikely(size == 0))
-               return 0;
-       /* Short-circuit saturated "too-large" case. */
-       if (unlikely(size == SIZE_MAX))
-               return SIZE_MAX;
        /* Above the smaller buckets, size is a multiple of page size. */
-       if (size > KMALLOC_MAX_CACHE_SIZE)
+       if (size && size <= KMALLOC_MAX_SIZE)
                return PAGE_SIZE << get_order(size);
 
        /*
-        * The flags don't matter since size_index is common to all.
-        * Neither does the caller for just getting ->object_size.
+        * Return 'size' for 0 - kmalloc() returns ZERO_SIZE_PTR
+        * and very large size - kmalloc() may fail.
         */
-       c = kmalloc_slab(size, GFP_KERNEL, 0);
-       return c ? c->object_size : 0;
+       return size;
+
 }
 EXPORT_SYMBOL(kmalloc_size_roundup);
 
index a00a53e..1d11135 100644 (file)
@@ -57,6 +57,7 @@
 
 #define MSR_IA32_PRED_CMD              0x00000049 /* Prediction Command */
 #define PRED_CMD_IBPB                  BIT(0)     /* Indirect Branch Prediction Barrier */
+#define PRED_CMD_SBPB                  BIT(7)     /* Selective Branch Prediction Barrier */
 
 #define MSR_PPIN_CTL                   0x0000004e
 #define MSR_PPIN                       0x0000004f
                                                 * Not susceptible to Post-Barrier
                                                 * Return Stack Buffer Predictions.
                                                 */
+#define ARCH_CAP_GDS_CTRL              BIT(25) /*
+                                                * CPU is vulnerable to Gather
+                                                * Data Sampling (GDS) and
+                                                * has controls for mitigation.
+                                                */
+#define ARCH_CAP_GDS_NO                        BIT(26) /*
+                                                * CPU is not vulnerable to Gather
+                                                * Data Sampling (GDS).
+                                                */
 
 #define ARCH_CAP_XAPIC_DISABLE         BIT(21) /*
                                                 * IA32_XAPIC_DISABLE_STATUS MSR
 #define RNGDS_MITG_DIS                 BIT(0)  /* SRBDS support */
 #define RTM_ALLOW                      BIT(1)  /* TSX development mode */
 #define FB_CLEAR_DIS                   BIT(3)  /* CPU Fill buffer clear disable */
+#define GDS_MITG_DIS                   BIT(4)  /* Disable GDS mitigation */
+#define GDS_MITG_LOCKED                        BIT(5)  /* GDS mitigation locked */
 
 #define MSR_IA32_SYSENTER_CS           0x00000174
 #define MSR_IA32_SYSENTER_ESP          0x00000175
index fd6c1cb..abe087c 100644 (file)
@@ -820,8 +820,11 @@ __SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node)
 #define __NR_cachestat 451
 __SYSCALL(__NR_cachestat, sys_cachestat)
 
+#define __NR_fchmodat2 452
+__SYSCALL(__NR_fchmodat2, sys_fchmodat2)
+
 #undef __NR_syscalls
-#define __NR_syscalls 452
+#define __NR_syscalls 453
 
 /*
  * 32 bit systems traditionally used different
index a87bbbb..794c1d8 100644 (file)
@@ -673,8 +673,11 @@ struct drm_gem_open {
  * Bitfield of supported PRIME sharing capabilities. See &DRM_PRIME_CAP_IMPORT
  * and &DRM_PRIME_CAP_EXPORT.
  *
- * PRIME buffers are exposed as dma-buf file descriptors. See
- * Documentation/gpu/drm-mm.rst, section "PRIME Buffer Sharing".
+ * Starting from kernel version 6.6, both &DRM_PRIME_CAP_IMPORT and
+ * &DRM_PRIME_CAP_EXPORT are always advertised.
+ *
+ * PRIME buffers are exposed as dma-buf file descriptors.
+ * See :ref:`prime_buffer_sharing`.
  */
 #define DRM_CAP_PRIME                  0x5
 /**
@@ -682,6 +685,8 @@ struct drm_gem_open {
  *
  * If this bit is set in &DRM_CAP_PRIME, the driver supports importing PRIME
  * buffers via the &DRM_IOCTL_PRIME_FD_TO_HANDLE ioctl.
+ *
+ * Starting from kernel version 6.6, this bit is always set in &DRM_CAP_PRIME.
  */
 #define  DRM_PRIME_CAP_IMPORT          0x1
 /**
@@ -689,6 +694,8 @@ struct drm_gem_open {
  *
  * If this bit is set in &DRM_CAP_PRIME, the driver supports exporting PRIME
  * buffers via the &DRM_IOCTL_PRIME_HANDLE_TO_FD ioctl.
+ *
+ * Starting from kernel version 6.6, this bit is always set in &DRM_CAP_PRIME.
  */
 #define  DRM_PRIME_CAP_EXPORT          0x2
 /**
@@ -756,15 +763,14 @@ struct drm_gem_open {
 /**
  * DRM_CAP_SYNCOBJ
  *
- * If set to 1, the driver supports sync objects. See
- * Documentation/gpu/drm-mm.rst, section "DRM Sync Objects".
+ * If set to 1, the driver supports sync objects. See :ref:`drm_sync_objects`.
  */
 #define DRM_CAP_SYNCOBJ                0x13
 /**
  * DRM_CAP_SYNCOBJ_TIMELINE
  *
  * If set to 1, the driver supports timeline operations on sync objects. See
- * Documentation/gpu/drm-mm.rst, section "DRM Sync Objects".
+ * :ref:`drm_sync_objects`.
  */
 #define DRM_CAP_SYNCOBJ_TIMELINE       0x14
 
@@ -909,6 +915,27 @@ struct drm_syncobj_timeline_wait {
        __u32 pad;
 };
 
+/**
+ * struct drm_syncobj_eventfd
+ * @handle: syncobj handle.
+ * @flags: Zero to wait for the point to be signalled, or
+ *         &DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE to wait for a fence to be
+ *         available for the point.
+ * @point: syncobj timeline point (set to zero for binary syncobjs).
+ * @fd: Existing eventfd to send events to.
+ * @pad: Must be zero.
+ *
+ * Register an eventfd to be signalled by a syncobj. The eventfd counter will
+ * be incremented by one.
+ */
+struct drm_syncobj_eventfd {
+       __u32 handle;
+       __u32 flags;
+       __u64 point;
+       __s32 fd;
+       __u32 pad;
+};
+
 
 struct drm_syncobj_array {
        __u64 handles;
@@ -1169,6 +1196,8 @@ extern "C" {
  */
 #define DRM_IOCTL_MODE_GETFB2          DRM_IOWR(0xCE, struct drm_mode_fb_cmd2)
 
+#define DRM_IOCTL_SYNCOBJ_EVENTFD      DRM_IOWR(0xCF, struct drm_syncobj_eventfd)
+
 /*
  * Device specific ioctls should only be in their respective headers
  * The device specific ioctl range is from 0x40 to 0x9f.
@@ -1180,25 +1209,50 @@ extern "C" {
 #define DRM_COMMAND_BASE                0x40
 #define DRM_COMMAND_END                        0xA0
 
-/*
- * Header for events written back to userspace on the drm fd.  The
- * type defines the type of event, the length specifies the total
- * length of the event (including the header), and user_data is
- * typically a 64 bit value passed with the ioctl that triggered the
- * event.  A read on the drm fd will always only return complete
- * events, that is, if for example the read buffer is 100 bytes, and
- * there are two 64 byte events pending, only one will be returned.
+/**
+ * struct drm_event - Header for DRM events
+ * @type: event type.
+ * @length: total number of payload bytes (including header).
  *
- * Event types 0 - 0x7fffffff are generic drm events, 0x80000000 and
- * up are chipset specific.
+ * This struct is a header for events written back to user-space on the DRM FD.
+ * A read on the DRM FD will always only return complete events: e.g. if the
+ * read buffer is 100 bytes large and there are two 64 byte events pending,
+ * only one will be returned.
+ *
+ * Event types 0 - 0x7fffffff are generic DRM events, 0x80000000 and
+ * up are chipset specific. Generic DRM events include &DRM_EVENT_VBLANK,
+ * &DRM_EVENT_FLIP_COMPLETE and &DRM_EVENT_CRTC_SEQUENCE.
  */
 struct drm_event {
        __u32 type;
        __u32 length;
 };
 
+/**
+ * DRM_EVENT_VBLANK - vertical blanking event
+ *
+ * This event is sent in response to &DRM_IOCTL_WAIT_VBLANK with the
+ * &_DRM_VBLANK_EVENT flag set.
+ *
+ * The event payload is a struct drm_event_vblank.
+ */
 #define DRM_EVENT_VBLANK 0x01
+/**
+ * DRM_EVENT_FLIP_COMPLETE - page-flip completion event
+ *
+ * This event is sent in response to an atomic commit or legacy page-flip with
+ * the &DRM_MODE_PAGE_FLIP_EVENT flag set.
+ *
+ * The event payload is a struct drm_event_vblank.
+ */
 #define DRM_EVENT_FLIP_COMPLETE 0x02
+/**
+ * DRM_EVENT_CRTC_SEQUENCE - CRTC sequence event
+ *
+ * This event is sent in response to &DRM_IOCTL_CRTC_QUEUE_SEQUENCE.
+ *
+ * The event payload is a struct drm_event_crtc_sequence.
+ */
 #define DRM_EVENT_CRTC_SEQUENCE        0x03
 
 struct drm_event_vblank {
diff --git a/tools/include/uapi/linux/seccomp.h b/tools/include/uapi/linux/seccomp.h
new file mode 100644 (file)
index 0000000..dbfc9b3
--- /dev/null
@@ -0,0 +1,157 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_LINUX_SECCOMP_H
+#define _UAPI_LINUX_SECCOMP_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+
+/* Valid values for seccomp.mode and prctl(PR_SET_SECCOMP, <mode>) */
+#define SECCOMP_MODE_DISABLED  0 /* seccomp is not in use. */
+#define SECCOMP_MODE_STRICT    1 /* uses hard-coded filter. */
+#define SECCOMP_MODE_FILTER    2 /* uses user-supplied filter. */
+
+/* Valid operations for seccomp syscall. */
+#define SECCOMP_SET_MODE_STRICT                0
+#define SECCOMP_SET_MODE_FILTER                1
+#define SECCOMP_GET_ACTION_AVAIL       2
+#define SECCOMP_GET_NOTIF_SIZES                3
+
+/* Valid flags for SECCOMP_SET_MODE_FILTER */
+#define SECCOMP_FILTER_FLAG_TSYNC              (1UL << 0)
+#define SECCOMP_FILTER_FLAG_LOG                        (1UL << 1)
+#define SECCOMP_FILTER_FLAG_SPEC_ALLOW         (1UL << 2)
+#define SECCOMP_FILTER_FLAG_NEW_LISTENER       (1UL << 3)
+#define SECCOMP_FILTER_FLAG_TSYNC_ESRCH                (1UL << 4)
+/* Received notifications wait in killable state (only respond to fatal signals) */
+#define SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV (1UL << 5)
+
+/*
+ * All BPF programs must return a 32-bit value.
+ * The bottom 16-bits are for optional return data.
+ * The upper 16-bits are ordered from least permissive values to most,
+ * as a signed value (so 0x80000000 is negative).
+ *
+ * The ordering ensures that a min_t() over composed return values always
+ * selects the least permissive choice.
+ */
+#define SECCOMP_RET_KILL_PROCESS 0x80000000U /* kill the process */
+#define SECCOMP_RET_KILL_THREAD         0x00000000U /* kill the thread */
+#define SECCOMP_RET_KILL        SECCOMP_RET_KILL_THREAD
+#define SECCOMP_RET_TRAP        0x00030000U /* disallow and force a SIGSYS */
+#define SECCOMP_RET_ERRNO       0x00050000U /* returns an errno */
+#define SECCOMP_RET_USER_NOTIF  0x7fc00000U /* notifies userspace */
+#define SECCOMP_RET_TRACE       0x7ff00000U /* pass to a tracer or disallow */
+#define SECCOMP_RET_LOG                 0x7ffc0000U /* allow after logging */
+#define SECCOMP_RET_ALLOW       0x7fff0000U /* allow */
+
+/* Masks for the return value sections. */
+#define SECCOMP_RET_ACTION_FULL        0xffff0000U
+#define SECCOMP_RET_ACTION     0x7fff0000U
+#define SECCOMP_RET_DATA       0x0000ffffU
+
+/**
+ * struct seccomp_data - the format the BPF program executes over.
+ * @nr: the system call number
+ * @arch: indicates system call convention as an AUDIT_ARCH_* value
+ *        as defined in <linux/audit.h>.
+ * @instruction_pointer: at the time of the system call.
+ * @args: up to 6 system call arguments always stored as 64-bit values
+ *        regardless of the architecture.
+ */
+struct seccomp_data {
+       int nr;
+       __u32 arch;
+       __u64 instruction_pointer;
+       __u64 args[6];
+};
+
+struct seccomp_notif_sizes {
+       __u16 seccomp_notif;
+       __u16 seccomp_notif_resp;
+       __u16 seccomp_data;
+};
+
+struct seccomp_notif {
+       __u64 id;
+       __u32 pid;
+       __u32 flags;
+       struct seccomp_data data;
+};
+
+/*
+ * Valid flags for struct seccomp_notif_resp
+ *
+ * Note, the SECCOMP_USER_NOTIF_FLAG_CONTINUE flag must be used with caution!
+ * If set by the process supervising the syscalls of another process the
+ * syscall will continue. This is problematic because of an inherent TOCTOU.
+ * An attacker can exploit the time while the supervised process is waiting on
+ * a response from the supervising process to rewrite syscall arguments which
+ * are passed as pointers of the intercepted syscall.
+ * It should be absolutely clear that this means that the seccomp notifier
+ * _cannot_ be used to implement a security policy! It should only ever be used
+ * in scenarios where a more privileged process supervises the syscalls of a
+ * lesser privileged process to get around kernel-enforced security
+ * restrictions when the privileged process deems this safe. In other words,
+ * in order to continue a syscall the supervising process should be sure that
+ * another security mechanism or the kernel itself will sufficiently block
+ * syscalls if arguments are rewritten to something unsafe.
+ *
+ * Similar precautions should be applied when stacking SECCOMP_RET_USER_NOTIF
+ * or SECCOMP_RET_TRACE. For SECCOMP_RET_USER_NOTIF filters acting on the
+ * same syscall, the most recently added filter takes precedence. This means
+ * that the new SECCOMP_RET_USER_NOTIF filter can override any
+ * SECCOMP_IOCTL_NOTIF_SEND from earlier filters, essentially allowing all
+ * such filtered syscalls to be executed by sending the response
+ * SECCOMP_USER_NOTIF_FLAG_CONTINUE. Note that SECCOMP_RET_TRACE can equally
+ * be overridden by SECCOMP_USER_NOTIF_FLAG_CONTINUE.
+ */
+#define SECCOMP_USER_NOTIF_FLAG_CONTINUE (1UL << 0)
+
+struct seccomp_notif_resp {
+       __u64 id;
+       __s64 val;
+       __s32 error;
+       __u32 flags;
+};
+
+#define SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP (1UL << 0)
+
+/* valid flags for seccomp_notif_addfd */
+#define SECCOMP_ADDFD_FLAG_SETFD       (1UL << 0) /* Specify remote fd */
+#define SECCOMP_ADDFD_FLAG_SEND                (1UL << 1) /* Addfd and return it, atomically */
+
+/**
+ * struct seccomp_notif_addfd
+ * @id: The ID of the seccomp notification
+ * @flags: SECCOMP_ADDFD_FLAG_*
+ * @srcfd: The local fd number
+ * @newfd: Optional remote FD number if SETFD option is set, otherwise 0.
+ * @newfd_flags: The O_* flags the remote FD should have applied
+ */
+struct seccomp_notif_addfd {
+       __u64 id;
+       __u32 flags;
+       __u32 srcfd;
+       __u32 newfd;
+       __u32 newfd_flags;
+};
+
+#define SECCOMP_IOC_MAGIC              '!'
+#define SECCOMP_IO(nr)                 _IO(SECCOMP_IOC_MAGIC, nr)
+#define SECCOMP_IOR(nr, type)          _IOR(SECCOMP_IOC_MAGIC, nr, type)
+#define SECCOMP_IOW(nr, type)          _IOW(SECCOMP_IOC_MAGIC, nr, type)
+#define SECCOMP_IOWR(nr, type)         _IOWR(SECCOMP_IOC_MAGIC, nr, type)
+
+/* Flags for seccomp notification fd ioctl. */
+#define SECCOMP_IOCTL_NOTIF_RECV       SECCOMP_IOWR(0, struct seccomp_notif)
+#define SECCOMP_IOCTL_NOTIF_SEND       SECCOMP_IOWR(1, \
+                                               struct seccomp_notif_resp)
+#define SECCOMP_IOCTL_NOTIF_ID_VALID   SECCOMP_IOW(2, __u64)
+/* On success, the return value is the remote process's added fd number */
+#define SECCOMP_IOCTL_NOTIF_ADDFD      SECCOMP_IOW(3, \
+                                               struct seccomp_notif_addfd)
+
+#define SECCOMP_IOCTL_NOTIF_SET_FLAGS  SECCOMP_IOW(4, __u64)
+
+#endif /* _UAPI_LINUX_SECCOMP_H */
index cfda251..cb5e757 100644 (file)
 449    n64     futex_waitv                     sys_futex_waitv
 450    common  set_mempolicy_home_node         sys_set_mempolicy_home_node
 451    n64     cachestat                       sys_cachestat
+452    n64     fchmodat2                       sys_fchmodat2
index 8c0b08b..20e5058 100644 (file)
 449    common  futex_waitv                     sys_futex_waitv
 450    nospu   set_mempolicy_home_node         sys_set_mempolicy_home_node
 451    common  cachestat                       sys_cachestat
+452    common  fchmodat2                       sys_fchmodat2
index a6935af..0122cc1 100644 (file)
 449  common    futex_waitv             sys_futex_waitv                 sys_futex_waitv
 450  common    set_mempolicy_home_node sys_set_mempolicy_home_node     sys_set_mempolicy_home_node
 451  common    cachestat               sys_cachestat                   sys_cachestat
+452  common    fchmodat2               sys_fchmodat2                   sys_fchmodat2
index 227538b..1d6eee3 100644 (file)
 449    common  futex_waitv             sys_futex_waitv
 450    common  set_mempolicy_home_node sys_set_mempolicy_home_node
 451    common  cachestat               sys_cachestat
+452    common  fchmodat2               sys_fchmodat2
+453    64      map_shadow_stack        sys_map_shadow_stack
 
 #
 # Due to a historical design error, certain syscalls are numbered differently
index b04ebcd..a01c401 100644 (file)
@@ -9,7 +9,7 @@
 #include <sys/syscall.h>
 #include <sys/ioctl.h>
 #include <linux/time64.h>
-#include <linux/seccomp.h>
+#include <uapi/linux/seccomp.h>
 #include <sys/prctl.h>
 
 #include <unistd.h>
index 4314c91..e21caad 100755 (executable)
@@ -21,6 +21,7 @@ FILES=(
   "include/uapi/linux/perf_event.h"
   "include/uapi/linux/prctl.h"
   "include/uapi/linux/sched.h"
+  "include/uapi/linux/seccomp.h"
   "include/uapi/linux/stat.h"
   "include/uapi/linux/usbdevice_fs.h"
   "include/uapi/linux/vhost.h"
index a7e8833..72ba4a9 100755 (executable)
@@ -991,7 +991,7 @@ const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu)
                 }
         }
         free(cpuid);
-        if (!pmu)
+        if (!pmu || !table)
                 return table;
 
         for (i = 0; i < table->num_pmus; i++) {
index 0e9ec65..3e673f2 100644 (file)
@@ -413,10 +413,10 @@ def has_event(event: Event) -> Function:
   # pylint: disable=invalid-name
   return Function('has_event', event)
 
-def strcmp_cpuid_str(event: str) -> Function:
+def strcmp_cpuid_str(cpuid: Event) -> Function:
   # pylint: disable=redefined-builtin
   # pylint: disable=invalid-name
-  return Function('strcmp_cpuid_str', event)
+  return Function('strcmp_cpuid_str', cpuid)
 
 class Metric:
   """An individual metric that will specifiable on the perf command line."""
diff --git a/tools/perf/util/bpf-prologue.c b/tools/perf/util/bpf-prologue.c
deleted file mode 100644 (file)
index 9887ae0..0000000
+++ /dev/null
@@ -1,508 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * bpf-prologue.c
- *
- * Copyright (C) 2015 He Kuang <hekuang@huawei.com>
- * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
- * Copyright (C) 2015 Huawei Inc.
- */
-
-#include <bpf/libbpf.h>
-#include "debug.h"
-#include "bpf-loader.h"
-#include "bpf-prologue.h"
-#include "probe-finder.h"
-#include <errno.h>
-#include <stdlib.h>
-#include <dwarf-regs.h>
-#include <linux/filter.h>
-
-#define BPF_REG_SIZE           8
-
-#define JMP_TO_ERROR_CODE      -1
-#define JMP_TO_SUCCESS_CODE    -2
-#define JMP_TO_USER_CODE       -3
-
-struct bpf_insn_pos {
-       struct bpf_insn *begin;
-       struct bpf_insn *end;
-       struct bpf_insn *pos;
-};
-
-static inline int
-pos_get_cnt(struct bpf_insn_pos *pos)
-{
-       return pos->pos - pos->begin;
-}
-
-static int
-append_insn(struct bpf_insn new_insn, struct bpf_insn_pos *pos)
-{
-       if (!pos->pos)
-               return -BPF_LOADER_ERRNO__PROLOGUE2BIG;
-
-       if (pos->pos + 1 >= pos->end) {
-               pr_err("bpf prologue: prologue too long\n");
-               pos->pos = NULL;
-               return -BPF_LOADER_ERRNO__PROLOGUE2BIG;
-       }
-
-       *(pos->pos)++ = new_insn;
-       return 0;
-}
-
-static int
-check_pos(struct bpf_insn_pos *pos)
-{
-       if (!pos->pos || pos->pos >= pos->end)
-               return -BPF_LOADER_ERRNO__PROLOGUE2BIG;
-       return 0;
-}
-
-/*
- * Convert type string (u8/u16/u32/u64/s8/s16/s32/s64 ..., see
- * Documentation/trace/kprobetrace.rst) to size field of BPF_LDX_MEM
- * instruction (BPF_{B,H,W,DW}).
- */
-static int
-argtype_to_ldx_size(const char *type)
-{
-       int arg_size = type ? atoi(&type[1]) : 64;
-
-       switch (arg_size) {
-       case 8:
-               return BPF_B;
-       case 16:
-               return BPF_H;
-       case 32:
-               return BPF_W;
-       case 64:
-       default:
-               return BPF_DW;
-       }
-}
-
-static const char *
-insn_sz_to_str(int insn_sz)
-{
-       switch (insn_sz) {
-       case BPF_B:
-               return "BPF_B";
-       case BPF_H:
-               return "BPF_H";
-       case BPF_W:
-               return "BPF_W";
-       case BPF_DW:
-               return "BPF_DW";
-       default:
-               return "UNKNOWN";
-       }
-}
-
-/* Give it a shorter name */
-#define ins(i, p) append_insn((i), (p))
-
-/*
- * Give a register name (in 'reg'), generate instruction to
- * load register into an eBPF register rd:
- *   'ldd target_reg, offset(ctx_reg)', where:
- * ctx_reg is pre initialized to pointer of 'struct pt_regs'.
- */
-static int
-gen_ldx_reg_from_ctx(struct bpf_insn_pos *pos, int ctx_reg,
-                    const char *reg, int target_reg)
-{
-       int offset = regs_query_register_offset(reg);
-
-       if (offset < 0) {
-               pr_err("bpf: prologue: failed to get register %s\n",
-                      reg);
-               return offset;
-       }
-       ins(BPF_LDX_MEM(BPF_DW, target_reg, ctx_reg, offset), pos);
-
-       return check_pos(pos);
-}
-
-/*
- * Generate a BPF_FUNC_probe_read function call.
- *
- * src_base_addr_reg is a register holding base address,
- * dst_addr_reg is a register holding dest address (on stack),
- * result is:
- *
- *  *[dst_addr_reg] = *([src_base_addr_reg] + offset)
- *
- * Arguments of BPF_FUNC_probe_read:
- *     ARG1: ptr to stack (dest)
- *     ARG2: size (8)
- *     ARG3: unsafe ptr (src)
- */
-static int
-gen_read_mem(struct bpf_insn_pos *pos,
-            int src_base_addr_reg,
-            int dst_addr_reg,
-            long offset,
-            int probeid)
-{
-       /* mov arg3, src_base_addr_reg */
-       if (src_base_addr_reg != BPF_REG_ARG3)
-               ins(BPF_MOV64_REG(BPF_REG_ARG3, src_base_addr_reg), pos);
-       /* add arg3, #offset */
-       if (offset)
-               ins(BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG3, offset), pos);
-
-       /* mov arg2, #reg_size */
-       ins(BPF_ALU64_IMM(BPF_MOV, BPF_REG_ARG2, BPF_REG_SIZE), pos);
-
-       /* mov arg1, dst_addr_reg */
-       if (dst_addr_reg != BPF_REG_ARG1)
-               ins(BPF_MOV64_REG(BPF_REG_ARG1, dst_addr_reg), pos);
-
-       /* Call probe_read  */
-       ins(BPF_EMIT_CALL(probeid), pos);
-       /*
-        * Error processing: if read fail, goto error code,
-        * will be relocated. Target should be the start of
-        * error processing code.
-        */
-       ins(BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, JMP_TO_ERROR_CODE),
-           pos);
-
-       return check_pos(pos);
-}
-
-/*
- * Each arg should be bare register. Fetch and save them into argument
- * registers (r3 - r5).
- *
- * BPF_REG_1 should have been initialized with pointer to
- * 'struct pt_regs'.
- */
-static int
-gen_prologue_fastpath(struct bpf_insn_pos *pos,
-                     struct probe_trace_arg *args, int nargs)
-{
-       int i, err = 0;
-
-       for (i = 0; i < nargs; i++) {
-               err = gen_ldx_reg_from_ctx(pos, BPF_REG_1, args[i].value,
-                                          BPF_PROLOGUE_START_ARG_REG + i);
-               if (err)
-                       goto errout;
-       }
-
-       return check_pos(pos);
-errout:
-       return err;
-}
-
-/*
- * Slow path:
- *   At least one argument has the form of 'offset($rx)'.
- *
- * Following code first stores them into stack, then loads all of then
- * to r2 - r5.
- * Before final loading, the final result should be:
- *
- * low address
- * BPF_REG_FP - 24  ARG3
- * BPF_REG_FP - 16  ARG2
- * BPF_REG_FP - 8   ARG1
- * BPF_REG_FP
- * high address
- *
- * For each argument (described as: offn(...off2(off1(reg)))),
- * generates following code:
- *
- *  r7 <- fp
- *  r7 <- r7 - stack_offset  // Ideal code should initialize r7 using
- *                           // fp before generating args. However,
- *                           // eBPF won't regard r7 as stack pointer
- *                           // if it is generated by minus 8 from
- *                           // another stack pointer except fp.
- *                           // This is why we have to set r7
- *                           // to fp for each variable.
- *  r3 <- value of 'reg'-> generated using gen_ldx_reg_from_ctx()
- *  (r7) <- r3       // skip following instructions for bare reg
- *  r3 <- r3 + off1  . // skip if off1 == 0
- *  r2 <- 8           \
- *  r1 <- r7           |-> generated by gen_read_mem()
- *  call probe_read    /
- *  jnei r0, 0, err  ./
- *  r3 <- (r7)
- *  r3 <- r3 + off2  . // skip if off2 == 0
- *  r2 <- 8           \  // r2 may be broken by probe_read, so set again
- *  r1 <- r7           |-> generated by gen_read_mem()
- *  call probe_read    /
- *  jnei r0, 0, err  ./
- *  ...
- */
-static int
-gen_prologue_slowpath(struct bpf_insn_pos *pos,
-                     struct probe_trace_arg *args, int nargs)
-{
-       int err, i, probeid;
-
-       for (i = 0; i < nargs; i++) {
-               struct probe_trace_arg *arg = &args[i];
-               const char *reg = arg->value;
-               struct probe_trace_arg_ref *ref = NULL;
-               int stack_offset = (i + 1) * -8;
-
-               pr_debug("prologue: fetch arg %d, base reg is %s\n",
-                        i, reg);
-
-               /* value of base register is stored into ARG3 */
-               err = gen_ldx_reg_from_ctx(pos, BPF_REG_CTX, reg,
-                                          BPF_REG_ARG3);
-               if (err) {
-                       pr_err("prologue: failed to get offset of register %s\n",
-                              reg);
-                       goto errout;
-               }
-
-               /* Make r7 the stack pointer. */
-               ins(BPF_MOV64_REG(BPF_REG_7, BPF_REG_FP), pos);
-               /* r7 += -8 */
-               ins(BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, stack_offset), pos);
-               /*
-                * Store r3 (base register) onto stack
-                * Ensure fp[offset] is set.
-                * fp is the only valid base register when storing
-                * into stack. We are not allowed to use r7 as base
-                * register here.
-                */
-               ins(BPF_STX_MEM(BPF_DW, BPF_REG_FP, BPF_REG_ARG3,
-                               stack_offset), pos);
-
-               ref = arg->ref;
-               probeid = BPF_FUNC_probe_read_kernel;
-               while (ref) {
-                       pr_debug("prologue: arg %d: offset %ld\n",
-                                i, ref->offset);
-
-                       if (ref->user_access)
-                               probeid = BPF_FUNC_probe_read_user;
-
-                       err = gen_read_mem(pos, BPF_REG_3, BPF_REG_7,
-                                          ref->offset, probeid);
-                       if (err) {
-                               pr_err("prologue: failed to generate probe_read function call\n");
-                               goto errout;
-                       }
-
-                       ref = ref->next;
-                       /*
-                        * Load previous result into ARG3. Use
-                        * BPF_REG_FP instead of r7 because verifier
-                        * allows FP based addressing only.
-                        */
-                       if (ref)
-                               ins(BPF_LDX_MEM(BPF_DW, BPF_REG_ARG3,
-                                               BPF_REG_FP, stack_offset), pos);
-               }
-       }
-
-       /* Final pass: read to registers */
-       for (i = 0; i < nargs; i++) {
-               int insn_sz = (args[i].ref) ? argtype_to_ldx_size(args[i].type) : BPF_DW;
-
-               pr_debug("prologue: load arg %d, insn_sz is %s\n",
-                        i, insn_sz_to_str(insn_sz));
-               ins(BPF_LDX_MEM(insn_sz, BPF_PROLOGUE_START_ARG_REG + i,
-                               BPF_REG_FP, -BPF_REG_SIZE * (i + 1)), pos);
-       }
-
-       ins(BPF_JMP_IMM(BPF_JA, BPF_REG_0, 0, JMP_TO_SUCCESS_CODE), pos);
-
-       return check_pos(pos);
-errout:
-       return err;
-}
-
-static int
-prologue_relocate(struct bpf_insn_pos *pos, struct bpf_insn *error_code,
-                 struct bpf_insn *success_code, struct bpf_insn *user_code)
-{
-       struct bpf_insn *insn;
-
-       if (check_pos(pos))
-               return -BPF_LOADER_ERRNO__PROLOGUE2BIG;
-
-       for (insn = pos->begin; insn < pos->pos; insn++) {
-               struct bpf_insn *target;
-               u8 class = BPF_CLASS(insn->code);
-               u8 opcode;
-
-               if (class != BPF_JMP)
-                       continue;
-               opcode = BPF_OP(insn->code);
-               if (opcode == BPF_CALL)
-                       continue;
-
-               switch (insn->off) {
-               case JMP_TO_ERROR_CODE:
-                       target = error_code;
-                       break;
-               case JMP_TO_SUCCESS_CODE:
-                       target = success_code;
-                       break;
-               case JMP_TO_USER_CODE:
-                       target = user_code;
-                       break;
-               default:
-                       pr_err("bpf prologue: internal error: relocation failed\n");
-                       return -BPF_LOADER_ERRNO__PROLOGUE;
-               }
-
-               insn->off = target - (insn + 1);
-       }
-       return 0;
-}
-
-int bpf__gen_prologue(struct probe_trace_arg *args, int nargs,
-                     struct bpf_insn *new_prog, size_t *new_cnt,
-                     size_t cnt_space)
-{
-       struct bpf_insn *success_code = NULL;
-       struct bpf_insn *error_code = NULL;
-       struct bpf_insn *user_code = NULL;
-       struct bpf_insn_pos pos;
-       bool fastpath = true;
-       int err = 0, i;
-
-       if (!new_prog || !new_cnt)
-               return -EINVAL;
-
-       if (cnt_space > BPF_MAXINSNS)
-               cnt_space = BPF_MAXINSNS;
-
-       pos.begin = new_prog;
-       pos.end = new_prog + cnt_space;
-       pos.pos = new_prog;
-
-       if (!nargs) {
-               ins(BPF_ALU64_IMM(BPF_MOV, BPF_PROLOGUE_FETCH_RESULT_REG, 0),
-                   &pos);
-
-               if (check_pos(&pos))
-                       goto errout;
-
-               *new_cnt = pos_get_cnt(&pos);
-               return 0;
-       }
-
-       if (nargs > BPF_PROLOGUE_MAX_ARGS) {
-               pr_warning("bpf: prologue: %d arguments are dropped\n",
-                          nargs - BPF_PROLOGUE_MAX_ARGS);
-               nargs = BPF_PROLOGUE_MAX_ARGS;
-       }
-
-       /* First pass: validation */
-       for (i = 0; i < nargs; i++) {
-               struct probe_trace_arg_ref *ref = args[i].ref;
-
-               if (args[i].value[0] == '@') {
-                       /* TODO: fetch global variable */
-                       pr_err("bpf: prologue: global %s%+ld not support\n",
-                               args[i].value, ref ? ref->offset : 0);
-                       return -ENOTSUP;
-               }
-
-               while (ref) {
-                       /* fastpath is true if all args has ref == NULL */
-                       fastpath = false;
-
-                       /*
-                        * Instruction encodes immediate value using
-                        * s32, ref->offset is long. On systems which
-                        * can't fill long in s32, refuse to process if
-                        * ref->offset too large (or small).
-                        */
-#ifdef __LP64__
-#define OFFSET_MAX     ((1LL << 31) - 1)
-#define OFFSET_MIN     ((1LL << 31) * -1)
-                       if (ref->offset > OFFSET_MAX ||
-                                       ref->offset < OFFSET_MIN) {
-                               pr_err("bpf: prologue: offset out of bound: %ld\n",
-                                      ref->offset);
-                               return -BPF_LOADER_ERRNO__PROLOGUEOOB;
-                       }
-#endif
-                       ref = ref->next;
-               }
-       }
-       pr_debug("prologue: pass validation\n");
-
-       if (fastpath) {
-               /* If all variables are registers... */
-               pr_debug("prologue: fast path\n");
-               err = gen_prologue_fastpath(&pos, args, nargs);
-               if (err)
-                       goto errout;
-       } else {
-               pr_debug("prologue: slow path\n");
-
-               /* Initialization: move ctx to a callee saved register. */
-               ins(BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1), &pos);
-
-               err = gen_prologue_slowpath(&pos, args, nargs);
-               if (err)
-                       goto errout;
-               /*
-                * start of ERROR_CODE (only slow pass needs error code)
-                *   mov r2 <- 1  // r2 is error number
-                *   mov r3 <- 0  // r3, r4... should be touched or
-                *                // verifier would complain
-                *   mov r4 <- 0
-                *   ...
-                *   goto usercode
-                */
-               error_code = pos.pos;
-               ins(BPF_ALU64_IMM(BPF_MOV, BPF_PROLOGUE_FETCH_RESULT_REG, 1),
-                   &pos);
-
-               for (i = 0; i < nargs; i++)
-                       ins(BPF_ALU64_IMM(BPF_MOV,
-                                         BPF_PROLOGUE_START_ARG_REG + i,
-                                         0),
-                           &pos);
-               ins(BPF_JMP_IMM(BPF_JA, BPF_REG_0, 0, JMP_TO_USER_CODE),
-                               &pos);
-       }
-
-       /*
-        * start of SUCCESS_CODE:
-        *   mov r2 <- 0
-        *   goto usercode  // skip
-        */
-       success_code = pos.pos;
-       ins(BPF_ALU64_IMM(BPF_MOV, BPF_PROLOGUE_FETCH_RESULT_REG, 0), &pos);
-
-       /*
-        * start of USER_CODE:
-        *   Restore ctx to r1
-        */
-       user_code = pos.pos;
-       if (!fastpath) {
-               /*
-                * Only slow path needs restoring of ctx. In fast path,
-                * register are loaded directly from r1.
-                */
-               ins(BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX), &pos);
-               err = prologue_relocate(&pos, error_code, success_code,
-                                       user_code);
-               if (err)
-                       goto errout;
-       }
-
-       err = check_pos(&pos);
-       if (err)
-               goto errout;
-
-       *new_cnt = pos_get_cnt(&pos);
-       return 0;
-errout:
-       return err;
-}
index 90ce22f..939ec76 100644 (file)
@@ -23,7 +23,9 @@
 #define MAX_CPUS  4096
 
 // FIXME: These should come from system headers
+#ifndef bool
 typedef char bool;
+#endif
 typedef int pid_t;
 typedef long long int __s64;
 typedef __s64 time64_t;
index 0a5bf19..c12f832 100644 (file)
@@ -80,16 +80,6 @@ struct hashmap {
        size_t sz;
 };
 
-#define HASHMAP_INIT(hash_fn, equal_fn, ctx) { \
-       .hash_fn = (hash_fn),                   \
-       .equal_fn = (equal_fn),                 \
-       .ctx = (ctx),                           \
-       .buckets = NULL,                        \
-       .cap = 0,                               \
-       .cap_bits = 0,                          \
-       .sz = 0,                                \
-}
-
 void hashmap__init(struct hashmap *map, hashmap_hash_fn hash_fn,
                   hashmap_equal_fn equal_fn, void *ctx);
 struct hashmap *hashmap__new(hashmap_hash_fn hash_fn,
index d85602a..8de6f39 100644 (file)
@@ -520,7 +520,7 @@ static int perf_pmu__new_alias(struct perf_pmu *pmu, const char *name,
                pmu_name = pe->pmu;
        }
 
-       alias = malloc(sizeof(*alias));
+       alias = zalloc(sizeof(*alias));
        if (!alias)
                return -ENOMEM;
 
index 49f2ad1..7ea42fa 100644 (file)
@@ -59,12 +59,11 @@ override define INSTALL_RULE
        done;
 endef
 
-override define EMIT_TESTS
+emit_tests:
        +@for TARGET in $(SUB_DIRS); do \
                BUILD_TARGET=$(OUTPUT)/$$TARGET;        \
-               $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests;\
+               $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET $@;\
        done;
-endef
 
 override define CLEAN
        +@for TARGET in $(SUB_DIRS); do \
@@ -77,4 +76,4 @@ endef
 tags:
        find . -name '*.c' -o -name '*.h' | xargs ctags
 
-.PHONY: tags $(SUB_DIRS)
+.PHONY: tags $(SUB_DIRS) emit_tests
index 2b95e44..a284fa8 100644 (file)
@@ -30,13 +30,14 @@ override define RUN_TESTS
        +TARGET=event_code_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests
 endef
 
-DEFAULT_EMIT_TESTS := $(EMIT_TESTS)
-override define EMIT_TESTS
-       $(DEFAULT_EMIT_TESTS)
+emit_tests:
+       for TEST in $(TEST_GEN_PROGS); do \
+               BASENAME_TEST=`basename $$TEST`;        \
+               echo "$(COLLECTION):$$BASENAME_TEST";   \
+       done
        +TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests
        +TARGET=sampling_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests
        +TARGET=event_code_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests
-endef
 
 DEFAULT_INSTALL_RULE := $(INSTALL_RULE)
 override define INSTALL_RULE
@@ -64,4 +65,4 @@ sampling_tests:
 event_code_tests:
        TARGET=$@; BUILD_TARGET=$$OUTPUT/$$TARGET; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $$TARGET all
 
-.PHONY: all run_tests ebb sampling_tests event_code_tests
+.PHONY: all run_tests ebb sampling_tests event_code_tests emit_tests
index 22374d2..8202f13 100644 (file)
@@ -91,16 +91,18 @@ static int reg_disable(long *enable, int bit)
 
 FIXTURE(user) {
        long check;
+       bool umount;
 };
 
 FIXTURE_SETUP(user) {
-       USER_EVENT_FIXTURE_SETUP(return);
+       USER_EVENT_FIXTURE_SETUP(return, self->umount);
 
        change_event(false);
        self->check = 0;
 }
 
 FIXTURE_TEARDOWN(user) {
+       USER_EVENT_FIXTURE_TEARDOWN(self->umount);
 }
 
 TEST_F(user, enablement) {
index 32c827a..a859801 100644 (file)
@@ -144,13 +144,16 @@ do { \
 
 FIXTURE(user) {
        int check;
+       bool umount;
 };
 
 FIXTURE_SETUP(user) {
-       USER_EVENT_FIXTURE_SETUP(return);
+       USER_EVENT_FIXTURE_SETUP(return, self->umount);
 }
 
 FIXTURE_TEARDOWN(user) {
+       USER_EVENT_FIXTURE_TEARDOWN(self->umount);
+
        wait_for_delete();
 }
 
index 6a260ca..dcd7509 100644 (file)
@@ -204,10 +204,11 @@ FIXTURE(user) {
        int data_fd;
        int enable_fd;
        int check;
+       bool umount;
 };
 
 FIXTURE_SETUP(user) {
-       USER_EVENT_FIXTURE_SETUP(return);
+       USER_EVENT_FIXTURE_SETUP(return, self->umount);
 
        self->status_fd = open(status_file, O_RDONLY);
        ASSERT_NE(-1, self->status_fd);
@@ -219,6 +220,8 @@ FIXTURE_SETUP(user) {
 }
 
 FIXTURE_TEARDOWN(user) {
+       USER_EVENT_FIXTURE_TEARDOWN(self->umount);
+
        close(self->status_fd);
        close(self->data_fd);
 
index f893398..5288e76 100644 (file)
@@ -111,16 +111,19 @@ static int clear(int *check)
 FIXTURE(user) {
        int data_fd;
        int check;
+       bool umount;
 };
 
 FIXTURE_SETUP(user) {
-       USER_EVENT_FIXTURE_SETUP(return);
+       USER_EVENT_FIXTURE_SETUP(return, self->umount);
 
        self->data_fd = open(data_file, O_RDWR);
        ASSERT_NE(-1, self->data_fd);
 }
 
 FIXTURE_TEARDOWN(user) {
+       USER_EVENT_FIXTURE_TEARDOWN(self->umount);
+
        close(self->data_fd);
 
        if (clear(&self->check) != 0)
index 6903789..e1c3c06 100644 (file)
 
 #include "../kselftest.h"
 
-static inline bool tracefs_enabled(char **message, bool *fail)
+static inline void tracefs_unmount(void)
+{
+       umount("/sys/kernel/tracing");
+}
+
+static inline bool tracefs_enabled(char **message, bool *fail, bool *umount)
 {
        struct stat buf;
        int ret;
 
        *message = "";
        *fail = false;
+       *umount = false;
 
        /* Ensure tracefs is installed */
        ret = stat("/sys/kernel/tracing", &buf);
@@ -37,6 +43,8 @@ static inline bool tracefs_enabled(char **message, bool *fail)
                        return false;
                }
 
+               *umount = true;
+
                ret = stat("/sys/kernel/tracing/README", &buf);
        }
 
@@ -49,13 +57,14 @@ static inline bool tracefs_enabled(char **message, bool *fail)
        return true;
 }
 
-static inline bool user_events_enabled(char **message, bool *fail)
+static inline bool user_events_enabled(char **message, bool *fail, bool *umount)
 {
        struct stat buf;
        int ret;
 
        *message = "";
        *fail = false;
+       *umount = false;
 
        if (getuid() != 0) {
                *message = "Must be run as root";
@@ -63,7 +72,7 @@ static inline bool user_events_enabled(char **message, bool *fail)
                return false;
        }
 
-       if (!tracefs_enabled(message, fail))
+       if (!tracefs_enabled(message, fail, umount))
                return false;
 
        /* Ensure user_events is installed */
@@ -85,10 +94,10 @@ static inline bool user_events_enabled(char **message, bool *fail)
        return true;
 }
 
-#define USER_EVENT_FIXTURE_SETUP(statement) do { \
+#define USER_EVENT_FIXTURE_SETUP(statement, umount) do { \
        char *message; \
        bool fail; \
-       if (!user_events_enabled(&message, &fail)) { \
+       if (!user_events_enabled(&message, &fail, &(umount))) { \
                if (fail) { \
                        TH_LOG("Setup failed due to: %s", message); \
                        ASSERT_FALSE(fail); \
@@ -97,4 +106,9 @@ static inline bool user_events_enabled(char **message, bool *fail)
        } \
 } while (0)
 
+#define USER_EVENT_FIXTURE_TEARDOWN(umount) do { \
+       if ((umount))  \
+               tracefs_unmount(); \
+} while (0)
+
 #endif /* _USER_EVENTS_SELFTESTS_H */