Merge tag 'efi-urgent-2020-04-15' of git://git.kernel.org/pub/scm/linux/kernel/git...
author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 16 Apr 2020 00:37:48 +0000 (17:37 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 16 Apr 2020 00:37:48 +0000 (17:37 -0700)
Pull EFI fixes from Ingo Molnar:
 "Misc EFI fixes, including the boot failure regression caused by the
  BSS section not being cleared by the loaders"

* tag 'efi-urgent-2020-04-15' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  efi/x86: Revert struct layout change to fix kexec boot regression
  efi/x86: Don't remap text<->rodata gap read-only for mixed mode
  efi/x86: Fix the deletion of variables in mixed mode
  efi/libstub/file: Merge file name buffers to reduce stack usage
  Documentation/x86, efi/x86: Clarify EFI handover protocol and its requirements
  efi/arm: Deal with ADR going out of range in efi_enter_kernel()
  efi/x86: Always relocate the kernel for EFI handover entry
  efi/x86: Move efi stub globals from .bss to .data
  efi/libstub/x86: Remove redundant assignment to pointer hdr
  efi/cper: Use scnprintf() for avoiding potential buffer overflow

19 files changed:
arch/m68k/include/asm/Kbuild
arch/x86/hyperv/hv_init.c
arch/x86/kernel/cpu/mshyperv.c
drivers/hv/channel_mgmt.c
drivers/hv/hv_debugfs.c
drivers/hv/hyperv_vmbus.h
drivers/hv/vmbus_drv.c
fs/afs/dir.c
fs/afs/dir_silly.c
fs/afs/fsclient.c
fs/afs/yfsclient.c
fs/btrfs/block-group.c
fs/btrfs/file.c
fs/btrfs/reflink.c
fs/btrfs/relocation.c
fs/btrfs/space-info.c
fs/btrfs/tree-log.c
include/asm-generic/mshyperv.h
include/uapi/linux/btrfs.h

index a0765aa..1bff55a 100644 (file)
@@ -1,7 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 generated-y += syscall_table.h
 generic-y += extable.h
-generic-y += hardirq.h
 generic-y += kvm_para.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
index b0da532..624f5d9 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/mm.h>
 #include <linux/hyperv.h>
 #include <linux/slab.h>
+#include <linux/kernel.h>
 #include <linux/cpuhotplug.h>
 #include <linux/syscore_ops.h>
 #include <clocksource/hyperv_timer.h>
@@ -419,11 +420,14 @@ void hyperv_cleanup(void)
 }
 EXPORT_SYMBOL_GPL(hyperv_cleanup);
 
-void hyperv_report_panic(struct pt_regs *regs, long err)
+void hyperv_report_panic(struct pt_regs *regs, long err, bool in_die)
 {
        static bool panic_reported;
        u64 guest_id;
 
+       if (in_die && !panic_on_oops)
+               return;
+
        /*
         * We prefer to report panic on 'die' chain as we have proper
         * registers to report, but if we miss it (e.g. on BUG()) we need
index caa032c..ebf34c7 100644 (file)
@@ -227,8 +227,8 @@ static void __init ms_hyperv_init_platform(void)
        ms_hyperv.misc_features = cpuid_edx(HYPERV_CPUID_FEATURES);
        ms_hyperv.hints    = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO);
 
-       pr_info("Hyper-V: features 0x%x, hints 0x%x\n",
-               ms_hyperv.features, ms_hyperv.hints);
+       pr_info("Hyper-V: features 0x%x, hints 0x%x, misc 0x%x\n",
+               ms_hyperv.features, ms_hyperv.hints, ms_hyperv.misc_features);
 
        ms_hyperv.max_vp_index = cpuid_eax(HYPERV_CPUID_IMPLEMENT_LIMITS);
        ms_hyperv.max_lp_index = cpuid_ebx(HYPERV_CPUID_IMPLEMENT_LIMITS);
@@ -263,6 +263,16 @@ static void __init ms_hyperv_init_platform(void)
                        cpuid_eax(HYPERV_CPUID_NESTED_FEATURES);
        }
 
+       /*
+        * Hyper-V expects to get crash register data or kmsg when
+        * crash enlightenment is available and the system crashes. Set
+        * crash_kexec_post_notifiers to true to make sure that the
+        * crash enlightenment interface is called before running the
+        * kdump kernel.
+        */
+       if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE)
+               crash_kexec_post_notifiers = true;
+
 #ifdef CONFIG_X86_LOCAL_APIC
        if (ms_hyperv.features & HV_X64_ACCESS_FREQUENCY_MSRS &&
            ms_hyperv.misc_features & HV_FEATURE_FREQUENCY_MSRS_AVAILABLE) {
index 0370364..501c43c 100644 (file)
@@ -839,6 +839,9 @@ void vmbus_initiate_unload(bool crash)
 {
        struct vmbus_channel_message_header hdr;
 
+       if (xchg(&vmbus_connection.conn_state, DISCONNECTED) == DISCONNECTED)
+               return;
+
        /* Pre-Win2012R2 hosts don't support reconnect */
        if (vmbus_proto_version < VERSION_WIN8_1)
                return;
index 8a28785..ccf752b 100644 (file)
@@ -11,7 +11,7 @@
 
 #include "hyperv_vmbus.h"
 
-struct dentry *hv_debug_root;
+static struct dentry *hv_debug_root;
 
 static int hv_debugfs_delay_get(void *data, u64 *val)
 {
index f5fa3b3..70b30e2 100644 (file)
@@ -292,7 +292,7 @@ struct vmbus_msginfo {
        struct list_head msglist_entry;
 
        /* The message itself */
-       unsigned char msg[0];
+       unsigned char msg[];
 };
 
 
index 029378c..a68bce4 100644 (file)
@@ -31,6 +31,7 @@
 #include <linux/kdebug.h>
 #include <linux/efi.h>
 #include <linux/random.h>
+#include <linux/kernel.h>
 #include <linux/syscore_ops.h>
 #include <clocksource/hyperv_timer.h>
 #include "hyperv_vmbus.h"
@@ -48,14 +49,35 @@ static int hyperv_cpuhp_online;
 
 static void *hv_panic_page;
 
+/*
+ * Boolean to control whether to report panic messages over Hyper-V.
+ *
+ * It can be set via /proc/sys/kernel/hyperv/record_panic_msg
+ */
+static int sysctl_record_panic_msg = 1;
+
+static int hyperv_report_reg(void)
+{
+       return !sysctl_record_panic_msg || !hv_panic_page;
+}
+
 static int hyperv_panic_event(struct notifier_block *nb, unsigned long val,
                              void *args)
 {
        struct pt_regs *regs;
 
-       regs = current_pt_regs();
+       vmbus_initiate_unload(true);
 
-       hyperv_report_panic(regs, val);
+       /*
+        * Hyper-V should be notified only once about a panic.  If we will be
+        * doing hyperv_report_panic_msg() later with kmsg data, don't do
+        * the notification here.
+        */
+       if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE
+           && hyperv_report_reg()) {
+               regs = current_pt_regs();
+               hyperv_report_panic(regs, val, false);
+       }
        return NOTIFY_DONE;
 }
 
@@ -65,7 +87,13 @@ static int hyperv_die_event(struct notifier_block *nb, unsigned long val,
        struct die_args *die = (struct die_args *)args;
        struct pt_regs *regs = die->regs;
 
-       hyperv_report_panic(regs, val);
+       /*
+        * Hyper-V should be notified only once about a panic.  If we will be
+        * doing hyperv_report_panic_msg() later with kmsg data, don't do
+        * the notification here.
+        */
+       if (hyperv_report_reg())
+               hyperv_report_panic(regs, val, true);
        return NOTIFY_DONE;
 }
 
@@ -1253,13 +1281,6 @@ static void vmbus_isr(void)
 }
 
 /*
- * Boolean to control whether to report panic messages over Hyper-V.
- *
- * It can be set via /proc/sys/kernel/hyperv/record_panic_msg
- */
-static int sysctl_record_panic_msg = 1;
-
-/*
  * Callback from kmsg_dump. Grab as much as possible from the end of the kmsg
  * buffer and call into Hyper-V to transfer the data.
  */
@@ -1382,19 +1403,29 @@ static int vmbus_bus_init(void)
                        hv_panic_page = (void *)hv_alloc_hyperv_zeroed_page();
                        if (hv_panic_page) {
                                ret = kmsg_dump_register(&hv_kmsg_dumper);
-                               if (ret)
+                               if (ret) {
                                        pr_err("Hyper-V: kmsg dump register "
                                                "error 0x%x\n", ret);
+                                       hv_free_hyperv_page(
+                                           (unsigned long)hv_panic_page);
+                                       hv_panic_page = NULL;
+                               }
                        } else
                                pr_err("Hyper-V: panic message page memory "
                                        "allocation failed");
                }
 
                register_die_notifier(&hyperv_die_block);
-               atomic_notifier_chain_register(&panic_notifier_list,
-                                              &hyperv_panic_block);
        }
 
+       /*
+        * Always register the panic notifier because we need to unload
+        * the VMbus channel connection to prevent any VMbus
+        * activity after the VM panics.
+        */
+       atomic_notifier_chain_register(&panic_notifier_list,
+                              &hyperv_panic_block);
+
        vmbus_request_offers();
 
        return 0;
@@ -1407,7 +1438,6 @@ err_alloc:
        hv_remove_vmbus_irq();
 
        bus_unregister(&hv_bus);
-       hv_free_hyperv_page((unsigned long)hv_panic_page);
        unregister_sysctl_table(hv_ctl_table_hdr);
        hv_ctl_table_hdr = NULL;
        return ret;
@@ -2204,8 +2234,6 @@ static int vmbus_bus_suspend(struct device *dev)
 
        vmbus_initiate_unload(false);
 
-       vmbus_connection.conn_state = DISCONNECTED;
-
        /* Reset the event for the next resume. */
        reinit_completion(&vmbus_connection.ready_for_resume_event);
 
@@ -2289,7 +2317,6 @@ static void hv_kexec_handler(void)
 {
        hv_stimer_global_cleanup();
        vmbus_initiate_unload(false);
-       vmbus_connection.conn_state = DISCONNECTED;
        /* Make sure conn_state is set as hv_synic_cleanup checks for it */
        mb();
        cpuhp_remove_state(hyperv_cpuhp_online);
@@ -2306,7 +2333,6 @@ static void hv_crash_handler(struct pt_regs *regs)
         * doing the cleanup for current CPU only. This should be sufficient
         * for kdump.
         */
-       vmbus_connection.conn_state = DISCONNECTED;
        cpu = smp_processor_id();
        hv_stimer_cleanup(cpu);
        hv_synic_disable_regs(cpu);
index 5c794f4..d1e1caa 100644 (file)
@@ -1032,7 +1032,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
        struct dentry *parent;
        struct inode *inode;
        struct key *key;
-       afs_dataversion_t dir_version;
+       afs_dataversion_t dir_version, invalid_before;
        long de_version;
        int ret;
 
@@ -1084,8 +1084,8 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
        if (de_version == (long)dir_version)
                goto out_valid_noupdate;
 
-       dir_version = dir->invalid_before;
-       if (de_version - (long)dir_version >= 0)
+       invalid_before = dir->invalid_before;
+       if (de_version - (long)invalid_before >= 0)
                goto out_valid;
 
        _debug("dir modified");
@@ -1275,6 +1275,7 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
        struct afs_fs_cursor fc;
        struct afs_vnode *dvnode = AFS_FS_I(dir);
        struct key *key;
+       afs_dataversion_t data_version;
        int ret;
 
        mode |= S_IFDIR;
@@ -1295,7 +1296,7 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 
        ret = -ERESTARTSYS;
        if (afs_begin_vnode_operation(&fc, dvnode, key, true)) {
-               afs_dataversion_t data_version = dvnode->status.data_version + 1;
+               data_version = dvnode->status.data_version + 1;
 
                while (afs_select_fileserver(&fc)) {
                        fc.cb_break = afs_calc_vnode_cb_break(dvnode);
@@ -1316,10 +1317,14 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
                goto error_key;
        }
 
-       if (ret == 0 &&
-           test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
-               afs_edit_dir_add(dvnode, &dentry->d_name, &iget_data.fid,
-                                afs_edit_dir_for_create);
+       if (ret == 0) {
+               down_write(&dvnode->validate_lock);
+               if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) &&
+                   dvnode->status.data_version == data_version)
+                       afs_edit_dir_add(dvnode, &dentry->d_name, &iget_data.fid,
+                                        afs_edit_dir_for_create);
+               up_write(&dvnode->validate_lock);
+       }
 
        key_put(key);
        kfree(scb);
@@ -1360,6 +1365,7 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
        struct afs_fs_cursor fc;
        struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode = NULL;
        struct key *key;
+       afs_dataversion_t data_version;
        int ret;
 
        _enter("{%llx:%llu},{%pd}",
@@ -1391,7 +1397,7 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
 
        ret = -ERESTARTSYS;
        if (afs_begin_vnode_operation(&fc, dvnode, key, true)) {
-               afs_dataversion_t data_version = dvnode->status.data_version + 1;
+               data_version = dvnode->status.data_version + 1;
 
                while (afs_select_fileserver(&fc)) {
                        fc.cb_break = afs_calc_vnode_cb_break(dvnode);
@@ -1404,9 +1410,12 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
                ret = afs_end_vnode_operation(&fc);
                if (ret == 0) {
                        afs_dir_remove_subdir(dentry);
-                       if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
+                       down_write(&dvnode->validate_lock);
+                       if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) &&
+                           dvnode->status.data_version == data_version)
                                afs_edit_dir_remove(dvnode, &dentry->d_name,
                                                    afs_edit_dir_for_rmdir);
+                       up_write(&dvnode->validate_lock);
                }
        }
 
@@ -1544,10 +1553,15 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
                ret = afs_end_vnode_operation(&fc);
                if (ret == 0 && !(scb[1].have_status || scb[1].have_error))
                        ret = afs_dir_remove_link(dvnode, dentry, key);
-               if (ret == 0 &&
-                   test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
-                       afs_edit_dir_remove(dvnode, &dentry->d_name,
-                                           afs_edit_dir_for_unlink);
+
+               if (ret == 0) {
+                       down_write(&dvnode->validate_lock);
+                       if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) &&
+                           dvnode->status.data_version == data_version)
+                               afs_edit_dir_remove(dvnode, &dentry->d_name,
+                                                   afs_edit_dir_for_unlink);
+                       up_write(&dvnode->validate_lock);
+               }
        }
 
        if (need_rehash && ret < 0 && ret != -ENOENT)
@@ -1573,6 +1587,7 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
        struct afs_status_cb *scb;
        struct afs_vnode *dvnode = AFS_FS_I(dir);
        struct key *key;
+       afs_dataversion_t data_version;
        int ret;
 
        mode |= S_IFREG;
@@ -1597,7 +1612,7 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 
        ret = -ERESTARTSYS;
        if (afs_begin_vnode_operation(&fc, dvnode, key, true)) {
-               afs_dataversion_t data_version = dvnode->status.data_version + 1;
+               data_version = dvnode->status.data_version + 1;
 
                while (afs_select_fileserver(&fc)) {
                        fc.cb_break = afs_calc_vnode_cb_break(dvnode);
@@ -1618,9 +1633,12 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
                goto error_key;
        }
 
-       if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
+       down_write(&dvnode->validate_lock);
+       if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) &&
+           dvnode->status.data_version == data_version)
                afs_edit_dir_add(dvnode, &dentry->d_name, &iget_data.fid,
                                 afs_edit_dir_for_create);
+       up_write(&dvnode->validate_lock);
 
        kfree(scb);
        key_put(key);
@@ -1648,6 +1666,7 @@ static int afs_link(struct dentry *from, struct inode *dir,
        struct afs_vnode *dvnode = AFS_FS_I(dir);
        struct afs_vnode *vnode = AFS_FS_I(d_inode(from));
        struct key *key;
+       afs_dataversion_t data_version;
        int ret;
 
        _enter("{%llx:%llu},{%llx:%llu},{%pd}",
@@ -1672,7 +1691,7 @@ static int afs_link(struct dentry *from, struct inode *dir,
 
        ret = -ERESTARTSYS;
        if (afs_begin_vnode_operation(&fc, dvnode, key, true)) {
-               afs_dataversion_t data_version = dvnode->status.data_version + 1;
+               data_version = dvnode->status.data_version + 1;
 
                if (mutex_lock_interruptible_nested(&vnode->io_lock, 1) < 0) {
                        afs_end_vnode_operation(&fc);
@@ -1702,9 +1721,12 @@ static int afs_link(struct dentry *from, struct inode *dir,
                goto error_key;
        }
 
-       if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
+       down_write(&dvnode->validate_lock);
+       if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) &&
+           dvnode->status.data_version == data_version)
                afs_edit_dir_add(dvnode, &dentry->d_name, &vnode->fid,
                                 afs_edit_dir_for_link);
+       up_write(&dvnode->validate_lock);
 
        key_put(key);
        kfree(scb);
@@ -1732,6 +1754,7 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry,
        struct afs_status_cb *scb;
        struct afs_vnode *dvnode = AFS_FS_I(dir);
        struct key *key;
+       afs_dataversion_t data_version;
        int ret;
 
        _enter("{%llx:%llu},{%pd},%s",
@@ -1759,7 +1782,7 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry,
 
        ret = -ERESTARTSYS;
        if (afs_begin_vnode_operation(&fc, dvnode, key, true)) {
-               afs_dataversion_t data_version = dvnode->status.data_version + 1;
+               data_version = dvnode->status.data_version + 1;
 
                while (afs_select_fileserver(&fc)) {
                        fc.cb_break = afs_calc_vnode_cb_break(dvnode);
@@ -1780,9 +1803,12 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry,
                goto error_key;
        }
 
-       if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
+       down_write(&dvnode->validate_lock);
+       if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) &&
+           dvnode->status.data_version == data_version)
                afs_edit_dir_add(dvnode, &dentry->d_name, &iget_data.fid,
                                 afs_edit_dir_for_symlink);
+       up_write(&dvnode->validate_lock);
 
        key_put(key);
        kfree(scb);
@@ -1812,6 +1838,8 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
        struct dentry *tmp = NULL, *rehash = NULL;
        struct inode *new_inode;
        struct key *key;
+       afs_dataversion_t orig_data_version;
+       afs_dataversion_t new_data_version;
        bool new_negative = d_is_negative(new_dentry);
        int ret;
 
@@ -1890,10 +1918,6 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
 
        ret = -ERESTARTSYS;
        if (afs_begin_vnode_operation(&fc, orig_dvnode, key, true)) {
-               afs_dataversion_t orig_data_version;
-               afs_dataversion_t new_data_version;
-               struct afs_status_cb *new_scb = &scb[1];
-
                orig_data_version = orig_dvnode->status.data_version + 1;
 
                if (orig_dvnode != new_dvnode) {
@@ -1904,7 +1928,6 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
                        new_data_version = new_dvnode->status.data_version + 1;
                } else {
                        new_data_version = orig_data_version;
-                       new_scb = &scb[0];
                }
 
                while (afs_select_fileserver(&fc)) {
@@ -1912,7 +1935,7 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
                        fc.cb_break_2 = afs_calc_vnode_cb_break(new_dvnode);
                        afs_fs_rename(&fc, old_dentry->d_name.name,
                                      new_dvnode, new_dentry->d_name.name,
-                                     &scb[0], new_scb);
+                                     &scb[0], &scb[1]);
                }
 
                afs_vnode_commit_status(&fc, orig_dvnode, fc.cb_break,
@@ -1930,18 +1953,25 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
        if (ret == 0) {
                if (rehash)
                        d_rehash(rehash);
-               if (test_bit(AFS_VNODE_DIR_VALID, &orig_dvnode->flags))
-                   afs_edit_dir_remove(orig_dvnode, &old_dentry->d_name,
-                                       afs_edit_dir_for_rename_0);
+               down_write(&orig_dvnode->validate_lock);
+               if (test_bit(AFS_VNODE_DIR_VALID, &orig_dvnode->flags) &&
+                   orig_dvnode->status.data_version == orig_data_version)
+                       afs_edit_dir_remove(orig_dvnode, &old_dentry->d_name,
+                                           afs_edit_dir_for_rename_0);
+               if (orig_dvnode != new_dvnode) {
+                       up_write(&orig_dvnode->validate_lock);
 
-               if (!new_negative &&
-                   test_bit(AFS_VNODE_DIR_VALID, &new_dvnode->flags))
-                       afs_edit_dir_remove(new_dvnode, &new_dentry->d_name,
-                                           afs_edit_dir_for_rename_1);
+                       down_write(&new_dvnode->validate_lock);
+               }
+               if (test_bit(AFS_VNODE_DIR_VALID, &new_dvnode->flags) &&
+                   orig_dvnode->status.data_version == new_data_version) {
+                       if (!new_negative)
+                               afs_edit_dir_remove(new_dvnode, &new_dentry->d_name,
+                                                   afs_edit_dir_for_rename_1);
 
-               if (test_bit(AFS_VNODE_DIR_VALID, &new_dvnode->flags))
                        afs_edit_dir_add(new_dvnode, &new_dentry->d_name,
                                         &vnode->fid, afs_edit_dir_for_rename_2);
+               }
 
                new_inode = d_inode(new_dentry);
                if (new_inode) {
@@ -1957,14 +1987,10 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
                 * Note that if we ever implement RENAME_EXCHANGE, we'll have
                 * to update both dentries with opposing dir versions.
                 */
-               if (new_dvnode != orig_dvnode) {
-                       afs_update_dentry_version(&fc, old_dentry, &scb[1]);
-                       afs_update_dentry_version(&fc, new_dentry, &scb[1]);
-               } else {
-                       afs_update_dentry_version(&fc, old_dentry, &scb[0]);
-                       afs_update_dentry_version(&fc, new_dentry, &scb[0]);
-               }
+               afs_update_dentry_version(&fc, old_dentry, &scb[1]);
+               afs_update_dentry_version(&fc, new_dentry, &scb[1]);
                d_move(old_dentry, new_dentry);
+               up_write(&new_dvnode->validate_lock);
                goto error_tmp;
        }
 
index 361088a..d94e2b7 100644 (file)
@@ -21,6 +21,7 @@ static int afs_do_silly_rename(struct afs_vnode *dvnode, struct afs_vnode *vnode
 {
        struct afs_fs_cursor fc;
        struct afs_status_cb *scb;
+       afs_dataversion_t dir_data_version;
        int ret = -ERESTARTSYS;
 
        _enter("%pd,%pd", old, new);
@@ -31,7 +32,7 @@ static int afs_do_silly_rename(struct afs_vnode *dvnode, struct afs_vnode *vnode
 
        trace_afs_silly_rename(vnode, false);
        if (afs_begin_vnode_operation(&fc, dvnode, key, true)) {
-               afs_dataversion_t dir_data_version = dvnode->status.data_version + 1;
+               dir_data_version = dvnode->status.data_version + 1;
 
                while (afs_select_fileserver(&fc)) {
                        fc.cb_break = afs_calc_vnode_cb_break(dvnode);
@@ -54,12 +55,15 @@ static int afs_do_silly_rename(struct afs_vnode *dvnode, struct afs_vnode *vnode
                        dvnode->silly_key = key_get(key);
                }
 
-               if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
+               down_write(&dvnode->validate_lock);
+               if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) &&
+                   dvnode->status.data_version == dir_data_version) {
                        afs_edit_dir_remove(dvnode, &old->d_name,
                                            afs_edit_dir_for_silly_0);
-               if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
                        afs_edit_dir_add(dvnode, &new->d_name,
                                         &vnode->fid, afs_edit_dir_for_silly_1);
+               }
+               up_write(&dvnode->validate_lock);
        }
 
        kfree(scb);
@@ -181,10 +185,14 @@ static int afs_do_silly_unlink(struct afs_vnode *dvnode, struct afs_vnode *vnode
                                clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
                        }
                }
-               if (ret == 0 &&
-                   test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
-                       afs_edit_dir_remove(dvnode, &dentry->d_name,
-                                           afs_edit_dir_for_unlink);
+               if (ret == 0) {
+                       down_write(&dvnode->validate_lock);
+                       if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) &&
+                           dvnode->status.data_version == dir_data_version)
+                               afs_edit_dir_remove(dvnode, &dentry->d_name,
+                                                   afs_edit_dir_for_unlink);
+                       up_write(&dvnode->validate_lock);
+               }
        }
 
        kfree(scb);
index 1f9c5d8..68fc466 100644 (file)
@@ -65,6 +65,7 @@ static int xdr_decode_AFSFetchStatus(const __be32 **_bp,
        bool inline_error = (call->operation_ID == afs_FS_InlineBulkStatus);
        u64 data_version, size;
        u32 type, abort_code;
+       int ret;
 
        abort_code = ntohl(xdr->abort_code);
 
@@ -78,7 +79,7 @@ static int xdr_decode_AFSFetchStatus(const __be32 **_bp,
                         */
                        status->abort_code = abort_code;
                        scb->have_error = true;
-                       return 0;
+                       goto good;
                }
 
                pr_warn("Unknown AFSFetchStatus version %u\n", ntohl(xdr->if_version));
@@ -87,7 +88,8 @@ static int xdr_decode_AFSFetchStatus(const __be32 **_bp,
 
        if (abort_code != 0 && inline_error) {
                status->abort_code = abort_code;
-               return 0;
+               scb->have_error = true;
+               goto good;
        }
 
        type = ntohl(xdr->type);
@@ -123,13 +125,16 @@ static int xdr_decode_AFSFetchStatus(const __be32 **_bp,
        data_version |= (u64)ntohl(xdr->data_version_hi) << 32;
        status->data_version = data_version;
        scb->have_status = true;
-
+good:
+       ret = 0;
+advance:
        *_bp = (const void *)*_bp + sizeof(*xdr);
-       return 0;
+       return ret;
 
 bad:
        xdr_dump_bad(*_bp);
-       return afs_protocol_error(call, -EBADMSG, afs_eproto_bad_status);
+       ret = afs_protocol_error(call, -EBADMSG, afs_eproto_bad_status);
+       goto advance;
 }
 
 static time64_t xdr_decode_expiry(struct afs_call *call, u32 expiry)
@@ -981,16 +986,16 @@ static int afs_deliver_fs_rename(struct afs_call *call)
        if (ret < 0)
                return ret;
 
-       /* unmarshall the reply once we've received all of it */
+       /* If the two dirs are the same, we have two copies of the same status
+        * report, so we just decode it twice.
+        */
        bp = call->buffer;
        ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_dir_scb);
        if (ret < 0)
                return ret;
-       if (call->out_dir_scb != call->out_scb) {
-               ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb);
-               if (ret < 0)
-                       return ret;
-       }
+       ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb);
+       if (ret < 0)
+               return ret;
        xdr_decode_AFSVolSync(&bp, call->out_volsync);
 
        _leave(" = 0 [done]");
index a26126a..b5b45c5 100644 (file)
@@ -165,15 +165,15 @@ static void xdr_dump_bad(const __be32 *bp)
        int i;
 
        pr_notice("YFS XDR: Bad status record\n");
-       for (i = 0; i < 5 * 4 * 4; i += 16) {
+       for (i = 0; i < 6 * 4 * 4; i += 16) {
                memcpy(x, bp, 16);
                bp += 4;
                pr_notice("%03x: %08x %08x %08x %08x\n",
                          i, ntohl(x[0]), ntohl(x[1]), ntohl(x[2]), ntohl(x[3]));
        }
 
-       memcpy(x, bp, 4);
-       pr_notice("0x50: %08x\n", ntohl(x[0]));
+       memcpy(x, bp, 8);
+       pr_notice("0x60: %08x %08x\n", ntohl(x[0]), ntohl(x[1]));
 }
 
 /*
@@ -186,13 +186,14 @@ static int xdr_decode_YFSFetchStatus(const __be32 **_bp,
        const struct yfs_xdr_YFSFetchStatus *xdr = (const void *)*_bp;
        struct afs_file_status *status = &scb->status;
        u32 type;
+       int ret;
 
        status->abort_code = ntohl(xdr->abort_code);
        if (status->abort_code != 0) {
                if (status->abort_code == VNOVNODE)
                        status->nlink = 0;
                scb->have_error = true;
-               return 0;
+               goto good;
        }
 
        type = ntohl(xdr->type);
@@ -220,13 +221,16 @@ static int xdr_decode_YFSFetchStatus(const __be32 **_bp,
        status->size            = xdr_to_u64(xdr->size);
        status->data_version    = xdr_to_u64(xdr->data_version);
        scb->have_status        = true;
-
+good:
+       ret = 0;
+advance:
        *_bp += xdr_size(xdr);
-       return 0;
+       return ret;
 
 bad:
        xdr_dump_bad(*_bp);
-       return afs_protocol_error(call, -EBADMSG, afs_eproto_bad_status);
+       ret = afs_protocol_error(call, -EBADMSG, afs_eproto_bad_status);
+       goto advance;
 }
 
 /*
@@ -1153,11 +1157,9 @@ static int yfs_deliver_fs_rename(struct afs_call *call)
        ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_dir_scb);
        if (ret < 0)
                return ret;
-       if (call->out_dir_scb != call->out_scb) {
-               ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_scb);
-               if (ret < 0)
-                       return ret;
-       }
+       ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_scb);
+       if (ret < 0)
+               return ret;
 
        xdr_decode_YFSVolSync(&bp, call->out_volsync);
        _leave(" = 0 [done]");
index 786849f..47f66c6 100644 (file)
@@ -3370,6 +3370,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
                            space_info->bytes_reserved > 0 ||
                            space_info->bytes_may_use > 0))
                        btrfs_dump_space_info(info, space_info, 0, 0);
+               WARN_ON(space_info->reclaim_size > 0);
                list_del(&space_info->list);
                btrfs_sysfs_remove_space_info(space_info);
        }
index 8a144f9..719e68a 100644 (file)
@@ -2098,6 +2098,21 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
        atomic_inc(&root->log_batch);
 
        /*
+        * If the inode needs a full sync, make sure we use a full range to
+        * avoid log tree corruption, due to hole detection racing with ordered
+        * extent completion for adjacent ranges and races between logging and
+        * completion of ordered extents for adjacent ranges - both races
+        * could lead to file extent items in the log with overlapping ranges.
+        * Do this while holding the inode lock, to avoid races with other
+        * tasks.
+        */
+       if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
+                    &BTRFS_I(inode)->runtime_flags)) {
+               start = 0;
+               end = LLONG_MAX;
+       }
+
+       /*
         * Before we acquired the inode's lock, someone may have dirtied more
         * pages in the target range. We need to make sure that writeback for
         * any such pages does not start while we are logging the inode, because
index d197314..040009d 100644 (file)
@@ -264,6 +264,7 @@ copy_inline_extent:
                            size);
        inode_add_bytes(dst, datal);
        set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(dst)->runtime_flags);
+       ret = btrfs_inode_set_file_extent_range(BTRFS_I(dst), 0, aligned_end);
 out:
        if (!ret && !trans) {
                /*
index f655956..7e362a6 100644 (file)
@@ -611,8 +611,8 @@ static int should_ignore_root(struct btrfs_root *root)
        if (!reloc_root)
                return 0;
 
-       if (btrfs_root_last_snapshot(&reloc_root->root_item) ==
-           root->fs_info->running_transaction->transid - 1)
+       if (btrfs_header_generation(reloc_root->commit_root) ==
+           root->fs_info->running_transaction->transid)
                return 0;
        /*
         * if there is reloc tree and it was created in previous
index 8b0fe05..ff17a44 100644 (file)
@@ -361,6 +361,16 @@ int btrfs_can_overcommit(struct btrfs_fs_info *fs_info,
        return 0;
 }
 
+static void remove_ticket(struct btrfs_space_info *space_info,
+                         struct reserve_ticket *ticket)
+{
+       if (!list_empty(&ticket->list)) {
+               list_del_init(&ticket->list);
+               ASSERT(space_info->reclaim_size >= ticket->bytes);
+               space_info->reclaim_size -= ticket->bytes;
+       }
+}
+
 /*
  * This is for space we already have accounted in space_info->bytes_may_use, so
  * basically when we're returning space from block_rsv's.
@@ -388,9 +398,7 @@ again:
                        btrfs_space_info_update_bytes_may_use(fs_info,
                                                              space_info,
                                                              ticket->bytes);
-                       list_del_init(&ticket->list);
-                       ASSERT(space_info->reclaim_size >= ticket->bytes);
-                       space_info->reclaim_size -= ticket->bytes;
+                       remove_ticket(space_info, ticket);
                        ticket->bytes = 0;
                        space_info->tickets_id++;
                        wake_up(&ticket->wait);
@@ -899,7 +907,7 @@ static bool maybe_fail_all_tickets(struct btrfs_fs_info *fs_info,
                        btrfs_info(fs_info, "failing ticket with %llu bytes",
                                   ticket->bytes);
 
-               list_del_init(&ticket->list);
+               remove_ticket(space_info, ticket);
                ticket->error = -ENOSPC;
                wake_up(&ticket->wait);
 
@@ -1063,7 +1071,7 @@ static void wait_reserve_ticket(struct btrfs_fs_info *fs_info,
                         * despite getting an error, resulting in a space leak
                         * (bytes_may_use counter of our space_info).
                         */
-                       list_del_init(&ticket->list);
+                       remove_ticket(space_info, ticket);
                        ticket->error = -EINTR;
                        break;
                }
@@ -1121,7 +1129,7 @@ static int handle_reserve_ticket(struct btrfs_fs_info *fs_info,
                 * either the async reclaim job deletes the ticket from the list
                 * or we delete it ourselves at wait_reserve_ticket().
                 */
-               list_del_init(&ticket->list);
+               remove_ticket(space_info, ticket);
                if (!ret)
                        ret = -ENOSPC;
        }
index 58c1114..ec36a7c 100644 (file)
@@ -96,8 +96,8 @@ enum {
 static int btrfs_log_inode(struct btrfs_trans_handle *trans,
                           struct btrfs_root *root, struct btrfs_inode *inode,
                           int inode_only,
-                          u64 start,
-                          u64 end,
+                          const loff_t start,
+                          const loff_t end,
                           struct btrfs_log_ctx *ctx);
 static int link_to_fixup_dir(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root,
@@ -4533,15 +4533,13 @@ static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans,
 static int btrfs_log_holes(struct btrfs_trans_handle *trans,
                           struct btrfs_root *root,
                           struct btrfs_inode *inode,
-                          struct btrfs_path *path,
-                          const u64 start,
-                          const u64 end)
+                          struct btrfs_path *path)
 {
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct btrfs_key key;
        const u64 ino = btrfs_ino(inode);
        const u64 i_size = i_size_read(&inode->vfs_inode);
-       u64 prev_extent_end = start;
+       u64 prev_extent_end = 0;
        int ret;
 
        if (!btrfs_fs_incompat(fs_info, NO_HOLES) || i_size == 0)
@@ -4549,21 +4547,14 @@ static int btrfs_log_holes(struct btrfs_trans_handle *trans,
 
        key.objectid = ino;
        key.type = BTRFS_EXTENT_DATA_KEY;
-       key.offset = start;
+       key.offset = 0;
 
        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
        if (ret < 0)
                return ret;
 
-       if (ret > 0 && path->slots[0] > 0) {
-               btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0] - 1);
-               if (key.objectid == ino && key.type == BTRFS_EXTENT_DATA_KEY)
-                       path->slots[0]--;
-       }
-
        while (true) {
                struct extent_buffer *leaf = path->nodes[0];
-               u64 extent_end;
 
                if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
                        ret = btrfs_next_leaf(root, path);
@@ -4580,18 +4571,9 @@ static int btrfs_log_holes(struct btrfs_trans_handle *trans,
                if (key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY)
                        break;
 
-               extent_end = btrfs_file_extent_end(path);
-               if (extent_end <= start)
-                       goto next_slot;
-
                /* We have a hole, log it. */
                if (prev_extent_end < key.offset) {
-                       u64 hole_len;
-
-                       if (key.offset >= end)
-                               hole_len = end - prev_extent_end;
-                       else
-                               hole_len = key.offset - prev_extent_end;
+                       const u64 hole_len = key.offset - prev_extent_end;
 
                        /*
                         * Release the path to avoid deadlocks with other code
@@ -4621,20 +4603,16 @@ static int btrfs_log_holes(struct btrfs_trans_handle *trans,
                        leaf = path->nodes[0];
                }
 
-               prev_extent_end = min(extent_end, end);
-               if (extent_end >= end)
-                       break;
-next_slot:
+               prev_extent_end = btrfs_file_extent_end(path);
                path->slots[0]++;
                cond_resched();
        }
 
-       if (prev_extent_end < end && prev_extent_end < i_size) {
+       if (prev_extent_end < i_size) {
                u64 hole_len;
 
                btrfs_release_path(path);
-               hole_len = min(ALIGN(i_size, fs_info->sectorsize), end);
-               hole_len -= prev_extent_end;
+               hole_len = ALIGN(i_size - prev_extent_end, fs_info->sectorsize);
                ret = btrfs_insert_file_extent(trans, root->log_root,
                                               ino, prev_extent_end, 0, 0,
                                               hole_len, 0, hole_len,
@@ -4971,8 +4949,6 @@ static int copy_inode_items_to_log(struct btrfs_trans_handle *trans,
                                   const u64 logged_isize,
                                   const bool recursive_logging,
                                   const int inode_only,
-                                  const u64 start,
-                                  const u64 end,
                                   struct btrfs_log_ctx *ctx,
                                   bool *need_log_inode_item)
 {
@@ -4981,21 +4957,6 @@ static int copy_inode_items_to_log(struct btrfs_trans_handle *trans,
        int ins_nr = 0;
        int ret;
 
-       /*
-        * We must make sure we don't copy extent items that are entirely out of
-        * the range [start, end - 1]. This is not just an optimization to avoid
-        * copying but also needed to avoid a corruption where we end up with
-        * file extent items in the log tree that have overlapping ranges - this
-        * can happen if we race with ordered extent completion for ranges that
-        * are outside our target range. For example we copy an extent item and
-        * when we move to the next leaf, that extent was trimmed and a new one
-        * covering a subrange of it, but with a higher key, was inserted - we
-        * would then copy this other extent too, resulting in a log tree with
-        * 2 extent items that represent overlapping ranges.
-        *
-        * We can copy the entire extents at the range bondaries however, even
-        * if they cover an area outside the target range. That's ok.
-        */
        while (1) {
                ret = btrfs_search_forward(root, min_key, path, trans->transid);
                if (ret < 0)
@@ -5063,29 +5024,6 @@ again:
                        goto next_slot;
                }
 
-               if (min_key->type == BTRFS_EXTENT_DATA_KEY) {
-                       const u64 extent_end = btrfs_file_extent_end(path);
-
-                       if (extent_end <= start) {
-                               if (ins_nr > 0) {
-                                       ret = copy_items(trans, inode, dst_path,
-                                                        path, ins_start_slot,
-                                                        ins_nr, inode_only,
-                                                        logged_isize);
-                                       if (ret < 0)
-                                               return ret;
-                                       ins_nr = 0;
-                               }
-                               goto next_slot;
-                       }
-                       if (extent_end >= end) {
-                               ins_nr++;
-                               if (ins_nr == 1)
-                                       ins_start_slot = path->slots[0];
-                               break;
-                       }
-               }
-
                if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) {
                        ins_nr++;
                        goto next_slot;
@@ -5151,8 +5089,8 @@ next_key:
 static int btrfs_log_inode(struct btrfs_trans_handle *trans,
                           struct btrfs_root *root, struct btrfs_inode *inode,
                           int inode_only,
-                          u64 start,
-                          u64 end,
+                          const loff_t start,
+                          const loff_t end,
                           struct btrfs_log_ctx *ctx)
 {
        struct btrfs_fs_info *fs_info = root->fs_info;
@@ -5180,9 +5118,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
                return -ENOMEM;
        }
 
-       start = ALIGN_DOWN(start, fs_info->sectorsize);
-       end = ALIGN(end, fs_info->sectorsize);
-
        min_key.objectid = ino;
        min_key.type = BTRFS_INODE_ITEM_KEY;
        min_key.offset = 0;
@@ -5298,8 +5233,8 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
 
        err = copy_inode_items_to_log(trans, inode, &min_key, &max_key,
                                      path, dst_path, logged_isize,
-                                     recursive_logging, inode_only,
-                                     start, end, ctx, &need_log_inode_item);
+                                     recursive_logging, inode_only, ctx,
+                                     &need_log_inode_item);
        if (err)
                goto out_unlock;
 
@@ -5312,7 +5247,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
        if (max_key.type >= BTRFS_EXTENT_DATA_KEY && !fast_search) {
                btrfs_release_path(path);
                btrfs_release_path(dst_path);
-               err = btrfs_log_holes(trans, root, inode, path, start, end);
+               err = btrfs_log_holes(trans, root, inode, path);
                if (err)
                        goto out_unlock;
        }
index b3f1082..1c4fd95 100644 (file)
@@ -163,7 +163,7 @@ static inline int cpumask_to_vpset(struct hv_vpset *vpset,
        return nr_bank;
 }
 
-void hyperv_report_panic(struct pt_regs *regs, long err);
+void hyperv_report_panic(struct pt_regs *regs, long err, bool in_die);
 void hyperv_report_panic_msg(phys_addr_t pa, size_t size);
 bool hv_is_hyperv_initialized(void);
 bool hv_is_hibernation_supported(void);
index 8134924..e6b6cb0 100644 (file)
@@ -36,12 +36,10 @@ struct btrfs_ioctl_vol_args {
 #define BTRFS_DEVICE_PATH_NAME_MAX     1024
 #define BTRFS_SUBVOL_NAME_MAX          4039
 
-/*
- * Deprecated since 5.7:
- *
- * BTRFS_SUBVOL_CREATE_ASYNC   (1ULL << 0)
- */
-
+#ifndef __KERNEL__
+/* Deprecated since 5.7 */
+# define BTRFS_SUBVOL_CREATE_ASYNC     (1ULL << 0)
+#endif
 #define BTRFS_SUBVOL_RDONLY            (1ULL << 1)
 #define BTRFS_SUBVOL_QGROUP_INHERIT    (1ULL << 2)