KVM: x86: Fix potential race in KVM_GET_CLOCK
authorOliver Upton <oupton@google.com>
Thu, 16 Sep 2021 18:15:34 +0000 (18:15 +0000)
committerPaolo Bonzini <pbonzini@redhat.com>
Fri, 1 Oct 2021 07:44:47 +0000 (03:44 -0400)
Sean noticed that KVM_GET_CLOCK was checking kvm_arch.use_master_clock
outside of the pvclock sync lock. This is problematic, as the clock
value written to the user may or may not actually correspond to a stable
TSC.

Fix the race by populating the entire kvm_clock_data structure behind
the pvclock_gtod_sync_lock.

Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Oliver Upton <oupton@google.com>
Message-Id: <20210916181538.968978-4-oupton@google.com>
Reviewed-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
arch/x86/kvm/x86.c

index ed86e437d70793c608f0a742effed2725904db8b..79535fe83a04743eaa5361acefe1f02419a73059 100644 (file)
@@ -2781,19 +2781,20 @@ static void kvm_update_masterclock(struct kvm *kvm)
        kvm_end_pvclock_update(kvm);
 }
 
-u64 get_kvmclock_ns(struct kvm *kvm)
+static void get_kvmclock(struct kvm *kvm, struct kvm_clock_data *data)
 {
        struct kvm_arch *ka = &kvm->arch;
        struct pvclock_vcpu_time_info hv_clock;
        unsigned long flags;
-       u64 ret;
 
        spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
        if (!ka->use_master_clock) {
                spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
-               return get_kvmclock_base_ns() + ka->kvmclock_offset;
+               data->clock = get_kvmclock_base_ns() + ka->kvmclock_offset;
+               return;
        }
 
+       data->flags |= KVM_CLOCK_TSC_STABLE;
        hv_clock.tsc_timestamp = ka->master_cycle_now;
        hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset;
        spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
@@ -2805,13 +2806,26 @@ u64 get_kvmclock_ns(struct kvm *kvm)
                kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL,
                                   &hv_clock.tsc_shift,
                                   &hv_clock.tsc_to_system_mul);
-               ret = __pvclock_read_cycles(&hv_clock, rdtsc());
-       } else
-               ret = get_kvmclock_base_ns() + ka->kvmclock_offset;
+               data->clock = __pvclock_read_cycles(&hv_clock, rdtsc());
+       } else {
+               data->clock = get_kvmclock_base_ns() + ka->kvmclock_offset;
+       }
 
        put_cpu();
+}
 
-       return ret;
+u64 get_kvmclock_ns(struct kvm *kvm)
+{
+       struct kvm_clock_data data;
+
+       /*
+        * Zero flags as it's accessed RMW, leave everything else uninitialized
+        * as clock is always written and no other fields are consumed.
+        */
+       data.flags = 0;
+
+       get_kvmclock(kvm, &data);
+       return data.clock;
 }
 
 static void kvm_setup_pvclock_page(struct kvm_vcpu *v,
@@ -5820,13 +5834,9 @@ int kvm_arch_pm_notifier(struct kvm *kvm, unsigned long state)
 static int kvm_vm_ioctl_get_clock(struct kvm *kvm, void __user *argp)
 {
        struct kvm_clock_data data;
-       u64 now_ns;
-
-       now_ns = get_kvmclock_ns(kvm);
-       user_ns.clock = now_ns;
-       user_ns.flags = kvm->arch.use_master_clock ? KVM_CLOCK_TSC_STABLE : 0;
-       memset(&user_ns.pad, 0, sizeof(user_ns.pad));
 
+       memset(&data, 0, sizeof(data));
+       get_kvmclock(kvm, &data);
        if (copy_to_user(argp, &data, sizeof(data)))
                return -EFAULT;