KVM: VMX: Fix ds/es corruption on i386 with preemption
authorAvi Kivity <avi@redhat.com>
Wed, 1 Aug 2012 13:48:03 +0000 (16:48 +0300)
committerMarcelo Tosatti <mtosatti@redhat.com>
Wed, 1 Aug 2012 23:23:57 +0000 (20:23 -0300)
Commit b2da15ac26a0c ("KVM: VMX: Optimize %ds, %es reload") broke i386
in the following scenario:

  vcpu_load
  ...
  vmx_save_host_state
  vmx_vcpu_run
  (ds.rpl, es.rpl cleared by hardware)

  interrupt
    push ds, es  # pushes bad ds, es
    schedule
      vmx_vcpu_put
        vmx_load_host_state
          reload ds, es (with __USER_DS)
    pop ds, es  # of other thread's stack
    iret
  # other thread runs
  interrupt
    push ds, es
    schedule  # back in vcpu thread
    pop ds, es  # now with rpl=0
    iret
  ...
  vcpu_put
  resume_userspace
  iret  # clears ds, es due to mismatched rpl

(instead of resume_userspace, we might return with SYSEXIT and then
take an exception; when the exception IRETs we end up with cleared
ds, es)

Fix by avoiding the optimization on i386 and reloading ds, es on the
lightweight exit path.

Reported-by: Chris Clayron <chris2553@googlemail.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
arch/x86/kvm/vmx.c

index c39b607..c00f03d 100644 (file)
@@ -1488,13 +1488,6 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
                loadsegment(ds, vmx->host_state.ds_sel);
                loadsegment(es, vmx->host_state.es_sel);
        }
-#else
-       /*
-        * The sysexit path does not restore ds/es, so we must set them to
-        * a reasonable value ourselves.
-        */
-       loadsegment(ds, __USER_DS);
-       loadsegment(es, __USER_DS);
 #endif
        reload_tss();
 #ifdef CONFIG_X86_64
@@ -6370,6 +6363,19 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 #endif
              );
 
+#ifndef CONFIG_X86_64
+       /*
+        * The sysexit path does not restore ds/es, so we must set them to
+        * a reasonable value ourselves.
+        *
+        * We can't defer this to vmx_load_host_state() since that function
+        * may be executed in interrupt context, which saves and restore segments
+        * around it, nullifying its effect.
+        */
+       loadsegment(ds, __USER_DS);
+       loadsegment(es, __USER_DS);
+#endif
+
        vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)
                                  | (1 << VCPU_EXREG_RFLAGS)
                                  | (1 << VCPU_EXREG_CPL)