Merge branch 'for-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/dvrabel/uwb
author     Linus Torvalds <torvalds@linux-foundation.org>
           Tue, 17 Feb 2009 22:05:59 +0000 (14:05 -0800)
committer  Linus Torvalds <torvalds@linux-foundation.org>
           Tue, 17 Feb 2009 22:05:59 +0000 (14:05 -0800)
* 'for-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/dvrabel/uwb:
  wusb: whci-hcd: always lock whc->lock with interrupts disabled

34 files changed:
arch/alpha/kernel/process.c
arch/alpha/kernel/smp.c
arch/ia64/include/asm/kvm.h
arch/ia64/kvm/kvm-ia64.c
arch/ia64/kvm/process.c
arch/powerpc/kvm/powerpc.c
arch/s390/kvm/kvm-s390.c
arch/x86/include/asm/kvm.h
arch/x86/kernel/cpu/cpufreq/powernow-k8.c
arch/x86/kvm/i8254.c
arch/x86/kvm/irq.c
arch/x86/kvm/irq.h
arch/x86/kvm/lapic.c
arch/x86/kvm/lapic.h
arch/x86/kvm/mmu.c
arch/x86/kvm/svm.c
arch/x86/kvm/vmx.c
arch/x86/kvm/x86.c
drivers/pci/intel-iommu.c
fs/ext4/ext4.h
fs/ext4/inode.c
fs/ext4/mballoc.c
fs/ext4/migrate.c
fs/ext4/super.c
fs/jbd2/journal.c
fs/jbd2/transaction.c
fs/namespace.c
fs/ocfs2/journal.h
include/asm-frv/pgtable.h
include/linux/jbd2.h
include/linux/kvm.h
include/linux/kvm_host.h
virt/kvm/iommu.c
virt/kvm/kvm_main.c

diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c
index f238370..8d0097f 100644
@@ -93,8 +93,8 @@ common_shutdown_1(void *generic_ptr)
        if (cpuid != boot_cpuid) {
                flags |= 0x00040000UL; /* "remain halted" */
                *pflags = flags;
-               cpu_clear(cpuid, cpu_present_map);
-               cpu_clear(cpuid, cpu_possible_map);
+               set_cpu_present(cpuid, false);
+               set_cpu_possible(cpuid, false);
                halt();
        }
 #endif
@@ -120,8 +120,8 @@ common_shutdown_1(void *generic_ptr)
 
 #ifdef CONFIG_SMP
        /* Wait for the secondaries to halt. */
-       cpu_clear(boot_cpuid, cpu_present_map);
-       cpu_clear(boot_cpuid, cpu_possible_map);
+       set_cpu_present(boot_cpuid, false);
+       set_cpu_possible(boot_cpuid, false);
        while (cpus_weight(cpu_present_map))
                barrier();
 #endif
diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c
index 00f1dc3..b1fe567 100644
@@ -120,12 +120,12 @@ void __cpuinit
 smp_callin(void)
 {
        int cpuid = hard_smp_processor_id();
-       cpumask_t mask = cpu_online_map;
 
-       if (cpu_test_and_set(cpuid, mask)) {
+       if (cpu_online(cpuid)) {
                printk("??, cpu 0x%x already present??\n", cpuid);
                BUG();
        }
+       set_cpu_online(cpuid, true);
 
        /* Turn on machine checks.  */
        wrmces(7);
@@ -436,8 +436,8 @@ setup_smp(void)
                                ((char *)cpubase + i*hwrpb->processor_size);
                        if ((cpu->flags & 0x1cc) == 0x1cc) {
                                smp_num_probed++;
-                               cpu_set(i, cpu_possible_map);
-                               cpu_set(i, cpu_present_map);
+                               set_cpu_possible(i, true);
+                               set_cpu_present(i, true);
                                cpu->pal_revision = boot_cpu_palrev;
                        }
 
@@ -470,8 +470,8 @@ smp_prepare_cpus(unsigned int max_cpus)
 
        /* Nothing to do on a UP box, or when told not to.  */
        if (smp_num_probed == 1 || max_cpus == 0) {
-               cpu_possible_map = cpumask_of_cpu(boot_cpuid);
-               cpu_present_map = cpumask_of_cpu(boot_cpuid);
+               init_cpu_possible(cpumask_of(boot_cpuid));
+               init_cpu_present(cpumask_of(boot_cpuid));
                printk(KERN_INFO "SMP mode deactivated.\n");
                return;
        }
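
The two alpha hunks above are part of the cpumask API conversion: instead of open-coding cpu_set()/cpu_clear() on the global cpu_present_map and cpu_possible_map, callers go through set_cpu_present()/set_cpu_possible(), which hide how the masks are stored. A toy user-space model of the accessor pattern (the single-word bitmap here is an assumption for illustration; the kernel's masks are NR_CPUS-bit cpumasks):

    #include <stdbool.h>
    #include <stdio.h>

    /* Toy stand-ins for the kernel's possible/present masks. */
    static unsigned long cpu_possible_bits;
    static unsigned long cpu_present_bits;

    static void assign_bit(unsigned long *mask, unsigned int cpu, bool set)
    {
            if (set)
                    *mask |= 1UL << cpu;
            else
                    *mask &= ~(1UL << cpu);
    }

    static void set_cpu_possible(unsigned int cpu, bool possible)
    {
            assign_bit(&cpu_possible_bits, cpu, possible);
    }

    static void set_cpu_present(unsigned int cpu, bool present)
    {
            assign_bit(&cpu_present_bits, cpu, present);
    }

    int main(void)
    {
            set_cpu_possible(0, true);
            set_cpu_present(0, true);
            set_cpu_present(0, false);  /* CPU halts, as in common_shutdown_1() */
            printf("possible=%#lx present=%#lx\n",
                   cpu_possible_bits, cpu_present_bits);
            return 0;
    }

With the accessors, callers no longer care whether the mask is a cpumask_t copied by value or a pointer-backed bitmap, which is what lets init_cpu_possible()/init_cpu_present() replace the whole-struct assignments in smp_prepare_cpus().
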
diff --git a/arch/ia64/include/asm/kvm.h b/arch/ia64/include/asm/kvm.h
index 68aa6da..bfa86b6 100644
 
 #include <linux/ioctl.h>
 
+/* Select ia64 specific features in <linux/kvm.h> */
+#define __KVM_HAVE_IOAPIC
+#define __KVM_HAVE_DEVICE_ASSIGNMENT
+
 /* Architectural interrupt line count. */
 #define KVM_NR_INTERRUPTS 256
 
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 4e586f6..28f9820 100644
@@ -1337,6 +1337,10 @@ static void kvm_release_vm_pages(struct kvm *kvm)
        }
 }
 
+void kvm_arch_sync_events(struct kvm *kvm)
+{
+}
+
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
        kvm_iommu_unmap_guest(kvm);
diff --git a/arch/ia64/kvm/process.c b/arch/ia64/kvm/process.c
index 552d077..230eae4 100644
@@ -455,13 +455,18 @@ fpswa_ret_t vmm_fp_emulate(int fp_fault, void *bundle, unsigned long *ipsr,
        if (!vmm_fpswa_interface)
                return (fpswa_ret_t) {-1, 0, 0, 0};
 
-       /*
-        * Just let fpswa driver to use hardware fp registers.
-        * No fp register is valid in memory.
-        */
        memset(&fp_state, 0, sizeof(fp_state_t));
 
        /*
+        * Compute fp_state.  Only FP registers f6 - f11 are used by the
+        * vmm, so set those bits in the mask and set the low volatile
+        * pointer to point to these registers.
+        */
+       fp_state.bitmask_low64 = 0xfc0;  /* bit6..bit11 */
+
+       fp_state.fp_state_low_volatile = (fp_state_low_volatile_t *) &regs->f6;
+
+       /*
         * unsigned long (*EFI_FPSWA) (
         *      unsigned long    trap_type,
         *      void             *Bundle,
@@ -545,10 +550,6 @@ void reflect_interruption(u64 ifa, u64 isr, u64 iim,
                status = vmm_handle_fpu_swa(0, regs, isr);
                if (!status)
                        return ;
-               else if (-EAGAIN == status) {
-                       vcpu_decrement_iip(vcpu);
-                       return ;
-               }
                break;
        }
 
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 2822c8c..5f81256 100644
@@ -125,6 +125,10 @@ static void kvmppc_free_vcpus(struct kvm *kvm)
        }
 }
 
+void kvm_arch_sync_events(struct kvm *kvm)
+{
+}
+
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
        kvmppc_free_vcpus(kvm);
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index be84971..0d33893 100644
@@ -212,6 +212,10 @@ static void kvm_free_vcpus(struct kvm *kvm)
        }
 }
 
+void kvm_arch_sync_events(struct kvm *kvm)
+{
+}
+
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
        kvm_free_vcpus(kvm);
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h
index d2e3bf3..886c940 100644
@@ -9,6 +9,13 @@
 #include <linux/types.h>
 #include <linux/ioctl.h>
 
+/* Select x86 specific features in <linux/kvm.h> */
+#define __KVM_HAVE_PIT
+#define __KVM_HAVE_IOAPIC
+#define __KVM_HAVE_DEVICE_ASSIGNMENT
+#define __KVM_HAVE_MSI
+#define __KVM_HAVE_USER_NMI
+
 /* Architectural interrupt line count. */
 #define KVM_NR_INTERRUPTS 256
 
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index fb039cd..6428aa1 100644
@@ -1157,8 +1157,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
        data->cpu = pol->cpu;
        data->currpstate = HW_PSTATE_INVALID;
 
-       rc = powernow_k8_cpu_init_acpi(data);
-       if (rc) {
+       if (powernow_k8_cpu_init_acpi(data)) {
                /*
                 * Use the PSB BIOS structure. This is only available on
                 * an UP version, and is deprecated by AMD.
@@ -1176,17 +1175,20 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
                               "ACPI maintainers and complain to your BIOS "
                               "vendor.\n");
 #endif
-                       goto err_out;
+                       kfree(data);
+                       return -ENODEV;
                }
                if (pol->cpu != 0) {
                        printk(KERN_ERR FW_BUG PFX "No ACPI _PSS objects for "
                               "CPU other than CPU0. Complain to your BIOS "
                               "vendor.\n");
-                       goto err_out;
+                       kfree(data);
+                       return -ENODEV;
                }
                rc = find_psb_table(data);
                if (rc) {
-                       goto err_out;
+                       kfree(data);
+                       return -ENODEV;
                }
                /* Take a crude guess here.
                 * That guess was in microseconds, so multiply with 1000 */
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index e665d1c..72bd275 100644
@@ -207,7 +207,7 @@ static int __pit_timer_fn(struct kvm_kpit_state *ps)
        hrtimer_add_expires_ns(&pt->timer, pt->period);
        pt->scheduled = hrtimer_get_expires_ns(&pt->timer);
        if (pt->period)
-               ps->channels[0].count_load_time = hrtimer_get_expires(&pt->timer);
+               ps->channels[0].count_load_time = ktime_get();
 
        return (pt->period == 0 ? 0 : 1);
 }
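
For context on why ktime_get() is the right stamp here: count_load_time is the reference point from which the PIT model later back-computes the counter, roughly elapsed = now - count_load_time scaled by the 8254's 1.193182 MHz input clock. Stamping it with the timer's next expiry could place it in the future and make the subtraction go negative. A reduced user-space sketch of the read-back arithmetic (simplified from pit_get_count(); the periodic-mode formula is approximate):

    #include <stdint.h>
    #include <stdio.h>

    #define KVM_PIT_FREQ 1193182ULL         /* 8254 input clock, Hz */
    #define NSEC_PER_SEC 1000000000ULL

    /* Back-compute the PIT counter from the load timestamp. */
    static uint32_t pit_get_count(uint64_t now_ns, uint64_t load_ns,
                                  uint32_t count)
    {
            /* If load_ns were a future expiry, now_ns - load_ns would
             * underflow; stamping load_ns with "now" avoids that. */
            uint64_t ticks = (now_ns - load_ns) * KVM_PIT_FREQ / NSEC_PER_SEC;

            return count - (uint32_t)(ticks % count);
    }

    int main(void)
    {
            printf("counter = %u\n", pit_get_count(2000000, 1000000, 65535));
            return 0;
    }
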
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index c019b8e..cf17ed5 100644
@@ -87,13 +87,6 @@ void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs);
 
-void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
-{
-       kvm_apic_timer_intr_post(vcpu, vec);
-       /* TODO: PIT, RTC etc. */
-}
-EXPORT_SYMBOL_GPL(kvm_timer_intr_post);
-
 void __kvm_migrate_timers(struct kvm_vcpu *vcpu)
 {
        __kvm_migrate_apic_timer(vcpu);
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 2bf32a0..82579ee 100644
@@ -89,7 +89,6 @@ static inline int irqchip_in_kernel(struct kvm *kvm)
 
 void kvm_pic_reset(struct kvm_kpic_state *s);
 
-void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
 void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
 void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
 void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index afac68c..f0b67f2 100644
 #include "kvm_cache_regs.h"
 #include "irq.h"
 
+#ifndef CONFIG_X86_64
+#define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
+#else
+#define mod_64(x, y) ((x) % (y))
+#endif
+
 #define PRId64 "d"
 #define PRIx64 "llx"
 #define PRIu64 "u"
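
The mod_64() pair above exists because a 64-by-64 '%' on a 32-bit build compiles to a libgcc helper (__umoddi3) that the kernel does not link against, while div64_u64() is available everywhere; the remainder is recovered as x - y * (x / y). A user-space check of the identity (div64_u64() is stubbed with plain division here):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* User-space stand-in for the kernel's div64_u64(). */
    static uint64_t div64_u64(uint64_t dividend, uint64_t divisor)
    {
            return dividend / divisor;
    }

    /* Remainder via x - y * (x / y), as the 32-bit mod_64() does. */
    #define mod_64(x, y) ((x) - (y) * div64_u64((x), (y)))

    int main(void)
    {
            uint64_t x = 10000000000ULL, y = 333ULL;

            assert(mod_64(x, y) == x % y);
            printf("%llu mod %llu = %llu\n",
                   (unsigned long long)x, (unsigned long long)y,
                   (unsigned long long)mod_64(x, y));
            return 0;
    }
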
@@ -511,52 +517,22 @@ static void apic_send_ipi(struct kvm_lapic *apic)
 
 static u32 apic_get_tmcct(struct kvm_lapic *apic)
 {
-       u64 counter_passed;
-       ktime_t passed, now;
+       ktime_t remaining;
+       s64 ns;
        u32 tmcct;
 
        ASSERT(apic != NULL);
 
-       now = apic->timer.dev.base->get_time();
-       tmcct = apic_get_reg(apic, APIC_TMICT);
-
        /* if initial count is 0, current count should also be 0 */
-       if (tmcct == 0)
+       if (apic_get_reg(apic, APIC_TMICT) == 0)
                return 0;
 
-       if (unlikely(ktime_to_ns(now) <=
-               ktime_to_ns(apic->timer.last_update))) {
-               /* Wrap around */
-               passed = ktime_add(( {
-                                   (ktime_t) {
-                                   .tv64 = KTIME_MAX -
-                                   (apic->timer.last_update).tv64}; }
-                                  ), now);
-               apic_debug("time elapsed\n");
-       } else
-               passed = ktime_sub(now, apic->timer.last_update);
-
-       counter_passed = div64_u64(ktime_to_ns(passed),
-                                  (APIC_BUS_CYCLE_NS * apic->timer.divide_count));
-
-       if (counter_passed > tmcct) {
-               if (unlikely(!apic_lvtt_period(apic))) {
-                       /* one-shot timers stick at 0 until reset */
-                       tmcct = 0;
-               } else {
-                       /*
-                        * periodic timers reset to APIC_TMICT when they
-                        * hit 0. The while loop simulates this happening N
-                        * times. (counter_passed %= tmcct) would also work,
-                        * but might be slower or not work on 32-bit??
-                        */
-                       while (counter_passed > tmcct)
-                               counter_passed -= tmcct;
-                       tmcct -= counter_passed;
-               }
-       } else {
-               tmcct -= counter_passed;
-       }
+       remaining = hrtimer_expires_remaining(&apic->timer.dev);
+       if (ktime_to_ns(remaining) < 0)
+               remaining = ktime_set(0, 0);
+
+       ns = mod_64(ktime_to_ns(remaining), apic->timer.period);
+       tmcct = div64_u64(ns, (APIC_BUS_CYCLE_NS * apic->timer.divide_count));
 
        return tmcct;
 }
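
The rewritten apic_get_tmcct() reads the current-count register straight off the hrtimer: take the time remaining until the next expiry, clamp negative values (timer already fired) to zero, reduce modulo the programmed period so a wrapped periodic timer stays within one period, and convert nanoseconds back to bus cycles; in short, tmcct = (remaining mod period) / (APIC_BUS_CYCLE_NS * divide_count). A stand-alone model (the struct is an assumed snapshot of the emulated timer; the TMICT == 0 case is filtered out before this point, so period is nonzero):

    #include <stdint.h>
    #include <stdio.h>

    #define APIC_BUS_CYCLE_NS 1     /* value used by KVM's lapic model */

    struct lapic_timer {
            int64_t period;         /* ns: TMICT * APIC_BUS_CYCLE_NS * divide */
            uint32_t divide_count;
            int64_t remaining_ns;   /* hrtimer_expires_remaining(); may be < 0 */
    };

    static uint32_t apic_get_tmcct(const struct lapic_timer *t)
    {
            int64_t ns = t->remaining_ns;

            if (ns < 0)             /* already expired, not yet re-armed */
                    ns = 0;
            ns %= t->period;        /* stay inside one period after a wrap */
            return (uint32_t)(ns / (APIC_BUS_CYCLE_NS * t->divide_count));
    }

    int main(void)
    {
            struct lapic_timer t = {
                    .period = 1000000,
                    .divide_count = 1,
                    .remaining_ns = 250000,
            };

            printf("TMCCT = %u\n", apic_get_tmcct(&t));     /* 250000 */
            return 0;
    }
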
@@ -653,8 +629,6 @@ static void start_apic_timer(struct kvm_lapic *apic)
 {
        ktime_t now = apic->timer.dev.base->get_time();
 
-       apic->timer.last_update = now;
-
        apic->timer.period = apic_get_reg(apic, APIC_TMICT) *
                    APIC_BUS_CYCLE_NS * apic->timer.divide_count;
        atomic_set(&apic->timer.pending, 0);
@@ -1110,16 +1084,6 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
        }
 }
 
-void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
-{
-       struct kvm_lapic *apic = vcpu->arch.apic;
-
-       if (apic && apic_lvt_vector(apic, APIC_LVTT) == vec)
-               apic->timer.last_update = ktime_add_ns(
-                               apic->timer.last_update,
-                               apic->timer.period);
-}
-
 int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
 {
        int vector = kvm_apic_has_interrupt(vcpu);
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 8185888..45ab6ee 100644
@@ -12,7 +12,6 @@ struct kvm_lapic {
                atomic_t pending;
                s64 period;     /* unit: ns */
                u32 divide_count;
-               ktime_t last_update;
                struct hrtimer dev;
        } timer;
        struct kvm_vcpu *vcpu;
@@ -42,7 +41,6 @@ void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
 void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu);
 int kvm_lapic_enabled(struct kvm_vcpu *vcpu);
 int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
-void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
 
 void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr);
 void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 83f11c7..2d4477c 100644
@@ -1698,8 +1698,13 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
        if (largepage)
                spte |= PT_PAGE_SIZE_MASK;
        if (mt_mask) {
-               mt_mask = get_memory_type(vcpu, gfn) <<
-                         kvm_x86_ops->get_mt_mask_shift();
+               if (!kvm_is_mmio_pfn(pfn)) {
+                       mt_mask = get_memory_type(vcpu, gfn) <<
+                               kvm_x86_ops->get_mt_mask_shift();
+                       mt_mask |= VMX_EPT_IGMT_BIT;
+               } else
+                       mt_mask = MTRR_TYPE_UNCACHABLE <<
+                               kvm_x86_ops->get_mt_mask_shift();
                spte |= mt_mask;
        }
 
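Background for the mt_mask hunk: in an EPT PTE, bits 5:3 hold the effective memory type and bit 6 (IGMT, "ignore guest PAT") makes that type authoritative. The change keeps the guest-derived type plus IGMT for RAM but forces MMIO frames to uncachable; correspondingly, the vmx.c hunk further down stops setting IGMT unconditionally in the base PTE bits. A worked example of the resulting bit values (constants as in the 2.6.29-era headers; get_mt_mask_shift() is assumed to return VMX_EPT_MT_EPTE_SHIFT):

    #include <stdint.h>
    #include <stdio.h>

    #define VMX_EPT_MT_EPTE_SHIFT   3           /* EPT PTE bits 5:3 */
    #define VMX_EPT_IGMT_BIT        (1ull << 6) /* ignore guest PAT */
    #define MTRR_TYPE_UNCACHABLE    0
    #define MTRR_TYPE_WRBACK        6

    /* Memory-type bits chosen by the set_spte() hunk above (sketch). */
    static uint64_t ept_mt_bits(int is_mmio, uint64_t guest_type)
    {
            if (is_mmio)
                    return (uint64_t)MTRR_TYPE_UNCACHABLE
                            << VMX_EPT_MT_EPTE_SHIFT;
            return (guest_type << VMX_EPT_MT_EPTE_SHIFT) | VMX_EPT_IGMT_BIT;
    }

    int main(void)
    {
            /* RAM with guest write-back: (6 << 3) | 0x40 = 0x70. */
            printf("RAM  (WB): %#llx\n",
                   (unsigned long long)ept_mt_bits(0, MTRR_TYPE_WRBACK));
            /* MMIO is forced uncachable: 0 << 3 = 0. */
            printf("MMIO (UC): %#llx\n",
                   (unsigned long long)ept_mt_bits(1, 0));
            return 0;
    }
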
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 1452851..a9e769e 100644
@@ -1600,7 +1600,6 @@ static void svm_intr_assist(struct kvm_vcpu *vcpu)
        /* Okay, we can deliver the interrupt: grab it and update PIC state. */
        intr_vector = kvm_cpu_get_interrupt(vcpu);
        svm_inject_irq(svm, intr_vector);
-       kvm_timer_intr_post(vcpu, intr_vector);
 out:
        update_cr8_intercept(vcpu);
 }
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 6259d74..7611af5 100644
@@ -903,6 +903,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
                data = vmcs_readl(GUEST_SYSENTER_ESP);
                break;
        default:
+               vmx_load_host_state(to_vmx(vcpu));
                msr = find_msr_entry(to_vmx(vcpu), msr_index);
                if (msr) {
                        data = msr->data;
@@ -3285,7 +3286,6 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
        }
        if (vcpu->arch.interrupt.pending) {
                vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
-               kvm_timer_intr_post(vcpu, vcpu->arch.interrupt.nr);
                if (kvm_cpu_has_interrupt(vcpu))
                        enable_irq_window(vcpu);
        }
@@ -3687,8 +3687,7 @@ static int __init vmx_init(void)
        if (vm_need_ept()) {
                bypass_guest_pf = 0;
                kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
-                       VMX_EPT_WRITABLE_MASK |
-                       VMX_EPT_IGMT_BIT);
+                       VMX_EPT_WRITABLE_MASK);
                kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull,
                                VMX_EPT_EXECUTABLE_MASK,
                                VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index cc17546..758b7a1 100644
@@ -967,7 +967,6 @@ int kvm_dev_ioctl_check_extension(long ext)
        case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
        case KVM_CAP_SET_TSS_ADDR:
        case KVM_CAP_EXT_CPUID:
-       case KVM_CAP_CLOCKSOURCE:
        case KVM_CAP_PIT:
        case KVM_CAP_NOP_IO_DELAY:
        case KVM_CAP_MP_STATE:
@@ -992,6 +991,9 @@ int kvm_dev_ioctl_check_extension(long ext)
        case KVM_CAP_IOMMU:
                r = iommu_found();
                break;
+       case KVM_CAP_CLOCKSOURCE:
+               r = boot_cpu_has(X86_FEATURE_CONSTANT_TSC);
+               break;
        default:
                r = 0;
                break;
@@ -4127,9 +4129,13 @@ static void kvm_free_vcpus(struct kvm *kvm)
 
 }
 
-void kvm_arch_destroy_vm(struct kvm *kvm)
+void kvm_arch_sync_events(struct kvm *kvm)
 {
        kvm_free_all_assigned_devices(kvm);
+}
+
+void kvm_arch_destroy_vm(struct kvm *kvm)
+{
        kvm_iommu_unmap_guest(kvm);
        kvm_free_pit(kvm);
        kfree(kvm->arch.vpic);
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index f4b7c79..fa9e416 100644
@@ -61,6 +61,8 @@
 /* global iommu list, set NULL for ignored DMAR units */
 static struct intel_iommu **g_iommus;
 
+static int rwbf_quirk = 0;
+
 /*
  * 0: Present
  * 1-11: Reserved
@@ -785,7 +787,7 @@ static void iommu_flush_write_buffer(struct intel_iommu *iommu)
        u32 val;
        unsigned long flag;
 
-       if (!cap_rwbf(iommu->cap))
+       if (!rwbf_quirk && !cap_rwbf(iommu->cap))
                return;
        val = iommu->gcmd | DMA_GCMD_WBF;
 
@@ -3137,3 +3139,13 @@ static struct iommu_ops intel_iommu_ops = {
        .unmap          = intel_iommu_unmap_range,
        .iova_to_phys   = intel_iommu_iova_to_phys,
 };
+
+static void __devinit quirk_iommu_rwbf(struct pci_dev *dev)
+{
+       /* Mobile 4 Series Chipset neglects to set RWBF capability,
+          but needs it */
+       printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n");
+       rwbf_quirk = 1;
+}
+
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index aafc9eb..b0c87dc 100644
@@ -868,7 +868,7 @@ static inline unsigned ext4_rec_len_from_disk(__le16 dlen)
 {
        unsigned len = le16_to_cpu(dlen);
 
-       if (len == EXT4_MAX_REC_LEN)
+       if (len == EXT4_MAX_REC_LEN || len == 0)
                return 1 << 16;
        return len;
 }
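
The extra len == 0 case only matters on 64KB-block filesystems: a directory entry spanning a whole block has rec_len 1 << 16, which does not fit in the on-disk __le16, so it is stored as a sentinel; after this change both EXT4_MAX_REC_LEN and 0 decode back to 65536. A user-space round trip (the helpers are stand-ins for the kernel pair and skip the endianness conversions):

    #include <assert.h>
    #include <stdint.h>

    #define EXT4_MAX_REC_LEN ((1 << 16) - 1)

    static unsigned rec_len_from_disk(uint16_t dlen)
    {
            unsigned len = dlen;                    /* le16_to_cpu() */

            if (len == EXT4_MAX_REC_LEN || len == 0)
                    return 1 << 16;
            return len;
    }

    static uint16_t rec_len_to_disk(unsigned len)
    {
            assert(len <= (1 << 16));
            if (len == (1 << 16))
                    return EXT4_MAX_REC_LEN;        /* cpu_to_le16() */
            return (uint16_t)len;
    }

    int main(void)
    {
            assert(rec_len_from_disk(rec_len_to_disk(1 << 16)) == 1 << 16);
            assert(rec_len_from_disk(0) == 1 << 16);    /* the new case */
            assert(rec_len_from_disk(40) == 40);
            return 0;
    }
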
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 03ba20b..cbd2ca9 100644
 static inline int ext4_begin_ordered_truncate(struct inode *inode,
                                              loff_t new_size)
 {
-       return jbd2_journal_begin_ordered_truncate(&EXT4_I(inode)->jinode,
-                                                  new_size);
+       return jbd2_journal_begin_ordered_truncate(
+                                       EXT4_SB(inode->i_sb)->s_journal,
+                                       &EXT4_I(inode)->jinode,
+                                       new_size);
 }
 
 static void ext4_invalidatepage(struct page *page, unsigned long offset);
@@ -2437,6 +2439,7 @@ static int ext4_da_writepages(struct address_space *mapping,
        int no_nrwrite_index_update;
        int pages_written = 0;
        long pages_skipped;
+       int range_cyclic, cycled = 1, io_done = 0;
        int needed_blocks, ret = 0, nr_to_writebump = 0;
        struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
 
@@ -2488,9 +2491,15 @@ static int ext4_da_writepages(struct address_space *mapping,
        if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
                range_whole = 1;
 
-       if (wbc->range_cyclic)
+       range_cyclic = wbc->range_cyclic;
+       if (wbc->range_cyclic) {
                index = mapping->writeback_index;
-       else
+               if (index)
+                       cycled = 0;
+               wbc->range_start = index << PAGE_CACHE_SHIFT;
+               wbc->range_end  = LLONG_MAX;
+               wbc->range_cyclic = 0;
+       } else
                index = wbc->range_start >> PAGE_CACHE_SHIFT;
 
        mpd.wbc = wbc;
@@ -2504,6 +2513,7 @@ static int ext4_da_writepages(struct address_space *mapping,
        wbc->no_nrwrite_index_update = 1;
        pages_skipped = wbc->pages_skipped;
 
+retry:
        while (!ret && wbc->nr_to_write > 0) {
 
                /*
@@ -2546,6 +2556,7 @@ static int ext4_da_writepages(struct address_space *mapping,
                        pages_written += mpd.pages_written;
                        wbc->pages_skipped = pages_skipped;
                        ret = 0;
+                       io_done = 1;
                } else if (wbc->nr_to_write)
                        /*
                         * There is no more writeout needed
@@ -2554,6 +2565,13 @@ static int ext4_da_writepages(struct address_space *mapping,
                         */
                        break;
        }
+       if (!io_done && !cycled) {
+               cycled = 1;
+               index = 0;
+               wbc->range_start = index << PAGE_CACHE_SHIFT;
+               wbc->range_end  = mapping->writeback_index - 1;
+               goto retry;
+       }
        if (pages_skipped != wbc->pages_skipped)
                printk(KERN_EMERG "This should not happen leaving %s "
                                "with nr_to_write = %ld ret = %d\n",
@@ -2561,6 +2579,7 @@ static int ext4_da_writepages(struct address_space *mapping,
 
        /* Update index */
        index += pages_written;
+       wbc->range_cyclic = range_cyclic;
        if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
                /*
                 * set the writeback_index so that range_cyclic
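
The range_cyclic handling above splits one wrap-around scan into two linear passes: first from the saved writeback_index to the end of the range, then, if that pass wrote nothing and did not start at page 0, from page 0 up to writeback_index - 1. A compact model of the control flow (the writeback machinery is reduced to a callback; all names are hypothetical):

    #include <stdio.h>

    typedef long (*writeout_fn)(long start, long end);

    /* Two linear passes emulating one cyclic pass (sketch). */
    static long cyclic_writepages(long writeback_index, long last_page,
                                  writeout_fn writeout)
    {
            int cycled = (writeback_index == 0);    /* nothing to wrap back to */
            long index = writeback_index;
            long end = last_page;
            long written = 0;

    retry:
            written += writeout(index, end);
            if (written == 0 && !cycled) {
                    /* The tail pass wrote nothing: wrap to the head. */
                    cycled = 1;
                    index = 0;
                    end = writeback_index - 1;
                    goto retry;
            }
            return written;
    }

    static long demo_writeout(long start, long end)
    {
            printf("writeout pages %ld..%ld\n", start, end);
            return start == 0 ? 3 : 0;      /* pretend only the head is dirty */
    }

    int main(void)
    {
            cyclic_writepages(100, 1L << 20, demo_writeout);
            return 0;
    }
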
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index deba54f..4415bee 100644
@@ -3693,6 +3693,8 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
        pa->pa_free = pa->pa_len;
        atomic_set(&pa->pa_count, 1);
        spin_lock_init(&pa->pa_lock);
+       INIT_LIST_HEAD(&pa->pa_inode_list);
+       INIT_LIST_HEAD(&pa->pa_group_list);
        pa->pa_deleted = 0;
        pa->pa_linear = 0;
 
@@ -3755,6 +3757,7 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
        atomic_set(&pa->pa_count, 1);
        spin_lock_init(&pa->pa_lock);
        INIT_LIST_HEAD(&pa->pa_inode_list);
+       INIT_LIST_HEAD(&pa->pa_group_list);
        pa->pa_deleted = 0;
        pa->pa_linear = 1;
 
@@ -4476,23 +4479,26 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac)
                        pa->pa_free -= ac->ac_b_ex.fe_len;
                        pa->pa_len -= ac->ac_b_ex.fe_len;
                        spin_unlock(&pa->pa_lock);
-                       /*
-                        * We want to add the pa to the right bucket.
-                        * Remove it from the list and while adding
-                        * make sure the list to which we are adding
-                        * doesn't grow big.
-                        */
-                       if (likely(pa->pa_free)) {
-                               spin_lock(pa->pa_obj_lock);
-                               list_del_rcu(&pa->pa_inode_list);
-                               spin_unlock(pa->pa_obj_lock);
-                               ext4_mb_add_n_trim(ac);
-                       }
                }
-               ext4_mb_put_pa(ac, ac->ac_sb, pa);
        }
        if (ac->alloc_semp)
                up_read(ac->alloc_semp);
+       if (pa) {
+               /*
+                * We want to add the pa to the right bucket.
+                * Remove it from the list and while adding
+                * make sure the list to which we are adding
+                * doesn't grow big.  We need to release
+                * alloc_semp before calling ext4_mb_add_n_trim()
+                */
+               if (pa->pa_linear && likely(pa->pa_free)) {
+                       spin_lock(pa->pa_obj_lock);
+                       list_del_rcu(&pa->pa_inode_list);
+                       spin_unlock(pa->pa_obj_lock);
+                       ext4_mb_add_n_trim(ac);
+               }
+               ext4_mb_put_pa(ac, ac->ac_sb, pa);
+       }
        if (ac->ac_bitmap_page)
                page_cache_release(ac->ac_bitmap_page);
        if (ac->ac_buddy_page)
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index 734abca..fe64d9f 100644
@@ -481,7 +481,7 @@ int ext4_ext_migrate(struct inode *inode)
                                        + 1);
        if (IS_ERR(handle)) {
                retval = PTR_ERR(handle);
-               goto err_out;
+               return retval;
        }
        tmp_inode = ext4_new_inode(handle,
                                inode->i_sb->s_root->d_inode,
@@ -489,8 +489,7 @@ int ext4_ext_migrate(struct inode *inode)
        if (IS_ERR(tmp_inode)) {
                retval = -ENOMEM;
                ext4_journal_stop(handle);
-               tmp_inode = NULL;
-               goto err_out;
+               return retval;
        }
        i_size_write(tmp_inode, i_size_read(inode));
        /*
@@ -618,8 +617,7 @@ err_out:
 
        ext4_journal_stop(handle);
 
-       if (tmp_inode)
-               iput(tmp_inode);
+       iput(tmp_inode);
 
        return retval;
 }
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index e5f06a5..a5732c5 100644
@@ -3046,14 +3046,17 @@ static void ext4_write_super(struct super_block *sb)
 static int ext4_sync_fs(struct super_block *sb, int wait)
 {
        int ret = 0;
+       tid_t target;
 
        trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait);
        sb->s_dirt = 0;
        if (EXT4_SB(sb)->s_journal) {
-               if (wait)
-                       ret = ext4_force_commit(sb);
-               else
-                       jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, NULL);
+               if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal,
+                                             &target)) {
+                       if (wait)
+                               jbd2_log_wait_commit(EXT4_SB(sb)->s_journal,
+                                                    target);
+               }
        } else {
                ext4_commit_super(sb, EXT4_SB(sb)->s_es, wait);
        }
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index eb34300..5814410 100644
@@ -450,7 +450,7 @@ int __jbd2_log_space_left(journal_t *journal)
 }
 
 /*
- * Called under j_state_lock.  Returns true if a transaction was started.
+ * Called under j_state_lock.  Returns true if a transaction commit was started.
  */
 int __jbd2_log_start_commit(journal_t *journal, tid_t target)
 {
@@ -518,7 +518,8 @@ int jbd2_journal_force_commit_nested(journal_t *journal)
 
 /*
  * Start a commit of the current running transaction (if any).  Returns true
- * if a transaction was started, and fills its tid in at *ptid
+ * if a transaction is going to be committed (or is currently already
+ * committing), and fills its tid in at *ptid
  */
 int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid)
 {
@@ -528,15 +529,19 @@ int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid)
        if (journal->j_running_transaction) {
                tid_t tid = journal->j_running_transaction->t_tid;
 
-               ret = __jbd2_log_start_commit(journal, tid);
-               if (ret && ptid)
+               __jbd2_log_start_commit(journal, tid);
+               /* There's a running transaction and we've just made sure
+                * its commit has been scheduled. */
+               if (ptid)
                        *ptid = tid;
-       } else if (journal->j_committing_transaction && ptid) {
+               ret = 1;
+       } else if (journal->j_committing_transaction) {
                /*
                 * If ext3_write_super() recently started a commit, then we
                 * have to wait for completion of that transaction
                 */
-               *ptid = journal->j_committing_transaction->t_tid;
+               if (ptid)
+                       *ptid = journal->j_committing_transaction->t_tid;
                ret = 1;
        }
        spin_unlock(&journal->j_state_lock);
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 46b4e34..28ce21d 100644
@@ -2129,26 +2129,46 @@ done:
 }
 
 /*
- * This function must be called when inode is journaled in ordered mode
- * before truncation happens. It starts writeout of truncated part in
- * case it is in the committing transaction so that we stand to ordered
- * mode consistency guarantees.
+ * File truncate and transaction commit interact with each other in a
+ * non-trivial way.  If a transaction writing data block A is
+ * committing, we cannot discard the data by truncate until the data
+ * has been written.  Otherwise, if we crashed after the transaction
+ * with the write has committed but before the transaction with the
+ * truncate has committed, we could see stale data in block A.  This
+ * function is a helper to solve this problem.  It starts writeout of
+ * the truncated part in case it is in the committing transaction.
+ *
+ * Filesystem code must call this function when the inode is journaled
+ * in ordered mode before truncation happens and after the inode has
+ * been placed on the orphan list with the new inode size. The second
+ * condition avoids the race where someone writes new data and we start
+ * committing the transaction after this function has been called but
+ * before a transaction for the truncate is started (and furthermore it
+ * allows us to optimize the case where the addition to the orphan list
+ * happens in the same transaction as the write --- we don't have to
+ * write any data in that case).
  */
-int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode,
+int jbd2_journal_begin_ordered_truncate(journal_t *journal,
+                                       struct jbd2_inode *jinode,
                                        loff_t new_size)
 {
-       journal_t *journal;
-       transaction_t *commit_trans;
+       transaction_t *inode_trans, *commit_trans;
        int ret = 0;
 
-       if (!inode->i_transaction && !inode->i_next_transaction)
+       /* This is a quick check to avoid locking if not necessary */
+       if (!jinode->i_transaction)
                goto out;
-       journal = inode->i_transaction->t_journal;
+       /* Locks are here just to force reading of recent values; it is
+        * enough that the transaction was not committing before we started
+        * a transaction that adds the inode to the orphan list. */
        spin_lock(&journal->j_state_lock);
        commit_trans = journal->j_committing_transaction;
        spin_unlock(&journal->j_state_lock);
-       if (inode->i_transaction == commit_trans) {
-               ret = filemap_fdatawrite_range(inode->i_vfs_inode->i_mapping,
+       spin_lock(&journal->j_list_lock);
+       inode_trans = jinode->i_transaction;
+       spin_unlock(&journal->j_list_lock);
+       if (inode_trans == commit_trans) {
+               ret = filemap_fdatawrite_range(jinode->i_vfs_inode->i_mapping,
                        new_size, LLONG_MAX);
                if (ret)
                        jbd2_journal_abort(journal, ret);
diff --git a/fs/namespace.c b/fs/namespace.c
index 228d8c4..06f8e63 100644
@@ -614,9 +614,11 @@ static inline void __mntput(struct vfsmount *mnt)
         */
        for_each_possible_cpu(cpu) {
                struct mnt_writer *cpu_writer = &per_cpu(mnt_writers, cpu);
-               if (cpu_writer->mnt != mnt)
-                       continue;
                spin_lock(&cpu_writer->lock);
+               if (cpu_writer->mnt != mnt) {
+                       spin_unlock(&cpu_writer->lock);
+                       continue;
+               }
                atomic_add(cpu_writer->count, &mnt->__mnt_writers);
                cpu_writer->count = 0;
                /*
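
The __mntput() fix is a classic check-then-lock race: cpu_writer->mnt was tested before cpu_writer->lock was taken, so a concurrent writer could retarget the slot between the test and the drain. The corrected shape takes the per-slot lock first and re-checks under it, as in this reduced pthread sketch (the struct is a simplification of the kernel's per-CPU mnt_writer):

    #include <pthread.h>
    #include <stdio.h>

    struct mnt_writer {
            pthread_mutex_t lock;
            void *mnt;
            int count;
    };

    /* Corrected shape: lock first, then re-check the slot's owner. */
    static void drain_slot(struct mnt_writer *w, void *mnt, int *total)
    {
            pthread_mutex_lock(&w->lock);
            if (w->mnt != mnt) {            /* checked under the lock */
                    pthread_mutex_unlock(&w->lock);
                    return;
            }
            *total += w->count;
            w->count = 0;
            pthread_mutex_unlock(&w->lock);
    }

    int main(void)
    {
            struct mnt_writer w = { PTHREAD_MUTEX_INITIALIZER, (void *)0x1, 5 };
            int total = 0;

            drain_slot(&w, (void *)0x1, &total);
            printf("drained %d writers\n", total);
            return 0;
    }
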
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 3c3532e..172850a 100644
@@ -513,8 +513,10 @@ static inline int ocfs2_jbd2_file_inode(handle_t *handle, struct inode *inode)
 static inline int ocfs2_begin_ordered_truncate(struct inode *inode,
                                               loff_t new_size)
 {
-       return jbd2_journal_begin_ordered_truncate(&OCFS2_I(inode)->ip_jinode,
-                                                  new_size);
+       return jbd2_journal_begin_ordered_truncate(
+                               OCFS2_SB(inode->i_sb)->journal->j_journal,
+                               &OCFS2_I(inode)->ip_jinode,
+                               new_size);
 }
 
 #endif /* OCFS2_JOURNAL_H */
diff --git a/include/asm-frv/pgtable.h b/include/asm-frv/pgtable.h
index 83c51ab..e16fdb1 100644
@@ -478,7 +478,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 #define __swp_type(x)                  (((x).val >> 2) & 0x1f)
 #define __swp_offset(x)                        ((x).val >> 8)
 #define __swp_entry(type, offset)      ((swp_entry_t) { ((type) << 2) | ((offset) << 8) })
-#define __pte_to_swp_entry(pte)                ((swp_entry_t) { (pte).pte })
+#define __pte_to_swp_entry(_pte)       ((swp_entry_t) { (_pte).pte })
 #define __swp_entry_to_pte(x)          ((pte_t) { (x).val })
 
 static inline int pte_file(pte_t pte)
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index b28b37e..4d248b3 100644
@@ -1150,7 +1150,8 @@ extern int           jbd2_journal_clear_err  (journal_t *);
 extern int        jbd2_journal_bmap(journal_t *, unsigned long, unsigned long long *);
 extern int        jbd2_journal_force_commit(journal_t *);
 extern int        jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *inode);
-extern int        jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode, loff_t new_size);
+extern int        jbd2_journal_begin_ordered_truncate(journal_t *journal,
+                               struct jbd2_inode *inode, loff_t new_size);
 extern void       jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode);
 extern void       jbd2_journal_release_jbd_inode(journal_t *journal, struct jbd2_inode *jinode);
 
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 5715f19..0424326 100644
@@ -58,10 +58,10 @@ struct kvm_irqchip {
        __u32 pad;
         union {
                char dummy[512];  /* reserving space */
-#ifdef CONFIG_X86
+#ifdef __KVM_HAVE_PIT
                struct kvm_pic_state pic;
 #endif
-#if defined(CONFIG_X86) || defined(CONFIG_IA64)
+#ifdef __KVM_HAVE_IOAPIC
                struct kvm_ioapic_state ioapic;
 #endif
        } chip;
@@ -384,16 +384,16 @@ struct kvm_trace_rec {
 #define KVM_CAP_MP_STATE 14
 #define KVM_CAP_COALESCED_MMIO 15
 #define KVM_CAP_SYNC_MMU 16  /* Changes to host mmap are reflected in guest */
-#if defined(CONFIG_X86)||defined(CONFIG_IA64)
+#ifdef __KVM_HAVE_DEVICE_ASSIGNMENT
 #define KVM_CAP_DEVICE_ASSIGNMENT 17
 #endif
 #define KVM_CAP_IOMMU 18
-#if defined(CONFIG_X86)
+#ifdef __KVM_HAVE_MSI
 #define KVM_CAP_DEVICE_MSI 20
 #endif
 /* Bug in KVM_SET_USER_MEMORY_REGION fixed: */
 #define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21
-#if defined(CONFIG_X86)
+#ifdef __KVM_HAVE_USER_NMI
 #define KVM_CAP_USER_NMI 22
 #endif
 
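The CONFIG_* tests being removed were wrong for an exported header: userspace compiling against <linux/kvm.h> never defines CONFIG_X86, so these capability numbers silently vanished from user builds. The __KVM_HAVE_* macros instead come from the per-arch exported <asm/kvm.h> (see the ia64 and x86 hunks earlier in this diff), which is included first. A single-file model collapsing the two headers (the capability value is a placeholder):

    #include <stdio.h>

    /* Stand-in for the per-arch exported <asm/kvm.h>: */
    #define __KVM_HAVE_IOAPIC

    /* Stand-in for the generic exported <linux/kvm.h>: */
    #ifdef __KVM_HAVE_IOAPIC
    #define KVM_CAP_IRQCHIP 0       /* placeholder value */
    #endif

    int main(void)
    {
    #ifdef KVM_CAP_IRQCHIP
            puts("KVM_CAP_IRQCHIP is visible to this (user) build");
    #else
            puts("capability hidden from this build");
    #endif
            return 0;
    }
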
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ec49d0b..bf6f703 100644
@@ -285,6 +285,7 @@ void kvm_free_physmem(struct kvm *kvm);
 struct  kvm *kvm_arch_create_vm(void);
 void kvm_arch_destroy_vm(struct kvm *kvm);
 void kvm_free_all_assigned_devices(struct kvm *kvm);
+void kvm_arch_sync_events(struct kvm *kvm);
 
 int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
 int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index e9693a2..4c40375 100644
@@ -73,14 +73,13 @@ static int kvm_iommu_map_memslots(struct kvm *kvm)
 {
        int i, r = 0;
 
-       down_read(&kvm->slots_lock);
        for (i = 0; i < kvm->nmemslots; i++) {
                r = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn,
                                        kvm->memslots[i].npages);
                if (r)
                        break;
        }
-       up_read(&kvm->slots_lock);
+
        return r;
 }
 
@@ -190,12 +189,11 @@ static void kvm_iommu_put_pages(struct kvm *kvm,
 static int kvm_iommu_unmap_memslots(struct kvm *kvm)
 {
        int i;
-       down_read(&kvm->slots_lock);
+
        for (i = 0; i < kvm->nmemslots; i++) {
                kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn,
                                    kvm->memslots[i].npages);
        }
-       up_read(&kvm->slots_lock);
 
        return 0;
 }
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 3a5a082..29a667c 100644
@@ -173,7 +173,6 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
                assigned_dev->host_irq_disabled = false;
        }
        mutex_unlock(&assigned_dev->kvm->lock);
-       kvm_put_kvm(assigned_dev->kvm);
 }
 
 static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
@@ -181,8 +180,6 @@ static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
        struct kvm_assigned_dev_kernel *assigned_dev =
                (struct kvm_assigned_dev_kernel *) dev_id;
 
-       kvm_get_kvm(assigned_dev->kvm);
-
        schedule_work(&assigned_dev->interrupt_work);
 
        disable_irq_nosync(irq);
@@ -213,6 +210,7 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
        }
 }
 
+/* This function implicitly holds the kvm->lock mutex via cancel_work_sync() */
 static void kvm_free_assigned_irq(struct kvm *kvm,
                                  struct kvm_assigned_dev_kernel *assigned_dev)
 {
@@ -228,11 +226,24 @@ static void kvm_free_assigned_irq(struct kvm *kvm,
        if (!assigned_dev->irq_requested_type)
                return;
 
-       if (cancel_work_sync(&assigned_dev->interrupt_work))
-               /* We had pending work. That means we will have to take
-                * care of kvm_put_kvm.
-                */
-               kvm_put_kvm(kvm);
+       /*
+        * Here, cancel_work_sync() returns true if:
+        * 1. the work was scheduled, and then cancelled;
+        * 2. the work callback was executed.
+        *
+        * The first case ensures that the irq is disabled and no more
+        * events will arrive. But in the second case the irq may have
+        * been re-enabled (e.g. for MSI), so we disable the irq here to
+        * prevent further events.
+        *
+        * Note this may result in a nested disable if the interrupt
+        * type is INTx, but that is OK since we are about to free it.
+        *
+        * If this function is called as part of VM destruction, make
+        * sure the kvm state is still valid at this point, since we may
+        * also have to wait for interrupt_work to finish.
+        */
+       disable_irq_nosync(assigned_dev->host_irq);
+       cancel_work_sync(&assigned_dev->interrupt_work);
 
        free_irq(assigned_dev->host_irq, (void *)assigned_dev);
 
@@ -285,8 +296,8 @@ static int assigned_device_update_intx(struct kvm *kvm,
 
        if (irqchip_in_kernel(kvm)) {
                if (!msi2intx &&
-                   adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) {
-                       free_irq(adev->host_irq, (void *)kvm);
+                   (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI)) {
+                       free_irq(adev->host_irq, (void *)adev);
                        pci_disable_msi(adev->dev);
                }
 
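The teardown comment in the kvm_free_assigned_irq() hunk above boils down to an ordering rule: cancel_work_sync() only guarantees the handler is not running afterwards, not that a re-enabled MSI line stays quiet, so the line is disabled first, then pending work is flushed, and only then is the vector freed. A stub program showing just that ordering (all three functions are user-space stand-ins for the kernel calls):

    #include <stdio.h>

    static void disable_irq_nosync(int irq)
    {
            printf("1. disable irq %d (no new handler invocations)\n", irq);
    }

    static void cancel_work_sync(const char *work)
    {
            printf("2. flush %s (wait out a running instance)\n", work);
    }

    static void free_irq(int irq)
    {
            printf("3. free irq %d (line quiet, work done)\n", irq);
    }

    int main(void)
    {
            int host_irq = 42;      /* hypothetical */

            disable_irq_nosync(host_irq);
            cancel_work_sync("interrupt_work");
            free_irq(host_irq);
            return 0;
    }
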
@@ -455,6 +466,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
        struct kvm_assigned_dev_kernel *match;
        struct pci_dev *dev;
 
+       down_read(&kvm->slots_lock);
        mutex_lock(&kvm->lock);
 
        match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
@@ -516,6 +528,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
 
 out:
        mutex_unlock(&kvm->lock);
+       up_read(&kvm->slots_lock);
        return r;
 out_list_del:
        list_del(&match->list);
@@ -527,6 +540,7 @@ out_put:
 out_free:
        kfree(match);
        mutex_unlock(&kvm->lock);
+       up_read(&kvm->slots_lock);
        return r;
 }
 #endif
@@ -789,11 +803,19 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
        return young;
 }
 
+static void kvm_mmu_notifier_release(struct mmu_notifier *mn,
+                                    struct mm_struct *mm)
+{
+       struct kvm *kvm = mmu_notifier_to_kvm(mn);
+       kvm_arch_flush_shadow(kvm);
+}
+
 static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
        .invalidate_page        = kvm_mmu_notifier_invalidate_page,
        .invalidate_range_start = kvm_mmu_notifier_invalidate_range_start,
        .invalidate_range_end   = kvm_mmu_notifier_invalidate_range_end,
        .clear_flush_young      = kvm_mmu_notifier_clear_flush_young,
+       .release                = kvm_mmu_notifier_release,
 };
 #endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */
 
@@ -883,6 +905,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
 {
        struct mm_struct *mm = kvm->mm;
 
+       kvm_arch_sync_events(kvm);
        spin_lock(&kvm_lock);
        list_del(&kvm->vm_list);
        spin_unlock(&kvm_lock);