Subject: kvm mmu transparent hugepage support
From: Marcelo Tosatti

This should work for both hugetlbfs and transparent hugepages.

Signed-off-by: Andrea Arcangeli
Signed-off-by: Marcelo Tosatti
Acked-by: Rik van Riel
---

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -459,10 +459,20 @@ static int has_wrprotected_page(struct k
 
 static int host_mapping_level(struct kvm *kvm, gfn_t gfn)
 {
-        unsigned long page_size;
+        unsigned long page_size, addr;
         int i, ret = 0;
 
-        page_size = kvm_host_page_size(kvm, gfn);
+        page_size = kvm_host_page_size(kvm, gfn, &addr);
+
+        /* check for transparent hugepages */
+        if (page_size == PAGE_SIZE && !kvm_is_error_hva(addr)) {
+                pfn_t pfn = hva_to_pfn(kvm, addr);
+
+                if (!is_error_pfn(pfn) && !kvm_is_mmio_pfn(pfn) &&
+                    PageTransCompound(pfn_to_page(pfn)))
+                        page_size = KVM_HPAGE_SIZE(2);
+                kvm_release_pfn_clean(pfn);
+        }
 
         for (i = PT_PAGE_TABLE_LEVEL;
              i < (PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES); ++i) {
@@ -1965,6 +1975,8 @@ static int nonpaging_map(struct kvm_vcpu
         pfn_t pfn;
         unsigned long mmu_seq;
 
+        mmu_seq = vcpu->kvm->mmu_notifier_seq;
+        smp_rmb();
         level = mapping_level(vcpu, gfn);
 
         /*
@@ -1976,8 +1988,6 @@ static int nonpaging_map(struct kvm_vcpu
 
         gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1);
 
-        mmu_seq = vcpu->kvm->mmu_notifier_seq;
-        smp_rmb();
         pfn = gfn_to_pfn(vcpu->kvm, gfn);
 
         /* mmio */
@@ -2189,12 +2199,12 @@ static int tdp_page_fault(struct kvm_vcp
         if (r)
                 return r;
 
-        level = mapping_level(vcpu, gfn);
-
-        gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1);
-
         mmu_seq = vcpu->kvm->mmu_notifier_seq;
         smp_rmb();
+        level = mapping_level(vcpu, gfn);
+
+        gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1);
+
         pfn = gfn_to_pfn(vcpu->kvm, gfn);
         if (is_error_pfn(pfn)) {
                 kvm_release_pfn_clean(pfn);
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -420,13 +420,13 @@ static int FNAME(page_fault)(struct kvm_
                 return 0;
         }
 
+        mmu_seq = vcpu->kvm->mmu_notifier_seq;
+        smp_rmb();
         if (walker.level >= PT_DIRECTORY_LEVEL) {
                 level = min(walker.level, mapping_level(vcpu, walker.gfn));
                 walker.gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE(level) - 1);
         }
 
-        mmu_seq = vcpu->kvm->mmu_notifier_seq;
-        smp_rmb();
         pfn = gfn_to_pfn(vcpu->kvm, walker.gfn);
 
         /* mmio */
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -289,6 +289,7 @@ gfn_t unalias_gfn_instantiation(struct k
 
 struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
 unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn);
+pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr);
 void kvm_release_page_clean(struct page *page);
 void kvm_release_page_dirty(struct page *page);
 void kvm_set_page_dirty(struct page *page);
@@ -317,7 +318,8 @@ int kvm_clear_guest_page(struct kvm *kvm
 int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len);
 struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
 int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn);
-unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn);
+unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn,
+                                 unsigned long *addr);
 void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
 
 void kvm_vcpu_block(struct kvm_vcpu *vcpu);
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -81,7 +81,7 @@ int kvm_iommu_map_pages(struct kvm *kvm,
                 }
 
                 /* Get the page size we could use to map */
-                page_size = kvm_host_page_size(kvm, gfn);
+                page_size = kvm_host_page_size(kvm, gfn, NULL);
 
                 /* Make sure the page_size does not exceed the memslot */
                 while ((gfn + (page_size >> PAGE_SHIFT)) > end_gfn)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -95,8 +95,36 @@ static bool largepages_enabled = true;
 inline int kvm_is_mmio_pfn(pfn_t pfn)
 {
         if (pfn_valid(pfn)) {
-                struct page *page = compound_head(pfn_to_page(pfn));
-                return PageReserved(page);
+                struct page *head;
+                struct page *tail = pfn_to_page(pfn);
+                head = compound_head(tail);
+                if (head != tail) {
+                        smp_rmb();
+                        /*
+                         * head may be a dangling pointer.
+                         * __split_huge_page_refcount clears PageTail
+                         * before overwriting first_page, so if
+                         * PageTail is still there it means the head
+                         * pointer isn't dangling.
+                         */
+                        if (PageTail(tail)) {
+                                /*
+                                 * the "head" is not a dangling
+                                 * pointer but the hugepage may have
+                                 * been splitted from under us (and we
+                                 * may not hold a reference count on
+                                 * the head page so it can be reused
+                                 * before we run PageReferenced), so
+                                 * we've to recheck PageTail before
+                                 * returning what we just read.
+                                 */
+                                int reserved = PageReserved(head);
+                                smp_rmb();
+                                if (PageTail(tail))
+                                        return reserved;
+                        }
+                }
+                return PageReserved(tail);
         }
 
         return true;
@@ -873,7 +901,8 @@ int kvm_is_visible_gfn(struct kvm *kvm,
 }
 EXPORT_SYMBOL_GPL(kvm_is_visible_gfn);
 
-unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn)
+unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn,
+                                 unsigned long *addrp)
 {
         struct vm_area_struct *vma;
         unsigned long addr, size;
@@ -881,6 +910,8 @@ unsigned long kvm_host_page_size(struct
         size = PAGE_SIZE;
 
         addr = gfn_to_hva(kvm, gfn);
+        if (addrp)
+                *addrp = addr;
         if (kvm_is_error_hva(addr))
                 return PAGE_SIZE;
 
@@ -932,7 +963,7 @@ unsigned long gfn_to_hva(struct kvm *kvm
 }
 EXPORT_SYMBOL_GPL(gfn_to_hva);
 
-static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr)
+pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr)
 {
         struct page *page[1];
         int npages;
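
Not part of the patch, just for context: the PageTransCompound check added to
host_mapping_level() only pays off when the guest RAM in the userspace VMM
process is actually backed by transparent hugepages. Below is a minimal
userspace sketch of how a VMM could arrange that, assuming the MADV_HUGEPAGE
advice from the transparent hugepage series and a 2MB huge page size;
alloc_guest_ram is a hypothetical helper name, not an existing API.

    #include <stdlib.h>
    #include <sys/mman.h>

    #ifndef MADV_HUGEPAGE
    #define MADV_HUGEPAGE 14        /* assumed value, from the THP series */
    #endif

    /* Hypothetical helper: allocate guest RAM that THP can back. */
    static void *alloc_guest_ram(size_t size)
    {
            void *ram;

            /* align to the 2MB huge page size so the whole range is eligible */
            if (posix_memalign(&ram, 2 * 1024 * 1024, size))
                    return NULL;

            /* advisory only: the mapping keeps working if THP is unavailable */
            madvise(ram, size, MADV_HUGEPAGE);

            /* the VMM would register this region via KVM_SET_USER_MEMORY_REGION */
            return ram;
    }

With such a backing, kvm_host_page_size() still reports PAGE_SIZE for the
anonymous VMA, and it is the new hva_to_pfn()/PageTransCompound path in
host_mapping_level() above that lets the KVM MMU map the range at the 2MB
level.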