Subject: split_huge_page anon_vma ordering dependency From: Andrea Arcangeli This documents how split_huge_page is safe vs new vma insertions into the anon_vma that may have already released the anon_vma->lock but not established pmds yet when split_huge_page starts. Signed-off-by: Andrea Arcangeli --- diff --git a/mm/huge_memory.c b/mm/huge_memory.c --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -841,6 +841,19 @@ static void __split_huge_page(struct pag continue; mapcount += __split_huge_page_splitting(page, vma, addr); } + /* + * It is critical that new vmas are added to the tail of the + * anon_vma list. This guarantees that if copy_huge_pmd() runs + * and establishes a child pmd before + * __split_huge_page_splitting() freezes the parent pmd (so if + * we fail to prevent copy_huge_pmd() from running until the + * whole __split_huge_page() is complete), we will still see + * the newly established pmd of the child later during the + * walk, to be able to set it as pmd_trans_splitting too. + */ + if (mapcount != page_mapcount(page)) + printk(KERN_ERR "mapcount %d page_mapcount %d\n", + mapcount, page_mapcount(page)); BUG_ON(mapcount != page_mapcount(page)); __split_huge_page_refcount(page); @@ -853,6 +866,9 @@ static void __split_huge_page(struct pag continue; mapcount2 += __split_huge_page_map(page, vma, addr); } + if (mapcount != mapcount2) + printk(KERN_ERR "mapcount %d mapcount2 %d page_mapcount %d\n", + mapcount, mapcount2, page_mapcount(page)); BUG_ON(mapcount != mapcount2); } diff --git a/mm/rmap.c b/mm/rmap.c --- a/mm/rmap.c +++ b/mm/rmap.c @@ -177,6 +177,10 @@ static void anon_vma_chain_link(struct v list_add(&avc->same_vma, &vma->anon_vma_chain); anon_vma_lock(anon_vma); + /* + * It's critical to add new vmas to the tail of the anon_vma, + * see comment in huge_memory.c:__split_huge_page(). + */ list_add_tail(&avc->same_anon_vma, &anon_vma->head); anon_vma_unlock(anon_vma); }