Index: linux-2.6.19-rc6-mm2/mm/hugetlb.c
===================================================================
--- linux-2.6.19-rc6-mm2.orig/mm/hugetlb.c	2006-12-04 11:22:15.000000000 -0800
+++ linux-2.6.19-rc6-mm2/mm/hugetlb.c	2006-12-04 11:47:57.000000000 -0800
@@ -1,6 +1,9 @@
 /*
  * Generic hugetlb support.
  * (C) William Irwin, April 2004
+ *
+ * (C) 2005-2006 SGI, NUMA allocation logic,
+ *     Christoph Lameter
  */
 #include 
 #include 
@@ -22,15 +25,37 @@
 #include "internal.h"
 
 const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL;
-static unsigned long nr_huge_pages, free_huge_pages, resv_huge_pages;
 unsigned long max_huge_pages;
-static struct list_head hugepage_freelists[MAX_NUMNODES];
-static unsigned int nr_huge_pages_node[MAX_NUMNODES];
-static unsigned int free_huge_pages_node[MAX_NUMNODES];
+
 /*
- * Protects updates to hugepage_freelists, nr_huge_pages, and free_huge_pages
+ * Hugetlb locking is accomplished through a variety of locks:
+ *
+ * 1. The hugetlb_lock protects global variables for huge pages and the
+ *    lists of available huge pages per node.
+ *
+ * 2. The page_table_lock is used to synchronize page table updates.
+ *
+ * 3. There is another global mutex, hugetlb_instantiation_mutex, that
+ *    serializes all huge page faults and currently limits the
+ *    scalability of huge pages. This lock ensures that no new
+ *    allocations are performed between the allocation of a huge
+ *    page and the installation of the page table entry.
+ *
+ * Lock order:
+ *	mmap_sem
+ *	hugetlb_instantiation_mutex
+ *	page_table_lock
+ *	hugetlb_lock
  */
+
 static DEFINE_SPINLOCK(hugetlb_lock);
+/*
+ * Variables protected by the hugetlb_lock
+ */
+static unsigned long nr_huge_pages, free_huge_pages, resv_huge_pages;
+static struct list_head hugepage_freelists[MAX_NUMNODES];
+static unsigned int nr_huge_pages_node[MAX_NUMNODES];
+static unsigned int free_huge_pages_node[MAX_NUMNODES];
 
 static void clear_huge_page(struct page *page, unsigned long addr)
 {
@@ -55,6 +80,9 @@
 	}
 }
 
+/*
+ * Must hold hugetlb_lock
+ */
 static void enqueue_huge_page(struct page *page)
 {
 	int nid = page_to_nid(page);
@@ -63,6 +91,9 @@
 	free_huge_pages_node[nid]++;
 }
 
+/*
+ * Must hold hugetlb_lock
+ */
 static struct page *dequeue_huge_page(struct vm_area_struct *vma,
 				unsigned long address)
 {
@@ -188,6 +219,9 @@
 	__free_pages(page, HUGETLB_PAGE_ORDER);
 }
 
+/*
+ * Must hold hugetlb_lock
+ */
 #ifdef CONFIG_HIGHMEM
 static void try_to_free_low(unsigned long count)
 {
@@ -274,10 +308,11 @@
 }
 
 /*
- * We cannot handle pagefaults against hugetlb pages at all. They cause
- * handle_mm_fault() to try to instantiate regular-sized pages in the
- * hugegpage VMA. do_page_fault() is supposed to trap this, so BUG is we get
- * this far.
+ * We do not need a hugetlb_nopage since there is an explicit
+ * check in __handle_mm_fault() that calls hugetlb_fault() if a fault
+ * occurs on a huge page vma.
+ *
+ * This exists only to trap other uses of nopage() (are there any?)
  */
 static struct page *hugetlb_nopage(struct vm_area_struct *vma,
 				unsigned long address, int *unused)
@@ -307,6 +342,9 @@
 	return entry;
 }
 
+/*
+ * Must hold page_table_lock
+ */
 static void set_huge_ptep_writable(struct vm_area_struct *vma,
 				unsigned long address, pte_t *ptep)
 {
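
Note for readers new to this path: the lock order documented in the new comment can be read
as a nesting discipline. The sketch below is illustrative only and is not part of the patch;
the function name and body are hypothetical and simplified, and only the lock names and their
nesting order (mmap_sem held by the caller, then hugetlb_instantiation_mutex, then
page_table_lock, then hugetlb_lock innermost) are taken from the comment above.

/*
 * Illustrative sketch only -- NOT part of this patch. The function name
 * and body are hypothetical; only the locks and the order in which they
 * nest are taken from the locking comment added by the patch.
 */
static int example_hugetlb_fault(struct mm_struct *mm,
				 struct vm_area_struct *vma,
				 unsigned long address)
{
	/* Caller already holds mm->mmap_sem (outermost lock). */

	mutex_lock(&hugetlb_instantiation_mutex);	/* serialize huge page faults */

	spin_lock(&mm->page_table_lock);		/* protect page table updates */
	spin_lock(&hugetlb_lock);			/* innermost: freelists and counters */

	/* ... take a huge page off a per-node freelist and install the pte ... */

	spin_unlock(&hugetlb_lock);
	spin_unlock(&mm->page_table_lock);

	mutex_unlock(&hugetlb_instantiation_mutex);
	return 0;
}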