Subject: memcg compound From: Andrea Arcangeli Teach memcg to charge/uncharge compound pages. Signed-off-by: Andrea Arcangeli Acked-by: Rik van Riel --- Documentation/cgroups/memory.txt | 4 + mm/memcontrol.c | 80 +++++++++++++++++++++++---------------- 2 files changed, 52 insertions(+), 32 deletions(-) --- a/Documentation/cgroups/memory.txt +++ b/Documentation/cgroups/memory.txt @@ -4,6 +4,10 @@ NOTE: The Memory Resource Controller has to as the memory controller in this document. Do not confuse memory controller used here with the memory controller that is used in hardware. +NOTE: When in this documentation we refer to PAGE_SIZE, we actually +mean the real page size of the page being accounted, which is bigger than +PAGE_SIZE for compound pages. + Salient features a. Enable control of Anonymous, Page Cache (mapped and unmapped) and --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1424,8 +1424,8 @@ static int __cpuinit memcg_stock_cpu_cal * oom-killer can be invoked. */ static int __mem_cgroup_try_charge(struct mm_struct *mm, - gfp_t gfp_mask, struct mem_cgroup **memcg, - bool oom, struct page *page) + gfp_t gfp_mask, struct mem_cgroup **memcg, + bool oom, struct page *page, int page_size) { struct mem_cgroup *mem, *mem_over_limit; int nr_retries = MEM_CGROUP_RECLAIM_RETRIES; @@ -1438,6 +1438,9 @@ static int __mem_cgroup_try_charge(struc return 0; } + if (PageTransHuge(page)) + csize = page_size; + /* * We always charge the cgroup the mm_struct belongs to. 
* The mm_struct's mem_cgroup changes on task migration if the @@ -1462,8 +1465,9 @@ static int __mem_cgroup_try_charge(struc int ret = 0; unsigned long flags = 0; - if (consume_stock(mem)) - goto charged; + if (!PageTransHuge(page)) + if (consume_stock(mem)) + goto charged; ret = res_counter_charge(&mem->res, csize, &fail_res); if (likely(!ret)) { @@ -1483,7 +1487,7 @@ static int __mem_cgroup_try_charge(struc res); /* reduce request size and retry */ - if (csize > PAGE_SIZE) { + if (csize > page_size) { csize = PAGE_SIZE; continue; } @@ -1556,7 +1560,7 @@ static int __mem_cgroup_try_charge(struc goto nomem; } } - if (csize > PAGE_SIZE) + if (csize > page_size) refill_stock(mem, csize - PAGE_SIZE); charged: /* @@ -1593,9 +1597,10 @@ static void __mem_cgroup_cancel_charge(s /* we don't need css_put for root */ } -static void mem_cgroup_cancel_charge(struct mem_cgroup *mem) +static void mem_cgroup_cancel_charge(struct mem_cgroup *mem, + int page_size) { - __mem_cgroup_cancel_charge(mem, 1); + __mem_cgroup_cancel_charge(mem, page_size >> PAGE_SHIFT); } /* @@ -1651,8 +1656,9 @@ struct mem_cgroup *try_get_mem_cgroup_fr */ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem, - struct page_cgroup *pc, - enum charge_type ctype) + struct page_cgroup *pc, + enum charge_type ctype, + int page_size) { /* try_charge() can return NULL to *memcg, taking care of it. */ if (!mem) @@ -1661,7 +1667,7 @@ static void __mem_cgroup_commit_charge(s lock_page_cgroup(pc); if (unlikely(PageCgroupUsed(pc))) { unlock_page_cgroup(pc); - mem_cgroup_cancel_charge(mem); + mem_cgroup_cancel_charge(mem, page_size); return; } @@ -1732,7 +1738,7 @@ static void __mem_cgroup_move_account(st mem_cgroup_charge_statistics(from, pc, false); if (uncharge) /* This is not "cancel", but cancel_charge does all we need. 
*/ - mem_cgroup_cancel_charge(from); + mem_cgroup_cancel_charge(from, PAGE_SIZE); /* caller should have done css_get */ pc->mem_cgroup = to; @@ -1788,13 +1794,14 @@ static int mem_cgroup_move_parent(struct goto put; parent = mem_cgroup_from_cont(pcg); - ret = __mem_cgroup_try_charge(NULL, gfp_mask, &parent, false, page); + ret = __mem_cgroup_try_charge(NULL, gfp_mask, &parent, false, page, + PAGE_SIZE); if (ret || !parent) goto put_back; ret = mem_cgroup_move_account(pc, child, parent, true); if (ret) - mem_cgroup_cancel_charge(parent); + mem_cgroup_cancel_charge(parent, PAGE_SIZE); put_back: putback_lru_page(page); put: @@ -1816,6 +1823,10 @@ static int mem_cgroup_charge_common(stru struct mem_cgroup *mem; struct page_cgroup *pc; int ret; + int page_size = PAGE_SIZE; + + if (PageTransHuge(page)) + page_size <<= compound_order(page); pc = lookup_page_cgroup(page); /* can happen at boot */ @@ -1824,11 +1835,12 @@ static int mem_cgroup_charge_common(stru prefetchw(pc); mem = memcg; - ret = __mem_cgroup_try_charge(mm, gfp_mask, &mem, true, page); + ret = __mem_cgroup_try_charge(mm, gfp_mask, &mem, true, page, + page_size); if (ret || !mem) return ret; - __mem_cgroup_commit_charge(mem, pc, ctype); + __mem_cgroup_commit_charge(mem, pc, ctype, page_size); return 0; } @@ -1837,8 +1849,6 @@ int mem_cgroup_newpage_charge(struct pag { if (mem_cgroup_disabled()) return 0; - if (PageCompound(page)) - return 0; /* * If already mapped, we don't have to account. * If page cache, page->mapping has address_space. 
@@ -1851,7 +1861,7 @@ int mem_cgroup_newpage_charge(struct pag if (unlikely(!mm)) mm = &init_mm; return mem_cgroup_charge_common(page, mm, gfp_mask, - MEM_CGROUP_CHARGE_TYPE_MAPPED, NULL); + MEM_CGROUP_CHARGE_TYPE_MAPPED, NULL); } static void @@ -1944,14 +1954,14 @@ int mem_cgroup_try_charge_swapin(struct if (!mem) goto charge_cur_mm; *ptr = mem; - ret = __mem_cgroup_try_charge(NULL, mask, ptr, true, page); + ret = __mem_cgroup_try_charge(NULL, mask, ptr, true, page, PAGE_SIZE); /* drop extra refcnt from tryget */ css_put(&mem->css); return ret; charge_cur_mm: if (unlikely(!mm)) mm = &init_mm; - return __mem_cgroup_try_charge(mm, mask, ptr, true, page); + return __mem_cgroup_try_charge(mm, mask, ptr, true, page, PAGE_SIZE); } static void @@ -1967,7 +1977,7 @@ __mem_cgroup_commit_charge_swapin(struct cgroup_exclude_rmdir(&ptr->css); pc = lookup_page_cgroup(page); mem_cgroup_lru_del_before_commit_swapcache(page); - __mem_cgroup_commit_charge(ptr, pc, ctype); + __mem_cgroup_commit_charge(ptr, pc, ctype, PAGE_SIZE); mem_cgroup_lru_add_after_commit_swapcache(page); /* * Now swap is on-memory. 
This means this page may be @@ -2016,11 +2026,12 @@ void mem_cgroup_cancel_charge_swapin(str return; if (!mem) return; - mem_cgroup_cancel_charge(mem); + mem_cgroup_cancel_charge(mem, PAGE_SIZE); } static void -__do_uncharge(struct mem_cgroup *mem, const enum charge_type ctype) +__do_uncharge(struct mem_cgroup *mem, const enum charge_type ctype, + int page_size) { struct memcg_batch_info *batch = NULL; bool uncharge_memsw = true; @@ -2053,14 +2064,14 @@ __do_uncharge(struct mem_cgroup *mem, co if (batch->memcg != mem) goto direct_uncharge; /* remember freed charge and uncharge it later */ - batch->bytes += PAGE_SIZE; + batch->bytes += page_size; if (uncharge_memsw) - batch->memsw_bytes += PAGE_SIZE; + batch->memsw_bytes += page_size; return; direct_uncharge: - res_counter_uncharge(&mem->res, PAGE_SIZE); + res_counter_uncharge(&mem->res, page_size); if (uncharge_memsw) - res_counter_uncharge(&mem->memsw, PAGE_SIZE); + res_counter_uncharge(&mem->memsw, page_size); return; } @@ -2073,6 +2084,10 @@ __mem_cgroup_uncharge_common(struct page struct page_cgroup *pc; struct mem_cgroup *mem = NULL; struct mem_cgroup_per_zone *mz; + int page_size = PAGE_SIZE; + + if (PageTransHuge(page)) + page_size <<= compound_order(page); if (mem_cgroup_disabled()) return NULL; @@ -2112,7 +2127,7 @@ __mem_cgroup_uncharge_common(struct page } if (!mem_cgroup_is_root(mem)) - __do_uncharge(mem, ctype); + __do_uncharge(mem, ctype, page_size); if (ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT) mem_cgroup_swap_statistics(mem, true); mem_cgroup_charge_statistics(mem, pc, false); @@ -2341,7 +2356,7 @@ int mem_cgroup_prepare_migration(struct if (mem) { ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, &mem, false, - page); + page, PAGE_SIZE); css_put(&mem->css); } *ptr = mem; @@ -2384,7 +2399,7 @@ void mem_cgroup_end_migration(struct mem * __mem_cgroup_commit_charge() check PCG_USED bit of page_cgroup. * So, double-counting is effectively avoided. 
*/ - __mem_cgroup_commit_charge(mem, pc, ctype); + __mem_cgroup_commit_charge(mem, pc, ctype, PAGE_SIZE); /* * Both of oldpage and newpage are still under lock_page(). @@ -3864,7 +3879,7 @@ one_by_one: cond_resched(); } ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, &mem, - false, NULL); + false, NULL, PAGE_SIZE); if (ret || !mem) /* mem_cgroup_clear_mc() will do uncharge later */ return -ENOMEM; @@ -4124,6 +4139,7 @@ static int mem_cgroup_move_charge_pte_ra pte_t *pte; spinlock_t *ptl; + VM_BUG_ON(pmd_trans_huge(*pmd)); retry: pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); for (; addr != end; addr += PAGE_SIZE) {