Subject: memcg compound From: Andrea Arcangeli Teach memcg to charge/uncharge compound pages. Signed-off-by: Andrea Arcangeli Acked-by: Rik van Riel --- diff --git a/mm/memcontrol.c b/mm/memcontrol.c --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1587,12 +1587,14 @@ static int __cpuinit memcg_stock_cpu_cal * oom-killer can be invoked. */ static int __mem_cgroup_try_charge(struct mm_struct *mm, - gfp_t gfp_mask, struct mem_cgroup **memcg, bool oom) + gfp_t gfp_mask, + struct mem_cgroup **memcg, bool oom, + int page_size) { struct mem_cgroup *mem, *mem_over_limit; int nr_retries = MEM_CGROUP_RECLAIM_RETRIES; struct res_counter *fail_res; - int csize = CHARGE_SIZE; + int csize = max(CHARGE_SIZE, (unsigned long) page_size); /* * Unlike gloval-vm's OOM-kill, we're not in memory shortage @@ -1627,8 +1629,9 @@ static int __mem_cgroup_try_charge(struc int ret = 0; unsigned long flags = 0; - if (consume_stock(mem)) - goto done; + if (page_size == PAGE_SIZE) + if (consume_stock(mem)) + goto done; ret = res_counter_charge(&mem->res, csize, &fail_res); if (likely(!ret)) { @@ -1648,8 +1651,8 @@ static int __mem_cgroup_try_charge(struc res); /* reduce request size and retry */ - if (csize > PAGE_SIZE) { - csize = PAGE_SIZE; + if (csize > page_size) { + csize = page_size; continue; } if (!(gfp_mask & __GFP_WAIT)) @@ -1723,8 +1726,11 @@ static int __mem_cgroup_try_charge(struc goto bypass; } } - if (csize > PAGE_SIZE) - refill_stock(mem, csize - PAGE_SIZE); + if (csize > page_size) + refill_stock(mem, csize - page_size); + /* increase css->refcnt by the number of tail pages */ + if (page_size != PAGE_SIZE) + __css_get(&mem->css, (page_size >> PAGE_SHIFT) - 1); done: return 0; nomem: @@ -1754,9 +1760,10 @@ static void __mem_cgroup_cancel_charge(s /* we don't need css_put for root */ } -static void mem_cgroup_cancel_charge(struct mem_cgroup *mem) +static void mem_cgroup_cancel_charge(struct mem_cgroup *mem, + int page_size) { - __mem_cgroup_cancel_charge(mem, 1); + __mem_cgroup_cancel_charge(mem, page_size >> PAGE_SHIFT); } /* @@ -1812,8 +1819,9 @@ struct mem_cgroup *try_get_mem_cgroup_fr */ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem, - struct page_cgroup *pc, - enum charge_type ctype) + struct page_cgroup *pc, + enum charge_type ctype, + int page_size) { /* try_charge() can return NULL to *memcg, taking care of it. */ if (!mem) @@ -1822,7 +1830,7 @@ static void __mem_cgroup_commit_charge(s lock_page_cgroup(pc); if (unlikely(PageCgroupUsed(pc))) { unlock_page_cgroup(pc); - mem_cgroup_cancel_charge(mem); + mem_cgroup_cancel_charge(mem, page_size); return; } @@ -1896,7 +1904,7 @@ static void __mem_cgroup_move_account(st mem_cgroup_charge_statistics(from, pc, false); if (uncharge) /* This is not "cancel", but cancel_charge does all we need. */ - mem_cgroup_cancel_charge(from); + mem_cgroup_cancel_charge(from, PAGE_SIZE); /* caller should have done css_get */ pc->mem_cgroup = to; @@ -1957,13 +1965,14 @@ static int mem_cgroup_move_parent(struct goto put; parent = mem_cgroup_from_cont(pcg); - ret = __mem_cgroup_try_charge(NULL, gfp_mask, &parent, false); + ret = __mem_cgroup_try_charge(NULL, gfp_mask, &parent, false, + PAGE_SIZE); if (ret || !parent) goto put_back; ret = mem_cgroup_move_account(pc, child, parent, true); if (ret) - mem_cgroup_cancel_charge(parent); + mem_cgroup_cancel_charge(parent, PAGE_SIZE); put_back: putback_lru_page(page); put: @@ -1985,6 +1994,10 @@ static int mem_cgroup_charge_common(stru struct mem_cgroup *mem; struct page_cgroup *pc; int ret; + int page_size = PAGE_SIZE; + + if (PageTransHuge(page)) + page_size <<= compound_order(page); pc = lookup_page_cgroup(page); /* can happen at boot */ @@ -1993,11 +2006,11 @@ static int mem_cgroup_charge_common(stru prefetchw(pc); mem = memcg; - ret = __mem_cgroup_try_charge(mm, gfp_mask, &mem, true); + ret = __mem_cgroup_try_charge(mm, gfp_mask, &mem, true, page_size); if (ret || !mem) return ret; - __mem_cgroup_commit_charge(mem, pc, ctype); + __mem_cgroup_commit_charge(mem, pc, ctype, page_size); return 0; } @@ -2006,8 +2019,6 @@ int mem_cgroup_newpage_charge(struct pag { if (mem_cgroup_disabled()) return 0; - if (PageCompound(page)) - return 0; /* * If already mapped, we don't have to account. * If page cache, page->mapping has address_space. @@ -2020,7 +2031,7 @@ int mem_cgroup_newpage_charge(struct pag if (unlikely(!mm)) mm = &init_mm; return mem_cgroup_charge_common(page, mm, gfp_mask, - MEM_CGROUP_CHARGE_TYPE_MAPPED, NULL); + MEM_CGROUP_CHARGE_TYPE_MAPPED, NULL); } static void @@ -2113,14 +2124,14 @@ int mem_cgroup_try_charge_swapin(struct if (!mem) goto charge_cur_mm; *ptr = mem; - ret = __mem_cgroup_try_charge(NULL, mask, ptr, true); + ret = __mem_cgroup_try_charge(NULL, mask, ptr, true, PAGE_SIZE); /* drop extra refcnt from tryget */ css_put(&mem->css); return ret; charge_cur_mm: if (unlikely(!mm)) mm = &init_mm; - return __mem_cgroup_try_charge(mm, mask, ptr, true); + return __mem_cgroup_try_charge(mm, mask, ptr, true, PAGE_SIZE); } static void @@ -2136,7 +2147,7 @@ __mem_cgroup_commit_charge_swapin(struct cgroup_exclude_rmdir(&ptr->css); pc = lookup_page_cgroup(page); mem_cgroup_lru_del_before_commit_swapcache(page); - __mem_cgroup_commit_charge(ptr, pc, ctype); + __mem_cgroup_commit_charge(ptr, pc, ctype, PAGE_SIZE); mem_cgroup_lru_add_after_commit_swapcache(page); /* * Now swap is on-memory. This means this page may be @@ -2185,11 +2196,12 @@ void mem_cgroup_cancel_charge_swapin(str return; if (!mem) return; - mem_cgroup_cancel_charge(mem); + mem_cgroup_cancel_charge(mem, PAGE_SIZE); } static void -__do_uncharge(struct mem_cgroup *mem, const enum charge_type ctype) +__do_uncharge(struct mem_cgroup *mem, const enum charge_type ctype, + int page_size) { struct memcg_batch_info *batch = NULL; bool uncharge_memsw = true; @@ -2224,14 +2236,14 @@ __do_uncharge(struct mem_cgroup *mem, co if (batch->memcg != mem) goto direct_uncharge; /* remember freed charge and uncharge it later */ - batch->bytes += PAGE_SIZE; + batch->bytes += page_size; if (uncharge_memsw) - batch->memsw_bytes += PAGE_SIZE; + batch->memsw_bytes += page_size; return; direct_uncharge: - res_counter_uncharge(&mem->res, PAGE_SIZE); + res_counter_uncharge(&mem->res, page_size); if (uncharge_memsw) - res_counter_uncharge(&mem->memsw, PAGE_SIZE); + res_counter_uncharge(&mem->memsw, page_size); if (unlikely(batch->memcg != mem)) memcg_oom_recover(mem); return; @@ -2246,6 +2258,10 @@ __mem_cgroup_uncharge_common(struct page struct page_cgroup *pc; struct mem_cgroup *mem = NULL; struct mem_cgroup_per_zone *mz; + int page_size = PAGE_SIZE; + + if (PageTransHuge(page)) + page_size <<= compound_order(page); if (mem_cgroup_disabled()) return NULL; @@ -2286,7 +2302,7 @@ __mem_cgroup_uncharge_common(struct page } if (!mem_cgroup_is_root(mem)) - __do_uncharge(mem, ctype); + __do_uncharge(mem, ctype, page_size); if (ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT) mem_cgroup_swap_statistics(mem, true); mem_cgroup_charge_statistics(mem, pc, false); @@ -2305,7 +2321,7 @@ __mem_cgroup_uncharge_common(struct page memcg_check_events(mem, page); /* at swapout, this memcg will be accessed to record to swap */ if (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT) - css_put(&mem->css); + __css_put(&mem->css, page_size >> PAGE_SHIFT); return mem; @@ -2502,6 +2518,7 @@ int mem_cgroup_prepare_migration(struct enum charge_type ctype; int ret = 0; + VM_BUG_ON(PageTransHuge(page)); if (mem_cgroup_disabled()) return 0; @@ -2551,7 +2568,7 @@ int mem_cgroup_prepare_migration(struct return 0; *ptr = mem; - ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, ptr, false); + ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, ptr, false, PAGE_SIZE); css_put(&mem->css);/* drop extra refcnt */ if (ret || *ptr == NULL) { if (PageAnon(page)) { @@ -2578,7 +2595,7 @@ int mem_cgroup_prepare_migration(struct ctype = MEM_CGROUP_CHARGE_TYPE_CACHE; else ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM; - __mem_cgroup_commit_charge(mem, pc, ctype); + __mem_cgroup_commit_charge(mem, pc, ctype, PAGE_SIZE); return ret; } @@ -4194,7 +4211,8 @@ one_by_one: batch_count = PRECHARGE_COUNT_AT_ONCE; cond_resched(); } - ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, &mem, false); + ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, &mem, false, + PAGE_SIZE); if (ret || !mem) /* mem_cgroup_clear_mc() will do uncharge later */ return -ENOMEM; @@ -4356,6 +4374,7 @@ static int mem_cgroup_count_precharge_pt pte_t *pte; spinlock_t *ptl; + VM_BUG_ON(pmd_trans_huge(*pmd)); pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); for (; addr != end; pte++, addr += PAGE_SIZE) if (is_target_pte_for_mc(vma, addr, *pte, NULL)) @@ -4501,6 +4520,7 @@ static int mem_cgroup_move_charge_pte_ra spinlock_t *ptl; retry: + VM_BUG_ON(pmd_trans_huge(*pmd)); pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); for (; addr != end; addr += PAGE_SIZE) { pte_t ptent = *(pte++);