Subject: memcontrol: prevent endless loop with huge pages and near-limit group From: Johannes Weiner If reclaim after a failed charging was unsuccessful, the limits are checked again, just in case they settled by means of other tasks. This is all fine as long as every charge is of size PAGE_SIZE, because in that case, being below the limit means having at least PAGE_SIZE bytes available. But with transparent huge pages, we may end up in an endless loop where charging and reclaim fail, but we keep going because the limits are not yet exceeded, although not allowing for a huge page. Fix this up by explicitly checking for enough room, not just whether we are within limits. Signed-off-by: Johannes Weiner Signed-off-by: Andrea Arcangeli --- diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h index fcb9884..03212e4 100644 --- a/include/linux/res_counter.h +++ b/include/linux/res_counter.h @@ -182,6 +182,18 @@ static inline bool res_counter_check_under_limit(struct res_counter *cnt) return ret; } +static inline bool res_counter_check_room(struct res_counter *cnt, + unsigned long room) +{ + bool ret; + unsigned long flags; + + spin_lock_irqsave(&cnt->lock, flags); + ret = cnt->limit - cnt->usage >= room; + spin_unlock_irqrestore(&cnt->lock, flags); + return ret; +} + static inline bool res_counter_check_under_soft_limit(struct res_counter *cnt) { bool ret; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 41a8cbd..c395dcc 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1056,6 +1056,15 @@ static bool mem_cgroup_check_under_limit(struct mem_cgroup *mem) return false; } +static bool mem_cgroup_check_room(struct mem_cgroup *mem, unsigned long room) +{ + if (!res_counter_check_room(&mem->res, room)) + return false; + if (!do_swap_account) + return true; + return res_counter_check_room(&mem->memsw, room); +} + static unsigned int get_swappiness(struct mem_cgroup *memcg) { struct cgroup *cgrp = memcg->css.cgroup; @@ -1657,20 +1666,10 @@ static int 
__mem_cgroup_try_charge(struct mm_struct *mm, if (!(gfp_mask & __GFP_WAIT)) goto nomem; - ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, NULL, + mem_cgroup_hierarchical_reclaim(mem_over_limit, NULL, gfp_mask, flags); - if (ret) - continue; - /* - * try_to_free_mem_cgroup_pages() might not give us a full - * picture of reclaim. Some pages are reclaimed and might be - * moved to swap cache or just unmapped from the cgroup. - * Check the limit again to see if the reclaim reduced the - * current usage of the cgroup before giving up - * - */ - if (mem_cgroup_check_under_limit(mem_over_limit)) + if (mem_cgroup_check_room(mem_over_limit, csize)) continue; /* try to avoid oom while someone is moving charge */