From: Balbir Singh

memrlimit cgroup does not handle error cases after may_expand_vm().  This
BUG was reported by Kamezawa, with the test case below to reproduce it:

[root@iridium kamezawa]# cat /opt/cgroup/test/memrlimit.usage_in_bytes
71921664
[root@iridium kamezawa]# ulimit -s 3
[root@iridium kamezawa]# ls
Killed
[root@iridium kamezawa]# ls
Killed
[root@iridium kamezawa]# ls
Killed
[root@iridium kamezawa]# ls
Killed
[root@iridium kamezawa]# ls
Killed
[root@iridium kamezawa]# ulimit -s unlimited
[root@iridium kamezawa]# cat /opt/cgroup/test/memrlimit.usage_in_bytes
72368128
[root@iridium kamezawa]#

This patch adds better error handling to fix the reported problem.

Reported-by: KAMEZAWA Hiroyuki
Signed-off-by: Balbir Singh
Cc: Pavel Emelyanov
Cc: Sudhir Kumar
Cc: YAMAMOTO Takashi
Cc: Paul Menage
Cc: Li Zefan
Cc: Balbir Singh
Cc: David Rientjes
Cc: Vivek Goyal
Cc: Hugh Dickins
Signed-off-by: Andrew Morton
---

 mm/mmap.c   |   36 +++++++++++++++++++++++++-----------
 mm/mremap.c |    6 ++++++
 2 files changed, 31 insertions(+), 11 deletions(-)

diff -puN mm/mmap.c~memrlimit-improve-error-handling mm/mmap.c
--- a/mm/mmap.c~memrlimit-improve-error-handling
+++ a/mm/mmap.c
@@ -1125,7 +1125,7 @@ munmap_back:
 			 */
 			charged = len >> PAGE_SHIFT;
 			if (security_vm_enough_memory(charged))
-				return -ENOMEM;
+				goto undo_charge;
 			vm_flags |= VM_ACCOUNT;
 		}
 	}
@@ -1241,6 +1241,8 @@ free_vma:
 unacct_error:
 	if (charged)
 		vm_unacct_memory(charged);
+undo_charge:
+	memrlimit_cgroup_uncharge_as(mm, len >> PAGE_SHIFT);
 	return error;
 }
 
@@ -1536,14 +1538,15 @@ static int acct_stack_growth(struct vm_a
 	struct mm_struct *mm = vma->vm_mm;
 	struct rlimit *rlim = current->signal->rlim;
 	unsigned long new_start;
+	int ret = -ENOMEM;
 
 	/* address space limit tests */
 	if (!may_expand_vm(mm, grow))
-		return -ENOMEM;
+		goto out;
 
 	/* Stack limit test */
 	if (size > rlim[RLIMIT_STACK].rlim_cur)
-		return -ENOMEM;
+		goto undo_charge;
 
 	/* mlock limit tests */
 	if (vma->vm_flags & VM_LOCKED) {
@@ -1552,21 +1555,23 @@ static int acct_stack_growth(struct vm_a
 		locked = mm->locked_vm + grow;
 		limit = rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
 		if (locked > limit && !capable(CAP_IPC_LOCK))
-			return -ENOMEM;
+			goto undo_charge;
 	}
 
 	/* Check to ensure the stack will not grow into a hugetlb-only region */
 	new_start = (vma->vm_flags & VM_GROWSUP) ? vma->vm_start :
 			vma->vm_end - size;
-	if (is_hugepage_only_range(vma->vm_mm, new_start, size))
-		return -EFAULT;
+	if (is_hugepage_only_range(vma->vm_mm, new_start, size)) {
+		ret = -EFAULT;
+		goto undo_charge;
+	}
 
 	/*
 	 * Overcommit.. This must be the final test, as it will
 	 * update security statistics.
 	 */
 	if (security_vm_enough_memory(grow))
-		return -ENOMEM;
+		goto undo_charge;
 
 	/* Ok, everything looks good - let it rip */
 	mm->total_vm += grow;
@@ -1574,6 +1579,11 @@ static int acct_stack_growth(struct vm_a
 		mm->locked_vm += grow;
 	vm_stat_account(mm, vma->vm_flags, vma->vm_file, grow);
 	return 0;
+undo_charge:
+	/* Undo memrlimit charge */
+	memrlimit_cgroup_uncharge_as(mm, grow);
+out:
+	return ret;
 }
 
 #if defined(CONFIG_STACK_GROWSUP) || defined(CONFIG_IA64)
@@ -1960,6 +1970,7 @@ unsigned long do_brk(unsigned long addr,
 	struct rb_node ** rb_link, * rb_parent;
 	pgoff_t pgoff = addr >> PAGE_SHIFT;
 	int error;
+	int ret = -ENOMEM;
 
 	len = PAGE_ALIGN(len);
 	if (!len)
@@ -2013,13 +2024,13 @@ unsigned long do_brk(unsigned long addr,
 
 	/* Check against address space limits *after* clearing old maps... */
 	if (!may_expand_vm(mm, len >> PAGE_SHIFT))
-		return -ENOMEM;
+		return ret;
 
 	if (mm->map_count > sysctl_max_map_count)
-		return -ENOMEM;
+		goto undo_charge;
 
 	if (security_vm_enough_memory(len >> PAGE_SHIFT))
-		return -ENOMEM;
+		goto undo_charge;
 
 	/* Can we just expand an old private anonymous mapping? */
 	if (vma_merge(mm, prev, addr, addr + len, flags,
@@ -2032,7 +2043,7 @@ unsigned long do_brk(unsigned long addr,
 	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
 	if (!vma) {
 		vm_unacct_memory(len >> PAGE_SHIFT);
-		return -ENOMEM;
+		goto undo_charge;
 	}
 
 	vma->vm_mm = mm;
@@ -2049,6 +2060,9 @@ out:
 		make_pages_present(addr, addr + len);
 	}
 	return addr;
+undo_charge:
+	memrlimit_cgroup_uncharge_as(mm, len >> PAGE_SHIFT);
+	return ret;
 }
 
 EXPORT_SYMBOL(do_brk);
diff -puN mm/mremap.c~memrlimit-improve-error-handling mm/mremap.c
--- a/mm/mremap.c~memrlimit-improve-error-handling
+++ a/mm/mremap.c
@@ -13,6 +13,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -265,6 +266,7 @@ unsigned long do_mremap(unsigned long ad
 	struct vm_area_struct *vma;
 	unsigned long ret = -EINVAL;
 	unsigned long charged = 0;
+	int vm_expanded = 0;
 
 	if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE))
 		goto out;
@@ -358,6 +360,7 @@ unsigned long do_mremap(unsigned long ad
 		goto out;
 	}
 
+	vm_expanded = 1;
 	if (vma->vm_flags & VM_ACCOUNT) {
 		charged = (new_len - old_len) >> PAGE_SHIFT;
 		if (security_vm_enough_memory(charged))
@@ -422,6 +425,9 @@ out:
 	if (ret & ~PAGE_MASK)
 		vm_unacct_memory(charged);
 out_nc:
+	if (vm_expanded)
+		memrlimit_cgroup_uncharge_as(mm,
+			(new_len - old_len) >> PAGE_SHIFT);
 	return ret;
 }
_
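
For readers outside the kernel tree, here is a minimal userspace C sketch of
the idiom the patch applies: once the address-space charge has been taken,
every later failure path must undo it via a goto-based unwind rather than
returning directly.  The charge_as()/uncharge_as() helpers and the limit
value below are hypothetical stand-ins for illustration, not the memrlimit
cgroup's real API.

/*
 * Minimal userspace sketch (not kernel code) of the goto-based unwind idiom.
 * charge_as()/uncharge_as() stand in for the memrlimit charge/uncharge calls.
 */
#include <stdio.h>

static long charged_pages;          /* stands in for the cgroup usage counter */
static const long as_limit = 1024;  /* pretend address-space limit, in pages  */

static int charge_as(long pages)
{
	if (charged_pages + pages > as_limit)
		return -1;              /* charge refused, nothing to undo */
	charged_pages += pages;
	return 0;
}

static void uncharge_as(long pages)
{
	charged_pages -= pages;
}

/*
 * Mirrors the shape of acct_stack_growth()/do_brk() after the patch: the
 * charge succeeds first, so every later failure must jump to undo_charge
 * instead of returning directly and leaking the charge.
 */
static int expand_mapping(long grow_pages, int later_check_fails)
{
	int ret = -1;

	if (charge_as(grow_pages))      /* analogous to may_expand_vm() */
		goto out;

	if (later_check_fails)          /* e.g. an rlimit or security check */
		goto undo_charge;

	return 0;                       /* success: keep the charge */

undo_charge:
	uncharge_as(grow_pages);        /* the step the old code was missing */
out:
	return ret;
}

int main(void)
{
	expand_mapping(100, 1);
	printf("usage after failed expand:     %ld page(s), expected 0\n",
	       charged_pages);
	expand_mapping(100, 0);
	printf("usage after successful expand: %ld page(s), expected 100\n",
	       charged_pages);
	return 0;
}

Without the unwind, the failed expansion would leave the usage counter
inflated, which is exactly the growth of memrlimit.usage_in_bytes seen in the
test case above.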