From: Hugh Dickins Date: Fri, 11 Feb 2005 04:48:00 +0000 (-0800) Subject: [PATCH] general split_vma hugetlb fix X-Git-Tag: v2.6.11-rc4~13^2^2~2 X-Git-Url: http://git.neil.brown.name/?a=commitdiff_plain;h=b717da93b010bec7cc52d11a3637ac0d2ecaeea1;p=history.git [PATCH] general split_vma hugetlb fix My recent do_munmap hugetlb fix has proved inadequate. There are other places (madvise, mbind, mlock, mprotect) where split_vma is called. Only mprotect excludes a hugetlb vma: the others are in danger of splitting at a misaligned address, causing later BUGs. So move the ~HPAGE_MASK check from do_munmap to split_vma itself; and fix up those places (madvise and mlock) which expect split_vma can fail only with -ENOMEM, and wish to convert that to -EAGAIN. (It appears genuine that some of these syscalls should be failing with -ENOMEM and some with -EAGAIN, so respect those behaviours.) madvise_dontneed doesn't use split_vma, but is equally in danger of causing a hugetlb BUG via zap_page_range. Whereas elsewhere the patch is permissive (allowing the operation on a hugetlb vma even when pointless, so long as it doesn't missplit it), here we must use -EINVAL on any hugetlb vma, since a page fault would hit the BUG in its nopage. Signed-off-by: Hugh Dickins Acked-by: William Irwin Signed-off-by: Linus Torvalds --- diff --git a/mm/madvise.c b/mm/madvise.c index 875a871b73ba..944b5e52d812 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -8,7 +8,7 @@ #include #include #include - +#include /* * We can potentially split a vm area into separate @@ -18,18 +18,18 @@ static long madvise_behavior(struct vm_area_struct * vma, unsigned long start, unsigned long end, int behavior) { struct mm_struct * mm = vma->vm_mm; - int error; + int error = 0; if (start != vma->vm_start) { error = split_vma(mm, vma, start, 1); if (error) - return -EAGAIN; + goto out; } if (end != vma->vm_end) { error = split_vma(mm, vma, end, 0); if (error) - return -EAGAIN; + goto out; } /* @@ -48,7 +48,10 @@ static long madvise_behavior(struct vm_area_struct * vma, unsigned long start, break; } - return 0; +out: + if (error == -ENOMEM) + error = -EAGAIN; + return error; } /* @@ -94,7 +97,7 @@ static long madvise_willneed(struct vm_area_struct * vma, static long madvise_dontneed(struct vm_area_struct * vma, unsigned long start, unsigned long end) { - if (vma->vm_flags & VM_LOCKED) + if ((vma->vm_flags & VM_LOCKED) || is_vm_hugetlb_page(vma)) return -EINVAL; if (unlikely(vma->vm_flags & VM_NONLINEAR)) { diff --git a/mm/mlock.c b/mm/mlock.c index 0c41018fb37b..5c722ce45cd8 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -21,17 +21,15 @@ static int mlock_fixup(struct vm_area_struct * vma, goto out; if (start != vma->vm_start) { - if (split_vma(mm, vma, start, 1)) { - ret = -EAGAIN; + ret = split_vma(mm, vma, start, 1); + if (ret) goto out; - } } if (end != vma->vm_end) { - if (split_vma(mm, vma, end, 0)) { - ret = -EAGAIN; + ret = split_vma(mm, vma, end, 0); + if (ret) goto out; - } } /* @@ -53,6 +51,8 @@ static int mlock_fixup(struct vm_area_struct * vma, vma->vm_mm->locked_vm -= pages; out: + if (ret == -ENOMEM) + ret = -EAGAIN; return ret; } diff --git a/mm/mmap.c b/mm/mmap.c index 6a5453d31ca2..b8af0c8db48d 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1747,6 +1747,9 @@ int split_vma(struct mm_struct * mm, struct vm_area_struct * vma, struct mempolicy *pol; struct vm_area_struct *new; + if (is_vm_hugetlb_page(vma) && (addr & ~HPAGE_MASK)) + return -EINVAL; + if (mm->map_count >= sysctl_max_map_count) return -ENOMEM; @@ -1821,20 +1824,18 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len) * places tmp vma above, and higher split_vma places tmp vma below. */ if (start > mpnt->vm_start) { - if (is_vm_hugetlb_page(mpnt) && (start & ~HPAGE_MASK)) - return -EINVAL; - if (split_vma(mm, mpnt, start, 0)) - return -ENOMEM; + int error = split_vma(mm, mpnt, start, 0); + if (error) + return error; prev = mpnt; } /* Does it split the last one? */ last = find_vma(mm, end); if (last && end > last->vm_start) { - if (is_vm_hugetlb_page(last) && (end & ~HPAGE_MASK)) - return -EINVAL; - if (split_vma(mm, last, end, 1)) - return -ENOMEM; + int error = split_vma(mm, last, end, 1); + if (error) + return error; } mpnt = prev? prev->vm_next: mm->mmap;