Kenneth, does the patch below look reasonable? It's close to trivial on everything except IA64, which I can't easily test on. Actually, it occurs to me that this may even fix a bug on IA64 - if follow_page() was called on an address in region 1 that wasn't in a VMA, then I think it would attempt to follow it as a normal page, which sounds bad. If you think it is sane, I intend to push to akpm. hugepage_vma() is both misleadingly named and unnecessary. On most archs it always returns NULL, and on IA64 the vma it returns is never used. The function's real purpose is to determine whether the address it is passed is a special hugepage address which must be looked up in hugepage pagetables, rather than being looked up in the normal pagetables (which might have specially marked hugepage PMDs or PTEs). This patch kills off hugepage_vma() and folds the logic it really needs into follow_huge_addr(). That now returns a (page *) if called on a special hugepage address, and an error encoded with ERR_PTR otherwise. This also requires tweaking the IA64 code to check that the hugepage PTE is present in follow_huge_addr() - previously this was guaranteed, since it was only called if the address was in an existing hugepage VMA, and hugepages are always prefaulted. 
Index: working-2.6/include/linux/hugetlb.h =================================================================== --- 25-akpm/arch/i386/mm/hugetlbpage.c | 47 ++++++++++++++-------------------- 25-akpm/arch/ia64/mm/hugetlbpage.c | 29 ++++++++++---------- 25-akpm/arch/ppc64/mm/hugetlbpage.c | 10 +------ 25-akpm/arch/sh/mm/hugetlbpage.c | 8 ----- 25-akpm/arch/sparc64/mm/hugetlbpage.c | 8 ----- 25-akpm/include/linux/hugetlb.h | 9 ++---- 25-akpm/mm/memory.c | 8 ++--- 7 files changed, 45 insertions(+), 74 deletions(-) diff -puN arch/i386/mm/hugetlbpage.c~kill-off-hugepage_vma arch/i386/mm/hugetlbpage.c --- 25/arch/i386/mm/hugetlbpage.c~kill-off-hugepage_vma Fri Apr 16 13:37:34 2004 +++ 25-akpm/arch/i386/mm/hugetlbpage.c Fri Apr 16 13:37:34 2004 @@ -140,32 +140,31 @@ follow_hugetlb_page(struct mm_struct *mm #if 0 /* This is just for testing */ struct page * -follow_huge_addr(struct mm_struct *mm, - struct vm_area_struct *vma, unsigned long address, int write) +follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) { unsigned long start = address; int length = 1; int nr; struct page *page; + struct vm_area_struct *vma; - nr = follow_hugetlb_page(mm, vma, &page, NULL, &start, &length, 0); - if (nr == 1) - return page; - return NULL; -} + if (! mm->used_hugetlb) + return ERR_PTR(-EINVAL); -/* - * If virtual address `addr' lies within a huge page, return its controlling - * VMA, else NULL. 
- */ -struct vm_area_struct *hugepage_vma(struct mm_struct *mm, unsigned long addr) -{ - if (mm->used_hugetlb) { - struct vm_area_struct *vma = find_vma(mm, addr); - if (vma && is_vm_hugetlb_page(vma)) - return vma; - } - return NULL; + vma = find_vma(mm, address); + if (!vma || !is_vm_hugetlb_page(vma)) + return ERR_PTR(-EINVAL); + + pte_t *pte = huge_pte_offset(mm, address); + + /* hugetlb should be locked, and hence, prefaulted */ + WARN_ON(!pte || pte_none(*pte)); + + page = &pte_page(*pte)[(address & ~HPAGE_MASK) >> PAGE_SHIFT]; + + WARN_ON(!PageCompound(page)); + + return page; } int pmd_huge(pmd_t pmd) @@ -183,15 +182,9 @@ follow_huge_pmd(struct mm_struct *mm, un #else struct page * -follow_huge_addr(struct mm_struct *mm, - struct vm_area_struct *vma, unsigned long address, int write) +follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) { - return NULL; -} - -struct vm_area_struct *hugepage_vma(struct mm_struct *mm, unsigned long addr) -{ - return NULL; + return ERR_PTR(-EINVAL); } int pmd_huge(pmd_t pmd) diff -puN arch/ia64/mm/hugetlbpage.c~kill-off-hugepage_vma arch/ia64/mm/hugetlbpage.c --- 25/arch/ia64/mm/hugetlbpage.c~kill-off-hugepage_vma Fri Apr 16 13:37:34 2004 +++ 25-akpm/arch/ia64/mm/hugetlbpage.c Fri Apr 16 13:37:34 2004 @@ -49,8 +49,12 @@ huge_pte_offset (struct mm_struct *mm, u pte_t *pte = NULL; pgd = pgd_offset(mm, taddr); - pmd = pmd_offset(pgd, taddr); - pte = pte_offset_map(pmd, taddr); + if (pgd_present(*pgd)) { + pmd = pmd_offset(pgd, taddr); + if (pmd_present(*pmd)) + pte = pte_offset_map(pmd, taddr); + } + return pte; } @@ -150,24 +154,19 @@ back1: return i; } -struct vm_area_struct *hugepage_vma(struct mm_struct *mm, unsigned long addr) -{ - if (mm->used_hugetlb) { - if (REGION_NUMBER(addr) == REGION_HPAGE) { - struct vm_area_struct *vma = find_vma(mm, addr); - if (vma && is_vm_hugetlb_page(vma)) - return vma; - } - } - return NULL; -} - -struct page *follow_huge_addr(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long
addr, int write) +struct page *follow_huge_addr(struct mm_struct *mm, unsigned long addr, int write) { struct page *page; pte_t *ptep; + if (! mm->used_hugetlb) + return ERR_PTR(-EINVAL); + if (REGION_NUMBER(addr) != REGION_HPAGE) + return ERR_PTR(-EINVAL); + ptep = huge_pte_offset(mm, addr); + if (!ptep || pte_none(*ptep)) + return NULL; page = pte_page(*ptep); page += ((addr & ~HPAGE_MASK) >> PAGE_SHIFT); return page; diff -puN arch/ppc64/mm/hugetlbpage.c~kill-off-hugepage_vma arch/ppc64/mm/hugetlbpage.c --- 25/arch/ppc64/mm/hugetlbpage.c~kill-off-hugepage_vma Fri Apr 16 13:37:34 2004 +++ 25-akpm/arch/ppc64/mm/hugetlbpage.c Fri Apr 16 13:37:34 2004 @@ -334,15 +334,9 @@ follow_hugetlb_page(struct mm_struct *mm } struct page * -follow_huge_addr(struct mm_struct *mm, - struct vm_area_struct *vma, unsigned long address, int write) +follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) { - return NULL; -} - -struct vm_area_struct *hugepage_vma(struct mm_struct *mm, unsigned long addr) -{ - return NULL; + return ERR_PTR(-EINVAL); } int pmd_huge(pmd_t pmd) diff -puN arch/sh/mm/hugetlbpage.c~kill-off-hugepage_vma arch/sh/mm/hugetlbpage.c --- 25/arch/sh/mm/hugetlbpage.c~kill-off-hugepage_vma Fri Apr 16 13:37:34 2004 +++ 25-akpm/arch/sh/mm/hugetlbpage.c Fri Apr 16 13:37:34 2004 @@ -166,15 +166,9 @@ int follow_hugetlb_page(struct mm_struct } struct page *follow_huge_addr(struct mm_struct *mm, - struct vm_area_struct *vma, unsigned long address, int write) { - return NULL; -} - -struct vm_area_struct *hugepage_vma(struct mm_struct *mm, unsigned long addr) -{ - return NULL; + return ERR_PTR(-EINVAL); } int pmd_huge(pmd_t pmd) diff -puN arch/sparc64/mm/hugetlbpage.c~kill-off-hugepage_vma arch/sparc64/mm/hugetlbpage.c --- 25/arch/sparc64/mm/hugetlbpage.c~kill-off-hugepage_vma Fri Apr 16 13:37:34 2004 +++ 25-akpm/arch/sparc64/mm/hugetlbpage.c Fri Apr 16 13:37:34 2004 @@ -164,15 +164,9 @@ int follow_hugetlb_page(struct mm_struct } struct page 
*follow_huge_addr(struct mm_struct *mm, - struct vm_area_struct *vma, unsigned long address, int write) { - return NULL; -} - -struct vm_area_struct *hugepage_vma(struct mm_struct *mm, unsigned long addr) -{ - return NULL; + return ERR_PTR(-EINVAL); } int pmd_huge(pmd_t pmd) diff -puN include/linux/hugetlb.h~kill-off-hugepage_vma include/linux/hugetlb.h --- 25/include/linux/hugetlb.h~kill-off-hugepage_vma Fri Apr 16 13:37:34 2004 +++ 25-akpm/include/linux/hugetlb.h Fri Apr 16 13:37:34 2004 @@ -22,10 +22,8 @@ void huge_page_release(struct page *); int hugetlb_report_meminfo(char *); int is_hugepage_mem_enough(size_t); unsigned long hugetlb_total_pages(void); -struct page *follow_huge_addr(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long address, int write); -struct vm_area_struct *hugepage_vma(struct mm_struct *mm, - unsigned long address); +struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, + int write); struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, pmd_t *pmd, int write); int is_aligned_hugepage_range(unsigned long addr, unsigned long len); @@ -67,7 +65,7 @@ static inline unsigned long hugetlb_tota } #define follow_hugetlb_page(m,v,p,vs,a,b,i) ({ BUG(); 0; }) -#define follow_huge_addr(mm, vma, addr, write) 0 +#define follow_huge_addr(mm, addr, write) ERR_PTR(-EINVAL) #define copy_hugetlb_page_range(src, dst, vma) ({ BUG(); 0; }) #define hugetlb_prefault(mapping, vma) ({ BUG(); 0; }) #define zap_hugepage_range(vma, start, len) BUG() @@ -75,7 +73,6 @@ static inline unsigned long hugetlb_tota #define huge_page_release(page) BUG() #define is_hugepage_mem_enough(size) 0 #define hugetlb_report_meminfo(buf) 0 -#define hugepage_vma(mm, addr) 0 #define mark_mm_hugetlb(mm, vma) do { } while (0) #define follow_huge_pmd(mm, addr, pmd, write) 0 #define is_aligned_hugepage_range(addr, len) 0 diff -puN mm/memory.c~kill-off-hugepage_vma mm/memory.c --- 25/mm/memory.c~kill-off-hugepage_vma Fri Apr 16 13:37:34 
2004 +++ 25-akpm/mm/memory.c Fri Apr 16 13:37:34 2004 @@ -619,11 +619,11 @@ follow_page(struct mm_struct *mm, unsign pmd_t *pmd; pte_t *ptep, pte; unsigned long pfn; - struct vm_area_struct *vma; + struct page *page; - vma = hugepage_vma(mm, address); - if (vma) - return follow_huge_addr(mm, vma, address, write); + page = follow_huge_addr(mm, address, write); + if (! IS_ERR(page)) + return page; pgd = pgd_offset(mm, address); if (pgd_none(*pgd) || pgd_bad(*pgd)) _