From: David Gibson This patch started as simply removing a few never-used macros from asm-ppc64/pgtable.h, then kind of grew. It now makes a bunch of cleanups to the ppc64 low-level header files (with corresponding changes to .c files where necessary) such as: - Abolishing never-used macros - Eliminating multiple #defines with the same purpose - Removing pointless macros (cases where just expanding the macro everywhere turns out clearer and more sensible) - Removing some cases where macros which could be defined in terms of each other weren't - Moving imalloc() related definitions from pgtable.h to their own header file (imalloc.h) - Re-arranging headers to group things more logically - Moving all VSID allocation related things to mmu.h, instead of being split between mmu.h and mmu_context.h - Removing some reserved space for flags from the PMD - we're not using it. - Fix some bugs which broke compile with STRICT_MM_TYPECHECKS. Signed-off-by: David Gibson Acked-by: Paul Mackerras Signed-off-by: Andrew Morton --- arch/ppc64/kernel/pci.c | 2 arch/ppc64/mm/hash_native.c | 3 arch/ppc64/mm/hash_utils.c | 11 +- arch/ppc64/mm/imalloc.c | 5 - arch/ppc64/mm/init.c | 1 arch/ppc64/mm/stab.c | 5 + include/asm-ppc64/imalloc.h | 24 ++++ include/asm-ppc64/mmu.h | 193 ++++++++++++++++++++++++++++++---------- include/asm-ppc64/mmu_context.h | 82 ---------------- include/asm-ppc64/page.h | 15 +-- include/asm-ppc64/pgtable.h | 117 +++++------------------- 11 files changed, 219 insertions(+), 239 deletions(-) diff -puN arch/ppc64/kernel/pci.c~ppc64-pgtableh-and-other-header-cleanups arch/ppc64/kernel/pci.c --- 25/arch/ppc64/kernel/pci.c~ppc64-pgtableh-and-other-header-cleanups 2005-05-02 21:20:30.000000000 -0700 +++ 25-akpm/arch/ppc64/kernel/pci.c 2005-05-02 21:20:30.000000000 -0700 @@ -438,7 +438,7 @@ pgprot_t pci_phys_mem_access_prot(struct int i; if (page_is_ram(offset >> PAGE_SHIFT)) - return prot; + return __pgprot(prot); prot |= _PAGE_NO_CACHE | _PAGE_GUARDED; diff -puN arch/ppc64/mm/hash_native.c~ppc64-pgtableh-and-other-header-cleanups arch/ppc64/mm/hash_native.c --- 25/arch/ppc64/mm/hash_native.c~ppc64-pgtableh-and-other-header-cleanups 2005-05-02 21:20:30.000000000 -0700 +++ 25-akpm/arch/ppc64/mm/hash_native.c 2005-05-02 21:20:30.000000000 -0700 @@ -320,8 +320,7 @@ static void native_flush_hash_range(unsi j = 0; for (i = 0; i < number; i++) { - if ((batch->addr[i] >= USER_START) && - (batch->addr[i] <= USER_END)) + if (batch->addr[i] < KERNELBASE) vsid = get_vsid(context, batch->addr[i]); else vsid = get_kernel_vsid(batch->addr[i]); diff -puN arch/ppc64/mm/hash_utils.c~ppc64-pgtableh-and-other-header-cleanups arch/ppc64/mm/hash_utils.c --- 25/arch/ppc64/mm/hash_utils.c~ppc64-pgtableh-and-other-header-cleanups 2005-05-02 21:20:30.000000000 -0700 +++ 25-akpm/arch/ppc64/mm/hash_utils.c 2005-05-02 21:20:30.000000000 -0700 @@ -298,24 +298,23 @@ int hash_page(unsigned long ea, unsigned int local = 0; cpumask_t tmp; + if ((ea & ~REGION_MASK) > EADDR_MASK) + return 1; + switch (REGION_ID(ea)) { case USER_REGION_ID: user_region = 1; mm = current->mm; - if ((ea > USER_END) || (! mm)) + if (! mm) return 1; vsid = get_vsid(mm->context.id, ea); break; case IO_REGION_ID: - if (ea > IMALLOC_END) - return 1; mm = &ioremap_mm; vsid = get_kernel_vsid(ea); break; case VMALLOC_REGION_ID: - if (ea > VMALLOC_END) - return 1; mm = &init_mm; vsid = get_kernel_vsid(ea); break; @@ -362,7 +361,7 @@ void flush_hash_page(unsigned long conte unsigned long vsid, vpn, va, hash, secondary, slot; unsigned long huge = pte_huge(pte); - if ((ea >= USER_START) && (ea <= USER_END)) + if (ea < KERNELBASE) vsid = get_vsid(context, ea); else vsid = get_kernel_vsid(ea); diff -puN arch/ppc64/mm/imalloc.c~ppc64-pgtableh-and-other-header-cleanups arch/ppc64/mm/imalloc.c --- 25/arch/ppc64/mm/imalloc.c~ppc64-pgtableh-and-other-header-cleanups 2005-05-02 21:20:30.000000000 -0700 +++ 25-akpm/arch/ppc64/mm/imalloc.c 2005-05-02 21:20:30.000000000 -0700 @@ -14,6 +14,7 @@ #include #include #include +#include static DECLARE_MUTEX(imlist_sem); struct vm_struct * imlist = NULL; @@ -23,11 +24,11 @@ static int get_free_im_addr(unsigned lon unsigned long addr; struct vm_struct **p, *tmp; - addr = IMALLOC_START; + addr = ioremap_bot; for (p = &imlist; (tmp = *p) ; p = &tmp->next) { if (size + addr < (unsigned long) tmp->addr) break; - if ((unsigned long)tmp->addr >= IMALLOC_START) + if ((unsigned long)tmp->addr >= ioremap_bot) addr = tmp->size + (unsigned long) tmp->addr; if (addr > IMALLOC_END-size) return 1; diff -puN arch/ppc64/mm/init.c~ppc64-pgtableh-and-other-header-cleanups arch/ppc64/mm/init.c --- 25/arch/ppc64/mm/init.c~ppc64-pgtableh-and-other-header-cleanups 2005-05-02 21:20:30.000000000 -0700 +++ 25-akpm/arch/ppc64/mm/init.c 2005-05-02 21:20:30.000000000 -0700 @@ -64,6 +64,7 @@ #include #include #include +#include int mem_init_done; unsigned long ioremap_bot = IMALLOC_BASE; diff -puN arch/ppc64/mm/stab.c~ppc64-pgtableh-and-other-header-cleanups arch/ppc64/mm/stab.c --- 25/arch/ppc64/mm/stab.c~ppc64-pgtableh-and-other-header-cleanups 2005-05-02 21:20:30.000000000 -0700 +++ 25-akpm/arch/ppc64/mm/stab.c 2005-05-02 21:20:30.000000000 -0700 @@ -19,6 +19,11 @@ #include #include +struct stab_entry { + unsigned long esid_data; + unsigned long vsid_data; +}; + /* Both the segment table and SLB code uses the following cache */ #define NR_STAB_CACHE_ENTRIES 8 DEFINE_PER_CPU(long, stab_cache_ptr); diff -puN /dev/null include/asm-ppc64/imalloc.h --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25-akpm/include/asm-ppc64/imalloc.h 2005-05-02 21:20:30.000000000 -0700 @@ -0,0 +1,24 @@ +#ifndef _PPC64_IMALLOC_H +#define _PPC64_IMALLOC_H + +/* + * Define the address range of the imalloc VM area. + */ +#define PHBS_IO_BASE IOREGIONBASE +#define IMALLOC_BASE (IOREGIONBASE + 0x80000000ul) /* Reserve 2 gigs for PHBs */ +#define IMALLOC_END (IOREGIONBASE + EADDR_MASK) + + +/* imalloc region types */ +#define IM_REGION_UNUSED 0x1 +#define IM_REGION_SUBSET 0x2 +#define IM_REGION_EXISTS 0x4 +#define IM_REGION_OVERLAP 0x8 +#define IM_REGION_SUPERSET 0x10 + +extern struct vm_struct * im_get_free_area(unsigned long size); +extern struct vm_struct * im_get_area(unsigned long v_addr, unsigned long size, + int region_type); +unsigned long im_free(void *addr); + +#endif /* _PPC64_IMALLOC_H */ diff -puN include/asm-ppc64/mmu_context.h~ppc64-pgtableh-and-other-header-cleanups include/asm-ppc64/mmu_context.h --- 25/include/asm-ppc64/mmu_context.h~ppc64-pgtableh-and-other-header-cleanups 2005-05-02 21:20:30.000000000 -0700 +++ 25-akpm/include/asm-ppc64/mmu_context.h 2005-05-02 21:20:30.000000000 -0700 @@ -84,86 +84,4 @@ static inline void activate_mm(struct mm local_irq_restore(flags); } -/* VSID allocation - * =============== - * - * We first generate a 36-bit "proto-VSID". For kernel addresses this - * is equal to the ESID, for user addresses it is: - * (context << 15) | (esid & 0x7fff) - * - * The two forms are distinguishable because the top bit is 0 for user - * addresses, whereas the top two bits are 1 for kernel addresses. - * Proto-VSIDs with the top two bits equal to 0b10 are reserved for - * now. - * - * The proto-VSIDs are then scrambled into real VSIDs with the - * multiplicative hash: - * - * VSID = (proto-VSID * VSID_MULTIPLIER) % VSID_MODULUS - * where VSID_MULTIPLIER = 268435399 = 0xFFFFFC7 - * VSID_MODULUS = 2^36-1 = 0xFFFFFFFFF - * - * This scramble is only well defined for proto-VSIDs below - * 0xFFFFFFFFF, so both proto-VSID and actual VSID 0xFFFFFFFFF are - * reserved. VSID_MULTIPLIER is prime, so in particular it is - * co-prime to VSID_MODULUS, making this a 1:1 scrambling function. - * Because the modulus is 2^n-1 we can compute it efficiently without - * a divide or extra multiply (see below). - * - * This scheme has several advantages over older methods: - * - * - We have VSIDs allocated for every kernel address - * (i.e. everything above 0xC000000000000000), except the very top - * segment, which simplifies several things. - * - * - We allow for 15 significant bits of ESID and 20 bits of - * context for user addresses. i.e. 8T (43 bits) of address space for - * up to 1M contexts (although the page table structure and context - * allocation will need changes to take advantage of this). - * - * - The scramble function gives robust scattering in the hash - * table (at least based on some initial results). The previous - * method was more susceptible to pathological cases giving excessive - * hash collisions. - */ - -/* - * WARNING - If you change these you must make sure the asm - * implementations in slb_allocate(), do_stab_bolted and mmu.h - * (ASM_VSID_SCRAMBLE macro) are changed accordingly. - * - * You'll also need to change the precomputed VSID values in head.S - * which are used by the iSeries firmware. - */ - -static inline unsigned long vsid_scramble(unsigned long protovsid) -{ -#if 0 - /* The code below is equivalent to this function for arguments - * < 2^VSID_BITS, which is all this should ever be called - * with. However gcc is not clever enough to compute the - * modulus (2^n-1) without a second multiply. */ - return ((protovsid * VSID_MULTIPLIER) % VSID_MODULUS); -#else /* 1 */ - unsigned long x; - - x = protovsid * VSID_MULTIPLIER; - x = (x >> VSID_BITS) + (x & VSID_MODULUS); - return (x + ((x+1) >> VSID_BITS)) & VSID_MODULUS; -#endif /* 1 */ -} - -/* This is only valid for addresses >= KERNELBASE */ -static inline unsigned long get_kernel_vsid(unsigned long ea) -{ - return vsid_scramble(ea >> SID_SHIFT); -} - -/* This is only valid for user addresses (which are below 2^41) */ -static inline unsigned long get_vsid(unsigned long context, unsigned long ea) -{ - return vsid_scramble((context << USER_ESID_BITS) - | (ea >> SID_SHIFT)); -} - #endif /* __PPC64_MMU_CONTEXT_H */ diff -puN include/asm-ppc64/mmu.h~ppc64-pgtableh-and-other-header-cleanups include/asm-ppc64/mmu.h --- 25/include/asm-ppc64/mmu.h~ppc64-pgtableh-and-other-header-cleanups 2005-05-02 21:20:30.000000000 -0700 +++ 25-akpm/include/asm-ppc64/mmu.h 2005-05-02 21:20:30.000000000 -0700 @@ -15,19 +15,10 @@ #include #include -#include -#ifndef __ASSEMBLY__ - -/* Time to allow for more things here */ -typedef unsigned long mm_context_id_t; -typedef struct { - mm_context_id_t id; -#ifdef CONFIG_HUGETLB_PAGE - pgd_t *huge_pgdir; - u16 htlb_segs; /* bitmask */ -#endif -} mm_context_t; +/* + * Segment table + */ #define STE_ESID_V 0x80 #define STE_ESID_KS 0x20 @@ -36,15 +27,48 @@ typedef struct { #define STE_VSID_SHIFT 12 -struct stab_entry { - unsigned long esid_data; - unsigned long vsid_data; -}; +/* Location of cpu0's segment table */ +#define STAB0_PAGE 0x9 +#define STAB0_PHYS_ADDR (STAB0_PAGE<> VSID_BITS) + (x & VSID_MODULUS); + return (x + ((x+1) >> VSID_BITS)) & VSID_MODULUS; +#endif /* 1 */ +} + +/* This is only valid for addresses >= KERNELBASE */ +static inline unsigned long get_kernel_vsid(unsigned long ea) +{ + return vsid_scramble(ea >> SID_SHIFT); +} + +/* This is only valid for user addresses (which are below 2^41) */ +static inline unsigned long get_vsid(unsigned long context, unsigned long ea) +{ + return vsid_scramble((context << USER_ESID_BITS) + | (ea >> SID_SHIFT)); +} + +#endif /* __ASSEMBLY */ + #endif /* _PPC64_MMU_H_ */ diff -puN include/asm-ppc64/page.h~ppc64-pgtableh-and-other-header-cleanups include/asm-ppc64/page.h --- 25/include/asm-ppc64/page.h~ppc64-pgtableh-and-other-header-cleanups 2005-05-02 21:20:30.000000000 -0700 +++ 25-akpm/include/asm-ppc64/page.h 2005-05-02 21:20:30.000000000 -0700 @@ -23,7 +23,6 @@ #define PAGE_SHIFT 12 #define PAGE_SIZE (ASM_CONST(1) << PAGE_SHIFT) #define PAGE_MASK (~(PAGE_SIZE-1)) -#define PAGE_OFFSET_MASK (PAGE_SIZE-1) #define SID_SHIFT 28 #define SID_MASK 0xfffffffffUL @@ -85,9 +84,6 @@ /* align addr on a size boundary - adjust address up if needed */ #define _ALIGN(addr,size) _ALIGN_UP(addr,size) -/* to align the pointer to the (next) double word boundary */ -#define DOUBLEWORD_ALIGN(addr) _ALIGN(addr,sizeof(unsigned long)) - /* to align the pointer to the (next) page boundary */ #define PAGE_ALIGN(addr) _ALIGN(addr, PAGE_SIZE) @@ -100,7 +96,6 @@ #define REGION_SIZE 4UL #define REGION_SHIFT 60UL #define REGION_MASK (((1UL<>REGION_SHIFT) -#define VMALLOC_REGION_ID (VMALLOCBASE>>REGION_SHIFT) -#define KERNEL_REGION_ID (KERNELBASE>>REGION_SHIFT) +#define IO_REGION_ID (IOREGIONBASE >> REGION_SHIFT) +#define VMALLOC_REGION_ID (VMALLOCBASE >> REGION_SHIFT) +#define KERNEL_REGION_ID (KERNELBASE >> REGION_SHIFT) #define USER_REGION_ID (0UL) -#define REGION_ID(X) (((unsigned long)(X))>>REGION_SHIFT) +#define REGION_ID(ea) (((unsigned long)(ea)) >> REGION_SHIFT) -#define __bpn_to_ba(x) ((((unsigned long)(x))<> PAGE_SHIFT) #define __va(x) ((void *)((unsigned long)(x) + KERNELBASE)) diff -puN include/asm-ppc64/pgtable.h~ppc64-pgtableh-and-other-header-cleanups include/asm-ppc64/pgtable.h --- 25/include/asm-ppc64/pgtable.h~ppc64-pgtableh-and-other-header-cleanups 2005-05-02 21:20:30.000000000 -0700 +++ 25-akpm/include/asm-ppc64/pgtable.h 2005-05-02 21:20:30.000000000 -0700 @@ -17,16 +17,6 @@ #include -/* PMD_SHIFT determines what a second-level page table entry can map */ -#define PMD_SHIFT (PAGE_SHIFT + PAGE_SHIFT - 3) -#define PMD_SIZE (1UL << PMD_SHIFT) -#define PMD_MASK (~(PMD_SIZE-1)) - -/* PGDIR_SHIFT determines what a third-level page table entry can map */ -#define PGDIR_SHIFT (PAGE_SHIFT + (PAGE_SHIFT - 3) + (PAGE_SHIFT - 2)) -#define PGDIR_SIZE (1UL << PGDIR_SHIFT) -#define PGDIR_MASK (~(PGDIR_SIZE-1)) - /* * Entries per page directory level. The PTE level must use a 64b record * for each page table entry. The PMD and PGD level use a 32b record for @@ -40,40 +30,30 @@ #define PTRS_PER_PMD (1 << PMD_INDEX_SIZE) #define PTRS_PER_PGD (1 << PGD_INDEX_SIZE) -#define USER_PTRS_PER_PGD (1024) -#define FIRST_USER_ADDRESS 0 +/* PMD_SHIFT determines what a second-level page table entry can map */ +#define PMD_SHIFT (PAGE_SHIFT + PTE_INDEX_SIZE) +#define PMD_SIZE (1UL << PMD_SHIFT) +#define PMD_MASK (~(PMD_SIZE-1)) -#define EADDR_SIZE (PTE_INDEX_SIZE + PMD_INDEX_SIZE + \ - PGD_INDEX_SIZE + PAGE_SHIFT) +/* PGDIR_SHIFT determines what a third-level page table entry can map */ +#define PGDIR_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE) +#define PGDIR_SIZE (1UL << PGDIR_SHIFT) +#define PGDIR_MASK (~(PGDIR_SIZE-1)) + +#define FIRST_USER_ADDRESS 0 /* * Size of EA range mapped by our pagetables. */ -#define PGTABLE_EA_BITS 41 -#define PGTABLE_EA_MASK ((1UL<> PMD_TO_PTEPAGE_SHIFT)) +#define pmd_page_kernel(pmd) (__bpn_to_ba(pmd_val(pmd))) #define pmd_page(pmd) virt_to_page(pmd_page_kernel(pmd)) #define pud_set(pudp, pmdp) (pud_val(*(pudp)) = (__ba_to_bpn(pmdp))) @@ -266,8 +242,6 @@ void hugetlb_mm_free_pgd(struct mm_struc /* to find an entry in the ioremap page-table-directory */ #define pgd_offset_i(address) (ioremap_pgd + pgd_index(address)) -#define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) - /* * The following only work if pte_present() is true. * Undefined behaviour if not.. @@ -442,7 +416,7 @@ static inline void set_pte_at(struct mm_ pte_clear(mm, addr, ptep); flush_tlb_pending(); } - *ptep = __pte(pte_val(pte)) & ~_PAGE_HPTEFLAGS; + *ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS); } /* Set the dirty and/or accessed bits atomically in a linux PTE, this @@ -487,18 +461,13 @@ extern pgprot_t phys_mem_access_prot(str extern unsigned long ioremap_bot, ioremap_base; -#define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT) -#define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS) - -#define pte_ERROR(e) \ - printk("%s:%d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e)) #define pmd_ERROR(e) \ printk("%s:%d: bad pmd %08x.\n", __FILE__, __LINE__, pmd_val(e)) #define pgd_ERROR(e) \ printk("%s:%d: bad pgd %08x.\n", __FILE__, __LINE__, pgd_val(e)) -extern pgd_t swapper_pg_dir[1024]; -extern pgd_t ioremap_dir[1024]; +extern pgd_t swapper_pg_dir[]; +extern pgd_t ioremap_dir[]; extern void paging_init(void); @@ -540,43 +509,11 @@ extern void update_mmu_cache(struct vm_a */ #define kern_addr_valid(addr) (1) -#define io_remap_page_range(vma, vaddr, paddr, size, prot) \ - remap_pfn_range(vma, vaddr, (paddr) >> PAGE_SHIFT, size, prot) - #define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ remap_pfn_range(vma, vaddr, pfn, size, prot) -#define MK_IOSPACE_PFN(space, pfn) (pfn) -#define GET_IOSPACE(pfn) 0 -#define GET_PFN(pfn) (pfn) - void pgtable_cache_init(void); -extern void hpte_init_native(void); -extern void hpte_init_lpar(void); -extern void hpte_init_iSeries(void); - -/* imalloc region types */ -#define IM_REGION_UNUSED 0x1 -#define IM_REGION_SUBSET 0x2 -#define IM_REGION_EXISTS 0x4 -#define IM_REGION_OVERLAP 0x8 -#define IM_REGION_SUPERSET 0x10 - -extern struct vm_struct * im_get_free_area(unsigned long size); -extern struct vm_struct * im_get_area(unsigned long v_addr, unsigned long size, - int region_type); -unsigned long im_free(void *addr); - -extern long pSeries_lpar_hpte_insert(unsigned long hpte_group, - unsigned long va, unsigned long prpn, - int secondary, unsigned long hpteflags, - int bolted, int large); - -extern long native_hpte_insert(unsigned long hpte_group, unsigned long va, - unsigned long prpn, int secondary, - unsigned long hpteflags, int bolted, int large); - /* * find_linux_pte returns the address of a linux pte for a given * effective address and directory. If not found, it returns zero. _