This tunable refers to the amount of free memory which the VM will attempt to sustain. It is mainly needed for atomic allocations (eg, networking receive). It is currently hardwired to 1024k, which is far too large for small machines and too small for large machines. Rework it to be 128k on tiny machines and 16M on huge machines. include/linux/mm.h | 2 ++ include/linux/mmzone.h | 2 +- init/main.c | 2 +- mm/oom_kill.c | 14 -------------- mm/page_alloc.c | 40 +++++++++++++++++++++++++++++++++++++++- mm/swap.c | 20 ++++++++++++++++++++ 6 files changed, 63 insertions(+), 17 deletions(-) diff -puN mm/page_alloc.c~scale-min_free_kbytes mm/page_alloc.c --- 25/mm/page_alloc.c~scale-min_free_kbytes 2003-10-14 17:53:47.000000000 -0700 +++ 25-akpm/mm/page_alloc.c 2003-10-14 17:54:07.000000000 -0700 @@ -1589,7 +1589,7 @@ void __init page_alloc_init(void) * that the pages_{min,low,high} values for each zone are set correctly * with respect to min_free_kbytes. */ -void setup_per_zone_pages_min(void) +static void setup_per_zone_pages_min(void) { unsigned long pages_min = min_free_kbytes >> (PAGE_SHIFT - 10); unsigned long lowmem_pages = 0; @@ -1633,6 +1633,44 @@ void setup_per_zone_pages_min(void) } /* + * Initialise min_free_kbytes. + * + * For small machines we want it small (128k min). For large machines + * we want it large (16MB max). But it is not linear, because network + * bandwidth does not increase linearly with machine size. We use + * + * min_free_kbytes = sqrt(lowmem_kbytes) + * + * which yields + * + * 8MB: 128k + * 16MB: 170k + * 32MB: 256k + * 64MB: 341k + * 128MB: 512k + * 256MB: 682k + * 512MB: 1024k + * 1024MB: 1365k + * 2048MB: 2048k + * 4096MB: 2739k + * 8192MB: 4096k + * 16348MB: 5461k + */ +void __init init_per_zone_pages_min(void) +{ + unsigned long lowmem_kbytes; + + lowmem_kbytes = nr_free_buffer_pages() * (PAGE_SIZE >> 10); + + min_free_kbytes = int_sqrt(lowmem_kbytes); + if (min_free_kbytes < 128) + min_free_kbytes = 128; + if (min_free_kbytes > 16384) + min_free_kbytes = 16384; + setup_per_zone_pages_min(); +} + +/* * min_free_kbytes_sysctl_handler - just a wrapper around proc_dointvec() so * that we can call setup_per_zone_pages_min() whenever min_free_kbytes * changes. diff -puN init/main.c~scale-min_free_kbytes init/main.c --- 25/init/main.c~scale-min_free_kbytes 2003-10-14 17:53:47.000000000 -0700 +++ 25-akpm/init/main.c 2003-10-14 17:53:47.000000000 -0700 @@ -396,7 +396,7 @@ asmlinkage void __init start_kernel(void lock_kernel(); printk(linux_banner); setup_arch(&command_line); - setup_per_zone_pages_min(); + init_per_zone_pages_min(); setup_per_cpu_areas(); /* diff -puN include/linux/mmzone.h~scale-min_free_kbytes include/linux/mmzone.h --- 25/include/linux/mmzone.h~scale-min_free_kbytes 2003-10-14 17:53:47.000000000 -0700 +++ 25-akpm/include/linux/mmzone.h 2003-10-14 17:53:47.000000000 -0700 @@ -284,7 +284,7 @@ struct ctl_table; struct file; int min_free_kbytes_sysctl_handler(struct ctl_table *, int, struct file *, void *, size_t *); -extern void setup_per_zone_pages_min(void); +extern void init_per_zone_pages_min(void); #ifdef CONFIG_NUMA diff -puN mm/oom_kill.c~scale-min_free_kbytes mm/oom_kill.c --- 25/mm/oom_kill.c~scale-min_free_kbytes 2003-10-14 17:53:47.000000000 -0700 +++ 25-akpm/mm/oom_kill.c 2003-10-14 17:53:47.000000000 -0700 @@ -24,20 +24,6 @@ /* #define DEBUG */ /** - * int_sqrt - oom_kill.c internal function, rough approximation to sqrt - * @x: integer of which to calculate the sqrt - * - * A very rough approximation to the sqrt() function. - */ -static unsigned int int_sqrt(unsigned int x) -{ - unsigned int out = x; - while (x & ~(unsigned int)1) x >>=2, out >>=1; - if (x) out -= out >> 2; - return (out ? out : 1); -} - -/** * oom_badness - calculate a numeric value for how bad this task has been * @p: task struct of which task we should calculate * diff -puN mm/swap.c~scale-min_free_kbytes mm/swap.c --- 25/mm/swap.c~scale-min_free_kbytes 2003-10-14 17:53:47.000000000 -0700 +++ 25-akpm/mm/swap.c 2003-10-14 17:53:47.000000000 -0700 @@ -380,6 +380,26 @@ void vm_acct_memory(long pages) EXPORT_SYMBOL(vm_acct_memory); #endif +/** + * int_sqrt - rough approximation to sqrt + * @x: integer of which to calculate the sqrt + * + * A very rough approximation to the sqrt() function. + */ +unsigned long int_sqrt(unsigned long x) +{ + unsigned long out = x; + + while (x & ~(unsigned long)1) { + x >>= 2; + out >>= 1; + } + + if (x) + out -= out >> 2; + + return (out ? out : 1); +} /* * Perform any setup for the swap system diff -puN include/linux/mm.h~scale-min_free_kbytes include/linux/mm.h --- 25/include/linux/mm.h~scale-min_free_kbytes 2003-10-14 17:53:47.000000000 -0700 +++ 25-akpm/include/linux/mm.h 2003-10-14 17:53:47.000000000 -0700 @@ -615,6 +615,8 @@ extern struct page * follow_page(struct extern int remap_page_range(struct vm_area_struct *vma, unsigned long from, unsigned long to, unsigned long size, pgprot_t prot); +unsigned long int_sqrt(unsigned long x); + #ifndef CONFIG_DEBUG_PAGEALLOC static inline void kernel_map_pages(struct page *page, int numpages, int enable) _