diff -urN 2.2.20aa1/drivers/char/Config.in page-coloring/drivers/char/Config.in
--- 2.2.20aa1/drivers/char/Config.in	Sun Nov  4 02:15:34 2001
+++ page-coloring/drivers/char/Config.in	Fri Feb  8 08:22:08 2002
@@ -118,6 +118,8 @@
    endmenu
 fi
 
+# FIXME: this is definitely the wrong place and it shouldn't be a module but a sysctl
+tristate 'Page Coloring' CONFIG_PAGE_COLORING
 tristate '/dev/nvram support' CONFIG_NVRAM
 bool 'Enhanced Real Time Clock Support' CONFIG_RTC
diff -urN 2.2.20aa1/drivers/char/Makefile page-coloring/drivers/char/Makefile
--- 2.2.20aa1/drivers/char/Makefile	Sun Nov  4 02:15:34 2001
+++ page-coloring/drivers/char/Makefile	Thu Feb  7 16:30:17 2002
@@ -729,6 +729,11 @@
 
 M_OBJS += $(sort $(filter $(module-list), $(obj-m)))
 
+ifeq ($(CONFIG_PAGE_COLORING),m)
+  CONFIG_PAGE_COLORING_MODULE=y
+  M_OBJS += page_color.o
+endif
+
 include $(TOPDIR)/Rules.make
 
 fastdep:
diff -urN 2.2.20aa1/drivers/char/page_color.c page-coloring/drivers/char/page_color.c
--- 2.2.20aa1/drivers/char/page_color.c	Thu Jan  1 01:00:00 1970
+++ page-coloring/drivers/char/page_color.c	Fri Feb  8 05:18:22 2002
@@ -0,0 +1,167 @@
+/*
+ * This module implements page coloring, a systematic way
+ * to get the most performance out of the expensive cache
+ * memory your computer has.  At present the code is *only*
+ * to be built as a loadable kernel module.
+ *
+ * After building the kernel and rebooting, load the module
+ * and specify the cache size to use, like so:
+ *
+ *	insmod page_color.o cache_size=X
+ *
+ * where X is the size of the largest cache your system has.
+ * For machines with three cache levels (Alpha 21164, AMD K6-III)
+ * this will be the size in bytes of the L3 cache, and for all
+ * others it will be the size of the L2 cache.  If your system
+ * doesn't have at least an L2 cache, fer cryin' out loud GET SOME!
+ * When specifying the cache size you can use 'K' or 'M' to signify
+ * kilobytes or megabytes, respectively.  In any case, the cache
+ * size *must* be a power of two.
+ *
+ * insmod will load a module called 'page_color' which changes
+ * the way Linux allocates pages from the free list.  Once a page
+ * is given to another process the page coloring code will forget
+ * about it; thus it's always safe to start and stop the module
+ * while other processes are running.
+ *
+ * If Linux is configured for a /proc filesystem, the module will
+ * also create /proc/page_color as a means of reporting statistics.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
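+ *
+ * For example, with a 1M cache and 4K pages there are 1M/4K = 256
+ * page colors: two physical pages whose page frame numbers are
+ * equal modulo 256 land on the same cache lines and evict each
+ * other, while pages of different colors cannot conflict in a
+ * physically indexed cache.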
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+#include <linux/proc_fs.h>
+#include <linux/stat.h>
+#include <linux/init.h>
+#include <asm/page.h>
+#include <asm/spinlock.h>
+
+extern unsigned int page_coloring;
+extern unsigned int page_miss_count;
+extern unsigned int page_hit_count;
+extern unsigned int page_colors;
+extern unsigned long *page_color_table;
+extern spinlock_t page_alloc_lock;
+
+void fill_color_pool(void);
+void empty_color_pool(void);
+unsigned int page_color_alloc(void);
+
+#if defined(__alpha__)
+#define CACHE_SIZE_GUESS (4*1024*1024)
+#elif defined(__i386__)
+#define CACHE_SIZE_GUESS (256*1024)
+#else
+#define CACHE_SIZE_GUESS (1*1024*1024)
+#endif
+
+#ifdef CONFIG_PROC_FS
+
+int page_color_getinfo(char *buf, char **start, off_t fpos,
+		       int length, int dummy)
+{
+	char *p = buf;
+
+	p += sprintf(p, "colors: %u\n", page_colors);
+	p += sprintf(p, "hits:   %u\n", page_hit_count);
+	p += sprintf(p, "misses: %u\n", page_miss_count);
+
+	return p - buf;
+}
+
+static struct proc_dir_entry page_color_proc_entry = {
+	0,				/* low_ino: inode assigned dynamically */
+	10,				/* length of "page_color" */
+	"page_color",
+	S_IFREG | S_IRUGO,
+	1, 0, 0,			/* nlink, uid, gid */
+	0, 0,				/* size, inode operations */
+	page_color_getinfo
+};
+
+#endif
+
+
+#define page_color_init init_module
+
+void cleanup_module(void)
+{
+	unsigned long flags;
+
+	printk("page_color: terminating page coloring\n");
+
+#ifdef CONFIG_PROC_FS
+	proc_unregister(&proc_root, page_color_proc_entry.low_ino);
+#endif
+
+	spin_lock_irqsave(&page_alloc_lock, flags);
+	empty_color_pool();
+	page_coloring = 0;
+	spin_unlock_irqrestore(&page_alloc_lock, flags);
+
+	vfree(page_color_table);
+}
+
+static char *cache_size;
+MODULE_PARM(cache_size, "s");
+
+__initfunc(int page_color_init(void))
+{
+	unsigned int cache_size_int;
+	unsigned int alloc_size;
+	unsigned long flags;
+
+	if (cache_size) {
+		cache_size_int = simple_strtoul(cache_size,
+						(char **)NULL, 10);
+		if (strchr(cache_size, 'M') ||
+		    strchr(cache_size, 'm'))
+			cache_size_int *= 1024*1024;
+
+		if (strchr(cache_size, 'K') ||
+		    strchr(cache_size, 'k'))
+			cache_size_int *= 1024;
+	}
+	else {
+		cache_size_int = CACHE_SIZE_GUESS;
+	}
+
+	/* A power of two has exactly one bit set, so ANDing the
+	   value with its negation must give the value back. */
+	if ((-cache_size_int & cache_size_int) != cache_size_int) {
+		printk("page_color: cache size is not a power of two\n");
+		return -EINVAL;
+	}
+
+	page_colors = cache_size_int / PAGE_SIZE;
+	page_hit_count = 0;
+	page_miss_count = 0;
+	alloc_size = page_color_alloc();
+	page_color_table = (unsigned long *)vmalloc(alloc_size);
+	if (!page_color_table) {
+		printk("page_color: memory allocation failed\n");
+		return -ENOMEM;
+	}
+	memset(page_color_table, 0, alloc_size);
+
+	spin_lock_irqsave(&page_alloc_lock, flags);
+	fill_color_pool();
+	page_coloring = 1;
+	spin_unlock_irqrestore(&page_alloc_lock, flags);
+
+#ifdef CONFIG_PROC_FS
+	proc_register(&proc_root, &page_color_proc_entry);
+#endif
+
+	printk("page_color: starting with %u colors\n", page_colors);
+	return 0;
+}
diff -urN 2.2.20aa1/fs/nfs/dir.c page-coloring/fs/nfs/dir.c
--- 2.2.20aa1/fs/nfs/dir.c	Sun Nov  4 02:15:35 2001
+++ page-coloring/fs/nfs/dir.c	Fri Feb  8 06:24:14 2002
@@ -350,7 +350,7 @@
 		desc->page = NULL;
 	}
 
-	cache_page = page_cache_alloc();
+	cache_page = page_cache_alloc(-1L);
 	if (!cache_page) {
 		status = -ENOMEM;
 		goto out;
diff -urN 2.2.20aa1/include/linux/mm.h page-coloring/include/linux/mm.h
--- 2.2.20aa1/include/linux/mm.h	Fri Feb  8 07:27:02 2002
+++ page-coloring/include/linux/mm.h	Fri Feb  8 08:22:23 2002
@@ -302,8 +302,15 @@
  * overhead, just use __get_free_page() directly..
  */
 #define __get_free_page(gfp_mask) __get_free_pages((gfp_mask),0)
+#define __get_free_page_pfn(gfp_mask, pfn) __get_free_pages_pfn((gfp_mask),0, pfn)
 #define __get_dma_pages(gfp_mask, order) __get_free_pages((gfp_mask) | GFP_DMA,(order))
+#ifdef CONFIG_PAGE_COLORING_MODULE
+#define __get_free_pages(gfp_mask, order) __get_free_pages_pfn((gfp_mask),(order), -1L)
+extern unsigned long FASTCALL(__get_free_pages_pfn(int gfp_mask, unsigned long gfp_order, long pfn));
+#else
+#define __get_free_pages_pfn(gfp_mask, order, pfn) __get_free_pages((gfp_mask),(order))
 extern unsigned long FASTCALL(__get_free_pages(int gfp_mask, unsigned long gfp_order));
+#endif
 
 extern inline unsigned long get_free_page(int gfp_mask)
 {
diff -urN 2.2.20aa1/include/linux/pagemap.h page-coloring/include/linux/pagemap.h
--- 2.2.20aa1/include/linux/pagemap.h	Fri Feb  8 07:27:03 2002
+++ page-coloring/include/linux/pagemap.h	Fri Feb  8 08:22:25 2002
@@ -30,7 +30,7 @@
 #define PAGE_CACHE_MASK		PAGE_MASK
 #define PAGE_CACHE_MASK_loff	PAGE_MASK_loff
 
-#define page_cache_alloc()	__get_free_page(GFP_USER)
+#define page_cache_alloc(idx)	__get_free_page_pfn(GFP_USER, idx)
 #define page_cache_free(x)	free_page(x)
 #define page_cache_release(x)	__free_page(x)
diff -urN 2.2.20aa1/ipc/shm.c page-coloring/ipc/shm.c
--- 2.2.20aa1/ipc/shm.c	Sun Nov  4 02:15:33 2001
+++ page-coloring/ipc/shm.c	Fri Feb  8 04:47:16 2002
@@ -653,7 +653,7 @@
 
 	pte = __pte(shp->shm_pages[idx]);
 	if (!pte_present(pte)) {
-		unsigned long page = __get_free_page(GFP_BIGUSER);
+		unsigned long page = __get_free_page_pfn(GFP_BIGUSER, address >> PAGE_SHIFT);
 		if (!page)
 			return -1;
 		clear_bigpage(page);
diff -urN 2.2.20aa1/kernel/ksyms.c page-coloring/kernel/ksyms.c
--- 2.2.20aa1/kernel/ksyms.c	Sun Nov  4 02:15:38 2001
+++ page-coloring/kernel/ksyms.c	Fri Feb  8 04:56:13 2002
@@ -102,7 +102,11 @@
 EXPORT_SYMBOL(daemonize);
 
 /* internal kernel memory management */
+#ifdef CONFIG_PAGE_COLORING_MODULE
+EXPORT_SYMBOL(__get_free_pages_pfn);
+#else
 EXPORT_SYMBOL(__get_free_pages);
+#endif
 EXPORT_SYMBOL(free_pages);
 EXPORT_SYMBOL(__free_pages);
 EXPORT_SYMBOL(kmem_find_general_cachep);
@@ -471,4 +475,24 @@
 
 EXPORT_SYMBOL(_etext);
 EXPORT_SYMBOL(module_list);
-
+#ifdef CONFIG_PAGE_COLORING_MODULE
+extern unsigned int page_coloring;
+extern unsigned int page_miss_count;
+extern unsigned int page_hit_count;
+extern unsigned int page_colors;
+extern unsigned long *page_color_table;
+extern spinlock_t page_alloc_lock;
+void fill_color_pool(void);
+void empty_color_pool(void);
+unsigned int page_color_alloc(void);
+
+EXPORT_SYMBOL_NOVERS(page_coloring);
+EXPORT_SYMBOL_NOVERS(page_miss_count);
+EXPORT_SYMBOL_NOVERS(page_hit_count);
+EXPORT_SYMBOL_NOVERS(page_colors);
+EXPORT_SYMBOL_NOVERS(page_color_table);
+EXPORT_SYMBOL_NOVERS(page_alloc_lock);
+EXPORT_SYMBOL_NOVERS(fill_color_pool);
+EXPORT_SYMBOL_NOVERS(empty_color_pool);
+EXPORT_SYMBOL_NOVERS(page_color_alloc);
+#endif
diff -urN 2.2.20aa1/mm/filemap.c page-coloring/mm/filemap.c
--- 2.2.20aa1/mm/filemap.c	Sun Nov  4 02:15:36 2001
+++ page-coloring/mm/filemap.c	Fri Feb  8 04:43:17 2002
@@ -309,7 +309,7 @@
  * this is all overlapped with the IO on the previous page finishing anyway)
  */
 static unsigned long try_to_read_ahead(struct file * file,
-			pgoff_t pgoff, unsigned long page_cache)
+			pgoff_t pgoff, unsigned long page_cache, long color_pfn)
 {
 	struct inode *inode = file->f_dentry->d_inode;
 	pgoff_t pg_size;
@@ -319,7 +319,7 @@
 	pg_size = loff2pgoff(inode->i_size+(PAGE_SIZE-1));
 
 	if (!page_cache) {
-		page_cache = page_cache_alloc();
+		page_cache = page_cache_alloc(color_pfn);
 		if (!page_cache)
 			return 0;	/* Can't allocate! */
 	}
@@ -563,9 +563,10 @@
 	 */
 	ahead = 0;
 	while (ahead < max_ahead) {
+		unsigned long pgoff;
 		ahead += PAGE_CACHE_SIZE;
-		page_cache = try_to_read_ahead(filp, loff2pgoff(raend + ahead),
-					       page_cache);
+		pgoff = loff2pgoff(raend + ahead);
+		page_cache = try_to_read_ahead(filp, pgoff, page_cache, pgoff);
 	}
 	/*
 	 * If we tried to read ahead some pages,
@@ -755,7 +756,7 @@
 	 * page..
 	 */
 	if (!page_cache) {
-		page_cache = page_cache_alloc();
+		page_cache = page_cache_alloc(pgoff);
 		/*
 		 * That could have slept, so go around to the
 		 * very beginning..
@@ -1021,6 +1022,7 @@
 	int i;
 	struct page * page, **hash;
 	unsigned long old_page, new_page;
+	long color_pfn = address >> PAGE_SHIFT;
 
 	new_page = 0;
 	offset = ((loff_t)((address & PAGE_MASK) - area->vm_start) +
@@ -1045,7 +1047,7 @@
 	 * extra page -- better to overlap the allocation with the I/O.
 	 */
 	if (no_share && !new_page) {
-		new_page = page_cache_alloc();
+		new_page = page_cache_alloc(color_pfn);
 		if (!new_page)
 			goto release_and_oom;
 	}
@@ -1091,10 +1093,10 @@
 
 	for (i = 1 << page_cluster; i > 0;
 	     --i, reada = ulong2pgoff(pgoff2ulong(reada)+1))
-		new_page = try_to_read_ahead(file, reada, new_page);
+		new_page = try_to_read_ahead(file, reada, new_page, color_pfn + reada - pgoff);
 
 	if (!new_page)
-		new_page = page_cache_alloc();
+		new_page = page_cache_alloc(color_pfn);
 	if (!new_page)
 		goto oom;
 
@@ -1516,7 +1518,7 @@
 	page = __find_page(inode, offset, *hash);
 	if (!page) {
 		if (!cached_page) {
-			cached_page = page_cache_alloc();
+			cached_page = page_cache_alloc(offset);
 			if (!cached_page)
 				return ERR_PTR(-ENOMEM);
 			goto repeat;
@@ -1657,7 +1659,7 @@
 		page = __find_page(inode, pgpos, *hash);
 		if (!page) {
 			if (!page_cache) {
-				page_cache = page_cache_alloc();
+				page_cache = page_cache_alloc(pgpos);
 				if (page_cache)
 					continue;
 				status = -ENOMEM;
@@ -1737,7 +1739,7 @@
 	if (!page) {
 		if (!new)
 			goto out;
-		page_cache = page_cache_alloc();
+		page_cache = page_cache_alloc(pgoff);
 		if (!page_cache)
 			goto out;
 		clear_page(page_cache);
diff -urN 2.2.20aa1/mm/memory.c page-coloring/mm/memory.c
--- 2.2.20aa1/mm/memory.c	Sun Nov  4 02:15:36 2001
+++ page-coloring/mm/memory.c	Fri Feb  8 04:46:44 2002
@@ -836,7 +836,7 @@
 	struct page * page_map;
 	pte = *page_table;
-	new_page = __get_free_page(GFP_BIGUSER);
+	new_page = __get_free_page_pfn(GFP_BIGUSER, address >> PAGE_SHIFT);
 	/* Did swap_out() unmapped the protected page while we slept?
 	 */
 	if (pte_val(*page_table) != pte_val(pte))
 		goto end_wp_page;
@@ -1033,7 +1033,7 @@
 {
 	pte_t entry = pte_wrprotect(mk_pte(ZERO_PAGE(addr), vma->vm_page_prot));
 	if (write_access) {
-		unsigned long page = __get_free_page(GFP_BIGUSER);
+		unsigned long page = __get_free_page_pfn(GFP_BIGUSER, addr >> PAGE_SHIFT);
 		if (!page)
 			return -1;
 		clear_bigpage(page);
diff -urN 2.2.20aa1/mm/page_alloc.c page-coloring/mm/page_alloc.c
--- 2.2.20aa1/mm/page_alloc.c	Sun Nov  4 02:15:36 2001
+++ page-coloring/mm/page_alloc.c	Fri Feb  8 08:11:14 2002
@@ -80,6 +80,251 @@
 	prev->next = next;
 }
 
+#ifdef CONFIG_BIGMEM
+#define UPDATE_NR_FREE_BIGPAGES(map_nr, order)		\
+	do						\
+	{						\
+		if ((map_nr) >= bigmem_mapnr)		\
+			nr_free_bigpages -= 1 << (order); \
+	}						\
+	while (0)
+#else
+#define UPDATE_NR_FREE_BIGPAGES(map_nr, order) do { } while (0)
+#endif
+
+#ifdef CONFIG_PAGE_COLORING_MODULE
+
+unsigned int page_coloring;
+unsigned int page_miss_count;
+unsigned int page_hit_count;
+unsigned int page_colors;
+struct free_area_struct *page_color_table;
+struct free_area_struct *queues[NR_MEM_TYPES][NR_MEM_LISTS];
+
+#define COLOR(x) ((x) & cache_mask)
+
+unsigned int page_color_alloc(void)
+{
+	/* FIXME, overkill */
+	return NR_MEM_TYPES * sizeof(struct free_area_struct) *
+		( 2 * page_colors + NR_MEM_LISTS );
+}
+
+void fill_color_pool(void)
+{
+	/* For each of the NR_MEM_LISTS queues in
+	   free_area[], move the queued pages into
+	   a separate array of queues, one queue per
+	   distinct page color.  empty_color_pool()
+	   reverses the process.
+
+	   This code and empty_color_pool() must be
+	   called atomically. */
+
+	int i, j, k;
+	unsigned int num_colors, cache_mask;
+	unsigned long map_nr;
+	struct free_area_struct *area, *old_area, **qptr;
+	struct page *page;
+
+	cache_mask = page_colors - 1;
+	area = page_color_table;
+
+	for(k = 0; k < NR_MEM_TYPES; k++) {
+		num_colors = page_colors;
+		qptr = queues[k];
+		old_area = free_area[k];
+
+		for(i = 0; i < NR_MEM_LISTS; i++) {
+			qptr[i] = area;
+			for(j = 0; j < num_colors; j++) {
+				area[j].map = old_area->map;
+				area[j].count = 0;
+			}
+
+			for(j = 0; j < old_area->count; j++) {
+				page = memory_head(old_area);
+				page = page->next;
+				remove_mem_queue(page);
+				map_nr = page - mem_map;
+				add_mem_queue(area +
					(COLOR(map_nr) >> i), page);
+			}
+
+			old_area++;
+			area += num_colors;
+			if (num_colors > 1)
+				num_colors >>= 1;
+		}
+	}
+}
+
+void empty_color_pool(void)
+{
+	int i, j, k, m;
+	unsigned int num_colors;
+	struct free_area_struct *area, *old_area, **qptr;
+	struct page *page;
+
+	for(m = 0; m < NR_MEM_TYPES; m++) {
+		old_area = free_area[m];
+		qptr = queues[m];
+		num_colors = page_colors;
+
+		for(i = 0; i < NR_MEM_LISTS; i++) {
+			area = qptr[i];
+			old_area->count = 0;
+
+			for(j = 0; j < num_colors; j++) {
+				for(k = 0; k < area[j].count; k++) {
+					page = memory_head(area + j);
+					page = page->next;
+					remove_mem_queue(page);
+					add_mem_queue(old_area, page);
+				}
+			}
+			old_area++;
+			if (num_colors > 1)
+				num_colors >>= 1;
+		}
+	}
+}
+
+unsigned long alloc_page_by_color(unsigned long order, unsigned long type, long pfn)
+{
+	unsigned int i;
+	unsigned int mask, color;
+	struct free_area_struct *area, *old_area, **qptr;
+	struct page *prev, *ret;
+	unsigned long map_nr;
+	unsigned int cache_mask = page_colors - 1;
+	static unsigned long global_color; /* not SMP safe but a race doesn't hurt too much */
+	int color_hit;
+
+	/*
+	 * If pfn is negative just try to distribute the page colors globally
+	 * with a dynamic page coloring.
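+	 * Callers that know where a page will live pass a target page
+	 * frame index (a virtual pfn, or a file page offset), so that
+	 * consecutive pages of one mapping cycle through consecutive
+	 * cache colors.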
+	 */
+	color = pfn;
+	if (pfn < 0)
+		color = global_color;
+
+	/* Round the target color to look for up to the
+	   next 1<<order boundary. */
+	color = (color + (1 << order) - 1) & ~((1 << order) - 1);
+
+	color_hit = 1;
+	old_area = free_area[type];
+	qptr = queues[type];
+
+	/* Try every color once, starting at the target color;
+	   give up if no queue of this memory type has a block. */
+	for (mask = 0; mask < page_colors; mask += 1 << order) {
+		for (i = order; i < NR_MEM_LISTS; i++) {
+			area = qptr[i] + (COLOR(color) >> i);
+			if (area->count)
+				goto alloc_page_done;
+		}
+
+		page_miss_count++;
+		color = COLOR(color + (1 << order));
+		color_hit = 0;
+	}
+	return 0;
+
+alloc_page_done:
+	if (pfn < 0)
+		global_color = color + (1 << order);
+	prev = memory_head(area);
+	ret = prev->next;
+	(prev->next = ret->next)->prev = prev;
+	map_nr = ret - mem_map;
+	change_bit(map_nr >> (1+i), area->map);
+	nr_free_pages -= 1 << order;
+	UPDATE_NR_FREE_BIGPAGES(map_nr, order);
+	area->count--;
+	old_area[i].count--;
+
+	while (i > order) {
+
+		/* Return 1<<i pages to the pool; keep the
+		   buddy half that contains the target color. */
+		i--;
+		mask = 1 << i;
+		area = qptr[i];
+		old_area[i].count++;
+		change_bit(map_nr >> (1+i), area->map);
+		if (color & mask) {
+			add_mem_queue(area + (COLOR(map_nr) >> i), ret);
+			map_nr += mask;
+			ret += mask;
+		} else {
+			add_mem_queue(area + (COLOR(map_nr + mask) >> i),
+				      ret + mask);
+		}
+	}
+	atomic_set(&ret->count, 1);
+	page_hit_count += color_hit;
+	return PAGE_OFFSET + (map_nr << PAGE_SHIFT);
+}
+
+void free_pages_by_color(unsigned long map_nr, unsigned long mask,
+			 unsigned long order, unsigned long index,
+			 unsigned long type)
+{
+	/* Works in the same way as __free_pages_ok,
+	   except that the mem_queue operations are
+	   color-dependent. */
+
+	int i;
+	struct free_area_struct *area, *old_area, **qptr;
+	unsigned int cache_mask = page_colors - 1;
+
+	i = order;
+	old_area = free_area[type];
+	qptr = queues[type];
+	area = qptr[i];
+	nr_free_pages -= mask;
+
+	while (mask + (1 << (NR_MEM_LISTS-1))) {
+		if (!test_and_change_bit(index, area->map))
+			break;
+		remove_mem_queue(mem_map + (map_nr ^ -mask));
+		area[COLOR(map_nr ^ -mask) >> i].count--;
+		old_area[i].count--;
+		mask <<= 1;
+		i++;
+		area = qptr[i];
+		index >>= 1;
+		map_nr &= mask;
+	}
+
+	add_mem_queue(area + (COLOR(map_nr) >> i), mem_map + map_nr);
+	old_area[i].count++;
+}
+
+#endif /* CONFIG_PAGE_COLORING_MODULE */
+
 /*
  * Free_page() adds the page to the free lists. This is optimized for
  * fast normal cases (no error jumps taken normally).
@@ -128,6 +373,12 @@
 		type = 2;
 	}
 #endif
+#ifdef CONFIG_PAGE_COLORING_MODULE
+	if (page_coloring == 1) {
+		free_pages_by_color(map_nr, mask, order, index, type);
+		return;
+	}
+#endif
 	area = free_area[type] + order;
 	__free_pages_ok(map_nr, mask, area, index);
 }
@@ -155,6 +406,13 @@
 		type = 2;
 	}
 #endif
+#ifdef CONFIG_PAGE_COLORING_MODULE
+	if (page_coloring == 1) {
+		free_pages_by_color(map_nr, mask, order, index, type);
+		spin_unlock_irqrestore(&page_alloc_lock, flags);
+		return;
+	}
+#endif
 	area = free_area[type] + order;
 	__free_pages_ok(map_nr, mask, area, index);
 	spin_unlock_irqrestore(&page_alloc_lock, flags);
@@ -202,17 +460,6 @@
 #define MARK_USED(index, order, area) \
 	change_bit((index) >> (1+(order)), (area)->map)
 #define ADDRESS(x) (PAGE_OFFSET + ((x) << PAGE_SHIFT))
-#ifdef CONFIG_BIGMEM
-#define UPDATE_NR_FREE_BIGPAGES(map_nr, order)		\
-	do						\
-	{						\
-		if ((map_nr) >= bigmem_mapnr)		\
-			nr_free_bigpages -= 1 << (order); \
-	}						\
-	while (0)
-#else
-#define UPDATE_NR_FREE_BIGPAGES(map_nr, order) do { } while (0)
-#endif
 
 #define RMQUEUE_TYPE(order, type) \
 do { struct free_area_struct * area = free_area[type]+order; \
 	unsigned long new_order = order; \
@@ -265,7 +512,11 @@
 	}
 }
 
+#ifdef CONFIG_PAGE_COLORING_MODULE
+unsigned long __get_free_pages_pfn(int gfp_mask, unsigned long order, long pfn)
+#else
 unsigned long __get_free_pages(int gfp_mask, unsigned long order)
+#endif
 {
 	unsigned long flags;
 
@@ -377,6 +628,26 @@
 	}
 	}
 ok_to_allocate:
+
+#ifdef CONFIG_PAGE_COLORING_MODULE
+	if (page_coloring == 1) {
+		unsigned long page = 0;
+		if (!(gfp_mask & __GFP_DMA)) {
+#ifdef CONFIG_BIGMEM
+			if (gfp_mask & __GFP_BIGMEM)
+				page = alloc_page_by_color(order, 2, pfn);
+			if (!page)
+#endif
+				page = alloc_page_by_color(order, 0, pfn);
+
+		}
+		if (!page)
+			page = alloc_page_by_color(order, 1, pfn);
+		spin_unlock_irqrestore(&page_alloc_lock, flags);
+		return page;
+	}
+#endif
+
 	/* if it's not a dma request, try non-dma first */
 	if (!(gfp_mask & __GFP_DMA)) {
 #ifdef CONFIG_BIGMEM