Index: linux-2.6.10/include/linux/sched.h
===================================================================
--- linux-2.6.10.orig/include/linux/sched.h	2005-02-14 17:48:07.000000000 -0800
+++ linux-2.6.10/include/linux/sched.h	2005-02-15 13:48:14.000000000 -0800
@@ -551,6 +551,8 @@ struct task_struct {
 #endif
 
 	struct list_head tasks;
+	unsigned long anon_fault_next_addr;	/* Predicted sequential fault address */
+	int anon_fault_order;			/* Last order of allocation on fault */
 	/*
 	 * ptrace_list/ptrace_children forms the list of my children
 	 * that were stolen by a ptracer.
Index: linux-2.6.10/mm/memory.c
===================================================================
--- linux-2.6.10.orig/mm/memory.c	2005-02-14 17:48:07.000000000 -0800
+++ linux-2.6.10/mm/memory.c	2005-02-15 13:48:31.000000000 -0800
@@ -58,6 +58,7 @@
 
 #include <linux/swapops.h>
 #include <linux/elf.h>
+#include <linux/pagevec.h>
 
 #ifndef CONFIG_DISCONTIGMEM
 /* use the per-pgdat data instead for discontigmem - mbligh */
@@ -1772,6 +1773,8 @@ out:
 	return ret;
 }
 
+int sysctl_max_prealloc_order = 1;
+
 /*
  * We are called with the MM semaphore and page_table_lock
  * spinlock held to protect against concurrent faults in
@@ -1783,53 +1786,106 @@ do_anonymous_page(struct mm_struct *mm,
 		unsigned long addr)
 {
 	pte_t entry;
-	struct page * page = ZERO_PAGE(addr);
+	unsigned long end_addr;
+
+	addr &= PAGE_MASK;
 
-	/* Read-only mapping of ZERO_PAGE. */
-	entry = pte_wrprotect(mk_pte(ZERO_PAGE(addr), vma->vm_page_prot));
+	if (likely((vma->vm_flags & VM_RAND_READ)
+			|| current->anon_fault_next_addr != addr)
+			|| current->anon_fault_order >= sysctl_max_prealloc_order) {
+		/* Single page */
+		current->anon_fault_order = 0;
+		end_addr = addr + PAGE_SIZE;
+	} else {
+		/* Sequence of faults detected. Perform preallocation */
+		int order = ++current->anon_fault_order;
+
+		if ((1 << order) < PAGEVEC_SIZE)
+			end_addr = addr + (PAGE_SIZE << order);
+		else {
+			end_addr = addr + PAGEVEC_SIZE * PAGE_SIZE;
+			current->anon_fault_order = 3;
+		}
 
-	/* ..except if it's a write access */
+		if (end_addr > vma->vm_end)
+			end_addr = vma->vm_end;
+		if ((addr & PMD_MASK) != (end_addr & PMD_MASK))
+			end_addr &= PMD_MASK;
+	}
 	if (write_access) {
-		/* Allocate our own private page. */
+
+		unsigned long a;
+		int i;
+		struct pagevec pv;
+
 		pte_unmap(page_table);
 		spin_unlock(&mm->page_table_lock);
 
+		pagevec_init(&pv, 0);
+
 		if (unlikely(anon_vma_prepare(vma)))
-			goto no_mem;
-		page = alloc_zeroed_user_highpage(vma, addr);
-		if (!page)
-			goto no_mem;
+			return VM_FAULT_OOM;
+
+		/* Allocate the necessary pages */
+		for(a = addr; a < end_addr; a += PAGE_SIZE) {
+			struct page *p = alloc_zeroed_user_highpage(vma, a);
+
+			if (likely(p)) {
+				pagevec_add(&pv, p);
+			} else {
+				if (a == addr)
+					return VM_FAULT_OOM;
+				break;
+			}
+		}
 
 		spin_lock(&mm->page_table_lock);
-		page_table = pte_offset_map(pmd, addr);
 
-		if (!pte_none(*page_table)) {
+		for(i = 0; addr < a; addr += PAGE_SIZE, i++) {
+			struct page *p = pv.pages[i];
+
+			page_table = pte_offset_map(pmd, addr);
+			if (unlikely(!pte_none(*page_table))) {
+				/* Someone else got there first */
+				pte_unmap(page_table);
+				page_cache_release(p);
+				mm->rss--;
+				continue;
+			}
+
+			entry = maybe_mkwrite(pte_mkdirty(mk_pte(p,
+							vma->vm_page_prot)),
+					      vma);
+
+			lru_cache_add_active(p);
+			SetPageReferenced(p);
+			page_add_anon_rmap(p, vma, addr);
+
+			set_pte(page_table, entry);
 			pte_unmap(page_table);
-			page_cache_release(page);
-			spin_unlock(&mm->page_table_lock);
-			goto out;
+
+			/* No need to invalidate - it was non-present before */
+			update_mmu_cache(vma, addr, entry);
 		}
-		mm->rss++;
+
+		mm->rss += pagevec_count(&pv);
 		acct_update_integrals();
-		update_mem_hiwater();
-		entry = maybe_mkwrite(pte_mkdirty(mk_pte(page,
-							 vma->vm_page_prot)),
-				      vma);
-		lru_cache_add_active(page);
-		SetPageReferenced(page);
-		page_add_anon_rmap(page, vma, addr);
+		update_mem_hiwater();
+	} else {
+		/* Read */
+		entry = pte_wrprotect(mk_pte(ZERO_PAGE(addr), vma->vm_page_prot));
+nextread:
+		set_pte(page_table, entry);
+		pte_unmap(page_table);
+		update_mmu_cache(vma, addr, entry);
+		addr += PAGE_SIZE;
+		if (unlikely(addr < end_addr)) {
+			page_table = pte_offset_map(pmd, addr);
+			if (likely(pte_none(*page_table)))
+				goto nextread;
+		}
 	}
-
-	set_pte(page_table, entry);
-	pte_unmap(page_table);
-
-	/* No need to invalidate - it was non-present before */
-	update_mmu_cache(vma, addr, entry);
+	current->anon_fault_next_addr = addr;
 	spin_unlock(&mm->page_table_lock);
-out:
 	return VM_FAULT_MINOR;
-no_mem:
-	return VM_FAULT_OOM;
 }
 
 /*
Index: linux-2.6.10/kernel/sysctl.c
===================================================================
--- linux-2.6.10.orig/kernel/sysctl.c	2005-02-14 17:48:07.000000000 -0800
+++ linux-2.6.10/kernel/sysctl.c	2005-02-15 13:48:14.000000000 -0800
@@ -55,6 +55,7 @@
 extern int C_A_D;
 extern int sysctl_overcommit_memory;
 extern int sysctl_overcommit_ratio;
+extern int sysctl_max_prealloc_order;
 extern int max_threads;
 extern int sysrq_enabled;
 extern int core_uses_pid;
@@ -825,6 +826,16 @@ static ctl_table vm_table[] = {
 		.strategy	= &sysctl_jiffies,
 	},
 #endif
+	{
+		.ctl_name	= VM_MAX_PREFAULT_ORDER,
+		.procname	= "max_prealloc_order",
+		.data		= &sysctl_max_prealloc_order,
+		.maxlen		= sizeof(sysctl_max_prealloc_order),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &zero,
+	},
 	{ .ctl_name = 0 }
 };
 
Index: linux-2.6.10/include/linux/sysctl.h
===================================================================
--- linux-2.6.10.orig/include/linux/sysctl.h	2005-02-14 17:48:07.000000000 -0800
+++ linux-2.6.10/include/linux/sysctl.h	2005-02-15 13:48:14.000000000 -0800
@@ -169,6 +169,7 @@ enum
 	VM_VFS_CACHE_PRESSURE=26, /* dcache/icache reclaim pressure */
 	VM_LEGACY_VA_LAYOUT=27, /* legacy/compatibility virtual address
				   space layout */
 	VM_SWAP_TOKEN_TIMEOUT=28, /* default time for token time out */
+	VM_MAX_PREFAULT_ORDER=29, /* max prefault order during anonymous page faults */
 };