diff -urpN -X /home/fletch/.diff.exclude 900-mjb1/arch/i386/Kconfig 950-shpte/arch/i386/Kconfig
--- 900-mjb1/arch/i386/Kconfig	Thu Mar 27 22:15:28 2003
+++ 950-shpte/arch/i386/Kconfig	Sat Mar 29 07:53:14 2003
@@ -751,6 +751,15 @@ config 4K_STACK
 	  detection. It is much more reliable than the currently in-kernel
 	  version.
 
+config SHAREPTE
+	bool "Share 3rd-level pagetables between processes"
+	help
+	  Normally each address space has its own complete page table for all
+	  its mappings. This can mean many mappings of a set of shared data
+	  pages. With this option, the VM will attempt to share the bottom
+	  level of the page table between address spaces that are sharing data
+	  pages.
+
 config MATH_EMULATION
 	bool "Math emulation"
 	---help---
diff -urpN -X /home/fletch/.diff.exclude 900-mjb1/arch/i386/kernel/vm86.c 950-shpte/arch/i386/kernel/vm86.c
--- 900-mjb1/arch/i386/kernel/vm86.c	Mon Mar 17 21:43:39 2003
+++ 950-shpte/arch/i386/kernel/vm86.c	Sat Mar 29 07:53:14 2003
@@ -41,6 +41,8 @@
 #include
 #include
 #include
+#include
+#include
 #include
 #include
@@ -125,6 +127,7 @@ struct pt_regs * save_v86_state(struct k
 static void mark_screen_rdonly(struct task_struct * tsk)
 {
+	struct ptpage *ptepage;
 	pgd_t *pgd;
 	pmd_t *pmd;
 	pte_t *pte, *mapped;
@@ -148,6 +151,8 @@ static void mark_screen_rdonly(struct ta
 		pmd_clear(pmd);
 		goto out;
 	}
+	ptepage = pmd_ptpage(*pmd);
+	pte_page_lock(ptepage);
 	pte = mapped = pte_offset_map(pmd, 0xA0000);
 	for (i = 0; i < 32; i++) {
 		if (pte_present(*pte))
@@ -155,6 +160,7 @@
 		pte++;
 	}
 	pte_unmap(mapped);
+	pte_page_unlock(ptepage);
 out:
 	spin_unlock(&tsk->mm->page_table_lock);
 	preempt_enable();
diff -urpN -X /home/fletch/.diff.exclude 900-mjb1/arch/i386/mm/pgtable.c 950-shpte/arch/i386/mm/pgtable.c
--- 900-mjb1/arch/i386/mm/pgtable.c	Mon Mar 17 21:43:39 2003
+++ 950-shpte/arch/i386/mm/pgtable.c	Sat Mar 29 07:53:14 2003
@@ -146,24 +146,27 @@ pte_t *pte_alloc_one_kernel(struct mm_st
 	return pte;
 }
 
-struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
+struct ptpage *pte_alloc_one(struct mm_struct *mm, unsigned long address)
 {
 	int count = 0;
-	struct page *pte;
+	struct ptpage *pte;
 
 	do {
 #if CONFIG_HIGHPTE
-		pte = alloc_pages(GFP_KERNEL | __GFP_HIGHMEM, 0);
+		pte = (struct ptpage *)alloc_pages(GFP_KERNEL | __GFP_HIGHMEM, 0);
 #else
-		pte = alloc_pages(GFP_KERNEL, 0);
+		pte = (struct ptpage *)alloc_pages(GFP_KERNEL, 0);
 #endif
-		if (pte)
-			clear_highpage(pte);
-		else {
+		if (pte) {
+			clear_highpage((struct page *)pte);
+			pte->mapcount = pte->swapcount= 0;
+			pte->pte.mmdirect = 0;
+			break;
+		} else {
 			current->state = TASK_UNINTERRUPTIBLE;
 			schedule_timeout(HZ);
 		}
-	} while (!pte && (count++ < 10));
+	} while (count++ < 10);
 	return pte;
 }
diff -urpN -X /home/fletch/.diff.exclude 900-mjb1/fs/exec.c 950-shpte/fs/exec.c
--- 900-mjb1/fs/exec.c	Thu Mar 27 21:57:38 2003
+++ 950-shpte/fs/exec.c	Sat Mar 29 07:53:14 2003
@@ -50,6 +50,7 @@
 #include
 #include
 #include
+#include
 
 #ifdef CONFIG_KMOD
 #include
@@ -320,7 +321,7 @@ void put_dirty_page(struct task_struct *
 	set_pte(pte, pte_mkdirty(pte_mkwrite(mk_pte(page, PAGE_COPY))));
 	pte_chain = page_add_rmap(page, pte, pte_chain);
 	pte_unmap(pte);
-	tsk->mm->rss++;
+	increment_rss(pmd_ptpage(*pmd));
 	spin_unlock(&tsk->mm->page_table_lock);
 
 	/* no need for flush_tlb */
diff -urpN -X /home/fletch/.diff.exclude 900-mjb1/include/asm-generic/rmap.h 950-shpte/include/asm-generic/rmap.h
--- 900-mjb1/include/asm-generic/rmap.h	Thu Feb 13 11:08:13 2003
+++ 950-shpte/include/asm-generic/rmap.h	Sat Mar 29 07:53:14
2003 @@ -26,39 +26,12 @@ */ #include -static inline void pgtable_add_rmap(struct page * page, struct mm_struct * mm, unsigned long address) -{ -#ifdef BROKEN_PPC_PTE_ALLOC_ONE - /* OK, so PPC calls pte_alloc() before mem_map[] is setup ... ;( */ - extern int mem_init_done; - - if (!mem_init_done) - return; -#endif - page->mapping = (void *)mm; - page->index = address & ~((PTRS_PER_PTE * PAGE_SIZE) - 1); - inc_page_state(nr_page_table_pages); -} - -static inline void pgtable_remove_rmap(struct page * page) -{ - page->mapping = NULL; - page->index = 0; - dec_page_state(nr_page_table_pages); -} - -static inline struct mm_struct * ptep_to_mm(pte_t * ptep) -{ - struct page * page = kmap_atomic_to_page(ptep); - return (struct mm_struct *) page->mapping; -} - static inline unsigned long ptep_to_address(pte_t * ptep) { - struct page * page = kmap_atomic_to_page(ptep); + struct ptpage * page = (struct ptpage *)kmap_atomic_to_page(ptep); unsigned long low_bits; low_bits = ((unsigned long)ptep & ~PAGE_MASK) * PTRS_PER_PTE; - return page->index + low_bits; + return page->virtual + low_bits; } #if CONFIG_HIGHPTE @@ -86,5 +59,10 @@ static inline void rmap_ptep_unmap(pte_t return; } #endif + +extern void pgtable_add_rmap(struct ptpage * ptepage, struct mm_struct * mm, unsigned long address); +extern void pgtable_add_rmap_locked(struct ptpage * ptepage, struct mm_struct * mm, unsigned long address); +extern void pgtable_remove_rmap(struct ptpage * ptepage, struct mm_struct *mm); +extern void pgtable_remove_rmap_locked(struct ptpage * ptepage, struct mm_struct *mm); #endif /* _GENERIC_RMAP_H */ diff -urpN -X /home/fletch/.diff.exclude 900-mjb1/include/asm-generic/tlb.h 950-shpte/include/asm-generic/tlb.h --- 900-mjb1/include/asm-generic/tlb.h Wed Mar 26 22:54:36 2003 +++ 950-shpte/include/asm-generic/tlb.h Sat Mar 29 07:53:14 2003 @@ -85,13 +85,6 @@ tlb_flush_mmu(struct mmu_gather *tlb, un static inline void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) { - int freed = tlb->freed; - struct mm_struct *mm = tlb->mm; - int rss = mm->rss; - - if (rss < freed) - freed = rss; - mm->rss = rss - freed; tlb_flush_mmu(tlb, start, end); /* keep the page table cache within bounds */ diff -urpN -X /home/fletch/.diff.exclude 900-mjb1/include/asm-i386/pgalloc.h 950-shpte/include/asm-i386/pgalloc.h --- 900-mjb1/include/asm-i386/pgalloc.h Thu Feb 13 11:08:13 2003 +++ 950-shpte/include/asm-i386/pgalloc.h Sat Mar 29 07:53:14 2003 @@ -10,10 +10,10 @@ #define pmd_populate_kernel(mm, pmd, pte) \ set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte))) -static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *pte) +static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct ptpage *pte) { set_pmd(pmd, __pmd(_PAGE_TABLE + - ((unsigned long long)page_to_pfn(pte) << + ((unsigned long long)page_to_pfn((struct page *)pte) << (unsigned long long) PAGE_SHIFT))); } /* @@ -24,20 +24,20 @@ pgd_t *pgd_alloc(struct mm_struct *); void pgd_free(pgd_t *pgd); pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long); -struct page *pte_alloc_one(struct mm_struct *, unsigned long); +struct ptpage *pte_alloc_one(struct mm_struct *, unsigned long); static inline void pte_free_kernel(pte_t *pte) { free_page((unsigned long)pte); } -static inline void pte_free(struct page *pte) +static inline void pte_free(struct ptpage *pte) { - __free_page(pte); + __free_page((struct page *)pte); } -#define __pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte)) +#define __pte_free_tlb(tlb,pte) 
tlb_remove_page((tlb),((struct page *)pte)) /* * allocating and freeing a pmd is trivial: the 1-entry pmd is diff -urpN -X /home/fletch/.diff.exclude 900-mjb1/include/asm-i386/pgtable.h 950-shpte/include/asm-i386/pgtable.h --- 900-mjb1/include/asm-i386/pgtable.h Thu Mar 27 22:12:25 2003 +++ 950-shpte/include/asm-i386/pgtable.h Sat Mar 29 07:53:14 2003 @@ -115,6 +115,7 @@ void pgtable_cache_init(void); #define _PAGE_PROTNONE 0x080 /* If not present */ #define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY) +#define _PAGE_TABLE_RDONLY (_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY) #define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) #define _PAGE_CHG_MASK (PTE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY) @@ -123,6 +124,10 @@ void pgtable_cache_init(void); #define PAGE_COPY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) #define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) +#define PMD_NONE __pgprot(_PAGE_PRESENT | _PAGE_ACCESSED) +#define PMD_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED) +#define PMD_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) + #define _PAGE_KERNEL \ (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED) @@ -159,6 +164,15 @@ extern unsigned long __PAGE_KERNEL; #define __S110 PAGE_SHARED #define __S111 PAGE_SHARED +#define __PMD000 PMD_NONE +#define __PMD001 PMD_READONLY +#define __PMD010 PMD_SHARED +#define __PMD011 PMD_SHARED +#define __PMD100 PMD_READONLY +#define __PMD101 PMD_READONLY +#define __PMD110 PMD_SHARED +#define __PMD111 PMD_SHARED + /* * Define this if things work differently on an i386 and an i486: * it will (on an i486) warn about kernel memory accesses that are @@ -175,8 +189,8 @@ extern unsigned long pg0[1024]; #define pmd_none(x) (!pmd_val(x)) #define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT) #define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) -#define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE) - +#define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER & ~_PAGE_RW)) != \ + (_KERNPG_TABLE & ~_PAGE_RW)) #define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) @@ -201,6 +215,9 @@ static inline pte_t pte_mkexec(pte_t pte static inline pte_t pte_mkdirty(pte_t pte) { (pte).pte_low |= _PAGE_DIRTY; return pte; } static inline pte_t pte_mkyoung(pte_t pte) { (pte).pte_low |= _PAGE_ACCESSED; return pte; } static inline pte_t pte_mkwrite(pte_t pte) { (pte).pte_low |= _PAGE_RW; return pte; } +static inline int pmd_write(pmd_t pmd) { return (pmd).pmd & _PAGE_RW; } +static inline pmd_t pmd_wrprotect(pmd_t pmd) { (pmd).pmd &= ~_PAGE_RW; return pmd; } +static inline pmd_t pmd_mkwrite(pmd_t pmd) { (pmd).pmd |= _PAGE_RW; return pmd; } static inline int ptep_test_and_clear_dirty(pte_t *ptep) { return test_and_clear_bit(_PAGE_BIT_DIRTY, &ptep->pte_low); } static inline int ptep_test_and_clear_young(pte_t *ptep) { return test_and_clear_bit(_PAGE_BIT_ACCESSED, &ptep->pte_low); } @@ -222,6 +239,13 @@ static inline pte_t pte_modify(pte_t pte return pte; } +static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) +{ + pmd.pmd &= ~(_PAGE_PRESENT|_PAGE_RW|_PAGE_USER); + pmd.pmd |= pgprot_val(newprot); + return pmd; +} + #define page_pte(page) page_pte_prot(page, __pgprot(0)) #define pmd_page_kernel(pmd) \ @@ -231,6 +255,8 @@ static inline pte_t pte_modify(pte_t pte #define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT)) #endif /* !CONFIG_DISCONTIGMEM */ +#define pmd_ptpage(pmd) ((struct ptpage 
*)pmd_page(pmd)) + #define pmd_large(pmd) \ ((pmd_val(pmd) & (_PAGE_PSE|_PAGE_PRESENT)) == (_PAGE_PSE|_PAGE_PRESENT)) @@ -279,12 +305,20 @@ static inline pte_t pte_modify(pte_t pte ((pte_t *)kmap_atomic(pmd_page(*(dir)),KM_PTE0) + pte_index(address)) #define pte_offset_map_nested(dir, address) \ ((pte_t *)kmap_atomic(pmd_page(*(dir)),KM_PTE1) + pte_index(address)) +#define pte_page_map(__page, address) \ + ((pte_t *)kmap_atomic(__page,KM_PTE0) + pte_index(address)) +#define pte_page_map_nested(__page, address) \ + ((pte_t *)kmap_atomic(__page,KM_PTE1) + pte_index(address)) #define pte_unmap(pte) kunmap_atomic(pte, KM_PTE0) #define pte_unmap_nested(pte) kunmap_atomic(pte, KM_PTE1) #else #define pte_offset_map(dir, address) \ ((pte_t *)page_address(pmd_page(*(dir))) + pte_index(address)) #define pte_offset_map_nested(dir, address) pte_offset_map(dir, address) +#define pte_page_map(__page, address) \ + ((pte_t *)page_address(__page) + pte_index(address)) +#define pte_page_map_nested(__page, address) \ + ((pte_t *)page_address(__page) + pte_index(address)) #define pte_unmap(pte) do { } while (0) #define pte_unmap_nested(pte) do { } while (0) #endif diff -urpN -X /home/fletch/.diff.exclude 900-mjb1/include/linux/mm.h 950-shpte/include/linux/mm.h --- 900-mjb1/include/linux/mm.h Thu Mar 27 21:57:40 2003 +++ 950-shpte/include/linux/mm.h Sat Mar 29 07:53:14 2003 @@ -105,6 +105,7 @@ struct vm_area_struct { #define VM_RESERVED 0x00080000 /* Don't unmap it from swap_out */ #define VM_ACCOUNT 0x00100000 /* Is a VM accounted object */ #define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */ +#define VM_NONLINEAR 0x00800000 /* VM contains nonlinear mappings */ #ifdef CONFIG_STACK_GROWSUP #define VM_STACK_FLAGS (VM_GROWSUP | VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT) @@ -124,7 +125,6 @@ struct vm_area_struct { */ extern pgprot_t protection_map[16]; - /* * These are the virtual MM functions - opening of an area, closing and * unmapping it (needed to keep files on disk up-to-date etc), pointer @@ -137,8 +137,9 @@ struct vm_operations_struct { int (*populate)(struct vm_area_struct * area, unsigned long address, unsigned long len, pgprot_t prot, unsigned long pgoff, int nonblock); }; -/* forward declaration; pte_chain is meant to be internal to rmap.c */ +/* forward declaration; pte_chain and mm_chain are meant to be internal to rmap.c */ struct pte_chain; +struct mm_chain; struct mmu_gather; struct inode; @@ -196,6 +197,26 @@ struct page { */ #include +struct ptpage { + unsigned long flags; /* atomic flags, some possibly + updated asynchronously */ + atomic_t count; /* Usage count, see below. */ + unsigned long virtual; /* virtual address this page maps */ + unsigned short mapcount; /* Number of pages mapped to this page */ + unsigned short swapcount; /* Number of swap pages in this page */ + union { + struct mm_chain *mmchain;/* Reverse mm_struct mapping pointer */ + struct mm_struct *mmdirect; + } pte; + struct semaphore sem; +}; + +static inline void clear_pte_page(struct ptpage *ptepage) +{ + ClearPagePtepage(ptepage); + memset(&ptepage->sem, 0, sizeof(struct semaphore)); +} + /* * Methods to modify the page usage count. 
* @@ -400,14 +421,19 @@ struct file *shmem_file_setup(char * nam void shmem_lock(struct file * file, int lock); int shmem_zero_setup(struct vm_area_struct *); +void increment_rss(struct ptpage *ptpage); +void decrement_rss(struct ptpage *ptpage); +void increment_swapcount(struct ptpage *ptpage); +void decrement_swapcount(struct ptpage *ptpage); + void zap_page_range(struct vm_area_struct *vma, unsigned long address, unsigned long size); int unmap_vmas(struct mmu_gather **tlbp, struct mm_struct *mm, struct vm_area_struct *start_vma, unsigned long start_addr, unsigned long end_addr, unsigned long *nr_accounted); -void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma, +void unmap_page_range(struct mmu_gather **tlb, struct vm_area_struct *vma, unsigned long address, unsigned long size); -void clear_page_tables(struct mmu_gather *tlb, unsigned long first, int nr); +void unmap_all_pages(struct mm_struct *mm); int copy_page_range(struct mm_struct *dst, struct mm_struct *src, struct vm_area_struct *vma); int zeromap_page_range(struct vm_area_struct *vma, unsigned long from, diff -urpN -X /home/fletch/.diff.exclude 900-mjb1/include/linux/page-flags.h 950-shpte/include/linux/page-flags.h --- 900-mjb1/include/linux/page-flags.h Thu Mar 27 21:57:38 2003 +++ 950-shpte/include/linux/page-flags.h Sat Mar 29 07:53:14 2003 @@ -74,7 +74,8 @@ #define PG_mappedtodisk 17 /* Has blocks allocated on-disk */ #define PG_reclaim 18 /* To be reclaimed asap */ #define PG_compound 19 /* Part of a compound page */ -#define PG_anon 20 /* Anonymous page */ +#define PG_ptepage 20 /* This page is a pte page */ +#define PG_anon 22 /* Anonymous page */ /* * Global page accounting. One instance per CPU. Only unsigned longs are @@ -246,6 +247,12 @@ extern void get_full_page_state(struct p #define PageMappedToDisk(page) test_bit(PG_mappedtodisk, &(page)->flags) #define SetPageMappedToDisk(page) set_bit(PG_mappedtodisk, &(page)->flags) #define ClearPageMappedToDisk(page) clear_bit(PG_mappedtodisk, &(page)->flags) + +#define PagePtepage(page) test_bit(PG_ptepage, &(page)->flags) +#define SetPagePtepage(page) set_bit(PG_ptepage, &(page)->flags) +#define TestSetPagePtepage(page) test_and_set_bit(PG_ptepage, &(page)->flags) +#define ClearPagePtepage(page) clear_bit(PG_ptepage, &(page)->flags) +#define TestClearPagePtepage(page) test_and_clear_bit(PG_ptepage, &(page)->flags) #define PageReclaim(page) test_bit(PG_reclaim, &(page)->flags) #define SetPageReclaim(page) set_bit(PG_reclaim, &(page)->flags) diff -urpN -X /home/fletch/.diff.exclude 900-mjb1/include/linux/ptshare.h 950-shpte/include/linux/ptshare.h --- 900-mjb1/include/linux/ptshare.h Wed Dec 31 16:00:00 1969 +++ 950-shpte/include/linux/ptshare.h Sat Mar 29 07:53:14 2003 @@ -0,0 +1,159 @@ +#ifndef _LINUX_PTSHARE_H +#define _LINUX_PTSHARE_H + +#include + +#include +#include + +/* + * Lock primitives for the pte page. They're aliased to the + * pte chain lock in struct page, since pte pages can't have + * pte chains. + */ + + +static inline void pte_page_lock(struct ptpage *ptepage) +{ + pte_chain_lock((struct page *)ptepage); +} + +static inline int pte_page_trylock(struct ptpage *ptepage) +{ + return pte_chain_trylock((struct page *)ptepage); +} + +static inline void pte_page_unlock(struct ptpage *ptepage) +{ + pte_chain_unlock((struct page *)ptepage); +} + +/* + * Provide a primitive for taking a pmd entry and using it to + * get the corresponding pte_page_lock. 
This function takes + * the page_table_lock briefly to freeze the pmd entry, so it can + * only be used in places where the page_table_lock is not held. + * The pte page pointer is returned, since most callers will want it + * and it's handy. + */ + +static inline struct ptpage *pte_page_lock_pmd(struct mm_struct *mm, pmd_t *pmd) +{ + struct ptpage *ptepage; + + spin_lock(&mm->page_table_lock); + ptepage = pmd_ptpage(*pmd); + pte_page_lock(ptepage); + spin_unlock(&mm->page_table_lock); + return ptepage; +} + +/* + * Functions to handle shared page tables + */ + +#ifdef CONFIG_SHAREPTE + +int zap_shared_range(struct mmu_gather **tlb, pmd_t *pmd, unsigned long address, + unsigned long end); +int zap_shared_pmd(struct mm_struct *mm, pmd_t *pmd); +pte_t *pte_alloc_unshare(struct mm_struct *mm, pmd_t *pmd, + unsigned long address); +pte_t *pte_map_unshare(struct mm_struct *mm, pmd_t *pmd, + unsigned long address); +int share_page_range(struct mm_struct *dst, struct mm_struct *src, + struct vm_area_struct *vma, pmd_t **prev_pmd); +void unshare_page_range(struct mm_struct *mm, unsigned long address, + unsigned long len); +pte_t *mprotect_shared_range(struct vm_area_struct *vma, pmd_t *pmd, + unsigned long address, unsigned long end); +void mremap_unshare(struct mm_struct *mm, pmd_t *src_pmd, pmd_t *dst_pmd, + unsigned long src_addr, unsigned long dst_addr); +pte_t *pte_fault_alloc(struct mm_struct *mm, struct vm_area_struct *vma, + pmd_t *pmd, unsigned long address, int write_access); +int fork_page_range(struct mm_struct *dst, struct mm_struct *src, + struct vm_area_struct *vma, pmd_t **prev_pmd); + +#else + +static inline void unshare_page_range(struct mm_struct *mm, + unsigned long address, unsigned long len) +{ + return; +} + +static inline int fork_page_range(struct mm_struct *dst, struct mm_struct *src, + struct vm_area_struct *vma, pmd_t **prev_pmd) +{ + return copy_page_range(dst, src, vma); +} + + +static inline int zap_shared_range(struct mmu_gather **tlb, pmd_t *pmd, + unsigned long address, unsigned long end) +{ + pte_page_lock(pmd_ptpage(*pmd)); + return 1; +} + +static inline int zap_shared_pmd(struct mm_struct *mm, pmd_t *pmd) +{ + return 1; +} + +static inline pte_t *pte_alloc_unshare(struct mm_struct *mm, pmd_t *pmd, + unsigned long address) +{ + pte_t *pte; + + pte = pte_alloc_map(mm, pmd, address); + if (pte) + pte_page_lock(pmd_ptpage(*pmd)); + + return pte; +} + +static inline pte_t *pte_map_unshare(struct mm_struct *mm, pmd_t *pmd, + unsigned long address) +{ + pte_t *pte; + + if (pmd_present(*pmd)) { + pte_page_lock(pmd_ptpage(*pmd)); + pte = pte_offset_map(pmd, address); + } else + pte = NULL; + + return pte; +} + +static inline pte_t * +mprotect_shared_range(struct vm_area_struct *vma, pmd_t *pmd, + unsigned long address, unsigned long end) +{ + pte_page_lock(pmd_ptpage(*pmd)); + return pte_offset_map(pmd, address); +} + +static inline void +mremap_unshare(struct mm_struct *mm, pmd_t *src_pmd, pmd_t *dst_pmd, + unsigned long src_addr, unsigned long dst_addr) +{ + return; +} + +static inline pte_t * +pte_fault_alloc(struct mm_struct *mm, struct vm_area_struct *vma, + pmd_t *pmd, unsigned long address, int write_access) +{ + pte_t *pte; + + pte = pte_alloc_map(mm, pmd, address); + if (pte) + pte_page_lock(pmd_ptpage(*pmd)); + + return pte; +} +#endif /* CONFIG_SHARE_PTE */ + +#endif diff -urpN -X /home/fletch/.diff.exclude 900-mjb1/include/linux/rmap-locking.h 950-shpte/include/linux/rmap-locking.h --- 900-mjb1/include/linux/rmap-locking.h Thu Jan 9 19:16:14 2003 +++ 
950-shpte/include/linux/rmap-locking.h Sat Mar 29 07:53:14 2003 @@ -28,6 +28,18 @@ static inline void pte_chain_lock(struct #endif } +static inline int pte_chain_trylock(struct page *page) +{ + preempt_disable(); +#ifdef CONFIG_SMP + if (test_and_set_bit(PG_chainlock, &page->flags)) { + preempt_enable(); + return 0; + } +#endif + return 1; +} + static inline void pte_chain_unlock(struct page *page) { #ifdef CONFIG_SMP diff -urpN -X /home/fletch/.diff.exclude 900-mjb1/include/linux/sched.h 950-shpte/include/linux/sched.h --- 900-mjb1/include/linux/sched.h Thu Mar 27 22:18:13 2003 +++ 950-shpte/include/linux/sched.h Sat Mar 29 07:53:14 2003 @@ -192,6 +192,7 @@ struct mm_struct { struct vm_area_struct * mmap_cache; /* last find_vma result */ unsigned long free_area_cache; /* first hole */ pgd_t * pgd; + atomic_t ptepages; /* Number of pte pages allocated */ atomic_t mm_users; /* How many users with user space? */ atomic_t mm_count; /* How many references to "struct mm_struct" (users count as 1) */ int map_count; /* number of VMAs */ diff -urpN -X /home/fletch/.diff.exclude 900-mjb1/include/linux/swapops.h 950-shpte/include/linux/swapops.h --- 900-mjb1/include/linux/swapops.h Wed Mar 26 22:54:38 2003 +++ 950-shpte/include/linux/swapops.h Sat Mar 29 10:20:23 2003 @@ -1,3 +1,5 @@ +#include + /* * swapcache pages are stored in the swapper_space radix tree. We want to * get good packing density in that tree, so the index should be dense in diff -urpN -X /home/fletch/.diff.exclude 900-mjb1/kernel/fork.c 950-shpte/kernel/fork.c --- 900-mjb1/kernel/fork.c Thu Mar 27 22:09:08 2003 +++ 950-shpte/kernel/fork.c Sat Mar 29 07:53:14 2003 @@ -30,6 +30,10 @@ #include #include #include +#include +#include +#include +#include #include #include @@ -251,6 +255,7 @@ static inline int dup_mmap(struct mm_str struct vm_area_struct * mpnt, *tmp, **pprev; int retval; unsigned long charge = 0; + pmd_t *prev_pmd = 0; down_write(&oldmm->mmap_sem); flush_cache_mm(current->mm); @@ -260,6 +265,7 @@ static inline int dup_mmap(struct mm_str mm->free_area_cache = TASK_UNMAPPED_BASE; mm->map_count = 0; mm->rss = 0; + atomic_set(&mm->ptepages, 0); mm->cpu_vm_mask = 0; pprev = &mm->mmap; @@ -314,7 +320,7 @@ static inline int dup_mmap(struct mm_str *pprev = tmp; pprev = &tmp->vm_next; mm->map_count++; - retval = copy_page_range(mm, current->mm, tmp); + retval = fork_page_range(mm, current->mm, tmp, &prev_pmd); spin_unlock(&mm->page_table_lock); if (tmp->vm_ops && tmp->vm_ops->open) diff -urpN -X /home/fletch/.diff.exclude 900-mjb1/mm/Makefile 950-shpte/mm/Makefile --- 900-mjb1/mm/Makefile Thu Mar 27 23:23:43 2003 +++ 950-shpte/mm/Makefile Sat Mar 29 07:53:14 2003 @@ -12,3 +12,5 @@ obj-y := bootmem.o fadvise.o filemap.o slab.o swap.o truncate.o vcache.o vmscan.o $(mmu-y) obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o + +obj-$(CONFIG_SHAREPTE) += ptshare.o diff -urpN -X /home/fletch/.diff.exclude 900-mjb1/mm/fremap.c 950-shpte/mm/fremap.c --- 900-mjb1/mm/fremap.c Wed Mar 26 22:54:38 2003 +++ 950-shpte/mm/fremap.c Sat Mar 29 07:53:14 2003 @@ -13,11 +13,13 @@ #include #include #include +#include #include #include #include -static inline int zap_pte(struct mm_struct *mm, pte_t *ptep) +static inline int +zap_pte(struct mm_struct *mm, struct ptpage *ptepage, pte_t *ptep) { pte_t pte = *ptep; @@ -34,7 +36,7 @@ static inline int zap_pte(struct mm_stru set_page_dirty(page); page_remove_rmap(page, ptep); page_cache_release(page); - mm->rss--; + decrement_rss(ptepage); } } return 1; @@ -42,6 +44,7 @@ static inline int zap_pte(struct 
mm_stru if (!pte_file(pte)) free_swap_and_cache(pte_to_swp_entry(pte)); pte_clear(ptep); + decrement_swapcount(ptepage); return 0; } } @@ -54,6 +57,7 @@ int install_page(struct mm_struct *mm, s unsigned long addr, struct page *page, pgprot_t prot) { int err = -ENOMEM, flush; + struct ptpage *ptepage; pte_t *pte, entry; pgd_t *pgd; pmd_t *pmd; @@ -62,29 +66,31 @@ int install_page(struct mm_struct *mm, s pte_chain = pte_chain_alloc(GFP_KERNEL); if (!pte_chain) goto err; - pgd = pgd_offset(mm, addr); spin_lock(&mm->page_table_lock); + pgd = pgd_offset(mm, addr); pmd = pmd_alloc(mm, pgd, addr); if (!pmd) goto err_unlock; - pte = pte_alloc_map(mm, pmd, addr); + pte = pte_alloc_unshare(mm, pmd, addr); if (!pte) goto err_unlock; - flush = zap_pte(mm, pte); + ptepage = pmd_ptpage(*pmd); + flush = zap_pte(mm, ptepage, pte); - mm->rss++; flush_page_to_ram(page); flush_icache_page(vma, page); entry = mk_pte(page, prot); set_pte(pte, entry); pte_chain = page_add_rmap(page, pte, pte_chain); pte_unmap(pte); + increment_rss(ptepage); if (flush) flush_tlb_page(vma, addr); + pte_page_unlock(ptepage); spin_unlock(&mm->page_table_lock); pte_chain_free(pte_chain); return 0; @@ -151,9 +157,15 @@ long sys_remap_file_pages(unsigned long if (vma && (vma->vm_flags & VM_SHARED) && vma->vm_ops && vma->vm_ops->populate && end > start && start >= vma->vm_start && - end <= vma->vm_end) + end <= vma->vm_end) { + vma->vm_flags |= VM_NONLINEAR; + + /* Unshare all the pte pages in the entire vma range */ + unshare_page_range(mm, vma->vm_start, vma->vm_end); + err = vma->vm_ops->populate(vma, start, size, vma->vm_page_prot, pgoff, flags & MAP_NONBLOCK); + } up_read(&mm->mmap_sem); diff -urpN -X /home/fletch/.diff.exclude 900-mjb1/mm/memory.c 950-shpte/mm/memory.c --- 900-mjb1/mm/memory.c Thu Mar 27 21:57:38 2003 +++ 950-shpte/mm/memory.c Sat Mar 29 07:53:14 2003 @@ -36,6 +36,20 @@ * (Gerhard.Wichert@pdb.siemens.de) */ +/* + * A note on locking of the page table structure: + * + * The top level lock that protects the page table is the + * mm->page_table_lock. This lock protects the pgd and pmd layer. + * However, with the advent of shared pte pages, this lock is not + * sufficient. The pte layer is now protected by the pte_page_lock, + * set in the struct page of the pte page. Note that with this + * locking scheme, once the pgd and pmd layers have been set in the + * page fault path and the pte_page_lock has been taken, the + * page_table_lock can be released. + * + */ + #include #include #include @@ -45,6 +59,7 @@ #include #include #include +#include #include #include @@ -78,79 +93,10 @@ static inline void copy_cow_page(struct copy_user_highpage(to, from, address); } -/* - * Note: this doesn't free the actual pages themselves. That - * has been handled earlier when unmapping all the memory regions. - */ -static inline void free_one_pmd(struct mmu_gather *tlb, pmd_t * dir) -{ - struct page *page; - - if (pmd_none(*dir)) - return; - if (pmd_bad(*dir)) { - pmd_ERROR(*dir); - pmd_clear(dir); - return; - } - page = pmd_page(*dir); - pmd_clear(dir); - pgtable_remove_rmap(page); - pte_free_tlb(tlb, page); -} - -static inline void free_one_pgd(struct mmu_gather *tlb, pgd_t * dir) -{ - pmd_t * pmd, * md, * emd; - - if (pgd_none(*dir)) - return; - if (pgd_bad(*dir)) { - pgd_ERROR(*dir); - pgd_clear(dir); - return; - } - pmd = pmd_offset(dir, 0); - pgd_clear(dir); - /* - * Beware if changing the loop below. It once used int j, - * for (j = 0; j < PTRS_PER_PMD; j++) - * free_one_pmd(pmd+j); - * but some older i386 compilers (e.g. 
egcs-2.91.66, gcc-2.95.3) - * terminated the loop with a _signed_ address comparison - * using "jle", when configured for HIGHMEM64GB (X86_PAE). - * If also configured for 3GB of kernel virtual address space, - * if page at physical 0x3ffff000 virtual 0x7ffff000 is used as - * a pmd, when that mm exits the loop goes on to free "entries" - * found at 0x80000000 onwards. The loop below compiles instead - * to be terminated by unsigned address comparison using "jb". - */ - for (md = pmd, emd = pmd + PTRS_PER_PMD; md < emd; md++) - free_one_pmd(tlb,md); - pmd_free_tlb(tlb, pmd); -} - -/* - * This function clears all user-level page tables of a process - this - * is needed by execve(), so that old pages aren't in the way. - * - * Must be called with pagetable lock held. - */ -void clear_page_tables(struct mmu_gather *tlb, unsigned long first, int nr) -{ - pgd_t * page_dir = tlb->mm->pgd; - - page_dir += first; - do { - free_one_pgd(tlb, page_dir); - page_dir++; - } while (--nr); -} - pte_t * pte_alloc_map(struct mm_struct *mm, pmd_t *pmd, unsigned long address) { if (!pmd_present(*pmd)) { - struct page *new; + struct ptpage *new; spin_unlock(&mm->page_table_lock); new = pte_alloc_one(mm, address); @@ -166,8 +112,12 @@ pte_t * pte_alloc_map(struct mm_struct * pte_free(new); goto out; } + SetPagePtepage(new); pgtable_add_rmap(new, mm, address); pmd_populate(mm, pmd, new); + atomic_inc(&mm->ptepages); + inc_page_state(nr_page_table_pages); + init_MUTEX(&new->sem); } out: return pte_offset_map(pmd, address); @@ -192,7 +142,6 @@ pte_t * pte_alloc_kernel(struct mm_struc pte_free_kernel(new); goto out; } - pgtable_add_rmap(virt_to_page(new), mm, address); pmd_populate_kernel(mm, pmd, new); } out: @@ -261,6 +210,7 @@ skip_copy_pmd_range: address = (address goto nomem; do { + struct ptpage *src_page, *dst_page; pte_t * src_pte, * dst_pte; /* copy_pte_range */ @@ -280,7 +230,10 @@ skip_copy_pte_range: dst_pte = pte_alloc_map(dst, dst_pmd, address); if (!dst_pte) goto nomem; - spin_lock(&src->page_table_lock); + spin_lock(&src->page_table_lock); + src_page = pmd_ptpage(*src_pmd); + dst_page = pmd_ptpage(*dst_pmd); + pte_page_lock(src_page); src_pte = pte_offset_map_nested(src_pmd, address); do { pte_t pte = *src_pte; @@ -296,6 +249,7 @@ skip_copy_pte_range: if (!pte_file(pte)) swap_duplicate(pte_to_swp_entry(pte)); set_pte(dst_pte, pte); + increment_swapcount(dst_page); goto cont_copy_pte_range_noset; } pfn = pte_pfn(pte); @@ -329,7 +283,7 @@ skip_copy_pte_range: pte = pte_mkclean(pte); pte = pte_mkold(pte); get_page(page); - dst->rss++; + increment_rss(dst_page); cont_copy_pte_range: set_pte(dst_pte, pte); @@ -345,6 +299,7 @@ cont_copy_pte_range: * pte_chain allocation failed, and we need to * run page reclaim. 
*/ + pte_page_unlock(src_page); pte_unmap_nested(src_pte); pte_unmap(dst_pte); spin_unlock(&src->page_table_lock); @@ -354,12 +309,15 @@ cont_copy_pte_range: if (!pte_chain) goto nomem; spin_lock(&src->page_table_lock); + src_page = pmd_ptpage(*src_pmd); + pte_page_lock(src_page); dst_pte = pte_offset_map(dst_pmd, address); src_pte = pte_offset_map_nested(src_pmd, address); cont_copy_pte_range_noset: address += PAGE_SIZE; if (address >= end) { + pte_page_unlock(src_page); pte_unmap_nested(src_pte); pte_unmap(dst_pte); goto out_unlock; @@ -367,6 +325,7 @@ cont_copy_pte_range_noset: src_pte++; dst_pte++; } while ((unsigned long)src_pte & PTE_TABLE_MASK); + pte_page_unlock(src_page); pte_unmap_nested(src_pte-1); pte_unmap(dst_pte-1); spin_unlock(&src->page_table_lock); @@ -392,23 +351,17 @@ zap_pte_range(struct mmu_gather *tlb, pm { unsigned long offset; pte_t *ptep; + struct ptpage *ptepage = pmd_ptpage(*pmd); - if (pmd_none(*pmd)) - return; - if (pmd_bad(*pmd)) { - pmd_ERROR(*pmd); - pmd_clear(pmd); - return; - } - ptep = pte_offset_map(pmd, address); offset = address & ~PMD_MASK; if (offset + size > PMD_SIZE) size = PMD_SIZE - offset; size &= PAGE_MASK; + + ptep = pte_offset_map(pmd, address); + for (offset=0; offset < size; ptep++, offset += PAGE_SIZE) { pte_t pte = *ptep; - if (pte_none(pte)) - continue; if (pte_present(pte)) { unsigned long pfn = pte_pfn(pte); @@ -424,20 +377,32 @@ zap_pte_range(struct mmu_gather *tlb, pm mark_page_accessed(page); tlb->freed++; page_remove_rmap(page, ptep); + decrement_rss(ptepage); tlb_remove_page(tlb, page); } } - } else { - if (!pte_file(pte)) + } else if (!pte_none(pte)) { + if (!pte_file(pte)) { free_swap_and_cache(pte_to_swp_entry(pte)); + decrement_swapcount(ptepage); + } pte_clear(ptep); } + if (!ptepage->mapcount && !ptepage->swapcount) { + pmd_clear(pmd); + pgtable_remove_rmap_locked(ptepage, tlb->mm); + atomic_dec(&tlb->mm->ptepages); + dec_page_state(nr_page_table_pages); + clear_pte_page(ptepage); + pte_free_tlb(tlb, ptepage); + break; + } } pte_unmap(ptep-1); } static void -zap_pmd_range(struct mmu_gather *tlb, pgd_t * dir, +zap_pmd_range(struct mmu_gather **tlb, pgd_t * dir, unsigned long address, unsigned long size) { pmd_t * pmd; @@ -455,13 +420,27 @@ zap_pmd_range(struct mmu_gather *tlb, pg if (end > ((address + PGDIR_SIZE) & PGDIR_MASK)) end = ((address + PGDIR_SIZE) & PGDIR_MASK); do { - zap_pte_range(tlb, pmd, address, end - address); + if (pmd_none(*pmd)) + goto skip_pmd; + if (pmd_bad(*pmd)) { + pmd_ERROR(*pmd); + pmd_clear(pmd); + goto skip_pmd; + } + + if (zap_shared_range(tlb, pmd, address, end)) { + struct ptpage *ptepage = pmd_ptpage(*pmd); + zap_pte_range(*tlb, pmd, address, end - address); + pte_page_unlock(ptepage); + } +skip_pmd: address = (address + PMD_SIZE) & PMD_MASK; pmd++; } while (address < end); } -void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma, +void +unmap_page_range(struct mmu_gather **tlb, struct vm_area_struct *vma, unsigned long address, unsigned long end) { pgd_t * dir; @@ -474,13 +453,13 @@ void unmap_page_range(struct mmu_gather BUG_ON(address >= end); dir = pgd_offset(vma->vm_mm, address); - tlb_start_vma(tlb, vma); + tlb_start_vma(*tlb, vma); do { zap_pmd_range(tlb, dir, address, end - address); address = (address + PGDIR_SIZE) & PGDIR_MASK; dir++; } while (address && (address < end)); - tlb_end_vma(tlb, vma); + tlb_end_vma(*tlb, vma); } /* Dispose of an entire struct mmu_gather per rescheduling point */ @@ -570,7 +549,7 @@ int unmap_vmas(struct mmu_gather **tlbp, 
tlb_start_valid = 1; } - unmap_page_range(*tlbp, vma, start, start + block); + unmap_page_range(tlbp, vma, start, start + block); start += block; zap_bytes -= block; if ((long)zap_bytes > 0) @@ -620,6 +599,179 @@ void zap_page_range(struct vm_area_struc spin_unlock(&mm->page_table_lock); } +/** + * unmap_all_pages - unmap all the pages for an mm_struct + * @mm: the mm_struct to unmap + * + * This function is only called when an mm_struct is about to be + * released. It walks through all vmas and removes their pages + * from the page table. It understands shared pte pages and will + * decrement the count appropriately. + */ +void unmap_all_pages(struct mm_struct *mm) +{ + struct vm_area_struct *vma; + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; + struct ptpage *ptepage; + struct page *pagevec[16]; + int npages = 0; + unsigned long address; + unsigned long vm_end, pmd_end, pte_end; + + lru_add_drain(); + + vma = mm->mmap; + + /* On the off chance that the first vma is hugetlb... */ + if (is_vm_hugetlb_page(vma)) { + unmap_hugepage_range(vma, vma->vm_start, vma->vm_end); + vma = vma->vm_next; + mm->map_count--; + } + + for (;;) { + if (!vma) + goto out; + + address = vma->vm_start; +next_vma: + vm_end = vma->vm_end; + mm->map_count--; + /* + * Advance the vma pointer to the next vma. + * To facilitate coalescing adjacent vmas, the + * pointer always points to the next one + * beyond the range we're currently working + * on, which means vma will be null on the + * last iteration. + */ + vma = vma->vm_next; + if (vma) { + /* + * Go ahead and include hugetlb vmas + * in the range we process. The pmd + * entry will be cleared by close, so + * we'll just skip over them. This is + * easier than trying to avoid them. + */ + if (is_vm_hugetlb_page(vma)) + unmap_hugepage_range(vma, vma->vm_start, vma->vm_end); + + /* + * Coalesce adjacent vmas and process + * them all in one iteration. 
+ */ + if (vma->vm_start == vm_end) { + goto next_vma; + } + } + pgd = pgd_offset(mm, address); + do { + if (pgd_none(*pgd)) + goto skip_pgd; + + if (pgd_bad(*pgd)) { + pgd_ERROR(*pgd); + pgd_clear(pgd); +skip_pgd: + address = (address + PGDIR_SIZE) & PGDIR_MASK; + if (address > vm_end) + address = vm_end; + goto next_pgd; + } + pmd = pmd_offset(pgd, address); + if (vm_end > ((address + PGDIR_SIZE) & PGDIR_MASK)) + pmd_end = (address + PGDIR_SIZE) & PGDIR_MASK; + else + pmd_end = vm_end; + + do { + if (pmd_none(*pmd)) + goto skip_pmd; + if (pmd_bad(*pmd)) { + pmd_ERROR(*pmd); + pmd_clear(pmd); +skip_pmd: + address = (address + PMD_SIZE) & PMD_MASK; + if (address > pmd_end) + address = pmd_end; + goto next_pmd; + } + if (!zap_shared_pmd(mm, pmd)) + goto skip_pmd; + + ptepage = pmd_ptpage(*pmd); + pte = pte_offset_map(pmd, address); + if (pmd_end > ((address + PMD_SIZE) & PMD_MASK)) + pte_end = (address + PMD_SIZE) & PMD_MASK; + else + pte_end = pmd_end; + do { + pte_t pteval = *pte; + + if (pte_none(pteval)) + goto next_pte; + if (pte_present(pteval)) { + unsigned long pfn = pte_pfn(pteval); + if (pfn_valid(pfn)) { + struct page *page = pfn_to_page(pfn); + if (!PageReserved(page)) { + if (pte_dirty(pteval)) + set_page_dirty(page); + if (page->mapping && + pte_young(pteval) && + !PageSwapCache(page)) + mark_page_accessed(page); + page_remove_rmap(page, pte); + decrement_rss(ptepage); + pagevec[npages++] = page; + if (npages == 16) { + free_pages_and_swap_cache(pagevec, npages); + npages = 0; + } + + } + } + } else { + free_swap_and_cache(pte_to_swp_entry(pteval)); + decrement_swapcount(ptepage); + } + pte_clear(pte); + if (!ptepage->mapcount && !ptepage->swapcount) { + pmd_clear(pmd); + pgtable_remove_rmap(ptepage, mm); + atomic_dec(&mm->ptepages); + dec_page_state(nr_page_table_pages); + clear_pte_page(ptepage); + pte_free(ptepage); + address = pte_end; + break; + } +next_pte: + address += PAGE_SIZE; + pte++; + } while (address < pte_end); + pte_unmap(pte-1); +next_pmd: + pmd++; + } while (address < pmd_end); +next_pgd: + pgd++; + } while (address < vm_end); + } + +out: + if (npages) + free_pages_and_swap_cache(pagevec, npages); + + if (atomic_read(&mm->ptepages) != 0) + BUG(); + + flush_tlb_mm(mm); +} + /* * Do a quick page-table lookup for a single page. * mm->page_table_lock must be held. @@ -863,11 +1015,14 @@ static inline int remap_pmd_range(struct end = PGDIR_SIZE; phys_addr -= address; do { - pte_t * pte = pte_alloc_map(mm, pmd, base + address); + pte_t *pte; + pte = pte_alloc_unshare(mm, pmd, base + address); if (!pte) return -ENOMEM; + remap_pte_range(pte, base + address, end - address, address + phys_addr, prot); pte_unmap(pte); + pte_page_unlock(pmd_ptpage(*pmd)); address = (address + PMD_SIZE) & PMD_MASK; pmd++; } while (address && (address < end)); @@ -957,6 +1112,7 @@ static int do_wp_page(struct mm_struct * unsigned long address, pte_t *page_table, pmd_t *pmd, pte_t pte) { struct page *old_page, *new_page; + struct ptpage *ptepage = pmd_ptpage(*pmd); unsigned long pfn = pte_pfn(pte); struct pte_chain *pte_chain = NULL; int ret; @@ -992,7 +1148,7 @@ static int do_wp_page(struct mm_struct * * Ok, we need to copy. Oh, well.. 
*/ page_cache_get(old_page); - spin_unlock(&mm->page_table_lock); + pte_page_unlock(ptepage); pte_chain = pte_chain_alloc(GFP_KERNEL); if (!pte_chain) @@ -1005,11 +1161,11 @@ static int do_wp_page(struct mm_struct * /* * Re-check the pte - we dropped the lock */ - spin_lock(&mm->page_table_lock); + ptepage = pte_page_lock_pmd(mm, pmd); page_table = pte_offset_map(pmd, address); if (pte_same(*page_table, pte)) { - if (PageReserved(old_page)) - ++mm->rss; + if (PageReserved(old_page)) + increment_rss(ptepage); page_remove_rmap(old_page, page_table); break_cow(vma, new_page, address, page_table); SetPageAnon(new_page); @@ -1030,7 +1186,7 @@ no_mem: oom: ret = VM_FAULT_OOM; out: - spin_unlock(&mm->page_table_lock); + pte_page_unlock(ptepage); pte_chain_free(pte_chain); return ret; } @@ -1152,13 +1308,14 @@ static int do_swap_page(struct mm_struct pte_t *page_table, pmd_t *pmd, pte_t orig_pte, int write_access) { struct page *page; + struct ptpage *ptepage = pmd_ptpage(*pmd); swp_entry_t entry = pte_to_swp_entry(orig_pte); pte_t pte; int ret = VM_FAULT_MINOR; struct pte_chain *pte_chain = NULL; pte_unmap(page_table); - spin_unlock(&mm->page_table_lock); + pte_page_unlock(ptepage); page = lookup_swap_cache(entry); if (!page) { swapin_readahead(entry); @@ -1168,14 +1325,14 @@ static int do_swap_page(struct mm_struct * Back out if somebody else faulted in this pte while * we released the page table lock. */ - spin_lock(&mm->page_table_lock); + ptepage = pte_page_lock_pmd(mm, pmd); page_table = pte_offset_map(pmd, address); if (pte_same(*page_table, orig_pte)) ret = VM_FAULT_OOM; else ret = VM_FAULT_MINOR; pte_unmap(page_table); - spin_unlock(&mm->page_table_lock); + pte_page_unlock(ptepage); goto out; } @@ -1196,11 +1353,11 @@ static int do_swap_page(struct mm_struct * Back out if somebody else faulted in this pte while we * released the page table lock. */ - spin_lock(&mm->page_table_lock); + ptepage = pte_page_lock_pmd(mm, pmd); page_table = pte_offset_map(pmd, address); if (!pte_same(*page_table, orig_pte)) { pte_unmap(page_table); - spin_unlock(&mm->page_table_lock); + pte_page_unlock(ptepage); unlock_page(page); page_cache_release(page); ret = VM_FAULT_MINOR; @@ -1213,7 +1370,6 @@ static int do_swap_page(struct mm_struct if (vm_swap_full()) remove_exclusive_swap_page(page); - mm->rss++; pte = mk_pte(page, vma->vm_page_prot); if (write_access && can_share_swap_page(page)) pte = pte_mkdirty(pte_mkwrite(pte)); @@ -1224,11 +1380,13 @@ static int do_swap_page(struct mm_struct set_pte(page_table, pte); SetPageAnon(page); pte_chain = page_add_rmap(page, page_table, pte_chain); + increment_rss(ptepage); + decrement_swapcount(ptepage); /* No need to invalidate - it was non-present before */ update_mmu_cache(vma, address, pte); pte_unmap(page_table); - spin_unlock(&mm->page_table_lock); + pte_page_unlock(ptepage); out: pte_chain_free(pte_chain); return ret; @@ -1246,20 +1404,10 @@ do_anonymous_page(struct mm_struct *mm, { pte_t entry; struct page * page = ZERO_PAGE(addr); - struct pte_chain *pte_chain; + struct ptpage *ptepage = pmd_ptpage(*pmd); + struct pte_chain *pte_chain = NULL; int ret; - pte_chain = pte_chain_alloc(GFP_ATOMIC); - if (!pte_chain) { - pte_unmap(page_table); - spin_unlock(&mm->page_table_lock); - pte_chain = pte_chain_alloc(GFP_KERNEL); - if (!pte_chain) - goto no_mem; - spin_lock(&mm->page_table_lock); - page_table = pte_offset_map(pmd, addr); - } - /* Read-only mapping of ZERO_PAGE. 
*/ entry = pte_wrprotect(mk_pte(ZERO_PAGE(addr), vma->vm_page_prot)); @@ -1267,44 +1415,48 @@ do_anonymous_page(struct mm_struct *mm, if (write_access) { /* Allocate our own private page. */ pte_unmap(page_table); - spin_unlock(&mm->page_table_lock); + pte_page_unlock(ptepage); + + pte_chain = pte_chain_alloc(GFP_KERNEL); + if (!pte_chain) { + ret = VM_FAULT_OOM; + goto out; + } page = alloc_page(GFP_HIGHUSER); - if (!page) - goto no_mem; + if (!page) { + ret = VM_FAULT_OOM; + goto out; + } clear_user_highpage(page, addr); - spin_lock(&mm->page_table_lock); + ptepage = pte_page_lock_pmd(mm, pmd); page_table = pte_offset_map(pmd, addr); if (!pte_none(*page_table)) { pte_unmap(page_table); page_cache_release(page); - spin_unlock(&mm->page_table_lock); + pte_page_unlock(ptepage); ret = VM_FAULT_MINOR; goto out; } - mm->rss++; flush_page_to_ram(page); entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))); lru_cache_add_active(page); mark_page_accessed(page); SetPageAnon(page); + pte_chain = page_add_rmap(page, page_table, pte_chain); + increment_rss(ptepage); } set_pte(page_table, entry); - /* ignores ZERO_PAGE */ - pte_chain = page_add_rmap(page, page_table, pte_chain); pte_unmap(page_table); /* No need to invalidate - it was non-present before */ update_mmu_cache(vma, addr, entry); - spin_unlock(&mm->page_table_lock); + pte_page_unlock(ptepage); ret = VM_FAULT_MINOR; - goto out; -no_mem: - ret = VM_FAULT_OOM; out: pte_chain_free(pte_chain); return ret; @@ -1327,6 +1479,7 @@ do_no_page(struct mm_struct *mm, struct unsigned long address, int write_access, pte_t *page_table, pmd_t *pmd) { struct page * new_page; + struct ptpage *ptepage = pmd_ptpage(*pmd); pte_t entry; struct pte_chain *pte_chain; int ret; @@ -1335,7 +1488,7 @@ do_no_page(struct mm_struct *mm, struct return do_anonymous_page(mm, vma, page_table, pmd, write_access, address); pte_unmap(page_table); - spin_unlock(&mm->page_table_lock); + pte_page_unlock(ptepage); new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, 0); @@ -1365,7 +1518,7 @@ do_no_page(struct mm_struct *mm, struct new_page = page; } - spin_lock(&mm->page_table_lock); + pte_page_lock_pmd(mm, pmd); page_table = pte_offset_map(pmd, address); /* @@ -1380,7 +1533,6 @@ do_no_page(struct mm_struct *mm, struct */ /* Only go through if we didn't race with anybody else... */ if (pte_none(*page_table)) { - ++mm->rss; flush_page_to_ram(new_page); flush_icache_page(vma, new_page); entry = mk_pte(new_page, vma->vm_page_prot); @@ -1389,18 +1541,19 @@ do_no_page(struct mm_struct *mm, struct set_pte(page_table, entry); pte_chain = page_add_rmap(new_page, page_table, pte_chain); pte_unmap(page_table); + increment_rss(ptepage); } else { /* One of our sibling threads was faster, back out. 
*/ pte_unmap(page_table); page_cache_release(new_page); - spin_unlock(&mm->page_table_lock); + pte_page_unlock(ptepage); ret = VM_FAULT_MINOR; goto out; } /* no need to invalidate: a not-present page shouldn't be cached */ update_mmu_cache(vma, address, entry); - spin_unlock(&mm->page_table_lock); + pte_page_unlock(ptepage); ret = VM_FAULT_MAJOR; goto out; oom: @@ -1495,7 +1648,7 @@ static inline int handle_pte_fault(struc entry = pte_mkyoung(entry); establish_pte(vma, address, pte, entry); pte_unmap(pte); - spin_unlock(&mm->page_table_lock); + pte_page_unlock(pmd_ptpage(*pmd)); return VM_FAULT_MINOR; } @@ -1524,9 +1677,13 @@ int handle_mm_fault(struct mm_struct *mm pmd = pmd_alloc(mm, pgd, address); if (pmd) { - pte_t * pte = pte_alloc_map(mm, pmd, address); - if (pte) + pte_t * pte; + + pte = pte_fault_alloc(mm, vma, pmd, address, write_access); + if (pte) { + spin_unlock(&mm->page_table_lock); return handle_pte_fault(mm, vma, address, write_access, pte, pmd); + } } spin_unlock(&mm->page_table_lock); return VM_FAULT_OOM; diff -urpN -X /home/fletch/.diff.exclude 900-mjb1/mm/mmap.c 950-shpte/mm/mmap.c --- 900-mjb1/mm/mmap.c Wed Mar 26 22:54:38 2003 +++ 950-shpte/mm/mmap.c Sat Mar 29 07:53:14 2003 @@ -18,6 +18,8 @@ #include #include #include +#include +#include #include #include @@ -606,6 +608,7 @@ munmap_back: return -ENOMEM; goto munmap_back; } + unshare_page_range(mm, addr, len); /* Check against address space limit. */ if ((mm->total_vm << PAGE_SHIFT) + len @@ -1015,69 +1018,6 @@ find_extend_vma(struct mm_struct * mm, u } #endif -/* - * Try to free as many page directory entries as we can, - * without having to work very hard at actually scanning - * the page tables themselves. - * - * Right now we try to free page tables if we have a nice - * PGDIR-aligned area that got free'd up. We could be more - * granular if we want to, but this is fast and simple, - * and covers the bad cases. - * - * "prev", if it exists, points to a vma before the one - * we just free'd - but there's no telling how much before. - */ -static void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *prev, - unsigned long start, unsigned long end) -{ - unsigned long first = start & PGDIR_MASK; - unsigned long last = end + PGDIR_SIZE - 1; - unsigned long start_index, end_index; - struct mm_struct *mm = tlb->mm; - - if (!prev) { - prev = mm->mmap; - if (!prev) - goto no_mmaps; - if (prev->vm_end > start) { - if (last > prev->vm_start) - last = prev->vm_start; - goto no_mmaps; - } - } - for (;;) { - struct vm_area_struct *next = prev->vm_next; - - if (next) { - if (next->vm_start < start) { - prev = next; - continue; - } - if (last > next->vm_start) - last = next->vm_start; - } - if (prev->vm_end > first) - first = prev->vm_end + PGDIR_SIZE - 1; - break; - } -no_mmaps: - if (last < first) /* for arches with discontiguous pgd indices */ - return; - /* - * If the PGD bits are not consecutive in the virtual address, the - * old method of shifting the VA >> by PGDIR_SHIFT doesn't work. - */ - start_index = pgd_index(first); - if (start_index < FIRST_USER_PGD_NR) - start_index = FIRST_USER_PGD_NR; - end_index = pgd_index(last); - if (end_index > start_index) { - clear_page_tables(tlb, start_index, end_index - start_index); - flush_tlb_pgtables(mm, first & PGDIR_MASK, last & PGDIR_MASK); - } -} - /* Normal function to fix up a mapping * This function is the default for when an area has no specific * function. This may be used as part of a more specific routine. 
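(Aside, not part of the patch: the free_pgtables()/clear_page_tables() pass removed above becomes unnecessary because a pte page is now freed as soon as its own counters say it is empty. A minimal sketch of that invariant, assuming only the mapcount/swapcount fields this patch adds to struct ptpage; the helper name is made up for illustration:)

	static inline int example_pte_page_is_empty(struct ptpage *ptepage)
	{
		/* kept in step by increment_rss()/decrement_rss() and
		 * increment_swapcount()/decrement_swapcount() */
		return ptepage->mapcount == 0 && ptepage->swapcount == 0;
	}

(zap_pte_range() and unmap_all_pages() make the equivalent check inline, then pmd_clear() the entry and free the pte page, so no later PGDIR-granular sweep is required.)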
@@ -1143,7 +1083,6 @@ static void unmap_region(struct mm_struc tlb = tlb_gather_mmu(mm, 0); unmap_vmas(&tlb, mm, vma, start, end, &nr_accounted); vm_unacct_memory(nr_accounted); - free_pgtables(tlb, prev, start, end); tlb_finish_mmu(tlb, start, end); } @@ -1402,25 +1341,16 @@ void build_mmap_rb(struct mm_struct * mm /* Release all mmaps. */ void exit_mmap(struct mm_struct *mm) { - struct mmu_gather *tlb; struct vm_area_struct *vma; - unsigned long nr_accounted = 0; profile_exit_mmap(mm); lru_add_drain(); - spin_lock(&mm->page_table_lock); - - tlb = tlb_gather_mmu(mm, 1); flush_cache_mm(mm); - /* Use ~0UL here to ensure all VMAs in the mm are unmapped */ - mm->map_count -= unmap_vmas(&tlb, mm, mm->mmap, 0, - ~0UL, &nr_accounted); - vm_unacct_memory(nr_accounted); - BUG_ON(mm->map_count); /* This is just debugging */ - clear_page_tables(tlb, FIRST_USER_PGD_NR, USER_PTRS_PER_PGD); - tlb_finish_mmu(tlb, 0, TASK_SIZE); + unmap_all_pages(mm); + + BUG_ON(mm->map_count); /* This is just debugging */ vma = mm->mmap; mm->mmap = mm->mmap_cache = NULL; @@ -1429,14 +1359,20 @@ void exit_mmap(struct mm_struct *mm) mm->total_vm = 0; mm->locked_vm = 0; - spin_unlock(&mm->page_table_lock); - /* * Walk the list again, actually closing and freeing it * without holding any MM locks. */ while (vma) { struct vm_area_struct *next = vma->vm_next; + + /* + * If the VMA has been charged for, account for its + * removal + */ + if (vma->vm_flags & VM_ACCOUNT) + vm_unacct_memory((vma->vm_end - vma->vm_start) >> PAGE_SHIFT); + remove_shared_vm_struct(vma); if (vma->vm_ops) { if (vma->vm_ops->close) diff -urpN -X /home/fletch/.diff.exclude 900-mjb1/mm/mprotect.c 950-shpte/mm/mprotect.c --- 900-mjb1/mm/mprotect.c Fri Dec 13 23:18:15 2002 +++ 950-shpte/mm/mprotect.c Sat Mar 29 07:53:14 2003 @@ -16,6 +16,8 @@ #include #include #include +#include +#include #include #include @@ -24,7 +26,7 @@ #include static inline void -change_pte_range(pmd_t *pmd, unsigned long address, +change_pte_range(struct vm_area_struct *vma, pmd_t *pmd, unsigned long address, unsigned long size, pgprot_t newprot) { pte_t * pte; @@ -37,11 +39,14 @@ change_pte_range(pmd_t *pmd, unsigned lo pmd_clear(pmd); return; } - pte = pte_offset_map(pmd, address); - address &= ~PMD_MASK; - end = address + size; - if (end > PMD_SIZE) - end = PMD_SIZE; + end = (address + PMD_SIZE) & PMD_MASK; + if (end > (address + size)) + end = address + size; + + pte = mprotect_shared_range(vma, pmd, address, end); + if (pte == NULL) + return; + do { if (pte_present(*pte)) { pte_t entry; @@ -56,11 +61,12 @@ change_pte_range(pmd_t *pmd, unsigned lo address += PAGE_SIZE; pte++; } while (address && (address < end)); + pte_page_unlock(pmd_ptpage(*pmd)); pte_unmap(pte - 1); } static inline void -change_pmd_range(pgd_t *pgd, unsigned long address, +change_pmd_range(struct vm_area_struct *vma, pgd_t *pgd, unsigned long address, unsigned long size, pgprot_t newprot) { pmd_t * pmd; @@ -74,12 +80,12 @@ change_pmd_range(pgd_t *pgd, unsigned lo return; } pmd = pmd_offset(pgd, address); - address &= ~PGDIR_MASK; - end = address + size; - if (end > PGDIR_SIZE) - end = PGDIR_SIZE; + end = (address + PGDIR_SIZE) & PGDIR_MASK; + if (end > (address + size)) + end = address + size; + do { - change_pte_range(pmd, address, end - address, newprot); + change_pte_range(vma, pmd, address, end - address, newprot); address = (address + PMD_SIZE) & PMD_MASK; pmd++; } while (address && (address < end)); @@ -98,7 +104,7 @@ change_protection(struct vm_area_struct BUG(); spin_lock(¤t->mm->page_table_lock); do 
{ - change_pmd_range(dir, start, end - start, newprot); + change_pmd_range(vma, dir, start, end - start, newprot); start = (start + PGDIR_SIZE) & PGDIR_MASK; dir++; } while (start && (start < end)); diff -urpN -X /home/fletch/.diff.exclude 900-mjb1/mm/mremap.c 950-shpte/mm/mremap.c --- 900-mjb1/mm/mremap.c Mon Mar 17 21:43:50 2003 +++ 950-shpte/mm/mremap.c Sat Mar 29 07:53:14 2003 @@ -16,106 +16,23 @@ #include #include #include +#include #include #include #include #include -static pte_t *get_one_pte_map_nested(struct mm_struct *mm, unsigned long addr) -{ - pgd_t *pgd; - pmd_t *pmd; - pte_t *pte = NULL; - - pgd = pgd_offset(mm, addr); - if (pgd_none(*pgd)) - goto end; - if (pgd_bad(*pgd)) { - pgd_ERROR(*pgd); - pgd_clear(pgd); - goto end; - } - - pmd = pmd_offset(pgd, addr); - if (pmd_none(*pmd)) - goto end; - if (pmd_bad(*pmd)) { - pmd_ERROR(*pmd); - pmd_clear(pmd); - goto end; - } - - pte = pte_offset_map_nested(pmd, addr); - if (pte_none(*pte)) { - pte_unmap_nested(pte); - pte = NULL; - } -end: - return pte; -} - -#ifdef CONFIG_HIGHPTE /* Save a few cycles on the sane machines */ -static inline int page_table_present(struct mm_struct *mm, unsigned long addr) -{ - pgd_t *pgd; - pmd_t *pmd; - - pgd = pgd_offset(mm, addr); - if (pgd_none(*pgd)) - return 0; - pmd = pmd_offset(pgd, addr); - return pmd_present(*pmd); -} -#else -#define page_table_present(mm, addr) (1) -#endif - -static inline pte_t *alloc_one_pte_map(struct mm_struct *mm, unsigned long addr) -{ - pmd_t *pmd; - pte_t *pte = NULL; - - pmd = pmd_alloc(mm, pgd_offset(mm, addr), addr); - if (pmd) - pte = pte_alloc_map(mm, pmd, addr); - return pte; -} - -static int -copy_one_pte(struct mm_struct *mm, pte_t *src, pte_t *dst, - struct pte_chain **pte_chainp) -{ - int error = 0; - pte_t pte; - struct page *page = NULL; - - if (pte_present(*src)) - page = pte_page(*src); - - if (!pte_none(*src)) { - if (page) - page_remove_rmap(page, src); - pte = ptep_get_and_clear(src); - if (!dst) { - /* No dest? We must put it back. */ - dst = src; - error++; - } - set_pte(dst, pte); - if (page) - *pte_chainp = page_add_rmap(page, dst, *pte_chainp); - } - return error; -} - static int move_one_page(struct vm_area_struct *vma, unsigned long old_addr, unsigned long new_addr) { struct mm_struct *mm = vma->vm_mm; int error = 0; - pte_t *src, *dst; + struct ptpage *src_page, *dst_page; + pgd_t *src_pgd, *dst_pgd; + pmd_t *src_pmd, *dst_pmd; + pte_t *src_pte, *dst_pte; struct pte_chain *pte_chain; pte_chain = pte_chain_alloc(GFP_KERNEL); @@ -124,28 +41,61 @@ move_one_page(struct vm_area_struct *vma goto out; } spin_lock(&mm->page_table_lock); - src = get_one_pte_map_nested(mm, old_addr); - if (src) { - /* - * Look to see whether alloc_one_pte_map needs to perform a - * memory allocation. 
If it does then we need to drop the - * atomic kmap - */ - if (!page_table_present(mm, new_addr)) { - pte_unmap_nested(src); - src = NULL; + src_pgd = pgd_offset(mm, old_addr); + dst_pgd = pgd_offset(mm, new_addr); + src_pmd = pmd_offset(src_pgd, old_addr); + + /* If there isn't a pmd to copy from, we're done */ + if (!src_pmd) + goto out_unlock; + if (!pmd_present(*src_pmd)) + goto out_unlock; + + dst_pmd = pmd_alloc(mm, dst_pgd, new_addr); + if (!dst_pmd) { + error++; + goto out_unlock; + } + + mremap_unshare(vma->vm_mm, src_pmd, dst_pmd, old_addr, new_addr); + + dst_pte = pte_alloc_map(mm, dst_pmd, new_addr); + if (!dst_pte) { + error++; + goto out_unlock; + } + dst_page = pmd_ptpage(*dst_pmd); + pte_page_lock(dst_page); + + src_page = pmd_ptpage(*src_pmd); + if (src_page != dst_page) + pte_page_lock(src_page); + src_pte = pte_offset_map_nested(src_pmd, old_addr); + + if (!pte_none(*src_pte)) { + pte_t pte = ptep_get_and_clear(src_pte); + set_pte(dst_pte, pte); + if (pte_present(pte)) { + struct page *page = pte_page(pte); + page_remove_rmap(page, src_pte); + if (src_page != dst_page) { + decrement_rss(src_page); + increment_rss(dst_page); + } + pte_chain = page_add_rmap(page, dst_pte, pte_chain); } - dst = alloc_one_pte_map(mm, new_addr); - if (src == NULL) - src = get_one_pte_map_nested(mm, old_addr); - error = copy_one_pte(mm, src, dst, &pte_chain); - pte_unmap_nested(src); - pte_unmap(dst); } + pte_unmap_nested(src_pte); + pte_unmap(dst_pte); + pte_page_unlock(dst_page); + if (src_page != dst_page) + pte_page_unlock(src_page); flush_tlb_page(vma, old_addr); + +out_unlock: spin_unlock(&mm->page_table_lock); - pte_chain_free(pte_chain); out: + pte_chain_free(pte_chain); return error; } diff -urpN -X /home/fletch/.diff.exclude 900-mjb1/mm/msync.c 950-shpte/mm/msync.c --- 900-mjb1/mm/msync.c Sun Nov 17 20:29:31 2002 +++ 950-shpte/mm/msync.c Sat Mar 29 07:53:14 2003 @@ -11,6 +11,8 @@ #include #include #include +#include +#include #include #include @@ -43,6 +45,7 @@ static int filemap_sync_pte_range(pmd_t unsigned long address, unsigned long end, struct vm_area_struct *vma, unsigned int flags) { + struct ptpage *ptepage; pte_t *pte; int error; @@ -53,6 +56,8 @@ static int filemap_sync_pte_range(pmd_t pmd_clear(pmd); return 0; } + ptepage = pmd_ptpage(*pmd); + pte_page_lock(ptepage); pte = pte_offset_map(pmd, address); if ((address & PMD_MASK) != (end & PMD_MASK)) end = (address & PMD_MASK) + PMD_SIZE; @@ -64,6 +69,7 @@ static int filemap_sync_pte_range(pmd_t } while (address && (address < end)); pte_unmap(pte - 1); + pte_page_unlock(ptepage); return error; } diff -urpN -X /home/fletch/.diff.exclude 900-mjb1/mm/ptshare.c 950-shpte/mm/ptshare.c --- 900-mjb1/mm/ptshare.c Wed Dec 31 16:00:00 1969 +++ 950-shpte/mm/ptshare.c Sat Mar 29 07:53:14 2003 @@ -0,0 +1,841 @@ +/* + * mm/ptshare.c + * + * Shared page table support. + * + * Created 2002 by Dave McCracken (dmccr@us.ibm.com) + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +/* + * Protections that can be set on the pmd entry (see discussion in mmap.c). + */ +static pgprot_t protection_pmd[8] = { + __PMD000, __PMD001, __PMD010, __PMD011, __PMD100, __PMD101, __PMD110, __PMD111 +}; + +/** + * is_pte_shared - Basic test for whether a pte page is shared + * @ptepage - the struct page of the pte page to test + * + * The count field in the page struct counts how many page tables are using this pte + * page. 
The share test simply tests for more than one reference. + */ +static inline int is_pte_shared(struct ptpage *ptepage) +{ + return page_count(ptepage) > 1; +} + +/** + * pte_needs_unshare - Test whether a pte page needs to be unshared at fault time + * @mm - The mm_struct being faulted + * @vma - The vma describing the range the faulting address is in + * @pmd - The pmd entry of the faulting address + * @address - The faulting address itself + * @write_access - True if it was a write fault + * + * This function makes the decision whether a pte page needs to be + * unshared or not. Note that page_count() == 1 isn't even tested + * here. The assumption is that if the pmd entry is marked writeable, + * then the page is either already unshared or doesn't need to be + * unshared. This catches the situation where task B unshares the pte + * page, then task A faults and needs to unprotect the pmd entry. + * This is actually done in pte_unshare. + * + * This function should be called with the page_table_lock held. + */ +static int pte_needs_unshare(struct mm_struct *mm, + struct vm_area_struct *vma, + pmd_t *pmd, unsigned long address, + int write_access) +{ + struct ptpage *ptepage; + + /* It's not even there, nothing to unshare. */ + if (!pmd_present(*pmd)) + return 0; + + /* + * If it's already writable, then it doesn't need to be unshared. + * It's either already not shared or it's part of a large shared + * region that will never need to be unshared. + */ + if (pmd_write(*pmd)) + return 0; + + /* If this isn't a write fault we don't need to unshare. */ + if (!write_access) + return 0; + + /* + * If this page fits entirely inside a shared region, don't unshare it. + */ + ptepage = pmd_ptpage(*pmd); + if ((vma->vm_flags & VM_SHARED) && + (vma->vm_start <= ptepage->virtual) && + (vma->vm_end >= (ptepage->virtual + PMD_SIZE))) { + return 0; + } + /* + * Ok, we have to unshare. + */ + return 1; +} + +/** + * pte_unshare - Unshare a pte page + * @mm: the mm_struct that gets an unshared pte page + * @pmd: a pointer to the pmd entry that needs unsharing + * @address: the virtual address that triggered the unshare + * + * Here is where a pte page is actually unshared. It actually covers + * a couple of possible conditions. If the page_count() is already 1, + * then that means it just needs to be set writeable. Otherwise, a + * new page needs to be allocated. + * + * When each pte entry is copied, it is evaluated for COW protection, + * as well as checking whether the swap count needs to be incremented. + * + * This function must be called with the page_table_lock held. It + * will release and reacquire the lock when it allocates a new page. + * + * The function must also be called with the pte_page_lock held on the + * old page. This lock will also be dropped, then reacquired when we + * allocate a new page. The pte_page_lock will be taken on the new + * page. Whichever pte page is returned will have its pte_page_lock + * held.
+ */ + +static pte_t *pte_unshare(struct mm_struct *mm, pmd_t *pmd, unsigned long address) +{ + pte_t *src_ptb, *dst_ptb; + struct ptpage *oldpage, *newpage, *tmppage; + struct vm_area_struct *vma; + struct pte_chain *pte_chain = NULL; + int base, addr; + int end, page_end; + int src_unshare; + +retry: + tmppage = oldpage = pmd_ptpage(*pmd); + + /* If it's already unshared, we just need to set it writeable */ + if (!is_pte_shared(oldpage)) + goto is_unshared; + + pte_page_unlock(oldpage); + spin_unlock(&mm->page_table_lock); + newpage = pte_alloc_one(mm, address); + if (newpage) { + pte_chain = pte_chain_alloc(GFP_KERNEL); + if (pte_chain) { + down(&oldpage->sem); + } + } + spin_lock(&mm->page_table_lock); + if (unlikely(!newpage)) + return NULL; + if (!pte_chain) { + put_page((struct page *)newpage); + return NULL; + } + + /* + * Fetch the ptepage pointer again in case it changed while + * the lock was dropped. + */ + oldpage = pmd_ptpage(*pmd); + pte_page_lock(oldpage); + if (tmppage != oldpage) { + up(&tmppage->sem); + pte_free(newpage); + pte_chain_free(pte_chain); + goto retry; + } + + /* See if it got unshared while we dropped the lock */ + if (!is_pte_shared(oldpage)) { + pte_free(newpage); + up(&oldpage->sem); + goto is_unshared; + } + + pte_page_lock(newpage); + + init_MUTEX(&newpage->sem); + newpage->mapcount = newpage->swapcount = 0; + + base = addr = oldpage->virtual; + page_end = base + PMD_SIZE; + vma = find_vma(mm, base); + src_unshare = page_count(oldpage) == 2; + dst_ptb = pte_page_map((struct page *)newpage, base); + + if (!vma || (page_end <= vma->vm_start)) { + goto no_vma; + } + + if (vma->vm_start > addr) + addr = vma->vm_start; + + if (vma->vm_end < page_end) + end = vma->vm_end; + else + end = page_end; + + src_ptb = pte_page_map_nested((struct page *)oldpage, base); + + do { + unsigned int cow = 0; + pte_t *src_pte = src_ptb + pte_index(addr); + pte_t *dst_pte = dst_ptb + pte_index(addr); + + cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE; + + do { + pte_t pte = *src_pte; + struct page *page; + + if (pte_none(pte)) + goto unshare_skip_set; + + if (!pte_present(pte)) { + swap_duplicate(pte_to_swp_entry(pte)); + set_pte(dst_pte, pte); + newpage->swapcount++; + goto unshare_skip_set; + } + page = pte_page(pte); + if (!PageReserved(page)) { + /* COW mappings require write protecting both sides */ + if (cow) { + pte = pte_wrprotect(pte); + if (src_unshare) + set_pte(src_pte, pte); + } + /* If it's a shared mapping, + * mark it clean in the new mapping + */ + if (vma->vm_flags & VM_SHARED) + pte = pte_mkclean(pte); + pte = pte_mkold(pte); + get_page(page); + newpage->mapcount++; + } + set_pte(dst_pte, pte); + pte_chain = page_add_rmap(page, dst_pte, pte_chain); + if (!pte_chain) + pte_chain = pte_chain_alloc(GFP_ATOMIC); + if (!pte_chain) { + pte_unmap_nested(src_ptb); + pte_unmap(dst_ptb); + pte_page_unlock(newpage); + pte_page_unlock(oldpage); + spin_unlock(&mm->page_table_lock); + pte_chain = pte_chain_alloc(GFP_KERNEL); + if (!pte_chain) { + spin_lock(&mm->page_table_lock); + return NULL; + } + spin_lock(&mm->page_table_lock); + oldpage = pmd_ptpage(*pmd); + pte_page_lock(oldpage); + pte_page_lock(newpage); + dst_ptb = pte_page_map((struct page *)newpage, addr); + src_ptb = pte_page_map_nested((struct page *)oldpage, addr); + } +unshare_skip_set: + src_pte++; + dst_pte++; + addr += PAGE_SIZE; + } while (addr < end); + + if (addr >= page_end) + break; + + vma = vma->vm_next; + if (!vma) + break; + + if (page_end <= vma->vm_start) + break; + + addr = 
vma->vm_start; + if (vma->vm_end < page_end) + end = vma->vm_end; + else + end = page_end; + } while (1); + + pte_unmap_nested(src_ptb); + +no_vma: + up(&oldpage->sem); + SetPagePtepage(newpage); + pgtable_remove_rmap_locked(oldpage, mm); + pgtable_add_rmap_locked(newpage, mm, base); + pmd_populate(mm, pmd, newpage); + inc_page_state(nr_page_table_pages); + + flush_tlb_mm(mm); + + put_page((struct page *)oldpage); + + pte_page_unlock(oldpage); + pte_chain_free(pte_chain); + return dst_ptb + pte_index(address); + +is_unshared: + pmd_populate(mm, pmd, oldpage); + flush_tlb_mm(mm); + pte_chain_free(pte_chain); + return pte_offset_map(pmd, address); +} + +/** + * pte_try_to_share - Attempt to find a pte page that can be shared + * @mm: the mm_struct that needs a pte page + * @vma: the vm_area the address is in + * @pmd: a pointer to the pmd entry that needs filling + * @address: the address that caused the fault + * + * This function is called during a page fault. If there is no pte + * page for this address, it checks the vma to see if it is shared, + * and if it spans the pte page. If so, it goes to the address_space + * structure and looks through for matching vmas from other tasks that + * already have a pte page that can be shared. If it finds one, it + * attaches it and makes it a shared page. + */ + +static pte_t *pte_try_to_share(struct mm_struct *mm, struct vm_area_struct *vma, + pmd_t *pmd, unsigned long address) +{ + struct address_space *as; + struct vm_area_struct *lvma; + struct ptpage *ptepage; + unsigned long base; + pte_t *pte = NULL; + + /* It's not even shared memory. We definitely can't share the page. */ + if (!(vma->vm_flags & VM_SHARED)) + return NULL; + + /* Areas with nonlinear mappings can't be shared */ + if (vma->vm_flags & VM_NONLINEAR) + return NULL; + + /* We can only share if the entire pte page fits inside the vma */ + base = address & ~((PTRS_PER_PTE * PAGE_SIZE) - 1); + if ((base < vma->vm_start) || (vma->vm_end < (base + PMD_SIZE))) + return NULL; + + as = vma->vm_file->f_dentry->d_inode->i_mapping; + + down(&as->i_shared_sem); + + list_for_each_entry(lvma, &as->i_mmap_shared, shared) { + pgd_t *lpgd; + pmd_t *lpmd; + pmd_t pmdval; + + /* Skip the one we're working on */ + if (lvma == vma) + continue; + + /* We can't share with a nonlinear vma */ + if (lvma->vm_flags & VM_NONLINEAR) + return NULL; + + /* It has to be mapping to the same address */ + if ((lvma->vm_start != vma->vm_start) || + (lvma->vm_end != vma->vm_end) || + (lvma->vm_pgoff != vma->vm_pgoff)) + continue; + + lpgd = pgd_offset(lvma->vm_mm, address); + lpmd = pmd_offset(lpgd, address); + + /* This page table doesn't have a pte page either, so skip it. */ + if (!pmd_present(*lpmd)) + continue; + + /* Ok, we can share it. */ + + ptepage = pmd_ptpage(*lpmd); + pte_page_lock(ptepage); + get_page(ptepage); + pgtable_add_rmap_locked(ptepage, mm, address); + /* + * If this vma is only mapping it read-only, set the + * pmd entry read-only to protect it from writes. + * Otherwise set it writeable. 
+ */ + pmdval = *lpmd; + pmdval = pmd_modify(pmdval, protection_pmd[vma->vm_flags & 0x7]); + set_pmd(pmd, pmdval); + pte = pte_page_map((struct page *)ptepage, address); + break; + } + up(&as->i_shared_sem); + return pte; +} + +#define PMD_TABLE_MASK ((PTRS_PER_PMD-1) * sizeof(pmd_t)) + +/** + * share_page_range - share a range of pages at the pte page level at fork time + * @dst: the mm_struct of the forked child + * @src: the mm_struct of the forked parent + * @vma: the vm_area to be shared + * @prev_pmd: A pointer to the pmd entry we did at last invocation + * + * This function shares pte pages between parent and child at fork. + * If the vm_area is shared and spans the page, it sets it + * writeable. Otherwise it sets it read-only. The prev_pmd parameter + * is used to keep track of pte pages we've already shared, since this + * function can be called with multiple vmas that point to the same + * pte page. + */ +int share_page_range(struct mm_struct *dst, struct mm_struct *src, + struct vm_area_struct *vma, pmd_t **prev_pmd) +{ + pgd_t *src_pgd, *dst_pgd; + unsigned long address = vma->vm_start; + unsigned long end = vma->vm_end; + + if (is_vm_hugetlb_page(vma)) + return copy_hugetlb_page_range(dst, src, vma); + + src_pgd = pgd_offset(src, address)-1; + dst_pgd = pgd_offset(dst, address)-1; + + for (;;) { + pmd_t * src_pmd, * dst_pmd; + + src_pgd++; dst_pgd++; + + if (pgd_none(*src_pgd)) + goto skip_share_pmd_range; + if (pgd_bad(*src_pgd)) { + pgd_ERROR(*src_pgd); + pgd_clear(src_pgd); +skip_share_pmd_range: address = (address + PGDIR_SIZE) & PGDIR_MASK; + if (!address || (address >= end)) + goto out; + continue; + } + + src_pmd = pmd_offset(src_pgd, address); + dst_pmd = pmd_alloc(dst, dst_pgd, address); + if (!dst_pmd) + goto nomem; + + spin_lock(&src->page_table_lock); + + do { + pmd_t pmdval = *src_pmd; + struct ptpage *ptepage = pmd_ptpage(pmdval); + + if (pmd_none(pmdval)) + goto skip_share_pte_range; + if (pmd_bad(pmdval)) { + pmd_ERROR(*src_pmd); + pmd_clear(src_pmd); + goto skip_share_pte_range; + } + + /* + * We set the pmd read-only in both the parent and the + * child unless it's a writeable shared region that + * spans the entire pte page. + */ + if ((((vma->vm_flags & (VM_SHARED|VM_WRITE)) != + (VM_SHARED|VM_WRITE)) || + (ptepage->virtual < vma->vm_start) || + ((ptepage->virtual + PMD_SIZE) > vma->vm_end)) && + pmd_write(pmdval)) { + pmdval = pmd_wrprotect(pmdval); + set_pmd(src_pmd, pmdval); + } + set_pmd(dst_pmd, pmdval); + + /* Only do this if we haven't seen this pte page before */ + if (src_pmd != *prev_pmd) { + get_page(ptepage); + pgtable_add_rmap(ptepage, dst, address); + atomic_inc(&dst->ptepages); + *prev_pmd = src_pmd; + dst->rss += ptepage->mapcount; + } + +skip_share_pte_range: address = (address + PMD_SIZE) & PMD_MASK; + if (address >= end) + goto out_unlock; + + src_pmd++; + dst_pmd++; + } while ((unsigned long)src_pmd & PMD_TABLE_MASK); + spin_unlock(&src->page_table_lock); + } + +out_unlock: + spin_unlock(&src->page_table_lock); + +out: + return 0; +nomem: + return -ENOMEM; +} + +/** + * fork_page_range - Either copy or share a page range at fork time + * @dst: the mm_struct of the forked child + * @src: the mm_struct of the forked parent + * @vma: the vm_area to be shared + * @prev_pmd: A pointer to the pmd entry we did at last invocation + * + * This wrapper decides whether to share page tables on fork or just make + * a copy. 
The current criterion is whether a page table has more than 3 + * pte pages, since all forked processes will unshare 3 pte pages after fork, + * even the ones doing an immediate exec. Tests indicate that if a page + * table has more than 3 pte pages, it's a performance win to share. + */ +int fork_page_range(struct mm_struct *dst, struct mm_struct *src, + struct vm_area_struct *vma, pmd_t **prev_pmd) +{ + if (atomic_read(&src->ptepages) > 3) + return share_page_range(dst, src, vma, prev_pmd); + + return copy_page_range(dst, src, vma); +} + +/** + * unshare_page_range - Make sure no pte pages are shared in a given range + * @mm: the mm_struct whose page table we unshare from + * @address: the base address of the range + * @len: the size of the range + * + * This function is called when a memory region is mapped. It makes sure there + * are no shared pte pages in the region. This is necessary to make sure the + * parent and child don't try to map competing regions into the same shared + * pte page. + */ +void unshare_page_range(struct mm_struct *mm, unsigned long address, unsigned long len) +{ + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; + struct ptpage *ptepage; + unsigned long end = address + len; + unsigned long pmd_end; + + spin_lock(&mm->page_table_lock); + + do { + pmd_end = (address + PGDIR_SIZE) & PGDIR_MASK; + if (pmd_end > end) + pmd_end = end; + + pgd = pgd_offset(mm, address); + if (pgd_present(*pgd)) do { + pmd = pmd_offset(pgd, address); + if (pmd_present(*pmd)) { + ptepage = pmd_ptpage(*pmd); + pte_page_lock(ptepage); + if (is_pte_shared(ptepage)) { + pte = pte_unshare(mm, pmd, address); + pte_unmap(pte); + ptepage = pmd_ptpage(*pmd); + } + pte_page_unlock(ptepage); + } + address = (address + PMD_SIZE) & PMD_MASK; + } while (address < pmd_end); + /* The end of the last time around is the start of the next one */ + address = pmd_end; + } while (address < end); + spin_unlock(&mm->page_table_lock); +} + +/** + * pte_alloc_unshare - Map and return an unshared pte page, allocating one if necessary + * @mm - The current mm_struct + * @pmd - The pmd entry that needs to be mapped and/or allocated + * @address - The current address, needed if a new pte page is allocated + * + * For a given pmd entry, make sure a pte page exists and is not shared, then map + * it and return it locked. + * + * This function must be called with the page_table_lock held. It takes the + * pte_page_lock for the pte page being returned and returns with it locked. + * It is up to the caller to unlock it. If the pte_alloc_map fails, NULL is + * returned and no lock is taken. + */ +pte_t *pte_alloc_unshare(struct mm_struct *mm, pmd_t *pmd, unsigned long address) +{ + pte_t *pte; + + if (pmd_present(*pmd)) { + struct ptpage *ptepage; + + ptepage = pmd_ptpage(*pmd); + pte_page_lock(ptepage); + if (is_pte_shared(ptepage)) { + pte = pte_unshare(mm, pmd, address); + } else + pte = pte_offset_map(pmd, address); + } else { + pte = pte_alloc_map(mm, pmd, address); + if (pte) + pte_page_lock(pmd_ptpage(*pmd)); + } + return pte; +} + +/** + * pte_map_unshare - if a pmd_entry exists, make sure it is unshared and map it + * @mm - The current mm_struct + * @pmd - The pmd entry that needs to be mapped + * @address - The current address, needed if it's unshared. + * + * If a pmd entry is valid, make sure the pte page is unshared, then map it + * and return it locked. If none exists, return NULL. + * + * This function must be called with the page_table_lock held. 
It takes the + * pte_page_lock for the pte page being returned and returns with it locked + * if one exists. It is up to the caller to unlock it. If no pte page exists + * no lock is taken. + */ +pte_t *pte_map_unshare(struct mm_struct *mm, pmd_t *pmd, unsigned long address) +{ + pte_t *pte; + + if (pmd_present(*pmd)) { + struct ptpage *ptepage; + + ptepage = pmd_ptpage(*pmd); + pte_page_lock(ptepage); + if (is_pte_shared(ptepage)) { + pte = pte_unshare(mm, pmd, address); + } else + pte = pte_offset_map(pmd, address); + } else + pte = NULL; + + return pte; +} + +/** + * zap_shared_range - helper function for zap_pmd_range in mm/memory.c + * @tlb - The mmu_gather_t being used to coalesce deleted pages + * @pmd - The pmd entry currently being worked on + * @address - The start of the current range + * @end - The end of the current range + * + * Returns false if the pte page was shared and the count decremented, + * true if the page wasn't shared or was unshared. + * + * This function is called as part of deleting a range of pages from a page + * table. It takes care of detecting when a pmd entry points to a shared pte + * page. + * + * If the pte page is shared and the range covers the entire pte page, + * the share count is decremented and the function returns false. If + * the range does not cover the entire pte page, the pte page is unshared. + * If the pte page is not shared or was unshared, the pte_page_lock is taken + * and the function returns true. It is the responsibility of the caller + * to unlock it. + */ +int zap_shared_range(struct mmu_gather **tlb, pmd_t *pmd, + unsigned long address, unsigned long end) +{ + struct mm_struct *mm = (*tlb)->mm; + struct ptpage *ptepage; + int ret = 1; + + ptepage = pmd_ptpage(*pmd); + pte_page_lock(ptepage); + if (is_pte_shared(ptepage)) { + if ((address <= ptepage->virtual) && + (end >= (ptepage->virtual + PMD_SIZE))) { + pmd_clear(pmd); + pgtable_remove_rmap_locked(ptepage, mm); + mm->rss -= ptepage->mapcount; + atomic_dec(&mm->ptepages); + put_page((struct page *)ptepage); + pte_page_unlock(ptepage); + ret = 0; + } else { + pte_t *pte; + + tlb_finish_mmu(*tlb, address, end); + pte = pte_unshare(mm, pmd, address); + pte_unmap(pte); + *tlb = tlb_gather_mmu(mm, 0); + } + + } + return ret; +} + +/** + * zap_shared_pmd - helper function for unmap_all_pages in mm/memory.c + * @mm - The mm_struct this page table is associated with + * @pmd - The pmd entry currently being worked on + * + * Returns false if the pte page was shared and the count decremented, + * true if the page wasn't shared. + * + * This function is called when an entire page table is being removed. It + * detects when a pte page is shared and takes care of decrementing the count. + */ +int zap_shared_pmd(struct mm_struct *mm, pmd_t *pmd) +{ + struct ptpage *ptepage; + int ret = 1; + + ptepage = pmd_ptpage(*pmd); + pte_page_lock(ptepage); + if (is_pte_shared(ptepage)) { + pmd_clear(pmd); + pgtable_remove_rmap_locked(ptepage, mm); + mm->rss -= ptepage->mapcount; + atomic_dec(&mm->ptepages); + put_page((struct page *)ptepage); + ret = 0; + } + pte_page_unlock(ptepage); + return ret; +} + +/** + * mprotect_shared_range - Helper function for change_pte_range in mm/mprotect.c + * @vma - The memory area being changed + * @pmd - The current pmd entry + * @address - The base of the current range + * @end - The end of the current range + * + * If the current range spans the entire pte page, set protections at the pmd entry + * level and return NULL to show nothing else needs to be done.
Otherwise lock and + * map the pte page to be worked on. It is up to the caller to unmap the pte pointer + * and unlock the pte_page_lock if the pte page is returned. + */ +pte_t *mprotect_shared_range(struct vm_area_struct *vma, pmd_t *pmd, + unsigned long address, unsigned long end) +{ + struct ptpage *ptepage; + pte_t *pte; + + ptepage = pmd_ptpage(*pmd); + pte_page_lock(ptepage); + + if (is_pte_shared(ptepage)) { + if (((address & ~PMD_MASK) == 0) && ((end & ~PMD_MASK) == 0)) { + pmd_t pmdval = *pmd; + + pmdval = pmd_modify(pmdval, protection_pmd[vma->vm_flags & 0x7]); + set_pmd(pmd, pmdval); + pte_page_unlock(ptepage); + pte = NULL; + } else + pte = pte_unshare(vma->vm_mm, pmd, address); + } else + pte = pte_offset_map(pmd, address); + + return pte; +} + +/** + * mremap_unshare - Helper function for move_one_page in mm/mremap.c + * @mm - The current mm_struct + * @src_pmd - The originating pmd entry + * @dst_pmd - The target pmd entry + * @src_addr - The source address + * @dst_addr - The destination address + * + * Make sure both source and destination are unshared for mremap. Note that + * the existence of src_pmd is guaranteed by the caller, but dst_pmd may + * not exist. The mappings are discarded here since mremap needs them mapped + * differently. + * + * Both the page_table_lock and the mmap_sem are held when this function is called, + * so it is safe to not keep the pte_page_locks for these pages when it's finished. + */ + +void mremap_unshare(struct mm_struct *mm, pmd_t *src_pmd, pmd_t *dst_pmd, + unsigned long src_addr, unsigned long dst_addr) +{ + struct ptpage *ptepage; + pte_t *pte; + + ptepage = pmd_ptpage(*src_pmd); + pte_page_lock(ptepage); + if (is_pte_shared(ptepage)) { + pte = pte_unshare(mm, src_pmd, src_addr); + pte_unmap(pte); + ptepage = pmd_ptpage(*src_pmd); + } + pte_page_unlock(ptepage); + + if ((src_pmd != dst_pmd) && + (pmd_present(*dst_pmd))) { + ptepage = pmd_ptpage(*dst_pmd); + pte_page_lock(ptepage); + if (is_pte_shared(ptepage)) { + pte = pte_unshare(mm, dst_pmd, dst_addr); + pte_unmap(pte); + ptepage = pmd_ptpage(*dst_pmd); + } + pte_page_unlock(ptepage); + } +} + +/** + * pte_fault_alloc - Helper function for handle_mm_fault in mm/memory.c + * @mm - The faulting mm_struct + * @vma - The area the fault is in + * @pmd - The pmd entry that needs handling + * @address - The faulting address + * @write_access - True if it's a write fault + * + * This function takes care of allocating and/or sharing/unsharing the pte + * page on a page fault. It determines the shareability of the pte page based + * on the type of fault and the flags in the vma. It then locks and maps + * the pte page before returning a pointer to the pte entry that needs to + * be filled in by the fault.
+ */ +pte_t *pte_fault_alloc(struct mm_struct *mm, struct vm_area_struct *vma, + pmd_t *pmd, unsigned long address, int write_access) +{ + pte_t *pte; + + if (pmd_present(*pmd)) { + pte_page_lock(pmd_ptpage(*pmd)); + if (pte_needs_unshare(mm, vma, pmd, address, write_access)) + pte = pte_unshare(mm, pmd, address); + else + pte = pte_offset_map(pmd, address); + } else { + pte = pte_try_to_share(mm, vma, pmd, address); + if (!pte) { + pte = pte_alloc_map(mm, pmd, address); + if (pte) + pte_page_lock(pmd_ptpage(*pmd)); + } + } + return pte; +} diff -urpN -X /home/fletch/.diff.exclude 900-mjb1/mm/rmap.c 950-shpte/mm/rmap.c --- 900-mjb1/mm/rmap.c Thu Mar 27 21:57:40 2003 +++ 950-shpte/mm/rmap.c Sat Mar 29 09:24:22 2003 @@ -14,11 +14,11 @@ /* * Locking: * - the page->pte.chain is protected by the PG_chainlock bit, - * which nests within the zone->lru_lock, then the - * mm->page_table_lock, and then the page lock. + * which nests within the zone->lru_lock, then the pte_page_lock, + * and then the page lock. * - because swapout locking is opposite to the locking order * in the page fault path, the swapout path uses trylocks - * on the mm->page_table_lock + * on the pte_page_lock. */ #include #include @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -48,11 +49,17 @@ */ #define NRPTE ((L1_CACHE_BYTES - sizeof(void *))/sizeof(pte_addr_t)) +struct mm_chain { + struct mm_chain *next; + struct mm_struct *mm; +}; + struct pte_chain { struct pte_chain *next; pte_addr_t ptes[NRPTE]; } ____cacheline_aligned; +kmem_cache_t *mm_chain_cache; kmem_cache_t *pte_chain_cache; /* @@ -76,6 +83,25 @@ kmem_cache_t *pte_chain_cache; ** VM stuff below this comment **/ +static inline struct mm_chain *mm_chain_alloc(void) +{ + struct mm_chain *ret; + + ret = kmem_cache_alloc(mm_chain_cache, GFP_ATOMIC); + return ret; +} + +static void mm_chain_free(struct mm_chain *mc, + struct mm_chain *prev_mc, struct ptpage *ptepage) +{ + if (prev_mc) + prev_mc->next = mc->next; + else if (ptepage) + ptepage->pte.mmchain = mc->next; + + kmem_cache_free(mm_chain_cache, mc); +} + /** * page_referenced - test if the page was referenced * @page: the page to test @@ -219,13 +245,140 @@ out: return referenced; } +/* + * pgtable_add_rmap_locked - Add an mm_struct to the chain for a pte page. + * @ptepage: The pte page to add the mm_struct to + * @mm: The mm_struct to add + * @address: The address of the page we're mapping + * + * Pte pages maintain a chain of mm_structs that use it. This adds a new + * mm_struct to the chain. + * + * This function must be called with the pte_page_lock held for the page + */ +void pgtable_add_rmap_locked(struct ptpage * ptepage, struct mm_struct * mm, + unsigned long address) +{ + struct mm_chain *mc; + +#ifdef BROKEN_PPC_PTE_ALLOC_ONE + /* OK, so PPC calls pte_alloc() before mem_map[] is setup ... ;( */ + extern int mem_init_done; + + if (!mem_init_done) + return; +#endif +#ifdef RMAP_DEBUG + BUG_ON(mm == NULL); + BUG_ON(!PagePtepage(ptepage)); +#endif + + if (PageDirect(ptepage)) { + mc = mm_chain_alloc(); + mc->mm = ptepage->pte.mmdirect; + mc->next = NULL; + ptepage->pte.mmchain = mc; + ClearPageDirect(ptepage); + } + if (ptepage->pte.mmchain) { + /* Hook up the mm_chain to the page. 
*/ + mc = mm_chain_alloc(); + mc->mm = mm; + mc->next = ptepage->pte.mmchain; + ptepage->pte.mmchain = mc; + } else { + ptepage->pte.mmdirect = mm; + SetPageDirect(ptepage); + ptepage->virtual = address & ~((PTRS_PER_PTE * PAGE_SIZE) - 1); + } +} + +/* + * pgtable_remove_rmap_locked - Remove an mm_struct from the chain for a pte page. + * @ptepage: The pte page to remove the mm_struct from + * @mm: The mm_struct to remove + * + * Pte pages maintain a chain of mm_structs that use it. This removes an + * mm_struct from the chain. + * + * This function must be called with the pte_page_lock held for the page + */ +void pgtable_remove_rmap_locked(struct ptpage *ptepage, struct mm_struct *mm) +{ + struct mm_chain * mc, * prev_mc = NULL; + +#ifdef DEBUG_RMAP + BUG_ON(mm == NULL); + BUG_ON(!PagePtepage(ptepage)); +#endif + + if (PageDirect(ptepage)) { + if (ptepage->pte.mmdirect == mm) { + ptepage->pte.mmdirect = NULL; + ClearPageDirect(ptepage); + ptepage->virtual = 0; + goto out; + } + } else { +#ifdef DEBUG_RMAP + BUG_ON(ptepage->pte.mmchain->next == NULL); +#endif + for (mc = ptepage->pte.mmchain; mc; prev_mc = mc, mc = mc->next) { + if (mc->mm == mm) { + mm_chain_free(mc, prev_mc, ptepage); + /* Check whether we can convert to direct */ + mc = ptepage->pte.mmchain; + if (!mc->next) { + ptepage->pte.mmdirect = mc->mm; + SetPageDirect(ptepage); + mm_chain_free(mc, NULL, NULL); + } + goto out; + } + } + } + BUG(); +out: + return; +} + +/* + * pgtable_add_rmap - Add an mm_struct to the chain for a pte page. + * @ptepage: The pte page to add the mm_struct to + * @mm: The mm_struct to add + * @address: The address of the page we're mapping + * + * This is a wrapper for pgtable_add_rmap_locked that takes the lock + */ +void pgtable_add_rmap(struct ptpage *ptepage, struct mm_struct *mm, + unsigned long address) +{ + pte_page_lock(ptepage); + pgtable_add_rmap_locked(ptepage, mm, address); + pte_page_unlock(ptepage); +} + +/* + * pgtable_remove_rmap - Remove an mm_struct from the chain for a pte page. + * @ptepage: The pte page to remove the mm_struct from + * @mm: The mm_struct to remove + * + * This is a wrapper for pgtable_remove_rmap_locked that takes the lock + */ +void pgtable_remove_rmap(struct ptpage *ptepage, struct mm_struct *mm) +{ + pte_page_lock(ptepage); + pgtable_remove_rmap_locked(ptepage, mm); + pte_page_unlock(ptepage); +} + +/** + * page_add_rmap - add reverse mapping entry to a page + * @page: the page to add the mapping to + * @ptep: the page table entry mapping this page + * + * Add a new pte reverse mapping to a page. - * The caller needs to hold the mm->page_table_lock. + * The caller needs to hold the pte_page_lock.
*/ struct pte_chain * page_add_rmap(struct page *page, pte_t *ptep, struct pte_chain *pte_chain) @@ -239,8 +392,7 @@ page_add_rmap(struct page *page, pte_t * BUG(); if (!pte_present(*ptep)) BUG(); - if (!ptep_to_mm(ptep)) - BUG(); + BUG_ON(PagePtepage(page)); #endif if (!pfn_valid(page_to_pfn(page)) || PageReserved(page)) @@ -270,12 +422,15 @@ page_add_rmap(struct page *page, pte_t * if (page->pte.direct == pte_paddr) BUG(); } else { + int count = 0; for (pc = page->pte.chain; pc; pc = pc->next) { - for (i = 0; i < NRPTE; i++) { + for (i = 0; i < NRPTE; i++, count++) { pte_addr_t p = pc->ptes[i]; - if (p && p == pte_paddr) + if (p && p == pte_paddr) { + printk(KERN_ERR "page_add_rmap: page %08lx (count %d), ptep %08lx, rmap count %d\n", page, page_count(page), ptep, count); BUG(); + } } } } @@ -332,7 +487,7 @@ out: * Removes the reverse mapping from the pte_chain of the page, * after that the caller can clear the page table entry and free * the page. - * Caller needs to hold the mm->page_table_lock. + * Caller needs to hold the pte_page_lock. */ void page_remove_rmap(struct page * page, pte_t * ptep) { @@ -346,6 +501,10 @@ void page_remove_rmap(struct page * page if (!page_mapped(page)) return; /* remap_page_range() from a driver? */ +#ifdef DEBUG_RMAP + BUG_ON(PagePtepage(page)); +#endif + pte_chain_lock(page); if (!PageAnon(page)) { @@ -425,6 +584,117 @@ out: return; } +static int pgtable_check_mlocked_mm(struct mm_struct *mm, unsigned long address) +{ + struct vm_area_struct *vma; + int ret = SWAP_SUCCESS; + + /* + * If this mm is in the process of exiting, skip this page + * for now to let the exit finish. + */ + if (atomic_read(&mm->mm_users) == 0) { + ret = SWAP_AGAIN; + goto out; + } + + /* During mremap, it's possible pages are not in a VMA. */ + vma = find_vma(mm, address); + if (!vma) { + ret = SWAP_FAIL; + goto out; + } + + /* The page is mlock()d, we cannot swap it out. */ + if (vma->vm_flags & VM_LOCKED) { + ret = SWAP_FAIL; + } +out: + return ret; +} + +static int pgtable_check_mlocked(struct ptpage *ptepage, unsigned long address) +{ + struct mm_chain *mc; + int ret = SWAP_SUCCESS; + +#ifdef DEBUG_RMAP + BUG_ON(!PagePtepage(ptepage)); +#endif + if (PageDirect(ptepage)) { + ret = pgtable_check_mlocked_mm(ptepage->pte.mmdirect, address); + goto out; + } + + for (mc = ptepage->pte.mmchain; mc; mc = mc->next) { +#ifdef DEBUG_RMAP + BUG_ON(mc->mm == NULL); +#endif + ret = pgtable_check_mlocked_mm(mc->mm, address); + if (ret != SWAP_SUCCESS) + goto out; + } +out: + return ret; +} + +/** + * pgtable_unmap_one_mm - Decrement the rss count and flush for an mm_struct + * @mm: - the mm_struct to decrement + * @address: - The address of the page we're removing + * + * All pte pages keep a chain of mm_struct that are using it. This does a flush + * of the address for that mm_struct and decrements the rss count. + */ +static int pgtable_unmap_one_mm(struct mm_struct *mm, unsigned long address) +{ + struct vm_area_struct *vma; + int ret = SWAP_SUCCESS; + + /* During mremap, it's possible pages are not in a VMA. 
*/ + vma = find_vma(mm, address); + if (!vma) { + ret = SWAP_FAIL; + goto out; + } + flush_tlb_page(vma, address); + flush_cache_page(vma, address); + mm->rss--; + +out: + return ret; +} + +/** + * pgtable_unmap_one - Decrement all rss counts and flush caches for a pte page + * @ptepage: the pte page to decrement the count for + * @address: the address of the page we're removing + * + * This decrements the rss counts of all mm_structs that map this pte page + * and flushes the tlb and cache for these mm_structs and address + */ +static int pgtable_unmap_one(struct ptpage *ptepage, unsigned long address) +{ + struct mm_chain *mc; + int ret = SWAP_SUCCESS; + +#ifdef DEBUG_RMAP + BUG_ON(!PagePtepage(ptepage)); +#endif + + if (PageDirect(ptepage)) { + ret = pgtable_unmap_one_mm(ptepage->pte.mmdirect, address); + if (ret != SWAP_SUCCESS) + goto out; + } else for (mc = ptepage->pte.mmchain; mc; mc = mc->next) { + ret = pgtable_unmap_one_mm(mc->mm, address); + if (ret != SWAP_SUCCESS) + goto out; + } +out: + return ret; +} + static inline int try_to_unmap_obj_one(struct vm_area_struct *vma, struct page *page) { @@ -433,6 +703,7 @@ try_to_unmap_obj_one(struct vm_area_stru pmd_t *pmd; pte_t *pte; pte_t pteval; + struct ptpage *ptepage; unsigned long loffset; unsigned long address; int ret = SWAP_SUCCESS; @@ -465,15 +736,22 @@ try_to_unmap_obj_one(struct vm_area_stru if (page_to_pfn(page) != pte_pfn(*pte)) { goto out_unmap; } - - if (vma->vm_flags & VM_LOCKED) { - ret = SWAP_FAIL; + ptepage = pmd_ptpage(*pmd); + if (!pte_page_trylock(ptepage)) { + ret = SWAP_AGAIN; goto out_unmap; } - flush_cache_page(vma, address); + ret = pgtable_check_mlocked(ptepage, address); + if (ret != SWAP_SUCCESS) + goto out_unlock_pt; + pteval = ptep_get_and_clear(pte); - flush_tlb_page(vma, address); + ret = pgtable_unmap_one(ptepage, address); + if (ret != SWAP_SUCCESS) { + set_pte(pte, pteval); + goto out_unlock_pt; + } if (pte_dirty(pteval)) set_page_dirty(page); @@ -481,10 +759,13 @@ try_to_unmap_obj_one(struct vm_area_stru if (!page->pte.mapcount) BUG(); - mm->rss--; + ptepage->mapcount--; page->pte.mapcount--; page_cache_release(page); +out_unlock_pt: + pte_page_unlock(ptepage); + out_unmap: pte_unmap(pte); @@ -543,49 +824,37 @@ out: * zone->lru_lock page_launder() * page lock page_launder(), trylock * pte_chain_lock page_launder() - * mm->page_table_lock try_to_unmap_one(), trylock + * pte_page_lock try_to_unmap_one(), trylock */ static int FASTCALL(try_to_unmap_one(struct page *, pte_addr_t)); static int try_to_unmap_one(struct page * page, pte_addr_t paddr) { pte_t *ptep = rmap_ptep_map(paddr); - unsigned long address = ptep_to_address(ptep); - struct mm_struct * mm = ptep_to_mm(ptep); - struct vm_area_struct * vma; pte_t pte; + struct ptpage *ptepage = (struct ptpage *)kmap_atomic_to_page(ptep); + unsigned long address = ptep_to_address(ptep); int ret; - if (!mm) - BUG(); - - /* - * We need the page_table_lock to protect us from page faults, - * munmap, fork, etc... - */ - if (!spin_trylock(&mm->page_table_lock)) { +#ifdef DEBUG_RMAP + BUG_ON(!PagePtepage(ptepage)); +#endif + if (!pte_page_trylock(ptepage)) { rmap_ptep_unmap(ptep); return SWAP_AGAIN; } - - /* During mremap, it's possible pages are not in a VMA. */ - vma = find_vma(mm, address); - if (!vma) { - ret = SWAP_FAIL; + ret = pgtable_check_mlocked(ptepage, address); + if (ret != SWAP_SUCCESS) goto out_unlock; - } - /* The page is mlock()d, we cannot swap it out. 
*/ - if (vma->vm_flags & VM_LOCKED) { - ret = SWAP_FAIL; + pte = ptep_get_and_clear(ptep); + + ret = pgtable_unmap_one(ptepage, address); + if (ret != SWAP_SUCCESS) { + set_pte(ptep, pte); goto out_unlock; } - /* Nuke the page table entry. */ - flush_cache_page(vma, address); - pte = ptep_get_and_clear(ptep); - flush_tlb_page(vma, address); - if (PageSwapCache(page)) { /* * Store the swap location in the pte. @@ -594,13 +863,27 @@ static int try_to_unmap_one(struct page swp_entry_t entry = { .val = page->index }; swap_duplicate(entry); set_pte(ptep, swp_entry_to_pte(entry)); + increment_swapcount(ptepage); BUG_ON(pte_file(*ptep)); - } else { + } else if (PageDirect(ptepage)) { + /* + * We're looking for nonlinear pages, which never have shared + * pagetable pages, so only need this if PageDirect(ptepage) + */ unsigned long pgidx; + struct mm_struct *mm = ptepage->pte.mmdirect; + struct vm_area_struct * vma = find_vma(mm, address); + + if (!vma) { + ret = SWAP_FAIL; + goto out_unlock; + } + /* * If a nonlinear mapping then store the file page offset * in the pte. */ + pgidx = (address - vma->vm_start) >> PAGE_SHIFT; pgidx += vma->vm_pgoff; pgidx >>= PAGE_CACHE_SHIFT - PAGE_SHIFT; @@ -609,18 +892,21 @@ static int try_to_unmap_one(struct page BUG_ON(!pte_file(*ptep)); } } + ptepage->mapcount--; + pte_page_unlock(ptepage); /* Move the dirty bit to the physical page now the pte is gone. */ if (pte_dirty(pte)) set_page_dirty(page); - mm->rss--; page_cache_release(page); ret = SWAP_SUCCESS; + goto out; out_unlock: + pte_page_unlock(ptepage); +out: rmap_ptep_unmap(ptep); - spin_unlock(&mm->page_table_lock); return ret; } @@ -721,6 +1007,58 @@ out: } /** + * increment_rss - increment the rss count by one + * @ptepage: The pte page that's getting a new paged mapped + * + * Since mapping a page into a pte page can increment the rss + * for multiple mm_structs, this function iterates through all + * the mms and increments them. It also keeps an rss count + * per pte page. + */ +void increment_rss(struct ptpage *ptepage) +{ + struct mm_chain *mc; + + if (PageDirect(ptepage)) + ptepage->pte.mmdirect->rss++; + else for (mc = ptepage->pte.mmchain; mc; mc = mc->next) + mc->mm->rss++; + + ptepage->mapcount++; +} + +/** + * decrement_rss - decrement the rss count by one + * @ptepage: The pte page that's unmapping a page + * + * Since unmapping a page can decrement the rss + * for multiple mm_structs, this function iterates through all + * the mms and decrements them. It also keeps an rss count + * per pte page. + */ +void decrement_rss(struct ptpage *ptepage) +{ + struct mm_chain *mc; + + if (PageDirect(ptepage)) + ptepage->pte.mmdirect->rss--; + else for (mc = ptepage->pte.mmchain; mc; mc = mc->next) + mc->mm->rss--; + + ptepage->mapcount--; +} + +void increment_swapcount(struct ptpage *ptepage) +{ + ptepage->swapcount++; +} + +void decrement_swapcount(struct ptpage *ptepage) +{ + ptepage->swapcount--; +} + +/** ** No more VM stuff below this comment, only pte_chain helper ** functions. 
**/ @@ -786,6 +1124,17 @@ struct pte_chain *pte_chain_alloc(int gf void __init pte_chain_init(void) { + + mm_chain_cache = kmem_cache_create( "mm_chain", + sizeof(struct mm_chain), + 0, + 0, + NULL, + NULL); + + if (!mm_chain_cache) + panic("failed to create mm_chain cache!\n"); + pte_chain_cache = kmem_cache_create( "pte_chain", sizeof(struct pte_chain), 0, diff -urpN -X /home/fletch/.diff.exclude 900-mjb1/mm/swapfile.c 950-shpte/mm/swapfile.c --- 900-mjb1/mm/swapfile.c Thu Mar 27 21:57:38 2003 +++ 950-shpte/mm/swapfile.c Sat Mar 29 07:53:14 2003 @@ -21,8 +21,10 @@ #include #include #include +#include #include +#include #include spinlock_t swaplock = SPIN_LOCK_UNLOCKED; @@ -379,7 +381,7 @@ void free_swap_and_cache(swp_entry_t ent */ /* mmlist_lock and vma->vm_mm->page_table_lock are held */ static void -unuse_pte(struct vm_area_struct *vma, unsigned long address, pte_t *dir, +unuse_pte(struct vm_area_struct *vma, struct ptpage *ptepage, pte_t *dir, swp_entry_t entry, struct page *page, struct pte_chain **pte_chainp) { pte_t pte = *dir; @@ -394,8 +396,9 @@ unuse_pte(struct vm_area_struct *vma, un set_pte(dir, pte_mkold(mk_pte(page, vma->vm_page_prot))); SetPageAnon(page); *pte_chainp = page_add_rmap(page, dir, *pte_chainp); + increment_rss(ptepage); + decrement_swapcount(ptepage); swap_free(entry); - ++vma->vm_mm->rss; } /* mmlist_lock and vma->vm_mm->page_table_lock are held */ @@ -403,6 +406,7 @@ static void unuse_pmd(struct vm_area_str unsigned long address, unsigned long size, unsigned long offset, swp_entry_t entry, struct page* page) { + struct ptpage *ptepage; pte_t * pte; unsigned long end; struct pte_chain *pte_chain = NULL; @@ -414,6 +418,8 @@ static void unuse_pmd(struct vm_area_str pmd_clear(dir); return; } + ptepage = pmd_ptpage(*dir); + pte_page_lock(ptepage); pte = pte_offset_map(dir, address); offset += address & PMD_MASK; address &= ~PMD_MASK; @@ -426,11 +432,11 @@ static void unuse_pmd(struct vm_area_str */ if (pte_chain == NULL) pte_chain = pte_chain_alloc(GFP_ATOMIC); - unuse_pte(vma, offset+address-vma->vm_start, - pte, entry, page, &pte_chain); + unuse_pte(vma, ptepage, pte, entry, page, &pte_chain); address += PAGE_SIZE; pte++; } while (address && (address < end)); + pte_page_unlock(ptepage); pte_unmap(pte - 1); pte_chain_free(pte_chain); }
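As a rough illustration of the policy mm/ptshare.c implements (a pte page counts how many address spaces reference it, fork_page_range() shares the existing pte pages only when the parent already has more than 3 of them, and a write fault on a shared pte page forces a private copy via pte_unshare()), here is a small standalone userspace sketch. It is a model only, not kernel code: every model_* name is invented for the example, and only the "more than one reference means shared" test and the "more than 3 pte pages" fork threshold come from the patch above.

/*
 * Sketch only: a userspace model of the share/unshare policy.
 * None of these names exist in the kernel; error handling is minimal.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MODEL_PTES 8			/* entries per modelled pte page */

struct model_ptpage {
	int count;			/* how many address spaces share this pte page */
	unsigned long pte[MODEL_PTES];
};

static struct model_ptpage *model_alloc(void)
{
	struct model_ptpage *p = calloc(1, sizeof(*p));

	if (!p)
		abort();
	p->count = 1;
	return p;
}

/* Mirrors is_pte_shared(): shared means more than one reference. */
static int model_is_shared(struct model_ptpage *p)
{
	return p->count > 1;
}

/* Fork-time choice: share the existing pte page or make a private copy. */
static struct model_ptpage *model_fork(struct model_ptpage *parent, int nr_pte_pages)
{
	struct model_ptpage *child;

	if (nr_pte_pages > 3) {		/* same threshold fork_page_range() uses */
		parent->count++;
		return parent;		/* child references the same pte page */
	}
	child = model_alloc();
	memcpy(child->pte, parent->pte, sizeof(child->pte));
	return child;
}

/* Write fault: write in place if exclusive, copy first if shared (like pte_unshare). */
static struct model_ptpage *model_write_fault(struct model_ptpage *p, int idx, unsigned long val)
{
	if (model_is_shared(p)) {
		struct model_ptpage *priv = model_alloc();

		memcpy(priv->pte, p->pte, sizeof(priv->pte));
		p->count--;		/* the old pte page loses one sharer */
		p = priv;
	}
	p->pte[idx] = val;
	return p;
}

int main(void)
{
	struct model_ptpage *parent = model_alloc();
	struct model_ptpage *child = model_fork(parent, 5);	/* above threshold: shared */

	printf("shared after fork: %d\n", model_is_shared(parent));
	child = model_write_fault(child, 0, 42);		/* unshare on write */
	printf("shared after child write: %d\n", model_is_shared(parent));
	if (child != parent)
		free(child);
	free(parent);
	return 0;
}

The real code has to make the same transitions while holding pte_page_lock, keeping the rmap chains and every sharer's rss consistent, and coping with the page_table_lock being dropped for the allocation; that bookkeeping is where most of the complexity in pte_unshare() comes from.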
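The rmap.c half of the patch keeps a second piece of bookkeeping: each pte page records the mm_structs that use it, as a single direct pointer while only one mm maps it (PageDirect) and as a chain of mm_chain nodes once a second mm starts sharing it, and increment_rss()/decrement_rss() walk that record so every sharer's rss stays correct. The sketch below is only a userspace model of that data structure under the same assumptions; the model_* names are invented and nothing here is the kernel API.

/*
 * Sketch only: the "direct pointer or chain" pattern used for the
 * per-pte-page mm bookkeeping, modelled in plain C.
 */
#include <stdio.h>
#include <stdlib.h>

struct model_mm {
	long rss;
};

struct model_chain {
	struct model_chain *next;
	struct model_mm *mm;
};

struct model_ptpage_owner {
	int direct;			/* 1: u.mmdirect is valid, 0: u.chain is valid */
	union {
		struct model_mm *mmdirect;
		struct model_chain *chain;
	} u;
};

/* Mirrors pgtable_add_rmap_locked(): stay direct for one user, chain for more. */
static void model_add_mm(struct model_ptpage_owner *o, struct model_mm *mm)
{
	struct model_chain *mc;

	if (o->direct) {
		/* Second user: convert the direct pointer into a one-entry chain. */
		mc = malloc(sizeof(*mc));
		if (!mc)
			abort();
		mc->mm = o->u.mmdirect;
		mc->next = NULL;
		o->u.chain = mc;
		o->direct = 0;
	}
	if (o->u.chain) {
		mc = malloc(sizeof(*mc));
		if (!mc)
			abort();
		mc->mm = mm;
		mc->next = o->u.chain;
		o->u.chain = mc;
	} else {
		/* First user: a bare pointer, no allocation needed. */
		o->u.mmdirect = mm;
		o->direct = 1;
	}
}

/* Mirrors increment_rss(): every mm sharing the pte page pays for the new mapping. */
static void model_account_mapping(struct model_ptpage_owner *o)
{
	struct model_chain *mc;

	if (o->direct) {
		o->u.mmdirect->rss++;
		return;
	}
	for (mc = o->u.chain; mc; mc = mc->next)
		mc->mm->rss++;
}

int main(void)
{
	struct model_mm a = { 0 }, b = { 0 };
	struct model_ptpage_owner owner = { 0 };

	model_add_mm(&owner, &a);		/* stays direct */
	model_account_mapping(&owner);
	model_add_mm(&owner, &b);		/* converts to a chain */
	model_account_mapping(&owner);
	printf("rss: a=%ld b=%ld\n", a.rss, b.rss);	/* prints a=2 b=1 */

	while (!owner.direct && owner.u.chain) {
		struct model_chain *mc = owner.u.chain;

		owner.u.chain = mc->next;
		free(mc);
	}
	return 0;
}

Keeping the single-user case as a bare pointer avoids allocating a chain node for the common case of an unshared pte page, mirroring the direct form rmap.c already uses for pte_chains.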