diff -urpN mm3-2.5.42/arch/i386/kernel/sys_i386.c hugetlbfs-2.5.42/arch/i386/kernel/sys_i386.c --- mm3-2.5.42/arch/i386/kernel/sys_i386.c 2002-10-15 09:51:07.000000000 -0700 +++ hugetlbfs-2.5.42/arch/i386/kernel/sys_i386.c 2002-10-17 21:41:47.000000000 -0700 @@ -249,91 +249,68 @@ asmlinkage int sys_olduname(struct oldol } #ifdef CONFIG_HUGETLB_PAGE -#define HPAGE_ALIGN(x) (((unsigned long)x + (HPAGE_SIZE -1)) & HPAGE_MASK) -extern long sys_munmap(unsigned long, size_t); /* get_addr function gets the currently unused virtaul range in - * current process's address space. It returns the LARGE_PAGE_SIZE + * current process's address space. It returns the HPAGE_SIZE * aligned address (in cases of success). Other kernel generic - * routines only could gurantee that allocated address is PAGE_SIZSE aligned. + * routines only could gurantee that allocated address is PAGE_SIZE aligned. */ -static unsigned long -get_addr(unsigned long addr, unsigned long len) +static unsigned long get_addr(unsigned long addr, unsigned long len) { - struct vm_area_struct *vma; + struct vm_area_struct *vma; if (addr) { - addr = HPAGE_ALIGN(addr); + addr = (addr + HPAGE_SIZE - 1) & HPAGE_MASK; vma = find_vma(current->mm, addr); - if (((TASK_SIZE - len) >= addr) && - (!vma || addr + len <= vma->vm_start)) + if (TASK_SIZE > addr + len && !(vma && addr + len >= vma->vm_start)) goto found_addr; } - addr = HPAGE_ALIGN(TASK_UNMAPPED_BASE); - for (vma = find_vma(current->mm, addr); ; vma = vma->vm_next) { - if (TASK_SIZE - len < addr) - return -ENOMEM; - if (!vma || ((addr + len) < vma->vm_start)) + addr = TASK_UNMAPPED_BASE; + for (vma = find_vma(current->mm, addr); TASK_SIZE > addr + len; vma = vma->vm_next) { + if (!vma || addr + len < vma->vm_start) goto found_addr; - addr = HPAGE_ALIGN(vma->vm_end); + addr = (vma->vm_end + HPAGE_SIZE - 1) & HPAGE_MASK; } + return -ENOMEM; found_addr: return addr; } -asmlinkage unsigned long -sys_alloc_hugepages(int key, unsigned long addr, unsigned long len, int 
prot, int flag) +asmlinkage unsigned long sys_alloc_hugepages(int key, unsigned long addr, unsigned long len, int prot, int flag) { struct mm_struct *mm = current->mm; unsigned long raddr; int retval = 0; extern int alloc_hugetlb_pages(int, unsigned long, unsigned long, int, int); - if (!(cpu_has_pse)) - return -EINVAL; - if (key < 0) - return -EINVAL; - if (len & (HPAGE_SIZE - 1)) + if (!cpu_has_pse || key < 0 || len & ~HPAGE_MASK) return -EINVAL; down_write(&mm->mmap_sem); raddr = get_addr(addr, len); - if (raddr == -ENOMEM) - goto raddr_out; - retval = alloc_hugetlb_pages(key, raddr, len, prot, flag); - -raddr_out: up_write(&mm->mmap_sem); - if (retval < 0) - return (unsigned long) retval; - return raddr; + if (raddr != -ENOMEM) + retval = alloc_hugetlb_pages(key, raddr, len, prot, flag); + up_write(&mm->mmap_sem); + return (retval < 0) ? (unsigned long)retval : raddr; } -asmlinkage int -sys_free_hugepages(unsigned long addr) +asmlinkage int sys_free_hugepages(unsigned long addr) { struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; - int retval; - extern int free_hugepages(struct vm_area_struct *); + struct vm_area_struct *vma; + int retval; vma = find_vma(current->mm, addr); - if ((!vma) || (!is_vm_hugetlb_page(vma)) || (vma->vm_start!=addr)) + if (!vma || !(vma->vm_flags & VM_HUGETLB) || vma->vm_start != addr) return -EINVAL; down_write(&mm->mmap_sem); - spin_lock(&mm->page_table_lock); - retval = free_hugepages(vma); - spin_unlock(&mm->page_table_lock); + retval = do_munmap(vma->vm_mm, addr, vma->vm_end - addr); up_write(&mm->mmap_sem); return retval; } - #else - -asmlinkage unsigned long -sys_alloc_hugepages(int key, unsigned long addr, size_t len, int prot, int flag) +asmlinkage unsigned long sys_alloc_hugepages(int key, unsigned long addr, size_t len, int prot, int flag) { return -ENOSYS; } -asmlinkage int -sys_free_hugepages(unsigned long addr) +asmlinkage int sys_free_hugepages(unsigned long addr) { return -ENOSYS; } - #endif diff -urpN 
mm3-2.5.42/arch/i386/mm/hugetlbpage.c hugetlbfs-2.5.42/arch/i386/mm/hugetlbpage.c --- mm3-2.5.42/arch/i386/mm/hugetlbpage.c 2002-10-15 09:51:07.000000000 -0700 +++ hugetlbfs-2.5.42/arch/i386/mm/hugetlbpage.c 2002-10-17 21:48:53.000000000 -0700 @@ -31,8 +31,7 @@ struct htlbpagekey { int key; } htlbpagek[MAX_ID]; -static struct inode * -find_key_inode(int key) +static struct inode *find_key_inode(int key) { int i; @@ -42,8 +41,8 @@ find_key_inode(int key) } return NULL; } -static struct page * -alloc_hugetlb_page(void) + +static struct page *alloc_hugetlb_page(void) { int i; struct page *page; @@ -64,36 +63,7 @@ alloc_hugetlb_page(void) return page; } -static void -free_hugetlb_page(struct page *page) -{ - spin_lock(&htlbpage_lock); - if ((page->mapping != NULL) && (page_count(page) == 2)) { - struct inode *inode = page->mapping->host; - int i; - - ClearPageDirty(page); - remove_from_page_cache(page); - set_page_count(page, 1); - if ((inode->i_size -= HPAGE_SIZE) == 0) { - for (i = 0; i < MAX_ID; i++) - if (htlbpagek[i].key == inode->i_ino) { - htlbpagek[i].key = 0; - htlbpagek[i].in = NULL; - break; - } - kfree(inode); - } - } - if (put_page_testzero(page)) { - list_add(&page->list, &htlbpage_freelist); - htlbpagemem++; - } - spin_unlock(&htlbpage_lock); -} - -static pte_t * -huge_pte_alloc(struct mm_struct *mm, unsigned long addr) +static pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) { pgd_t *pgd; pmd_t *pmd = NULL; @@ -103,8 +73,7 @@ huge_pte_alloc(struct mm_struct *mm, uns return (pte_t *) pmd; } -static pte_t * -huge_pte_offset(struct mm_struct *mm, unsigned long addr) +static pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) { pgd_t *pgd; pmd_t *pmd = NULL; @@ -116,9 +85,7 @@ huge_pte_offset(struct mm_struct *mm, un #define mk_pte_huge(entry) {entry.pte_low |= (_PAGE_PRESENT | _PAGE_PSE);} -static void -set_huge_pte(struct mm_struct *mm, struct vm_area_struct *vma, - struct page *page, pte_t * page_table, int write_access) 
+static void set_huge_pte(struct mm_struct *mm, struct vm_area_struct *vma, struct page *page, pte_t * page_table, int write_access) { pte_t entry; @@ -131,24 +98,17 @@ set_huge_pte(struct mm_struct *mm, struc entry = pte_mkyoung(entry); mk_pte_huge(entry); set_pte(page_table, entry); - return; } -static int -anon_get_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, - int write_access, pte_t * page_table) +static int anon_get_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, int write_access, pte_t *page_table) { - struct page *page; - - page = alloc_hugetlb_page(); - if (page == NULL) - return -1; - set_huge_pte(mm, vma, page, page_table, write_access); - return 1; + struct page *page = alloc_hugetlb_page(); + if (page) + set_huge_pte(mm, vma, page, page_table, write_access); + return page ? 1 : -1; } -int -make_hugetlb_pages_present(unsigned long addr, unsigned long end, int flags) +int make_hugetlb_pages_present(unsigned long addr, unsigned long end, int flags) { int write; struct mm_struct *mm = current->mm; @@ -191,9 +151,7 @@ out_error: /* Error case, remove the pa return -1; } -int -copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, - struct vm_area_struct *vma) +int copy_hugepage_range(struct mm_struct *dst, struct mm_struct *src, struct vm_area_struct *vma) { pte_t *src_pte, *dst_pte, entry; struct page *ptepage; @@ -218,10 +176,7 @@ nomem: return -ENOMEM; } -int -follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, - struct page **pages, struct vm_area_struct **vmas, - unsigned long *st, int *length, int i) +int follow_hugepage(struct mm_struct *mm, struct vm_area_struct *vma, struct page **pages, struct vm_area_struct **vmas, unsigned long *st, int *length, int i) { pte_t *ptep, pte; unsigned long start = *st; @@ -254,31 +209,61 @@ back1: return i; } -void -zap_hugetlb_resources(struct vm_area_struct *mpnt) +void free_huge_page(struct page *page) +{ + BUG_ON(page_count(page)); + 
BUG_ON(page->mapping); + + INIT_LIST_HEAD(&page->list); + + spin_lock(&htlbpage_lock); + list_add(&page->list, &htlbpage_freelist); + htlbpagemem++; + spin_unlock(&htlbpage_lock); +} + +void huge_page_release(struct page *page) { - struct mm_struct *mm = mpnt->vm_mm; - unsigned long len, addr, end; - pte_t *ptep; + if (!put_page_testzero(page)) + return; + + free_huge_page(page); +} + +void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) +{ + struct mm_struct *mm = vma->vm_mm; + unsigned long address; + pte_t *pte; struct page *page; - addr = mpnt->vm_start; - end = mpnt->vm_end; - len = end - addr; - do { - ptep = huge_pte_offset(mm, addr); - page = pte_page(*ptep); - pte_clear(ptep); - free_hugetlb_page(page); - addr += HPAGE_SIZE; - } while (addr < end); - mm->rss -= (len >> PAGE_SHIFT); - mpnt->vm_ops = NULL; - flush_tlb_range(mpnt, end - len, end); + BUG_ON(start & (HPAGE_SIZE - 1)); + BUG_ON(end & (HPAGE_SIZE - 1)); + + for (address = start; address < end; address += HPAGE_SIZE) { + pte = huge_pte_offset(mm, address); + page = pte_page(*pte); + huge_page_release(page); + pte_clear(pte); + } + mm->rss -= (end - start) >> PAGE_SHIFT; + flush_tlb_range(vma, start, end); } -static void -unlink_vma(struct vm_area_struct *mpnt) +void zap_hugepage_range(struct vm_area_struct *vma, unsigned long start, unsigned long length) +{ + struct mm_struct *mm = vma->vm_mm; + spin_lock(&mm->page_table_lock); + unmap_hugepage_range(vma, start, start + length); + spin_unlock(&mm->page_table_lock); +} + +void zap_hugetlb_resources(struct vm_area_struct *vma) +{ + zap_hugepage_range(vma, vma->vm_start, vma->vm_end - vma->vm_start); /* third argument is a length, not an end address */ +} + +static void unlink_vma(struct vm_area_struct *mpnt) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; @@ -297,17 +282,7 @@ unlink_vma(struct vm_area_struct *mpnt) mm->map_count--; } -int -free_hugepages(struct vm_area_struct *mpnt) -{ - unlink_vma(mpnt); - zap_hugetlb_resources(mpnt); - 
kmem_cache_free(vm_area_cachep, mpnt); - return 1; -} - -static struct inode * -set_new_inode(unsigned long len, int prot, int flag, int key) +static struct inode *set_new_inode(unsigned long len, int prot, int flag, int key) { struct inode *inode; int i; @@ -337,8 +312,7 @@ set_new_inode(unsigned long len, int pro return inode; } -static int -check_size_prot(struct inode *inode, unsigned long len, int prot, int flag) +static int check_size_prot(struct inode *inode, unsigned long len, int prot, int flag) { if (inode->i_uid != current->fsuid) return -1; @@ -349,9 +323,7 @@ check_size_prot(struct inode *inode, uns return 0; } -static int -alloc_shared_hugetlb_pages(int key, unsigned long addr, unsigned long len, - int prot, int flag) +static int alloc_shared_hugetlb_pages(int key, unsigned long addr, unsigned long len, int prot, int flag) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; @@ -482,9 +454,7 @@ out: return ret; } -static int -alloc_private_hugetlb_pages(int key, unsigned long addr, unsigned long len, - int prot, int flag) +static int alloc_private_hugetlb_pages(int key, unsigned long addr, unsigned long len, int prot, int flag) { if (!capable(CAP_SYS_ADMIN)) { if (!in_group_p(0)) @@ -501,17 +471,14 @@ alloc_private_hugetlb_pages(int key, uns return 0; } -int -alloc_hugetlb_pages(int key, unsigned long addr, unsigned long len, int prot, - int flag) +int alloc_hugetlb_pages(int key, unsigned long addr, unsigned long len, int prot, int flag) { if (key > 0) return alloc_shared_hugetlb_pages(key, addr, len, prot, flag); return alloc_private_hugetlb_pages(key, addr, len, prot, flag); } -int -set_hugetlb_mem_size(int count) +int set_hugetlb_mem_size(int count) { int j, lcount; struct page *page, *map; @@ -572,6 +539,5 @@ static struct page * hugetlb_nopage(stru } struct vm_operations_struct hugetlb_vm_ops = { - .close = zap_hugetlb_resources, .nopage = hugetlb_nopage, }; diff -urpN mm3-2.5.42/arch/ia64/kernel/sys_ia64.c 
hugetlbfs-2.5.42/arch/ia64/kernel/sys_ia64.c --- mm3-2.5.42/arch/ia64/kernel/sys_ia64.c 2002-10-15 09:51:07.000000000 -0700 +++ hugetlbfs-2.5.42/arch/ia64/kernel/sys_ia64.c 2002-10-17 20:13:30.000000000 -0700 @@ -284,7 +284,7 @@ sys_free_hugepages (unsigned long addr) int retval; vma = find_vma(mm, addr); - if (!vma || !is_vm_hugetlb_page(vma) || (vma->vm_start != addr)) + if (!vma || !(vma->vm_flags & VM_HUGETLB) || (vma->vm_start != addr)) return -EINVAL; down_write(&mm->mmap_sem); diff -urpN mm3-2.5.42/arch/sparc64/kernel/sys_sparc.c hugetlbfs-2.5.42/arch/sparc64/kernel/sys_sparc.c --- mm3-2.5.42/arch/sparc64/kernel/sys_sparc.c 2002-10-15 09:51:08.000000000 -0700 +++ hugetlbfs-2.5.42/arch/sparc64/kernel/sys_sparc.c 2002-10-17 20:13:02.000000000 -0700 @@ -755,7 +755,7 @@ sys_free_hugepages(unsigned long addr) int retval; vma = find_vma(current->mm, addr); - if ((!vma) || (!is_vm_hugetlb_page(vma)) || (vma->vm_start!=addr)) + if ((!vma) || (!(vma->vm_flags & VM_HUGETLB)) || (vma->vm_start!=addr)) return -EINVAL; down_write(&mm->mmap_sem); spin_lock(&mm->page_table_lock); diff -urpN mm3-2.5.42/fs/attr.c hugetlbfs-2.5.42/fs/attr.c --- mm3-2.5.42/fs/attr.c 2002-10-15 09:51:08.000000000 -0700 +++ hugetlbfs-2.5.42/fs/attr.c 2002-10-16 22:25:13.000000000 -0700 @@ -93,7 +93,7 @@ out: return error; } -static int setattr_mask(unsigned int ia_valid) +int setattr_mask(unsigned int ia_valid) { unsigned long dn_mask = 0; diff -urpN mm3-2.5.42/fs/hugetlbfs/inode.c hugetlbfs-2.5.42/fs/hugetlbfs/inode.c --- mm3-2.5.42/fs/hugetlbfs/inode.c 2002-10-15 09:51:09.000000000 -0700 +++ hugetlbfs-2.5.42/fs/hugetlbfs/inode.c 2002-10-18 07:12:38.000000000 -0700 @@ -7,14 +7,21 @@ */ #include +#include +#include +#include /* remove ASAP */ #include #include +#include #include #include #include #include #include #include +#include +#include +#include #include @@ -80,6 +87,277 @@ static int hugetlbfs_commit_write(struct return -EINVAL; } +void huge_pagevec_release(struct pagevec *pvec) +{ + int 
i; + + for (i = 0; i < pagevec_count(pvec); ++i) + huge_page_release(pvec->pages[i]); + + pagevec_reinit(pvec); +} + +void truncate_partial_hugepage(struct page *page, unsigned partial) +{ + int i; + const unsigned piece = partial & (PAGE_SIZE - 1); + const unsigned tailstart = PAGE_SIZE - piece; + const unsigned whole_pages = partial / PAGE_SIZE; + const unsigned last_page_offset = HPAGE_SIZE/PAGE_SIZE - whole_pages; + + for (i = HPAGE_SIZE/PAGE_SIZE - 1; i >= last_page_offset; --i) /* walk down from the last subpage; counting up ran past the huge page */ + memclear_highpage_flush(&page[i], 0, PAGE_SIZE); + + if (!piece) + return; + + memclear_highpage_flush(&page[last_page_offset - 1], tailstart, piece); +} + +void truncate_huge_page(struct address_space *mapping, struct page *page) +{ + if (page->mapping != mapping) + return; + + clear_page_dirty(page); + ClearPageUptodate(page); + remove_from_page_cache(page); + huge_page_release(page); +} + +void truncate_hugepages(struct address_space *mapping, loff_t lstart) +{ + const pgoff_t start = (lstart + HPAGE_SIZE - 1) >> HPAGE_SHIFT; + const unsigned partial = lstart & (HPAGE_SIZE - 1); + struct pagevec pvec; + pgoff_t next; + int i; + + pagevec_init(&pvec, 0); + next = start; + + while (pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) { + for (i = 0; i < pagevec_count(&pvec); ++i) { + struct page *page = pvec.pages[i]; + pgoff_t page_index = page->index; + + if (page_index > next) + next = page_index; + + ++next; + + if (TestSetPageLocked(page)) + continue; + + if (PageWriteback(page)) { + unlock_page(page); + continue; + } + + truncate_huge_page(mapping, page); + unlock_page(page); + } + huge_pagevec_release(&pvec); + cond_resched(); + } + + if (partial) { + struct page *page = find_lock_page(mapping, start - 1); + if (page) { + wait_on_page_writeback(page); + truncate_partial_hugepage(page, partial); + unlock_page(page); + huge_page_release(page); + } + } + + next = start; + + while (1) { + if (!pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) { + if (next == start) + break; + 
next = start; + continue; + } + + for (i = 0; i < pagevec_count(&pvec); ++i) { + struct page *page = pvec.pages[i]; + + lock_page(page); + wait_on_page_writeback(page); + if (page->index > next) + next = page->index; + ++next; + truncate_huge_page(mapping, page); + unlock_page(page); + } + huge_pagevec_release(&pvec); + } + BUG_ON(!lstart && mapping->nrpages); +} + +static void hugetlbfs_delete_inode(struct inode *inode) +{ + list_del_init(&inode->i_hash); + list_del_init(&inode->i_list); + inode->i_state |= I_FREEING; + inodes_stat.nr_inodes--; + spin_unlock(&inode_lock); + + if (inode->i_data.nrpages) + truncate_hugepages(&inode->i_data, 0); + + security_ops->inode_delete(inode); + + clear_inode(inode); + destroy_inode(inode); +} + +static void hugetlbfs_forget_inode(struct inode *inode) +{ + struct super_block *super_block = inode->i_sb; + + if (list_empty(&inode->i_hash)) + goto out_truncate; + + if (!(inode->i_state & (I_DIRTY|I_LOCK))) { + list_del(&inode->i_list); + list_add(&inode->i_list, &inode_unused); + } + inodes_stat.nr_unused++; + if (!super_block || (super_block->s_flags & MS_ACTIVE)) { /* || short-circuits so a NULL super_block is never dereferenced */ + spin_unlock(&inode_lock); + return; + } + + /* write_inode_now() ? 
*/ + inodes_stat.nr_unused--; + list_del_init(&inode->i_hash); +out_truncate: + list_del_init(&inode->i_list); + inode->i_state |= I_FREEING; + inodes_stat.nr_inodes--; + spin_unlock(&inode_lock); + if (inode->i_data.nrpages) + truncate_hugepages(&inode->i_data, 0); + clear_inode(inode); + destroy_inode(inode); +} + +static void hugetlbfs_drop_inode(struct inode *inode) +{ + if (!inode->i_nlink) + hugetlbfs_delete_inode(inode); + else + hugetlbfs_forget_inode(inode); +} + +static void hugetlb_vmtruncate_list(struct list_head *list, unsigned long pgoff) +{ + unsigned long start, end, length, delta; + struct vm_area_struct *vma; + + list_for_each_entry(vma, list, shared) { + start = vma->vm_start; + end = vma->vm_end; + length = end - start; + + if (vma->vm_pgoff >= pgoff) { + zap_hugepage_range(vma, start, length); + continue; + } + + length >>= PAGE_SHIFT; + delta = pgoff - vma->vm_pgoff; /* pages of this vma that lie below the truncation point */ + if (delta >= length) + continue; + + start += delta << PAGE_SHIFT; + length = (length - delta) << PAGE_SHIFT; + zap_hugepage_range(vma, start, length); + } +} + +static int hugetlb_vmtruncate(struct inode *inode, loff_t offset) +{ + unsigned long pgoff; + struct address_space *mapping = inode->i_mapping; + unsigned long limit; + + pgoff = (offset + HPAGE_SIZE - 1) >> HPAGE_SHIFT; + + if (inode->i_size < offset) + goto do_expand; + + inode->i_size = offset; + spin_lock(&mapping->i_shared_lock); + if (list_empty(&mapping->i_mmap) && list_empty(&mapping->i_mmap_shared)) + goto out_unlock; + if (!list_empty(&mapping->i_mmap)) + hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff); + if (!list_empty(&mapping->i_mmap_shared)) + hugetlb_vmtruncate_list(&mapping->i_mmap_shared, pgoff); + +out_unlock: + spin_unlock(&mapping->i_shared_lock); + truncate_hugepages(mapping, offset); + return 0; + +do_expand: + limit = current->rlim[RLIMIT_FSIZE].rlim_cur; + if (limit != RLIM_INFINITY && offset > limit) + goto out_sig; + if (offset > inode->i_sb->s_maxbytes) + goto out; + inode->i_size = offset; 
+ return 0; + +out_sig: + send_sig(SIGXFSZ, current, 0); +out: + return -EFBIG; +} + +static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr) +{ + struct inode *inode = dentry->d_inode; + int error; + unsigned int ia_valid = attr->ia_valid; + unsigned long dn_mask; + + BUG_ON(!inode); + + error = inode_change_ok(inode, attr); + if (error) + goto out; + + error = security_ops->inode_setattr(dentry, attr); + if (error) + goto out; + + if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || + (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) + error = DQUOT_TRANSFER(inode, attr) ? -EDQUOT : 0; + if (error) + goto out; + + if (ia_valid & ATTR_SIZE) { + error = hugetlb_vmtruncate(inode, attr->ia_size); + if (error) + goto out; + attr->ia_valid &= ~ATTR_SIZE; + error = inode_setattr(inode, attr); + } + if (error) + goto out; + dn_mask = setattr_mask(ia_valid); + if (dn_mask) + dnotify_parent(dentry, dn_mask); +out: + return error; +} + struct inode *hugetlbfs_get_inode(struct super_block *sb, int mode, int dev) { struct inode * inode = new_inode(sb); @@ -189,11 +467,12 @@ static struct inode_operations hugetlbfs rmdir: simple_rmdir, mknod: hugetlbfs_mknod, rename: simple_rename, + setattr: hugetlbfs_setattr, }; static struct super_operations hugetlbfs_ops = { statfs: simple_statfs, - drop_inode: generic_delete_inode, + drop_inode: hugetlbfs_drop_inode, }; static int hugetlbfs_fill_super(struct super_block * sb, void * data, int silent) diff -urpN mm3-2.5.42/fs/inode.c hugetlbfs-2.5.42/fs/inode.c --- mm3-2.5.42/fs/inode.c 2002-10-15 09:51:09.000000000 -0700 +++ hugetlbfs-2.5.42/fs/inode.c 2002-10-16 23:55:25.000000000 -0700 @@ -142,7 +142,7 @@ static struct inode *alloc_inode(struct return inode; } -static void destroy_inode(struct inode *inode) +void destroy_inode(struct inode *inode) { if (inode_has_buffers(inode)) BUG(); diff -urpN mm3-2.5.42/fs/proc/array.c hugetlbfs-2.5.42/fs/proc/array.c --- mm3-2.5.42/fs/proc/array.c 2002-10-15 
09:51:09.000000000 -0700 +++ hugetlbfs-2.5.42/fs/proc/array.c 2002-10-17 20:13:56.000000000 -0700 @@ -414,7 +414,7 @@ int proc_pid_statm(task_t *task, char *b int pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; size += pages; - if (is_vm_hugetlb_page(vma)) { + if (vma->vm_flags & VM_HUGETLB) { if (!(vma->vm_flags & VM_DONTCOPY)) shared += pages; continue; diff -urpN mm3-2.5.42/include/linux/fs.h hugetlbfs-2.5.42/include/linux/fs.h --- mm3-2.5.42/include/linux/fs.h 2002-10-15 09:51:09.000000000 -0700 +++ hugetlbfs-2.5.42/include/linux/fs.h 2002-10-16 22:26:57.000000000 -0700 @@ -1150,6 +1150,7 @@ extern int filemap_fdatawrite(struct add extern int filemap_fdatawait(struct address_space *); extern void sync_supers(void); extern sector_t bmap(struct inode *, sector_t); +extern int setattr_mask(unsigned int); extern int notify_change(struct dentry *, struct iattr *); extern int permission(struct inode *, int); extern int vfs_permission(struct inode *, int); @@ -1228,6 +1229,7 @@ static inline struct inode *iget(struct extern void __iget(struct inode * inode); extern void clear_inode(struct inode *); +extern void destroy_inode(struct inode *); extern struct inode *new_inode(struct super_block *); extern void remove_suid(struct dentry *); diff -urpN mm3-2.5.42/include/linux/hugetlb.h hugetlbfs-2.5.42/include/linux/hugetlb.h --- mm3-2.5.42/include/linux/hugetlb.h 2002-10-15 09:51:09.000000000 -0700 +++ hugetlbfs-2.5.42/include/linux/hugetlb.h 2002-10-17 21:49:09.000000000 -0700 @@ -2,47 +2,22 @@ #define _LINUX_HUGETLB_H #ifdef CONFIG_HUGETLB_PAGE -static inline int is_vm_hugetlb_page(struct vm_area_struct *vma) -{ - return vma->vm_flags & VM_HUGETLB; -} -int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *); -int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, - struct page **, struct vm_area_struct **, unsigned long *, int *, int); -int free_hugepages(struct vm_area_struct *); +int copy_hugepage_range(struct 
mm_struct *, struct mm_struct *, struct vm_area_struct *); +int follow_hugepage(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, unsigned long *, int *, int); +void zap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long); +void unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long); int hugetlb_prefault(struct address_space *, struct vm_area_struct *); +void huge_page_release(struct page *); #else /* !CONFIG_HUGETLB_PAGE */ -static inline int is_vm_hugetlb_page(struct vm_area_struct *vma) -{ - return 0; -} - -static inline int -copy_hugetlb_page_range(struct mm_struct *src, struct mm_struct *dst, - struct vm_area_struct *vma) -{ - return -ENOSYS; -} - -static inline int -follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, - struct page **pages, struct vm_area_struct **vmas, - unsigned long *start, int *len, int i) -{ - return -ENOSYS; -} -static inline int free_hugepages(struct vm_area_struct *vma) -{ - return -EINVAL; -} +#define follow_hugepage(m,v,p,vs,a,b,i) ({ BUG(); 0; }) +#define copy_hugepage_range(src, dst, vma) ({ BUG(); 0; }) +#define hugetlb_prefault(mapping, vma) ({ BUG(); 0; }) +#define zap_hugepage_range(vma, start, len) BUG() +#define unmap_hugepage_range(vma, start, end) BUG() +#define huge_page_release(page) BUG() -static inline int -hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma) -{ - return -ENOSYS; -} #endif /* !CONFIG_HUGETLB_PAGE */ #ifdef CONFIG_HUGETLBFS @@ -50,29 +25,21 @@ extern struct file_operations hugetlbfs_ extern struct vm_operations_struct hugetlb_vm_ops; struct file *hugetlb_zero_setup(size_t); -static inline int is_file_hugetlb_page(struct file *file) +static inline int is_file_hugepages(struct file *file) { return file->f_op == &hugetlbfs_file_operations; } -static inline void set_file_hugetlb_page(struct file *file) +static inline void set_file_hugepages(struct file *file) { file->f_op = 
&hugetlbfs_file_operations; } #else /* !CONFIG_HUGETLBFS */ -static inline int is_file_hugetlb_page(struct file *file) -{ - return 0; -} -static inline void set_file_hugetlb_page(struct file *file) -{ -} +#define is_file_hugepages(file) 0 +#define set_file_hugepages(file) BUG() +#define hugetlb_zero_setup(size) ERR_PTR(-ENOSYS) -static inline struct file *hugetlb_zero_setup(size_t size) -{ - return ERR_PTR(-ENOSYS); -} #endif /* !CONFIG_HUGETLBFS */ #endif /* _LINUX_HUGETLB_H */ diff -urpN mm3-2.5.42/include/linux/mm.h hugetlbfs-2.5.42/include/linux/mm.h --- mm3-2.5.42/include/linux/mm.h 2002-10-15 09:51:09.000000000 -0700 +++ hugetlbfs-2.5.42/include/linux/mm.h 2002-10-17 19:05:05.000000000 -0700 @@ -523,6 +523,7 @@ extern struct vm_area_struct * find_vma_ struct vm_area_struct **pprev); extern int split_vma(struct mm_struct * mm, struct vm_area_struct * vma, unsigned long addr, int new_below); +extern void unmap_vma(struct mm_struct *mm, struct vm_area_struct *area); /* Look up the first VMA which intersects the interval start_addr..end_addr-1, NULL if none. Assume start_addr < end_addr. 
*/ diff -urpN mm3-2.5.42/ipc/shm.c hugetlbfs-2.5.42/ipc/shm.c --- mm3-2.5.42/ipc/shm.c 2002-10-15 09:51:09.000000000 -0700 +++ hugetlbfs-2.5.42/ipc/shm.c 2002-10-17 20:16:00.000000000 -0700 @@ -115,7 +115,7 @@ static void shm_destroy (struct shmid_ke shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT; shm_rmid (shp->id); shm_unlock(shp->id); - if (!is_file_hugetlb_page(shp->shm_file)) + if (!is_file_hugepages(shp->shm_file)) shmem_lock(shp->shm_file, 0); fput (shp->shm_file); security_ops->shm_free_security(shp); @@ -221,7 +221,7 @@ static int newseg (key_t key, int shmflg shp->shm_file = file; file->f_dentry->d_inode->i_ino = shp->id; if (shmflg & SHM_HUGETLB) - set_file_hugetlb_page(file); + set_file_hugepages(file); else file->f_op = &shm_file_operations; shm_tot += numpages; @@ -264,6 +264,7 @@ asmlinkage long sys_shmget (key_t key, s shm_unlock(id); } up(&shm_ids.sem); + return err; } @@ -388,8 +389,10 @@ asmlinkage long sys_shmctl (int shmid, i struct shmid_kernel *shp; int err, version; - if (cmd < 0 || shmid < 0) - return -EINVAL; + if (cmd < 0 || shmid < 0) { + err = -EINVAL; + goto out; + } version = ipc_parse_version(&cmd); @@ -410,7 +413,7 @@ asmlinkage long sys_shmctl (int shmid, i err= shm_ids.max_id; if(err<0) err = 0; - return err; + goto out; } case SHM_INFO: { @@ -427,10 +430,13 @@ asmlinkage long sys_shmctl (int shmid, i err = shm_ids.max_id; shm_unlockall(); up(&shm_ids.sem); - if(copy_to_user (buf, &shm_info, sizeof(shm_info))) - return -EFAULT; + if(copy_to_user (buf, &shm_info, sizeof(shm_info))) { + err = -EFAULT; + goto out; + } - return err < 0 ? 0 : err; + err = err < 0 ? 
0 : err; + goto out; } case SHM_STAT: case IPC_STAT: @@ -439,9 +445,10 @@ asmlinkage long sys_shmctl (int shmid, i int result; memset(&tbuf, 0, sizeof(tbuf)); shp = shm_lock(shmid); - if(shp==NULL) - return -EINVAL; - if(cmd==SHM_STAT) { + if(shp==NULL) { + err = -EINVAL; + goto out; + } else if(cmd==SHM_STAT) { err = -EINVAL; if (shmid > shm_ids.max_id) goto out_unlock; @@ -465,8 +472,10 @@ asmlinkage long sys_shmctl (int shmid, i tbuf.shm_nattch = shp->shm_nattch; shm_unlock(shmid); if(copy_shmid_to_user (buf, &tbuf, version)) - return -EFAULT; - return result; + err = -EFAULT; + else + err = result; + goto out; } case SHM_LOCK: case SHM_UNLOCK: @@ -474,26 +483,30 @@ asmlinkage long sys_shmctl (int shmid, i /* Allow superuser to lock segment in memory */ /* Should the pages be faulted in here or leave it to user? */ /* need to determine interaction with current->swappable */ - if (!capable(CAP_IPC_LOCK)) - return -EPERM; + if (!capable(CAP_IPC_LOCK)) { + err = -EPERM; + goto out; + } shp = shm_lock(shmid); - if(shp==NULL) - return -EINVAL; + if(shp==NULL) { + err = -EINVAL; + goto out; + } err = shm_checkid(shp,shmid); if(err) goto out_unlock; if(cmd==SHM_LOCK) { - if (!is_file_hugetlb_page(shp->shm_file)) + if (!is_file_hugepages(shp->shm_file)) shmem_lock(shp->shm_file, 1); shp->shm_flags |= SHM_LOCKED; } else { - if (!is_file_hugetlb_page(shp->shm_file)) + if (!is_file_hugepages(shp->shm_file)) shmem_lock(shp->shm_file, 0); shp->shm_flags &= ~SHM_LOCKED; } shm_unlock(shmid); - return err; + goto out; } case IPC_RMID: { @@ -529,13 +542,15 @@ asmlinkage long sys_shmctl (int shmid, i } else shm_destroy (shp); up(&shm_ids.sem); - return err; + goto out; } case IPC_SET: { - if(copy_shmid_from_user (&setbuf, buf, version)) - return -EFAULT; + if(copy_shmid_from_user (&setbuf, buf, version)) { + err = -EFAULT; + goto out; + } down(&shm_ids.sem); shp = shm_lock(shmid); err=-EINVAL; @@ -560,7 +575,8 @@ asmlinkage long sys_shmctl (int shmid, i } default: - return 
-EINVAL; + err = -EINVAL; + goto out; } err = 0; @@ -568,9 +584,10 @@ out_unlock_up: shm_unlock(shmid); out_up: up(&shm_ids.sem); - return err; + goto out; out_unlock: shm_unlock(shmid); +out: return err; } @@ -590,10 +607,10 @@ asmlinkage long sys_shmat (int shmid, ch int acc_mode; void *user_addr; - if (shmid < 0) - return -EINVAL; - - if ((addr = (ulong)shmaddr)) { + if (shmid < 0) { + err = -EINVAL; + goto out; + } else if ((addr = (ulong)shmaddr)) { if (addr & (SHMLBA-1)) { if (shmflg & SHM_RND) addr &= ~(SHMLBA-1); /* round down */ @@ -623,16 +640,19 @@ asmlinkage long sys_shmat (int shmid, ch * additional creator id... */ shp = shm_lock(shmid); - if(shp == NULL) - return -EINVAL; + if(shp == NULL) { + err = -EINVAL; + goto out; + } err = shm_checkid(shp,shmid); if (err) { shm_unlock(shmid); - return err; + goto out; } if (ipcperms(&shp->shm_perm, acc_mode)) { shm_unlock(shmid); - return -EACCES; + err = -EACCES; + goto out; } file = shp->shm_file; size = file->f_dentry->d_inode->i_size; @@ -673,8 +693,8 @@ invalid: err = 0; if (IS_ERR(user_addr)) err = PTR_ERR(user_addr); +out: return err; - } /* @@ -697,10 +717,8 @@ asmlinkage long sys_shmdt (char *shmaddr /* ->vm_pgoff is always 0, see do_mmap() in sys_shmat() */ retval = 0; - if (vma->vm_ops == &shm_vm_ops) + if (vma->vm_ops == &shm_vm_ops || (vma->vm_flags & VM_HUGETLB)) do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start); - else if (is_vm_hugetlb_page(vma)) - free_hugepages(vma); else retval = -EINVAL; out: diff -urpN mm3-2.5.42/mm/memory.c hugetlbfs-2.5.42/mm/memory.c --- mm3-2.5.42/mm/memory.c 2002-10-15 09:51:09.000000000 -0700 +++ hugetlbfs-2.5.42/mm/memory.c 2002-10-18 07:36:31.000000000 -0700 @@ -211,8 +211,8 @@ int copy_page_range(struct mm_struct *ds unsigned long end = vma->vm_end; unsigned long cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE; - if (is_vm_hugetlb_page(vma)) - return copy_hugetlb_page_range(dst, src, vma); + if (vma->vm_flags & VM_HUGETLB) + return 
copy_hugepage_range(dst, src, vma); src_pgd = pgd_offset(src, address)-1; dst_pgd = pgd_offset(dst, address)-1; @@ -398,6 +398,11 @@ void unmap_page_range(mmu_gather_t *tlb, { pgd_t * dir; + if (vma->vm_flags & VM_HUGETLB) { + unmap_hugepage_range(vma, address, end); + return; + } + BUG_ON(address >= end); dir = pgd_offset(vma->vm_mm, address); @@ -437,6 +442,11 @@ void zap_page_range(struct vm_area_struc mmu_gather_t *tlb; unsigned long end, block; + if (vma->vm_flags & VM_HUGETLB) { + zap_hugepage_range(vma, address, size); + return; + } + spin_lock(&mm->page_table_lock); /* @@ -539,9 +550,8 @@ int get_user_pages(struct task_struct *t || !(flags & vma->vm_flags)) return i ? : -EFAULT; - if (is_vm_hugetlb_page(vma)) { - i = follow_hugetlb_page(mm, vma, pages, vmas, - &start, &len, i); + if (vma->vm_flags & VM_HUGETLB) { + i = follow_hugepage(mm, vma, pages, vmas, &start, &len, i); continue; } spin_lock(&mm->page_table_lock); diff -urpN mm3-2.5.42/mm/mmap.c hugetlbfs-2.5.42/mm/mmap.c --- mm3-2.5.42/mm/mmap.c 2002-10-15 09:51:09.000000000 -0700 +++ hugetlbfs-2.5.42/mm/mmap.c 2002-10-17 19:47:00.000000000 -0700 @@ -952,7 +952,7 @@ no_mmaps: * By the time this function is called, the area struct has been * removed from the process mapping list. 
*/ -static void unmap_vma(struct mm_struct *mm, struct vm_area_struct *area) +void unmap_vma(struct mm_struct *mm, struct vm_area_struct *area) { size_t len = area->vm_end - area->vm_start; @@ -1043,14 +1043,10 @@ static struct vm_area_struct *touched_by touched = NULL; do { struct vm_area_struct *next = mpnt->vm_next; - if (!(is_vm_hugetlb_page(mpnt))) { - mpnt->vm_next = touched; - touched = mpnt; - rb_erase(&mpnt->vm_rb, &mm->mm_rb); - mm->map_count--; - } - else - free_hugepages(mpnt); + mpnt->vm_next = touched; + touched = mpnt; + rb_erase(&mpnt->vm_rb, &mm->mm_rb); + mm->map_count--; mpnt = next; } while (mpnt && mpnt->vm_start < end); *npp = mpnt; diff -urpN mm3-2.5.42/mm/mprotect.c hugetlbfs-2.5.42/mm/mprotect.c --- mm3-2.5.42/mm/mprotect.c 2002-10-15 09:51:09.000000000 -0700 +++ hugetlbfs-2.5.42/mm/mprotect.c 2002-10-17 20:11:22.000000000 -0700 @@ -252,7 +252,7 @@ sys_mprotect(unsigned long start, size_t /* Here we know that vma->vm_start <= nstart < vma->vm_end. */ - if (is_vm_hugetlb_page(vma)) { + if (vma->vm_flags & VM_HUGETLB) { error = -EACCES; goto out; } diff -urpN mm3-2.5.42/mm/mremap.c hugetlbfs-2.5.42/mm/mremap.c --- mm3-2.5.42/mm/mremap.c 2002-10-15 09:51:09.000000000 -0700 +++ hugetlbfs-2.5.42/mm/mremap.c 2002-10-17 20:11:38.000000000 -0700 @@ -340,7 +340,7 @@ unsigned long do_mremap(unsigned long ad vma = find_vma(current->mm, addr); if (!vma || vma->vm_start > addr) goto out; - if (is_vm_hugetlb_page(vma)) { + if (vma->vm_flags & VM_HUGETLB) { ret = -EINVAL; goto out; }