diff -urpN mm3-2.5.42/arch/i386/mm/hugetlbpage.c hugetlbfs-2.5.42/arch/i386/mm/hugetlbpage.c --- mm3-2.5.42/arch/i386/mm/hugetlbpage.c 2002-10-15 09:51:07.000000000 -0700 +++ hugetlbfs-2.5.42/arch/i386/mm/hugetlbpage.c 2002-10-17 14:47:11.000000000 -0700 @@ -64,34 +64,6 @@ alloc_hugetlb_page(void) return page; } -static void -free_hugetlb_page(struct page *page) -{ - spin_lock(&htlbpage_lock); - if ((page->mapping != NULL) && (page_count(page) == 2)) { - struct inode *inode = page->mapping->host; - int i; - - ClearPageDirty(page); - remove_from_page_cache(page); - set_page_count(page, 1); - if ((inode->i_size -= HPAGE_SIZE) == 0) { - for (i = 0; i < MAX_ID; i++) - if (htlbpagek[i].key == inode->i_ino) { - htlbpagek[i].key = 0; - htlbpagek[i].in = NULL; - break; - } - kfree(inode); - } - } - if (put_page_testzero(page)) { - list_add(&page->list, &htlbpage_freelist); - htlbpagemem++; - } - spin_unlock(&htlbpage_lock); -} - static pte_t * huge_pte_alloc(struct mm_struct *mm, unsigned long addr) { @@ -254,27 +226,55 @@ back1: return i; } -void -zap_hugetlb_resources(struct vm_area_struct *mpnt) +void zap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long); +void zap_hugetlb_resources(struct vm_area_struct *vma) +{ + zap_hugepage_range(vma, vma->vm_start, vma->vm_end); +} + +void free_huge_page(struct page *page) +{ + BUG_ON(page_count(page)); + BUG_ON(PageReserved(page)); + BUG_ON(page->mapping); + + INIT_LIST_HEAD(&page->list); + + spin_lock(&htlbpage_lock); + list_add(&page->list, &htlbpage_freelist); + htlbpagemem++; + spin_unlock(&htlbpage_lock); +} + +void huge_page_release(struct page *page) +{ + int i; + if (!put_page_testzero(page)) + return; + + for (i = 0; i < HPAGE_SIZE/PAGE_SIZE; ++i) + ClearPageReserved(&page[i]); + free_huge_page(page); +} + +void zap_hugepage_range(struct vm_area_struct *vma, unsigned long start, unsigned long length) { - struct mm_struct *mm = mpnt->vm_mm; - unsigned long len, addr, end; - pte_t *ptep; + struct 
mm_struct *mm = vma->vm_mm; + unsigned long address, end = start + length; + pte_t *pte; struct page *page; - addr = mpnt->vm_start; - end = mpnt->vm_end; - len = end - addr; - do { - ptep = huge_pte_offset(mm, addr); - page = pte_page(*ptep); - pte_clear(ptep); - free_hugetlb_page(page); - addr += HPAGE_SIZE; - } while (addr < end); - mm->rss -= (len >> PAGE_SHIFT); - mpnt->vm_ops = NULL; - flush_tlb_range(mpnt, end - len, end); + BUG_ON(start & (HPAGE_SIZE - 1)); + BUG_ON(length & (HPAGE_SIZE - 1)); + + for (address = start; address < end; address += HPAGE_SIZE) { + pte = huge_pte_offset(mm, address); + page = pte_page(*pte); + huge_page_release(page); + pte_clear(pte); + } + mm->rss -= length >> PAGE_SHIFT; + flush_tlb_range(vma, start, end); } static void @@ -475,6 +475,7 @@ int hugetlb_prefault(struct address_spac } add_to_page_cache(page, mapping, idx); } + get_page(page); set_huge_pte(mm, vma, page, pte, vma->vm_flags & VM_WRITE); } out: diff -urpN mm3-2.5.42/fs/attr.c hugetlbfs-2.5.42/fs/attr.c --- mm3-2.5.42/fs/attr.c 2002-10-15 09:51:08.000000000 -0700 +++ hugetlbfs-2.5.42/fs/attr.c 2002-10-16 22:25:13.000000000 -0700 @@ -93,7 +93,7 @@ out: return error; } -static int setattr_mask(unsigned int ia_valid) +int setattr_mask(unsigned int ia_valid) { unsigned long dn_mask = 0; diff -urpN mm3-2.5.42/fs/hugetlbfs/inode.c hugetlbfs-2.5.42/fs/hugetlbfs/inode.c --- mm3-2.5.42/fs/hugetlbfs/inode.c 2002-10-15 09:51:09.000000000 -0700 +++ hugetlbfs-2.5.42/fs/hugetlbfs/inode.c 2002-10-16 23:58:24.000000000 -0700 @@ -7,14 +7,21 @@ */ #include +#include +#include +#include /* remove ASAP */ #include #include +#include #include #include #include #include #include #include +#include +#include +#include #include @@ -80,6 +87,277 @@ static int hugetlbfs_commit_write(struct return -EINVAL; } +void huge_pagevec_release(struct pagevec *pvec) +{ + int i; + + for (i = 0; i < pagevec_count(pvec); ++i) + huge_page_release(pvec->pages[i]); + + pagevec_reinit(pvec); +} + +void 
truncate_partial_hugepage(struct page *page, unsigned partial)
+{
+	int i;
+	const unsigned piece = partial & (PAGE_SIZE - 1);
+	const unsigned tailsize = PAGE_SIZE - piece;
+	const unsigned whole_pages = partial / PAGE_SIZE;
+	const unsigned first_clear_page = whole_pages + (piece != 0);
+
+	for (i = first_clear_page; i < HPAGE_SIZE/PAGE_SIZE; ++i)
+		memclear_highpage_flush(&page[i], 0, PAGE_SIZE);
+
+	if (!piece)
+		return;
+
+	memclear_highpage_flush(&page[whole_pages], piece, tailsize);
+}
+
+void truncate_huge_page(struct address_space *mapping, struct page *page)
+{
+	if (page->mapping != mapping)
+		return;
+
+	clear_page_dirty(page);
+	ClearPageUptodate(page);
+	remove_from_page_cache(page);
+	huge_page_release(page);
+}
+
+void truncate_hugepages(struct address_space *mapping, loff_t lstart)
+{
+	const pgoff_t start = (lstart + HPAGE_SIZE - 1) >> HPAGE_SHIFT;
+	const unsigned partial = lstart & (HPAGE_SIZE - 1);
+	struct pagevec pvec;
+	pgoff_t next;
+	int i;
+
+	pagevec_init(&pvec, 0);
+	next = start;
+
+	while (pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
+		for (i = 0; i < pagevec_count(&pvec); ++i) {
+			struct page *page = pvec.pages[i];
+			pgoff_t page_index = page->index;
+
+			if (page_index > next)
+				next = page_index;
+
+			++next;
+
+			if (TestSetPageLocked(page))
+				continue;
+
+			if (PageWriteback(page)) {
+				unlock_page(page);
+				continue;
+			}
+
+			truncate_huge_page(mapping, page);
+			unlock_page(page);
+		}
+		huge_pagevec_release(&pvec);
+		cond_resched();
+	}
+
+	if (partial) {
+		struct page *page = find_lock_page(mapping, start - 1);
+		if (page) {
+			wait_on_page_writeback(page);
+			truncate_partial_hugepage(page, partial);
+			unlock_page(page);
+			huge_page_release(page);
+		}
+	}
+
+	next = start;
+
+	while (1) {
+		if (!pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
+			if (next == start)
+				break;
+			next = start;
+			continue;
+		}
+
+		for (i = 0; i < pagevec_count(&pvec); ++i) {
+			struct page *page = pvec.pages[i];
+
+			
lock_page(page);
+			wait_on_page_writeback(page);
+			if (page->index > next)
+				next = page->index;
+			++next;
+			truncate_huge_page(mapping, page);
+			unlock_page(page);
+		}
+		huge_pagevec_release(&pvec);
+	}
+	BUG_ON(!lstart && mapping->nrpages);
+}
+
+static void hugetlbfs_delete_inode(struct inode *inode)
+{
+	list_del_init(&inode->i_hash);
+	list_del_init(&inode->i_list);
+	inode->i_state |= I_FREEING;
+	inodes_stat.nr_inodes--;
+	spin_unlock(&inode_lock);
+
+	if (inode->i_data.nrpages)
+		truncate_hugepages(&inode->i_data, 0);
+
+	security_ops->inode_delete(inode);
+
+	clear_inode(inode);
+	destroy_inode(inode);
+}
+
+static void hugetlbfs_forget_inode(struct inode *inode)
+{
+	struct super_block *super_block = inode->i_sb;
+
+	if (list_empty(&inode->i_hash))
+		goto out_truncate;
+
+	if (!(inode->i_state & (I_DIRTY|I_LOCK))) {
+		list_del(&inode->i_list);
+		list_add(&inode->i_list, &inode_unused);
+	}
+	inodes_stat.nr_unused++;
+	spin_unlock(&inode_lock);
+	if (!super_block || (super_block->s_flags & MS_ACTIVE))
+		return;
+
+	/* write_inode_now() ?
*/
+	spin_lock(&inode_lock);
+	inodes_stat.nr_unused--;
+	list_del_init(&inode->i_hash);
+out_truncate:
+	list_del_init(&inode->i_list);
+	inode->i_state |= I_FREEING;
+	inodes_stat.nr_inodes--;
+	spin_unlock(&inode_lock);
+	if (inode->i_data.nrpages)
+		truncate_hugepages(&inode->i_data, 0);
+	clear_inode(inode);
+	destroy_inode(inode);
+}
+
+static void hugetlbfs_drop_inode(struct inode *inode)
+{
+	if (!inode->i_nlink)
+		hugetlbfs_delete_inode(inode);
+	else
+		hugetlbfs_forget_inode(inode);
+}
+
+static void hugetlb_vmtruncate_list(struct list_head *list, unsigned long pgoff)
+{
+	unsigned long start, end, length, delta;
+	struct vm_area_struct *vma;
+
+	list_for_each_entry(vma, list, shared) {
+		start = vma->vm_start;
+		end = vma->vm_end;
+		length = end - start;
+
+		if (vma->vm_pgoff >= pgoff) {
+			zap_hugepage_range(vma, start, length);
+			continue;
+		}
+
+		length >>= PAGE_SHIFT;
+		delta = pgoff - vma->vm_pgoff;
+		if (delta >= length)
+			continue;
+
+		start += delta << PAGE_SHIFT;
+		length = (length - delta) << PAGE_SHIFT;
+		zap_hugepage_range(vma, start, length);
+	}
+}
+
+static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
+{
+	unsigned long pgoff;
+	struct address_space *mapping = inode->i_mapping;
+	unsigned long limit;
+
+	pgoff = ((offset + HPAGE_SIZE - 1) & HPAGE_MASK) >> PAGE_SHIFT;
+
+	if (inode->i_size < offset)
+		goto do_expand;
+
+	inode->i_size = offset;
+	spin_lock(&mapping->i_shared_lock);
+	if (list_empty(&mapping->i_mmap) && list_empty(&mapping->i_mmap_shared))
+		goto out_unlock;
+	if (!list_empty(&mapping->i_mmap))
+		hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff);
+	if (!list_empty(&mapping->i_mmap_shared))
+		hugetlb_vmtruncate_list(&mapping->i_mmap_shared, pgoff);
+
+out_unlock:
+	spin_unlock(&mapping->i_shared_lock);
+	truncate_hugepages(mapping, offset);
+	return 0;
+
+do_expand:
+	limit = current->rlim[RLIMIT_FSIZE].rlim_cur;
+	if (limit != RLIM_INFINITY && offset > limit)
+		goto out_sig;
+	if (offset > inode->i_sb->s_maxbytes)
+		goto out;
+	inode->i_size = offset;
+	return 0;
+
+out_sig:
+	send_sig(SIGXFSZ, current, 0);
+out:
+	return -EFBIG;
+}
+
+static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr)
+{
+	struct inode *inode = dentry->d_inode;
+	int error;
+	unsigned int ia_valid = attr->ia_valid;
+	unsigned long dn_mask;
+
+	BUG_ON(!inode);
+
+	error = inode_change_ok(inode, attr);
+	if (error)
+		goto out;
+
+	error = security_ops->inode_setattr(dentry, attr);
+	if (error)
+		goto out;
+
+	if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
+	    (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid))
+		error = DQUOT_TRANSFER(inode, attr) ? -EDQUOT : 0;
+	if (error)
+		goto out;
+
+	if (ia_valid & ATTR_SIZE) {
+		error = hugetlb_vmtruncate(inode, attr->ia_size);
+		if (error)
+			goto out;
+		attr->ia_valid &= ~ATTR_SIZE;
+	}
+	error = inode_setattr(inode, attr);
+	if (error)
+		goto out;
+	dn_mask = setattr_mask(ia_valid);
+	if (dn_mask)
+		dnotify_parent(dentry, dn_mask);
+out:
+	return error;
+}
+
 struct inode *hugetlbfs_get_inode(struct super_block *sb, int mode, int dev)
 {
 	struct inode * inode = new_inode(sb);
@@ -189,11 +467,12 @@ static struct inode_operations hugetlbfs
 	rmdir:		simple_rmdir,
 	mknod:		hugetlbfs_mknod,
 	rename:		simple_rename,
+	setattr:	hugetlbfs_setattr,
 };
 
 static struct super_operations hugetlbfs_ops = {
 	statfs:		simple_statfs,
-	drop_inode:	generic_delete_inode,
+	drop_inode:	hugetlbfs_drop_inode,
 };
 
 static int hugetlbfs_fill_super(struct super_block * sb, void * data, int silent)
diff -urpN mm3-2.5.42/fs/inode.c hugetlbfs-2.5.42/fs/inode.c
--- mm3-2.5.42/fs/inode.c	2002-10-15 09:51:09.000000000 -0700
+++ hugetlbfs-2.5.42/fs/inode.c	2002-10-16 23:55:25.000000000 -0700
@@ -142,7 +142,7 @@ static struct inode *alloc_inode(struct
 	return inode;
 }
 
-static void destroy_inode(struct inode *inode)
+void destroy_inode(struct inode *inode)
 {
 	if (inode_has_buffers(inode))
 		BUG();
diff -urpN mm3-2.5.42/include/linux/fs.h hugetlbfs-2.5.42/include/linux/fs.h
--- 
mm3-2.5.42/include/linux/fs.h 2002-10-15 09:51:09.000000000 -0700 +++ hugetlbfs-2.5.42/include/linux/fs.h 2002-10-16 22:26:57.000000000 -0700 @@ -1143,6 +1143,7 @@ extern int filemap_fdatawrite(struct add extern int filemap_fdatawait(struct address_space *); extern void sync_supers(void); extern sector_t bmap(struct inode *, sector_t); +extern int setattr_mask(unsigned int); extern int notify_change(struct dentry *, struct iattr *); extern int permission(struct inode *, int); extern int vfs_permission(struct inode *, int); @@ -1221,6 +1222,7 @@ static inline struct inode *iget(struct extern void __iget(struct inode * inode); extern void clear_inode(struct inode *); +extern void destroy_inode(struct inode *); extern struct inode *new_inode(struct super_block *); extern void remove_suid(struct dentry *); diff -urpN mm3-2.5.42/include/linux/hugetlb.h hugetlbfs-2.5.42/include/linux/hugetlb.h --- mm3-2.5.42/include/linux/hugetlb.h 2002-10-15 09:51:09.000000000 -0700 +++ hugetlbfs-2.5.42/include/linux/hugetlb.h 2002-10-16 22:58:43.000000000 -0700 @@ -11,7 +11,9 @@ int copy_hugetlb_page_range(struct mm_st int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, unsigned long *, int *, int); int free_hugepages(struct vm_area_struct *); +void zap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long); int hugetlb_prefault(struct address_space *, struct vm_area_struct *); +void huge_page_release(struct page *); #else /* !CONFIG_HUGETLB_PAGE */ static inline int is_vm_hugetlb_page(struct vm_area_struct *vma) { @@ -38,6 +40,9 @@ static inline int free_hugepages(struct return -EINVAL; } +#define zap_hugepage_range(vma, start, len) BUG() +#define huge_page_release(page) BUG() + static inline int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma) {