patches/0000775000077200007720000000000010653433170011523 5ustar mingomingopatches/radix-tree-optimistic.patch0000664000077200007720000002660610653433167017012 0ustar mingomingoSubject: radix-tree: optimistic locking Implement optimistic locking for the concurrent radix tree. Optimistic locking is aimed at avoiding taking higher level node locks. We decent the tree using an RCU lookup, looking for the lowest modification termination point. If found, we try to acquire the lock of that node. After we have obtained this lock, we will need to validate if the initial conditions still hold true. We do this by repeating the steps that found us this node in the first place. Signed-off-by: Peter Zijlstra --- include/linux/radix-tree.h | 27 +++++- init/Kconfig | 6 + lib/radix-tree.c | 194 +++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 206 insertions(+), 21 deletions(-) Index: linux-rt-rebase.q/include/linux/radix-tree.h =================================================================== --- linux-rt-rebase.q.orig/include/linux/radix-tree.h +++ linux-rt-rebase.q/include/linux/radix-tree.h @@ -197,28 +197,47 @@ static inline void radix_tree_replace_sl rcu_assign_pointer(*pslot, item); } +#if defined(CONFIG_RADIX_TREE_OPTIMISTIC) +static inline void radix_tree_lock(struct radix_tree_context *context) +{ + rcu_read_lock(); + BUG_ON(context->locked); +} +#elif defined(CONFIG_RADIX_TREE_CONCURRENT) static inline void radix_tree_lock(struct radix_tree_context *context) { struct radix_tree_root *root = context->root; + rcu_read_lock(); spin_lock(&root->lock); -#ifdef CONFIG_RADIX_TREE_CONCURRENT BUG_ON(context->locked); context->locked = &root->lock; -#endif } +#else +static inline void radix_tree_lock(struct radix_tree_context *context) +{ + struct radix_tree_root *root = context->root; + + rcu_read_lock(); + spin_lock(&root->lock); +} +#endif +#if defined(CONFIG_RADIX_TREE_CONCURRENT) static inline void radix_tree_unlock(struct radix_tree_context *context) { -#ifdef CONFIG_RADIX_TREE_CONCURRENT BUG_ON(!context->locked); spin_unlock(context->locked); context->locked = NULL; + rcu_read_unlock(); +} #else +static inline void radix_tree_unlock(struct radix_tree_context *context) +{ spin_unlock(&context->root->lock); -#endif rcu_read_unlock(); } +#endif int radix_tree_insert(struct radix_tree_root *, unsigned long, void *); void *radix_tree_lookup(struct radix_tree_root *, unsigned long); Index: linux-rt-rebase.q/init/Kconfig =================================================================== --- linux-rt-rebase.q.orig/init/Kconfig +++ linux-rt-rebase.q/init/Kconfig @@ -355,8 +355,14 @@ config SYSCTL config RADIX_TREE_CONCURRENT bool "Enable concurrent radix tree operations (EXPERIMENTAL)" + depends on EXPERIMENTAL default y if SMP +config RADIX_TREE_OPTIMISTIC + bool "Enabled optimistic locking (EXPERIMENTAL)" + depends on RADIX_TREE_CONCURRENT + default y + menuconfig EMBEDDED bool "Configure standard kernel features (for small systems)" help Index: linux-rt-rebase.q/lib/radix-tree.c =================================================================== --- linux-rt-rebase.q.orig/lib/radix-tree.c +++ linux-rt-rebase.q/lib/radix-tree.c @@ -368,6 +368,117 @@ static inline void radix_path_unlock(str #define radix_path_unlock(context, punlock) do { } while (0) #endif +#ifdef CONFIG_RADIX_TREE_OPTIMISTIC +typedef int (*radix_valid_fn)(struct radix_tree_node *, int, int); + +static struct radix_tree_node * +radix_optimistic_lookup(struct radix_tree_context *context, unsigned long index, + int tag, 
radix_valid_fn valid) +{ + unsigned int height, shift; + struct radix_tree_node *node, *ret = NULL, **slot; + struct radix_tree_root *root = context->root; + + node = rcu_dereference(root->rnode); + if (node == NULL) + return NULL; + + if (!radix_tree_is_indirect_ptr(node)) + return NULL; + + node = radix_tree_indirect_to_ptr(node); + + height = node->height; + if (index > radix_tree_maxindex(height)) + return NULL; + + shift = (height-1) * RADIX_TREE_MAP_SHIFT; + do { + int offset = (index >> shift) & RADIX_TREE_MAP_MASK; + if ((*valid)(node, offset, tag)) + ret = node; + slot = (struct radix_tree_node **)(node->slots + offset); + node = rcu_dereference(*slot); + if (!node) + break; + + shift -= RADIX_TREE_MAP_SHIFT; + height--; + } while (height > 0); + + return ret; +} + +static struct radix_tree_node * +__radix_optimistic_lock(struct radix_tree_context *context, unsigned long index, + int tag, radix_valid_fn valid) +{ + struct radix_tree_node *node; + spinlock_t *locked; + unsigned int shift, offset; + + node = radix_optimistic_lookup(context, index, tag, valid); + if (!node) + goto out; + + locked = radix_node_lock(context->root, node); + if (!locked) + goto out; + +#if 0 + if (node != radix_optimistic_lookup(context, index, tag, valid)) + goto out_unlock; +#else + /* check if the node got freed */ + if (!node->count) + goto out_unlock; + + /* check if the node is still a valid termination point */ + shift = (node->height - 1) * RADIX_TREE_MAP_SHIFT; + offset = (index >> shift) & RADIX_TREE_MAP_MASK; + if (!(*valid)(node, offset, tag)) + goto out_unlock; +#endif + + context->locked = locked; + return node; + +out_unlock: + spin_unlock(locked); +out: + return NULL; +} + +static struct radix_tree_node * +radix_optimistic_lock(struct radix_tree_context *context, unsigned long index, + int tag, radix_valid_fn valid) +{ + struct radix_tree_node *node = NULL; + + if (context) { + node = __radix_optimistic_lock(context, index, tag, valid); + if (!node) { + BUG_ON(context->locked); + spin_lock(&context->root->lock); + context->locked = &context->root->lock; + } + } + return node; +} + +static int radix_valid_always(struct radix_tree_node *node, int offset, int tag) +{ + return 1; +} + +static int radix_valid_tag(struct radix_tree_node *node, int offset, int tag) +{ + return tag_get(node, tag, offset); +} +#else +#define radix_optimistic_lock(context, index, tag, valid) NULL +#endif + /** * radix_tree_insert - insert into a radix tree * @root: radix tree root @@ -388,6 +499,13 @@ int radix_tree_insert(struct radix_tree_ BUG_ON(radix_tree_is_indirect_ptr(item)); + node = radix_optimistic_lock(context, index, 0, radix_valid_always); + if (node) { + height = node->height; + shift = (height-1) * RADIX_TREE_MAP_SHIFT; + goto optimistic; + } + /* Make sure the tree is high enough. 
*/ if (index > radix_tree_maxindex(root->height)) { error = radix_tree_extend(root, index); @@ -396,7 +514,6 @@ int radix_tree_insert(struct radix_tree_ } slot = radix_tree_indirect_to_ptr(root->rnode); - height = root->height; shift = (height-1) * RADIX_TREE_MAP_SHIFT; @@ -415,11 +532,11 @@ int radix_tree_insert(struct radix_tree_ } /* Go a level down */ - offset = (index >> shift) & RADIX_TREE_MAP_MASK; node = slot; - radix_ladder_lock(context, node); +optimistic: + offset = (index >> shift) & RADIX_TREE_MAP_MASK; slot = node->slots[offset]; shift -= RADIX_TREE_MAP_SHIFT; height--; @@ -462,6 +579,10 @@ void **radix_tree_lookup_slot(struct rad struct radix_tree_node *node, **slot; RADIX_TREE_CONTEXT(context, root); + node = radix_optimistic_lock(context, index, 0, radix_valid_always); + if (node) + goto optimistic; + node = rcu_dereference(root->rnode); if (node == NULL) return NULL; @@ -473,6 +594,7 @@ void **radix_tree_lookup_slot(struct rad } node = radix_tree_indirect_to_ptr(node); +optimistic: height = node->height; if (index > radix_tree_maxindex(height)) return NULL; @@ -565,6 +687,13 @@ void *radix_tree_tag_set(struct radix_tr struct radix_tree_node *slot; RADIX_TREE_CONTEXT(context, root); + slot = radix_optimistic_lock(context, index, tag, radix_valid_tag); + if (slot) { + height = slot->height; + shift = (height - 1) * RADIX_TREE_MAP_SHIFT; + goto optimistic; + } + height = root->height; BUG_ON(index > radix_tree_maxindex(height)); @@ -580,6 +709,7 @@ void *radix_tree_tag_set(struct radix_tr radix_ladder_lock(context, slot); +optimistic: offset = (index >> shift) & RADIX_TREE_MAP_MASK; if (!tag_get(slot, tag, offset)) tag_set(slot, tag, offset); @@ -596,13 +726,13 @@ EXPORT_SYMBOL(radix_tree_tag_set); /* * the change can never propagate upwards from here. 
*/ -static inline int radix_tree_unlock_tag(struct radix_tree_root *root, - struct radix_tree_path *pathp, int tag) +static +int radix_valid_tag_clear(struct radix_tree_node *node, int offset, int tag) { int this, other; - this = tag_get(pathp->node, tag, pathp->offset); - other = any_tag_set_but(pathp->node, tag, pathp->offset); + this = tag_get(node, tag, offset); + other = any_tag_set_but(node, tag, offset); return !this || other; } @@ -627,9 +757,22 @@ void *radix_tree_tag_clear(struct radix_ struct radix_tree_path path[RADIX_TREE_MAX_PATH], *pathp = path; struct radix_tree_path *punlock = path, *piter; struct radix_tree_node *slot = NULL; - unsigned int height, shift; + unsigned int height, shift, offset; + RADIX_TREE_CONTEXT(context, root); + slot = radix_optimistic_lock(context, index, tag, + radix_valid_tag_clear); + if (slot) { + height = slot->height; + shift = (height - 1) * RADIX_TREE_MAP_SHIFT; + offset = (index >> shift) & RADIX_TREE_MAP_MASK; + pathp->offset = offset; + pathp->node = slot; + radix_path_init(context, pathp); + goto optimistic; + } + pathp->node = NULL; radix_path_init(context, pathp); @@ -641,8 +784,6 @@ void *radix_tree_tag_clear(struct radix_ slot = radix_tree_indirect_to_ptr(root->rnode); while (height > 0) { - int offset; - if (slot == NULL) goto out; @@ -652,11 +793,12 @@ void *radix_tree_tag_clear(struct radix_ pathp->node = slot; radix_path_lock(context, pathp, slot); - if (radix_tree_unlock_tag(root, pathp, tag)) { + if (radix_valid_tag_clear(slot, offset, tag)) { for (; punlock < pathp; punlock++) radix_path_unlock(context, punlock); } +optimistic: slot = slot->slots[offset]; shift -= RADIX_TREE_MAP_SHIFT; height--; @@ -1167,14 +1309,20 @@ static inline void radix_tree_shrink(str } } -static inline int radix_tree_unlock_all(struct radix_tree_root *root, - struct radix_tree_path *pathp) +static +int radix_valid_delete(struct radix_tree_node *node, int offset, int tag) { - int tag; - int unlock = 1; + /* + * we need to check for > 2, because nodes with a single child + * can still be deleted, see radix_tree_shrink(). 
+ */ + int unlock = (node->count > 2); + + if (!unlock) + return unlock; for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { - if (!radix_tree_unlock_tag(root, pathp, tag)) { + if (!radix_valid_tag_clear(node, offset, tag)) { unlock = 0; break; } @@ -1202,6 +1350,17 @@ void *radix_tree_delete(struct radix_tre int offset; RADIX_TREE_CONTEXT(context, root); + slot = radix_optimistic_lock(context, index, 0, radix_valid_delete); + if (slot) { + height = slot->height; + shift = (height - 1) * RADIX_TREE_MAP_SHIFT; + offset = (index >> shift) & RADIX_TREE_MAP_MASK; + pathp->offset = offset; + pathp->node = slot; + radix_path_init(context, pathp); + goto optimistic; + } + pathp->node = NULL; radix_path_init(context, pathp); @@ -1229,11 +1388,12 @@ void *radix_tree_delete(struct radix_tre pathp->node = slot; radix_path_lock(context, pathp, slot); - if (slot->count > 2 && radix_tree_unlock_all(root, pathp)) { + if (radix_valid_delete(slot, offset, 0)) { for (; punlock < pathp; punlock++) radix_path_unlock(context, punlock); } +optimistic: slot = slot->slots[offset]; shift -= RADIX_TREE_MAP_SHIFT; height--; patches/latency-tracer-one-off-fix.patch0000664000077200007720000000176310653433162017604 0ustar mingomingoFix a simple issue in latency_tracer.c Fix a simple issue in latency_tracer.c Signed-off-by: Jan Altenberg --- kernel/latency_trace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) Index: linux-rt-rebase.q/kernel/latency_trace.c =================================================================== --- linux-rt-rebase.q.orig/kernel/latency_trace.c +++ linux-rt-rebase.q/kernel/latency_trace.c @@ -1003,7 +1003,7 @@ static int min_idx(struct block_idx *bid idx = bidx->idx[cpu]; if (idx >= min(max_tr.traces[cpu].trace_idx, MAX_TRACE)) continue; - if (idx >= MAX_TRACE*NR_CPUS) { + if (idx > MAX_TRACE*NR_CPUS) { printk("huh: idx (%d) > %ld*%d!\n", idx, MAX_TRACE, NR_CPUS); WARN_ON(1); @@ -1150,7 +1150,7 @@ static void update_out_trace(void) *out_entry = *entry; out_entry++; sum++; - if (sum >= MAX_TRACE*NR_CPUS) { + if (sum > MAX_TRACE*NR_CPUS) { printk("huh: sum (%d) > %ld*%d!\n", sum, MAX_TRACE, NR_CPUS); WARN_ON(1); patches/print-might-sleep-hack.patch0000664000077200007720000000510310653433170017017 0ustar mingomingoTemporary HACK!!!! PREEMPT_RT suffers from the on going problem of running printk in atomic operations. It is very advantageous to do so but with PREEMPT_RT making spin_locks sleep, it can also be devastating. This patch does not solve the problem of printk sleeping in an atomic operation. This patch just makes printk not report that it is. Of course if printk does report that it's sleeping in an atomic operation, then that printing of the report will also print a report, and you go into recursive hell. We need to really sit down and solve the real issue here. --- include/linux/sched.h | 13 +++++++++++++ kernel/printk.c | 4 ++++ kernel/rtmutex.c | 4 +++- 3 files changed, 20 insertions(+), 1 deletion(-) Index: linux-rt-rebase.q/include/linux/sched.h =================================================================== --- linux-rt-rebase.q.orig/include/linux/sched.h +++ linux-rt-rebase.q/include/linux/sched.h @@ -1368,8 +1368,21 @@ struct task_struct { #ifdef CONFIG_FAULT_INJECTION int make_it_fail; #endif +#ifdef CONFIG_PREEMPT_RT + /* + * Temporary hack, until we find a solution to + * handle printk in atomic operations. 
+ */ + int in_printk; +#endif }; +#ifdef CONFIG_PREEMPT_RT +# define set_printk_might_sleep(x) do { current->in_printk = x; } while(0) +#else +# define set_printk_might_sleep(x) do { } while(0) +#endif + /* * Priority of a process goes from 0..MAX_PRIO-1, valid RT * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH Index: linux-rt-rebase.q/kernel/printk.c =================================================================== --- linux-rt-rebase.q.orig/kernel/printk.c +++ linux-rt-rebase.q/kernel/printk.c @@ -339,10 +339,14 @@ static void __call_console_drivers(unsig int trace_save = trace_enabled; trace_enabled = 0; + set_printk_might_sleep(1); con->write(con, &LOG_BUF(start), end - start); + set_printk_might_sleep(0); trace_enabled = trace_save; #else + set_printk_might_sleep(1); con->write(con, &LOG_BUF(start), end - start); + set_printk_might_sleep(0); #endif } } Index: linux-rt-rebase.q/kernel/rtmutex.c =================================================================== --- linux-rt-rebase.q.orig/kernel/rtmutex.c +++ linux-rt-rebase.q/kernel/rtmutex.c @@ -631,7 +631,9 @@ static inline void rt_spin_lock_fastlock(struct rt_mutex *lock, void fastcall (*slowfn)(struct rt_mutex *lock)) { - might_sleep(); + /* Temporary HACK! */ + if (!current->in_printk) + might_sleep(); if (likely(rt_mutex_cmpxchg(lock, NULL, current))) rt_mutex_deadlock_account_lock(lock, current); patches/preempt-realtime-netconsole.patch0000664000077200007720000000141410653433166020174 0ustar mingomingo--- drivers/net/netconsole.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) Index: linux-rt-rebase.q/drivers/net/netconsole.c =================================================================== --- linux-rt-rebase.q.orig/drivers/net/netconsole.c +++ linux-rt-rebase.q/drivers/net/netconsole.c @@ -68,21 +68,16 @@ static int configured = 0; static void write_msg(struct console *con, const char *msg, unsigned int len) { int frag, left; - unsigned long flags; if (!np.dev) return; - local_irq_save(flags); - - for(left = len; left; ) { + for (left = len; left; ) { frag = min(left, MAX_PRINT_CHUNK); netpoll_send_udp(&np, msg, frag); msg += frag; left -= frag; } - - local_irq_restore(flags); } static struct console netconsole = { patches/2.6.21-rc6-lockless8-spinlock-tree_lock.patch0000664000077200007720000003161110653433167021464 0ustar mingomingoFrom: Nick Piggin Subject: [patch 8/9] mm: spinlock tree_lock mapping->tree_lock has no read lockers. convert the lock from an rwlock to a spinlock. Signed-off-by: Nick Piggin --- fs/buffer.c | 4 ++-- fs/inode.c | 2 +- include/asm-arm/cacheflush.h | 4 ++-- include/asm-parisc/cacheflush.h | 4 ++-- include/linux/fs.h | 2 +- mm/filemap.c | 10 +++++----- mm/migrate.c | 6 +++--- mm/page-writeback.c | 14 +++++++------- mm/swap_state.c | 10 +++++----- mm/swapfile.c | 4 ++-- mm/truncate.c | 6 +++--- mm/vmscan.c | 8 ++++---- 12 files changed, 37 insertions(+), 37 deletions(-) Index: linux-rt-rebase.q/fs/buffer.c =================================================================== --- linux-rt-rebase.q.orig/fs/buffer.c +++ linux-rt-rebase.q/fs/buffer.c @@ -684,7 +684,7 @@ static int __set_page_dirty(struct page if (TestSetPageDirty(page)) return 0; - write_lock_irq(&mapping->tree_lock); + spin_lock_irq(&mapping->tree_lock); if (page->mapping) { /* Race with truncate? 
*/ WARN_ON_ONCE(warn && !PageUptodate(page)); @@ -695,7 +695,7 @@ static int __set_page_dirty(struct page radix_tree_tag_set(&mapping->page_tree, page_index(page), PAGECACHE_TAG_DIRTY); } - write_unlock_irq(&mapping->tree_lock); + spin_unlock_irq(&mapping->tree_lock); __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); return 1; Index: linux-rt-rebase.q/fs/inode.c =================================================================== --- linux-rt-rebase.q.orig/fs/inode.c +++ linux-rt-rebase.q/fs/inode.c @@ -193,7 +193,7 @@ void inode_init_once(struct inode *inode mutex_init(&inode->i_mutex); init_rwsem(&inode->i_alloc_sem); INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC); - rwlock_init(&inode->i_data.tree_lock); + spin_lock_init(&inode->i_data.tree_lock); spin_lock_init(&inode->i_data.i_mmap_lock); INIT_LIST_HEAD(&inode->i_data.private_list); spin_lock_init(&inode->i_data.private_lock); Index: linux-rt-rebase.q/include/asm-arm/cacheflush.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-arm/cacheflush.h +++ linux-rt-rebase.q/include/asm-arm/cacheflush.h @@ -413,9 +413,9 @@ static inline void flush_anon_page(struc } #define flush_dcache_mmap_lock(mapping) \ - write_lock_irq(&(mapping)->tree_lock) + spin_lock_irq(&(mapping)->tree_lock) #define flush_dcache_mmap_unlock(mapping) \ - write_unlock_irq(&(mapping)->tree_lock) + spin_unlock_irq(&(mapping)->tree_lock) #define flush_icache_user_range(vma,page,addr,len) \ flush_dcache_page(page) Index: linux-rt-rebase.q/include/asm-parisc/cacheflush.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-parisc/cacheflush.h +++ linux-rt-rebase.q/include/asm-parisc/cacheflush.h @@ -45,9 +45,9 @@ void flush_cache_mm(struct mm_struct *mm extern void flush_dcache_page(struct page *page); #define flush_dcache_mmap_lock(mapping) \ - write_lock_irq(&(mapping)->tree_lock) + spin_lock_irq(&(mapping)->tree_lock) #define flush_dcache_mmap_unlock(mapping) \ - write_unlock_irq(&(mapping)->tree_lock) + spin_unlock_irq(&(mapping)->tree_lock) #define flush_icache_page(vma,page) do { \ flush_kernel_dcache_page(page); \ Index: linux-rt-rebase.q/include/linux/fs.h =================================================================== --- linux-rt-rebase.q.orig/include/linux/fs.h +++ linux-rt-rebase.q/include/linux/fs.h @@ -441,7 +441,7 @@ struct backing_dev_info; struct address_space { struct inode *host; /* owner: inode, block_device */ struct radix_tree_root page_tree; /* radix tree of all pages */ - rwlock_t tree_lock; /* and rwlock protecting it */ + spinlock_t tree_lock; /* and lock protecting it */ unsigned int i_mmap_writable;/* count VM_SHARED mappings */ struct prio_tree_root i_mmap; /* tree of private and shared mappings */ struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */ Index: linux-rt-rebase.q/mm/filemap.c =================================================================== --- linux-rt-rebase.q.orig/mm/filemap.c +++ linux-rt-rebase.q/mm/filemap.c @@ -110,7 +110,7 @@ generic_file_direct_IO(int rw, struct ki /* * Remove a page from the page cache and free it. Caller has to make * sure the page is locked and that nobody else uses it - or that usage - * is safe. The caller must hold a write_lock on the mapping's tree_lock. + * is safe. The caller must hold the mapping's tree_lock. 
*/ void __remove_from_page_cache(struct page *page) { @@ -129,9 +129,9 @@ void remove_from_page_cache(struct page BUG_ON(!PageLocked(page)); - write_lock_irq(&mapping->tree_lock); + spin_lock_irq(&mapping->tree_lock); __remove_from_page_cache(page); - write_unlock_irq(&mapping->tree_lock); + spin_unlock_irq(&mapping->tree_lock); } static int sync_page(void *word) @@ -442,7 +442,7 @@ int add_to_page_cache(struct page *page, if (error == 0) { set_page_nonewrefs(page); - write_lock_irq(&mapping->tree_lock); + spin_lock_irq(&mapping->tree_lock); error = radix_tree_insert(&mapping->page_tree, offset, page); if (!error) { page_cache_get(page); @@ -452,7 +452,7 @@ int add_to_page_cache(struct page *page, mapping->nrpages++; __inc_zone_page_state(page, NR_FILE_PAGES); } - write_unlock_irq(&mapping->tree_lock); + spin_unlock_irq(&mapping->tree_lock); clear_page_nonewrefs(page); radix_tree_preload_end(); } Index: linux-rt-rebase.q/mm/migrate.c =================================================================== --- linux-rt-rebase.q.orig/mm/migrate.c +++ linux-rt-rebase.q/mm/migrate.c @@ -304,14 +304,14 @@ static int migrate_page_move_mapping(str } set_page_nonewrefs(page); - write_lock_irq(&mapping->tree_lock); + spin_lock_irq(&mapping->tree_lock); pslot = radix_tree_lookup_slot(&mapping->page_tree, page_index(page)); if (page_count(page) != 2 + !!PagePrivate(page) || (struct page *)radix_tree_deref_slot(pslot) != page) { - write_unlock_irq(&mapping->tree_lock); + spin_unlock_irq(&mapping->tree_lock); clear_page_nonewrefs(page); return -EAGAIN; } @@ -329,7 +329,7 @@ static int migrate_page_move_mapping(str radix_tree_replace_slot(pslot, newpage); page->mapping = NULL; - write_unlock_irq(&mapping->tree_lock); + spin_unlock_irq(&mapping->tree_lock); clear_page_nonewrefs(page); /* Index: linux-rt-rebase.q/mm/page-writeback.c =================================================================== --- linux-rt-rebase.q.orig/mm/page-writeback.c +++ linux-rt-rebase.q/mm/page-writeback.c @@ -809,7 +809,7 @@ int __set_page_dirty_no_writeback(struct * mapping is pinned by the vma's ->vm_file reference. * * We take care to handle the case where the page was truncated from the - * mapping by re-checking page_mapping() insode tree_lock. + * mapping by re-checking page_mapping() inside tree_lock. */ int __set_page_dirty_nobuffers(struct page *page) { @@ -820,7 +820,7 @@ int __set_page_dirty_nobuffers(struct pa if (!mapping) return 1; - write_lock_irq(&mapping->tree_lock); + spin_lock_irq(&mapping->tree_lock); mapping2 = page_mapping(page); if (mapping2) { /* Race with truncate? 
*/ BUG_ON(mapping2 != mapping); @@ -832,7 +832,7 @@ int __set_page_dirty_nobuffers(struct pa radix_tree_tag_set(&mapping->page_tree, page_index(page), PAGECACHE_TAG_DIRTY); } - write_unlock_irq(&mapping->tree_lock); + spin_unlock_irq(&mapping->tree_lock); if (mapping->host) { /* !PageAnon && !swapper_space */ __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); @@ -977,13 +977,13 @@ int test_clear_page_writeback(struct pag if (mapping) { unsigned long flags; - write_lock_irqsave(&mapping->tree_lock, flags); + spin_lock_irqsave(&mapping->tree_lock, flags); ret = TestClearPageWriteback(page); if (ret) radix_tree_tag_clear(&mapping->page_tree, page_index(page), PAGECACHE_TAG_WRITEBACK); - write_unlock_irqrestore(&mapping->tree_lock, flags); + spin_unlock_irqrestore(&mapping->tree_lock, flags); } else { ret = TestClearPageWriteback(page); } @@ -1000,7 +1000,7 @@ int test_set_page_writeback(struct page if (mapping) { unsigned long flags; - write_lock_irqsave(&mapping->tree_lock, flags); + spin_lock_irqsave(&mapping->tree_lock, flags); ret = TestSetPageWriteback(page); if (!ret) radix_tree_tag_set(&mapping->page_tree, @@ -1010,7 +1010,7 @@ int test_set_page_writeback(struct page radix_tree_tag_clear(&mapping->page_tree, page_index(page), PAGECACHE_TAG_DIRTY); - write_unlock_irqrestore(&mapping->tree_lock, flags); + spin_unlock_irqrestore(&mapping->tree_lock, flags); } else { ret = TestSetPageWriteback(page); } Index: linux-rt-rebase.q/mm/swap_state.c =================================================================== --- linux-rt-rebase.q.orig/mm/swap_state.c +++ linux-rt-rebase.q/mm/swap_state.c @@ -38,7 +38,7 @@ static struct backing_dev_info swap_back struct address_space swapper_space = { .page_tree = RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN), - .tree_lock = __RW_LOCK_UNLOCKED(swapper_space.tree_lock), + .tree_lock = __SPIN_LOCK_UNLOCKED(swapper_space.tree_lock), .a_ops = &swap_aops, .i_mmap_nonlinear = LIST_HEAD_INIT(swapper_space.i_mmap_nonlinear), .backing_dev_info = &swap_backing_dev_info, @@ -80,7 +80,7 @@ static int __add_to_swap_cache(struct pa error = radix_tree_preload(gfp_mask); if (!error) { set_page_nonewrefs(page); - write_lock_irq(&swapper_space.tree_lock); + spin_lock_irq(&swapper_space.tree_lock); error = radix_tree_insert(&swapper_space.page_tree, entry.val, page); if (!error) { @@ -90,7 +90,7 @@ static int __add_to_swap_cache(struct pa total_swapcache_pages++; __inc_zone_page_state(page, NR_FILE_PAGES); } - write_unlock_irq(&swapper_space.tree_lock); + spin_unlock_irq(&swapper_space.tree_lock); clear_page_nonewrefs(page); radix_tree_preload_end(); } @@ -202,9 +202,9 @@ void delete_from_swap_cache(struct page entry.val = page_private(page); - write_lock_irq(&swapper_space.tree_lock); + spin_lock_irq(&swapper_space.tree_lock); __delete_from_swap_cache(page); - write_unlock_irq(&swapper_space.tree_lock); + spin_unlock_irq(&swapper_space.tree_lock); swap_free(entry); page_cache_release(page); Index: linux-rt-rebase.q/mm/swapfile.c =================================================================== --- linux-rt-rebase.q.orig/mm/swapfile.c +++ linux-rt-rebase.q/mm/swapfile.c @@ -367,13 +367,13 @@ int remove_exclusive_swap_page(struct pa retval = 0; if (p->swap_map[swp_offset(entry)] == 1) { /* Recheck the page count with the swapcache lock held.. 
*/ - write_lock_irq(&swapper_space.tree_lock); + spin_lock_irq(&swapper_space.tree_lock); if ((page_count(page) == 2) && !PageWriteback(page)) { __delete_from_swap_cache(page); SetPageDirty(page); retval = 1; } - write_unlock_irq(&swapper_space.tree_lock); + spin_unlock_irq(&swapper_space.tree_lock); } spin_unlock(&swap_lock); Index: linux-rt-rebase.q/mm/truncate.c =================================================================== --- linux-rt-rebase.q.orig/mm/truncate.c +++ linux-rt-rebase.q/mm/truncate.c @@ -347,18 +347,18 @@ invalidate_complete_page2(struct address if (PagePrivate(page) && !try_to_release_page(page, GFP_KERNEL)) return 0; - write_lock_irq(&mapping->tree_lock); + spin_lock_irq(&mapping->tree_lock); if (PageDirty(page)) goto failed; BUG_ON(PagePrivate(page)); __remove_from_page_cache(page); - write_unlock_irq(&mapping->tree_lock); + spin_unlock_irq(&mapping->tree_lock); ClearPageUptodate(page); page_cache_release(page); /* pagecache ref */ return 1; failed: - write_unlock_irq(&mapping->tree_lock); + spin_unlock_irq(&mapping->tree_lock); return 0; } Index: linux-rt-rebase.q/mm/vmscan.c =================================================================== --- linux-rt-rebase.q.orig/mm/vmscan.c +++ linux-rt-rebase.q/mm/vmscan.c @@ -370,7 +370,7 @@ int remove_mapping(struct address_space BUG_ON(mapping != page_mapping(page)); set_page_nonewrefs(page); - write_lock_irq(&mapping->tree_lock); + spin_lock_irq(&mapping->tree_lock); /* * The non racy check for a busy page. * @@ -405,13 +405,13 @@ int remove_mapping(struct address_space if (PageSwapCache(page)) { swp_entry_t swap = { .val = page_private(page) }; __delete_from_swap_cache(page); - write_unlock_irq(&mapping->tree_lock); + spin_unlock_irq(&mapping->tree_lock); swap_free(swap); goto free_it; } __remove_from_page_cache(page); - write_unlock_irq(&mapping->tree_lock); + spin_unlock_irq(&mapping->tree_lock); free_it: __clear_page_nonewrefs(page); @@ -419,7 +419,7 @@ free_it: return 1; cannot_free: - write_unlock_irq(&mapping->tree_lock); + spin_unlock_irq(&mapping->tree_lock); clear_page_nonewrefs(page); return 0; } patches/x86_64-convert-to-clockevents.patch0000664000077200007720000003104610653433161020122 0ustar mingomingoSubject: x86_64: convert to clock events Finally switch to the clockevents code. Share code with i386 for hpet and PIT. 
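(Not part of the patch: a minimal, illustrative sketch of the clockevents
registration pattern this conversion moves to. All names here -- demo_evt,
demo_set_mode, demo_set_next -- are hypothetical and the field values are
placeholders; the real lapic/PIT/HPET devices live in the files touched below.)

#include <linux/clockchips.h>
#include <linux/smp.h>

static void demo_set_mode(enum clock_event_mode mode,
			  struct clock_event_device *evt)
{
	/* program the hardware for periodic/oneshot/shutdown */
}

static int demo_set_next(unsigned long delta, struct clock_event_device *evt)
{
	/* arm the hardware to fire 'delta' cycles from now */
	return 0;
}

static struct clock_event_device demo_evt = {
	.name		= "demo",
	.features	= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
	.set_mode	= demo_set_mode,
	.set_next_event	= demo_set_next,
	/* real drivers also fill .mult so the core can scale ns to cycles */
	.shift		= 32,
	.rating		= 100,
};

static void __init demo_register(void)
{
	demo_evt.cpumask = cpumask_of_cpu(smp_processor_id());
	clockevents_register_device(&demo_evt);
	/* from here on the tick layer drives demo_evt.event_handler() */
}
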
Signed-off-by: Thomas Gleixner Signed-off-by: Chris Wright Signed-off-by: Ingo Molnar --- arch/x86_64/Kconfig | 10 +++ arch/x86_64/kernel/Makefile | 4 + arch/x86_64/kernel/apic.c | 90 +++++++++++++++++++---------------- arch/x86_64/kernel/i8259.c | 46 ------------------ arch/x86_64/kernel/smpboot.c | 4 - arch/x86_64/kernel/time.c | 109 +++++-------------------------------------- include/asm-x86_64/hpet.h | 16 ------ 7 files changed, 76 insertions(+), 203 deletions(-) Index: linux-rt-rebase.q/arch/x86_64/Kconfig =================================================================== --- linux-rt-rebase.q.orig/arch/x86_64/Kconfig +++ linux-rt-rebase.q/arch/x86_64/Kconfig @@ -28,7 +28,15 @@ config GENERIC_TIME bool default y -config GENERIC_CLOCKEVENTS_MIGR +config GENERIC_CLOCKEVENTS + bool + default y + +config GENERIC_CLOCKEVENTS_BROADCAST + bool + default y + +config NONIRQ_WAKEUP bool default y Index: linux-rt-rebase.q/arch/x86_64/kernel/Makefile =================================================================== --- linux-rt-rebase.q.orig/arch/x86_64/kernel/Makefile +++ linux-rt-rebase.q/arch/x86_64/kernel/Makefile @@ -9,7 +9,7 @@ obj-y := process.o signal.o entry.o trap x8664_ksyms.o i387.o syscall.o vsyscall.o \ setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \ pci-dma.o pci-nommu.o alternative.o hpet.o tsc.o bugs.o \ - perfctr-watchdog.o + perfctr-watchdog.o i8253.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-$(CONFIG_X86_MCE) += mce.o therm_throt.o @@ -49,6 +49,8 @@ obj-y += pcspeaker.o CFLAGS_vsyscall.o := $(PROFILING) -g0 +i8253-y += ../../i386/kernel/i8253.o +hpet-y += ../../i386/kernel/hpet.o therm_throt-y += ../../i386/kernel/cpu/mcheck/therm_throt.o bootflag-y += ../../i386/kernel/bootflag.o legacy_serial-y += ../../i386/kernel/legacy_serial.o Index: linux-rt-rebase.q/arch/x86_64/kernel/apic.c =================================================================== --- linux-rt-rebase.q.orig/arch/x86_64/kernel/apic.c +++ linux-rt-rebase.q/arch/x86_64/kernel/apic.c @@ -858,25 +858,12 @@ static void __setup_APIC_LVTT(unsigned i static void setup_APIC_timer(void) { - unsigned long flags; - int irqen; + struct clock_event_device *levt = &__get_cpu_var(lapic_events); - local_irq_save(flags); + memcpy(levt, &lapic_clockevent, sizeof(*levt)); + levt->cpumask = cpumask_of_cpu(smp_processor_id()); - irqen = ! cpu_isset(smp_processor_id(), - timer_interrupt_broadcast_ipi_mask); - __setup_APIC_LVTT(calibration_result, 0, irqen); - /* Turn off PIT interrupt if we use APIC timer as main timer. - Only works with the PM timer right now - TBD fix it for HPET too. */ - if ((pmtmr_ioport != 0) && - smp_processor_id() == boot_cpu_id && - apic_runs_main_timer == 1 && - !cpu_isset(boot_cpu_id, timer_interrupt_broadcast_ipi_mask)) { - stop_timer_interrupt(); - apic_runs_main_timer++; - } - local_irq_restore(flags); + clockevents_register_device(levt); } /* @@ -951,18 +938,34 @@ static void __init calibrate_APIC_clock( void __init setup_boot_APIC_clock (void) { + /* + * The local apic timer can be disabled via the kernel commandline. + * Register the lapic timer as a dummy clock event source on SMP + * systems, so the broadcast mechanism is used. On UP systems simply + * ignore it. + */ if (disable_apic_timer) { printk(KERN_INFO "Disabling APIC timer\n"); + /* No broadcast on UP ! */ + if (num_possible_cpus() > 1) + setup_APIC_timer(); return; } printk(KERN_INFO "Using local APIC timer interrupts.\n"); - using_apic_timer = 1; - calibrate_APIC_clock(); + /* - * Now set up the timer for real. 
+ * If nmi_watchdog is set to IO_APIC, we need the + * PIT/HPET going. Otherwise register lapic as a dummy + * device. */ + if (nmi_watchdog != NMI_IO_APIC) + lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; + else + printk(KERN_WARNING "APIC timer registered as dummy," + " due to nmi_watchdog=1!\n"); + setup_APIC_timer(); } @@ -1074,22 +1077,34 @@ void setup_APIC_extended_lvt(unsigned ch void smp_local_timer_interrupt(void) { - profile_tick(CPU_PROFILING); -#ifdef CONFIG_SMP - update_process_times(user_mode(get_irq_regs())); -#endif - if (apic_runs_main_timer > 1 && smp_processor_id() == boot_cpu_id) - main_timer_handler(); + int cpu = smp_processor_id(); + struct clock_event_device *evt = &per_cpu(lapic_events, cpu); + /* - * We take the 'long' return path, and there every subsystem - * grabs the appropriate locks (kernel lock/ irq lock). + * Normally we should not be here till LAPIC has been initialized but + * in some cases like kdump, its possible that there is a pending LAPIC + * timer interrupt from previous kernel's context and is delivered in + * new kernel the moment interrupts are enabled. * - * We might want to decouple profiling from the 'long path', - * and do the profiling totally in assembly. - * - * Currently this isn't too much of an issue (performance wise), - * we can take more than 100K local irqs per second on a 100 MHz P5. + * Interrupts are enabled early and LAPIC is setup much later, hence + * its possible that when we get here evt->event_handler is NULL. + * Check for event_handler being NULL and discard the interrupt as + * spurious. + */ + if (!evt->event_handler) { + printk(KERN_WARNING + "Spurious LAPIC timer interrupt on cpu %d\n", cpu); + /* Switch it off */ + lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt); + return; + } + + /* + * the NMI deadlock-detector uses this. */ + add_pda(apic_timer_irqs, 1); + + evt->event_handler(evt); } /* @@ -1105,11 +1120,6 @@ void smp_apic_timer_interrupt(struct pt_ struct pt_regs *old_regs = set_irq_regs(regs); /* - * the NMI deadlock-detector uses this. - */ - add_pda(apic_timer_irqs, 1); - - /* * NOTE! We'd better ACK the irq immediately, * because timer handling can be slow. 
*/ @@ -1292,7 +1302,7 @@ static __init int setup_noapictimer(char static __init int setup_apicmaintimer(char *str) { apic_runs_main_timer = 1; - nohpet = 1; + return 1; } __setup("apicmaintimer", setup_apicmaintimer); @@ -1308,7 +1318,7 @@ static __init int setup_apicpmtimer(char { apic_calibrate_pmtmr = 1; notsc_setup(NULL); - return setup_apicmaintimer(NULL); + return 0; } __setup("apicpmtimer", setup_apicpmtimer); Index: linux-rt-rebase.q/arch/x86_64/kernel/i8259.c =================================================================== --- linux-rt-rebase.q.orig/arch/x86_64/kernel/i8259.c +++ linux-rt-rebase.q/arch/x86_64/kernel/i8259.c @@ -444,46 +444,6 @@ void __init init_ISA_irqs (void) } } -static void setup_timer_hardware(void) -{ - outb_p(0x34,0x43); /* binary, mode 2, LSB/MSB, ch 0 */ - udelay(10); - outb_p(LATCH & 0xff , 0x40); /* LSB */ - udelay(10); - outb(LATCH >> 8 , 0x40); /* MSB */ -} - -static int timer_resume(struct sys_device *dev) -{ - setup_timer_hardware(); - return 0; -} - -void i8254_timer_resume(void) -{ - setup_timer_hardware(); -} - -static struct sysdev_class timer_sysclass = { - set_kset_name("timer_pit"), - .resume = timer_resume, -}; - -static struct sys_device device_timer = { - .id = 0, - .cls = &timer_sysclass, -}; - -static int __init init_timer_sysfs(void) -{ - int error = sysdev_class_register(&timer_sysclass); - if (!error) - error = sysdev_register(&device_timer); - return error; -} - -device_initcall(init_timer_sysfs); - void __init init_IRQ(void) { int i; @@ -533,12 +493,6 @@ void __init init_IRQ(void) set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); set_intr_gate(ERROR_APIC_VECTOR, error_interrupt); - /* - * Set the clock to HZ Hz, we already have a valid - * vector now: - */ - setup_timer_hardware(); - if (!acpi_ioapic) setup_irq(2, &irq2); } Index: linux-rt-rebase.q/arch/x86_64/kernel/smpboot.c =================================================================== --- linux-rt-rebase.q.orig/arch/x86_64/kernel/smpboot.c +++ linux-rt-rebase.q/arch/x86_64/kernel/smpboot.c @@ -223,8 +223,6 @@ void __cpuinit smp_callin(void) local_irq_disable(); Dprintk("Stack at about %p\n",&cpuid); - disable_APIC_timer(); - /* * Save our processor parameters */ @@ -348,8 +346,6 @@ void __cpuinit start_secondary(void) enable_8259A_irq(0); } - enable_APIC_timer(); - /* * The sibling maps must be set before turing the online map on for * this cpu Index: linux-rt-rebase.q/arch/x86_64/kernel/time.c =================================================================== --- linux-rt-rebase.q.orig/arch/x86_64/kernel/time.c +++ linux-rt-rebase.q/arch/x86_64/kernel/time.c @@ -28,6 +28,8 @@ #include #include #include +#include + #ifdef CONFIG_ACPI #include /* for PM timer frequency */ #include @@ -46,12 +48,8 @@ #include #include -static char *timename = NULL; - DEFINE_SPINLOCK(rtc_lock); EXPORT_SYMBOL(rtc_lock); -DEFINE_SPINLOCK(i8253_lock); -EXPORT_SYMBOL(i8253_lock); volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES; @@ -194,6 +192,13 @@ static irqreturn_t timer_interrupt(int i return IRQ_HANDLED; } +static irqreturn_t timer_event_interrupt(int irq, void *dev_id) +{ + global_clock_event->event_handler(global_clock_event); + + return IRQ_HANDLED; +} + unsigned long read_persistent_clock(void) { unsigned int year, mon, day, hour, min, sec; @@ -291,63 +296,19 @@ static unsigned int __init tsc_calibrate return pmc_now * tsc_khz / (tsc_now - tsc_start); } -static void __pit_init(int val, u8 mode) -{ - unsigned long flags; - - spin_lock_irqsave(&i8253_lock, 
flags); - outb_p(mode, PIT_MODE); - outb_p(val & 0xff, PIT_CH0); /* LSB */ - outb_p(val >> 8, PIT_CH0); /* MSB */ - spin_unlock_irqrestore(&i8253_lock, flags); -} - -void __init pit_init(void) -{ - __pit_init(LATCH, 0x34); /* binary, mode 2, LSB/MSB, ch 0 */ -} - -void pit_stop_interrupt(void) -{ - __pit_init(0, 0x30); /* mode 0 */ -} - -void stop_timer_interrupt(void) -{ - char *name; - if (hpet_address) { - name = "HPET"; - hpet_timer_stop_set_go(0); - } else { - name = "PIT"; - pit_stop_interrupt(); - } - printk(KERN_INFO "timer: %s interrupt stopped.\n", name); -} - static struct irqaction irq0 = { - .handler = timer_interrupt, - .flags = IRQF_DISABLED | IRQF_IRQPOLL, + .handler = timer_event_interrupt, + .flags = IRQF_DISABLED | IRQF_IRQPOLL | IRQF_NOBALANCING, .mask = CPU_MASK_NONE, .name = "timer" }; void __init time_init(void) { - if (nohpet) - hpet_address = 0; - - if (hpet_arch_init()) - hpet_address = 0; + if (!hpet_enable()) + setup_pit_timer(); - if (hpet_use_timer) { - /* set tick_nsec to use the proper rate for HPET */ - tick_nsec = TICK_NSEC_HPET; - timename = "HPET"; - } else { - pit_init(); - timename = "PIT"; - } + setup_irq(0, &irq0); tsc_calibrate(); @@ -369,46 +330,4 @@ void __init time_init(void) printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000); init_tsc_clocksource(); - - setup_irq(0, &irq0); -} - -/* - * sysfs support for the timer. - */ - -static int timer_suspend(struct sys_device *dev, pm_message_t state) -{ - return 0; } - -static int timer_resume(struct sys_device *dev) -{ - if (hpet_address) - hpet_reenable(); - else - i8254_timer_resume(); - return 0; -} - -static struct sysdev_class timer_sysclass = { - .resume = timer_resume, - .suspend = timer_suspend, - set_kset_name("timer"), -}; - -/* XXX this sysfs stuff should probably go elsewhere later -john */ -static struct sys_device device_timer = { - .id = 0, - .cls = &timer_sysclass, -}; - -static int time_init_device(void) -{ - int error = sysdev_class_register(&timer_sysclass); - if (!error) - error = sysdev_register(&device_timer); - return error; -} - -device_initcall(time_init_device); Index: linux-rt-rebase.q/include/asm-x86_64/hpet.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-x86_64/hpet.h +++ linux-rt-rebase.q/include/asm-x86_64/hpet.h @@ -1,18 +1,2 @@ -#ifndef _ASM_X8664_HPET_H -#define _ASM_X8664_HPET_H 1 #include - -#define HPET_TICK_RATE (HZ * 100000UL) - -extern int hpet_rtc_timer_init(void); -extern int hpet_arch_init(void); -extern int hpet_timer_stop_set_go(unsigned long tick); -extern int hpet_reenable(void); -extern unsigned int hpet_calibrate_tsc(void); - -extern int hpet_use_timer; -extern unsigned long hpet_period; -extern unsigned long hpet_tick; - -#endif patches/ppc-gtod-notrace-fix.patch0000664000077200007720000000101410653433163016474 0ustar mingomingo--- arch/powerpc/kernel/time.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) Index: linux-rt-rebase.q/arch/powerpc/kernel/time.c =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/kernel/time.c +++ linux-rt-rebase.q/arch/powerpc/kernel/time.c @@ -922,7 +922,7 @@ void div128_by_32(u64 dividend_high, u64 #include -static cycle_t timebase_read(void) +static cycle_t notrace timebase_read(void) { return (cycle_t)get_tb(); } patches/sched-cpu-clock-unlocked.patch0000664000077200007720000000205010653433170017307 0ustar mingomingoSubject: sched: make cpu_clock() not use the 
rq clock From: Ingo Molnar it is enough to disable interrupts to get the precise rq-clock of the local CPU. this also solves an NMI watchdog regression: the NMI watchdog calls touch_softlockup_watchdog(), which might deadlock on rq->lock if the NMI hits an rq-locked critical section. Signed-off-by: Ingo Molnar --- kernel/sched.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) Index: linux-rt-rebase.q/kernel/sched.c =================================================================== --- linux-rt-rebase.q.orig/kernel/sched.c +++ linux-rt-rebase.q/kernel/sched.c @@ -446,13 +446,12 @@ static inline unsigned long long rq_cloc */ unsigned long long cpu_clock(int cpu) { - struct rq *rq = cpu_rq(cpu); unsigned long long now; unsigned long flags; - spin_lock_irqsave(&rq->lock, flags); - now = rq_clock(rq); - spin_unlock_irqrestore(&rq->lock, flags); + local_irq_save(flags); + now = rq_clock(cpu_rq(cpu)); + local_irq_restore(flags); return now; } patches/i386-nmi-watchdog-show-regs.patch0000664000077200007720000000101610653433167017534 0ustar mingomingo--- arch/i386/kernel/nmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) Index: linux-rt-rebase.q/arch/i386/kernel/nmi.c =================================================================== --- linux-rt-rebase.q.orig/arch/i386/kernel/nmi.c +++ linux-rt-rebase.q/arch/i386/kernel/nmi.c @@ -392,7 +392,7 @@ notrace __kprobes int nmi_watchdog_tick( spin_lock(&lock); printk("NMI backtrace for cpu %d\n", cpu); - dump_stack(); + show_regs(regs); spin_unlock(&lock); cpu_clear(cpu, backtrace_mask); } patches/softlockup-use-cpu-clock.patch0000664000077200007720000000247210653433167017420 0ustar mingomingoSubject: softlockup: use cpu_clock() instead of sched_clock() From: Ingo Molnar sched_clock() is not a reliable time-source, use cpu_clock() instead. Signed-off-by: Ingo Molnar --- kernel/softlockup.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) Index: linux-rt-rebase.q/kernel/softlockup.c =================================================================== --- linux-rt-rebase.q.orig/kernel/softlockup.c +++ linux-rt-rebase.q/kernel/softlockup.c @@ -42,14 +42,16 @@ static struct notifier_block panic_block * resolution, and we don't need to waste time with a big divide when * 2^30ns == 1.074s. 
*/ -static unsigned long get_timestamp(void) +static unsigned long get_timestamp(int this_cpu) { - return sched_clock() >> 30; /* 2^30 ~= 10^9 */ + return cpu_clock(this_cpu) >> 30; /* 2^30 ~= 10^9 */ } void touch_softlockup_watchdog(void) { - __raw_get_cpu_var(touch_timestamp) = get_timestamp(); + int this_cpu = raw_smp_processor_id(); + + per_cpu(touch_timestamp, this_cpu) = get_timestamp(this_cpu); } EXPORT_SYMBOL(touch_softlockup_watchdog); @@ -95,7 +97,7 @@ void softlockup_tick(void) return; } - now = get_timestamp(); + now = get_timestamp(this_cpu); /* Wake up the high-prio watchdog task every second: */ if (now > (touch_timestamp + 1)) patches/hpet-force-enable-on-ich34.patch0000664000077200007720000000560510653433161017354 0ustar mingomingoFrom us15@os.inf.tu-dresden.de Wed Jun 6 14:34:18 2007 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.1.7-deb (2006-10-05) on debian X-Spam-Level: X-Spam-Status: No, score=0.1 required=5.0 tests=AWL,MAILTO_TO_SPAM_ADDR autolearn=no version=3.1.7-deb Received: from os.inf.tu-dresden.de (os.inf.tu-dresden.de [141.76.48.99]) (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits)) (No client certificate requested) by mail.tglx.de (Postfix) with ESMTP id CB67965C065 for ; Wed, 6 Jun 2007 14:34:18 +0200 (CEST) Received: from nova.inf.tu-dresden.de ([141.76.48.73] helo=laptop.hypervisor.org) by os.inf.tu-dresden.de with esmtpsa (TLSv1:AES256-SHA:256) (Exim 4.67) id 1HvuiQ-0000WF-8q; Wed, 06 Jun 2007 14:34:18 +0200 Date: Wed, 6 Jun 2007 14:34:14 +0200 From: "Udo A. Steinberg" To: Thomas Gleixner , Venkatesh Pallipadi Subject: [PATCH]: Enable HPET on ICH3 and ICH4 Message-ID: <20070606143414.6003edd0@laptop.hypervisor.org> X-Mailer: X-Mailer 5.0 Gold Mime-Version: 1.0 Content-Type: multipart/signed; boundary=Sig_TyoZ8hpf907DzN6.B9sCrGr; protocol="application/pgp-signature"; micalg=PGP-SHA1 X-Evolution-Source: imap://tglx%40linutronix.de@localhost:8993/ ICH3 and ICH4 have undocumented HPET capabilities. This patch enables HPET for platforms based around these ICHs. Tested on various ICH3 and ICH4 platforms. Because HPET is not officially documented for ICH3/4 and may not have been validated by chipset folks, we're on thin ice here. I'd recommend testing this patch in -hrt or -mm for a while and wait for success/failure reports before feeding it upstream. Signed-off-by: Udo A. 
Steinberg --- arch/i386/kernel/quirks.c | 8 ++++++++ 1 file changed, 8 insertions(+) Index: linux-rt-rebase.q/arch/i386/kernel/quirks.c =================================================================== --- linux-rt-rebase.q.orig/arch/i386/kernel/quirks.c +++ linux-rt-rebase.q/arch/i386/kernel/quirks.c @@ -232,6 +232,14 @@ static void old_ich_force_enable_hpet(st printk(KERN_DEBUG "Failed to force enable HPET\n"); } +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_0, + old_ich_force_enable_hpet); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_12, + old_ich_force_enable_hpet); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_0, + old_ich_force_enable_hpet); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_12, + old_ich_force_enable_hpet); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801EB_0, old_ich_force_enable_hpet); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801EB_12, patches/2.6.21-rc6-lockless5-lockless-probe.patch0000664000077200007720000000431110653433167020613 0ustar mingomingoFrom: Nick Piggin Subject: [patch 5/9] mm: lockless probe Probing pages and radix_tree_tagged are lockless operations with the lockless radix-tree. Convert these users to RCU locking rather than using tree_lock. Signed-off-by: Nick Piggin --- mm/page-writeback.c | 8 +++----- mm/readahead.c | 6 ++---- 2 files changed, 5 insertions(+), 9 deletions(-) Index: linux-rt-rebase.q/mm/page-writeback.c =================================================================== --- linux-rt-rebase.q.orig/mm/page-writeback.c +++ linux-rt-rebase.q/mm/page-writeback.c @@ -1022,17 +1022,15 @@ int test_set_page_writeback(struct page EXPORT_SYMBOL(test_set_page_writeback); /* - * Return true if any of the pages in the mapping are marged with the + * Return true if any of the pages in the mapping are marked with the * passed tag. */ int mapping_tagged(struct address_space *mapping, int tag) { - unsigned long flags; int ret; - - read_lock_irqsave(&mapping->tree_lock, flags); + rcu_read_lock(); ret = radix_tree_tagged(&mapping->page_tree, tag); - read_unlock_irqrestore(&mapping->tree_lock, flags); + rcu_read_unlock(); return ret; } EXPORT_SYMBOL(mapping_tagged); Index: linux-rt-rebase.q/mm/readahead.c =================================================================== --- linux-rt-rebase.q.orig/mm/readahead.c +++ linux-rt-rebase.q/mm/readahead.c @@ -156,20 +156,19 @@ __do_page_cache_readahead(struct address /* * Preallocate as many pages as we will need. */ - read_lock_irq(&mapping->tree_lock); for (page_idx = 0; page_idx < nr_to_read; page_idx++) { pgoff_t page_offset = offset + page_idx; if (page_offset > end_index) break; + rcu_read_lock(); page = radix_tree_lookup(&mapping->page_tree, page_offset); + rcu_read_unlock(); if (page) continue; - read_unlock_irq(&mapping->tree_lock); page = page_cache_alloc_cold(mapping); - read_lock_irq(&mapping->tree_lock); if (!page) break; page->index = page_offset; @@ -178,7 +177,6 @@ __do_page_cache_readahead(struct address SetPageReadahead(page); ret++; } - read_unlock_irq(&mapping->tree_lock); /* * Now start the IO. We ignore I/O errors - if the page is not patches/i386-prepare-sharing-pit-code.patch0000664000077200007720000000364310653433161020032 0ustar mingomingoSubject: i386: prepare sharing the PIT code PIT clock events work already and the PIT handling is the same for i386 and x86_64. 
x86_64 does not support PIT as a clock source, so disable the PIT clocksource for x86_64. Prepare i8253.h to be shared with x8664 Signed-off-by: Thomas Gleixner Signed-off-by: Chris Wright Signed-off-by: Ingo Molnar --- arch/i386/kernel/i8253.c | 4 +++- include/asm-i386/i8253.h | 6 ++++-- 2 files changed, 7 insertions(+), 3 deletions(-) Index: linux-rt-rebase.q/arch/i386/kernel/i8253.c =================================================================== --- linux-rt-rebase.q.orig/arch/i386/kernel/i8253.c +++ linux-rt-rebase.q/arch/i386/kernel/i8253.c @@ -13,7 +13,6 @@ #include #include #include -#include DEFINE_SPINLOCK(i8253_lock); EXPORT_SYMBOL(i8253_lock); @@ -120,6 +119,7 @@ void __init setup_pit_timer(void) global_clock_event = &pit_clockevent; } +#ifndef CONFIG_X86_64 /* * Since the PIT overflows every tick, its not very useful * to just read by itself. So use jiffies to emulate a free @@ -204,3 +204,5 @@ static int __init init_pit_clocksource(v return clocksource_register(&clocksource_pit); } arch_initcall(init_pit_clocksource); + +#endif Index: linux-rt-rebase.q/include/asm-i386/i8253.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-i386/i8253.h +++ linux-rt-rebase.q/include/asm-i386/i8253.h @@ -1,8 +1,6 @@ #ifndef __ASM_I8253_H__ #define __ASM_I8253_H__ -#include - /* i8253A PIT registers */ #define PIT_MODE 0x43 #define PIT_CH0 0x40 @@ -10,8 +8,12 @@ extern spinlock_t i8253_lock; +#ifdef CONFIG_GENERIC_CLOCKEVENTS + extern struct clock_event_device *global_clock_event; extern void setup_pit_timer(void); +#endif + #endif /* __ASM_I8253_H__ */ patches/mitigate-resched-flood.patch0000664000077200007720000001247510653433170017074 0ustar mingomingo[PATCH 1/3] mitigate-resched-interrupt-floods Mitigate rescheduling interrupt floods. Background: preempt-rt sends a resched interrupt to all other cpus whenever some realtime task gets preempted. This is to give that task a chance to continue running on some other cpu. Unfortunately this can cause 'resched interrupt floods' when there are large numbers of realtime tasks on the system that are continually being preempted. This patch reduces such interrupts by noting that it is not necessary to send rescheduling interrupts to every cpu in the system, just to those cpus in the affinity mask of the task to be migrated. This works well in the real world, as traditionally realtime tasks are carefully targeted to specific cpus or sets of cpus, meaning users often give such tasks reduced affinity masks. Signed-off-by: Joe Korty --- arch/i386/kernel/smp.c | 9 +++++++++ arch/x86_64/kernel/smp.c | 9 +++++++++ include/asm-i386/smp.h | 2 ++ include/asm-x86_64/smp.h | 3 +++ include/linux/smp.h | 9 +++++++++ kernel/sched.c | 4 ++-- 6 files changed, 34 insertions(+), 2 deletions(-) Index: linux-rt-rebase.q/arch/i386/kernel/smp.c =================================================================== --- linux-rt-rebase.q.orig/arch/i386/kernel/smp.c +++ linux-rt-rebase.q/arch/i386/kernel/smp.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -486,6 +487,14 @@ void smp_send_reschedule_allbutself(void send_IPI_allbutself(RESCHEDULE_VECTOR); } +void smp_send_reschedule_allbutself_cpumask(cpumask_t mask) +{ + cpu_clear(smp_processor_id(), mask); + cpus_and(mask, mask, cpu_online_map); + if (!cpus_empty(mask)) + send_IPI_mask(mask, RESCHEDULE_VECTOR); +} + /* * Structure and data for smp_call_function(). This is designed to minimise * static memory requirements. 
It also looks cleaner. Index: linux-rt-rebase.q/arch/x86_64/kernel/smp.c =================================================================== --- linux-rt-rebase.q.orig/arch/x86_64/kernel/smp.c +++ linux-rt-rebase.q/arch/x86_64/kernel/smp.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -303,6 +304,14 @@ void smp_send_reschedule_allbutself(void send_IPI_allbutself(RESCHEDULE_VECTOR); } +void smp_send_reschedule_allbutself_cpumask(cpumask_t mask) +{ + cpu_clear(smp_processor_id(), mask); + cpus_and(mask, mask, cpu_online_map); + if (!cpus_empty(mask)) + send_IPI_mask(mask, RESCHEDULE_VECTOR); +} + /* * Structure and data for smp_call_function(). This is designed to minimise * static memory requirements. It also looks cleaner. Index: linux-rt-rebase.q/include/asm-i386/smp.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-i386/smp.h +++ linux-rt-rebase.q/include/asm-i386/smp.h @@ -179,4 +179,6 @@ static __inline int logical_smp_processo #endif #endif +#define HAVE_RESCHEDULE_ALLBUTSELF_CPUMASK 1 + #endif Index: linux-rt-rebase.q/include/asm-x86_64/smp.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-x86_64/smp.h +++ linux-rt-rebase.q/include/asm-x86_64/smp.h @@ -113,5 +113,8 @@ static __inline int logical_smp_processo #else #define cpu_physical_id(cpu) boot_cpu_id #endif /* !CONFIG_SMP */ + +#define HAVE_RESCHEDULE_ALLBUTSELF_CPUMASK 1 + #endif Index: linux-rt-rebase.q/include/linux/smp.h =================================================================== --- linux-rt-rebase.q.orig/include/linux/smp.h +++ linux-rt-rebase.q/include/linux/smp.h @@ -43,6 +43,14 @@ extern void smp_send_reschedule_allbutse */ extern void smp_send_reschedule_allbutself(void); +#ifdef HAVE_RESCHEDULE_ALLBUTSELF_CPUMASK +extern void smp_send_reschedule_allbutself_cpumask(cpumask_t); +#else +static inline void smp_send_reschedule_allbutself_cpumask(cpumask_t mask) { + smp_send_reschedule_allbutself(); +} +#endif + /* * Prepare machine for booting other CPUs. @@ -108,6 +116,7 @@ static inline int up_smp_call_function(v }) static inline void smp_send_reschedule(int cpu) { } static inline void smp_send_reschedule_allbutself(void) { } +static inline void smp_send_reschedule_allbutself_cpumask(cpumask_t) { } #define num_booting_cpus() 1 #define smp_prepare_boot_cpu() do {} while (0) #define smp_call_function_single(cpuid, func, info, retry, wait) \ Index: linux-rt-rebase.q/kernel/sched.c =================================================================== --- linux-rt-rebase.q.orig/kernel/sched.c +++ linux-rt-rebase.q/kernel/sched.c @@ -1858,7 +1858,7 @@ out_set_cpu: * nevertheless, maybe one of them can take * this task: */ - smp_send_reschedule_allbutself(); + smp_send_reschedule_allbutself_cpumask(p->cpus_allowed); schedstat_inc(this_rq, rto_wakeup); } @@ -2116,7 +2116,7 @@ static inline void finish_task_switch(st */ if (unlikely(rt_task(current) && prev->se.on_rq && rt_task(prev))) { schedstat_inc(rq, rto_schedule); - smp_send_reschedule_allbutself(); + smp_send_reschedule_allbutself_cpumask(current->cpus_allowed); } #endif prev_state = prev->state; patches/netfilter-more-debugging.patch0000664000077200007720000000165010653433162017434 0ustar mingomingo doing netfilter changes and turning on netfilter debug means we've got to interpret netfilter warning messages a bit more. 
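(Illustration only, not part of the change below: the same rate-limited
assert pattern as a standalone macro; MY_ASSERT is a hypothetical name.)

#include <linux/kernel.h>

/*
 * On failure: always print where the assert fired, and -- at most as
 * often as printk_ratelimit() allows -- dump a stack trace via
 * WARN_ON(1) so the offending call chain shows up in the log.
 */
#define MY_ASSERT(x)						\
	do {							\
		if (!(x)) {					\
			printk("MY_ASSERT: %s:%i(%s)\n",	\
			       __FILE__, __LINE__, __FUNCTION__); \
			if (printk_ratelimit())			\
				WARN_ON(1);			\
		}						\
	} while (0)
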
--- include/net/netfilter/nf_conntrack.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) Index: linux-rt-rebase.q/include/net/netfilter/nf_conntrack.h =================================================================== --- linux-rt-rebase.q.orig/include/net/netfilter/nf_conntrack.h +++ linux-rt-rebase.q/include/net/netfilter/nf_conntrack.h @@ -63,11 +63,14 @@ union nf_conntrack_help { #ifdef CONFIG_NETFILTER_DEBUG #define NF_CT_ASSERT(x) \ do { \ - if (!(x)) \ + if (!(x)) { \ /* Wooah! I'm tripping my conntrack in a frenzy of \ netplay... */ \ printk("NF_CT_ASSERT: %s:%i(%s)\n", \ __FILE__, __LINE__, __FUNCTION__); \ + if (printk_ratelimit()) \ + WARN_ON(1); \ + } \ } while(0) #else #define NF_CT_ASSERT(x) patches/rcu-preempt-fix-nmi-watchdog.patch0000664000077200007720000000267210653433163020163 0ustar mingomingoSubject: change die_chain from atomic to raw notifiers From: Ingo Molnar atomic notifier chains are using rcu_read_lock()/unlock(), but those are not NMI-safe in -rt - so switch these chains to raw notifiers. Signed-off-by: Ingo Molnar --- kernel/die_notifier.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) Index: linux-rt-rebase.q/kernel/die_notifier.c =================================================================== --- linux-rt-rebase.q.orig/kernel/die_notifier.c +++ linux-rt-rebase.q/kernel/die_notifier.c @@ -5,7 +5,7 @@ #include -static ATOMIC_NOTIFIER_HEAD(die_chain); +static RAW_NOTIFIER_HEAD(die_chain); int notify_die(enum die_val val, const char *str, struct pt_regs *regs, long err, int trap, int sig) @@ -19,19 +19,19 @@ int notify_die(enum die_val val, const c }; - return atomic_notifier_call_chain(&die_chain, val, &args); + return raw_notifier_call_chain(&die_chain, val, &args); } int register_die_notifier(struct notifier_block *nb) { vmalloc_sync_all(); - return atomic_notifier_chain_register(&die_chain, nb); + return raw_notifier_chain_register(&die_chain, nb); } EXPORT_SYMBOL_GPL(register_die_notifier); int unregister_die_notifier(struct notifier_block *nb) { - return atomic_notifier_chain_unregister(&die_chain, nb); + return raw_notifier_chain_unregister(&die_chain, nb); } EXPORT_SYMBOL_GPL(unregister_die_notifier); patches/bh-state-lock.patch0000664000077200007720000000606110653433164015207 0ustar mingomingo I was compiling a kernel in a shell that I set to a priority of 20, and it locked up on the bit_spin_lock crap of jbd. This patch adds another spinlock to the buffer head and uses that instead of the bit_spins. 
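(Illustration only, using a hypothetical demo_head structure -- the real
change touches struct buffer_head and jbd's lock helpers below. The point:
a bit spinlock packed into the state word busy-waits and cannot be
priority-inherited, while a dedicated spinlock_t becomes a sleeping,
PI-aware lock under PREEMPT_RT.)

#include <linux/spinlock.h>
#include <linux/bit_spinlock.h>

struct demo_head {
	unsigned long	state;		/* flag bits; bit 3 doubled as a lock */
	spinlock_t	state_lock;	/* dedicated lock replacing that bit */
};

static void demo_lock_state_old(struct demo_head *h)
{
	/* busy-waits on a bit in h->state: no owner, no PI boosting */
	bit_spin_lock(3, &h->state);
}

static void demo_lock_state_new(struct demo_head *h)
{
	/* first-class lock; must be spin_lock_init()ed at allocation,
	 * as the patch does for b_state_lock in alloc_buffer_head() */
	spin_lock(&h->state_lock);
}
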
From: Steven Rostedt Signed-off-by: Ingo Molnar -- fs/buffer.c | 3 ++- include/linux/buffer_head.h | 1 + include/linux/jbd.h | 12 ++++++------ 3 files changed, 9 insertions(+), 7 deletions(-) Index: linux-rt-rebase.q/fs/buffer.c =================================================================== --- linux-rt-rebase.q.orig/fs/buffer.c +++ linux-rt-rebase.q/fs/buffer.c @@ -40,7 +40,6 @@ #include #include #include -#include static int fsync_buffers_list(spinlock_t *lock, struct list_head *list); @@ -2958,6 +2957,7 @@ struct buffer_head *alloc_buffer_head(gf if (ret) { INIT_LIST_HEAD(&ret->b_assoc_buffers); spin_lock_init(&ret->b_uptodate_lock); + spin_lock_init(&ret->b_state_lock); get_cpu_var(bh_accounting).nr++; recalc_bh_state(); put_cpu_var(bh_accounting); @@ -2970,6 +2970,7 @@ void free_buffer_head(struct buffer_head { BUG_ON(!list_empty(&bh->b_assoc_buffers)); BUG_ON(spin_is_locked(&bh->b_uptodate_lock)); + BUG_ON(spin_is_locked(&bh->b_state_lock)); kmem_cache_free(bh_cachep, bh); get_cpu_var(bh_accounting).nr--; recalc_bh_state(); Index: linux-rt-rebase.q/include/linux/buffer_head.h =================================================================== --- linux-rt-rebase.q.orig/include/linux/buffer_head.h +++ linux-rt-rebase.q/include/linux/buffer_head.h @@ -70,6 +70,7 @@ struct buffer_head { associated with */ atomic_t b_count; /* users using this buffer_head */ spinlock_t b_uptodate_lock; + spinlock_t b_state_lock; }; /* Index: linux-rt-rebase.q/include/linux/jbd.h =================================================================== --- linux-rt-rebase.q.orig/include/linux/jbd.h +++ linux-rt-rebase.q/include/linux/jbd.h @@ -331,32 +331,32 @@ static inline struct journal_head *bh2jh static inline void jbd_lock_bh_state(struct buffer_head *bh) { - bit_spin_lock(BH_State, &bh->b_state); + spin_lock(&bh->b_state_lock); } static inline int jbd_trylock_bh_state(struct buffer_head *bh) { - return bit_spin_trylock(BH_State, &bh->b_state); + return spin_trylock(&bh->b_state_lock); } static inline int jbd_is_locked_bh_state(struct buffer_head *bh) { - return bit_spin_is_locked(BH_State, &bh->b_state); + return spin_is_locked(&bh->b_state_lock); } static inline void jbd_unlock_bh_state(struct buffer_head *bh) { - bit_spin_unlock(BH_State, &bh->b_state); + spin_unlock(&bh->b_state_lock); } static inline void jbd_lock_bh_journal_head(struct buffer_head *bh) { - bit_spin_lock(BH_JournalHead, &bh->b_state); + spin_lock_irq(&bh->b_uptodate_lock); } static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh) { - bit_spin_unlock(BH_JournalHead, &bh->b_state); + spin_unlock_irq(&bh->b_uptodate_lock); } struct jbd_revoke_table_s; patches/version.patch0000664000077200007720000000175010653433170014234 0ustar mingomingoSubject: add -rt extra-version From: Ingo Molnar add -rt extra-version. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- Makefile | 2 +- kernel/workqueue.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) Index: linux-rt-rebase.q/Makefile =================================================================== --- linux-rt-rebase.q.orig/Makefile +++ linux-rt-rebase.q/Makefile @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 23 -EXTRAVERSION =-rc1 +EXTRAVERSION =-rc1-rt7 NAME = Holy Dancing Manatees, Batman! 
# *DOCUMENTATION* Index: linux-rt-rebase.q/kernel/workqueue.c =================================================================== --- linux-rt-rebase.q.orig/kernel/workqueue.c +++ linux-rt-rebase.q/kernel/workqueue.c @@ -647,6 +647,7 @@ out: return err; } +EXPORT_SYMBOL(schedule_on_each_cpu); /** * schedule_on_each_cpu_wq - call a function on each online CPU on a per-CPU wq patches/preempt-realtime-mips.patch0000664000077200007720000013103210653433165016772 0ustar mingomingo arch/mips/Kconfig | 13 +- arch/mips/kernel/asm-offsets.c | 2 arch/mips/kernel/entry.S | 22 ++-- arch/mips/kernel/i8259.c | 2 arch/mips/kernel/module.c | 2 arch/mips/kernel/process.c | 8 - arch/mips/kernel/scall32-o32.S | 2 arch/mips/kernel/scall64-64.S | 2 arch/mips/kernel/scall64-n32.S | 2 arch/mips/kernel/scall64-o32.S | 2 arch/mips/kernel/semaphore.c | 22 ++-- arch/mips/kernel/signal.c | 4 arch/mips/kernel/signal32.c | 4 arch/mips/kernel/smp.c | 27 ++++ arch/mips/kernel/time.c | 208 ++++++++++++++++++++++++++++++++++++-- arch/mips/kernel/traps.c | 2 arch/mips/mm/init.c | 2 arch/mips/sibyte/cfe/smp.c | 4 arch/mips/sibyte/sb1250/irq.c | 10 + arch/mips/sibyte/sb1250/smp.c | 2 arch/mips/sibyte/swarm/setup.c | 6 + include/asm-mips/asmmacro.h | 8 - include/asm-mips/atomic.h | 1 include/asm-mips/bitops.h | 5 include/asm-mips/hw_irq.h | 1 include/asm-mips/i8259.h | 2 include/asm-mips/io.h | 1 include/asm-mips/linkage.h | 5 include/asm-mips/m48t35.h | 2 include/asm-mips/rwsem.h | 176 ++++++++++++++++++++++++++++++++ include/asm-mips/semaphore.h | 33 +++--- include/asm-mips/spinlock.h | 18 +-- include/asm-mips/spinlock_types.h | 4 include/asm-mips/thread_info.h | 2 include/asm-mips/time.h | 2 include/asm-mips/timeofday.h | 5 include/asm-mips/uaccess.h | 12 -- 37 files changed, 534 insertions(+), 91 deletions(-) Index: linux-rt-rebase.q/arch/mips/Kconfig =================================================================== --- linux-rt-rebase.q.orig/arch/mips/Kconfig +++ linux-rt-rebase.q/arch/mips/Kconfig @@ -648,18 +648,16 @@ source "arch/mips/philips/pnx8550/common endmenu + config RWSEM_GENERIC_SPINLOCK bool - depends on !PREEMPT_RT default y config RWSEM_XCHGADD_ALGORITHM bool - depends on !PREEMPT_RT config ASM_SEMAPHORES bool -# depends on !PREEMPT_RT default y config ARCH_HAS_ILOG2_U32 @@ -1798,6 +1796,15 @@ config SECCOMP If unsure, say Y. Only embedded should say N here. 
+config GENERIC_TIME + bool + default y + +source "kernel/time/Kconfig" + +config CPU_SPEED + int "CPU speed used for clocksource/clockevent calculations" + default 600 endmenu config LOCKDEP_SUPPORT Index: linux-rt-rebase.q/arch/mips/kernel/asm-offsets.c =================================================================== --- linux-rt-rebase.q.orig/arch/mips/kernel/asm-offsets.c +++ linux-rt-rebase.q/arch/mips/kernel/asm-offsets.c @@ -10,9 +10,11 @@ */ #include #include +#include #include #include #include +#include #include #include Index: linux-rt-rebase.q/arch/mips/kernel/entry.S =================================================================== --- linux-rt-rebase.q.orig/arch/mips/kernel/entry.S +++ linux-rt-rebase.q/arch/mips/kernel/entry.S @@ -30,7 +30,7 @@ .align 5 #ifndef CONFIG_PREEMPT FEXPORT(ret_from_exception) - local_irq_disable # preempt stop + raw_local_irq_disable # preempt stop b __ret_from_irq #endif FEXPORT(ret_from_irq) @@ -41,7 +41,7 @@ FEXPORT(__ret_from_irq) beqz t0, resume_kernel resume_userspace: - local_irq_disable # make sure we dont miss an + raw_local_irq_disable # make sure we dont miss an # interrupt setting need_resched # between sampling and return LONG_L a2, TI_FLAGS($28) # current->work @@ -51,7 +51,9 @@ resume_userspace: #ifdef CONFIG_PREEMPT resume_kernel: - local_irq_disable + raw_local_irq_disable + lw t0, kernel_preemption + beqz t0, restore_all lw t0, TI_PRE_COUNT($28) bnez t0, restore_all need_resched: @@ -61,7 +63,9 @@ need_resched: LONG_L t0, PT_STATUS(sp) # Interrupts off? andi t0, 1 beqz t0, restore_all + raw_local_irq_disable jal preempt_schedule_irq + sw zero, TI_PRE_COUNT($28) b need_resched #endif @@ -69,7 +73,7 @@ FEXPORT(ret_from_fork) jal schedule_tail # a0 = struct task_struct *prev FEXPORT(syscall_exit) - local_irq_disable # make sure need_resched and + raw_local_irq_disable # make sure need_resched and # signals dont change between # sampling and return LONG_L a2, TI_FLAGS($28) # current->work @@ -142,19 +146,21 @@ FEXPORT(restore_partial) # restore part .set at work_pending: - andi t0, a2, _TIF_NEED_RESCHED # a2 is preloaded with TI_FLAGS + # a2 is preloaded with TI_FLAGS + andi t0, a2, (_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED) beqz t0, work_notifysig work_resched: + raw_local_irq_enable t0 jal schedule - local_irq_disable # make sure need_resched and + raw_local_irq_disable # make sure need_resched and # signals dont change between # sampling and return LONG_L a2, TI_FLAGS($28) andi t0, a2, _TIF_WORK_MASK # is there any work to be done # other than syscall tracing? beqz t0, restore_all - andi t0, a2, _TIF_NEED_RESCHED + andi t0, a2, (_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED) bnez t0, work_resched work_notifysig: # deal with pending signals and @@ -170,7 +176,7 @@ syscall_exit_work: li t0, _TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT and t0, a2 # a2 is preloaded with TI_FLAGS beqz t0, work_pending # trace bit set? - local_irq_enable # could let do_syscall_trace() + raw_local_irq_enable # could let do_syscall_trace() # call schedule() instead move a0, sp li a1, 1 Index: linux-rt-rebase.q/arch/mips/kernel/i8259.c =================================================================== --- linux-rt-rebase.q.orig/arch/mips/kernel/i8259.c +++ linux-rt-rebase.q/arch/mips/kernel/i8259.c @@ -29,9 +29,9 @@ */ static int i8259A_auto_eoi = -1; -DEFINE_SPINLOCK(i8259A_lock); /* some platforms call this... 
*/ void mask_and_ack_8259A(unsigned int); +DEFINE_RAW_SPINLOCK(i8259A_lock); static struct irq_chip i8259A_chip = { .name = "XT-PIC", Index: linux-rt-rebase.q/arch/mips/kernel/module.c =================================================================== --- linux-rt-rebase.q.orig/arch/mips/kernel/module.c +++ linux-rt-rebase.q/arch/mips/kernel/module.c @@ -40,7 +40,7 @@ struct mips_hi16 { static struct mips_hi16 *mips_hi16_list; static LIST_HEAD(dbe_list); -static DEFINE_SPINLOCK(dbe_lock); +static DEFINE_RAW_SPINLOCK(dbe_lock); void *module_alloc(unsigned long size) { Index: linux-rt-rebase.q/arch/mips/kernel/process.c =================================================================== --- linux-rt-rebase.q.orig/arch/mips/kernel/process.c +++ linux-rt-rebase.q/arch/mips/kernel/process.c @@ -52,7 +52,7 @@ void __noreturn cpu_idle(void) { /* endless idle loop with no priority at all */ while (1) { - while (!need_resched()) { + while (!need_resched() && !need_resched_delayed()) { #ifdef CONFIG_SMTC_IDLE_HOOK_DEBUG extern void smtc_idle_loop_hook(void); @@ -61,9 +61,11 @@ void __noreturn cpu_idle(void) if (cpu_wait) (*cpu_wait)(); } - preempt_enable_no_resched(); - schedule(); + local_irq_disable(); + __preempt_enable_no_resched(); + __schedule(); preempt_disable(); + local_irq_enable(); } } Index: linux-rt-rebase.q/arch/mips/kernel/scall32-o32.S =================================================================== --- linux-rt-rebase.q.orig/arch/mips/kernel/scall32-o32.S +++ linux-rt-rebase.q/arch/mips/kernel/scall32-o32.S @@ -73,7 +73,7 @@ stack_done: 1: sw v0, PT_R2(sp) # result o32_syscall_exit: - local_irq_disable # make sure need_resched and + raw_local_irq_disable # make sure need_resched and # signals dont change between # sampling and return lw a2, TI_FLAGS($28) # current->work Index: linux-rt-rebase.q/arch/mips/kernel/scall64-64.S =================================================================== --- linux-rt-rebase.q.orig/arch/mips/kernel/scall64-64.S +++ linux-rt-rebase.q/arch/mips/kernel/scall64-64.S @@ -72,7 +72,7 @@ NESTED(handle_sys64, PT_SIZE, sp) 1: sd v0, PT_R2(sp) # result n64_syscall_exit: - local_irq_disable # make sure need_resched and + raw_local_irq_disable # make sure need_resched and # signals dont change between # sampling and return LONG_L a2, TI_FLAGS($28) # current->work Index: linux-rt-rebase.q/arch/mips/kernel/scall64-n32.S =================================================================== --- linux-rt-rebase.q.orig/arch/mips/kernel/scall64-n32.S +++ linux-rt-rebase.q/arch/mips/kernel/scall64-n32.S @@ -69,7 +69,7 @@ NESTED(handle_sysn32, PT_SIZE, sp) sd v0, PT_R0(sp) # set flag for syscall restarting 1: sd v0, PT_R2(sp) # result - local_irq_disable # make sure need_resched and + raw_local_irq_disable # make sure need_resched and # signals dont change between # sampling and return LONG_L a2, TI_FLAGS($28) # current->work Index: linux-rt-rebase.q/arch/mips/kernel/scall64-o32.S =================================================================== --- linux-rt-rebase.q.orig/arch/mips/kernel/scall64-o32.S +++ linux-rt-rebase.q/arch/mips/kernel/scall64-o32.S @@ -98,7 +98,7 @@ NESTED(handle_sys, PT_SIZE, sp) 1: sd v0, PT_R2(sp) # result o32_syscall_exit: - local_irq_disable # make need_resched and + raw_local_irq_disable # make need_resched and # signals dont change between # sampling and return LONG_L a2, TI_FLAGS($28) Index: linux-rt-rebase.q/arch/mips/kernel/semaphore.c =================================================================== --- 
linux-rt-rebase.q.orig/arch/mips/kernel/semaphore.c +++ linux-rt-rebase.q/arch/mips/kernel/semaphore.c @@ -36,7 +36,7 @@ * sem->count and sem->waking atomic. Scalability isn't an issue because * this lock is used on UP only so it's just an empty variable. */ -static inline int __sem_update_count(struct semaphore *sem, int incr) +static inline int __sem_update_count(struct compat_semaphore *sem, int incr) { int old_count, tmp; @@ -67,7 +67,7 @@ static inline int __sem_update_count(str : "=&r" (old_count), "=&r" (tmp), "=m" (sem->count) : "r" (incr), "m" (sem->count)); } else { - static DEFINE_SPINLOCK(semaphore_lock); + static DEFINE_RAW_SPINLOCK(semaphore_lock); unsigned long flags; spin_lock_irqsave(&semaphore_lock, flags); @@ -80,7 +80,7 @@ static inline int __sem_update_count(str return old_count; } -void __up(struct semaphore *sem) +void __compat_up(struct compat_semaphore *sem) { /* * Note that we incremented count in up() before we came here, @@ -94,7 +94,7 @@ void __up(struct semaphore *sem) wake_up(&sem->wait); } -EXPORT_SYMBOL(__up); +EXPORT_SYMBOL(__compat_up); /* * Note that when we come in to __down or __down_interruptible, @@ -104,7 +104,7 @@ EXPORT_SYMBOL(__up); * Thus it is only when we decrement count from some value > 0 * that we have actually got the semaphore. */ -void __sched __down(struct semaphore *sem) +void __sched __compat_down(struct compat_semaphore *sem) { struct task_struct *tsk = current; DECLARE_WAITQUEUE(wait, tsk); @@ -133,9 +133,9 @@ void __sched __down(struct semaphore *se wake_up(&sem->wait); } -EXPORT_SYMBOL(__down); +EXPORT_SYMBOL(__compat_down); -int __sched __down_interruptible(struct semaphore * sem) +int __sched __compat_down_interruptible(struct compat_semaphore * sem) { int retval = 0; struct task_struct *tsk = current; @@ -165,4 +165,10 @@ int __sched __down_interruptible(struct return retval; } -EXPORT_SYMBOL(__down_interruptible); +EXPORT_SYMBOL(__compat_down_interruptible); + +int fastcall compat_sem_is_locked(struct compat_semaphore *sem) +{ + return (int) atomic_read(&sem->count) < 0; +} +EXPORT_SYMBOL(compat_sem_is_locked); Index: linux-rt-rebase.q/arch/mips/kernel/signal.c =================================================================== --- linux-rt-rebase.q.orig/arch/mips/kernel/signal.c +++ linux-rt-rebase.q/arch/mips/kernel/signal.c @@ -629,6 +629,10 @@ static void do_signal(struct pt_regs *re siginfo_t info; int signr; +#ifdef CONFIG_PREEMPT_RT + local_irq_enable(); + preempt_check_resched(); +#endif /* * We want the common case to go fast, which is why we may in certain * cases get here from kernel mode. Just return without doing anything Index: linux-rt-rebase.q/arch/mips/kernel/signal32.c =================================================================== --- linux-rt-rebase.q.orig/arch/mips/kernel/signal32.c +++ linux-rt-rebase.q/arch/mips/kernel/signal32.c @@ -656,6 +656,10 @@ static int setup_rt_frame_32(struct k_si if (err) goto give_sigsegv; +#ifdef CONFIG_PREEMPT_RT + local_irq_enable(); + preempt_check_resched(); +#endif /* * Arguments to signal handler: * Index: linux-rt-rebase.q/arch/mips/kernel/smp.c =================================================================== --- linux-rt-rebase.q.orig/arch/mips/kernel/smp.c +++ linux-rt-rebase.q/arch/mips/kernel/smp.c @@ -88,7 +88,22 @@ asmlinkage __cpuinit void start_secondar cpu_idle(); } -DEFINE_SPINLOCK(smp_call_lock); +DEFINE_RAW_SPINLOCK(smp_call_lock); + +/* + * this function sends a 'reschedule' IPI to all other CPUs. 
+ * This is used when RT tasks are starving and other CPUs + * might be able to run them. + */ +void smp_send_reschedule_allbutself(void) +{ + int cpu = smp_processor_id(); + int i; + + for (i = 0; i < NR_CPUS; i++) + if (cpu_online(i) && i != cpu) + core_send_ipi(i, SMP_RESCHEDULE_YOURSELF); +} struct call_data_struct *call_data; @@ -275,6 +290,8 @@ int setup_profiling_timer(unsigned int m return 0; } +static DEFINE_RAW_SPINLOCK(tlbstate_lock); + static void flush_tlb_all_ipi(void *info) { local_flush_tlb_all(); @@ -332,6 +349,7 @@ static inline void smp_on_each_tlb(void void flush_tlb_mm(struct mm_struct *mm) { preempt_disable(); + spin_lock(&tlbstate_lock); if ((atomic_read(&mm->mm_users) != 1) || (current->mm != mm)) { smp_on_other_tlbs(flush_tlb_mm_ipi, (void *)mm); @@ -341,6 +359,7 @@ void flush_tlb_mm(struct mm_struct *mm) if (smp_processor_id() != i) cpu_context(i, mm) = 0; } + spin_unlock(&tlbstate_lock); local_flush_tlb_mm(mm); preempt_enable(); @@ -364,6 +383,8 @@ void flush_tlb_range(struct vm_area_stru struct mm_struct *mm = vma->vm_mm; preempt_disable(); + spin_lock(&tlbstate_lock); + if ((atomic_read(&mm->mm_users) != 1) || (current->mm != mm)) { struct flush_tlb_data fd; @@ -377,6 +398,7 @@ void flush_tlb_range(struct vm_area_stru if (smp_processor_id() != i) cpu_context(i, mm) = 0; } + spin_unlock(&tlbstate_lock); local_flush_tlb_range(vma, start, end); preempt_enable(); } @@ -407,6 +429,8 @@ static void flush_tlb_page_ipi(void *inf void flush_tlb_page(struct vm_area_struct *vma, unsigned long page) { preempt_disable(); + spin_lock(&tlbstate_lock); + if ((atomic_read(&vma->vm_mm->mm_users) != 1) || (current->mm != vma->vm_mm)) { struct flush_tlb_data fd; @@ -419,6 +443,7 @@ void flush_tlb_page(struct vm_area_struc if (smp_processor_id() != i) cpu_context(i, vma->vm_mm) = 0; } + spin_unlock(&tlbstate_lock); local_flush_tlb_page(vma, page); preempt_enable(); } Index: linux-rt-rebase.q/arch/mips/kernel/time.c =================================================================== --- linux-rt-rebase.q.orig/arch/mips/kernel/time.c +++ linux-rt-rebase.q/arch/mips/kernel/time.c @@ -10,6 +10,11 @@ * under the terms of the GNU General Public License as published by the * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. + * + * This implementation of High Res Timers uses two timers. One is the system + * timer. The second is used for the high res timers. The high res timers + * require the CPU to have count/compare registers. The mips_set_next_event() + * function schedules the next high res timer interrupt. 
*/ #include #include @@ -23,6 +28,7 @@ #include #include #include +#include #include #include @@ -47,7 +53,27 @@ /* * forward reference */ -DEFINE_SPINLOCK(rtc_lock); +DEFINE_RAW_SPINLOCK(rtc_lock); + +/* any missed timer interrupts */ +int missed_timer_count; + +#ifdef CONFIG_HIGH_RES_TIMERS +static void mips_set_next_event(unsigned long evt); +static void mips_set_mode(int mode, void *priv); + +static struct clock_event lapic_clockevent = { + .name = "mips clockevent interface", + .capabilities = CLOCK_CAP_NEXTEVT | CLOCK_CAP_PROFILE | + CLOCK_HAS_IRQHANDLER +#ifdef CONFIG_SMP + | CLOCK_CAP_UPDATE +#endif + , + .shift = 32, + .set_next_event = mips_set_next_event, +}; +#endif /* * By default we provide the null RTC ops @@ -56,6 +82,129 @@ static unsigned long null_rtc_get_time(v { return mktime(2000, 1, 1, 0, 0, 0); } +#ifdef CONFIG_SMP +/* + * We have to synchronize the master CPU with all the slave CPUs + */ +static atomic_t cpus_started; +static atomic_t cpus_ready; +static atomic_t cpus_count; +/* + * Master processor inits + */ +static void sync_cpus_init(int v) +{ + atomic_set(&cpus_count, 0); + mb(); + atomic_set(&cpus_started, v); + mb(); + atomic_set(&cpus_ready, v); + mb(); +} + +/* + * Called by the master processor + */ +static void sync_cpus_master(int v) +{ + atomic_set(&cpus_count, 0); + mb(); + atomic_set(&cpus_started, v); + mb(); + /* Wait here till all other CPUs are now ready */ + while (atomic_read(&cpus_count) != (num_online_cpus() -1) ) + mb(); + atomic_set(&cpus_ready, v); + mb(); +} +/* + * Called by the slave processors + */ +static void sync_cpus_slave(int v) +{ + /* Check if the master has been through this */ + while (atomic_read(&cpus_started) != v) + mb(); + atomic_inc(&cpus_count); + mb(); + while (atomic_read(&cpus_ready) != v) + mb(); +} +/* + * Called by the slave CPUs when done syncing the count register + * with the master processor + */ +static void sync_cpus_slave_exit(int v) +{ + while (atomic_read(&cpus_started) != v) + mb(); + atomic_inc(&cpus_count); + mb(); +} + +#define LOOPS 100 +static u32 c0_count[NR_CPUS]; /* Count register per CPU */ +static u32 c[NR_CPUS][LOOPS + 1]; /* Count register per CPU per loop for syncing */ + +/* + * Slave processors execute this via IPI + */ +static void sync_c0_count_slave(void *info) +{ + int cpus = 1, loop, prev_count = 0, cpu = smp_processor_id(); + unsigned long flags; + u32 diff_count; /* CPU count registers are 32-bit */ + local_irq_save(flags); + + for(loop = 0; loop <= LOOPS; loop++) { + /* Sync with the Master processor */ + sync_cpus_slave(cpus++); + c[cpu][loop] = c0_count[cpu] = read_c0_count(); + mb(); + sync_cpus_slave(cpus++); + diff_count = c0_count[0] - c0_count[cpu]; + diff_count += prev_count; + diff_count += read_c0_count(); + write_c0_count(diff_count); + prev_count = (prev_count >> 1) + + ((int)(c0_count[0] - c0_count[cpu]) >> 1); + } + + /* Slave processor is done syncing count register with Master */ + sync_cpus_slave_exit(cpus++); + printk("SMP: Slave processor %d done syncing count \n", cpu); + local_irq_restore(flags); +} + +/* + * Master kicks off the syncing process + */ +void sync_c0_count_master(void) +{ + int cpus = 0, loop, cpu = smp_processor_id(); + unsigned long flags; + + printk("SMP: Starting to sync the c0 count register ... 
\n"); + sync_cpus_init(cpus++); + + /* Kick off the slave processors to also start the syncing process */ + smp_call_function(sync_c0_count_slave, NULL, 0, 0); + local_irq_save(flags); + + for (loop = 0; loop <= LOOPS; loop++) { + /* Wait for all the CPUs here */ + sync_cpus_master(cpus++); + c[cpu][loop] = c0_count[cpu] = read_c0_count(); + mb(); + /* Do syncing once more */ + sync_cpus_master(cpus++); + } + sync_cpus_master(cpus++); + local_irq_restore(flags); + + printk("SMP: Syncing process completed accross CPUs ... \n"); +} +#endif /* CONFIG_SMP */ static int null_rtc_set_time(unsigned long sec) { @@ -66,19 +215,30 @@ unsigned long (*rtc_mips_get_time)(void) int (*rtc_mips_set_time)(unsigned long) = null_rtc_set_time; int (*rtc_mips_set_mmss)(unsigned long); - /* how many counter cycles in a jiffy */ static unsigned long cycles_per_jiffy __read_mostly; +static unsigned long hrt_cycles_per_jiffy __read_mostly; + + /* expirelo is the count value for next CPU timer interrupt */ static unsigned int expirelo; - /* * Null timer ack for systems not needing one (e.g. i8254). */ static void null_timer_ack(void) { /* nothing */ } +#ifdef CONFIG_HIGH_RES_TIMERS +/* + * Set the next event + */ +static void mips_set_next_event(unsigned long evt) +{ + write_c0_compare(read_c0_count() + evt); +} +#endif + /* * Null high precision timer functions for systems lacking one. */ @@ -95,13 +255,13 @@ static void c0_timer_ack(void) unsigned int count; /* Ack this timer interrupt and set the next one. */ - expirelo += cycles_per_jiffy; + expirelo += hrt_cycles_per_jiffy; write_c0_compare(expirelo); - /* Check to see if we have missed any timer interrupts. */ - while (((count = read_c0_count()) - expirelo) < 0x7fffffff) { - /* missed_timer_count++; */ - expirelo = count + cycles_per_jiffy; + count = read_c0_count(); + if ((count - expirelo) < 0x7fffffff) { + /* missed_timer_count++; */ + expirelo = count + hrt_cycles_per_jiffy; write_c0_compare(expirelo); } } @@ -160,7 +320,7 @@ irqreturn_t timer_interrupt(int irq, voi /* * If we have an externally synchronized Linux clock, then update - * CMOS clock accordingly every ~11 minutes. rtc_mips_set_time() has to be + * CMOS clock accordingly every ~11 minutes. rtc_set_time() has to be * called as close as possible to 500 ms before the new second starts. */ if (ntp_synced() && @@ -228,6 +388,15 @@ static inline int handle_perf_irq (int r !r2; } +#ifdef CONFIG_HIGH_RES_TIMERS +void event_timer_handler(struct pt_regs *regs) +{ + c0_timer_ack(); + if (lapic_clockevent.event_handler) + lapic_clockevent.event_handler(regs,NULL); +} +#endif + asmlinkage void ll_timer_interrupt(int irq) { int r2 = cpu_has_mips_r2; @@ -235,6 +404,16 @@ asmlinkage void ll_timer_interrupt(int i irq_enter(); kstat_this_cpu.irqs[irq]++; + +#ifdef CONFIG_HIGH_RES_TIMERS + /* + * Run the event handler + */ + if (!r2 || (read_c0_cause() & (1 << 26))) + if (lapic_clockevent.event_handler) + lapic_clockevent.event_handler(regs,NULL); +#endif + if (handle_perf_irq(r2)) goto out; @@ -267,7 +446,7 @@ asmlinkage void ll_local_timer_interrupt * b) (optional) calibrate and set the mips_hpt_frequency * (only needed if you intended to use cpu counter as timer interrupt * source) - * 2) setup xtime based on rtc_mips_get_time(). + * 2) setup xtime based on rtc_get_time(). * 3) calculate a couple of cached variables for later usage * 4) plat_timer_setup() - * a) (optional) over-write any choices made above by time_init(). 
@@ -358,6 +537,9 @@ static void __init init_mips_clocksource void __init time_init(void) { +#ifdef CONFIG_HIGH_RES_TIMERS + u64 temp; +#endif if (board_time_init) board_time_init(); @@ -401,6 +583,12 @@ void __init time_init(void) if (!mips_hpt_frequency) mips_hpt_frequency = calibrate_hpt(); +#ifdef CONFIG_HIGH_RES_TIMERS + hrt_cycles_per_jiffy = ( (CONFIG_CPU_SPEED * 1000000) + HZ / 2) / HZ; +#else + hrt_cycles_per_jiffy = cycles_per_jiffy; +#endif + /* Report the high precision timer rate for a reference. */ printk("Using %u.%03u MHz high precision timer.\n", ((mips_hpt_frequency + 500) / 1000) / 1000, Index: linux-rt-rebase.q/arch/mips/kernel/traps.c =================================================================== --- linux-rt-rebase.q.orig/arch/mips/kernel/traps.c +++ linux-rt-rebase.q/arch/mips/kernel/traps.c @@ -309,7 +309,7 @@ void show_registers(struct pt_regs *regs printk("\n"); } -static DEFINE_SPINLOCK(die_lock); +static DEFINE_RAW_SPINLOCK(die_lock); void __noreturn die(const char * str, struct pt_regs * regs) { Index: linux-rt-rebase.q/arch/mips/mm/init.c =================================================================== --- linux-rt-rebase.q.orig/arch/mips/mm/init.c +++ linux-rt-rebase.q/arch/mips/mm/init.c @@ -59,7 +59,7 @@ #endif /* CONFIG_MIPS_MT_SMTC */ -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); +DEFINE_PER_CPU_LOCKED(struct mmu_gather, mmu_gathers); /* * We have up to 8 empty zeroed pages so we can map one of the right colour Index: linux-rt-rebase.q/arch/mips/sibyte/cfe/smp.c =================================================================== --- linux-rt-rebase.q.orig/arch/mips/sibyte/cfe/smp.c +++ linux-rt-rebase.q/arch/mips/sibyte/cfe/smp.c @@ -107,4 +107,8 @@ void prom_smp_finish(void) */ void prom_cpus_done(void) { +#ifdef CONFIG_HIGH_RES_TIMERS + extern void sync_c0_count_master(void); + sync_c0_count_master(); +#endif } Index: linux-rt-rebase.q/arch/mips/sibyte/sb1250/irq.c =================================================================== --- linux-rt-rebase.q.orig/arch/mips/sibyte/sb1250/irq.c +++ linux-rt-rebase.q/arch/mips/sibyte/sb1250/irq.c @@ -81,7 +81,7 @@ static struct irq_chip sb1250_irq_type = /* Store the CPU id (not the logical number) */ int sb1250_irq_owner[SB1250_NR_IRQS]; -DEFINE_SPINLOCK(sb1250_imr_lock); +DEFINE_RAW_SPINLOCK(sb1250_imr_lock); void sb1250_mask_irq(int cpu, int irq) { @@ -352,6 +352,10 @@ void __init arch_init_irq(void) #ifdef CONFIG_KGDB imask |= STATUSF_IP6; #endif + +#ifdef CONFIG_HIGH_RES_TIMERS + imask |= STATUSF_IP7; +#endif /* Enable necessary IPs, disable the rest */ change_c0_status(ST0_IM, imask); @@ -429,6 +433,10 @@ asmlinkage void plat_irq_dispatch(void) else #endif +#ifdef CONFIG_HIGH_RES_TIMERS + if (pending & CAUSEF_IP7) + event_timer_handler(regs); +#endif if (pending & CAUSEF_IP4) sb1250_timer_interrupt(); Index: linux-rt-rebase.q/arch/mips/sibyte/sb1250/smp.c =================================================================== --- linux-rt-rebase.q.orig/arch/mips/sibyte/sb1250/smp.c +++ linux-rt-rebase.q/arch/mips/sibyte/sb1250/smp.c @@ -59,7 +59,7 @@ void sb1250_smp_finish(void) { extern void sb1250_time_init(void); sb1250_time_init(); - local_irq_enable(); + raw_local_irq_enable(); } /* Index: linux-rt-rebase.q/arch/mips/sibyte/swarm/setup.c =================================================================== --- linux-rt-rebase.q.orig/arch/mips/sibyte/swarm/setup.c +++ linux-rt-rebase.q/arch/mips/sibyte/swarm/setup.c @@ -131,6 +131,12 @@ void __init plat_mem_setup(void) rtc_mips_set_time = 
m41t81_set_time; } +#ifdef CONFIG_HIGH_RES_TIMERS + /* + * set the mips_hpt_frequency here + */ + mips_hpt_frequency = CONFIG_CPU_SPEED * 1000000; +#endif printk("This kernel optimized for " #ifdef CONFIG_SIMULATION "simulation" Index: linux-rt-rebase.q/include/asm-mips/asmmacro.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-mips/asmmacro.h +++ linux-rt-rebase.q/include/asm-mips/asmmacro.h @@ -21,7 +21,7 @@ #endif #ifdef CONFIG_MIPS_MT_SMTC - .macro local_irq_enable reg=t0 + .macro raw_local_irq_enable reg=t0 mfc0 \reg, CP0_TCSTATUS ori \reg, \reg, TCSTATUS_IXMT xori \reg, \reg, TCSTATUS_IXMT @@ -29,21 +29,21 @@ _ehb .endm - .macro local_irq_disable reg=t0 + .macro raw_local_irq_disable reg=t0 mfc0 \reg, CP0_TCSTATUS ori \reg, \reg, TCSTATUS_IXMT mtc0 \reg, CP0_TCSTATUS _ehb .endm #else - .macro local_irq_enable reg=t0 + .macro raw_local_irq_enable reg=t0 mfc0 \reg, CP0_STATUS ori \reg, \reg, 1 mtc0 \reg, CP0_STATUS irq_enable_hazard .endm - .macro local_irq_disable reg=t0 + .macro raw_local_irq_disable reg=t0 mfc0 \reg, CP0_STATUS ori \reg, \reg, 1 xori \reg, \reg, 1 Index: linux-rt-rebase.q/include/asm-mips/atomic.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-mips/atomic.h +++ linux-rt-rebase.q/include/asm-mips/atomic.h @@ -573,7 +573,6 @@ static __inline__ long atomic64_add_retu raw_local_irq_restore(flags); } #endif -#endif smp_llsc_mb(); Index: linux-rt-rebase.q/include/asm-mips/bitops.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-mips/bitops.h +++ linux-rt-rebase.q/include/asm-mips/bitops.h @@ -500,9 +500,6 @@ static inline unsigned long __ffs(unsign } /* - * fls - find last bit set. - * @word: The word to search - * * This is defined the same way as ffs. * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32. 
*/ @@ -520,6 +517,8 @@ static inline int fls64(__u64 word) return 64 - word; } +#define __bi_local_irq_save(x) raw_local_irq_save(x) +#define __bi_local_irq_restore(x) raw_local_irq_restore(x) #else #include #endif Index: linux-rt-rebase.q/include/asm-mips/hw_irq.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-mips/hw_irq.h +++ linux-rt-rebase.q/include/asm-mips/hw_irq.h @@ -10,6 +10,7 @@ #include #include +#include extern void disable_8259A_irq(unsigned int irq); extern void enable_8259A_irq(unsigned int irq); Index: linux-rt-rebase.q/include/asm-mips/i8259.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-mips/i8259.h +++ linux-rt-rebase.q/include/asm-mips/i8259.h @@ -35,7 +35,7 @@ #define SLAVE_ICW4_DEFAULT 0x01 #define PIC_ICW4_AEOI 2 -extern spinlock_t i8259A_lock; +extern raw_spinlock_t i8259A_lock; extern void init_8259A(int auto_eoi); extern void enable_8259A_irq(unsigned int irq); Index: linux-rt-rebase.q/include/asm-mips/io.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-mips/io.h +++ linux-rt-rebase.q/include/asm-mips/io.h @@ -15,6 +15,7 @@ #include #include #include +#include #include #include Index: linux-rt-rebase.q/include/asm-mips/linkage.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-mips/linkage.h +++ linux-rt-rebase.q/include/asm-mips/linkage.h @@ -3,6 +3,11 @@ #ifdef __ASSEMBLY__ #include + +/* FASTCALL stuff */ +#define FASTCALL(x) x +#define fastcall + #endif #endif Index: linux-rt-rebase.q/include/asm-mips/m48t35.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-mips/m48t35.h +++ linux-rt-rebase.q/include/asm-mips/m48t35.h @@ -6,7 +6,7 @@ #include -extern spinlock_t rtc_lock; +extern raw_spinlock_t rtc_lock; struct m48t35_rtc { volatile u8 pad[0x7ff8]; /* starts at 0x7ff8 */ Index: linux-rt-rebase.q/include/asm-mips/rwsem.h =================================================================== --- /dev/null +++ linux-rt-rebase.q/include/asm-mips/rwsem.h @@ -0,0 +1,176 @@ +/* + * include/asm-mips/rwsem.h: R/W semaphores for MIPS using the stuff + * in lib/rwsem.c. 
Adapted largely from include/asm-ppc/rwsem.h + * by john.cooper@timesys.com + */ + +#ifndef _MIPS_RWSEM_H +#define _MIPS_RWSEM_H + +#ifndef _LINUX_RWSEM_H +#error "please don't include asm/rwsem.h directly, use linux/rwsem.h instead" +#endif + +#ifdef __KERNEL__ +#include +#include +#include +#include + +/* + * the semaphore definition + */ +struct compat_rw_semaphore { + /* XXX this should be able to be an atomic_t -- paulus */ + signed long count; +#define RWSEM_UNLOCKED_VALUE 0x00000000 +#define RWSEM_ACTIVE_BIAS 0x00000001 +#define RWSEM_ACTIVE_MASK 0x0000ffff +#define RWSEM_WAITING_BIAS (-0x00010000) +#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS +#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) + raw_spinlock_t wait_lock; + struct list_head wait_list; +#if RWSEM_DEBUG + int debug; +#endif +}; + +/* + * initialisation + */ +#if RWSEM_DEBUG +#define __RWSEM_DEBUG_INIT , 0 +#else +#define __RWSEM_DEBUG_INIT /* */ +#endif + +#define __COMPAT_RWSEM_INITIALIZER(name) \ + { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \ + LIST_HEAD_INIT((name).wait_list) \ + __RWSEM_DEBUG_INIT } + +#define COMPAT_DECLARE_RWSEM(name) \ + struct compat_rw_semaphore name = __COMPAT_RWSEM_INITIALIZER(name) + +extern struct compat_rw_semaphore *rwsem_down_read_failed(struct compat_rw_semaphore *sem); +extern struct compat_rw_semaphore *rwsem_down_write_failed(struct compat_rw_semaphore *sem); +extern struct compat_rw_semaphore *rwsem_wake(struct compat_rw_semaphore *sem); +extern struct compat_rw_semaphore *rwsem_downgrade_wake(struct compat_rw_semaphore *sem); + +static inline void compat_init_rwsem(struct compat_rw_semaphore *sem) +{ + sem->count = RWSEM_UNLOCKED_VALUE; + spin_lock_init(&sem->wait_lock); + INIT_LIST_HEAD(&sem->wait_list); +#if RWSEM_DEBUG + sem->debug = 0; +#endif +} + +/* + * lock for reading + */ +static inline void __down_read(struct compat_rw_semaphore *sem) +{ + if (atomic_inc_return((atomic_t *)(&sem->count)) > 0) + smp_wmb(); + else + rwsem_down_read_failed(sem); +} + +static inline int __down_read_trylock(struct compat_rw_semaphore *sem) +{ + int tmp; + + while ((tmp = sem->count) >= 0) { + if (tmp == cmpxchg(&sem->count, tmp, + tmp + RWSEM_ACTIVE_READ_BIAS)) { + smp_wmb(); + return 1; + } + } + return 0; +} + +/* + * lock for writing + */ +static inline void __down_write(struct compat_rw_semaphore *sem) +{ + int tmp; + + tmp = atomic_add_return(RWSEM_ACTIVE_WRITE_BIAS, + (atomic_t *)(&sem->count)); + if (tmp == RWSEM_ACTIVE_WRITE_BIAS) + smp_wmb(); + else + rwsem_down_write_failed(sem); +} + +static inline int __down_write_trylock(struct compat_rw_semaphore *sem) +{ + int tmp; + + tmp = cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE, + RWSEM_ACTIVE_WRITE_BIAS); + smp_wmb(); + return tmp == RWSEM_UNLOCKED_VALUE; +} + +/* + * unlock after reading + */ +static inline void __up_read(struct compat_rw_semaphore *sem) +{ + int tmp; + + smp_wmb(); + tmp = atomic_dec_return((atomic_t *)(&sem->count)); + if (tmp < -1 && (tmp & RWSEM_ACTIVE_MASK) == 0) + rwsem_wake(sem); +} + +/* + * unlock after writing + */ +static inline void __up_write(struct compat_rw_semaphore *sem) +{ + smp_wmb(); + if (atomic_sub_return(RWSEM_ACTIVE_WRITE_BIAS, + (atomic_t *)(&sem->count)) < 0) + rwsem_wake(sem); +} + +/* + * implement atomic add functionality + */ +static inline void rwsem_atomic_add(int delta, struct compat_rw_semaphore *sem) +{ + atomic_add(delta, (atomic_t *)(&sem->count)); +} + +/* + * downgrade write lock to read lock + */ +static inline void __downgrade_write(struct 
compat_rw_semaphore *sem) +{ + int tmp; + + smp_wmb(); + tmp = atomic_add_return(-RWSEM_WAITING_BIAS, (atomic_t *)(&sem->count)); + if (tmp < 0) + rwsem_downgrade_wake(sem); +} + +/* + * implement exchange and add functionality + */ +static inline int rwsem_atomic_update(int delta, struct compat_rw_semaphore *sem) +{ + smp_mb(); + return atomic_add_return(delta, (atomic_t *)(&sem->count)); +} + +#endif /* __KERNEL__ */ +#endif /* _MIPS_RWSEM_H */ Index: linux-rt-rebase.q/include/asm-mips/semaphore.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-mips/semaphore.h +++ linux-rt-rebase.q/include/asm-mips/semaphore.h @@ -47,39 +47,42 @@ struct compat_semaphore { wait_queue_head_t wait; }; -#define __SEMAPHORE_INITIALIZER(name, n) \ +#define __COMPAT_SEMAPHORE_INITIALIZER(name, n) \ { \ .count = ATOMIC_INIT(n), \ .wait = __WAIT_QUEUE_HEAD_INITIALIZER((name).wait) \ } -#define __DECLARE_SEMAPHORE_GENERIC(name, count) \ - struct semaphore name = __SEMAPHORE_INITIALIZER(name,count) +#define __COMPAT_MUTEX_INITIALIZER(name) \ + __COMPAT_SEMAPHORE_INITIALIZER(name, 1) -#define DECLARE_MUTEX(name) __DECLARE_SEMAPHORE_GENERIC(name, 1) -#define DECLARE_MUTEX_LOCKED(name) __DECLARE_SEMAPHORE_GENERIC(name, 0) +#define __COMPAT_DECLARE_SEMAPHORE_GENERIC(name, count) \ + struct compat_semaphore name = __COMPAT_SEMAPHORE_INITIALIZER(name,count) -static inline void sema_init (struct semaphore *sem, int val) +#define COMPAT_DECLARE_MUTEX(name) __COMPAT_DECLARE_SEMAPHORE_GENERIC(name, 1) +#define COMPAT_DECLARE_MUTEX_LOCKED(name) __COMPAT_DECLARE_SEMAPHORE_GENERIC(name, 0) + +static inline void compat_sema_init (struct compat_semaphore *sem, int val) { atomic_set(&sem->count, val); init_waitqueue_head(&sem->wait); } -static inline void init_MUTEX (struct semaphore *sem) +static inline void compat_init_MUTEX (struct compat_semaphore *sem) { - sema_init(sem, 1); + compat_sema_init(sem, 1); } -static inline void init_MUTEX_LOCKED (struct semaphore *sem) +static inline void compat_init_MUTEX_LOCKED (struct compat_semaphore *sem) { - sema_init(sem, 0); + compat_sema_init(sem, 0); } -extern void __down(struct semaphore * sem); -extern int __down_interruptible(struct semaphore * sem); -extern void __up(struct semaphore * sem); +extern void __compat_down(struct compat_semaphore * sem); +extern int __compat_down_interruptible(struct compat_semaphore * sem); +extern void __compat_up(struct compat_semaphore * sem); -static inline void down(struct semaphore * sem) +static inline void compat_down(struct compat_semaphore * sem) { might_sleep(); @@ -112,6 +115,8 @@ static inline void compat_up(struct comp __compat_up(sem); } +extern int compat_sem_is_locked(struct compat_semaphore *sem); + #define compat_sema_count(sem) atomic_read(&(sem)->count) #include Index: linux-rt-rebase.q/include/asm-mips/spinlock.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-mips/spinlock.h +++ linux-rt-rebase.q/include/asm-mips/spinlock.h @@ -28,7 +28,7 @@ * We make no fairness assumptions. They have a cost. 
*/ -static inline void __raw_spin_lock(raw_spinlock_t *lock) +static inline void __raw_spin_lock(__raw_spinlock_t *lock) { unsigned int tmp; @@ -70,7 +70,7 @@ static inline void __raw_spin_lock(raw_s smp_llsc_mb(); } -static inline void __raw_spin_unlock(raw_spinlock_t *lock) +static inline void __raw_spin_unlock(__raw_spinlock_t *lock) { smp_mb(); @@ -83,7 +83,7 @@ static inline void __raw_spin_unlock(raw : "memory"); } -static inline unsigned int __raw_spin_trylock(raw_spinlock_t *lock) +static inline unsigned int __raw_spin_trylock(__raw_spinlock_t *lock) { unsigned int temp, res; @@ -144,7 +144,7 @@ static inline unsigned int __raw_spin_tr */ #define __raw_write_can_lock(rw) (!(rw)->lock) -static inline void __raw_read_lock(raw_rwlock_t *rw) +static inline void __raw_read_lock(__raw_rwlock_t *rw) { unsigned int tmp; @@ -189,7 +189,7 @@ static inline void __raw_read_lock(raw_r /* Note the use of sub, not subu which will make the kernel die with an overflow exception if we ever try to unlock an rwlock that is already unlocked or is being held by a writer. */ -static inline void __raw_read_unlock(raw_rwlock_t *rw) +static inline void __raw_read_unlock(__raw_rwlock_t *rw) { unsigned int tmp; @@ -223,7 +223,7 @@ static inline void __raw_read_unlock(raw } } -static inline void __raw_write_lock(raw_rwlock_t *rw) +static inline void __raw_write_lock(__raw_rwlock_t *rw) { unsigned int tmp; @@ -265,7 +265,7 @@ static inline void __raw_write_lock(raw_ smp_llsc_mb(); } -static inline void __raw_write_unlock(raw_rwlock_t *rw) +static inline void __raw_write_unlock(__raw_rwlock_t *rw) { smp_mb(); @@ -277,7 +277,7 @@ static inline void __raw_write_unlock(ra : "memory"); } -static inline int __raw_read_trylock(raw_rwlock_t *rw) +static inline int __raw_read_trylock(__raw_rwlock_t *rw) { unsigned int tmp; int ret; @@ -321,7 +321,7 @@ static inline int __raw_read_trylock(raw return ret; } -static inline int __raw_write_trylock(raw_rwlock_t *rw) +static inline int __raw_write_trylock(__raw_rwlock_t *rw) { unsigned int tmp; int ret; Index: linux-rt-rebase.q/include/asm-mips/spinlock_types.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-mips/spinlock_types.h +++ linux-rt-rebase.q/include/asm-mips/spinlock_types.h @@ -7,13 +7,13 @@ typedef struct { volatile unsigned int lock; -} raw_spinlock_t; +} __raw_spinlock_t; #define __RAW_SPIN_LOCK_UNLOCKED { 0 } typedef struct { volatile unsigned int lock; -} raw_rwlock_t; +} __raw_rwlock_t; #define __RAW_RW_LOCK_UNLOCKED { 0 } Index: linux-rt-rebase.q/include/asm-mips/thread_info.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-mips/thread_info.h +++ linux-rt-rebase.q/include/asm-mips/thread_info.h @@ -114,6 +114,7 @@ register struct thread_info *__current_t #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ #define TIF_SYSCALL_AUDIT 4 /* syscall auditing active */ #define TIF_SECCOMP 5 /* secure computing */ +#define TIF_NEED_RESCHED_DELAYED 6 /* reschedule on return to userspace */ #define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal() */ #define TIF_USEDFPU 16 /* FPU was used by this task this quantum (SMP) */ #define TIF_POLLING_NRFLAG 17 /* true if poll_idle() is polling TIF_NEED_RESCHED */ @@ -127,6 +128,7 @@ register struct thread_info *__current_t #define _TIF_NEED_RESCHED (1< #include -extern spinlock_t rtc_lock; +extern raw_spinlock_t rtc_lock; /* * RTC ops. By default, they point to no-RTC functions. 
Index: linux-rt-rebase.q/include/asm-mips/timeofday.h =================================================================== --- /dev/null +++ linux-rt-rebase.q/include/asm-mips/timeofday.h @@ -0,0 +1,5 @@ +#ifndef _ASM_MIPS_TIMEOFDAY_H +#define _ASM_MIPS_TIMEOFDAY_H +#include +#endif + Index: linux-rt-rebase.q/include/asm-mips/uaccess.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-mips/uaccess.h +++ linux-rt-rebase.q/include/asm-mips/uaccess.h @@ -427,7 +427,6 @@ extern size_t __copy_user(void *__to, co const void *__cu_from; \ long __cu_len; \ \ - might_sleep(); \ __cu_to = (to); \ __cu_from = (from); \ __cu_len = (n); \ @@ -483,7 +482,6 @@ extern size_t __copy_user_inatomic(void const void *__cu_from; \ long __cu_len; \ \ - might_sleep(); \ __cu_to = (to); \ __cu_from = (from); \ __cu_len = (n); \ @@ -562,7 +560,6 @@ extern size_t __copy_user_inatomic(void const void __user *__cu_from; \ long __cu_len; \ \ - might_sleep(); \ __cu_to = (to); \ __cu_from = (from); \ __cu_len = (n); \ @@ -593,7 +590,6 @@ extern size_t __copy_user_inatomic(void const void __user *__cu_from; \ long __cu_len; \ \ - might_sleep(); \ __cu_to = (to); \ __cu_from = (from); \ __cu_len = (n); \ @@ -611,7 +607,6 @@ extern size_t __copy_user_inatomic(void const void __user *__cu_from; \ long __cu_len; \ \ - might_sleep(); \ __cu_to = (to); \ __cu_from = (from); \ __cu_len = (n); \ @@ -638,7 +633,6 @@ __clear_user(void __user *addr, __kernel { __kernel_size_t res; - might_sleep(); __asm__ __volatile__( "move\t$4, %1\n\t" "move\t$5, $0\n\t" @@ -687,7 +681,6 @@ __strncpy_from_user(char *__to, const ch { long res; - might_sleep(); __asm__ __volatile__( "move\t$4, %1\n\t" "move\t$5, %2\n\t" @@ -724,7 +717,6 @@ strncpy_from_user(char *__to, const char { long res; - might_sleep(); __asm__ __volatile__( "move\t$4, %1\n\t" "move\t$5, %2\n\t" @@ -743,7 +735,6 @@ static inline long __strlen_user(const c { long res; - might_sleep(); __asm__ __volatile__( "move\t$4, %1\n\t" __MODULE_JAL(__strlen_user_nocheck_asm) @@ -773,7 +764,6 @@ static inline long strlen_user(const cha { long res; - might_sleep(); __asm__ __volatile__( "move\t$4, %1\n\t" __MODULE_JAL(__strlen_user_asm) @@ -790,7 +780,6 @@ static inline long __strnlen_user(const { long res; - might_sleep(); __asm__ __volatile__( "move\t$4, %1\n\t" "move\t$5, %2\n\t" @@ -821,7 +810,6 @@ static inline long strnlen_user(const ch { long res; - might_sleep(); __asm__ __volatile__( "move\t$4, %1\n\t" "move\t$5, %2\n\t" patches/neptune-no-at-keyboard.patch0000664000077200007720000000335210653433161017037 0ustar mingomingoneptune needs this to boot ... 
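The two __setup() handlers added below make this selectable from the kernel command line; e.g. append (illustrative):

	noatkbd nopsmouse

With either option set, the handler prints a "debug: not setting up ..." notice and the corresponding init function (atkbd_init() or psmouse_init()) returns 0 before registering the driver.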
--- drivers/input/keyboard/atkbd.c | 14 ++++++++++++++ drivers/input/mouse/psmouse-base.c | 15 +++++++++++++++ 2 files changed, 29 insertions(+) Index: linux-rt-rebase.q/drivers/input/keyboard/atkbd.c =================================================================== --- linux-rt-rebase.q.orig/drivers/input/keyboard/atkbd.c +++ linux-rt-rebase.q/drivers/input/keyboard/atkbd.c @@ -1396,9 +1396,23 @@ static ssize_t atkbd_show_err_count(stru return sprintf(buf, "%lu\n", atkbd->err_count); } +static int __read_mostly noatkbd; + +static int __init noatkbd_setup(char *str) +{ + noatkbd = 1; + printk(KERN_INFO "debug: not setting up AT keyboard.\n"); + + return 1; +} + +__setup("noatkbd", noatkbd_setup); static int __init atkbd_init(void) { + if (noatkbd) + return 0; + return serio_register_driver(&atkbd_drv); } Index: linux-rt-rebase.q/drivers/input/mouse/psmouse-base.c =================================================================== --- linux-rt-rebase.q.orig/drivers/input/mouse/psmouse-base.c +++ linux-rt-rebase.q/drivers/input/mouse/psmouse-base.c @@ -1594,10 +1594,25 @@ static int psmouse_get_maxproto(char *bu return sprintf(buffer, "%s\n", psmouse_protocol_by_type(type)->name); } +static int __read_mostly nopsmouse; + +static int __init nopsmouse_setup(char *str) +{ + nopsmouse = 1; + printk(KERN_INFO "debug: not setting up psmouse.\n"); + + return 1; +} + +__setup("nopsmouse", nopsmouse_setup); + static int __init psmouse_init(void) { int err; + if (nopsmouse) + return 0; + kpsmoused_wq = create_singlethread_workqueue("kpsmoused"); if (!kpsmoused_wq) { printk(KERN_ERR "psmouse: failed to create kpsmoused workqueue\n"); patches/ioapic-fix-too-fast-clocks.patch0000664000077200007720000000276410653433162017614 0ustar mingomingoFrom: Akira Tsukamoto This one-line patch adds upper-bound testing inside timer_irq_works() when evaluating whether the timer irq works or not on boot up. It fixes machines that have problems with the clock running too fast. What this patch does is: if timer interrupts run too fast through the IO-APIC IRQ, it falls back to the i8259A IRQ. I really appreciate the feedback from ATI Xpress 200 chipset users; it should eliminate the need for adding no_timer_check to the kernel options. I have an NEC laptop using the ATI Xpress 200 chipset with a Pentium M 1.8GHz, and its clock keeps running ahead when the kernel is compiled with local APIC support. Many machines based on the RS200 chipset seem to have the same problem, including the Acer Ferrari 400X AMD notebook and the Compaq R4000. Also, I would like to have comments on the upper bound limit, 16 ticks, which I chose in this patch. My laptop always reports around 20, which is double the normal value. arch/i386/kernel/io_apic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) Index: linux-rt-rebase.q/arch/i386/kernel/io_apic.c =================================================================== --- linux-rt-rebase.q.orig/arch/i386/kernel/io_apic.c +++ linux-rt-rebase.q/arch/i386/kernel/io_apic.c @@ -1900,7 +1900,7 @@ static int __init timer_irq_works(void) * might have cached one ExtINT interrupt. Finally, at * least one tick may be lost due to delays. */ - if (jiffies - t1 > 4) + if (jiffies - t1 > 4 && jiffies - t1 < 16) return 1; return 0; patches/softlockup-add-irq-regs-h.patch0000664000077200007720000000272410653433167017450 0ustar mingomingoSubject: core: make asm/irq_regs.h available on every platform From: Ingo Molnar the softlockup detector would like to use get_irq_regs(), so generalize the availability on every Linux architecture.
(it is fine for an architecture to always return NULL to get_irq_regs(), which it does by default.) Signed-off-by: Ingo Molnar --- include/asm-arm26/irq_regs.h | 1 + include/asm-cris/irq_regs.h | 1 + include/asm-ppc/irq_regs.h | 1 + include/asm-v850/irq_regs.h | 1 + 4 files changed, 4 insertions(+) Index: linux-rt-rebase.q/include/asm-arm26/irq_regs.h =================================================================== --- /dev/null +++ linux-rt-rebase.q/include/asm-arm26/irq_regs.h @@ -0,0 +1 @@ +#include Index: linux-rt-rebase.q/include/asm-cris/irq_regs.h =================================================================== --- /dev/null +++ linux-rt-rebase.q/include/asm-cris/irq_regs.h @@ -0,0 +1 @@ +#include Index: linux-rt-rebase.q/include/asm-ppc/irq_regs.h =================================================================== --- /dev/null +++ linux-rt-rebase.q/include/asm-ppc/irq_regs.h @@ -0,0 +1 @@ +#include Index: linux-rt-rebase.q/include/asm-v850/irq_regs.h =================================================================== --- /dev/null +++ linux-rt-rebase.q/include/asm-v850/irq_regs.h @@ -0,0 +1 @@ +#include patches/preempt-realtime-irqs.patch0000664000077200007720000001203710653433166017004 0ustar mingomingo--- include/linux/irq.h | 10 ++++------ kernel/irq/handle.c | 13 +++++++++++-- kernel/irq/manage.c | 18 ++++++++++++++---- kernel/irq/spurious.c | 3 +-- 4 files changed, 30 insertions(+), 14 deletions(-) Index: linux-rt-rebase.q/include/linux/irq.h =================================================================== --- linux-rt-rebase.q.orig/include/linux/irq.h +++ linux-rt-rebase.q/include/linux/irq.h @@ -145,7 +145,6 @@ struct irq_chip { * @irqs_unhandled: stats field for spurious unhandled interrupts * @thread: Thread pointer for threaded preemptible irq handling * @wait_for_handler: Waitqueue to wait for a running preemptible handler - * @cycles: Timestamp for stats and debugging * @lock: locking for SMP * @affinity: IRQ affinity on SMP * @cpu: cpu index useful for balancing @@ -168,10 +167,10 @@ struct irq_desc { unsigned int irq_count; /* For detecting broken IRQs */ unsigned int irqs_unhandled; unsigned long last_unhandled; /* Aging timer for unhandled count */ - struct task_struct *thread; - wait_queue_head_t wait_for_handler; - cycles_t timestamp; - spinlock_t lock; + struct task_struct *thread; + wait_queue_head_t wait_for_handler; + cycles_t timestamp; + raw_spinlock_t lock; #ifdef CONFIG_SMP cpumask_t affinity; unsigned int cpu; @@ -397,7 +396,6 @@ extern int set_irq_msi(unsigned int irq, /* Early initialization of irqs */ extern void early_init_hardirqs(void); -extern cycles_t irq_timestamp(unsigned int irq); #if defined(CONFIG_PREEMPT_HARDIRQS) extern void init_hardirqs(void); Index: linux-rt-rebase.q/kernel/irq/handle.c =================================================================== --- linux-rt-rebase.q.orig/kernel/irq/handle.c +++ linux-rt-rebase.q/kernel/irq/handle.c @@ -54,12 +54,13 @@ struct irq_desc irq_desc[NR_IRQS] __cach .chip = &no_irq_chip, .handle_irq = handle_bad_irq, .depth = 1, - .lock = __SPIN_LOCK_UNLOCKED(irq_desc->lock), + .lock = RAW_SPIN_LOCK_UNLOCKED(irq_desc), #ifdef CONFIG_SMP .affinity = CPU_MASK_ALL #endif } }; +EXPORT_SYMBOL_GPL(irq_desc); /* * What should we do if we get a hw irq event on an illegal vector? 
@@ -151,6 +152,7 @@ irqreturn_t handle_IRQ_event(unsigned in ret = action->handler(irq, action->dev_id); if (preempt_count() != preempt_count) { + stop_trace(); print_symbol("BUG: unbalanced irq-handler preempt count in %s!\n", (unsigned long) action->handler); printk("entered with %08x, exited with %08x.\n", preempt_count, preempt_count()); dump_stack(); @@ -225,7 +227,7 @@ int redirect_hardirq(struct irq_desc *de * This is the original x86 implementation which is used for every * interrupt type. */ -fastcall unsigned int __do_IRQ(unsigned int irq) +fastcall notrace unsigned int __do_IRQ(unsigned int irq) { struct irq_desc *desc = irq_desc + irq; struct irqaction *action; @@ -246,6 +248,13 @@ fastcall unsigned int __do_IRQ(unsigned desc->chip->end(irq); return 1; } + /* + * If the task is currently running in user mode, don't + * detect soft lockups. If CONFIG_DETECT_SOFTLOCKUP is not + * configured, this should be optimized out. + */ + if (user_mode(get_irq_regs())) + touch_softlockup_watchdog(); spin_lock(&desc->lock); if (desc->chip->ack) Index: linux-rt-rebase.q/kernel/irq/manage.c =================================================================== --- linux-rt-rebase.q.orig/kernel/irq/manage.c +++ linux-rt-rebase.q/kernel/irq/manage.c @@ -578,9 +578,9 @@ int request_irq(unsigned int irq, irq_ha if (irqflags & IRQF_DISABLED) { unsigned long flags; - local_irq_save(flags); + local_irq_save_nort(flags); handler(irq, dev_id); - local_irq_restore(flags); + local_irq_restore_nort(flags); } else handler(irq, dev_id); } @@ -600,6 +600,11 @@ int hardirq_preemption = 1; EXPORT_SYMBOL(hardirq_preemption); +/* + * Real-Time Preemption depends on hardirq threading: + */ +#ifndef CONFIG_PREEMPT_RT + static int __init hardirq_preempt_setup (char *str) { if (!strncmp(str, "off", 3)) @@ -614,6 +619,7 @@ static int __init hardirq_preempt_setup __setup("hardirq-preempt=", hardirq_preempt_setup); +#endif /* * threaded simple handler @@ -773,12 +779,16 @@ static int do_irqd(void * __desc) sys_sched_setscheduler(current->pid, SCHED_FIFO, &param); while (!kthread_should_stop()) { - local_irq_disable(); + local_irq_disable_nort(); set_current_state(TASK_INTERRUPTIBLE); +#ifndef CONFIG_PREEMPT_RT irq_enter(); +#endif do_hardirq(desc); +#ifndef CONFIG_PREEMPT_RT irq_exit(); - local_irq_enable(); +#endif + local_irq_enable_nort(); cond_resched(); #ifdef CONFIG_SMP /* Index: linux-rt-rebase.q/kernel/irq/spurious.c =================================================================== --- linux-rt-rebase.q.orig/kernel/irq/spurious.c +++ linux-rt-rebase.q/kernel/irq/spurious.c @@ -59,9 +59,8 @@ static int misrouted_irq(int irq) } action = action->next; } - local_irq_disable(); /* Now clean up the flags */ - spin_lock(&desc->lock); + spin_lock_irq(&desc->lock); action = desc->action; /* patches/x86_64-tsc-sync-irqflags-fix.patch0000664000077200007720000000143210653433161017635 0ustar mingomingo--- arch/x86_64/kernel/tsc_sync.c | 4 ++++ 1 file changed, 4 insertions(+) Index: linux-rt-rebase.q/arch/x86_64/kernel/tsc_sync.c =================================================================== --- linux-rt-rebase.q.orig/arch/x86_64/kernel/tsc_sync.c +++ linux-rt-rebase.q/arch/x86_64/kernel/tsc_sync.c @@ -97,6 +97,7 @@ static __cpuinit void check_tsc_warp(voi */ void __cpuinit check_tsc_sync_source(int cpu) { + unsigned long flags; int cpus = 2; /* @@ -117,8 +118,11 @@ void __cpuinit check_tsc_sync_source(int /* * Wait for the target to arrive: */ + local_save_flags(flags); + local_irq_enable(); while (atomic_read(&start_count)
!= cpus-1) cpu_relax(); + local_irq_restore(flags); /* * Trigger the target to continue into the measurement too: */ patches/preempt-irqs-direct-debug-keyboard.patch0000664000077200007720000000477310653433164021334 0ustar mingomingo--- include/linux/sched.h | 6 ++++++ init/main.c | 2 ++ kernel/irq/handle.c | 31 +++++++++++++++++++++++++++++++ 3 files changed, 39 insertions(+) Index: linux-rt-rebase.q/include/linux/sched.h =================================================================== --- linux-rt-rebase.q.orig/include/linux/sched.h +++ linux-rt-rebase.q/include/linux/sched.h @@ -267,6 +267,12 @@ extern void trap_init(void); extern void update_process_times(int user); extern void scheduler_tick(void); +#ifdef CONFIG_GENERIC_HARDIRQS +extern int debug_direct_keyboard; +#else +# define debug_direct_keyboard 0 +#endif + #ifdef CONFIG_DETECT_SOFTLOCKUP extern void softlockup_tick(void); extern void spawn_softlockup_task(void); Index: linux-rt-rebase.q/init/main.c =================================================================== --- linux-rt-rebase.q.orig/init/main.c +++ linux-rt-rebase.q/init/main.c @@ -867,5 +867,7 @@ static int __init kernel_init(void * unu * initmem segments and start the user-mode stuff.. */ init_post(); + WARN_ON(debug_direct_keyboard); + return 0; } Index: linux-rt-rebase.q/kernel/irq/handle.c =================================================================== --- linux-rt-rebase.q.orig/kernel/irq/handle.c +++ linux-rt-rebase.q/kernel/irq/handle.c @@ -132,6 +132,11 @@ irqreturn_t handle_IRQ_event(unsigned in irqreturn_t ret, retval = IRQ_NONE; unsigned int status = 0; +#ifdef __i386__ + if (debug_direct_keyboard && irq == 1) + lockdep_off(); +#endif + handle_dynamic_tick(action); /* @@ -163,9 +168,30 @@ irqreturn_t handle_IRQ_event(unsigned in } local_irq_disable(); +#ifdef __i386__ + if (debug_direct_keyboard && irq == 1) + lockdep_on(); +#endif return retval; } +/* + * Hack - used for development only. + */ +int __read_mostly debug_direct_keyboard = 0; + +int __init debug_direct_keyboard_setup(char *str) +{ + debug_direct_keyboard = 1; + printk(KERN_INFO "Switching IRQ 1 (keyboard) to direct!\n"); +#ifdef CONFIG_PREEMPT_RT + printk(KERN_INFO "WARNING: kernel may easily crash this way!\n"); +#endif + return 1; +} + +__setup("debug_direct_keyboard", debug_direct_keyboard_setup); + int redirect_hardirq(struct irq_desc *desc) { /* @@ -175,6 +201,11 @@ int redirect_hardirq(struct irq_desc *de !desc->thread) return 0; +#ifdef __i386__ + if (debug_direct_keyboard && (desc - irq_desc == 1)) + return 0; +#endif + BUG_ON(!irqs_disabled()); if (desc->thread && desc->thread->state != TASK_RUNNING) wake_up_process(desc->thread); patches/ns2cyc-result-fix.patch0000664000077200007720000000535110653433162016052 0ustar mingomingoFrom sshtylyov@ru.mvista.com Wed May 16 18:11:13 2007 From: Sergei Shtylyov Organization: MontaVista Software Inc.
To: tglx@linutronix.de
Subject: [PATCH 2.6.21-rt1] ns2cyc() result fix
Date: Wed, 16 May 2007 18:39:50 +0300
User-Agent: KMail/1.5
MIME-Version: 1.0
Content-Disposition: inline
Content-Type: text/plain; charset="iso-8859-1"
Message-Id: <200705161939.50242.sshtylyov@ru.mvista.com>
X-Evolution-Source: imap://tglx%40linutronix.de@localhost:8993/
Content-Transfer-Encoding: 8bit

Fix the dubious use of cycles_t where cycle_t was appropriate. On machines with a 32-bit cycles_t (like ARM/PPC) it caused these warnings:

In file included from arch/powerpc/kernel/time.c:1045:
include/linux/clocksource.h: In function `ns2cyc':
include/linux/clocksource.h:213: warning: comparison of distinct pointer types lacks a cast
include/linux/clocksource.h:213: warning: right shift count >= width of type
include/linux/clocksource.h:213: warning: passing argument 1 of `__div64_32' from incompatible pointer type

This function, and therefore usecs_to_cycles(), was unlikely to return a correct result on such machines because of the shift result truncation.

Signed-off-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>

---
I'm also uncertain about 'preempt_max_latency' and 'preempt_thresh' variables being declared as 'unsigned long' -- however, looks like those are unlikely to overflow... yet it's unclear why there's casts to 'cycle_t' (which is always 64-bit) when initializing/comparing them...

 include/linux/clocksource.h |    4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

Index: linux-rt-rebase.q/include/linux/clocksource.h
===================================================================
--- linux-rt-rebase.q.orig/include/linux/clocksource.h
+++ linux-rt-rebase.q/include/linux/clocksource.h
@@ -189,9 +189,9 @@ static inline s64 cyc2ns(struct clocksou
  * @cs:		Pointer to clocksource
  * @nsecs:	Nanoseconds
  */
-static inline cycles_t ns2cyc(struct clocksource *cs, u64 nsecs)
+static inline cycle_t ns2cyc(struct clocksource *cs, u64 nsecs)
 {
-	cycles_t ret = nsecs << cs->shift;
+	cycle_t ret = nsecs << cs->shift;
 
 	do_div(ret, cs->mult + 1);
patches/2.6.21-rc6-lockless1-prep-find_lock_page.patch0000664000077200007720000000261510653433167021560 0ustar  mingomingoFrom: Nick Piggin
Subject: [patch 1/9] mm: prep find_lock_page

find_lock_page does not need to recheck ->index because if the page is in the right mapping then the index must be the same. Also, tree_lock does not need to be retaken after the page is locked in order to test that ->mapping has not changed.

Signed-off-by: Nick Piggin

---
 mm/filemap.c |    8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

Index: linux-rt-rebase.q/mm/filemap.c
===================================================================
--- linux-rt-rebase.q.orig/mm/filemap.c
+++ linux-rt-rebase.q/mm/filemap.c
@@ -621,26 +621,26 @@ struct page *find_lock_page(struct addre
 {
 	struct page *page;
 
-	read_lock_irq(&mapping->tree_lock);
 repeat:
+	read_lock_irq(&mapping->tree_lock);
 	page = radix_tree_lookup(&mapping->page_tree, offset);
 	if (page) {
 		page_cache_get(page);
 		if (TestSetPageLocked(page)) {
 			read_unlock_irq(&mapping->tree_lock);
 			__lock_page(page);
-			read_lock_irq(&mapping->tree_lock);
 
 			/* Has the page been truncated while we slept?
*/ - if (unlikely(page->mapping != mapping || - page->index != offset)) { + if (unlikely(page->mapping != mapping)) { unlock_page(page); page_cache_release(page); goto repeat; } + goto out; } } read_unlock_irq(&mapping->tree_lock); +out: return page; } EXPORT_SYMBOL(find_lock_page); patches/futex-performance-hack.patch0000664000077200007720000000330410653433167017110 0ustar mingomingo--- kernel/futex.c | 6 ++++-- kernel/sysctl.c | 9 +++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) Index: linux-rt-rebase.q/kernel/futex.c =================================================================== --- linux-rt-rebase.q.orig/kernel/futex.c +++ linux-rt-rebase.q/kernel/futex.c @@ -120,12 +120,14 @@ static struct futex_hash_bucket futex_qu /* Futex-fs vfsmount entry: */ static struct vfsmount *futex_mnt; +int futex_performance_hack; + /* * Take mm->mmap_sem, when futex is shared */ static inline void futex_lock_mm(struct rw_semaphore *fshared) { - if (fshared) + if (fshared && !futex_performance_hack) down_read(fshared); } @@ -134,7 +136,7 @@ static inline void futex_lock_mm(struct */ static inline void futex_unlock_mm(struct rw_semaphore *fshared) { - if (fshared) + if (fshared && !futex_performance_hack) up_read(fshared); } Index: linux-rt-rebase.q/kernel/sysctl.c =================================================================== --- linux-rt-rebase.q.orig/kernel/sysctl.c +++ linux-rt-rebase.q/kernel/sysctl.c @@ -66,6 +66,7 @@ extern int print_fatal_signals; extern int sysctl_overcommit_memory; extern int sysctl_overcommit_ratio; extern int sysctl_panic_on_oom; +extern int futex_performance_hack; extern int max_threads; extern int core_uses_pid; extern int suid_dumpable; @@ -324,6 +325,14 @@ static ctl_table kern_table[] = { .proc_handler = &proc_dointvec, }, { + .ctl_name = CTL_UNNUMBERED, + .procname = "futex_performance_hack", + .data = &futex_performance_hack, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { .ctl_name = KERN_PANIC, .procname = "prof_pid", .data = &prof_pid, patches/spinlock-trylock-cleanup-sungem.patch0000664000077200007720000000117710653433161021002 0ustar mingomingo--- drivers/net/sungem.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) Index: linux-rt-rebase.q/drivers/net/sungem.c =================================================================== --- linux-rt-rebase.q.orig/drivers/net/sungem.c +++ linux-rt-rebase.q/drivers/net/sungem.c @@ -1034,10 +1034,8 @@ static int gem_start_xmit(struct sk_buff (csum_stuff_off << 21)); } - local_irq_save(flags); - if (!spin_trylock(&gp->tx_lock)) { + if (!spin_trylock_irqsave(&gp->tx_lock, flags)) { /* Tell upper layer to requeue */ - local_irq_restore(flags); return NETDEV_TX_LOCKED; } /* We raced with gem_do_stop() */ patches/preempt-realtime-acpi.patch0000664000077200007720000001316610653433166016746 0ustar mingomingo--- drivers/acpi/ec.c | 12 ++++++++++++ drivers/acpi/hardware/hwregs.c | 16 ++++++++-------- drivers/acpi/processor_idle.c | 2 +- drivers/acpi/utilities/utmutex.c | 2 +- include/acpi/acglobal.h | 7 ++++++- include/acpi/acpiosxf.h | 2 +- 6 files changed, 29 insertions(+), 12 deletions(-) Index: linux-rt-rebase.q/drivers/acpi/ec.c =================================================================== --- linux-rt-rebase.q.orig/drivers/acpi/ec.c +++ linux-rt-rebase.q/drivers/acpi/ec.c @@ -483,7 +483,19 @@ static u32 acpi_ec_gpe_handler(void *dat atomic_inc(&ec->event_count); if (acpi_ec_mode == EC_INTR) { +#if 0 wake_up(&ec->wait); +#else + // hack ... 
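+		// Presumably the open-coded wakeup below exists because
+		// wake_up() takes the waitqueue spinlock -- a sleeping
+		// lock on PREEMPT_RT -- which must not be taken from a
+		// GPE handler running in hard interrupt context; the
+		// first waiter is woken directly instead.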
+ if (waitqueue_active(&ec->wait)) { + struct task_struct *task; + + task = list_entry(ec->wait.task_list.next, + wait_queue_t, task_list)->private; + if (task) + wake_up_process(task); + } +#endif } value = acpi_ec_read_status(ec); Index: linux-rt-rebase.q/drivers/acpi/hardware/hwregs.c =================================================================== --- linux-rt-rebase.q.orig/drivers/acpi/hardware/hwregs.c +++ linux-rt-rebase.q/drivers/acpi/hardware/hwregs.c @@ -73,7 +73,7 @@ acpi_status acpi_hw_clear_acpi_status(vo ACPI_BITMASK_ALL_FIXED_STATUS, (u16) acpi_gbl_FADT.xpm1a_event_block.address)); - lock_flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock); + spin_lock_irqsave(acpi_gbl_hardware_lock, lock_flags); status = acpi_hw_register_write(ACPI_MTX_DO_NOT_LOCK, ACPI_REGISTER_PM1_STATUS, @@ -98,7 +98,7 @@ acpi_status acpi_hw_clear_acpi_status(vo status = acpi_ev_walk_gpe_list(acpi_hw_clear_gpe_block); unlock_and_exit: - acpi_os_release_lock(acpi_gbl_hardware_lock, lock_flags); + spin_unlock_irqrestore(acpi_gbl_hardware_lock, lock_flags); return_ACPI_STATUS(status); } @@ -331,7 +331,7 @@ acpi_status acpi_set_register(u32 regist return_ACPI_STATUS(AE_BAD_PARAMETER); } - lock_flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock); + spin_lock_irqsave(acpi_gbl_hardware_lock, lock_flags); /* Always do a register read first so we can insert the new bits */ @@ -441,7 +441,7 @@ acpi_status acpi_set_register(u32 regist unlock_and_exit: - acpi_os_release_lock(acpi_gbl_hardware_lock, lock_flags); + spin_unlock_irqrestore(acpi_gbl_hardware_lock, lock_flags); /* Normalize the value that was read */ @@ -481,7 +481,7 @@ acpi_hw_register_read(u8 use_lock, u32 r ACPI_FUNCTION_TRACE(hw_register_read); if (ACPI_MTX_LOCK == use_lock) { - lock_flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock); + spin_lock_irqsave(acpi_gbl_hardware_lock, lock_flags); } switch (register_id) { @@ -560,7 +560,7 @@ acpi_hw_register_read(u8 use_lock, u32 r unlock_and_exit: if (ACPI_MTX_LOCK == use_lock) { - acpi_os_release_lock(acpi_gbl_hardware_lock, lock_flags); + spin_unlock_irqrestore(acpi_gbl_hardware_lock, lock_flags); } if (ACPI_SUCCESS(status)) { @@ -606,7 +606,7 @@ acpi_status acpi_hw_register_write(u8 us ACPI_FUNCTION_TRACE(hw_register_write); if (ACPI_MTX_LOCK == use_lock) { - lock_flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock); + spin_lock_irqsave(acpi_gbl_hardware_lock, lock_flags); } switch (register_id) { @@ -730,7 +730,7 @@ acpi_status acpi_hw_register_write(u8 us unlock_and_exit: if (ACPI_MTX_LOCK == use_lock) { - acpi_os_release_lock(acpi_gbl_hardware_lock, lock_flags); + spin_unlock_irqrestore(acpi_gbl_hardware_lock, lock_flags); } return_ACPI_STATUS(status); Index: linux-rt-rebase.q/drivers/acpi/processor_idle.c =================================================================== --- linux-rt-rebase.q.orig/drivers/acpi/processor_idle.c +++ linux-rt-rebase.q/drivers/acpi/processor_idle.c @@ -948,7 +948,7 @@ static int acpi_idle_enter_c2(struct cpu } static int c3_cpu_count; -static DEFINE_SPINLOCK(c3_lock); +static DEFINE_RAW_SPINLOCK(c3_lock); /** * acpi_idle_enter_c3 - enters an ACPI C3 state-type Index: linux-rt-rebase.q/drivers/acpi/utilities/utmutex.c =================================================================== --- linux-rt-rebase.q.orig/drivers/acpi/utilities/utmutex.c +++ linux-rt-rebase.q/drivers/acpi/utilities/utmutex.c @@ -116,7 +116,7 @@ void acpi_ut_mutex_terminate(void) /* Delete the spinlocks */ acpi_os_delete_lock(acpi_gbl_gpe_lock); - acpi_os_delete_lock(acpi_gbl_hardware_lock); 
+// acpi_os_delete_lock(acpi_gbl_hardware_lock); return_VOID; } Index: linux-rt-rebase.q/include/acpi/acglobal.h =================================================================== --- linux-rt-rebase.q.orig/include/acpi/acglobal.h +++ linux-rt-rebase.q/include/acpi/acglobal.h @@ -184,7 +184,12 @@ ACPI_EXTERN acpi_semaphore acpi_gbl_glob * interrupt level */ ACPI_EXTERN spinlock_t _acpi_gbl_gpe_lock; /* For GPE data structs and registers */ -ACPI_EXTERN spinlock_t _acpi_gbl_hardware_lock; /* For ACPI H/W except GPE registers */ + +/* + * Need to be raw because it might be used in acpi_processor_idle(): + */ +ACPI_EXTERN raw_spinlock_t _acpi_gbl_hardware_lock; /* For ACPI H/W except GPE registers */ + #define acpi_gbl_gpe_lock &_acpi_gbl_gpe_lock #define acpi_gbl_hardware_lock &_acpi_gbl_hardware_lock Index: linux-rt-rebase.q/include/acpi/acpiosxf.h =================================================================== --- linux-rt-rebase.q.orig/include/acpi/acpiosxf.h +++ linux-rt-rebase.q/include/acpi/acpiosxf.h @@ -61,7 +61,7 @@ typedef enum { OSL_EC_BURST_HANDLER } acpi_execute_type; -#define ACPI_NO_UNIT_LIMIT ((u32) -1) +#define ACPI_NO_UNIT_LIMIT (INT_MAX/2) #define ACPI_MUTEX_SEM 1 /* Functions for acpi_os_signal */ patches/panic-dont-stop-box.patch0000664000077200007720000000101310653433166016351 0ustar mingomingo--- kernel/panic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) Index: linux-rt-rebase.q/kernel/panic.c =================================================================== --- linux-rt-rebase.q.orig/kernel/panic.c +++ linux-rt-rebase.q/kernel/panic.c @@ -96,7 +96,7 @@ NORET_TYPE void panic(const char * fmt, * unfortunately means it may not be hardened to work in a panic * situation. */ - smp_send_stop(); +// smp_send_stop(); #endif atomic_notifier_call_chain(&panic_notifier_list, 0, buf); patches/latency-tracing.patch0000664000077200007720000034434010653433162015641 0ustar mingomingo Makefile | 11 arch/i386/lib/delay.c | 6 arch/x86_64/kernel/tsc.c | 4 drivers/clocksource/acpi_pm.c | 8 fs/proc/proc_misc.c | 17 include/linux/clocksource.h | 23 include/linux/kernel.h | 2 include/linux/latency_hist.h | 32 include/linux/preempt.h | 20 include/linux/sched.h | 109 + init/main.c | 2 kernel/Makefile | 5 kernel/fork.c | 2 kernel/latency_hist.c | 267 ++++ kernel/latency_trace.c | 2742 ++++++++++++++++++++++++++++++++++++++++++ kernel/lockdep.c | 33 kernel/panic.c | 2 kernel/printk.c | 2 kernel/sched.c | 82 - kernel/sysctl.c | 128 + kernel/time/timekeeping.c | 27 lib/Kconfig.debug | 186 ++ lib/debug_locks.c | 8 scripts/Makefile | 1 scripts/trace-it.c | 79 + 25 files changed, 3737 insertions(+), 61 deletions(-) Index: linux-rt-rebase.q/Makefile =================================================================== --- linux-rt-rebase.q.orig/Makefile +++ linux-rt-rebase.q/Makefile @@ -491,10 +491,15 @@ endif include $(srctree)/arch/$(ARCH)/Makefile -ifdef CONFIG_FRAME_POINTER -CFLAGS += -fno-omit-frame-pointer -fno-optimize-sibling-calls + +ifdef CONFIG_MCOUNT +CFLAGS += -pg -fno-omit-frame-pointer -fno-optimize-sibling-calls else -CFLAGS += -fomit-frame-pointer + ifdef CONFIG_FRAME_POINTER + CFLAGS += -fno-omit-frame-pointer -fno-optimize-sibling-calls + else + CFLAGS += -fomit-frame-pointer + endif endif ifdef CONFIG_DEBUG_INFO Index: linux-rt-rebase.q/arch/i386/lib/delay.c =================================================================== --- linux-rt-rebase.q.orig/arch/i386/lib/delay.c +++ linux-rt-rebase.q/arch/i386/lib/delay.c @@ -23,7 +23,7 @@ #endif /* simple loop based 
delay: */ -static void delay_loop(unsigned long loops) +static notrace void delay_loop(unsigned long loops) { int d0; @@ -38,7 +38,7 @@ static void delay_loop(unsigned long loo } /* TSC based delay: */ -static void delay_tsc(unsigned long loops) +static notrace void delay_tsc(unsigned long loops) { unsigned long bclock, now; @@ -69,7 +69,7 @@ int read_current_timer(unsigned long *ti return -1; } -void __delay(unsigned long loops) +void notrace __delay(unsigned long loops) { delay_fn(loops); } Index: linux-rt-rebase.q/arch/x86_64/kernel/tsc.c =================================================================== --- linux-rt-rebase.q.orig/arch/x86_64/kernel/tsc.c +++ linux-rt-rebase.q/arch/x86_64/kernel/tsc.c @@ -247,13 +247,13 @@ __setup("notsc", notsc_setup); /* clock source code: */ -static cycle_t read_tsc(void) +static notrace cycle_t read_tsc(void) { cycle_t ret = (cycle_t)get_cycles_sync(); return ret; } -static cycle_t __vsyscall_fn vread_tsc(void) +static notrace cycle_t __vsyscall_fn vread_tsc(void) { cycle_t ret = (cycle_t)get_cycles_sync(); return ret; Index: linux-rt-rebase.q/drivers/clocksource/acpi_pm.c =================================================================== --- linux-rt-rebase.q.orig/drivers/clocksource/acpi_pm.c +++ linux-rt-rebase.q/drivers/clocksource/acpi_pm.c @@ -30,13 +30,13 @@ */ u32 pmtmr_ioport __read_mostly; -static inline u32 read_pmtmr(void) +static notrace inline u32 read_pmtmr(void) { /* mask the output to 24 bits */ return inl(pmtmr_ioport) & ACPI_PM_MASK; } -u32 acpi_pm_read_verified(void) +u32 notrace acpi_pm_read_verified(void) { u32 v1 = 0, v2 = 0, v3 = 0; @@ -56,12 +56,12 @@ u32 acpi_pm_read_verified(void) return v2; } -static cycle_t acpi_pm_read_slow(void) +static notrace cycle_t acpi_pm_read_slow(void) { return (cycle_t)acpi_pm_read_verified(); } -static cycle_t acpi_pm_read(void) +static notrace cycle_t acpi_pm_read(void) { return (cycle_t)read_pmtmr(); } Index: linux-rt-rebase.q/fs/proc/proc_misc.c =================================================================== --- linux-rt-rebase.q.orig/fs/proc/proc_misc.c +++ linux-rt-rebase.q/fs/proc/proc_misc.c @@ -634,6 +634,20 @@ static int execdomains_read_proc(char *p return proc_calc_metrics(page, start, off, count, eof, len); } +#ifdef CONFIG_EVENT_TRACE +extern struct seq_operations latency_trace_op; +static int latency_trace_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &latency_trace_op); +} +static struct file_operations proc_latency_trace_operations = { + .open = latency_trace_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; +#endif + #ifdef CONFIG_MAGIC_SYSRQ /* * writing 'C' to /proc/sysrq-trigger is like sysrq-C @@ -727,6 +741,9 @@ void __init proc_misc_init(void) #ifdef CONFIG_SCHEDSTATS create_seq_entry("schedstat", 0, &proc_schedstat_operations); #endif +#ifdef CONFIG_EVENT_TRACE + create_seq_entry("latency_trace", 0, &proc_latency_trace_operations); +#endif #ifdef CONFIG_PROC_KCORE proc_root_kcore = create_proc_entry("kcore", S_IRUSR, NULL); if (proc_root_kcore) { Index: linux-rt-rebase.q/include/linux/clocksource.h =================================================================== --- linux-rt-rebase.q.orig/include/linux/clocksource.h +++ linux-rt-rebase.q/include/linux/clocksource.h @@ -21,6 +21,9 @@ typedef u64 cycle_t; struct clocksource; +extern unsigned long preempt_max_latency; +extern unsigned long preempt_thresh; + /** * struct clocksource - hardware abstraction for a free running counter * Provides mostly state-free 
accessors to the underlying hardware. @@ -178,8 +181,20 @@ static inline cycle_t clocksource_read(s */ static inline s64 cyc2ns(struct clocksource *cs, cycle_t cycles) { - u64 ret = (u64)cycles; - ret = (ret * cs->mult) >> cs->shift; + return ((u64)cycles * cs->mult) >> cs->shift; +} + +/** + * ns2cyc - converts nanoseconds to clocksource cycles + * @cs: Pointer to clocksource + * @nsecs: Nanoseconds + */ +static inline cycles_t ns2cyc(struct clocksource *cs, u64 nsecs) +{ + cycles_t ret = nsecs << cs->shift; + + do_div(ret, cs->mult + 1); + return ret; } @@ -227,4 +242,8 @@ static inline void update_vsyscall(struc } #endif +extern cycle_t get_monotonic_cycles(void); +extern unsigned long cycles_to_usecs(cycle_t); +extern cycle_t usecs_to_cycles(unsigned long); + #endif /* _LINUX_CLOCKSOURCE_H */ Index: linux-rt-rebase.q/include/linux/kernel.h =================================================================== --- linux-rt-rebase.q.orig/include/linux/kernel.h +++ linux-rt-rebase.q/include/linux/kernel.h @@ -156,6 +156,8 @@ asmlinkage int vprintk(const char *fmt, __attribute__ ((format (printf, 1, 0))); asmlinkage int printk(const char * fmt, ...) __attribute__ ((format (printf, 1, 2))) __cold; +extern void early_printk(const char *fmt, ...) + __attribute__ ((format (printf, 1, 2))); #else static inline int vprintk(const char *s, va_list args) __attribute__ ((format (printf, 1, 0))); Index: linux-rt-rebase.q/include/linux/latency_hist.h =================================================================== --- /dev/null +++ linux-rt-rebase.q/include/linux/latency_hist.h @@ -0,0 +1,32 @@ +/* + * kernel/latency_hist.h + * + * Add support for histograms of preemption-off latency and + * interrupt-off latency and wakeup latency, it depends on + * Real-Time Preemption Support. + * + * Copyright (C) 2005 MontaVista Software, Inc. 
+ * Yi Yang
+ *
+ */
+#ifndef _LINUX_LATENCY_HIST_H_
+#define _LINUX_LATENCY_HIST_H_
+
+enum {
+	INTERRUPT_LATENCY = 0,
+	PREEMPT_LATENCY,
+	WAKEUP_LATENCY
+};
+
+#define MAX_ENTRY_NUM 10240
+#define LATENCY_TYPE_NUM 3
+
+#ifdef CONFIG_LATENCY_HIST
+extern void latency_hist(int latency_type, int cpu, unsigned long latency);
+# define latency_hist_flag 1
+#else
+# define latency_hist(a,b,c) do { (void)(b); } while (0)
+# define latency_hist_flag 0
+#endif /* CONFIG_LATENCY_HIST */
+
+#endif /* ifndef _LINUX_LATENCY_HIST_H_ */
Index: linux-rt-rebase.q/include/linux/preempt.h
===================================================================
--- linux-rt-rebase.q.orig/include/linux/preempt.h
+++ linux-rt-rebase.q/include/linux/preempt.h
@@ -9,12 +9,26 @@
 #include
 #include
 
-#ifdef CONFIG_DEBUG_PREEMPT
-  extern void fastcall add_preempt_count(int val);
-  extern void fastcall sub_preempt_count(int val);
+#if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_CRITICAL_TIMING)
+  extern void notrace add_preempt_count(unsigned int val);
+  extern void notrace sub_preempt_count(unsigned int val);
+  extern void notrace mask_preempt_count(unsigned int mask);
+  extern void notrace unmask_preempt_count(unsigned int mask);
 #else
 # define add_preempt_count(val)	do { preempt_count() += (val); } while (0)
 # define sub_preempt_count(val)	do { preempt_count() -= (val); } while (0)
+# define mask_preempt_count(mask) \
+	do { preempt_count() |= (mask); } while (0)
+# define unmask_preempt_count(mask) \
+	do { preempt_count() &= ~(mask); } while (0)
+#endif
+
+#ifdef CONFIG_CRITICAL_TIMING
+ extern void touch_critical_timing(void);
+ extern void stop_critical_timing(void);
+#else
+# define touch_critical_timing()	do { } while (0)
+# define stop_critical_timing()	do { } while (0)
 #endif
 
 #define inc_preempt_count() add_preempt_count(1)
Index: linux-rt-rebase.q/include/linux/sched.h
===================================================================
--- linux-rt-rebase.q.orig/include/linux/sched.h
+++ linux-rt-rebase.q/include/linux/sched.h
@@ -275,6 +276,107 @@ static inline void touch_all_softlockup_
 }
 #endif
 
+#if defined(CONFIG_PREEMPT_TRACE) || defined(CONFIG_EVENT_TRACE)
+  extern void print_traces(struct task_struct *task);
+#else
+# define print_traces(task)			do { } while (0)
+#endif
+
+#ifdef CONFIG_FRAME_POINTER
+# ifndef CONFIG_ARM
+# define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0))
+# define CALLER_ADDR1 ((unsigned long)__builtin_return_address(1))
+# define CALLER_ADDR2 ((unsigned long)__builtin_return_address(2))
+# define CALLER_ADDR3 ((unsigned long)__builtin_return_address(3))
+# define CALLER_ADDR4 ((unsigned long)__builtin_return_address(4))
+# define CALLER_ADDR5 ((unsigned long)__builtin_return_address(5))
+# else
+ extern unsigned long arm_return_addr(int level);
+# define CALLER_ADDR0 arm_return_addr(0)
+# define CALLER_ADDR1 arm_return_addr(1)
+# define CALLER_ADDR2 arm_return_addr(2)
+# define CALLER_ADDR3 arm_return_addr(3)
+# define CALLER_ADDR4 arm_return_addr(4)
+# define CALLER_ADDR5 arm_return_addr(5)
+# endif
+#else
+# define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0))
+# define CALLER_ADDR1 0UL
+# define CALLER_ADDR2 0UL
+# define CALLER_ADDR3 0UL
+# define CALLER_ADDR4 0UL
+# define CALLER_ADDR5 0UL
+#endif
+
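+/*
+ * mcount() is the profiling hook that gcc emits at each function
+ * entry when the kernel is built with -pg (CONFIG_MCOUNT adds that
+ * flag in this patch's Makefile change); functions marked notrace
+ * are not instrumented.  The tracer's __mcount() logs the calling
+ * function and its parent for every such call.
+ */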
+#ifdef CONFIG_MCOUNT + extern void notrace mcount(void); +#else +# define mcount() do { } while (0) +#endif + +#ifdef CONFIG_EVENT_TRACE + extern int mcount_enabled, trace_enabled, trace_user_triggered, + trace_user_trigger_irq, trace_freerunning, trace_verbose, + trace_print_on_crash, trace_all_cpus, print_functions, + syscall_tracing, stackframe_tracing, trace_use_raw_cycles, + trace_all_runnable; + extern void notrace trace_special(unsigned long v1, unsigned long v2, unsigned long v3); + extern void notrace trace_special_pid(int pid, unsigned long v1, unsigned long v2); + extern void notrace trace_special_u64(unsigned long long v1, unsigned long v2); + extern void notrace trace_special_sym(void); + extern void stop_trace(void); +# define start_trace() do { trace_enabled = 1; } while (0) + extern void print_last_trace(void); + extern void nmi_trace(unsigned long eip, unsigned long parent_eip, + unsigned long flags); + extern long user_trace_start(void); + extern long user_trace_stop(void); + extern void trace_cmdline(void); + extern void init_tracer(void); +#else +# define mcount_enabled 0 +# define trace_enabled 0 +# define syscall_tracing 0 +# define stackframe_tracing 0 +# define trace_user_triggered 0 +# define trace_freerunning 0 +# define trace_all_cpus 0 +# define trace_verbose 0 +# define trace_special(v1,v2,v3) do { } while (0) +# define trace_special_pid(pid,v1,v2) do { } while (0) +# define trace_special_u64(v1,v2) do { } while (0) +# define trace_special_sym() do { } while (0) +# define stop_trace() do { } while (0) +# define start_trace() do { } while (0) +# define print_last_trace() do { } while (0) +# define nmi_trace(eip, parent_eip, flags) do { } while (0) +# define user_trace_start() do { } while (0) +# define user_trace_stop() do { } while (0) +# define trace_cmdline() do { } while (0) +# define init_tracer() do { } while (0) +#endif + +extern int timeofday_API_hacks(void *tv, void *tz); + +#ifdef CONFIG_WAKEUP_TIMING + extern int wakeup_timing; + extern void __trace_start_sched_wakeup(struct task_struct *p); + extern void trace_stop_sched_switched(struct task_struct *p); + extern void trace_change_sched_cpu(struct task_struct *p, int new_cpu); +#else +# define wakeup_timing 0 +# define __trace_start_sched_wakeup(p) do { } while (0) +# define trace_stop_sched_switched(p) do { } while (0) +# define trace_change_sched_cpu(p, cpu) do { } while (0) +#endif + +#ifdef CONFIG_CRITICAL_IRQSOFF_TIMING + extern void notrace time_hardirqs_on(unsigned long a0, unsigned long a1); + extern void notrace time_hardirqs_off(unsigned long a0, unsigned long a1); +#else +# define time_hardirqs_on(a0, a1) do { } while (0) +# define time_hardirqs_off(a0, a1) do { } while (0) +#endif /* Attach to any functions which should be ignored in wchan output. 
*/ #define __sched __attribute__((__section__(".sched.text"))) @@ -1109,6 +1211,13 @@ struct task_struct { unsigned int lockdep_recursion; #endif +#define MAX_PREEMPT_TRACE 16 + +#ifdef CONFIG_PREEMPT_TRACE + unsigned long preempt_trace_eip[MAX_PREEMPT_TRACE]; + unsigned long preempt_trace_parent_eip[MAX_PREEMPT_TRACE]; +#endif + /* journalling filesystem info */ void *journal_info; Index: linux-rt-rebase.q/init/main.c =================================================================== --- linux-rt-rebase.q.orig/init/main.c +++ linux-rt-rebase.q/init/main.c @@ -592,6 +592,8 @@ asmlinkage void __init start_kernel(void if (panic_later) panic(panic_later, panic_param); + init_tracer(); + lockdep_info(); /* Index: linux-rt-rebase.q/kernel/Makefile =================================================================== --- linux-rt-rebase.q.orig/kernel/Makefile +++ linux-rt-rebase.q/kernel/Makefile @@ -39,6 +39,11 @@ obj-$(CONFIG_COMPAT) += compat.o obj-$(CONFIG_CPUSETS) += cpuset.o obj-$(CONFIG_IKCONFIG) += configs.o obj-$(CONFIG_STOP_MACHINE) += stop_machine.o +obj-$(CONFIG_DEBUG_PREEMPT) += latency_trace.o +obj-$(CONFIG_WAKEUP_TIMING) += latency_trace.o +obj-$(CONFIG_EVENT_TRACE) += latency_trace.o +obj-$(CONFIG_CRITICAL_TIMING) += latency_trace.o +obj-$(CONFIG_LATENCY_HIST) += latency_hist.o obj-$(CONFIG_AUDIT) += audit.o auditfilter.o obj-$(CONFIG_AUDITSYSCALL) += auditsc.o obj-$(CONFIG_KPROBES) += kprobes.o Index: linux-rt-rebase.q/kernel/fork.c =================================================================== --- linux-rt-rebase.q.orig/kernel/fork.c +++ linux-rt-rebase.q/kernel/fork.c @@ -996,7 +996,7 @@ static struct task_struct *copy_process( rt_mutex_init_task(p); -#ifdef CONFIG_TRACE_IRQFLAGS +#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_LOCKDEP) DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled); DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); #endif Index: linux-rt-rebase.q/kernel/latency_hist.c =================================================================== --- /dev/null +++ linux-rt-rebase.q/kernel/latency_hist.c @@ -0,0 +1,267 @@ +/* + * kernel/latency_hist.c + * + * Add support for histograms of preemption-off latency and + * interrupt-off latency and wakeup latency, it depends on + * Real-Time Preemption Support. + * + * Copyright (C) 2005 MontaVista Software, Inc. 
+ * Yi Yang
+ *
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+typedef struct hist_data_struct {
+	atomic_t hist_mode; /* 0 log, 1 don't log */
+	unsigned long min_lat;
+	unsigned long avg_lat;
+	unsigned long max_lat;
+	unsigned long long beyond_hist_bound_samples;
+	unsigned long long accumulate_lat;
+	unsigned long long total_samples;
+	unsigned long long hist_array[MAX_ENTRY_NUM];
+} hist_data_t;
+
+static struct proc_dir_entry * latency_hist_root = NULL;
+static char * latency_hist_proc_dir_root = "latency_hist";
+
+static char * percpu_proc_name = "CPU";
+
+#ifdef CONFIG_INTERRUPT_OFF_HIST
+static DEFINE_PER_CPU(hist_data_t, interrupt_off_hist);
+static char * interrupt_off_hist_proc_dir = "interrupt_off_latency";
+#endif
+
+#ifdef CONFIG_PREEMPT_OFF_HIST
+static DEFINE_PER_CPU(hist_data_t, preempt_off_hist);
+static char * preempt_off_hist_proc_dir = "preempt_off_latency";
+#endif
+
+#ifdef CONFIG_WAKEUP_LATENCY_HIST
+static DEFINE_PER_CPU(hist_data_t, wakeup_latency_hist);
+static char * wakeup_latency_hist_proc_dir = "wakeup_latency";
+#endif
+
+static struct proc_dir_entry *entry[LATENCY_TYPE_NUM][NR_CPUS];
+
+static inline u64 u64_div(u64 x, u64 y)
+{
+	do_div(x, y);
+	return x;
+}
+
+void latency_hist(int latency_type, int cpu, unsigned long latency)
+{
+	hist_data_t * my_hist;
+
+	if ((cpu < 0) || (cpu >= NR_CPUS) || (latency_type < INTERRUPT_LATENCY)
+			|| (latency_type > WAKEUP_LATENCY) || (latency < 0))
+		return;
+
+	switch(latency_type) {
+#ifdef CONFIG_INTERRUPT_OFF_HIST
+	case INTERRUPT_LATENCY:
+		my_hist = (hist_data_t *)&per_cpu(interrupt_off_hist, cpu);
+		break;
+#endif
+
+#ifdef CONFIG_PREEMPT_OFF_HIST
+	case PREEMPT_LATENCY:
+		my_hist = (hist_data_t *)&per_cpu(preempt_off_hist, cpu);
+		break;
+#endif
+
+#ifdef CONFIG_WAKEUP_LATENCY_HIST
+	case WAKEUP_LATENCY:
+		my_hist = (hist_data_t *)&per_cpu(wakeup_latency_hist, cpu);
+		break;
+#endif
+	default:
+		return;
+	}
+
+	if (atomic_read(&my_hist->hist_mode) == 0)
+		return;
+
+	if (latency >= MAX_ENTRY_NUM)
+		my_hist->beyond_hist_bound_samples++;
+	else
+		my_hist->hist_array[latency]++;
+
+	if (latency < my_hist->min_lat)
+		my_hist->min_lat = latency;
+	else if (latency > my_hist->max_lat)
+		my_hist->max_lat = latency;
+
+	my_hist->total_samples++;
+	my_hist->accumulate_lat += latency;
+	my_hist->avg_lat = (unsigned long) u64_div(my_hist->accumulate_lat,
+						  my_hist->total_samples);
+	return;
+}
+
+static void *l_start(struct seq_file *m, loff_t * pos)
+{
+	loff_t *index_ptr = kmalloc(sizeof(loff_t), GFP_KERNEL);
+	loff_t index = *pos;
+	hist_data_t *my_hist = (hist_data_t *) m->private;
+
+	if (!index_ptr)
+		return NULL;
+
+	if (index == 0) {
+		atomic_dec(&my_hist->hist_mode);
+		seq_printf(m, "#Minimum latency: %lu microseconds.\n"
+			"#Average latency: %lu microseconds.\n"
+			"#Maximum latency: %lu microseconds.\n"
+			"#Total samples: %llu\n"
+			"#There are %llu samples greater than or equal to %d microseconds\n"
+			"#usecs\t%16s\n"
+			, my_hist->min_lat
+			, my_hist->avg_lat
+			, my_hist->max_lat
+			, my_hist->total_samples
+			, my_hist->beyond_hist_bound_samples
+			, MAX_ENTRY_NUM, "samples");
+	}
+	if (index >= MAX_ENTRY_NUM) {
+		kfree(index_ptr);
+		return NULL;
+	}
+
+	*index_ptr = index;
+	return index_ptr;
+}
+
+static void *l_next(struct seq_file *m, void *p, loff_t * pos)
+{
+	loff_t *index_ptr = p;
+	hist_data_t *my_hist = (hist_data_t *) m->private;
+
+	if (++*pos >= MAX_ENTRY_NUM) {
+		atomic_inc(&my_hist->hist_mode);
+		return NULL;
+	}
+	*index_ptr = *pos;
+	return index_ptr;
+}
+
+static void l_stop(struct seq_file *m,
void *p) +{ + kfree(p); +} + +static int l_show(struct seq_file *m, void *p) +{ + int index = *(loff_t *) p; + hist_data_t *my_hist = (hist_data_t *) m->private; + + seq_printf(m, "%5d\t%16llu\n", index, my_hist->hist_array[index]); + return 0; +} + +static struct seq_operations latency_hist_seq_op = { + .start = l_start, + .next = l_next, + .stop = l_stop, + .show = l_show +}; + +static int latency_hist_seq_open(struct inode *inode, struct file *file) +{ + struct proc_dir_entry *entry_ptr = NULL; + int ret, i, j, break_flags = 0; + struct seq_file *seq; + + entry_ptr = PDE(file->f_dentry->d_inode); + for (i = 0; i < LATENCY_TYPE_NUM; i++) { + for (j = 0; j < NR_CPUS; j++) { + if (entry[i][j] == NULL) + continue; + if (entry_ptr->low_ino == entry[i][j]->low_ino) { + break_flags = 1; + break; + } + } + if (break_flags == 1) + break; + } + ret = seq_open(file, &latency_hist_seq_op); + if (break_flags == 1) { + seq = (struct seq_file *)file->private_data; + seq->private = entry[i][j]->data; + } + return ret; +} + +static struct file_operations latency_hist_seq_fops = { + .open = latency_hist_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +static __init int latency_hist_init(void) +{ + struct proc_dir_entry *tmp_parent_proc_dir; + int i = 0, len = 0; + hist_data_t *my_hist; + char procname[64]; + + latency_hist_root = proc_mkdir(latency_hist_proc_dir_root, NULL); + + +#ifdef CONFIG_INTERRUPT_OFF_HIST + tmp_parent_proc_dir = proc_mkdir(interrupt_off_hist_proc_dir, latency_hist_root); + for (i = 0; i < NR_CPUS; i++) { + len = sprintf(procname, "%s%d", percpu_proc_name, i); + procname[len] = '\0'; + entry[INTERRUPT_LATENCY][i] = + create_proc_entry(procname, 0, tmp_parent_proc_dir); + entry[INTERRUPT_LATENCY][i]->data = (void *)&per_cpu(interrupt_off_hist, i); + entry[INTERRUPT_LATENCY][i]->proc_fops = &latency_hist_seq_fops; + my_hist = (hist_data_t *) entry[INTERRUPT_LATENCY][i]->data; + atomic_set(&my_hist->hist_mode,1); + my_hist->min_lat = 0xFFFFFFFFUL; + } +#endif + +#ifdef CONFIG_PREEMPT_OFF_HIST + tmp_parent_proc_dir = proc_mkdir(preempt_off_hist_proc_dir, latency_hist_root); + for (i = 0; i < NR_CPUS; i++) { + len = sprintf(procname, "%s%d", percpu_proc_name, i); + procname[len] = '\0'; + entry[PREEMPT_LATENCY][i] = + create_proc_entry(procname, 0, tmp_parent_proc_dir); + entry[PREEMPT_LATENCY][i]->data = (void *)&per_cpu(preempt_off_hist, i); + entry[PREEMPT_LATENCY][i]->proc_fops = &latency_hist_seq_fops; + my_hist = (hist_data_t *) entry[PREEMPT_LATENCY][i]->data; + atomic_set(&my_hist->hist_mode,1); + my_hist->min_lat = 0xFFFFFFFFUL; + } +#endif + +#ifdef CONFIG_WAKEUP_LATENCY_HIST + tmp_parent_proc_dir = proc_mkdir(wakeup_latency_hist_proc_dir, latency_hist_root); + for (i = 0; i < NR_CPUS; i++) { + len = sprintf(procname, "%s%d", percpu_proc_name, i); + procname[len] = '\0'; + entry[WAKEUP_LATENCY][i] = + create_proc_entry(procname, 0, tmp_parent_proc_dir); + entry[WAKEUP_LATENCY][i]->data = (void *)&per_cpu(wakeup_latency_hist, i); + entry[WAKEUP_LATENCY][i]->proc_fops = &latency_hist_seq_fops; + my_hist = (hist_data_t *) entry[WAKEUP_LATENCY][i]->data; + atomic_set(&my_hist->hist_mode,1); + my_hist->min_lat = 0xFFFFFFFFUL; + } +#endif + return 0; + +} + +__initcall(latency_hist_init); + Index: linux-rt-rebase.q/kernel/latency_trace.c =================================================================== --- /dev/null +++ linux-rt-rebase.q/kernel/latency_trace.c @@ -0,0 +1,2742 @@ +/* + * kernel/latency_trace.c + * + * Copyright (C) 2004-2006 
Ingo Molnar + * Copyright (C) 2004 William Lee Irwin III + */ +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef DEFINE_RAW_SPINLOCK +# define DEFINE_RAW_SPINLOCK DEFINE_SPINLOCK +#endif + +#ifndef RAW_SPIN_LOCK_UNLOCKED +# define RAW_SPIN_LOCK_UNLOCKED SPIN_LOCK_UNLOCKED +#endif + +int trace_use_raw_cycles = 0; + +#define __raw_spinlock_t raw_spinlock_t +#define need_resched_delayed() 0 + +#ifdef CONFIG_EVENT_TRACE +/* + * Convert raw cycles to usecs. + * Note: this is not the 'clocksource cycles' value, it's the raw + * cycle counter cycles. We use GTOD to timestamp latency start/end + * points, but the trace entries inbetween are timestamped with + * get_cycles(). + */ +static unsigned long notrace cycles_to_us(cycle_t delta) +{ + if (!trace_use_raw_cycles) + return cycles_to_usecs(delta); +#ifdef CONFIG_X86 + do_div(delta, cpu_khz/1000+1); +#elif defined(CONFIG_PPC) + delta = mulhwu(tb_to_us, delta); +#elif defined(CONFIG_ARM) + delta = mach_cycles_to_usecs(delta); +#else + #error Implement cycles_to_usecs. +#endif + + return (unsigned long) delta; +} +#endif + +static notrace inline cycle_t now(void) +{ + if (trace_use_raw_cycles) + return get_cycles(); + return get_monotonic_cycles(); +} + +#ifndef irqs_off +# define irqs_off() 0 +#endif + +#ifndef DEBUG_WARN_ON +static inline int DEBUG_WARN_ON(int cond) +{ + WARN_ON(cond); + return 0; +} +#endif + +#ifdef CONFIG_CRITICAL_IRQSOFF_TIMING +# ifdef CONFIG_CRITICAL_PREEMPT_TIMING +# define irqs_off_preempt_count() preempt_count() +# else +# define irqs_off_preempt_count() 0 +# endif +#endif + +#ifdef CONFIG_WAKEUP_TIMING +struct sch_struct { + __raw_spinlock_t trace_lock; + struct task_struct *task; + int cpu; + struct cpu_trace *tr; +} ____cacheline_aligned_in_smp; + +static __cacheline_aligned_in_smp struct sch_struct sch = + { trace_lock: __RAW_SPIN_LOCK_UNLOCKED }; + +int wakeup_timing = 1; +#endif + +/* + * Track maximum latencies and save the trace: + */ + +/* + * trace_stop_sched_switched must not be called with runqueue locks held! + */ +static __cacheline_aligned_in_smp DECLARE_MUTEX(max_mutex); + +/* + * Sequence count - we record it when starting a measurement and + * skip the latency if the sequence has changed - some other section + * did a maximum and could disturb our measurement with serial console + * printouts, etc. Truly coinciding maximum latencies should be rare + * and what happens together happens separately as well, so this doesnt + * decrease the validity of the maximum found: + */ +static __cacheline_aligned_in_smp unsigned long max_sequence; + +enum trace_type +{ + __TRACE_FIRST_TYPE = 0, + + TRACE_FN, + TRACE_SPECIAL, + TRACE_SPECIAL_PID, + TRACE_SPECIAL_U64, + TRACE_SPECIAL_SYM, + TRACE_CMDLINE, + TRACE_SYSCALL, + TRACE_SYSRET, + + __TRACE_LAST_TYPE +}; + +enum trace_flag_type +{ + TRACE_FLAG_IRQS_OFF = 0x01, + TRACE_FLAG_NEED_RESCHED = 0x02, + TRACE_FLAG_NEED_RESCHED_DELAYED = 0x04, + TRACE_FLAG_HARDIRQ = 0x08, + TRACE_FLAG_SOFTIRQ = 0x10, + TRACE_FLAG_IRQS_HARD_OFF = 0x20, +}; + +/* + * Maximum preemption latency measured. Initialize to maximum, + * we clear it after bootup. + */ +#ifdef CONFIG_LATENCY_HIST +unsigned long preempt_max_latency = (cycle_t)0UL; +#else +unsigned long preempt_max_latency = (cycle_t)ULONG_MAX; +#endif + +unsigned long preempt_thresh; + +/* + * Should this new latency be reported/recorded? 
+ */ +static int report_latency(cycle_t delta) +{ + if (latency_hist_flag && !trace_user_triggered) + return 1; + + if (preempt_thresh) { + if (delta < preempt_thresh) + return 0; + } else { + if (delta <= preempt_max_latency) + return 0; + } + return 1; +} + +#ifdef CONFIG_EVENT_TRACE + +/* + * Number of per-CPU trace entries: + */ +#define MAX_TRACE (65536UL*16UL) + +#define CMDLINE_BYTES 16 + +/* + * 32 bytes on 32-bit platforms: + */ +struct trace_entry { + char type; + char cpu; + char flags; + char preempt_count; // assumes PREEMPT_MASK is 8 bits or less + int pid; + cycle_t timestamp; + union { + struct { + unsigned long eip; + unsigned long parent_eip; + } fn; + struct { + unsigned long eip; + unsigned long v1, v2, v3; + } special; + struct { + unsigned char str[CMDLINE_BYTES]; + } cmdline; + struct { + unsigned long nr; // highest bit: compat call + unsigned long p1, p2, p3; + } syscall; + struct { + unsigned long ret; + } sysret; + struct { + unsigned long __pad3[4]; + } pad; + } u; +} __attribute__((packed)); + +#endif + +struct cpu_trace { + atomic_t disabled; + unsigned long trace_idx; + cycle_t preempt_timestamp; + unsigned long critical_start, critical_end; + unsigned long critical_sequence; + atomic_t underrun; + atomic_t overrun; + int early_warning; + int latency_type; + int cpu; + +#ifdef CONFIG_EVENT_TRACE + struct trace_entry *trace; + char comm[CMDLINE_BYTES]; + pid_t pid; + unsigned long uid; + unsigned long nice; + unsigned long policy; + unsigned long rt_priority; + unsigned long saved_latency; +#endif +#ifdef CONFIG_DEBUG_STACKOVERFLOW + unsigned long stack_check; +#endif +} ____cacheline_aligned_in_smp; + +static struct cpu_trace cpu_traces[NR_CPUS] ____cacheline_aligned_in_smp = +{ [0 ... NR_CPUS-1] = { +#ifdef CONFIG_DEBUG_STACKOVERFLOW + .stack_check = 1 +#endif + } }; + +#ifdef CONFIG_EVENT_TRACE + +int trace_enabled = 0; +int syscall_tracing = 1; +int stackframe_tracing = 0; +int mcount_enabled = 0; +int trace_freerunning = 0; +int trace_print_on_crash = 0; +int trace_verbose = 0; +int trace_all_cpus = 0; +int print_functions = 0; +int trace_all_runnable = 0; + +/* + * user-triggered via gettimeofday(0,1)/gettimeofday(0,0) + */ +int trace_user_triggered = 0; +int trace_user_trigger_irq = -1; + +struct saved_trace_struct { + int cpu; + cycle_t first_timestamp, last_timestamp; + struct cpu_trace traces[NR_CPUS]; +} ____cacheline_aligned_in_smp; + +/* + * The current worst-case trace: + */ +static struct saved_trace_struct max_tr; + +/* + * /proc/latency_trace atomicity: + */ +static DECLARE_MUTEX(out_mutex); + +static struct saved_trace_struct out_tr; + +static void notrace printk_name(unsigned long eip) +{ + char namebuf[KSYM_NAME_LEN+1]; + unsigned long size, offset; + const char *sym_name; + char *modname; + + sym_name = kallsyms_lookup(eip, &size, &offset, &modname, namebuf); + if (sym_name) + printk("%s+%#lx/%#lx", sym_name, offset, size); + else + printk("<%08lx>", eip); +} + +#ifdef CONFIG_DEBUG_STACKOVERFLOW + +#ifndef STACK_WARN +# define STACK_WARN (THREAD_SIZE/8) +#endif + +#define MIN_STACK_NEEDED (sizeof(struct thread_info) + STACK_WARN) +#define MAX_STACK (THREAD_SIZE - sizeof(struct thread_info)) + +#if (defined(__i386__) || defined(__x86_64__)) && defined(CONFIG_FRAME_POINTER) +# define PRINT_EXACT_STACKFRAME +#endif + +#ifdef PRINT_EXACT_STACKFRAME +static unsigned long *worst_stack_bp; +#endif +static DEFINE_RAW_SPINLOCK(worst_stack_lock); +unsigned long worst_stack_left = THREAD_SIZE; +static unsigned long worst_stack_printed = THREAD_SIZE; 
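+/*
+ * Snapshot of the deepest stack usage seen so far: the owning task's
+ * comm and pid, the stack pointer at sample time and a raw copy of
+ * the whole thread stack, all updated under worst_stack_lock by
+ * fill_worst_stack() below.
+ */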
+static char worst_stack_comm[TASK_COMM_LEN+1]; +static int worst_stack_pid; +static unsigned long worst_stack_sp; +static char worst_stack[THREAD_SIZE]; + +static notrace void fill_worst_stack(unsigned long stack_left) +{ + unsigned long flags; + + /* + * On x64, we must not read the PDA during early bootup: + */ +#ifdef CONFIG_X86_64 + if (system_state == SYSTEM_BOOTING) + return; +#endif + spin_lock_irqsave(&worst_stack_lock, flags); + if (likely(stack_left < worst_stack_left)) { + worst_stack_left = stack_left; + memcpy(worst_stack, current_thread_info(), THREAD_SIZE); + worst_stack_sp = (unsigned long)&stack_left; + memcpy(worst_stack_comm, current->comm, TASK_COMM_LEN); + worst_stack_pid = current->pid; +#ifdef PRINT_EXACT_STACKFRAME +# ifdef __i386__ + asm ("mov %%ebp, %0\n" :"=g"(worst_stack_bp)); +# elif defined(__x86_64__) + asm ("mov %%rbp, %0\n" :"=g"(worst_stack_bp)); +# else +# error Poke the author of above asm code lines ! +# endif +#endif + } + spin_unlock_irqrestore(&worst_stack_lock, flags); +} + +#ifdef PRINT_EXACT_STACKFRAME + +/* + * This takes a BP offset to point the BP back into the saved stack, + * the original stack might be long gone (but the stackframe within + * the saved copy still contains references to it). + */ +#define CONVERT_TO_SAVED_STACK(bp) \ + ((void *)worst_stack + ((unsigned long)bp & (THREAD_SIZE-1))) + +static void show_stackframe(void) +{ + unsigned long addr, frame_size, *bp, *prev_bp, sum = 0; + + bp = CONVERT_TO_SAVED_STACK(worst_stack_bp); + + while (bp[0]) { + addr = bp[1]; + if (!kernel_text_address(addr)) + break; + + prev_bp = bp; + bp = CONVERT_TO_SAVED_STACK((unsigned long *)bp[0]); + + frame_size = (bp - prev_bp) * sizeof(long); + + if (frame_size < THREAD_SIZE) { + printk("{ %4ld} ", frame_size); + sum += frame_size; + } else + printk("{=%4ld} ", sum); + + printk("[<%08lx>] ", addr); + printk_name(addr); + printk("\n"); + } +} + +#else + +static inline int valid_stack_ptr(void *p) +{ + return p > (void *)worst_stack && + p < (void *)worst_stack + THREAD_SIZE - 3; +} + +static void show_stackframe(void) +{ + unsigned long prev_frame, addr; + unsigned long *stack; + + prev_frame = (unsigned long)(worst_stack + + (worst_stack_sp & (THREAD_SIZE-1))); + stack = (unsigned long *)prev_frame; + + while (valid_stack_ptr(stack)) { + addr = *stack++; + if (__kernel_text_address(addr)) { + printk("(%4ld) ", (unsigned long)stack - prev_frame); + printk("[<%08lx>] ", addr); + print_symbol("%s\n", addr); + prev_frame = (unsigned long)stack; + } + if ((char *)stack >= worst_stack + THREAD_SIZE) + break; + } +} + +#endif + +static notrace void __print_worst_stack(void) +{ + unsigned long fill_ratio; + printk("----------------------------->\n"); + printk("| new stack fill maximum: %s/%d, %ld bytes (out of %ld bytes).\n", + worst_stack_comm, worst_stack_pid, + MAX_STACK-worst_stack_left, (long)MAX_STACK); + fill_ratio = (MAX_STACK-worst_stack_left)*100/(long)MAX_STACK; + printk("| Stack fill ratio: %02ld%%", fill_ratio); + if (fill_ratio >= 90) + printk(" - BUG: that's quite high, please report this!\n"); + else + printk(" - that's still OK, no need to report this.\n"); + printk("------------|\n"); + + show_stackframe(); + printk("<---------------------------\n\n"); +} + +static notrace void print_worst_stack(void) +{ + unsigned long flags; + + if (irqs_disabled() || preempt_count()) + return; + + spin_lock_irqsave(&worst_stack_lock, flags); + if (worst_stack_printed == worst_stack_left) { + spin_unlock_irqrestore(&worst_stack_lock, flags); + return; 
+ } + worst_stack_printed = worst_stack_left; + spin_unlock_irqrestore(&worst_stack_lock, flags); + + __print_worst_stack(); +} + +static notrace void debug_stackoverflow(struct cpu_trace *tr) +{ + long stack_left; + + if (unlikely(tr->stack_check <= 0)) + return; + atomic_inc(&tr->disabled); + + /* Debugging check for stack overflow: is there less than 1KB free? */ +#ifdef __i386__ + __asm__ __volatile__("and %%esp,%0" : + "=r" (stack_left) : "0" (THREAD_SIZE - 1)); +#elif defined(__x86_64__) + __asm__ __volatile__("and %%rsp,%0" : + "=r" (stack_left) : "0" (THREAD_SIZE - 1)); +#else +# error Poke the author of above asm code lines ! +#endif + if (unlikely(stack_left < MIN_STACK_NEEDED)) { + tr->stack_check = 0; + printk(KERN_ALERT "BUG: stack overflow: only %ld bytes left! [%08lx...(%08lx-%08lx)]\n", + stack_left - sizeof(struct thread_info), + (long)&stack_left, + (long)current_thread_info(), + (long)current_thread_info() + THREAD_SIZE); + fill_worst_stack(stack_left); + __print_worst_stack(); + goto out; + } + if (unlikely(stack_left < worst_stack_left)) { + tr->stack_check--; + fill_worst_stack(stack_left); + print_worst_stack(); + tr->stack_check++; + } else + if (worst_stack_printed != worst_stack_left) { + tr->stack_check--; + print_worst_stack(); + tr->stack_check++; + } +out: + atomic_dec(&tr->disabled); +} + +#endif + +#ifdef CONFIG_EARLY_PRINTK +static void notrace early_printk_name(unsigned long eip) +{ + char namebuf[KSYM_NAME_LEN+1]; + unsigned long size, offset; + const char *sym_name; + char *modname; + + sym_name = kallsyms_lookup(eip, &size, &offset, &modname, namebuf); + if (sym_name) + early_printk("%s <%08lx>", sym_name, eip); + else + early_printk("<%08lx>", eip); +} + +static __raw_spinlock_t early_print_lock = __RAW_SPIN_LOCK_UNLOCKED; + +static void notrace early_print_entry(struct trace_entry *entry) +{ + int hardirq, softirq; + + __raw_spin_lock(&early_print_lock); + early_printk("%-5d ", entry->pid); + + early_printk("%d%c%c", + entry->cpu, + (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : + (entry->flags & TRACE_FLAG_IRQS_HARD_OFF) ? 'D' : '.', + (entry->flags & TRACE_FLAG_NEED_RESCHED_DELAYED) ? 'n' : + ((entry->flags & TRACE_FLAG_NEED_RESCHED) ? 
'N' : '.')); + + hardirq = entry->flags & TRACE_FLAG_HARDIRQ; + softirq = entry->flags & TRACE_FLAG_SOFTIRQ; + if (hardirq && softirq) + early_printk("H"); + else { + if (hardirq) + early_printk("h"); + else { + if (softirq) + early_printk("s"); + else + early_printk("."); + } + } + + early_printk(":%d: ", entry->preempt_count); + + if (entry->type == TRACE_FN) { + early_printk_name(entry->u.fn.eip); + early_printk(" <= ("); + early_printk_name(entry->u.fn.parent_eip); + early_printk(")\n"); + } else { + /* special entries: */ + early_printk_name(entry->u.special.eip); + early_printk(": <%08lx> <%08lx> <%08lx>\n", + entry->u.special.v1, + entry->u.special.v2, + entry->u.special.v3); + } + __raw_spin_unlock(&early_print_lock); +} +#else +# define early_print_entry(x) do { } while(0) +#endif + +static void notrace +____trace(int cpu, enum trace_type type, struct cpu_trace *tr, + unsigned long eip, unsigned long parent_eip, + unsigned long v1, unsigned long v2, unsigned long v3, + unsigned long flags) +{ + struct trace_entry *entry; + unsigned long idx, idx_next; + cycle_t timestamp; + u32 pc; + +#ifdef CONFIG_DEBUG_PREEMPT +// WARN_ON(!atomic_read(&tr->disabled)); +#endif + if (!tr->critical_start && !trace_user_triggered && !trace_all_cpus && + !trace_print_on_crash && !print_functions) + goto out; + /* + * Allocate the next index. Make sure an NMI (or interrupt) + * has not taken it away. Potentially redo the timestamp as + * well to make sure the trace timestamps are in chronologic + * order. + */ +again: + idx = tr->trace_idx; + idx_next = idx + 1; + timestamp = now(); + + if (unlikely((trace_freerunning || print_functions || atomic_read(&tr->underrun)) && + (idx_next >= MAX_TRACE) && !atomic_read(&tr->overrun))) { + atomic_inc(&tr->underrun); + idx_next = 0; + } + if (unlikely(idx >= MAX_TRACE)) { + atomic_inc(&tr->overrun); + goto out; + } +#ifdef __HAVE_ARCH_CMPXCHG + if (unlikely(cmpxchg(&tr->trace_idx, idx, idx_next) != idx)) { + if (idx_next == 0) + atomic_dec(&tr->underrun); + goto again; + } +#else +# ifdef CONFIG_SMP +# error CMPXCHG missing +# else + /* No worry, we are protected by the atomic_incr(&tr->disabled) + * in __trace further down + */ + tr->trace_idx = idx_next; +# endif +#endif + if (unlikely(idx_next != 0 && atomic_read(&tr->underrun))) + atomic_inc(&tr->underrun); + + pc = preempt_count(); + + if (unlikely(!tr->trace)) + goto out; + entry = tr->trace + idx; + entry->type = type; +#ifdef CONFIG_SMP + entry->cpu = cpu; +#endif + entry->flags = (irqs_off() ? TRACE_FLAG_IRQS_OFF : 0) | + (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_HARD_OFF : 0)| + ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) | + ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) | + (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) | + (need_resched_delayed() ? 
TRACE_FLAG_NEED_RESCHED_DELAYED : 0); + entry->preempt_count = pc & 0xff; + entry->pid = current->pid; + entry->timestamp = timestamp; + + switch (type) { + case TRACE_FN: + entry->u.fn.eip = eip; + entry->u.fn.parent_eip = parent_eip; + if (unlikely(print_functions && !in_interrupt())) + early_print_entry(entry); + break; + case TRACE_SPECIAL: + case TRACE_SPECIAL_PID: + case TRACE_SPECIAL_U64: + case TRACE_SPECIAL_SYM: + entry->u.special.eip = eip; + entry->u.special.v1 = v1; + entry->u.special.v2 = v2; + entry->u.special.v3 = v3; + if (unlikely(print_functions && !in_interrupt())) + early_print_entry(entry); + break; + case TRACE_SYSCALL: + entry->u.syscall.nr = eip; + entry->u.syscall.p1 = v1; + entry->u.syscall.p2 = v2; + entry->u.syscall.p3 = v3; + break; + case TRACE_SYSRET: + entry->u.sysret.ret = eip; + break; + case TRACE_CMDLINE: + memcpy(entry->u.cmdline.str, current->comm, CMDLINE_BYTES); + break; + default: + break; + } +out: + ; +} + +static inline void notrace +___trace(enum trace_type type, unsigned long eip, unsigned long parent_eip, + unsigned long v1, unsigned long v2, + unsigned long v3) +{ + struct cpu_trace *tr; + unsigned long flags; + int cpu; + + if (unlikely(trace_enabled <= 0)) + return; + +#if defined(CONFIG_DEBUG_STACKOVERFLOW) && defined(CONFIG_X86) + debug_stackoverflow(cpu_traces + raw_smp_processor_id()); +#endif + + raw_local_irq_save(flags); + cpu = raw_smp_processor_id(); + /* + * Trace on the CPU where the current highest-prio task + * is waiting to become runnable: + */ +#ifdef CONFIG_WAKEUP_TIMING + if (wakeup_timing && !trace_all_cpus && !trace_print_on_crash && + !print_functions) { + if (!sch.tr || cpu != sch.cpu) + goto out; + tr = sch.tr; + } else + tr = cpu_traces + cpu; +#else + tr = cpu_traces + cpu; +#endif + atomic_inc(&tr->disabled); + if (likely(atomic_read(&tr->disabled) == 1)) { +//#define DEBUG_STACK_POISON +#ifdef DEBUG_STACK_POISON + char stack; + + memset(&stack - 128, 0x34, 128); +#endif + ____trace(cpu, type, tr, eip, parent_eip, v1, v2, v3, flags); + } + atomic_dec(&tr->disabled); +#ifdef CONFIG_WAKEUP_TIMING +out: +#endif + raw_local_irq_restore(flags); +} + +/* + * Special, ad-hoc tracepoints: + */ +void notrace trace_special(unsigned long v1, unsigned long v2, unsigned long v3) +{ + ___trace(TRACE_SPECIAL, CALLER_ADDR0, 0, v1, v2, v3); +} + +EXPORT_SYMBOL(trace_special); + +void notrace trace_special_pid(int pid, unsigned long v1, unsigned long v2) +{ + ___trace(TRACE_SPECIAL_PID, CALLER_ADDR0, 0, pid, v1, v2); +} + +EXPORT_SYMBOL(trace_special_pid); + +void notrace trace_special_u64(unsigned long long v1, unsigned long v2) +{ + ___trace(TRACE_SPECIAL_U64, CALLER_ADDR0, 0, + (unsigned long) (v1 >> 32), (unsigned long) (v1 & 0xFFFFFFFF), + v2); +} + +EXPORT_SYMBOL(trace_special_u64); + +void notrace trace_special_sym(void) +{ +#define STACK_ENTRIES 8 + unsigned long entries[STACK_ENTRIES]; + struct stack_trace trace; + + if (!trace_enabled || !stackframe_tracing) + return; + + trace.entries = entries; + trace.skip = 3; + trace.max_entries = STACK_ENTRIES; + trace.nr_entries = 0; + + save_stack_trace(&trace); + /* + * clear out the rest: + */ + while (trace.nr_entries < trace.max_entries) + entries[trace.nr_entries++] = 0; + + ___trace(TRACE_SPECIAL_SYM, entries[0], 0, + entries[1], entries[2], entries[3]); + ___trace(TRACE_SPECIAL_SYM, entries[4], 0, + entries[5], entries[6], entries[7]); +} + +EXPORT_SYMBOL(trace_special_sym); + +/* + * Non-inlined function: + */ +void notrace __trace(unsigned long eip, unsigned long 
parent_eip) +{ + ___trace(TRACE_FN, eip, parent_eip, 0, 0, 0); +} + +#ifdef CONFIG_MCOUNT + +extern void mcount(void); + +EXPORT_SYMBOL(mcount); + +void notrace __mcount(void) +{ + ___trace(TRACE_FN, CALLER_ADDR1, CALLER_ADDR2, 0, 0, 0); +} + +#endif + +void notrace +sys_call(unsigned long nr, unsigned long p1, unsigned long p2, unsigned long p3) +{ + if (syscall_tracing) + ___trace(TRACE_SYSCALL, nr, 0, p1, p2, p3); +} + +#if defined(CONFIG_COMPAT) && defined(CONFIG_X86) + +void notrace +sys_ia32_call(unsigned long nr, unsigned long p1, unsigned long p2, + unsigned long p3) +{ + if (syscall_tracing) + ___trace(TRACE_SYSCALL, nr | 0x80000000, 0, p1, p2, p3); +} + +#endif + +void notrace sys_ret(unsigned long ret) +{ + if (syscall_tracing) + ___trace(TRACE_SYSRET, ret, 0, 0, 0, 0); +} + +static void notrace print_name(struct seq_file *m, unsigned long eip) +{ + char namebuf[KSYM_NAME_LEN+1]; + unsigned long size, offset; + const char *sym_name; + char *modname; + + /* + * Special trace values: + */ + if (((long)eip < 100000L) && ((long)eip > -100000L)) { + seq_printf(m, "(%5ld)", eip); + return; + } + sym_name = kallsyms_lookup(eip, &size, &offset, &modname, namebuf); + if (sym_name) + seq_puts(m, sym_name); + else + seq_printf(m, "<%08lx>", eip); +} + +static void notrace print_name_offset(struct seq_file *m, unsigned long eip) +{ + char namebuf[KSYM_NAME_LEN+1]; + unsigned long size, offset; + const char *sym_name; + char *modname; + + sym_name = kallsyms_lookup(eip, &size, &offset, &modname, namebuf); + if (sym_name) + seq_printf(m, "%s+%#lx/%#lx <%08lx>", + sym_name, offset, size, eip); + else + seq_printf(m, "<%08lx>", eip); +} + +static unsigned long out_sequence = -1; + +static int pid_to_cmdline_array[PID_MAX_DEFAULT+1]; + +static void notrace _trace_cmdline(int cpu, struct cpu_trace *tr) +{ + unsigned long flags; + + local_save_flags(flags); + ____trace(cpu, TRACE_CMDLINE, tr, 0, 0, 0, 0, 0, flags); +} + +void notrace trace_cmdline(void) +{ + ___trace(TRACE_CMDLINE, 0, 0, 0, 0, 0); +} + +static void construct_pid_to_cmdline(struct cpu_trace *tr) +{ + unsigned int i, j, entries, pid; + + if (tr->critical_sequence == out_sequence) + return; + out_sequence = tr->critical_sequence; + + memset(pid_to_cmdline_array, -1, sizeof(int) * (PID_MAX_DEFAULT + 1)); + + if (!tr->trace) + return; + + entries = min(tr->trace_idx, MAX_TRACE); + + for (i = 0; i < entries; i++) { + struct trace_entry *entry = tr->trace + i; + + if (entry->type != TRACE_CMDLINE) + continue; + pid = entry->pid; + if (pid < PID_MAX_DEFAULT) { + pid_to_cmdline_array[pid] = i; + /* + * Replace space with underline - makes it easier + * to process for tools: + */ + for (j = 0; j < CMDLINE_BYTES; j++) + if (entry->u.cmdline.str[j] == ' ') + entry->u.cmdline.str[j] = '_'; + } + } +} + +char *pid_to_cmdline(unsigned long pid) +{ + struct cpu_trace *tr = out_tr.traces + 0; + char *cmdline = "<...>"; + int idx; + + pid = min(pid, (unsigned long)PID_MAX_DEFAULT); + if (!pid) + return ""; + + if (pid_to_cmdline_array[pid] != -1) { + idx = pid_to_cmdline_array[pid]; + if (tr->trace[idx].type == TRACE_CMDLINE) + cmdline = tr->trace[idx].u.cmdline.str; + } + return cmdline; +} + +static void copy_trace(struct cpu_trace *save, struct cpu_trace *tr, int reorder) +{ + if (!save->trace || !tr->trace) + return; + /* free-running needs reordering */ + if (reorder && atomic_read(&tr->underrun)) { + int i, idx, idx0 = tr->trace_idx; + + for (i = 0; i < MAX_TRACE; i++) { + idx = (idx0 + i) % MAX_TRACE; + save->trace[i] = tr->trace[idx]; + } + 
save->trace_idx = MAX_TRACE; + } else { + save->trace_idx = tr->trace_idx; + + memcpy(save->trace, tr->trace, + min(save->trace_idx, MAX_TRACE) * + sizeof(struct trace_entry)); + } + save->underrun = tr->underrun; + save->overrun = tr->overrun; +} + + +struct block_idx { + int idx[NR_CPUS]; +}; + +/* + * return the trace entry (position) of the smallest-timestamp + * one (that is still in the valid idx range): + */ +static int min_idx(struct block_idx *bidx) +{ + cycle_t min_stamp = (cycle_t) -1; + struct trace_entry *entry; + int cpu, min_cpu = -1, idx; + + for_each_online_cpu(cpu) { + idx = bidx->idx[cpu]; + if (idx >= min(max_tr.traces[cpu].trace_idx, MAX_TRACE)) + continue; + if (idx >= MAX_TRACE*NR_CPUS) { + printk("huh: idx (%d) > %ld*%d!\n", idx, MAX_TRACE, + NR_CPUS); + WARN_ON(1); + break; + } + entry = max_tr.traces[cpu].trace + bidx->idx[cpu]; + if (entry->timestamp < min_stamp) { + min_cpu = cpu; + min_stamp = entry->timestamp; + } + } + + return min_cpu; +} + +/* + * This code is called to construct an output trace from + * the maximum trace. Having separate traces serves two purposes: + * atomicity (a new max might be saved while we are busy + * accessing /proc/latency_trace), and delaying the (expensive) + * sorting of the output trace by timestamps, in the + * trace_all_cpus case. + */ +static void update_out_trace(void) +{ + struct trace_entry *out_entry, *entry, *tmp; + cycle_t stamp, first_stamp, last_stamp; + struct block_idx bidx = { { 0, }, }; + struct cpu_trace *tmp_max, *tmp_out; + int cpu, sum, entries, underrun_sum, overrun_sum; + + /* + * For out_tr we only have the first array's trace entries + * allocated - and they are larger on SMP to make room + * for all trace entries from all CPUs. + */ + tmp_out = out_tr.traces + 0; + tmp_max = max_tr.traces + max_tr.cpu; + /* + * Easier to copy this way. Note: the trace buffer is private + * to the output buffer, so preserve it: + */ + copy_trace(tmp_out, tmp_max, 0); + tmp = tmp_out->trace; + *tmp_out = *tmp_max; + tmp_out->trace = tmp; + + out_tr.cpu = max_tr.cpu; + + if (!tmp_out->trace) + return; + + out_entry = tmp_out->trace + 0; + + if (!trace_all_cpus) { + entries = min(tmp_out->trace_idx, MAX_TRACE); + if (!entries) + return; + out_tr.first_timestamp = tmp_out->trace[0].timestamp; + out_tr.last_timestamp = tmp_out->trace[entries-1].timestamp; + return; + } + /* + * Find the range of timestamps that are fully traced in + * all CPU traces. (since CPU traces can cover a variable + * range of time, we have to find the best range.) + */ + first_stamp = 0; + for_each_online_cpu(cpu) { + tmp_max = max_tr.traces + cpu; + stamp = tmp_max->trace[0].timestamp; + if (stamp > first_stamp) + first_stamp = stamp; + } + /* + * Save the timestamp range: + */ + tmp_max = max_tr.traces + max_tr.cpu; + entries = min(tmp_max->trace_idx, MAX_TRACE); + /* + * No saved trace yet? + */ + if (!entries) { + out_tr.traces[0].trace_idx = 0; + return; + } + + last_stamp = tmp_max->trace[entries-1].timestamp; + + if (last_stamp < first_stamp) { + WARN_ON(1); + + for_each_online_cpu(cpu) { + tmp_max = max_tr.traces + cpu; + entries = min(tmp_max->trace_idx, MAX_TRACE); + printk("CPU%d: %016Lx (%016Lx) ... 
#%d (%016Lx) %016Lx\n", + cpu, + tmp_max->trace[0].timestamp, + tmp_max->trace[1].timestamp, + entries, + tmp_max->trace[entries-2].timestamp, + tmp_max->trace[entries-1].timestamp); + } + tmp_max = max_tr.traces + max_tr.cpu; + entries = min(tmp_max->trace_idx, MAX_TRACE); + + printk("CPU%d entries: %d\n", max_tr.cpu, entries); + printk("first stamp: %016Lx\n", first_stamp); + printk(" last stamp: %016Lx\n", last_stamp); + } + +#if 0 + printk("first_stamp: %Ld [%016Lx]\n", first_stamp, first_stamp); + printk(" last_stamp: %Ld [%016Lx]\n", last_stamp, last_stamp); + printk(" +1 stamp: %Ld [%016Lx]\n", + tmp_max->trace[entries].timestamp, + tmp_max->trace[entries].timestamp); + printk(" +2 stamp: %Ld [%016Lx]\n", + tmp_max->trace[entries+1].timestamp, + tmp_max->trace[entries+1].timestamp); + printk(" delta: %Ld\n", last_stamp-first_stamp); + printk(" entries: %d\n", entries); +#endif + + out_tr.first_timestamp = first_stamp; + out_tr.last_timestamp = last_stamp; + + /* + * Fetch trace entries one by one, in increasing timestamp + * order. Start at first_stamp, stop at last_stamp: + */ + sum = 0; + for (;;) { + cpu = min_idx(&bidx); + if (cpu == -1) + break; + entry = max_tr.traces[cpu].trace + bidx.idx[cpu]; + if (entry->timestamp > last_stamp) + break; + + bidx.idx[cpu]++; + if (entry->timestamp < first_stamp) + continue; + *out_entry = *entry; + out_entry++; + sum++; + if (sum >= MAX_TRACE*NR_CPUS) { + printk("huh: sum (%d) > %ld*%d!\n", sum, MAX_TRACE, + NR_CPUS); + WARN_ON(1); + break; + } + } + + sum = 0; + underrun_sum = 0; + overrun_sum = 0; + for_each_online_cpu(cpu) { + sum += max_tr.traces[cpu].trace_idx; + underrun_sum += atomic_read(&max_tr.traces[cpu].underrun); + overrun_sum += atomic_read(&max_tr.traces[cpu].overrun); + } + tmp_out->trace_idx = sum; + atomic_set(&tmp_out->underrun, underrun_sum); + atomic_set(&tmp_out->overrun, overrun_sum); +} + +static void notrace print_help_header(struct seq_file *m) +{ + seq_puts(m, " _------=> CPU# \n"); + seq_puts(m, " / _-----=> irqs-off \n"); + seq_puts(m, " | / _----=> need-resched \n"); + seq_puts(m, " || / _---=> hardirq/softirq \n"); + seq_puts(m, " ||| / _--=> preempt-depth \n"); + seq_puts(m, " |||| / \n"); + seq_puts(m, " ||||| delay \n"); + seq_puts(m, " cmd pid ||||| time | caller \n"); + seq_puts(m, " \\ / ||||| \\ | / \n"); +} + +static void * notrace l_start(struct seq_file *m, loff_t *pos) +{ + loff_t n = *pos; + unsigned long entries; + struct cpu_trace *tr = out_tr.traces + 0; + + down(&out_mutex); + /* + * if the file is being read from the beginning, update + * the output trace: + */ + if (!n) { + // TODO: use the sequence counter here to optimize + down(&max_mutex); + update_out_trace(); + up(&max_mutex); +#if 0 + if (!tr->trace_idx) { + up(&out_mutex); + return NULL; + } +#endif + construct_pid_to_cmdline(tr); + } + entries = min(tr->trace_idx, MAX_TRACE); + + if (!n) { + seq_printf(m, "preemption latency trace v1.1.5 on %s\n", + UTS_RELEASE); + seq_puts(m, "--------------------------------------------------------------------\n"); + seq_printf(m, " latency: %lu us, #%lu/%lu, CPU#%d | (M:%s VP:%d, KP:%d, SP:%d HP:%d", + cycles_to_usecs(tr->saved_latency), + entries, + (entries + atomic_read(&tr->underrun) + + atomic_read(&tr->overrun)), + out_tr.cpu, +#if defined(CONFIG_PREEMPT_NONE) + "server", +#elif defined(CONFIG_PREEMPT_VOLUNTARY) + "desktop", +#elif defined(CONFIG_PREEMPT_DESKTOP) + "preempt", +#else + "rt", +#endif + 0, 0, +#ifdef CONFIG_PREEMPT_SOFTIRQS + softirq_preemption +#else + 0 +#endif + , +#ifdef 
CONFIG_PREEMPT_HARDIRQS + hardirq_preemption +#else + 0 +#endif + ); +#ifdef CONFIG_SMP + seq_printf(m, " #P:%d)\n", num_online_cpus()); +#else + seq_puts(m, ")\n"); +#endif + seq_puts(m, " -----------------\n"); + seq_printf(m, " | task: %.16s-%d (uid:%ld nice:%ld policy:%ld rt_prio:%ld)\n", + tr->comm, tr->pid, tr->uid, tr->nice, + tr->policy, tr->rt_priority); + seq_puts(m, " -----------------\n"); + if (trace_user_triggered) { + seq_puts(m, " => started at: "); + print_name_offset(m, tr->critical_start); + seq_puts(m, "\n => ended at: "); + print_name_offset(m, tr->critical_end); + seq_puts(m, "\n"); + } + seq_puts(m, "\n"); + + if (!trace_verbose) + print_help_header(m); + } + if (n >= entries || !tr->trace) + return NULL; + + return tr->trace + n; +} + +static void * notrace l_next(struct seq_file *m, void *p, loff_t *pos) +{ + struct cpu_trace *tr = out_tr.traces; + unsigned long entries = min(tr->trace_idx, MAX_TRACE); + + WARN_ON(!tr->trace); + + if (++*pos >= entries) { + if (*pos == entries) + seq_puts(m, "\n\nvim:ft=help\n"); + return NULL; + } + return tr->trace + *pos; +} + +static void notrace l_stop(struct seq_file *m, void *p) +{ + up(&out_mutex); +} + +static void print_timestamp(struct seq_file *m, unsigned long abs_usecs, + unsigned long rel_usecs) +{ + seq_printf(m, " %4ldus", abs_usecs); + if (rel_usecs > 100) + seq_puts(m, "!: "); + else if (rel_usecs > 1) + seq_puts(m, "+: "); + else + seq_puts(m, " : "); +} + +static void +print_timestamp_short(struct seq_file *m, unsigned long abs_usecs, + unsigned long rel_usecs) +{ + seq_printf(m, " %4ldus", abs_usecs); + if (rel_usecs > 100) + seq_putc(m, '!'); + else if (rel_usecs > 1) + seq_putc(m, '+'); + else + seq_putc(m, ' '); +} + +static void +print_generic(struct seq_file *m, struct trace_entry *entry) +{ + int hardirq, softirq; + + seq_printf(m, "%8.8s-%-5d ", pid_to_cmdline(entry->pid), entry->pid); + seq_printf(m, "%d", entry->cpu); + seq_printf(m, "%c%c", + (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : + (entry->flags & TRACE_FLAG_IRQS_HARD_OFF) ? 'D' : '.', + (entry->flags & TRACE_FLAG_NEED_RESCHED_DELAYED) ? 'n' : + ((entry->flags & TRACE_FLAG_NEED_RESCHED) ? 
'N' : '.')); + + hardirq = entry->flags & TRACE_FLAG_HARDIRQ; + softirq = entry->flags & TRACE_FLAG_SOFTIRQ; + if (hardirq && softirq) + seq_putc(m, 'H'); + else { + if (hardirq) + seq_putc(m, 'h'); + else { + if (softirq) + seq_putc(m, 's'); + else + seq_putc(m, '.'); + } + } + + if (entry->preempt_count) + seq_printf(m, "%x", entry->preempt_count); + else + seq_puts(m, "."); +} + + +static int notrace l_show_fn(struct seq_file *m, unsigned long trace_idx, + struct trace_entry *entry, struct trace_entry *entry0, + struct trace_entry *next_entry) +{ + unsigned long abs_usecs, rel_usecs; + + abs_usecs = cycles_to_us(entry->timestamp - entry0->timestamp); + rel_usecs = cycles_to_us(next_entry->timestamp - entry->timestamp); + + if (trace_verbose) { + seq_printf(m, "%16s %5d %d %d %08x %08lx [%016Lx] %ld.%03ldms (+%ld.%03ldms): ", + pid_to_cmdline(entry->pid), + entry->pid, entry->cpu, entry->flags, + entry->preempt_count, trace_idx, + entry->timestamp, abs_usecs/1000, + abs_usecs % 1000, rel_usecs/1000, rel_usecs % 1000); + print_name_offset(m, entry->u.fn.eip); + seq_puts(m, " ("); + print_name_offset(m, entry->u.fn.parent_eip); + seq_puts(m, ")\n"); + } else { + print_generic(m, entry); + print_timestamp(m, abs_usecs, rel_usecs); + print_name(m, entry->u.fn.eip); + seq_puts(m, " ("); + print_name(m, entry->u.fn.parent_eip); + seq_puts(m, ")\n"); + } + return 0; +} + +static int notrace l_show_special(struct seq_file *m, unsigned long trace_idx, + struct trace_entry *entry, struct trace_entry *entry0, + struct trace_entry *next_entry, int mode64) +{ + unsigned long abs_usecs, rel_usecs; + + abs_usecs = cycles_to_us(entry->timestamp - entry0->timestamp); + rel_usecs = cycles_to_us(next_entry->timestamp - entry->timestamp); + + print_generic(m, entry); + print_timestamp(m, abs_usecs, rel_usecs); + if (trace_verbose) + print_name_offset(m, entry->u.special.eip); + else + print_name(m, entry->u.special.eip); + + if (!mode64) { + /* + * For convenience, print small numbers in decimal: + */ + if (abs((int)entry->u.special.v1) < 100000) + seq_printf(m, " (%5ld ", entry->u.special.v1); + else + seq_printf(m, " (%lx ", entry->u.special.v1); + if (abs((int)entry->u.special.v2) < 100000) + seq_printf(m, "%5ld ", entry->u.special.v2); + else + seq_printf(m, "%lx ", entry->u.special.v2); + if (abs((int)entry->u.special.v3) < 100000) + seq_printf(m, "%5ld)\n", entry->u.special.v3); + else + seq_printf(m, "%lx)\n", entry->u.special.v3); + } else { + seq_printf(m, " (%13Ld %ld)\n", + ((u64)entry->u.special.v1 << 32) + + (u64)entry->u.special.v2, entry->u.special.v3); + } + return 0; +} + +static int notrace +l_show_special_pid(struct seq_file *m, unsigned long trace_idx, + struct trace_entry *entry, struct trace_entry *entry0, + struct trace_entry *next_entry) +{ + unsigned long abs_usecs, rel_usecs; + unsigned int pid; + + pid = entry->u.special.v1; + + abs_usecs = cycles_to_us(entry->timestamp - entry0->timestamp); + rel_usecs = cycles_to_us(next_entry->timestamp - entry->timestamp); + + print_generic(m, entry); + print_timestamp(m, abs_usecs, rel_usecs); + if (trace_verbose) + print_name_offset(m, entry->u.special.eip); + else + print_name(m, entry->u.special.eip); + seq_printf(m, " <%.8s-%d> (%ld %ld)\n", + pid_to_cmdline(pid), pid, + entry->u.special.v2, entry->u.special.v3); + + return 0; +} + +static int notrace +l_show_special_sym(struct seq_file *m, unsigned long trace_idx, + struct trace_entry *entry, struct trace_entry *entry0, + struct trace_entry *next_entry, int mode64) +{ + unsigned long 
abs_usecs, rel_usecs; + + abs_usecs = cycles_to_us(entry->timestamp - entry0->timestamp); + rel_usecs = cycles_to_us(next_entry->timestamp - entry->timestamp); + + print_generic(m, entry); + print_timestamp(m, abs_usecs, rel_usecs); + if (trace_verbose) + print_name_offset(m, entry->u.special.eip); + else + print_name(m, entry->u.special.eip); + + seq_puts(m, "()<-"); + print_name(m, entry->u.special.v1); + seq_puts(m, "()<-"); + print_name(m, entry->u.special.v2); + seq_puts(m, "()<-"); + print_name(m, entry->u.special.v3); + seq_puts(m, "()\n"); + + return 0; +} + + +static int notrace l_show_cmdline(struct seq_file *m, unsigned long trace_idx, + struct trace_entry *entry, struct trace_entry *entry0, + struct trace_entry *next_entry) +{ + unsigned long abs_usecs, rel_usecs; + + if (!trace_verbose) + return 0; + + abs_usecs = cycles_to_us(entry->timestamp - entry0->timestamp); + rel_usecs = cycles_to_us(next_entry->timestamp - entry->timestamp); + + seq_printf(m, + "[ => %16s ] %ld.%03ldms (+%ld.%03ldms)\n", + entry->u.cmdline.str, + abs_usecs/1000, abs_usecs % 1000, + rel_usecs/1000, rel_usecs % 1000); + + return 0; +} + +extern unsigned long sys_call_table[NR_syscalls]; + +#if defined(CONFIG_COMPAT) && defined(CONFIG_X86) +extern unsigned long ia32_sys_call_table[], ia32_syscall_end[]; +#define IA32_NR_syscalls (ia32_syscall_end - ia32_sys_call_table) +#endif + +static int notrace l_show_syscall(struct seq_file *m, unsigned long trace_idx, + struct trace_entry *entry, struct trace_entry *entry0, + struct trace_entry *next_entry) +{ + unsigned long abs_usecs, rel_usecs; + unsigned long nr; + + abs_usecs = cycles_to_us(entry->timestamp - entry0->timestamp); + rel_usecs = cycles_to_us(next_entry->timestamp - entry->timestamp); + + print_generic(m, entry); + print_timestamp_short(m, abs_usecs, rel_usecs); + + seq_puts(m, "> "); + nr = entry->u.syscall.nr; +#if defined(CONFIG_COMPAT) && defined(CONFIG_X86) + if (nr & 0x80000000) { + nr &= ~0x80000000; + if (nr < IA32_NR_syscalls) + print_name(m, ia32_sys_call_table[nr]); + else + seq_printf(m, "<%ld>", nr); + } else +#endif + if (nr < NR_syscalls) + print_name(m, sys_call_table[nr]); + else + seq_printf(m, "<%ld>", nr); + +#ifdef CONFIG_64BIT + seq_printf(m, " (%016lx %016lx %016lx)\n", + entry->u.syscall.p1, entry->u.syscall.p2, entry->u.syscall.p3); +#else + seq_printf(m, " (%08lx %08lx %08lx)\n", + entry->u.syscall.p1, entry->u.syscall.p2, entry->u.syscall.p3); +#endif + + return 0; +} + +static int notrace l_show_sysret(struct seq_file *m, unsigned long trace_idx, + struct trace_entry *entry, struct trace_entry *entry0, + struct trace_entry *next_entry) +{ + unsigned long abs_usecs, rel_usecs; + + abs_usecs = cycles_to_us(entry->timestamp - entry0->timestamp); + rel_usecs = cycles_to_us(next_entry->timestamp - entry->timestamp); + + print_generic(m, entry); + print_timestamp_short(m, abs_usecs, rel_usecs); + + seq_printf(m, "< (%ld)\n", entry->u.sysret.ret); + + return 0; +} + + +static int notrace l_show(struct seq_file *m, void *p) +{ + struct cpu_trace *tr = out_tr.traces; + struct trace_entry *entry, *entry0, *next_entry; + unsigned long trace_idx; + + cond_resched(); + entry = p; + if (entry->timestamp < out_tr.first_timestamp) + return 0; + if (entry->timestamp > out_tr.last_timestamp) + return 0; + + entry0 = tr->trace; + trace_idx = entry - entry0; + + if (trace_idx + 1 < tr->trace_idx) + next_entry = entry + 1; + else + next_entry = entry; + + if (trace_verbose) + seq_printf(m, "(T%d/#%ld) ", entry->type, trace_idx); + + switch 
(entry->type) { + case TRACE_FN: + l_show_fn(m, trace_idx, entry, entry0, next_entry); + break; + case TRACE_SPECIAL: + l_show_special(m, trace_idx, entry, entry0, next_entry, 0); + break; + case TRACE_SPECIAL_PID: + l_show_special_pid(m, trace_idx, entry, entry0, next_entry); + break; + case TRACE_SPECIAL_U64: + l_show_special(m, trace_idx, entry, entry0, next_entry, 1); + break; + case TRACE_SPECIAL_SYM: + l_show_special_sym(m, trace_idx, entry, entry0, + next_entry, 1); + break; + case TRACE_CMDLINE: + l_show_cmdline(m, trace_idx, entry, entry0, next_entry); + break; + case TRACE_SYSCALL: + l_show_syscall(m, trace_idx, entry, entry0, next_entry); + break; + case TRACE_SYSRET: + l_show_sysret(m, trace_idx, entry, entry0, next_entry); + break; + default: + seq_printf(m, "unknown trace type %d\n", entry->type); + } + return 0; +} + +struct seq_operations latency_trace_op = { + .start = l_start, + .next = l_next, + .stop = l_stop, + .show = l_show +}; + +/* + * Copy the new maximum trace into the separate maximum-trace + * structure. (this way the maximum trace is permanently saved, + * for later retrieval via /proc/latency_trace) + */ +static void update_max_tr(struct cpu_trace *tr) +{ + struct cpu_trace *save; + int cpu, all_cpus = 0; + +#ifdef CONFIG_PREEMPT + WARN_ON(!preempt_count() && !irqs_disabled()); +#endif + + max_tr.cpu = tr->cpu; + save = max_tr.traces + tr->cpu; + + if ((wakeup_timing || trace_user_triggered || trace_print_on_crash || + print_functions) && trace_all_cpus) { + all_cpus = 1; + for_each_online_cpu(cpu) + atomic_inc(&cpu_traces[cpu].disabled); + } + + save->saved_latency = preempt_max_latency; + save->preempt_timestamp = tr->preempt_timestamp; + save->critical_start = tr->critical_start; + save->critical_end = tr->critical_end; + save->critical_sequence = tr->critical_sequence; + + memcpy(save->comm, current->comm, CMDLINE_BYTES); + save->pid = current->pid; + save->uid = current->uid; + save->nice = current->static_prio - 20 - MAX_RT_PRIO; + save->policy = current->policy; + save->rt_priority = current->rt_priority; + + if (all_cpus) { + for_each_online_cpu(cpu) { + copy_trace(max_tr.traces + cpu, cpu_traces + cpu, 1); + atomic_dec(&cpu_traces[cpu].disabled); + } + } else + copy_trace(save, tr, 1); +} + +#else /* !EVENT_TRACE */ + +static inline void notrace +____trace(int cpu, enum trace_type type, struct cpu_trace *tr, + unsigned long eip, unsigned long parent_eip, + unsigned long v1, unsigned long v2, unsigned long v3, + unsigned long flags) +{ +} + +static inline void notrace +___trace(enum trace_type type, unsigned long eip, unsigned long parent_eip, + unsigned long v1, unsigned long v2, + unsigned long v3) +{ +} + +static inline void notrace __trace(unsigned long eip, unsigned long parent_eip) +{ +} + +static inline void update_max_tr(struct cpu_trace *tr) +{ +} + +static inline void notrace _trace_cmdline(int cpu, struct cpu_trace *tr) +{ +} + +#endif + +static int setup_preempt_thresh(char *s) +{ + int thresh; + + get_option(&s, &thresh); + if (thresh > 0) { + preempt_thresh = usecs_to_cycles(thresh); + printk("Preemption threshold = %u us\n", thresh); + } + return 1; +} +__setup("preempt_thresh=", setup_preempt_thresh); + +static inline void notrace reset_trace_idx(int cpu, struct cpu_trace *tr) +{ + if (trace_all_cpus) + for_each_online_cpu(cpu) { + tr = cpu_traces + cpu; + tr->trace_idx = 0; + atomic_set(&tr->underrun, 0); + atomic_set(&tr->overrun, 0); + } + else{ + tr->trace_idx = 0; + atomic_set(&tr->underrun, 0); + atomic_set(&tr->overrun, 0); + } 
+} + +#ifdef CONFIG_CRITICAL_TIMING + +static void notrace +check_critical_timing(int cpu, struct cpu_trace *tr, unsigned long parent_eip) +{ + unsigned long latency, t0, t1; + cycle_t T0, T1, T2, delta; + unsigned long flags; + + if (trace_user_triggered) + return; + /* + * usecs conversion is slow so we try to delay the conversion + * as long as possible: + */ + T0 = tr->preempt_timestamp; + T1 = get_monotonic_cycles(); + delta = T1-T0; + + local_save_flags(flags); + + if (!report_latency(delta)) + goto out; + + ____trace(cpu, TRACE_FN, tr, CALLER_ADDR0, parent_eip, 0, 0, 0, flags); + /* + * Update the timestamp, because the trace entry above + * might change it (it can only get larger so the latency + * is fair to be reported): + */ + T2 = get_monotonic_cycles(); + + delta = T2-T0; + + latency = cycles_to_usecs(delta); + latency_hist(tr->latency_type, cpu, latency); + + if (latency_hist_flag) { + if (preempt_max_latency >= delta) + goto out; + } + + if (tr->critical_sequence != max_sequence || down_trylock(&max_mutex)) + goto out; + +#ifndef CONFIG_CRITICAL_LATENCY_HIST + if (!preempt_thresh && preempt_max_latency > delta) { + printk("bug: updating %016Lx > %016Lx?\n", + preempt_max_latency, delta); + printk(" [%016Lx %016Lx %016Lx]\n", T0, T1, T2); + } +#endif + + preempt_max_latency = delta; + t0 = cycles_to_usecs(T0); + t1 = cycles_to_usecs(T1); + + tr->critical_end = parent_eip; + + update_max_tr(tr); + +#ifndef CONFIG_CRITICAL_LATENCY_HIST + if (preempt_thresh) + printk("(%16s-%-5d|#%d): %lu us critical section " + "violates %lu us threshold.\n" + " => started at timestamp %lu: ", + current->comm, current->pid, + raw_smp_processor_id(), + latency, cycles_to_usecs(preempt_thresh), t0); + else + printk("(%16s-%-5d|#%d): new %lu us maximum-latency " + "critical section.\n => started at timestamp %lu: ", + current->comm, current->pid, + raw_smp_processor_id(), + latency, t0); + + print_symbol("<%s>\n", tr->critical_start); + printk(" => ended at timestamp %lu: ", t1); + print_symbol("<%s>\n", tr->critical_end); + dump_stack(); + t1 = cycles_to_usecs(get_monotonic_cycles()); + printk(" => dump-end timestamp %lu\n\n", t1); +#endif + + max_sequence++; + + up(&max_mutex); + +out: + tr->critical_sequence = max_sequence; + tr->preempt_timestamp = get_monotonic_cycles(); + tr->early_warning = 0; + reset_trace_idx(cpu, tr); + _trace_cmdline(cpu, tr); + ____trace(cpu, TRACE_FN, tr, CALLER_ADDR0, parent_eip, 0, 0, 0, flags); +} + +void notrace touch_critical_timing(void) +{ + int cpu = raw_smp_processor_id(); + struct cpu_trace *tr = cpu_traces + cpu; + + if (!tr->critical_start || atomic_read(&tr->disabled) || + trace_user_triggered || wakeup_timing) + return; + + if (preempt_count() > 0 && tr->critical_start) { + atomic_inc(&tr->disabled); + check_critical_timing(cpu, tr, CALLER_ADDR0); + tr->critical_start = CALLER_ADDR0; + tr->critical_sequence = max_sequence; + atomic_dec(&tr->disabled); + } +} +EXPORT_SYMBOL(touch_critical_timing); + +void notrace stop_critical_timing(void) +{ + struct cpu_trace *tr = cpu_traces + raw_smp_processor_id(); + + tr->critical_start = 0; +} +EXPORT_SYMBOL(stop_critical_timing); + +static inline void notrace +__start_critical_timing(unsigned long eip, unsigned long parent_eip, + int latency_type) +{ + int cpu = raw_smp_processor_id(); + struct cpu_trace *tr = cpu_traces + cpu; + unsigned long flags; + + if (tr->critical_start || atomic_read(&tr->disabled) || + trace_user_triggered || wakeup_timing) + return; + + atomic_inc(&tr->disabled); + + 
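+	/* + * Holding tr->disabled here keeps the trace fast path from + * logging into this CPU's buffer while we reset and re-arm + * it below: + */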
tr->critical_sequence = max_sequence; + tr->preempt_timestamp = get_monotonic_cycles(); + tr->critical_start = eip; + reset_trace_idx(cpu, tr); + tr->latency_type = latency_type; + _trace_cmdline(cpu, tr); + + local_save_flags(flags); + ____trace(cpu, TRACE_FN, tr, eip, parent_eip, 0, 0, 0, flags); + + atomic_dec(&tr->disabled); +} + +static inline void notrace +__stop_critical_timing(unsigned long eip, unsigned long parent_eip) +{ + int cpu = raw_smp_processor_id(); + struct cpu_trace *tr = cpu_traces + cpu; + unsigned long flags; + + if (!tr->critical_start || atomic_read(&tr->disabled) || + trace_user_triggered || wakeup_timing) + return; + + atomic_inc(&tr->disabled); + local_save_flags(flags); + ____trace(cpu, TRACE_FN, tr, eip, parent_eip, 0, 0, 0, flags); + check_critical_timing(cpu, tr, eip); + tr->critical_start = 0; + atomic_dec(&tr->disabled); +} + +#endif + +#ifdef CONFIG_CRITICAL_IRQSOFF_TIMING + +#ifdef CONFIG_LOCKDEP + +void notrace time_hardirqs_on(unsigned long a0, unsigned long a1) +{ + unsigned long flags; + + local_save_flags(flags); + + if (!irqs_off_preempt_count() && irqs_disabled_flags(flags)) + __stop_critical_timing(a0, a1); +} + +void notrace time_hardirqs_off(unsigned long a0, unsigned long a1) +{ + unsigned long flags; + + local_save_flags(flags); + + if (!irqs_off_preempt_count() && irqs_disabled_flags(flags)) + __start_critical_timing(a0, a1, INTERRUPT_LATENCY); +} + +#else /* !CONFIG_LOCKDEP */ + +/* + * Dummy: + */ + +void early_boot_irqs_off(void) +{ +} + +void early_boot_irqs_on(void) +{ +} + +void trace_softirqs_on(unsigned long ip) +{ +} + +void trace_softirqs_off(unsigned long ip) +{ +} + +inline void print_irqtrace_events(struct task_struct *curr) +{ +} + +/* + * We are only interested in hardirq on/off events: + */ +void notrace trace_hardirqs_on(void) +{ + unsigned long flags; + + local_save_flags(flags); + + if (!irqs_off_preempt_count() && irqs_disabled_flags(flags)) + __stop_critical_timing(CALLER_ADDR0, 0 /* CALLER_ADDR1 */); +} + +EXPORT_SYMBOL(trace_hardirqs_on); + +void notrace trace_hardirqs_off(void) +{ + unsigned long flags; + + local_save_flags(flags); + + if (!irqs_off_preempt_count() && irqs_disabled_flags(flags)) + __start_critical_timing(CALLER_ADDR0, 0 /* CALLER_ADDR1 */, + INTERRUPT_LATENCY); +} + +EXPORT_SYMBOL(trace_hardirqs_off); + +#endif /* !CONFIG_LOCKDEP */ + +#endif /* CONFIG_CRITICAL_IRQSOFF_TIMING */ + +#if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_CRITICAL_TIMING) + +static inline unsigned long get_parent_eip(void) +{ + unsigned long parent_eip = CALLER_ADDR1; + + if (in_lock_functions(parent_eip)) { + parent_eip = CALLER_ADDR2; + if (in_lock_functions(parent_eip)) + parent_eip = CALLER_ADDR3; + } + + return parent_eip; +} + +void notrace add_preempt_count(unsigned int val) +{ + unsigned long eip = CALLER_ADDR0; + unsigned long parent_eip = get_parent_eip(); + +#ifdef CONFIG_DEBUG_PREEMPT + /* + * Underflow? + */ + if (DEBUG_WARN_ON(((int)preempt_count() < 0))) + return; + /* + * Spinlock count overflowing soon? 
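+	 * (we warn while 10 counts of headroom still remain below + * PREEMPT_MASK - see the check right below)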
+	 */ + if (DEBUG_WARN_ON((preempt_count() & PREEMPT_MASK) >= PREEMPT_MASK-10)) + return; +#endif + + preempt_count() += val; +#ifdef CONFIG_PREEMPT_TRACE + if (val <= 10) { + unsigned int idx = preempt_count() & PREEMPT_MASK; + if (idx < MAX_PREEMPT_TRACE) { + current->preempt_trace_eip[idx] = eip; + current->preempt_trace_parent_eip[idx] = parent_eip; + } + } +#endif +#ifdef CONFIG_CRITICAL_PREEMPT_TIMING + { +#ifdef CONFIG_CRITICAL_IRQSOFF_TIMING + unsigned long flags; + + local_save_flags(flags); + + if (!irqs_disabled_flags(flags)) +#endif + if (preempt_count() == val) + __start_critical_timing(eip, parent_eip, + PREEMPT_LATENCY); + } +#endif + (void)eip, (void)parent_eip; +} +EXPORT_SYMBOL(add_preempt_count); + +void notrace sub_preempt_count(unsigned int val) +{ +#ifdef CONFIG_DEBUG_PREEMPT + /* + * Underflow? + */ + if (DEBUG_WARN_ON(unlikely(val > preempt_count()))) + return; + /* + * Is the spinlock portion underflowing? + */ + if (DEBUG_WARN_ON((val < PREEMPT_MASK) && + !(preempt_count() & PREEMPT_MASK))) + return; +#endif + +#ifdef CONFIG_CRITICAL_PREEMPT_TIMING + { +#ifdef CONFIG_CRITICAL_IRQSOFF_TIMING + unsigned long flags; + + local_save_flags(flags); + + if (!irqs_disabled_flags(flags)) +#endif + if (preempt_count() == val) + __stop_critical_timing(CALLER_ADDR0, + CALLER_ADDR1); + } +#endif + preempt_count() -= val; +} + +EXPORT_SYMBOL(sub_preempt_count); + +void notrace mask_preempt_count(unsigned int mask) +{ + unsigned long eip = CALLER_ADDR0; + unsigned long parent_eip = get_parent_eip(); + + preempt_count() |= mask; + +#ifdef CONFIG_CRITICAL_PREEMPT_TIMING + { +#ifdef CONFIG_CRITICAL_IRQSOFF_TIMING + unsigned long flags; + + local_save_flags(flags); + + if (!irqs_disabled_flags(flags)) +#endif + if (preempt_count() == mask) + __start_critical_timing(eip, parent_eip, + PREEMPT_LATENCY); + } +#endif + (void)eip, (void)parent_eip; +} +EXPORT_SYMBOL(mask_preempt_count); + +void notrace unmask_preempt_count(unsigned int mask) +{ +#ifdef CONFIG_CRITICAL_PREEMPT_TIMING + { +#ifdef CONFIG_CRITICAL_IRQSOFF_TIMING + unsigned long flags; + + local_save_flags(flags); + + if (!irqs_disabled_flags(flags)) +#endif + if (preempt_count() == mask) + __stop_critical_timing(CALLER_ADDR0, + CALLER_ADDR1); + } +#endif + preempt_count() &= ~mask; +} +EXPORT_SYMBOL(unmask_preempt_count); + + +#endif + +/* + * Wakeup latency timing/tracing. We get upcalls from the scheduler + * when a task is being woken up and we time/trace it until it gets + * to a CPU - or an even-higher-prio task supersedes it. 
(in that + * case we throw away the currently traced task - we don't try to + * handle nesting, which simplifies things significantly) + */ +#ifdef CONFIG_WAKEUP_TIMING + +static void notrace +check_wakeup_timing(struct cpu_trace *tr, unsigned long parent_eip, + unsigned long *flags) +{ + int cpu = raw_smp_processor_id(); + unsigned long latency, t0, t1; + cycle_t T0, T1, delta; + + if (trace_user_triggered) + return; + + atomic_inc(&tr->disabled); + if (atomic_read(&tr->disabled) != 1) + goto out; + + T0 = tr->preempt_timestamp; + T1 = get_monotonic_cycles(); + /* + * Any wraparound or time warp and we are out: + */ + if (T0 > T1) + goto out; + delta = T1-T0; + + if (!report_latency(delta)) + goto out; + + ____trace(smp_processor_id(), TRACE_FN, tr, CALLER_ADDR0, parent_eip, + 0, 0, 0, *flags); + + latency = cycles_to_usecs(delta); + latency_hist(tr->latency_type, cpu, latency); + + if (latency_hist_flag) { + if (preempt_max_latency >= delta) + goto out; + } + + if (tr->critical_sequence != max_sequence || down_trylock(&max_mutex)) + goto out; + +#ifndef CONFIG_WAKEUP_LATENCY_HIST + if (!preempt_thresh && preempt_max_latency > delta) { + printk("bug2: updating %016Lx > %016Lx?\n", + preempt_max_latency, delta); + printk(" [%016Lx %016Lx]\n", T0, T1); + } +#endif + + preempt_max_latency = delta; + t0 = cycles_to_usecs(T0); + t1 = cycles_to_usecs(T1); + tr->critical_end = parent_eip; + + update_max_tr(tr); + + atomic_dec(&tr->disabled); + __raw_spin_unlock(&sch.trace_lock); + local_irq_restore(*flags); + +#ifndef CONFIG_WAKEUP_LATENCY_HIST + if (preempt_thresh) + printk("(%16s-%-5d|#%d): %lu us wakeup latency " + "violates %lu us threshold.\n", + current->comm, current->pid, + raw_smp_processor_id(), latency, + cycles_to_usecs(preempt_thresh)); + else + printk("(%16s-%-5d|#%d): new %lu us maximum-latency " + "wakeup.\n", current->comm, current->pid, + raw_smp_processor_id(), latency); +#endif + + max_sequence++; + + up(&max_mutex); + + return; + +out: + atomic_dec(&tr->disabled); + __raw_spin_unlock(&sch.trace_lock); + local_irq_restore(*flags); +} + +/* + * Start wakeup latency tracing - called with the runqueue held + * and interrupts disabled: + */ +void __trace_start_sched_wakeup(struct task_struct *p) +{ + struct cpu_trace *tr; + int cpu; + + if (trace_user_triggered || !wakeup_timing) { + trace_special_pid(p->pid, p->prio, -1); + return; + } + + __raw_spin_lock(&sch.trace_lock); + if (sch.task && (sch.task->prio <= p->prio)) + goto out_unlock; + + /* + * New highest-prio task just woke up - start tracing: + */ + sch.task = p; + cpu = task_cpu(p); + sch.cpu = cpu; + /* + * We keep using this CPU's trace buffer even if the task + * gets migrated to another CPU. Tracing only happens on + * the CPU that 'owns' the highest-prio task so it's + * fundamentally single-threaded. 
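+	 * (If the task does migrate anyway, trace_change_sched_cpu() + * below updates sch.cpu and logs the change as a special trace + * entry.)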
+ */ + sch.tr = tr = cpu_traces + cpu; + reset_trace_idx(cpu, tr); + +// if (!atomic_read(&tr->disabled)) { + atomic_inc(&tr->disabled); + tr->critical_sequence = max_sequence; + tr->preempt_timestamp = get_monotonic_cycles(); + tr->latency_type = WAKEUP_LATENCY; + tr->critical_start = CALLER_ADDR0; + _trace_cmdline(raw_smp_processor_id(), tr); + atomic_dec(&tr->disabled); +// } + + mcount(); + trace_special_pid(p->pid, p->prio, cpu); + trace_special_sym(); +out_unlock: + __raw_spin_unlock(&sch.trace_lock); +} + +void trace_stop_sched_switched(struct task_struct *p) +{ + struct cpu_trace *tr; + unsigned long flags; + + if (trace_user_triggered || !wakeup_timing) + return; + + local_irq_save(flags); + __raw_spin_lock(&sch.trace_lock); + if (p == sch.task) { + trace_special_pid(p->pid, p->prio, task_cpu(p)); + + sch.task = NULL; + tr = sch.tr; + sch.tr = NULL; + WARN_ON(!tr); + /* auto-unlocks the spinlock: */ + check_wakeup_timing(tr, CALLER_ADDR0, &flags); + } else { + if (sch.task) + trace_special_pid(sch.task->pid, sch.task->prio, + p->prio); + if (sch.task && (sch.task->prio >= p->prio)) + sch.task = NULL; + __raw_spin_unlock(&sch.trace_lock); + } + local_irq_restore(flags); +} + +void trace_change_sched_cpu(struct task_struct *p, int new_cpu) +{ + unsigned long flags; + + if (!wakeup_timing) + return; + + trace_special_pid(p->pid, task_cpu(p), new_cpu); + trace_special_sym(); + local_irq_save(flags); + __raw_spin_lock(&sch.trace_lock); + if (p == sch.task && task_cpu(p) != new_cpu) { + sch.cpu = new_cpu; + trace_special(task_cpu(p), new_cpu, 0); + } + __raw_spin_unlock(&sch.trace_lock); + local_irq_restore(flags); +} + +#endif + +#ifdef CONFIG_EVENT_TRACE + +long user_trace_start(void) +{ + struct cpu_trace *tr; + unsigned long flags; + int cpu; + + if (!trace_user_triggered || trace_print_on_crash || print_functions) + return -EINVAL; + + /* + * If the user has not yet reset the max latency after + * bootup then we assume that this was the intention + * (we wont get any tracing done otherwise): + */ + if (preempt_max_latency == (cycle_t)ULONG_MAX) + preempt_max_latency = 0; + + /* + * user_trace_start() might be called from hardirq + * context, if trace_user_triggered_irq is set, so + * be careful about locking: + */ + if (preempt_count() || irqs_disabled()) { + if (down_trylock(&max_mutex)) + return -EAGAIN; + } else + down(&max_mutex); + + local_irq_save(flags); + cpu = smp_processor_id(); + tr = cpu_traces + cpu; + +#ifdef CONFIG_WAKEUP_TIMING + if (wakeup_timing) { + __raw_spin_lock(&sch.trace_lock); + sch.task = current; + sch.cpu = cpu; + sch.tr = tr; + __raw_spin_unlock(&sch.trace_lock); + } +#endif + reset_trace_idx(cpu, tr); + + tr->critical_sequence = max_sequence; + tr->preempt_timestamp = get_monotonic_cycles(); + tr->critical_start = CALLER_ADDR0; + _trace_cmdline(cpu, tr); + mcount(); + + WARN_ON(!irqs_disabled()); + local_irq_restore(flags); + + up(&max_mutex); + + return 0; +} + +EXPORT_SYMBOL_GPL(user_trace_start); + +long user_trace_stop(void) +{ + unsigned long latency = 0, flags; + struct cpu_trace *tr; + cycle_t delta; + + if (!trace_user_triggered || trace_print_on_crash || print_functions) + return -EINVAL; + + local_irq_save(flags); + mcount(); + +#ifdef CONFIG_WAKEUP_TIMING + if (wakeup_timing) { + struct task_struct *t; + + __raw_spin_lock(&sch.trace_lock); + t = sch.task; + if (current != t) { + __raw_spin_unlock(&sch.trace_lock); + local_irq_restore(flags); + printk("wrong stop: curr: %s/%d[%d] => %p\n", + current->comm, current->pid, + 
task_thread_info(current)->cpu, t); + if (t) + printk("wrong stop: curr: %s/%d[%d]\n", + t->comm, t->pid, + task_thread_info(t)->cpu); + return -EINVAL; + } + sch.task = NULL; + tr = sch.tr; + sch.tr = NULL; + __raw_spin_unlock(&sch.trace_lock); + } else +#endif + tr = cpu_traces + smp_processor_id(); + + atomic_inc(&tr->disabled); + if (tr->preempt_timestamp) { + cycle_t T0, T1; + unsigned long long tmp0; + + T0 = tr->preempt_timestamp; + T1 = get_monotonic_cycles(); + tmp0 = preempt_max_latency; + if (T1 < T0) + T0 = T1; + delta = T1 - T0; + if (!report_latency(delta)) + goto out; + if (tr->critical_sequence != max_sequence || + down_trylock(&max_mutex)) + goto out; + + WARN_ON(!preempt_thresh && preempt_max_latency > delta); + + preempt_max_latency = delta; + update_max_tr(tr); + + latency = cycles_to_usecs(delta); + + max_sequence++; + up(&max_mutex); +out: + tr->preempt_timestamp = 0; + } + atomic_dec(&tr->disabled); + local_irq_restore(flags); + + if (latency) { + if (preempt_thresh) + printk("(%16s-%-5d|#%d): %lu us user-latency " + "violates %lu us threshold.\n", + current->comm, current->pid, + raw_smp_processor_id(), latency, + cycles_to_usecs(preempt_thresh)); + else + printk("(%16s-%-5d|#%d): new %lu us user-latency.\n", + current->comm, current->pid, + raw_smp_processor_id(), latency); + } + + return 0; +} + +EXPORT_SYMBOL(user_trace_stop); + +static int trace_print_cpu = -1; + +void notrace stop_trace(void) +{ + if (trace_print_on_crash && trace_print_cpu == -1) { + trace_enabled = -1; + trace_print_cpu = raw_smp_processor_id(); + } +} + +EXPORT_SYMBOL(stop_trace); + +static void print_entry(struct trace_entry *entry, struct trace_entry *entry0) +{ + unsigned long abs_usecs; + int hardirq, softirq; + + abs_usecs = cycles_to_us(entry->timestamp - entry0->timestamp); + + printk("%-5d ", entry->pid); + + printk("%d%c%c", + entry->cpu, + (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : + (entry->flags & TRACE_FLAG_IRQS_HARD_OFF) ? 'D' : '.', + (entry->flags & TRACE_FLAG_NEED_RESCHED_DELAYED) ? 'n' : + ((entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.')); + + hardirq = entry->flags & TRACE_FLAG_HARDIRQ; + softirq = entry->flags & TRACE_FLAG_SOFTIRQ; + if (hardirq && softirq) + printk("H"); + else { + if (hardirq) + printk("h"); + else { + if (softirq) + printk("s"); + else + printk("."); + } + } + + if (entry->preempt_count) + printk(":%x ", entry->preempt_count); + else + printk(":. "); + + printk("%ld.%03ldms: ", abs_usecs/1000, abs_usecs % 1000); + + switch (entry->type) { + case TRACE_FN: + printk_name(entry->u.fn.eip); + printk(" <= ("); + printk_name(entry->u.fn.parent_eip); + printk(")\n"); + break; + case TRACE_SPECIAL: + printk(" special: %lx %lx %lx\n", + entry->u.special.v1, entry->u.special.v2, + entry->u.special.v3); + break; + case TRACE_SPECIAL_U64: + printk(" spec64: %lx%08lx %lx\n", + entry->u.special.v1, entry->u.special.v2, + entry->u.special.v3); + break; + } +} + +/* + * Print the current trace at crash time. + * + * We print it backwards, so that the newest (most interesting) entries + * are printed first. 
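+ * (Only TRACE_FN, TRACE_SPECIAL and TRACE_SPECIAL_U64 entries are + * printed here - see the switch statements in print_entry() and in + * the loop below.)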
+ */ +void print_last_trace(void) +{ + unsigned int idx0, idx, i, cpu; + struct cpu_trace *tr; + struct trace_entry *entry0, *entry; + + preempt_disable(); + cpu = smp_processor_id(); + if (trace_enabled != -1 || trace_print_cpu != cpu || + !trace_print_on_crash) { + if (trace_print_on_crash) + printk("skipping trace printing on CPU#%d != %d\n", + cpu, trace_print_cpu); + preempt_enable(); + return; + } + + trace_print_on_crash = 0; + + tr = cpu_traces + cpu; + if (!tr->trace) + goto out; + + printk("Last %ld trace entries:\n", MAX_TRACE); + idx0 = tr->trace_idx; + printk("curr idx: %d\n", idx0); + if (idx0 >= MAX_TRACE) + idx0 = 0; + idx = idx0; + entry0 = tr->trace + idx0; + + for (i = 0; i < MAX_TRACE; i++) { + if (idx == 0) + idx = MAX_TRACE-1; + else + idx--; + entry = tr->trace + idx; + switch (entry->type) { + case TRACE_FN: + case TRACE_SPECIAL: + case TRACE_SPECIAL_U64: + print_entry(entry, entry0); + break; + } + } + printk("printed %ld entries\n", MAX_TRACE); +out: + preempt_enable(); +} + +#ifdef CONFIG_SMP +/* + * On SMP, try to 'peek' on other CPU's traces and record them + * in this CPU's trace. This way we get a rough idea about what's + * going on there, without the overhead of global tracing. + * + * (no need to make this PER_CPU, we bounce it around anyway.) + */ +unsigned long nmi_eips[NR_CPUS]; +unsigned long nmi_flags[NR_CPUS]; + +void notrace nmi_trace(unsigned long eip, unsigned long parent_eip, + unsigned long flags) +{ + int cpu, this_cpu = smp_processor_id(); + + __trace(eip, parent_eip); + + nmi_eips[this_cpu] = parent_eip; + nmi_flags[this_cpu] = flags; + for (cpu = 0; cpu < NR_CPUS; cpu++) + if (cpu_online(cpu) && cpu != this_cpu) { + __trace(eip, nmi_eips[cpu]); + __trace(eip, nmi_flags[cpu]); + } +} +#else +/* + * On UP, NMI tracing is quite simple: + */ +void notrace nmi_trace(unsigned long eip, unsigned long parent_eip, + unsigned long flags) +{ + __trace(eip, parent_eip); +} +#endif + +#endif + +#ifdef CONFIG_PREEMPT_TRACE + +static void print_preempt_trace(struct task_struct *task) +{ + unsigned int count = task_thread_info(task)->preempt_count; + unsigned int i, lim = count & PREEMPT_MASK; + if (lim >= MAX_PREEMPT_TRACE) + lim = MAX_PREEMPT_TRACE-1; + printk("---------------------------\n"); + printk("| preempt count: %08x ]\n", count); + printk("| %d-level deep critical section nesting:\n", lim); + printk("----------------------------------------\n"); + for (i = 1; i <= lim; i++) { + printk(".. [<%08lx>] .... ", task->preempt_trace_eip[i]); + print_symbol("%s\n", task->preempt_trace_eip[i]); + printk(".....[<%08lx>] .. 
( <= ", + task->preempt_trace_parent_eip[i]); + print_symbol("%s)\n", task->preempt_trace_parent_eip[i]); + } + printk("\n"); +} + +#endif + +#if defined(CONFIG_PREEMPT_TRACE) || defined(CONFIG_EVENT_TRACE) +void print_traces(struct task_struct *task) +{ + if (!task) + task = current; + +#ifdef CONFIG_PREEMPT_TRACE + print_preempt_trace(task); +#endif +#ifdef CONFIG_EVENT_TRACE + print_last_trace(); +#endif +} +#endif + +#ifdef CONFIG_EVENT_TRACE +/* + * Allocate all the per-CPU trace buffers and the + * save-maximum/save-output staging buffers: + */ +void __init init_tracer(void) +{ + unsigned long size, total_size = 0; + struct trace_entry *array; + struct cpu_trace *tr; + int cpu; + + printk("num_possible_cpus(): %d\n", num_possible_cpus()); + + size = sizeof(struct trace_entry)*MAX_TRACE; + + for_each_possible_cpu(cpu) { + tr = cpu_traces + cpu; + array = alloc_bootmem(size); + if (!array) { + printk(KERN_ERR + "CPU#%d: failed to allocate %ld bytes trace buffer!\n", + cpu, size); + } else { + printk(KERN_INFO + "CPU#%d: allocated %ld bytes trace buffer.\n", + cpu, size); + total_size += size; + } + tr->cpu = cpu; + tr->trace = array; + + array = alloc_bootmem(size); + if (!array) { + printk(KERN_ERR + "CPU#%d: failed to allocate %ld bytes max-trace buffer!\n", + cpu, size); + } else { + printk(KERN_INFO + "CPU#%d: allocated %ld bytes max-trace buffer.\n", + cpu, size); + total_size += size; + } + max_tr.traces[cpu].trace = array; + } + + /* + * The output trace buffer is a special one that only has + * trace entries for the first cpu-trace structure: + */ + size = sizeof(struct trace_entry)*MAX_TRACE*num_possible_cpus(); + array = alloc_bootmem(size); + if (!array) { + printk(KERN_ERR + "failed to allocate %ld bytes out-trace buffer!\n", + size); + } else { + printk(KERN_INFO "allocated %ld bytes out-trace buffer.\n", + size); + total_size += size; + } + out_tr.traces[0].trace = array; + printk(KERN_INFO + "tracer: a total of %ld bytes allocated.\n", + total_size); +} +#endif Index: linux-rt-rebase.q/kernel/lockdep.c =================================================================== --- linux-rt-rebase.q.orig/kernel/lockdep.c +++ linux-rt-rebase.q/kernel/lockdep.c @@ -1036,7 +1036,7 @@ find_usage_forwards(struct lock_class *s * Return 1 otherwise and keep unchanged. * Return 0 on error. 
*/ -static noinline int +static noinline notrace int find_usage_backwards(struct lock_class *source, unsigned int depth) { struct lock_list *entry; @@ -1586,7 +1586,7 @@ static inline int validate_chain(struct * We are building curr_chain_key incrementally, so double-check * it from scratch, to make sure that it's done correctly: */ -static void check_chain_key(struct task_struct *curr) +static void notrace check_chain_key(struct task_struct *curr) { #ifdef CONFIG_DEBUG_LOCKDEP struct held_lock *hlock, *prev_hlock = NULL; @@ -2009,7 +2009,7 @@ void early_boot_irqs_on(void) /* * Hardirqs will be enabled: */ -void trace_hardirqs_on(void) +void notrace trace_hardirqs_on(void) { struct task_struct *curr = current; unsigned long ip; @@ -2050,6 +2050,9 @@ void trace_hardirqs_on(void) curr->hardirq_enable_ip = ip; curr->hardirq_enable_event = ++curr->irq_events; debug_atomic_inc(&hardirqs_on_events); +#ifdef CONFIG_CRITICAL_IRQSOFF_TIMING + time_hardirqs_on(CALLER_ADDR0, 0 /* CALLER_ADDR1 */); +#endif } EXPORT_SYMBOL(trace_hardirqs_on); @@ -2057,7 +2060,7 @@ EXPORT_SYMBOL(trace_hardirqs_on); /* * Hardirqs were disabled: */ -void trace_hardirqs_off(void) +void notrace trace_hardirqs_off(void) { struct task_struct *curr = current; @@ -2075,6 +2078,9 @@ void trace_hardirqs_off(void) curr->hardirq_disable_ip = _RET_IP_; curr->hardirq_disable_event = ++curr->irq_events; debug_atomic_inc(&hardirqs_off_events); +#ifdef CONFIG_CRITICAL_IRQSOFF_TIMING + time_hardirqs_off(CALLER_ADDR0, 0 /* CALLER_ADDR1 */); +#endif } else debug_atomic_inc(&redundant_hardirqs_off); } @@ -2241,8 +2247,8 @@ static inline int separate_irq_context(s /* * Mark a lock with a usage bit, and validate the state transition: */ -static int mark_lock(struct task_struct *curr, struct held_lock *this, - enum lock_usage_bit new_bit) +static int notrace mark_lock(struct task_struct *curr, struct held_lock *this, + enum lock_usage_bit new_bit) { unsigned int new_mask = 1 << new_bit, ret = 1; @@ -2301,6 +2307,7 @@ static int mark_lock(struct task_struct * We must printk outside of the graph_lock: */ if (ret == 2) { + user_trace_stop(); printk("\nmarked lock as {%s}:\n", usage_str[new_bit]); print_lock(this); print_irqtrace_events(curr); @@ -2648,7 +2655,7 @@ __lock_release(struct lockdep_map *lock, /* * Check whether we follow the irq-flags state precisely: */ -static void check_flags(unsigned long flags) +static notrace void check_flags(unsigned long flags) { #if defined(CONFIG_DEBUG_LOCKDEP) && defined(CONFIG_TRACE_IRQFLAGS) if (!debug_locks) @@ -2680,8 +2687,9 @@ static void check_flags(unsigned long fl * We are not always called with irqs disabled - do that here, * and also avoid lockdep recursion: */ -void lock_acquire(struct lockdep_map *lock, unsigned int subclass, - int trylock, int read, int check, unsigned long ip) +void notrace +lock_acquire(struct lockdep_map *lock, unsigned int subclass, + int trylock, int read, int check, unsigned long ip) { unsigned long flags; @@ -2692,9 +2700,9 @@ void lock_acquire(struct lockdep_map *lo return; raw_local_irq_save(flags); + current->lockdep_recursion = 1; check_flags(flags); - current->lockdep_recursion = 1; __lock_acquire(lock, subclass, trylock, read, check, irqs_disabled_flags(flags), ip); current->lockdep_recursion = 0; @@ -2703,7 +2711,8 @@ void lock_acquire(struct lockdep_map *lo EXPORT_SYMBOL_GPL(lock_acquire); -void lock_release(struct lockdep_map *lock, int nested, unsigned long ip) +void notrace +lock_release(struct lockdep_map *lock, int nested, unsigned long ip) { unsigned long 
flags; @@ -2714,8 +2723,8 @@ void lock_release(struct lockdep_map *lo return; raw_local_irq_save(flags); - check_flags(flags); current->lockdep_recursion = 1; + check_flags(flags); __lock_release(lock, nested, ip); current->lockdep_recursion = 0; raw_local_irq_restore(flags); Index: linux-rt-rebase.q/kernel/panic.c =================================================================== --- linux-rt-rebase.q.orig/kernel/panic.c +++ linux-rt-rebase.q/kernel/panic.c @@ -66,6 +66,8 @@ NORET_TYPE void panic(const char * fmt, unsigned long caller = (unsigned long) __builtin_return_address(0); #endif + stop_trace(); + /* * It's possible to come here directly from a panic-assertion and not * have preempt disabled. Some functions called from here want Index: linux-rt-rebase.q/kernel/printk.c =================================================================== --- linux-rt-rebase.q.orig/kernel/printk.c +++ linux-rt-rebase.q/kernel/printk.c @@ -324,12 +324,14 @@ static void __call_console_drivers(unsig { struct console *con; + touch_critical_timing(); for (con = console_drivers; con; con = con->next) { if ((con->flags & CON_ENABLED) && con->write && (cpu_online(smp_processor_id()) || (con->flags & CON_ANYTIME))) con->write(con, &LOG_BUF(start), end - start); } + touch_critical_timing(); } static int __read_mostly ignore_loglevel; Index: linux-rt-rebase.q/kernel/sched.c =================================================================== --- linux-rt-rebase.q.orig/kernel/sched.c +++ linux-rt-rebase.q/kernel/sched.c @@ -82,6 +82,10 @@ unsigned long long __attribute__((weak)) #define PRIO_TO_NICE(prio) ((prio) - MAX_RT_PRIO - 20) #define TASK_NICE(p) PRIO_TO_NICE((p)->static_prio) +#define __PRIO(prio) \ + ((prio) <= 99 ? 199 - (prio) : (prio) - 120) + +#define PRIO(p) __PRIO((p)->prio) /* * 'User priority' is the nice value converted to something we * can work with better when scaling various scheduler parameters, @@ -307,6 +311,8 @@ static DEFINE_MUTEX(sched_hotcpu_mutex); static inline void check_preempt_curr(struct rq *rq, struct task_struct *p) { rq->curr->sched_class->check_preempt_curr(rq, p); + if (p != rq->curr && p->prio < rq->curr->prio) + __trace_start_sched_wakeup(p); } static inline int cpu_of(struct rq *rq) @@ -984,6 +990,7 @@ unsigned long weighted_cpuload(const int static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) { #ifdef CONFIG_SMP + trace_change_sched_cpu(p, cpu); task_thread_info(p)->cpu = cpu; set_task_cfs_rq(p); #endif @@ -1551,14 +1558,19 @@ out: int fastcall wake_up_process(struct task_struct *p) { - return try_to_wake_up(p, TASK_STOPPED | TASK_TRACED | + int ret = try_to_wake_up(p, TASK_STOPPED | TASK_TRACED | TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE, 0); + mcount(); + return ret; } EXPORT_SYMBOL(wake_up_process); int fastcall wake_up_state(struct task_struct *p, unsigned int state) { - return try_to_wake_up(p, state, 0); + int ret = try_to_wake_up(p, state, 0); + + mcount(); + return ret; } /* @@ -1728,6 +1740,7 @@ static inline void finish_task_switch(st prev_state = prev->state; finish_arch_switch(prev); finish_lock_switch(rq, prev); + trace_stop_sched_switched(current); if (mm) mmdrop(mm); if (unlikely(prev_state == TASK_DEAD)) { @@ -1799,10 +1812,13 @@ context_switch(struct rq *rq, struct tas spin_release(&rq->lock.dep_map, 1, _THIS_IP_); #endif + trace_cmdline(); + /* Here we just switch the register state and the stack. 
*/ switch_to(prev, next, prev); barrier(); + trace_special_pid(prev->pid, PRIO(prev), PRIO(current)); /* * this_rq must be evaluated again because prev may have moved * CPUs since it called schedule(), thus the 'rq' on its stack @@ -3247,41 +3263,39 @@ void scheduler_tick(void) #endif } -#if defined(CONFIG_PREEMPT) && defined(CONFIG_DEBUG_PREEMPT) +#if defined(CONFIG_EVENT_TRACE) && defined(CONFIG_DEBUG_RT_MUTEXES) -void fastcall add_preempt_count(int val) +static void trace_array(struct prio_array *array) { - /* - * Underflow? - */ - if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0))) - return; - preempt_count() += val; - /* - * Spinlock count overflowing soon? - */ - DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >= - PREEMPT_MASK - 10); + int i; + struct task_struct *p; + struct list_head *head, *tmp; + + for (i = 0; i < MAX_RT_PRIO; i++) { + head = array->queue + i; + if (list_empty(head)) { + WARN_ON(test_bit(i, array->bitmap)); + continue; + } + WARN_ON(!test_bit(i, array->bitmap)); + list_for_each(tmp, head) { + p = list_entry(tmp, struct task_struct, run_list); + trace_special_pid(p->pid, p->prio, PRIO(p)); + } + } } -EXPORT_SYMBOL(add_preempt_count); -void fastcall sub_preempt_count(int val) +static inline void trace_all_runnable_tasks(struct rq *rq) { - /* - * Underflow? - */ - if (DEBUG_LOCKS_WARN_ON(val > preempt_count())) - return; - /* - * Is the spinlock portion underflowing? - */ - if (DEBUG_LOCKS_WARN_ON((val < PREEMPT_MASK) && - !(preempt_count() & PREEMPT_MASK))) - return; + if (trace_enabled) + trace_array(&rq->active); +} - preempt_count() -= val; +#else + +static inline void trace_all_runnable_tasks(struct rq *rq) +{ } -EXPORT_SYMBOL(sub_preempt_count); #endif @@ -3392,6 +3406,8 @@ need_resched_nonpreemptible: prev->sched_class->put_prev_task(rq, prev, now); next = pick_next_task(rq, prev, now); + trace_all_runnable_tasks(rq); + sched_info_switch(prev, next); if (likely(prev != next)) { @@ -3400,8 +3416,10 @@ need_resched_nonpreemptible: ++*switch_count; context_switch(rq, prev, next); /* unlocks the rq */ - } else + } else { spin_unlock_irq(&rq->lock); + trace_stop_sched_switched(next); + } if (unlikely(reacquire_kernel_lock(current) < 0)) { cpu = smp_processor_id(); @@ -3866,6 +3884,7 @@ void rt_mutex_setprio(struct task_struct check_preempt_curr(rq, p); } } + task_rq_unlock(rq, &flags); } @@ -6374,6 +6393,7 @@ void __might_sleep(char *file, int line) if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) return; prev_jiffy = jiffies; + stop_trace(); printk(KERN_ERR "BUG: sleeping function called from invalid" " context at %s:%d\n", file, line); printk("in_atomic():%d, irqs_disabled():%d\n", Index: linux-rt-rebase.q/kernel/sysctl.c =================================================================== --- linux-rt-rebase.q.orig/kernel/sysctl.c +++ linux-rt-rebase.q/kernel/sysctl.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -44,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -321,6 +323,132 @@ static ctl_table kern_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, +#ifdef CONFIG_WAKEUP_TIMING + { + .ctl_name = CTL_UNNUMBERED, + .procname = "wakeup_timing", + .data = &wakeup_timing, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, +#endif +#if defined(CONFIG_WAKEUP_TIMING) || defined(CONFIG_EVENT_TRACE) + { + .ctl_name = CTL_UNNUMBERED, + .procname = "preempt_max_latency", + .data = &preempt_max_latency, + .maxlen = sizeof(preempt_max_latency), + .mode = 
0644, + .proc_handler = &proc_doulongvec_minmax, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "preempt_thresh", + .data = &preempt_thresh, + .maxlen = sizeof(preempt_thresh), + .mode = 0644, + .proc_handler = &proc_doulongvec_minmax, + }, +#endif +#ifdef CONFIG_EVENT_TRACE + { + .ctl_name = CTL_UNNUMBERED, + .procname = "trace_enabled", + .data = &trace_enabled, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "syscall_tracing", + .data = &syscall_tracing, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "stackframe_tracing", + .data = &stackframe_tracing, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "mcount_enabled", + .data = &mcount_enabled, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "trace_user_triggered", + .data = &trace_user_triggered, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "trace_user_trigger_irq", + .data = &trace_user_trigger_irq, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "trace_freerunning", + .data = &trace_freerunning, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "trace_print_on_crash", + .data = &trace_print_on_crash, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "trace_verbose", + .data = &trace_verbose, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "trace_all_cpus", + .data = &trace_all_cpus, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "trace_use_raw_cycles", + .data = &trace_use_raw_cycles, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "trace_all_runnable", + .data = &trace_all_runnable, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, +#endif { .ctl_name = KERN_CORE_USES_PID, .procname = "core_uses_pid", Index: linux-rt-rebase.q/kernel/time/timekeeping.c =================================================================== --- linux-rt-rebase.q.orig/kernel/time/timekeeping.c +++ linux-rt-rebase.q/kernel/time/timekeeping.c @@ -114,6 +114,33 @@ static inline void __get_realtime_clock_ timespec_add_ns(ts, nsecs); } +cycle_t notrace get_monotonic_cycles(void) +{ + cycle_t cycle_now, cycle_delta; + + /* read clocksource: */ + cycle_now = clocksource_read(clock); + + /* calculate the delta since the last update_wall_time: */ + cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; + + return clock->cycle_last + cycle_delta; +} + +unsigned long notrace cycles_to_usecs(cycle_t cycles) +{ + u64 ret = cyc2ns(clock, cycles); + + do_div(ret, 1000); + + return ret; +} + +cycle_t notrace usecs_to_cycles(unsigned long usecs) +{ + return ns2cyc(clock, (u64)usecs * 1000); +} + /** * getnstimeofday - Returns the time of day in a timespec * @ts: pointer to the timespec to be set Index: linux-rt-rebase.q/lib/Kconfig.debug 
===================================================================
--- linux-rt-rebase.q.orig/lib/Kconfig.debug
+++ linux-rt-rebase.q/lib/Kconfig.debug
@@ -332,6 +332,192 @@ config STACKTRACE
 depends on DEBUG_KERNEL
 depends on STACKTRACE_SUPPORT
+config PREEMPT_TRACE
+ bool
+ default y
+ depends on DEBUG_PREEMPT
+
+config EVENT_TRACE
+ bool "Kernel event tracing"
+ default n
+ depends on GENERIC_TIME
+ select FRAME_POINTER
+ select STACKTRACE
+ help
+ This option enables a kernel tracing mechanism that will track
+ certain kernel events such as system call entry and return,
+ IRQ entry, context-switching, etc.
+
+ Run the scripts/trace-it utility on a kernel with this option
+ enabled for sample output.
+
+config FUNCTION_TRACE
+ bool "Kernel function call tracing"
+ default n
+ depends on !REORDER
+ select EVENT_TRACE
+ help
+ This option enables a kernel tracing mechanism that will track
+ precise function-call granularity kernel execution. Sample
+ output:
+
+ pcscd-1772 0D..2 6867us : deactivate_task (-2 1)
+ pcscd-1772 0D..2 6867us : dequeue_task (deactivate_task)
+ <idle>-0 0D..2 6870us : __switch_to (__schedule)
+ <idle>-0 0D..2 6871us : __schedule (-2 20)
+ <idle>-0 0D..2 6871us : __lock_acquire (lock_acquire)
+ <idle>-0 0D..2 6872us : __spin_unlock_irq (__schedule)
+
+ Run the scripts/trace-it sample utility on a kernel with this
+ option enabled to capture 1 second's worth of events.
+
+ (Note that kernel size and overhead increase noticeably
+ with this option enabled.)
+
+config WAKEUP_TIMING
+ bool "Wakeup latency timing"
+ depends on GENERIC_TIME
+ help
+ This option measures the time spent from a high-prio thread being
+ woken up to it getting scheduled on a CPU, with microsecond
+ accuracy.
+
+ The default measurement method is a maximum search, which is
+ disabled by default and can be runtime (re-)started via:
+
+ echo 0 > /proc/sys/kernel/preempt_max_latency
+
+config LATENCY_TRACE
+ bool "Latency tracing"
+ default n
+ depends on LATENCY_TIMING && !REORDER && GENERIC_TIME
+ select FRAME_POINTER
+ select FUNCTION_TRACE
+ help
+ When this option is enabled then the last maximum latency timing
+ event's full trace can be found in /proc/latency_trace, in a
+ human-readable (or rather, as some would say, in a
+ kernel-developer-readable) form.
+
+ (Note that kernel size and overhead increase noticeably
+ with this option enabled.)
+
+config CRITICAL_PREEMPT_TIMING
+ bool "Non-preemptible critical section latency timing"
+ default n
+ depends on PREEMPT
+ depends on GENERIC_TIME
+ help
+ This option measures the time spent in preempt-off critical
+ sections, with microsecond accuracy.
+
+ The default measurement method is a maximum search, which is
+ disabled by default and can be runtime (re-)started via:
+
+ echo 0 > /proc/sys/kernel/preempt_max_latency
+
+ (Note that kernel size and overhead increase with this option
+ enabled. This option and the irqs-off timing option can be
+ used together or separately.)
+
+config CRITICAL_IRQSOFF_TIMING
+ bool "Interrupts-off critical section latency timing"
+ default n
+ depends on GENERIC_TIME
+ select TRACE_IRQFLAGS
+ help
+ This option measures the time spent in irqs-off critical
+ sections, with microsecond accuracy.
+
+ The default measurement method is a maximum search, which is
+ disabled by default and can be runtime (re-)started via:
+
+ echo 0 > /proc/sys/kernel/preempt_max_latency
+
+ (Note that kernel size and overhead increase with this option
+ enabled. This option and the preempt-off timing option can be
+ used together or separately.)
+
+config WAKEUP_LATENCY_HIST
+ bool "wakeup latency histogram"
+ default n
+ depends on WAKEUP_TIMING
+ help
+ This option logs all the wakeup latency timing into a histogram
+ bucket; at the same time, it also suppresses the printk output
+ normally produced by wakeup latency timing.
+
+ The wakeup latency timing histogram can be viewed via:
+
+ cat /proc/latency_hist/wakeup_latency/CPU*
+
+ (Note: * represents the CPU ID.)
+
+config PREEMPT_OFF_HIST
+ bool "non-preemptible critical section latency histogram"
+ default n
+ depends on CRITICAL_PREEMPT_TIMING
+ help
+ This option logs all the non-preemptible critical section latency
+ timing into a histogram bucket; at the same time, it also
+ suppresses the printk output normally produced by non-preemptible
+ critical section latency timing.
+
+ The non-preemptible critical section latency timing histogram can
+ be viewed via:
+
+ cat /proc/latency_hist/preempt_off_latency/CPU*
+
+ (Note: * represents the CPU ID.)
+
+config INTERRUPT_OFF_HIST
+ bool "interrupts-off critical section latency histogram"
+ default n
+ depends on CRITICAL_IRQSOFF_TIMING
+ help
+ This option logs all the interrupts-off critical section latency
+ timing into a histogram bucket; at the same time, it also
+ suppresses the printk output normally produced by interrupts-off
+ critical section latency timing.
+
+ The interrupts-off critical section latency timing histogram can
+ be viewed via:
+
+ cat /proc/latency_hist/interrupt_off_latency/CPU*
+
+ (Note: * represents the CPU ID.)
+
+config CRITICAL_TIMING
+ bool
+ default y
+ depends on CRITICAL_PREEMPT_TIMING || CRITICAL_IRQSOFF_TIMING
+
+config DEBUG_TRACE_IRQFLAGS
+ bool
+ default y
+ depends on CRITICAL_IRQSOFF_TIMING
+
+config LATENCY_TIMING
+ bool
+ default y
+ depends on WAKEUP_TIMING || CRITICAL_TIMING
+ select SYSCTL
+
+config CRITICAL_LATENCY_HIST
+ bool
+ default y
+ depends on PREEMPT_OFF_HIST || INTERRUPT_OFF_HIST
+
+config LATENCY_HIST
+ bool
+ default y
+ depends on WAKEUP_LATENCY_HIST || CRITICAL_LATENCY_HIST
+
+config MCOUNT
+ bool
+ depends on FUNCTION_TRACE
+ default y
+
 config DEBUG_KOBJECT
 bool "kobject debugging"
 depends on DEBUG_KERNEL
Index: linux-rt-rebase.q/lib/debug_locks.c
===================================================================
--- linux-rt-rebase.q.orig/lib/debug_locks.c
+++ linux-rt-rebase.q/lib/debug_locks.c
@@ -10,6 +10,7 @@
 */
 #include
 #include
+#include
 #include
 #include
 #include
@@ -36,7 +37,14 @@ int debug_locks_silent;
 int debug_locks_off(void)
 {
 if (xchg(&debug_locks, 0)) {
+#ifdef CONFIG_DEBUG_RT_MUTEXES
+ if (spin_is_locked(&current->pi_lock))
+ spin_unlock(&current->pi_lock);
+#endif
 if (!debug_locks_silent) {
+ stop_trace();
+ user_trace_stop();
+ printk("stopped custom tracer.\n");
 console_verbose();
 return 1;
 }
Index: linux-rt-rebase.q/scripts/Makefile
===================================================================
--- linux-rt-rebase.q.orig/scripts/Makefile
+++ linux-rt-rebase.q/scripts/Makefile
@@ -7,6 +7,7 @@
 # conmakehash: Create chartable
 # conmakehash: Create arrays for initializing the kernel console tables
+hostprogs-$(CONFIG_EVENT_TRACE) += trace-it
 hostprogs-$(CONFIG_KALLSYMS) += kallsyms
 hostprogs-$(CONFIG_LOGO) += pnmtologo
 hostprogs-$(CONFIG_VT) += conmakehash
Index: linux-rt-rebase.q/scripts/trace-it.c
===================================================================
--- /dev/null
+++ linux-rt-rebase.q/scripts/trace-it.c
@@ -0,0 +1,79 @@
+
+/*
+ * Copyright (C) 2005, Ingo Molnar
+ *
+ * user-triggered tracing.
+ *
+ * The -rt kernel has a built-in kernel tracer, which will trace
+ * all kernel function calls (and a couple of special events as well),
+ * by using a build-time gcc feature that instruments all kernel
+ * functions.
+ *
+ * The tracer is highly automated for a number of latency tracing purposes,
+ * but it can also be switched into 'user-triggered' mode, which is a
+ * half-automatic tracing mode where userspace apps start and stop the
+ * tracer. This file shows a dumb example of how to turn user-triggered
+ * tracing on, and how to start/stop tracing. Note that if you do
+ * multiple start/stop sequences, the kernel will do a maximum search
+ * over their latencies, and will keep the trace of the largest latency
+ * in /proc/latency_trace. The maximums are also reported to the kernel
+ * log. (but can also be read from /proc/sys/kernel/preempt_max_latency)
+ *
+ * For the tracer to be activated, turn on CONFIG_EVENT_TRACE
+ * in the .config, rebuild the kernel and boot into it. The trace will
+ * get _a lot_ more verbose if you also turn on CONFIG_FUNCTION_TRACE,
+ * every kernel function call will be put into the trace. Note that
+ * CONFIG_FUNCTION_TRACE has significant runtime overhead, so you don't
+ * want to use it for performance testing :)
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/wait.h>
+#include <sys/prctl.h>
+
+int main (int argc, char **argv)
+{
+ int ret;
+
+ if (getuid() != 0) {
+ fprintf(stderr, "needs to run as root.\n");
+ exit(1);
+ }
+ ret = system("cat /proc/sys/kernel/mcount_enabled >/dev/null 2>/dev/null");
+ if (ret) {
+ fprintf(stderr, "CONFIG_LATENCY_TRACE not enabled?\n");
+ exit(1);
+ }
+ system("echo 1 > /proc/sys/kernel/trace_user_triggered");
+ system("[ -e /proc/sys/kernel/wakeup_timing ] && echo 0 > /proc/sys/kernel/wakeup_timing");
+ system("echo 1 > /proc/sys/kernel/trace_enabled");
+ system("echo 1 > /proc/sys/kernel/mcount_enabled");
+ system("echo 0 > /proc/sys/kernel/trace_freerunning");
+ system("echo 0 > /proc/sys/kernel/trace_print_on_crash");
+ system("echo 0 > /proc/sys/kernel/trace_verbose");
+ system("echo 0 > /proc/sys/kernel/preempt_thresh 2>/dev/null");
+ system("echo 0 > /proc/sys/kernel/preempt_max_latency 2>/dev/null");
+
+ // start tracing
+ if (prctl(0, 1)) {
+ fprintf(stderr, "trace-it: couldn't start tracing!\n");
+ return 1;
+ }
+ usleep(10000000);
+ if (prctl(0, 0)) {
+ fprintf(stderr, "trace-it: couldn't stop tracing!\n");
+ return 1;
+ }
+
+ system("echo 0 > /proc/sys/kernel/trace_user_triggered");
+ system("echo 0 > /proc/sys/kernel/trace_enabled");
+ system("cat /proc/latency_trace");
+
+ return 0;
+}
+
+
patches/clockevents-allow-build-without-runtime-use.patch0000664000077200007720000000557210653433161023262 0ustar mingomingoSubject: clockevents: Allow build w/o run-time usage for migration purposes

Migration aid to allow preparatory patches which introduce not yet used parts of clock events code.
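(Illustrative sketch, not part of the patch series: the following hypothetical caller shows the pattern this migration aid enables. With CONFIG_GENERIC_CLOCKEVENTS disabled, clockevents_notify() is the header's do { } while (0) stub, so the call site needs no #ifdef, while CONFIG_GENERIC_CLOCKEVENTS_BUILD still builds the core code.)

	#include <linux/clockchips.h>

	/* Hypothetical caller, for illustration only. */
	static void example_cpu_dead_notify(int cpu)
	{
		/*
		 * Compiles to nothing when CONFIG_GENERIC_CLOCKEVENTS is
		 * off, and calls into the clockevents core when it is on.
		 */
		clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DEAD, &cpu);
	}

This keeps architecture code free of clockevents #ifdefs during the migration.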
Signed-off-by: Thomas Gleixner --- include/linux/clockchips.h | 8 ++++++-- kernel/time/Kconfig | 5 +++++ kernel/time/Makefile | 2 +- kernel/time/clockevents.c | 3 ++- 4 files changed, 14 insertions(+), 4 deletions(-) Index: linux-rt-rebase.q/include/linux/clockchips.h =================================================================== --- linux-rt-rebase.q.orig/include/linux/clockchips.h +++ linux-rt-rebase.q/include/linux/clockchips.h @@ -8,7 +8,7 @@ #ifndef _LINUX_CLOCKCHIPS_H #define _LINUX_CLOCKCHIPS_H -#ifdef CONFIG_GENERIC_CLOCKEVENTS +#ifdef CONFIG_GENERIC_CLOCKEVENTS_BUILD #include #include @@ -127,9 +127,13 @@ extern void clockevents_unregister_notif extern int clockevents_program_event(struct clock_event_device *dev, ktime_t expires, ktime_t now); +#ifdef CONFIG_GENERIC_CLOCKEVENTS extern void clockevents_notify(unsigned long reason, void *arg); - #else +# define clockevents_notify(reason, arg) do { } while (0) +#endif + +#else /* CONFIG_GENERIC_CLOCKEVENTS_BUILD */ #define clockevents_notify(reason, arg) do { } while (0) Index: linux-rt-rebase.q/kernel/time/Kconfig =================================================================== --- linux-rt-rebase.q.orig/kernel/time/Kconfig +++ linux-rt-rebase.q/kernel/time/Kconfig @@ -23,3 +23,8 @@ config HIGH_RES_TIMERS hardware is not capable then this option only increases the size of the kernel image. +config GENERIC_CLOCKEVENTS_BUILD + bool + default y + depends on GENERIC_CLOCKEVENTS || GENERIC_CLOCKEVENTS_MIGR + Index: linux-rt-rebase.q/kernel/time/Makefile =================================================================== --- linux-rt-rebase.q.orig/kernel/time/Makefile +++ linux-rt-rebase.q/kernel/time/Makefile @@ -1,6 +1,6 @@ obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o -obj-$(CONFIG_GENERIC_CLOCKEVENTS) += clockevents.o +obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD) += clockevents.o obj-$(CONFIG_GENERIC_CLOCKEVENTS) += tick-common.o obj-$(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) += tick-broadcast.o obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o Index: linux-rt-rebase.q/kernel/time/clockevents.c =================================================================== --- linux-rt-rebase.q.orig/kernel/time/clockevents.c +++ linux-rt-rebase.q/kernel/time/clockevents.c @@ -204,6 +204,7 @@ void clockevents_exchange_device(struct local_irq_restore(flags); } +#ifdef CONFIG_GENERIC_CLOCKEVENTS /** * clockevents_notify - notification about relevant events */ @@ -232,4 +233,4 @@ void clockevents_notify(unsigned long re spin_unlock(&clockevents_lock); } EXPORT_SYMBOL_GPL(clockevents_notify); - +#endif patches/move-native-irq.patch0000664000077200007720000000172210653433162015572 0ustar mingomingo--- kernel/irq/migration.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) Index: linux-rt-rebase.q/kernel/irq/migration.c =================================================================== --- linux-rt-rebase.q.orig/kernel/irq/migration.c +++ linux-rt-rebase.q/kernel/irq/migration.c @@ -61,6 +61,7 @@ void move_masked_irq(int irq) void move_native_irq(int irq) { struct irq_desc *desc = irq_desc + irq; + int mask = 1; if (likely(!(desc->status & IRQ_MOVE_PENDING))) return; @@ -68,8 +69,17 @@ void move_native_irq(int irq) if (unlikely(desc->status & IRQ_DISABLED)) return; - desc->chip->mask(irq); + /* + * If the irq is already in progress, it should be masked. + * If we unmask it, we might cause an interrupt storm on RT. 
+ */ + if (unlikely(desc->status & IRQ_INPROGRESS)) + mask = 0; + + if (mask) + desc->chip->mask(irq); move_masked_irq(irq); - desc->chip->unmask(irq); + if (mask) + desc->chip->unmask(irq); } patches/arm-fix-atomic-cmpxchg.patch0000664000077200007720000000121010653433164017005 0ustar mingomingo--- include/asm-arm/atomic.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) Index: linux-rt-rebase.q/include/asm-arm/atomic.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-arm/atomic.h +++ linux-rt-rebase.q/include/asm-arm/atomic.h @@ -189,10 +189,10 @@ static inline unsigned long __cmpxchg(vo volatile unsigned long *p = ptr; if (size == 4) { - local_irq_save(flags); + raw_local_irq_save(flags); if ((prev = *p) == old) *p = new; - local_irq_restore(flags); + raw_local_irq_restore(flags); return(prev); } else return wrong_size_cmpxchg(ptr); patches/cputimer-thread-rt-fix.patch0000664000077200007720000000315010653433165017053 0ustar mingomingo--- kernel/posix-cpu-timers.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) Index: linux-rt-rebase.q/kernel/posix-cpu-timers.c =================================================================== --- linux-rt-rebase.q.orig/kernel/posix-cpu-timers.c +++ linux-rt-rebase.q/kernel/posix-cpu-timers.c @@ -1292,18 +1292,6 @@ void __run_posix_cpu_timers(struct task_ LIST_HEAD(firing); struct k_itimer *timer, *next; - -#define UNEXPIRED(clock) \ - (cputime_eq(tsk->it_##clock##_expires, cputime_zero) || \ - cputime_lt(clock##_ticks(tsk), tsk->it_##clock##_expires)) - - if (UNEXPIRED(prof) && UNEXPIRED(virt) && - (tsk->it_sched_expires == 0 || - tsk->se.sum_exec_runtime < tsk->it_sched_expires)) - return; - -#undef UNEXPIRED - /* * Double-check with locks held. 
*/ @@ -1428,6 +1416,19 @@ void run_posix_cpu_timers(struct task_st BUG_ON(!irqs_disabled()); if(!per_cpu(posix_timer_task, cpu)) return; + + +#define UNEXPIRED(clock) \ + (cputime_eq(tsk->it_##clock##_expires, cputime_zero) || \ + cputime_lt(clock##_ticks(tsk), tsk->it_##clock##_expires)) + + if (UNEXPIRED(prof) && UNEXPIRED(virt) && + (tsk->it_sched_expires == 0 || + tsk->sum_exec_runtime < tsk->it_sched_expires)) + return; + +#undef UNEXPIRED + /* get per-cpu references */ tasklist = per_cpu(posix_timer_tasklist, cpu); @@ -1446,7 +1447,7 @@ void run_posix_cpu_timers(struct task_st per_cpu(posix_timer_tasklist, cpu) = tsk; } /* XXX signal the thread somehow */ - wake_up_process(per_cpu(posix_timer_task,cpu)); + wake_up_process(per_cpu(posix_timer_task, cpu)); } patches/preempt-realtime-ppc-more-resched-fixups.patch0000664000077200007720000000546510653433165022505 0ustar mingomingo--- arch/powerpc/kernel/entry_64.S | 16 +++++++++++----- arch/powerpc/kernel/idle.c | 4 ++-- include/asm-powerpc/thread_info.h | 3 ++- 3 files changed, 15 insertions(+), 8 deletions(-) Index: linux-rt-rebase.q/arch/powerpc/kernel/entry_64.S =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/kernel/entry_64.S +++ linux-rt-rebase.q/arch/powerpc/kernel/entry_64.S @@ -449,7 +449,8 @@ _GLOBAL(ret_from_except_lite) #ifdef CONFIG_PREEMPT clrrdi r9,r1,THREAD_SHIFT /* current_thread_info() */ - li r0,_TIF_NEED_RESCHED /* bits to check */ + li r0,(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED) + /* bits to check */ ld r3,_MSR(r1) ld r4,TI_FLAGS(r9) /* Move MSR_PR bit in r3 to _TIF_SIGPENDING position in r0 */ @@ -558,16 +559,21 @@ do_work: cmpdi r0,0 crandc eq,cr1*4+eq,eq bne restore + /* here we are preempting the current task */ 1: - /* preempt_schedule_irq() expects interrupts disabled. */ - bl .preempt_schedule_irq + li r0,1 + stb r0,PACASOFTIRQEN(r13) + stb r0,PACAHARDIRQEN(r13) + ori r10,r10,MSR_EE + mtmsrd r10,1 /* reenable interrupts */ + bl .preempt_schedule mfmsr r10 clrrdi r9,r1,THREAD_SHIFT rldicl r10,r10,48,1 /* disable interrupts again */ rotldi r10,r10,16 mtmsrd r10,1 ld r4,TI_FLAGS(r9) - andi. r0,r4,_TIF_NEED_RESCHED + andi. r0,r4,(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED) bne 1b b restore @@ -582,7 +588,7 @@ user_work: ori r10,r10,MSR_EE mtmsrd r10,1 - andi. r0,r4,_TIF_NEED_RESCHED + andi. 
r0,r4,(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED)
 beq 1f
 bl .schedule
 b .ret_from_except_lite
Index: linux-rt-rebase.q/arch/powerpc/kernel/idle.c
===================================================================
--- linux-rt-rebase.q.orig/arch/powerpc/kernel/idle.c
+++ linux-rt-rebase.q/arch/powerpc/kernel/idle.c
@@ -61,8 +61,8 @@ void cpu_idle(void)
 set_thread_flag(TIF_POLLING_NRFLAG);
 while (1) {
 tick_nohz_stop_sched_tick();
-
- while (!need_resched() && !cpu_should_die()) {
+ while (!need_resched() && !need_resched_delayed() &&
+ !cpu_should_die()) {
 ppc64_runlatch_off();
 if (ppc_md.power_save) {
Index: linux-rt-rebase.q/include/asm-powerpc/thread_info.h
===================================================================
--- linux-rt-rebase.q.orig/include/asm-powerpc/thread_info.h
+++ linux-rt-rebase.q/include/asm-powerpc/thread_info.h
@@ -150,7 +150,8 @@ static inline struct thread_info *curren
 #define _TIF_SYSCALL_T_OR_A (_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP)
 #define _TIF_USER_WORK_MASK (_TIF_NOTIFY_RESUME | _TIF_SIGPENDING | \
- _TIF_NEED_RESCHED | _TIF_RESTORE_SIGMASK)
+ _TIF_NEED_RESCHED | _TIF_RESTORE_SIGMASK | \
+ _TIF_NEED_RESCHED_DELAYED)
 #define _TIF_PERSYSCALL_MASK (_TIF_RESTOREALL|_TIF_NOERROR)
 /* Bits in local_flags */
patches/x86_64-prep-idle-loop-for-dynticks.patch0000664000077200007720000000234610653433161020747 0ustar mingomingoSubject: x86_64: prepare idle loop for dynamic ticks
From: Chris Wright

Add tick_nohz_{stop,restart}_sched_tick to idle loop in preparation for turning on dynticks. These are just no-ops until NO_HZ is enabled.

Signed-off-by: Chris Wright
Signed-off-by: Thomas Gleixner
Signed-off-by: Ingo Molnar

---
 arch/x86_64/kernel/process.c | 4 ++++
 1 file changed, 4 insertions(+)

Index: linux-rt-rebase.q/arch/x86_64/kernel/process.c
===================================================================
--- linux-rt-rebase.q.orig/arch/x86_64/kernel/process.c
+++ linux-rt-rebase.q/arch/x86_64/kernel/process.c
@@ -37,6 +37,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
@@ -207,6 +208,8 @@ void cpu_idle (void)
 if (__get_cpu_var(cpu_idle_state))
 __get_cpu_var(cpu_idle_state) = 0;
+ tick_nohz_stop_sched_tick();
+
 check_pgt_cache();
 rmb();
 idle = pm_idle;
@@ -228,6 +231,7 @@ void cpu_idle (void)
 __exit_idle();
 }
+ tick_nohz_restart_sched_tick();
 preempt_enable_no_resched();
 schedule();
 preempt_disable();
patches/preempt-realtime-net-softirq-fixups.patch0000664000077200007720000000274310653433166021615 0ustar mingomingoSubject: NOHZ: local_softirq_pending with tickless
From: Mikulas Patocka
 quota += dev->weight;
 else
 dev->quota = dev->weight;
- __raise_softirq_irqoff(NET_RX_SOFTIRQ);
+ raise_softirq_irqoff(NET_RX_SOFTIRQ);
 local_irq_restore(flags);
 }
 EXPORT_SYMBOL(__netif_rx_schedule);
@@ -2107,7 +2107,7 @@ out:
 softnet_break:
 __get_cpu_var(netdev_rx_stat).time_squeeze++;
- __raise_softirq_irqoff(NET_RX_SOFTIRQ);
+ raise_softirq_irqoff(NET_RX_SOFTIRQ);
 goto out;
 }
patches/irda-fix.patch0000664000077200007720000000176610653433170014261 0ustar mingomingoThis was found around the 2.6.10 timeframe when testing with the -rt patch and I believe it is still an issue. irttp_dup() does a memcpy() of the tsap_cb structure, causing the spinlock protecting various fields in the structure to be duped. This works OK in the non-RT case, but in the RT case we end up with two mutexes pointing to the same wait_list, leading to an OOPS. Fix is to simply initialize the spinlock after the memcpy().
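(Illustrative sketch, not part of the patch series: the rule being applied here, shown on a hypothetical structure, is that a lock embedded in a structure duplicated via memcpy() must be re-initialized in the copy. On PREEMPT_RT the copied spinlock is an rtmutex whose internals, such as the wait list, would otherwise be shared between the two instances.)

	#include <linux/slab.h>
	#include <linux/spinlock.h>
	#include <linux/string.h>

	struct foo {
		spinlock_t	lock;
		int		state;
	};

	/* Hypothetical helper following the same rule as irttp_dup(). */
	static struct foo *foo_dup(const struct foo *orig)
	{
		struct foo *new = kmalloc(sizeof(*new), GFP_ATOMIC);

		if (!new)
			return NULL;
		memcpy(new, orig, sizeof(*new));
		/*
		 * memcpy() duplicated the lock's internal state too; give
		 * the copy a freshly initialized, unowned lock of its own.
		 */
		spin_lock_init(&new->lock);
		return new;
	}

The same reasoning applies to any kmemdup()-style duplication of lock-bearing structures.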
Signed-off-by: Deepak Saxena
---
 net/irda/irttp.c | 1 +
 1 file changed, 1 insertion(+)

Index: linux-rt-rebase.q/net/irda/irttp.c
===================================================================
--- linux-rt-rebase.q.orig/net/irda/irttp.c
+++ linux-rt-rebase.q/net/irda/irttp.c
@@ -1453,6 +1453,7 @@ struct tsap_cb *irttp_dup(struct tsap_cb
 }
 /* Dup */
 memcpy(new, orig, sizeof(struct tsap_cb));
+ spin_lock_init(&new->lock);
 /* We don't need the old instance any more */
 spin_unlock_irqrestore(&irttp->tsaps->hb_spinlock, flags);
patches/trace-with-caller-addr.patch0000664000077200007720000000700010653433162016761 0ustar mingomingo---
 arch/x86_64/lib/thunk.S | 18 ++++++++++++++++--
 kernel/latency_trace.c | 22 ++++++++++++++++++++++
 kernel/lockdep.c | 16 ++++++++++++----
 3 files changed, 50 insertions(+), 6 deletions(-)

Index: linux-rt-rebase.q/arch/x86_64/lib/thunk.S
===================================================================
--- linux-rt-rebase.q.orig/arch/x86_64/lib/thunk.S
+++ linux-rt-rebase.q/arch/x86_64/lib/thunk.S
@@ -47,8 +47,22 @@ thunk __up_wakeup,__up
 #ifdef CONFIG_TRACE_IRQFLAGS
- thunk trace_hardirqs_on_thunk,trace_hardirqs_on
- thunk trace_hardirqs_off_thunk,trace_hardirqs_off
+ /* put return address in rdi (arg1) */
+ .macro thunk_ra name,func
+ .globl \name
+\name:
+ CFI_STARTPROC
+ SAVE_ARGS
+ /* SAVE_ARGS pushes 9 elements */
+ /* the next element would be the rip */
+ movq 9*8(%rsp), %rdi
+ call \func
+ jmp restore
+ CFI_ENDPROC
+ .endm
+
+ thunk_ra trace_hardirqs_on_thunk,trace_hardirqs_on_caller
+ thunk_ra trace_hardirqs_off_thunk,trace_hardirqs_off_caller
 #endif
 /* SAVE_ARGS below is used only for the .cfi directives it contains. */
Index: linux-rt-rebase.q/kernel/latency_trace.c
===================================================================
--- linux-rt-rebase.q.orig/kernel/latency_trace.c
+++ linux-rt-rebase.q/kernel/latency_trace.c
@@ -1984,6 +1984,28 @@ void notrace trace_hardirqs_off(void)
 EXPORT_SYMBOL(trace_hardirqs_off);
+
+/* used by x86_64 thunk.S */
+void notrace trace_hardirqs_on_caller(unsigned long caller_addr)
+{
+ unsigned long flags;
+
+ local_save_flags(flags);
+
+ if (!irqs_off_preempt_count() && irqs_disabled_flags(flags))
+ __stop_critical_timing(caller_addr, 0 /* CALLER_ADDR1 */);
+}
+
+void notrace trace_hardirqs_off_caller(unsigned long caller_addr)
+{
+ unsigned long flags;
+
+ local_save_flags(flags);
+
+ if (!irqs_off_preempt_count() && irqs_disabled_flags(flags))
+ __start_critical_timing(caller_addr, 0 /* CALLER_ADDR1 */,
+ INTERRUPT_LATENCY);
+}
+
 #endif /* !CONFIG_LOCKDEP */
 #endif /* CONFIG_CRITICAL_IRQSOFF_TIMING */
Index: linux-rt-rebase.q/kernel/lockdep.c
===================================================================
--- linux-rt-rebase.q.orig/kernel/lockdep.c
+++ linux-rt-rebase.q/kernel/lockdep.c
@@ -2009,7 +2009,7 @@ void early_boot_irqs_on(void)
 /*
 * Hardirqs will be enabled:
 */
-void notrace trace_hardirqs_on(void)
+void notrace trace_hardirqs_on_caller(unsigned long a0)
 {
 struct task_struct *curr = current;
 unsigned long ip;
@@ -2051,16 +2051,20 @@ void notrace trace_hardirqs_on(void)
 curr->hardirq_enable_event = ++curr->irq_events;
 debug_atomic_inc(&hardirqs_on_events);
 #ifdef CONFIG_CRITICAL_IRQSOFF_TIMING
- time_hardirqs_on(CALLER_ADDR0, 0 /* CALLER_ADDR1 */);
+ time_hardirqs_on(a0, 0 /* CALLER_ADDR1 */);
 #endif
 }
+void notrace trace_hardirqs_on(void) {
+ trace_hardirqs_on_caller(CALLER_ADDR0);
+}
+
 EXPORT_SYMBOL(trace_hardirqs_on);
 /*
 * Hardirqs were disabled:
 */
-void notrace trace_hardirqs_off(void)
+void 
notrace trace_hardirqs_off_caller(unsigned long a0)
 {
 struct task_struct *curr = current;
@@ -2079,12 +2083,16 @@ void notrace trace_hardirqs_off(void)
 curr->hardirq_disable_event = ++curr->irq_events;
 debug_atomic_inc(&hardirqs_off_events);
 #ifdef CONFIG_CRITICAL_IRQSOFF_TIMING
- time_hardirqs_off(CALLER_ADDR0, 0 /* CALLER_ADDR1 */);
+ time_hardirqs_off(a0, 0 /* CALLER_ADDR1 */);
 #endif
 } else
 debug_atomic_inc(&redundant_hardirqs_off);
 }
+void notrace trace_hardirqs_off(void) {
+ trace_hardirqs_off_caller(CALLER_ADDR0);
+}
+
 EXPORT_SYMBOL(trace_hardirqs_off);
 /*
patches/serial-slow-machines.patch0000664000077200007720000000323510653433165016601 0ustar mingomingo---
 drivers/char/tty_io.c | 4 ++++
 drivers/serial/8250.c | 11 ++++++++++-
 2 files changed, 14 insertions(+), 1 deletion(-)

Index: linux-rt-rebase.q/drivers/char/tty_io.c
===================================================================
--- linux-rt-rebase.q.orig/drivers/char/tty_io.c
+++ linux-rt-rebase.q/drivers/char/tty_io.c
@@ -3648,10 +3648,14 @@ void tty_flip_buffer_push(struct tty_str
 tty->buf.tail->commit = tty->buf.tail->used;
 spin_unlock_irqrestore(&tty->buf.lock, flags);
+#ifndef CONFIG_PREEMPT_RT
 if (tty->low_latency)
 flush_to_ldisc(&tty->buf.work.work);
 else
 schedule_delayed_work(&tty->buf.work, 1);
+#else
+ flush_to_ldisc(&tty->buf.work.work);
+#endif
 }
 EXPORT_SYMBOL(tty_flip_buffer_push);
Index: linux-rt-rebase.q/drivers/serial/8250.c
===================================================================
--- linux-rt-rebase.q.orig/drivers/serial/8250.c
+++ linux-rt-rebase.q/drivers/serial/8250.c
@@ -1451,7 +1451,10 @@ static irqreturn_t serial8250_interrupt(
 {
 struct irq_info *i = dev_id;
 struct list_head *l, *end = NULL;
- int pass_counter = 0, handled = 0;
+#ifndef CONFIG_PREEMPT_RT
+ int pass_counter = 0;
+#endif
+ int handled = 0;
 DEBUG_INTR("serial8250_interrupt(%d)...", irq);
@@ -1489,12 +1492,18 @@ static irqreturn_t serial8250_interrupt(
 l = l->next;
+ /*
+ * On preempt-rt we can be preempted and run in our
+ * own thread.
+ */
+#ifndef CONFIG_PREEMPT_RT
 if (l == i->head && pass_counter++ > PASS_LIMIT) {
 /* If we hit this, we're dead. */
 printk(KERN_ERR "serial8250: too much work for "
 "irq%d\n", irq);
 break;
 }
+#endif
 } while (l != end);
 spin_unlock(&i->lock);
patches/preempt-rt-cs5530-lock-ide-fix.patch0000664000077200007720000000163210653433165020042 0ustar mingomingo
 drivers/ide/pci/cs5530.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

Index: linux-rt-rebase.q/drivers/ide/pci/cs5530.c
===================================================================
--- linux-rt-rebase.q.orig/drivers/ide/pci/cs5530.c
+++ linux-rt-rebase.q/drivers/ide/pci/cs5530.c
@@ -227,8 +227,8 @@ static unsigned int __devinit init_chips
 goto out;
 }
- spin_lock_irqsave(&ide_lock, flags);
- /* all CPUs (there should only be one CPU with this chipset) */
+ /* Local CPU. ide_lock is acquired in do_ide_setup_pci_device. */
+ local_irq_save(flags);
 /*
 * Enable BusMaster and MemoryWriteAndInvalidate for the cs5530:
@@ -280,7 +280,7 @@ static unsigned int __devinit init_chips
 pci_write_config_byte(master_0, 0x42, 0x00);
 pci_write_config_byte(master_0, 0x43, 0xc1);
- spin_unlock_irqrestore(&ide_lock, flags);
+ local_irq_restore(flags);
 out:
 pci_dev_put(master_0);
patches/kmap-atomic-prepare.patch0000664000077200007720000001122110653433167016405 0ustar mingomingo With the separation of pagefault_{disable,enable}() from the preempt_count a previously overlooked dependency became painfully clear.
kmap_atomic() is per cpu and relies not only on disabling the pagefault handler, but really needs preemption disabled too. Make this explicit now, so that we can change pagefault_disable().

Signed-off-by: Peter Zijlstra
---
 arch/i386/mm/highmem.c | 4 +++-
 arch/mips/mm/highmem.c | 5 ++++-
 arch/sparc/mm/highmem.c | 4 +++-
 include/asm-frv/highmem.h | 2 ++
 include/asm-ppc/highmem.h | 4 +++-
 5 files changed, 15 insertions(+), 4 deletions(-)

Index: linux-rt-rebase.q/arch/i386/mm/highmem.c
===================================================================
--- linux-rt-rebase.q.orig/arch/i386/mm/highmem.c
+++ linux-rt-rebase.q/arch/i386/mm/highmem.c
@@ -51,7 +51,7 @@ void *__kmap_atomic_prot(struct page *pa
 enum fixed_addresses idx;
 unsigned long vaddr;
- /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
+ preempt_disable();
 pagefault_disable();
 idx = type + KM_TYPE_NR*smp_processor_id();
@@ -94,6 +94,7 @@ void __kunmap_atomic(void *kvaddr, enum
 arch_flush_lazy_mmu_mode();
 pagefault_enable();
+ preempt_enable();
 }
 /* This is the same as kmap_atomic() but can map memory that doesn't
@@ -104,6 +105,7 @@ void *__kmap_atomic_pfn(unsigned long pf
 enum fixed_addresses idx;
 unsigned long vaddr;
+ preempt_disable();
 pagefault_disable();
 idx = type + KM_TYPE_NR*smp_processor_id();
Index: linux-rt-rebase.q/arch/mips/mm/highmem.c
===================================================================
--- linux-rt-rebase.q.orig/arch/mips/mm/highmem.c
+++ linux-rt-rebase.q/arch/mips/mm/highmem.c
@@ -38,7 +38,7 @@ void *__kmap_atomic(struct page *page, e
 enum fixed_addresses idx;
 unsigned long vaddr;
- /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
+ preempt_disable();
 pagefault_disable();
 if (!PageHighMem(page))
 return page_address(page);
@@ -63,6 +63,7 @@ void __kunmap_atomic(void *kvaddr, enum
 if (vaddr < FIXADDR_START) { // FIXME
 pagefault_enable();
+ preempt_enable();
 return;
 }
@@ -78,6 +79,7 @@ void __kunmap_atomic(void *kvaddr, enum
 #endif
 pagefault_enable();
+ preempt_enable();
 }
 /*
@@ -89,6 +91,7 @@ void *kmap_atomic_pfn(unsigned long pfn,
 enum fixed_addresses idx;
 unsigned long vaddr;
+ preempt_disable();
 pagefault_disable();
 idx = type + KM_TYPE_NR*smp_processor_id();
Index: linux-rt-rebase.q/arch/sparc/mm/highmem.c
===================================================================
--- linux-rt-rebase.q.orig/arch/sparc/mm/highmem.c
+++ linux-rt-rebase.q/arch/sparc/mm/highmem.c
@@ -34,7 +34,7 @@ void *kmap_atomic(struct page *page, enu
 unsigned long idx;
 unsigned long vaddr;
- /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
+ preempt_disable();
 pagefault_disable();
 if (!PageHighMem(page))
 return page_address(page);
@@ -71,6 +71,7 @@ void kunmap_atomic(void *kvaddr, enum km
 if (vaddr < FIXADDR_START) { // FIXME
 pagefault_enable();
+ preempt_enable();
 return;
 }
@@ -97,6 +98,7 @@ void kunmap_atomic(void *kvaddr, enum km
 #endif
 pagefault_enable();
+ preempt_enable();
 }
 /* We may be fed a pagetable here by ptep_to_xxx and others.
*/
Index: linux-rt-rebase.q/include/asm-frv/highmem.h
===================================================================
--- linux-rt-rebase.q.orig/include/asm-frv/highmem.h
+++ linux-rt-rebase.q/include/asm-frv/highmem.h
@@ -115,6 +115,7 @@ static inline void *kmap_atomic(struct p
 {
 unsigned long paddr;
+ preempt_disable();
 pagefault_disable();
 paddr = page_to_phys(page);
@@ -171,6 +172,7 @@ static inline void kunmap_atomic(void *k
 BUG();
 }
 pagefault_enable();
+ preempt_enable();
 }
 #endif /* !__ASSEMBLY__ */
Index: linux-rt-rebase.q/include/asm-ppc/highmem.h
===================================================================
--- linux-rt-rebase.q.orig/include/asm-ppc/highmem.h
+++ linux-rt-rebase.q/include/asm-ppc/highmem.h
@@ -78,7 +78,7 @@ static inline void *kmap_atomic(struct p
 unsigned int idx;
 unsigned long vaddr;
- /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
+ preempt_disable();
 pagefault_disable();
 if (!PageHighMem(page))
 return page_address(page);
@@ -102,6 +102,7 @@ static inline void kunmap_atomic(void *k
 if (vaddr < KMAP_FIX_BEGIN) { // FIXME
 pagefault_enable();
+ preempt_enable();
 return;
 }
@@ -115,6 +116,7 @@ static inline void kunmap_atomic(void *k
 flush_tlb_page(NULL, vaddr);
 #endif
 pagefault_enable();
+ preempt_enable();
 }
 static inline struct page *kmap_atomic_to_page(void *ptr)
patches/arm-latency-tracer-support.patch0000664000077200007720000000523410653433163017756 0ustar mingomingoadd latency tracer support for EP93xx boards

Add latency tracer support for the EP93xx platform. This is done by:
- adding the correct Kconfig options
- adding (an empty) save_stack_trace implementation.
 -> Someone needs to implement save_stack_trace for arm :)
 Maybe we can use the implementation from rmk?
- implementing mach_read_cycles (read out EP93XX_TIMER4_VALUE_LOW)
- implementing mach_cycles_to_usecs (just the same way as for the PXA platform)
- implementing mach_usecs_to_cycles (just the same way as for the PXA platform)

Signed-off-by: Jan Altenberg
---
 arch/arm/Kconfig | 4 ++++
 arch/arm/lib/Makefile | 1 +
 arch/arm/lib/stacktrace.c | 7 +++++++
 include/asm-arm/arch-ep93xx/timex.h | 6 ++++++
 4 files changed, 18 insertions(+)

Index: linux-rt-rebase.q/arch/arm/Kconfig
===================================================================
--- linux-rt-rebase.q.orig/arch/arm/Kconfig
+++ linux-rt-rebase.q/arch/arm/Kconfig
@@ -33,6 +33,10 @@ config GENERIC_CLOCKEVENTS
 bool
 default n
+config STACKTRACE_SUPPORT
+ bool
+ default y
+
 config MMU
 bool
 default y
Index: linux-rt-rebase.q/arch/arm/lib/Makefile
===================================================================
--- linux-rt-rebase.q.orig/arch/arm/lib/Makefile
+++ linux-rt-rebase.q/arch/arm/lib/Makefile
@@ -41,6 +41,7 @@ lib-$(CONFIG_ARCH_RPC) += ecard.o io-ac
 lib-$(CONFIG_ARCH_CLPS7500) += io-acorn.o
 lib-$(CONFIG_ARCH_L7200) += io-acorn.o
 lib-$(CONFIG_ARCH_SHARK) += io-shark.o
+lib-$(CONFIG_STACKTRACE) += stacktrace.o
 $(obj)/csumpartialcopy.o: $(obj)/csumpartialcopygeneric.S
 $(obj)/csumpartialcopyuser.o: $(obj)/csumpartialcopygeneric.S
Index: linux-rt-rebase.q/arch/arm/lib/stacktrace.c
===================================================================
--- /dev/null
+++ linux-rt-rebase.q/arch/arm/lib/stacktrace.c
@@ -0,0 +1,7 @@
+#include <linux/sched.h>
+#include <linux/stacktrace.h>
+
+void save_stack_trace(struct stack_trace *trace)
+{
+}
+
Index: linux-rt-rebase.q/include/asm-arm/arch-ep93xx/timex.h
===================================================================
--- linux-rt-rebase.q.orig/include/asm-arm/arch-ep93xx/timex.h
+++ 
linux-rt-rebase.q/include/asm-arm/arch-ep93xx/timex.h @@ -1,5 +1,11 @@ /* * linux/include/asm-arm/arch-ep93xx/timex.h */ +#include +#include #define CLOCK_TICK_RATE 983040 + +#define mach_read_cycles() __raw_readl(EP93XX_TIMER4_VALUE_LOW) +#define mach_cycles_to_usecs(d) (((d) * ((1000000LL << 32) / CLOCK_TICK_RATE)) >> 32) +#define mach_usecs_to_cycles(d) (((d) * (((long long)CLOCK_TICK_RATE << 32) / 1000000)) >> 32) patches/vortex-fix.patch0000664000077200007720000000514610653433165014671 0ustar mingomingo Argh, cut and paste wasn't enough... Use this patch instead. It needs an irq disable. But, believe it or not, on SMP this is actually better. If the irq is shared (as it is in Mark's case), we don't stop the irq of other devices from being handled on another CPU (unfortunately for Mark, he pinned all interrupts to one CPU). Andrew, should this be changed in mainline too? -- Steve Signed-off-by: Steven Rostedt drivers/net/3c59x.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) Index: linux-rt-rebase.q/drivers/net/3c59x.c =================================================================== --- linux-rt-rebase.q.orig/drivers/net/3c59x.c +++ linux-rt-rebase.q/drivers/net/3c59x.c @@ -792,9 +792,9 @@ static void poll_vortex(struct net_devic { struct vortex_private *vp = netdev_priv(dev); unsigned long flags; - local_irq_save(flags); + local_irq_save_nort(flags); (vp->full_bus_master_rx ? boomerang_interrupt:vortex_interrupt)(dev->irq,dev); - local_irq_restore(flags); + local_irq_restore_nort(flags); } #endif @@ -1728,6 +1728,7 @@ vortex_timer(unsigned long data) int next_tick = 60*HZ; int ok = 0; int media_status, old_window; + unsigned long flags; if (vortex_debug > 2) { printk(KERN_DEBUG "%s: Media selection timer tick happened, %s.\n", @@ -1735,7 +1736,7 @@ vortex_timer(unsigned long data) printk(KERN_DEBUG "dev->watchdog_timeo=%d\n", dev->watchdog_timeo); } - disable_irq_lockdep(dev->irq); + spin_lock_irqsave(&vp->lock, flags); old_window = ioread16(ioaddr + EL3_CMD) >> 13; EL3WINDOW(4); media_status = ioread16(ioaddr + Wn4_Media); @@ -1758,9 +1759,7 @@ vortex_timer(unsigned long data) case XCVR_MII: case XCVR_NWAY: { ok = 1; - spin_lock_bh(&vp->lock); vortex_check_media(dev, 0); - spin_unlock_bh(&vp->lock); } break; default: /* Other media types handled by Tx timeouts. 
*/ @@ -1816,7 +1815,7 @@ leave_media_alone: dev->name, media_tbl[dev->if_port].name); EL3WINDOW(old_window); - enable_irq_lockdep(dev->irq); + spin_unlock_irqrestore(&vp->lock, flags); mod_timer(&vp->timer, RUN_AT(next_tick)); if (vp->deferred) iowrite16(FakeIntr, ioaddr + EL3_CMD); @@ -1849,13 +1848,17 @@ static void vortex_tx_timeout(struct net /* * Block interrupts because vortex_interrupt does a bare spin_lock() */ +#ifndef CONFIG_PREEMPT_RT unsigned long flags; local_irq_save(flags); +#endif if (vp->full_bus_master_tx) boomerang_interrupt(dev->irq, dev); else vortex_interrupt(dev->irq, dev); +#ifndef CONFIG_PREEMPT_RT local_irq_restore(flags); +#endif } } patches/preempt-realtime-i386.patch0000664000077200007720000007412310653433165016522 0ustar mingomingo--- arch/i386/Kconfig.debug | 2 + arch/i386/kernel/apic.c | 2 - arch/i386/kernel/cpu/mtrr/generic.c | 2 - arch/i386/kernel/head.S | 1 arch/i386/kernel/i8253.c | 2 - arch/i386/kernel/i8259.c | 2 - arch/i386/kernel/io_apic.c | 4 +-- arch/i386/kernel/irq.c | 4 ++- arch/i386/kernel/microcode.c | 2 - arch/i386/kernel/nmi.c | 5 +++ arch/i386/kernel/process.c | 14 ++++++++-- arch/i386/kernel/signal.c | 14 ++++++++++ arch/i386/kernel/smp.c | 22 ++++++++++++---- arch/i386/kernel/time.c | 2 - arch/i386/kernel/traps.c | 29 +++++++++++++++++---- arch/i386/kernel/vm86.c | 1 arch/i386/mm/fault.c | 7 +++-- arch/i386/mm/highmem.c | 37 +++++++++++++++++++++------- arch/i386/mm/pgtable.c | 2 - arch/i386/oprofile/Kconfig | 3 ++ arch/i386/pci/common.c | 2 - arch/i386/pci/direct.c | 29 ++++++++++++++------- arch/i386/pci/pci.h | 2 - include/asm-i386/acpi.h | 4 +-- include/asm-i386/dma.h | 2 - include/asm-i386/highmem.h | 27 ++++++++++++++++++++ include/asm-i386/i8253.h | 2 - include/asm-i386/i8259.h | 2 - include/asm-i386/mach-default/irq_vectors.h | 2 - include/asm-i386/mc146818rtc.h | 2 - include/asm-i386/pgtable.h | 2 - include/asm-i386/tlbflush.h | 26 +++++++++++++++++++ include/asm-i386/xor.h | 21 +++++++++++++-- 33 files changed, 222 insertions(+), 58 deletions(-) Index: linux-rt-rebase.q/arch/i386/Kconfig.debug =================================================================== --- linux-rt-rebase.q.orig/arch/i386/Kconfig.debug +++ linux-rt-rebase.q/arch/i386/Kconfig.debug @@ -49,6 +49,7 @@ config DEBUG_PAGEALLOC config DEBUG_RODATA bool "Write protect kernel read-only data structures" depends on DEBUG_KERNEL + default y help Mark the kernel read-only data as write-protected in the pagetables, in order to catch accidental (and incorrect) writes to such const @@ -59,6 +60,7 @@ config DEBUG_RODATA config 4KSTACKS bool "Use 4Kb for kernel stacks instead of 8Kb" depends on DEBUG_KERNEL + default y help If you say Y here the kernel will use a 4Kb stacksize for the kernel stack attached to each process/thread. This facilitates Index: linux-rt-rebase.q/arch/i386/kernel/apic.c =================================================================== --- linux-rt-rebase.q.orig/arch/i386/kernel/apic.c +++ linux-rt-rebase.q/arch/i386/kernel/apic.c @@ -579,7 +579,7 @@ static void local_apic_timer_interrupt(v * interrupt as well. Thus we cannot inline the local irq ... 
] */ -void fastcall smp_apic_timer_interrupt(struct pt_regs *regs) +void fastcall notrace smp_apic_timer_interrupt(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); Index: linux-rt-rebase.q/arch/i386/kernel/cpu/mtrr/generic.c =================================================================== --- linux-rt-rebase.q.orig/arch/i386/kernel/cpu/mtrr/generic.c +++ linux-rt-rebase.q/arch/i386/kernel/cpu/mtrr/generic.c @@ -330,7 +330,7 @@ static unsigned long set_mtrr_state(void static unsigned long cr4 = 0; -static DEFINE_SPINLOCK(set_atomicity_lock); +static DEFINE_RAW_SPINLOCK(set_atomicity_lock); /* * Since we are disabling the cache don't allow any interrupts - they Index: linux-rt-rebase.q/arch/i386/kernel/head.S =================================================================== --- linux-rt-rebase.q.orig/arch/i386/kernel/head.S +++ linux-rt-rebase.q/arch/i386/kernel/head.S @@ -492,6 +492,7 @@ ignore_int: call printk #endif addl $(5*4),%esp + call dump_stack popl %ds popl %es popl %edx Index: linux-rt-rebase.q/arch/i386/kernel/i8253.c =================================================================== --- linux-rt-rebase.q.orig/arch/i386/kernel/i8253.c +++ linux-rt-rebase.q/arch/i386/kernel/i8253.c @@ -14,7 +14,7 @@ #include #include -DEFINE_SPINLOCK(i8253_lock); +DEFINE_RAW_SPINLOCK(i8253_lock); EXPORT_SYMBOL(i8253_lock); /* Index: linux-rt-rebase.q/arch/i386/kernel/i8259.c =================================================================== --- linux-rt-rebase.q.orig/arch/i386/kernel/i8259.c +++ linux-rt-rebase.q/arch/i386/kernel/i8259.c @@ -34,7 +34,7 @@ */ static int i8259A_auto_eoi; -DEFINE_SPINLOCK(i8259A_lock); +DEFINE_RAW_SPINLOCK(i8259A_lock); static void mask_and_ack_8259A(unsigned int); static struct irq_chip i8259A_chip = { Index: linux-rt-rebase.q/arch/i386/kernel/io_apic.c =================================================================== --- linux-rt-rebase.q.orig/arch/i386/kernel/io_apic.c +++ linux-rt-rebase.q/arch/i386/kernel/io_apic.c @@ -56,8 +56,8 @@ atomic_t irq_mis_count; /* Where if anywhere is the i8259 connect in external int mode */ static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; -static DEFINE_SPINLOCK(ioapic_lock); -static DEFINE_SPINLOCK(vector_lock); +static DEFINE_RAW_SPINLOCK(ioapic_lock); +static DEFINE_RAW_SPINLOCK(vector_lock); int timer_over_8254 __initdata = 1; Index: linux-rt-rebase.q/arch/i386/kernel/irq.c =================================================================== --- linux-rt-rebase.q.orig/arch/i386/kernel/irq.c +++ linux-rt-rebase.q/arch/i386/kernel/irq.c @@ -79,6 +79,8 @@ fastcall notrace unsigned int do_IRQ(str u32 *isp; #endif + irq_show_regs_callback(smp_processor_id(), regs); + if (unlikely((unsigned)irq >= NR_IRQS)) { printk(KERN_EMERG "%s: cannot handle IRQ %d\n", __FUNCTION__, irq); @@ -100,7 +102,7 @@ fastcall notrace unsigned int do_IRQ(str __asm__ __volatile__("andl %%esp,%0" : "=r" (esp) : "0" (THREAD_SIZE - 1)); if (unlikely(esp < (sizeof(struct thread_info) + STACK_WARN))) { - printk("do_IRQ: stack overflow: %ld\n", + printk("BUG: do_IRQ: stack overflow: %ld\n", esp - sizeof(struct thread_info)); dump_stack(); } Index: linux-rt-rebase.q/arch/i386/kernel/microcode.c =================================================================== --- linux-rt-rebase.q.orig/arch/i386/kernel/microcode.c +++ linux-rt-rebase.q/arch/i386/kernel/microcode.c @@ -116,7 +116,7 @@ MODULE_LICENSE("GPL"); #define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE) /* serialize access to the physical 
write to MSR 0x79 */
-static DEFINE_SPINLOCK(microcode_update_lock);
+static DEFINE_RAW_SPINLOCK(microcode_update_lock);
 /* no concurrent ->write()s are allowed on /dev/cpu/microcode */
 static DEFINE_MUTEX(microcode_mutex);
Index: linux-rt-rebase.q/arch/i386/kernel/nmi.c
===================================================================
--- linux-rt-rebase.q.orig/arch/i386/kernel/nmi.c
+++ linux-rt-rebase.q/arch/i386/kernel/nmi.c
@@ -62,7 +62,12 @@ static int endflag __initdata = 0;
 */
 static __init void nmi_cpu_busy(void *data)
 {
+ /*
+ * avoid a warning, on PREEMPT_RT this won't run in hardirq context:
+ */
+#ifndef CONFIG_PREEMPT_RT
 local_irq_enable_in_hardirq();
+#endif
 /* Intentionally don't use cpu_relax here. This is
 to make sure that the performance counter really ticks,
 even if there is a simulator or similar that catches the
Index: linux-rt-rebase.q/arch/i386/kernel/process.c
===================================================================
--- linux-rt-rebase.q.orig/arch/i386/kernel/process.c
+++ linux-rt-rebase.q/arch/i386/kernel/process.c
@@ -382,15 +382,23 @@ void exit_thread(void)
 if (unlikely(test_thread_flag(TIF_IO_BITMAP))) {
 struct task_struct *tsk = current;
 struct thread_struct *t = &tsk->thread;
- int cpu = get_cpu();
- struct tss_struct *tss = &per_cpu(init_tss, cpu);
+ void *io_bitmap_ptr = t->io_bitmap_ptr;
+ int cpu;
+ struct tss_struct *tss;
- kfree(t->io_bitmap_ptr);
+ /*
+ * On PREEMPT_RT we must not call kfree() with
+ * preemption disabled, so we first zap the pointer:
+ */
 t->io_bitmap_ptr = NULL;
+ kfree(io_bitmap_ptr);
+
 clear_thread_flag(TIF_IO_BITMAP);
 /*
 * Careful, clear this in the TSS too:
 */
+ cpu = get_cpu();
+ tss = &per_cpu(init_tss, cpu);
 memset(tss->io_bitmap, 0xff, tss->io_bitmap_max);
 t->io_bitmap_max = 0;
 tss->io_bitmap_owner = NULL;
Index: linux-rt-rebase.q/arch/i386/kernel/signal.c
===================================================================
--- linux-rt-rebase.q.orig/arch/i386/kernel/signal.c
+++ linux-rt-rebase.q/arch/i386/kernel/signal.c
@@ -540,6 +540,13 @@ handle_signal(unsigned long sig, siginfo
 }
 }
+#ifdef CONFIG_PREEMPT_RT
+ /*
+ * Fully-preemptible kernel does not need interrupts disabled:
+ */
+ local_irq_enable();
+ preempt_check_resched();
+#endif
 /*
 * If TF is set due to a debugger (PT_DTRACE), clear the TF flag so
 * that register information in the sigcontext is correct.
@@ -580,6 +587,13 @@ static void fastcall do_signal(struct pt
 struct k_sigaction ka;
 sigset_t *oldset;
+#ifdef CONFIG_PREEMPT_RT
+ /*
+ * Fully-preemptible kernel does not need interrupts disabled:
+ */
+ local_irq_enable();
+ preempt_check_resched();
+#endif
 /*
 * We want the common case to go fast, which
 * is why we may in certain cases get here from
Index: linux-rt-rebase.q/arch/i386/kernel/smp.c
===================================================================
--- linux-rt-rebase.q.orig/arch/i386/kernel/smp.c
+++ linux-rt-rebase.q/arch/i386/kernel/smp.c
@@ -247,7 +247,7 @@ void send_IPI_mask_sequence(cpumask_t ma
 static cpumask_t flush_cpumask;
 static struct mm_struct * flush_mm;
 static unsigned long flush_va;
-static DEFINE_SPINLOCK(tlbstate_lock);
+static DEFINE_RAW_SPINLOCK(tlbstate_lock);
 /*
 * We cannot call mmdrop() because we are in interrupt context,
@@ -477,10 +477,20 @@ static void native_smp_send_reschedule(i
 }
 /*
+ * this function sends a 'reschedule' IPI to all other CPUs.
+ * This is used when RT tasks are starving and other CPUs + * might be able to run them: + */ +void smp_send_reschedule_allbutself(void) +{ + send_IPI_allbutself(RESCHEDULE_VECTOR); +} + +/* * Structure and data for smp_call_function(). This is designed to minimise * static memory requirements. It also looks cleaner. */ -static DEFINE_SPINLOCK(call_lock); +static DEFINE_RAW_SPINLOCK(call_lock); struct call_data_struct { void (*func) (void *info); @@ -635,14 +645,14 @@ static void native_smp_send_stop(void) } /* - * Reschedule call back. Nothing to do, - * all the work is done automatically when - * we return from the interrupt. + * Reschedule call back. Trigger a reschedule pass so that + * RT-overload balancing can pass tasks around. */ -fastcall void smp_reschedule_interrupt(struct pt_regs *regs) +fastcall notrace void smp_reschedule_interrupt(struct pt_regs *regs) { trace_special(regs->eip, 0, 0); ack_APIC_irq(); + set_tsk_need_resched(current); } fastcall void smp_call_function_interrupt(struct pt_regs *regs) Index: linux-rt-rebase.q/arch/i386/kernel/time.c =================================================================== --- linux-rt-rebase.q.orig/arch/i386/kernel/time.c +++ linux-rt-rebase.q/arch/i386/kernel/time.c @@ -124,7 +124,7 @@ static int set_rtc_mmss(unsigned long no int timer_ack; -unsigned long profile_pc(struct pt_regs *regs) +unsigned long notrace profile_pc(struct pt_regs *regs) { unsigned long pc = instruction_pointer(regs); Index: linux-rt-rebase.q/arch/i386/kernel/traps.c =================================================================== --- linux-rt-rebase.q.orig/arch/i386/kernel/traps.c +++ linux-rt-rebase.q/arch/i386/kernel/traps.c @@ -280,6 +280,12 @@ void dump_stack(void) EXPORT_SYMBOL(dump_stack); +#if defined(CONFIG_DEBUG_STACKOVERFLOW) && defined(CONFIG_EVENT_TRACE) +extern unsigned long worst_stack_left; +#else +# define worst_stack_left -1L +#endif + void show_registers(struct pt_regs *regs) { int i; @@ -308,8 +314,12 @@ void show_registers(struct pt_regs *regs regs->eax, regs->ebx, regs->ecx, regs->edx); printk(KERN_EMERG "esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", regs->esi, regs->edi, regs->ebp, esp); - printk(KERN_EMERG "ds: %04x es: %04x fs: %04x gs: %04x ss: %04x\n", - regs->xds & 0xffff, regs->xes & 0xffff, regs->xfs & 0xffff, gs, ss); + + printk(KERN_EMERG "ds: %04x es: %04x fs: %04x gs: %04x ss: %04x " + " preempt:%08x\n", + regs->xds & 0xffff, regs->xes & 0xffff, regs->xfs & 0xffff, gs, + ss, preempt_count()); + printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)", TASK_COMM_LEN, current->comm, current->pid, current_thread_info(), current, task_thread_info(current)); @@ -369,11 +379,11 @@ int is_valid_bugaddr(unsigned long eip) void die(const char * str, struct pt_regs * regs, long err) { static struct { - spinlock_t lock; + raw_spinlock_t lock; u32 lock_owner; int lock_owner_depth; } die = { - .lock = __SPIN_LOCK_UNLOCKED(die.lock), + .lock = RAW_SPIN_LOCK_UNLOCKED(die.lock), .lock_owner = -1, .lock_owner_depth = 0 }; @@ -480,6 +490,11 @@ static void __kprobes do_trap(int trapnr if (!user_mode(regs)) goto kernel_trap; +#ifdef CONFIG_PREEMPT_RT + local_irq_enable(); + preempt_check_resched(); +#endif + trap_signal: { /* * We want error_code and trap_no set for userspace faults and @@ -736,10 +751,11 @@ void __kprobes die_nmi(struct pt_regs *r crash_kexec(regs); } + nmi_exit(); do_exit(SIGSEGV); } -static __kprobes void default_do_nmi(struct pt_regs * regs) +static notrace __kprobes void default_do_nmi(struct pt_regs * regs) 
{ unsigned char reason = 0; @@ -779,11 +795,12 @@ static __kprobes void default_do_nmi(str static int ignore_nmis; -fastcall __kprobes void do_nmi(struct pt_regs * regs, long error_code) +fastcall notrace __kprobes void do_nmi(struct pt_regs * regs, long error_code) { int cpu; nmi_enter(); + nmi_trace((unsigned long)do_nmi, regs->eip, regs->eflags); cpu = smp_processor_id(); Index: linux-rt-rebase.q/arch/i386/kernel/vm86.c =================================================================== --- linux-rt-rebase.q.orig/arch/i386/kernel/vm86.c +++ linux-rt-rebase.q/arch/i386/kernel/vm86.c @@ -137,6 +137,7 @@ struct pt_regs * fastcall save_v86_state local_irq_enable(); if (!current->thread.vm86_info) { + local_irq_disable(); printk("no vm86_info: BAD\n"); do_exit(SIGSEGV); } Index: linux-rt-rebase.q/arch/i386/mm/fault.c =================================================================== --- linux-rt-rebase.q.orig/arch/i386/mm/fault.c +++ linux-rt-rebase.q/arch/i386/mm/fault.c @@ -297,8 +297,8 @@ int show_unhandled_signals = 1; * bit 3 == 1 means use of reserved bit detected * bit 4 == 1 means fault was an instruction fetch */ -fastcall void __kprobes do_page_fault(struct pt_regs *regs, - unsigned long error_code) +fastcall notrace void __kprobes do_page_fault(struct pt_regs *regs, + unsigned long error_code) { struct task_struct *tsk; struct mm_struct *mm; @@ -309,6 +309,7 @@ fastcall void __kprobes do_page_fault(st /* get the address */ address = read_cr2(); + trace_special(regs->eip, error_code, address); tsk = current; @@ -498,6 +499,8 @@ bad_area_nosemaphore: if (nr == 6) { stop_trace(); + user_trace_stop(); + zap_rt_locks(); do_invalid_op(regs, 0); return; } Index: linux-rt-rebase.q/arch/i386/mm/highmem.c =================================================================== --- linux-rt-rebase.q.orig/arch/i386/mm/highmem.c +++ linux-rt-rebase.q/arch/i386/mm/highmem.c @@ -18,6 +18,26 @@ void kunmap(struct page *page) kunmap_high(page); } +void kunmap_virt(void *ptr) +{ + struct page *page; + + if ((unsigned long)ptr < PKMAP_ADDR(0)) + return; + page = pte_page(pkmap_page_table[PKMAP_NR((unsigned long)ptr)]); + kunmap(page); +} + +struct page *kmap_to_page(void *ptr) +{ + struct page *page; + + if ((unsigned long)ptr < PKMAP_ADDR(0)) + return virt_to_page(ptr); + page = pte_page(pkmap_page_table[PKMAP_NR((unsigned long)ptr)]); + return page; +} + /* * kmap_atomic/kunmap_atomic is significantly faster than kmap/kunmap because * no global lock is needed and because the kmap code must perform a global TLB @@ -26,7 +46,7 @@ void kunmap(struct page *page) * However when holding an atomic kmap is is not legal to sleep, so atomic * kmaps are appropriate for short, tight code paths only. 
*/ -void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot) +void *__kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot) { enum fixed_addresses idx; unsigned long vaddr; @@ -47,12 +67,12 @@ void *kmap_atomic_prot(struct page *page return (void*) vaddr; } -void *kmap_atomic(struct page *page, enum km_type type) +void *__kmap_atomic(struct page *page, enum km_type type) { return kmap_atomic_prot(page, type, kmap_prot); } -void kunmap_atomic(void *kvaddr, enum km_type type) +void __kunmap_atomic(void *kvaddr, enum km_type type) { unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id(); @@ -79,7 +99,7 @@ void kunmap_atomic(void *kvaddr, enum km /* This is the same as kmap_atomic() but can map memory that doesn't * have a struct page associated with it. */ -void *kmap_atomic_pfn(unsigned long pfn, enum km_type type) +void *__kmap_atomic_pfn(unsigned long pfn, enum km_type type) { enum fixed_addresses idx; unsigned long vaddr; @@ -94,7 +114,7 @@ void *kmap_atomic_pfn(unsigned long pfn, return (void*) vaddr; } -struct page *kmap_atomic_to_page(void *ptr) +struct page *__kmap_atomic_to_page(void *ptr) { unsigned long idx, vaddr = (unsigned long)ptr; pte_t *pte; @@ -109,6 +129,7 @@ struct page *kmap_atomic_to_page(void *p EXPORT_SYMBOL(kmap); EXPORT_SYMBOL(kunmap); -EXPORT_SYMBOL(kmap_atomic); -EXPORT_SYMBOL(kunmap_atomic); -EXPORT_SYMBOL(kmap_atomic_to_page); +EXPORT_SYMBOL(kunmap_virt); +EXPORT_SYMBOL(__kmap_atomic); +EXPORT_SYMBOL(__kunmap_atomic); +EXPORT_SYMBOL(__kmap_atomic_to_page); Index: linux-rt-rebase.q/arch/i386/mm/pgtable.c =================================================================== --- linux-rt-rebase.q.orig/arch/i386/mm/pgtable.c +++ linux-rt-rebase.q/arch/i386/mm/pgtable.c @@ -208,7 +208,7 @@ void pmd_ctor(void *pmd, struct kmem_cac * vmalloc faults work because attached pagetables are never freed. * -- wli */ -DEFINE_SPINLOCK(pgd_lock); +DEFINE_RAW_SPINLOCK(pgd_lock); struct page *pgd_list; static inline void pgd_list_add(pgd_t *pgd) Index: linux-rt-rebase.q/arch/i386/oprofile/Kconfig =================================================================== --- linux-rt-rebase.q.orig/arch/i386/oprofile/Kconfig +++ linux-rt-rebase.q/arch/i386/oprofile/Kconfig @@ -15,3 +15,6 @@ config OPROFILE If unsure, say N. +config PROFILE_NMI + bool + default y Index: linux-rt-rebase.q/arch/i386/pci/common.c =================================================================== --- linux-rt-rebase.q.orig/arch/i386/pci/common.c +++ linux-rt-rebase.q/arch/i386/pci/common.c @@ -52,7 +52,7 @@ int pcibios_scanned; * This interrupt-safe spinlock protects all accesses to PCI * configuration space. 
*/ -DEFINE_SPINLOCK(pci_config_lock); +DEFINE_RAW_SPINLOCK(pci_config_lock); /* * Several buggy motherboards address only 16 devices and mirror Index: linux-rt-rebase.q/arch/i386/pci/direct.c =================================================================== --- linux-rt-rebase.q.orig/arch/i386/pci/direct.c +++ linux-rt-rebase.q/arch/i386/pci/direct.c @@ -220,16 +220,23 @@ static int __init pci_check_type1(void) unsigned int tmp; int works = 0; - local_irq_save(flags); + spin_lock_irqsave(&pci_config_lock, flags); outb(0x01, 0xCFB); tmp = inl(0xCF8); outl(0x80000000, 0xCF8); - if (inl(0xCF8) == 0x80000000 && pci_sanity_check(&pci_direct_conf1)) { - works = 1; + + if (inl(0xCF8) == 0x80000000) { + spin_unlock_irqrestore(&pci_config_lock, flags); + + if (pci_sanity_check(&pci_direct_conf1)) + works = 1; + + spin_lock_irqsave(&pci_config_lock, flags); } outl(tmp, 0xCF8); - local_irq_restore(flags); + + spin_unlock_irqrestore(&pci_config_lock, flags); return works; } @@ -239,17 +246,19 @@ static int __init pci_check_type2(void) unsigned long flags; int works = 0; - local_irq_save(flags); + spin_lock_irqsave(&pci_config_lock, flags); outb(0x00, 0xCFB); outb(0x00, 0xCF8); outb(0x00, 0xCFA); - if (inb(0xCF8) == 0x00 && inb(0xCFA) == 0x00 && - pci_sanity_check(&pci_direct_conf2)) { - works = 1; - } - local_irq_restore(flags); + if (inb(0xCF8) == 0x00 && inb(0xCFA) == 0x00) { + spin_unlock_irqrestore(&pci_config_lock, flags); + + if (pci_sanity_check(&pci_direct_conf2)) + works = 1; + } else + spin_unlock_irqrestore(&pci_config_lock, flags); return works; } Index: linux-rt-rebase.q/arch/i386/pci/pci.h =================================================================== --- linux-rt-rebase.q.orig/arch/i386/pci/pci.h +++ linux-rt-rebase.q/arch/i386/pci/pci.h @@ -78,7 +78,7 @@ struct irq_routing_table { extern unsigned int pcibios_irq_mask; extern int pcibios_scanned; -extern spinlock_t pci_config_lock; +extern raw_spinlock_t pci_config_lock; extern int (*pcibios_enable_irq)(struct pci_dev *dev); extern void (*pcibios_disable_irq)(struct pci_dev *dev); Index: linux-rt-rebase.q/include/asm-i386/acpi.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-i386/acpi.h +++ linux-rt-rebase.q/include/asm-i386/acpi.h @@ -52,8 +52,8 @@ #define ACPI_ASM_MACROS #define BREAKPOINT3 -#define ACPI_DISABLE_IRQS() local_irq_disable() -#define ACPI_ENABLE_IRQS() local_irq_enable() +#define ACPI_DISABLE_IRQS() local_irq_disable_nort() +#define ACPI_ENABLE_IRQS() local_irq_enable_nort() #define ACPI_FLUSH_CPU_CACHE() wbinvd() int __acpi_acquire_global_lock(unsigned int *lock); Index: linux-rt-rebase.q/include/asm-i386/dma.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-i386/dma.h +++ linux-rt-rebase.q/include/asm-i386/dma.h @@ -134,7 +134,7 @@ #define DMA_AUTOINIT 0x10 -extern spinlock_t dma_spin_lock; +extern spinlock_t dma_spin_lock; static __inline__ unsigned long claim_dma_lock(void) { Index: linux-rt-rebase.q/include/asm-i386/highmem.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-i386/highmem.h +++ linux-rt-rebase.q/include/asm-i386/highmem.h @@ -67,6 +67,16 @@ extern void * FASTCALL(kmap_high(struct extern void FASTCALL(kunmap_high(struct page *page)); void *kmap(struct page *page); +extern void kunmap_virt(void *ptr); +extern struct page *kmap_to_page(void *ptr); +void kunmap(struct page *page); + +void *__kmap_atomic_prot(struct page 
*page, enum km_type type, pgprot_t prot); +void *__kmap_atomic(struct page *page, enum km_type type); +void __kunmap_atomic(void *kvaddr, enum km_type type); +void *__kmap_atomic_pfn(unsigned long pfn, enum km_type type); +struct page *__kmap_atomic_to_page(void *ptr); + void kunmap(struct page *page); void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot); void *kmap_atomic(struct page *page, enum km_type type); @@ -80,6 +90,23 @@ struct page *kmap_atomic_to_page(void *p #define flush_cache_kmaps() do { } while (0) +/* + * on PREEMPT_RT kmap_atomic() is a wrapper that uses kmap(): + */ +#ifdef CONFIG_PREEMPT_RT +# define kmap_atomic_prot(page, type, prot) kmap(page) +# define kmap_atomic(page, type) kmap(page) +# define kmap_atomic_pfn(pfn, type) kmap(pfn_to_page(pfn)) +# define kunmap_atomic(kvaddr, type) kunmap_virt(kvaddr) +# define kmap_atomic_to_page(kvaddr) kmap_to_page(kvaddr) +#else +# define kmap_atomic_prot(page, type, prot) __kmap_atomic_prot(page, type, prot) +# define kmap_atomic(page, type) __kmap_atomic(page, type) +# define kmap_atomic_pfn(pfn, type) __kmap_atomic_pfn(pfn, type) +# define kunmap_atomic(kvaddr, type) __kunmap_atomic(kvaddr, type) +# define kmap_atomic_to_page(kvaddr) __kmap_atomic_to_page(kvaddr) +#endif + #endif /* __KERNEL__ */ #endif /* _ASM_HIGHMEM_H */ Index: linux-rt-rebase.q/include/asm-i386/i8253.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-i386/i8253.h +++ linux-rt-rebase.q/include/asm-i386/i8253.h @@ -6,7 +6,7 @@ #define PIT_CH0 0x40 #define PIT_CH2 0x42 -extern spinlock_t i8253_lock; +extern raw_spinlock_t i8253_lock; extern struct clock_event_device *global_clock_event; Index: linux-rt-rebase.q/include/asm-i386/i8259.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-i386/i8259.h +++ linux-rt-rebase.q/include/asm-i386/i8259.h @@ -7,7 +7,7 @@ extern unsigned int cached_irq_mask; #define cached_master_mask (__byte(0, cached_irq_mask)) #define cached_slave_mask (__byte(1, cached_irq_mask)) -extern spinlock_t i8259A_lock; +extern raw_spinlock_t i8259A_lock; extern void init_8259A(int auto_eoi); extern void enable_8259A_irq(unsigned int irq); Index: linux-rt-rebase.q/include/asm-i386/mach-default/irq_vectors.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-i386/mach-default/irq_vectors.h +++ linux-rt-rebase.q/include/asm-i386/mach-default/irq_vectors.h @@ -63,7 +63,7 @@ * levels. 
(0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_SYSTEM_VECTOR 0xee #define TIMER_IRQ 0 Index: linux-rt-rebase.q/include/asm-i386/mc146818rtc.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-i386/mc146818rtc.h +++ linux-rt-rebase.q/include/asm-i386/mc146818rtc.h @@ -72,7 +72,7 @@ static inline unsigned char current_lock lock_cmos(reg) #define lock_cmos_suffix(reg) \ unlock_cmos(); \ - local_irq_restore(cmos_flags); \ + local_irq_restore(cmos_flags); \ } while (0) #else #define lock_cmos_prefix(reg) do {} while (0) Index: linux-rt-rebase.q/include/asm-i386/pgtable.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-i386/pgtable.h +++ linux-rt-rebase.q/include/asm-i386/pgtable.h @@ -36,7 +36,7 @@ struct vm_area_struct; extern unsigned long empty_zero_page[1024]; extern pgd_t swapper_pg_dir[1024]; extern struct kmem_cache *pmd_cache; -extern spinlock_t pgd_lock; +extern raw_spinlock_t pgd_lock; extern struct page *pgd_list; void check_pgt_cache(void); Index: linux-rt-rebase.q/include/asm-i386/tlbflush.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-i386/tlbflush.h +++ linux-rt-rebase.q/include/asm-i386/tlbflush.h @@ -4,6 +4,21 @@ #include #include +/* + * TLB-flush needs to be nonpreemptible on PREEMPT_RT due to the + * following complex race scenario: + * + * if the current task is lazy-TLB and does a TLB flush and + * gets preempted after the movl %%cr3, %0 but before the + * movl %0, %%cr3 then its ->active_mm might change and it will + * install the wrong cr3 when it switches back. This is not a + * problem for the lazy-TLB task itself, but if the next task it + * switches to has an ->mm that is also the lazy-TLB task's + * new ->active_mm, then the scheduler will assume that cr3 is + * the new one, while we overwrote it with the old one. The result + * is the wrong cr3 in the new (non-lazy-TLB) task, which typically + * causes an infinite pagefault upon the next userspace access.
+ */ #ifdef CONFIG_PARAVIRT #include #else @@ -16,11 +31,13 @@ do { \ unsigned int tmpreg; \ \ + preempt_disable(); \ __asm__ __volatile__( \ "movl %%cr3, %0; \n" \ "movl %0, %%cr3; # flush TLB \n" \ : "=r" (tmpreg) \ :: "memory"); \ + preempt_enable(); \ } while (0) /* @@ -31,6 +48,7 @@ do { \ unsigned int tmpreg, cr4, cr4_orig; \ \ + preempt_disable(); \ __asm__ __volatile__( \ "movl %%cr4, %2; # turn off PGE \n" \ "movl %2, %1; \n" \ @@ -42,6 +60,7 @@ : "=&r" (tmpreg), "=&r" (cr4), "=&r" (cr4_orig) \ : "i" (~X86_CR4_PGE) \ : "memory"); \ + preempt_enable(); \ } while (0) #define __native_flush_tlb_single(addr) \ @@ -98,6 +117,13 @@ static inline void flush_tlb_mm(struct mm_struct *mm) { + /* + * This is safe on PREEMPT_RT because if we preempt + * right after the check but before the __flush_tlb(), + * and if ->active_mm changes, then we might miss a + * TLB flush, but that TLB flush happened already when + * ->active_mm was changed: + */ if (mm == current->active_mm) __flush_tlb(); } Index: linux-rt-rebase.q/include/asm-i386/xor.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-i386/xor.h +++ linux-rt-rebase.q/include/asm-i386/xor.h @@ -862,7 +862,21 @@ static struct xor_block_template xor_blo #include #undef XOR_TRY_TEMPLATES -#define XOR_TRY_TEMPLATES \ +/* + * MMX/SSE ops disable preemption for long periods of time, + * so on PREEMPT_RT use the register-based ops only: + */ +#ifdef CONFIG_PREEMPT_RT +# define XOR_TRY_TEMPLATES \ + do { \ + xor_speed(&xor_block_8regs); \ + xor_speed(&xor_block_8regs_p); \ + xor_speed(&xor_block_32regs); \ + xor_speed(&xor_block_32regs_p); \ + } while (0) +# define XOR_SELECT_TEMPLATE(FASTEST) (FASTEST) +#else +# define XOR_TRY_TEMPLATES \ do { \ xor_speed(&xor_block_8regs); \ xor_speed(&xor_block_8regs_p); \ @@ -875,9 +889,10 @@ static struct xor_block_template xor_blo xor_speed(&xor_block_p5_mmx); \ } \ } while (0) - /* We force the use of the SSE xor block because it can write around L2. We may also be able to load into the L1 only depending on how the cpu deals with a load to a line that is being prefetched. */ -#define XOR_SELECT_TEMPLATE(FASTEST) \ +# define XOR_SELECT_TEMPLATE(FASTEST) \ (cpu_has_xmm ? 
&xor_block_pIII_sse : FASTEST) +#endif + patches/latency-trace-fix.patch0000664000077200007720000000515610653433163016074 0ustar mingomingoFrom linux-rt-users-owner@vger.kernel.org Fri Jul 13 20:13:10 2007 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.1.7-deb (2006-10-05) on debian X-Spam-Level: X-Spam-Status: No, score=0.0 required=5.0 tests=AWL autolearn=unavailable version=3.1.7-deb Received: from vger.kernel.org (vger.kernel.org [209.132.176.167]) by mail.tglx.de (Postfix) with ESMTP id 9AD1E65C3E9; Fri, 13 Jul 2007 20:13:10 +0200 (CEST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1760492AbXGMSNJ (ORCPT + 1 other); Fri, 13 Jul 2007 14:13:09 -0400 Received: (majordomo@vger.kernel.org) by vger.kernel.org id S932549AbXGMSNJ (ORCPT ); Fri, 13 Jul 2007 14:13:09 -0400 Received: from deeprooted.net ([216.254.16.51]:38939 "EHLO paris.hilman.org" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1759850AbXGMSNG (ORCPT ); Fri, 13 Jul 2007 14:13:06 -0400 Received: by paris.hilman.org (Postfix, from userid 1000) id 98015E4C5C2; Fri, 13 Jul 2007 10:52:28 -0700 (PDT) Message-Id: <20070713175228.311226264@mvista.com> References: <20070713175214.336577416@mvista.com> User-Agent: quilt/0.45-1 Date: Fri, 13 Jul 2007 10:52:17 -0700 From: Kevin Hilman To: tglx@linutronix.de, mingo@elte.hu Cc: linux-rt-users@vger.kernel.org, linux-kernel@vger.kernel.org Subject: [PATCH -rt 3/6] Compile fix for PREEMPT_TIMING on and TRACE_IRQFLAGS off Content-Disposition: inline; filename=latency-trace-fix.patch Sender: linux-rt-users-owner@vger.kernel.org Precedence: bulk X-Mailing-List: linux-rt-users@vger.kernel.org X-Filter-To: .Kernel.rt-users X-Evolution-Source: imap://tglx%40linutronix.de@localhost:8993/ Content-Transfer-Encoding: 8bit Mime-Version: 1.0 Fix compile of latency_trace.c in the case where CRITICAL_PREEMPT_TIMING=y and TRACE_IRQFLAGS=n (because DEBUG_KERNEL is disabled) Signed-off-by: Kevin Hilman --- kernel/latency_trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) Index: linux-rt-rebase.q/kernel/latency_trace.c =================================================================== --- linux-rt-rebase.q.orig/kernel/latency_trace.c +++ linux-rt-rebase.q/kernel/latency_trace.c @@ -2154,7 +2154,7 @@ void notrace unmask_preempt_count(unsign } EXPORT_SYMBOL(unmask_preempt_count); -#ifdef CONFIG_CRITICAL_PREEMPT_TIMING +#if defined(CONFIG_CRITICAL_PREEMPT_TIMING) && defined(CONFIG_TRACE_IRQFLAGS) /* Some archs do their cpu_idle with preemption on. Don't measure it */ void notrace trace_preempt_enter_idle(void) patches/s_files-pipe-fix.patch0000664000077200007720000000206010653433167015713 0ustar mingomingoSubject: s_files: free_write_pipe() fix From: Ingo Molnar file_kill() has to look at the file's inode (for the barrier logic), hence make sure we free the inode before the file. 
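For clarity, the resulting teardown order, annotated (this is only a sketch of the hunk below, not an additional change):

	struct dentry *dentry = file->f_path.dentry;	/* take local copies while the refs are held */
	struct vfsmount *mnt = file->f_path.mnt;

	free_pipe_info(file->f_dentry->d_inode);	/* release the inode's pipe data first */
	file->f_path.dentry = NULL;
	file->f_path.mnt = NULL;
	put_filp(file);		/* file_kill() runs in the put_filp() path; dentry/mnt still pin the inode */
	dput(dentry);		/* only now drop the refs that keep the inode alive */
	mntput(mnt);
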
Signed-off-by: Ingo Molnar --- fs/pipe.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) Index: linux-rt-rebase.q/fs/pipe.c =================================================================== --- linux-rt-rebase.q.orig/fs/pipe.c +++ linux-rt-rebase.q/fs/pipe.c @@ -1012,12 +1012,17 @@ struct file *create_write_pipe(void) return ERR_PTR(err); } -void free_write_pipe(struct file *f) +void free_write_pipe(struct file *file) { - free_pipe_info(f->f_dentry->d_inode); - dput(f->f_path.dentry); - mntput(f->f_path.mnt); - put_filp(f); + struct dentry *dentry = file->f_path.dentry; + struct vfsmount *mnt = file->f_path.mnt; + + free_pipe_info(file->f_dentry->d_inode); + file->f_path.dentry = NULL; + file->f_path.mnt = NULL; + put_filp(file); + dput(dentry); + mntput(mnt); } struct file *create_read_pipe(struct file *wrf) patches/preempt-realtime-profiling.patch0000664000077200007720000000216510653433166020020 0ustar mingomingo--- kernel/profile.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) Index: linux-rt-rebase.q/kernel/profile.c =================================================================== --- linux-rt-rebase.q.orig/kernel/profile.c +++ linux-rt-rebase.q/kernel/profile.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -46,6 +47,7 @@ int prof_on __read_mostly; EXPORT_SYMBOL_GPL(prof_on); static cpumask_t prof_cpu_mask = CPU_MASK_ALL; +int prof_pid = -1; #ifdef CONFIG_SMP static DEFINE_PER_CPU(struct profile_hit *[2], cpu_profile_hits); static DEFINE_PER_CPU(int, cpu_profile_flip); @@ -411,7 +413,8 @@ void __profile_tick(int type, struct pt_ { if (type == CPU_PROFILING && timer_hook) timer_hook(regs); - if (!user_mode(regs) && cpu_isset(smp_processor_id(), prof_cpu_mask)) + if (!user_mode(regs) && cpu_isset(smp_processor_id(), prof_cpu_mask) && + (prof_pid == -1 || prof_pid == current->pid)) profile_hit(type, (void *)profile_pc(regs)); } patches/kstat-add-rt-stats.patch0000664000077200007720000001232210653433165016203 0ustar mingomingoFrom: tglx Subject: add rt stats to /proc/stat add RT stats to /proc/stat Signed-off-by: Ingo Molnar fs/proc/proc_misc.c | 29 +++++++++++++++++++++-------- include/linux/kernel_stat.h | 2 ++ kernel/sched.c | 6 +++++- 3 files changed, 28 insertions(+), 9 deletions(-) Index: linux-rt-rebase.q/fs/proc/proc_misc.c =================================================================== --- linux-rt-rebase.q.orig/fs/proc/proc_misc.c +++ linux-rt-rebase.q/fs/proc/proc_misc.c @@ -442,7 +442,8 @@ static int show_stat(struct seq_file *p, { int i; unsigned long jif; - cputime64_t user, nice, system, idle, iowait, irq, softirq, steal; + cputime64_t user_rt, user, nice, system_rt, system, idle, + iowait, irq, softirq, steal; u64 sum = 0; struct timespec boottime; unsigned int *per_irq_sum; @@ -451,7 +452,7 @@ static int show_stat(struct seq_file *p, if (!per_irq_sum) return -ENOMEM; - user = nice = system = idle = iowait = + user_rt = user = nice = system_rt = system = idle = iowait = irq = softirq = steal = cputime64_zero; getboottime(&boottime); jif = boottime.tv_sec; @@ -467,6 +468,8 @@ static int show_stat(struct seq_file *p, irq = cputime64_add(irq, kstat_cpu(i).cpustat.irq); softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq); steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal); + user_rt = cputime64_add(user_rt, kstat_cpu(i).cpustat.user_rt); + system_rt = cputime64_add(system_rt, kstat_cpu(i).cpustat.system_rt); for (j = 0; j < NR_IRQS; j++) { unsigned int temp = kstat_cpu(i).irqs[j]; 
sum += temp; @@ -474,7 +477,10 @@ static int show_stat(struct seq_file *p, } } - seq_printf(p, "cpu %llu %llu %llu %llu %llu %llu %llu %llu\n", + user = cputime64_add(user_rt, user); + system = cputime64_add(system_rt, system); + + seq_printf(p, "cpu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu\n", (unsigned long long)cputime64_to_clock_t(user), (unsigned long long)cputime64_to_clock_t(nice), (unsigned long long)cputime64_to_clock_t(system), @@ -482,19 +488,24 @@ static int show_stat(struct seq_file *p, (unsigned long long)cputime64_to_clock_t(iowait), (unsigned long long)cputime64_to_clock_t(irq), (unsigned long long)cputime64_to_clock_t(softirq), - (unsigned long long)cputime64_to_clock_t(steal)); + (unsigned long long)cputime64_to_clock_t(steal), + (unsigned long long)cputime64_to_clock_t(user_rt), + (unsigned long long)cputime64_to_clock_t(system_rt)); + for_each_online_cpu(i) { /* Copy values here to work around gcc-2.95.3, gcc-2.96 */ - user = kstat_cpu(i).cpustat.user; + user_rt = kstat_cpu(i).cpustat.user_rt; + system_rt = kstat_cpu(i).cpustat.system_rt; + user = cputime64_add(user_rt, kstat_cpu(i).cpustat.user); nice = kstat_cpu(i).cpustat.nice; - system = kstat_cpu(i).cpustat.system; + system = cputime64_add(system_rt, kstat_cpu(i).cpustat.system); idle = kstat_cpu(i).cpustat.idle; iowait = kstat_cpu(i).cpustat.iowait; irq = kstat_cpu(i).cpustat.irq; softirq = kstat_cpu(i).cpustat.softirq; steal = kstat_cpu(i).cpustat.steal; - seq_printf(p, "cpu%d %llu %llu %llu %llu %llu %llu %llu %llu\n", + seq_printf(p, "cpu%d %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu\n", i, (unsigned long long)cputime64_to_clock_t(user), (unsigned long long)cputime64_to_clock_t(nice), @@ -503,7 +514,9 @@ static int show_stat(struct seq_file *p, (unsigned long long)cputime64_to_clock_t(iowait), (unsigned long long)cputime64_to_clock_t(irq), (unsigned long long)cputime64_to_clock_t(softirq), - (unsigned long long)cputime64_to_clock_t(steal)); + (unsigned long long)cputime64_to_clock_t(steal), + (unsigned long long)cputime64_to_clock_t(user_rt), + (unsigned long long)cputime64_to_clock_t(system_rt)); } seq_printf(p, "intr %llu", (unsigned long long)sum); Index: linux-rt-rebase.q/include/linux/kernel_stat.h =================================================================== --- linux-rt-rebase.q.orig/include/linux/kernel_stat.h +++ linux-rt-rebase.q/include/linux/kernel_stat.h @@ -23,6 +23,8 @@ struct cpu_usage_stat { cputime64_t idle; cputime64_t iowait; cputime64_t steal; + cputime64_t user_rt; + cputime64_t system_rt; }; struct kernel_stat { Index: linux-rt-rebase.q/kernel/sched.c =================================================================== --- linux-rt-rebase.q.orig/kernel/sched.c +++ linux-rt-rebase.q/kernel/sched.c @@ -3215,7 +3215,9 @@ void account_user_time(struct task_struc /* Add user time to cpustat. 
*/ tmp = cputime_to_cputime64(cputime); - if (TASK_NICE(p) > 0) + if (rt_task(p)) + cpustat->user_rt = cputime64_add(cpustat->user_rt, tmp); + else if (TASK_NICE(p) > 0) cpustat->nice = cputime64_add(cpustat->nice, tmp); else cpustat->user = cputime64_add(cpustat->user, tmp); @@ -3242,6 +3244,8 @@ void account_system_time(struct task_str cpustat->irq = cputime64_add(cpustat->irq, tmp); else if (softirq_count() || (p->flags & PF_SOFTIRQ)) cpustat->softirq = cputime64_add(cpustat->softirq, tmp); + else if (rt_task(p)) + cpustat->system_rt = cputime64_add(cpustat->system_rt, tmp); else if (p != rq->idle) cpustat->system = cputime64_add(cpustat->system, tmp); else if (atomic_read(&rq->nr_iowait) > 0) patches/gtod-optimize.patch0000664000077200007720000000116210653433167015345 0ustar mingomingo--- kernel/timer.c | 7 +++++++ 1 file changed, 7 insertions(+) Index: linux-rt-rebase.q/kernel/timer.c =================================================================== --- linux-rt-rebase.q.orig/kernel/timer.c +++ linux-rt-rebase.q/kernel/timer.c @@ -990,6 +990,13 @@ static inline void update_times(void) static unsigned long last_tick = INITIAL_JIFFIES; unsigned long ticks, flags; + /* + * Dont take the xtime_lock from every CPU in + * every tick - only when needed: + */ + if (jiffies == last_tick) + return; + write_seqlock_irqsave(&xtime_lock, flags); ticks = jiffies - last_tick; if (ticks) { patches/msi-suspend-resume-workaround.patch0000664000077200007720000000163110653433162020504 0ustar mingomingo--- drivers/base/power/resume.c | 1 + drivers/pci/msi.c | 4 ++++ 2 files changed, 5 insertions(+) Index: linux-rt-rebase.q/drivers/base/power/resume.c =================================================================== --- linux-rt-rebase.q.orig/drivers/base/power/resume.c +++ linux-rt-rebase.q/drivers/base/power/resume.c @@ -9,6 +9,7 @@ */ #include +#include #include #include "../base.h" #include "power.h" Index: linux-rt-rebase.q/drivers/pci/msi.c =================================================================== --- linux-rt-rebase.q.orig/drivers/pci/msi.c +++ linux-rt-rebase.q/drivers/pci/msi.c @@ -235,6 +235,10 @@ static void __pci_restore_msi_state(stru return; entry = get_irq_msi(dev->irq); + if (!entry) { + WARN_ON(1); + return; + } pos = entry->msi_attrib.pos; pci_intx(dev, 0); /* disable intx */ patches/fix-acpi-build-weirdness.patch0000664000077200007720000000115010653433162017340 0ustar mingomingo arch/i386/pci/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) Index: linux-rt-rebase.q/arch/i386/pci/Makefile =================================================================== --- linux-rt-rebase.q.orig/arch/i386/pci/Makefile +++ linux-rt-rebase.q/arch/i386/pci/Makefile @@ -4,8 +4,9 @@ obj-$(CONFIG_PCI_BIOS) += pcbios.o obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o direct.o mmconfig-shared.o obj-$(CONFIG_PCI_DIRECT) += direct.o +obj-$(CONFIG_ACPI) += acpi.o + pci-y := fixup.o -pci-$(CONFIG_ACPI) += acpi.o pci-y += legacy.o irq.o pci-$(CONFIG_X86_VISWS) := visws.o fixup.o patches/redo-regparm-option.patch0000664000077200007720000000633010653433162016441 0ustar mingomingo undo: commit a1a70c25bed75ed36ed48bbe18b9029428d2452d Author: Adrian Bunk Date: Thu Dec 7 02:14:12 2006 +0100 [PATCH] i386: always enable regparm needed for latency tracing. 
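For reference, what the option changes (illustrative snippet, not part of the patch): with -mregparm=3 gcc passes the first three integer arguments in registers instead of on the stack, a convention that tracing code interposing on function calls has to be able to cope with -- hence making it configurable again:

	/* illustrative only */
	int __attribute__((regparm(3))) f(int a, int b, int c);
	/* a in %eax, b in %edx, c in %ecx -- nothing on the stack */

	int __attribute__((regparm(0))) g(int a, int b, int c);
	/* all three arguments passed on the stack */
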
--- Documentation/stable_api_nonsense.txt | 3 +++ arch/i386/Kconfig | 7 +++++++ arch/i386/Makefile | 4 +++- include/asm-i386/module.h | 8 +++++++- 4 files changed, 20 insertions(+), 2 deletions(-) Index: linux-rt-rebase.q/Documentation/stable_api_nonsense.txt =================================================================== --- linux-rt-rebase.q.orig/Documentation/stable_api_nonsense.txt +++ linux-rt-rebase.q/Documentation/stable_api_nonsense.txt @@ -62,6 +62,9 @@ consider the following facts about the L - different structures can contain different fields - Some functions may not be implemented at all, (i.e. some locks compile away to nothing for non-SMP builds.) + - Parameter passing of variables from function to function can be + done in different ways (the CONFIG_REGPARM option controls + this.) - Memory within the kernel can be aligned in different ways, depending on the build options. - Linux runs on a wide range of different processor architectures. Index: linux-rt-rebase.q/arch/i386/Kconfig =================================================================== --- linux-rt-rebase.q.orig/arch/i386/Kconfig +++ linux-rt-rebase.q/arch/i386/Kconfig @@ -780,6 +780,13 @@ config BOOT_IOREMAP depends on (((X86_SUMMIT || X86_GENERICARCH) && NUMA) || (X86 && EFI)) default y +# +# function tracing might turn this off: +# +config REGPARM + bool + default y + config SECCOMP bool "Enable seccomp to safely compute untrusted bytecode" depends on PROC_FS Index: linux-rt-rebase.q/arch/i386/Makefile =================================================================== --- linux-rt-rebase.q.orig/arch/i386/Makefile +++ linux-rt-rebase.q/arch/i386/Makefile @@ -31,7 +31,7 @@ LDFLAGS_vmlinux := --emit-relocs endif CHECKFLAGS += -D__i386__ -CFLAGS += -pipe -msoft-float -mregparm=3 -freg-struct-return +CFLAGS += -pipe -msoft-float -freg-struct-return # prevent gcc from keeping the stack 16 byte aligned CFLAGS += $(call cc-option,-mpreferred-stack-boundary=2) @@ -39,6 +39,8 @@ CFLAGS += $(call cc-option,-mpreferred-s # CPU-specific tuning. Anything which can be shared with UML should go here. include $(srctree)/arch/i386/Makefile.cpu +cflags-$(CONFIG_REGPARM) += -mregparm=3 + # temporary until string.h is fixed cflags-y += -ffreestanding Index: linux-rt-rebase.q/include/asm-i386/module.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-i386/module.h +++ linux-rt-rebase.q/include/asm-i386/module.h @@ -64,12 +64,18 @@ struct mod_arch_specific #error unknown processor family #endif +#ifdef CONFIG_REGPARM +#define MODULE_REGPARM "REGPARM " +#else +#define MODULE_REGPARM "" +#endif + #ifdef CONFIG_4KSTACKS #define MODULE_STACKSIZE "4KSTACKS " #else #define MODULE_STACKSIZE "" #endif -#define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY MODULE_STACKSIZE +#define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY MODULE_REGPARM MODULE_STACKSIZE #endif /* _ASM_I386_MODULE_H */ patches/x86_64-apic-change-setup-calling-convention.patch0000664000077200007720000000322010653433161022561 0ustar mingomingoSubject: x86_64: apic change setup_APIC_timer calling convention setup_APIC_timer takes the file global calibration result as an argument. Remove it. 
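In shorthand, the calling convention before and after (names taken from the hunks below):

	/* before: every caller passes in the file-global */
	setup_APIC_timer(calibration_result);

	/* after: setup_APIC_timer() reads calibration_result itself */
	setup_APIC_timer();
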
Signed-off-by: Thomas Gleixner Signed-off-by: Chris Wright Signed-off-by: Ingo Molnar --- arch/x86_64/kernel/apic.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) Index: linux-rt-rebase.q/arch/x86_64/kernel/apic.c =================================================================== --- linux-rt-rebase.q.orig/arch/x86_64/kernel/apic.c +++ linux-rt-rebase.q/arch/x86_64/kernel/apic.c @@ -784,7 +784,7 @@ static void __setup_APIC_LVTT(unsigned i apic_write(APIC_TMICT, clocks); } -static void setup_APIC_timer(unsigned int clocks) +static void setup_APIC_timer(void) { unsigned long flags; int irqen; @@ -793,7 +793,7 @@ static void setup_APIC_timer(unsigned in irqen = ! cpu_isset(smp_processor_id(), timer_interrupt_broadcast_ipi_mask); - __setup_APIC_LVTT(clocks, 0, irqen); + __setup_APIC_LVTT(calibration_result, 0, irqen); /* Turn off PIT interrupt if we use APIC timer as main timer. Only works with the PM timer right now TBD fix it for HPET too. */ @@ -880,7 +880,7 @@ void __init setup_boot_APIC_clock (void) /* * Now set up the timer for real. */ - setup_APIC_timer(calibration_result); + setup_APIC_timer(); local_irq_enable(); } @@ -888,7 +888,7 @@ void __init setup_boot_APIC_clock (void) void __cpuinit setup_secondary_APIC_clock(void) { local_irq_disable(); /* FIXME: Do we need this? --RR */ - setup_APIC_timer(calibration_result); + setup_APIC_timer(); local_irq_enable(); } patches/rt-mutex-trylock-export.patch0000664000077200007720000000734410653433164017350 0ustar mingomingoFrom linux-kernel-owner@vger.kernel.org Wed May 23 01:44:17 2007 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.1.7-deb (2006-10-05) on debian X-Spam-Level: X-Spam-Status: No, score=0.0 required=5.0 tests=none autolearn=unavailable version=3.1.7-deb Received: from vger.kernel.org (vger.kernel.org [209.132.176.167]) by mail.tglx.de (Postfix) with ESMTP id 32C4A65C3E9 for ; Wed, 23 May 2007 01:44:17 +0200 (CEST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1759353AbXEVXoG (ORCPT ); Tue, 22 May 2007 19:44:06 -0400 Received: (majordomo@vger.kernel.org) by vger.kernel.org id S1757791AbXEVXn4 (ORCPT ); Tue, 22 May 2007 19:43:56 -0400 Received: from rwcrmhc11.comcast.net ([204.127.192.81]:35206 "EHLO rwcrmhc11.comcast.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1757669AbXEVXn4 (ORCPT ); Tue, 22 May 2007 19:43:56 -0400 Received: from sx.thebigcorporation.com ([69.181.45.228]) by comcast.net (rwcrmhc11) with ESMTP id <20070522233624m1100rg2vge>; Tue, 22 May 2007 23:36:29 +0000 Received: from sx.thebigcorporation.com (localhost.localdomain [127.0.0.1]) by sx.thebigcorporation.com (8.13.8/8.13.8) with ESMTP id l4MNaKHv029409; Tue, 22 May 2007 16:36:20 -0700 Received: (from sven@localhost) by sx.thebigcorporation.com (8.13.8/8.13.8/Submit) id l4MNaJIn029408; Tue, 22 May 2007 16:36:19 -0700 X-Authentication-Warning: sx.thebigcorporation.com: sven set sender to sven@thebigcorporation.com using -f Subject: [PATCH] 2.6.21-rt6 From: Sven-Thorsten Dietrich To: LKML Cc: Ingo Molnar In-Reply-To: <1179874795.25500.40.camel@sx.thebigcorporation.com> References: <1179874795.25500.40.camel@sx.thebigcorporation.com> Content-Type: text/plain Organization: The Big Corporation Date: Tue, 22 May 2007 16:36:19 -0700 Message-Id: <1179876979.25500.54.camel@sx.thebigcorporation.com> Mime-Version: 1.0 X-Mailer: Evolution 2.8.3 (2.8.3-2.fc6) Sender: linux-kernel-owner@vger.kernel.org Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org X-Filter-To: .Kernel.LKML X-Evolution-Source: 
imap://tglx%40linutronix.de@localhost:8993/ Content-Transfer-Encoding: 8bit On Tue, 2007-05-22 at 15:59 -0700, Sven-Thorsten Dietrich wrote: > Add > header and export for rt_write_trylock_irqsave. Disregard the last patch, flags parameter was missing in the header. --- include/linux/spinlock.h | 2 ++ kernel/rt.c | 1 + 2 files changed, 3 insertions(+) Index: linux-rt-rebase.q/include/linux/spinlock.h =================================================================== --- linux-rt-rebase.q.orig/include/linux/spinlock.h +++ linux-rt-rebase.q/include/linux/spinlock.h @@ -294,6 +294,8 @@ do { \ extern void __lockfunc rt_write_lock(rwlock_t *rwlock); extern void __lockfunc rt_read_lock(rwlock_t *rwlock); extern int __lockfunc rt_write_trylock(rwlock_t *rwlock); +extern int __lockfunc rt_write_trylock_irqsave(rwlock_t *trylock, + unsigned long *flags); extern int __lockfunc rt_read_trylock(rwlock_t *rwlock); extern void __lockfunc rt_write_unlock(rwlock_t *rwlock); extern void __lockfunc rt_read_unlock(rwlock_t *rwlock); Index: linux-rt-rebase.q/kernel/rt.c =================================================================== --- linux-rt-rebase.q.orig/kernel/rt.c +++ linux-rt-rebase.q/kernel/rt.c @@ -172,6 +172,7 @@ int __lockfunc rt_write_trylock_irqsave( *flags = 0; return rt_write_trylock(rwlock); } +EXPORT_SYMBOL(rt_write_trylock_irqsave); int __lockfunc rt_read_trylock(rwlock_t *rwlock) { patches/tasklet-fix-preemption-race.patch0000664000077200007720000001001010653433165020063 0ustar mingomingoFrom johnstul@us.ibm.com Wed Jun 6 04:17:34 2007 Return-Path: Received: from e3.ny.us.ibm.com (e3.ny.us.ibm.com [32.97.182.143]) (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits)) (No client certificate requested) by mail.tglx.de (Postfix) with ESMTP id 1CCC065C065 for ; Wed, 6 Jun 2007 04:17:34 +0200 (CEST) Received: from d01relay04.pok.ibm.com (d01relay04.pok.ibm.com [9.56.227.236]) by e3.ny.us.ibm.com (8.13.8/8.13.8) with ESMTP id l561EvIT011411 for ; Tue, 5 Jun 2007 21:14:57 -0400 Received: from d01av04.pok.ibm.com (d01av04.pok.ibm.com [9.56.224.64]) by d01relay04.pok.ibm.com (8.13.8/8.13.8/NCO v8.3) with ESMTP id l562HUG6545736 for ; Tue, 5 Jun 2007 22:17:30 -0400 Received: from d01av04.pok.ibm.com (loopback [127.0.0.1]) by d01av04.pok.ibm.com (8.12.11.20060308/8.13.3) with ESMTP id l562HUu0027167 for ; Tue, 5 Jun 2007 22:17:30 -0400 Received: from [9.47.21.16] (cog.beaverton.ibm.com [9.47.21.16]) by d01av04.pok.ibm.com (8.12.11.20060308/8.12.11) with ESMTP id l562HTkh027139; Tue, 5 Jun 2007 22:17:29 -0400 Subject: [PATCH -rt] Fix TASKLET_STATE_SCHED WARN_ON() From: john stultz To: Ingo Molnar Cc: Thomas Gleixner , Steven Rostedt , "Paul E. McKenney" , lkml Content-Type: text/plain Date: Tue, 05 Jun 2007 19:17:23 -0700 Message-Id: <1181096244.6018.20.camel@localhost> Mime-Version: 1.0 X-Mailer: Evolution 2.10.1 X-Evolution-Source: imap://tglx%40linutronix.de@localhost:8993/ Content-Transfer-Encoding: 8bit Hey Ingo, So we've been seeing the following trace fairly frequently on our SMP boxes when running kernbench: BUG: at kernel/softirq.c:639 __tasklet_action() Call Trace: [] dump_trace+0xaa/0x32a [] show_trace+0x41/0x5c [] dump_stack+0x15/0x17 [] __tasklet_action+0xdf/0x12e [] tasklet_action+0x27/0x29 [] ksoftirqd+0x16c/0x271 [] kthread+0xf5/0x128 [] child_rip+0xa/0x12 Paul also pointed this out awhile back: http://lkml.org/lkml/2007/2/25/1 Anyway, I think I finally found the issue. 
It's a bit hard to explain, but the idea is while __tasklet_action is running the tasklet function on CPU1, if a call to tasklet_schedule() on CPU2 is made, and if right after we mark the TASKLET_STATE_SCHED bit we are preempted, __tasklet_action on CPU1 might be able to re-run the function, clear the bit and unlock the tasklet before CPU2 enters __tasklet_common_schedule. Once __tasklet_common_schedule locks the tasklet, we will add the tasklet to the list with the TASKLET_STATE_SCHED *unset*. I've verified this race occurs w/ a WARN_ON in __tasklet_common_schedule(). This fix avoids this race by making sure *after* we've locked the tasklet that the STATE_SCHED bit is set before adding it to the list. Does it look ok to you? thanks -john Signed-off-by: John Stultz --- kernel/softirq.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) Index: linux-rt-rebase.q/kernel/softirq.c =================================================================== --- linux-rt-rebase.q.orig/kernel/softirq.c +++ linux-rt-rebase.q/kernel/softirq.c @@ -462,10 +462,17 @@ static void inline __tasklet_common_schedule(struct tasklet_struct *t, struct tasklet_head *head, unsigned int nr) { if (tasklet_trylock(t)) { - WARN_ON(t->next != NULL); - t->next = head->list; - head->list = t; - raise_softirq_irqoff(nr); + /* We may have been preempted before tasklet_trylock + * and __tasklet_action may have already run. + * So double check the sched bit while the tasklet + * is locked before adding it to the list. + */ + if (test_bit(TASKLET_STATE_SCHED, &t->state)) { + WARN_ON(t->next != NULL); + t->next = head->list; + head->list = t; + raise_softirq_irqoff(nr); + } tasklet_unlock(t); } } patches/dynticks-rcu-rt-fixlet.patch0000664000077200007720000000142610653433163017104 0ustar mingomingo--- kernel/rcupreempt.c | 11 +++++++++++ 1 file changed, 11 insertions(+) Index: linux-rt-rebase.q/kernel/rcupreempt.c =================================================================== --- linux-rt-rebase.q.orig/kernel/rcupreempt.c +++ linux-rt-rebase.q/kernel/rcupreempt.c @@ -338,6 +338,17 @@ void __synchronize_sched(void) sched_setaffinity(0, oldmask); } +/* + * Check to see if any future RCU-related work will need to be done + * by the current CPU, even if none need be done immediately, returning + * 1 if so. This function is part of the RCU implementation; it is -not- + * an exported member of the RCU API.
+ */ +int rcu_needs_cpu(int cpu) +{ + return !!rcu_data.waitlist || rcu_pending(cpu); +} + int rcu_pending(int cpu) { return (rcu_data.donelist != NULL || patches/latency-tracing-x86_64.patch0000664000077200007720000001750510653433162016575 0ustar mingomingo arch/x86_64/ia32/ia32entry.S | 11 ++++++++- arch/x86_64/kernel/entry.S | 45 +++++++++++++++++++++++++++++++++++++ arch/x86_64/kernel/head64.c | 3 +- arch/x86_64/kernel/irq.c | 6 +++++ arch/x86_64/kernel/setup64.c | 4 +-- arch/x86_64/kernel/smpboot.c | 2 - arch/x86_64/kernel/traps.c | 1 arch/x86_64/kernel/vsyscall.c | 2 - include/asm-x86_64/calling.h | 50 ++++++++++++++++++++++++++++++++++++++++++ include/asm-x86_64/unistd.h | 2 + 10 files changed, 120 insertions(+), 6 deletions(-) Index: linux-rt-rebase.q/arch/x86_64/ia32/ia32entry.S =================================================================== --- linux-rt-rebase.q.orig/arch/x86_64/ia32/ia32entry.S +++ linux-rt-rebase.q/arch/x86_64/ia32/ia32entry.S @@ -120,7 +120,9 @@ sysenter_do_call: cmpl $(IA32_NR_syscalls-1),%eax ja ia32_badsys IA32_ARG_FIXUP 1 + TRACE_SYS_IA32_CALL call *ia32_sys_call_table(,%rax,8) + TRACE_SYS_RET movq %rax,RAX-ARGOFFSET(%rsp) GET_THREAD_INFO(%r10) cli @@ -229,7 +231,9 @@ cstar_do_call: cmpl $IA32_NR_syscalls-1,%eax ja ia32_badsys IA32_ARG_FIXUP 1 + TRACE_SYS_IA32_CALL call *ia32_sys_call_table(,%rax,8) + TRACE_SYS_RET movq %rax,RAX-ARGOFFSET(%rsp) GET_THREAD_INFO(%r10) cli @@ -323,8 +327,10 @@ ia32_do_syscall: cmpl $(IA32_NR_syscalls-1),%eax ja ia32_badsys IA32_ARG_FIXUP + TRACE_SYS_IA32_CALL call *ia32_sys_call_table(,%rax,8) # xxx: rip relative ia32_sysret: + TRACE_SYS_RET movq %rax,RAX-ARGOFFSET(%rsp) jmp int_ret_from_sys_call @@ -395,7 +401,7 @@ END(ia32_ptregs_common) .section .rodata,"a" .align 8 -ia32_sys_call_table: +ENTRY(ia32_sys_call_table) .quad sys_restart_syscall .quad sys_exit .quad stub32_fork @@ -721,4 +727,7 @@ ia32_sys_call_table: .quad compat_sys_timerfd .quad sys_eventfd .quad sys32_fallocate +#ifdef CONFIG_EVENT_TRACE + .globl ia32_syscall_end +#endif ia32_syscall_end: Index: linux-rt-rebase.q/arch/x86_64/kernel/entry.S =================================================================== --- linux-rt-rebase.q.orig/arch/x86_64/kernel/entry.S +++ linux-rt-rebase.q/arch/x86_64/kernel/entry.S @@ -53,6 +53,47 @@ .code64 +#ifdef CONFIG_EVENT_TRACE + +ENTRY(mcount) + cmpl $0, mcount_enabled + jz out + + push %rbp + mov %rsp,%rbp + + push %r11 + push %r10 + push %r9 + push %r8 + push %rdi + push %rsi + push %rdx + push %rcx + push %rax + + mov 0x0(%rbp),%rax + mov 0x8(%rbp),%rdi + mov 0x8(%rax),%rsi + + call __trace + + pop %rax + pop %rcx + pop %rdx + pop %rsi + pop %rdi + pop %r8 + pop %r9 + pop %r10 + pop %r11 + + pop %rbp +out: + ret + +#endif + #ifndef CONFIG_PREEMPT #define retint_kernel retint_restore_args #endif @@ -234,7 +275,9 @@ ENTRY(system_call) cmpq $__NR_syscall_max,%rax ja badsys movq %r10,%rcx + TRACE_SYS_CALL call *sys_call_table(,%rax,8) # XXX: rip relative + TRACE_SYS_RET movq %rax,RAX-ARGOFFSET(%rsp) /* * Syscall return path ending with SYSRET (fast path) @@ -316,7 +359,9 @@ tracesys: cmova %rcx,%rax ja 1f movq %r10,%rcx /* fixup for C */ + TRACE_SYS_CALL call *sys_call_table(,%rax,8) + TRACE_SYS_RET 1: movq %rax,RAX-ARGOFFSET(%rsp) /* Use IRET because user could have changed frame */ Index: linux-rt-rebase.q/arch/x86_64/kernel/head64.c =================================================================== --- linux-rt-rebase.q.orig/arch/x86_64/kernel/head64.c +++ linux-rt-rebase.q/arch/x86_64/kernel/head64.c @@ -10,6 +10,7 
@@ #include #include #include +#include #include #include @@ -58,7 +59,7 @@ static void __init copy_bootdata(char *r memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE); } -void __init x86_64_start_kernel(char * real_mode_data) +void __init notrace x86_64_start_kernel(char * real_mode_data) { int i; Index: linux-rt-rebase.q/arch/x86_64/kernel/irq.c =================================================================== --- linux-rt-rebase.q.orig/arch/x86_64/kernel/irq.c +++ linux-rt-rebase.q/arch/x86_64/kernel/irq.c @@ -117,6 +117,12 @@ asmlinkage unsigned int do_IRQ(struct pt irq_enter(); irq = __get_cpu_var(vector_irq)[vector]; +#ifdef CONFIG_EVENT_TRACE + if (irq == trace_user_trigger_irq) + user_trace_start(); +#endif + trace_special(regs->rip, irq, 0); + #ifdef CONFIG_DEBUG_STACKOVERFLOW stack_overflow_check(regs); #endif Index: linux-rt-rebase.q/arch/x86_64/kernel/setup64.c =================================================================== --- linux-rt-rebase.q.orig/arch/x86_64/kernel/setup64.c +++ linux-rt-rebase.q/arch/x86_64/kernel/setup64.c @@ -114,7 +114,7 @@ void __init setup_per_cpu_areas(void) } } -void pda_init(int cpu) +void notrace pda_init(int cpu) { struct x8664_pda *pda = cpu_pda(cpu); @@ -191,7 +191,7 @@ unsigned long kernel_eflags; * 'CPU state barrier', nothing should get across. * A lot of state is already set up in PDA init. */ -void __cpuinit cpu_init (void) +void __cpuinit notrace cpu_init (void) { int cpu = stack_smp_processor_id(); struct tss_struct *t = &per_cpu(init_tss, cpu); Index: linux-rt-rebase.q/arch/x86_64/kernel/smpboot.c =================================================================== --- linux-rt-rebase.q.orig/arch/x86_64/kernel/smpboot.c +++ linux-rt-rebase.q/arch/x86_64/kernel/smpboot.c @@ -316,7 +316,7 @@ static inline void set_cpu_sibling_map(i /* * Setup code on secondary processor (after comming out of the trampoline) */ -void __cpuinit start_secondary(void) +void __cpuinit notrace start_secondary(void) { /* * Dont put anything before smp_callin(), SMP Index: linux-rt-rebase.q/arch/x86_64/kernel/traps.c =================================================================== --- linux-rt-rebase.q.orig/arch/x86_64/kernel/traps.c +++ linux-rt-rebase.q/arch/x86_64/kernel/traps.c @@ -351,6 +351,7 @@ show_trace(struct task_struct *tsk, stru printk("\nCall Trace:\n"); dump_trace(tsk, regs, stack, &print_trace_ops, NULL); printk("\n"); + print_traces(tsk); } static void Index: linux-rt-rebase.q/arch/x86_64/kernel/vsyscall.c =================================================================== --- linux-rt-rebase.q.orig/arch/x86_64/kernel/vsyscall.c +++ linux-rt-rebase.q/arch/x86_64/kernel/vsyscall.c @@ -44,7 +44,7 @@ #include #include -#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) +#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) notrace #define __syscall_clobber "r11","rcx","memory" #define __pa_vsymbol(x) \ ({unsigned long v; \ Index: linux-rt-rebase.q/include/asm-x86_64/calling.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-x86_64/calling.h +++ linux-rt-rebase.q/include/asm-x86_64/calling.h @@ -160,3 +160,53 @@ .macro icebp .byte 0xf1 .endm + +/* + * latency-tracing helpers: + */ + + .macro TRACE_SYS_CALL + +#ifdef CONFIG_EVENT_TRACE + SAVE_ARGS + + mov %rdx, %rcx + mov %rsi, %rdx + mov %rdi, %rsi + mov %rax, %rdi + + call sys_call + + RESTORE_ARGS +#endif + .endm + + + .macro TRACE_SYS_IA32_CALL + +#ifdef CONFIG_EVENT_TRACE + 
SAVE_ARGS + + mov %rdx, %rcx + mov %rsi, %rdx + mov %rdi, %rsi + mov %rax, %rdi + + call sys_ia32_call + + RESTORE_ARGS +#endif + .endm + + .macro TRACE_SYS_RET + +#ifdef CONFIG_EVENT_TRACE + SAVE_ARGS + + mov %rax, %rdi + + call sys_ret + + RESTORE_ARGS +#endif + .endm Index: linux-rt-rebase.q/include/asm-x86_64/unistd.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-x86_64/unistd.h +++ linux-rt-rebase.q/include/asm-x86_64/unistd.h @@ -11,6 +11,8 @@ * Note: holes are not allowed. */ +#define NR_syscalls (__NR_syscall_max+1) + /* at least 8 syscall per cacheline */ #define __NR_read 0 __SYSCALL(__NR_read, sys_read) patches/preempt-realtime-input.patch0000664000077200007720000000250310653433166017162 0ustar mingomingo--- drivers/input/gameport/gameport.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) Index: linux-rt-rebase.q/drivers/input/gameport/gameport.c =================================================================== --- linux-rt-rebase.q.orig/drivers/input/gameport/gameport.c +++ linux-rt-rebase.q/drivers/input/gameport/gameport.c @@ -21,6 +21,7 @@ #include #include #include +#include #include /* HZ */ #include #include @@ -102,12 +103,12 @@ static int gameport_measure_speed(struct tx = 1 << 30; for(i = 0; i < 50; i++) { - local_irq_save(flags); + local_irq_save_nort(flags); GET_TIME(t1); for (t = 0; t < 50; t++) gameport_read(gameport); GET_TIME(t2); GET_TIME(t3); - local_irq_restore(flags); + local_irq_restore_nort(flags); udelay(i * 10); if ((t = DELTA(t2,t1) - DELTA(t3,t2)) < tx) tx = t; } @@ -126,11 +127,11 @@ static int gameport_measure_speed(struct tx = 1 << 30; for(i = 0; i < 50; i++) { - local_irq_save(flags); + local_irq_save_nort(flags); rdtscl(t1); for (t = 0; t < 50; t++) gameport_read(gameport); rdtscl(t2); - local_irq_restore(flags); + local_irq_restore_nort(flags); udelay(i * 10); if (t2 - t1 < tx) tx = t2 - t1; } patches/preempt-irqs-i386-ioapic-mask-quirk.patch0000664000077200007720000001360610653433164021220 0ustar mingomingoFrom mschmidt@redhat.com Thu Jun 21 13:32:02 2007 Return-Path: Received: from mx1.redhat.com (mx1.redhat.com [66.187.233.31]) by mail.tglx.de (Postfix) with ESMTP id CA11565C065 for ; Thu, 21 Jun 2007 13:32:02 +0200 (CEST) Received: from int-mx1.corp.redhat.com (int-mx1.corp.redhat.com [172.16.52.254]) by mx1.redhat.com (8.13.1/8.13.1) with ESMTP id l5LBVoq3016914; Thu, 21 Jun 2007 07:31:50 -0400 Received: from pobox.stuttgart.redhat.com (pobox.stuttgart.redhat.com [172.16.2.10]) by int-mx1.corp.redhat.com (8.13.1/8.13.1) with ESMTP id l5LBVmp0010104; Thu, 21 Jun 2007 07:31:49 -0400 Received: from [10.34.32.84] (brian.englab.brq.redhat.com [10.34.32.84]) by pobox.stuttgart.redhat.com (8.12.11.20060308/8.12.11) with ESMTP id l5LBVl5k000423; Thu, 21 Jun 2007 13:31:47 +0200 Message-ID: <467A61A3.7060804@redhat.com> Date: Thu, 21 Jun 2007 13:31:47 +0200 From: Michal Schmidt User-Agent: Thunderbird 1.5.0.12 (X11/20070529) MIME-Version: 1.0 To: Steven Rostedt CC: Ingo Molnar , Thomas Gleixner , linux-rt-users@vger.kernel.org, linux-kernel@vger.kernel.org Subject: Re: [PATCH -rt] irq nobody cared workaround for i386 References: <4676CF81.2000205@redhat.com> <4677D7AF.7040700@redhat.com> <467932B4.6030800@redhat.com> <467936FE.8050704@redhat.com> In-Reply-To: <467936FE.8050704@redhat.com> X-Enigmail-Version: 0.94.2.0 Content-Type: text/plain; charset=ISO-8859-1 X-Evolution-Source: imap://tglx%40linutronix.de@localhost:8993/ Content-Transfer-Encoding: 8bit Steven Rostedt wrote: 
> Michal Schmidt wrote: > >> I came to the conclusion that the IO-APICs which need the fix for the >> nobody cared bug don't have the issue ack_ioapic_quirk_irq is designed >> to work-around. It should be safe simply to use the normal >> ack_ioapic_irq as the .eoi method in pcix_ioapic_chip. >> So this is the port of Steven's fix for the nobody cared bug to i386. It >> works fine on IBM LS21 I have access to. >> >> > You want to make that "apic > 0". Note the spacing. If it breaks > 80 characters, then simply put it to a new line. > > [...] > ACK > > -- Steve > OK, I fixed the spacing in both occurences. Signed-off-by: Michal Schmidt --- arch/i386/kernel/io_apic.c | 66 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 57 insertions(+), 9 deletions(-) Index: linux-rt-rebase.q/arch/i386/kernel/io_apic.c =================================================================== --- linux-rt-rebase.q.orig/arch/i386/kernel/io_apic.c +++ linux-rt-rebase.q/arch/i386/kernel/io_apic.c @@ -261,6 +261,18 @@ static void __unmask_IO_APIC_irq (unsign __modify_IO_APIC_irq(irq, 0, 0x00010000); } +/* trigger = 0 (edge mode) */ +static void __pcix_mask_IO_APIC_irq (unsigned int irq) +{ + __modify_IO_APIC_irq(irq, 0, 0x00008000); +} + +/* mask = 0, trigger = 1 (level mode) */ +static void __pcix_unmask_IO_APIC_irq (unsigned int irq) +{ + __modify_IO_APIC_irq(irq, 0x00008000, 0x00010000); +} + static void mask_IO_APIC_irq (unsigned int irq) { unsigned long flags; @@ -279,6 +291,24 @@ static void unmask_IO_APIC_irq (unsigned spin_unlock_irqrestore(&ioapic_lock, flags); } +static void pcix_mask_IO_APIC_irq (unsigned int irq) +{ + unsigned long flags; + + spin_lock_irqsave(&ioapic_lock, flags); + __pcix_mask_IO_APIC_irq(irq); + spin_unlock_irqrestore(&ioapic_lock, flags); +} + +static void pcix_unmask_IO_APIC_irq (unsigned int irq) +{ + unsigned long flags; + + spin_lock_irqsave(&ioapic_lock, flags); + __pcix_unmask_IO_APIC_irq(irq); + spin_unlock_irqrestore(&ioapic_lock, flags); +} + static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) { struct IO_APIC_route_entry entry; @@ -1235,22 +1265,27 @@ static int assign_irq_vector(int irq) return vector; } + static struct irq_chip ioapic_chip; +static struct irq_chip pcix_ioapic_chip; #define IOAPIC_AUTO -1 #define IOAPIC_EDGE 0 #define IOAPIC_LEVEL 1 -static void ioapic_register_intr(int irq, int vector, unsigned long trigger) +static void ioapic_register_intr(int irq, int vector, unsigned long trigger, + int pcix) { + struct irq_chip *chip = pcix ? &pcix_ioapic_chip : &ioapic_chip; + if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || trigger == IOAPIC_LEVEL) - set_irq_chip_and_handler_name(irq, &ioapic_chip, - handle_fasteoi_irq, "fasteoi"); - else { - set_irq_chip_and_handler_name(irq, &ioapic_chip, - handle_edge_irq, "edge"); - } + set_irq_chip_and_handler_name(irq, chip, handle_fasteoi_irq, + pcix ? "pcix-fasteoi" : "fasteoi"); + else + set_irq_chip_and_handler_name(irq, chip, handle_edge_irq, + pcix ? 
"pcix-edge" : "edge"); + set_intr_gate(vector, interrupt[irq]); } @@ -1314,7 +1349,8 @@ static void __init setup_IO_APIC_irqs(vo if (IO_APIC_IRQ(irq)) { vector = assign_irq_vector(irq); entry.vector = vector; - ioapic_register_intr(irq, vector, IOAPIC_AUTO); + ioapic_register_intr(irq, vector, IOAPIC_AUTO, + apic > 0); if (!apic && (irq < 16)) disable_8259A_irq(irq); @@ -2006,6 +2042,18 @@ static struct irq_chip ioapic_chip __rea .retrigger = ioapic_retrigger_irq, }; +static struct irq_chip pcix_ioapic_chip __read_mostly = { + .name = "IO-APIC", + .startup = startup_ioapic_irq, + .mask = pcix_mask_IO_APIC_irq, + .unmask = pcix_unmask_IO_APIC_irq, + .ack = ack_ioapic_irq, + .eoi = ack_ioapic_irq, +#ifdef CONFIG_SMP + .set_affinity = set_ioapic_affinity_irq, +#endif + .retrigger = ioapic_retrigger_irq, +}; static inline void init_IO_APIC_traps(void) { @@ -2806,7 +2854,7 @@ int io_apic_set_pci_routing (int ioapic, mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq, edge_level, active_high_low); - ioapic_register_intr(irq, entry.vector, edge_level); + ioapic_register_intr(irq, entry.vector, edge_level, ioapic > 0); if (!ioapic && (irq < 16)) disable_8259A_irq(irq); patches/x86_64-preparatory-apic-set-lvtt.patch0000664000077200007720000000455410653433161020552 0ustar mingomingoSubject: x86_64: prepare apic code for clock events Change __setup_APIC_LVTT so it takes the arguments which are necessary for the later clock events switch. Signed-off-by: Thomas Gleixner Signed-off-by: Chris Wright Signed-off-by: Ingo Molnar --- arch/x86_64/kernel/apic.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) Index: linux-rt-rebase.q/arch/x86_64/kernel/apic.c =================================================================== --- linux-rt-rebase.q.orig/arch/x86_64/kernel/apic.c +++ linux-rt-rebase.q/arch/x86_64/kernel/apic.c @@ -760,14 +760,14 @@ void __init init_apic_mappings(void) #define APIC_DIVISOR 16 -static void __setup_APIC_LVTT(unsigned int clocks) +static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) { unsigned int lvtt_value, tmp_value; - int cpu = smp_processor_id(); - - lvtt_value = APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR; - if (cpu_isset(cpu, timer_interrupt_broadcast_ipi_mask)) + lvtt_value = LOCAL_TIMER_VECTOR; + if (!oneshot) + lvtt_value |= APIC_LVT_TIMER_PERIODIC; + if (!irqen) lvtt_value |= APIC_LVT_MASKED; apic_write(APIC_LVTT, lvtt_value); @@ -780,12 +780,14 @@ static void __setup_APIC_LVTT(unsigned i & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | APIC_TDR_DIV_16); - apic_write(APIC_TMICT, clocks/APIC_DIVISOR); + if (!oneshot) + apic_write(APIC_TMICT, clocks/APIC_DIVISOR); } static void setup_APIC_timer(unsigned int clocks) { unsigned long flags; + int irqen; local_irq_save(flags); @@ -808,7 +810,10 @@ static void setup_APIC_timer(unsigned in c2 |= inb_p(0x40) << 8; } while (c2 - c1 < 300); } - __setup_APIC_LVTT(clocks); + + irqen = ! cpu_isset(smp_processor_id(), + timer_interrupt_broadcast_ipi_mask); + __setup_APIC_LVTT(clocks, 0, irqen); /* Turn off PIT interrupt if we use APIC timer as main timer. Only works with the PM timer right now TBD fix it for HPET too. */ @@ -846,8 +851,10 @@ static int __init calibrate_APIC_clock(v * Put whatever arbitrary (but long enough) timeout * value into the APIC clock, we just want to get the * counter running for calibration. + * + * No interrupt enable ! 
*/ - __setup_APIC_LVTT(4000000000); + __setup_APIC_LVTT(4000000000, 0, 0); apic_start = apic_read(APIC_TMCCT); #ifdef CONFIG_X86_PM_TIMER patches/ppc-remove-broken-vsyscall.patch0000664000077200007720000006045510653433163017751 0ustar mingomingoFrom sshtylyov@ru.mvista.com Wed May 16 20:55:24 2007 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.1.7-deb (2006-10-05) on debian X-Spam-Level: X-Spam-Status: No, score=0.0 required=5.0 tests=AWL autolearn=unavailable version=3.1.7-deb Received: from imap.sh.mvista.com (unknown [63.81.120.155]) by mail.tglx.de (Postfix) with ESMTP id A9FD665C065 for ; Wed, 16 May 2007 20:55:24 +0200 (CEST) Received: from wasted.dev.rtsoft.ru (unknown [10.150.0.9]) by imap.sh.mvista.com (Postfix) with ESMTP id A97873EC9; Wed, 16 May 2007 11:55:18 -0700 (PDT) From: Sergei Shtylyov (by way of Sergei Shtylyov ) Organization: MontaVista Software Inc. Subject: [PATCH 2.6.21-rt2] PowerPC: remove broken vsyscall code Date: Wed, 16 May 2007 21:56:51 +0300 User-Agent: KMail/1.5 MIME-Version: 1.0 Content-Disposition: inline To: tglx@linutronix.de, mingo@elte.hu Cc: linux-kernel@vger.kernel.org, johnstul@us.ibm.com Content-Type: text/plain; charset="iso-8859-1" Message-Id: <200705162256.51722.sshtylyov@ru.mvista.com> X-Evolution-Source: imap://tglx%40linutronix.de@localhost:8993/ Content-Transfer-Encoding: 8bit Remove PowerPC vsyscalls that were broken by the generic TOD patch. Signed-off-by: Sergei Shtylyov --- Since there's still no working PowerPC TOD vsyscalls fix, and they continue to be broken in the RT patch, I've respun this patch again... arch/powerpc/kernel/asm-offsets.c | 15 - arch/powerpc/kernel/smp.c | 2 arch/powerpc/kernel/vdso32/Makefile | 2 arch/powerpc/kernel/vdso32/datapage.S | 18 - arch/powerpc/kernel/vdso32/gettimeofday.S | 324 ------------------------------ arch/powerpc/kernel/vdso32/vdso32.lds.S | 4 arch/powerpc/kernel/vdso64/Makefile | 2 arch/powerpc/kernel/vdso64/datapage.S | 18 - arch/powerpc/kernel/vdso64/gettimeofday.S | 255 ----------------------- arch/powerpc/kernel/vdso64/vdso64.lds.S | 4 include/asm-powerpc/time.h | 20 - include/asm-powerpc/vdso_datapage.h | 14 - 12 files changed, 2 insertions(+), 676 deletions(-) Index: linux-rt-rebase.q/arch/powerpc/kernel/asm-offsets.c =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/kernel/asm-offsets.c +++ linux-rt-rebase.q/arch/powerpc/kernel/asm-offsets.c @@ -273,16 +273,7 @@ int main(void) #endif /* ! 
CONFIG_PPC64 */ /* datapage offsets for use by vdso */ - DEFINE(CFG_TB_ORIG_STAMP, offsetof(struct vdso_data, tb_orig_stamp)); - DEFINE(CFG_TB_TICKS_PER_SEC, offsetof(struct vdso_data, tb_ticks_per_sec)); - DEFINE(CFG_TB_TO_XS, offsetof(struct vdso_data, tb_to_xs)); - DEFINE(CFG_STAMP_XSEC, offsetof(struct vdso_data, stamp_xsec)); - DEFINE(CFG_TB_UPDATE_COUNT, offsetof(struct vdso_data, tb_update_count)); - DEFINE(CFG_TZ_MINUTEWEST, offsetof(struct vdso_data, tz_minuteswest)); - DEFINE(CFG_TZ_DSTTIME, offsetof(struct vdso_data, tz_dsttime)); DEFINE(CFG_SYSCALL_MAP32, offsetof(struct vdso_data, syscall_map_32)); - DEFINE(WTOM_CLOCK_SEC, offsetof(struct vdso_data, wtom_clock_sec)); - DEFINE(WTOM_CLOCK_NSEC, offsetof(struct vdso_data, wtom_clock_nsec)); #ifdef CONFIG_PPC64 DEFINE(CFG_SYSCALL_MAP64, offsetof(struct vdso_data, syscall_map_64)); DEFINE(TVAL64_TV_SEC, offsetof(struct timeval, tv_sec)); @@ -303,12 +294,6 @@ int main(void) DEFINE(TZONE_TZ_MINWEST, offsetof(struct timezone, tz_minuteswest)); DEFINE(TZONE_TZ_DSTTIME, offsetof(struct timezone, tz_dsttime)); - /* Other bits used by the vdso */ - DEFINE(CLOCK_REALTIME, CLOCK_REALTIME); - DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC); - DEFINE(NSEC_PER_SEC, NSEC_PER_SEC); - DEFINE(CLOCK_REALTIME_RES, TICK_NSEC); - #ifdef CONFIG_BUG DEFINE(BUG_ENTRY_SIZE, sizeof(struct bug_entry)); #endif Index: linux-rt-rebase.q/arch/powerpc/kernel/smp.c =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/kernel/smp.c +++ linux-rt-rebase.q/arch/powerpc/kernel/smp.c @@ -336,8 +336,6 @@ void smp_call_function_interrupt(void) } } -extern struct gettimeofday_struct do_gtod; - struct thread_info *current_set[NR_CPUS]; DECLARE_PER_CPU(unsigned int, pvr); Index: linux-rt-rebase.q/arch/powerpc/kernel/vdso32/Makefile =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/kernel/vdso32/Makefile +++ linux-rt-rebase.q/arch/powerpc/kernel/vdso32/Makefile @@ -1,7 +1,7 @@ # List of files in the vdso, has to be asm only for now -obj-vdso32 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o +obj-vdso32 = sigtramp.o datapage.o cacheflush.o note.o # Build rules Index: linux-rt-rebase.q/arch/powerpc/kernel/vdso32/datapage.S =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/kernel/vdso32/datapage.S +++ linux-rt-rebase.q/arch/powerpc/kernel/vdso32/datapage.S @@ -65,21 +65,3 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_ma blr .cfi_endproc V_FUNCTION_END(__kernel_get_syscall_map) - -/* - * void unsigned long long __kernel_get_tbfreq(void); - * - * returns the timebase frequency in HZ - */ -V_FUNCTION_BEGIN(__kernel_get_tbfreq) - .cfi_startproc - mflr r12 - .cfi_register lr,r12 - bl __get_datapage@local - lwz r4,(CFG_TB_TICKS_PER_SEC + 4)(r3) - lwz r3,CFG_TB_TICKS_PER_SEC(r3) - mtlr r12 - crclr cr0*4+so - blr - .cfi_endproc -V_FUNCTION_END(__kernel_get_tbfreq) Index: linux-rt-rebase.q/arch/powerpc/kernel/vdso32/gettimeofday.S =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/kernel/vdso32/gettimeofday.S +++ /dev/null @@ -1,324 +0,0 @@ -/* - * Userland implementation of gettimeofday() for 32 bits processes in a - * ppc64 kernel for use in the vDSO - * - * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org, - * IBM Corp. 
- * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ -#include -#include -#include -#include -#include - - .text -/* - * Exact prototype of gettimeofday - * - * int __kernel_gettimeofday(struct timeval *tv, struct timezone *tz); - * - */ -V_FUNCTION_BEGIN(__kernel_gettimeofday) - .cfi_startproc - mflr r12 - .cfi_register lr,r12 - - mr r10,r3 /* r10 saves tv */ - mr r11,r4 /* r11 saves tz */ - bl __get_datapage@local /* get data page */ - mr r9, r3 /* datapage ptr in r9 */ - cmplwi r10,0 /* check if tv is NULL */ - beq 3f - bl __do_get_xsec@local /* get xsec from tb & kernel */ - bne- 2f /* out of line -> do syscall */ - - /* seconds are xsec >> 20 */ - rlwinm r5,r4,12,20,31 - rlwimi r5,r3,12,0,19 - stw r5,TVAL32_TV_SEC(r10) - - /* get remaining xsec and convert to usec. we scale - * up remaining xsec by 12 bits and get the top 32 bits - * of the multiplication - */ - rlwinm r5,r4,12,0,19 - lis r6,1000000@h - ori r6,r6,1000000@l - mulhwu r5,r5,r6 - stw r5,TVAL32_TV_USEC(r10) - -3: cmplwi r11,0 /* check if tz is NULL */ - beq 1f - lwz r4,CFG_TZ_MINUTEWEST(r9)/* fill tz */ - lwz r5,CFG_TZ_DSTTIME(r9) - stw r4,TZONE_TZ_MINWEST(r11) - stw r5,TZONE_TZ_DSTTIME(r11) - -1: mtlr r12 - crclr cr0*4+so - li r3,0 - blr - -2: - mtlr r12 - mr r3,r10 - mr r4,r11 - li r0,__NR_gettimeofday - sc - blr - .cfi_endproc -V_FUNCTION_END(__kernel_gettimeofday) - -/* - * Exact prototype of clock_gettime() - * - * int __kernel_clock_gettime(clockid_t clock_id, struct timespec *tp); - * - */ -V_FUNCTION_BEGIN(__kernel_clock_gettime) - .cfi_startproc - /* Check for supported clock IDs */ - cmpli cr0,r3,CLOCK_REALTIME - cmpli cr1,r3,CLOCK_MONOTONIC - cror cr0*4+eq,cr0*4+eq,cr1*4+eq - bne cr0,99f - - mflr r12 /* r12 saves lr */ - .cfi_register lr,r12 - mr r10,r3 /* r10 saves id */ - mr r11,r4 /* r11 saves tp */ - bl __get_datapage@local /* get data page */ - mr r9,r3 /* datapage ptr in r9 */ - beq cr1,50f /* if monotonic -> jump there */ - - /* - * CLOCK_REALTIME - */ - - bl __do_get_xsec@local /* get xsec from tb & kernel */ - bne- 98f /* out of line -> do syscall */ - - /* seconds are xsec >> 20 */ - rlwinm r5,r4,12,20,31 - rlwimi r5,r3,12,0,19 - stw r5,TSPC32_TV_SEC(r11) - - /* get remaining xsec and convert to nsec. we scale - * up remaining xsec by 12 bits and get the top 32 bits - * of the multiplication, then we multiply by 1000 - */ - rlwinm r5,r4,12,0,19 - lis r6,1000000@h - ori r6,r6,1000000@l - mulhwu r5,r5,r6 - mulli r5,r5,1000 - stw r5,TSPC32_TV_NSEC(r11) - mtlr r12 - crclr cr0*4+so - li r3,0 - blr - - /* - * CLOCK_MONOTONIC - */ - -50: bl __do_get_xsec@local /* get xsec from tb & kernel */ - bne- 98f /* out of line -> do syscall */ - - /* seconds are xsec >> 20 */ - rlwinm r6,r4,12,20,31 - rlwimi r6,r3,12,0,19 - - /* get remaining xsec and convert to nsec. we scale - * up remaining xsec by 12 bits and get the top 32 bits - * of the multiplication, then we multiply by 1000 - */ - rlwinm r7,r4,12,0,19 - lis r5,1000000@h - ori r5,r5,1000000@l - mulhwu r7,r7,r5 - mulli r7,r7,1000 - - /* now we must fixup using wall to monotonic. We need to snapshot - * that value and do the counter trick again. Fortunately, we still - * have the counter value in r8 that was returned by __do_get_xsec. 
- * At this point, r6,r7 contain our sec/nsec values, r3,r4 and r5 - * can be used - */ - - lwz r3,WTOM_CLOCK_SEC(r9) - lwz r4,WTOM_CLOCK_NSEC(r9) - - /* We now have our result in r3,r4. We create a fake dependency - * on that result and re-check the counter - */ - or r5,r4,r3 - xor r0,r5,r5 - add r9,r9,r0 -#ifdef CONFIG_PPC64 - lwz r0,(CFG_TB_UPDATE_COUNT+4)(r9) -#else - lwz r0,(CFG_TB_UPDATE_COUNT)(r9) -#endif - cmpl cr0,r8,r0 /* check if updated */ - bne- 50b - - /* Calculate and store result. Note that this mimmics the C code, - * which may cause funny results if nsec goes negative... is that - * possible at all ? - */ - add r3,r3,r6 - add r4,r4,r7 - lis r5,NSEC_PER_SEC@h - ori r5,r5,NSEC_PER_SEC@l - cmpl cr0,r4,r5 - cmpli cr1,r4,0 - blt 1f - subf r4,r5,r4 - addi r3,r3,1 -1: bge cr1,1f - addi r3,r3,-1 - add r4,r4,r5 -1: stw r3,TSPC32_TV_SEC(r11) - stw r4,TSPC32_TV_NSEC(r11) - - mtlr r12 - crclr cr0*4+so - li r3,0 - blr - - /* - * syscall fallback - */ -98: - mtlr r12 - mr r3,r10 - mr r4,r11 -99: - li r0,__NR_clock_gettime - sc - blr - .cfi_endproc -V_FUNCTION_END(__kernel_clock_gettime) - - -/* - * Exact prototype of clock_getres() - * - * int __kernel_clock_getres(clockid_t clock_id, struct timespec *res); - * - */ -V_FUNCTION_BEGIN(__kernel_clock_getres) - .cfi_startproc - /* Check for supported clock IDs */ - cmpwi cr0,r3,CLOCK_REALTIME - cmpwi cr1,r3,CLOCK_MONOTONIC - cror cr0*4+eq,cr0*4+eq,cr1*4+eq - bne cr0,99f - - li r3,0 - cmpli cr0,r4,0 - crclr cr0*4+so - beqlr - lis r5,CLOCK_REALTIME_RES@h - ori r5,r5,CLOCK_REALTIME_RES@l - stw r3,TSPC32_TV_SEC(r4) - stw r5,TSPC32_TV_NSEC(r4) - blr - - /* - * syscall fallback - */ -99: - li r0,__NR_clock_getres - sc - blr - .cfi_endproc -V_FUNCTION_END(__kernel_clock_getres) - - -/* - * This is the core of gettimeofday() & friends, it returns the xsec - * value in r3 & r4 and expects the datapage ptr (non clobbered) - * in r9. clobbers r0,r4,r5,r6,r7,r8. - * When returning, r8 contains the counter value that can be reused - * by the monotonic clock implementation - */ -__do_get_xsec: - .cfi_startproc - /* Check for update count & load values. We use the low - * order 32 bits of the update count - */ -#ifdef CONFIG_PPC64 -1: lwz r8,(CFG_TB_UPDATE_COUNT+4)(r9) -#else -1: lwz r8,(CFG_TB_UPDATE_COUNT)(r9) -#endif - andi. r0,r8,1 /* pending update ? loop */ - bne- 1b - xor r0,r8,r8 /* create dependency */ - add r9,r9,r0 - - /* Load orig stamp (offset to TB) */ - lwz r5,CFG_TB_ORIG_STAMP(r9) - lwz r6,(CFG_TB_ORIG_STAMP+4)(r9) - - /* Get a stable TB value */ -2: mftbu r3 - mftbl r4 - mftbu r0 - cmpl cr0,r3,r0 - bne- 2b - - /* Substract tb orig stamp. If the high part is non-zero, we jump to - * the slow path which call the syscall. - * If it's ok, then we have our 32 bits tb_ticks value in r7 - */ - subfc r7,r6,r4 - subfe. r0,r5,r3 - bne- 3f - - /* Load scale factor & do multiplication */ - lwz r5,CFG_TB_TO_XS(r9) /* load values */ - lwz r6,(CFG_TB_TO_XS+4)(r9) - mulhwu r4,r7,r5 - mulhwu r6,r7,r6 - mullw r0,r7,r5 - addc r6,r6,r0 - - /* At this point, we have the scaled xsec value in r4 + XER:CA - * we load & add the stamp since epoch - */ - lwz r5,CFG_STAMP_XSEC(r9) - lwz r6,(CFG_STAMP_XSEC+4)(r9) - adde r4,r4,r6 - addze r3,r5 - - /* We now have our result in r3,r4. 
We create a fake dependency - * on that result and re-check the counter - */ - or r6,r4,r3 - xor r0,r6,r6 - add r9,r9,r0 -#ifdef CONFIG_PPC64 - lwz r0,(CFG_TB_UPDATE_COUNT+4)(r9) -#else - lwz r0,(CFG_TB_UPDATE_COUNT)(r9) -#endif - cmpl cr0,r8,r0 /* check if updated */ - bne- 1b - - /* Warning ! The caller expects CR:EQ to be set to indicate a - * successful calculation (so it won't fallback to the syscall - * method). We have overriden that CR bit in the counter check, - * but fortunately, the loop exit condition _is_ CR:EQ set, so - * we can exit safely here. If you change this code, be careful - * of that side effect. - */ -3: blr - .cfi_endproc Index: linux-rt-rebase.q/arch/powerpc/kernel/vdso32/vdso32.lds.S =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/kernel/vdso32/vdso32.lds.S +++ linux-rt-rebase.q/arch/powerpc/kernel/vdso32/vdso32.lds.S @@ -117,10 +117,6 @@ VERSION global: __kernel_datapage_offset; /* Has to be there for the kernel to find */ __kernel_get_syscall_map; - __kernel_gettimeofday; - __kernel_clock_gettime; - __kernel_clock_getres; - __kernel_get_tbfreq; __kernel_sync_dicache; __kernel_sync_dicache_p5; __kernel_sigtramp32; Index: linux-rt-rebase.q/arch/powerpc/kernel/vdso64/Makefile =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/kernel/vdso64/Makefile +++ linux-rt-rebase.q/arch/powerpc/kernel/vdso64/Makefile @@ -1,6 +1,6 @@ # List of files in the vdso, has to be asm only for now -obj-vdso64 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o +obj-vdso64 = sigtramp.o datapage.o cacheflush.o note.o # Build rules Index: linux-rt-rebase.q/arch/powerpc/kernel/vdso64/datapage.S =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/kernel/vdso64/datapage.S +++ linux-rt-rebase.q/arch/powerpc/kernel/vdso64/datapage.S @@ -65,21 +65,3 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_ma blr .cfi_endproc V_FUNCTION_END(__kernel_get_syscall_map) - - -/* - * void unsigned long __kernel_get_tbfreq(void); - * - * returns the timebase frequency in HZ - */ -V_FUNCTION_BEGIN(__kernel_get_tbfreq) - .cfi_startproc - mflr r12 - .cfi_register lr,r12 - bl V_LOCAL_FUNC(__get_datapage) - ld r3,CFG_TB_TICKS_PER_SEC(r3) - mtlr r12 - crclr cr0*4+so - blr - .cfi_endproc -V_FUNCTION_END(__kernel_get_tbfreq) Index: linux-rt-rebase.q/arch/powerpc/kernel/vdso64/gettimeofday.S =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/kernel/vdso64/gettimeofday.S +++ /dev/null @@ -1,255 +0,0 @@ -/* - * Userland implementation of gettimeofday() for 64 bits processes in a - * ppc64 kernel for use in the vDSO - * - * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), - * IBM Corp. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
- */ -#include -#include -#include -#include -#include - - .text -/* - * Exact prototype of gettimeofday - * - * int __kernel_gettimeofday(struct timeval *tv, struct timezone *tz); - * - */ -V_FUNCTION_BEGIN(__kernel_gettimeofday) - .cfi_startproc - mflr r12 - .cfi_register lr,r12 - - mr r11,r3 /* r11 holds tv */ - mr r10,r4 /* r10 holds tz */ - bl V_LOCAL_FUNC(__get_datapage) /* get data page */ - cmpldi r11,0 /* check if tv is NULL */ - beq 2f - bl V_LOCAL_FUNC(__do_get_xsec) /* get xsec from tb & kernel */ - lis r7,15 /* r7 = 1000000 = USEC_PER_SEC */ - ori r7,r7,16960 - rldicl r5,r4,44,20 /* r5 = sec = xsec / XSEC_PER_SEC */ - rldicr r6,r5,20,43 /* r6 = sec * XSEC_PER_SEC */ - std r5,TVAL64_TV_SEC(r11) /* store sec in tv */ - subf r0,r6,r4 /* r0 = xsec = (xsec - r6) */ - mulld r0,r0,r7 /* usec = (xsec * USEC_PER_SEC) / - * XSEC_PER_SEC - */ - rldicl r0,r0,44,20 - std r0,TVAL64_TV_USEC(r11) /* store usec in tv */ -2: cmpldi r10,0 /* check if tz is NULL */ - beq 1f - lwz r4,CFG_TZ_MINUTEWEST(r3)/* fill tz */ - lwz r5,CFG_TZ_DSTTIME(r3) - stw r4,TZONE_TZ_MINWEST(r10) - stw r5,TZONE_TZ_DSTTIME(r10) -1: mtlr r12 - crclr cr0*4+so - li r3,0 /* always success */ - blr - .cfi_endproc -V_FUNCTION_END(__kernel_gettimeofday) - - -/* - * Exact prototype of clock_gettime() - * - * int __kernel_clock_gettime(clockid_t clock_id, struct timespec *tp); - * - */ -V_FUNCTION_BEGIN(__kernel_clock_gettime) - .cfi_startproc - /* Check for supported clock IDs */ - cmpwi cr0,r3,CLOCK_REALTIME - cmpwi cr1,r3,CLOCK_MONOTONIC - cror cr0*4+eq,cr0*4+eq,cr1*4+eq - bne cr0,99f - - mflr r12 /* r12 saves lr */ - .cfi_register lr,r12 - mr r10,r3 /* r10 saves id */ - mr r11,r4 /* r11 saves tp */ - bl V_LOCAL_FUNC(__get_datapage) /* get data page */ - beq cr1,50f /* if monotonic -> jump there */ - - /* - * CLOCK_REALTIME - */ - - bl V_LOCAL_FUNC(__do_get_xsec) /* get xsec from tb & kernel */ - - lis r7,15 /* r7 = 1000000 = USEC_PER_SEC */ - ori r7,r7,16960 - rldicl r5,r4,44,20 /* r5 = sec = xsec / XSEC_PER_SEC */ - rldicr r6,r5,20,43 /* r6 = sec * XSEC_PER_SEC */ - std r5,TSPC64_TV_SEC(r11) /* store sec in tv */ - subf r0,r6,r4 /* r0 = xsec = (xsec - r6) */ - mulld r0,r0,r7 /* usec = (xsec * USEC_PER_SEC) / - * XSEC_PER_SEC - */ - rldicl r0,r0,44,20 - mulli r0,r0,1000 /* nsec = usec * 1000 */ - std r0,TSPC64_TV_NSEC(r11) /* store nsec in tp */ - - mtlr r12 - crclr cr0*4+so - li r3,0 - blr - - /* - * CLOCK_MONOTONIC - */ - -50: bl V_LOCAL_FUNC(__do_get_xsec) /* get xsec from tb & kernel */ - - lis r7,15 /* r7 = 1000000 = USEC_PER_SEC */ - ori r7,r7,16960 - rldicl r5,r4,44,20 /* r5 = sec = xsec / XSEC_PER_SEC */ - rldicr r6,r5,20,43 /* r6 = sec * XSEC_PER_SEC */ - subf r0,r6,r4 /* r0 = xsec = (xsec - r6) */ - mulld r0,r0,r7 /* usec = (xsec * USEC_PER_SEC) / - * XSEC_PER_SEC - */ - rldicl r6,r0,44,20 - mulli r6,r6,1000 /* nsec = usec * 1000 */ - - /* now we must fixup using wall to monotonic. We need to snapshot - * that value and do the counter trick again. Fortunately, we still - * have the counter value in r8 that was returned by __do_get_xsec. - * At this point, r5,r6 contain our sec/nsec values. - * can be used - */ - - lwa r4,WTOM_CLOCK_SEC(r3) - lwa r7,WTOM_CLOCK_NSEC(r3) - - /* We now have our result in r4,r7. We create a fake dependency - * on that result and re-check the counter - */ - or r9,r4,r7 - xor r0,r9,r9 - add r3,r3,r0 - ld r0,CFG_TB_UPDATE_COUNT(r3) - cmpld cr0,r0,r8 /* check if updated */ - bne- 50b - - /* Calculate and store result. 
Note that this mimmics the C code, - * which may cause funny results if nsec goes negative... is that - * possible at all ? - */ - add r4,r4,r5 - add r7,r7,r6 - lis r9,NSEC_PER_SEC@h - ori r9,r9,NSEC_PER_SEC@l - cmpl cr0,r7,r9 - cmpli cr1,r7,0 - blt 1f - subf r7,r9,r7 - addi r4,r4,1 -1: bge cr1,1f - addi r4,r4,-1 - add r7,r7,r9 -1: std r4,TSPC64_TV_SEC(r11) - std r7,TSPC64_TV_NSEC(r11) - - mtlr r12 - crclr cr0*4+so - li r3,0 - blr - - /* - * syscall fallback - */ -98: - mtlr r12 - mr r3,r10 - mr r4,r11 -99: - li r0,__NR_clock_gettime - sc - blr - .cfi_endproc -V_FUNCTION_END(__kernel_clock_gettime) - - -/* - * Exact prototype of clock_getres() - * - * int __kernel_clock_getres(clockid_t clock_id, struct timespec *res); - * - */ -V_FUNCTION_BEGIN(__kernel_clock_getres) - .cfi_startproc - /* Check for supported clock IDs */ - cmpwi cr0,r3,CLOCK_REALTIME - cmpwi cr1,r3,CLOCK_MONOTONIC - cror cr0*4+eq,cr0*4+eq,cr1*4+eq - bne cr0,99f - - li r3,0 - cmpli cr0,r4,0 - crclr cr0*4+so - beqlr - lis r5,CLOCK_REALTIME_RES@h - ori r5,r5,CLOCK_REALTIME_RES@l - std r3,TSPC64_TV_SEC(r4) - std r5,TSPC64_TV_NSEC(r4) - blr - - /* - * syscall fallback - */ -99: - li r0,__NR_clock_getres - sc - blr - .cfi_endproc -V_FUNCTION_END(__kernel_clock_getres) - - -/* - * This is the core of gettimeofday(), it returns the xsec - * value in r4 and expects the datapage ptr (non clobbered) - * in r3. clobbers r0,r4,r5,r6,r7,r8 - * When returning, r8 contains the counter value that can be reused - */ -V_FUNCTION_BEGIN(__do_get_xsec) - .cfi_startproc - /* check for update count & load values */ -1: ld r8,CFG_TB_UPDATE_COUNT(r3) - andi. r0,r8,1 /* pending update ? loop */ - bne- 1b - xor r0,r8,r8 /* create dependency */ - add r3,r3,r0 - - /* Get TB & offset it. We use the MFTB macro which will generate - * workaround code for Cell. - */ - MFTB(r7) - ld r9,CFG_TB_ORIG_STAMP(r3) - subf r7,r9,r7 - - /* Scale result */ - ld r5,CFG_TB_TO_XS(r3) - mulhdu r7,r7,r5 - - /* Add stamp since epoch */ - ld r6,CFG_STAMP_XSEC(r3) - add r4,r6,r7 - - xor r0,r4,r4 - add r3,r3,r0 - ld r0,CFG_TB_UPDATE_COUNT(r3) - cmpld cr0,r0,r8 /* check if updated */ - bne- 1b - blr - .cfi_endproc -V_FUNCTION_END(__do_get_xsec) Index: linux-rt-rebase.q/arch/powerpc/kernel/vdso64/vdso64.lds.S =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/kernel/vdso64/vdso64.lds.S +++ linux-rt-rebase.q/arch/powerpc/kernel/vdso64/vdso64.lds.S @@ -115,10 +115,6 @@ VERSION global: __kernel_datapage_offset; /* Has to be there for the kernel to find */ __kernel_get_syscall_map; - __kernel_gettimeofday; - __kernel_clock_gettime; - __kernel_clock_getres; - __kernel_get_tbfreq; __kernel_sync_dicache; __kernel_sync_dicache_p5; __kernel_sigtramp_rt64; Index: linux-rt-rebase.q/include/asm-powerpc/time.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-powerpc/time.h +++ linux-rt-rebase.q/include/asm-powerpc/time.h @@ -47,26 +47,6 @@ extern unsigned long ppc_proc_freq; extern unsigned long ppc_tb_freq; #define DEFAULT_TB_FREQ 125000000UL -/* - * By putting all of this stuff into a single struct we - * reduce the number of cache lines touched by do_gettimeofday. - * Both by collecting all of the data in one cache line and - * by touching only one TOC entry on ppc64. 
- */ -struct gettimeofday_vars { - u64 tb_to_xs; - u64 stamp_xsec; - u64 tb_orig_stamp; -}; - -struct gettimeofday_struct { - unsigned long tb_ticks_per_sec; - struct gettimeofday_vars vars[2]; - struct gettimeofday_vars * volatile varp; - unsigned var_idx; - unsigned tb_to_us; -}; - struct div_result { u64 result_high; u64 result_low; Index: linux-rt-rebase.q/include/asm-powerpc/vdso_datapage.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-powerpc/vdso_datapage.h +++ linux-rt-rebase.q/include/asm-powerpc/vdso_datapage.h @@ -74,11 +74,6 @@ struct vdso_data { __u32 icache_size; /* L1 i-cache size 0x68 */ __u32 icache_line_size; /* L1 i-cache line size 0x6C */ - /* those additional ones don't have to be located anywhere - * special as they were not part of the original systemcfg - */ - __s32 wtom_clock_sec; /* Wall to monotonic clock */ - __s32 wtom_clock_nsec; __u32 syscall_map_64[SYSCALL_MAP_SIZE]; /* map of syscalls */ __u32 syscall_map_32[SYSCALL_MAP_SIZE]; /* map of syscalls */ }; @@ -89,15 +84,6 @@ struct vdso_data { * And here is the simpler 32 bits version */ struct vdso_data { - __u64 tb_orig_stamp; /* Timebase at boot 0x30 */ - __u64 tb_ticks_per_sec; /* Timebase tics / sec 0x38 */ - __u64 tb_to_xs; /* Inverse of TB to 2^20 0x40 */ - __u64 stamp_xsec; /* 0x48 */ - __u32 tb_update_count; /* Timebase atomicity ctr 0x50 */ - __u32 tz_minuteswest; /* Minutes west of Greenwich 0x58 */ - __u32 tz_dsttime; /* Type of dst correction 0x5C */ - __s32 wtom_clock_sec; /* Wall to monotonic clock */ - __s32 wtom_clock_nsec; __u32 syscall_map_32[SYSCALL_MAP_SIZE]; /* map of syscalls */ }; patches/lockdep-more-entries.patch0000664000077200007720000000135010653433163016575 0ustar mingomingo--- kernel/lockdep_internals.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) Index: linux-rt-rebase.q/kernel/lockdep_internals.h =================================================================== --- linux-rt-rebase.q.orig/kernel/lockdep_internals.h +++ linux-rt-rebase.q/kernel/lockdep_internals.h @@ -15,12 +15,12 @@ * table (if it's not there yet), and we check it for lock order * conflicts and deadlocks. */ -#define MAX_LOCKDEP_ENTRIES 8192UL +#define MAX_LOCKDEP_ENTRIES 16384UL #define MAX_LOCKDEP_KEYS_BITS 11 #define MAX_LOCKDEP_KEYS (1UL << MAX_LOCKDEP_KEYS_BITS) -#define MAX_LOCKDEP_CHAINS_BITS 14 +#define MAX_LOCKDEP_CHAINS_BITS 15 #define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS) /* patches/x86_64-apic-shuffle-calibration-around.patch0000664000077200007720000000420210653433161021619 0ustar mingomingoSubject: x86_64: Move apic calibration code around Let the calibration code fill in calibration_result directly and move the variable to the top of the file. Fix up a printk without a log level while at it.
Signed-off-by: Thomas Gleixner Signed-off-by: Chris Wright Signed-off-by: Ingo Molnar --- arch/x86_64/kernel/apic.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) Index: linux-rt-rebase.q/arch/x86_64/kernel/apic.c =================================================================== --- linux-rt-rebase.q.orig/arch/x86_64/kernel/apic.c +++ linux-rt-rebase.q/arch/x86_64/kernel/apic.c @@ -56,6 +56,8 @@ static struct resource lapic_resource = .flags = IORESOURCE_MEM | IORESOURCE_BUSY, }; +static unsigned int calibration_result; + /* * cpu_mask that denotes the CPUs that needs timer interrupt coming in as * IPIs in place of local APIC timers @@ -822,7 +824,7 @@ static void setup_APIC_timer(unsigned in #define TICK_COUNT 100000000 -static int __init calibrate_APIC_clock(void) +static void __init calibrate_APIC_clock(void) { unsigned apic, apic_start; unsigned long tsc, tsc_start; @@ -856,17 +858,14 @@ static int __init calibrate_APIC_clock(v result = (apic_start - apic) * 1000L * tsc_khz / (tsc - tsc_start); } - printk("result %d\n", result); - + printk(KERN_DEBUG "APIC timer calibration result %d\n", result); printk(KERN_INFO "Detected %d.%03d MHz APIC timer.\n", result / 1000 / 1000, result / 1000 % 1000); - return result * APIC_DIVISOR / HZ; + calibration_result = result * APIC_DIVISOR / HZ; } -static unsigned int calibration_result; - void __init setup_boot_APIC_clock (void) { if (disable_apic_timer) { @@ -879,7 +878,7 @@ void __init setup_boot_APIC_clock (void) local_irq_disable(); - calibration_result = calibrate_APIC_clock(); + calibrate_APIC_clock(); /* * Now set up the timer for real. */ @@ -986,8 +985,6 @@ void setup_APIC_extended_lvt(unsigned ch apic_write(reg, v); } -#undef APIC_DIVISOR - /* * Local timer interrupt handler. It does both profiling and * process statistics/rescheduling. patches/reset-latency-histogram.patch0000664000077200007720000000564310653433163017324 0ustar mingomingoSubject: Latency tracer: Reset histogram when preempt_max_latency was reset From: Carsten Emde When the histogram mode is active, it is not possible to reset the histogram for a second run. Reset it when preempt_max_latency is reset.
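[ Editor's note: the reset detection is a simple edge test on preempt_max_latency, condensed here from the check_wakeup_timing() hunk below. This is an illustrative excerpt of the change, not an additional one:

	/* a write of 0 to preempt_max_latency wipes the histograms */
	if (last_preempt_max_latency > 0 && preempt_max_latency == 0)
		latency_hist_reset();
	last_preempt_max_latency = preempt_max_latency;
]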
Signed-off-by: Carsten Emde --- include/linux/latency_hist.h | 1 + kernel/latency_hist.c | 28 ++++++++++++++++++++++++++++ kernel/latency_trace.c | 15 +++++++++++++++ 3 files changed, 44 insertions(+) Index: linux-rt-rebase.q/include/linux/latency_hist.h =================================================================== --- linux-rt-rebase.q.orig/include/linux/latency_hist.h +++ linux-rt-rebase.q/include/linux/latency_hist.h @@ -23,6 +23,7 @@ enum { #ifdef CONFIG_LATENCY_HIST extern void latency_hist(int latency_type, int cpu, unsigned long latency); +extern void latency_hist_reset(void); # define latency_hist_flag 1 #else # define latency_hist(a,b,c) do { (void)(cpu); } while (0) Index: linux-rt-rebase.q/kernel/latency_hist.c =================================================================== --- linux-rt-rebase.q.orig/kernel/latency_hist.c +++ linux-rt-rebase.q/kernel/latency_hist.c @@ -265,3 +265,31 @@ static __init int latency_hist_init(void __initcall(latency_hist_init); + +#ifdef CONFIG_WAKEUP_LATENCY_HIST +static void hist_reset(hist_data_t *hist) +{ + atomic_dec(&hist->hist_mode); + + memset(hist->hist_array, 0, sizeof(hist->hist_array)); + hist->beyond_hist_bound_samples = 0UL; + hist->min_lat = 0xFFFFFFFFUL; + hist->max_lat = 0UL; + hist->total_samples = 0UL; + hist->accumulate_lat = 0UL; + hist->avg_lat = 0UL; + + atomic_inc(&hist->hist_mode); +} + +void latency_hist_reset(void) +{ + int cpu; + hist_data_t *hist; + + for_each_online_cpu(cpu) { + hist = &per_cpu(wakeup_latency_hist, cpu); + hist_reset(hist); + } +} +#endif Index: linux-rt-rebase.q/kernel/latency_trace.c =================================================================== --- linux-rt-rebase.q.orig/kernel/latency_trace.c +++ linux-rt-rebase.q/kernel/latency_trace.c @@ -2180,6 +2180,8 @@ void notrace trace_preempt_exit_idle(voi */ #ifdef CONFIG_WAKEUP_TIMING +unsigned long last_preempt_max_latency; + static void notrace check_wakeup_timing(struct cpu_trace *tr, unsigned long parent_eip, unsigned long *flags) @@ -2207,6 +2209,19 @@ check_wakeup_timing(struct cpu_trace *tr if (!report_latency(delta)) goto out; +#ifdef CONFIG_WAKEUP_LATENCY_HIST + /* + * Was preempt_max_latency reset? + * If so, we reinitialize the latency histograms to keep them in sync. 
+ * + * FIXME: Remove the poll and write our own procfs handler, so + * we can trigger on the write to preempt_max_latency + */ + if (last_preempt_max_latency > 0 && preempt_max_latency == 0) + latency_hist_reset(); + last_preempt_max_latency = preempt_max_latency; +#endif + ____trace(smp_processor_id(), TRACE_FN, tr, CALLER_ADDR0, parent_eip, 0, 0, 0, *flags); patches/gcc-warnings-shut-up.patch0000664000077200007720000000710410653433162016534 0ustar mingomingo fs/isofs/namei.c | 2 +- fs/jffs2/erase.c | 2 +- fs/nfsd/nfsctl.c | 2 +- kernel/audit.c | 2 +- net/core/flow.c | 2 +- net/sunrpc/svc.c | 2 +- sound/core/control_compat.c | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) Index: linux-rt-rebase.q/fs/isofs/namei.c =================================================================== --- linux-rt-rebase.q.orig/fs/isofs/namei.c +++ linux-rt-rebase.q/fs/isofs/namei.c @@ -158,7 +158,7 @@ isofs_find_entry(struct inode *dir, stru struct dentry *isofs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { int found; - unsigned long block, offset; + unsigned long block = 0, offset = 0 /* avoid stupid gcc warning */; struct inode *inode; struct page *page; Index: linux-rt-rebase.q/fs/jffs2/erase.c =================================================================== --- linux-rt-rebase.q.orig/fs/jffs2/erase.c +++ linux-rt-rebase.q/fs/jffs2/erase.c @@ -362,7 +362,7 @@ static void jffs2_mark_erased_block(stru { size_t retlen; int ret; - uint32_t bad_offset; + uint32_t bad_offset = 0 /* shut up gcc */; switch (jffs2_block_check_erase(c, jeb, &bad_offset)) { case -EAGAIN: goto refile; Index: linux-rt-rebase.q/fs/nfsd/nfsctl.c =================================================================== --- linux-rt-rebase.q.orig/fs/nfsd/nfsctl.c +++ linux-rt-rebase.q/fs/nfsd/nfsctl.c @@ -298,7 +298,7 @@ static ssize_t write_filehandle(struct f * qword quoting is used, so filehandle will be \x.... 
*/ char *dname, *path; - int maxsize; + int maxsize = 0; char *mesg = buf; int len; struct auth_domain *dom; Index: linux-rt-rebase.q/kernel/audit.c =================================================================== --- linux-rt-rebase.q.orig/kernel/audit.c +++ linux-rt-rebase.q/kernel/audit.c @@ -1051,7 +1051,7 @@ struct audit_buffer *audit_log_start(str { struct audit_buffer *ab = NULL; struct timespec t; - unsigned int serial; + unsigned int serial = 0 /* shut up gcc */; int reserve; unsigned long timeout_start = jiffies; Index: linux-rt-rebase.q/net/core/flow.c =================================================================== --- linux-rt-rebase.q.orig/net/core/flow.c +++ linux-rt-rebase.q/net/core/flow.c @@ -172,7 +172,7 @@ static int flow_key_compare(struct flowi void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir, flow_resolve_t resolver) { - struct flow_cache_entry *fle, **head; + struct flow_cache_entry *fle, **head = NULL /* shut up GCC */; unsigned int hash; int cpu; Index: linux-rt-rebase.q/net/sunrpc/svc.c =================================================================== --- linux-rt-rebase.q.orig/net/sunrpc/svc.c +++ linux-rt-rebase.q/net/sunrpc/svc.c @@ -547,7 +547,7 @@ __svc_create_thread(svc_thread_fn func, struct svc_rqst *rqstp; int error = -ENOMEM; int have_oldmask = 0; - cpumask_t oldmask; + cpumask_t oldmask = CPU_MASK_NONE /* shut up GCC */; rqstp = kzalloc(sizeof(*rqstp), GFP_KERNEL); if (!rqstp) Index: linux-rt-rebase.q/sound/core/control_compat.c =================================================================== --- linux-rt-rebase.q.orig/sound/core/control_compat.c +++ linux-rt-rebase.q/sound/core/control_compat.c @@ -219,7 +219,7 @@ static int copy_ctl_value_from_user(stru struct snd_ctl_elem_value32 __user *data32, int *typep, int *countp) { - int i, type, count, size; + int i, type, count = 0 /* shut up gcc warning */, size; unsigned int indirect; if (copy_from_user(&data->id, &data32->id, sizeof(data->id))) patches/latency-tracing-remove-trace-array.patch0000664000077200007720000000254410653433162021341 0ustar mingomingo--- kernel/sched.c | 38 -------------------------------------- 1 file changed, 38 deletions(-) Index: linux-rt-rebase.q/kernel/sched.c =================================================================== --- linux-rt-rebase.q.orig/kernel/sched.c +++ linux-rt-rebase.q/kernel/sched.c @@ -3263,42 +3263,6 @@ void scheduler_tick(void) #endif } -#if defined(CONFIG_EVENT_TRACE) && defined(CONFIG_DEBUG_RT_MUTEXES) - -static void trace_array(struct prio_array *array) -{ - int i; - struct task_struct *p; - struct list_head *head, *tmp; - - for (i = 0; i < MAX_RT_PRIO; i++) { - head = array->queue + i; - if (list_empty(head)) { - WARN_ON(test_bit(i, array->bitmap)); - continue; - } - WARN_ON(!test_bit(i, array->bitmap)); - list_for_each(tmp, head) { - p = list_entry(tmp, struct task_struct, run_list); - trace_special_pid(p->pid, p->prio, PRIO(p)); - } - } -} - -static inline void trace_all_runnable_tasks(struct rq *rq) -{ - if (trace_enabled) - trace_array(&rq->active); -} - -#else - -static inline void trace_all_runnable_tasks(struct rq *rq) -{ -} - -#endif - /* * Print scheduling while atomic bug: */ @@ -3406,8 +3370,6 @@ need_resched_nonpreemptible: prev->sched_class->put_prev_task(rq, prev, now); next = pick_next_task(rq, prev, now); - trace_all_runnable_tasks(rq); - sched_info_switch(prev, next); if (likely(prev != next)) { patches/x86-fixup-tsc-merge-artifacts.patch0000664000077200007720000000073310653433161020167 0ustar 
mingomingo--- arch/x86_64/kernel/tsc.c | 2 -- 1 file changed, 2 deletions(-) Index: linux-rt-rebase.q/arch/x86_64/kernel/tsc.c =================================================================== --- linux-rt-rebase.q.orig/arch/x86_64/kernel/tsc.c +++ linux-rt-rebase.q/arch/x86_64/kernel/tsc.c @@ -118,8 +118,6 @@ core_initcall(cpufreq_tsc); #endif -static int tsc_unstable = 0; - /* * Make an educated guess if the TSC is trustworthy and synchronized * over all CPUs. patches/x86_64-apic-calibration-remove-divisor.patch0000664000077200007720000000332210653433161021651 0ustar mingomingoSubject: x86_64: Remove APIC_DIVISOR APIC_DIVISOR is rather useless. It makes the calibration result more accurate in the first place, but we discard this later when we write the value to the APIC timer by dividing the calibration value by APIC_DIVISOR. Signed-off-by: Thomas Gleixner Signed-off-by: Chris Wright Signed-off-by: Ingo Molnar --- arch/x86_64/kernel/apic.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) Index: linux-rt-rebase.q/arch/x86_64/kernel/apic.c =================================================================== --- linux-rt-rebase.q.orig/arch/x86_64/kernel/apic.c +++ linux-rt-rebase.q/arch/x86_64/kernel/apic.c @@ -760,8 +760,6 @@ void __init init_apic_mappings(void) * P5 APIC double write bug. */ -#define APIC_DIVISOR 16 - static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) { unsigned int lvtt_value, tmp_value; @@ -783,7 +781,7 @@ static void __setup_APIC_LVTT(unsigned i | APIC_TDR_DIV_16); if (!oneshot) - apic_write(APIC_TMICT, clocks/APIC_DIVISOR); + apic_write(APIC_TMICT, clocks); } static void setup_APIC_timer(unsigned int clocks) @@ -836,7 +834,7 @@ static void __init calibrate_APIC_clock( * * No interrupt enable ! 
*/ - __setup_APIC_LVTT(4000000000, 0, 0); + __setup_APIC_LVTT(250000000, 0, 0); apic_start = apic_read(APIC_TMCCT); #ifdef CONFIG_X86_PM_TIMER @@ -863,7 +861,7 @@ static void __init calibrate_APIC_clock( printk(KERN_INFO "Detected %d.%03d MHz APIC timer.\n", result / 1000 / 1000, result / 1000 % 1000); - calibration_result = result * APIC_DIVISOR / HZ; + calibration_result = result / HZ; } void __init setup_boot_APIC_clock (void) patches/rt-mutex-compat-semaphores.patch0000664000077200007720000003224110653433164017763 0ustar mingomingo drivers/acpi/osl.c | 12 ++++++------ drivers/media/dvb/dvb-core/dvb_frontend.c | 2 +- drivers/media/dvb/dvb-core/dvb_frontend.h | 2 +- drivers/net/3c527.c | 2 +- drivers/net/hamradio/6pack.c | 2 +- drivers/net/hamradio/mkiss.c | 2 +- drivers/net/plip.c | 5 ++++- drivers/net/ppp_async.c | 2 +- drivers/net/ppp_synctty.c | 2 +- drivers/pci/hotplug/cpqphp_ctrl.c | 4 ++-- drivers/pci/hotplug/ibmphp_hpc.c | 2 +- drivers/scsi/aacraid/aacraid.h | 4 ++-- drivers/scsi/qla2xxx/qla_def.h | 2 +- drivers/usb/storage/usb.h | 2 +- fs/jffs2/jffs2_fs_i.h | 2 +- fs/xfs/linux-2.6/sema.h | 9 +++++++-- fs/xfs/linux-2.6/xfs_buf.h | 4 ++-- include/linux/parport.h | 2 +- 18 files changed, 35 insertions(+), 27 deletions(-) Index: linux-rt-rebase.q/drivers/acpi/osl.c =================================================================== --- linux-rt-rebase.q.orig/drivers/acpi/osl.c +++ linux-rt-rebase.q/drivers/acpi/osl.c @@ -739,13 +739,13 @@ void acpi_os_delete_lock(acpi_spinlock h acpi_status acpi_os_create_semaphore(u32 max_units, u32 initial_units, acpi_handle * handle) { - struct semaphore *sem = NULL; + struct compat_semaphore *sem = NULL; - sem = acpi_os_allocate(sizeof(struct semaphore)); + sem = acpi_os_allocate(sizeof(struct compat_semaphore)); if (!sem) return AE_NO_MEMORY; - memset(sem, 0, sizeof(struct semaphore)); + memset(sem, 0, sizeof(struct compat_semaphore)); sema_init(sem, initial_units); @@ -768,7 +768,7 @@ EXPORT_SYMBOL(acpi_os_create_semaphore); acpi_status acpi_os_delete_semaphore(acpi_handle handle) { - struct semaphore *sem = (struct semaphore *)handle; + struct compat_semaphore *sem = (struct compat_semaphore *)handle; if (!sem) @@ -796,7 +796,7 @@ EXPORT_SYMBOL(acpi_os_delete_semaphore); acpi_status acpi_os_wait_semaphore(acpi_handle handle, u32 units, u16 timeout) { acpi_status status = AE_OK; - struct semaphore *sem = (struct semaphore *)handle; + struct compat_semaphore *sem = (struct compat_semaphore *)handle; int ret = 0; @@ -883,7 +883,7 @@ EXPORT_SYMBOL(acpi_os_wait_semaphore); */ acpi_status acpi_os_signal_semaphore(acpi_handle handle, u32 units) { - struct semaphore *sem = (struct semaphore *)handle; + struct compat_semaphore *sem = (struct compat_semaphore *)handle; if (!sem || (units < 1)) Index: linux-rt-rebase.q/drivers/media/dvb/dvb-core/dvb_frontend.c =================================================================== --- linux-rt-rebase.q.orig/drivers/media/dvb/dvb-core/dvb_frontend.c +++ linux-rt-rebase.q/drivers/media/dvb/dvb-core/dvb_frontend.c @@ -98,7 +98,7 @@ struct dvb_frontend_private { struct dvb_device *dvbdev; struct dvb_frontend_parameters parameters; struct dvb_fe_events events; - struct semaphore sem; + struct compat_semaphore sem; struct list_head list_head; wait_queue_head_t wait_queue; struct task_struct *thread; Index: linux-rt-rebase.q/drivers/media/dvb/dvb-core/dvb_frontend.h =================================================================== --- linux-rt-rebase.q.orig/drivers/media/dvb/dvb-core/dvb_frontend.h +++ 
linux-rt-rebase.q/drivers/media/dvb/dvb-core/dvb_frontend.h @@ -142,7 +142,7 @@ struct dvb_fe_events { int eventr; int overflow; wait_queue_head_t wait_queue; - struct semaphore sem; + struct compat_semaphore sem; }; struct dvb_frontend { Index: linux-rt-rebase.q/drivers/net/3c527.c =================================================================== --- linux-rt-rebase.q.orig/drivers/net/3c527.c +++ linux-rt-rebase.q/drivers/net/3c527.c @@ -182,7 +182,7 @@ struct mc32_local u16 rx_ring_tail; /* index to rx de-queue end */ - struct semaphore cmd_mutex; /* Serialises issuing of execute commands */ + struct compat_semaphore cmd_mutex; /* Serialises issuing of execute commands */ struct completion execution_cmd; /* Card has completed an execute command */ struct completion xceiver_cmd; /* Card has completed a tx or rx command */ }; Index: linux-rt-rebase.q/drivers/net/hamradio/6pack.c =================================================================== --- linux-rt-rebase.q.orig/drivers/net/hamradio/6pack.c +++ linux-rt-rebase.q/drivers/net/hamradio/6pack.c @@ -123,7 +123,7 @@ struct sixpack { struct timer_list tx_t; struct timer_list resync_t; atomic_t refcnt; - struct semaphore dead_sem; + struct compat_semaphore dead_sem; spinlock_t lock; }; Index: linux-rt-rebase.q/drivers/net/hamradio/mkiss.c =================================================================== --- linux-rt-rebase.q.orig/drivers/net/hamradio/mkiss.c +++ linux-rt-rebase.q/drivers/net/hamradio/mkiss.c @@ -84,7 +84,7 @@ struct mkiss { #define CRC_MODE_SMACK_TEST 4 atomic_t refcnt; - struct semaphore dead_sem; + struct compat_semaphore dead_sem; }; /*---------------------------------------------------------------------------*/ Index: linux-rt-rebase.q/drivers/net/plip.c =================================================================== --- linux-rt-rebase.q.orig/drivers/net/plip.c +++ linux-rt-rebase.q/drivers/net/plip.c @@ -228,7 +228,10 @@ struct net_local { struct hh_cache *hh); spinlock_t lock; atomic_t kill_timer; - struct semaphore killed_timer_sem; + /* + * PREEMPT_RT: this isnt a mutex, it should be struct completion. 
+ */ + struct compat_semaphore killed_timer_sem; }; static inline void enable_parport_interrupts (struct net_device *dev) Index: linux-rt-rebase.q/drivers/net/ppp_async.c =================================================================== --- linux-rt-rebase.q.orig/drivers/net/ppp_async.c +++ linux-rt-rebase.q/drivers/net/ppp_async.c @@ -67,7 +67,7 @@ struct asyncppp { struct tasklet_struct tsk; atomic_t refcnt; - struct semaphore dead_sem; + struct compat_semaphore dead_sem; struct ppp_channel chan; /* interface to generic ppp layer */ unsigned char obuf[OBUFSIZE]; }; Index: linux-rt-rebase.q/drivers/net/ppp_synctty.c =================================================================== --- linux-rt-rebase.q.orig/drivers/net/ppp_synctty.c +++ linux-rt-rebase.q/drivers/net/ppp_synctty.c @@ -70,7 +70,7 @@ struct syncppp { struct tasklet_struct tsk; atomic_t refcnt; - struct semaphore dead_sem; + struct compat_semaphore dead_sem; struct ppp_channel chan; /* interface to generic ppp layer */ }; Index: linux-rt-rebase.q/drivers/pci/hotplug/cpqphp_ctrl.c =================================================================== --- linux-rt-rebase.q.orig/drivers/pci/hotplug/cpqphp_ctrl.c +++ linux-rt-rebase.q/drivers/pci/hotplug/cpqphp_ctrl.c @@ -45,8 +45,8 @@ static int configure_new_function(struct u8 behind_bridge, struct resource_lists *resources); static void interrupt_event_handler(struct controller *ctrl); -static struct semaphore event_semaphore; /* mutex for process loop (up if something to process) */ -static struct semaphore event_exit; /* guard ensure thread has exited before calling it quits */ +static struct compat_semaphore event_semaphore; /* mutex for process loop (up if something to process) */ +static struct compat_semaphore event_exit; /* guard ensure thread has exited before calling it quits */ static int event_finished; static unsigned long pushbutton_pending; /* = 0 */ Index: linux-rt-rebase.q/drivers/pci/hotplug/ibmphp_hpc.c =================================================================== --- linux-rt-rebase.q.orig/drivers/pci/hotplug/ibmphp_hpc.c +++ linux-rt-rebase.q/drivers/pci/hotplug/ibmphp_hpc.c @@ -106,7 +106,7 @@ static int tid_poll; static struct mutex sem_hpcaccess; // lock access to HPC static struct semaphore semOperations; // lock all operations and // access to data structures -static struct semaphore sem_exit; // make sure polling thread goes away +static struct compat_semaphore sem_exit; // make sure polling thread goes away //---------------------------------------------------------------------------- // local function prototypes //---------------------------------------------------------------------------- Index: linux-rt-rebase.q/drivers/scsi/aacraid/aacraid.h =================================================================== --- linux-rt-rebase.q.orig/drivers/scsi/aacraid/aacraid.h +++ linux-rt-rebase.q/drivers/scsi/aacraid/aacraid.h @@ -715,7 +715,7 @@ struct aac_fib_context { u32 unique; // unique value representing this context ulong jiffies; // used for cleanup - dmb changed to ulong struct list_head next; // used to link context's into a linked list - struct semaphore wait_sem; // this is used to wait for the next fib to arrive. + struct compat_semaphore wait_sem; // this is used to wait for the next fib to arrive. 
int wait; // Set to true when thread is in WaitForSingleObject unsigned long count; // total number of FIBs on FibList struct list_head fib_list; // this holds fibs and their attachd hw_fibs @@ -785,7 +785,7 @@ struct fib { * This is the event the sendfib routine will wait on if the * caller did not pass one and this is synch io. */ - struct semaphore event_wait; + struct compat_semaphore event_wait; spinlock_t event_lock; u32 done; /* gets set to 1 when fib is complete */ Index: linux-rt-rebase.q/drivers/scsi/qla2xxx/qla_def.h =================================================================== --- linux-rt-rebase.q.orig/drivers/scsi/qla2xxx/qla_def.h +++ linux-rt-rebase.q/drivers/scsi/qla2xxx/qla_def.h @@ -2413,7 +2413,7 @@ typedef struct scsi_qla_host { struct semaphore mbx_cmd_sem; /* Serialialize mbx access */ struct semaphore vport_sem; /* Virtual port synchronization */ - struct semaphore mbx_intr_sem; /* Used for completion notification */ + struct compat_semaphore mbx_intr_sem; /* Used for completion notification */ uint32_t mbx_flags; #define MBX_IN_PROGRESS BIT_0 Index: linux-rt-rebase.q/drivers/usb/storage/usb.h =================================================================== --- linux-rt-rebase.q.orig/drivers/usb/storage/usb.h +++ linux-rt-rebase.q/drivers/usb/storage/usb.h @@ -147,7 +147,7 @@ struct us_data { struct task_struct *ctl_thread; /* the control thread */ /* mutual exclusion and synchronization structures */ - struct semaphore sema; /* to sleep thread on */ + struct compat_semaphore sema; /* to sleep thread on */ struct completion notify; /* thread begin/end */ wait_queue_head_t delay_wait; /* wait during scan, reset */ Index: linux-rt-rebase.q/fs/jffs2/jffs2_fs_i.h =================================================================== --- linux-rt-rebase.q.orig/fs/jffs2/jffs2_fs_i.h +++ linux-rt-rebase.q/fs/jffs2/jffs2_fs_i.h @@ -24,7 +24,7 @@ struct jffs2_inode_info { before letting GC proceed. Or we'd have to put ugliness into the GC code so it didn't attempt to obtain the i_mutex for the inode(s) which are already locked */ - struct semaphore sem; + struct compat_semaphore sem; /* The highest (datanode) version number used for this ino */ uint32_t highest_version; Index: linux-rt-rebase.q/fs/xfs/linux-2.6/sema.h =================================================================== --- linux-rt-rebase.q.orig/fs/xfs/linux-2.6/sema.h +++ linux-rt-rebase.q/fs/xfs/linux-2.6/sema.h @@ -27,7 +27,7 @@ * sema_t structure just maps to struct semaphore in Linux kernel. 
*/ -typedef struct semaphore sema_t; +typedef struct compat_semaphore sema_t; #define initnsema(sp, val, name) sema_init(sp, val) #define psema(sp, b) down(sp) @@ -36,7 +36,12 @@ typedef struct semaphore sema_t; static inline int issemalocked(sema_t *sp) { - return down_trylock(sp) || (up(sp), 0); + int rv; + + if ((rv = down_trylock(sp))) + return (rv); + up(sp); + return (0); } /* Index: linux-rt-rebase.q/fs/xfs/linux-2.6/xfs_buf.h =================================================================== --- linux-rt-rebase.q.orig/fs/xfs/linux-2.6/xfs_buf.h +++ linux-rt-rebase.q/fs/xfs/linux-2.6/xfs_buf.h @@ -118,7 +118,7 @@ typedef int (*xfs_buf_bdstrat_t)(struct #define XB_PAGES 2 typedef struct xfs_buf { - struct semaphore b_sema; /* semaphore for lockables */ + struct compat_semaphore b_sema; /* semaphore for lockables */ unsigned long b_queuetime; /* time buffer was queued */ atomic_t b_pin_count; /* pin count */ wait_queue_head_t b_waiters; /* unpin waiters */ @@ -138,7 +138,7 @@ typedef struct xfs_buf { xfs_buf_iodone_t b_iodone; /* I/O completion function */ xfs_buf_relse_t b_relse; /* releasing function */ xfs_buf_bdstrat_t b_strat; /* pre-write function */ - struct semaphore b_iodonesema; /* Semaphore for I/O waiters */ + struct compat_semaphore b_iodonesema; /* Semaphore for I/O waiters */ void *b_fspriv; void *b_fspriv2; void *b_fspriv3; Index: linux-rt-rebase.q/include/linux/parport.h =================================================================== --- linux-rt-rebase.q.orig/include/linux/parport.h +++ linux-rt-rebase.q/include/linux/parport.h @@ -265,7 +265,7 @@ enum ieee1284_phase { struct ieee1284_info { int mode; volatile enum ieee1284_phase phase; - struct semaphore irq; + struct compat_semaphore irq; }; /* A parallel port */ patches/fix-migrating-softirq.patch0000664000077200007720000001117410653433170017002 0ustar mingomingoFrom rostedt@goodmis.org Wed Jun 13 14:47:26 2007 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.1.7-deb (2006-10-05) on debian X-Spam-Level: X-Spam-Status: No, score=0.0 required=5.0 tests=AWL autolearn=unavailable version=3.1.7-deb Received: from ms-smtp-02.nyroc.rr.com (ms-smtp-02.nyroc.rr.com [24.24.2.56]) by mail.tglx.de (Postfix) with ESMTP id AB7B665C3D9 for ; Wed, 13 Jun 2007 14:47:26 +0200 (CEST) Received: from [192.168.23.10] (cpe-24-94-51-176.stny.res.rr.com [24.94.51.176]) by ms-smtp-02.nyroc.rr.com (8.13.6/8.13.6) with ESMTP id l5DClGVg022890; Wed, 13 Jun 2007 08:47:17 -0400 (EDT) Subject: [PATCH RT] fix migrating softirq [cause of network hang] From: Steven Rostedt To: Ingo Molnar Cc: LKML , RT , Thomas Gleixner , john stultz Content-Type: text/plain Date: Wed, 13 Jun 2007 08:47:16 -0400 Message-Id: <1181738836.10408.54.camel@localhost.localdomain> Mime-Version: 1.0 X-Mailer: Evolution 2.6.3 X-Virus-Scanned: Symantec AntiVirus Scan Engine X-Evolution-Source: imap://tglx%40linutronix.de@localhost:8993/ Content-Transfer-Encoding: 8bit Softirqs are bound to a single CPU. That is to say, that once a softirq function starts to run, it will stay on the CPU that it is running on while it's running. In RT, softirqs are threads, and we have a softirq thread per cpu. Each softirq thread is bound to a single CPU that it represents. In order to speed things up and lower context switches in RT, if a softirq thread is of the same priority as an interrupt thread, then when the interrupt thread is about to exit, it tests to see if any softirq threads need to be run on that cpu. 
Instead of running the softirq thread, it simply performs the functions for the softirq within the interrupt thread. The problem is that nothing prevents the interrupt thread from migrating. So while the interrupt thread is running the softirq function, it may migrate to another CPU in the middle of that function. This means that any CPU data that the softirq is touching can be corrupted. I was experiencing a network hang from which the machine would sometimes recover and sometimes not. Using my logdev debugger, I started to debug this problem. I came across this at the moment of the hang: [ 389.131279] cpu:0 (IRQ-11:427) tcp_rcv_established:4056 rcv_nxt=-1665585797 [ 389.131615] cpu:1 192.168.23.72:22 <== 192.168.23.60:41352 ack:2629381499 seq:1773074099 (----A-) len:0 win:790 end_seq:1773074099 [ 389.131626] cpu:1 (IRQ-11:427) ip_finish_output2:187 dst->hh=ffff81003b213080 [ 389.131635] cpu:1 (IRQ-11:427) ip_finish_output2:189 hh_output=ffffffff80429009 Here we see IRQ-11 in the process of finishing up the softirq-net-tx function. In the middle of it, we receive a packet, and that must have pushed the interrupt thread over to CPU 1, where it finished up the softirq.
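[ Editor's note: condensed from the do_irqd() hunk below, the fix is essentially two lines: pick one CPU out of the IRQ's affinity mask and bind the thread to it ("desc" is the irq_desc the thread services):

	/* restrict to one CPU so we cannot migrate inside do_softirq_from_hardirq() */
	mask = cpumask_of_cpu(first_cpu(desc->affinity));
	set_cpus_allowed(current, mask);
]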
Signed-off-by: Ingo Molnar --- kernel/irq/manage.c | 19 +----- kernel/softirq.c | 160 ++++++++++++++++++++++++++++++++++++++++------------ 2 files changed, 131 insertions(+), 48 deletions(-) Index: linux-rt-rebase.q/kernel/irq/manage.c =================================================================== --- linux-rt-rebase.q.orig/kernel/irq/manage.c +++ linux-rt-rebase.q/kernel/irq/manage.c @@ -694,7 +694,6 @@ static void thread_edge_irq(irq_desc_t * desc->status &= ~IRQ_PENDING; spin_unlock(&desc->lock); action_ret = handle_IRQ_event(irq, action); - cond_resched_hardirq_context(); spin_lock_irq(&desc->lock); if (!noirqdebug) note_interrupt(irq, desc, action_ret); @@ -723,7 +722,6 @@ static void thread_do_irq(irq_desc_t *de desc->status &= ~IRQ_PENDING; spin_unlock(&desc->lock); action_ret = handle_IRQ_event(irq, action); - cond_resched_hardirq_context(); spin_lock_irq(&desc->lock); if (!noirqdebug) note_interrupt(irq, desc, action_ret); @@ -759,8 +757,6 @@ static void do_hardirq(struct irq_desc * wake_up(&desc->wait_for_handler); } -extern asmlinkage void __do_softirq(void); - static int do_irqd(void * __desc) { struct sched_param param = { 0, }; @@ -780,16 +776,13 @@ static int do_irqd(void * __desc) while (!kthread_should_stop()) { local_irq_disable_nort(); - set_current_state(TASK_INTERRUPTIBLE); -#ifndef CONFIG_PREEMPT_RT - irq_enter(); -#endif - do_hardirq(desc); -#ifndef CONFIG_PREEMPT_RT - irq_exit(); -#endif + do { + set_current_state(TASK_INTERRUPTIBLE); + do_hardirq(desc); + do_softirq_from_hardirq(); + } while (current->state == TASK_RUNNING); + local_irq_enable_nort(); - cond_resched(); #ifdef CONFIG_SMP /* * Did IRQ affinities change? Index: linux-rt-rebase.q/kernel/softirq.c =================================================================== --- linux-rt-rebase.q.orig/kernel/softirq.c +++ linux-rt-rebase.q/kernel/softirq.c @@ -101,8 +101,26 @@ static void wakeup_softirqd(int softirq) /* Interrupts are disabled: no need to stop preemption */ struct task_struct *tsk = __get_cpu_var(ksoftirqd)[softirq].tsk; - if (tsk && tsk->state != TASK_RUNNING) - wake_up_process(tsk); + if (unlikely(!tsk)) + return; +#if defined(CONFIG_PREEMPT_SOFTIRQS) && defined(CONFIG_PREEMPT_HARDIRQS) + /* + * Optimization: if we are in a hardirq thread context, and + * if the priority of the softirq thread is the same as the + * priority of the hardirq thread, then 'merge' softirq + * processing into the hardirq context. (it will later on + * execute softirqs via do_softirq_from_hardirq()). + * So here we can skip the wakeup and can rely on the hardirq + * context processing it later on. + */ + if ((current->flags & PF_HARDIRQ) && !hardirq_count() && + (tsk->normal_prio == current->normal_prio)) + return; +#endif + /* + * Wake up the softirq task: + */ + wake_up_process(tsk); } /* @@ -251,50 +269,100 @@ EXPORT_SYMBOL(local_bh_enable_ip); * we want to handle softirqs as soon as possible, but they * should not be able to lock up the box. 
*/ -#define MAX_SOFTIRQ_RESTART 10 +#define MAX_SOFTIRQ_RESTART 20 + +static DEFINE_PER_CPU(u32, softirq_running); -asmlinkage void ___do_softirq(void) +static void ___do_softirq(const int same_prio_only) { + int max_restart = MAX_SOFTIRQ_RESTART, max_loops = MAX_SOFTIRQ_RESTART; + __u32 pending, available_mask, same_prio_skipped; struct softirq_action *h; - __u32 pending; - int max_restart = MAX_SOFTIRQ_RESTART; - int cpu; + struct task_struct *tsk; + int cpu, softirq; pending = local_softirq_pending(); account_system_vtime(current); cpu = smp_processor_id(); restart: + available_mask = -1; + softirq = 0; + same_prio_skipped = 0; /* Reset the pending bitmask before enabling irqs */ set_softirq_pending(0); - local_irq_enable(); - h = softirq_vec; do { + u32 softirq_mask = 1 << softirq; + if (pending & 1) { - { - u32 preempt_count = preempt_count(); - h->action(h); - if (preempt_count != preempt_count()) { - print_symbol("BUG: softirq exited %s with wrong preemption count!\n", (unsigned long) h->action); - printk("entered with %08x, exited with %08x.\n", preempt_count, preempt_count()); - preempt_count() = preempt_count; + u32 preempt_count = preempt_count(); + +#if defined(CONFIG_PREEMPT_SOFTIRQS) && defined(CONFIG_PREEMPT_HARDIRQS) + /* + * If executed by a same-prio hardirq thread + * then skip pending softirqs that belong + * to softirq threads with different priority: + */ + if (same_prio_only) { + tsk = __get_cpu_var(ksoftirqd)[softirq].tsk; + if (tsk && tsk->normal_prio != + current->normal_prio) { + same_prio_skipped |= softirq_mask; + available_mask &= ~softirq_mask; + goto next; } } +#endif + /* + * Is this softirq already being processed? + */ + if (per_cpu(softirq_running, cpu) & softirq_mask) { + available_mask &= ~softirq_mask; + goto next; + } + per_cpu(softirq_running, cpu) |= softirq_mask; + local_irq_enable(); + + h->action(h); + if (preempt_count != preempt_count()) { + print_symbol("BUG: softirq exited %s with wrong preemption count!\n", (unsigned long) h->action); + printk("entered with %08x, exited with %08x.\n", preempt_count, preempt_count()); + preempt_count() = preempt_count; + } rcu_bh_qsctr_inc(cpu); cond_resched_softirq_context(); + local_irq_disable(); + per_cpu(softirq_running, cpu) &= ~softirq_mask; } +next: h++; + softirq++; pending >>= 1; } while (pending); - local_irq_disable(); - + or_softirq_pending(same_prio_skipped); pending = local_softirq_pending(); - if (pending && --max_restart) - goto restart; + if (pending & available_mask) { + if (--max_restart) + goto restart; + /* + * With softirq threading there's no reason not to + * finish the workload we have: + */ +#ifdef CONFIG_PREEMPT_SOFTIRQS + if (--max_loops) { + if (printk_ratelimit()) + printk("INFO: softirq overload: %08x\n", pending); + max_restart = MAX_SOFTIRQ_RESTART; + goto restart; + } + if (printk_ratelimit()) + printk("BUG: softirq loop! 
%08x\n", pending); +#endif + } if (pending) trigger_softirqs(); @@ -322,7 +390,7 @@ asmlinkage void __do_softirq(void) p_flags = current->flags & PF_HARDIRQ; current->flags &= ~PF_HARDIRQ; - ___do_softirq(); + ___do_softirq(0); trace_softirq_exit(); @@ -346,20 +414,29 @@ void do_softirq_from_hardirq(void) if (!local_softirq_pending()) return; /* - * 'immediate' softirq execution: + * 'immediate' softirq execution, from hardirq context: */ + local_irq_disable(); __local_bh_disable((unsigned long)__builtin_return_address(0)); +#ifndef CONFIG_PREEMPT_SOFTIRQS + trace_softirq_enter(); +#endif p_flags = current->flags & PF_HARDIRQ; current->flags &= ~PF_HARDIRQ; + current->flags |= PF_SOFTIRQ; - ___do_softirq(); + ___do_softirq(1); +#ifndef CONFIG_PREEMPT_SOFTIRQS trace_softirq_exit(); - +#endif account_system_vtime(current); - _local_bh_enable(); current->flags |= p_flags; + current->flags &= ~PF_SOFTIRQ; + + _local_bh_enable(); + local_irq_enable(); } #ifndef __ARCH_HAS_DO_SOFTIRQ @@ -693,8 +770,9 @@ static int ksoftirqd(void * __data) { struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO/2 }; struct softirqdata *data = __data; - u32 mask = (1 << data->nr); + u32 softirq_mask = (1 << data->nr); struct softirq_action *h; + int cpu = data->cpu; #ifdef CONFIG_PREEMPT_SOFTIRQS init_waitqueue_head(&data->wait); @@ -706,7 +784,8 @@ static int ksoftirqd(void * __data) while (!kthread_should_stop()) { preempt_disable(); - if (!(local_softirq_pending() & mask)) { + if (!(local_softirq_pending() & softirq_mask)) { +sleep_more: __preempt_enable_no_resched(); schedule(); preempt_disable(); @@ -718,16 +797,26 @@ static int ksoftirqd(void * __data) data->running = 1; #endif - while (local_softirq_pending() & mask) { + while (local_softirq_pending() & softirq_mask) { /* Preempt disable stops cpu going offline. If already offline, we'll be on wrong CPU: don't process */ - if (cpu_is_offline(data->cpu)) + if (cpu_is_offline(cpu)) goto wait_to_die; local_irq_disable(); + /* + * Is the softirq already being executed by + * a hardirq context? 
+ */ + if (per_cpu(softirq_running, cpu) & softirq_mask) { + local_irq_enable(); + set_current_state(TASK_INTERRUPTIBLE); + goto sleep_more; + } + per_cpu(softirq_running, cpu) |= softirq_mask; __preempt_enable_no_resched(); - set_softirq_pending(local_softirq_pending() & ~mask); + set_softirq_pending(local_softirq_pending() & ~softirq_mask); local_bh_disable(); local_irq_enable(); @@ -737,6 +826,7 @@ static int ksoftirqd(void * __data) rcu_bh_qsctr_inc(data->cpu); local_irq_disable(); + per_cpu(softirq_running, cpu) &= ~softirq_mask; _local_bh_enable(); local_irq_enable(); @@ -879,19 +969,19 @@ static int __cpuinit cpu_callback(struct } #endif case CPU_DEAD: - case CPU_DEAD_FROZEN: { - struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; - - sched_setscheduler(p, SCHED_FIFO, ¶m); + case CPU_DEAD_FROZEN: for (i = 0; i < MAX_SOFTIRQ; i++) { + struct sched_param param; + + param.sched_priority = MAX_RT_PRIO-1; p = per_cpu(ksoftirqd, hotcpu)[i].tsk; + sched_setscheduler(p, SCHED_FIFO, ¶m); per_cpu(ksoftirqd, hotcpu)[i].tsk = NULL; kthread_stop(p); } takeover_tasklets(hotcpu); break; #endif /* CONFIG_HOTPLUG_CPU */ - } } return NOTIFY_OK; } patches/rt-mutex-irq-flags-checking.patch0000664000077200007720000000462710653433164020001 0ustar mingomingo--- include/linux/irqflags.h | 37 ++++++++++++++++++++++++++++++++----- 1 file changed, 32 insertions(+), 5 deletions(-) Index: linux-rt-rebase.q/include/linux/irqflags.h =================================================================== --- linux-rt-rebase.q.orig/include/linux/irqflags.h +++ linux-rt-rebase.q/include/linux/irqflags.h @@ -11,6 +11,12 @@ #ifndef _LINUX_TRACE_IRQFLAGS_H #define _LINUX_TRACE_IRQFLAGS_H +#define BUILD_CHECK_IRQ_FLAGS(flags) \ + do { \ + BUILD_BUG_ON(sizeof(flags) != sizeof(unsigned long)); \ + typecheck(unsigned long, flags); \ + } while (0) + #ifdef CONFIG_TRACE_IRQFLAGS extern void trace_hardirqs_on(void); extern void trace_hardirqs_off(void); @@ -59,10 +65,15 @@ #define local_irq_disable() \ do { raw_local_irq_disable(); trace_hardirqs_off(); } while (0) #define local_irq_save(flags) \ - do { raw_local_irq_save(flags); trace_hardirqs_off(); } while (0) + do { \ + BUILD_CHECK_IRQ_FLAGS(flags); \ + raw_local_irq_save(flags); \ + trace_hardirqs_off(); \ + } while (0) #define local_irq_restore(flags) \ do { \ + BUILD_CHECK_IRQ_FLAGS(flags); \ if (raw_irqs_disabled_flags(flags)) { \ raw_local_irq_restore(flags); \ trace_hardirqs_off(); \ @@ -78,8 +89,16 @@ */ # define raw_local_irq_disable() local_irq_disable() # define raw_local_irq_enable() local_irq_enable() -# define raw_local_irq_save(flags) local_irq_save(flags) -# define raw_local_irq_restore(flags) local_irq_restore(flags) +# define raw_local_irq_save(flags) \ + do { \ + BUILD_CHECK_IRQ_FLAGS(flags); \ + local_irq_save(flags); \ + } while (0) +# define raw_local_irq_restore(flags) \ + do { \ + BUILD_CHECK_IRQ_FLAGS(flags); \ + local_irq_restore(flags); \ + } while (0) #endif /* CONFIG_TRACE_IRQFLAGS_SUPPORT */ #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT @@ -89,7 +108,11 @@ raw_safe_halt(); \ } while (0) -#define local_save_flags(flags) raw_local_save_flags(flags) +#define local_save_flags(flags) \ + do { \ + BUILD_CHECK_IRQ_FLAGS(flags); \ + raw_local_save_flags(flags); \ + } while (0) #define irqs_disabled() \ ({ \ @@ -99,7 +122,11 @@ raw_irqs_disabled_flags(flags); \ }) -#define irqs_disabled_flags(flags) raw_irqs_disabled_flags(flags) +#define irqs_disabled_flags(flags) \ +({ \ + BUILD_CHECK_IRQ_FLAGS(flags); \ + raw_irqs_disabled_flags(flags); \ +}) #endif 
/* CONFIG_X86 */ #endif patches/mm-concurrent-pagecache.patch0000664000077200007720000003717610653433167017247 0ustar mingomingoSubject: mm: concurrent pagecache write side Remove the tree_lock, change address_space::nrpages to atomic_long_t because it is not protected any longer, and use the concurrent radix tree API to protect the modifying radix tree operations. The tree_lock is actually renamed to priv_lock and its only remaining user will be the __flush_dcache_page logic on arm and parisc. Another potential user would be the per address_space node mask allocation Christoph is working on. [ BUG: the NFS client code seems to rely on mapping->tree_lock in some hidden way, which makes it crash... ] Signed-off-by: Peter Zijlstra --- fs/buffer.c | 7 ++++--- fs/inode.c | 2 +- include/asm-arm/cacheflush.h | 4 ++-- include/asm-parisc/cacheflush.h | 4 ++-- include/linux/fs.h | 12 ++++++------ mm/filemap.c | 17 +++++++++-------- mm/migrate.c | 12 ++++++------ mm/page-writeback.c | 39 +++++++++++++++++++++++---------------- mm/swap_state.c | 18 ++++++++++-------- mm/swapfile.c | 2 -- mm/truncate.c | 3 --- mm/vmscan.c | 4 ---- 12 files changed, 63 insertions(+), 61 deletions(-) Index: linux-rt-rebase.q/fs/buffer.c =================================================================== --- linux-rt-rebase.q.orig/fs/buffer.c +++ linux-rt-rebase.q/fs/buffer.c @@ -685,18 +685,19 @@ static int __set_page_dirty(struct page return 0; lock_page_ref_irq(page); - spin_lock(&mapping->tree_lock); if (page->mapping) { /* Race with truncate? */ + DEFINE_RADIX_TREE_CONTEXT(ctx, &mapping->page_tree); WARN_ON_ONCE(warn && !PageUptodate(page)); if (mapping_cap_account_dirty(mapping)) { __inc_zone_page_state(page, NR_FILE_DIRTY); task_io_account_write(PAGE_CACHE_SIZE); } - radix_tree_tag_set(&mapping->page_tree, + radix_tree_lock(&ctx); + radix_tree_tag_set(ctx.tree, page_index(page), PAGECACHE_TAG_DIRTY); + radix_tree_unlock(&ctx); } - spin_unlock(&mapping->tree_lock); unlock_page_ref_irq(page); __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); Index: linux-rt-rebase.q/fs/inode.c =================================================================== --- linux-rt-rebase.q.orig/fs/inode.c +++ linux-rt-rebase.q/fs/inode.c @@ -193,7 +193,7 @@ void inode_init_once(struct inode *inode mutex_init(&inode->i_mutex); init_rwsem(&inode->i_alloc_sem); INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC); - spin_lock_init(&inode->i_data.tree_lock); + spin_lock_init(&inode->i_data.priv_lock); spin_lock_init(&inode->i_data.i_mmap_lock); INIT_LIST_HEAD(&inode->i_data.private_list); spin_lock_init(&inode->i_data.private_lock); Index: linux-rt-rebase.q/include/asm-arm/cacheflush.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-arm/cacheflush.h +++ linux-rt-rebase.q/include/asm-arm/cacheflush.h @@ -413,9 +413,9 @@ static inline void flush_anon_page(struc } #define flush_dcache_mmap_lock(mapping) \ - spin_lock_irq(&(mapping)->tree_lock) + spin_lock_irq(&(mapping)->priv_lock) #define flush_dcache_mmap_unlock(mapping) \ - spin_unlock_irq(&(mapping)->tree_lock) + spin_unlock_irq(&(mapping)->priv_lock) #define flush_icache_user_range(vma,page,addr,len) \ flush_dcache_page(page) Index: linux-rt-rebase.q/include/asm-parisc/cacheflush.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-parisc/cacheflush.h +++ linux-rt-rebase.q/include/asm-parisc/cacheflush.h @@ -45,9 +45,9 @@ void flush_cache_mm(struct mm_struct *mm extern void 
flush_dcache_page(struct page *page); #define flush_dcache_mmap_lock(mapping) \ - spin_lock_irq(&(mapping)->tree_lock) + spin_lock_irq(&(mapping)->priv_lock) #define flush_dcache_mmap_unlock(mapping) \ - spin_unlock_irq(&(mapping)->tree_lock) + spin_unlock_irq(&(mapping)->priv_lock) #define flush_icache_page(vma,page) do { \ flush_kernel_dcache_page(page); \ Index: linux-rt-rebase.q/include/linux/fs.h =================================================================== --- linux-rt-rebase.q.orig/include/linux/fs.h +++ linux-rt-rebase.q/include/linux/fs.h @@ -441,13 +441,13 @@ struct backing_dev_info; struct address_space { struct inode *host; /* owner: inode, block_device */ struct radix_tree_root page_tree; /* radix tree of all pages */ - spinlock_t tree_lock; /* and lock protecting it */ + spinlock_t priv_lock; /* spinlock protecting various stuffs */ unsigned int i_mmap_writable;/* count VM_SHARED mappings */ struct prio_tree_root i_mmap; /* tree of private and shared mappings */ struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */ spinlock_t i_mmap_lock; /* protect tree, count, list */ unsigned int truncate_count; /* Cover race condition with truncate */ - unsigned long __nrpages; /* number of total pages */ + atomic_long_t __nrpages; /* number of total pages */ pgoff_t writeback_index;/* writeback starts here */ const struct address_space_operations *a_ops; /* methods */ unsigned long flags; /* error bits/gfp mask */ @@ -464,22 +464,22 @@ struct address_space { static inline void mapping_nrpages_init(struct address_space *mapping) { - mapping->__nrpages = 0; + mapping->__nrpages = (atomic_long_t)ATOMIC_LONG_INIT(0); } static inline unsigned long mapping_nrpages(struct address_space *mapping) { - return mapping->__nrpages; + return (unsigned long)atomic_long_read(&mapping->__nrpages); } static inline void mapping_nrpages_inc(struct address_space *mapping) { - mapping->__nrpages++; + atomic_long_inc(&mapping->__nrpages); } static inline void mapping_nrpages_dec(struct address_space *mapping) { - mapping->__nrpages--; + atomic_long_dec(&mapping->__nrpages); } struct block_device { Index: linux-rt-rebase.q/mm/filemap.c =================================================================== --- linux-rt-rebase.q.orig/mm/filemap.c +++ linux-rt-rebase.q/mm/filemap.c @@ -115,8 +115,11 @@ generic_file_direct_IO(int rw, struct ki void __remove_from_page_cache(struct page *page) { struct address_space *mapping = page->mapping; + DEFINE_RADIX_TREE_CONTEXT(ctx, &mapping->page_tree); - radix_tree_delete(&mapping->page_tree, page->index); + radix_tree_lock(&ctx); + radix_tree_delete(ctx.tree, page->index); + radix_tree_unlock(&ctx); page->mapping = NULL; mapping_nrpages_dec(mapping); __dec_zone_page_state(page, NR_FILE_PAGES); @@ -125,14 +128,10 @@ void __remove_from_page_cache(struct pag void remove_from_page_cache(struct page *page) { - struct address_space *mapping = page->mapping; - BUG_ON(!PageLocked(page)); lock_page_ref_irq(page); - spin_lock(&mapping->tree_lock); __remove_from_page_cache(page); - spin_unlock(&mapping->tree_lock); unlock_page_ref_irq(page); } @@ -443,9 +442,12 @@ int add_to_page_cache(struct page *page, int error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM); if (error == 0) { + DEFINE_RADIX_TREE_CONTEXT(ctx, &mapping->page_tree); + lock_page_ref_irq(page); - spin_lock(&mapping->tree_lock); - error = radix_tree_insert(&mapping->page_tree, offset, page); + radix_tree_lock(&ctx); + error = radix_tree_insert(ctx.tree, offset, page); + radix_tree_unlock(&ctx); if 
(!error) { page_cache_get(page); SetPageLocked(page); @@ -454,7 +456,6 @@ int add_to_page_cache(struct page *page, mapping_nrpages_inc(mapping); __inc_zone_page_state(page, NR_FILE_PAGES); } - spin_unlock(&mapping->tree_lock); unlock_page_ref_irq(page); radix_tree_preload_end(); } Index: linux-rt-rebase.q/mm/migrate.c =================================================================== --- linux-rt-rebase.q.orig/mm/migrate.c +++ linux-rt-rebase.q/mm/migrate.c @@ -295,6 +295,7 @@ static int migrate_page_move_mapping(str struct page *newpage, struct page *page) { void **pslot; + struct radix_tree_context ctx; if (!mapping) { /* Anonymous page without mapping */ @@ -303,15 +304,14 @@ static int migrate_page_move_mapping(str return 0; } + init_radix_tree_context(&ctx, &mapping->page_tree); lock_page_ref_irq(page); - spin_lock(&mapping->tree_lock); - - pslot = radix_tree_lookup_slot(&mapping->page_tree, - page_index(page)); + radix_tree_lock(&ctx); + pslot = radix_tree_lookup_slot(ctx.tree, page_index(page)); if (page_count(page) != 2 + !!PagePrivate(page) || (struct page *)radix_tree_deref_slot(pslot) != page) { - spin_unlock(&mapping->tree_lock); + radix_tree_unlock(&ctx); unlock_page_ref_irq(page); return -EAGAIN; } @@ -329,7 +329,7 @@ static int migrate_page_move_mapping(str radix_tree_replace_slot(pslot, newpage); page->mapping = NULL; - spin_unlock(&mapping->tree_lock); + radix_tree_unlock(&ctx); /* * If moved to a different zone then also account Index: linux-rt-rebase.q/mm/page-writeback.c =================================================================== --- linux-rt-rebase.q.orig/mm/page-writeback.c +++ linux-rt-rebase.q/mm/page-writeback.c @@ -821,19 +821,21 @@ int __set_page_dirty_nobuffers(struct pa return 1; lock_page_ref_irq(page); - spin_lock(&mapping->tree_lock); mapping2 = page_mapping(page); if (mapping2) { /* Race with truncate? 
*/ + DEFINE_RADIX_TREE_CONTEXT(ctx, &mapping->page_tree); + BUG_ON(mapping2 != mapping); WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page)); if (mapping_cap_account_dirty(mapping)) { __inc_zone_page_state(page, NR_FILE_DIRTY); task_io_account_write(PAGE_CACHE_SIZE); } - radix_tree_tag_set(&mapping->page_tree, + radix_tree_lock(&ctx); + radix_tree_tag_set(ctx.tree, page_index(page), PAGECACHE_TAG_DIRTY); + radix_tree_unlock(&ctx); } - spin_unlock(&mapping->tree_lock); unlock_page_ref_irq(page); if (mapping->host) { /* !PageAnon && !swapper_space */ @@ -980,13 +982,15 @@ int test_clear_page_writeback(struct pag unsigned long flags; lock_page_ref_irqsave(page, flags); - spin_lock(&mapping->tree_lock); ret = TestClearPageWriteback(page); - if (ret) - radix_tree_tag_clear(&mapping->page_tree, - page_index(page), + if (ret) { + DEFINE_RADIX_TREE_CONTEXT(ctx, &mapping->page_tree); + + radix_tree_lock(&ctx); + radix_tree_tag_clear(ctx.tree, page_index(page), PAGECACHE_TAG_WRITEBACK); - spin_unlock(&mapping->tree_lock); + radix_tree_unlock(&ctx); + } unlock_page_ref_irqrestore(page, flags); } else { ret = TestClearPageWriteback(page); @@ -1003,19 +1007,22 @@ int test_set_page_writeback(struct page if (mapping) { unsigned long flags; + DEFINE_RADIX_TREE_CONTEXT(ctx, &mapping->page_tree); lock_page_ref_irqsave(page, flags); - spin_lock(&mapping->tree_lock); ret = TestSetPageWriteback(page); - if (!ret) - radix_tree_tag_set(&mapping->page_tree, - page_index(page), + if (!ret) { + radix_tree_lock(&ctx); + radix_tree_tag_set(ctx.tree, page_index(page), PAGECACHE_TAG_WRITEBACK); - if (!PageDirty(page)) - radix_tree_tag_clear(&mapping->page_tree, - page_index(page), + radix_tree_unlock(&ctx); + } + if (!PageDirty(page)) { + radix_tree_lock(&ctx); + radix_tree_tag_clear(ctx.tree, page_index(page), PAGECACHE_TAG_DIRTY); - spin_unlock(&mapping->tree_lock); + radix_tree_unlock(&ctx); + } unlock_page_ref_irqrestore(page, flags); } else { ret = TestSetPageWriteback(page); Index: linux-rt-rebase.q/mm/swap_state.c =================================================================== --- linux-rt-rebase.q.orig/mm/swap_state.c +++ linux-rt-rebase.q/mm/swap_state.c @@ -38,7 +38,6 @@ static struct backing_dev_info swap_back struct address_space swapper_space = { .page_tree = RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN), - .tree_lock = __SPIN_LOCK_UNLOCKED(swapper_space.tree_lock), .a_ops = &swap_aops, .i_mmap_nonlinear = LIST_HEAD_INIT(swapper_space.i_mmap_nonlinear), .backing_dev_info = &swap_backing_dev_info, @@ -79,10 +78,12 @@ static int __add_to_swap_cache(struct pa BUG_ON(PagePrivate(page)); error = radix_tree_preload(gfp_mask); if (!error) { + DEFINE_RADIX_TREE_CONTEXT(ctx, &swapper_space.page_tree); + lock_page_ref_irq(page); - spin_lock(&swapper_space.tree_lock); - error = radix_tree_insert(&swapper_space.page_tree, - entry.val, page); + radix_tree_lock(&ctx); + error = radix_tree_insert(ctx.tree, entry.val, page); + radix_tree_unlock(&ctx); if (!error) { page_cache_get(page); SetPageSwapCache(page); @@ -90,7 +91,6 @@ static int __add_to_swap_cache(struct pa mapping_nrpages_inc(&swapper_space); __inc_zone_page_state(page, NR_FILE_PAGES); } - spin_unlock(&swapper_space.tree_lock); unlock_page_ref_irq(page); radix_tree_preload_end(); } @@ -125,12 +125,16 @@ static int add_to_swap_cache(struct page */ void __delete_from_swap_cache(struct page *page) { + DEFINE_RADIX_TREE_CONTEXT(ctx, &swapper_space.page_tree); + BUG_ON(!PageLocked(page)); BUG_ON(!PageSwapCache(page)); BUG_ON(PageWriteback(page)); 
BUG_ON(PagePrivate(page)); - radix_tree_delete(&swapper_space.page_tree, page_private(page)); + radix_tree_lock(&ctx); + radix_tree_delete(ctx.tree, page_private(page)); + radix_tree_unlock(&ctx); set_page_private(page, 0); ClearPageSwapCache(page); mapping_nrpages_dec(&swapper_space); @@ -203,9 +207,7 @@ void delete_from_swap_cache(struct page entry.val = page_private(page); lock_page_ref_irq(page); - spin_lock(&swapper_space.tree_lock); __delete_from_swap_cache(page); - spin_unlock(&swapper_space.tree_lock); unlock_page_ref_irq(page); swap_free(entry); Index: linux-rt-rebase.q/mm/swapfile.c =================================================================== --- linux-rt-rebase.q.orig/mm/swapfile.c +++ linux-rt-rebase.q/mm/swapfile.c @@ -368,13 +368,11 @@ int remove_exclusive_swap_page(struct pa if (p->swap_map[swp_offset(entry)] == 1) { /* Recheck the page count with the swapcache lock held.. */ lock_page_ref_irq(page); - spin_lock(&swapper_space.tree_lock); if ((page_count(page) == 2) && !PageWriteback(page)) { __delete_from_swap_cache(page); SetPageDirty(page); retval = 1; } - spin_unlock(&swapper_space.tree_lock); unlock_page_ref_irq(page); } spin_unlock(&swap_lock); Index: linux-rt-rebase.q/mm/truncate.c =================================================================== --- linux-rt-rebase.q.orig/mm/truncate.c +++ linux-rt-rebase.q/mm/truncate.c @@ -348,19 +348,16 @@ invalidate_complete_page2(struct address return 0; lock_page_ref_irq(page); - spin_lock(&mapping->tree_lock); if (PageDirty(page)) goto failed; BUG_ON(PagePrivate(page)); __remove_from_page_cache(page); - spin_unlock(&mapping->tree_lock); unlock_page_ref_irq(page); ClearPageUptodate(page); page_cache_release(page); /* pagecache ref */ return 1; failed: - spin_unlock(&mapping->tree_lock); unlock_page_ref_irq(page); return 0; } Index: linux-rt-rebase.q/mm/vmscan.c =================================================================== --- linux-rt-rebase.q.orig/mm/vmscan.c +++ linux-rt-rebase.q/mm/vmscan.c @@ -370,7 +370,6 @@ int remove_mapping(struct address_space BUG_ON(mapping != page_mapping(page)); lock_page_ref_irq(page); - spin_lock(&mapping->tree_lock); /* * The non racy check for a busy page. * @@ -405,13 +404,11 @@ int remove_mapping(struct address_space if (PageSwapCache(page)) { swp_entry_t swap = { .val = page_private(page) }; __delete_from_swap_cache(page); - spin_unlock(&mapping->tree_lock); swap_free(swap); goto free_it; } __remove_from_page_cache(page); - spin_unlock(&mapping->tree_lock); free_it: unlock_page_ref_irq(page); @@ -419,7 +416,6 @@ free_it: return 1; cannot_free: - spin_unlock(&mapping->tree_lock); unlock_page_ref_irq(page); return 0; } patches/preempt-realtime-powerpc-a7.patch0000664000077200007720000001036310653433165020011 0ustar mingomingo To fix the following compile error by changing local_irq_restore() to raw_local_irq_restore(). 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - include/asm-powerpc/hw_irq.h In file included from include/asm/system.h:9, from include/linux/list.h:9, from include/linux/signal.h:8, from arch/powerpc/kernel/asm-offsets.c:16: include/asm/hw_irq.h: In function 'local_get_flags': include/asm/hw_irq.h:23: error: expected expression before '<<' token include/asm/hw_irq.h:24: error: expected expression before '<<' token include/asm/hw_irq.h:25: error: expected expression before ':' token include/asm/hw_irq.h:25: error: expected statement before ')' token - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Signed-off-by: Tsutomu Owa -- owa --- arch/powerpc/kernel/head_64.S | 2 +- arch/powerpc/kernel/irq.c | 2 +- arch/powerpc/kernel/ppc_ksyms.c | 2 +- include/asm-powerpc/hw_irq.h | 18 ++++++++---------- 4 files changed, 11 insertions(+), 13 deletions(-) Index: linux-rt-rebase.q/arch/powerpc/kernel/head_64.S =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/kernel/head_64.S +++ linux-rt-rebase.q/arch/powerpc/kernel/head_64.S @@ -1391,7 +1391,7 @@ END_FW_FTR_SECTION_IFCLR(FW_FEATURE_ISER * handles any interrupts pending at this point. */ ld r3,SOFTE(r1) - bl .local_irq_restore + bl .raw_local_irq_restore b 11f /* Here we have a page fault that hash_page can't handle. */ Index: linux-rt-rebase.q/arch/powerpc/kernel/irq.c =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/kernel/irq.c +++ linux-rt-rebase.q/arch/powerpc/kernel/irq.c @@ -111,7 +111,7 @@ static inline void set_soft_enabled(unsi : : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled))); } -void notrace local_irq_restore(unsigned long en) +void notrace raw_local_irq_restore(unsigned long en) { /* * get_paca()->soft_enabled = en; Index: linux-rt-rebase.q/arch/powerpc/kernel/ppc_ksyms.c =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/kernel/ppc_ksyms.c +++ linux-rt-rebase.q/arch/powerpc/kernel/ppc_ksyms.c @@ -49,7 +49,7 @@ #endif #ifdef CONFIG_PPC64 -EXPORT_SYMBOL(local_irq_restore); +EXPORT_SYMBOL(raw_local_irq_restore); #endif #ifdef CONFIG_PPC32 Index: linux-rt-rebase.q/include/asm-powerpc/hw_irq.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-powerpc/hw_irq.h +++ linux-rt-rebase.q/include/asm-powerpc/hw_irq.h @@ -16,18 +16,18 @@ extern void timer_interrupt(struct pt_re #ifdef CONFIG_PPC64 #include -static inline unsigned long local_get_flags(void) +static inline unsigned long raw_local_get_flags(void) { unsigned long flags; -<<<<<<< delete extern unsigned long local_get_flags(void); -<<<<<<< delete extern unsigned long local_irq_disable(void); + __asm__ __volatile__("lbz %0,%1(13)" + : "=r" (flags) : "i" (offsetof(struct paca_struct, soft_enabled))); return flags; } -static inline unsigned long local_irq_disable(void) +static inline unsigned long raw_local_irq_disable(void) { unsigned long flags, zero; @@ -53,8 +53,8 @@ extern void raw_local_irq_restore(unsign #define raw_irqs_disabled_flags(flags) ((flags) == 0) -#define __hard_irq_enable() __mtmsrd(mfmsr() | MSR_EE, 1) -#define __hard_irq_disable() __mtmsrd(mfmsr() & ~MSR_EE, 1) +#define __hard_irq_enable() __mtmsrd(mfmsr() | MSR_EE, 1) +#define __hard_irq_disable() __mtmsrd(mfmsr() & ~MSR_EE, 1) #define hard_irq_disable() \ do { \ @@ -63,17 +63,15 @@ extern void 
raw_local_irq_restore(unsign get_paca()->hard_enabled = 0; \ } while(0) -#else +#else /* CONFIG_PPC64 */ #if defined(CONFIG_BOOKE) #define SET_MSR_EE(x) mtmsr(x) #define raw_local_irq_restore(flags) __asm__ __volatile__("wrtee %0" : : "r" (flags) : "memory") -<<<<<<< delete #define local_irq_restore(flags) do { \ -#define raw_local_irq_restore(flags) do { \ #else #define SET_MSR_EE(x) mtmsr(x) #define raw_local_irq_restore(flags) mtmsr(flags) -#endif +#endif /* CONFIG_BOOKE */ static inline void raw_local_irq_disable(void) { patches/timer-freq-tweaks.patch0000664000077200007720000000751510653433167016131 0ustar mingomingo--- kernel/rcutorture.c | 2 +- mm/slab.c | 26 +++++++++++++++----------- 2 files changed, 16 insertions(+), 12 deletions(-) Index: linux-rt-rebase.q/kernel/rcutorture.c =================================================================== --- linux-rt-rebase.q.orig/kernel/rcutorture.c +++ linux-rt-rebase.q/kernel/rcutorture.c @@ -600,7 +600,7 @@ rcu_torture_reader(void *arg) if (p == NULL) { /* Wait for rcu_torture_writer to get underway */ cur_ops->readunlock(idx); - schedule_timeout_interruptible(HZ); + schedule_timeout_interruptible(round_jiffies_relative(HZ)); continue; } if (p->rtort_mbtest == 0) Index: linux-rt-rebase.q/mm/slab.c =================================================================== --- linux-rt-rebase.q.orig/mm/slab.c +++ linux-rt-rebase.q/mm/slab.c @@ -1051,7 +1051,7 @@ static int transfer_objects(struct array #ifndef CONFIG_NUMA #define drain_alien_cache(cachep, alien) do { } while (0) -#define reap_alien(cachep, l3, this_cpu) do { } while (0) +#define reap_alien(cachep, l3, this_cpu) 0 static inline struct array_cache **alloc_alien_cache(int node, int limit) { @@ -1149,7 +1149,7 @@ static void __drain_alien_cache(struct k /* * Called from cache_reap() to regularly drain alien caches round robin. */ -static void +static int reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3, int *this_cpu) { int node = per_cpu(reap_node, *this_cpu); @@ -1160,8 +1160,10 @@ reap_alien(struct kmem_cache *cachep, st if (ac && ac->avail && spin_trylock_irq(&ac->lock)) { __drain_alien_cache(cachep, ac, node, this_cpu); spin_unlock_irq(&ac->lock); + return 1; } } + return 0; } static void drain_alien_cache(struct kmem_cache *cachep, @@ -2488,7 +2490,7 @@ static void check_spinlock_acquired_node #define check_spinlock_acquired_node(x, y) do { } while(0) #endif -static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3, +static int drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3, struct array_cache *ac, int force, int node); @@ -4118,14 +4120,15 @@ static int enable_cpucache(struct kmem_c * Drain an array if it contains any elements taking the l3 lock only if * necessary. Note that the l3 listlock also protects the array_cache * if drain_array() is used on the shared array. 
+ * returns non-zero if some work is done */ -void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3, +int drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3, struct array_cache *ac, int force, int node) { int tofree, this_cpu; if (!ac || !ac->avail) - return; + return 0; if (ac->touched && !force) { ac->touched = 0; } else { @@ -4141,6 +4144,7 @@ void drain_array(struct kmem_cache *cach } slab_spin_unlock_irq(&l3->list_lock, this_cpu); } + return 1; } /** @@ -4178,10 +4182,10 @@ static void cache_reap(struct work_struc */ l3 = searchp->nodelists[node]; - reap_alien(searchp, l3, &this_cpu); + work_done += reap_alien(searchp, l3, &this_cpu); - drain_array(searchp, l3, cpu_cache_get(searchp, this_cpu), - 0, node); + work_done += drain_array(searchp, l3, + cpu_cache_get(searchp, this_cpu), 0, node); /* * These are racy checks but it does not matter @@ -4192,7 +4196,7 @@ static void cache_reap(struct work_struc l3->next_reap = jiffies + REAPTIMEOUT_LIST3; - drain_array(searchp, l3, l3->shared, 0, node); + work_done += drain_array(searchp, l3, l3->shared, 0, node); if (l3->free_touched) l3->free_touched = 0; @@ -4211,9 +4215,9 @@ next: next_reap_node(); out: /* Set up the next iteration */ - schedule_delayed_work(work, round_jiffies_relative(REAPTIMEOUT_CPUC)); + schedule_delayed_work(work, + round_jiffies_relative((1+!work_done) * REAPTIMEOUT_CPUC)); } - #ifdef CONFIG_PROC_FS static void print_slabinfo_header(struct seq_file *m) patches/ep93xx-clockevents-fix.patch0000664000077200007720000000256010653433161017011 0ustar mingomingoSubject: timer patch for ep93xx From: Manfred Gruber hi ! this patch is necessary to get latencies < 1ms for ep93xx armv4t with 2.6.21.5-rt18. Signed-off-by: Manfred Gruber --- arch/arm/mach-ep93xx/core.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) Index: linux-rt-rebase.q/arch/arm/mach-ep93xx/core.c =================================================================== --- linux-rt-rebase.q.orig/arch/arm/mach-ep93xx/core.c +++ linux-rt-rebase.q/arch/arm/mach-ep93xx/core.c @@ -98,9 +98,9 @@ static struct clock_event_device clockev static int ep93xx_timer_interrupt(int irq, void *dev_id) { - __raw_writel(EP93XX_TC_CLEAR, EP93XX_TIMER1_CLEAR); + __raw_writel(EP93XX_TC_CLEAR, EP93XX_TIMER1_CLEAR); - clockevent_ep93xx.event_handler(&clockevent_ep93xx); + clockevent_ep93xx.event_handler(&clockevent_ep93xx); return IRQ_HANDLED; } @@ -108,7 +108,15 @@ static int ep93xx_timer_interrupt(int ir static int ep93xx_set_next_event(unsigned long evt, struct clock_event_device *unused) { + u32 tmode = __raw_readl(EP93XX_TIMER1_CONTROL); + + /* stop timer */ + __raw_writel(tmode & ~EP93XX_TC123_ENABLE, EP93XX_TIMER1_CONTROL); + /* program timer */ __raw_writel(evt, EP93XX_TIMER1_LOAD); + /* start timer */ + __raw_writel(tmode | EP93XX_TC123_ENABLE, EP93XX_TIMER1_CONTROL); + return 0; } patches/hrtimer-trace.patch0000664000077200007720000000454110653433163015320 0ustar mingomingo include/linux/hrtimer.h | 6 ++++++ kernel/hrtimer.c | 5 +++++ kernel/time/clockevents.c | 4 ++++ 3 files changed, 15 insertions(+) Index: linux-rt-rebase.q/include/linux/hrtimer.h =================================================================== --- linux-rt-rebase.q.orig/include/linux/hrtimer.h +++ linux-rt-rebase.q/include/linux/hrtimer.h @@ -253,6 +253,12 @@ static inline ktime_t hrtimer_cb_get_tim extern ktime_t ktime_get(void); extern ktime_t ktime_get_real(void); +# if (BITS_PER_LONG == 64) || defined(CONFIG_KTIME_SCALAR) +# define hrtimer_trace(a,b) 
trace_special_u64((a).tv64,b) +# else +# define hrtimer_trace(a,b) trace_special((a).tv.sec,(a).tv.nsec,b) +# endif + /* Exported timer functions: */ /* Initialize timers: */ Index: linux-rt-rebase.q/kernel/hrtimer.c =================================================================== --- linux-rt-rebase.q.orig/kernel/hrtimer.c +++ linux-rt-rebase.q/kernel/hrtimer.c @@ -706,6 +706,8 @@ static void enqueue_hrtimer(struct hrtim struct hrtimer *entry; int leftmost = 1; + hrtimer_trace(timer->expires, (unsigned long) timer); + /* * Find the right place in the rbtree: */ @@ -1039,6 +1041,7 @@ void hrtimer_interrupt(struct clock_even retry: now = ktime_get(); + hrtimer_trace(now, 0); expires_next.tv64 = KTIME_MAX; @@ -1067,6 +1070,8 @@ void hrtimer_interrupt(struct clock_even break; } + hrtimer_trace(timer->expires, (unsigned long) timer); + /* Move softirq callbacks to the pending list */ if (timer->cb_mode == HRTIMER_CB_SOFTIRQ) { __remove_hrtimer(timer, base, Index: linux-rt-rebase.q/kernel/time/clockevents.c =================================================================== --- linux-rt-rebase.q.orig/kernel/time/clockevents.c +++ linux-rt-rebase.q/kernel/time/clockevents.c @@ -12,12 +12,14 @@ */ #include +#include #include #include #include #include #include #include +#include /* The registered clock event devices */ static LIST_HEAD(clockevent_devices); @@ -80,6 +82,8 @@ int clockevents_program_event(struct clo delta = ktime_to_ns(ktime_sub(expires, now)); + hrtimer_trace(expires, (unsigned long) delta); + if (delta <= 0) return -ETIME; patches/rt-mutex-arm.patch0000664000077200007720000002616410653433164015122 0ustar mingomingo--- arch/arm/kernel/entry-armv.S | 4 +- arch/arm/kernel/entry-common.S | 10 +++--- arch/arm/kernel/process.c | 10 ++++-- arch/arm/kernel/semaphore.c | 31 +++++++++++++++----- include/asm-arm/semaphore.h | 61 ++++++++++++++++++++++++++++------------- include/asm-arm/thread_info.h | 2 + 6 files changed, 80 insertions(+), 38 deletions(-) Index: linux-rt-rebase.q/arch/arm/kernel/entry-armv.S =================================================================== --- linux-rt-rebase.q.orig/arch/arm/kernel/entry-armv.S +++ linux-rt-rebase.q/arch/arm/kernel/entry-armv.S @@ -204,7 +204,7 @@ __irq_svc: irq_handler #ifdef CONFIG_PREEMPT ldr r0, [tsk, #TI_FLAGS] @ get flags - tst r0, #_TIF_NEED_RESCHED + tst r0, #_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_DELAYED blne svc_preempt preempt_return: ldr r0, [tsk, #TI_PREEMPT] @ read preempt value @@ -235,7 +235,7 @@ svc_preempt: str r7, [tsk, #TI_PREEMPT] @ expects preempt_count == 0 1: bl preempt_schedule_irq @ irq en/disable is done inside ldr r0, [tsk, #TI_FLAGS] @ get new tasks TI_FLAGS - tst r0, #_TIF_NEED_RESCHED + tst r0, #_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_DELAYED beq preempt_return @ go again b 1b #endif Index: linux-rt-rebase.q/arch/arm/kernel/entry-common.S =================================================================== --- linux-rt-rebase.q.orig/arch/arm/kernel/entry-common.S +++ linux-rt-rebase.q/arch/arm/kernel/entry-common.S @@ -46,7 +46,7 @@ ret_fast_syscall: fast_work_pending: str r0, [sp, #S_R0+S_OFF]! @ returned r0 work_pending: - tst r1, #_TIF_NEED_RESCHED + tst r1, #_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_DELAYED bne work_resched tst r1, #_TIF_NOTIFY_RESUME | _TIF_SIGPENDING beq no_work_pending @@ -56,7 +56,8 @@ work_pending: b ret_slow_syscall @ Check work again work_resched: - bl schedule + bl __schedule + /* * "slow" syscall return path. "why" tells us if this was a real syscall. 
*/ @@ -396,6 +397,7 @@ ENTRY(sys_oabi_call_table) #include "calls.S" #undef ABI #undef OBSOLETE +#endif #ifdef CONFIG_FRAME_POINTER @@ -445,7 +447,7 @@ mcount: ldr ip, =mcount_enabled @ leave early, if disabled ldr ip, [ip] cmp ip, #0 - moveq pc,lr + moveq pc, lr mov ip, sp stmdb sp!, {r0 - r3, fp, ip, lr, pc} @ create stack frame @@ -504,5 +506,3 @@ arm_return_addr: #endif -#endif - Index: linux-rt-rebase.q/arch/arm/kernel/process.c =================================================================== --- linux-rt-rebase.q.orig/arch/arm/kernel/process.c +++ linux-rt-rebase.q/arch/arm/kernel/process.c @@ -134,7 +134,7 @@ static void default_idle(void) cpu_relax(); else { local_irq_disable(); - if (!need_resched()) { + if (!need_resched() && !need_resched_delayed()) { timer_dyn_reprogram(); arch_idle(); } @@ -166,13 +166,15 @@ void cpu_idle(void) idle = default_idle; leds_event(led_idle_start); tick_nohz_stop_sched_tick(); - while (!need_resched()) + while (!need_resched() && !need_resched_delayed()) idle(); leds_event(led_idle_end); tick_nohz_restart_sched_tick(); - preempt_enable_no_resched(); - schedule(); + local_irq_disable(); + __preempt_enable_no_resched(); + __schedule(); preempt_disable(); + local_irq_enable(); } } Index: linux-rt-rebase.q/arch/arm/kernel/semaphore.c =================================================================== --- linux-rt-rebase.q.orig/arch/arm/kernel/semaphore.c +++ linux-rt-rebase.q/arch/arm/kernel/semaphore.c @@ -49,14 +49,16 @@ * we cannot lose wakeup events. */ -void __up(struct semaphore *sem) +fastcall void __attribute_used__ __compat_up(struct compat_semaphore *sem) { wake_up(&sem->wait); } +EXPORT_SYMBOL(__compat_up); + static DEFINE_SPINLOCK(semaphore_lock); -void __sched __down(struct semaphore * sem) +fastcall void __attribute_used__ __sched __compat_down(struct compat_semaphore * sem) { struct task_struct *tsk = current; DECLARE_WAITQUEUE(wait, tsk); @@ -89,7 +91,9 @@ void __sched __down(struct semaphore * s wake_up(&sem->wait); } -int __sched __down_interruptible(struct semaphore * sem) +EXPORT_SYMBOL(__compat_down); + +fastcall int __attribute_used__ __sched __compat_down_interruptible(struct compat_semaphore * sem) { int retval = 0; struct task_struct *tsk = current; @@ -140,6 +144,8 @@ int __sched __down_interruptible(struct return retval; } +EXPORT_SYMBOL(__compat_down_interruptible); + /* * Trylock failed - make sure we correct for * having decremented the count. @@ -148,7 +154,7 @@ int __sched __down_interruptible(struct * single "cmpxchg" without failure cases, * but then it wouldn't work on a 386. */ -int __down_trylock(struct semaphore * sem) +fastcall int __attribute_used__ __compat_down_trylock(struct compat_semaphore * sem) { int sleepers; unsigned long flags; @@ -168,6 +174,15 @@ int __down_trylock(struct semaphore * se return 1; } +EXPORT_SYMBOL(__compat_down_trylock); + +fastcall int compat_sem_is_locked(struct compat_semaphore *sem) +{ + return (int) atomic_read(&sem->count) < 0; +} + +EXPORT_SYMBOL(compat_sem_is_locked); + /* * The semaphore operations have a special calling sequence that * allow us to do a simpler in-line version of them. 
These routines @@ -185,7 +200,7 @@ asm(" .section .sched.text,\"ax\",%progb __down_failed: \n\ stmfd sp!, {r0 - r4, lr} \n\ mov r0, ip \n\ - bl __down \n\ + bl __compat_down \n\ ldmfd sp!, {r0 - r4, pc} \n\ \n\ .align 5 \n\ @@ -193,7 +208,7 @@ __down_failed: \n\ __down_interruptible_failed: \n\ stmfd sp!, {r0 - r4, lr} \n\ mov r0, ip \n\ - bl __down_interruptible \n\ + bl __compat_down_interruptible \n\ mov ip, r0 \n\ ldmfd sp!, {r0 - r4, pc} \n\ \n\ @@ -202,7 +217,7 @@ __down_interruptible_failed: \n\ __down_trylock_failed: \n\ stmfd sp!, {r0 - r4, lr} \n\ mov r0, ip \n\ - bl __down_trylock \n\ + bl __compat_down_trylock \n\ mov ip, r0 \n\ ldmfd sp!, {r0 - r4, pc} \n\ \n\ @@ -211,7 +226,7 @@ __down_trylock_failed: \n\ __up_wakeup: \n\ stmfd sp!, {r0 - r4, lr} \n\ mov r0, ip \n\ - bl __up \n\ + bl __compat_up \n\ ldmfd sp!, {r0 - r4, pc} \n\ "); Index: linux-rt-rebase.q/include/asm-arm/semaphore.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-arm/semaphore.h +++ linux-rt-rebase.q/include/asm-arm/semaphore.h @@ -5,46 +5,66 @@ #define __ASM_ARM_SEMAPHORE_H #include + +#ifdef CONFIG_PREEMPT_RT +# include +#endif + #include #include #include +/* + * On !PREEMPT_RT all semaphores are compat: + */ +#ifndef CONFIG_PREEMPT_RT +# define semaphore compat_semaphore +#endif + #include #include -struct semaphore { +struct compat_semaphore { atomic_t count; int sleepers; wait_queue_head_t wait; }; -#define __SEMAPHORE_INIT(name, cnt) \ +#define __COMPAT_SEMAPHORE_INITIALIZER(name, cnt) \ { \ .count = ATOMIC_INIT(cnt), \ .wait = __WAIT_QUEUE_HEAD_INITIALIZER((name).wait), \ } -#define __DECLARE_SEMAPHORE_GENERIC(name,count) \ - struct semaphore name = __SEMAPHORE_INIT(name,count) +#define __COMPAT_MUTEX_INITIALIZER(name) \ + __COMPAT_SEMAPHORE_INITIALIZER(name,1) + +#define __COMPAT_DECLARE_SEMAPHORE_GENERIC(name,count) \ + struct compat_semaphore name = __COMPAT_SEMAPHORE_INITIALIZER(name,count) -#define DECLARE_MUTEX(name) __DECLARE_SEMAPHORE_GENERIC(name,1) -#define DECLARE_MUTEX_LOCKED(name) __DECLARE_SEMAPHORE_GENERIC(name,0) +#define COMPAT_DECLARE_MUTEX(name) __COMPAT_DECLARE_SEMAPHORE_GENERIC(name,1) +#define COMPAT_DECLARE_MUTEX_LOCKED(name) __COMPAT_DECLARE_SEMAPHORE_GENERIC(name,0) -static inline void sema_init(struct semaphore *sem, int val) +static inline void compat_sema_init(struct compat_semaphore *sem, int val) { atomic_set(&sem->count, val); sem->sleepers = 0; init_waitqueue_head(&sem->wait); } -static inline void init_MUTEX(struct semaphore *sem) +static inline void compat_init_MUTEX(struct compat_semaphore *sem) +{ + compat_sema_init(sem, 1); +} + +static inline void compat_init_MUTEX_LOCKED(struct compat_semaphore *sem) { - sema_init(sem, 1); + compat_sema_init(sem, 0); } -static inline void init_MUTEX_LOCKED(struct semaphore *sem) +static inline int compat_sema_count(struct compat_semaphore *sem) { - sema_init(sem, 0); + return atomic_read(&sem->count); } /* @@ -55,16 +75,18 @@ asmlinkage int __down_interruptible_fai asmlinkage int __down_trylock_failed(void); asmlinkage void __up_wakeup(void); -extern void __down(struct semaphore * sem); -extern int __down_interruptible(struct semaphore * sem); -extern int __down_trylock(struct semaphore * sem); -extern void __up(struct semaphore * sem); +extern void __compat_up(struct compat_semaphore *sem); +extern int __compat_down_interruptible(struct compat_semaphore * sem); +extern int __compat_down_trylock(struct compat_semaphore * sem); +extern void __compat_down(struct 
compat_semaphore * sem); + +extern int compat_sem_is_locked(struct compat_semaphore *sem); /* * This is ugly, but we want the default case to fall through. * "__down" is the actual routine that waits... */ -static inline void down(struct semaphore * sem) +static inline void compat_down(struct compat_semaphore * sem) { might_sleep(); __down_op(sem, __down_failed); @@ -74,13 +96,13 @@ static inline void down(struct semaphore * This is ugly, but we want the default case to fall through. * "__down_interruptible" is the actual routine that waits... */ -static inline int down_interruptible (struct semaphore * sem) +static inline int compat_down_interruptible (struct compat_semaphore * sem) { might_sleep(); return __down_op_ret(sem, __down_interruptible_failed); } -static inline int down_trylock(struct semaphore *sem) +static inline int compat_down_trylock(struct compat_semaphore *sem) { return __down_op_ret(sem, __down_trylock_failed); } @@ -91,9 +113,10 @@ static inline int down_trylock(struct se * The default case (no contention) will result in NO * jumps for both down() and up(). */ -static inline void up(struct semaphore * sem) +static inline void compat_up(struct compat_semaphore * sem) { __up_op(sem, __up_wakeup); } +#include #endif Index: linux-rt-rebase.q/include/asm-arm/thread_info.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-arm/thread_info.h +++ linux-rt-rebase.q/include/asm-arm/thread_info.h @@ -143,6 +143,7 @@ extern void iwmmxt_task_switch(struct th #define TIF_NOTIFY_RESUME 0 #define TIF_SIGPENDING 1 #define TIF_NEED_RESCHED 2 +#define TIF_NEED_RESCHED_DELAYED 3 #define TIF_SYSCALL_TRACE 8 #define TIF_POLLING_NRFLAG 16 #define TIF_USING_IWMMXT 17 @@ -152,6 +153,7 @@ extern void iwmmxt_task_switch(struct th #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) +#define _TIF_NEED_RESCHED_DELAYED (1<pstr->xrun_debug) { + user_trace_stop(); snd_printd(KERN_DEBUG "XRUN: pcmC%dD%d%c\n", substream->pcm->card->number, substream->pcm->device, patches/schedule_on_each_cpu-enhance.patch0000664000077200007720000001013310653433170020260 0ustar mingomingoIt always bothered me a bit that on_each_cpu() and schedule_on_each_cpu() had wildly different interfaces. Rectify this and convert the sole in-kernel user to the new interface. 
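As a minimal sketch of what a caller looks like under the new interface (the function and variable names below are illustrative only, they do not exist in the tree):

	static atomic_t hits = ATOMIC_INIT(0);

	/* runs in keventd (process) context once on each online CPU */
	static void count_cpu(void *info)
	{
		atomic_inc((atomic_t *)info);
	}

	static void count_online_cpus_demo(void)
	{
		/*
		 * Argument order now matches on_each_cpu(func, info,
		 * retry, wait): retry is ignored, wait=1 flushes the
		 * keventd workqueue before returning.
		 */
		if (!schedule_on_each_cpu(count_cpu, &hits, 0, 1))
			printk(KERN_DEBUG "ran on %d cpus\n",
			       atomic_read(&hits));
	}

With wait=0 the works are merely queued and the call returns immediately; callers that need the per-cpu side effects to have completed must pass wait=1.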
Signed-off-by: Peter Zijlstra Acked-by: Ingo Molnar --- include/linux/workqueue.h | 2 - kernel/workqueue.c | 63 ++++++++++++++++++++++++++++++++++++++-------- mm/swap.c | 4 +- 3 files changed, 56 insertions(+), 13 deletions(-) Index: linux-rt-rebase.q/include/linux/workqueue.h =================================================================== --- linux-rt-rebase.q.orig/include/linux/workqueue.h +++ linux-rt-rebase.q/include/linux/workqueue.h @@ -145,7 +145,7 @@ extern int FASTCALL(schedule_delayed_wor extern int schedule_delayed_work_on(int cpu, struct delayed_work *work, unsigned long delay); extern int schedule_on_each_cpu_wq(struct workqueue_struct *wq, work_func_t func); -extern int schedule_on_each_cpu(work_func_t func); +extern int schedule_on_each_cpu(void (*func)(void *info), void *info, int retry, int wait); extern int current_is_keventd(void); extern int keventd_up(void); Index: linux-rt-rebase.q/kernel/workqueue.c =================================================================== --- linux-rt-rebase.q.orig/kernel/workqueue.c +++ linux-rt-rebase.q/kernel/workqueue.c @@ -571,9 +571,28 @@ int schedule_delayed_work_on(int cpu, } EXPORT_SYMBOL(schedule_delayed_work_on); +struct schedule_on_each_cpu_work { + struct work_struct work; + void (*func)(void *info); + void *info; +}; + +static void schedule_on_each_cpu_func(struct work_struct *work) +{ + struct schedule_on_each_cpu_work *w; + + w = container_of(work, typeof(*w), work); + w->func(w->info); + + kfree(w); +} + /** * schedule_on_each_cpu - call a function on each online CPU from keventd * @func: the function to call + * @info: data to pass to function + * @retry: ignored + * @wait: wait for completion * * Returns zero on success. * Returns -ve errno on failure. @@ -582,27 +601,51 @@ EXPORT_SYMBOL(schedule_delayed_work_on); * * schedule_on_each_cpu() is very slow. 
*/ -int schedule_on_each_cpu(work_func_t func) +int schedule_on_each_cpu(void (*func)(void *info), void *info, int retry, int wait) { int cpu; - struct work_struct *works; + struct schedule_on_each_cpu_work **works; + int err = 0; - works = alloc_percpu(struct work_struct); + works = kzalloc(sizeof(void *)*nr_cpu_ids, GFP_KERNEL); if (!works) return -ENOMEM; + for_each_possible_cpu(cpu) { + works[cpu] = kmalloc_node(sizeof(struct schedule_on_each_cpu_work), + GFP_KERNEL, cpu_to_node(cpu)); + if (!works[cpu]) { + err = -ENOMEM; + goto out; + } + } + preempt_disable(); /* CPU hotplug */ for_each_online_cpu(cpu) { - struct work_struct *work = per_cpu_ptr(works, cpu); + struct schedule_on_each_cpu_work *work; - INIT_WORK(work, func); - set_bit(WORK_STRUCT_PENDING, work_data_bits(work)); - __queue_work(per_cpu_ptr(keventd_wq->cpu_wq, cpu), work); + work = works[cpu]; + works[cpu] = NULL; + + work->func = func; + work->info = info; + INIT_WORK(&work->work, schedule_on_each_cpu_func); + set_bit(WORK_STRUCT_PENDING, work_data_bits(&work->work)); + __queue_work(per_cpu_ptr(keventd_wq->cpu_wq, cpu), &work->work); } preempt_enable(); - flush_workqueue(keventd_wq); - free_percpu(works); - return 0; + +out: + for_each_possible_cpu(cpu) { + if (works[cpu]) + kfree(works[cpu]); + } + kfree(works); + + if (!err && wait) + flush_workqueue(keventd_wq); + + return err; } /** Index: linux-rt-rebase.q/mm/swap.c =================================================================== --- linux-rt-rebase.q.orig/mm/swap.c +++ linux-rt-rebase.q/mm/swap.c @@ -216,7 +216,7 @@ void lru_add_drain(void) } #ifdef CONFIG_NUMA -static void lru_add_drain_per_cpu(struct work_struct *dummy) +static void lru_add_drain_per_cpu(void *info) { lru_add_drain(); } @@ -226,7 +226,7 @@ static void lru_add_drain_per_cpu(struct */ int lru_add_drain_all(void) { - return schedule_on_each_cpu(lru_add_drain_per_cpu); + return schedule_on_each_cpu(lru_add_drain_per_cpu, NULL, 0, 1); } #else patches/arm-cmpxchg.patch0000664000077200007720000000243310653433164014757 0ustar mingomingo include/asm-arm/atomic.h | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) Index: linux-rt-rebase.q/include/asm-arm/atomic.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-arm/atomic.h +++ linux-rt-rebase.q/include/asm-arm/atomic.h @@ -173,6 +173,41 @@ static inline void atomic_clear_mask(uns raw_local_irq_restore(flags); } +#ifndef CONFIG_SMP +/* + * Atomic compare and exchange. + */ +#define __HAVE_ARCH_CMPXCHG 1 + +extern unsigned long wrong_size_cmpxchg(volatile void *ptr); + +static inline unsigned long __cmpxchg(volatile void *ptr, + unsigned long old, + unsigned long new, int size) +{ + unsigned long flags, prev; + volatile unsigned long *p = ptr; + + if (size == 4) { + local_irq_save(flags); + if ((prev = *p) == old) + *p = new; + local_irq_restore(flags); + return(prev); + } else + return wrong_size_cmpxchg(ptr); +} + +#define cmpxchg(ptr,o,n) \ +({ \ + __typeof__(*(ptr)) _o_ = (o); \ + __typeof__(*(ptr)) _n_ = (n); \ + (__typeof__(*(ptr))) __cmpxchg((ptr), (unsigned long)_o_, \ + (unsigned long)_n_, sizeof(*(ptr))); \ +}) + +#endif + #endif /* __LINUX_ARM_ARCH__ */ #define atomic_xchg(v, new) (xchg(&((v)->counter), new)) patches/ich-force-hpet-restructure-hpet-generic-clock-code.patch0000664000077200007720000001452510653433161024306 0ustar mingomingoFrom: Venki Pallipadi Restructure and rename legacy replacement mode HPET timer support. 
These are just structural code changes; there should be zero functional change. Signed-off-by: Venkatesh Pallipadi Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Andi Kleen Cc: john stultz Cc: Greg KH Signed-off-by: Andrew Morton --- arch/i386/kernel/hpet.c | 148 ++++++++++++++++++++++++++---------------- 1 file changed, 83 insertions(+), 65 deletions(-) Index: linux-rt-rebase.q/arch/i386/kernel/hpet.c =================================================================== --- linux-rt-rebase.q.orig/arch/i386/kernel/hpet.c +++ linux-rt-rebase.q/arch/i386/kernel/hpet.c @@ -149,9 +149,9 @@ static void hpet_reserve_platform_timers */ static unsigned long hpet_period; -static void hpet_set_mode(enum clock_event_mode mode, +static void hpet_legacy_set_mode(enum clock_event_mode mode, struct clock_event_device *evt); -static int hpet_next_event(unsigned long delta, +static int hpet_legacy_next_event(unsigned long delta, struct clock_event_device *evt); /* @@ -160,8 +160,8 @@ static int hpet_next_event(unsigned long static struct clock_event_device hpet_clockevent = { .name = "hpet", .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, - .set_mode = hpet_set_mode, - .set_next_event = hpet_next_event, + .set_mode = hpet_legacy_set_mode, + .set_next_event = hpet_legacy_next_event, .shift = 32, .irq = 0, }; @@ -178,7 +178,7 @@ static void hpet_start_counter(void) hpet_writel(cfg, HPET_CFG); } -static void hpet_enable_int(void) +static void hpet_enable_legacy_int(void) { unsigned long cfg = hpet_readl(HPET_CFG); @@ -187,7 +187,39 @@ static void hpet_enable_int(void) hpet_legacy_int_enabled = 1; } -static void hpet_set_mode(enum clock_event_mode mode, +static void hpet_legacy_clockevent_register(void) +{ + uint64_t hpet_freq; + + /* Start HPET legacy interrupts */ + hpet_enable_legacy_int(); + + /* + * The period is a femto seconds value. We need to calculate the + * scaled math multiplication factor for nanosecond to hpet tick + * conversion. + */ + hpet_freq = 1000000000000000ULL; + do_div(hpet_freq, hpet_period); + hpet_clockevent.mult = div_sc((unsigned long) hpet_freq, + NSEC_PER_SEC, 32); + /* Calculate the min / max delta */ + hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, + &hpet_clockevent); + hpet_clockevent.min_delta_ns = clockevent_delta2ns(0x30, + &hpet_clockevent); + + /* + * Start hpet with the boot cpu mask and make it + * global after the IO_APIC has been initialized. 
+ */ + hpet_clockevent.cpumask = cpumask_of_cpu(smp_processor_id()); + clockevents_register_device(&hpet_clockevent); + global_clock_event = &hpet_clockevent; + printk(KERN_DEBUG "hpet clockevent registered\n"); +} + +static void hpet_legacy_set_mode(enum clock_event_mode mode, struct clock_event_device *evt) { unsigned long cfg, cmp, now; @@ -228,12 +260,12 @@ static void hpet_set_mode(enum clock_eve break; case CLOCK_EVT_MODE_RESUME: - hpet_enable_int(); + hpet_enable_legacy_int(); break; } } -static int hpet_next_event(unsigned long delta, +static int hpet_legacy_next_event(unsigned long delta, struct clock_event_device *evt) { unsigned long cnt; @@ -273,58 +305,11 @@ static struct clocksource clocksource_hp #endif }; -/* - * Try to setup the HPET timer - */ -int __init hpet_enable(void) +static int hpet_clocksource_register(void) { - unsigned long id; - uint64_t hpet_freq; u64 tmp, start, now; cycle_t t1; - if (!is_hpet_capable()) - return 0; - - hpet_set_mapping(); - - /* - * Read the period and check for a sane value: - */ - hpet_period = hpet_readl(HPET_PERIOD); - if (hpet_period < HPET_MIN_PERIOD || hpet_period > HPET_MAX_PERIOD) - goto out_nohpet; - - /* - * The period is a femto seconds value. We need to calculate the - * scaled math multiplication factor for nanosecond to hpet tick - * conversion. - */ - hpet_freq = 1000000000000000ULL; - do_div(hpet_freq, hpet_period); - hpet_clockevent.mult = div_sc((unsigned long) hpet_freq, - NSEC_PER_SEC, 32); - /* Calculate the min / max delta */ - hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, - &hpet_clockevent); - hpet_clockevent.min_delta_ns = clockevent_delta2ns(0x30, - &hpet_clockevent); - - /* - * Read the HPET ID register to retrieve the IRQ routing - * information and the number of channels - */ - id = hpet_readl(HPET_ID); - -#ifdef CONFIG_HPET_EMULATE_RTC - /* - * The legacy routing mode needs at least two channels, tick timer - * and the rtc emulation channel. - */ - if (!(id & HPET_ID_NUMBER)) - goto out_nohpet; -#endif - /* Start the counter */ hpet_start_counter(); @@ -346,7 +331,7 @@ int __init hpet_enable(void) if (t1 == read_hpet()) { printk(KERN_WARNING "HPET counter not counting. HPET disabled\n"); - goto out_nohpet; + return -ENODEV; } /* Initialize and register HPET clocksource @@ -367,15 +352,48 @@ int __init hpet_enable(void) clocksource_register(&clocksource_hpet); + return 0; +} + +/* + * Try to setup the HPET timer + */ +int __init hpet_enable(void) +{ + unsigned long id; + + if (!is_hpet_capable()) + return 0; + + hpet_set_mapping(); + + /* + * Read the period and check for a sane value: + */ + hpet_period = hpet_readl(HPET_PERIOD); + if (hpet_period < HPET_MIN_PERIOD || hpet_period > HPET_MAX_PERIOD) + goto out_nohpet; + + /* + * Read the HPET ID register to retrieve the IRQ routing + * information and the number of channels + */ + id = hpet_readl(HPET_ID); + +#ifdef CONFIG_HPET_EMULATE_RTC + /* + * The legacy routing mode needs at least two channels, tick timer + * and the rtc emulation channel. + */ + if (!(id & HPET_ID_NUMBER)) + goto out_nohpet; +#endif + + if (hpet_clocksource_register()) + goto out_nohpet; + if (id & HPET_ID_LEGSUP) { - hpet_enable_int(); - /* - * Start hpet with the boot cpu mask and make it - * global after the IO_APIC has been initialized. 
- */ - hpet_clockevent.cpumask = cpumask_of_cpu(smp_processor_id()); - clockevents_register_device(&hpet_clockevent); - global_clock_event = &hpet_clockevent; + hpet_legacy_clockevent_register(); return 1; } return 0; patches/apic-dumpstack.patch0000664000077200007720000000073410653433162015456 0ustar mingomingo arch/i386/kernel/apic.c | 1 + 1 file changed, 1 insertion(+) Index: linux-rt-rebase.q/arch/i386/kernel/apic.c =================================================================== --- linux-rt-rebase.q.orig/arch/i386/kernel/apic.c +++ linux-rt-rebase.q/arch/i386/kernel/apic.c @@ -1309,6 +1309,7 @@ void smp_error_interrupt(struct pt_regs */ printk (KERN_DEBUG "APIC error on CPU%d: %02lx(%02lx)\n", smp_processor_id(), v , v1); + dump_stack(); irq_exit(); } patches/preempt-realtime-drivers-pci-hotplug.patch0000664000077200007720000000143310653433166021733 0ustar mingomingoSubject: pci/hotplug/cpqphp_ctrl.c: remove stale BKL use From: Ingo Molnar remove stale BKL use from drivers/pci/hotplug/cpqphp_ctrl.c. Signed-off-by: Ingo Molnar --- drivers/pci/hotplug/cpqphp_ctrl.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) Index: linux-rt-rebase.q/drivers/pci/hotplug/cpqphp_ctrl.c =================================================================== --- linux-rt-rebase.q.orig/drivers/pci/hotplug/cpqphp_ctrl.c +++ linux-rt-rebase.q/drivers/pci/hotplug/cpqphp_ctrl.c @@ -1746,10 +1746,8 @@ static void pushbutton_helper_thread(uns static int event_thread(void* data) { struct controller *ctrl; - lock_kernel(); + daemonize("phpd_event"); - - unlock_kernel(); while (1) { dbg("!!!!event_thread sleeping\n"); patches/remove-check-pgt-cache-calls.patch0000664000077200007720000000256110653433165020051 0ustar mingomingo--- arch/i386/kernel/process.c | 1 - arch/x86_64/kernel/process.c | 1 - arch/x86_64/kernel/smp.c | 1 - 3 files changed, 3 deletions(-) Index: linux-rt-rebase.q/arch/i386/kernel/process.c =================================================================== --- linux-rt-rebase.q.orig/arch/i386/kernel/process.c +++ linux-rt-rebase.q/arch/i386/kernel/process.c @@ -185,7 +185,6 @@ void cpu_idle(void) if (__get_cpu_var(cpu_idle_state)) __get_cpu_var(cpu_idle_state) = 0; - check_pgt_cache(); tick_nohz_stop_sched_tick(); rmb(); idle = pm_idle; Index: linux-rt-rebase.q/arch/x86_64/kernel/process.c =================================================================== --- linux-rt-rebase.q.orig/arch/x86_64/kernel/process.c +++ linux-rt-rebase.q/arch/x86_64/kernel/process.c @@ -210,7 +210,6 @@ void cpu_idle (void) tick_nohz_stop_sched_tick(); - check_pgt_cache(); rmb(); idle = pm_idle; if (!idle) Index: linux-rt-rebase.q/arch/x86_64/kernel/smp.c =================================================================== --- linux-rt-rebase.q.orig/arch/x86_64/kernel/smp.c +++ linux-rt-rebase.q/arch/x86_64/kernel/smp.c @@ -241,7 +241,6 @@ void flush_tlb_mm (struct mm_struct * mm } if (!cpus_empty(cpu_mask)) flush_tlb_others(cpu_mask, mm, FLUSH_ALL); - check_pgt_cache(); preempt_enable(); } EXPORT_SYMBOL(flush_tlb_mm); patches/ich-force-hpet-make-generic-time-capable-of-switching-broadcast-timer.patch0000664000077200007720000000500410653433161027663 0ustar mingomingoFrom: Venki Pallipadi Auto-detect the presence of HPET on ICH5 or newer platforms and enable HPET as the broadcast timer. This gives a bigger upper limit for the tickless time tick and improves power consumption compared to using the PIT as the broadcast timer. This patch: Change the broadcast timer if a timer with a higher rating becomes available. 
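The new acceptance check in tick_check_broadcast_device() then amounts to the following (a condensed restatement for illustration; the helper below is made up and is not part of the patch):

	/* would the new device make a better broadcast timer? */
	static int beats_current_broadcast(struct clock_event_device *cur,
					   struct clock_event_device *dev)
	{
		if (dev->features & CLOCK_EVT_FEAT_C3STOP)
			return 0;	/* may stop in deep C-states: unusable */
		if (cur && cur->rating >= dev->rating)
			return 0;	/* current device is at least as good */
		return 1;		/* e.g. a late-registered HPET beats the PIT */
	}

tick_broadcast_setup_oneshot() correspondingly loses its mode check, so the broadcast handler and oneshot mode are re-established unconditionally on the replacement device.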
Signed-off-by: Venkatesh Pallipadi Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Andi Kleen Cc: john stultz Cc: Greg KH Signed-off-by: Andrew Morton --- kernel/time/tick-broadcast.c | 13 ++++++------- kernel/time/tick-common.c | 4 ++-- 2 files changed, 8 insertions(+), 9 deletions(-) Index: linux-rt-rebase.q/kernel/time/tick-broadcast.c =================================================================== --- linux-rt-rebase.q.orig/kernel/time/tick-broadcast.c +++ linux-rt-rebase.q/kernel/time/tick-broadcast.c @@ -64,8 +64,9 @@ static void tick_broadcast_start_periodi */ int tick_check_broadcast_device(struct clock_event_device *dev) { - if (tick_broadcast_device.evtdev || - (dev->features & CLOCK_EVT_FEAT_C3STOP)) + if ((tick_broadcast_device.evtdev && + tick_broadcast_device.evtdev->rating >= dev->rating) || + (dev->features & CLOCK_EVT_FEAT_C3STOP)) return 0; clockevents_exchange_device(NULL, dev); @@ -519,11 +520,9 @@ static void tick_broadcast_clear_oneshot */ void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { - if (bc->mode != CLOCK_EVT_MODE_ONESHOT) { - bc->event_handler = tick_handle_oneshot_broadcast; - clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT); - bc->next_event.tv64 = KTIME_MAX; - } + bc->event_handler = tick_handle_oneshot_broadcast; + clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT); + bc->next_event.tv64 = KTIME_MAX; } /* Index: linux-rt-rebase.q/kernel/time/tick-common.c =================================================================== --- linux-rt-rebase.q.orig/kernel/time/tick-common.c +++ linux-rt-rebase.q/kernel/time/tick-common.c @@ -200,7 +200,7 @@ static int tick_check_new_device(struct cpu = smp_processor_id(); if (!cpu_isset(cpu, newdev->cpumask)) - goto out; + goto out_bc; td = &per_cpu(tick_cpu_device, cpu); curdev = td->evtdev; @@ -265,7 +265,7 @@ out_bc: */ if (tick_check_broadcast_device(newdev)) ret = NOTIFY_STOP; -out: + spin_unlock_irqrestore(&tick_device_lock, flags); return ret; patches/preempt-irqs-mips.patch0000664000077200007720000000211510653433164016144 0ustar mingomingo--- arch/mips/kernel/time.c | 2 +- arch/mips/sibyte/sb1250/irq.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) Index: linux-rt-rebase.q/arch/mips/kernel/time.c =================================================================== --- linux-rt-rebase.q.orig/arch/mips/kernel/time.c +++ linux-rt-rebase.q/arch/mips/kernel/time.c @@ -281,7 +281,7 @@ unsigned int mips_hpt_frequency; static struct irqaction timer_irqaction = { .handler = timer_interrupt, - .flags = IRQF_DISABLED | IRQF_PERCPU, + .flags = IRQF_DISABLED | IRQF_PERCPU | IRQF_NODELAY, .name = "timer", }; Index: linux-rt-rebase.q/arch/mips/sibyte/sb1250/irq.c =================================================================== --- linux-rt-rebase.q.orig/arch/mips/sibyte/sb1250/irq.c +++ linux-rt-rebase.q/arch/mips/sibyte/sb1250/irq.c @@ -242,7 +242,7 @@ static irqreturn_t sb1250_dummy_handler static struct irqaction sb1250_dummy_action = { .handler = sb1250_dummy_handler, - .flags = 0, + .flags = IRQF_NODELAY, .mask = CPU_MASK_NONE, .name = "sb1250-private", .next = NULL, patches/2.6.21-rc6-lockless4-__add_to_swap_cache-stuff.patch0000664000077200007720000000267410653433167022732 0ustar mingomingoFrom: Nick Piggin Subject: [patch 4/9] mm: __add_to_swap_cache stuff __add_to_swap_cache unconditionally sets the page locked. Instead, just ensure that the page is locked (which is a usual invariant for manipulating swapcache). 
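In sketch form, the caller-side pattern this enforces looks like this (condensed from the read_swap_cache_async() hunk below; the helper name is invented and the real code retries on lookup/insert races):

	static struct page *alloc_locked_swap_page(swp_entry_t entry,
						   gfp_t gfp_mask,
						   struct vm_area_struct *vma,
						   unsigned long addr)
	{
		struct page *page = alloc_page_vma(gfp_mask, vma, addr);

		if (!page)
			return NULL;
		/* not yet visible to others: non-atomic flag op is safe */
		SetPageLocked(page);
		if (add_to_swap_cache(page, entry)) {
			/* insertion failed: undo the lock, drop our ref */
			ClearPageLocked(page);
			page_cache_release(page);
			return NULL;
		}
		return page;	/* locked and in the swap cache */
	}

__add_to_swap_cache() itself now only asserts the invariant with BUG_ON(!PageLocked(page)) instead of setting the bit.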
Signed-off-by: Nick Piggin --- mm/swap_state.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) Index: linux-rt-rebase.q/mm/swap_state.c =================================================================== --- linux-rt-rebase.q.orig/mm/swap_state.c +++ linux-rt-rebase.q/mm/swap_state.c @@ -74,6 +74,7 @@ static int __add_to_swap_cache(struct pa { int error; + BUG_ON(!PageLocked(page)); BUG_ON(PageSwapCache(page)); BUG_ON(PagePrivate(page)); error = radix_tree_preload(gfp_mask); @@ -83,7 +84,6 @@ static int __add_to_swap_cache(struct pa entry.val, page); if (!error) { page_cache_get(page); - SetPageLocked(page); SetPageSwapCache(page); set_page_private(page, entry.val); total_swapcache_pages++; @@ -338,6 +338,7 @@ struct page *read_swap_cache_async(swp_e vma, addr); if (!new_page) break; /* Out of memory */ + SetPageLocked(new_page);/* could be non-atomic op */ } /* @@ -361,7 +362,9 @@ struct page *read_swap_cache_async(swp_e } } while (err != -ENOENT && err != -ENOMEM); - if (new_page) + if (new_page) { + ClearPageLocked(new_page); page_cache_release(new_page); + } return found_page; } patches/hrtimer-no-printk.patch0000664000077200007720000000211210653433166016136 0ustar mingomingo--- kernel/hrtimer.c | 3 +-- kernel/time/timekeeping.c | 2 ++ 2 files changed, 3 insertions(+), 2 deletions(-) Index: linux-rt-rebase.q/kernel/hrtimer.c =================================================================== --- linux-rt-rebase.q.orig/kernel/hrtimer.c +++ linux-rt-rebase.q/kernel/hrtimer.c @@ -578,8 +578,7 @@ static int hrtimer_switch_to_hres(void) /* "Retrigger" the interrupt to get things going */ retrigger_next_event(NULL); local_irq_restore(flags); - printk(KERN_INFO "Switched to high resolution mode on CPU %d\n", - smp_processor_id()); + return 1; } Index: linux-rt-rebase.q/kernel/time/timekeeping.c =================================================================== --- linux-rt-rebase.q.orig/kernel/time/timekeeping.c +++ linux-rt-rebase.q/kernel/time/timekeeping.c @@ -239,8 +239,10 @@ static void change_clocksource(void) tick_clock_notify(); +#ifndef CONFIG_PREEMPT_RT printk(KERN_INFO "Time: %s clocksource has been installed.\n", clock->name); +#endif } #else static inline void change_clocksource(void) { } patches/idle-stop-critical-timing.patch0000664000077200007720000000167410653433163017533 0ustar mingomingo--- drivers/acpi/processor_idle.c | 8 ++++++++ 1 file changed, 8 insertions(+) Index: linux-rt-rebase.q/drivers/acpi/processor_idle.c =================================================================== --- linux-rt-rebase.q.orig/drivers/acpi/processor_idle.c +++ linux-rt-rebase.q/drivers/acpi/processor_idle.c @@ -837,6 +837,12 @@ static inline void acpi_idle_update_bm_r */ static inline void acpi_idle_do_entry(struct acpi_processor_cx *cx) { + /* + * We have irqs disabled here, so stop latency tracing + * at this point and restart it after we return: + */ + stop_critical_timing(); + if (cx->space_id == ACPI_CSTATE_FFH) { /* Call into architectural FFH based C-state */ acpi_processor_ffh_cstate_enter(cx); @@ -849,6 +855,8 @@ static inline void acpi_idle_do_entry(st gets asserted in time to freeze execution properly. 
*/ unused = inl(acpi_gbl_FADT.xpm_timer_block.address); } + + touch_critical_timing(); } /** patches/mm-fix-latency.patch0000664000077200007720000000560410653433162015404 0ustar mingomingoFrom: Hugh Dickins Subject: reduce pagetable-freeing latencies 2.6.15-rc1 moved the unlinking of a vma from its prio_tree and anon_vma into free_pgtables: so the vma is hidden from rmap and vmtruncate before freeing its page tables, allowing safe descent without page table lock. But free_pgtables is still called with preemption disabled, and Lee Revell has now detected high latency there. The right fix will be to rework the mmu_gathering, not to need preemption disabled; but for now an ugly CONFIG_PREEMPT block in free_pgtables, to make an initial unlinking pass with preemption enabled - made uglier by CONFIG_IA64 definitions (only ia64 actually uses the start and end given to tlb_finish_mmu, and our floor and ceiling don't quite work for those). These CONFIG choices are meant to minimize the additional TLB flushing. Signed-off-by: Hugh Dickins Signed-off-by: Ingo Molnar -- mm/memory.c | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) Index: linux-rt-rebase.q/mm/memory.c =================================================================== --- linux-rt-rebase.q.orig/mm/memory.c +++ linux-rt-rebase.q/mm/memory.c @@ -264,18 +264,48 @@ void free_pgd_range(struct mmu_gather ** flush_tlb_pgtables((*tlb)->mm, start, end); } +#ifdef CONFIG_IA64 +#define tlb_start_addr(tlb) (tlb)->start_addr +#define tlb_end_addr(tlb) (tlb)->end_addr +#else +#define tlb_start_addr(tlb) 0UL /* only ia64 really uses it */ +#define tlb_end_addr(tlb) 0UL /* only ia64 really uses it */ +#endif + void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *vma, unsigned long floor, unsigned long ceiling) { +#ifdef CONFIG_PREEMPT + struct vm_area_struct *unlink = vma; + int fullmm = (*tlb)->fullmm; + + if (!vma) /* Sometimes when exiting after an oops */ + return; + if (vma->vm_next) + tlb_finish_mmu(*tlb, tlb_start_addr(*tlb), tlb_end_addr(*tlb)); + /* + * Hide vma from rmap and vmtruncate before freeing pgtables, + * with preemption enabled, except when unmapping just one area. + */ + while (unlink) { + anon_vma_unlink(unlink); + unlink_file_vma(unlink); + unlink = unlink->vm_next; + } + if (vma->vm_next) + *tlb = tlb_gather_mmu(vma->vm_mm, fullmm); +#endif while (vma) { struct vm_area_struct *next = vma->vm_next; unsigned long addr = vma->vm_start; +#ifndef CONFIG_PREEMPT /* * Hide vma from rmap and vmtruncate before freeing pgtables */ anon_vma_unlink(vma); unlink_file_vma(vma); +#endif if (is_vm_hugetlb_page(vma)) { hugetlb_free_pgd_range(tlb, addr, vma->vm_end, @@ -288,8 +318,10 @@ void free_pgtables(struct mmu_gather **t && !is_vm_hugetlb_page(next)) { vma = next; next = vma->vm_next; +#ifndef CONFIG_PREEMPT anon_vma_unlink(vma); unlink_file_vma(vma); +#endif } free_pgd_range(tlb, addr, vma->vm_end, floor, next?
next->vm_start: ceiling); patches/preempt-irqs-ppc-fix-more-fasteoi.patch0000664000077200007720000000704710653433164021143 0ustar mingomingoFrom sshtylyov@ru.mvista.com Thu May 17 15:18:39 2007 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.1.7-deb (2006-10-05) on debian X-Spam-Level: X-Spam-Status: No, score=0.0 required=5.0 tests=AWL autolearn=unavailable version=3.1.7-deb Received: from imap.sh.mvista.com (unknown [63.81.120.155]) by mail.tglx.de (Postfix) with ESMTP id BFD3A65C065 for ; Thu, 17 May 2007 15:18:39 +0200 (CEST) Received: from wasted.dev.rtsoft.ru (unknown [10.150.0.9]) by imap.sh.mvista.com (Postfix) with ESMTP id 8E3CB3EC9; Thu, 17 May 2007 06:18:35 -0700 (PDT) From: Sergei Shtylyov Organization: MontaVista Software Inc. To: mingo@elte.hu, tglx@linutronix.de Subject: [PATCH 2.6.21-rt2] PowerPC: revert fix for threaded fasteoi IRQ handlers Date: Thu, 17 May 2007 17:20:08 +0400 User-Agent: KMail/1.5 Cc: linux-kernel@vger.kernel.org, linuxppc-dev@ozlabs.org, dwalker@mvista.com References: <200611192243.34850.sshtylyov@ru.mvista.com> In-Reply-To: <200611192243.34850.sshtylyov@ru.mvista.com> MIME-Version: 1.0 Content-Disposition: inline Message-Id: <200705171719.34968.sshtylyov@ru.mvista.com> Content-Type: text/plain; charset="us-ascii" X-Evolution-Source: imap://tglx%40linutronix.de@localhost:8993/ Content-Transfer-Encoding: 8bit Revert the change to the "fasteoi" type chips as after handle_fasteoi_irq() had been fixed, they've become meaningless (and even dangerous -- as was the case with Celleb that has been fixed earlier)... Signed-off-by: Sergei Shtylyov --- The patch in question wasn't even initially accepted but then was erroneously restored along with the TOD patch. I've asked to revert it but to no avail, so here's the formal patch to revert it at last... 
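The reasoning is easiest to see from the shape of the resulting irq_chip: once handle_fasteoi_irq() is fixed, the fasteoi flow only ever issues ->eoi, so a level-triggered controller needs no ->ack callback at all. A minimal sketch of such a chip (hypothetical callback names, not from the patch):

static struct irq_chip example_fasteoi_chip = {
	.name	= "example-pic",
	.mask	= example_mask_irq,	/* hypothetical helpers */
	.unmask	= example_unmask_irq,
	.eoi	= example_end_irq,	/* the only completion callback fasteoi uses */
};

Wiring the same end-of-interrupt routine to both .ack and .eoi, as the reverted change did, risks signalling EOI twice on any flow that does consume ->ack.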
arch/powerpc/platforms/iseries/irq.c | 1 - arch/powerpc/platforms/pseries/xics.c | 2 -- arch/powerpc/sysdev/mpic.c | 1 - 3 files changed, 4 deletions(-) Index: linux-rt-rebase.q/arch/powerpc/platforms/iseries/irq.c =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/platforms/iseries/irq.c +++ linux-rt-rebase.q/arch/powerpc/platforms/iseries/irq.c @@ -278,7 +278,6 @@ static struct irq_chip iseries_pic = { .shutdown = iseries_shutdown_IRQ, .unmask = iseries_enable_IRQ, .mask = iseries_disable_IRQ, - .ack = iseries_end_IRQ, .eoi = iseries_end_IRQ }; Index: linux-rt-rebase.q/arch/powerpc/platforms/pseries/xics.c =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/platforms/pseries/xics.c +++ linux-rt-rebase.q/arch/powerpc/platforms/pseries/xics.c @@ -461,7 +461,6 @@ static struct irq_chip xics_pic_direct = .startup = xics_startup, .mask = xics_mask_irq, .unmask = xics_unmask_irq, - .ack = xics_eoi_direct, .eoi = xics_eoi_direct, .set_affinity = xics_set_affinity }; @@ -472,7 +471,6 @@ static struct irq_chip xics_pic_lpar = { .startup = xics_startup, .mask = xics_mask_irq, .unmask = xics_unmask_irq, - .ack = xics_eoi_lpar, .eoi = xics_eoi_lpar, .set_affinity = xics_set_affinity }; Index: linux-rt-rebase.q/arch/powerpc/sysdev/mpic.c =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/sysdev/mpic.c +++ linux-rt-rebase.q/arch/powerpc/sysdev/mpic.c @@ -835,7 +835,6 @@ int mpic_set_irq_type(unsigned int virq, static struct irq_chip mpic_irq_chip = { .mask = mpic_mask_irq, .unmask = mpic_unmask_irq, - .ack = mpic_end_irq, .eoi = mpic_end_irq, .set_type = mpic_set_irq_type, }; patches/preempt-realtime-console.patch0000664000077200007720000000352010653433166017465 0ustar mingomingo--- drivers/video/console/fbcon.c | 5 +++-- include/linux/console.h | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) Index: linux-rt-rebase.q/drivers/video/console/fbcon.c =================================================================== --- linux-rt-rebase.q.orig/drivers/video/console/fbcon.c +++ linux-rt-rebase.q/drivers/video/console/fbcon.c @@ -1295,7 +1295,6 @@ static void fbcon_clear(struct vc_data * { struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]]; struct fbcon_ops *ops = info->fbcon_par; - struct display *p = &fb_display[vc->vc_num]; u_int y_break; @@ -1324,10 +1323,11 @@ static void fbcon_putcs(struct vc_data * struct display *p = &fb_display[vc->vc_num]; struct fbcon_ops *ops = info->fbcon_par; - if (!fbcon_is_inactive(vc, info)) + if (!fbcon_is_inactive(vc, info)) { ops->putcs(vc, info, s, count, real_y(p, ypos), xpos, get_color(vc, info, scr_readw(s), 1), get_color(vc, info, scr_readw(s), 0)); + } } static void fbcon_putc(struct vc_data *vc, int c, int ypos, int xpos) @@ -3311,6 +3311,7 @@ static const struct consw fb_con = { .con_screen_pos = fbcon_screen_pos, .con_getxy = fbcon_getxy, .con_resize = fbcon_resize, + .con_preemptible = 1, }; static struct notifier_block fbcon_event_notifier = { Index: linux-rt-rebase.q/include/linux/console.h =================================================================== --- linux-rt-rebase.q.orig/include/linux/console.h +++ linux-rt-rebase.q/include/linux/console.h @@ -54,6 +54,7 @@ struct consw { void (*con_invert_region)(struct vc_data *, u16 *, int); u16 *(*con_screen_pos)(struct vc_data *, int); unsigned long (*con_getxy)(struct vc_data *, unsigned long, int *, int *); + int 
con_preemptible; // can it reschedule from within printk? }; extern const struct consw *conswitchp; patches/kprobes-preempt-fix.patch0000664000077200007720000000263710653433161016457 0ustar mingomingo arch/i386/kernel/kprobes.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) Index: linux-rt-rebase.q/arch/i386/kernel/kprobes.c =================================================================== --- linux-rt-rebase.q.orig/arch/i386/kernel/kprobes.c +++ linux-rt-rebase.q/arch/i386/kernel/kprobes.c @@ -326,7 +326,7 @@ ss_probe: /* Boost up -- we can execute copied instructions directly */ reset_current_kprobe(); regs->eip = (unsigned long)p->ainsn.insn; - preempt_enable_no_resched(); + preempt_enable(); return 1; } #endif @@ -335,7 +335,7 @@ ss_probe: return 1; no_kprobe: - preempt_enable_no_resched(); + preempt_enable(); return ret; } @@ -566,7 +566,7 @@ static int __kprobes post_kprobe_handler } reset_current_kprobe(); out: - preempt_enable_no_resched(); + preempt_enable(); /* * if somebody else is singlestepping across a probe point, eflags @@ -600,7 +600,7 @@ static int __kprobes kprobe_fault_handle restore_previous_kprobe(kcb); else reset_current_kprobe(); - preempt_enable_no_resched(); + preempt_enable(); break; case KPROBE_HIT_ACTIVE: case KPROBE_HIT_SSDONE: @@ -734,7 +734,7 @@ int __kprobes longjmp_break_handler(stru *regs = kcb->jprobe_saved_regs; memcpy((kprobe_opcode_t *) stack_addr, kcb->jprobes_stack, MIN_STACK_SIZE(stack_addr)); - preempt_enable_no_resched(); + preempt_enable(); return 1; } return 0; patches/rt-slab-new.patch0000664000077200007720000011314410653433164014706 0ustar mingomingo new slab port. Signed-off-by: Ingo Molnar --- mm/slab.c | 496 +++++++++++++++++++++++++++++++++++++++----------------------- 1 file changed, 319 insertions(+), 177 deletions(-) Index: linux-rt-rebase.q/mm/slab.c =================================================================== --- linux-rt-rebase.q.orig/mm/slab.c +++ linux-rt-rebase.q/mm/slab.c @@ -116,6 +116,63 @@ #include /* + * On !PREEMPT_RT, raw irq flags are used as a per-CPU locking + * mechanism. + * + * On PREEMPT_RT, we use per-CPU locks for this. That's why the + * calling convention is changed slightly: a new 'flags' argument + * is passed to 'irq disable/enable' - the PREEMPT_RT code stores + * the CPU number of the lock there. + */ +#ifndef CONFIG_PREEMPT_RT +# define slab_irq_disable(cpu) \ + do { local_irq_disable(); (cpu) = smp_processor_id(); } while (0) +# define slab_irq_enable(cpu) local_irq_enable() +# define slab_irq_save(flags, cpu) \ + do { local_irq_save(flags); (cpu) = smp_processor_id(); } while (0) +# define slab_irq_restore(flags, cpu) local_irq_restore(flags) +/* + * In the __GFP_WAIT case we enable/disable interrupts on !PREEMPT_RT, + * which has no per-CPU locking effect since we are holding the cache + * lock in that case already. + * + * (On PREEMPT_RT, these are NOPs, but we have to drop/get the irq locks.) 
+ */ +# define slab_irq_disable_nort() local_irq_disable() +# define slab_irq_enable_nort() local_irq_enable() +# define slab_irq_disable_rt(flags) do { (void)(flags); } while (0) +# define slab_irq_enable_rt(flags) do { (void)(flags); } while (0) +# define slab_spin_lock_irq(lock, cpu) \ + do { spin_lock_irq(lock); (cpu) = smp_processor_id(); } while (0) +# define slab_spin_unlock_irq(lock, cpu) \ + spin_unlock_irq(lock) +# define slab_spin_lock_irqsave(lock, flags, cpu) \ + do { spin_lock_irqsave(lock, flags); (cpu) = smp_processor_id(); } while (0) +# define slab_spin_unlock_irqrestore(lock, flags, cpu) \ + do { spin_unlock_irqrestore(lock, flags); } while (0) +#else +DEFINE_PER_CPU_LOCKED(int, slab_irq_locks) = { 0, }; +# define slab_irq_disable(cpu) (void)get_cpu_var_locked(slab_irq_locks, &(cpu)) +# define slab_irq_enable(cpu) put_cpu_var_locked(slab_irq_locks, cpu) +# define slab_irq_save(flags, cpu) \ + do { slab_irq_disable(cpu); (void) (flags); } while (0) +# define slab_irq_restore(flags, cpu) \ + do { slab_irq_enable(cpu); (void) (flags); } while (0) +# define slab_irq_disable_rt(cpu) slab_irq_disable(cpu) +# define slab_irq_enable_rt(cpu) slab_irq_enable(cpu) +# define slab_irq_disable_nort() do { } while (0) +# define slab_irq_enable_nort() do { } while (0) +# define slab_spin_lock_irq(lock, cpu) \ + do { slab_irq_disable(cpu); spin_lock(lock); } while (0) +# define slab_spin_unlock_irq(lock, cpu) \ + do { spin_unlock(lock); slab_irq_enable(cpu); } while (0) +# define slab_spin_lock_irqsave(lock, flags, cpu) \ + do { slab_irq_disable(cpu); spin_lock_irqsave(lock, flags); } while (0) +# define slab_spin_unlock_irqrestore(lock, flags, cpu) \ + do { spin_unlock_irqrestore(lock, flags); slab_irq_enable(cpu); } while (0) +#endif + +/* * DEBUG - 1 for kmem_cache_create() to honour; SLAB_RED_ZONE & SLAB_POISON. * 0 for faster, smaller code (especially in the critical paths). 
* @@ -314,7 +371,7 @@ struct kmem_list3 __initdata initkmem_li static int drain_freelist(struct kmem_cache *cache, struct kmem_list3 *l3, int tofree); static void free_block(struct kmem_cache *cachep, void **objpp, int len, - int node); + int node, int *this_cpu); static int enable_cpucache(struct kmem_cache *cachep); static void cache_reap(struct work_struct *unused); @@ -758,9 +815,10 @@ int slab_is_available(void) static DEFINE_PER_CPU(struct delayed_work, reap_work); -static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep) +static inline struct array_cache * +cpu_cache_get(struct kmem_cache *cachep, int this_cpu) { - return cachep->array[smp_processor_id()]; + return cachep->array[this_cpu]; } static inline struct kmem_cache *__find_general_cachep(size_t size, @@ -993,7 +1051,7 @@ static int transfer_objects(struct array #ifndef CONFIG_NUMA #define drain_alien_cache(cachep, alien) do { } while (0) -#define reap_alien(cachep, l3) do { } while (0) +#define reap_alien(cachep, l3, this_cpu) do { } while (0) static inline struct array_cache **alloc_alien_cache(int node, int limit) { @@ -1004,7 +1062,8 @@ static inline void free_alien_cache(stru { } -static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) +static inline int +cache_free_alien(struct kmem_cache *cachep, void *objp, int *this_cpu) { return 0; } @@ -1016,14 +1075,15 @@ static inline void *alternate_node_alloc } static inline void *____cache_alloc_node(struct kmem_cache *cachep, - gfp_t flags, int nodeid) + gfp_t flags, int nodeid, int *this_cpu) { return NULL; } #else /* CONFIG_NUMA */ -static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int); +static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, + int nodeid, int *this_cpu); static void *alternate_node_alloc(struct kmem_cache *, gfp_t); static struct array_cache **alloc_alien_cache(int node, int limit) @@ -1065,7 +1125,8 @@ static void free_alien_cache(struct arra } static void __drain_alien_cache(struct kmem_cache *cachep, - struct array_cache *ac, int node) + struct array_cache *ac, int node, + int *this_cpu) { struct kmem_list3 *rl3 = cachep->nodelists[node]; @@ -1079,7 +1140,7 @@ static void __drain_alien_cache(struct k if (rl3->shared) transfer_objects(rl3->shared, ac, ac->limit); - free_block(cachep, ac->entry, ac->avail, node); + free_block(cachep, ac->entry, ac->avail, node, this_cpu); ac->avail = 0; spin_unlock(&rl3->list_lock); } @@ -1088,15 +1149,16 @@ static void __drain_alien_cache(struct k /* * Called from cache_reap() to regularly drain alien caches round robin. 
*/ -static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3) +static void +reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3, int *this_cpu) { - int node = __get_cpu_var(reap_node); + int node = per_cpu(reap_node, *this_cpu); if (l3->alien) { struct array_cache *ac = l3->alien[node]; if (ac && ac->avail && spin_trylock_irq(&ac->lock)) { - __drain_alien_cache(cachep, ac, node); + __drain_alien_cache(cachep, ac, node, this_cpu); spin_unlock_irq(&ac->lock); } } @@ -1105,21 +1167,22 @@ static void reap_alien(struct kmem_cache static void drain_alien_cache(struct kmem_cache *cachep, struct array_cache **alien) { - int i = 0; + int i = 0, this_cpu; struct array_cache *ac; unsigned long flags; for_each_online_node(i) { ac = alien[i]; if (ac) { - spin_lock_irqsave(&ac->lock, flags); - __drain_alien_cache(cachep, ac, i); - spin_unlock_irqrestore(&ac->lock, flags); + slab_spin_lock_irqsave(&ac->lock, flags, this_cpu); + __drain_alien_cache(cachep, ac, i, &this_cpu); + slab_spin_unlock_irqrestore(&ac->lock, flags, this_cpu); } } } -static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) +static inline int +cache_free_alien(struct kmem_cache *cachep, void *objp, int *this_cpu) { struct slab *slabp = virt_to_slab(objp); int nodeid = slabp->nodeid; @@ -1143,17 +1206,18 @@ static inline int cache_free_alien(struc spin_lock(&alien->lock); if (unlikely(alien->avail == alien->limit)) { STATS_INC_ACOVERFLOW(cachep); - __drain_alien_cache(cachep, alien, nodeid); + __drain_alien_cache(cachep, alien, nodeid, this_cpu); } alien->entry[alien->avail++] = objp; spin_unlock(&alien->lock); } else { spin_lock(&(cachep->nodelists[nodeid])->list_lock); - free_block(cachep, &objp, 1, nodeid); + free_block(cachep, &objp, 1, nodeid, this_cpu); spin_unlock(&(cachep->nodelists[nodeid])->list_lock); } return 1; } + #endif static int __cpuinit cpuup_callback(struct notifier_block *nfb, @@ -1164,6 +1228,7 @@ static int __cpuinit cpuup_callback(stru struct kmem_list3 *l3 = NULL; int node = cpu_to_node(cpu); const int memsize = sizeof(struct kmem_list3); + int this_cpu; switch (action) { case CPU_LOCK_ACQUIRE: @@ -1200,11 +1265,11 @@ static int __cpuinit cpuup_callback(stru cachep->nodelists[node] = l3; } - spin_lock_irq(&cachep->nodelists[node]->list_lock); + slab_spin_lock_irq(&cachep->nodelists[node]->list_lock, this_cpu); cachep->nodelists[node]->free_limit = (1 + nr_cpus_node(node)) * cachep->batchcount + cachep->num; - spin_unlock_irq(&cachep->nodelists[node]->list_lock); + slab_spin_unlock_irq(&cachep->nodelists[node]->list_lock, this_cpu); } /* @@ -1236,7 +1301,7 @@ static int __cpuinit cpuup_callback(stru l3 = cachep->nodelists[node]; BUG_ON(!l3); - spin_lock_irq(&l3->list_lock); + slab_spin_lock_irq(&l3->list_lock, this_cpu); if (!l3->shared) { /* * We are serialised from CPU_DEAD or @@ -1251,7 +1316,7 @@ static int __cpuinit cpuup_callback(stru alien = NULL; } #endif - spin_unlock_irq(&l3->list_lock); + slab_spin_unlock_irq(&l3->list_lock, this_cpu); kfree(shared); free_alien_cache(alien); } @@ -1295,6 +1360,7 @@ static int __cpuinit cpuup_callback(stru struct array_cache *nc; struct array_cache *shared; struct array_cache **alien; + int this_cpu; cpumask_t mask; mask = node_to_cpumask(node); @@ -1306,29 +1372,31 @@ static int __cpuinit cpuup_callback(stru if (!l3) goto free_array_cache; - spin_lock_irq(&l3->list_lock); + slab_spin_lock_irq(&l3->list_lock, this_cpu); /* Free limit for this kmem_list3 */ l3->free_limit -= cachep->batchcount; if (nc) - free_block(cachep, 
nc->entry, nc->avail, node); + free_block(cachep, nc->entry, nc->avail, node, + &this_cpu); if (!cpus_empty(mask)) { - spin_unlock_irq(&l3->list_lock); + slab_spin_unlock_irq(&l3->list_lock, + this_cpu); goto free_array_cache; } shared = l3->shared; if (shared) { free_block(cachep, shared->entry, - shared->avail, node); + shared->avail, node, &this_cpu); l3->shared = NULL; } alien = l3->alien; l3->alien = NULL; - spin_unlock_irq(&l3->list_lock); + slab_spin_unlock_irq(&l3->list_lock, this_cpu); kfree(shared); if (alien) { @@ -1370,11 +1438,13 @@ static void init_list(struct kmem_cache int nodeid) { struct kmem_list3 *ptr; + int this_cpu; ptr = kmalloc_node(sizeof(struct kmem_list3), GFP_KERNEL, nodeid); BUG_ON(!ptr); - local_irq_disable(); + WARN_ON(spin_is_locked(&list->list_lock)); + slab_irq_disable(this_cpu); memcpy(ptr, list, sizeof(struct kmem_list3)); /* * Do not assume that spinlocks can be initialized via memcpy: @@ -1383,7 +1453,7 @@ static void init_list(struct kmem_cache MAKE_ALL_LISTS(cachep, ptr, nodeid); cachep->nodelists[nodeid] = ptr; - local_irq_enable(); + slab_irq_enable(this_cpu); } /* @@ -1527,36 +1597,34 @@ void __init kmem_cache_init(void) /* 4) Replace the bootstrap head arrays */ { struct array_cache *ptr; + int this_cpu; ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL); - local_irq_disable(); - BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache); - memcpy(ptr, cpu_cache_get(&cache_cache), - sizeof(struct arraycache_init)); + slab_irq_disable(this_cpu); + BUG_ON(cpu_cache_get(&cache_cache, this_cpu) != &initarray_cache.cache); + memcpy(ptr, cpu_cache_get(&cache_cache, this_cpu), + sizeof(struct arraycache_init)); /* * Do not assume that spinlocks can be initialized via memcpy: */ spin_lock_init(&ptr->lock); - - cache_cache.array[smp_processor_id()] = ptr; - local_irq_enable(); + cache_cache.array[this_cpu] = ptr; + slab_irq_enable(this_cpu); ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL); - local_irq_disable(); - BUG_ON(cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep) - != &initarray_generic.cache); - memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep), - sizeof(struct arraycache_init)); + slab_irq_disable(this_cpu); + BUG_ON(cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep, this_cpu) + != &initarray_generic.cache); + memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep, this_cpu), + sizeof(struct arraycache_init)); /* * Do not assume that spinlocks can be initialized via memcpy: */ spin_lock_init(&ptr->lock); - - malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] = - ptr; - local_irq_enable(); + malloc_sizes[INDEX_AC].cs_cachep->array[this_cpu] = ptr; + slab_irq_enable(this_cpu); } /* 5) Replace the bootstrap kmem_list3's */ { @@ -1707,7 +1775,7 @@ static void store_stackinfo(struct kmem_ *addr++ = 0x12345678; *addr++ = caller; - *addr++ = smp_processor_id(); + *addr++ = raw_smp_processor_id(); size -= 3 * sizeof(unsigned long); { unsigned long *sptr = &caller; @@ -1862,7 +1930,11 @@ static void check_poison_obj(struct kmem } #endif +static void +__cache_free(struct kmem_cache *cachep, void *objp, int *this_cpu); + #if DEBUG + /** * slab_destroy_objs - destroy a slab and its objects * @cachep: cache pointer being destroyed @@ -1871,7 +1943,8 @@ static void check_poison_obj(struct kmem * Call the registered destructor for each object in a slab that is being * destroyed. 
*/ -static void slab_destroy_objs(struct kmem_cache *cachep, struct slab *slabp) +static void +slab_destroy_objs(struct kmem_cache *cachep, struct slab *slabp) { int i; for (i = 0; i < cachep->num; i++) { @@ -1914,7 +1987,8 @@ static void slab_destroy_objs(struct kme * Before calling the slab must have been unlinked from the cache. The * cache-lock is not held/needed. */ -static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp) +static void +slab_destroy(struct kmem_cache *cachep, struct slab *slabp, int *this_cpu) { void *addr = slabp->s_mem - slabp->colouroff; @@ -1928,8 +2002,12 @@ static void slab_destroy(struct kmem_cac call_rcu(&slab_rcu->head, kmem_rcu_free); } else { kmem_freepages(cachep, addr); - if (OFF_SLAB(cachep)) - kmem_cache_free(cachep->slabp_cache, slabp); + if (OFF_SLAB(cachep)) { + if (this_cpu) + __cache_free(cachep->slabp_cache, slabp, this_cpu); + else + kmem_cache_free(cachep->slabp_cache, slabp); + } } } @@ -2042,6 +2120,8 @@ static size_t calculate_slab_order(struc static int __init_refok setup_cpu_cache(struct kmem_cache *cachep) { + int this_cpu; + if (g_cpucache_up == FULL) return enable_cpucache(cachep); @@ -2085,10 +2165,12 @@ static int __init_refok setup_cpu_cache( jiffies + REAPTIMEOUT_LIST3 + ((unsigned long)cachep) % REAPTIMEOUT_LIST3; - cpu_cache_get(cachep)->avail = 0; - cpu_cache_get(cachep)->limit = BOOT_CPUCACHE_ENTRIES; - cpu_cache_get(cachep)->batchcount = 1; - cpu_cache_get(cachep)->touched = 0; + this_cpu = raw_smp_processor_id(); + + cpu_cache_get(cachep, this_cpu)->avail = 0; + cpu_cache_get(cachep, this_cpu)->limit = BOOT_CPUCACHE_ENTRIES; + cpu_cache_get(cachep, this_cpu)->batchcount = 1; + cpu_cache_get(cachep, this_cpu)->touched = 0; cachep->batchcount = 1; cachep->limit = BOOT_CPUCACHE_ENTRIES; return 0; @@ -2376,19 +2458,19 @@ EXPORT_SYMBOL(kmem_cache_create); #if DEBUG static void check_irq_off(void) { +/* + * On PREEMPT_RT we use locks to protect the per-CPU lists, + * and keep interrupts enabled. + */ +#ifndef CONFIG_PREEMPT_RT BUG_ON(!irqs_disabled()); +#endif } static void check_irq_on(void) { +#ifndef CONFIG_PREEMPT_RT BUG_ON(irqs_disabled()); -} - -static void check_spinlock_acquired(struct kmem_cache *cachep) -{ -#ifdef CONFIG_SMP - check_irq_off(); - assert_spin_locked(&cachep->nodelists[numa_node_id()]->list_lock); #endif } @@ -2403,7 +2485,6 @@ static void check_spinlock_acquired_node #else #define check_irq_off() do { } while(0) #define check_irq_on() do { } while(0) -#define check_spinlock_acquired(x) do { } while(0) #define check_spinlock_acquired_node(x, y) do { } while(0) #endif @@ -2411,26 +2492,60 @@ static void drain_array(struct kmem_cach struct array_cache *ac, int force, int node); -static void do_drain(void *arg) +static void __do_drain(void *arg, int this_cpu) { struct kmem_cache *cachep = arg; + int node = cpu_to_node(this_cpu); struct array_cache *ac; - int node = numa_node_id(); check_irq_off(); - ac = cpu_cache_get(cachep); + ac = cpu_cache_get(cachep, this_cpu); spin_lock(&cachep->nodelists[node]->list_lock); - free_block(cachep, ac->entry, ac->avail, node); + free_block(cachep, ac->entry, ac->avail, node, &this_cpu); spin_unlock(&cachep->nodelists[node]->list_lock); ac->avail = 0; } +#ifdef CONFIG_PREEMPT_RT +static void do_drain(void *arg, int this_cpu) +{ + __do_drain(arg, this_cpu); +} +#else +static void do_drain(void *arg) +{ + __do_drain(arg, smp_processor_id()); +} +#endif + +#ifdef CONFIG_PREEMPT_RT +/* + * execute func() for all CPUs. 
On PREEMPT_RT we dont actually have + * to run on the remote CPUs - we only have to take their CPU-locks. + * (This is a rare operation, so cacheline bouncing is not an issue.) + */ +static void +slab_on_each_cpu(void (*func)(void *arg, int this_cpu), void *arg) +{ + unsigned int i; + + check_irq_on(); + for_each_online_cpu(i) { + spin_lock(&__get_cpu_lock(slab_irq_locks, i)); + func(arg, i); + spin_unlock(&__get_cpu_lock(slab_irq_locks, i)); + } +} +#else +# define slab_on_each_cpu(func, cachep) on_each_cpu(func, cachep, 1, 1) +#endif + static void drain_cpu_caches(struct kmem_cache *cachep) { struct kmem_list3 *l3; int node; - on_each_cpu(do_drain, cachep, 1, 1); + slab_on_each_cpu(do_drain, cachep); check_irq_on(); for_each_online_node(node) { l3 = cachep->nodelists[node]; @@ -2455,16 +2570,16 @@ static int drain_freelist(struct kmem_ca struct kmem_list3 *l3, int tofree) { struct list_head *p; - int nr_freed; + int nr_freed, this_cpu; struct slab *slabp; nr_freed = 0; while (nr_freed < tofree && !list_empty(&l3->slabs_free)) { - spin_lock_irq(&l3->list_lock); + slab_spin_lock_irq(&l3->list_lock, this_cpu); p = l3->slabs_free.prev; if (p == &l3->slabs_free) { - spin_unlock_irq(&l3->list_lock); + slab_spin_unlock_irq(&l3->list_lock, this_cpu); goto out; } @@ -2473,13 +2588,9 @@ static int drain_freelist(struct kmem_ca BUG_ON(slabp->inuse); #endif list_del(&slabp->list); - /* - * Safe to drop the lock. The slab is no longer linked - * to the cache. - */ l3->free_objects -= cache->num; - spin_unlock_irq(&l3->list_lock); - slab_destroy(cache, slabp); + slab_destroy(cache, slabp, &this_cpu); + slab_spin_unlock_irq(&l3->list_lock, this_cpu); nr_freed++; } out: @@ -2731,8 +2842,8 @@ static void slab_map_pages(struct kmem_c * Grow (by 1) the number of slabs within a cache. This is called by * kmem_cache_alloc() when there are no active objs left in a cache. 
*/ -static int cache_grow(struct kmem_cache *cachep, - gfp_t flags, int nodeid, void *objp) +static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid, + void *objp, int *this_cpu) { struct slab *slabp; size_t offset; @@ -2761,7 +2872,8 @@ static int cache_grow(struct kmem_cache offset *= cachep->colour_off; if (local_flags & __GFP_WAIT) - local_irq_enable(); + slab_irq_enable_nort(); + slab_irq_enable_rt(*this_cpu); /* * The test for missing atomic flag is performed here, rather than @@ -2791,8 +2903,10 @@ static int cache_grow(struct kmem_cache cache_init_objs(cachep, slabp); + slab_irq_disable_rt(*this_cpu); if (local_flags & __GFP_WAIT) - local_irq_disable(); + slab_irq_disable_nort(); + check_irq_off(); spin_lock(&l3->list_lock); @@ -2805,8 +2919,9 @@ static int cache_grow(struct kmem_cache opps1: kmem_freepages(cachep, objp); failed: + slab_irq_disable_rt(*this_cpu); if (local_flags & __GFP_WAIT) - local_irq_disable(); + slab_irq_disable_nort(); return 0; } @@ -2926,7 +3041,8 @@ bad: #define check_slabp(x,y) do { } while(0) #endif -static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags) +static void * +cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags, int *this_cpu) { int batchcount; struct kmem_list3 *l3; @@ -2936,7 +3052,7 @@ static void *cache_alloc_refill(struct k node = numa_node_id(); check_irq_off(); - ac = cpu_cache_get(cachep); + ac = cpu_cache_get(cachep, *this_cpu); retry: batchcount = ac->batchcount; if (!ac->touched && batchcount > BATCHREFILL_LIMIT) { @@ -2947,7 +3063,7 @@ retry: */ batchcount = BATCHREFILL_LIMIT; } - l3 = cachep->nodelists[node]; + l3 = cachep->nodelists[cpu_to_node(*this_cpu)]; BUG_ON(ac->avail > 0 || !l3); spin_lock(&l3->list_lock); @@ -2970,7 +3086,7 @@ retry: slabp = list_entry(entry, struct slab, list); check_slabp(cachep, slabp); - check_spinlock_acquired(cachep); + check_spinlock_acquired_node(cachep, cpu_to_node(*this_cpu)); /* * The slab was either on partial or free list so @@ -2984,8 +3100,9 @@ retry: STATS_INC_ACTIVE(cachep); STATS_SET_HIGH(cachep); - ac->entry[ac->avail++] = slab_get_obj(cachep, slabp, - node); + ac->entry[ac->avail++] = + slab_get_obj(cachep, slabp, + cpu_to_node(*this_cpu)); } check_slabp(cachep, slabp); @@ -3004,10 +3121,10 @@ alloc_done: if (unlikely(!ac->avail)) { int x; - x = cache_grow(cachep, flags | GFP_THISNODE, node, NULL); + x = cache_grow(cachep, flags | GFP_THISNODE, cpu_to_node(*this_cpu), NULL, this_cpu); /* cache_grow can reenable interrupts, then ac could change. */ - ac = cpu_cache_get(cachep); + ac = cpu_cache_get(cachep, *this_cpu); if (!x && ac->avail == 0) /* no objects in sight? 
abort */ return NULL; @@ -3159,21 +3276,22 @@ static inline int should_failslab(struct #endif /* CONFIG_FAILSLAB */ -static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags) +static inline void * +____cache_alloc(struct kmem_cache *cachep, gfp_t flags, int *this_cpu) { void *objp; struct array_cache *ac; check_irq_off(); - ac = cpu_cache_get(cachep); + ac = cpu_cache_get(cachep, *this_cpu); if (likely(ac->avail)) { STATS_INC_ALLOCHIT(cachep); ac->touched = 1; objp = ac->entry[--ac->avail]; } else { STATS_INC_ALLOCMISS(cachep); - objp = cache_alloc_refill(cachep, flags); + objp = cache_alloc_refill(cachep, flags, this_cpu); } return objp; } @@ -3187,7 +3305,7 @@ static inline void *____cache_alloc(stru */ static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags) { - int nid_alloc, nid_here; + int nid_alloc, nid_here, this_cpu = raw_smp_processor_id(); if (in_interrupt() || (flags & __GFP_THISNODE)) return NULL; @@ -3197,7 +3315,7 @@ static void *alternate_node_alloc(struct else if (current->mempolicy) nid_alloc = slab_node(current->mempolicy); if (nid_alloc != nid_here) - return ____cache_alloc_node(cachep, flags, nid_alloc); + return ____cache_alloc_node(cachep, flags, nid_alloc, &this_cpu); return NULL; } @@ -3209,7 +3327,7 @@ static void *alternate_node_alloc(struct * allocator to do its reclaim / fallback magic. We then insert the * slab into the proper nodelist and then allocate from it. */ -static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags) +static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags, int *this_cpu) { struct zonelist *zonelist; gfp_t local_flags; @@ -3235,8 +3353,10 @@ retry: if (cpuset_zone_allowed_hardwall(*z, flags) && cache->nodelists[nid] && cache->nodelists[nid]->free_objects) - obj = ____cache_alloc_node(cache, - flags | GFP_THISNODE, nid); + + obj = ____cache_alloc_node(cache, + flags | GFP_THISNODE, nid, + this_cpu); } if (!obj) { @@ -3247,19 +3367,24 @@ retry: * set and go into memory reserves if necessary. 
*/ if (local_flags & __GFP_WAIT) - local_irq_enable(); + slab_irq_enable_nort(); + slab_irq_enable_rt(*this_cpu); + kmem_flagcheck(cache, flags); obj = kmem_getpages(cache, flags, -1); + + slab_irq_disable_rt(*this_cpu); if (local_flags & __GFP_WAIT) - local_irq_disable(); + slab_irq_disable_nort(); + if (obj) { /* * Insert into the appropriate per node queues */ nid = page_to_nid(virt_to_page(obj)); - if (cache_grow(cache, flags, nid, obj)) { + if (cache_grow(cache, flags, nid, obj, this_cpu)) { obj = ____cache_alloc_node(cache, - flags | GFP_THISNODE, nid); + flags | GFP_THISNODE, nid, this_cpu); if (!obj) /* * Another processor may allocate the @@ -3280,7 +3405,7 @@ retry: * A interface to enable slab creation on nodeid */ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, - int nodeid) + int nodeid, int *this_cpu) { struct list_head *entry; struct slab *slabp; @@ -3328,11 +3453,11 @@ retry: must_grow: spin_unlock(&l3->list_lock); - x = cache_grow(cachep, flags | GFP_THISNODE, nodeid, NULL); + x = cache_grow(cachep, flags | GFP_THISNODE, nodeid, NULL, this_cpu); if (x) goto retry; - return fallback_alloc(cachep, flags); + return fallback_alloc(cachep, flags, this_cpu); done: return obj; @@ -3354,39 +3479,41 @@ static __always_inline void * __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid, void *caller) { - unsigned long save_flags; + unsigned long irqflags; + int this_cpu; void *ptr; if (should_failslab(cachep, flags)) return NULL; cache_alloc_debugcheck_before(cachep, flags); - local_irq_save(save_flags); + + slab_irq_save(irqflags, this_cpu); if (unlikely(nodeid == -1)) - nodeid = numa_node_id(); + nodeid = cpu_to_node(this_cpu); if (unlikely(!cachep->nodelists[nodeid])) { /* Node not bootstrapped yet */ - ptr = fallback_alloc(cachep, flags); + ptr = fallback_alloc(cachep, flags, &this_cpu); goto out; } - if (nodeid == numa_node_id()) { + if (nodeid == cpu_to_node(this_cpu)) { /* * Use the locally cached objects if possible. * However ____cache_alloc does not allow fallback * to other nodes. It may fail while we still have * objects on other nodes available. */ - ptr = ____cache_alloc(cachep, flags); + ptr = ____cache_alloc(cachep, flags, &this_cpu); if (ptr) goto out; } /* ___cache_alloc_node can fall back to other nodes */ - ptr = ____cache_alloc_node(cachep, flags, nodeid); + ptr = ____cache_alloc_node(cachep, flags, nodeid, &this_cpu); out: - local_irq_restore(save_flags); + slab_irq_restore(irqflags, this_cpu); ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller); if (unlikely((flags & __GFP_ZERO) && ptr)) @@ -3396,7 +3523,7 @@ __cache_alloc_node(struct kmem_cache *ca } static __always_inline void * -__do_cache_alloc(struct kmem_cache *cache, gfp_t flags) +__do_cache_alloc(struct kmem_cache *cache, gfp_t flags, int *this_cpu) { void *objp; @@ -3405,24 +3532,24 @@ __do_cache_alloc(struct kmem_cache *cach if (objp) goto out; } - objp = ____cache_alloc(cache, flags); + objp = ____cache_alloc(cache, flags, this_cpu); /* * We may just have run out of memory on the local node. 
* ____cache_alloc_node() knows how to locate memory on other nodes */ - if (!objp) - objp = ____cache_alloc_node(cache, flags, numa_node_id()); - + if (!objp) + objp = ____cache_alloc_node(cache, flags, + cpu_to_node(*this_cpu), this_cpu); out: return objp; } #else static __always_inline void * -__do_cache_alloc(struct kmem_cache *cachep, gfp_t flags) +__do_cache_alloc(struct kmem_cache *cachep, gfp_t flags, int *this_cpu) { - return ____cache_alloc(cachep, flags); + return ____cache_alloc(cachep, flags, this_cpu); } #endif /* CONFIG_NUMA */ @@ -3431,15 +3558,16 @@ static __always_inline void * __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller) { unsigned long save_flags; + int this_cpu; void *objp; if (should_failslab(cachep, flags)) return NULL; cache_alloc_debugcheck_before(cachep, flags); - local_irq_save(save_flags); - objp = __do_cache_alloc(cachep, flags); - local_irq_restore(save_flags); + slab_irq_save(save_flags, this_cpu); + objp = __do_cache_alloc(cachep, flags, &this_cpu); + slab_irq_restore(save_flags, this_cpu); objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller); prefetchw(objp); @@ -3453,7 +3581,7 @@ __cache_alloc(struct kmem_cache *cachep, * Caller needs to acquire correct kmem_list's list_lock */ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects, - int node) + int node, int *this_cpu) { int i; struct kmem_list3 *l3; @@ -3482,7 +3610,7 @@ static void free_block(struct kmem_cache * a different cache, refer to comments before * alloc_slabmgmt. */ - slab_destroy(cachep, slabp); + slab_destroy(cachep, slabp, this_cpu); } else { list_add(&slabp->list, &l3->slabs_free); } @@ -3496,11 +3624,12 @@ static void free_block(struct kmem_cache } } -static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac) +static void +cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac, int *this_cpu) { int batchcount; struct kmem_list3 *l3; - int node = numa_node_id(); + int node = cpu_to_node(*this_cpu); batchcount = ac->batchcount; #if DEBUG @@ -3522,7 +3651,7 @@ static void cache_flusharray(struct kmem } } - free_block(cachep, ac->entry, batchcount, node); + free_block(cachep, ac->entry, batchcount, node, this_cpu); free_done: #if STATS { @@ -3551,14 +3680,15 @@ free_done: * Release an obj back to its cache. If the obj has a constructed state, it must * be in this state _before_ it is released. Called with disabled ints. 
*/ -static inline void __cache_free(struct kmem_cache *cachep, void *objp) +static void +__cache_free(struct kmem_cache *cachep, void *objp, int *this_cpu) { - struct array_cache *ac = cpu_cache_get(cachep); + struct array_cache *ac = cpu_cache_get(cachep, *this_cpu); check_irq_off(); objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0)); - if (cache_free_alien(cachep, objp)) + if (cache_free_alien(cachep, objp, this_cpu)) return; if (likely(ac->avail < ac->limit)) { @@ -3567,7 +3697,7 @@ static inline void __cache_free(struct k return; } else { STATS_INC_FREEMISS(cachep); - cache_flusharray(cachep, ac); + cache_flusharray(cachep, ac, this_cpu); ac->entry[ac->avail++] = objp; } } @@ -3725,13 +3855,14 @@ EXPORT_SYMBOL(__kmalloc); void kmem_cache_free(struct kmem_cache *cachep, void *objp) { unsigned long flags; + int this_cpu; BUG_ON(virt_to_cache(objp) != cachep); - local_irq_save(flags); + slab_irq_save(flags, this_cpu); debug_check_no_locks_freed(objp, obj_size(cachep)); - __cache_free(cachep, objp); - local_irq_restore(flags); + __cache_free(cachep, objp, &this_cpu); + slab_irq_restore(flags, this_cpu); } EXPORT_SYMBOL(kmem_cache_free); @@ -3748,15 +3879,16 @@ void kfree(const void *objp) { struct kmem_cache *c; unsigned long flags; + int this_cpu; if (unlikely(ZERO_OR_NULL_PTR(objp))) return; - local_irq_save(flags); + slab_irq_save(flags, this_cpu); kfree_debugcheck(objp); c = virt_to_cache(objp); debug_check_no_locks_freed(objp, obj_size(c)); - __cache_free(c, (void *)objp); - local_irq_restore(flags); + __cache_free(c, (void *)objp, &this_cpu); + slab_irq_restore(flags, this_cpu); } EXPORT_SYMBOL(kfree); @@ -3777,7 +3909,7 @@ EXPORT_SYMBOL_GPL(kmem_cache_name); */ static int alloc_kmemlist(struct kmem_cache *cachep) { - int node; + int node, this_cpu; struct kmem_list3 *l3; struct array_cache *new_shared; struct array_cache **new_alien = NULL; @@ -3805,11 +3937,11 @@ static int alloc_kmemlist(struct kmem_ca if (l3) { struct array_cache *shared = l3->shared; - spin_lock_irq(&l3->list_lock); + slab_spin_lock_irq(&l3->list_lock, this_cpu); if (shared) free_block(cachep, shared->entry, - shared->avail, node); + shared->avail, node, &this_cpu); l3->shared = new_shared; if (!l3->alien) { @@ -3818,7 +3950,7 @@ static int alloc_kmemlist(struct kmem_ca } l3->free_limit = (1 + nr_cpus_node(node)) * cachep->batchcount + cachep->num; - spin_unlock_irq(&l3->list_lock); + slab_spin_unlock_irq(&l3->list_lock, this_cpu); kfree(shared); free_alien_cache(new_alien); continue; @@ -3865,42 +3997,50 @@ struct ccupdate_struct { struct array_cache *new[NR_CPUS]; }; -static void do_ccupdate_local(void *info) +static void __do_ccupdate_local(void *info, int this_cpu) { struct ccupdate_struct *new = info; struct array_cache *old; check_irq_off(); - old = cpu_cache_get(new->cachep); + old = cpu_cache_get(new->cachep, this_cpu); + + new->cachep->array[this_cpu] = new->new[this_cpu]; + new->new[this_cpu] = old; +} - new->cachep->array[smp_processor_id()] = new->new[smp_processor_id()]; - new->new[smp_processor_id()] = old; +#ifdef CONFIG_PREEMPT_RT +static void do_ccupdate_local(void *arg, int this_cpu) +{ + __do_ccupdate_local(arg, this_cpu); } +#else +static void do_ccupdate_local(void *arg) +{ + __do_ccupdate_local(arg, smp_processor_id()); +} +#endif /* Always called with the cache_chain_mutex held */ static int do_tune_cpucache(struct kmem_cache *cachep, int limit, int batchcount, int shared) { - struct ccupdate_struct *new; - int i; - - new = kzalloc(sizeof(*new), GFP_KERNEL); - if 
(!new) - return -ENOMEM; + struct ccupdate_struct new; + int i, this_cpu; + memset(&new.new, 0, sizeof(new.new)); for_each_online_cpu(i) { - new->new[i] = alloc_arraycache(cpu_to_node(i), limit, + new.new[i] = alloc_arraycache(cpu_to_node(i), limit, batchcount); - if (!new->new[i]) { + if (!new.new[i]) { for (i--; i >= 0; i--) - kfree(new->new[i]); - kfree(new); + kfree(new.new[i]); return -ENOMEM; } } - new->cachep = cachep; + new.cachep = cachep; - on_each_cpu(do_ccupdate_local, (void *)new, 1, 1); + slab_on_each_cpu(do_ccupdate_local, (void *)&new); check_irq_on(); cachep->batchcount = batchcount; @@ -3908,15 +4048,15 @@ static int do_tune_cpucache(struct kmem_ cachep->shared = shared; for_each_online_cpu(i) { - struct array_cache *ccold = new->new[i]; + struct array_cache *ccold = new.new[i]; if (!ccold) continue; - spin_lock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock); - free_block(cachep, ccold->entry, ccold->avail, cpu_to_node(i)); - spin_unlock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock); + slab_spin_lock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock, this_cpu); + free_block(cachep, ccold->entry, ccold->avail, cpu_to_node(i), &this_cpu); + slab_spin_unlock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock, this_cpu); kfree(ccold); } - kfree(new); + return alloc_kmemlist(cachep); } @@ -3980,26 +4120,26 @@ static int enable_cpucache(struct kmem_c * if drain_array() is used on the shared array. */ void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3, - struct array_cache *ac, int force, int node) + struct array_cache *ac, int force, int node) { - int tofree; + int tofree, this_cpu; if (!ac || !ac->avail) return; if (ac->touched && !force) { ac->touched = 0; } else { - spin_lock_irq(&l3->list_lock); + slab_spin_lock_irq(&l3->list_lock, this_cpu); if (ac->avail) { tofree = force ? ac->avail : (ac->limit + 4) / 5; if (tofree > ac->avail) tofree = (ac->avail + 1) / 2; - free_block(cachep, ac->entry, tofree, node); + free_block(cachep, ac->entry, tofree, node, &this_cpu); ac->avail -= tofree; memmove(ac->entry, &(ac->entry[tofree]), sizeof(void *) * ac->avail); } - spin_unlock_irq(&l3->list_lock); + slab_spin_unlock_irq(&l3->list_lock, this_cpu); } } @@ -4017,11 +4157,12 @@ void drain_array(struct kmem_cache *cach */ static void cache_reap(struct work_struct *w) { + int this_cpu = raw_smp_processor_id(), node = cpu_to_node(this_cpu); struct kmem_cache *searchp; struct kmem_list3 *l3; - int node = numa_node_id(); struct delayed_work *work = container_of(w, struct delayed_work, work); + int work_done = 0; if (!mutex_trylock(&cache_chain_mutex)) /* Give up. Setup the next iteration. 
*/ @@ -4037,9 +4178,10 @@ static void cache_reap(struct work_struc */ l3 = searchp->nodelists[node]; - reap_alien(searchp, l3); + reap_alien(searchp, l3, &this_cpu); - drain_array(searchp, l3, cpu_cache_get(searchp), 0, node); + drain_array(searchp, l3, cpu_cache_get(searchp, this_cpu), + 0, node); /* * These are racy checks but it does not matter @@ -4128,7 +4270,7 @@ static int s_show(struct seq_file *m, vo unsigned long num_slabs, free_objects = 0, shared_avail = 0; const char *name; char *error = NULL; - int node; + int this_cpu, node; struct kmem_list3 *l3; active_objs = 0; @@ -4139,7 +4281,7 @@ static int s_show(struct seq_file *m, vo continue; check_irq_on(); - spin_lock_irq(&l3->list_lock); + slab_spin_lock_irq(&l3->list_lock, this_cpu); list_for_each_entry(slabp, &l3->slabs_full, list) { if (slabp->inuse != cachep->num && !error) @@ -4164,7 +4306,7 @@ static int s_show(struct seq_file *m, vo if (l3->shared) shared_avail += l3->shared->avail; - spin_unlock_irq(&l3->list_lock); + slab_spin_unlock_irq(&l3->list_lock, this_cpu); } num_slabs += active_slabs; num_objs = num_slabs * cachep->num; @@ -4360,7 +4502,7 @@ static int leaks_show(struct seq_file *m struct kmem_list3 *l3; const char *name; unsigned long *n = m->private; - int node; + int node, this_cpu; int i; if (!(cachep->flags & SLAB_STORE_USER)) @@ -4378,13 +4520,13 @@ static int leaks_show(struct seq_file *m continue; check_irq_on(); - spin_lock_irq(&l3->list_lock); + slab_spin_lock_irq(&l3->list_lock, this_cpu); list_for_each_entry(slabp, &l3->slabs_full, list) handle_slab(n, cachep, slabp); list_for_each_entry(slabp, &l3->slabs_partial, list) handle_slab(n, cachep, slabp); - spin_unlock_irq(&l3->list_lock); + slab_spin_unlock_irq(&l3->list_lock, this_cpu); } name = cachep->name; if (n[0] == n[1]) { patches/dont-unmask-io_apic.patch0000664000077200007720000000136310653433162016411 0ustar mingomingo--- arch/x86_64/kernel/io_apic.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) Index: linux-rt-rebase.q/arch/x86_64/kernel/io_apic.c =================================================================== --- linux-rt-rebase.q.orig/arch/x86_64/kernel/io_apic.c +++ linux-rt-rebase.q/arch/x86_64/kernel/io_apic.c @@ -1431,7 +1431,8 @@ static void ack_apic_level(unsigned int irq_complete_move(irq); #if defined(CONFIG_GENERIC_PENDING_IRQ) || defined(CONFIG_IRQBALANCE) /* If we are moving the irq we need to mask it */ - if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING)) { + if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING) && + !(irq_desc[irq].status & IRQ_INPROGRESS)) { do_unmask_irq = 1; mask_IO_APIC_irq(irq); } patches/hpet-force-enable-on-vt8235-37-chipsets.patch0000664000077200007720000001060110653433161021454 0ustar mingomingoFrom us15@os.inf.tu-dresden.de Tue Jun 12 14:31:48 2007 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.1.7-deb (2006-10-05) on debian X-Spam-Level: X-Spam-Status: No, score=0.2 required=5.0 tests=AWL,MAILTO_TO_SPAM_ADDR autolearn=no version=3.1.7-deb Received: from os.inf.tu-dresden.de (os.inf.tu-dresden.de [141.76.48.99]) (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits)) (No client certificate requested) by mail.tglx.de (Postfix) with ESMTP id 13C2565C292 for ; Tue, 12 Jun 2007 14:31:48 +0200 (CEST) Received: from nova.inf.tu-dresden.de ([141.76.48.73] helo=laptop.hypervisor.org) by os.inf.tu-dresden.de with esmtpsa (TLSv1:AES256-SHA:256) (Exim 4.67) id 1Hy5XI-0008Nr-CO for tglx@linutronix.de; Tue, 12 Jun 2007 14:31:48 +0200 Date: Tue, 12 Jun 2007 14:31:47 +0200 From: "Udo A. 
Steinberg" To: Thomas Gleixner Subject: [PATCH]: Force enable HPET on VT8235/8237 chipsets Message-ID: <20070612143147.2a6199c2@laptop.hypervisor.org> X-Mailer: X-Mailer 5.0 Gold Mime-Version: 1.0 Content-Type: multipart/signed; boundary=Sig_48zpQdK28xw1yvtSEbZ9tfp; protocol="application/pgp-signature"; micalg=PGP-SHA1 X-Evolution-Source: imap://tglx%40linutronix.de@localhost:8993/ --Sig_48zpQdK28xw1yvtSEbZ9tfp Content-Type: text/plain; charset=US-ASCII Content-Transfer-Encoding: quoted-printable This patch adds quirks to force enable HPET on Via VT8235 and VT8237 chipsets. The datasheet for 8237 documents HPET functionality (although wrongly) whereas HPET is undocumented for 8235. Tested on A7V880 (8237) and K7VT4A+ (8235) boards. Signed-off-by: Udo A. Steinberg --- quirks.c | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 66 insertions(+), 1 deletion(-) --- arch/i386/kernel/quirks.c | 68 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 67 insertions(+), 1 deletion(-) Index: linux-rt-rebase.q/arch/i386/kernel/quirks.c =================================================================== --- linux-rt-rebase.q.orig/arch/i386/kernel/quirks.c +++ linux-rt-rebase.q/arch/i386/kernel/quirks.c @@ -56,7 +56,8 @@ unsigned long force_hpet_address; static enum { NONE_FORCE_HPET_RESUME, OLD_ICH_FORCE_HPET_RESUME, - ICH_FORCE_HPET_RESUME + ICH_FORCE_HPET_RESUME, + VT8237_FORCE_HPET_RESUME } force_hpet_resume_type; static void __iomem *rcba_base; @@ -245,6 +246,68 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_I DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801EB_12, old_ich_force_enable_hpet); + +static void vt8237_force_hpet_resume(void) +{ + u32 val; + + if (!force_hpet_address || !cached_dev) + return; + + val = 0xfed00000 | 0x80; + pci_write_config_dword(cached_dev, 0x68, val); + + pci_read_config_dword(cached_dev, 0x68, &val); + if (val & 0x80) + printk(KERN_DEBUG "Force enabled HPET at resume\n"); + else + BUG(); +} + +static void vt8237_force_enable_hpet(struct pci_dev *dev) +{ + u32 val; + + if (hpet_address || force_hpet_address) + return; + + pci_read_config_dword(dev, 0x68, &val); + /* + * Bit 7 is HPET enable bit. + * Bit 31:10 is HPET base address (contrary to what datasheet claims) + */ + if (val & 0x80) { + force_hpet_address = (val & ~0x3ff); + printk(KERN_DEBUG "HPET at base address 0x%lx\n", + force_hpet_address); + return; + } + + /* + * HPET is disabled.
Trying enabling at FED00000 and check + * whether it sticks + */ + val = 0xfed00000 | 0x80; + pci_write_config_dword(dev, 0x68, val); + + pci_read_config_dword(dev, 0x68, &val); + if (val & 0x80) { + force_hpet_address = (val & ~0x3ff); + printk(KERN_DEBUG "Force enabled HPET at base address 0x%lx\n", + force_hpet_address); + cached_dev = dev; + force_hpet_resume_type = VT8237_FORCE_HPET_RESUME; + return; + } + + printk(KERN_DEBUG "Failed to force enable HPET\n"); +} + +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8235, + vt8237_force_enable_hpet); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8237, + vt8237_force_enable_hpet); + void force_hpet_resume(void) { switch (force_hpet_resume_type) { @@ -254,6 +317,9 @@ void force_hpet_resume(void) case OLD_ICH_FORCE_HPET_RESUME: return old_ich_force_hpet_resume(); + case VT8237_FORCE_HPET_RESUME: + return vt8237_force_hpet_resume(); + default: break; } patches/ich-force-hpet-late-initialization-of-hpet-after-quirk.patch0000664000077200007720000000500410653433161025100 0ustar mingomingoFrom: Venki Pallipadi Enable HPET later during boot, after the force detect in PCI quirks. Also add a call to repeat the force enabling at resume time. Signed-off-by: Venkatesh Pallipadi Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Andi Kleen Cc: john stultz Cc: Greg KH Signed-off-by: Andrew Morton --- arch/i386/kernel/hpet.c | 27 +++++++++++++++++++++++++-- include/asm-i386/hpet.h | 1 + 2 files changed, 26 insertions(+), 2 deletions(-) Index: linux-rt-rebase.q/arch/i386/kernel/hpet.c =================================================================== --- linux-rt-rebase.q.orig/arch/i386/kernel/hpet.c +++ linux-rt-rebase.q/arch/i386/kernel/hpet.c @@ -164,6 +164,7 @@ static struct clock_event_device hpet_cl .set_next_event = hpet_legacy_next_event, .shift = 32, .irq = 0, + .rating = 50, }; static void hpet_start_counter(void) @@ -178,6 +179,17 @@ static void hpet_start_counter(void) hpet_writel(cfg, HPET_CFG); } +static void hpet_resume_device(void) +{ + ich_force_hpet_resume(); +} + +static void hpet_restart_counter(void) +{ + hpet_resume_device(); + hpet_start_counter(); +} + static void hpet_enable_legacy_int(void) { unsigned long cfg = hpet_readl(HPET_CFG); @@ -299,7 +311,7 @@ static struct clocksource clocksource_hp .mask = HPET_MASK, .shift = HPET_SHIFT, .flags = CLOCK_SOURCE_IS_CONTINUOUS, - .resume = hpet_start_counter, + .resume = hpet_restart_counter, #ifdef CONFIG_X86_64 .vread = vread_hpet, #endif @@ -412,10 +424,21 @@ out_nohpet: */ static __init int hpet_late_init(void) { - if (!is_hpet_capable()) + if (boot_hpet_disable) return -ENODEV; + if (!hpet_address) { + if (!force_hpet_address) + return -ENODEV; + + hpet_address = force_hpet_address; + hpet_enable(); + if (!hpet_virt_address) + return -ENODEV; + } + hpet_reserve_platform_timers(hpet_readl(HPET_ID)); + return 0; } fs_initcall(hpet_late_init); Index: linux-rt-rebase.q/include/asm-i386/hpet.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-i386/hpet.h +++ linux-rt-rebase.q/include/asm-i386/hpet.h @@ -64,6 +64,7 @@ /* hpet memory map physical address */ extern unsigned long hpet_address; +extern unsigned long force_hpet_address; extern int is_hpet_enabled(void); extern int hpet_enable(void); extern unsigned long hpet_readl(unsigned long a); patches/write-try-lock-irqsave.patch0000664000077200007720000000120110653433162017103 0ustar mingomingo--- include/linux/spinlock.h | 7 +++++++ 1 file changed, 7 
insertions(+) Index: linux-rt-rebase.q/include/linux/spinlock.h =================================================================== --- linux-rt-rebase.q.orig/include/linux/spinlock.h +++ linux-rt-rebase.q/include/linux/spinlock.h @@ -289,6 +289,13 @@ do { \ 1 : ({ local_irq_restore(flags); 0; }); \ }) +#define write_trylock_irqsave(lock, flags) \ +({ \ + local_irq_save(flags); \ + write_trylock(lock) ? \ + 1 : ({ local_irq_restore(flags); 0; }); \ +}) + /* * Locks two spinlocks l1 and l2. * l1_first indicates if spinlock l1 should be taken first. patches/x86_64-consolidate-tsc-calibration.patch0000664000077200007720000001551310653433161021065 0ustar mingomingoSubject: x86_64: Consolidate tsc calibration Move the TSC calibration code to tsc.c. Reimplement it so the pm timer can be used as a reference as well. Signed-off-by: Thomas Gleixner Signed-off-by: Chris Wright Signed-off-by: Ingo Molnar --- arch/x86_64/kernel/hpet.c | 49 ------------------------- arch/x86_64/kernel/time.c | 33 +--------------- arch/x86_64/kernel/tsc.c | 90 ++++++++++++++++++++++++++++++++++++++++++++++ include/asm-i386/tsc.h | 4 ++ 4 files changed, 96 insertions(+), 80 deletions(-) Index: linux-rt-rebase.q/arch/x86_64/kernel/hpet.c =================================================================== --- linux-rt-rebase.q.orig/arch/x86_64/kernel/hpet.c +++ linux-rt-rebase.q/arch/x86_64/kernel/hpet.c @@ -184,55 +184,6 @@ int hpet_reenable(void) return hpet_timer_stop_set_go(hpet_tick); } -/* - * calibrate_tsc() calibrates the processor TSC in a very simple way, comparing - * it to the HPET timer of known frequency. - */ - -#define TICK_COUNT 100000000 -#define SMI_THRESHOLD 50000 -#define MAX_TRIES 5 - -/* - * Some platforms take periodic SMI interrupts with 5ms duration. Make sure none - * occurs between the reads of the hpet & TSC. - */ -static void __init read_hpet_tsc(int *hpet, int *tsc) -{ - int tsc1, tsc2, hpet1, i; - - for (i = 0; i < MAX_TRIES; i++) { - tsc1 = get_cycles_sync(); - hpet1 = hpet_readl(HPET_COUNTER); - tsc2 = get_cycles_sync(); - if ((tsc2 - tsc1) < SMI_THRESHOLD) - break; - } - *hpet = hpet1; - *tsc = tsc2; -} - -unsigned int __init hpet_calibrate_tsc(void) -{ - int tsc_start, hpet_start; - int tsc_now, hpet_now; - unsigned long flags; - - local_irq_save(flags); - - read_hpet_tsc(&hpet_start, &tsc_start); - - do { - local_irq_disable(); - read_hpet_tsc(&hpet_now, &tsc_now); - local_irq_restore(flags); - } while ((tsc_now - tsc_start) < TICK_COUNT && - (hpet_now - hpet_start) < TICK_COUNT); - - return (tsc_now - tsc_start) * 1000000000L - / ((hpet_now - hpet_start) * hpet_period / 1000); -} - #ifdef CONFIG_HPET_EMULATE_RTC /* HPET in LegacyReplacement Mode eats up RTC interrupt line. When, HPET * is enabled, we support RTC interrupt functionality in software. Index: linux-rt-rebase.q/arch/x86_64/kernel/time.c =================================================================== --- linux-rt-rebase.q.orig/arch/x86_64/kernel/time.c +++ linux-rt-rebase.q/arch/x86_64/kernel/time.c @@ -292,35 +292,6 @@ static unsigned int __init tsc_calibrate return pmc_now * tsc_khz / (tsc_now - tsc_start); } -/* - * pit_calibrate_tsc() uses the speaker output (channel 2) of - * the PIT. This is better than using the timer interrupt output, - * because we can read the value of the speaker with just one inb(), - * where we need three i/o operations for the interrupt channel. - * We count how many ticks the TSC does in 50 ms. 
- */ - -static unsigned int __init pit_calibrate_tsc(void) -{ - unsigned long start, end; - unsigned long flags; - - spin_lock_irqsave(&i8253_lock, flags); - - outb((inb(0x61) & ~0x02) | 0x01, 0x61); - - outb(0xb0, 0x43); - outb((PIT_TICK_RATE / (1000 / 50)) & 0xff, 0x42); - outb((PIT_TICK_RATE / (1000 / 50)) >> 8, 0x42); - start = get_cycles_sync(); - while ((inb(0x61) & 0x20) == 0); - end = get_cycles_sync(); - - spin_unlock_irqrestore(&i8253_lock, flags); - - return (end - start) / 50; -} - #define PIT_MODE 0x43 #define PIT_CH0 0x40 @@ -376,14 +347,14 @@ void __init time_init(void) if (hpet_use_timer) { /* set tick_nsec to use the proper rate for HPET */ tick_nsec = TICK_NSEC_HPET; - tsc_khz = hpet_calibrate_tsc(); timename = "HPET"; } else { pit_init(); - tsc_khz = pit_calibrate_tsc(); timename = "PIT"; } + tsc_calibrate(); + cpu_khz = tsc_khz; if (cpu_has(&boot_cpu_data, X86_FEATURE_CONSTANT_TSC) && boot_cpu_data.x86_vendor == X86_VENDOR_AMD && Index: linux-rt-rebase.q/arch/x86_64/kernel/tsc.c =================================================================== --- linux-rt-rebase.q.orig/arch/x86_64/kernel/tsc.c +++ linux-rt-rebase.q/arch/x86_64/kernel/tsc.c @@ -6,7 +6,9 @@ #include #include #include +#include +#include #include static int notsc __initdata = 0; @@ -118,6 +120,94 @@ core_initcall(cpufreq_tsc); #endif +#define MAX_RETRIES 5 +#define SMI_TRESHOLD 50000 + +/* + * Read TSC and the reference counters. Take care of SMI disturbance + */ +static unsigned long __init tsc_read_refs(unsigned long *pm, + unsigned long *hpet) +{ + unsigned long t1, t2; + int i; + + for (i = 0; i < MAX_RETRIES; i++) { + t1 = get_cycles_sync(); + if (hpet) + *hpet = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF; + else + *pm = acpi_pm_read_early(); + t2 = get_cycles_sync(); + if ((t2 - t1) < SMI_TRESHOLD) + return t2; + } + return ULONG_MAX; +} + +/** + * tsc_calibrate - calibrate the tsc on boot + */ +void __init tsc_calibrate(void) +{ + unsigned long flags, tsc1, tsc2, tr1, tr2, pm1, pm2, hpet1, hpet2; + int hpet = is_hpet_enabled(); + + local_irq_save(flags); + + tsc1 = tsc_read_refs(&pm1, hpet ? &hpet1 : NULL); + + outb((inb(0x61) & ~0x02) | 0x01, 0x61); + + outb(0xb0, 0x43); + outb((CLOCK_TICK_RATE / (1000 / 50)) & 0xff, 0x42); + outb((CLOCK_TICK_RATE / (1000 / 50)) >> 8, 0x42); + tr1 = get_cycles_sync(); + while ((inb(0x61) & 0x20) == 0); + tr2 = get_cycles_sync(); + + tsc2 = tsc_read_refs(&pm2, hpet ? &hpet2 : NULL); + + local_irq_restore(flags); + + /* + * Preset the result with the raw and inaccurate PIT + * calibration value + */ + tsc_khz = (tr2 - tr1) / 50; + + /* hpet or pmtimer available ? */ + if (!hpet && !pm1 && !pm2) { + printk(KERN_INFO "TSC calibrated against PIT\n"); + return; + } + + /* Check, whether the sampling was disturbed by an SMI */ + if (tsc1 == ULONG_MAX || tsc2 == ULONG_MAX) { + printk(KERN_WARNING "TSC calibration disturbed by SMI, " + "using PIT calibration result\n"); + return; + } + + tsc2 = (tsc2 - tsc1) * 1000000L; + + if (hpet) { + printk(KERN_INFO "TSC calibrated against HPET\n"); + if (hpet2 < hpet1) + hpet2 += 0x100000000; + hpet2 -= hpet1; + tsc1 = (hpet2 * hpet_readl(HPET_PERIOD)) / 1000000; + } else { + printk(KERN_INFO "TSC calibrated against PM_TIMER\n"); + if (pm2 < pm1) + pm2 += ACPI_PM_OVRRUN; + pm2 -= pm1; + tsc1 = (pm2 * 1000000000) / PMTMR_TICKS_PER_SEC; + } + + tsc_khz = tsc2 / tsc1; +} + /* * Make an educated guess if the TSC is trustworthy and synchronized * over all CPUs. 
Index: linux-rt-rebase.q/include/asm-i386/tsc.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-i386/tsc.h +++ linux-rt-rebase.q/include/asm-i386/tsc.h @@ -72,4 +72,8 @@ int check_tsc_unstable(void); extern void check_tsc_sync_source(int cpu); extern void check_tsc_sync_target(void); +#ifdef CONFIG_X86_64 +extern void tsc_calibrate(void); +#endif + #endif patches/preempt-irqs-softirq-in-hardirq.patch0000664000077200007720000000415610653433164020726 0ustar mingomingo--- include/linux/interrupt.h | 1 + kernel/softirq.c | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) Index: linux-rt-rebase.q/include/linux/interrupt.h =================================================================== --- linux-rt-rebase.q.orig/include/linux/interrupt.h +++ linux-rt-rebase.q/include/linux/interrupt.h @@ -290,6 +290,7 @@ struct softirq_action asmlinkage void do_softirq(void); extern void open_softirq(int nr, void (*action)(struct softirq_action*), void *data); extern void softirq_init(void); +extern void do_softirq_from_hardirq(void); #ifdef CONFIG_PREEMPT_HARDIRQS # define __raise_softirq_irqoff(nr) raise_softirq_irqoff(nr) Index: linux-rt-rebase.q/kernel/softirq.c =================================================================== --- linux-rt-rebase.q.orig/kernel/softirq.c +++ linux-rt-rebase.q/kernel/softirq.c @@ -296,6 +296,8 @@ restart: asmlinkage void __do_softirq(void) { + unsigned long p_flags; + #ifdef CONFIG_PREEMPT_SOFTIRQS /* * 'preempt harder'. Push all softirq processing off to ksoftirqd. @@ -311,6 +313,38 @@ asmlinkage void __do_softirq(void) */ __local_bh_disable((unsigned long)__builtin_return_address(0)); trace_softirq_enter(); + p_flags = current->flags & PF_HARDIRQ; + current->flags &= ~PF_HARDIRQ; + + ___do_softirq(); + + trace_softirq_exit(); + + account_system_vtime(current); + _local_bh_enable(); + + current->flags |= p_flags; +} + +/* + * Process softirqs straight from hardirq context, + * without having to switch to a softirq thread. + * This can reduce the context-switch rate. + * + * NOTE: this is unused right now. 
+ */ +void do_softirq_from_hardirq(void) +{ + unsigned long p_flags; + + if (!local_softirq_pending()) + return; + /* + * 'immediate' softirq execution: + */ + __local_bh_disable((unsigned long)__builtin_return_address(0)); + p_flags = current->flags & PF_HARDIRQ; + current->flags &= ~PF_HARDIRQ; ___do_softirq(); @@ -319,6 +353,7 @@ asmlinkage void __do_softirq(void) account_system_vtime(current); _local_bh_enable(); + current->flags |= p_flags; } #ifndef __ARCH_HAS_DO_SOFTIRQ patches/nmi-profiling-base.patch0000664000077200007720000003107410653433162016234 0ustar mingomingoSubject: [patch] nmi-driven profiling for /proc/profile From: Ingo Molnar nmi-driven profiling for /proc/profile Signed-off-by: Ingo Molnar --- arch/i386/kernel/crash.c | 8 --- arch/i386/kernel/nmi.c | 91 +++++++++++++++++++++++++++++++++++++++++---- arch/x86_64/kernel/crash.c | 5 -- arch/x86_64/kernel/irq.c | 2 arch/x86_64/kernel/nmi.c | 67 +++++++++++++++++++++++++++++++-- include/asm-i386/apic.h | 2 include/asm-x86_64/apic.h | 2 include/linux/profile.h | 1 kernel/profile.c | 9 ++-- kernel/time/tick-common.c | 1 kernel/time/tick-sched.c | 2 11 files changed, 159 insertions(+), 31 deletions(-) Index: linux-rt-rebase.q/arch/i386/kernel/crash.c =================================================================== --- linux-rt-rebase.q.orig/arch/i386/kernel/crash.c +++ linux-rt-rebase.q/arch/i386/kernel/crash.c @@ -70,14 +70,6 @@ static int crash_nmi_callback(struct not return 1; } -static void smp_send_nmi_allbutself(void) -{ - cpumask_t mask = cpu_online_map; - cpu_clear(safe_smp_processor_id(), mask); - if (!cpus_empty(mask)) - send_IPI_mask(mask, NMI_VECTOR); -} - static struct notifier_block crash_nmi_nb = { .notifier_call = crash_nmi_callback, }; Index: linux-rt-rebase.q/arch/i386/kernel/nmi.c =================================================================== --- linux-rt-rebase.q.orig/arch/i386/kernel/nmi.c +++ linux-rt-rebase.q/arch/i386/kernel/nmi.c @@ -28,6 +28,8 @@ #include #include +#include + #include "mach_traps.h" int unknown_nmi_panic; @@ -44,7 +46,7 @@ static cpumask_t backtrace_mask = CPU_MA atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ unsigned int nmi_watchdog = NMI_DEFAULT; -static unsigned int nmi_hz = HZ; +static unsigned int nmi_hz = 1000; static DEFINE_PER_CPU(short, wd_enabled); @@ -95,7 +97,7 @@ static int __init check_nmi_watchdog(voi for_each_possible_cpu(cpu) prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count; local_irq_enable(); - mdelay((20*1000)/nmi_hz); // wait 20 ticks + mdelay((100*1000)/nmi_hz); /* wait 100 ticks */ for_each_possible_cpu(cpu) { #ifdef CONFIG_SMP @@ -319,9 +321,48 @@ EXPORT_SYMBOL(touch_nmi_watchdog); extern void die_nmi(struct pt_regs *, const char *msg); -__kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) +int nmi_show_regs[NR_CPUS]; + +void nmi_show_all_regs(void) { + int i; + + if (system_state == SYSTEM_BOOTING) + return; + + printk(KERN_WARNING "nmi_show_all_regs(): start on CPU#%d.\n", + raw_smp_processor_id()); + dump_stack(); + + for_each_online_cpu(i) + nmi_show_regs[i] = 1; + + smp_send_nmi_allbutself(); + + for_each_online_cpu(i) { + while (nmi_show_regs[i] == 1) + barrier(); + } +} + +static DEFINE_SPINLOCK(nmi_print_lock); + +void irq_show_regs_callback(int cpu, struct pt_regs *regs) +{ + if (!nmi_show_regs[cpu]) + return; + nmi_show_regs[cpu] = 0; + spin_lock(&nmi_print_lock); + printk(KERN_WARNING "NMI show regs on CPU#%d:\n", cpu); + printk(KERN_WARNING "apic_timer_irqs: %d\n", + per_cpu(irq_stat, 
cpu).apic_timer_irqs); + show_regs(regs); + spin_unlock(&nmi_print_lock); +} + +__kprobes int nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) +{ /* * Since current_thread_info()-> is always on the stack, and we * always switch the stack NMI-atomically, it's safe to use @@ -332,6 +373,8 @@ __kprobes int nmi_watchdog_tick(struct p int cpu = smp_processor_id(); int rc=0; + __profile_tick(CPU_PROFILING, regs); + /* check for other users first */ if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) { @@ -355,6 +398,9 @@ __kprobes int nmi_watchdog_tick(struct p */ sum = per_cpu(irq_stat, cpu).apic_timer_irqs + kstat_cpu(cpu).irqs[0]; + irq_show_regs_callback(cpu, regs); + + /* if the apic timer isn't firing, this cpu isn't doing much */ /* if the none of the timers isn't firing, this cpu isn't doing much */ if (!touched && last_irq_sums[cpu] == sum) { /* @@ -362,11 +408,30 @@ __kprobes int nmi_watchdog_tick(struct p * wait a few IRQs (5 seconds) before doing the oops ... */ alert_counter[cpu]++; - if (alert_counter[cpu] == 5*nmi_hz) - /* - * die_nmi will return ONLY if NOTIFY_STOP happens.. - */ - die_nmi(regs, "BUG: NMI Watchdog detected LOCKUP"); + if (alert_counter[cpu] && !(alert_counter[cpu] % (5*nmi_hz))) { + int i; + + spin_lock(&nmi_print_lock); + printk(KERN_WARNING "NMI watchdog detected lockup on " + "CPU#%d (%d/%d)\n", cpu, alert_counter[cpu], + 5*nmi_hz); + show_regs(regs); + spin_unlock(&nmi_print_lock); + + for_each_online_cpu(i) { + if (i == cpu) + continue; + nmi_show_regs[i] = 1; + while (nmi_show_regs[i] == 1) + cpu_relax(); + } + printk(KERN_WARNING "NMI watchdog running again ...\n"); + for_each_online_cpu(i) + alert_counter[i] = 0; + + + } + } else { last_irq_sums[cpu] = sum; alert_counter[cpu] = 0; @@ -464,5 +529,15 @@ void __trigger_all_cpu_backtrace(void) } } +void smp_send_nmi_allbutself(void) +{ +#ifdef CONFIG_SMP + cpumask_t mask = cpu_online_map; + cpu_clear(safe_smp_processor_id(), mask); + if (!cpus_empty(mask)) + send_IPI_mask(mask, NMI_VECTOR); +#endif +} + EXPORT_SYMBOL(nmi_active); EXPORT_SYMBOL(nmi_watchdog); Index: linux-rt-rebase.q/arch/x86_64/kernel/crash.c =================================================================== --- linux-rt-rebase.q.orig/arch/x86_64/kernel/crash.c +++ linux-rt-rebase.q/arch/x86_64/kernel/crash.c @@ -62,11 +62,6 @@ static int crash_nmi_callback(struct not return 1; } -static void smp_send_nmi_allbutself(void) -{ - send_IPI_allbutself(NMI_VECTOR); -} - /* * This code is a best effort heuristic to get the * other cpus to stop executing. 
So races with Index: linux-rt-rebase.q/arch/x86_64/kernel/irq.c =================================================================== --- linux-rt-rebase.q.orig/arch/x86_64/kernel/irq.c +++ linux-rt-rebase.q/arch/x86_64/kernel/irq.c @@ -111,6 +111,8 @@ asmlinkage unsigned int do_IRQ(struct pt unsigned vector = ~regs->orig_rax; unsigned irq; + irq_show_regs_callback(smp_processor_id(), regs); + exit_idle(); irq_enter(); irq = __get_cpu_var(vector_irq)[vector]; Index: linux-rt-rebase.q/arch/x86_64/kernel/nmi.c =================================================================== --- linux-rt-rebase.q.orig/arch/x86_64/kernel/nmi.c +++ linux-rt-rebase.q/arch/x86_64/kernel/nmi.c @@ -22,11 +22,13 @@ #include #include #include +#include #include #include #include #include +#include int unknown_nmi_panic; int nmi_watchdog_enabled; @@ -44,7 +46,7 @@ atomic_t nmi_active = ATOMIC_INIT(0); / int panic_on_timeout; unsigned int nmi_watchdog = NMI_DEFAULT; -static unsigned int nmi_hz = HZ; +static unsigned int nmi_hz = 1000; static DEFINE_PER_CPU(short, wd_enabled); @@ -302,7 +304,7 @@ void touch_nmi_watchdog(void) unsigned cpu; /* - * Tell other CPUs to reset their alert counters. We cannot + * Tell other CPUs to reset their alert counters. We cannot * do it ourselves because the alert count increase is not * atomic. */ @@ -312,7 +314,42 @@ void touch_nmi_watchdog(void) } } - touch_softlockup_watchdog(); + touch_softlockup_watchdog(); +} + +int nmi_show_regs[NR_CPUS]; + +void nmi_show_all_regs(void) +{ + int i; + + if (system_state == SYSTEM_BOOTING) + return; + + smp_send_nmi_allbutself(); + + for_each_online_cpu(i) + nmi_show_regs[i] = 1; + + for_each_online_cpu(i) { + while (nmi_show_regs[i] == 1) + barrier(); + } +} + +static DEFINE_SPINLOCK(nmi_print_lock); + +void irq_show_regs_callback(int cpu, struct pt_regs *regs) +{ + if (!nmi_show_regs[cpu]) + return; + + nmi_show_regs[cpu] = 0; + spin_lock(&nmi_print_lock); + printk(KERN_WARNING "NMI show regs on CPU#%d:\n", cpu); + printk(KERN_WARNING "apic_timer_irqs: %d\n", read_pda(apic_timer_irqs)); + show_regs(regs); + spin_unlock(&nmi_print_lock); } int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) @@ -322,6 +359,9 @@ int __kprobes nmi_watchdog_tick(struct p int cpu = smp_processor_id(); int rc = 0; + irq_show_regs_callback(cpu, regs); + __profile_tick(CPU_PROFILING, regs); + /* check for other users first */ if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) { @@ -330,6 +370,7 @@ int __kprobes nmi_watchdog_tick(struct p } sum = read_pda(apic_timer_irqs); + if (__get_cpu_var(nmi_touch)) { __get_cpu_var(nmi_touch) = 0; touched = 1; @@ -358,9 +399,20 @@ int __kprobes nmi_watchdog_tick(struct p * wait a few IRQs (5 seconds) before doing the oops ... 
*/ local_inc(&__get_cpu_var(alert_counter)); - if (local_read(&__get_cpu_var(alert_counter)) == 5*nmi_hz) + if (local_read(&__get_cpu_var(alert_counter)) == 5*nmi_hz) { + int i; + + for_each_online_cpu(i) { + if (i == cpu) + continue; + nmi_show_regs[i] = 1; + while (nmi_show_regs[i] == 1) + cpu_relax(); + } + die_nmi("NMI Watchdog detected LOCKUP on CPU %d\n", regs, panic_on_timeout); + } } else { __get_cpu_var(last_irq_sum) = sum; local_set(&__get_cpu_var(alert_counter), 0); @@ -478,6 +530,13 @@ void __trigger_all_cpu_backtrace(void) } } +void smp_send_nmi_allbutself(void) +{ +#ifdef CONFIG_SMP + send_IPI_allbutself(NMI_VECTOR); +#endif +} + EXPORT_SYMBOL(nmi_active); EXPORT_SYMBOL(nmi_watchdog); EXPORT_SYMBOL(touch_nmi_watchdog); Index: linux-rt-rebase.q/include/asm-i386/apic.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-i386/apic.h +++ linux-rt-rebase.q/include/asm-i386/apic.h @@ -116,6 +116,8 @@ extern void enable_NMI_through_LVT0 (voi extern int timer_over_8254; extern int local_apic_timer_c2_ok; +extern void smp_send_nmi_allbutself(void); + #else /* !CONFIG_X86_LOCAL_APIC */ static inline void lapic_shutdown(void) { } Index: linux-rt-rebase.q/include/asm-x86_64/apic.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-x86_64/apic.h +++ linux-rt-rebase.q/include/asm-x86_64/apic.h @@ -85,6 +85,8 @@ extern void setup_APIC_extended_lvt(unsi extern int apic_is_clustered_box(void); +extern void smp_send_nmi_allbutself(void); + #define K8_APIC_EXT_LVT_BASE 0x500 #define K8_APIC_EXT_INT_MSG_FIX 0x0 #define K8_APIC_EXT_INT_MSG_SMI 0x2 Index: linux-rt-rebase.q/include/linux/profile.h =================================================================== --- linux-rt-rebase.q.orig/include/linux/profile.h +++ linux-rt-rebase.q/include/linux/profile.h @@ -23,6 +23,7 @@ struct notifier_block; /* init basic kernel profiler */ void __init profile_init(void); +void __profile_tick(int type, struct pt_regs *regs); void profile_tick(int); /* Index: linux-rt-rebase.q/kernel/profile.c =================================================================== --- linux-rt-rebase.q.orig/kernel/profile.c +++ linux-rt-rebase.q/kernel/profile.c @@ -407,16 +407,19 @@ void profile_hits(int type, void *__pc, EXPORT_SYMBOL_GPL(profile_hits); -void profile_tick(int type) +void __profile_tick(int type, struct pt_regs *regs) { - struct pt_regs *regs = get_irq_regs(); - if (type == CPU_PROFILING && timer_hook) timer_hook(regs); if (!user_mode(regs) && cpu_isset(smp_processor_id(), prof_cpu_mask)) profile_hit(type, (void *)profile_pc(regs)); } +void profile_tick(int type) +{ + return __profile_tick(type, get_irq_regs()); +} + #ifdef CONFIG_PROC_FS #include #include Index: linux-rt-rebase.q/kernel/time/tick-common.c =================================================================== --- linux-rt-rebase.q.orig/kernel/time/tick-common.c +++ linux-rt-rebase.q/kernel/time/tick-common.c @@ -68,7 +68,6 @@ static void tick_periodic(int cpu) } update_process_times(user_mode(get_irq_regs())); - profile_tick(CPU_PROFILING); } /* Index: linux-rt-rebase.q/kernel/time/tick-sched.c =================================================================== --- linux-rt-rebase.q.orig/kernel/time/tick-sched.c +++ linux-rt-rebase.q/kernel/time/tick-sched.c @@ -439,7 +439,6 @@ static void tick_nohz_handler(struct clo } update_process_times(user_mode(regs)); - profile_tick(CPU_PROFILING); /* Do not restart, when we are in the idle loop */ 
if (ts->tick_stopped) @@ -553,7 +552,6 @@ static enum hrtimer_restart tick_sched_t */ spin_unlock(&base->lock); update_process_times(user_mode(regs)); - profile_tick(CPU_PROFILING); spin_lock(&base->lock); } patches/arm-preempt-config.patch0000664000077200007720000000200710653433164016242 0ustar mingomingo arch/arm/Kconfig | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) Index: linux-rt-rebase.q/arch/arm/Kconfig =================================================================== --- linux-rt-rebase.q.orig/arch/arm/Kconfig +++ linux-rt-rebase.q/arch/arm/Kconfig @@ -612,18 +612,7 @@ config LOCAL_TIMERS accounting to be spread across the timer interval, preventing a "thundering herd" at every timer tick. -config PREEMPT - bool "Preemptible Kernel (EXPERIMENTAL)" - depends on EXPERIMENTAL - help - This option reduces the latency of the kernel when reacting to - real-time or interactive events by allowing a low priority process to - be preempted even if it is in kernel mode executing a system call. - This allows applications to run more reliably even when the system is - under load. - - Say Y here if you are building a kernel for a desktop, embedded - or real-time system. Say N if you are unsure. +source kernel/Kconfig.preempt config NO_IDLE_HZ bool "Dynamic tick timer" patches/radix-tree-optimistic-hist.patch0000664000077200007720000001037610653433167017754 0ustar mingomingoSubject: debug: optimistic lock histogram A simple histogram measuring the efficiency of the optimistic locking Signed-off-by: Peter Zijlstra --- fs/proc/proc_misc.c | 22 +++++++++++ lib/radix-tree.c | 103 +++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 124 insertions(+), 1 deletion(-) Index: linux-rt-rebase.q/fs/proc/proc_misc.c =================================================================== --- linux-rt-rebase.q.orig/fs/proc/proc_misc.c +++ linux-rt-rebase.q/fs/proc/proc_misc.c @@ -289,6 +289,25 @@ static const struct file_operations proc .release = seq_release, }; +#ifdef CONFIG_RADIX_TREE_OPTIMISTIC +extern struct seq_operations optimistic_op; +static int optimistic_open(struct inode *inode, struct file *file) +{ + (void)inode; + return seq_open(file, &optimistic_op); +} + +extern ssize_t optimistic_write(struct file *, const char __user *, size_t, loff_t *); + +static struct file_operations optimistic_file_operations = { + .open = optimistic_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, + .write = optimistic_write, +}; +#endif + static int devinfo_show(struct seq_file *f, void *v) { int i = *(loff_t *) v; @@ -785,6 +804,9 @@ void __init proc_misc_init(void) entry->proc_fops = &proc_kmsg_operations; } #endif +#ifdef CONFIG_RADIX_TREE_OPTIMISTIC + create_seq_entry("radix_optimistic", 0, &optimistic_file_operations); +#endif create_seq_entry("devices", 0, &proc_devinfo_operations); create_seq_entry("cpuinfo", 0, &proc_cpuinfo_operations); #ifdef CONFIG_BLOCK Index: linux-rt-rebase.q/lib/radix-tree.c =================================================================== --- linux-rt-rebase.q.orig/lib/radix-tree.c +++ linux-rt-rebase.q/lib/radix-tree.c @@ -75,6 +75,105 @@ static unsigned long height_to_maxindex[ static struct lock_class_key radix_node_class[RADIX_TREE_MAX_PATH]; #endif +#ifdef CONFIG_RADIX_TREE_OPTIMISTIC +static DEFINE_PER_CPU(unsigned long[RADIX_TREE_MAX_PATH+1], optimistic_histogram); + +static void optimistic_hit(unsigned long height) +{ + if (height > RADIX_TREE_MAX_PATH) + height = RADIX_TREE_MAX_PATH; + + 
__get_cpu_var(optimistic_histogram)[height]++; +} + +#ifdef CONFIG_PROC_FS + +#include +#include + +static void *frag_start(struct seq_file *m, loff_t *pos) +{ + if (*pos < 0 || *pos > RADIX_TREE_MAX_PATH) + return NULL; + + m->private = (void *)(unsigned long)*pos; + return pos; +} + +static void *frag_next(struct seq_file *m, void *arg, loff_t *pos) +{ + if (*pos < RADIX_TREE_MAX_PATH) { + (*pos)++; + (*((unsigned long *)&m->private))++; + return pos; + } + return NULL; +} + +static void frag_stop(struct seq_file *m, void *arg) +{ +} + +unsigned long get_optimistic_stat(unsigned long index) +{ + unsigned long total = 0; + int cpu; + + for_each_possible_cpu(cpu) { + total += per_cpu(optimistic_histogram, cpu)[index]; + } + return total; +} + +static int frag_show(struct seq_file *m, void *arg) +{ + unsigned long index = (unsigned long)m->private; + unsigned long hits = get_optimistic_stat(index); + + if (index == 0) + seq_printf(m, "levels skipped\thits\n"); + + if (index < RADIX_TREE_MAX_PATH) + seq_printf(m, "%9lu\t%9lu\n", index, hits); + else + seq_printf(m, "failed\t%9lu\n", hits); + + return 0; +} + +struct seq_operations optimistic_op = { + .start = frag_start, + .next = frag_next, + .stop = frag_stop, + .show = frag_show, +}; + +static void optimistic_reset(void) +{ + int cpu; + int height; + for_each_possible_cpu(cpu) { + for (height = 0; height <= RADIX_TREE_MAX_PATH; height++) + per_cpu(optimistic_histogram, cpu)[height] = 0; + } +} + +ssize_t optimistic_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + if (count) { + char c; + if (get_user(c, buf)) + return -EFAULT; + if (c == '0') + optimistic_reset(); + } + return count; +} + +#endif // CONFIG_PROC_FS +#endif // CONFIG_RADIX_TREE_OPTIMISTIC + /* * Radix tree node cache. */ @@ -461,7 +560,9 @@ radix_optimistic_lock(struct radix_tree_ BUG_ON(context->locked); spin_lock(&context->root->lock); context->locked = &context->root->lock; - } + optimistic_hit(RADIX_TREE_MAX_PATH); + } else + optimistic_hit(context->root->height - node->height); } return node; } patches/latency-tracer-variable-threshold.patch0000664000077200007720000000756610653433163021256 0ustar mingomingoFrom ce@ceag.ch Sun Jun 3 17:30:11 2007 Return-Path: Received: from toro.web-alm.net (toro.web-alm.net [62.245.132.31]) by mail.tglx.de (Postfix) with ESMTP id DC0AF65C065 for ; Sun, 3 Jun 2007 17:30:11 +0200 (CEST) Received: from toro.web-alm.net (localhost.localdomain [127.0.0.1]) by toro.web-alm.net (8.12.11.20060308/8.12.11/Web-Alm-2003112001) with ESMTP id l53FU9Dp010764 for ; Sun, 3 Jun 2007 17:30:09 +0200 Received: from thllin.ceag.ch (uucp@localhost) by toro.web-alm.net (8.12.11.20060308/8.12.10/Submit/Web-Alm-2003112001) with bsmtp id l53FU8ol010731 for tglx@linutronix.de; Sun, 3 Jun 2007 17:30:08 +0200 Received: from [192.168.255.76] (thlblade.ceag.ch [192.168.255.76]) by thllin.ceag.ch (8.12.11.20060308/8.12.11/CE-2005091901) with ESMTP id l53FMsUX003540 for ; Sun, 3 Jun 2007 17:22:55 +0200 Message-ID: <4662DCCE.8070002@ceag.ch> Date: Sun, 03 Jun 2007 17:22:54 +0200 From: Carsten Emde Organization: CE Computer Experts AG User-Agent: Mozilla/5.0 (X11; U; SunOS sun4u; en-US; rv:1.8.1.2) Gecko/20070301 SeaMonkey/1.1.1 MIME-Version: 1.0 To: Thomas Gleixner Subject: [PATCH] Make threshold to print '!' 
in latency trace variable Content-Type: multipart/mixed; boundary="------------020807010006040805040904" X-Virus-Scanned: ClamAV 0.90.1/3340/Sun Jun 3 00:40:38 2007 on thllin.ceag.ch X-Virus-Status: Clean X-Evolution-Source: imap://tglx%40linutronix.de@localhost:8993/ This is a multi-part message in MIME format. --------------020807010006040805040904 Content-Type: text/plain; charset=ISO-8859-1; format=flowed Content-Transfer-Encoding: 8bit Thomas, this patch introduces a variable threshold to print the exclamation mark in the latency_trace output instead of the constant 100 microseconds. --cbe --------------020807010006040805040904 Content-Type: text/plain; name="linux-2.6.21.3-rt9-mark_thresh.patch" Content-Disposition: inline; filename="linux-2.6.21.3-rt9-mark_thresh.patch" Content-Transfer-Encoding: 8bit --- include/linux/clocksource.h | 1 + kernel/latency_trace.c | 4 +++- kernel/sysctl.c | 8 ++++++++ 3 files changed, 12 insertions(+), 1 deletion(-) Index: linux-rt-rebase.q/include/linux/clocksource.h =================================================================== --- linux-rt-rebase.q.orig/include/linux/clocksource.h +++ linux-rt-rebase.q/include/linux/clocksource.h @@ -23,6 +23,7 @@ struct clocksource; extern unsigned long preempt_max_latency; extern unsigned long preempt_thresh; +extern unsigned long preempt_mark_thresh; /** * struct clocksource - hardware abstraction for a free running counter Index: linux-rt-rebase.q/kernel/latency_trace.c =================================================================== --- linux-rt-rebase.q.orig/kernel/latency_trace.c +++ linux-rt-rebase.q/kernel/latency_trace.c @@ -1294,11 +1294,13 @@ static void notrace l_stop(struct seq_fi up(&out_mutex); } +unsigned long preempt_mark_thresh = 100; + static void print_timestamp(struct seq_file *m, unsigned long abs_usecs, unsigned long rel_usecs) { seq_printf(m, " %4ldus", abs_usecs); - if (rel_usecs > 100) + if (rel_usecs > preempt_mark_thresh) seq_puts(m, "!: "); else if (rel_usecs > 1) seq_puts(m, "+: "); Index: linux-rt-rebase.q/kernel/sysctl.c =================================================================== --- linux-rt-rebase.q.orig/kernel/sysctl.c +++ linux-rt-rebase.q/kernel/sysctl.c @@ -354,6 +354,14 @@ static ctl_table kern_table[] = { #ifdef CONFIG_EVENT_TRACE { .ctl_name = CTL_UNNUMBERED, + .procname = "preempt_mark_thresh", + .data = &preempt_mark_thresh, + .maxlen = sizeof(preempt_mark_thresh), + .mode = 0644, + .proc_handler = &proc_doulongvec_minmax, + }, + { + .ctl_name = CTL_UNNUMBERED, .procname = "trace_enabled", .data = &trace_enabled, .maxlen = sizeof(int), patches/rt-time-starvation-fix.patch0000664000077200007720000002232410653433167017112 0ustar mingomingoHey Ingo, Noticed -rt has been updated a few times and this is still missing so I figured I'd resend it just in case you missed it: We've worked around this before, but it's cropped up again. Since update_wall_time is now called from a softirq, it can be preempted by a high priority process. If it's preempted for long enough, the clocksource can wrap, causing time to stop incrementing, which, if the preempting process is checking the time, can cause a hard lockup. This patch forces the clocksource to be read each tick, accumulating only the cycle count. This allows update_wall_time to be deferred w/o fear of hardware overflow.
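To see the idea in isolation: fold the raw counter delta into a software accumulator on every tick, so the hardware counter may wrap between deferred update_wall_time calls without time standing still. Below is a minimal stand-alone C sketch of that scheme — illustrative only, the struct and helper names are invented here and only loosely mirror the cycle_accumulated/clocksource_accumulate code added in the diff — assuming a 32-bit free-running counter:

#include <stdio.h>

typedef unsigned long long cycle_t;

struct cs_sketch {
	cycle_t mask;			/* e.g. 0xffffffffULL for a 32-bit counter */
	cycle_t cycle_last;		/* raw reading at the previous tick */
	cycle_t cycle_accumulated;	/* deltas folded in since the last update */
};

/* hard tick: cheap, keeps running even when the softirq is starved */
static void tick_accumulate(struct cs_sketch *cs, cycle_t now)
{
	/* masked subtraction makes counter wrap-around harmless */
	cs->cycle_accumulated += (now - cs->cycle_last) & cs->mask;
	cs->cycle_last = now;
}

int main(void)
{
	struct cs_sketch cs = { .mask = 0xffffffffULL, .cycle_last = 0xfffffff0ULL };

	/* the 32-bit counter wraps between these two ticks ... */
	tick_accumulate(&cs, 0x10);
	tick_accumulate(&cs, 0x20);

	/* ... yet the accumulated delta is still correct: 48 cycles */
	printf("accumulated: %llu cycles\n", cs.cycle_accumulated);
	return 0;
}

The deferred update_wall_time can then consume whole cycle_intervals from the accumulator at its leisure, which is exactly what the while loop over cycle_accumulated in the timekeeping.c hunk below does.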
thanks -john arch/x86_64/kernel/vsyscall.c | 5 ++++- include/linux/clocksource.h | 40 ++++++++++++++++++++++++++++++++++++++-- include/linux/time.h | 1 + kernel/time/timekeeping.c | 34 ++++++++++++++++++---------------- kernel/timer.c | 1 + 5 files changed, 62 insertions(+), 19 deletions(-) linux-2.6.21-rc5_cycles-accumulated_C7.patch ============================================ Index: linux-rt-rebase.q/arch/x86_64/kernel/vsyscall.c =================================================================== --- linux-rt-rebase.q.orig/arch/x86_64/kernel/vsyscall.c +++ linux-rt-rebase.q/arch/x86_64/kernel/vsyscall.c @@ -77,6 +77,7 @@ void update_vsyscall(struct timespec *wa vsyscall_gtod_data.clock.mask = clock->mask; vsyscall_gtod_data.clock.mult = clock->mult; vsyscall_gtod_data.clock.shift = clock->shift; + vsyscall_gtod_data.clock.cycle_accumulated = clock->cycle_accumulated; vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; vsyscall_gtod_data.sys_tz = sys_tz; @@ -114,7 +115,7 @@ static __always_inline long time_syscall static __always_inline void do_vgettimeofday(struct timeval * tv) { - cycle_t now, base, mask, cycle_delta; + cycle_t now, base, accumulated, mask, cycle_delta; unsigned seq; unsigned long mult, shift, nsec; cycle_t (*vread)(void); @@ -147,6 +148,7 @@ static __always_inline void do_vgettimeo } now = vread(); base = __vsyscall_gtod_data.clock.cycle_last; + accumulated = __vsyscall_gtod_data.clock.cycle_accumulated; mask = __vsyscall_gtod_data.clock.mask; mult = __vsyscall_gtod_data.clock.mult; shift = __vsyscall_gtod_data.clock.shift; @@ -157,6 +159,7 @@ static __always_inline void do_vgettimeo /* calculate interval: */ cycle_delta = (now - base) & mask; + cycle_delta += accumulated; /* convert to nsecs: */ nsec += (cycle_delta * mult) >> shift; Index: linux-rt-rebase.q/include/linux/clocksource.h =================================================================== --- linux-rt-rebase.q.orig/include/linux/clocksource.h +++ linux-rt-rebase.q/include/linux/clocksource.h @@ -54,8 +54,12 @@ extern unsigned long preempt_mark_thresh * @flags: flags describing special properties * @vread: vsyscall based read * @resume: resume function for the clocksource, if necessary + * @cycle_last: Used internally by timekeeping core, please ignore. + * @cycle_accumulated: Used internally by timekeeping core, please ignore. * @cycle_interval: Used internally by timekeeping core, please ignore. * @xtime_interval: Used internally by timekeeping core, please ignore. + * @xtime_nsec: Used internally by timekeeping core, please ignore. + * @error: Used internally by timekeeping core, please ignore. */ struct clocksource { /* @@ -79,7 +83,7 @@ struct clocksource { #endif /* timekeeping specific data, ignore */ - cycle_t cycle_interval; + cycle_t cycle_accumulated, cycle_interval; u64 xtime_interval; /* * Second part is written at each timer interrupt @@ -172,11 +176,43 @@ static inline cycle_t clocksource_read(s } /** + * clocksource_get_cycles: - Access the clocksource's accumulated cycle value + * @cs: pointer to clocksource being read + * @now: current cycle value + * + * Uses the clocksource to return the current cycle_t value. + * NOTE!!!: This is different from clocksource_read, because it + * returns the accumulated cycle value! Must hold xtime lock! 
+ */ +static inline cycle_t clocksource_get_cycles(struct clocksource *cs, cycle_t now) +{ + cycle_t offset = (now - cs->cycle_last) & cs->mask; + offset += cs->cycle_accumulated; + return offset; +} + +/** + * clocksource_accumulate: - Accumulates clocksource cycles + * @cs: pointer to clocksource being read + * @now: current cycle value + * + * Used to avoids clocksource hardware overflow by periodically + * accumulating the current cycle delta. Must hold xtime write lock! + */ +static inline void clocksource_accumulate(struct clocksource *cs, cycle_t now) +{ + cycle_t offset = (now - cs->cycle_last) & cs->mask; + cs->cycle_last = now; + cs->cycle_accumulated += offset; +} + +/** * cyc2ns - converts clocksource cycles to nanoseconds * @cs: Pointer to clocksource * @cycles: Cycles * * Uses the clocksource and ntp ajdustment to convert cycle_ts to nanoseconds. + * Must hold xtime lock! * * XXX - This could use some mult_lxl_ll() asm optimization */ @@ -206,7 +242,7 @@ static inline cycle_t ns2cyc(struct cloc * @length_nsec: Desired interval length in nanoseconds. * * Calculates a fixed cycle/nsec interval for a given clocksource/adjustment - * pair and interval request. + * pair and interval request. Must hold xtime_lock! * * Unless you're the timekeeping code, you should not be using this! */ Index: linux-rt-rebase.q/include/linux/time.h =================================================================== --- linux-rt-rebase.q.orig/include/linux/time.h +++ linux-rt-rebase.q/include/linux/time.h @@ -98,6 +98,7 @@ extern unsigned long read_persistent_clo extern int update_persistent_clock(struct timespec now); extern int no_sync_cmos_clock __read_mostly; void timekeeping_init(void); +extern void timekeeping_accumulate(void); unsigned long get_seconds(void); struct timespec current_kernel_time(void); Index: linux-rt-rebase.q/kernel/time/timekeeping.c =================================================================== --- linux-rt-rebase.q.orig/kernel/time/timekeeping.c +++ linux-rt-rebase.q/kernel/time/timekeeping.c @@ -76,16 +76,10 @@ static struct clocksource *clock; /* poi */ static inline s64 __get_nsec_offset(void) { - cycle_t cycle_now, cycle_delta; + cycle_t cycle_delta; s64 ns_offset; - /* read clocksource: */ - cycle_now = clocksource_read(clock); - - /* calculate the delta since the last update_wall_time: */ - cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; - - /* convert to nanoseconds: */ + cycle_delta = clocksource_get_cycles(clock, clocksource_read(clock)); ns_offset = cyc2ns(clock, cycle_delta); return ns_offset; @@ -232,7 +226,7 @@ static void change_clocksource(void) clock = new; clock->cycle_last = now; - + clock->cycle_accumulated = 0; clock->error = 0; clock->xtime_nsec = 0; clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH); @@ -244,8 +238,14 @@ static void change_clocksource(void) clock->name); #endif } + +void timekeeping_accumulate(void) +{ + clocksource_accumulate(clock, clocksource_read(clock)); +} #else static inline void change_clocksource(void) { } +void timekeeping_accumulate(void) { } #endif /** @@ -336,6 +336,7 @@ static int timekeeping_resume(struct sys } /* re-base the last cycle value */ clock->cycle_last = clocksource_read(clock); + clock->cycle_accumulated = 0; clock->error = 0; timekeeping_suspended = 0; write_sequnlock_irqrestore(&xtime_lock, flags); @@ -483,27 +484,28 @@ static void clocksource_adjust(s64 offse */ void update_wall_time(void) { - cycle_t offset; + cycle_t cycle_now; /* Make sure we're fully resumed: */ if 
(unlikely(timekeeping_suspended)) return; #ifdef CONFIG_GENERIC_TIME - offset = (clocksource_read(clock) - clock->cycle_last) & clock->mask; + cycle_now = clocksource_read(clock); #else - offset = clock->cycle_interval; + cycle_now = clock->cycle_last + clock->cycle_interval; #endif + clocksource_accumulate(clock, cycle_now); + clock->xtime_nsec += (s64)xtime.tv_nsec << clock->shift; /* normally this loop will run just once, however in the * case of lost or late ticks, it will accumulate correctly. */ - while (offset >= clock->cycle_interval) { + while (clock->cycle_accumulated >= clock->cycle_interval) { /* accumulate one interval */ clock->xtime_nsec += clock->xtime_interval; - clock->cycle_last += clock->cycle_interval; - offset -= clock->cycle_interval; + clock->cycle_accumulated -= clock->cycle_interval; if (clock->xtime_nsec >= (u64)NSEC_PER_SEC << clock->shift) { clock->xtime_nsec -= (u64)NSEC_PER_SEC << clock->shift; @@ -517,7 +519,7 @@ void update_wall_time(void) } /* correct the clock when NTP error is too big */ - clocksource_adjust(offset); + clocksource_adjust(clock->cycle_accumulated); /* store full nanoseconds into xtime */ xtime.tv_nsec = (s64)clock->xtime_nsec >> clock->shift; Index: linux-rt-rebase.q/kernel/timer.c =================================================================== --- linux-rt-rebase.q.orig/kernel/timer.c +++ linux-rt-rebase.q/kernel/timer.c @@ -1031,6 +1031,7 @@ static void run_timer_softirq(struct sof void do_timer(unsigned long ticks) { jiffies_64 += ticks; + timekeeping_accumulate(); } #ifdef __ARCH_WANT_SYS_ALARM patches/pagefault-disable-cleanup.patch0000664000077200007720000001365210653433167017557 0ustar mingomingoSubject: [patch] clean up the page fault disabling logic From: Ingo Molnar decouple the pagefault-disabled logic from the preempt count. Signed-off-by: Ingo Molnar --- arch/arm/mm/fault.c | 2 +- arch/i386/mm/fault.c | 2 +- arch/mips/mm/fault.c | 2 +- arch/powerpc/mm/fault.c | 2 +- arch/x86_64/mm/fault.c | 2 +- include/linux/sched.h | 1 + include/linux/uaccess.h | 33 +++------------------------------ kernel/fork.c | 1 + mm/memory.c | 22 ++++++++++++++++++++++ 9 files changed, 32 insertions(+), 35 deletions(-) Index: linux-rt-rebase.q/arch/arm/mm/fault.c =================================================================== --- linux-rt-rebase.q.orig/arch/arm/mm/fault.c +++ linux-rt-rebase.q/arch/arm/mm/fault.c @@ -229,7 +229,7 @@ do_page_fault(unsigned long addr, unsign * If we're in an interrupt or have no user * context, we must not take the fault.. */ - if (in_atomic() || !mm) + if (in_atomic() || !mm || current->pagefault_disabled) goto no_context; /* Index: linux-rt-rebase.q/arch/i386/mm/fault.c =================================================================== --- linux-rt-rebase.q.orig/arch/i386/mm/fault.c +++ linux-rt-rebase.q/arch/i386/mm/fault.c @@ -354,7 +354,7 @@ fastcall notrace void __kprobes do_page_ * If we're in an interrupt, have no user context or are running in an * atomic region then we must not take the fault.. 
*/ - if (in_atomic() || !mm) + if (in_atomic() || !mm || current->pagefault_disabled) goto bad_area_nosemaphore; /* When running in the kernel we expect faults to occur only to Index: linux-rt-rebase.q/arch/mips/mm/fault.c =================================================================== --- linux-rt-rebase.q.orig/arch/mips/mm/fault.c +++ linux-rt-rebase.q/arch/mips/mm/fault.c @@ -69,7 +69,7 @@ asmlinkage void do_page_fault(struct pt_ * If we're in an interrupt or have no user * context, we must not take the fault.. */ - if (in_atomic() || !mm) + if (in_atomic() || !mm || current->pagefault_disabled) goto bad_area_nosemaphore; down_read(&mm->mmap_sem); Index: linux-rt-rebase.q/arch/powerpc/mm/fault.c =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/mm/fault.c +++ linux-rt-rebase.q/arch/powerpc/mm/fault.c @@ -184,7 +184,7 @@ int __kprobes do_page_fault(struct pt_re } #endif /* !(CONFIG_4xx || CONFIG_BOOKE)*/ - if (in_atomic() || mm == NULL) { + if (in_atomic() || mm == NULL || current->pagefault_disabled) { if (!user_mode(regs)) return SIGSEGV; /* in_atomic() in user mode is really bad, Index: linux-rt-rebase.q/arch/x86_64/mm/fault.c =================================================================== --- linux-rt-rebase.q.orig/arch/x86_64/mm/fault.c +++ linux-rt-rebase.q/arch/x86_64/mm/fault.c @@ -371,7 +371,7 @@ asmlinkage void __kprobes do_page_fault( * If we're in an interrupt or have no user * context, we must not take the fault.. */ - if (unlikely(in_atomic() || !mm)) + if (unlikely(in_atomic() || !mm || current->pagefault_disabled)) goto bad_area_nosemaphore; again: Index: linux-rt-rebase.q/include/linux/sched.h =================================================================== --- linux-rt-rebase.q.orig/include/linux/sched.h +++ linux-rt-rebase.q/include/linux/sched.h @@ -1256,6 +1256,7 @@ struct task_struct { /* mutex deadlock detection */ struct mutex_waiter *blocked_on; #endif + int pagefault_disabled; #ifdef CONFIG_TRACE_IRQFLAGS unsigned int irq_events; int hardirqs_enabled; Index: linux-rt-rebase.q/include/linux/uaccess.h =================================================================== --- linux-rt-rebase.q.orig/include/linux/uaccess.h +++ linux-rt-rebase.q/include/linux/uaccess.h @@ -6,37 +6,10 @@ /* * These routines enable/disable the pagefault handler in that - * it will not take any locks and go straight to the fixup table. - * - * They have great resemblance to the preempt_disable/enable calls - * and in fact they are identical; this is because currently there is - * no other way to make the pagefault handlers do this. So we do - * disable preemption but we don't necessarily care about that. + * it will not take any MM locks and go straight to the fixup table. */ -static inline void pagefault_disable(void) -{ - inc_preempt_count(); - /* - * make sure to have issued the store before a pagefault - * can hit. - */ - barrier(); -} - -static inline void pagefault_enable(void) -{ - /* - * make sure to issue those last loads/stores before enabling - * the pagefault handler again. - */ - barrier(); - dec_preempt_count(); - /* - * make sure we do.. 
- */ - barrier(); - preempt_check_resched(); -} +extern void pagefault_disable(void); +extern void pagefault_enable(void); #ifndef ARCH_HAS_NOCACHE_UACCESS Index: linux-rt-rebase.q/kernel/fork.c =================================================================== --- linux-rt-rebase.q.orig/kernel/fork.c +++ linux-rt-rebase.q/kernel/fork.c @@ -1140,6 +1140,7 @@ static struct task_struct *copy_process( p->hardirq_context = 0; p->softirq_context = 0; #endif + p->pagefault_disabled = 0; #ifdef CONFIG_LOCKDEP p->lockdep_depth = 0; /* no locks held yet */ p->curr_chain_key = 0; Index: linux-rt-rebase.q/mm/memory.c =================================================================== --- linux-rt-rebase.q.orig/mm/memory.c +++ linux-rt-rebase.q/mm/memory.c @@ -2666,6 +2666,28 @@ unlock: return 0; } +void pagefault_disable(void) +{ + current->pagefault_disabled++; + /* + * make sure to have issued the store before a pagefault + * can hit. + */ + barrier(); +} +EXPORT_SYMBOL(pagefault_disable); + +void pagefault_enable(void) +{ + /* + * make sure to issue those last loads/stores before enabling + * the pagefault handler again. + */ + barrier(); + current->pagefault_disabled--; +} +EXPORT_SYMBOL(pagefault_enable); + /* * By the time we get here, we already hold the mm semaphore */ patches/x86_64-apic-remove-bogus-pit-synchronization.patch0000664000077200007720000000273510653433161023064 0ustar mingomingoSubject: x86_64: remove pit synchronization The APIC timer setup code synchronizes the local APIC timer to the PIT/HPET. This is pointless as the PIT and the local APIC timer frequency are not correlated and the APIC timer calibration can never be accurate enough to avoid that the local APIC timer and the PIT/HPET drift apart. Simply remove it. Signed-off-by: Thomas Gleixner Signed-off-by: Chris Wright Signed-off-by: Ingo Molnar --- arch/x86_64/kernel/apic.c | 20 -------------------- 1 file changed, 20 deletions(-) Index: linux-rt-rebase.q/arch/x86_64/kernel/apic.c =================================================================== --- linux-rt-rebase.q.orig/arch/x86_64/kernel/apic.c +++ linux-rt-rebase.q/arch/x86_64/kernel/apic.c @@ -791,26 +791,6 @@ static void setup_APIC_timer(unsigned in local_irq_save(flags); - /* wait for irq slice */ - if (hpet_address && hpet_use_timer) { - int trigger = hpet_readl(HPET_T0_CMP); - while (hpet_readl(HPET_COUNTER) >= trigger) - /* do nothing */ ; - while (hpet_readl(HPET_COUNTER) < trigger) - /* do nothing */ ; - } else { - int c1, c2; - outb_p(0x00, 0x43); - c2 = inb_p(0x40); - c2 |= inb_p(0x40) << 8; - do { - c1 = c2; - outb_p(0x00, 0x43); - c2 = inb_p(0x40); - c2 |= inb_p(0x40) << 8; - } while (c2 - c1 < 300); - } - irqen = ! 
cpu_isset(smp_processor_id(), timer_interrupt_broadcast_ipi_mask); __setup_APIC_LVTT(clocks, 0, irqen); patches/smaller-trace.patch0000664000077200007720000000076110653433162015304 0ustar mingomingo--- kernel/latency_trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) Index: linux-rt-rebase.q/kernel/latency_trace.c =================================================================== --- linux-rt-rebase.q.orig/kernel/latency_trace.c +++ linux-rt-rebase.q/kernel/latency_trace.c @@ -191,7 +191,7 @@ static int report_latency(cycle_t delta) /* * Number of per-CPU trace entries: */ -#define MAX_TRACE (65536UL*16UL) +#define MAX_TRACE (65536UL) #define CMDLINE_BYTES 16 patches/i386-prepare-sharing-hpet-code.patch0000664000077200007720000000500610653433161020171 0ustar mingomingoSubject: i386: prepare sharing the hpet code with x86_64 The HPET implementations of i386 and x86_64 have been mostly the same before the clock events conversion of i386. The clock events conversion of i386 hpet is already done. So it makes sense to share the code for the x86_64 clock events conversion. Abstract out the mapping functions. Signed-off-by: Thomas Gleixner Signed-off-by: Chris Wright Signed-off-by: Ingo Molnar --- arch/i386/kernel/hpet.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) Index: linux-rt-rebase.q/arch/i386/kernel/hpet.c =================================================================== --- linux-rt-rebase.q.orig/arch/i386/kernel/hpet.c +++ linux-rt-rebase.q/arch/i386/kernel/hpet.c @@ -8,10 +8,9 @@ #include #include +#include #include -extern struct clock_event_device *global_clock_event; - #define HPET_MASK CLOCKSOURCE_MASK(32) #define HPET_SHIFT 22 @@ -22,7 +21,7 @@ extern struct clock_event_device *global * HPET address is set in acpi/boot.c, when an ACPI entry exists */ unsigned long hpet_address; -static void __iomem * hpet_virt_address; +static void __iomem *hpet_virt_address; static inline unsigned long hpet_readl(unsigned long a) { @@ -34,6 +33,17 @@ static inline void hpet_writel(unsigned writel(d, hpet_virt_address + a); } +static inline void hpet_set_mapping(void) +{ + hpet_virt_address = ioremap_nocache(hpet_address, HPET_MMAP_SIZE); +} + +static inline void hpet_clear_mapping(void) +{ + iounmap(hpet_virt_address); + hpet_virt_address = NULL; +} + /* * HPET command line enable / disable */ @@ -83,7 +93,7 @@ static void hpet_reserve_platform_timers memset(&hd, 0, sizeof (hd)); hd.hd_phys_address = hpet_address; - hd.hd_address = hpet_virt_address; + hd.hd_address = hpet; hd.hd_nirqs = nrtimers; hd.hd_flags = HPET_DATA_PLATFORM; hpet_reserve_timer(&hd, 0); @@ -238,7 +248,7 @@ int __init hpet_enable(void) if (!is_hpet_capable()) return 0; - hpet_virt_address = ioremap_nocache(hpet_address, HPET_MMAP_SIZE); + hpet_set_mapping(); /* * Read the period and check for a sane value: @@ -334,13 +344,11 @@ int __init hpet_enable(void) return 0; out_nohpet: - iounmap(hpet_virt_address); - hpet_virt_address = NULL; + hpet_clear_mapping(); boot_hpet_disable = 1; return 0; } - #ifdef CONFIG_HPET_EMULATE_RTC /* HPET in LegacyReplacement Mode eats up RTC interrupt line. 
When, HPET patches/undo-latency-tracing-raw-spinlock-hack.patch0000664000077200007720000000105610653433163022112 0ustar mingomingo--- kernel/latency_trace.c | 6 ------ 1 file changed, 6 deletions(-) Index: linux-rt-rebase.q/kernel/latency_trace.c =================================================================== --- linux-rt-rebase.q.orig/kernel/latency_trace.c +++ linux-rt-rebase.q/kernel/latency_trace.c @@ -38,12 +38,6 @@ int trace_use_raw_cycles = 0; -#define __raw_spinlock_t raw_spinlock_t -#define need_resched_delayed() 0 - -#define __raw_spinlock_t raw_spinlock_t -#define need_resched_delayed() 0 - #ifdef CONFIG_EVENT_TRACE /* * Convert raw cycles to usecs. patches/x86-64-tscless-vgettimeofday.patch0000664000077200007720000000354210653433167017755 0ustar mingomingoSubject: [patch] x86_64 GTOD: offer scalable vgettimeofday From: Ingo Molnar offer scalable vgettimeofday independently of whether the TSC is synchronous or not. Off by default. this patch also fixes an SMP bug in sys_vtime(): we should read __vsyscall_gtod_data.wall_time_tv.tv_sec only once. Signed-off-by: Ingo Molnar --- arch/x86_64/kernel/vsyscall.c | 19 +++++++++++++++++++ include/asm-x86_64/vgtod.h | 2 +- 2 files changed, 20 insertions(+), 1 deletion(-) Index: linux-rt-rebase.q/arch/x86_64/kernel/vsyscall.c =================================================================== --- linux-rt-rebase.q.orig/arch/x86_64/kernel/vsyscall.c +++ linux-rt-rebase.q/arch/x86_64/kernel/vsyscall.c @@ -118,6 +118,25 @@ static __always_inline void do_vgettimeo unsigned seq; unsigned long mult, shift, nsec; cycle_t (*vread)(void); + + if (likely(__vsyscall_gtod_data.sysctl_enabled == 2)) { + struct timeval tmp; + + do { + barrier(); + tv->tv_sec = __vsyscall_gtod_data.wall_time_sec; + tv->tv_usec = __vsyscall_gtod_data.wall_time_nsec; + barrier(); + tmp.tv_sec = __vsyscall_gtod_data.wall_time_sec; + tmp.tv_usec = __vsyscall_gtod_data.wall_time_nsec; + + } while (tmp.tv_usec != tv->tv_usec || + tmp.tv_sec != tv->tv_sec); + + tv->tv_usec /= NSEC_PER_USEC; + return; + } + do { seq = read_seqbegin(&__vsyscall_gtod_data.lock); Index: linux-rt-rebase.q/include/asm-x86_64/vgtod.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-x86_64/vgtod.h +++ linux-rt-rebase.q/include/asm-x86_64/vgtod.h @@ -15,7 +15,7 @@ struct vsyscall_gtod_data { struct timezone sys_tz; struct { /* extract of a clocksource struct */ cycle_t (*vread)(void); - cycle_t cycle_last; + cycle_t cycle_last, cycle_accumulated; cycle_t mask; u32 mult; u32 shift; patches/preempt-realtime-arm-bagde4.patch0000664000077200007720000000227310653433165017731 0ustar mingomingo--- arch/arm/mach-sa1100/badge4.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) Index: linux-rt-rebase.q/arch/arm/mach-sa1100/badge4.c =================================================================== --- linux-rt-rebase.q.orig/arch/arm/mach-sa1100/badge4.c +++ linux-rt-rebase.q/arch/arm/mach-sa1100/badge4.c @@ -240,15 +240,22 @@ void badge4_set_5V(unsigned subsystem, i /* detect on->off and off->on transitions */ if ((!old_5V_bitmap) && (badge4_5V_bitmap)) { /* was off, now on */ - printk(KERN_INFO "%s: enabling 5V supply rail\n", __FUNCTION__); GPSR = BADGE4_GPIO_PCMEN5V; } else if ((old_5V_bitmap) && (!badge4_5V_bitmap)) { /* was on, now off */ - printk(KERN_INFO "%s: disabling 5V supply rail\n", __FUNCTION__); GPCR = BADGE4_GPIO_PCMEN5V; } local_irq_restore(flags); + + /* detect on->off and off->on transitions */ + if ((!old_5V_bitmap) 
&& (badge4_5V_bitmap)) { + /* was off, now on */ + printk(KERN_INFO "%s: enabling 5V supply rail\n", __FUNCTION__); + } else if ((old_5V_bitmap) && (!badge4_5V_bitmap)) { + /* was on, now off */ + printk(KERN_INFO "%s: disabling 5V supply rail\n", __FUNCTION__); + } } EXPORT_SYMBOL(badge4_set_5V); patches/2.6.21-rc6-lockless2-radix-tree-use-indirect-bit.patch0000664000077200007720000002325310653433167023103 0ustar mingomingoFrom: Nick Piggin Subject: [patch 2/9] radix-tree: use indirect bit Rather than sign direct radix-tree pointers with a special bit, sign the indirect one that hangs off the root. This means that, given a lookup_slot operation, the invalid result will be differentiated from the valid (previously, valid results could have the bit either set or clear). This does not affect slot lookups which occur under lock -- they can never return an invalid result. Is needed in future for lockless pagecache. Signed-off-by: Nick Piggin --- include/linux/radix-tree.h | 40 ++++++++++++++------------ lib/radix-tree.c | 69 ++++++++++++++++++++++++++++----------------- 2 files changed, 65 insertions(+), 44 deletions(-) Index: linux-rt-rebase.q/include/linux/radix-tree.h =================================================================== --- linux-rt-rebase.q.orig/include/linux/radix-tree.h +++ linux-rt-rebase.q/include/linux/radix-tree.h @@ -26,28 +26,31 @@ #include /* - * A direct pointer (root->rnode pointing directly to a data item, - * rather than another radix_tree_node) is signalled by the low bit - * set in the root->rnode pointer. - * - * In this case root->height is also NULL, but the direct pointer tests are - * needed for RCU lookups when root->height is unreliable. + * An indirect pointer (root->rnode pointing to a radix_tree_node, rather + * than a data item) is signalled by the low bit set in the root->rnode + * pointer. + * + * In this case root->height is > 0, but the indirect pointer tests are + * needed for RCU lookups (because root->height is unreliable). The only + * time callers need worry about this is when doing a lookup_slot under + * RCU. 
*/ -#define RADIX_TREE_DIRECT_PTR 1 +#define RADIX_TREE_INDIRECT_PTR 1 +#define RADIX_TREE_RETRY ((void *)-1UL) -static inline void *radix_tree_ptr_to_direct(void *ptr) +static inline void *radix_tree_ptr_to_indirect(void *ptr) { - return (void *)((unsigned long)ptr | RADIX_TREE_DIRECT_PTR); + return (void *)((unsigned long)ptr | RADIX_TREE_INDIRECT_PTR); } -static inline void *radix_tree_direct_to_ptr(void *ptr) +static inline void *radix_tree_indirect_to_ptr(void *ptr) { - return (void *)((unsigned long)ptr & ~RADIX_TREE_DIRECT_PTR); + return (void *)((unsigned long)ptr & ~RADIX_TREE_INDIRECT_PTR); } -static inline int radix_tree_is_direct_ptr(void *ptr) +static inline int radix_tree_is_indirect_ptr(void *ptr) { - return (int)((unsigned long)ptr & RADIX_TREE_DIRECT_PTR); + return (int)((unsigned long)ptr & RADIX_TREE_INDIRECT_PTR); } /*** radix-tree API starts here ***/ @@ -130,7 +133,10 @@ do { \ */ static inline void *radix_tree_deref_slot(void **pslot) { - return radix_tree_direct_to_ptr(*pslot); + void *ret = *pslot; + if (unlikely(radix_tree_is_indirect_ptr(ret))) + ret = RADIX_TREE_RETRY; + return ret; } /** * radix_tree_replace_slot - replace item in a slot @@ -142,10 +148,8 @@ static inline void *radix_tree_deref_slo */ static inline void radix_tree_replace_slot(void **pslot, void *item) { - BUG_ON(radix_tree_is_direct_ptr(item)); - rcu_assign_pointer(*pslot, - (void *)((unsigned long)item | - ((unsigned long)*pslot & RADIX_TREE_DIRECT_PTR))); + BUG_ON(radix_tree_is_indirect_ptr(item)); + rcu_assign_pointer(*pslot, item); } int radix_tree_insert(struct radix_tree_root *, unsigned long, void *); Index: linux-rt-rebase.q/lib/radix-tree.c =================================================================== --- linux-rt-rebase.q.orig/lib/radix-tree.c +++ linux-rt-rebase.q/lib/radix-tree.c @@ -105,7 +105,7 @@ radix_tree_node_alloc(struct radix_tree_ } put_cpu_var(radix_tree_preloads); } - BUG_ON(radix_tree_is_direct_ptr(ret)); + BUG_ON(radix_tree_is_indirect_ptr(ret)); return ret; } @@ -245,7 +245,7 @@ static int radix_tree_extend(struct radi return -ENOMEM; /* Increase the height. */ - node->slots[0] = radix_tree_direct_to_ptr(root->rnode); + node->slots[0] = radix_tree_indirect_to_ptr(root->rnode); /* Propagate the aggregated tag info into the new root */ for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { @@ -256,6 +256,7 @@ static int radix_tree_extend(struct radi newheight = root->height+1; node->height = newheight; node->count = 1; + node = radix_tree_ptr_to_indirect(node); rcu_assign_pointer(root->rnode, node); root->height = newheight; } while (height > root->height); @@ -279,7 +280,7 @@ int radix_tree_insert(struct radix_tree_ int offset; int error; - BUG_ON(radix_tree_is_direct_ptr(item)); + BUG_ON(radix_tree_is_indirect_ptr(item)); /* Make sure the tree is high enough. 
*/ if (index > radix_tree_maxindex(root->height)) { @@ -288,7 +289,8 @@ int radix_tree_insert(struct radix_tree_ return error; } - slot = root->rnode; + slot = radix_tree_indirect_to_ptr(root->rnode); + height = root->height; shift = (height-1) * RADIX_TREE_MAP_SHIFT; @@ -303,7 +305,8 @@ int radix_tree_insert(struct radix_tree_ rcu_assign_pointer(node->slots[offset], slot); node->count++; } else - rcu_assign_pointer(root->rnode, slot); + rcu_assign_pointer(root->rnode, + radix_tree_ptr_to_indirect(slot)); } /* Go a level down */ @@ -323,7 +326,7 @@ int radix_tree_insert(struct radix_tree_ BUG_ON(tag_get(node, 0, offset)); BUG_ON(tag_get(node, 1, offset)); } else { - rcu_assign_pointer(root->rnode, radix_tree_ptr_to_direct(item)); + rcu_assign_pointer(root->rnode, item); BUG_ON(root_tag_get(root, 0)); BUG_ON(root_tag_get(root, 1)); } @@ -355,11 +358,12 @@ void **radix_tree_lookup_slot(struct rad if (node == NULL) return NULL; - if (radix_tree_is_direct_ptr(node)) { + if (!radix_tree_is_indirect_ptr(node)) { if (index > 0) return NULL; return (void **)&root->rnode; } + node = radix_tree_indirect_to_ptr(node); height = node->height; if (index > radix_tree_maxindex(height)) @@ -403,11 +407,12 @@ void *radix_tree_lookup(struct radix_tre if (node == NULL) return NULL; - if (radix_tree_is_direct_ptr(node)) { + if (!radix_tree_is_indirect_ptr(node)) { if (index > 0) return NULL; - return radix_tree_direct_to_ptr(node); + return node; } + node = radix_tree_indirect_to_ptr(node); height = node->height; if (index > radix_tree_maxindex(height)) @@ -452,7 +457,7 @@ void *radix_tree_tag_set(struct radix_tr height = root->height; BUG_ON(index > radix_tree_maxindex(height)); - slot = root->rnode; + slot = radix_tree_indirect_to_ptr(root->rnode); shift = (height - 1) * RADIX_TREE_MAP_SHIFT; while (height > 0) { @@ -502,7 +507,7 @@ void *radix_tree_tag_clear(struct radix_ shift = (height - 1) * RADIX_TREE_MAP_SHIFT; pathp->node = NULL; - slot = root->rnode; + slot = radix_tree_indirect_to_ptr(root->rnode); while (height > 0) { int offset; @@ -567,8 +572,9 @@ int radix_tree_tag_get(struct radix_tree if (node == NULL) return 0; - if (radix_tree_is_direct_ptr(node)) + if (!radix_tree_is_indirect_ptr(node)) return (index == 0); + node = radix_tree_indirect_to_ptr(node); height = node->height; if (index > radix_tree_maxindex(height)) @@ -685,13 +691,13 @@ radix_tree_gang_lookup(struct radix_tree if (!node) return 0; - if (radix_tree_is_direct_ptr(node)) { + if (!radix_tree_is_indirect_ptr(node)) { if (first_index > 0) return 0; - node = radix_tree_direct_to_ptr(node); - results[0] = rcu_dereference(node); + results[0] = node; return 1; } + node = radix_tree_indirect_to_ptr(node); max_index = radix_tree_maxindex(node->height); @@ -813,13 +819,13 @@ radix_tree_gang_lookup_tag(struct radix_ if (!node) return 0; - if (radix_tree_is_direct_ptr(node)) { + if (!radix_tree_is_indirect_ptr(node)) { if (first_index > 0) return 0; - node = radix_tree_direct_to_ptr(node); - results[0] = rcu_dereference(node); + results[0] = node; return 1; } + node = radix_tree_indirect_to_ptr(node); max_index = radix_tree_maxindex(node->height); @@ -849,12 +855,22 @@ EXPORT_SYMBOL(radix_tree_gang_lookup_tag static inline void radix_tree_shrink(struct radix_tree_root *root) { /* try to shrink tree height */ - while (root->height > 0 && - root->rnode->count == 1 && - root->rnode->slots[0]) { + while (root->height > 0) { struct radix_tree_node *to_free = root->rnode; void *newptr; + BUG_ON(!radix_tree_is_indirect_ptr(to_free)); + to_free = 
radix_tree_indirect_to_ptr(to_free);
+
+		/*
+		 * The candidate node has more than one child, or its child
+		 * is not at the leftmost slot, we cannot shrink.
+		 */
+		if (to_free->count != 1)
+			break;
+		if (!to_free->slots[0])
+			break;
+
 		/*
 		 * We don't need rcu_assign_pointer(), since we are simply
 		 * moving the node from one part of the tree to another. If
 		 * it was safe to dereference the old pointer to it
 		 * (to_free->slots[0]), it will be safe to dereference the new
 		 * one (root->rnode).
 		 */
 		newptr = to_free->slots[0];
-		if (root->height == 1)
-			newptr = radix_tree_ptr_to_direct(newptr);
+		if (root->height > 1)
+			newptr = radix_tree_ptr_to_indirect(newptr);
 		root->rnode = newptr;
 		root->height--;
 		/* must only free zeroed nodes into the slab */
@@ -899,12 +915,12 @@ void *radix_tree_delete(struct radix_tre
 		goto out;
 
 	slot = root->rnode;
-	if (height == 0 && root->rnode) {
-		slot = radix_tree_direct_to_ptr(slot);
+	if (height == 0 /* XXX: bugfix? */) {
 		root_tag_clear_all(root);
 		root->rnode = NULL;
 		goto out;
 	}
+	slot = radix_tree_indirect_to_ptr(slot);
 
 	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
 	pathp->node = NULL;
@@ -946,7 +962,8 @@ void *radix_tree_delete(struct radix_tre
 			radix_tree_node_free(to_free);
 
 		if (pathp->node->count) {
-			if (pathp->node == root->rnode)
+			if (pathp->node ==
+					radix_tree_indirect_to_ptr(root->rnode))
 				radix_tree_shrink(root);
 			goto out;
 		}
patches/preempt-realtime-sh.patch0000664000077200007720000010216610653433165016442 0ustar mingomingoFrom lethal@linux-sh.org Fri Apr 27 10:21:47 2007
Date: Fri, 27 Apr 2007 10:21:47 +0900
From: Paul Mundt
To: Thomas Gleixner , Ingo Molnar
Subject: [PATCH] preempt-rt: Preliminary SH support

Hi Thomas, Ingo,

Here's preliminary preempt-rt support for SH. It was written against
2.6.21-rc5, but still applies cleanly. I've kept the clock events stuff
out of this patch, since I'm planning on overhauling the timer stuff on
SH first, but this should trickle in through 2.6.22-rc. Feel free to
either merge this into preempt-rt or hold off until the timer stuff
gets done.

Patch from Matsubara-san.
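The conversions in this patch all follow the usual PREEMPT_RT pattern: a
plain spinlock_t may become a sleeping rtmutex-based lock, so code that
genuinely cannot sleep (cache/TLB flushing, die(), low-level clock code)
switches to raw_spinlock_t, and interrupt-disable sites that must really
mask the CPU use the raw_local_irq_* variants. A rough sketch of the
split, assuming the -rt tree's type-dispatching lock API (the demo_*
names are illustrative, not part of this patch):

	#include <linux/spinlock.h>
	#include <linux/irqflags.h>

	static DEFINE_RAW_SPINLOCK(demo_hw_lock);  /* stays a spinning lock on -rt */
	static DEFINE_SPINLOCK(demo_data_lock);    /* may become a sleeping lock on -rt */

	static void demo_program_hardware(void)
	{
		unsigned long flags;

		/* Really masks interrupts at the CPU, like the raw_local_irq_*
		 * conversions in the cache/TLB code of this patch. */
		raw_local_irq_save(flags);
		spin_lock(&demo_hw_lock);	/* dispatches to the raw implementation */
		/* ... program hardware registers ... */
		spin_unlock(&demo_hw_lock);
		raw_local_irq_restore(flags);
	}

	static void demo_update_state(void)
	{
		/* On -rt this may sleep under contention, so it must not be
		 * called from hard-irq or irqs-off context. */
		spin_lock(&demo_data_lock);
		/* ... update shared state ... */
		spin_unlock(&demo_data_lock);
	}

The same spin_lock() call site works for both lock kinds because the -rt
lock macros select an implementation from the declared type of the lock,
which is why the hunks below only need to change the DEFINE_* line.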
Signed-off-by: Katsuya MATSUBARA Signed-off-by: Paul Mundt -- arch/sh/kernel/cpu/clock.c | 2 - arch/sh/kernel/cpu/sh4/sq.c | 2 - arch/sh/kernel/entry-common.S | 8 ++-- arch/sh/kernel/irq.c | 2 - arch/sh/kernel/process.c | 10 +++--- arch/sh/kernel/semaphore.c | 14 ++++++-- arch/sh/kernel/sh_ksyms.c | 9 ++--- arch/sh/kernel/signal.c | 7 ++++ arch/sh/kernel/time.c | 2 - arch/sh/kernel/traps.c | 2 - arch/sh/mm/cache-sh4.c | 12 +++---- arch/sh/mm/init.c | 2 - arch/sh/mm/pg-sh4.c | 8 ++-- arch/sh/mm/tlb-flush.c | 20 ++++++------ arch/sh/mm/tlb-sh4.c | 4 +- include/asm-sh/atomic-irq.h | 24 +++++++------- include/asm-sh/atomic.h | 8 ++-- include/asm-sh/bitops.h | 24 +++++++------- include/asm-sh/pgalloc.h | 2 - include/asm-sh/rwsem.h | 46 ++++++++++++++-------------- include/asm-sh/semaphore-helper.h | 8 ++-- include/asm-sh/semaphore.h | 61 +++++++++++++++++++++++--------------- include/asm-sh/system.h | 12 +++---- include/asm-sh/thread_info.h | 2 + 24 files changed, 160 insertions(+), 131 deletions(-) Index: linux-rt-rebase.q/arch/sh/kernel/cpu/clock.c =================================================================== --- linux-rt-rebase.q.orig/arch/sh/kernel/cpu/clock.c +++ linux-rt-rebase.q/arch/sh/kernel/cpu/clock.c @@ -28,7 +28,7 @@ #include static LIST_HEAD(clock_list); -static DEFINE_SPINLOCK(clock_lock); +static DEFINE_RAW_SPINLOCK(clock_lock); static DEFINE_MUTEX(clock_list_sem); /* Index: linux-rt-rebase.q/arch/sh/kernel/cpu/sh4/sq.c =================================================================== --- linux-rt-rebase.q.orig/arch/sh/kernel/cpu/sh4/sq.c +++ linux-rt-rebase.q/arch/sh/kernel/cpu/sh4/sq.c @@ -37,7 +37,7 @@ struct sq_mapping { }; static struct sq_mapping *sq_mapping_list; -static DEFINE_SPINLOCK(sq_mapping_lock); +static DEFINE_RAW_SPINLOCK(sq_mapping_lock); static struct kmem_cache *sq_cache; static unsigned long *sq_bitmap; Index: linux-rt-rebase.q/arch/sh/kernel/entry-common.S =================================================================== --- linux-rt-rebase.q.orig/arch/sh/kernel/entry-common.S +++ linux-rt-rebase.q/arch/sh/kernel/entry-common.S @@ -157,7 +157,7 @@ ENTRY(resume_userspace) mov.l @(TI_FLAGS,r8), r0 ! current_thread_info->flags tst #_TIF_WORK_MASK, r0 bt/s __restore_all - tst #_TIF_NEED_RESCHED, r0 + tst #_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED, r0 .align 2 work_pending: @@ -209,10 +209,10 @@ work_resched: tst #_TIF_WORK_MASK, r0 bt __restore_all bra work_pending - tst #_TIF_NEED_RESCHED, r0 + tst #_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_DELAYED, r0 .align 2 -1: .long schedule +1: .long __schedule 2: .long do_notify_resume 3: .long restore_all #ifdef CONFIG_TRACE_IRQFLAGS @@ -226,7 +226,7 @@ syscall_exit_work: ! 
r8: current_thread_info tst #_TIF_SYSCALL_TRACE | _TIF_SINGLESTEP, r0 bt/s work_pending - tst #_TIF_NEED_RESCHED, r0 + tst #_TIF_NEED_RESCHED| _TIF_NEED_RESCHED_DELAYED, r0 #ifdef CONFIG_TRACE_IRQFLAGS mov.l 5f, r0 jsr @r0 Index: linux-rt-rebase.q/arch/sh/kernel/irq.c =================================================================== --- linux-rt-rebase.q.orig/arch/sh/kernel/irq.c +++ linux-rt-rebase.q/arch/sh/kernel/irq.c @@ -82,7 +82,7 @@ static union irq_ctx *hardirq_ctx[NR_CPU static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly; #endif -asmlinkage int do_IRQ(unsigned int irq, struct pt_regs *regs) +asmlinkage notrace int do_IRQ(unsigned int irq, struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); #ifdef CONFIG_4KSTACKS Index: linux-rt-rebase.q/arch/sh/kernel/process.c =================================================================== --- linux-rt-rebase.q.orig/arch/sh/kernel/process.c +++ linux-rt-rebase.q/arch/sh/kernel/process.c @@ -63,7 +63,7 @@ void default_idle(void) clear_thread_flag(TIF_POLLING_NRFLAG); smp_mb__after_clear_bit(); set_bl_bit(); - while (!need_resched()) + while (!need_resched() && !need_resched_delayed()) cpu_sleep(); clear_bl_bit(); set_thread_flag(TIF_POLLING_NRFLAG); @@ -84,13 +84,15 @@ void cpu_idle(void) idle = default_idle; tick_nohz_stop_sched_tick(); - while (!need_resched()) + while (!need_resched() && !need_resched_delayed()) idle(); tick_nohz_restart_sched_tick(); - preempt_enable_no_resched(); - schedule(); + local_irq_disable(); + __preempt_enable_no_resched(); + __schedule(); preempt_disable(); + local_irq_enable(); check_pgt_cache(); } } Index: linux-rt-rebase.q/arch/sh/kernel/semaphore.c =================================================================== --- linux-rt-rebase.q.orig/arch/sh/kernel/semaphore.c +++ linux-rt-rebase.q/arch/sh/kernel/semaphore.c @@ -46,7 +46,7 @@ DEFINE_SPINLOCK(semaphore_wake_lock); * critical part is the inline stuff in * where we want to avoid any extra jumps and calls. 
*/ -void __up(struct semaphore *sem) +void __attribute_used__ __compat_up(struct compat_semaphore *sem) { wake_one_more(sem); wake_up(&sem->wait); @@ -104,7 +104,7 @@ void __up(struct semaphore *sem) tsk->state = TASK_RUNNING; \ remove_wait_queue(&sem->wait, &wait); -void __sched __down(struct semaphore * sem) +void __attribute_used__ __sched __compat_down(struct compat_semaphore * sem) { DOWN_VAR DOWN_HEAD(TASK_UNINTERRUPTIBLE) @@ -114,7 +114,7 @@ void __sched __down(struct semaphore * s DOWN_TAIL(TASK_UNINTERRUPTIBLE) } -int __sched __down_interruptible(struct semaphore * sem) +int __attribute_used__ __sched __compat_down_interruptible(struct compat_semaphore * sem) { int ret = 0; DOWN_VAR @@ -133,7 +133,13 @@ int __sched __down_interruptible(struct return ret; } -int __down_trylock(struct semaphore * sem) +int __attribute_used__ __compat_down_trylock(struct compat_semaphore * sem) { return waking_non_zero_trylock(sem); } + +fastcall int __sched compat_sem_is_locked(struct compat_semaphore *sem) +{ + return (int) atomic_read(&sem->count) < 0; +} + Index: linux-rt-rebase.q/arch/sh/kernel/sh_ksyms.c =================================================================== --- linux-rt-rebase.q.orig/arch/sh/kernel/sh_ksyms.c +++ linux-rt-rebase.q/arch/sh/kernel/sh_ksyms.c @@ -26,7 +26,6 @@ EXPORT_SYMBOL(sh_mv); /* platform dependent support */ EXPORT_SYMBOL(dump_fpu); EXPORT_SYMBOL(kernel_thread); -EXPORT_SYMBOL(irq_desc); EXPORT_SYMBOL(no_irq_type); EXPORT_SYMBOL(strlen); @@ -50,9 +49,9 @@ EXPORT_SYMBOL(get_vm_area); #endif /* semaphore exports */ -EXPORT_SYMBOL(__up); -EXPORT_SYMBOL(__down); -EXPORT_SYMBOL(__down_interruptible); +EXPORT_SYMBOL(__compat_up); +EXPORT_SYMBOL(__compat_down); +EXPORT_SYMBOL(__compat_down_interruptible); EXPORT_SYMBOL(__udelay); EXPORT_SYMBOL(__ndelay); @@ -141,7 +140,7 @@ EXPORT_SYMBOL(__flush_purge_region); EXPORT_SYMBOL(clear_user_page); #endif -EXPORT_SYMBOL(__down_trylock); +EXPORT_SYMBOL(__compat_down_trylock); #ifdef CONFIG_SMP EXPORT_SYMBOL(synchronize_irq); Index: linux-rt-rebase.q/arch/sh/kernel/signal.c =================================================================== --- linux-rt-rebase.q.orig/arch/sh/kernel/signal.c +++ linux-rt-rebase.q/arch/sh/kernel/signal.c @@ -566,6 +566,13 @@ static void do_signal(struct pt_regs *re struct k_sigaction ka; sigset_t *oldset; +#ifdef CONFIG_PREEMPT_RT + /* + * Fully-preemptible kernel does not need interrupts disabled: + */ + raw_local_irq_enable(); + preempt_check_resched(); +#endif /* * We want the common case to go fast, which * is why we may in certain cases get here from Index: linux-rt-rebase.q/arch/sh/kernel/time.c =================================================================== --- linux-rt-rebase.q.orig/arch/sh/kernel/time.c +++ linux-rt-rebase.q/arch/sh/kernel/time.c @@ -24,7 +24,7 @@ struct sys_timer *sys_timer; /* Move this somewhere more sensible.. 
*/ -DEFINE_SPINLOCK(rtc_lock); +DEFINE_RAW_SPINLOCK(rtc_lock); EXPORT_SYMBOL(rtc_lock); /* Dummy RTC ops */ Index: linux-rt-rebase.q/arch/sh/kernel/traps.c =================================================================== --- linux-rt-rebase.q.orig/arch/sh/kernel/traps.c +++ linux-rt-rebase.q/arch/sh/kernel/traps.c @@ -77,7 +77,7 @@ static void dump_mem(const char *str, un } } -static DEFINE_SPINLOCK(die_lock); +static DEFINE_RAW_SPINLOCK(die_lock); void die(const char * str, struct pt_regs * regs, long err) { Index: linux-rt-rebase.q/arch/sh/mm/cache-sh4.c =================================================================== --- linux-rt-rebase.q.orig/arch/sh/mm/cache-sh4.c +++ linux-rt-rebase.q/arch/sh/mm/cache-sh4.c @@ -203,7 +203,7 @@ void flush_cache_sigtramp(unsigned long index = CACHE_IC_ADDRESS_ARRAY | (v & current_cpu_data.icache.entry_mask); - local_irq_save(flags); + raw_local_irq_save(flags); jump_to_P2(); for (i = 0; i < current_cpu_data.icache.ways; @@ -212,7 +212,7 @@ void flush_cache_sigtramp(unsigned long back_to_P1(); wmb(); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static inline void flush_cache_4096(unsigned long start, @@ -228,10 +228,10 @@ static inline void flush_cache_4096(unsi (start < CACHE_OC_ADDRESS_ARRAY)) exec_offset = 0x20000000; - local_irq_save(flags); + raw_local_irq_save(flags); __flush_cache_4096(start | SH_CACHE_ASSOC, P1SEGADDR(phys), exec_offset); - local_irq_restore(flags); + raw_local_irq_restore(flags); } /* @@ -259,7 +259,7 @@ static inline void flush_icache_all(void { unsigned long flags, ccr; - local_irq_save(flags); + raw_local_irq_save(flags); jump_to_P2(); /* Flush I-cache */ @@ -273,7 +273,7 @@ static inline void flush_icache_all(void */ back_to_P1(); - local_irq_restore(flags); + raw_local_irq_restore(flags); } void flush_dcache_all(void) Index: linux-rt-rebase.q/arch/sh/mm/init.c =================================================================== --- linux-rt-rebase.q.orig/arch/sh/mm/init.c +++ linux-rt-rebase.q/arch/sh/mm/init.c @@ -21,7 +21,7 @@ #include #include -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); +DEFINE_PER_CPU_LOCKED(struct mmu_gather, mmu_gathers); pgd_t swapper_pg_dir[PTRS_PER_PGD]; void (*copy_page)(void *from, void *to); Index: linux-rt-rebase.q/arch/sh/mm/pg-sh4.c =================================================================== --- linux-rt-rebase.q.orig/arch/sh/mm/pg-sh4.c +++ linux-rt-rebase.q/arch/sh/mm/pg-sh4.c @@ -39,9 +39,9 @@ void clear_user_page(void *to, unsigned entry = pfn_pte(phys_addr >> PAGE_SHIFT, PAGE_KERNEL); mutex_lock(&p3map_mutex[(address & CACHE_ALIAS)>>12]); set_pte(pte, entry); - local_irq_save(flags); + raw_local_irq_save(flags); flush_tlb_one(get_asid(), p3_addr); - local_irq_restore(flags); + raw_local_irq_restore(flags); update_mmu_cache(NULL, p3_addr, entry); __clear_user_page((void *)p3_addr, to); pte_clear(&init_mm, p3_addr, pte); @@ -75,9 +75,9 @@ void copy_user_page(void *to, void *from entry = pfn_pte(phys_addr >> PAGE_SHIFT, PAGE_KERNEL); mutex_lock(&p3map_mutex[(address & CACHE_ALIAS)>>12]); set_pte(pte, entry); - local_irq_save(flags); + raw_local_irq_save(flags); flush_tlb_one(get_asid(), p3_addr); - local_irq_restore(flags); + raw_local_irq_restore(flags); update_mmu_cache(NULL, p3_addr, entry); __copy_user_page((void *)p3_addr, from, to); pte_clear(&init_mm, p3_addr, pte); Index: linux-rt-rebase.q/arch/sh/mm/tlb-flush.c =================================================================== --- linux-rt-rebase.q.orig/arch/sh/mm/tlb-flush.c +++ 
linux-rt-rebase.q/arch/sh/mm/tlb-flush.c @@ -24,7 +24,7 @@ void local_flush_tlb_page(struct vm_area asid = cpu_asid(cpu, vma->vm_mm); page &= PAGE_MASK; - local_irq_save(flags); + raw_local_irq_save(flags); if (vma->vm_mm != current->mm) { saved_asid = get_asid(); set_asid(asid); @@ -32,7 +32,7 @@ void local_flush_tlb_page(struct vm_area local_flush_tlb_one(asid, page); if (saved_asid != MMU_NO_ASID) set_asid(saved_asid); - local_irq_restore(flags); + raw_local_irq_restore(flags); } } @@ -46,7 +46,7 @@ void local_flush_tlb_range(struct vm_are unsigned long flags; int size; - local_irq_save(flags); + raw_local_irq_save(flags); size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; if (size > (MMU_NTLB_ENTRIES/4)) { /* Too many TLB to flush */ cpu_context(cpu, mm) = NO_CONTEXT; @@ -71,7 +71,7 @@ void local_flush_tlb_range(struct vm_are if (saved_asid != MMU_NO_ASID) set_asid(saved_asid); } - local_irq_restore(flags); + raw_local_irq_restore(flags); } } @@ -81,7 +81,7 @@ void local_flush_tlb_kernel_range(unsign unsigned long flags; int size; - local_irq_save(flags); + raw_local_irq_save(flags); size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; if (size > (MMU_NTLB_ENTRIES/4)) { /* Too many TLB to flush */ local_flush_tlb_all(); @@ -100,7 +100,7 @@ void local_flush_tlb_kernel_range(unsign } set_asid(saved_asid); } - local_irq_restore(flags); + raw_local_irq_restore(flags); } void local_flush_tlb_mm(struct mm_struct *mm) @@ -112,11 +112,11 @@ void local_flush_tlb_mm(struct mm_struct if (cpu_context(cpu, mm) != NO_CONTEXT) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); cpu_context(cpu, mm) = NO_CONTEXT; if (mm == current->mm) activate_context(mm, cpu); - local_irq_restore(flags); + raw_local_irq_restore(flags); } } @@ -131,10 +131,10 @@ void local_flush_tlb_all(void) * TF-bit for SH-3, TI-bit for SH-4. * It's same position, bit #2. 
*/ - local_irq_save(flags); + raw_local_irq_save(flags); status = ctrl_inl(MMUCR); status |= 0x04; ctrl_outl(status, MMUCR); ctrl_barrier(); - local_irq_restore(flags); + raw_local_irq_restore(flags); } Index: linux-rt-rebase.q/arch/sh/mm/tlb-sh4.c =================================================================== --- linux-rt-rebase.q.orig/arch/sh/mm/tlb-sh4.c +++ linux-rt-rebase.q/arch/sh/mm/tlb-sh4.c @@ -51,7 +51,7 @@ void update_mmu_cache(struct vm_area_str } } - local_irq_save(flags); + raw_local_irq_save(flags); /* Set PTEH register */ vpn = (address & MMU_VPN_MASK) | get_asid(); @@ -74,7 +74,7 @@ void update_mmu_cache(struct vm_area_str /* Load the TLB */ asm volatile("ldtlb": /* no output */ : /* no input */ : "memory"); - local_irq_restore(flags); + raw_local_irq_restore(flags); } void local_flush_tlb_one(unsigned long asid, unsigned long page) Index: linux-rt-rebase.q/include/asm-sh/atomic-irq.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-sh/atomic-irq.h +++ linux-rt-rebase.q/include/asm-sh/atomic-irq.h @@ -10,29 +10,29 @@ static inline void atomic_add(int i, ato { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); *(long *)v += i; - local_irq_restore(flags); + raw_local_irq_restore(flags); } static inline void atomic_sub(int i, atomic_t *v) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); *(long *)v -= i; - local_irq_restore(flags); + raw_local_irq_restore(flags); } static inline int atomic_add_return(int i, atomic_t *v) { unsigned long temp, flags; - local_irq_save(flags); + raw_local_irq_save(flags); temp = *(long *)v; temp += i; *(long *)v = temp; - local_irq_restore(flags); + raw_local_irq_restore(flags); return temp; } @@ -41,11 +41,11 @@ static inline int atomic_sub_return(int { unsigned long temp, flags; - local_irq_save(flags); + raw_local_irq_save(flags); temp = *(long *)v; temp -= i; *(long *)v = temp; - local_irq_restore(flags); + raw_local_irq_restore(flags); return temp; } @@ -54,18 +54,18 @@ static inline void atomic_clear_mask(uns { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); *(long *)v &= ~mask; - local_irq_restore(flags); + raw_local_irq_restore(flags); } static inline void atomic_set_mask(unsigned int mask, atomic_t *v) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); *(long *)v |= mask; - local_irq_restore(flags); + raw_local_irq_restore(flags); } #endif /* __ASM_SH_ATOMIC_IRQ_H */ Index: linux-rt-rebase.q/include/asm-sh/atomic.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-sh/atomic.h +++ linux-rt-rebase.q/include/asm-sh/atomic.h @@ -49,11 +49,11 @@ static inline int atomic_cmpxchg(atomic_ int ret; unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); ret = v->counter; if (likely(ret == old)) v->counter = new; - local_irq_restore(flags); + raw_local_irq_restore(flags); return ret; } @@ -65,11 +65,11 @@ static inline int atomic_add_unless(atom int ret; unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); ret = v->counter; if (ret != u) v->counter += a; - local_irq_restore(flags); + raw_local_irq_restore(flags); return ret != u; } Index: linux-rt-rebase.q/include/asm-sh/bitops.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-sh/bitops.h +++ linux-rt-rebase.q/include/asm-sh/bitops.h @@ -14,9 +14,9 @@ static inline void 
set_bit(int nr, volat a += nr >> 5; mask = 1 << (nr & 0x1f); - local_irq_save(flags); + raw_local_irq_save(flags); *a |= mask; - local_irq_restore(flags); + raw_local_irq_restore(flags); } /* @@ -32,9 +32,9 @@ static inline void clear_bit(int nr, vol a += nr >> 5; mask = 1 << (nr & 0x1f); - local_irq_save(flags); + raw_local_irq_save(flags); *a &= ~mask; - local_irq_restore(flags); + raw_local_irq_restore(flags); } static inline void change_bit(int nr, volatile void * addr) @@ -45,9 +45,9 @@ static inline void change_bit(int nr, vo a += nr >> 5; mask = 1 << (nr & 0x1f); - local_irq_save(flags); + raw_local_irq_save(flags); *a ^= mask; - local_irq_restore(flags); + raw_local_irq_restore(flags); } static inline int test_and_set_bit(int nr, volatile void * addr) @@ -58,10 +58,10 @@ static inline int test_and_set_bit(int n a += nr >> 5; mask = 1 << (nr & 0x1f); - local_irq_save(flags); + raw_local_irq_save(flags); retval = (mask & *a) != 0; *a |= mask; - local_irq_restore(flags); + raw_local_irq_restore(flags); return retval; } @@ -74,10 +74,10 @@ static inline int test_and_clear_bit(int a += nr >> 5; mask = 1 << (nr & 0x1f); - local_irq_save(flags); + raw_local_irq_save(flags); retval = (mask & *a) != 0; *a &= ~mask; - local_irq_restore(flags); + raw_local_irq_restore(flags); return retval; } @@ -90,10 +90,10 @@ static inline int test_and_change_bit(in a += nr >> 5; mask = 1 << (nr & 0x1f); - local_irq_save(flags); + raw_local_irq_save(flags); retval = (mask & *a) != 0; *a ^= mask; - local_irq_restore(flags); + raw_local_irq_restore(flags); return retval; } Index: linux-rt-rebase.q/include/asm-sh/pgalloc.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-sh/pgalloc.h +++ linux-rt-rebase.q/include/asm-sh/pgalloc.h @@ -13,7 +13,7 @@ static inline void pmd_populate_kernel(s set_pmd(pmd, __pmd((unsigned long)pte)); } -static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, +static inline void notrace pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *pte) { set_pmd(pmd, __pmd((unsigned long)page_address(pte))); Index: linux-rt-rebase.q/include/asm-sh/rwsem.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-sh/rwsem.h +++ linux-rt-rebase.q/include/asm-sh/rwsem.h @@ -19,7 +19,7 @@ /* * the semaphore definition */ -struct rw_semaphore { +struct compat_rw_semaphore { long count; #define RWSEM_UNLOCKED_VALUE 0x00000000 #define RWSEM_ACTIVE_BIAS 0x00000001 @@ -27,7 +27,7 @@ struct rw_semaphore { #define RWSEM_WAITING_BIAS (-0x00010000) #define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS #define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) - spinlock_t wait_lock; + raw_spinlock_t wait_lock; struct list_head wait_list; #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; @@ -45,25 +45,25 @@ struct rw_semaphore { LIST_HEAD_INIT((name).wait_list) \ __RWSEM_DEP_MAP_INIT(name) } -#define DECLARE_RWSEM(name) \ - struct rw_semaphore name = __RWSEM_INITIALIZER(name) +#define COMPAT_DECLARE_RWSEM(name) \ + struct compat_rw_semaphore name = __RWSEM_INITIALIZER(name) -extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem); -extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem); -extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem); -extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem); +extern struct compat_rw_semaphore *rwsem_down_read_failed(struct compat_rw_semaphore *sem); 
+extern struct compat_rw_semaphore *rwsem_down_write_failed(struct compat_rw_semaphore *sem); +extern struct compat_rw_semaphore *rwsem_wake(struct compat_rw_semaphore *sem); +extern struct compat_rw_semaphore *rwsem_downgrade_wake(struct compat_rw_semaphore *sem); -extern void __init_rwsem(struct rw_semaphore *sem, const char *name, +extern void __compat_init_rwsem(struct rw_semaphore *sem, const char *name, struct lock_class_key *key); -#define init_rwsem(sem) \ +#define compat_init_rwsem(sem) \ do { \ static struct lock_class_key __key; \ \ - __init_rwsem((sem), #sem, &__key); \ + __compat_init_rwsem((sem), #sem, &__key); \ } while (0) -static inline void init_rwsem(struct rw_semaphore *sem) +static inline void compat_init_rwsem(struct rw_semaphore *sem) { sem->count = RWSEM_UNLOCKED_VALUE; spin_lock_init(&sem->wait_lock); @@ -73,7 +73,7 @@ static inline void init_rwsem(struct rw_ /* * lock for reading */ -static inline void __down_read(struct rw_semaphore *sem) +static inline void __down_read(struct compat_rw_semaphore *sem) { if (atomic_inc_return((atomic_t *)(&sem->count)) > 0) smp_wmb(); @@ -81,7 +81,7 @@ static inline void __down_read(struct rw rwsem_down_read_failed(sem); } -static inline int __down_read_trylock(struct rw_semaphore *sem) +static inline int __down_read_trylock(struct compat_rw_semaphore *sem) { int tmp; @@ -98,7 +98,7 @@ static inline int __down_read_trylock(st /* * lock for writing */ -static inline void __down_write(struct rw_semaphore *sem) +static inline void __down_write(struct compat_rw_semaphore *sem) { int tmp; @@ -110,7 +110,7 @@ static inline void __down_write(struct r rwsem_down_write_failed(sem); } -static inline int __down_write_trylock(struct rw_semaphore *sem) +static inline int __down_write_trylock(struct compat_rw_semaphore *sem) { int tmp; @@ -123,7 +123,7 @@ static inline int __down_write_trylock(s /* * unlock after reading */ -static inline void __up_read(struct rw_semaphore *sem) +static inline void __up_read(struct compat_rw_semaphore *sem) { int tmp; @@ -136,7 +136,7 @@ static inline void __up_read(struct rw_s /* * unlock after writing */ -static inline void __up_write(struct rw_semaphore *sem) +static inline void __up_write(struct compat_rw_semaphore *sem) { smp_wmb(); if (atomic_sub_return(RWSEM_ACTIVE_WRITE_BIAS, @@ -147,7 +147,7 @@ static inline void __up_write(struct rw_ /* * implement atomic add functionality */ -static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem) +static inline void rwsem_atomic_add(int delta, struct compat_rw_semaphore *sem) { atomic_add(delta, (atomic_t *)(&sem->count)); } @@ -155,7 +155,7 @@ static inline void rwsem_atomic_add(int /* * downgrade write lock to read lock */ -static inline void __downgrade_write(struct rw_semaphore *sem) +static inline void __downgrade_write(struct compat_rw_semaphore *sem) { int tmp; @@ -165,7 +165,7 @@ static inline void __downgrade_write(str rwsem_downgrade_wake(sem); } -static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) +static inline void __down_write_nested(struct compat_rw_semaphore *sem, int subclass) { __down_write(sem); } @@ -173,13 +173,13 @@ static inline void __down_write_nested(s /* * implement exchange and add functionality */ -static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem) +static inline int rwsem_atomic_update(int delta, struct compat_rw_semaphore *sem) { smp_mb(); return atomic_add_return(delta, (atomic_t *)(&sem->count)); } -static inline int rwsem_is_locked(struct rw_semaphore *sem) +static 
inline int rwsem_is_locked(struct compat_rw_semaphore *sem) { return (sem->count != 0); } Index: linux-rt-rebase.q/include/asm-sh/semaphore-helper.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-sh/semaphore-helper.h +++ linux-rt-rebase.q/include/asm-sh/semaphore-helper.h @@ -14,12 +14,12 @@ * This is trivially done with load_locked/store_cond, * which we have. Let the rest of the losers suck eggs. */ -static __inline__ void wake_one_more(struct semaphore * sem) +static __inline__ void wake_one_more(struct compat_semaphore * sem) { atomic_inc((atomic_t *)&sem->sleepers); } -static __inline__ int waking_non_zero(struct semaphore *sem) +static __inline__ int waking_non_zero(struct compat_semaphore *sem) { unsigned long flags; int ret = 0; @@ -43,7 +43,7 @@ static __inline__ int waking_non_zero(st * protected by the spinlock in order to make atomic this atomic_inc() with the * atomic_read() in wake_one_more(), otherwise we can race. -arca */ -static __inline__ int waking_non_zero_interruptible(struct semaphore *sem, +static __inline__ int waking_non_zero_interruptible(struct compat_semaphore *sem, struct task_struct *tsk) { unsigned long flags; @@ -70,7 +70,7 @@ static __inline__ int waking_non_zero_in * protected by the spinlock in order to make atomic this atomic_inc() with the * atomic_read() in wake_one_more(), otherwise we can race. -arca */ -static __inline__ int waking_non_zero_trylock(struct semaphore *sem) +static __inline__ int waking_non_zero_trylock(struct compat_semaphore *sem) { unsigned long flags; int ret = 1; Index: linux-rt-rebase.q/include/asm-sh/semaphore.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-sh/semaphore.h +++ linux-rt-rebase.q/include/asm-sh/semaphore.h @@ -20,29 +20,36 @@ #include #include -struct semaphore { +/* + * On !PREEMPT_RT all semaphores are compat: + */ +#ifndef CONFIG_PREEMPT_RT +# define compat_semaphore semaphore +#endif + +struct compat_semaphore { atomic_t count; int sleepers; wait_queue_head_t wait; }; -#define __SEMAPHORE_INITIALIZER(name, n) \ +#define __COMPAT_SEMAPHORE_INITIALIZER(name, n) \ { \ .count = ATOMIC_INIT(n), \ .sleepers = 0, \ .wait = __WAIT_QUEUE_HEAD_INITIALIZER((name).wait) \ } -#define __DECLARE_SEMAPHORE_GENERIC(name,count) \ - struct semaphore name = __SEMAPHORE_INITIALIZER(name,count) +#define __COMPAT_DECLARE_SEMAPHORE_GENERIC(name,count) \ + struct compat_semaphore name = __COMPAT_SEMAPHORE_INITIALIZER(name,count) -#define DECLARE_MUTEX(name) __DECLARE_SEMAPHORE_GENERIC(name,1) -#define DECLARE_MUTEX_LOCKED(name) __DECLARE_SEMAPHORE_GENERIC(name,0) +#define COMPAT_DECLARE_MUTEX(name) __COMPAT_DECLARE_SEMAPHORE_GENERIC(name,1) +#define COMPAT_DECLARE_MUTEX_LOCKED(name) __COMPAT_DECLARE_SEMAPHORE_GENERIC(name,0) -static inline void sema_init (struct semaphore *sem, int val) +static inline void compat_sema_init (struct compat_semaphore *sem, int val) { /* - * *sem = (struct semaphore)__SEMAPHORE_INITIALIZER((*sem),val); + * *sem = (struct compat_semaphore)__SEMAPHORE_INITIALIZER((*sem),val); * * i'd rather use the more flexible initialization above, but sadly * GCC 2.7.2.3 emits a bogus warning. EGCS doesn't. Oh well. 
@@ -52,14 +59,14 @@ static inline void sema_init (struct sem init_waitqueue_head(&sem->wait); } -static inline void init_MUTEX (struct semaphore *sem) +static inline void compat_init_MUTEX (struct compat_semaphore *sem) { - sema_init(sem, 1); + compat_sema_init(sem, 1); } -static inline void init_MUTEX_LOCKED (struct semaphore *sem) +static inline void compat_init_MUTEX_LOCKED (struct compat_semaphore *sem) { - sema_init(sem, 0); + compat_sema_init(sem, 0); } #if 0 @@ -69,36 +76,36 @@ asmlinkage int __down_failed_trylock(vo asmlinkage void __up_wakeup(void /* special register calling convention */); #endif -asmlinkage void __down(struct semaphore * sem); -asmlinkage int __down_interruptible(struct semaphore * sem); -asmlinkage int __down_trylock(struct semaphore * sem); -asmlinkage void __up(struct semaphore * sem); +asmlinkage void __compat_down(struct compat_semaphore * sem); +asmlinkage int __compat_down_interruptible(struct compat_semaphore * sem); +asmlinkage int __compat_down_trylock(struct compat_semaphore * sem); +asmlinkage void __compat_up(struct compat_semaphore * sem); extern spinlock_t semaphore_wake_lock; -static inline void down(struct semaphore * sem) +static inline void compat_down(struct compat_semaphore * sem) { might_sleep(); if (atomic_dec_return(&sem->count) < 0) - __down(sem); + __compat_down(sem); } -static inline int down_interruptible(struct semaphore * sem) +static inline int compat_down_interruptible(struct compat_semaphore * sem) { int ret = 0; might_sleep(); if (atomic_dec_return(&sem->count) < 0) - ret = __down_interruptible(sem); + ret = __compat_down_interruptible(sem); return ret; } -static inline int down_trylock(struct semaphore * sem) +static inline int compat_down_trylock(struct compat_semaphore * sem) { int ret = 0; if (atomic_dec_return(&sem->count) < 0) - ret = __down_trylock(sem); + ret = __compat_down_trylock(sem); return ret; } @@ -106,11 +113,17 @@ static inline int down_trylock(struct se * Note! This is subtle. We jump to wake people up only if * the semaphore was negative (== somebody was waiting on it). 
*/ -static inline void up(struct semaphore * sem) +static inline void compat_up(struct compat_semaphore * sem) { if (atomic_inc_return(&sem->count) <= 0) - __up(sem); + __compat_up(sem); } +extern int compat_sem_is_locked(struct compat_semaphore *sem); + +#define compat_sema_count(sem) atomic_read(&(sem)->count) + +#include + #endif #endif /* __ASM_SH_SEMAPHORE_H */ Index: linux-rt-rebase.q/include/asm-sh/system.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-sh/system.h +++ linux-rt-rebase.q/include/asm-sh/system.h @@ -158,10 +158,10 @@ static inline unsigned long xchg_u32(vol { unsigned long flags, retval; - local_irq_save(flags); + raw_local_irq_save(flags); retval = *m; *m = val; - local_irq_restore(flags); + raw_local_irq_restore(flags); return retval; } @@ -169,10 +169,10 @@ static inline unsigned long xchg_u8(vola { unsigned long flags, retval; - local_irq_save(flags); + raw_local_irq_save(flags); retval = *m; *m = val & 0xff; - local_irq_restore(flags); + raw_local_irq_restore(flags); return retval; } @@ -207,11 +207,11 @@ static inline unsigned long __cmpxchg_u3 __u32 retval; unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); retval = *m; if (retval == old) *m = new; - local_irq_restore(flags); /* implies memory barrier */ + raw_local_irq_restore(flags); /* implies memory barrier */ return retval; } Index: linux-rt-rebase.q/include/asm-sh/thread_info.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-sh/thread_info.h +++ linux-rt-rebase.q/include/asm-sh/thread_info.h @@ -112,6 +112,7 @@ static inline struct thread_info *curren #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ #define TIF_RESTORE_SIGMASK 4 /* restore signal mask in do_signal() */ #define TIF_SINGLESTEP 5 /* singlestepping active */ +#define TIF_NEED_RESCHED_DELAYED 6 /* reschedule on return to userspace */ #define TIF_USEDFPU 16 /* FPU was used by this task this quantum (SMP) */ #define TIF_POLLING_NRFLAG 17 /* true if poll_idle() is polling TIF_NEED_RESCHED */ #define TIF_MEMDIE 18 @@ -123,6 +124,7 @@ static inline struct thread_info *curren #define _TIF_NEED_RESCHED (1<s_files iteration semantics Ensure no new files will be added when we're inspecting 'all' files. Without this, files could be added in front while we're iterating and we'd miss those. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- fs/file_table.c | 10 ++++++++++ fs/super.c | 4 ++++ include/linux/fs.h | 2 ++ security/selinux/selinuxfs.c | 4 ++++ 4 files changed, 20 insertions(+) Index: linux-rt-rebase.q/fs/file_table.c =================================================================== --- linux-rt-rebase.q.orig/fs/file_table.c +++ linux-rt-rebase.q/fs/file_table.c @@ -353,6 +353,12 @@ EXPORT_SYMBOL_GPL(filevec_add_drain_all) void file_kill(struct file *file) { + if (file && file->f_mapping && file->f_mapping->host) { + struct super_block *sb = file->f_mapping->host->i_sb; + if (sb) + synchronize_qrcu(&sb->s_qrcu); + } + if (file_flag(file, F_SUPERBLOCK)) { void **ptr; @@ -409,8 +415,10 @@ void file_move(struct file *file, struct int fs_may_remount_ro(struct super_block *sb) { struct file *file; + int idx; /* Check that no files are currently opened for writing. 
*/ + idx = qrcu_read_lock(&sb->s_qrcu); filevec_add_drain_all(); lock_list_for_each_entry(file, &sb->s_files, f_u.fu_llist) { struct inode *inode = file->f_path.dentry->d_inode; @@ -423,9 +431,11 @@ int fs_may_remount_ro(struct super_block if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE)) goto too_bad; } + qrcu_read_unlock(&sb->s_qrcu, idx); return 1; /* Tis' cool bro. */ too_bad: lock_list_for_each_entry_stop(file, f_u.fu_llist); + qrcu_read_unlock(&sb->s_qrcu, idx); return 0; } Index: linux-rt-rebase.q/fs/super.c =================================================================== --- linux-rt-rebase.q.orig/fs/super.c +++ linux-rt-rebase.q/fs/super.c @@ -68,6 +68,7 @@ static struct super_block *alloc_super(s INIT_LIST_HEAD(&s->s_dirty); INIT_LIST_HEAD(&s->s_io); INIT_LOCK_LIST_HEAD(&s->s_files); + init_qrcu_struct(&s->s_qrcu); INIT_LIST_HEAD(&s->s_instances); INIT_HLIST_HEAD(&s->s_anon); INIT_LIST_HEAD(&s->s_inodes); @@ -568,12 +569,15 @@ out: static void mark_files_ro(struct super_block *sb) { struct file *f; + int idx; + idx = qrcu_read_lock(&sb->s_qrcu); filevec_add_drain_all(); lock_list_for_each_entry(f, &sb->s_files, f_u.fu_llist) { if (S_ISREG(f->f_path.dentry->d_inode->i_mode) && file_count(f)) f->f_mode &= ~FMODE_WRITE; } + qrcu_read_unlock(&sb->s_qrcu, idx); } /** Index: linux-rt-rebase.q/include/linux/fs.h =================================================================== --- linux-rt-rebase.q.orig/include/linux/fs.h +++ linux-rt-rebase.q/include/linux/fs.h @@ -286,6 +286,7 @@ extern int dir_notify_enable; #include #include #include +#include #include #include @@ -967,6 +968,7 @@ struct super_block { struct list_head s_io; /* parked for writeback */ struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */ struct lock_list_head s_files; + struct qrcu_struct s_qrcu; struct block_device *s_bdev; struct mtd_info *s_mtd; Index: linux-rt-rebase.q/security/selinux/selinuxfs.c =================================================================== --- linux-rt-rebase.q.orig/security/selinux/selinuxfs.c +++ linux-rt-rebase.q/security/selinux/selinuxfs.c @@ -964,6 +964,7 @@ static void sel_remove_entries(struct de struct list_head *node; struct file *filp; struct super_block *sb = de->d_sb; + int idx; spin_lock(&dcache_lock); node = de->d_subdirs.next; @@ -984,6 +985,8 @@ static void sel_remove_entries(struct de spin_unlock(&dcache_lock); + idx = qrcu_read_lock(&sb->s_qrcu); + filevec_add_drain_all(); lock_list_for_each_entry(filp, &sb->s_files, f_u.fu_llist) { struct dentry * dentry = filp->f_path.dentry; @@ -992,6 +995,7 @@ static void sel_remove_entries(struct de } filp->f_op = NULL; } + qrcu_read_unlock(&sb->s_qrcu, idx); } #define BOOL_DIR_NAME "booleans" patches/preempt-realtime-powerpc-add-raw-relax-macros.patch0000664000077200007720000000215710653433165023416 0ustar mingomingoFrom tsutomu.owa@toshiba.co.jp Mon May 14 15:26:25 2007 Date: Mon, 14 May 2007 15:26:25 +0900 From: Tsutomu OWA To: linuxppc-dev@ozlabs.org, linux-kernel@vger.kernel.org Cc: mingo@elte.hu, tglx@linutronix.de Subject: Re: [patch 1/4] powerpc 2.6.21-rt1: fix a build breakage by adding __raw_*_relax() macros Add missing macros to fix a build breakage for PREEMPT_DESKTOP. 
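For context: the __raw_*_relax() hooks are what the generic
contended-lock wait loops invoke while spinning for a lock to become
free; PREEMPT_DESKTOP builds reference them on powerpc, hence the build
breakage. Roughly (a simplified sketch of such a wait loop, not the
exact kernel code; type names as in the -rt queue's headers):

	/* Spin until the lock can be taken, relaxing the CPU between
	 * attempts. After this patch, powerpc maps all three relax
	 * hooks to cpu_relax(). */
	static inline void demo_spin_wait(__raw_spinlock_t *lock)
	{
		while (!__raw_spin_trylock(lock))
			__raw_spin_relax(lock);
	}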
Signed-off-by: Tsutomu OWA -- owa --- include/asm-powerpc/spinlock.h | 4 ++++ 1 file changed, 4 insertions(+) Index: linux-rt-rebase.q/include/asm-powerpc/spinlock.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-powerpc/spinlock.h +++ linux-rt-rebase.q/include/asm-powerpc/spinlock.h @@ -289,5 +289,9 @@ static __inline__ void __raw_write_unloc #define _raw_read_relax(lock) __rw_yield(lock) #define _raw_write_relax(lock) __rw_yield(lock) +#define __raw_spin_relax(lock) cpu_relax() +#define __raw_read_relax(lock) cpu_relax() +#define __raw_write_relax(lock) cpu_relax() + #endif /* __KERNEL__ */ #endif /* __ASM_SPINLOCK_H */ patches/ppc-add-mcount.patch0000664000077200007720000001016210653433162015360 0ustar mingomingoFrom tsutomu.owa@toshiba.co.jp Mon May 14 10:15:30 2007 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.1.7-deb (2006-10-05) on debian X-Spam-Level: X-Spam-Status: No, score=0.0 required=5.0 tests=UNPARSEABLE_RELAY autolearn=ham version=3.1.7-deb Received: from imx12.toshiba.co.jp (imx12.toshiba.co.jp [61.202.160.132]) by mail.tglx.de (Postfix) with ESMTP id 7006365C065 for ; Mon, 14 May 2007 10:15:30 +0200 (CEST) Received: from wall11.toshiba.co.jp (wall11 [133.199.90.149]) by imx12.toshiba.co.jp with ESMTP id l4E8FKmi007480; Mon, 14 May 2007 17:15:20 +0900 (JST) Received: (from root@localhost) by wall11.toshiba.co.jp id l4E8FKaH003434; Mon, 14 May 2007 17:15:20 +0900 (JST) Received: from ovp11.toshiba.co.jp [133.199.90.148] by wall11.toshiba.co.jp with ESMTP id TAA03430; Mon, 14 May 2007 17:15:20 +0900 Received: from mx2.toshiba.co.jp (localhost [127.0.0.1]) by ovp11.toshiba.co.jp with ESMTP id l4E8FJCq025717; Mon, 14 May 2007 17:15:19 +0900 (JST) Received: from rdcgw.rdc.toshiba.co.jp by toshiba.co.jp id l4E8FJ3Y013473; Mon, 14 May 2007 17:15:19 +0900 (JST) Received: from island.swc.toshiba.co.jp by rdcgw.rdc.toshiba.co.jp (8.8.8p2+Sun/3.7W) with ESMTP id RAA01521; Mon, 14 May 2007 17:15:18 +0900 (JST) Received: from forest.toshiba.co.jp (forest [133.196.122.2]) by island.swc.toshiba.co.jp (Postfix) with ESMTP id 87FCB40002; Mon, 14 May 2007 17:15:10 +0900 (JST) Date: Mon, 14 May 2007 17:15:10 +0900 Message-ID: From: Tsutomu OWA To: linuxppc-dev@ozlabs.org, linux-kernel@vger.kernel.org Cc: mingo@elte.hu, tglx@linutronix.de Subject: Re: [patch 1/5] powerpc 2.6.21-rt1: add mcount() and _mcount() In-Reply-To: References: User-Agent: Wanderlust/2.8.1 (Something) Emacs/20.7 Mule/4.0 (HANANOEN) Organization: Software Engineering Center, TOSHIBA. MIME-Version: 1.0 (generated by SEMI 1.14.4 - "Hosorogi") Content-Type: text/plain; charset=US-ASCII X-Evolution-Source: imap://tglx%40linutronix.de@localhost:8993/ Content-Transfer-Encoding: 8bit add mcount() and _mcount() for latency trace support. 
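For readers unfamiliar with the mechanism: building with CONFIG_MCOUNT
compiles the kernel with gcc's -pg, which emits a call to _mcount() in
every function prologue; the stubs added below check mcount_enabled and
hand the instrumented address plus its caller to the latency tracer via
__trace(). In C terms the stub does roughly this (illustrative only; the
real version must be assembly so it can preserve the parameter-passing
registers, as the patch comment notes; notrace comes from the
add-notrace patch further down and keeps the hook itself uninstrumented):

	extern int mcount_enabled;
	extern void __trace(unsigned long eip, unsigned long parent_eip);

	/* C-level equivalent of the _mcount stub below: eip is the
	 * instrumented function's address, parent_eip its caller. */
	void notrace demo_mcount(unsigned long eip, unsigned long parent_eip)
	{
		if (mcount_enabled)
			__trace(eip, parent_eip);
	}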
Signed-off-by: Tsutomu OWA -- owa --- arch/powerpc/kernel/entry_64.S | 60 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) Index: linux-rt-rebase.q/arch/powerpc/kernel/entry_64.S =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/kernel/entry_64.S +++ linux-rt-rebase.q/arch/powerpc/kernel/entry_64.S @@ -826,3 +826,63 @@ _GLOBAL(enter_prom) ld r0,16(r1) mtlr r0 blr + +#ifdef CONFIG_MCOUNT +/* + * code almost taken from entry_32.S + */ +#define MCOUNT_FRAME_SIZE 32 +_GLOBAL(mcount) + stdu r1,-MCOUNT_FRAME_SIZE(r1) + mflr r3 + + LOAD_REG_ADDR(r5,mcount_enabled) + lwz r5,0(r5) + std r3,MCOUNT_FRAME_SIZE+16(r1) + cmpwi r5,0 + beq 1f + + /* r3 contains lr (eip), put parent lr (parent_eip) in r4 */ + ld r4,MCOUNT_FRAME_SIZE(r1) + ld r4,16(r4) + bl .__trace + nop +1: + ld r0,MCOUNT_FRAME_SIZE+16(r1) + mtlr r0 + addi r1,r1,MCOUNT_FRAME_SIZE + blr + +/* + * Based on glibc-2.4/sysdeps/powerpc/powerpc64/ppc-mcount.S + * + * We don't need to save the parameter-passing registers as gcc takes + * care of that for us. Thus this function looks fairly normal. + * In fact, the generic code would work for us. + */ +_GLOBAL(_mcount) + /* return if we're in real mode. */ + mfmsr r3 + andi. r0,r3,MSR_IR|MSR_DR /* see if relocation is on? */ + beqlr /* if not, do nothing. */ + /* we're in translation mode. keep going. */ + mflr r3 + ld r11,0(r1) /* load back chain ptr */ + stdu r1,-STACK_FRAME_OVERHEAD(r1) + std r3,STACK_FRAME_OVERHEAD+16(r1) + ld r4,16(r11) /* LR in back chain */ + LOAD_REG_ADDR(r5,mcount_enabled) + lwz r5,0(r5) + cmpwi r5,0 /* see if mcount_enabled? */ + beq 1f /* if disabled, then skip */ + + /* r3 contains lr (eip), put parent lr (parent_eip) in r4 */ + bl .__trace + nop +1: + ld r0,STACK_FRAME_OVERHEAD+16(r1) /* restore saved LR */ + mtlr r0 + addi r1,r1,STACK_FRAME_OVERHEAD + blr + +#endif /* CONFIG_MCOUNT */ patches/add-notrace.patch0000664000077200007720000000070510653433162014730 0ustar mingomingo--- include/linux/linkage.h | 2 ++ 1 file changed, 2 insertions(+) Index: linux-rt-rebase.q/include/linux/linkage.h =================================================================== --- linux-rt-rebase.q.orig/include/linux/linkage.h +++ linux-rt-rebase.q/include/linux/linkage.h @@ -3,6 +3,8 @@ #include +#define notrace __attribute ((no_instrument_function)) + #ifdef __cplusplus #define CPP_ASMLINKAGE extern "C" #else patches/quicklist-release-before-free-page.patch0000664000077200007720000001527210653433170021272 0ustar mingomingoFrom peterz@infradead.org Mon Jul 23 21:40:44 2007 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.1.7-deb (2006-10-05) on debian X-Spam-Level: X-Spam-Status: No, score=0.0 required=5.0 tests=none autolearn=ham version=3.1.7-deb Received: from mx2.mail.elte.hu (mx2.mail.elte.hu [157.181.151.9]) (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits)) (No client certificate requested) by mail.tglx.de (Postfix) with ESMTP id CAC4B65C003 for ; Mon, 23 Jul 2007 21:40:44 +0200 (CEST) Received: from elvis.elte.hu ([157.181.1.14]) by mx2.mail.elte.hu with esmtp (Exim) id 1ID3lr-0000tI-MW from for ; Mon, 23 Jul 2007 21:40:43 +0200 Received: by elvis.elte.hu (Postfix, from userid 1004) id 1D9593E2153; Mon, 23 Jul 2007 21:40:43 +0200 (CEST) Resent-From: Ingo Molnar Resent-Date: Mon, 23 Jul 2007 21:40:40 +0200 Resent-Message-ID: <20070723194040.GA7831@elte.hu> Resent-To: Thomas Gleixner X-Original-To: mingo@elvis.elte.hu Delivered-To: mingo@elvis.elte.hu Received: from mx3.mail.elte.hu 
(mx3.mail.elte.hu [157.181.1.138]) by elvis.elte.hu (Postfix) with ESMTP id 03EA13E214E for ; Mon, 23 Jul 2007 18:33:06 +0200 (CEST) Received: from pentafluge.infradead.org ([213.146.154.40]) by mx3.mail.elte.hu with esmtp (Exim) id 1ID0qK-0003mK-9A from for ; Mon, 23 Jul 2007 18:33:08 +0200 Received: from i55087.upc-i.chello.nl ([62.195.55.87] helo=[192.168.0.111]) by pentafluge.infradead.org with esmtpsa (Exim 4.63 #1 (Red Hat Linux)) id 1ID0qB-0003Kf-Tf; Mon, 23 Jul 2007 17:33:00 +0100 Subject: Re: [PATCH] release quicklist before free_page From: Peter Zijlstra To: Daniel Walker Cc: mingo@elte.hu, paulmck@linux.vnet.ibm.com, linux-kernel@vger.kernel.org, linux-rt-users@vger.kernel.org In-Reply-To: <20070723152129.036573829@mvista.com> References: <20070723152129.036573829@mvista.com> Content-Type: text/plain Date: Mon, 23 Jul 2007 18:32:58 +0200 Message-Id: <1185208378.8197.20.camel@twins> Mime-Version: 1.0 X-Mailer: Evolution 2.10.1 X-ELTE-VirusStatus: clean X-ELTE-SpamScore: -1.0 X-ELTE-SpamLevel: X-ELTE-SpamCheck: no X-ELTE-SpamVersion: ELTE 2.0 X-ELTE-SpamCheck-Details: score=-1.0 required=5.9 tests=BAYES_00 autolearn=no SpamAssassin version=3.0.3 -1.0 BAYES_00 BODY: Bayesian spam probability is 0 to 1% [score: 0.0000] Received-SPF: softfail (mx2: transitioning domain of elte.hu does not designate 157.181.1.14 as permitted sender) client-ip=157.181.1.14; envelope-from=mingo@elte.hu; helo=elvis.elte.hu; X-ELTE-VirusStatus: clean X-Evolution-Source: imap://tglx%40linutronix.de@localhost:8993/ Content-Transfer-Encoding: 8bit On Mon, 2007-07-23 at 08:21 -0700, Daniel Walker wrote: > Resolves, > > BUG: sleeping function called from invalid context cc1(29651) at kernel/rtmutex.c:636 > in_atomic():1 [00000001], irqs_disabled():0 > [] __might_sleep+0xf3/0xf9 > [] __rt_spin_lock+0x21/0x3c > [] get_zone_pcp+0x20/0x29 > [] free_hot_cold_page+0xdc/0x167 > [] add_preempt_count+0x12/0xcc > [] pgd_dtor+0x0/0x1 > [] quicklist_trim+0xb7/0xe3 > [] check_pgt_cache+0x19/0x1c > [] free_pgtables+0x54/0x12c > [] add_preempt_count+0x12/0xcc > [] unmap_region+0xeb/0x13b > > > It looks like the quicklist isn't used after a few variables are evaluated. > So no need to keep preemption disabled over the whole function. Not quite, it uses preempt_disable() to avoid migration and stick to a cpu. Without that it might end up freeing pages from another quicklist. How about this - compile tested only --- We cannot call the page allocator with preemption-disabled, use the per_cpu_locked construct to allow preemption while guarding the per cpu data. Signed-off-by: Peter Zijlstra --- include/linux/quicklist.h | 19 +++++++++++++++---- mm/quicklist.c | 9 +++++---- 2 files changed, 20 insertions(+), 8 deletions(-) Index: linux-rt-rebase.q/include/linux/quicklist.h =================================================================== --- linux-rt-rebase.q.orig/include/linux/quicklist.h +++ linux-rt-rebase.q/include/linux/quicklist.h @@ -18,7 +18,7 @@ struct quicklist { int nr_pages; }; -DECLARE_PER_CPU(struct quicklist, quicklist)[CONFIG_NR_QUICK]; +DECLARE_PER_CPU_LOCKED(struct quicklist, quicklist)[CONFIG_NR_QUICK]; /* * The two key functions quicklist_alloc and quicklist_free are inline so @@ -30,19 +30,30 @@ DECLARE_PER_CPU(struct quicklist, quickl * The fast patch in quicklist_alloc touched only a per cpu cacheline and * the first cacheline of the page itself. There is minmal overhead involved. 
*/ -static inline void *quicklist_alloc(int nr, gfp_t flags, void (*ctor)(void *)) +static inline void *__quicklist_alloc(int cpu, int nr, gfp_t flags, void (*ctor)(void *)) { struct quicklist *q; void **p = NULL; - q =&get_cpu_var(quicklist)[nr]; + q = &__get_cpu_var_locked(quicklist, cpu)[nr]; p = q->page; if (likely(p)) { q->page = p[0]; p[0] = NULL; q->nr_pages--; } - put_cpu_var(quicklist); + return p; +} + +static inline void *quicklist_alloc(int nr, gfp_t flags, void (*ctor)(void *)) +{ + struct quicklist *q; + void **p = NULL; + int cpu; + + (void)get_cpu_var_locked(quicklist, &cpu)[nr]; + p = __quicklist_alloc(cpu, nr, flags, ctor); + put_cpu_var_locked(quicklist, cpu); if (likely(p)) return p; Index: linux-rt-rebase.q/mm/quicklist.c =================================================================== --- linux-rt-rebase.q.orig/mm/quicklist.c +++ linux-rt-rebase.q/mm/quicklist.c @@ -19,7 +19,7 @@ #include #include -DEFINE_PER_CPU(struct quicklist, quicklist)[CONFIG_NR_QUICK]; +DEFINE_PER_CPU_LOCKED(struct quicklist, quicklist)[CONFIG_NR_QUICK]; #define FRACTION_OF_NODE_MEM 16 @@ -51,8 +51,9 @@ void quicklist_trim(int nr, void (*dtor) { long pages_to_free; struct quicklist *q; + int cpu; - q = &get_cpu_var(quicklist)[nr]; + q = &get_cpu_var_locked(quicklist, &cpu)[nr]; if (q->nr_pages > min_pages) { pages_to_free = min_pages_to_free(q, min_pages, max_free); @@ -61,7 +62,7 @@ void quicklist_trim(int nr, void (*dtor) * We pass a gfp_t of 0 to quicklist_alloc here * because we will never call into the page allocator. */ - void *p = quicklist_alloc(nr, 0, NULL); + void *p = __quicklist_alloc(cpu, nr, 0, NULL); if (dtor) dtor(p); @@ -69,7 +70,7 @@ void quicklist_trim(int nr, void (*dtor) pages_to_free--; } } - put_cpu_var(quicklist); + put_cpu_var_locked(quicklist, cpu); } unsigned long quicklist_total_size(void) patches/preempt-realtime-powerpc.patch0000664000077200007720000004047110653433165017507 0ustar mingomingo--- arch/powerpc/kernel/smp.c | 12 ++++++++- arch/powerpc/kernel/traps.c | 9 +++++- arch/powerpc/platforms/cell/smp.c | 2 - arch/powerpc/platforms/chrp/smp.c | 2 - arch/powerpc/platforms/chrp/time.c | 2 - arch/powerpc/platforms/powermac/feature.c | 2 - arch/powerpc/platforms/powermac/nvram.c | 2 - arch/powerpc/platforms/powermac/pic.c | 2 - arch/powerpc/platforms/pseries/smp.c | 2 - arch/ppc/8260_io/enet.c | 2 - arch/ppc/8260_io/fcc_enet.c | 2 - arch/ppc/8xx_io/commproc.c | 2 - arch/ppc/8xx_io/enet.c | 2 - arch/ppc/8xx_io/fec.c | 2 - arch/ppc/kernel/smp.c | 12 ++++++++- arch/ppc/kernel/traps.c | 6 +++- arch/ppc/platforms/hdpu.c | 2 - arch/ppc/platforms/sbc82xx.c | 2 - arch/ppc/syslib/cpm2_common.c | 2 - arch/ppc/syslib/open_pic.c | 2 - arch/ppc/syslib/open_pic2.c | 2 - include/asm-powerpc/hw_irq.h | 40 ++++++++++++++++++------------ 22 files changed, 76 insertions(+), 37 deletions(-) Index: linux-rt-rebase.q/arch/powerpc/kernel/smp.c =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/kernel/smp.c +++ linux-rt-rebase.q/arch/powerpc/kernel/smp.c @@ -126,6 +126,16 @@ void smp_send_reschedule(int cpu) smp_ops->message_pass(cpu, PPC_MSG_RESCHEDULE); } +/* + * this function sends a 'reschedule' IPI to all other CPUs. 
+ * This is used when RT tasks are starving and other CPUs + * might be able to run them: + */ +void smp_send_reschedule_allbutself(void) +{ + smp_ops->message_pass(MSG_ALL_BUT_SELF, PPC_MSG_RESCHEDULE); +} + #ifdef CONFIG_DEBUGGER void smp_send_debugger_break(int cpu) { @@ -162,7 +172,7 @@ void smp_send_stop(void) * static memory requirements. It also looks cleaner. * Stolen from the i386 version. */ -static __cacheline_aligned_in_smp DEFINE_SPINLOCK(call_lock); +static __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(call_lock); static struct call_data_struct { void (*func) (void *info); Index: linux-rt-rebase.q/arch/powerpc/kernel/traps.c =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/kernel/traps.c +++ linux-rt-rebase.q/arch/powerpc/kernel/traps.c @@ -97,11 +97,11 @@ static inline void pmac_backlight_unblan int die(const char *str, struct pt_regs *regs, long err) { static struct { - spinlock_t lock; + raw_spinlock_t lock; u32 lock_owner; int lock_owner_depth; } die = { - .lock = __SPIN_LOCK_UNLOCKED(die.lock), + .lock = _RAW_SPIN_LOCK_UNLOCKED(die.lock), .lock_owner = -1, .lock_owner_depth = 0 }; @@ -178,6 +178,11 @@ void _exception(int signr, struct pt_reg return; } +#ifdef CONFIG_PREEMPT_RT + local_irq_enable(); + preempt_check_resched(); +#endif + memset(&info, 0, sizeof(info)); info.si_signo = signr; info.si_code = code; Index: linux-rt-rebase.q/arch/powerpc/platforms/cell/smp.c =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/platforms/cell/smp.c +++ linux-rt-rebase.q/arch/powerpc/platforms/cell/smp.c @@ -133,7 +133,7 @@ static void __devinit smp_iic_setup_cpu( iic_setup_cpu(); } -static DEFINE_SPINLOCK(timebase_lock); +static DEFINE_RAW_SPINLOCK(timebase_lock); static unsigned long timebase = 0; static void __devinit cell_give_timebase(void) Index: linux-rt-rebase.q/arch/powerpc/platforms/chrp/smp.c =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/platforms/chrp/smp.c +++ linux-rt-rebase.q/arch/powerpc/platforms/chrp/smp.c @@ -44,7 +44,7 @@ static void __devinit smp_chrp_setup_cpu mpic_setup_this_cpu(); } -static DEFINE_SPINLOCK(timebase_lock); +static DEFINE_RAW_SPINLOCK(timebase_lock); static unsigned int timebase_upper = 0, timebase_lower = 0; void __devinit smp_chrp_give_timebase(void) Index: linux-rt-rebase.q/arch/powerpc/platforms/chrp/time.c =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/platforms/chrp/time.c +++ linux-rt-rebase.q/arch/powerpc/platforms/chrp/time.c @@ -27,7 +27,7 @@ #include #include -extern spinlock_t rtc_lock; +extern raw_spinlock_t rtc_lock; static int nvram_as1 = NVRAM_AS1; static int nvram_as0 = NVRAM_AS0; Index: linux-rt-rebase.q/arch/powerpc/platforms/powermac/feature.c =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/platforms/powermac/feature.c +++ linux-rt-rebase.q/arch/powerpc/platforms/powermac/feature.c @@ -59,7 +59,7 @@ extern struct device_node *k2_skiplist[2 * We use a single global lock to protect accesses. 
Each driver has * to take care of its own locking */ -DEFINE_SPINLOCK(feature_lock); +DEFINE_RAW_SPINLOCK(feature_lock); #define LOCK(flags) spin_lock_irqsave(&feature_lock, flags); #define UNLOCK(flags) spin_unlock_irqrestore(&feature_lock, flags); Index: linux-rt-rebase.q/arch/powerpc/platforms/powermac/nvram.c =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/platforms/powermac/nvram.c +++ linux-rt-rebase.q/arch/powerpc/platforms/powermac/nvram.c @@ -80,7 +80,7 @@ static int is_core_99; static int core99_bank = 0; static int nvram_partitions[3]; // XXX Turn that into a sem -static DEFINE_SPINLOCK(nv_lock); +static DEFINE_RAW_SPINLOCK(nv_lock); static int (*core99_write_bank)(int bank, u8* datas); static int (*core99_erase_bank)(int bank); Index: linux-rt-rebase.q/arch/powerpc/platforms/powermac/pic.c =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/platforms/powermac/pic.c +++ linux-rt-rebase.q/arch/powerpc/platforms/powermac/pic.c @@ -63,7 +63,7 @@ static int max_irqs; static int max_real_irqs; static u32 level_mask[4]; -static DEFINE_SPINLOCK(pmac_pic_lock); +static DEFINE_RAW_SPINLOCK(pmac_pic_lock); #define NR_MASK_WORDS ((NR_IRQS + 31) / 32) static unsigned long ppc_lost_interrupts[NR_MASK_WORDS]; Index: linux-rt-rebase.q/arch/powerpc/platforms/pseries/smp.c =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/platforms/pseries/smp.c +++ linux-rt-rebase.q/arch/powerpc/platforms/pseries/smp.c @@ -154,7 +154,7 @@ static void __devinit smp_xics_setup_cpu } #endif /* CONFIG_XICS */ -static DEFINE_SPINLOCK(timebase_lock); +static DEFINE_RAW_SPINLOCK(timebase_lock); static unsigned long timebase = 0; static void __devinit pSeries_give_timebase(void) Index: linux-rt-rebase.q/arch/ppc/8260_io/enet.c =================================================================== --- linux-rt-rebase.q.orig/arch/ppc/8260_io/enet.c +++ linux-rt-rebase.q/arch/ppc/8260_io/enet.c @@ -115,7 +115,7 @@ struct scc_enet_private { scc_t *sccp; struct net_device_stats stats; uint tx_full; - spinlock_t lock; + raw_spinlock_t lock; }; static int scc_enet_open(struct net_device *dev); Index: linux-rt-rebase.q/arch/ppc/8260_io/fcc_enet.c =================================================================== --- linux-rt-rebase.q.orig/arch/ppc/8260_io/fcc_enet.c +++ linux-rt-rebase.q/arch/ppc/8260_io/fcc_enet.c @@ -375,7 +375,7 @@ struct fcc_enet_private { volatile fcc_enet_t *ep; struct net_device_stats stats; uint tx_free; - spinlock_t lock; + raw_spinlock_t lock; #ifdef CONFIG_USE_MDIO uint phy_id; Index: linux-rt-rebase.q/arch/ppc/8xx_io/commproc.c =================================================================== --- linux-rt-rebase.q.orig/arch/ppc/8xx_io/commproc.c +++ linux-rt-rebase.q/arch/ppc/8xx_io/commproc.c @@ -370,7 +370,7 @@ cpm_setbrg(uint brg, uint rate) /* * dpalloc / dpfree bits. */ -static spinlock_t cpm_dpmem_lock; +static raw_spinlock_t cpm_dpmem_lock; /* * 16 blocks should be enough to satisfy all requests * until the memory subsystem goes up... 
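The one-line DEFINE_SPINLOCK -> DEFINE_RAW_SPINLOCK conversions above
and below work without touching any call site because the -rt tree makes
the lock API dispatch on the declared type of the lock. Schematically
(demo_* names are illustrative; the real type-picking macros in the -rt
include/linux/spinlock.h are considerably more elaborate):

	extern void __demo_raw_spin_lock(raw_spinlock_t *lock); /* spins, never sleeps */
	extern void __demo_rt_spin_lock(spinlock_t *lock);      /* rtmutex-based, may sleep */

	/* One spin_lock() entry point, two behaviours, selected at
	 * compile time by the lock's declared type. */
	#define demo_spin_lock(lock)						\
	do {									\
		if (__builtin_types_compatible_p(typeof(*(lock)),		\
						 raw_spinlock_t))		\
			__demo_raw_spin_lock((raw_spinlock_t *)(lock));		\
		else								\
			__demo_rt_spin_lock((spinlock_t *)(lock));		\
	} while (0)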
Index: linux-rt-rebase.q/arch/ppc/8xx_io/enet.c =================================================================== --- linux-rt-rebase.q.orig/arch/ppc/8xx_io/enet.c +++ linux-rt-rebase.q/arch/ppc/8xx_io/enet.c @@ -142,7 +142,7 @@ struct scc_enet_private { unsigned char *rx_vaddr[RX_RING_SIZE]; struct net_device_stats stats; uint tx_full; - spinlock_t lock; + raw_spinlock_t lock; }; static int scc_enet_open(struct net_device *dev); Index: linux-rt-rebase.q/arch/ppc/8xx_io/fec.c =================================================================== --- linux-rt-rebase.q.orig/arch/ppc/8xx_io/fec.c +++ linux-rt-rebase.q/arch/ppc/8xx_io/fec.c @@ -164,7 +164,7 @@ struct fec_enet_private { struct net_device_stats stats; uint tx_full; - spinlock_t lock; + raw_spinlock_t lock; #ifdef CONFIG_USE_MDIO uint phy_id; Index: linux-rt-rebase.q/arch/ppc/kernel/smp.c =================================================================== --- linux-rt-rebase.q.orig/arch/ppc/kernel/smp.c +++ linux-rt-rebase.q/arch/ppc/kernel/smp.c @@ -136,6 +136,16 @@ void smp_send_reschedule(int cpu) smp_message_pass(cpu, PPC_MSG_RESCHEDULE); } +/* + * this function sends a 'reschedule' IPI to all other CPUs. + * This is used when RT tasks are starving and other CPUs + * might be able to run them: + */ +void smp_send_reschedule_allbutself(void) +{ + smp_message_pass(MSG_ALL_BUT_SELF, PPC_MSG_RESCHEDULE, 0, 0); +} + #ifdef CONFIG_XMON void smp_send_xmon_break(int cpu) { @@ -160,7 +170,7 @@ void smp_send_stop(void) * static memory requirements. It also looks cleaner. * Stolen from the i386 version. */ -static DEFINE_SPINLOCK(call_lock); +static DEFINE_RAW_SPINLOCK(call_lock); static struct call_data_struct { void (*func) (void *info); Index: linux-rt-rebase.q/arch/ppc/kernel/traps.c =================================================================== --- linux-rt-rebase.q.orig/arch/ppc/kernel/traps.c +++ linux-rt-rebase.q/arch/ppc/kernel/traps.c @@ -72,7 +72,7 @@ void (*debugger_fault_handler)(struct pt * Trap & Exception support */ -DEFINE_SPINLOCK(die_lock); +DEFINE_RAW_SPINLOCK(die_lock); int die(const char * str, struct pt_regs * fp, long err) { @@ -108,6 +108,10 @@ void _exception(int signr, struct pt_reg debugger(regs); die("Exception in kernel mode", regs, signr); } +#ifdef CONFIG_PREEMPT_RT + local_irq_enable(); + preempt_check_resched(); +#endif info.si_signo = signr; info.si_errno = 0; info.si_code = code; Index: linux-rt-rebase.q/arch/ppc/platforms/hdpu.c =================================================================== --- linux-rt-rebase.q.orig/arch/ppc/platforms/hdpu.c +++ linux-rt-rebase.q/arch/ppc/platforms/hdpu.c @@ -55,7 +55,7 @@ static void parse_bootinfo(unsigned long static void hdpu_set_l1pe(void); static void hdpu_cpustate_set(unsigned char new_state); #ifdef CONFIG_SMP -static DEFINE_SPINLOCK(timebase_lock); +static DEFINE_RAW_SPINLOCK(timebase_lock); static unsigned int timebase_upper = 0, timebase_lower = 0; extern int smp_tb_synchronized; Index: linux-rt-rebase.q/arch/ppc/platforms/sbc82xx.c =================================================================== --- linux-rt-rebase.q.orig/arch/ppc/platforms/sbc82xx.c +++ linux-rt-rebase.q/arch/ppc/platforms/sbc82xx.c @@ -65,7 +65,7 @@ static void sbc82xx_time_init(void) static volatile char *sbc82xx_i8259_map; static char sbc82xx_i8259_mask = 0xff; -static DEFINE_SPINLOCK(sbc82xx_i8259_lock); +static DEFINE_RAW_SPINLOCK(sbc82xx_i8259_lock); static void sbc82xx_i8259_mask_and_ack_irq(unsigned int irq_nr) { Index: linux-rt-rebase.q/arch/ppc/syslib/cpm2_common.c 
=================================================================== --- linux-rt-rebase.q.orig/arch/ppc/syslib/cpm2_common.c +++ linux-rt-rebase.q/arch/ppc/syslib/cpm2_common.c @@ -114,7 +114,7 @@ cpm2_fastbrg(uint brg, uint rate, int di /* * dpalloc / dpfree bits. */ -static spinlock_t cpm_dpmem_lock; +static raw_spinlock_t cpm_dpmem_lock; /* 16 blocks should be enough to satisfy all requests * until the memory subsystem goes up... */ static rh_block_t cpm_boot_dpmem_rh_block[16]; Index: linux-rt-rebase.q/arch/ppc/syslib/open_pic.c =================================================================== --- linux-rt-rebase.q.orig/arch/ppc/syslib/open_pic.c +++ linux-rt-rebase.q/arch/ppc/syslib/open_pic.c @@ -526,7 +526,7 @@ void openpic_reset_processor_phys(u_int } #if defined(CONFIG_SMP) || defined(CONFIG_PM) -static DEFINE_SPINLOCK(openpic_setup_lock); +static DEFINE_RAW_SPINLOCK(openpic_setup_lock); #endif #ifdef CONFIG_SMP Index: linux-rt-rebase.q/arch/ppc/syslib/open_pic2.c =================================================================== --- linux-rt-rebase.q.orig/arch/ppc/syslib/open_pic2.c +++ linux-rt-rebase.q/arch/ppc/syslib/open_pic2.c @@ -380,7 +380,7 @@ static void openpic2_set_spurious(u_int vec); } -static DEFINE_SPINLOCK(openpic2_setup_lock); +static DEFINE_RAW_SPINLOCK(openpic2_setup_lock); /* * Initialize a timer interrupt (and disable it) Index: linux-rt-rebase.q/include/asm-powerpc/hw_irq.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-powerpc/hw_irq.h +++ linux-rt-rebase.q/include/asm-powerpc/hw_irq.h @@ -20,8 +20,8 @@ -static inline unsigned long local_get_flags(void) +static inline unsigned long raw_local_get_flags(void) { unsigned long flags; __asm__ __volatile__("lbz %0,%1(13)" : "=r" (flags) : "i" (offsetof(struct paca_struct, soft_enabled))); return flags; @@ -39,14 +39,19 @@ static inline unsigned long local_irq_di return flags; } -extern void local_irq_restore(unsigned long); + extern void iseries_handle_interrupts(void); +extern unsigned long raw_local_get_flags(void); +extern unsigned long raw_local_irq_disable(void); +extern void raw_local_irq_restore(unsigned long); + +#define raw_local_irq_enable() raw_local_irq_restore(1) +#define raw_local_save_flags(flags) ((flags) = raw_local_get_flags()) +#define raw_local_irq_save(flags) ((flags) = raw_local_irq_disable()) -#define local_irq_enable() local_irq_restore(1) -#define local_save_flags(flags) ((flags) = local_get_flags()) -#define local_irq_save(flags) ((flags) = local_irq_disable()) +#define raw_irqs_disabled() (raw_local_get_flags() == 0) +#define raw_irqs_disabled_flags(flags) ((flags) == 0) -#define irqs_disabled() (local_get_flags() == 0) #define __hard_irq_enable() __mtmsrd(mfmsr() | MSR_EE, 1) #define __hard_irq_disable() __mtmsrd(mfmsr() & ~MSR_EE, 1) @@ -62,13 +67,13 @@ extern void iseries_handle_interrupts(vo #if defined(CONFIG_BOOKE) #define SET_MSR_EE(x) mtmsr(x) -#define local_irq_restore(flags) __asm__ __volatile__("wrtee %0" : : "r" (flags) : "memory") +#define raw_local_irq_restore(flags) __asm__ __volatile__("wrtee %0" : : "r" (flags) : "memory") #else #define SET_MSR_EE(x) mtmsr(x) -#define local_irq_restore(flags) mtmsr(flags) +#define raw_local_irq_restore(flags) mtmsr(flags) #endif -static inline void local_irq_disable(void) +static inline void
raw_local_irq_disable(void) { #ifdef CONFIG_BOOKE __asm__ __volatile__("wrteei 0": : :"memory"); @@ -80,7 +87,7 @@ static inline void local_irq_disable(voi #endif } -static inline void local_irq_enable(void) +static inline void raw_local_irq_enable(void) { #ifdef CONFIG_BOOKE __asm__ __volatile__("wrteei 1": : :"memory"); @@ -92,7 +99,7 @@ static inline void local_irq_enable(void #endif } -static inline void local_irq_save_ptr(unsigned long *flags) +static inline void raw_local_irq_save_ptr(unsigned long *flags) { unsigned long msr; msr = mfmsr(); @@ -105,13 +112,16 @@ static inline void local_irq_save_ptr(un __asm__ __volatile__("": : :"memory"); } -#define local_save_flags(flags) ((flags) = mfmsr()) -#define local_irq_save(flags) local_irq_save_ptr(&flags) -#define irqs_disabled() ((mfmsr() & MSR_EE) == 0) +#define raw_local_save_flags(flags) ((flags) = mfmsr()) +#define raw_local_irq_save(flags) raw_local_irq_save_ptr(&flags) +#define raw_irqs_disabled() ((mfmsr() & MSR_EE) == 0) +#define raw_irqs_disabled_flags(flags) ((flags & MSR_EE) == 0) #define hard_irq_enable() local_irq_enable() #define hard_irq_disable() local_irq_disable() +#include + #endif /* CONFIG_PPC64 */ /* patches/ep93xx-clockevents.patch0000664000077200007720000001446110653433161016230 0ustar mingomingoclockevent support for the EP93xx platform clockevent support for the EP93xx platform (by tglx; only added a fix for clockevent_ep93xx.mult, which was using the wrong clock tick rate) --- arch/arm/mach-ep93xx/core.c | 125 ++++++++++++++++++++---------- include/asm-arm/arch-ep93xx/ep93xx-regs.h | 6 + 2 files changed, 91 insertions(+), 40 deletions(-) Index: linux-rt-rebase.q/arch/arm/mach-ep93xx/core.c =================================================================== --- linux-rt-rebase.q.orig/arch/arm/mach-ep93xx/core.c +++ linux-rt-rebase.q/arch/arm/mach-ep93xx/core.c @@ -32,6 +32,8 @@ #include #include #include +#include +#include #include #include @@ -50,7 +52,6 @@ #include - /************************************************************************* * Static I/O mappings that are needed for all EP93xx platforms *************************************************************************/ @@ -93,39 +94,58 @@ void __init ep93xx_map_io(void) * to use this timer for something else. We also use timer 4 for keeping * track of lost jiffies.
*/ -static unsigned int last_jiffy_time; -static unsigned int next_jiffy_time; -static unsigned int accumulator; +static struct clock_event_device clockevent_ep93xx; + +static int ep93xx_timer_interrupt(int irq, void *dev_id) +{ + __raw_writel(EP93XX_TC_CLEAR, EP93XX_TIMER1_CLEAR); -#define TIMER4_TICKS_PER_JIFFY (983040 / HZ) -#define TIMER4_TICKS_MOD_JIFFY (983040 % HZ) + clockevent_ep93xx.event_handler(&clockevent_ep93xx); -static int after_eq(unsigned long a, unsigned long b) + return IRQ_HANDLED; +} + +static int ep93xx_set_next_event(unsigned long evt, + struct clock_event_device *unused) { - return ((signed long)(a - b)) >= 0; + __raw_writel(evt, EP93XX_TIMER1_LOAD); + return 0; } -static int ep93xx_timer_interrupt(int irq, void *dev_id) +static void ep93xx_set_mode(enum clock_event_mode mode, + struct clock_event_device *evt) { - write_seqlock(&xtime_lock); + u32 tmode = EP93XX_TC123_SEL_508KHZ; - __raw_writel(1, EP93XX_TIMER1_CLEAR); - while (after_eq(__raw_readl(EP93XX_TIMER4_VALUE_LOW), next_jiffy_time)) { - timer_tick(); - - last_jiffy_time = next_jiffy_time; - next_jiffy_time += TIMER4_TICKS_PER_JIFFY; - accumulator += TIMER4_TICKS_MOD_JIFFY; - if (accumulator >= HZ) { - next_jiffy_time++; - accumulator -= HZ; - } + /* Disable timer */ + __raw_writel(tmode, EP93XX_TIMER1_CONTROL); + + switch(mode) { + case CLOCK_EVT_MODE_PERIODIC: + /* Set timer period */ + __raw_writel((508469 / HZ) - 1, EP93XX_TIMER1_LOAD); + tmode |= EP93XX_TC123_PERIODIC; + + case CLOCK_EVT_MODE_ONESHOT: + tmode |= EP93XX_TC123_ENABLE; + __raw_writel(tmode, EP93XX_TIMER1_CONTROL); + break; + + case CLOCK_EVT_MODE_SHUTDOWN: + case CLOCK_EVT_MODE_UNUSED: + case CLOCK_EVT_MODE_RESUME: + return; } +} - write_sequnlock(&xtime_lock); +static struct clock_event_device clockevent_ep93xx = { + .name = "ep93xx-timer1", + .features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PERIODIC, + .shift = 32, + .set_mode = ep93xx_set_mode, + .set_next_event = ep93xx_set_next_event, +}; - return IRQ_HANDLED; -} static struct irqaction ep93xx_timer_irq = { .name = "ep93xx timer", @@ -133,32 +153,58 @@ static struct irqaction ep93xx_timer_irq .handler = ep93xx_timer_interrupt, }; -static void __init ep93xx_timer_init(void) +static void __init ep93xx_clockevent_init(void) { - /* Enable periodic HZ timer. */ - __raw_writel(0x48, EP93XX_TIMER1_CONTROL); - __raw_writel((508469 / HZ) - 1, EP93XX_TIMER1_LOAD); - __raw_writel(0xc8, EP93XX_TIMER1_CONTROL); + setup_irq(IRQ_EP93XX_TIMER1, &ep93xx_timer_irq); - /* Enable lost jiffy timer. */ - __raw_writel(0x100, EP93XX_TIMER4_VALUE_HIGH); + clockevent_ep93xx.mult = div_sc(508469, NSEC_PER_SEC, + clockevent_ep93xx.shift); + clockevent_ep93xx.max_delta_ns = + clockevent_delta2ns(0xfffffffe, &clockevent_ep93xx); + clockevent_ep93xx.min_delta_ns = + clockevent_delta2ns(0xf, &clockevent_ep93xx); + clockevent_ep93xx.cpumask = cpumask_of_cpu(0); + clockevents_register_device(&clockevent_ep93xx); +} - setup_irq(IRQ_EP93XX_TIMER1, &ep93xx_timer_irq); +/* + * timer4 is a 40 Bit timer, separated in a 32bit and a 8 bit + * register, EP93XX_TIMER4_VALUE_LOW stores 32 bit word. 
The + * controlregister is in EP93XX_TIMER4_VALUE_HIGH + */ + +cycle_t ep93xx_get_cycles(void) +{ + return __raw_readl(EP93XX_TIMER4_VALUE_LOW); } -static unsigned long ep93xx_gettimeoffset(void) +static struct clocksource clocksource_ep93xx = { + .name = "ep93xx_timer4", + .rating = 200, + .read = ep93xx_get_cycles, + .mask = 0xFFFFFFFF, + .shift = 20, + .flags = CLOCK_SOURCE_IS_CONTINUOUS, +}; + +static void __init ep93xx_clocksource_init(void) { - int offset; + /* Reset time-stamp counter */ + __raw_writel(0x100, EP93XX_TIMER4_VALUE_HIGH); - offset = __raw_readl(EP93XX_TIMER4_VALUE_LOW) - last_jiffy_time; + clocksource_ep93xx.mult = + clocksource_hz2mult(983040, clocksource_ep93xx.shift); + clocksource_register(&clocksource_ep93xx); +} - /* Calculate (1000000 / 983040) * offset. */ - return offset + (53 * offset / 3072); +static void __init ep93xx_timer_init(void) +{ + ep93xx_clocksource_init(); + ep93xx_clockevent_init(); } struct sys_timer ep93xx_timer = { - .init = ep93xx_timer_init, - .offset = ep93xx_gettimeoffset, + .init = ep93xx_timer_init, }; @@ -510,7 +556,6 @@ static struct platform_device ep93xx_ohc .resource = ep93xx_ohci_resources, }; - void __init ep93xx_init_devices(void) { unsigned int v; Index: linux-rt-rebase.q/include/asm-arm/arch-ep93xx/ep93xx-regs.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-arm/arch-ep93xx/ep93xx-regs.h +++ linux-rt-rebase.q/include/asm-arm/arch-ep93xx/ep93xx-regs.h @@ -67,6 +67,12 @@ #define EP93XX_TIMER3_CONTROL EP93XX_TIMER_REG(0x88) #define EP93XX_TIMER3_CLEAR EP93XX_TIMER_REG(0x8c) +#define EP93XX_TC_CLEAR 0x00000001 +#define EP93XX_TC123_ENABLE 0x00000080 +#define EP93XX_TC123_PERIODIC 0x00000040 +#define EP93XX_TC123_SEL_508KHZ 0x00000008 +#define EP93XX_TC4_ENABLE 0x00000100 + #define EP93XX_I2S_BASE (EP93XX_APB_VIRT_BASE + 0x00020000) #define EP93XX_SECURITY_BASE (EP93XX_APB_VIRT_BASE + 0x00030000) patches/preempt-realtime-x86_64.patch0000664000077200007720000003112410653433165016761 0ustar mingomingo arch/x86_64/kernel/early_printk.c | 2 +- arch/x86_64/kernel/head64.c | 6 +++++- arch/x86_64/kernel/i8259.c | 2 +- arch/x86_64/kernel/io_apic.c | 13 +++++++------ arch/x86_64/kernel/nmi.c | 2 ++ arch/x86_64/kernel/process.c | 23 +++++++++++++---------- arch/x86_64/kernel/signal.c | 7 +++++++ arch/x86_64/kernel/smp.c | 14 ++++++++++++-- arch/x86_64/kernel/traps.c | 5 ++--- include/asm-x86_64/acpi.h | 4 ++-- include/asm-x86_64/hw_irq.h | 2 +- include/asm-x86_64/io_apic.h | 2 +- include/asm-x86_64/spinlock.h | 6 +++--- include/asm-x86_64/tlbflush.h | 8 +++++++- include/asm-x86_64/vgtod.h | 2 +- 15 files changed, 65 insertions(+), 33 deletions(-) Index: linux-rt-rebase.q/arch/x86_64/kernel/early_printk.c =================================================================== --- linux-rt-rebase.q.orig/arch/x86_64/kernel/early_printk.c +++ linux-rt-rebase.q/arch/x86_64/kernel/early_printk.c @@ -203,7 +203,7 @@ static int early_console_initialized = 0 void early_printk(const char *fmt, ...) 
{ - char buf[512]; + static char buf[512]; int n; va_list ap; Index: linux-rt-rebase.q/arch/x86_64/kernel/head64.c =================================================================== --- linux-rt-rebase.q.orig/arch/x86_64/kernel/head64.c +++ linux-rt-rebase.q/arch/x86_64/kernel/head64.c @@ -26,7 +26,11 @@ static void __init zap_identity_mappings { pgd_t *pgd = pgd_offset_k(0UL); pgd_clear(pgd); - __flush_tlb(); + /* + * preempt_disable/enable does not work this early in the + * bootup yet: + */ + write_cr3(read_cr3()); } /* Don't add a printk in there. printk relies on the PDA which is not initialized Index: linux-rt-rebase.q/arch/x86_64/kernel/i8259.c =================================================================== --- linux-rt-rebase.q.orig/arch/x86_64/kernel/i8259.c +++ linux-rt-rebase.q/arch/x86_64/kernel/i8259.c @@ -96,8 +96,8 @@ static void (*interrupt[NR_VECTORS - FIR */ static int i8259A_auto_eoi; -DEFINE_SPINLOCK(i8259A_lock); static void mask_and_ack_8259A(unsigned int); +DEFINE_RAW_SPINLOCK(i8259A_lock); static struct irq_chip i8259A_chip = { .name = "XT-PIC", Index: linux-rt-rebase.q/arch/x86_64/kernel/io_apic.c =================================================================== --- linux-rt-rebase.q.orig/arch/x86_64/kernel/io_apic.c +++ linux-rt-rebase.q/arch/x86_64/kernel/io_apic.c @@ -90,8 +90,8 @@ int timer_over_8254 __initdata = 1; /* Where if anywhere is the i8259 connect in external int mode */ static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; -static DEFINE_SPINLOCK(ioapic_lock); -DEFINE_SPINLOCK(vector_lock); +static DEFINE_RAW_SPINLOCK(ioapic_lock); +DEFINE_RAW_SPINLOCK(vector_lock); /* * # of IRQ routing registers @@ -204,6 +204,9 @@ static inline void io_apic_sync(unsigned reg ACTION; \ io_apic_modify(entry->apic, reg); \ FINAL; \ + /* Force POST flush by reading: */ \ + reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \ + \ if (!entry->next) \ break; \ entry = irq_2_pin + entry->next; \ @@ -348,10 +351,8 @@ static void add_pin_to_irq(unsigned int static void name##_IO_APIC_irq (unsigned int irq) \ __DO_ACTION(R, ACTION, FINAL) -DO_ACTION( __mask, 0, |= 0x00010000, io_apic_sync(entry->apic) ) - /* mask = 1 */ -DO_ACTION( __unmask, 0, &= 0xfffeffff, ) - /* mask = 0 */ +DO_ACTION( __mask, 0, |= 0x00010000, ) /* mask = 1 */ +DO_ACTION( __unmask, 0, &= 0xfffeffff, ) /* mask = 0 */ DO_ACTION( __pcix_mask, 0, &= 0xffff7fff, ) /* edge */ DO_ACTION( __pcix_unmask, 0, = (reg & 0xfffeffff) | 0x00008000, ) /* level */ Index: linux-rt-rebase.q/arch/x86_64/kernel/nmi.c =================================================================== --- linux-rt-rebase.q.orig/arch/x86_64/kernel/nmi.c +++ linux-rt-rebase.q/arch/x86_64/kernel/nmi.c @@ -70,7 +70,9 @@ static int endflag __initdata = 0; */ static __init void nmi_cpu_busy(void *data) { +#ifndef CONFIG_PREEMPT_RT local_irq_enable_in_hardirq(); +#endif /* Intentionally don't use cpu_relax here. This is to make sure that the performance counter really ticks, even if there is a simulator or similar that catches the Index: linux-rt-rebase.q/arch/x86_64/kernel/process.c =================================================================== --- linux-rt-rebase.q.orig/arch/x86_64/kernel/process.c +++ linux-rt-rebase.q/arch/x86_64/kernel/process.c @@ -116,7 +116,7 @@ static void default_idle(void) */ smp_mb(); local_irq_disable(); - if (!need_resched()) { + if (!need_resched() && !need_resched_delayed()) { /* Enables interrupts one instruction before HLT. x86 special cases this so there is no race. 
*/ safe_halt(); @@ -202,7 +202,7 @@ void cpu_idle (void) current_thread_info()->status |= TS_POLLING; /* endless idle loop with no priority at all */ while (1) { - while (!need_resched()) { + while (!need_resched() && !need_resched_delayed()) { void (*idle)(void); if (__get_cpu_var(cpu_idle_state)) @@ -231,12 +231,14 @@ void cpu_idle (void) __exit_idle(); } - trace_preempt_exit_idle(); tick_nohz_restart_sched_tick(); - preempt_enable_no_resched(); - schedule(); + local_irq_disable(); + trace_preempt_exit_idle(); + __preempt_enable_no_resched(); + __schedule(); preempt_disable(); trace_preempt_enter_idle(); + local_irq_enable(); } } @@ -252,10 +254,10 @@ void cpu_idle (void) */ void mwait_idle_with_hints(unsigned long eax, unsigned long ecx) { - if (!need_resched()) { + if (!need_resched() && !need_resched_delayed()) { __monitor((void *)¤t_thread_info()->flags, 0, 0); smp_mb(); - if (!need_resched()) + if (!need_resched() && !need_resched_delayed()) __mwait(eax, ecx); } } @@ -263,10 +265,10 @@ void mwait_idle_with_hints(unsigned long /* Default MONITOR/MWAIT with no hints, used for default C1 state */ static void mwait_idle(void) { - if (!need_resched()) { + if (!need_resched() && !need_resched_delayed()) { __monitor((void *)¤t_thread_info()->flags, 0, 0); smp_mb(); - if (!need_resched()) { + if (!need_resched() && !need_resched_delayed()) { trace_hardirqs_on(); __sti_mwait(0, 0); } else @@ -385,7 +387,7 @@ void exit_thread(void) struct thread_struct *t = &me->thread; if (me->thread.io_bitmap_ptr) { - struct tss_struct *tss = &per_cpu(init_tss, get_cpu()); + struct tss_struct *tss; kfree(t->io_bitmap_ptr); t->io_bitmap_ptr = NULL; @@ -393,6 +395,7 @@ void exit_thread(void) /* * Careful, clear this in the TSS too: */ + tss = &per_cpu(init_tss, get_cpu()); memset(tss->io_bitmap, 0xff, t->io_bitmap_max); t->io_bitmap_max = 0; put_cpu(); Index: linux-rt-rebase.q/arch/x86_64/kernel/signal.c =================================================================== --- linux-rt-rebase.q.orig/arch/x86_64/kernel/signal.c +++ linux-rt-rebase.q/arch/x86_64/kernel/signal.c @@ -396,6 +396,13 @@ static void do_signal(struct pt_regs *re int signr; sigset_t *oldset; +#ifdef CONFIG_PREEMPT_RT + /* + * Fully-preemptible kernel does not need interrupts disabled: + */ + local_irq_enable(); + preempt_check_resched(); +#endif /* * We want the common case to go fast, which * is why we may in certain cases get here from Index: linux-rt-rebase.q/arch/x86_64/kernel/smp.c =================================================================== --- linux-rt-rebase.q.orig/arch/x86_64/kernel/smp.c +++ linux-rt-rebase.q/arch/x86_64/kernel/smp.c @@ -56,7 +56,7 @@ union smp_flush_state { struct mm_struct *flush_mm; unsigned long flush_va; #define FLUSH_ALL -1ULL - spinlock_t tlbstate_lock; + raw_spinlock_t tlbstate_lock; }; char pad[SMP_CACHE_BYTES]; } ____cacheline_aligned; @@ -295,10 +295,20 @@ void smp_send_reschedule(int cpu) } /* + * this function sends a 'reschedule' IPI to all other CPUs. + * This is used when RT tasks are starving and other CPUs + * might be able to run them: + */ +void smp_send_reschedule_allbutself(void) +{ + send_IPI_allbutself(RESCHEDULE_VECTOR); +} + +/* * Structure and data for smp_call_function(). This is designed to minimise * static memory requirements. It also looks cleaner. 
*/ -static DEFINE_SPINLOCK(call_lock); +static DEFINE_RAW_SPINLOCK(call_lock); struct call_data_struct { void (*func) (void *info); Index: linux-rt-rebase.q/arch/x86_64/kernel/traps.c =================================================================== --- linux-rt-rebase.q.orig/arch/x86_64/kernel/traps.c +++ linux-rt-rebase.q/arch/x86_64/kernel/traps.c @@ -219,7 +219,7 @@ void dump_trace(struct task_struct *tsk, unsigned long *stack, struct stacktrace_ops *ops, void *data) { - const unsigned cpu = get_cpu(); + const unsigned cpu = raw_smp_processor_id(); unsigned long *irqstack_end = (unsigned long*)cpu_pda(cpu)->irqstackptr; unsigned used = 0; struct thread_info *tinfo; @@ -310,7 +310,6 @@ void dump_trace(struct task_struct *tsk, tinfo = task_thread_info(tsk); HANDLE_STACK (valid_stack_ptr(tinfo, stack)); #undef HANDLE_STACK - put_cpu(); } EXPORT_SYMBOL(dump_trace); @@ -360,7 +359,7 @@ _show_stack(struct task_struct *tsk, str { unsigned long *stack; int i; - const int cpu = smp_processor_id(); + const int cpu = raw_smp_processor_id(); unsigned long *irqstack_end = (unsigned long *) (cpu_pda(cpu)->irqstackptr); unsigned long *irqstack = (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE); Index: linux-rt-rebase.q/include/asm-x86_64/acpi.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-x86_64/acpi.h +++ linux-rt-rebase.q/include/asm-x86_64/acpi.h @@ -51,8 +51,8 @@ #define ACPI_ASM_MACROS #define BREAKPOINT3 -#define ACPI_DISABLE_IRQS() local_irq_disable() -#define ACPI_ENABLE_IRQS() local_irq_enable() +#define ACPI_DISABLE_IRQS() local_irq_disable_nort() +#define ACPI_ENABLE_IRQS() local_irq_enable_nort() #define ACPI_FLUSH_CPU_CACHE() wbinvd() int __acpi_acquire_global_lock(unsigned int *lock); Index: linux-rt-rebase.q/include/asm-x86_64/hw_irq.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-x86_64/hw_irq.h +++ linux-rt-rebase.q/include/asm-x86_64/hw_irq.h @@ -118,7 +118,7 @@ void i8254_timer_resume(void); typedef int vector_irq_t[NR_VECTORS]; DECLARE_PER_CPU(vector_irq_t, vector_irq); extern void __setup_vector_irq(int cpu); -extern spinlock_t vector_lock; +extern raw_spinlock_t vector_lock; /* * Various low-level irq details needed by irq.c, process.c, Index: linux-rt-rebase.q/include/asm-x86_64/io_apic.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-x86_64/io_apic.h +++ linux-rt-rebase.q/include/asm-x86_64/io_apic.h @@ -125,6 +125,6 @@ extern int sis_apic_bug; /* dummy */ void enable_NMI_through_LVT0 (void * dummy); -extern spinlock_t i8259A_lock; +extern raw_spinlock_t i8259A_lock; #endif Index: linux-rt-rebase.q/include/asm-x86_64/spinlock.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-x86_64/spinlock.h +++ linux-rt-rebase.q/include/asm-x86_64/spinlock.h @@ -160,8 +160,8 @@ static inline void __raw_write_unlock(__ : "=m" (rw->lock) : : "memory"); } -#define _raw_spin_relax(lock) cpu_relax() -#define _raw_read_relax(lock) cpu_relax() -#define _raw_write_relax(lock) cpu_relax() +#define __raw_spin_relax(lock) cpu_relax() +#define __raw_read_relax(lock) cpu_relax() +#define __raw_write_relax(lock) cpu_relax() #endif /* __ASM_SPINLOCK_H */ Index: linux-rt-rebase.q/include/asm-x86_64/tlbflush.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-x86_64/tlbflush.h +++ 
linux-rt-rebase.q/include/asm-x86_64/tlbflush.h @@ -8,14 +8,20 @@ static inline void __flush_tlb(void) { + preempt_disable(); write_cr3(read_cr3()); + preempt_enable(); } static inline void __flush_tlb_all(void) { - unsigned long cr4 = read_cr4(); + unsigned long cr4; + + preempt_disable(); + cr4 = read_cr4(); write_cr4(cr4 & ~X86_CR4_PGE); /* clear PGE */ write_cr4(cr4); /* write old PGE again and flush TLBs */ + preempt_enable(); } #define __flush_tlb_one(addr) \ Index: linux-rt-rebase.q/include/asm-x86_64/vgtod.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-x86_64/vgtod.h +++ linux-rt-rebase.q/include/asm-x86_64/vgtod.h @@ -5,7 +5,7 @@ #include struct vsyscall_gtod_data { - seqlock_t lock; + raw_seqlock_t lock; /* open coded 'struct timespec' */ time_t wall_time_sec; patches/ppc-clockevents-fix.patch0000664000077200007720000001015210653433163016431 0ustar mingomingoFrom linux-kernel-owner@vger.kernel.org Thu May 24 20:24:54 2007 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.1.7-deb (2006-10-05) on debian X-Spam-Level: X-Spam-Status: No, score=0.0 required=5.0 tests=AWL autolearn=unavailable version=3.1.7-deb Received: from vger.kernel.org (vger.kernel.org [209.132.176.167]) by mail.tglx.de (Postfix) with ESMTP id B0D2F65C3E9 for ; Thu, 24 May 2007 20:24:54 +0200 (CEST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1751886AbXEXSYQ (ORCPT ); Thu, 24 May 2007 14:24:16 -0400 Received: (majordomo@vger.kernel.org) by vger.kernel.org id S1750768AbXEXSYE (ORCPT ); Thu, 24 May 2007 14:24:04 -0400 Received: from gateway-1237.mvista.com ([63.81.120.155]:2175 "EHLO imap.sh.mvista.com" rhost-flags-OK-FAIL-OK-FAIL) by vger.kernel.org with ESMTP id S1750741AbXEXSYD (ORCPT ); Thu, 24 May 2007 14:24:03 -0400 Received: from wasted.dev.rtsoft.ru (unknown [10.150.0.9]) by imap.sh.mvista.com (Postfix) with ESMTP id 767D13ECA; Thu, 24 May 2007 11:23:59 -0700 (PDT) From: Sergei Shtylyov Organization: MontaVista Software Inc. To: tglx@linutronix.de, mingo@elte.hu Subject: [PATCH 2.6.21-rt7] PowerPC: fix clockevents for classic CPUs Date: Thu, 24 May 2007 22:25:30 +0400 User-Agent: KMail/1.5 Cc: linuxppc-dev@ozlabs.org, linux-kernel@vger.kernel.org References: <200705172142.26739.sshtylyov@ru.mvista.com> In-Reply-To: <200705172142.26739.sshtylyov@ru.mvista.com> MIME-Version: 1.0 Content-Type: text/plain; charset="iso-8859-1" Content-Disposition: inline Message-Id: <200705242225.30225.sshtylyov@ru.mvista.com> Sender: linux-kernel-owner@vger.kernel.org Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org X-Filter-To: .Kernel.LKML X-Evolution-Source: imap://tglx%40linutronix.de@localhost:8993/ Content-Transfer-Encoding: 8bit Unconditionally set a maximum positive value to the decrementer before calling an event handler for all "classic" PPC CPUs (although this is only necessary to clear the interrupt on POWER4+, I've been asked to do it this way) -- otherwise it wouldn't have been done for an offline CPU in periodic mode since the event reprogramming has been delegated to the timer subsystem. Also, as the classic decrementer doesn't have periodic mode, make the set_mode() method for this case completely empty. While at it, add a switch case for CLOCK_EVT_MODE_RESUME to hush the warning. Signed-off-by: Sergei Shtylyov --- Testing on "classic" CPUs is still needed (used to work atop of 2.6.18-rt7).
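Roughly, the resulting interrupt path looks like this (an illustrative condensation of the timer_interrupt() hunk below, not additional patch content):

	/* timer_interrupt(), on all non-40x/non-BOOKE ("classic") CPUs: */
	set_dec(DECREMENTER_MAX);
	/*
	 * Writing the largest positive value deasserts the decrementer
	 * interrupt, which on POWER4+ stays pending for as long as the
	 * DEC register is negative. Reloading the real next event is
	 * then left entirely to the clockevents layer, which calls
	 * decrementer_set_next_event() as needed.
	 */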
arch/powerpc/kernel/time.c | 15 +++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) Index: linux-rt-rebase.q/arch/powerpc/kernel/time.c =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/kernel/time.c +++ linux-rt-rebase.q/arch/powerpc/kernel/time.c @@ -167,11 +167,14 @@ static void decrementer_set_mode(enum cl case CLOCK_EVT_MODE_SHUTDOWN: tcr &= ~TCR_DIE; break; + case CLOCK_EVT_MODE_RESUME: + break; } mtspr(SPRN_TCR, tcr); -#endif + if (mode == CLOCK_EVT_MODE_PERIODIC) decrementer_set_next_event(tb_ticks_per_jiffy, dev); +#endif } static struct clock_event_device decrementer_clockevent = { @@ -571,16 +574,12 @@ void timer_interrupt(struct pt_regs * re irq_enter(); #ifdef CONFIG_GENERIC_CLOCKEVENTS -#ifdef CONFIG_PPC_MULTIPLATFORM +#if !defined(CONFIG_40x) && !defined(CONFIG_BOOKE) /* * We must write a positive value to the decrementer to clear - * the interrupt on the IBM 970 CPU series. In periodic mode, - * this happens when the decrementer gets reloaded later, but - * in one-shot mode, we have to do it here since an event handler - * may skip loading the new value... + * the interrupt on POWER4+ compatible CPUs. */ - if (per_cpu(decrementers, cpu).mode != CLOCK_EVT_MODE_PERIODIC) - set_dec(DECREMENTER_MAX); + set_dec(DECREMENTER_MAX); #endif /* * We can't disable the decrementer, so in the period between patches/arm-cmpxchg-support-armv6.patch0000664000077200007720000000317710653433164017524 0ustar mingomingo[PATCH -rt] cmpxchg support on ARMv6 The current rt patch doesn't support cmpxchg on ARMv6. This patch adds cmpxchg support on ARMv6. It's tested on OMAP2 (apollon board). Signed-off-by: Kyungmin Park p.s., Please cc me, I'm not a subscriber on this mailing list. -- --- include/asm-arm/atomic.h | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) Index: linux-rt-rebase.q/include/asm-arm/atomic.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-arm/atomic.h +++ linux-rt-rebase.q/include/asm-arm/atomic.h @@ -114,6 +114,46 @@ static inline void atomic_clear_mask(uns : "cc"); } +/* + * Atomic compare and exchange.
+ */ +#define __HAVE_ARCH_CMPXCHG 1 + +extern unsigned long wrong_size_cmpxchg(volatile void *ptr); + +static inline unsigned long __cmpxchg(volatile void *ptr, + unsigned long old, + unsigned long new, int size) +{ + volatile unsigned long *p = ptr; + + if (size == 4) { + unsigned long oldval, res; + + do { + __asm__ __volatile__("@ atomic_cmpxchg\n" + "ldrex %1, [%2]\n" + "mov %0, #0\n" + "teq %1, %3\n" + "strexeq %0, %4, [%2]\n" + : "=&r" (res), "=&r" (oldval) + : "r" (p), "Ir" (old), "r" (new) + : "cc"); + } while (res); + + return oldval; + } else + return wrong_size_cmpxchg(ptr); +} + +#define cmpxchg(ptr,o,n) \ +({ \ + __typeof__(*(ptr)) _o_ = (o); \ + __typeof__(*(ptr)) _n_ = (n); \ + (__typeof__(*(ptr))) __cmpxchg((ptr), (unsigned long)_o_, \ + (unsigned long)_n_, sizeof(*(ptr))); \ +}) + #else /* ARM_ARCH_6 */ #include patches/nf_conntrack-weird-crash-fix.patch0000664000077200007720000000223110653433170020201 0ustar mingomingo--- net/netfilter/nf_conntrack_core.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) Index: linux-rt-rebase.q/net/netfilter/nf_conntrack_core.c =================================================================== --- linux-rt-rebase.q.orig/net/netfilter/nf_conntrack_core.c +++ linux-rt-rebase.q/net/netfilter/nf_conntrack_core.c @@ -1136,6 +1136,24 @@ int __init nf_conntrack_init(void) /* - and look it like as a confirmed connection */ set_bit(IPS_CONFIRMED_BIT, &nf_conntrack_untracked.status); + /* + * There's something really weird (read: crash) going on in + * this module when lockdep and rt is enabled - the locks are + * not initialized in the per-CPU area properly - or they might + * be initialized by getting a copy of the first CPU's per-cpu + * area? Only seems to happen when things are modular. Maybe + * per-cpu-alloc does not zero buffers properly? Needs + * investigating. Reported and fixed by Mike. 
+ */ +#if defined(CONFIG_NF_CONNTRACK_EVENTS) && defined(CONFIG_SMP) + { + int cpu; + + for_each_possible_cpu(cpu) + spin_lock_init(&per_cpu_lock(nf_conntrack_ecache, cpu)); + } +#endif + return ret; out_fini_expect: patches/ich-force-hpet-ich5-quirk-to-force-detect-enable-fix.patch0000664000077200007720000000271310653433161024315 0ustar mingomingoFrom: Andrew Morton arch/i386/kernel/quirks.c: In function 'old_ich_force_enable_hpet': arch/i386/kernel/quirks.c:196: warning: 'gen_cntl' is used uninitialized in this function arch/i386/kernel/quirks.c: In function 'force_hpet_resume': arch/i386/kernel/quirks.c:171: warning: 'gen_cntl' is used uninitialized in this function Cc: Andi Kleen Cc: Greg KH Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Venkatesh Pallipadi Cc: Venki Pallipadi Cc: john stultz Signed-off-by: Andrew Morton --- arch/i386/kernel/quirks.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) Index: linux-rt-rebase.q/arch/i386/kernel/quirks.c =================================================================== --- linux-rt-rebase.q.orig/arch/i386/kernel/quirks.c +++ linux-rt-rebase.q/arch/i386/kernel/quirks.c @@ -161,7 +161,8 @@ static struct pci_dev *cached_dev; static void old_ich_force_hpet_resume(void) { - u32 val, gen_cntl; + u32 val; + u32 uninitialized_var(gen_cntl); if (!force_hpet_address || !cached_dev) return; @@ -182,7 +183,8 @@ static void old_ich_force_enable_hpet(vo { - u32 val, gen_cntl; + u32 val; + u32 uninitialized_var(gen_cntl); if (hpet_address || force_hpet_address) return; patches/s_files.patch0000664000077200007720000002702510653433167014174 0ustar mingomingoSubject: fs: break the file_list_lock for sb->s_files Break the protection of sb->s_files out from under the global file_list_lock. sb->s_files is converted to a lock_list. Furthermore, to prevent the lock_list_head from getting too contended with concurrent add operations, the add is buffered in per-cpu filevecs. This would ordinarily require a flush before a delete operation - to ensure the to-be-deleted entry is indeed added to the list. This is avoided by storing a pointer to the filevec location in the not yet used list_head. This pointer can then be used to clear the filevec entry before it is actually added. The file_flag mess is a bit unfortunate - this can be removed by also converting tty->tty_files to a lock_list (TODO).
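In short, the add/remove handshake works like this (a condensed, illustrative extract of the fs/file_table.c hunks below; the real code additionally serializes on fu_llist.lock and runs the remove side under RCU):

	/* add side: park the file in this CPU's filevec, remember the slot */
	fvec->files[fvec->nr] = filp;
	filp->f_u.fu_llist.next = (void *)&fvec->files[fvec->nr];
	__set_bit(0, (void *)&filp->f_u.fu_llist.next);	/* "parked" marker */

	/* remove side: a parked file never reached sb->s_files, so
	   clearing the filevec slot replaces the list delete */
	ptr = (void **)file->f_u.fu_llist.next;
	if (__test_and_clear_bit(0, (void *)&ptr))
		rcu_assign_pointer(*ptr, NULL);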
Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- fs/file_table.c | 170 +++++++++++++++++++++++++++++++++++++++---- fs/open.c | 2 fs/proc/generic.c | 8 -- fs/super.c | 7 - include/linux/fs.h | 19 ++++ security/selinux/selinuxfs.c | 8 -- 6 files changed, 184 insertions(+), 30 deletions(-) Index: linux-rt-rebase.q/fs/file_table.c =================================================================== --- linux-rt-rebase.q.orig/fs/file_table.c +++ linux-rt-rebase.q/fs/file_table.c @@ -112,7 +112,7 @@ struct file *get_empty_filp(void) goto fail_sec; tsk = current; - INIT_LIST_HEAD(&f->f_u.fu_list); + INIT_LOCK_LIST_HEAD(&f->f_u.fu_llist); atomic_set(&f->f_count, 1); rwlock_init(&f->f_owner.lock); f->f_uid = tsk->fsuid; @@ -244,32 +244,175 @@ void put_filp(struct file *file) } } -void file_move(struct file *file, struct list_head *list) +enum { + FILEVEC_SIZE = 15 +}; + +struct filevec { + unsigned long nr; + struct file *files[FILEVEC_SIZE]; +}; + +static DEFINE_PER_CPU(struct filevec, sb_fvec); + +static inline unsigned int filevec_size(struct filevec *fvec) { - if (!list) - return; - file_list_lock(); - list_move(&file->f_u.fu_list, list); - file_list_unlock(); + return FILEVEC_SIZE - fvec->nr; +} + +static inline unsigned int filevec_count(struct filevec *fvec) +{ + return fvec->nr; +} + +static inline void filevec_reinit(struct filevec *fvec) +{ + fvec->nr = 0; +} + +static inline unsigned int filevec_add(struct filevec *fvec, struct file *filp) +{ + rcu_assign_pointer(fvec->files[fvec->nr], filp); + + /* + * Here we do icky stuff in order to avoid flushing the per cpu filevec + * on list removal. + * + * We store the location on the per cpu filevec in the as of yet unused + * fu_llist.next field and toggle bit 0 to indicate we done so. This + * allows the removal code to set the filevec entry to NULL, thereby + * avoiding the list add. + * + * Abuse the fu_llist.lock for protection. + */ + spin_lock(&filp->f_u.fu_llist.lock); + filp->f_u.fu_llist.next = (void *)&fvec->files[fvec->nr]; + __set_bit(0, (void *)&filp->f_u.fu_llist.next); + spin_unlock(&filp->f_u.fu_llist.lock); + + fvec->nr++; + return filevec_size(fvec); +} + +static void __filevec_add(struct filevec *fvec) +{ + int i; + + for (i = 0; i < filevec_count(fvec); i++) { + struct file *filp; + + /* + * see the comment in filevec_add(); + * need RCU because a concurrent remove might have deleted + * the entry from under us. + */ + rcu_read_lock(); + filp = rcu_dereference(fvec->files[i]); + /* + * the simple case, its gone - NEXT! + */ + if (!filp) { + rcu_read_unlock(); + continue; + } + + spin_lock(&filp->f_u.fu_llist.lock); + /* + * If the entry really is still there, add it! 
+ */ + if (rcu_dereference(fvec->files[i])) { + struct super_block *sb = + filp->f_mapping->host->i_sb; + + __lock_list_add(&filp->f_u.fu_llist, &sb->s_files); + } + spin_unlock(&filp->f_u.fu_llist.lock); + rcu_read_unlock(); + } + filevec_reinit(fvec); +} + +static void filevec_add_drain(void) +{ + struct filevec *fvec = &get_cpu_var(sb_fvec, &cpu); + if (filevec_count(fvec)) + __filevec_add(fvec); + put_cpu_var(sb_fvec, cpu); } +static void filevec_add_drain_per_cpu(struct work_struct *dummy) +{ + filevec_add_drain(); +} + +int filevec_add_drain_all(void) +{ + return schedule_on_each_cpu(filevec_add_drain_per_cpu); +} +EXPORT_SYMBOL_GPL(filevec_add_drain_all); + void file_kill(struct file *file) { - if (!list_empty(&file->f_u.fu_list)) { + if (file_flag(file, F_SUPERBLOCK)) { + void **ptr; + + file_flag_clear(file, F_SUPERBLOCK); + + /* + * If bit 0 of the fu_llist.next pointer is set we're still + * enqueued on a per cpu filevec, in that case clear the entry + * and we're done. + */ + spin_lock(&file->f_u.fu_llist.lock); + ptr = (void **)file->f_u.fu_llist.next; + if (__test_and_clear_bit(0, (void *)&ptr)) { + rcu_assign_pointer(*ptr, NULL); + INIT_LIST_HEAD(&file->f_u.fu_llist.head); + spin_unlock(&file->f_u.fu_llist.lock); + return; + } + spin_unlock(&file->f_u.fu_llist.lock); + + if (!list_empty(&file->f_u.fu_list)) + lock_list_del_init(&file->f_u.fu_llist); + + } else if (!list_empty(&file->f_u.fu_list)) { file_list_lock(); list_del_init(&file->f_u.fu_list); file_list_unlock(); } } +void file_move(struct file *file, struct list_head *list) +{ + struct super_block *sb; + + if (!list) + return; + + file_kill(file); + + sb = file->f_mapping->host->i_sb; + if (list == &sb->s_files.head) { + struct filevec *fvec = &get_cpu_var(sb_fvec, &cpu); + file_flag_set(file, F_SUPERBLOCK); + if (!filevec_add(fvec, file)) + __filevec_add(fvec); + put_cpu_var(sb_fvec, cpu); + } else { + file_list_lock(); + list_add(&file->f_u.fu_list, list); + file_list_unlock(); + } +} + int fs_may_remount_ro(struct super_block *sb) { - struct list_head *p; + struct file *file; /* Check that no files are currently opened for writing. */ - file_list_lock(); - list_for_each(p, &sb->s_files) { - struct file *file = list_entry(p, struct file, f_u.fu_list); + filevec_add_drain_all(); + lock_list_for_each_entry(file, &sb->s_files, f_u.fu_llist) { struct inode *inode = file->f_path.dentry->d_inode; /* File with pending delete? */ @@ -280,10 +423,9 @@ int fs_may_remount_ro(struct super_block if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE)) goto too_bad; } - file_list_unlock(); return 1; /* Tis' cool bro. 
*/ too_bad: - file_list_unlock(); + lock_list_for_each_entry_stop(file, f_u.fu_llist); return 0; } Index: linux-rt-rebase.q/fs/open.c =================================================================== --- linux-rt-rebase.q.orig/fs/open.c +++ linux-rt-rebase.q/fs/open.c @@ -753,7 +753,7 @@ static struct file *__dentry_open(struct f->f_path.mnt = mnt; f->f_pos = 0; f->f_op = fops_get(inode->i_fop); - file_move(f, &inode->i_sb->s_files); + file_move(f, &inode->i_sb->s_files.head); if (!open && f->f_op) open = f->f_op->open; Index: linux-rt-rebase.q/fs/proc/generic.c =================================================================== --- linux-rt-rebase.q.orig/fs/proc/generic.c +++ linux-rt-rebase.q/fs/proc/generic.c @@ -560,15 +560,14 @@ static int proc_register(struct proc_dir */ static void proc_kill_inodes(struct proc_dir_entry *de) { - struct list_head *p; + struct file *filp; struct super_block *sb = proc_mnt->mnt_sb; /* * Actually it's a partial revoke(). */ - file_list_lock(); - list_for_each(p, &sb->s_files) { - struct file * filp = list_entry(p, struct file, f_u.fu_list); + filevec_add_drain_all(); + lock_list_for_each_entry(filp, &sb->s_files, f_u.fu_llist) { struct dentry * dentry = filp->f_path.dentry; struct inode * inode; const struct file_operations *fops; @@ -582,7 +581,6 @@ static void proc_kill_inodes(struct proc filp->f_op = NULL; fops_put(fops); } - file_list_unlock(); } static struct proc_dir_entry *proc_create(struct proc_dir_entry **parent, Index: linux-rt-rebase.q/fs/super.c =================================================================== --- linux-rt-rebase.q.orig/fs/super.c +++ linux-rt-rebase.q/fs/super.c @@ -67,7 +67,7 @@ static struct super_block *alloc_super(s } INIT_LIST_HEAD(&s->s_dirty); INIT_LIST_HEAD(&s->s_io); - INIT_LIST_HEAD(&s->s_files); + INIT_LOCK_LIST_HEAD(&s->s_files); INIT_LIST_HEAD(&s->s_instances); INIT_HLIST_HEAD(&s->s_anon); INIT_LIST_HEAD(&s->s_inodes); @@ -569,12 +569,11 @@ static void mark_files_ro(struct super_b { struct file *f; - file_list_lock(); - list_for_each_entry(f, &sb->s_files, f_u.fu_list) { + filevec_add_drain_all(); + lock_list_for_each_entry(f, &sb->s_files, f_u.fu_llist) { if (S_ISREG(f->f_path.dentry->d_inode->i_mode) && file_count(f)) f->f_mode &= ~FMODE_WRITE; } - file_list_unlock(); } /** Index: linux-rt-rebase.q/include/linux/fs.h =================================================================== --- linux-rt-rebase.q.orig/include/linux/fs.h +++ linux-rt-rebase.q/include/linux/fs.h @@ -278,6 +278,7 @@ extern int dir_notify_enable; #include #include #include +#include #include #include #include @@ -722,11 +723,16 @@ struct file { /* * fu_list becomes invalid after file_free is called and queued via * fu_rcuhead for RCU freeing + * fu_llist is used for the superblock s_files list; its crucial that + * the spinlock contained therein is not clobbered by other uses of + * the union. 
*/ union { struct list_head fu_list; + struct lock_list_head fu_llist; struct rcu_head fu_rcuhead; } f_u; + unsigned long f__flags; struct path f_path; #define f_dentry f_path.dentry #define f_vfsmnt f_path.mnt @@ -757,9 +763,20 @@ extern spinlock_t files_lock; #define file_list_lock() spin_lock(&files_lock); #define file_list_unlock() spin_unlock(&files_lock); +#define F_SUPERBLOCK 0 + +#define file_flag_set(file, flag) \ + __set_bit((flag), &(file)->f__flags) +#define file_flag_clear(file, flag) \ + __clear_bit((flag), &(file)->f__flags) +#define file_flag(file, flag) \ + test_bit((flag), &(file)->f__flags) + #define get_file(x) atomic_inc(&(x)->f_count) #define file_count(x) atomic_read(&(x)->f_count) +extern int filevec_add_drain_all(void); + #define MAX_NON_LFS ((1UL<<31) - 1) /* Page cache limit. The filesystems should put that into their s_maxbytes @@ -949,7 +966,7 @@ struct super_block { struct list_head s_dirty; /* dirty inodes */ struct list_head s_io; /* parked for writeback */ struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */ - struct list_head s_files; + struct lock_list_head s_files; struct block_device *s_bdev; struct mtd_info *s_mtd; Index: linux-rt-rebase.q/security/selinux/selinuxfs.c =================================================================== --- linux-rt-rebase.q.orig/security/selinux/selinuxfs.c +++ linux-rt-rebase.q/security/selinux/selinuxfs.c @@ -961,7 +961,8 @@ static const struct file_operations sel_ /* partial revoke() from fs/proc/generic.c proc_kill_inodes */ static void sel_remove_entries(struct dentry *de) { - struct list_head *p, *node; + struct list_head *node; + struct file *filp; struct super_block *sb = de->d_sb; spin_lock(&dcache_lock); @@ -983,9 +984,7 @@ static void sel_remove_entries(struct de spin_unlock(&dcache_lock); - file_list_lock(); - list_for_each(p, &sb->s_files) { - struct file * filp = list_entry(p, struct file, f_u.fu_list); + lock_list_for_each_entry(filp, &sb->s_files, f_u.fu_llist) { struct dentry * dentry = filp->f_path.dentry; if (dentry->d_parent != de) { @@ -993,7 +992,6 @@ static void sel_remove_entries(struct de } filp->f_op = NULL; } - file_list_unlock(); } #define BOOL_DIR_NAME "booleans" patches/lock_list.patch0000664000077200007720000001200510653433167014533 0ustar mingomingoSubject: lock_list - a fine grain locked double linked list Provide a simple fine grain locked double linked list. It builds upon the regular double linked list primitives, spinlocks and RCU. In order to avoid deadlocks, a prev -> next locking order is observed. This prevents reverse iteration. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/lock_list.h | 88 ++++++++++++++++++++++++++++++++++++++++++++++ lib/Makefile | 2 - lib/lock_list.c | 55 ++++++++++++++++++++++++++++ 3 files changed, 144 insertions(+), 1 deletion(-) Index: linux-rt-rebase.q/include/linux/lock_list.h =================================================================== --- /dev/null +++ linux-rt-rebase.q/include/linux/lock_list.h @@ -0,0 +1,88 @@ +/* + * Copyright (C) 2006, Red Hat, Inc., Peter Zijlstra + * Licenced under the GPLv2. + * + * Simple fine grain locked double linked list.
+ */ +#ifndef _LINUX_LOCK_LIST_H +#define _LINUX_LOCK_LIST_H + +#ifdef __KERNEL__ + +#include +#include +#include + +struct lock_list_head { + union { + struct list_head head; + struct { + struct lock_list_head *next, *prev; + }; + }; + spinlock_t lock; +}; + +enum { + LOCK_LIST_NESTING_PREV = 1, + LOCK_LIST_NESTING_CUR, + LOCK_LIST_NESTING_NEXT, +}; + +static inline void INIT_LOCK_LIST_HEAD(struct lock_list_head *list) +{ + INIT_LIST_HEAD(&list->head); + spin_lock_init(&list->lock); +} + +/* + * Passed pointers are assumed stable by external means (refcount, rcu) + */ +extern void __lock_list_add(struct lock_list_head *new, + struct lock_list_head *list); + +static inline void lock_list_add(struct lock_list_head *new, + struct lock_list_head *list) +{ + spin_lock(&new->lock); + __lock_list_add(new, list); + spin_unlock(&new->lock); +} + +extern void lock_list_del_init(struct lock_list_head *entry); + +static inline +struct lock_list_head *lock_list_next_entry(struct lock_list_head *list, + struct lock_list_head *entry) +{ + struct lock_list_head *next = entry->next; + if (likely(next != list)) { + lock_set_subclass(&entry->lock.dep_map, + LOCK_LIST_NESTING_CUR, _THIS_IP_); + spin_lock_nested(&next->lock, LOCK_LIST_NESTING_NEXT); + BUG_ON(entry->next != next); + } else + next = NULL; + spin_unlock(&entry->lock); + return next; +} + +static inline +struct lock_list_head *lock_list_first_entry(struct lock_list_head *list) +{ + spin_lock(&list->lock); + return lock_list_next_entry(list, list); +} + +#define lock_list_for_each_entry(pos, list, member) \ + for (pos = list_entry(lock_list_first_entry(list), \ + typeof(*pos), member); \ + pos; \ + pos = list_entry(lock_list_next_entry(list, &pos->member), \ + typeof(*pos), member)) + +#define lock_list_for_each_entry_stop(pos, member) \ + spin_unlock(&(pos->member.lock)) + +#endif /* __KERNEL__ */ +#endif /* _LINUX_LOCK_LIST_H */ Index: linux-rt-rebase.q/lib/Makefile =================================================================== --- linux-rt-rebase.q.orig/lib/Makefile +++ linux-rt-rebase.q/lib/Makefile @@ -2,7 +2,7 @@ # Makefile for some libs needed in the kernel. # -lib-y := ctype.o string.o vsprintf.o cmdline.o \ +lib-y := ctype.o string.o vsprintf.o cmdline.o lock_list.o \ rbtree.o radix-tree.o dump_stack.o \ idr.o int_sqrt.o bitmap.o extable.o prio_tree.o \ sha1.o irq_regs.o reciprocal_div.o argv_split.o Index: linux-rt-rebase.q/lib/lock_list.c =================================================================== --- /dev/null +++ linux-rt-rebase.q/lib/lock_list.c @@ -0,0 +1,55 @@ +/* + * Copyright (C) 2006, Red Hat, Inc., Peter Zijlstra + * Licenced under the GPLv2. + * + * Simple fine grain locked double linked list. + * + * Locking order is from prev -> next. + * Edges are locked not nodes; that is, cur->lock protects: + * - cur->next, + * - cur->next->prev. + * + * Passed pointers are assumed to be stable by external means such as + * refcounts or RCU. The individual list entries are assumed to be RCU + * freed (requirement of __lock_list_del). 
+ */ + +#include + +void __lock_list_add(struct lock_list_head *new, + struct lock_list_head *list) +{ + struct lock_list_head *next; + + spin_lock_nested(&list->lock, LOCK_LIST_NESTING_PREV); + next = list->next; + __list_add(&new->head, &list->head, &next->head); + spin_unlock(&list->lock); +} + +void lock_list_del_init(struct lock_list_head *entry) +{ + struct lock_list_head *prev, *next; + + rcu_read_lock(); +again: + prev = entry->prev; + if (prev == entry) + goto out; + spin_lock_nested(&prev->lock, LOCK_LIST_NESTING_PREV); + if (unlikely(entry->prev != prev)) { + /* + * we lost + */ + spin_unlock(&prev->lock); + goto again; + } + spin_lock_nested(&entry->lock, LOCK_LIST_NESTING_CUR); + next = entry->next; + __list_del(&prev->head, &next->head); + INIT_LIST_HEAD(&entry->head); + spin_unlock(&entry->lock); + spin_unlock(&prev->lock); +out: + rcu_read_unlock(); +} patches/rcu-1.patch0000664000077200007720000013517310653433163013505 0ustar mingomingo This patch re-organizes the RCU code to enable multiple implementations of RCU. Users of RCU continue to include rcupdate.h and the RCU interfaces remain the same. This is in preparation for subsequently merging the preemptible RCU implementation. Signed-off-by: Dipankar Sarma --- --- include/linux/rcuclassic.h | 148 +++++++++++ include/linux/rcupdate.h | 154 +++--------- kernel/Makefile | 2 kernel/rcuclassic.c | 561 +++++++++++++++++++++++++++++++++++++++++++++ kernel/rcupdate.c | 558 ++------------------------------------------ 5 files changed, 782 insertions(+), 641 deletions(-) Index: linux-rt-rebase.q/include/linux/rcuclassic.h =================================================================== --- /dev/null +++ linux-rt-rebase.q/include/linux/rcuclassic.h @@ -0,0 +1,148 @@ +/* + * Read-Copy Update mechanism for mutual exclusion (classic version) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2001 + * + * Author: Dipankar Sarma + * + * Based on the original work by Paul McKenney + * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen. + * Papers: + * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf + * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001) + * + * For detailed explanation of Read-Copy Update mechanism see - + * http://lse.sourceforge.net/locking/rcupdate.html + * + */ + +#ifndef __LINUX_RCUCLASSIC_H +#define __LINUX_RCUCLASSIC_H + +#ifdef __KERNEL__ + +#include +#include +#include +#include +#include +#include + + +/* Global control variables for rcupdate callback mechanism. */ +struct rcu_ctrlblk { + long cur; /* Current batch number. */ + long completed; /* Number of the last completed batch */ + int next_pending; /* Is the next batch already waiting?
*/ + + int signaled; + + spinlock_t lock ____cacheline_internodealigned_in_smp; + cpumask_t cpumask; /* CPUs that need to switch in order */ + /* for current batch to proceed. */ +} ____cacheline_internodealigned_in_smp; + +/* Is batch a before batch b ? */ +static inline int rcu_batch_before(long a, long b) +{ + return (a - b) < 0; +} + +/* Is batch a after batch b ? */ +static inline int rcu_batch_after(long a, long b) +{ + return (a - b) > 0; +} + +/* + * Per-CPU data for Read-Copy UPdate. + * nxtlist - new callbacks are added here + * curlist - current batch for which quiescent cycle started if any + */ +struct rcu_data { + /* 1) quiescent state handling : */ + long quiescbatch; /* Batch # for grace period */ + int passed_quiesc; /* User-mode/idle loop etc. */ + int qs_pending; /* core waits for quiesc state */ + + /* 2) batch handling */ + long batch; /* Batch # for current RCU batch */ + struct rcu_head *nxtlist; + struct rcu_head **nxttail; + long qlen; /* # of queued callbacks */ + struct rcu_head *curlist; + struct rcu_head **curtail; + struct rcu_head *donelist; + struct rcu_head **donetail; + long blimit; /* Upper limit on a processed batch */ + int cpu; +}; + +DECLARE_PER_CPU(struct rcu_data, rcu_data); +DECLARE_PER_CPU(struct rcu_data, rcu_bh_data); + +/* + * Increment the quiescent state counter. + * The counter is a bit degenerated: We do not need to know + * how many quiescent states passed, just if there was at least + * one since the start of the grace period. Thus just a flag. + */ +static inline void rcu_qsctr_inc(int cpu) +{ + struct rcu_data *rdp = &per_cpu(rcu_data, cpu); + rdp->passed_quiesc = 1; +} +static inline void rcu_bh_qsctr_inc(int cpu) +{ + struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu); + rdp->passed_quiesc = 1; +} + +extern int rcu_pending(int cpu); +extern int rcu_needs_cpu(int cpu); + +#define __rcu_read_lock() \ + do { \ + preempt_disable(); \ + __acquire(RCU); \ + } while(0) +#define __rcu_read_unlock() \ + do { \ + __release(RCU); \ + preempt_enable(); \ + } while(0) + +#define __rcu_read_lock_bh() \ + do { \ + local_bh_disable(); \ + __acquire(RCU_BH); \ + } while(0) +#define __rcu_read_unlock_bh() \ + do { \ + __release(RCU_BH); \ + local_bh_enable(); \ + } while(0) + +#define __synchronize_sched() synchronize_rcu() + +extern void __rcu_init(void); +extern void rcu_check_callbacks(int cpu, int user); +extern void rcu_restart_cpu(int cpu); +extern long rcu_batches_completed(void); + +#endif /* __KERNEL__ */ +#endif /* __LINUX_RCUCLASSIC_H */ Index: linux-rt-rebase.q/include/linux/rcupdate.h =================================================================== --- linux-rt-rebase.q.orig/include/linux/rcupdate.h +++ linux-rt-rebase.q/include/linux/rcupdate.h @@ -1,5 +1,5 @@ /* - * Read-Copy Update mechanism for mutual exclusion + * Read-Copy Update mechanism for mutual exclusion * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -18,8 +18,8 @@ * Copyright (C) IBM Corporation, 2001 * * Author: Dipankar Sarma - * - * Based on the original work by Paul McKenney + * + * Based on the original work by Paul McKenney * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen. 
* Papers: * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf @@ -41,6 +41,7 @@ #include #include #include +#include /** * struct rcu_head - callback structure for use with RCU @@ -59,80 +60,6 @@ struct rcu_head { } while (0) - -/* Global control variables for rcupdate callback mechanism. */ -struct rcu_ctrlblk { - long cur; /* Current batch number. */ - long completed; /* Number of the last completed batch */ - int next_pending; /* Is the next batch already waiting? */ - - int signaled; - - spinlock_t lock ____cacheline_internodealigned_in_smp; - cpumask_t cpumask; /* CPUs that need to switch in order */ - /* for current batch to proceed. */ -} ____cacheline_internodealigned_in_smp; - -/* Is batch a before batch b ? */ -static inline int rcu_batch_before(long a, long b) -{ - return (a - b) < 0; -} - -/* Is batch a after batch b ? */ -static inline int rcu_batch_after(long a, long b) -{ - return (a - b) > 0; -} - -/* - * Per-CPU data for Read-Copy UPdate. - * nxtlist - new callbacks are added here - * curlist - current batch for which quiescent cycle started if any - */ -struct rcu_data { - /* 1) quiescent state handling : */ - long quiescbatch; /* Batch # for grace period */ - int passed_quiesc; /* User-mode/idle loop etc. */ - int qs_pending; /* core waits for quiesc state */ - - /* 2) batch handling */ - long batch; /* Batch # for current RCU batch */ - struct rcu_head *nxtlist; - struct rcu_head **nxttail; - long qlen; /* # of queued callbacks */ - struct rcu_head *curlist; - struct rcu_head **curtail; - struct rcu_head *donelist; - struct rcu_head **donetail; - long blimit; /* Upper limit on a processed batch */ - int cpu; - struct rcu_head barrier; -}; - -DECLARE_PER_CPU(struct rcu_data, rcu_data); -DECLARE_PER_CPU(struct rcu_data, rcu_bh_data); - -/* - * Increment the quiescent state counter. - * The counter is a bit degenerated: We do not need to know - * how many quiescent states passed, just if there was at least - * one since the start of the grace period. Thus just a flag. - */ -static inline void rcu_qsctr_inc(int cpu) -{ - struct rcu_data *rdp = &per_cpu(rcu_data, cpu); - rdp->passed_quiesc = 1; -} -static inline void rcu_bh_qsctr_inc(int cpu) -{ - struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu); - rdp->passed_quiesc = 1; -} - -extern int rcu_pending(int cpu); -extern int rcu_needs_cpu(int cpu); - /** * rcu_read_lock - mark the beginning of an RCU read-side critical section. * @@ -162,22 +89,14 @@ extern int rcu_needs_cpu(int cpu); * * It is illegal to block while in an RCU read-side critical section. */ -#define rcu_read_lock() \ - do { \ - preempt_disable(); \ - __acquire(RCU); \ - } while(0) +#define rcu_read_lock() __rcu_read_lock() /** * rcu_read_unlock - marks the end of an RCU read-side critical section. * * See rcu_read_lock() for more information. */ -#define rcu_read_unlock() \ - do { \ - __release(RCU); \ - preempt_enable(); \ - } while(0) +#define rcu_read_unlock() __rcu_read_unlock() /* * So where is rcu_write_lock()? It does not exist, as there is no @@ -200,22 +119,14 @@ extern int rcu_needs_cpu(int cpu); * can use just rcu_read_lock(). * */ -#define rcu_read_lock_bh() \ - do { \ - local_bh_disable(); \ - __acquire(RCU_BH); \ - } while(0) +#define rcu_read_lock_bh() __rcu_read_lock_bh() -/* +/** * rcu_read_unlock_bh - marks the end of a softirq-only RCU critical section * * See rcu_read_lock_bh() for more information. 
*/ -#define rcu_read_unlock_bh() \ - do { \ - __release(RCU_BH); \ - local_bh_enable(); \ - } while(0) +#define rcu_read_unlock_bh() __rcu_read_unlock_bh() /** * rcu_dereference - fetch an RCU-protected pointer in an @@ -267,22 +178,49 @@ extern int rcu_needs_cpu(int cpu); * In "classic RCU", these two guarantees happen to be one and * the same, but can differ in realtime RCU implementations. */ -#define synchronize_sched() synchronize_rcu() - -extern void rcu_init(void); -extern void rcu_check_callbacks(int cpu, int user); -extern void rcu_restart_cpu(int cpu); -extern long rcu_batches_completed(void); -extern long rcu_batches_completed_bh(void); +#define synchronize_sched() __synchronize_sched() -/* Exported interfaces */ -extern void FASTCALL(call_rcu(struct rcu_head *head, +/** + * call_rcu - Queue an RCU callback for invocation after a grace period. + * @head: structure to be used for queueing the RCU updates. + * @func: actual update function to be invoked after the grace period + * + * The update function will be invoked some time after a full grace + * period elapses, in other words after all currently executing RCU + * read-side critical sections have completed. RCU read-side critical + * sections are delimited by rcu_read_lock() and rcu_read_unlock(), + * and may be nested. + */ +extern void FASTCALL(call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *head))); + + +/** + * call_rcu_bh - Queue an RCU for invocation after a quicker grace period. + * @head: structure to be used for queueing the RCU updates. + * @func: actual update function to be invoked after the grace period + * + * The update function will be invoked some time after a full grace + * period elapses, in other words after all currently executing RCU + * read-side critical sections have completed. call_rcu_bh() assumes + * that the read-side critical sections end on completion of a softirq + * handler. This means that read-side critical sections in process + * context must not be interrupted by softirqs. This interface is to be + * used when most of the read-side critical sections are in softirq context. + * RCU read-side critical sections are delimited by rcu_read_lock() and + * rcu_read_unlock(), * if in interrupt context or rcu_read_lock_bh() + * and rcu_read_unlock_bh(), if in process context. These may be nested. 
+ */ extern void FASTCALL(call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *head))); + +/* Exported common interfaces */ extern void synchronize_rcu(void); -void synchronize_idle(void); extern void rcu_barrier(void); +/* Internal to kernel */ +extern void rcu_init(void); +extern void rcu_check_callbacks(int cpu, int user); + #endif /* __KERNEL__ */ #endif /* __LINUX_RCUPDATE_H */ Index: linux-rt-rebase.q/kernel/Makefile =================================================================== --- linux-rt-rebase.q.orig/kernel/Makefile +++ linux-rt-rebase.q/kernel/Makefile @@ -6,7 +6,7 @@ obj-y = sched.o fork.o exec_domain.o exit.o itimer.o time.o softirq.o resource.o \ sysctl.o capability.o ptrace.o timer.o user.o user_namespace.o \ signal.o sys.o kmod.o workqueue.o pid.o \ - rcupdate.o extable.o params.o posix-timers.o \ + rcupdate.o rcuclassic.o extable.o params.o posix-timers.o \ kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ hrtimer.o rwsem.o latency.o nsproxy.o srcu.o die_notifier.o \ utsname.o Index: linux-rt-rebase.q/kernel/rcuclassic.c =================================================================== --- /dev/null +++ linux-rt-rebase.q/kernel/rcuclassic.c @@ -0,0 +1,561 @@ +/* + * Read-Copy Update mechanism for mutual exclusion, classic implementation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2001 + * + * Authors: Dipankar Sarma + * Manfred Spraul + * + * Based on the original work by Paul McKenney + * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen. + * + * Papers: http://www.rdrop.com/users/paulmck/RCU + * + * For detailed explanation of Read-Copy Update mechanism see - + * Documentation/RCU/ *.txt + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/* Definition for rcupdate control block. */ +static struct rcu_ctrlblk rcu_ctrlblk = { + .cur = -300, + .completed = -300, + .lock = SPIN_LOCK_UNLOCKED, + .cpumask = CPU_MASK_NONE, +}; +static struct rcu_ctrlblk rcu_bh_ctrlblk = { + .cur = -300, + .completed = -300, + .lock = SPIN_LOCK_UNLOCKED, + .cpumask = CPU_MASK_NONE, +}; + +DEFINE_PER_CPU(struct rcu_data, rcu_data) = { 0L }; +DEFINE_PER_CPU(struct rcu_data, rcu_bh_data) = { 0L }; + +/* Fake initialization required by compiler */ +static DEFINE_PER_CPU(struct tasklet_struct, rcu_tasklet) = {NULL}; +static int blimit = 10; +static int qhimark = 10000; +static int qlowmark = 100; + +#ifdef CONFIG_SMP +static void force_quiescent_state(struct rcu_data *rdp, + struct rcu_ctrlblk *rcp) +{ + int cpu; + cpumask_t cpumask; + set_need_resched(); + if (unlikely(!rcp->signaled)) { + rcp->signaled = 1; + /* + * Don't send IPI to itself. 
With irqs disabled, + * rdp->cpu is the current cpu. + */ + cpumask = rcp->cpumask; + cpu_clear(rdp->cpu, cpumask); + for_each_cpu_mask(cpu, cpumask) + smp_send_reschedule(cpu); + } +} +#else +static inline void force_quiescent_state(struct rcu_data *rdp, + struct rcu_ctrlblk *rcp) +{ + set_need_resched(); +} +#endif + +/* + * call_rcu - Queue an RCU callback for invocation after a grace period. + * @head: structure to be used for queueing the RCU updates. + * @func: actual update function to be invoked after the grace period + * + * The update function will be invoked some time after a full grace + * period elapses, in other words after all currently executing RCU + * read-side critical sections have completed. RCU read-side critical + * sections are delimited by rcu_read_lock() and rcu_read_unlock(), + * and may be nested. + */ +void fastcall call_rcu(struct rcu_head *head, + void (*func)(struct rcu_head *rcu)) +{ + unsigned long flags; + struct rcu_data *rdp; + + head->func = func; + head->next = NULL; + local_irq_save(flags); + rdp = &__get_cpu_var(rcu_data); + *rdp->nxttail = head; + rdp->nxttail = &head->next; + if (unlikely(++rdp->qlen > qhimark)) { + rdp->blimit = INT_MAX; + force_quiescent_state(rdp, &rcu_ctrlblk); + } + local_irq_restore(flags); +} + +/* + * call_rcu_bh - Queue an RCU for invocation after a quicker grace period. + * @head: structure to be used for queueing the RCU updates. + * @func: actual update function to be invoked after the grace period + * + * The update function will be invoked some time after a full grace + * period elapses, in other words after all currently executing RCU + * read-side critical sections have completed. call_rcu_bh() assumes + * that the read-side critical sections end on completion of a softirq + * handler. This means that read-side critical sections in process + * context must not be interrupted by softirqs. This interface is to be + * used when most of the read-side critical sections are in softirq context. + * RCU read-side critical sections are delimited by rcu_read_lock() and + * rcu_read_unlock(), * if in interrupt context or rcu_read_lock_bh() + * and rcu_read_unlock_bh(), if in process context. These may be nested. + */ +void fastcall call_rcu_bh(struct rcu_head *head, + void (*func)(struct rcu_head *rcu)) +{ + unsigned long flags; + struct rcu_data *rdp; + + head->func = func; + head->next = NULL; + local_irq_save(flags); + rdp = &__get_cpu_var(rcu_bh_data); + *rdp->nxttail = head; + rdp->nxttail = &head->next; + + if (unlikely(++rdp->qlen > qhimark)) { + rdp->blimit = INT_MAX; + force_quiescent_state(rdp, &rcu_bh_ctrlblk); + } + + local_irq_restore(flags); +} + +/* + * Return the number of RCU batches processed thus far. Useful + * for debug and statistics. + */ +long rcu_batches_completed(void) +{ + return rcu_ctrlblk.completed; +} + +/* + * Return the number of RCU batches processed thus far. Useful + * for debug and statistics. + */ +long rcu_batches_completed_bh(void) +{ + return rcu_bh_ctrlblk.completed; +} + +/* + * Invoke the completed RCU callbacks. They are expected to be in + * a per-cpu list. 
+ */ +static void rcu_do_batch(struct rcu_data *rdp) +{ + struct rcu_head *next, *list; + int count = 0; + + list = rdp->donelist; + while (list) { + next = list->next; + prefetch(next); + list->func(list); + list = next; + if (++count >= rdp->blimit) + break; + } + rdp->donelist = list; + + local_irq_disable(); + rdp->qlen -= count; + local_irq_enable(); + if (rdp->blimit == INT_MAX && rdp->qlen <= qlowmark) + rdp->blimit = blimit; + + if (!rdp->donelist) + rdp->donetail = &rdp->donelist; + else + tasklet_schedule(&per_cpu(rcu_tasklet, rdp->cpu)); +} + +/* + * Grace period handling: + * The grace period handling consists out of two steps: + * - A new grace period is started. + * This is done by rcu_start_batch. The start is not broadcasted to + * all cpus, they must pick this up by comparing rcp->cur with + * rdp->quiescbatch. All cpus are recorded in the + * rcu_ctrlblk.cpumask bitmap. + * - All cpus must go through a quiescent state. + * Since the start of the grace period is not broadcasted, at least two + * calls to rcu_check_quiescent_state are required: + * The first call just notices that a new grace period is running. The + * following calls check if there was a quiescent state since the beginning + * of the grace period. If so, it updates rcu_ctrlblk.cpumask. If + * the bitmap is empty, then the grace period is completed. + * rcu_check_quiescent_state calls rcu_start_batch(0) to start the next grace + * period (if necessary). + */ +/* + * Register a new batch of callbacks, and start it up if there is currently no + * active batch and the batch to be registered has not already occurred. + * Caller must hold rcu_ctrlblk.lock. + */ +static void rcu_start_batch(struct rcu_ctrlblk *rcp) +{ + if (rcp->next_pending && + rcp->completed == rcp->cur) { + rcp->next_pending = 0; + /* + * next_pending == 0 must be visible in + * __rcu_process_callbacks() before it can see new value of cur. + */ + smp_wmb(); + rcp->cur++; + + /* + * Accessing nohz_cpu_mask before incrementing rcp->cur needs a + * Barrier Otherwise it can cause tickless idle CPUs to be + * included in rcp->cpumask, which will extend graceperiods + * unnecessarily. + */ + smp_mb(); + cpus_andnot(rcp->cpumask, cpu_online_map, nohz_cpu_mask); + + rcp->signaled = 0; + } +} + +/* + * cpu went through a quiescent state since the beginning of the grace period. + * Clear it from the cpu mask and complete the grace period if it was the last + * cpu. Start another grace period if someone has further entries pending + */ +static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp) +{ + cpu_clear(cpu, rcp->cpumask); + if (cpus_empty(rcp->cpumask)) { + /* batch completed ! */ + rcp->completed = rcp->cur; + rcu_start_batch(rcp); + } +} + +/* + * Check if the cpu has gone through a quiescent state (say context + * switch). If so and if it already hasn't done so in this RCU + * quiescent cycle, then indicate that it has done so. + */ +static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp, + struct rcu_data *rdp) +{ + if (rdp->quiescbatch != rcp->cur) { + /* start new grace period: */ + rdp->qs_pending = 1; + rdp->passed_quiesc = 0; + rdp->quiescbatch = rcp->cur; + return; + } + + /* Grace period already completed for this cpu? + * qs_pending is checked instead of the actual bitmap to avoid + * cacheline trashing. + */ + if (!rdp->qs_pending) + return; + + /* + * Was there a quiescent state since the beginning of the grace + * period? If no, then exit and wait for the next call. 
+ */ + if (!rdp->passed_quiesc) + return; + rdp->qs_pending = 0; + + spin_lock(&rcp->lock); + /* + * rdp->quiescbatch/rcp->cur and the cpu bitmap can come out of sync + * during cpu startup. Ignore the quiescent state. + */ + if (likely(rdp->quiescbatch == rcp->cur)) + cpu_quiet(rdp->cpu, rcp); + + spin_unlock(&rcp->lock); +} + + +#ifdef CONFIG_HOTPLUG_CPU + +/* warning! helper for rcu_offline_cpu. do not use elsewhere without reviewing + * locking requirements, the list it's pulling from has to belong to a cpu + * which is dead and hence not processing interrupts. + */ +static void rcu_move_batch(struct rcu_data *this_rdp, struct rcu_head *list, + struct rcu_head **tail) +{ + local_irq_disable(); + *this_rdp->nxttail = list; + if (list) + this_rdp->nxttail = tail; + local_irq_enable(); +} + +static void __rcu_offline_cpu(struct rcu_data *this_rdp, + struct rcu_ctrlblk *rcp, struct rcu_data *rdp) +{ + /* if the cpu going offline owns the grace period + * we can block indefinitely waiting for it, so flush + * it here + */ + spin_lock_bh(&rcp->lock); + if (rcp->cur != rcp->completed) + cpu_quiet(rdp->cpu, rcp); + spin_unlock_bh(&rcp->lock); + rcu_move_batch(this_rdp, rdp->curlist, rdp->curtail); + rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail); + rcu_move_batch(this_rdp, rdp->donelist, rdp->donetail); +} + +static void rcu_offline_cpu(int cpu) +{ + struct rcu_data *this_rdp = &get_cpu_var(rcu_data); + struct rcu_data *this_bh_rdp = &get_cpu_var(rcu_bh_data); + + __rcu_offline_cpu(this_rdp, &rcu_ctrlblk, + &per_cpu(rcu_data, cpu)); + __rcu_offline_cpu(this_bh_rdp, &rcu_bh_ctrlblk, + &per_cpu(rcu_bh_data, cpu)); + put_cpu_var(rcu_data); + put_cpu_var(rcu_bh_data); + tasklet_kill_immediate(&per_cpu(rcu_tasklet, cpu), cpu); +} + +#else + +static void rcu_offline_cpu(int cpu) +{ +} + +#endif + +/* + * This does the RCU processing work from tasklet context. + */ +static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp, + struct rcu_data *rdp) +{ + if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch)) { + *rdp->donetail = rdp->curlist; + rdp->donetail = rdp->curtail; + rdp->curlist = NULL; + rdp->curtail = &rdp->curlist; + } + + if (rdp->nxtlist && !rdp->curlist) { + local_irq_disable(); + rdp->curlist = rdp->nxtlist; + rdp->curtail = rdp->nxttail; + rdp->nxtlist = NULL; + rdp->nxttail = &rdp->nxtlist; + local_irq_enable(); + + /* + * start the next batch of callbacks + */ + + /* determine batch number */ + rdp->batch = rcp->cur + 1; + /* see the comment and corresponding wmb() in + * the rcu_start_batch() + */ + smp_rmb(); + + if (!rcp->next_pending) { + /* and start it/schedule start if it's a new batch */ + spin_lock(&rcp->lock); + rcp->next_pending = 1; + rcu_start_batch(rcp); + spin_unlock(&rcp->lock); + } + } + + rcu_check_quiescent_state(rcp, rdp); + if (rdp->donelist) + rcu_do_batch(rdp); +} + +static void rcu_process_callbacks(unsigned long unused) +{ + __rcu_process_callbacks(&rcu_ctrlblk, &__get_cpu_var(rcu_data)); + __rcu_process_callbacks(&rcu_bh_ctrlblk, &__get_cpu_var(rcu_bh_data)); +} + +static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp) +{ + /* This cpu has pending rcu entries and the grace period + * for them has completed. 
+ */ + if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch)) + return 1; + + /* This cpu has no pending entries, but there are new entries */ + if (!rdp->curlist && rdp->nxtlist) + return 1; + + /* This cpu has finished callbacks to invoke */ + if (rdp->donelist) + return 1; + + /* The rcu core waits for a quiescent state from the cpu */ + if (rdp->quiescbatch != rcp->cur || rdp->qs_pending) + return 1; + + /* nothing to do */ + return 0; +} + +/* + * Check to see if there is any immediate RCU-related work to be done + * by the current CPU, returning 1 if so. This function is part of the + * RCU implementation; it is -not- an exported member of the RCU API. + */ +int rcu_pending(int cpu) +{ + return __rcu_pending(&rcu_ctrlblk, &per_cpu(rcu_data, cpu)) || + __rcu_pending(&rcu_bh_ctrlblk, &per_cpu(rcu_bh_data, cpu)); +} + +/* + * Check to see if any future RCU-related work will need to be done + * by the current CPU, even if none need be done immediately, returning + * 1 if so. This function is part of the RCU implementation; it is -not- + * an exported member of the RCU API. + */ +int rcu_needs_cpu(int cpu) +{ + struct rcu_data *rdp = &per_cpu(rcu_data, cpu); + struct rcu_data *rdp_bh = &per_cpu(rcu_bh_data, cpu); + + return (!!rdp->curlist || !!rdp_bh->curlist || rcu_pending(cpu)); +} + +void rcu_check_callbacks(int cpu, int user) +{ + if (user || + (idle_cpu(cpu) && !in_softirq() && + hardirq_count() <= (1 << HARDIRQ_SHIFT))) { + rcu_qsctr_inc(cpu); + rcu_bh_qsctr_inc(cpu); + } else if (!in_softirq()) + rcu_bh_qsctr_inc(cpu); + tasklet_schedule(&per_cpu(rcu_tasklet, cpu)); +} + +static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp, + struct rcu_data *rdp) +{ + memset(rdp, 0, sizeof(*rdp)); + rdp->curtail = &rdp->curlist; + rdp->nxttail = &rdp->nxtlist; + rdp->donetail = &rdp->donelist; + rdp->quiescbatch = rcp->completed; + rdp->qs_pending = 0; + rdp->cpu = cpu; + rdp->blimit = blimit; +} + +static void __devinit rcu_online_cpu(int cpu) +{ + struct rcu_data *rdp = &per_cpu(rcu_data, cpu); + struct rcu_data *bh_rdp = &per_cpu(rcu_bh_data, cpu); + + rcu_init_percpu_data(cpu, &rcu_ctrlblk, rdp); + rcu_init_percpu_data(cpu, &rcu_bh_ctrlblk, bh_rdp); + tasklet_init(&per_cpu(rcu_tasklet, cpu), rcu_process_callbacks, 0UL); +} + +static int __devinit rcu_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + long cpu = (long)hcpu; + switch (action) { + case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: + rcu_online_cpu(cpu); + break; + case CPU_DEAD: + case CPU_DEAD_FROZEN: + rcu_offline_cpu(cpu); + break; + default: + break; + } + return NOTIFY_OK; +} + +static struct notifier_block __devinitdata rcu_nb = { + .notifier_call = rcu_cpu_notify, +}; + +/* + * Initializes rcu mechanism. Assumed to be called early. + * That is before local timer(SMP) or jiffie timer (uniproc) is setup. + * Note that rcu_qsctr and friends are implicitly + * initialized due to the choice of ``0'' for RCU_CTR_INVALID. 
+ */ +void __init __rcu_init(void) +{ + rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE, + (void *)(long)smp_processor_id()); + /* Register notifier for non-boot CPUs */ + register_cpu_notifier(&rcu_nb); +} + +module_param(blimit, int, 0); +module_param(qhimark, int, 0); +module_param(qlowmark, int, 0); + +EXPORT_SYMBOL_GPL(rcu_batches_completed); +EXPORT_SYMBOL_GPL(rcu_batches_completed_bh); +EXPORT_SYMBOL_GPL(call_rcu); +EXPORT_SYMBOL_GPL(call_rcu_bh); Index: linux-rt-rebase.q/kernel/rcupdate.c =================================================================== --- linux-rt-rebase.q.orig/kernel/rcupdate.c +++ linux-rt-rebase.q/kernel/rcupdate.c @@ -19,7 +19,7 @@ * * Authors: Dipankar Sarma * Manfred Spraul - * + * * Based on the original work by Paul McKenney * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen. * Papers: @@ -40,152 +40,53 @@ #include #include #include -#include #include -#include #include -#include -#include #include #include +#include -/* Definition for rcupdate control block. */ -static struct rcu_ctrlblk rcu_ctrlblk = { - .cur = -300, - .completed = -300, - .lock = __SPIN_LOCK_UNLOCKED(&rcu_ctrlblk.lock), - .cpumask = CPU_MASK_NONE, -}; -static struct rcu_ctrlblk rcu_bh_ctrlblk = { - .cur = -300, - .completed = -300, - .lock = __SPIN_LOCK_UNLOCKED(&rcu_bh_ctrlblk.lock), - .cpumask = CPU_MASK_NONE, +struct rcu_synchronize { + struct rcu_head head; + struct completion completion; }; -DEFINE_PER_CPU(struct rcu_data, rcu_data) = { 0L }; -DEFINE_PER_CPU(struct rcu_data, rcu_bh_data) = { 0L }; - -/* Fake initialization required by compiler */ -static DEFINE_PER_CPU(struct tasklet_struct, rcu_tasklet) = {NULL}; -static int blimit = 10; -static int qhimark = 10000; -static int qlowmark = 100; - +static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head); static atomic_t rcu_barrier_cpu_count; static DEFINE_MUTEX(rcu_barrier_mutex); static struct completion rcu_barrier_completion; -#ifdef CONFIG_SMP -static void force_quiescent_state(struct rcu_data *rdp, - struct rcu_ctrlblk *rcp) -{ - int cpu; - cpumask_t cpumask; - set_need_resched(); - if (unlikely(!rcp->signaled)) { - rcp->signaled = 1; - /* - * Don't send IPI to itself. With irqs disabled, - * rdp->cpu is the current cpu. - */ - cpumask = rcp->cpumask; - cpu_clear(rdp->cpu, cpumask); - for_each_cpu_mask(cpu, cpumask) - smp_send_reschedule(cpu); - } -} -#else -static inline void force_quiescent_state(struct rcu_data *rdp, - struct rcu_ctrlblk *rcp) +/* Because of FASTCALL declaration of complete, we use this wrapper */ +static void wakeme_after_rcu(struct rcu_head *head) { - set_need_resched(); + struct rcu_synchronize *rcu; + + rcu = container_of(head, struct rcu_synchronize, head); + complete(&rcu->completion); } -#endif /** - * call_rcu - Queue an RCU callback for invocation after a grace period. - * @head: structure to be used for queueing the RCU updates. - * @func: actual update function to be invoked after the grace period + * synchronize_rcu - wait until a grace period has elapsed. * - * The update function will be invoked some time after a full grace - * period elapses, in other words after all currently executing RCU + * Control will return to the caller some time after a full grace + * period has elapsed, in other words after all currently executing RCU * read-side critical sections have completed. RCU read-side critical * sections are delimited by rcu_read_lock() and rcu_read_unlock(), * and may be nested. 
- */ -void fastcall call_rcu(struct rcu_head *head, - void (*func)(struct rcu_head *rcu)) -{ - unsigned long flags; - struct rcu_data *rdp; - - head->func = func; - head->next = NULL; - local_irq_save(flags); - rdp = &__get_cpu_var(rcu_data); - *rdp->nxttail = head; - rdp->nxttail = &head->next; - if (unlikely(++rdp->qlen > qhimark)) { - rdp->blimit = INT_MAX; - force_quiescent_state(rdp, &rcu_ctrlblk); - } - local_irq_restore(flags); -} - -/** - * call_rcu_bh - Queue an RCU for invocation after a quicker grace period. - * @head: structure to be used for queueing the RCU updates. - * @func: actual update function to be invoked after the grace period * - * The update function will be invoked some time after a full grace - * period elapses, in other words after all currently executing RCU - * read-side critical sections have completed. call_rcu_bh() assumes - * that the read-side critical sections end on completion of a softirq - * handler. This means that read-side critical sections in process - * context must not be interrupted by softirqs. This interface is to be - * used when most of the read-side critical sections are in softirq context. - * RCU read-side critical sections are delimited by rcu_read_lock() and - * rcu_read_unlock(), * if in interrupt context or rcu_read_lock_bh() - * and rcu_read_unlock_bh(), if in process context. These may be nested. + * If your read-side code is not protected by rcu_read_lock(), do -not- + * use synchronize_rcu(). */ -void fastcall call_rcu_bh(struct rcu_head *head, - void (*func)(struct rcu_head *rcu)) +void synchronize_rcu(void) { - unsigned long flags; - struct rcu_data *rdp; - - head->func = func; - head->next = NULL; - local_irq_save(flags); - rdp = &__get_cpu_var(rcu_bh_data); - *rdp->nxttail = head; - rdp->nxttail = &head->next; - - if (unlikely(++rdp->qlen > qhimark)) { - rdp->blimit = INT_MAX; - force_quiescent_state(rdp, &rcu_bh_ctrlblk); - } - - local_irq_restore(flags); -} + struct rcu_synchronize rcu; -/* - * Return the number of RCU batches processed thus far. Useful - * for debug and statistics. - */ -long rcu_batches_completed(void) -{ - return rcu_ctrlblk.completed; -} + init_completion(&rcu.completion); + /* Will wake me after RCU finished */ + call_rcu(&rcu.head, wakeme_after_rcu); -/* - * Return the number of RCU batches processed thus far. Useful - * for debug and statistics. - */ -long rcu_batches_completed_bh(void) -{ - return rcu_bh_ctrlblk.completed; + /* Wait for it */ + wait_for_completion(&rcu.completion); } static void rcu_barrier_callback(struct rcu_head *notused) @@ -200,10 +101,8 @@ static void rcu_barrier_callback(struct static void rcu_barrier_func(void *notused) { int cpu = smp_processor_id(); - struct rcu_data *rdp = &per_cpu(rcu_data, cpu); - struct rcu_head *head; + struct rcu_head *head = &per_cpu(rcu_barrier_head, cpu); - head = &rdp->barrier; atomic_inc(&rcu_barrier_cpu_count); call_rcu(head, rcu_barrier_callback); } @@ -224,414 +123,9 @@ void rcu_barrier(void) } EXPORT_SYMBOL_GPL(rcu_barrier); -/* - * Invoke the completed RCU callbacks. They are expected to be in - * a per-cpu list. 
- */ -static void rcu_do_batch(struct rcu_data *rdp) -{ - struct rcu_head *next, *list; - int count = 0; - - list = rdp->donelist; - while (list) { - next = list->next; - prefetch(next); - list->func(list); - list = next; - if (++count >= rdp->blimit) - break; - } - rdp->donelist = list; - - local_irq_disable(); - rdp->qlen -= count; - local_irq_enable(); - if (rdp->blimit == INT_MAX && rdp->qlen <= qlowmark) - rdp->blimit = blimit; - - if (!rdp->donelist) - rdp->donetail = &rdp->donelist; - else - tasklet_schedule(&per_cpu(rcu_tasklet, rdp->cpu)); -} - -/* - * Grace period handling: - * The grace period handling consists out of two steps: - * - A new grace period is started. - * This is done by rcu_start_batch. The start is not broadcasted to - * all cpus, they must pick this up by comparing rcp->cur with - * rdp->quiescbatch. All cpus are recorded in the - * rcu_ctrlblk.cpumask bitmap. - * - All cpus must go through a quiescent state. - * Since the start of the grace period is not broadcasted, at least two - * calls to rcu_check_quiescent_state are required: - * The first call just notices that a new grace period is running. The - * following calls check if there was a quiescent state since the beginning - * of the grace period. If so, it updates rcu_ctrlblk.cpumask. If - * the bitmap is empty, then the grace period is completed. - * rcu_check_quiescent_state calls rcu_start_batch(0) to start the next grace - * period (if necessary). - */ -/* - * Register a new batch of callbacks, and start it up if there is currently no - * active batch and the batch to be registered has not already occurred. - * Caller must hold rcu_ctrlblk.lock. - */ -static void rcu_start_batch(struct rcu_ctrlblk *rcp) -{ - if (rcp->next_pending && - rcp->completed == rcp->cur) { - rcp->next_pending = 0; - /* - * next_pending == 0 must be visible in - * __rcu_process_callbacks() before it can see new value of cur. - */ - smp_wmb(); - rcp->cur++; - - /* - * Accessing nohz_cpu_mask before incrementing rcp->cur needs a - * Barrier Otherwise it can cause tickless idle CPUs to be - * included in rcp->cpumask, which will extend graceperiods - * unnecessarily. - */ - smp_mb(); - cpus_andnot(rcp->cpumask, cpu_online_map, nohz_cpu_mask); - - rcp->signaled = 0; - } -} - -/* - * cpu went through a quiescent state since the beginning of the grace period. - * Clear it from the cpu mask and complete the grace period if it was the last - * cpu. Start another grace period if someone has further entries pending - */ -static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp) -{ - cpu_clear(cpu, rcp->cpumask); - if (cpus_empty(rcp->cpumask)) { - /* batch completed ! */ - rcp->completed = rcp->cur; - rcu_start_batch(rcp); - } -} - -/* - * Check if the cpu has gone through a quiescent state (say context - * switch). If so and if it already hasn't done so in this RCU - * quiescent cycle, then indicate that it has done so. - */ -static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp, - struct rcu_data *rdp) -{ - if (rdp->quiescbatch != rcp->cur) { - /* start new grace period: */ - rdp->qs_pending = 1; - rdp->passed_quiesc = 0; - rdp->quiescbatch = rcp->cur; - return; - } - - /* Grace period already completed for this cpu? - * qs_pending is checked instead of the actual bitmap to avoid - * cacheline trashing. - */ - if (!rdp->qs_pending) - return; - - /* - * Was there a quiescent state since the beginning of the grace - * period? If no, then exit and wait for the next call. 
- */ - if (!rdp->passed_quiesc) - return; - rdp->qs_pending = 0; - - spin_lock(&rcp->lock); - /* - * rdp->quiescbatch/rcp->cur and the cpu bitmap can come out of sync - * during cpu startup. Ignore the quiescent state. - */ - if (likely(rdp->quiescbatch == rcp->cur)) - cpu_quiet(rdp->cpu, rcp); - - spin_unlock(&rcp->lock); -} - - -#ifdef CONFIG_HOTPLUG_CPU - -/* warning! helper for rcu_offline_cpu. do not use elsewhere without reviewing - * locking requirements, the list it's pulling from has to belong to a cpu - * which is dead and hence not processing interrupts. - */ -static void rcu_move_batch(struct rcu_data *this_rdp, struct rcu_head *list, - struct rcu_head **tail) -{ - local_irq_disable(); - *this_rdp->nxttail = list; - if (list) - this_rdp->nxttail = tail; - local_irq_enable(); -} - -static void __rcu_offline_cpu(struct rcu_data *this_rdp, - struct rcu_ctrlblk *rcp, struct rcu_data *rdp) -{ - /* if the cpu going offline owns the grace period - * we can block indefinitely waiting for it, so flush - * it here - */ - spin_lock_bh(&rcp->lock); - if (rcp->cur != rcp->completed) - cpu_quiet(rdp->cpu, rcp); - spin_unlock_bh(&rcp->lock); - rcu_move_batch(this_rdp, rdp->curlist, rdp->curtail); - rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail); - rcu_move_batch(this_rdp, rdp->donelist, rdp->donetail); -} - -static void rcu_offline_cpu(int cpu) -{ - struct rcu_data *this_rdp = &get_cpu_var(rcu_data); - struct rcu_data *this_bh_rdp = &get_cpu_var(rcu_bh_data); - - __rcu_offline_cpu(this_rdp, &rcu_ctrlblk, - &per_cpu(rcu_data, cpu)); - __rcu_offline_cpu(this_bh_rdp, &rcu_bh_ctrlblk, - &per_cpu(rcu_bh_data, cpu)); - put_cpu_var(rcu_data); - put_cpu_var(rcu_bh_data); - tasklet_kill_immediate(&per_cpu(rcu_tasklet, cpu), cpu); -} - -#else - -static void rcu_offline_cpu(int cpu) -{ -} - -#endif - -/* - * This does the RCU processing work from tasklet context. - */ -static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp, - struct rcu_data *rdp) -{ - if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch)) { - *rdp->donetail = rdp->curlist; - rdp->donetail = rdp->curtail; - rdp->curlist = NULL; - rdp->curtail = &rdp->curlist; - } - - if (rdp->nxtlist && !rdp->curlist) { - local_irq_disable(); - rdp->curlist = rdp->nxtlist; - rdp->curtail = rdp->nxttail; - rdp->nxtlist = NULL; - rdp->nxttail = &rdp->nxtlist; - local_irq_enable(); - - /* - * start the next batch of callbacks - */ - - /* determine batch number */ - rdp->batch = rcp->cur + 1; - /* see the comment and corresponding wmb() in - * the rcu_start_batch() - */ - smp_rmb(); - - if (!rcp->next_pending) { - /* and start it/schedule start if it's a new batch */ - spin_lock(&rcp->lock); - rcp->next_pending = 1; - rcu_start_batch(rcp); - spin_unlock(&rcp->lock); - } - } - - rcu_check_quiescent_state(rcp, rdp); - if (rdp->donelist) - rcu_do_batch(rdp); -} - -static void rcu_process_callbacks(unsigned long unused) -{ - __rcu_process_callbacks(&rcu_ctrlblk, &__get_cpu_var(rcu_data)); - __rcu_process_callbacks(&rcu_bh_ctrlblk, &__get_cpu_var(rcu_bh_data)); -} - -static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp) -{ - /* This cpu has pending rcu entries and the grace period - * for them has completed. 
- */ - if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch)) - return 1; - - /* This cpu has no pending entries, but there are new entries */ - if (!rdp->curlist && rdp->nxtlist) - return 1; - - /* This cpu has finished callbacks to invoke */ - if (rdp->donelist) - return 1; - - /* The rcu core waits for a quiescent state from the cpu */ - if (rdp->quiescbatch != rcp->cur || rdp->qs_pending) - return 1; - - /* nothing to do */ - return 0; -} - -/* - * Check to see if there is any immediate RCU-related work to be done - * by the current CPU, returning 1 if so. This function is part of the - * RCU implementation; it is -not- an exported member of the RCU API. - */ -int rcu_pending(int cpu) -{ - return __rcu_pending(&rcu_ctrlblk, &per_cpu(rcu_data, cpu)) || - __rcu_pending(&rcu_bh_ctrlblk, &per_cpu(rcu_bh_data, cpu)); -} - -/* - * Check to see if any future RCU-related work will need to be done - * by the current CPU, even if none need be done immediately, returning - * 1 if so. This function is part of the RCU implementation; it is -not- - * an exported member of the RCU API. - */ -int rcu_needs_cpu(int cpu) -{ - struct rcu_data *rdp = &per_cpu(rcu_data, cpu); - struct rcu_data *rdp_bh = &per_cpu(rcu_bh_data, cpu); - - return (!!rdp->curlist || !!rdp_bh->curlist || rcu_pending(cpu)); -} - -void rcu_check_callbacks(int cpu, int user) -{ - if (user || - (idle_cpu(cpu) && !in_softirq() && - hardirq_count() <= (1 << HARDIRQ_SHIFT))) { - rcu_qsctr_inc(cpu); - rcu_bh_qsctr_inc(cpu); - } else if (!in_softirq()) - rcu_bh_qsctr_inc(cpu); - tasklet_schedule(&per_cpu(rcu_tasklet, cpu)); -} - -static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp, - struct rcu_data *rdp) -{ - memset(rdp, 0, sizeof(*rdp)); - rdp->curtail = &rdp->curlist; - rdp->nxttail = &rdp->nxtlist; - rdp->donetail = &rdp->donelist; - rdp->quiescbatch = rcp->completed; - rdp->qs_pending = 0; - rdp->cpu = cpu; - rdp->blimit = blimit; -} - -static void __devinit rcu_online_cpu(int cpu) -{ - struct rcu_data *rdp = &per_cpu(rcu_data, cpu); - struct rcu_data *bh_rdp = &per_cpu(rcu_bh_data, cpu); - - rcu_init_percpu_data(cpu, &rcu_ctrlblk, rdp); - rcu_init_percpu_data(cpu, &rcu_bh_ctrlblk, bh_rdp); - tasklet_init(&per_cpu(rcu_tasklet, cpu), rcu_process_callbacks, 0UL); -} - -static int __cpuinit rcu_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) -{ - long cpu = (long)hcpu; - switch (action) { - case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: - rcu_online_cpu(cpu); - break; - case CPU_DEAD: - case CPU_DEAD_FROZEN: - rcu_offline_cpu(cpu); - break; - default: - break; - } - return NOTIFY_OK; -} - -static struct notifier_block __cpuinitdata rcu_nb = { - .notifier_call = rcu_cpu_notify, -}; - -/* - * Initializes rcu mechanism. Assumed to be called early. - * That is before local timer(SMP) or jiffie timer (uniproc) is setup. - * Note that rcu_qsctr and friends are implicitly - * initialized due to the choice of ``0'' for RCU_CTR_INVALID. 
- */ void __init rcu_init(void) { - rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE, - (void *)(long)smp_processor_id()); - /* Register notifier for non-boot CPUs */ - register_cpu_notifier(&rcu_nb); -} - -struct rcu_synchronize { - struct rcu_head head; - struct completion completion; -}; - -/* Because of FASTCALL declaration of complete, we use this wrapper */ -static void wakeme_after_rcu(struct rcu_head *head) -{ - struct rcu_synchronize *rcu; - - rcu = container_of(head, struct rcu_synchronize, head); - complete(&rcu->completion); -} - -/** - * synchronize_rcu - wait until a grace period has elapsed. - * - * Control will return to the caller some time after a full grace - * period has elapsed, in other words after all currently executing RCU - * read-side critical sections have completed. RCU read-side critical - * sections are delimited by rcu_read_lock() and rcu_read_unlock(), - * and may be nested. - * - * If your read-side code is not protected by rcu_read_lock(), do -not- - * use synchronize_rcu(). - */ -void synchronize_rcu(void) -{ - struct rcu_synchronize rcu; - - init_completion(&rcu.completion); - /* Will wake me after RCU finished */ - call_rcu(&rcu.head, wakeme_after_rcu); - - /* Wait for it */ - wait_for_completion(&rcu.completion); + __rcu_init(); } -module_param(blimit, int, 0); -module_param(qhimark, int, 0); -module_param(qlowmark, int, 0); -EXPORT_SYMBOL_GPL(rcu_batches_completed); -EXPORT_SYMBOL_GPL(rcu_batches_completed_bh); -EXPORT_SYMBOL_GPL(call_rcu); -EXPORT_SYMBOL_GPL(call_rcu_bh); EXPORT_SYMBOL_GPL(synchronize_rcu); patches/preempt-realtime-mm.patch0000664000077200007720000001700010653433166016432 0ustar mingomingo--- include/linux/pagevec.h | 2 +- include/linux/vmstat.h | 10 ++++++++++ mm/bounce.c | 4 ++-- mm/memory.c | 11 +++++++++-- mm/mmap.c | 10 ++++++++-- mm/vmscan.c | 10 ++++++++-- mm/vmstat.c | 38 ++++++++++++++++++++++++++++++++------ 7 files changed, 70 insertions(+), 15 deletions(-) Index: linux-rt-rebase.q/include/linux/pagevec.h =================================================================== --- linux-rt-rebase.q.orig/include/linux/pagevec.h +++ linux-rt-rebase.q/include/linux/pagevec.h @@ -9,7 +9,7 @@ #define _LINUX_PAGEVEC_H /* 14 pointers + two long's align the pagevec structure to a power of two */ -#define PAGEVEC_SIZE 14 +#define PAGEVEC_SIZE 8 struct page; struct address_space; Index: linux-rt-rebase.q/include/linux/vmstat.h =================================================================== --- linux-rt-rebase.q.orig/include/linux/vmstat.h +++ linux-rt-rebase.q/include/linux/vmstat.h @@ -59,7 +59,12 @@ DECLARE_PER_CPU(struct vm_event_state, v static inline void __count_vm_event(enum vm_event_item item) { +#ifdef CONFIG_PREEMPT_RT + get_cpu_var(vm_event_states).event[item]++; + put_cpu(); +#else __get_cpu_var(vm_event_states).event[item]++; +#endif } static inline void count_vm_event(enum vm_event_item item) @@ -70,7 +75,12 @@ static inline void count_vm_event(enum v static inline void __count_vm_events(enum vm_event_item item, long delta) { +#ifdef CONFIG_PREEMPT_RT + get_cpu_var(vm_event_states).event[item] += delta; + put_cpu(); +#else __get_cpu_var(vm_event_states).event[item] += delta; +#endif } static inline void count_vm_events(enum vm_event_item item, long delta) Index: linux-rt-rebase.q/mm/bounce.c =================================================================== --- linux-rt-rebase.q.orig/mm/bounce.c +++ linux-rt-rebase.q/mm/bounce.c @@ -48,11 +48,11 @@ static void bounce_copy_vec(struct bio_v unsigned long flags; unsigned char 
*vto; - local_irq_save(flags); + local_irq_save_nort(flags); vto = kmap_atomic(to->bv_page, KM_BOUNCE_READ); memcpy(vto + to->bv_offset, vfrom, to->bv_len); kunmap_atomic(vto, KM_BOUNCE_READ); - local_irq_restore(flags); + local_irq_restore_nort(flags); } #else /* CONFIG_HIGHMEM */ Index: linux-rt-rebase.q/mm/memory.c =================================================================== --- linux-rt-rebase.q.orig/mm/memory.c +++ linux-rt-rebase.q/mm/memory.c @@ -281,7 +281,9 @@ void free_pgtables(struct mmu_gather **t if (!vma) /* Sometimes when exiting after an oops */ return; +#ifndef CONFIG_PREEMPT_RT if (vma->vm_next) +#endif tlb_finish_mmu(*tlb, tlb_start_addr(*tlb), tlb_end_addr(*tlb)); /* * Hide vma from rmap and vmtruncate before freeeing pgtables, @@ -292,7 +294,9 @@ void free_pgtables(struct mmu_gather **t unlink_file_vma(unlink); unlink = unlink->vm_next; } +#ifndef CONFIG_PREEMPT_RT if (vma->vm_next) +#endif *tlb = tlb_gather_mmu(vma->vm_mm, fullmm); #endif while (vma) { @@ -805,10 +809,13 @@ static unsigned long unmap_page_range(st return addr; } -#ifdef CONFIG_PREEMPT +#if defined(CONFIG_PREEMPT) && !defined(CONFIG_PREEMPT_RT) # define ZAP_BLOCK_SIZE (8 * PAGE_SIZE) #else -/* No preempt: go for improved straight-line efficiency */ +/* + * No preempt: go for improved straight-line efficiency + * on PREEMPT_RT this is not a critical latency-path. + */ # define ZAP_BLOCK_SIZE (1024 * PAGE_SIZE) #endif Index: linux-rt-rebase.q/mm/mmap.c =================================================================== --- linux-rt-rebase.q.orig/mm/mmap.c +++ linux-rt-rebase.q/mm/mmap.c @@ -1873,10 +1873,16 @@ asmlinkage long sys_munmap(unsigned long static inline void verify_mm_writelocked(struct mm_struct *mm) { #ifdef CONFIG_DEBUG_VM - if (unlikely(down_read_trylock(&mm->mmap_sem))) { +# ifdef CONFIG_PREEMPT_RT + if (unlikely(!rt_rwsem_is_locked(&mm->mmap_sem))) { WARN_ON(1); - up_read(&mm->mmap_sem); } +# else + if (unlikely(down_read_trylock(&mm->mmap_sem))) { + WARN_ON(1); + up_read(&mm->mmap_sem); + } +# endif #endif } Index: linux-rt-rebase.q/mm/vmscan.c =================================================================== --- linux-rt-rebase.q.orig/mm/vmscan.c +++ linux-rt-rebase.q/mm/vmscan.c @@ -23,6 +23,7 @@ #include #include #include +#include #include /* for try_to_release_page(), buffer_heads_over_limit */ #include @@ -787,7 +788,7 @@ static unsigned long shrink_inactive_lis nr_scanned += nr_scan; nr_freed = shrink_page_list(&page_list, sc); nr_reclaimed += nr_freed; - local_irq_disable(); + local_irq_disable_nort(); if (current_is_kswapd()) { __count_zone_vm_events(PGSCAN_KSWAPD, zone, nr_scan); __count_vm_events(KSWAPD_STEAL, nr_freed); @@ -818,9 +819,14 @@ static unsigned long shrink_inactive_lis } } } while (nr_scanned < max_scan); + /* + * Non-PREEMPT_RT relies on IRQs-off protecting the page_states + * per-CPU data. PREEMPT_RT has that data protected even in + * __mod_page_state(), so no need to keep IRQs disabled. 
+ */ spin_unlock(&zone->lru_lock); done: - local_irq_enable(); + local_irq_enable_nort(); pagevec_release(&pvec); return nr_reclaimed; } Index: linux-rt-rebase.q/mm/vmstat.c =================================================================== --- linux-rt-rebase.q.orig/mm/vmstat.c +++ linux-rt-rebase.q/mm/vmstat.c @@ -156,10 +156,14 @@ static void refresh_zone_stat_thresholds void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item, int delta) { - struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id()); - s8 *p = pcp->vm_stat_diff + item; + struct per_cpu_pageset *pcp; + int cpu; long x; + s8 *p; + cpu = get_cpu(); + pcp = zone_pcp(zone, cpu); + p = pcp->vm_stat_diff + item; x = delta + *p; if (unlikely(x > pcp->stat_threshold || x < -pcp->stat_threshold)) { @@ -167,6 +171,7 @@ void __mod_zone_page_state(struct zone * x = 0; } *p = x; + put_cpu(); } EXPORT_SYMBOL(__mod_zone_page_state); @@ -209,9 +214,13 @@ EXPORT_SYMBOL(mod_zone_page_state); */ void __inc_zone_state(struct zone *zone, enum zone_stat_item item) { - struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id()); - s8 *p = pcp->vm_stat_diff + item; + struct per_cpu_pageset *pcp; + int cpu; + s8 *p; + cpu = get_cpu(); + pcp = zone_pcp(zone, cpu); + p = pcp->vm_stat_diff + item; (*p)++; if (unlikely(*p > pcp->stat_threshold)) { @@ -220,18 +229,34 @@ void __inc_zone_state(struct zone *zone, zone_page_state_add(*p + overstep, zone, item); *p = -overstep; } + put_cpu(); } void __inc_zone_page_state(struct page *page, enum zone_stat_item item) { +#ifdef CONFIG_PREEMPT_RT + unsigned long flags; + struct zone *zone; + + zone = page_zone(page); + local_irq_save(flags); + __inc_zone_state(zone, item); + local_irq_restore(flags); +#else __inc_zone_state(page_zone(page), item); +#endif } EXPORT_SYMBOL(__inc_zone_page_state); void __dec_zone_state(struct zone *zone, enum zone_stat_item item) { - struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id()); - s8 *p = pcp->vm_stat_diff + item; + struct per_cpu_pageset *pcp; + int cpu; + s8 *p; + + cpu = get_cpu(); + pcp = zone_pcp(zone, cpu); + p = pcp->vm_stat_diff + item; (*p)--; @@ -241,6 +266,7 @@ void __dec_zone_state(struct zone *zone, zone_page_state_add(*p - overstep, zone, item); *p = overstep; } + put_cpu(); } void __dec_zone_page_state(struct page *page, enum zone_stat_item item) patches/nmi-profiling.patch0000664000077200007720000000661210653433166015330 0ustar mingomingo--- arch/i386/kernel/irq.c | 2 ++ arch/i386/kernel/nmi.c | 7 +++---- arch/x86_64/kernel/nmi.c | 6 +++--- drivers/char/sysrq.c | 2 +- include/asm-x86_64/apic.h | 2 ++ 5 files changed, 11 insertions(+), 8 deletions(-) Index: linux-rt-rebase.q/arch/i386/kernel/irq.c =================================================================== --- linux-rt-rebase.q.orig/arch/i386/kernel/irq.c +++ linux-rt-rebase.q/arch/i386/kernel/irq.c @@ -79,7 +79,9 @@ fastcall notrace unsigned int do_IRQ(str u32 *isp; #endif +#ifdef CONFIG_X86_LOCAL_APIC irq_show_regs_callback(smp_processor_id(), regs); +#endif if (unlikely((unsigned)irq >= NR_IRQS)) { printk(KERN_EMERG "%s: cannot handle IRQ %d\n", Index: linux-rt-rebase.q/arch/i386/kernel/nmi.c =================================================================== --- linux-rt-rebase.q.orig/arch/i386/kernel/nmi.c +++ linux-rt-rebase.q/arch/i386/kernel/nmi.c @@ -350,9 +350,9 @@ void nmi_show_all_regs(void) } } -static DEFINE_SPINLOCK(nmi_print_lock); +static DEFINE_RAW_SPINLOCK(nmi_print_lock); -void irq_show_regs_callback(int cpu, struct pt_regs *regs) +notrace 
void irq_show_regs_callback(int cpu, struct pt_regs *regs) { if (!nmi_show_regs[cpu]) return; @@ -366,7 +366,7 @@ void irq_show_regs_callback(int cpu, str spin_unlock(&nmi_print_lock); } -__kprobes int nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) +notrace __kprobes int nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) { /* * Since current_thread_info()-> is always on the stack, and we @@ -434,7 +434,6 @@ __kprobes int nmi_watchdog_tick(struct p for_each_online_cpu(i) alert_counter[i] = 0; - } } else { Index: linux-rt-rebase.q/arch/x86_64/kernel/nmi.c =================================================================== --- linux-rt-rebase.q.orig/arch/x86_64/kernel/nmi.c +++ linux-rt-rebase.q/arch/x86_64/kernel/nmi.c @@ -339,9 +339,9 @@ void nmi_show_all_regs(void) } } -static DEFINE_SPINLOCK(nmi_print_lock); +static DEFINE_RAW_SPINLOCK(nmi_print_lock); -void irq_show_regs_callback(int cpu, struct pt_regs *regs) +notrace void irq_show_regs_callback(int cpu, struct pt_regs *regs) { if (!nmi_show_regs[cpu]) return; @@ -354,7 +354,7 @@ void irq_show_regs_callback(int cpu, str spin_unlock(&nmi_print_lock); } -int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) +int notrace __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) { int sum; int touched = 0; Index: linux-rt-rebase.q/drivers/char/sysrq.c =================================================================== --- linux-rt-rebase.q.orig/drivers/char/sysrq.c +++ linux-rt-rebase.q/drivers/char/sysrq.c @@ -208,7 +208,7 @@ static struct sysrq_key_op sysrq_showreg .enable_mask = SYSRQ_ENABLE_DUMP, }; -#if defined(__i386__) +#if defined(__i386__) || defined(__x86_64__) static void sysrq_handle_showallregs(int key, struct tty_struct *tty) { Index: linux-rt-rebase.q/include/asm-x86_64/apic.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-x86_64/apic.h +++ linux-rt-rebase.q/include/asm-x86_64/apic.h @@ -94,6 +94,8 @@ extern void smp_send_nmi_allbutself(void #define K8_APIC_EXT_INT_MSG_EXT 0x7 #define K8_APIC_EXT_LVT_ENTRY_THRESHOLD 0 +extern void smp_send_nmi_allbutself(void); + #define ARCH_APICTIMER_STOPS_ON_C3 1 extern unsigned boot_cpu_id; patches/cputimer-thread-rt_A0.patch0000664000077200007720000002153010653433165016611 0ustar mingomingoIngo, This patch re-adds the posix-cpu-timer functionality by running it from a per-cpu RT thread. This allows cpu rlimits to be enforced against RT processes that would otherwise starve the system. 
thanks -john Signed-off-by: John Stultz include/linux/init_task.h | 1 include/linux/posix-timers.h | 2 include/linux/sched.h | 2 init/main.c | 2 kernel/fork.c | 2 kernel/posix-cpu-timers.c | 176 ++++++++++++++++++++++++++++++++++++++++++- 6 files changed, 180 insertions(+), 5 deletions(-) Index: linux-rt-rebase.q/include/linux/init_task.h =================================================================== --- linux-rt-rebase.q.orig/include/linux/init_task.h +++ linux-rt-rebase.q/include/linux/init_task.h @@ -164,6 +164,7 @@ extern struct group_info init_groups; .journal_info = NULL, \ .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \ .fs_excl = ATOMIC_INIT(0), \ + .posix_timer_list = NULL, \ .pi_lock = RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock), \ .pids = { \ [PIDTYPE_PID] = INIT_PID_LINK(PIDTYPE_PID), \ Index: linux-rt-rebase.q/include/linux/posix-timers.h =================================================================== --- linux-rt-rebase.q.orig/include/linux/posix-timers.h +++ linux-rt-rebase.q/include/linux/posix-timers.h @@ -115,4 +115,6 @@ void set_process_cpu_timer(struct task_s long clock_nanosleep_restart(struct restart_block *restart_block); +int posix_cpu_thread_init(void); + #endif Index: linux-rt-rebase.q/include/linux/sched.h =================================================================== --- linux-rt-rebase.q.orig/include/linux/sched.h +++ linux-rt-rebase.q/include/linux/sched.h @@ -1136,6 +1136,8 @@ struct task_struct { unsigned long long it_sched_expires; struct list_head cpu_timers[3]; + struct task_struct* posix_timer_list; + /* process credentials */ uid_t uid,euid,suid,fsuid; gid_t gid,egid,sgid,fsgid; Index: linux-rt-rebase.q/init/main.c =================================================================== --- linux-rt-rebase.q.orig/init/main.c +++ linux-rt-rebase.q/init/main.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -762,6 +763,7 @@ static void __init do_pre_smp_initcalls( migration_init(); #endif + posix_cpu_thread_init(); spawn_ksoftirqd(); if (!nosoftlockup) spawn_softlockup_task(); Index: linux-rt-rebase.q/kernel/fork.c =================================================================== --- linux-rt-rebase.q.orig/kernel/fork.c +++ linux-rt-rebase.q/kernel/fork.c @@ -1064,7 +1064,7 @@ static struct task_struct *copy_process( INIT_LIST_HEAD(&p->cpu_timers[0]); INIT_LIST_HEAD(&p->cpu_timers[1]); INIT_LIST_HEAD(&p->cpu_timers[2]); - + p->posix_timer_list = NULL; p->lock_depth = -1; /* -1 = no lock */ do_posix_clock_monotonic_gettime(&p->start_time); p->real_start_time = p->start_time; Index: linux-rt-rebase.q/kernel/posix-cpu-timers.c =================================================================== --- linux-rt-rebase.q.orig/kernel/posix-cpu-timers.c +++ linux-rt-rebase.q/kernel/posix-cpu-timers.c @@ -578,7 +578,7 @@ static void arm_timer(struct k_itimer *t p->cpu_timers : p->signal->cpu_timers); head += CPUCLOCK_WHICH(timer->it_clock); - BUG_ON(!irqs_disabled()); + BUG_ON_NONRT(!irqs_disabled()); spin_lock(&p->sighand->siglock); listpos = head; @@ -735,7 +735,7 @@ int posix_cpu_timer_set(struct k_itimer /* * Disarm any old timer after extracting its expiry time. */ - BUG_ON(!irqs_disabled()); + BUG_ON_NONRT(!irqs_disabled()); ret = 0; spin_lock(&p->sighand->siglock); @@ -1287,12 +1287,11 @@ out: * already updated our counts. We need to check if any timers fire now. * Interrupts are disabled. 
*/ -void run_posix_cpu_timers(struct task_struct *tsk) +void __run_posix_cpu_timers(struct task_struct *tsk) { LIST_HEAD(firing); struct k_itimer *timer, *next; - BUG_ON(!irqs_disabled()); #define UNEXPIRED(clock) \ (cputime_eq(tsk->it_##clock##_expires, cputime_zero) || \ @@ -1355,6 +1354,169 @@ void run_posix_cpu_timers(struct task_st } } +#include +#include +DEFINE_PER_CPU(struct task_struct *, posix_timer_task); +DEFINE_PER_CPU(struct task_struct *, posix_timer_tasklist); + +static int posix_cpu_timers_thread(void *data) +{ + int cpu = (long)data; + + BUG_ON(per_cpu(posix_timer_task,cpu) != current); + + + while (!kthread_should_stop()) { + struct task_struct *tsk = NULL; + struct task_struct *next = NULL; + + if (cpu_is_offline(cpu)) { + goto wait_to_die; + } + + /* grab task list */ + raw_local_irq_disable(); + tsk = per_cpu(posix_timer_tasklist, cpu); + per_cpu(posix_timer_tasklist, cpu) = NULL; + raw_local_irq_enable(); + + + /* its possible the list is empty, just return */ + if (!tsk) { + set_current_state(TASK_INTERRUPTIBLE); + schedule(); + __set_current_state(TASK_RUNNING); + continue; + } + + /* Process task list */ + while (1) { + /* save next */ + next = tsk->posix_timer_list; + + /* run the task timers, clear its ptr and + * unreference it + */ + __run_posix_cpu_timers(tsk); + tsk->posix_timer_list = NULL; + put_task_struct(tsk); + + /* check if this is the last on the list */ + if (next == tsk) + break; + tsk = next; + } + } + return 0; + +wait_to_die: + /* Wait for kthread_stop */ + set_current_state(TASK_INTERRUPTIBLE); + while (!kthread_should_stop()) { + schedule(); + set_current_state(TASK_INTERRUPTIBLE); + } + __set_current_state(TASK_RUNNING); + return 0; +} + +void run_posix_cpu_timers(struct task_struct *tsk) +{ + unsigned long cpu = smp_processor_id(); + struct task_struct *tasklist; + + BUG_ON(!irqs_disabled()); + if(!per_cpu(posix_timer_task, cpu)) + return; + /* get per-cpu references */ + tasklist = per_cpu(posix_timer_tasklist, cpu); + + /* check to see if we're already queued */ + if (!tsk->posix_timer_list) { + get_task_struct(tsk); + if (tasklist) { + tsk->posix_timer_list = tasklist; + } else { + /* + * The list is terminated by a self-pointing + * task_struct + */ + tsk->posix_timer_list = tsk; + } + per_cpu(posix_timer_tasklist, cpu) = tsk; + } + /* XXX signal the thread somehow */ + wake_up_process(per_cpu(posix_timer_task,cpu)); +} + + + + +/* + * posix_cpu_thread_call - callback that gets triggered when a CPU is added. + * Here we can start up the necessary migration thread for the new CPU. + */ +static int posix_cpu_thread_call(struct notifier_block *nfb, unsigned long action, + void *hcpu) +{ + int cpu = (long)hcpu; + struct task_struct *p; + struct sched_param param; + + switch (action) { + case CPU_UP_PREPARE: + p = kthread_create(posix_cpu_timers_thread, hcpu, + "posix_cpu_timers/%d",cpu); + if (IS_ERR(p)) + return NOTIFY_BAD; + p->flags |= PF_NOFREEZE; + kthread_bind(p, cpu); + /* Must be high prio to avoid getting starved */ + param.sched_priority = MAX_RT_PRIO-1; + sched_setscheduler(p, SCHED_FIFO, ¶m); + per_cpu(posix_timer_task,cpu) = p; + break; + case CPU_ONLINE: + /* Strictly unneccessary, as first user will wake it. */ + wake_up_process(per_cpu(posix_timer_task,cpu)); + break; +#ifdef CONFIG_HOTPLUG_CPU + case CPU_UP_CANCELED: + /* Unbind it from offline cpu so it can run. Fall thru. 
*/ + kthread_bind(per_cpu(posix_timer_task,cpu), + any_online_cpu(cpu_online_map)); + kthread_stop(per_cpu(posix_timer_task,cpu)); + per_cpu(posix_timer_task,cpu) = NULL; + break; + case CPU_DEAD: + kthread_stop(per_cpu(posix_timer_task,cpu)); + per_cpu(posix_timer_task,cpu) = NULL; + break; +#endif + } + return NOTIFY_OK; +} + +/* Register at highest priority so that task migration (migrate_all_tasks) + * happens before everything else. + */ +static struct notifier_block __devinitdata posix_cpu_thread_notifier = { + .notifier_call = posix_cpu_thread_call, + .priority = 10 +}; + +int __init posix_cpu_thread_init(void) +{ + void *cpu = (void *)(long)smp_processor_id(); + /* Start one for boot CPU. */ + posix_cpu_thread_call(&posix_cpu_thread_notifier, CPU_UP_PREPARE, cpu); + posix_cpu_thread_call(&posix_cpu_thread_notifier, CPU_ONLINE, cpu); + register_cpu_notifier(&posix_cpu_thread_notifier); + return 0; +} + + + /* * Set one of the process-wide special case CPU timers. * The tasklist_lock and tsk->sighand->siglock must be held by the caller. @@ -1620,6 +1782,12 @@ static __init int init_posix_cpu_timers( .nsleep = thread_cpu_nsleep, .nsleep_restart = thread_cpu_nsleep_restart, }; + unsigned long cpu; + + /* init the per-cpu posix_timer_tasklets */ + for_each_cpu_mask(cpu, cpu_possible_map) { + per_cpu(posix_timer_tasklist, cpu) = NULL; + } register_posix_clock(CLOCK_PROCESS_CPUTIME_ID, &process); register_posix_clock(CLOCK_THREAD_CPUTIME_ID, &thread); patches/disable-irqpoll.patch0000664000077200007720000000177310653433165015643 0ustar mingomingo kernel/irq/spurious.c | 10 ++++++++++ 1 file changed, 10 insertions(+) Index: linux-rt-rebase.q/kernel/irq/spurious.c =================================================================== --- linux-rt-rebase.q.orig/kernel/irq/spurious.c +++ linux-rt-rebase.q/kernel/irq/spurious.c @@ -239,6 +239,11 @@ __setup("noirqdebug", noirqdebug_setup); static int __init irqfixup_setup(char *str) { +#ifdef CONFIG_PREEMPT_RT + printk(KERN_WARNING "irqfixup boot option not supported " + "w/ CONFIG_PREEMPT_RT\n"); + return 1; +#endif irqfixup = 1; printk(KERN_WARNING "Misrouted IRQ fixup support enabled.\n"); printk(KERN_WARNING "This may impact system performance.\n"); @@ -250,6 +255,11 @@ __setup("irqfixup", irqfixup_setup); static int __init irqpoll_setup(char *str) { +#ifdef CONFIG_PREEMPT_RT + printk(KERN_WARNING "irqpoll boot option not supported " + "w/ CONFIG_PREEMPT_RT\n"); + return 1; +#endif irqfixup = 2; printk(KERN_WARNING "Misrouted IRQ fixup and polling support " "enabled\n"); patches/vsyscall-add-notrace.patch0000664000077200007720000000552610653433170016573 0ustar mingomingoFrom rostedt@goodmis.org Tue Jun 19 04:41:17 2007 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.1.7-deb (2006-10-05) on debian X-Spam-Level: X-Spam-Status: No, score=0.0 required=5.0 tests=AWL autolearn=unavailable version=3.1.7-deb Received: from ms-smtp-01.nyroc.rr.com (ms-smtp-01.nyroc.rr.com [24.24.2.55]) by mail.tglx.de (Postfix) with ESMTP id 4C19265C3EC for ; Tue, 19 Jun 2007 04:41:17 +0200 (CEST) Received: from [192.168.23.10] (cpe-24-94-51-176.stny.res.rr.com [24.94.51.176]) by ms-smtp-01.nyroc.rr.com (8.13.6/8.13.6) with ESMTP id l5J2f9l0013971; Mon, 18 Jun 2007 22:41:10 -0400 (EDT) Subject: [PATCH RT] Don't call mcount from vsyscall_fn's From: Steven Rostedt To: Ingo Molnar Cc: Thomas Gleixner , LKML , RT Content-Type: text/plain Date: Mon, 18 Jun 2007 22:41:09 -0400 Message-Id: <1182220869.15228.10.camel@localhost.localdomain> Mime-Version: 1.0 X-Mailer: 
Evolution 2.6.3 X-Virus-Scanned: Symantec AntiVirus Scan Engine X-Evolution-Source: imap://tglx%40linutronix.de@localhost:8993/ Content-Transfer-Encoding: 8bit This bit me in the butt. I couldn't understand why my init app was segfaulting, with a kernel address, but a user RIP and RSP. Well, the RIP I think was bogus, but the kernel address was always the start of "mcount". Looking deeper, I printed out what was in the RSP (even though it was a user stack). It ended up showing me that the calling address was from the VDSO area. Looking even further, I found the offending culprit, which was vread_hpet. Looking at the assembly dump, I saw the vread_hpet was calling mcount, but I could not see it in the code. Nor could I see it in hpet.i (-E option of compiling). Well, I guess Ingo is a magician when it comes to compiler tricks, and has the mcount being called by "every!!" function, unless you add the "notrace" option. This patch adds the notrace to vsyscall_fn, so that we don't have user land apps calling mcount and crashing! Signed-off-by: Steven Rostedt --- include/asm-x86_64/vsyscall.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) Index: linux-rt-rebase.q/include/asm-x86_64/vsyscall.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-x86_64/vsyscall.h +++ linux-rt-rebase.q/include/asm-x86_64/vsyscall.h @@ -24,7 +24,7 @@ enum vsyscall_num { ((unused, __section__ (".vsyscall_gtod_data"),aligned(16))) #define __section_vsyscall_clock __attribute__ \ ((unused, __section__ (".vsyscall_clock"),aligned(16))) -#define __vsyscall_fn __attribute__ ((unused,__section__(".vsyscall_fn"))) +#define __vsyscall_fn __attribute__ ((unused,__section__(".vsyscall_fn"))) notrace #define VGETCPU_RDTSCP 1 #define VGETCPU_LSL 2 patches/preempt-realtime-powerpc-b3.patch0000664000077200007720000000376710653433165020020 0ustar mingomingo To fix the following runtime warning. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - BUG: using smp_processor_id() in preemptible [00000000] code: init/371 caller is .pgtable_free_tlb+0x2c/0x14c Call Trace: [C00000000FF6B770] [C00000000000FAAC] .show_stack+0x68/0x1b0 (unreliable) [C00000000FF6B810] [C0000000001F7190] .debug_smp_processor_id+0xc8/0xf8 [C00000000FF6B8A0] [C00000000002C52C] .pgtable_free_tlb+0x2c/0x14c [C00000000FF6B940] [C0000000000B6528] .free_pgd_range+0x234/0x3bc [C00000000FF6BA40] [C0000000000B6AB8] .free_pgtables+0x224/0x260 [C00000000FF6BB00] [C0000000000B7FE8] .exit_mmap+0x100/0x208 [C00000000FF6BBC0] [C000000000055FB0] .mmput+0x70/0x12c [C00000000FF6BC50] [C00000000005B728] .exit_mm+0x150/0x170 [C00000000FF6BCE0] [C00000000005D80C] .do_exit+0x28c/0x9bc [C00000000FF6BDA0] [C00000000005DFF0] .sys_exit_group+0x0/0x8 [C00000000FF6BE30] [C000000000008634] syscall_exit+0x0/0x40 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Would it be better to just use raw_smp_processor_id() rather than tlb->cpu? 
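(raw_smp_processor_id() would only silence the debug check: the returned value could still be stale by the time it is used, whereas tlb->cpu was recorded while preemption was disabled. For reference, a simplified sketch of that pattern; the structure and helper names below are illustrative stand-ins, not the real mm/powerpc code:)

	#include <linux/smp.h>
	#include <linux/cpumask.h>
	#include <linux/preempt.h>

	struct gather_sketch {
		int cpu;	/* recorded while preemption was off */
	};

	static void gather_begin(struct gather_sketch *g)
	{
		preempt_disable();		/* as tlb_gather_mmu() does */
		g->cpu = smp_processor_id();	/* safe: cannot migrate here */
		preempt_enable();
	}

	static void gather_flush(struct gather_sketch *g)
	{
		/*
		 * Possibly preemptible context: a fresh smp_processor_id()
		 * here would trip the debug check and could already be
		 * stale.  The CPU recorded above identifies the per-cpu
		 * state this gather was built on, so use it instead --
		 * hence cpumask_of_cpu(tlb->cpu) in the patch.
		 */
		cpumask_t mask = cpumask_of_cpu(g->cpu);
		(void)mask;
	}
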
Signed-off-by: Tsutomu Owa -- owa --- arch/powerpc/mm/tlb_64.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) Index: linux-rt-rebase.q/arch/powerpc/mm/tlb_64.c =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/mm/tlb_64.c +++ linux-rt-rebase.q/arch/powerpc/mm/tlb_64.c @@ -93,8 +93,11 @@ static void pte_free_submit(struct pte_f void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf) { - /* This is safe since tlb_gather_mmu has disabled preemption */ - cpumask_t local_cpumask = cpumask_of_cpu(smp_processor_id()); + /* + * This is safe since tlb_gather_mmu has disabled preemption. + * tlb->cpu is set by tlb_gather_mmu as well. + */ + cpumask_t local_cpumask = cpumask_of_cpu(tlb->cpu); struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur); if (atomic_read(&tlb->mm->mm_users) < 2 || patches/rcu-various-fixups.patch0000664000077200007720000000402610653433167016347 0ustar mingomingo--- security/selinux/avc.c | 9 +++++++++ security/selinux/netif.c | 2 ++ 2 files changed, 11 insertions(+) Index: linux-rt-rebase.q/security/selinux/avc.c =================================================================== --- linux-rt-rebase.q.orig/security/selinux/avc.c +++ linux-rt-rebase.q/security/selinux/avc.c @@ -312,6 +312,7 @@ static inline int avc_reclaim_node(void) if (!spin_trylock_irqsave(&avc_cache.slots_lock[hvalue], flags)) continue; + rcu_read_lock(); list_for_each_entry(node, &avc_cache.slots[hvalue], list) { if (atomic_dec_and_test(&node->ae.used)) { /* Recently Unused */ @@ -319,11 +320,13 @@ static inline int avc_reclaim_node(void) avc_cache_stats_incr(reclaims); ecx++; if (ecx >= AVC_CACHE_RECLAIM) { + rcu_read_unlock(); spin_unlock_irqrestore(&avc_cache.slots_lock[hvalue], flags); goto out; } } } + rcu_read_unlock(); spin_unlock_irqrestore(&avc_cache.slots_lock[hvalue], flags); } out: @@ -807,8 +810,14 @@ int avc_ss_reset(u32 seqno) for (i = 0; i < AVC_CACHE_SLOTS; i++) { spin_lock_irqsave(&avc_cache.slots_lock[i], flag); + /* + * On -rt the outer spinlock does not prevent RCU + * from being performed: + */ + rcu_read_lock(); list_for_each_entry(node, &avc_cache.slots[i], list) avc_node_delete(node); + rcu_read_unlock(); spin_unlock_irqrestore(&avc_cache.slots_lock[i], flag); } Index: linux-rt-rebase.q/security/selinux/netif.c =================================================================== --- linux-rt-rebase.q.orig/security/selinux/netif.c +++ linux-rt-rebase.q/security/selinux/netif.c @@ -209,6 +209,7 @@ static void sel_netif_flush(void) { int idx; + rcu_read_lock(); spin_lock_bh(&sel_netif_lock); for (idx = 0; idx < SEL_NETIF_HASH_SIZE; idx++) { struct sel_netif *netif; @@ -217,6 +218,7 @@ static void sel_netif_flush(void) sel_netif_destroy(netif); } spin_unlock_bh(&sel_netif_lock); + rcu_read_unlock(); } static int sel_netif_avc_callback(u32 event, u32 ssid, u32 tsid, patches/i386-mark-atomic-irq-ops-raw.patch0000664000077200007720000000116410653433162017621 0ustar mingomingo--- include/asm-i386/atomic.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) Index: linux-rt-rebase.q/include/asm-i386/atomic.h =================================================================== --- linux-rt-rebase.q.orig/include/asm-i386/atomic.h +++ linux-rt-rebase.q/include/asm-i386/atomic.h @@ -195,10 +195,10 @@ static __inline__ int atomic_add_return( #ifdef CONFIG_M386 no_xadd: /* Legacy 386 processor */ - local_irq_save(flags); + raw_local_irq_save(flags); __i = atomic_read(v); atomic_set(v, i + __i); - 
local_irq_restore(flags); + raw_local_irq_restore(flags); return i + __i; #endif } patches/preempt-softirqs-core.patch0000664000077200007720000004400110653433164017020 0ustar mingomingo--- include/linux/bottom_half.h | 1 include/linux/interrupt.h | 13 +- include/linux/sched.h | 17 ++ kernel/Kconfig.preempt | 16 ++ kernel/sched.c | 28 ++++ kernel/softirq.c | 273 ++++++++++++++++++++++++++++++++++---------- 6 files changed, 279 insertions(+), 69 deletions(-) Index: linux-rt-rebase.q/include/linux/bottom_half.h =================================================================== --- linux-rt-rebase.q.orig/include/linux/bottom_half.h +++ linux-rt-rebase.q/include/linux/bottom_half.h @@ -2,7 +2,6 @@ #define _LINUX_BH_H extern void local_bh_disable(void); -extern void __local_bh_enable(void); extern void _local_bh_enable(void); extern void local_bh_enable(void); extern void local_bh_enable_ip(unsigned long ip); Index: linux-rt-rebase.q/include/linux/interrupt.h =================================================================== --- linux-rt-rebase.q.orig/include/linux/interrupt.h +++ linux-rt-rebase.q/include/linux/interrupt.h @@ -270,6 +270,8 @@ enum HRTIMER_SOFTIRQ, #endif RCU_SOFTIRQ, /* Preferable RCU should always be the last softirq */ + /* Entries after this are ignored in split softirq mode */ + MAX_SOFTIRQ, }; /* softirq mask and active fields moved to irq_cpustat_t in @@ -282,13 +284,21 @@ struct softirq_action void *data; }; +#define __raise_softirq_irqoff(nr) do { or_softirq_pending(1UL << (nr)); } while (0) +#define __do_raise_softirq_irqoff(nr) __raise_softirq_irqoff(nr) + asmlinkage void do_softirq(void); extern void open_softirq(int nr, void (*action)(struct softirq_action*), void *data); extern void softirq_init(void); -#define __raise_softirq_irqoff(nr) do { or_softirq_pending(1UL << (nr)); } while (0) extern void FASTCALL(raise_softirq_irqoff(unsigned int nr)); extern void FASTCALL(raise_softirq(unsigned int nr)); +extern void wakeup_irqd(void); +#ifdef CONFIG_PREEMPT_SOFTIRQS +extern void wait_for_softirq(int softirq); +#else +# define wait_for_softirq(x) do {} while(0) +#endif /* Tasklets --- multithreaded analogue of BHs. 
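A note on the wait_for_softirq() interface declared above: once softirqs run in preemptible threads, code that spins waiting for a softirq to finish on another CPU can deadlock if the spinning task is the one that preempted the softirq thread. A hypothetical caller (not part of this patch) should therefore sleep instead of spinning:

	#include <linux/interrupt.h>

	/* Hypothetical example: quiesce receive processing before
	 * tearing a device down. */
	static void example_quiesce_rx(void)
	{
		/*
		 * Blocks on the softirq thread's waitqueue until the
		 * NET_RX handler is no longer running; defined as a
		 * no-op when CONFIG_PREEMPT_SOFTIRQS is off.
		 */
		wait_for_softirq(NET_RX_SOFTIRQ);
	}
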
@@ -400,6 +410,7 @@ extern void tasklet_kill(struct tasklet_ extern void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu); extern void tasklet_init(struct tasklet_struct *t, void (*func)(unsigned long), unsigned long data); +void takeover_tasklets(unsigned int cpu); /* * Autoprobing for irqs: Index: linux-rt-rebase.q/include/linux/sched.h =================================================================== --- linux-rt-rebase.q.orig/include/linux/sched.h +++ linux-rt-rebase.q/include/linux/sched.h @@ -89,6 +89,12 @@ struct sched_param { #include +#ifdef CONFIG_PREEMPT_SOFTIRQS +extern int softirq_preemption; +#else +# define softirq_preemption 0 +#endif + struct exec_domain; struct futex_pi_state; struct bio; @@ -1420,6 +1426,7 @@ static inline void put_task_struct(struc #define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */ #define PF_SPREAD_PAGE 0x01000000 /* Spread page cache over cpuset */ #define PF_SPREAD_SLAB 0x02000000 /* Spread some slab caches over cpuset */ +#define PF_SOFTIRQ 0x04000000 /* softirq context */ #define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */ #define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ #define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezeable */ @@ -1851,6 +1858,7 @@ static inline int need_resched(void) extern int cond_resched(void); extern int cond_resched_lock(spinlock_t * lock); extern int cond_resched_softirq(void); +extern int cond_resched_softirq_context(void); /* * Does a critical section need to be broken due to another @@ -1866,10 +1874,13 @@ extern int cond_resched_softirq(void); * Does a critical section need to be broken due to another * task waiting or preemption being signalled: */ -static inline int lock_need_resched(spinlock_t *lock) +#define lock_need_resched(lock) \ + unlikely(need_lockbreak(lock) || need_resched()) + +static inline int softirq_need_resched(void) { - if (need_lockbreak(lock) || need_resched()) - return 1; + if (softirq_preemption && (current->flags & PF_SOFTIRQ)) + return need_resched(); return 0; } Index: linux-rt-rebase.q/kernel/Kconfig.preempt =================================================================== --- linux-rt-rebase.q.orig/kernel/Kconfig.preempt +++ linux-rt-rebase.q/kernel/Kconfig.preempt @@ -99,3 +99,19 @@ config RCU_TRACE Say Y here if you want to enable RCU tracing Say N if you are unsure. + +config PREEMPT_SOFTIRQS + bool "Thread Softirqs" + default n +# depends on PREEMPT + help + This option reduces the latency of the kernel by 'threading' + soft interrupts. This means that all softirqs will execute + in softirqd's context. While this helps latency, it can also + reduce performance. + + The threading of softirqs can also be controlled via the + /proc/sys/kernel/softirq_preemption runtime flag and the + softirq-preempt=0/1 boot-time option. + + Say N if you are unsure.
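Taken together, the sched.h flag and the Kconfig option let long-running softirq work yield voluntarily when it executes in thread context. A minimal usage sketch (illustrative only; more_work() and do_one_item() are made-up placeholders, and the real call site is cond_resched_softirq_context() invoked from ___do_softirq() further down):

	#include <linux/interrupt.h>
	#include <linux/sched.h>

	static void example_action(struct softirq_action *a)
	{
		while (more_work()) {		/* placeholder */
			do_one_item();		/* placeholder */
			/*
			 * No-op unless we are running in a preemptible
			 * softirq thread (PF_SOFTIRQ set and
			 * softirq_preemption enabled) and need_resched()
			 * is true.
			 */
			cond_resched_softirq_context();
		}
	}

At runtime the behaviour can be flipped through /proc/sys/kernel/softirq_preemption, and disabled at boot with softirq-preempt=0 (see the __setup() handler further down).
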
Index: linux-rt-rebase.q/kernel/sched.c =================================================================== --- linux-rt-rebase.q.orig/kernel/sched.c +++ linux-rt-rebase.q/kernel/sched.c @@ -3205,7 +3205,7 @@ void account_system_time(struct task_str tmp = cputime_to_cputime64(cputime); if (hardirq_count() - hardirq_offset) cpustat->irq = cputime64_add(cpustat->irq, tmp); - else if (softirq_count()) + else if (softirq_count() || (p->flags & PF_SOFTIRQ)) cpustat->softirq = cputime64_add(cpustat->softirq, tmp); else if (p != rq->idle) cpustat->system = cputime64_add(cpustat->system, tmp); @@ -3452,7 +3452,7 @@ asmlinkage void __sched preempt_schedule int saved_lock_depth; #endif /* Catch callers which need to be fixed */ - BUG_ON(ti->preempt_count || !irqs_disabled()); + WARN_ON_ONCE(ti->preempt_count || !irqs_disabled()); need_resched: add_preempt_count(PREEMPT_ACTIVE); @@ -4501,9 +4501,12 @@ int cond_resched_lock(spinlock_t *lock) } EXPORT_SYMBOL(cond_resched_lock); +/* + * Voluntarily preempt a process context that has softirqs disabled: + */ int __sched cond_resched_softirq(void) { - BUG_ON(!in_softirq()); + WARN_ON_ONCE(!in_softirq()); if (need_resched() && system_state == SYSTEM_RUNNING) { local_bh_enable(); @@ -4515,6 +4518,25 @@ int __sched cond_resched_softirq(void) } EXPORT_SYMBOL(cond_resched_softirq); +/* + * Voluntarily preempt a softirq context (possible with softirq threading): + */ +int __sched cond_resched_softirq_context(void) +{ + WARN_ON_ONCE(!in_softirq()); + + if (softirq_need_resched() && system_state == SYSTEM_RUNNING) { + raw_local_irq_disable(); + _local_bh_enable(); + raw_local_irq_enable(); + __cond_resched(); + local_bh_disable(); + return 1; + } + return 0; +} +EXPORT_SYMBOL(cond_resched_softirq_context); + /** * yield - yield the current processor to other threads. * Index: linux-rt-rebase.q/kernel/softirq.c =================================================================== --- linux-rt-rebase.q.orig/kernel/softirq.c +++ linux-rt-rebase.q/kernel/softirq.c @@ -4,9 +4,15 @@ * Copyright (C) 1992 Linus Torvalds * * Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903) + * + * Softirq-split implementation by + * Copyright (C) 2005 Thomas Gleixner, Ingo Molnar */ #include +#include +#include +#include #include #include #include @@ -46,7 +52,41 @@ EXPORT_SYMBOL(irq_stat); static struct softirq_action softirq_vec[32] __cacheline_aligned_in_smp; -static DEFINE_PER_CPU(struct task_struct *, ksoftirqd); +struct softirqdata { + int nr; + unsigned long cpu; + struct task_struct *tsk; +#ifdef CONFIG_PREEMPT_SOFTIRQS + wait_queue_head_t wait; + int running; +#endif +}; + +static DEFINE_PER_CPU(struct softirqdata [MAX_SOFTIRQ], ksoftirqd); + +#ifdef CONFIG_PREEMPT_SOFTIRQS +/* + * Preempting the softirq causes cases that would not be a + * problem when the softirq is not preempted. That is, a + * process may have code to spin while waiting for a softirq + * to finish on another CPU. But if it happens that the + * process has preempted the softirq, this could cause a + * deadlock.
+ */ +void wait_for_softirq(int softirq) +{ + struct softirqdata *data = &__get_cpu_var(ksoftirqd)[softirq]; + if (data->running) { + DECLARE_WAITQUEUE(wait, current); + set_current_state(TASK_UNINTERRUPTIBLE); + add_wait_queue(&data->wait, &wait); + if (data->running) + schedule(); + remove_wait_queue(&data->wait, &wait); + __set_current_state(TASK_RUNNING); + } +} +#endif /* * we cannot loop indefinitely here to avoid userspace starvation, @@ -54,16 +94,32 @@ static DEFINE_PER_CPU(struct task_struct * to the pending events, so lets the scheduler to balance * the softirq load for us. */ -static inline void wakeup_softirqd(void) +static void wakeup_softirqd(int softirq) { /* Interrupts are disabled: no need to stop preemption */ - struct task_struct *tsk = __get_cpu_var(ksoftirqd); + struct task_struct *tsk = __get_cpu_var(ksoftirqd)[softirq].tsk; if (tsk && tsk->state != TASK_RUNNING) wake_up_process(tsk); } /* + * Wake up the softirq threads which have work + */ +static void trigger_softirqs(void) +{ + u32 pending = local_softirq_pending(); + int curr = 0; + + while (pending) { + if (pending & 1) + wakeup_softirqd(curr); + pending >>= 1; + curr++; + } +} + +/* * This one is for softirq.c-internal use, * where hardirqs are disabled legitimately: */ @@ -98,20 +154,6 @@ void local_bh_disable(void) EXPORT_SYMBOL(local_bh_disable); -void __local_bh_enable(void) -{ - WARN_ON_ONCE(in_irq()); - - /* - * softirqs should never be enabled by __local_bh_enable(), - * it always nests inside local_bh_enable() sections: - */ - WARN_ON_ONCE(softirq_count() == SOFTIRQ_OFFSET); - - sub_preempt_count(SOFTIRQ_OFFSET); -} -EXPORT_SYMBOL_GPL(__local_bh_enable); - /* * Special-case - softirqs can safely be enabled in * cond_resched_softirq(), or by __do_softirq(), @@ -205,7 +247,7 @@ EXPORT_SYMBOL(local_bh_enable_ip); */ #define MAX_SOFTIRQ_RESTART 10 -asmlinkage void __do_softirq(void) +asmlinkage void ___do_softirq(void) { struct softirq_action *h; __u32 pending; @@ -215,9 +257,6 @@ asmlinkage void __do_softirq(void) pending = local_softirq_pending(); account_system_vtime(current); - __local_bh_disable((unsigned long)__builtin_return_address(0)); - trace_softirq_enter(); - cpu = smp_processor_id(); restart: /* Reset the pending bitmask before enabling irqs */ @@ -229,8 +268,17 @@ restart: do { if (pending & 1) { - h->action(h); + { + u32 preempt_count = preempt_count(); + h->action(h); + if (preempt_count != preempt_count()) { + print_symbol("BUG: softirq exited %s with wrong preemption count!\n", (unsigned long) h->action); + printk("entered with %08x, exited with %08x.\n", preempt_count, preempt_count()); + preempt_count() = preempt_count; + } + } rcu_bh_qsctr_inc(cpu); + cond_resched_softirq_context(); } h++; pending >>= 1; @@ -243,12 +291,34 @@ restart: goto restart; if (pending) - wakeup_softirqd(); + trigger_softirqs(); +} + +asmlinkage void __do_softirq(void) +{ +#ifdef CONFIG_PREEMPT_SOFTIRQS + /* + * 'preempt harder'. Push all softirq processing off to ksoftirqd. 
+ */ + if (softirq_preemption) { + if (local_softirq_pending()) + trigger_softirqs(); + return; + } +#endif + /* + * 'immediate' softirq execution: + */ + __local_bh_disable((unsigned long)__builtin_return_address(0)); + trace_softirq_enter(); + + ___do_softirq(); trace_softirq_exit(); account_system_vtime(current); _local_bh_enable(); + } #ifndef __ARCH_HAS_DO_SOFTIRQ @@ -317,19 +387,11 @@ void irq_exit(void) */ inline fastcall void raise_softirq_irqoff(unsigned int nr) { - __raise_softirq_irqoff(nr); + __do_raise_softirq_irqoff(nr); - /* - * If we're in an interrupt or softirq, we're done - * (this also catches softirq-disabled code). We will - * actually run the softirq once we return from - * the irq or softirq. - * - * Otherwise we wake up ksoftirqd to make sure we - * schedule the softirq soon. - */ - if (!in_interrupt()) - wakeup_softirqd(); +#ifdef CONFIG_PREEMPT_SOFTIRQS + wakeup_softirqd(nr); +#endif } EXPORT_SYMBOL(raise_softirq_irqoff); @@ -414,7 +476,7 @@ static void tasklet_action(struct softir local_irq_disable(); t->next = __get_cpu_var(tasklet_vec).list; __get_cpu_var(tasklet_vec).list = t; - __raise_softirq_irqoff(TASKLET_SOFTIRQ); + __do_raise_softirq_irqoff(TASKLET_SOFTIRQ); local_irq_enable(); } } @@ -447,7 +509,7 @@ static void tasklet_hi_action(struct sof local_irq_disable(); t->next = __get_cpu_var(tasklet_hi_vec).list; __get_cpu_var(tasklet_hi_vec).list = t; - __raise_softirq_irqoff(HI_SOFTIRQ); + __do_raise_softirq_irqoff(HI_SOFTIRQ); local_irq_enable(); } } @@ -487,13 +549,24 @@ void __init softirq_init(void) open_softirq(HI_SOFTIRQ, tasklet_hi_action, NULL); } -static int ksoftirqd(void * __bind_cpu) +static int ksoftirqd(void * __data) { + struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO/2 }; + struct softirqdata *data = __data; + u32 mask = (1 << data->nr); + struct softirq_action *h; + +#ifdef CONFIG_PREEMPT_SOFTIRQS + init_waitqueue_head(&data->wait); +#endif + + sys_sched_setscheduler(current->pid, SCHED_FIFO, &param); + current->flags |= PF_SOFTIRQ; set_current_state(TASK_INTERRUPTIBLE); while (!kthread_should_stop()) { preempt_disable(); - if (!local_softirq_pending()) { + if (!(local_softirq_pending() & mask)) { preempt_enable_no_resched(); schedule(); preempt_disable(); @@ -501,19 +574,41 @@ static int ksoftirqd(void * __bind_cpu) __set_current_state(TASK_RUNNING); - while (local_softirq_pending()) { +#ifdef CONFIG_PREEMPT_SOFTIRQS + data->running = 1; +#endif + + while (local_softirq_pending() & mask) { /* Preempt disable stops cpu going offline.
If already offline, we'll be on wrong CPU: don't process */ - if (cpu_is_offline((long)__bind_cpu)) + if (cpu_is_offline(data->cpu)) goto wait_to_die; - do_softirq(); + + local_irq_disable(); preempt_enable_no_resched(); + set_softirq_pending(local_softirq_pending() & ~mask); + local_bh_disable(); + local_irq_enable(); + + h = &softirq_vec[data->nr]; + if (h) + h->action(h); + rcu_bh_qsctr_inc(data->cpu); + + local_irq_disable(); + _local_bh_enable(); + local_irq_enable(); + cond_resched(); preempt_disable(); } preempt_enable(); set_current_state(TASK_INTERRUPTIBLE); +#ifdef CONFIG_PREEMPT_SOFTIRQS + data->running = 0; + wake_up(&data->wait); +#endif } __set_current_state(TASK_RUNNING); return 0; @@ -560,7 +655,7 @@ void tasklet_kill_immediate(struct taskl BUG(); } -static void takeover_tasklets(unsigned int cpu) +void takeover_tasklets(unsigned int cpu) { struct tasklet_struct **i; @@ -582,49 +677,82 @@ static void takeover_tasklets(unsigned i } #endif /* CONFIG_HOTPLUG_CPU */ +static const char *softirq_names [] = +{ + [HI_SOFTIRQ] = "high", + [SCHED_SOFTIRQ] = "sched", + [TIMER_SOFTIRQ] = "timer", + [NET_TX_SOFTIRQ] = "net-tx", + [NET_RX_SOFTIRQ] = "net-rx", + [BLOCK_SOFTIRQ] = "block", + [TASKLET_SOFTIRQ] = "tasklet", +#ifdef CONFIG_HIGH_RES_TIMERS + [HRTIMER_SOFTIRQ] = "hrtimer", +#endif + [RCU_SOFTIRQ] = "rcu", +}; + static int __cpuinit cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { - int hotcpu = (unsigned long)hcpu; + int hotcpu = (unsigned long)hcpu, i; struct task_struct *p; switch (action) { case CPU_UP_PREPARE: case CPU_UP_PREPARE_FROZEN: - p = kthread_create(ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu); - if (IS_ERR(p)) { - printk("ksoftirqd for %i failed\n", hotcpu); - return NOTIFY_BAD; + for (i = 0; i < MAX_SOFTIRQ; i++) { + per_cpu(ksoftirqd, hotcpu)[i].nr = i; + per_cpu(ksoftirqd, hotcpu)[i].cpu = hotcpu; + per_cpu(ksoftirqd, hotcpu)[i].tsk = NULL; + } + for (i = 0; i < MAX_SOFTIRQ; i++) { + p = kthread_create(ksoftirqd, + &per_cpu(ksoftirqd, hotcpu)[i], + "softirq-%s/%d", softirq_names[i], + hotcpu); + if (IS_ERR(p)) { + printk("ksoftirqd %d for %i failed\n", i, + hotcpu); + return NOTIFY_BAD; + } + kthread_bind(p, hotcpu); + per_cpu(ksoftirqd, hotcpu)[i].tsk = p; } - kthread_bind(p, hotcpu); - per_cpu(ksoftirqd, hotcpu) = p; - break; + break; + break; case CPU_ONLINE: case CPU_ONLINE_FROZEN: - wake_up_process(per_cpu(ksoftirqd, hotcpu)); + for (i = 0; i < MAX_SOFTIRQ; i++) + wake_up_process(per_cpu(ksoftirqd, hotcpu)[i].tsk); break; #ifdef CONFIG_HOTPLUG_CPU case CPU_UP_CANCELED: case CPU_UP_CANCELED_FROZEN: - if (!per_cpu(ksoftirqd, hotcpu)) - break; - /* Unbind so it can run. Fall thru. 
*/ - kthread_bind(per_cpu(ksoftirqd, hotcpu), - any_online_cpu(cpu_online_map)); +#if 0 + for (i = 0; i < MAX_SOFTIRQ; i++) { + if (!per_cpu(ksoftirqd, hotcpu)[i].tsk) + continue; + kthread_bind(per_cpu(ksoftirqd, hotcpu)[i].tsk, + any_online_cpu(cpu_online_map)); + } +#endif case CPU_DEAD: case CPU_DEAD_FROZEN: { struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; - p = per_cpu(ksoftirqd, hotcpu); - per_cpu(ksoftirqd, hotcpu) = NULL; sched_setscheduler(p, SCHED_FIFO, &param); - kthread_stop(p); + for (i = 0; i < MAX_SOFTIRQ; i++) { + p = per_cpu(ksoftirqd, hotcpu)[i].tsk; + per_cpu(ksoftirqd, hotcpu)[i].tsk = NULL; + kthread_stop(p); + } takeover_tasklets(hotcpu); break; - } #endif /* CONFIG_HOTPLUG_CPU */ } + } return NOTIFY_OK; } @@ -643,6 +771,29 @@ __init int spawn_ksoftirqd(void) return 0; } + +#ifdef CONFIG_PREEMPT_SOFTIRQS + +int softirq_preemption = 1; + +EXPORT_SYMBOL(softirq_preemption); + +static int __init softirq_preempt_setup (char *str) +{ + if (!strncmp(str, "off", 3)) + softirq_preemption = 0; + else + get_option(&str, &softirq_preemption); + if (!softirq_preemption) + printk("turning off softirq preemption!\n"); + + return 1; +} + +__setup("softirq-preempt=", softirq_preempt_setup); + +#endif + #ifdef CONFIG_SMP /* * Call a function on all processors patches/preempt-realtime-rcu.patch0000664000077200007720000000224510653433166016617 0ustar mingomingo--- kernel/rcupreempt.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) Index: linux-rt-rebase.q/kernel/rcupreempt.c =================================================================== --- linux-rt-rebase.q.orig/kernel/rcupreempt.c +++ linux-rt-rebase.q/kernel/rcupreempt.c @@ -55,7 +55,7 @@ */ struct rcu_data { - spinlock_t lock; + raw_spinlock_t lock; long completed; /* Number of last completed batch. */ struct rcu_head *nextlist; struct rcu_head **nexttail; @@ -68,12 +68,12 @@ struct rcu_data { #endif /* #ifdef CONFIG_RCU_TRACE */ }; struct rcu_ctrlblk { - spinlock_t fliplock; + raw_spinlock_t fliplock; long completed; /* Number of last completed batch.
*/ }; static struct rcu_data rcu_data; static struct rcu_ctrlblk rcu_ctrlblk = { - .fliplock = SPIN_LOCK_UNLOCKED, + .fliplock = RAW_SPIN_LOCK_UNLOCKED(rcu_ctrlblk.fliplock), .completed = 0, }; static DEFINE_PER_CPU(atomic_t [2], rcu_flipctr) = @@ -352,7 +352,7 @@ int rcu_needs_cpu(int cpu) return !!rcu_data.waitlist || rcu_pending(cpu); } -int rcu_pending(int cpu) +int notrace rcu_pending(int cpu) { return (rcu_data.donelist != NULL || rcu_data.waitlist != NULL || patches/x86_64-apic-add-clockevents-functions.patch0000664000077200007720000000735610653433161021501 0ustar mingomingoSubject: x86_64: Add (not yet used) clock event functions Signed-off-by: Thomas Gleixner Signed-off-by: Chris Wright Signed-off-by: Ingo Molnar --- arch/x86_64/Kconfig | 6 +++ arch/x86_64/kernel/apic.c | 79 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 85 insertions(+) Index: linux-rt-rebase.q/arch/x86_64/Kconfig =================================================================== --- linux-rt-rebase.q.orig/arch/x86_64/Kconfig +++ linux-rt-rebase.q/arch/x86_64/Kconfig @@ -28,6 +28,10 @@ config GENERIC_TIME bool default y +config GENERIC_CLOCKEVENTS_MIGR + bool + default y + config GENERIC_TIME_VSYSCALL bool default y @@ -138,6 +142,8 @@ source "init/Kconfig" menu "Processor type and features" +source "kernel/time/Kconfig" + choice prompt "Subarchitecture Type" default X86_PC Index: linux-rt-rebase.q/arch/x86_64/kernel/apic.c =================================================================== --- linux-rt-rebase.q.orig/arch/x86_64/kernel/apic.c +++ linux-rt-rebase.q/arch/x86_64/kernel/apic.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -58,6 +59,77 @@ static struct resource lapic_resource = static unsigned int calibration_result; +static int lapic_next_event(unsigned long delta, + struct clock_event_device *evt); +static void lapic_timer_setup(enum clock_event_mode mode, + struct clock_event_device *evt); + +static void lapic_timer_broadcast(cpumask_t mask); + +static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen); + +static struct clock_event_device lapic_clockevent = { + .name = "lapic", + .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT + | CLOCK_EVT_FEAT_C3STOP | CLOCK_EVT_FEAT_DUMMY, + .shift = 32, + .set_mode = lapic_timer_setup, + .set_next_event = lapic_next_event, + .broadcast = lapic_timer_broadcast, + .rating = 100, + .irq = -1, +}; +static DEFINE_PER_CPU(struct clock_event_device, lapic_events); + +static int lapic_next_event(unsigned long delta, + struct clock_event_device *evt) +{ + apic_write(APIC_TMICT, delta); + return 0; +} + +static void lapic_timer_setup(enum clock_event_mode mode, + struct clock_event_device *evt) +{ + unsigned long flags; + unsigned int v; + + /* Lapic used as dummy for broadcast ? 
*/ + if (evt->features & CLOCK_EVT_FEAT_DUMMY) + return; + + local_irq_save(flags); + + switch (mode) { + case CLOCK_EVT_MODE_PERIODIC: + case CLOCK_EVT_MODE_ONESHOT: + __setup_APIC_LVTT(calibration_result, + mode != CLOCK_EVT_MODE_PERIODIC, 1); + break; + case CLOCK_EVT_MODE_UNUSED: + case CLOCK_EVT_MODE_SHUTDOWN: + v = apic_read(APIC_LVTT); + v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); + apic_write(APIC_LVTT, v); + break; + case CLOCK_EVT_MODE_RESUME: + /* Nothing to do here */ + break; + } + + local_irq_restore(flags); +} + +/* + * Local APIC timer broadcast function + */ +static void lapic_timer_broadcast(cpumask_t mask) +{ +#ifdef CONFIG_SMP + send_IPI_mask(mask, LOCAL_TIMER_VECTOR); +#endif +} + /* * cpu_mask that denotes the CPUs that needs timer interrupt coming in as * IPIs in place of local APIC timers @@ -867,6 +939,13 @@ static void __init calibrate_APIC_clock( printk(KERN_INFO "Detected %d.%03d MHz APIC timer.\n", result / 1000 / 1000, result / 1000 % 1000); + /* Calculate the scaled math multiplication factor */ + lapic_clockevent.mult = div_sc(result, NSEC_PER_SEC, 32); + lapic_clockevent.max_delta_ns = + clockevent_delta2ns(0x7FFFFF, &lapic_clockevent); + lapic_clockevent.min_delta_ns = + clockevent_delta2ns(0xF, &lapic_clockevent); + calibration_result = result / HZ; } patches/mapping_nrpages.patch0000664000077200007720000004347210653433167015736 0ustar mingomingoSubject: mm/fs: abstract address_space::nrpages Currently the tree_lock protects mapping->nrpages, this will not be possible much longer. Hence abstract the access to this variable so that it can be easily replaced by an atomic_ulong_t. Signed-off-by: Peter Zijlstra --- arch/sh64/lib/dbg.c | 2 +- fs/block_dev.c | 4 ++-- fs/buffer.c | 2 +- fs/gfs2/glock.c | 2 +- fs/gfs2/glops.c | 4 ++-- fs/gfs2/meta_io.c | 2 +- fs/hugetlbfs/inode.c | 2 +- fs/inode.c | 10 +++++----- fs/jffs2/dir.c | 4 ++-- fs/jffs2/fs.c | 2 +- fs/libfs.c | 2 +- fs/nfs/inode.c | 6 +++--- fs/xfs/linux-2.6/xfs_vnode.h | 2 +- include/linux/fs.h | 22 +++++++++++++++++++++- include/linux/swap.h | 2 +- ipc/shm.c | 4 ++-- mm/filemap.c | 14 +++++++------- mm/shmem.c | 8 ++++---- mm/swap_state.c | 4 ++-- mm/truncate.c | 2 +- 20 files changed, 60 insertions(+), 40 deletions(-) Index: linux-rt-rebase.q/arch/sh64/lib/dbg.c =================================================================== --- linux-rt-rebase.q.orig/arch/sh64/lib/dbg.c +++ linux-rt-rebase.q/arch/sh64/lib/dbg.c @@ -424,6 +424,6 @@ void print_page(struct page *page) printk(" page[%p] -> index 0x%lx, count 0x%x, flags 0x%lx\n", page, page->index, page_count(page), page->flags); printk(" address_space = %p, pages =%ld\n", page->mapping, - page->mapping->nrpages); + mapping_nrpages(page->mapping)); } Index: linux-rt-rebase.q/fs/block_dev.c =================================================================== --- linux-rt-rebase.q.orig/fs/block_dev.c +++ linux-rt-rebase.q/fs/block_dev.c @@ -59,7 +59,7 @@ static sector_t max_block(struct block_d /* Kill _all_ buffers and pagecache , dirty or not.. 
*/ static void kill_bdev(struct block_device *bdev) { - if (bdev->bd_inode->i_mapping->nrpages == 0) + if (mapping_nrpages(bdev->bd_inode->i_mapping) == 0) return; invalidate_bh_lrus(); truncate_inode_pages(bdev->bd_inode->i_mapping, 0); @@ -592,7 +592,7 @@ long nr_blockdev_pages(void) long ret = 0; spin_lock(&bdev_lock); list_for_each_entry(bdev, &all_bdevs, bd_list) { - ret += bdev->bd_inode->i_mapping->nrpages; + ret += mapping_nrpages(bdev->bd_inode->i_mapping); } spin_unlock(&bdev_lock); return ret; Index: linux-rt-rebase.q/fs/buffer.c =================================================================== --- linux-rt-rebase.q.orig/fs/buffer.c +++ linux-rt-rebase.q/fs/buffer.c @@ -334,7 +334,7 @@ void invalidate_bdev(struct block_device { struct address_space *mapping = bdev->bd_inode->i_mapping; - if (mapping->nrpages == 0) + if (mapping_nrpages(mapping) == 0) return; invalidate_bh_lrus(); Index: linux-rt-rebase.q/fs/gfs2/glock.c =================================================================== --- linux-rt-rebase.q.orig/fs/gfs2/glock.c +++ linux-rt-rebase.q/fs/gfs2/glock.c @@ -1876,7 +1876,7 @@ static int dump_glock(struct glock_iter (list_empty(&gl->gl_reclaim)) ? "no" : "yes"); if (gl->gl_aspace) print_dbg(gi, " aspace = 0x%p nrpages = %lu\n", gl->gl_aspace, - gl->gl_aspace->i_mapping->nrpages); + mapping_nrpages(gl->gl_aspace->i_mapping)); else print_dbg(gi, " aspace = no\n"); print_dbg(gi, " ail = %d\n", atomic_read(&gl->gl_ail_count)); Index: linux-rt-rebase.q/fs/gfs2/glops.c =================================================================== --- linux-rt-rebase.q.orig/fs/gfs2/glops.c +++ linux-rt-rebase.q/fs/gfs2/glops.c @@ -258,7 +258,7 @@ static int inode_go_demote_ok(struct gfs struct gfs2_sbd *sdp = gl->gl_sbd; int demote = 0; - if (!gl->gl_object && !gl->gl_aspace->i_mapping->nrpages) + if (!gl->gl_object && !mapping_nrpages(gl->gl_aspace->i_mapping)) demote = 1; else if (!sdp->sd_args.ar_localcaching && time_after_eq(jiffies, gl->gl_stamp + @@ -325,7 +325,7 @@ static void inode_go_unlock(struct gfs2_ static int rgrp_go_demote_ok(struct gfs2_glock *gl) { - return !gl->gl_aspace->i_mapping->nrpages; + return !mapping_nrpages(gl->gl_aspace->i_mapping); } /** Index: linux-rt-rebase.q/fs/gfs2/meta_io.c =================================================================== --- linux-rt-rebase.q.orig/fs/gfs2/meta_io.c +++ linux-rt-rebase.q/fs/gfs2/meta_io.c @@ -104,7 +104,7 @@ void gfs2_meta_inval(struct gfs2_glock * truncate_inode_pages(mapping, 0); atomic_dec(&aspace->i_writecount); - gfs2_assert_withdraw(sdp, !mapping->nrpages); + gfs2_assert_withdraw(sdp, !mapping_nrpages(mapping)); } /** Index: linux-rt-rebase.q/fs/hugetlbfs/inode.c =================================================================== --- linux-rt-rebase.q.orig/fs/hugetlbfs/inode.c +++ linux-rt-rebase.q/fs/hugetlbfs/inode.c @@ -236,7 +236,7 @@ static void truncate_hugepages(struct in } huge_pagevec_release(&pvec); } - BUG_ON(!lstart && mapping->nrpages); + BUG_ON(!lstart && mapping_nrpages(mapping)); hugetlb_unreserve_pages(inode, start, freed); } Index: linux-rt-rebase.q/fs/inode.c =================================================================== --- linux-rt-rebase.q.orig/fs/inode.c +++ linux-rt-rebase.q/fs/inode.c @@ -244,7 +244,7 @@ void clear_inode(struct inode *inode) might_sleep(); invalidate_inode_buffers(inode); - BUG_ON(inode->i_data.nrpages); + BUG_ON(mapping_nrpages(&inode->i_data)); BUG_ON(!(inode->i_state & I_FREEING)); BUG_ON(inode->i_state & I_CLEAR); wait_on_inode(inode); @@ -277,7 +277,7 @@ 
static void dispose_list(struct list_hea inode = list_first_entry(head, struct inode, i_list); list_del(&inode->i_list); - if (inode->i_data.nrpages) + if (mapping_nrpages(&inode->i_data)) truncate_inode_pages(&inode->i_data, 0); clear_inode(inode); @@ -369,7 +369,7 @@ static int can_unuse(struct inode *inode return 0; if (atomic_read(&inode->i_count)) return 0; - if (inode->i_data.nrpages) + if (mapping_nrpages(&inode->i_data)) return 0; return 1; } @@ -408,7 +408,7 @@ static void prune_icache(int nr_to_scan) list_move(&inode->i_list, &inode_unused); continue; } - if (inode_has_buffers(inode) || inode->i_data.nrpages) { + if (inode_has_buffers(inode) || mapping_nrpages(&inode->i_data)) { __iget(inode); spin_unlock(&inode_lock); if (remove_inode_buffers(inode)) @@ -1073,7 +1073,7 @@ static void generic_forget_inode(struct inode->i_state |= I_FREEING; inodes_stat.nr_inodes--; spin_unlock(&inode_lock); - if (inode->i_data.nrpages) + if (mapping_nrpages(&inode->i_data)) truncate_inode_pages(&inode->i_data, 0); clear_inode(inode); wake_up_inode(inode); Index: linux-rt-rebase.q/fs/jffs2/dir.c =================================================================== --- linux-rt-rebase.q.orig/fs/jffs2/dir.c +++ linux-rt-rebase.q/fs/jffs2/dir.c @@ -203,7 +203,7 @@ static int jffs2_create(struct inode *di inode->i_op = &jffs2_file_inode_operations; inode->i_fop = &jffs2_file_operations; inode->i_mapping->a_ops = &jffs2_file_address_operations; - inode->i_mapping->nrpages = 0; + mapping_nrpages_init(inode->i_mapping); f = JFFS2_INODE_INFO(inode); dir_f = JFFS2_INODE_INFO(dir_i); @@ -227,7 +227,7 @@ static int jffs2_create(struct inode *di d_instantiate(dentry, inode); D1(printk(KERN_DEBUG "jffs2_create: Created ino #%lu with mode %o, nlink %d(%d). nrpages %ld\n", - inode->i_ino, inode->i_mode, inode->i_nlink, f->inocache->nlink, inode->i_mapping->nrpages)); + inode->i_ino, inode->i_mode, inode->i_nlink, f->inocache->nlink, mapping_nrpages(inode->i_mapping))); return 0; fail: Index: linux-rt-rebase.q/fs/jffs2/fs.c =================================================================== --- linux-rt-rebase.q.orig/fs/jffs2/fs.c +++ linux-rt-rebase.q/fs/jffs2/fs.c @@ -291,7 +291,7 @@ void jffs2_read_inode (struct inode *ino inode->i_op = &jffs2_file_inode_operations; inode->i_fop = &jffs2_file_operations; inode->i_mapping->a_ops = &jffs2_file_address_operations; - inode->i_mapping->nrpages = 0; + mapping_nrpages_init(inode->i_mapping); break; case S_IFBLK: Index: linux-rt-rebase.q/fs/libfs.c =================================================================== --- linux-rt-rebase.q.orig/fs/libfs.c +++ linux-rt-rebase.q/fs/libfs.c @@ -16,7 +16,7 @@ int simple_getattr(struct vfsmount *mnt, { struct inode *inode = dentry->d_inode; generic_fillattr(inode, stat); - stat->blocks = inode->i_mapping->nrpages << (PAGE_CACHE_SHIFT - 9); + stat->blocks = mapping_nrpages(inode->i_mapping) << (PAGE_CACHE_SHIFT - 9); return 0; } Index: linux-rt-rebase.q/fs/nfs/inode.c =================================================================== --- linux-rt-rebase.q.orig/fs/nfs/inode.c +++ linux-rt-rebase.q/fs/nfs/inode.c @@ -97,7 +97,7 @@ int nfs_sync_mapping(struct address_spac { int ret; - if (mapping->nrpages == 0) + if (mapping_nrpages(mapping) == 0) return 0; unmap_mapping_range(mapping, 0, 0, 0); ret = filemap_write_and_wait(mapping); @@ -137,7 +137,7 @@ void nfs_zap_caches(struct inode *inode) void nfs_zap_mapping(struct inode *inode, struct address_space *mapping) { - if (mapping->nrpages != 0) { + if (mapping_nrpages(mapping) != 
0) { spin_lock(&inode->i_lock); NFS_I(inode)->cache_validity |= NFS_INO_INVALID_DATA; spin_unlock(&inode->i_lock); @@ -684,7 +684,7 @@ static int nfs_invalidate_mapping_nolock { struct nfs_inode *nfsi = NFS_I(inode); - if (mapping->nrpages != 0) { + if (mapping_nrpages(mapping) != 0) { int ret = invalidate_inode_pages2(mapping); if (ret < 0) return ret; Index: linux-rt-rebase.q/fs/xfs/linux-2.6/xfs_vnode.h =================================================================== --- linux-rt-rebase.q.orig/fs/xfs/linux-2.6/xfs_vnode.h +++ linux-rt-rebase.q/fs/xfs/linux-2.6/xfs_vnode.h @@ -537,7 +537,7 @@ static inline void vn_atime_to_time_t(bh * Some useful predicates. */ #define VN_MAPPED(vp) mapping_mapped(vn_to_inode(vp)->i_mapping) -#define VN_CACHED(vp) (vn_to_inode(vp)->i_mapping->nrpages) +#define VN_CACHED(vp) mapping_nrpages(vn_to_inode(vp)->i_mapping) #define VN_DIRTY(vp) mapping_tagged(vn_to_inode(vp)->i_mapping, \ PAGECACHE_TAG_DIRTY) #define VN_TRUNC(vp) ((vp)->v_flag & VTRUNCATED) Index: linux-rt-rebase.q/include/linux/fs.h =================================================================== --- linux-rt-rebase.q.orig/include/linux/fs.h +++ linux-rt-rebase.q/include/linux/fs.h @@ -447,7 +447,7 @@ struct address_space { struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */ spinlock_t i_mmap_lock; /* protect tree, count, list */ unsigned int truncate_count; /* Cover race condition with truncate */ - unsigned long nrpages; /* number of total pages */ + unsigned long __nrpages; /* number of total pages */ pgoff_t writeback_index;/* writeback starts here */ const struct address_space_operations *a_ops; /* methods */ unsigned long flags; /* error bits/gfp mask */ @@ -462,6 +462,26 @@ struct address_space { * of struct page's "mapping" pointer be used for PAGE_MAPPING_ANON. 
*/ +static inline void mapping_nrpages_init(struct address_space *mapping) +{ + mapping->__nrpages = 0; +} + +static inline unsigned long mapping_nrpages(struct address_space *mapping) +{ + return mapping->__nrpages; +} + +static inline void mapping_nrpages_inc(struct address_space *mapping) +{ + mapping->__nrpages++; +} + +static inline void mapping_nrpages_dec(struct address_space *mapping) +{ + mapping->__nrpages--; +} + struct block_device { dev_t bd_dev; /* not a kdev_t - it's a search key */ struct inode * bd_inode; /* will die */ Index: linux-rt-rebase.q/include/linux/swap.h =================================================================== --- linux-rt-rebase.q.orig/include/linux/swap.h +++ linux-rt-rebase.q/include/linux/swap.h @@ -225,7 +225,7 @@ extern int end_swap_bio_read(struct bio /* linux/mm/swap_state.c */ extern struct address_space swapper_space; -#define total_swapcache_pages swapper_space.nrpages +#define total_swapcache_pages mapping_nrpages(&swapper_space) extern void show_swap_cache_info(void); extern int add_to_swap(struct page *, gfp_t); extern void __delete_from_swap_cache(struct page *); Index: linux-rt-rebase.q/ipc/shm.c =================================================================== --- linux-rt-rebase.q.orig/ipc/shm.c +++ linux-rt-rebase.q/ipc/shm.c @@ -564,11 +564,11 @@ static void shm_get_stat(struct ipc_name if (is_file_hugepages(shp->shm_file)) { struct address_space *mapping = inode->i_mapping; - *rss += (HPAGE_SIZE/PAGE_SIZE)*mapping->nrpages; + *rss += (HPAGE_SIZE/PAGE_SIZE)*mapping_nrpages(mapping); } else { struct shmem_inode_info *info = SHMEM_I(inode); spin_lock(&info->lock); - *rss += inode->i_mapping->nrpages; + *rss += mapping_nrpages(inode->i_mapping); *swp += info->swapped; spin_unlock(&info->lock); } Index: linux-rt-rebase.q/mm/filemap.c =================================================================== --- linux-rt-rebase.q.orig/mm/filemap.c +++ linux-rt-rebase.q/mm/filemap.c @@ -118,7 +118,7 @@ void __remove_from_page_cache(struct pag radix_tree_delete(&mapping->page_tree, page->index); page->mapping = NULL; - mapping->nrpages--; + mapping_nrpages_dec(mapping); __dec_zone_page_state(page, NR_FILE_PAGES); BUG_ON(page_mapped(page)); } @@ -191,7 +191,7 @@ int __filemap_fdatawrite_range(struct ad int ret; struct writeback_control wbc = { .sync_mode = sync_mode, - .nr_to_write = mapping->nrpages * 2, + .nr_to_write = mapping_nrpages(mapping) * 2, .range_start = start, .range_end = end, }; @@ -373,7 +373,7 @@ int filemap_write_and_wait(struct addres { int err = 0; - if (mapping->nrpages) { + if (mapping_nrpages(mapping)) { err = filemap_fdatawrite(mapping); /* * Even if the above returned error, the pages may be @@ -407,7 +407,7 @@ int filemap_write_and_wait_range(struct { int err = 0; - if (mapping->nrpages) { + if (mapping_nrpages(mapping)) { err = __filemap_fdatawrite_range(mapping, lstart, lend, WB_SYNC_ALL); /* See comment of filemap_write_and_wait() */ @@ -449,7 +449,7 @@ int add_to_page_cache(struct page *page, SetPageLocked(page); page->mapping = mapping; page->index = offset; - mapping->nrpages++; + mapping_nrpages_inc(mapping); __inc_zone_page_state(page, NR_FILE_PAGES); } spin_unlock_irq(&mapping->tree_lock); @@ -2297,7 +2297,7 @@ generic_file_direct_IO(int rw, struct ki * about to write. We do this *before* the write so that we can return * -EIO without clobbering -EIOCBQUEUED from ->direct_IO(). 
*/ - if (rw == WRITE && mapping->nrpages) { + if (rw == WRITE && mapping_nrpages(mapping)) { retval = invalidate_inode_pages2_range(mapping, offset >> PAGE_CACHE_SHIFT, end); if (retval) @@ -2315,7 +2315,7 @@ generic_file_direct_IO(int rw, struct ki * thing to do, so we don't support it 100%. If this invalidation * fails and we have -EIOCBQUEUED we ignore the failure. */ - if (rw == WRITE && mapping->nrpages) { + if (rw == WRITE && mapping_nrpages(mapping)) { int err = invalidate_inode_pages2_range(mapping, offset >> PAGE_CACHE_SHIFT, end); if (err && retval >= 0) Index: linux-rt-rebase.q/mm/shmem.c =================================================================== --- linux-rt-rebase.q.orig/mm/shmem.c +++ linux-rt-rebase.q/mm/shmem.c @@ -216,8 +216,8 @@ static void shmem_free_blocks(struct ino * We have to calculate the free blocks since the mm can drop * undirtied hole pages behind our back. * - * But normally info->alloced == inode->i_mapping->nrpages + info->swapped - * So mm freed is info->alloced - (inode->i_mapping->nrpages + info->swapped) + * But normally info->alloced == mapping_nrpages(inode->i_mapping) + info->swapped + * So mm freed is info->alloced - (mapping_nrpages(inode->i_mapping) + info->swapped) * * It has to be called with the spinlock held. */ @@ -226,7 +226,7 @@ static void shmem_recalc_inode(struct in struct shmem_inode_info *info = SHMEM_I(inode); long freed; - freed = info->alloced - info->swapped - inode->i_mapping->nrpages; + freed = info->alloced - info->swapped - mapping_nrpages(inode->i_mapping); if (freed > 0) { info->alloced -= freed; shmem_unacct_blocks(info->flags, freed); @@ -672,7 +672,7 @@ static void shmem_truncate_range(struct done1: shmem_dir_unmap(dir); done2: - if (inode->i_mapping->nrpages && (info->flags & SHMEM_PAGEIN)) { + if (mapping_nrpages(inode->i_mapping) && (info->flags & SHMEM_PAGEIN)) { /* * Call truncate_inode_pages again: racing shmem_unuse_inode * may have swizzled a page in from swap since vmtruncate or Index: linux-rt-rebase.q/mm/swap_state.c =================================================================== --- linux-rt-rebase.q.orig/mm/swap_state.c +++ linux-rt-rebase.q/mm/swap_state.c @@ -87,7 +87,7 @@ static int __add_to_swap_cache(struct pa page_cache_get(page); SetPageSwapCache(page); set_page_private(page, entry.val); - total_swapcache_pages++; + mapping_nrpages_inc(&swapper_space); __inc_zone_page_state(page, NR_FILE_PAGES); } spin_unlock_irq(&swapper_space.tree_lock); @@ -133,7 +133,7 @@ void __delete_from_swap_cache(struct pag radix_tree_delete(&swapper_space.page_tree, page_private(page)); set_page_private(page, 0); ClearPageSwapCache(page); - total_swapcache_pages--; + mapping_nrpages_dec(&swapper_space); __dec_zone_page_state(page, NR_FILE_PAGES); INC_CACHE_INFO(del_total); } Index: linux-rt-rebase.q/mm/truncate.c =================================================================== --- linux-rt-rebase.q.orig/mm/truncate.c +++ linux-rt-rebase.q/mm/truncate.c @@ -164,7 +164,7 @@ void truncate_inode_pages_range(struct a pgoff_t next; int i; - if (mapping->nrpages == 0) + if (mapping_nrpages(mapping) == 0) return; BUG_ON((lend & (PAGE_CACHE_SIZE - 1)) != (PAGE_CACHE_SIZE - 1)); patches/highmem-redo-mainline.patch0000664000077200007720000000103310653433167016706 0ustar mingomingo--- mm/highmem.c | 8 ++++++++ 1 file changed, 8 insertions(+) Index: linux-rt-rebase.q/mm/highmem.c =================================================================== --- linux-rt-rebase.q.orig/mm/highmem.c +++ linux-rt-rebase.q/mm/highmem.c @@ 
-214,6 +214,14 @@ static unsigned long pkmap_insert(struct return vaddr; } +/* + * Flush all unused kmap mappings in order to remove stray mappings. + */ +void kmap_flush_unused(void) +{ + WARN_ON_ONCE(1); +} + fastcall void *kmap_high(struct page *page) { unsigned long vaddr; patches/preempt-realtime-debug-sysctl.patch0000664000077200007720000001000010653433166020417 0ustar mingomingo--- drivers/char/sysrq.c | 18 ++++++++++++++- drivers/char/tty_io.c | 1 kernel/panic.c | 1 kernel/sysctl.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 77 insertions(+), 1 deletion(-) Index: linux-rt-rebase.q/drivers/char/sysrq.c =================================================================== --- linux-rt-rebase.q.orig/drivers/char/sysrq.c +++ linux-rt-rebase.q/drivers/char/sysrq.c @@ -208,6 +208,22 @@ static struct sysrq_key_op sysrq_showreg .enable_mask = SYSRQ_ENABLE_DUMP, }; +#if defined(__i386__) + +static void sysrq_handle_showallregs(int key, struct tty_struct *tty) +{ + nmi_show_all_regs(); +} + +static struct sysrq_key_op sysrq_showallregs_op = { + .handler = sysrq_handle_showallregs, + .help_msg = "showalLcpupc", + .action_msg = "Show Regs On All CPUs", +}; +#else +#define sysrq_showallregs_op (*(struct sysrq_key_op *)0) +#endif + static void sysrq_handle_showstate(int key, struct tty_struct *tty) { show_state(); @@ -340,7 +356,7 @@ static struct sysrq_key_op *sysrq_key_ta &sysrq_kill_op, /* i */ NULL, /* j */ &sysrq_SAK_op, /* k */ - NULL, /* l */ + &sysrq_showallregs_op, /* l */ &sysrq_showmem_op, /* m */ &sysrq_unrt_op, /* n */ /* o: This will often be registered as 'Off' at init time */ Index: linux-rt-rebase.q/drivers/char/tty_io.c =================================================================== --- linux-rt-rebase.q.orig/drivers/char/tty_io.c +++ linux-rt-rebase.q/drivers/char/tty_io.c @@ -257,6 +257,7 @@ static int check_tty_count(struct tty_st printk(KERN_WARNING "Warning: dev (%s) tty->count(%d) " "!= #fd's(%d) in %s\n", tty->name, tty->count, count, routine); + dump_stack(); return count; } #endif Index: linux-rt-rebase.q/kernel/panic.c =================================================================== --- linux-rt-rebase.q.orig/kernel/panic.c +++ linux-rt-rebase.q/kernel/panic.c @@ -80,6 +80,7 @@ NORET_TYPE void panic(const char * fmt, vsnprintf(buf, sizeof(buf), fmt, args); va_end(args); printk(KERN_EMERG "Kernel panic - not syncing: %s\n",buf); + dump_stack(); bust_spinlocks(0); /* Index: linux-rt-rebase.q/kernel/sysctl.c =================================================================== --- linux-rt-rebase.q.orig/kernel/sysctl.c +++ linux-rt-rebase.q/kernel/sysctl.c @@ -323,6 +323,54 @@ static ctl_table kern_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, + { + .ctl_name = KERN_PANIC, + .procname = "prof_pid", + .data = &prof_pid, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, +#ifdef CONFIG_PREEMPT + { + .ctl_name = KERN_PANIC, + .procname = "kernel_preemption", + .data = &kernel_preemption, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, +#endif +#ifdef CONFIG_PREEMPT_VOLUNTARY + { + .ctl_name = KERN_PANIC, + .procname = "voluntary_preemption", + .data = &voluntary_preemption, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, +#endif +#if defined(CONFIG_PREEMPT_SOFTIRQS) && !defined(CONFIG_PREEMPT_RT) + { + .ctl_name = KERN_PANIC, + .procname = "softirq_preemption", + .data = &softirq_preemption, + .maxlen = sizeof(int), + .mode = 0644, + 
.proc_handler = &proc_dointvec, + }, +#endif +#if defined(CONFIG_PREEMPT_HARDIRQS) && !defined(CONFIG_PREEMPT_RT) + { + .ctl_name = KERN_PANIC, + .procname = "hardirq_preemption", + .data = &hardirq_preemption, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, +#endif #ifdef CONFIG_WAKEUP_TIMING { .ctl_name = CTL_UNNUMBERED, @@ -457,6 +505,16 @@ static ctl_table kern_table[] = { .proc_handler = &proc_dointvec, }, #endif +#ifdef CONFIG_GENERIC_HARDIRQS + { + .ctl_name = KERN_PANIC, + .procname = "debug_direct_keyboard", + .data = &debug_direct_keyboard, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, +#endif { .ctl_name = KERN_CORE_USES_PID, .procname = "core_uses_pid", patches/preempt-irqs-hrtimer.patch0000664000077200007720000001036210653433164016651 0ustar mingomingo include/linux/hrtimer.h | 10 ++++++++++ kernel/hrtimer.c | 35 ++++++++++++++++++++++++++++++++++- kernel/itimer.c | 1 + kernel/posix-timers.c | 3 +++ 4 files changed, 48 insertions(+), 1 deletion(-) Index: linux-rt-rebase.q/include/linux/hrtimer.h =================================================================== --- linux-rt-rebase.q.orig/include/linux/hrtimer.h +++ linux-rt-rebase.q/include/linux/hrtimer.h @@ -200,6 +200,9 @@ struct hrtimer_cpu_base { struct list_head cb_pending; unsigned long nr_events; #endif +#ifdef CONFIG_PREEMPT_SOFTIRQS + wait_queue_head_t wait; +#endif }; #ifdef CONFIG_HIGH_RES_TIMERS @@ -276,6 +279,13 @@ static inline int hrtimer_restart(struct return hrtimer_start(timer, timer->expires, HRTIMER_MODE_ABS); } +/* Softirq preemption could deadlock timer removal */ +#ifdef CONFIG_PREEMPT_SOFTIRQS + extern void hrtimer_wait_for_timer(const struct hrtimer *timer); +#else +# define hrtimer_wait_for_timer(timer) do { cpu_relax(); } while (0) +#endif + /* Query timers: */ extern ktime_t hrtimer_get_remaining(const struct hrtimer *timer); extern int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp); Index: linux-rt-rebase.q/kernel/hrtimer.c =================================================================== --- linux-rt-rebase.q.orig/kernel/hrtimer.c +++ linux-rt-rebase.q/kernel/hrtimer.c @@ -911,7 +911,7 @@ int hrtimer_cancel(struct hrtimer *timer if (ret >= 0) return ret; - cpu_relax(); + hrtimer_wait_for_timer(timer); } } EXPORT_SYMBOL_GPL(hrtimer_cancel); @@ -1022,6 +1022,32 @@ int hrtimer_get_res(const clockid_t whic } EXPORT_SYMBOL_GPL(hrtimer_get_res); +#ifdef CONFIG_PREEMPT_SOFTIRQS +# define wake_up_timer_waiters(b) wake_up(&(b)->wait) + +/** + * hrtimer_wait_for_timer - Wait for a running timer + * + * @timer: timer to wait for + * + * The function waits in case the timers callback function is + * currently executed on the waitqueue of the timer base. The + * waitqueue is woken up after the timer callback function has + * finished execution. 
+ */ +void hrtimer_wait_for_timer(const struct hrtimer *timer) +{ + struct hrtimer_clock_base *base = timer->base; + + if (base && base->cpu_base) + wait_event(base->cpu_base->wait, + !(timer->state & HRTIMER_STATE_CALLBACK)); +} + +#else +# define wake_up_timer_waiters(b) do { } while (0) +#endif + #ifdef CONFIG_HIGH_RES_TIMERS /* @@ -1157,6 +1183,8 @@ static void run_hrtimer_softirq(struct s } } spin_unlock_irq(&cpu_base->lock); + + wake_up_timer_waiters(cpu_base); } #endif /* CONFIG_HIGH_RES_TIMERS */ @@ -1207,6 +1235,8 @@ static inline void run_hrtimer_queue(str } } spin_unlock_irq(&cpu_base->lock); + + wake_up_timer_waiters(cpu_base); } /* @@ -1382,6 +1412,9 @@ static void __devinit init_hrtimers_cpu( cpu_base->clock_base[i].cpu_base = cpu_base; hrtimer_init_hres(cpu_base); +#ifdef CONFIG_PREEMPT_SOFTIRQS + init_waitqueue_head(&cpu_base->wait); +#endif } #ifdef CONFIG_HOTPLUG_CPU Index: linux-rt-rebase.q/kernel/itimer.c =================================================================== --- linux-rt-rebase.q.orig/kernel/itimer.c +++ linux-rt-rebase.q/kernel/itimer.c @@ -170,6 +170,7 @@ again: /* We are sharing ->siglock with it_real_fn() */ if (hrtimer_try_to_cancel(timer) < 0) { spin_unlock_irq(&tsk->sighand->siglock); + hrtimer_wait_for_timer(&tsk->signal->real_timer); goto again; } expires = timeval_to_ktime(value->it_value); Index: linux-rt-rebase.q/kernel/posix-timers.c =================================================================== --- linux-rt-rebase.q.orig/kernel/posix-timers.c +++ linux-rt-rebase.q/kernel/posix-timers.c @@ -805,6 +805,7 @@ retry: unlock_timer(timr, flag); if (error == TIMER_RETRY) { + hrtimer_wait_for_timer(&timr->it.real.timer); rtn = NULL; // We already got the old time... goto retry; } @@ -844,6 +845,7 @@ retry_delete: if (timer_delete_hook(timer) == TIMER_RETRY) { unlock_timer(timer, flags); + hrtimer_wait_for_timer(&timer->it.real.timer); goto retry_delete; } @@ -876,6 +878,7 @@ retry_delete: if (timer_delete_hook(timer) == TIMER_RETRY) { unlock_timer(timer, flags); + hrtimer_wait_for_timer(&timer->it.real.timer); goto retry_delete; } list_del(&timer->list); patches/random-driver-latency-fix.patch0000664000077200007720000000174210653433163017544 0ustar mingomingo drivers/char/random.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) Index: linux-rt-rebase.q/drivers/char/random.c =================================================================== --- linux-rt-rebase.q.orig/drivers/char/random.c +++ linux-rt-rebase.q/drivers/char/random.c @@ -580,8 +580,11 @@ static void add_timer_randomness(struct preempt_disable(); /* if over the trickle threshold, use only 1 in 4096 samples */ if (input_pool.entropy_count > trickle_thresh && - (__get_cpu_var(trickle_count)++ & 0xfff)) - goto out; + (__get_cpu_var(trickle_count)++ & 0xfff)) { + preempt_enable(); + return; + } + preempt_enable(); sample.jiffies = jiffies; sample.cycles = get_cycles(); @@ -626,9 +629,6 @@ static void add_timer_randomness(struct if(input_pool.entropy_count >= random_read_wakeup_thresh) wake_up_interruptible(&random_read_wait); - -out: - preempt_enable(); } void add_input_randomness(unsigned int type, unsigned int code, patches/ppc-mcount-dummy-functions.patch0000664000077200007720000000251710653433162017776 0ustar mingomingoFrom tsutomu.owa@toshiba.co.jp Mon May 14 17:16:37 2007 Date: Mon, 14 May 2007 17:16:37 +0900 From: Tsutomu OWA To: linuxppc-dev@ozlabs.org, linux-kernel@vger.kernel.org Cc: mingo@elte.hu, tglx@linutronix.de Subject: Re: [patch 2/5] powerpc 2.6.21-rt1: 
dummy functions and export _mcount to compile

Add dummy save_stack_trace() and early_printk() functions for now, and
export _mcount so that the kernel compiles.

Signed-off-by: Tsutomu OWA
-- owa

---
 arch/powerpc/kernel/setup_64.c |   19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

Index: linux-rt-rebase.q/arch/powerpc/kernel/setup_64.c
===================================================================
--- linux-rt-rebase.q.orig/arch/powerpc/kernel/setup_64.c
+++ linux-rt-rebase.q/arch/powerpc/kernel/setup_64.c
@@ -605,3 +605,22 @@ struct ppc_pci_io ppc_pci_io;
 EXPORT_SYMBOL(ppc_pci_io);
 #endif /* CONFIG_PPC_INDIRECT_IO */
 
+#ifdef CONFIG_STACKTRACE
+#include <linux/stacktrace.h>
+void notrace save_stack_trace(struct stack_trace *trace,
+			struct task_struct *task)
+{
+}
+#endif /* CONFIG_STACKTRACE */
+
+#ifdef CONFIG_EARLY_PRINTK
+void notrace early_printk(const char *fmt, ...)
+{
+	BUG();
+}
+#endif /* CONFIG_EARLY_PRINTK */
+
+#ifdef CONFIG_MCOUNT
+extern void _mcount(void);
+EXPORT_SYMBOL(_mcount);
+#endif /* CONFIG_MCOUNT */
patches/preempt-realtime-ipc.patch0000664000077200007720000000575610653433166016603 0ustar mingomingo---
 ipc/mqueue.c |    5 +++++
 ipc/msg.c    |   25 +++++++++++++++++++------
 ipc/sem.c    |    6 ++++++
 3 files changed, 30 insertions(+), 6 deletions(-)

Index: linux-rt-rebase.q/ipc/mqueue.c
===================================================================
--- linux-rt-rebase.q.orig/ipc/mqueue.c
+++ linux-rt-rebase.q/ipc/mqueue.c
@@ -783,12 +783,17 @@ static inline void pipelined_send(struct
 				  struct msg_msg *message,
 				  struct ext_wait_queue *receiver)
 {
+	/*
+	 * Keep them in one critical section for PREEMPT_RT:
+	 */
+	preempt_disable();
 	receiver->msg = message;
 	list_del(&receiver->list);
 	receiver->state = STATE_PENDING;
 	wake_up_process(receiver->task);
 	smp_wmb();
 	receiver->state = STATE_READY;
+	preempt_enable();
 }
 
 /* pipelined_receive() - if there is task waiting in sys_mq_timedsend()
Index: linux-rt-rebase.q/ipc/msg.c
===================================================================
--- linux-rt-rebase.q.orig/ipc/msg.c
+++ linux-rt-rebase.q/ipc/msg.c
@@ -213,12 +213,19 @@ static void expunge_all(struct msg_queue
 	while (tmp != &msq->q_receivers) {
 		struct msg_receiver *msr;
 
+		/*
+		 * Make sure that the wakeup doesnt preempt
+		 * this CPU prematurely. (on PREEMPT_RT)
+		 */
+		preempt_disable();
+
 		msr = list_entry(tmp, struct msg_receiver, r_list);
 		tmp = tmp->next;
 		msr->r_msg = NULL;
-		wake_up_process(msr->r_tsk);
-		smp_mb();
+		wake_up_process(msr->r_tsk); /* serializes */
 		msr->r_msg = ERR_PTR(res);
+
+		preempt_enable();
 	}
 }
 
@@ -603,22 +610,28 @@ static inline int pipelined_send(struct
 		    !security_msg_queue_msgrcv(msq, msg, msr->r_tsk,
 					       msr->r_msgtype, msr->r_mode)) {
 
+			/*
+			 * Make sure that the wakeup doesnt preempt
+			 * this CPU prematurely. (on PREEMPT_RT)
+			 */
+			preempt_disable();
+
 			list_del(&msr->r_list);
 			if (msr->r_maxsize < msg->m_ts) {
 				msr->r_msg = NULL;
-				wake_up_process(msr->r_tsk);
-				smp_mb();
+				wake_up_process(msr->r_tsk); /* serializes */
 				msr->r_msg = ERR_PTR(-E2BIG);
 			} else {
 				msr->r_msg = NULL;
 				msq->q_lrpid = msr->r_tsk->pid;
 				msq->q_rtime = get_seconds();
-				wake_up_process(msr->r_tsk);
-				smp_mb();
+				wake_up_process(msr->r_tsk); /* serializes */
 				msr->r_msg = msg;
+				preempt_enable();
 				return 1;
 			}
+			preempt_enable();
 		}
 	}
 	return 0;
Index: linux-rt-rebase.q/ipc/sem.c
===================================================================
--- linux-rt-rebase.q.orig/ipc/sem.c
+++ linux-rt-rebase.q/ipc/sem.c
@@ -412,6 +412,11 @@ static void update_queue (struct sem_arr
 		if (error <= 0) {
 			struct sem_queue *n;
 			remove_from_queue(sma,q);
+			/*
+			 * make sure that the wakeup doesnt preempt
+			 * _this_ cpu prematurely. (on preempt_rt)
+			 */
+			preempt_disable();
 			q->status = IN_WAKEUP;
 			/*
 			 * Continue scanning. The next operation
@@ -434,6 +439,7 @@ static void update_queue (struct sem_arr
 			 */
 			smp_wmb();
 			q->status = error;
+			preempt_enable();
 			q = n;
 		} else {
 			q = q->next;
patches/softlockup-cleanups.patch0000664000077200007720000000420710653433167016556 0ustar mingomingoSubject: softlockup watchdog: style cleanups
From: Ingo Molnar

kernel/softlockup.c grew a few style uncleanlinesses in the past few
months, clean that up. No functional changes:

   text    data     bss     dec     hex filename
   1126      76       4    1206     4b6 softlockup.o.before
   1129      76       4    1209     4b9 softlockup.o.after

( the 3 bytes .text increase is due to the "<3>" appended to one of
  the printk messages. )

Signed-off-by: Ingo Molnar
---
 kernel/softlockup.c |   14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

Index: linux-rt-rebase.q/kernel/softlockup.c
===================================================================
--- linux-rt-rebase.q.orig/kernel/softlockup.c
+++ linux-rt-rebase.q/kernel/softlockup.c
@@ -23,7 +23,7 @@ static DEFINE_PER_CPU(unsigned long, tou
 static DEFINE_PER_CPU(unsigned long, print_timestamp);
 static DEFINE_PER_CPU(struct task_struct *, watchdog_task);
 
-static int did_panic = 0;
+static int did_panic;
 
 static int softlock_panic(struct notifier_block *this, unsigned long event,
 			  void *ptr)
@@ -122,7 +122,7 @@ void softlockup_tick(void)
 /*
  * The watchdog thread - runs every second and touches the timestamp.
  */
-static int watchdog(void * __bind_cpu)
+static int watchdog(void *__bind_cpu)
 {
 	struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
 
@@ -160,13 +160,13 @@ cpu_callback(struct notifier_block *nfb,
 		BUG_ON(per_cpu(watchdog_task, hotcpu));
 		p = kthread_create(watchdog, hcpu, "watchdog/%d", hotcpu);
 		if (IS_ERR(p)) {
-			printk("watchdog for %i failed\n", hotcpu);
+			printk(KERN_ERR "watchdog for %i failed\n", hotcpu);
 			return NOTIFY_BAD;
 		}
- 		per_cpu(touch_timestamp, hotcpu) = 0;
- 		per_cpu(watchdog_task, hotcpu) = p;
+		per_cpu(touch_timestamp, hotcpu) = 0;
+		per_cpu(watchdog_task, hotcpu) = p;
 		kthread_bind(p, hotcpu);
- 		break;
+		break;
 	case CPU_ONLINE:
 	case CPU_ONLINE_FROZEN:
 		wake_up_process(per_cpu(watchdog_task, hotcpu));
@@ -186,7 +186,7 @@ cpu_callback(struct notifier_block *nfb,
 			kthread_stop(p);
 		break;
 #endif /* CONFIG_HOTPLUG_CPU */
- 	}
+	}
 	return NOTIFY_OK;
 }
patches/preempt-irqs-ppc-fix-b5.patch0000664000077200007720000000316610653433164017055 0ustar mingomingo
Fix the following boot-time error by removing the ack member added by
the RT patch.

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Processor 1 found.
Brought up 2 CPUs
------------[ cut here ]------------
kernel BUG at arch/powerpc/platforms/cell/interrupt.c:86!
cpu 0x1: Vector: 700 (Program Check) at [c00000000fff3c80]
    pc: c000000000033f9c: .iic_eoi+0x58/0x64
    lr: c00000000009add8: .handle_percpu_irq+0xd4/0xf4
    sp: c00000000fff3f00
   msr: 9000000000021032
  current = 0xc000000000fee040
  paca    = 0xc000000000509e80
    pid   = 0, comm = swapper
kernel BUG at arch/powerpc/platforms/cell/interrupt.c:86!
enter ? for help
[link register   ] c00000000009add8 .handle_percpu_irq+0xd4/0xf4
[c00000000fff3f00] c00000000009ada8 .handle_percpu_irq+0xa4/0xf4 (unreliable)
[c00000000fff3f90] c000000000023bb8 .call_handle_irq+0x1c/0x2c
[c000000000ff7950] c00000000000c910 .do_IRQ+0xf8/0x1b8
[c000000000ff79f0] c000000000034f34 .cbe_system_reset_exception+0x74/0xb4
[c000000000ff7a70] c000000000022610 .system_reset_exception+0x40/0xe0
[c000000000ff7af0] c000000000003378 system_reset_common+0xf8/0x100

---
 arch/powerpc/platforms/cell/interrupt.c |    1 -
 1 file changed, 1 deletion(-)

Index: linux-rt-rebase.q/arch/powerpc/platforms/cell/interrupt.c
===================================================================
--- linux-rt-rebase.q.orig/arch/powerpc/platforms/cell/interrupt.c
+++ linux-rt-rebase.q/arch/powerpc/platforms/cell/interrupt.c
@@ -90,7 +90,6 @@ static struct irq_chip iic_chip = {
 	.typename = " CELL-IIC ",
 	.mask = iic_mask,
 	.unmask = iic_unmask,
-	.ack = iic_eoi,
 	.eoi = iic_eoi,
 };
 
patches/latency-tracing-ppc.patch0000664000077200007720000000236210653433162016414 0ustar mingomingo
 arch/powerpc/kernel/time.c |    1 +
 arch/ppc/boot/Makefile     |    9 +++++++++
 2 files changed, 10 insertions(+)

Index: linux-rt-rebase.q/arch/powerpc/kernel/time.c
===================================================================
--- linux-rt-rebase.q.orig/arch/powerpc/kernel/time.c
+++ linux-rt-rebase.q/arch/powerpc/kernel/time.c
@@ -922,6 +922,7 @@ void __init time_init(void)
 	tb_ticks_per_jiffy = ppc_tb_freq / HZ;
 	tb_ticks_per_sec = ppc_tb_freq;
 	tb_ticks_per_usec = ppc_tb_freq / 1000000;
+	cpu_khz = ppc_tb_freq / 1000;
 	tb_to_us = mulhwu_scale_factor(ppc_tb_freq, 1000000);
 	calc_cputime_factors();
 
Index: linux-rt-rebase.q/arch/ppc/boot/Makefile
===================================================================
--- linux-rt-rebase.q.orig/arch/ppc/boot/Makefile
+++ linux-rt-rebase.q/arch/ppc/boot/Makefile
@@ -14,6 +14,15 @@
 #
 
 CFLAGS += -fno-builtin -D__BOOTER__ -Iarch/$(ARCH)/boot/include
+
+ifdef CONFIG_MCOUNT
+# do not trace the boot loader
+nullstring :=
+space := $(nullstring) # end of the line
+pg_flag = $(nullstring) -pg # end of the line
+CFLAGS := $(subst ${pg_flag},${space},${CFLAGS})
+endif
+
 HOSTCFLAGS += -Iarch/$(ARCH)/boot/include
 
 BOOT_TARGETS = zImage zImage.initrd znetboot znetboot.initrd
patches/acpi-move-timer-broadcast-and-pmtimer-access-before-c3-arbiter-shutdown.patch0000664000077200007720000000300710653433161030216 0ustar mingomingoFrom: Udo A. Steinberg

The chipset doc for ICH4 tells us:

1. In general, software should not attempt any non-posted accesses
   during arbiter disable except to the ICH4's power management
   registers. This implies that interrupt handlers for any unmasked
   hardware interrupts and SMI/NMI should check ARB_DIS status before
   reading from ICH devices.

So it's not a good idea to access ICH devices after arbiter shutdown.

Signed-off-by: Udo A. Steinberg
Signed-off-by: Thomas Gleixner
Cc: Len Brown
Signed-off-by: Andrew Morton
---
 drivers/acpi/processor_idle.c |    7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

Index: linux-rt-rebase.q/drivers/acpi/processor_idle.c
===================================================================
--- linux-rt-rebase.q.orig/drivers/acpi/processor_idle.c
+++ linux-rt-rebase.q/drivers/acpi/processor_idle.c
@@ -989,6 +989,12 @@ static int acpi_idle_enter_c3(struct cpu
 		return 0;
 	}
 
+	/*
+	 * Must be done before busmaster disable as we might need to
+	 * access HPET !
+	 */
+	acpi_state_timer_broadcast(pr, cx, 1);
+
 	/* disable bus master */
 	if (pr->flags.bm_check) {
 		spin_lock(&c3_lock);
@@ -1008,7 +1014,6 @@ static int acpi_idle_enter_c3(struct cpu
 
 	/* Get start time (ticks) */
 	t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
-	acpi_state_timer_broadcast(pr, cx, 1);
 	acpi_idle_do_entry(cx);
 	t2 = inl(acpi_gbl_FADT.xpm_timer_block.address);
 
patches/preempt-realtime-prevent-idle-boosting.patch0000664000077200007720000000342710653433166022251 0ustar mingomingoSubject: Preempt-RT: Prevent boosting of idle task

Idle task boosting is a nono in general. There is one exception,
when NOHZ is active:

The idle task calls get_next_timer_interrupt() and holds the timer
wheel base->lock on the CPU and another CPU wants to access the timer
(probably to cancel it). We can safely ignore the boosting request, as
the idle CPU runs this code with interrupts disabled and will complete
the lock protected section without being interrupted. So there is no
real need to boost.

Signed-off-by: Thomas Gleixner
---
 kernel/sched.c |   20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

Index: linux-rt-rebase.q/kernel/sched.c
===================================================================
--- linux-rt-rebase.q.orig/kernel/sched.c
+++ linux-rt-rebase.q/kernel/sched.c
@@ -4330,6 +4330,25 @@ void rt_mutex_setprio(struct task_struct
 	BUG_ON(prio < 0 || prio > MAX_PRIO);
 
 	rq = task_rq_lock(p, &flags);
+
+	/*
+	 * Idle task boosting is a nono in general. There is one
+	 * exception, when NOHZ is active:
+	 *
+	 * The idle task calls get_next_timer_interrupt() and holds
+	 * the timer wheel base->lock on the CPU and another CPU wants
+	 * to access the timer (probably to cancel it). We can safely
+	 * ignore the boosting request, as the idle CPU runs this code
+	 * with interrupts disabled and will complete the lock
+	 * protected section without being interrupted. So there is no
+	 * real need to boost.
+ */ + if (unlikely(p == rq->idle)) { + WARN_ON(p != rq->curr); + WARN_ON(p->pi_blocked_on); + goto out_unlock; + } + now = rq_clock(rq); oldprio = p->prio; @@ -4363,6 +4382,7 @@ void rt_mutex_setprio(struct task_struct } trace_special(prev_resched, _need_resched(), 0); +out_unlock: task_rq_unlock(rq, &flags); } patches/lockstat-rt-hooks.patch0000664000077200007720000001237410653433170016143 0ustar mingomingo--- include/linux/lockdep.h | 28 ++++++++++++++++++++++++++++ kernel/rt.c | 25 ++++++++++++++++--------- kernel/rtmutex.c | 4 ++-- 3 files changed, 46 insertions(+), 11 deletions(-) Index: linux-rt-rebase.q/include/linux/lockdep.h =================================================================== --- linux-rt-rebase.q.orig/include/linux/lockdep.h +++ linux-rt-rebase.q/include/linux/lockdep.h @@ -344,6 +344,28 @@ do { \ lock_acquired(&(_lock)->dep_map); \ } while (0) +#define LOCK_CONTENDED_RT(_lock, f_try, f_lock) \ +do { \ + if (!f_try(&(_lock)->lock)) { \ + lock_contended(&(_lock)->dep_map, _RET_IP_); \ + f_lock(&(_lock)->lock); \ + lock_acquired(&(_lock)->dep_map); \ + } \ +} while (0) + + +#define LOCK_CONTENDED_RT_RET(_lock, f_try, f_lock) \ +({ \ + int ret = 0; \ + if (!f_try(&(_lock)->lock)) { \ + lock_contended(&(_lock)->dep_map, _RET_IP_); \ + ret = f_lock(&(_lock)->lock); \ + if (!ret) \ + lock_acquired(&(_lock)->dep_map); \ + } \ + ret; \ +}) + #else /* CONFIG_LOCK_STAT */ #define lock_contended(lockdep_map, ip) do {} while (0) @@ -352,6 +374,12 @@ do { \ #define LOCK_CONTENDED(_lock, try, lock) \ lock(_lock) +#define LOCK_CONTENDED_RT(_lock, f_try, f_lock) \ + f_lock(&(_lock)->lock) + +#define LOCK_CONTENDED_RT_RET(_lock, f_try, f_lock) \ + f_lock(&(_lock)->lock) + #endif /* CONFIG_LOCK_STAT */ #if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_GENERIC_HARDIRQS) Index: linux-rt-rebase.q/kernel/rt.c =================================================================== --- linux-rt-rebase.q.orig/kernel/rt.c +++ linux-rt-rebase.q/kernel/rt.c @@ -98,16 +98,22 @@ EXPORT_SYMBOL(_mutex_init); void __lockfunc _mutex_lock(struct mutex *lock) { mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); - rt_mutex_lock(&lock->lock); + LOCK_CONTENDED_RT(lock, rt_mutex_trylock, rt_mutex_lock); } EXPORT_SYMBOL(_mutex_lock); +static int __lockfunc __rt_mutex_lock_interruptible(struct rt_mutex *lock) +{ + return rt_mutex_lock_interruptible(lock, 0); +} + int __lockfunc _mutex_lock_interruptible(struct mutex *lock) { int ret; mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); - ret = rt_mutex_lock_interruptible(&lock->lock, 0); + ret = LOCK_CONTENDED_RT_RET(lock, rt_mutex_trylock, + __rt_mutex_lock_interruptible); if (ret) mutex_release(&lock->dep_map, 1, _RET_IP_); return ret; @@ -118,7 +124,7 @@ EXPORT_SYMBOL(_mutex_lock_interruptible) void __lockfunc _mutex_lock_nested(struct mutex *lock, int subclass) { mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_); - rt_mutex_lock(&lock->lock); + LOCK_CONTENDED_RT(lock, rt_mutex_trylock, rt_mutex_lock); } EXPORT_SYMBOL(_mutex_lock_nested); @@ -127,7 +133,8 @@ int __lockfunc _mutex_lock_interruptible int ret; mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_); - ret = rt_mutex_lock_interruptible(&lock->lock, 0); + ret = LOCK_CONTENDED_RT_RET(lock, rt_mutex_trylock, + __rt_mutex_lock_interruptible); if (ret) mutex_release(&lock->dep_map, 1, _RET_IP_); return ret; @@ -203,7 +210,7 @@ EXPORT_SYMBOL(rt_read_trylock); void __lockfunc rt_write_lock(rwlock_t *rwlock) { rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_); - __rt_spin_lock(&rwlock->lock); + 
LOCK_CONTENDED_RT(rwlock, rt_mutex_trylock, __rt_spin_lock); } EXPORT_SYMBOL(rt_write_lock); @@ -223,7 +230,7 @@ void __lockfunc rt_read_lock(rwlock_t *r return; } spin_unlock_irqrestore(&lock->wait_lock, flags); - __rt_spin_lock(lock); + LOCK_CONTENDED_RT(rwlock, rt_mutex_trylock, __rt_spin_lock); } EXPORT_SYMBOL(rt_read_lock); @@ -359,14 +366,14 @@ EXPORT_SYMBOL(rt_down_write_trylock); void fastcall rt_down_write(struct rw_semaphore *rwsem) { rwsem_acquire(&rwsem->dep_map, 0, 0, _RET_IP_); - rt_mutex_lock(&rwsem->lock); + LOCK_CONTENDED_RT(rwsem, rt_mutex_trylock, rt_mutex_lock); } EXPORT_SYMBOL(rt_down_write); void fastcall rt_down_write_nested(struct rw_semaphore *rwsem, int subclass) { rwsem_acquire(&rwsem->dep_map, subclass, 0, _RET_IP_); - rt_mutex_lock(&rwsem->lock); + LOCK_CONTENDED_RT(rwsem, rt_mutex_trylock, rt_mutex_lock); } EXPORT_SYMBOL(rt_down_write_nested); @@ -411,7 +418,7 @@ static void __rt_down_read(struct rw_sem return; } spin_unlock_irqrestore(&rwsem->lock.wait_lock, flags); - rt_mutex_lock(&rwsem->lock); + LOCK_CONTENDED_RT(rwsem, rt_mutex_trylock, rt_mutex_lock); } void fastcall rt_down_read(struct rw_semaphore *rwsem) Index: linux-rt-rebase.q/kernel/rtmutex.c =================================================================== --- linux-rt-rebase.q.orig/kernel/rtmutex.c +++ linux-rt-rebase.q/kernel/rtmutex.c @@ -785,8 +785,8 @@ rt_spin_lock_slowunlock(struct rt_mutex void __lockfunc rt_spin_lock(spinlock_t *lock) { - rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock); spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); + LOCK_CONTENDED_RT(lock, rt_mutex_trylock, __rt_spin_lock); } EXPORT_SYMBOL(rt_spin_lock); @@ -800,8 +800,8 @@ EXPORT_SYMBOL(__rt_spin_lock); void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass) { - rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock); spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_); + LOCK_CONTENDED_RT(lock, rt_mutex_trylock, __rt_spin_lock); } EXPORT_SYMBOL(rt_spin_lock_nested); patches/origin.patch0000664000077200007720000216415710653433161014053 0ustar mingomingo head: e4903fb59590f86190280a549420f6cb85bd7f7e --- --- Documentation/block/barrier.txt | 6 Documentation/block/biodoc.txt | 10 Documentation/block/request.txt | 2 Documentation/gpio.txt | 4 Documentation/iostats.txt | 2 Documentation/lguest/Makefile | 3 Documentation/lguest/lguest.c | 84 +- Makefile | 2 arch/arm/plat-omap/mailbox.c | 8 arch/i386/boot/apm.c | 7 arch/i386/boot/main.c | 16 arch/i386/kernel/acpi/Makefile | 2 arch/i386/kernel/setup.c | 2 arch/i386/mm/init.c | 2 arch/ia64/ia32/ia32_support.c | 8 arch/ia64/ia32/ia32priv.h | 12 arch/ia64/ia32/sys_ia32.c | 81 +- arch/ia64/kernel/acpi.c | 19 arch/ia64/kernel/head.S | 4 arch/ia64/kernel/irq_ia64.c | 26 arch/ia64/kernel/machvec.c | 27 arch/ia64/kernel/process.c | 7 arch/ia64/kernel/setup.c | 11 arch/ia64/kernel/smp.c | 2 arch/ia64/kernel/smpboot.c | 6 arch/ia64/kernel/vmlinux.lds.S | 2 arch/ia64/pci/pci.c | 2 arch/m68knommu/Kconfig | 24 arch/m68knommu/Makefile | 2 arch/m68knommu/kernel/dma.c | 1 arch/m68knommu/kernel/setup.c | 5 arch/m68knommu/platform/5206/config.c | 8 arch/m68knommu/platform/5206e/config.c | 7 arch/m68knommu/platform/520x/config.c | 7 arch/m68knommu/platform/523x/config.c | 8 arch/m68knommu/platform/5249/config.c | 8 arch/m68knommu/platform/5272/config.c | 7 arch/m68knommu/platform/527x/config.c | 8 arch/m68knommu/platform/528x/config.c | 8 arch/m68knommu/platform/5307/config.c | 7 arch/m68knommu/platform/5307/entry.S | 11 arch/m68knommu/platform/532x/config.c | 5 
arch/m68knommu/platform/5407/config.c | 7 arch/m68knommu/platform/68VZ328/config.c | 7 arch/mips/Makefile | 2 arch/mips/arc/console.c | 31 - arch/mips/jazz/io.c | 135 ---- arch/mips/jazz/reset.c | 13 arch/mips/jazz/setup.c | 4 arch/mips/jmr3927/rbhma3100/setup.c | 2 arch/mips/kernel/gdb-stub.c | 4 arch/mips/kernel/head.S | 2 arch/mips/kernel/linux32.c | 2 arch/mips/kernel/rtlx.c | 2 arch/mips/kernel/syscall.c | 4 arch/mips/kernel/vpe.c | 3 arch/mips/mm/c-sb1.c | 2 arch/mips/mm/init.c | 2 arch/mips/sni/sniprom.c | 5 arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_setup.c | 2 arch/powerpc/Kconfig | 9 arch/powerpc/boot/dts/mpc8313erdb.dts | 2 arch/powerpc/boot/dts/mpc832x_mds.dts | 2 arch/powerpc/boot/dts/mpc832x_rdb.dts | 2 arch/powerpc/boot/dts/mpc8349emitx.dts | 4 arch/powerpc/boot/dts/mpc8349emitxgp.dts | 2 arch/powerpc/boot/dts/mpc834x_mds.dts | 4 arch/powerpc/boot/dts/mpc836x_mds.dts | 2 arch/powerpc/boot/dts/mpc8540ads.dts | 2 arch/powerpc/boot/dts/mpc8541cds.dts | 4 arch/powerpc/boot/dts/mpc8544ds.dts | 211 +++++++ arch/powerpc/boot/dts/mpc8548cds.dts | 250 +++++--- arch/powerpc/boot/dts/mpc8555cds.dts | 4 arch/powerpc/boot/dts/mpc8560ads.dts | 2 arch/powerpc/boot/dts/mpc8568mds.dts | 54 + arch/powerpc/boot/dts/mpc8641_hpcn.dts | 8 arch/powerpc/configs/mpc8544_ds_defconfig | 333 ++++++----- arch/powerpc/configs/mpc8568mds_defconfig | 292 +++++---- arch/powerpc/kernel/pci_32.c | 25 arch/powerpc/platforms/82xx/mpc82xx_ads.c | 3 arch/powerpc/platforms/83xx/pci.c | 4 arch/powerpc/platforms/85xx/Kconfig | 6 arch/powerpc/platforms/85xx/Makefile | 2 arch/powerpc/platforms/85xx/mpc8544_ds.c | 241 ++++++++ arch/powerpc/platforms/85xx/mpc85xx.h | 1 arch/powerpc/platforms/85xx/mpc85xx_ads.c | 3 arch/powerpc/platforms/85xx/mpc85xx_cds.c | 125 +++- arch/powerpc/platforms/85xx/mpc85xx_mds.c | 7 arch/powerpc/platforms/85xx/pci.c | 91 --- arch/powerpc/platforms/86xx/Kconfig | 3 arch/powerpc/platforms/86xx/Makefile | 1 arch/powerpc/platforms/86xx/mpc86xx.h | 5 arch/powerpc/platforms/86xx/mpc86xx_hpcn.c | 13 arch/powerpc/platforms/86xx/pci.c | 238 ------- arch/powerpc/platforms/cell/spufs/spufs.h | 5 arch/powerpc/platforms/chrp/pci.c | 7 arch/powerpc/platforms/embedded6xx/linkstation.c | 2 arch/powerpc/sysdev/Makefile | 1 arch/powerpc/sysdev/fsl_pci.c | 257 ++++++++ arch/powerpc/sysdev/fsl_pci.h | 88 ++ arch/powerpc/sysdev/fsl_pcie.h | 94 --- arch/powerpc/sysdev/grackle.c | 2 arch/powerpc/sysdev/indirect_pci.c | 59 + arch/powerpc/sysdev/mv64x60_pci.c | 2 arch/sparc/defconfig | 2 arch/sparc/kernel/sparc_ksyms.c | 3 arch/sparc/kernel/vmlinux.lds.S | 2 arch/sparc/lib/memset.S | 2 arch/sparc/prom/printf.c | 2 arch/sparc64/defconfig | 32 - arch/sparc64/kernel/head.S | 11 arch/sparc64/kernel/vmlinux.lds.S | 2 arch/um/drivers/mconsole_kern.c | 4 arch/um/drivers/net_kern.c | 2 arch/um/drivers/ubd_kern.c | 4 arch/um/kernel/mem.c | 2 arch/um/kernel/physmem.c | 15 arch/um/kernel/skas/process.c | 4 arch/um/os-Linux/aio.c | 47 - arch/um/os-Linux/process.c | 2 arch/um/os-Linux/user_syms.c | 5 arch/um/sys-i386/Makefile | 2 arch/x86_64/kernel/acpi/Makefile | 2 arch/x86_64/kernel/acpi/sleep.c | 4 arch/x86_64/kernel/head.S | 2 arch/x86_64/kernel/setup.c | 2 block/as-iosched.c | 26 block/blktrace.c | 10 block/bsg.c | 12 block/cfq-iosched.c | 39 - block/deadline-iosched.c | 18 block/elevator.c | 75 +- block/ll_rw_blk.c | 215 +++---- block/noop-iosched.c | 14 block/scsi_ioctl.c | 24 drivers/acorn/block/fd1772.c | 4 drivers/acorn/block/mfmhd.c | 2 drivers/acpi/Kconfig | 62 -- drivers/acpi/ac.c | 9 
drivers/acpi/acpi_memhotplug.c | 8 drivers/acpi/asus_acpi.c | 11 drivers/acpi/battery.c | 9 drivers/acpi/button.c | 12 drivers/acpi/container.c | 10 drivers/acpi/ec.c | 8 drivers/acpi/events/evrgnini.c | 2 drivers/acpi/fan.c | 8 drivers/acpi/namespace/nsxfeval.c | 2 drivers/acpi/pci_link.c | 9 drivers/acpi/pci_root.c | 9 drivers/acpi/power.c | 8 drivers/acpi/processor_core.c | 8 drivers/acpi/processor_throttling.c | 59 + drivers/acpi/sbs.c | 10 drivers/acpi/scan.c | 156 +++-- drivers/acpi/sleep/Makefile | 4 drivers/acpi/sleep/main.c | 162 ++++- drivers/acpi/sleep/poweroff.c | 2 drivers/acpi/sleep/proc.c | 20 drivers/acpi/sleep/wakeup.c | 2 drivers/acpi/thermal.c | 8 drivers/acpi/utilities/uteval.c | 4 drivers/acpi/video.c | 8 drivers/ata/ata_piix.c | 113 +++ drivers/ata/libata-scsi.c | 2 drivers/ata/pata_ali.c | 2 drivers/ata/pata_hpt37x.c | 14 drivers/block/amiflop.c | 2 drivers/block/aoe/aoe.h | 2 drivers/block/aoe/aoeblk.c | 2 drivers/block/ataflop.c | 2 drivers/block/cciss.c | 10 drivers/block/cpqarray.c | 6 drivers/block/floppy.c | 4 drivers/block/lguest_blk.c | 2 drivers/block/loop.c | 4 drivers/block/nbd.c | 4 drivers/block/paride/pcd.c | 4 drivers/block/paride/pd.c | 2 drivers/block/paride/pf.c | 4 drivers/block/pktcdvd.c | 12 drivers/block/ps2esdi.c | 4 drivers/block/ps3disk.c | 8 drivers/block/rd.c | 2 drivers/block/sunvdc.c | 2 drivers/block/swim3.c | 4 drivers/block/sx8.c | 20 drivers/block/ub.c | 6 drivers/block/umem.c | 6 drivers/block/viodasd.c | 2 drivers/block/xd.c | 2 drivers/block/xd.h | 2 drivers/block/xen-blkfront.c | 4 drivers/block/xsysace.c | 4 drivers/block/z2ram.c | 2 drivers/cdrom/cdrom.c | 2 drivers/cdrom/viocd.c | 2 drivers/char/Kconfig | 24 drivers/char/Makefile | 3 drivers/char/hpet.c | 8 drivers/ide/ide-cd.c | 4 drivers/ide/ide-disk.c | 4 drivers/ide/ide-io.c | 2 drivers/ide/ide-probe.c | 2 drivers/ide/legacy/hd.c | 2 drivers/input/misc/atlas_btns.c | 9 drivers/kvm/kvm_main.c | 44 + drivers/kvm/x86_emulate.c | 2 drivers/md/dm-table.c | 8 drivers/md/dm.c | 10 drivers/md/faulty.c | 2 drivers/md/linear.c | 14 drivers/md/md.c | 2 drivers/md/multipath.c | 12 drivers/md/raid0.c | 14 drivers/md/raid1.c | 12 drivers/md/raid10.c | 14 drivers/md/raid5.c | 18 drivers/message/i2o/i2o_block.c | 4 drivers/misc/asus-laptop.c | 41 - drivers/misc/sony-laptop.c | 21 drivers/misc/thinkpad_acpi.c | 20 drivers/misc/thinkpad_acpi.h | 2 drivers/mmc/card/queue.c | 8 drivers/net/82596.c | 1 drivers/net/Makefile | 2 drivers/net/acenic.c | 6 drivers/net/atl1/atl1_hw.h | 9 drivers/net/atl1/atl1_main.c | 28 drivers/net/defxx.c | 17 drivers/net/ehea/ehea.h | 2 drivers/net/ehea/ehea_main.c | 22 drivers/net/forcedeth.c | 16 drivers/net/netxen/netxen_nic.h | 3 drivers/net/netxen/netxen_nic_main.c | 50 - drivers/net/phy/vitesse.c | 2 drivers/net/ps3_gelic_net.c | 215 ++++--- drivers/net/ps3_gelic_net.h | 24 drivers/net/ucc_geth.c | 334 ++++++----- drivers/net/ucc_geth.h | 6 drivers/net/ucc_geth_ethtool.c | 388 +++++++++++++ drivers/net/ucc_geth_mii.c | 6 drivers/pci/pci-acpi.c | 26 drivers/pci/pci.c | 9 drivers/pci/pci.h | 3 drivers/pnp/driver.c | 5 drivers/pnp/pnpacpi/core.c | 33 - drivers/rtc/class.c | 5 drivers/s390/block/dasd.c | 4 drivers/s390/block/dasd_int.h | 2 drivers/s390/block/dcssblk.c | 2 drivers/s390/block/xpram.c | 2 drivers/s390/char/tape.h | 2 drivers/s390/char/tape_block.c | 4 drivers/s390/net/ctcmain.c | 6 drivers/s390/net/netiucv.c | 4 drivers/sbus/char/Kconfig | 1 drivers/sbus/char/jsflash.c | 2 drivers/scsi/aacraid/linit.c | 4 drivers/scsi/scsi_lib.c | 12 
drivers/scsi/sd.c | 4 drivers/scsi/sr.c | 2 drivers/serial/68328serial.c | 71 -- drivers/serial/8250.c | 5 drivers/serial/8250_early.c | 10 drivers/serial/serial_core.c | 9 drivers/video/Kconfig | 9 fs/bio.c | 30 - fs/ocfs2/file.c | 2 fs/open.c | 2 include/acpi/acpi_bus.h | 7 include/acpi/acpi_drivers.h | 25 include/acpi/actypes.h | 6 include/acpi/acutils.h | 4 include/asm-arm/arch-omap/mailbox.h | 2 include/asm-i386/acpi.h | 23 include/asm-i386/bootparam.h | 9 include/asm-i386/ist.h | 10 include/asm-i386/suspend.h | 2 include/asm-ia64/acpi.h | 5 include/asm-ia64/ia32.h | 9 include/asm-ia64/machvec.h | 1 include/asm-ia64/processor.h | 4 include/asm-ia64/smp.h | 1 include/asm-m68knommu/hw_irq.h | 4 include/asm-m68knommu/mcfdma.h | 2 include/asm-m68knommu/system.h | 4 include/asm-m68knommu/timex.h | 24 include/asm-mips/war.h | 1 include/asm-powerpc/pci-bridge.h | 16 include/asm-x86_64/acpi.h | 22 include/asm-x86_64/ist.h | 1 include/asm-x86_64/suspend.h | 2 include/asm-xtensa/io.h | 1 include/linux/acpi.h | 1 include/linux/apm_bios.h | 20 include/linux/blkdev.h | 141 ++-- include/linux/blktrace_api.h | 2 include/linux/elevator.h | 76 +- include/linux/ide.h | 4 include/linux/libata.h | 2 include/linux/loop.h | 2 include/linux/mod_devicetable.h | 6 include/linux/pci.h | 1 include/linux/pci_ids.h | 21 include/linux/pnp.h | 4 include/linux/raid/md_k.h | 4 include/linux/serial_8250.h | 2 include/linux/serial_core.h | 2 include/linux/time.h | 8 include/scsi/sd.h | 2 kernel/acct.c | 2 kernel/hrtimer.c | 6 kernel/sysctl.c | 2 kernel/time.c | 16 kernel/time/timekeeping.c | 38 + kernel/tsacct.c | 2 lib/fault-inject.c | 4 mm/bounce.c | 4 mm/hugetlb.c | 1 mm/slab.c | 2 net/8021q/vlan.c | 2 net/bridge/netfilter/ebt_log.c | 7 net/bridge/netfilter/ebt_ulog.c | 9 net/ipv4/netfilter/ipt_LOG.c | 6 net/ipv6/netfilter/ip6t_LOG.c | 6 net/ipv6/tcp_ipv6.c | 1 net/netfilter/nf_conntrack_proto_tcp.c | 1 net/netfilter/nf_conntrack_proto_udp.c | 1 net/netfilter/nf_conntrack_proto_udplite.c | 1 net/netfilter/xt_physdev.c | 1 net/netlink/genetlink.c | 28 net/rxrpc/af_rxrpc.c | 2 net/rxrpc/ar-connection.c | 4 net/rxrpc/ar-transport.c | 4 net/rxrpc/rxkad.c | 2 net/sunrpc/auth_gss/svcauth_gss.c | 9 scripts/Makefile.build | 2 scripts/Makefile.modpost | 21 scripts/kconfig/conf.c | 31 - scripts/kconfig/expr.h | 4 scripts/kconfig/gconf.c | 10 scripts/kconfig/kxgettext.c | 4 scripts/kconfig/lkc_proto.h | 2 scripts/kconfig/mconf.c | 4 scripts/kconfig/menu.c | 12 scripts/kconfig/qconf.cc | 2 scripts/kconfig/zconf.tab.c_shipped | 12 scripts/kconfig/zconf.y | 12 scripts/mod/file2alias.c | 12 scripts/mod/modpost.c | 159 +++-- scripts/ver_linux | 4 security/selinux/ss/services.c | 4 security/selinux/xfrm.c | 3 354 files changed, 4486 insertions(+), 2802 deletions(-) Index: linux-rt-rebase.q/Documentation/block/barrier.txt =================================================================== --- linux-rt-rebase.q.orig/Documentation/block/barrier.txt +++ linux-rt-rebase.q/Documentation/block/barrier.txt @@ -79,9 +79,9 @@ and how to prepare flush requests. Note used to indicate the whole sequence of performing barrier requests including draining and flushing. 
-typedef void (prepare_flush_fn)(request_queue_t *q, struct request *rq); +typedef void (prepare_flush_fn)(struct request_queue *q, struct request *rq); -int blk_queue_ordered(request_queue_t *q, unsigned ordered, +int blk_queue_ordered(struct request_queue *q, unsigned ordered, prepare_flush_fn *prepare_flush_fn); @q : the queue in question @@ -92,7 +92,7 @@ int blk_queue_ordered(request_queue_t *q For example, SCSI disk driver's prepare_flush_fn looks like the following. -static void sd_prepare_flush(request_queue_t *q, struct request *rq) +static void sd_prepare_flush(struct request_queue *q, struct request *rq) { memset(rq->cmd, 0, sizeof(rq->cmd)); rq->cmd_type = REQ_TYPE_BLOCK_PC; Index: linux-rt-rebase.q/Documentation/block/biodoc.txt =================================================================== --- linux-rt-rebase.q.orig/Documentation/block/biodoc.txt +++ linux-rt-rebase.q/Documentation/block/biodoc.txt @@ -740,12 +740,12 @@ Block now offers some simple generic fun queueing (typically known as tagged command queueing), ie manage more than one outstanding command on a queue at any given time. - blk_queue_init_tags(request_queue_t *q, int depth) + blk_queue_init_tags(struct request_queue *q, int depth) Initialize internal command tagging structures for a maximum depth of 'depth'. - blk_queue_free_tags((request_queue_t *q) + blk_queue_free_tags((struct request_queue *q) Teardown tag info associated with the queue. This will be done automatically by block if blk_queue_cleanup() is called on a queue @@ -754,7 +754,7 @@ one outstanding command on a queue at an The above are initialization and exit management, the main helpers during normal operations are: - blk_queue_start_tag(request_queue_t *q, struct request *rq) + blk_queue_start_tag(struct request_queue *q, struct request *rq) Start tagged operation for this request. A free tag number between 0 and 'depth' is assigned to the request (rq->tag holds this number), @@ -762,7 +762,7 @@ normal operations are: for this queue is already achieved (or if the tag wasn't started for some other reason), 1 is returned. Otherwise 0 is returned. - blk_queue_end_tag(request_queue_t *q, struct request *rq) + blk_queue_end_tag(struct request_queue *q, struct request *rq) End tagged operation on this request. 'rq' is removed from the internal book keeping structures. @@ -781,7 +781,7 @@ queue. For instance, on IDE any tagged r the hardware and software block queue and enable the driver to sanely restart all the outstanding requests. There's a third helper to do that: - blk_queue_invalidate_tags(request_queue_t *q) + blk_queue_invalidate_tags(struct request_queue *q) Clear the internal block tag queue and re-add all the pending requests to the request queue. 
The driver will receive them again on the Index: linux-rt-rebase.q/Documentation/block/request.txt =================================================================== --- linux-rt-rebase.q.orig/Documentation/block/request.txt +++ linux-rt-rebase.q/Documentation/block/request.txt @@ -83,6 +83,6 @@ struct bio *bio DBI First bio in reque struct bio *biotail DBI Last bio in request -request_queue_t *q DB Request queue this request belongs to +struct request_queue *q DB Request queue this request belongs to struct request_list *rl B Request list this request came from Index: linux-rt-rebase.q/Documentation/gpio.txt =================================================================== --- linux-rt-rebase.q.orig/Documentation/gpio.txt +++ linux-rt-rebase.q/Documentation/gpio.txt @@ -148,7 +148,7 @@ pin ... that won't always match the spec issues including wire-OR and output latencies. The get/set calls have no error returns because "invalid GPIO" should have -been reported earlier in gpio_set_direction(). However, note that not all +been reported earlier from gpio_direction_*(). However, note that not all platforms can read the value of output pins; those that can't should always return zero. Also, using these calls for GPIOs that can't safely be accessed without sleeping (see below) is an error. @@ -239,7 +239,7 @@ map between them using calls like: Those return either the corresponding number in the other namespace, or else a negative errno code if the mapping can't be done. (For example, some GPIOs can't used as IRQs.) It is an unchecked error to use a GPIO -number that hasn't been marked as an input using gpio_set_direction(), or +number that wasn't set up as an input using gpio_direction_input(), or to use an IRQ number that didn't originally come from gpio_to_irq(). These two mapping calls are expected to cost on the order of a single Index: linux-rt-rebase.q/Documentation/iostats.txt =================================================================== --- linux-rt-rebase.q.orig/Documentation/iostats.txt +++ linux-rt-rebase.q/Documentation/iostats.txt @@ -79,7 +79,7 @@ Field 8 -- # of milliseconds spent writ measured from __make_request() to end_that_request_last()). Field 9 -- # of I/Os currently in progress The only field that should go to zero. Incremented as requests are - given to appropriate request_queue_t and decremented as they finish. + given to appropriate struct request_queue and decremented as they finish. Field 10 -- # of milliseconds spent doing I/Os This field is increases so long as field 9 is nonzero. Field 11 -- weighted # of milliseconds spent doing I/Os Index: linux-rt-rebase.q/Documentation/lguest/Makefile =================================================================== --- linux-rt-rebase.q.orig/Documentation/lguest/Makefile +++ linux-rt-rebase.q/Documentation/lguest/Makefile @@ -11,8 +11,7 @@ endif include $(KBUILD_OUTPUT)/.config LGUEST_GUEST_TOP := ($(CONFIG_PAGE_OFFSET) - 0x08000000) -CFLAGS:=-Wall -Wmissing-declarations -Wmissing-prototypes -O3 \ - -static -DLGUEST_GUEST_TOP="$(LGUEST_GUEST_TOP)" -Wl,-T,lguest.lds +CFLAGS:=-Wall -Wmissing-declarations -Wmissing-prototypes -O3 -Wl,-T,lguest.lds LDLIBS:=-lz all: lguest.lds lguest Index: linux-rt-rebase.q/Documentation/lguest/lguest.c =================================================================== --- linux-rt-rebase.q.orig/Documentation/lguest/lguest.c +++ linux-rt-rebase.q/Documentation/lguest/lguest.c @@ -47,12 +47,14 @@ static bool verbose; #define verbose(args...) 
\ do { if (verbose) printf(args); } while(0) static int waker_fd; +static u32 top; struct device_list { fd_set infds; int max_infd; + struct lguest_device_desc *descs; struct device *dev; struct device **lastdev; }; @@ -324,8 +326,7 @@ static void concat(char *dst, char *args static int tell_kernel(u32 pgdir, u32 start, u32 page_offset) { u32 args[] = { LHREQ_INITIALIZE, - LGUEST_GUEST_TOP/getpagesize(), /* Just below us */ - pgdir, start, page_offset }; + top/getpagesize(), pgdir, start, page_offset }; int fd; fd = open_or_die("/dev/lguest", O_RDWR); @@ -382,7 +383,7 @@ static int setup_waker(int lguest_fd, st static void *_check_pointer(unsigned long addr, unsigned int size, unsigned int line) { - if (addr >= LGUEST_GUEST_TOP || addr + size >= LGUEST_GUEST_TOP) + if (addr >= top || addr + size >= top) errx(1, "%s:%i: Invalid address %li", __FILE__, line, addr); return (void *)addr; } @@ -629,24 +630,26 @@ static void handle_input(int fd, struct } } -static struct lguest_device_desc *new_dev_desc(u16 type, u16 features, - u16 num_pages) +static struct lguest_device_desc * +new_dev_desc(struct lguest_device_desc *descs, + u16 type, u16 features, u16 num_pages) { - static unsigned long top = LGUEST_GUEST_TOP; - struct lguest_device_desc *desc; + unsigned int i; - desc = malloc(sizeof(*desc)); - desc->type = type; - desc->num_pages = num_pages; - desc->features = features; - desc->status = 0; - if (num_pages) { - top -= num_pages*getpagesize(); - map_zeroed_pages(top, num_pages); - desc->pfn = top / getpagesize(); - } else - desc->pfn = 0; - return desc; + for (i = 0; i < LGUEST_MAX_DEVICES; i++) { + if (!descs[i].type) { + descs[i].type = type; + descs[i].features = features; + descs[i].num_pages = num_pages; + if (num_pages) { + map_zeroed_pages(top, num_pages); + descs[i].pfn = top/getpagesize(); + top += num_pages*getpagesize(); + } + return &descs[i]; + } + } + errx(1, "too many devices"); } static struct device *new_device(struct device_list *devices, @@ -669,7 +672,7 @@ static struct device *new_device(struct dev->fd = fd; if (handle_input) set_fd(dev->fd, devices); - dev->desc = new_dev_desc(type, features, num_pages); + dev->desc = new_dev_desc(devices->descs, type, features, num_pages); dev->mem = (void *)(dev->desc->pfn * getpagesize()); dev->handle_input = handle_input; dev->watch_key = (unsigned long)dev->mem + watch_off; @@ -866,30 +869,6 @@ static void setup_tun_net(const char *ar verbose("attached to bridge: %s\n", br_name); } -/* Now we know how much memory we have, we copy in device descriptors */ -static void map_device_descriptors(struct device_list *devs, unsigned long mem) -{ - struct device *i; - unsigned int num; - struct lguest_device_desc *descs; - - /* Device descriptor array sits just above top of normal memory */ - descs = map_zeroed_pages(mem, 1); - - for (i = devs->dev, num = 0; i; i = i->next, num++) { - if (num == LGUEST_MAX_DEVICES) - errx(1, "too many devices"); - verbose("Device %i: %s\n", num, - i->desc->type == LGUEST_DEVICE_T_NET ? "net" - : i->desc->type == LGUEST_DEVICE_T_CONSOLE ? "console" - : i->desc->type == LGUEST_DEVICE_T_BLOCK ? 
"block" - : "unknown"); - descs[num] = *i->desc; - free(i->desc); - i->desc = &descs[num]; - } -} - static void __attribute__((noreturn)) run_guest(int lguest_fd, struct device_list *device_list) { @@ -934,8 +913,8 @@ static void usage(void) int main(int argc, char *argv[]) { - unsigned long mem, pgdir, start, page_offset, initrd_size = 0; - int c, lguest_fd; + unsigned long mem = 0, pgdir, start, page_offset, initrd_size = 0; + int i, c, lguest_fd; struct device_list device_list; void *boot = (void *)0; const char *initrd_name = NULL; @@ -945,6 +924,15 @@ int main(int argc, char *argv[]) device_list.lastdev = &device_list.dev; FD_ZERO(&device_list.infds); + /* We need to know how much memory so we can allocate devices. */ + for (i = 1; i < argc; i++) { + if (argv[i][0] != '-') { + mem = top = atoi(argv[i]) * 1024 * 1024; + device_list.descs = map_zeroed_pages(top, 1); + top += getpagesize(); + break; + } + } while ((c = getopt_long(argc, argv, "v", opts, NULL)) != EOF) { switch (c) { case 'v': @@ -974,16 +962,12 @@ int main(int argc, char *argv[]) setup_console(&device_list); /* First we map /dev/zero over all of guest-physical memory. */ - mem = atoi(argv[optind]) * 1024 * 1024; map_zeroed_pages(0, mem / getpagesize()); /* Now we load the kernel */ start = load_kernel(open_or_die(argv[optind+1], O_RDONLY), &page_offset); - /* Write the device descriptors into memory. */ - map_device_descriptors(&device_list, mem); - /* Map the initrd image if requested */ if (initrd_name) { initrd_size = load_initrd(initrd_name, mem); Index: linux-rt-rebase.q/Makefile =================================================================== --- linux-rt-rebase.q.orig/Makefile +++ linux-rt-rebase.q/Makefile @@ -299,7 +299,7 @@ CHECKFLAGS := -D__linux__ -Dlinux -D MODFLAGS = -DMODULE CFLAGS_MODULE = $(MODFLAGS) AFLAGS_MODULE = $(MODFLAGS) -LDFLAGS_MODULE = -r +LDFLAGS_MODULE = CFLAGS_KERNEL = AFLAGS_KERNEL = Index: linux-rt-rebase.q/arch/arm/plat-omap/mailbox.c =================================================================== --- linux-rt-rebase.q.orig/arch/arm/plat-omap/mailbox.c +++ linux-rt-rebase.q/arch/arm/plat-omap/mailbox.c @@ -161,11 +161,11 @@ static void mbox_rx_work(struct work_str /* * Mailbox interrupt handler */ -static void mbox_txq_fn(request_queue_t * q) +static void mbox_txq_fn(struct request_queue * q) { } -static void mbox_rxq_fn(request_queue_t * q) +static void mbox_rxq_fn(struct request_queue * q) { } @@ -180,7 +180,7 @@ static void __mbox_rx_interrupt(struct o { struct request *rq; mbox_msg_t msg; - request_queue_t *q = mbox->rxq->queue; + struct request_queue *q = mbox->rxq->queue; disable_mbox_irq(mbox, IRQ_RX); @@ -297,7 +297,7 @@ static struct omap_mbox_queue *mbox_queu request_fn_proc * proc, void (*work) (struct work_struct *)) { - request_queue_t *q; + struct request_queue *q; struct omap_mbox_queue *mq; mq = kzalloc(sizeof(struct omap_mbox_queue), GFP_KERNEL); Index: linux-rt-rebase.q/arch/i386/boot/apm.c =================================================================== --- linux-rt-rebase.q.orig/arch/i386/boot/apm.c +++ linux-rt-rebase.q/arch/i386/boot/apm.c @@ -40,14 +40,15 @@ int query_apm_bios(void) if (bx != 0x504d) /* "PM" signature */ return -1; - if (cx & 0x02) /* 32 bits supported? */ + if (!(cx & 0x02)) /* 32 bits supported? 
*/ return -1; /* Disconnect first, just in case */ ax = 0x5304; + bx = 0; asm volatile("pushl %%ebp ; int $0x15 ; popl %%ebp" - : "+a" (ax) - : : "ebx", "ecx", "edx", "esi", "edi"); + : "+a" (ax), "+b" (bx) + : : "ecx", "edx", "esi", "edi"); /* Paranoia */ ebx = esi = 0; Index: linux-rt-rebase.q/arch/i386/boot/main.c =================================================================== --- linux-rt-rebase.q.orig/arch/i386/boot/main.c +++ linux-rt-rebase.q/arch/i386/boot/main.c @@ -73,15 +73,15 @@ static void keyboard_set_repeat(void) } /* - * Get Intel SpeedStep IST information. + * Get Intel SpeedStep (IST) information. */ -static void query_speedstep_ist(void) +static void query_ist(void) { asm("int $0x15" - : "=a" (boot_params.speedstep_info[0]), - "=b" (boot_params.speedstep_info[1]), - "=c" (boot_params.speedstep_info[2]), - "=d" (boot_params.speedstep_info[3]) + : "=a" (boot_params.ist_info.signature), + "=b" (boot_params.ist_info.command), + "=c" (boot_params.ist_info.event), + "=d" (boot_params.ist_info.perf_level) : "a" (0x0000e980), /* IST Support */ "d" (0x47534943)); /* Request value */ } @@ -144,8 +144,8 @@ void main(void) query_voyager(); #endif - /* Query SpeedStep IST information */ - query_speedstep_ist(); + /* Query Intel SpeedStep (IST) information */ + query_ist(); /* Query APM information */ #if defined(CONFIG_APM) || defined(CONFIG_APM_MODULE) Index: linux-rt-rebase.q/arch/i386/kernel/acpi/Makefile =================================================================== --- linux-rt-rebase.q.orig/arch/i386/kernel/acpi/Makefile +++ linux-rt-rebase.q/arch/i386/kernel/acpi/Makefile @@ -2,7 +2,7 @@ obj-$(CONFIG_ACPI) += boot.o ifneq ($(CONFIG_PCI),) obj-$(CONFIG_X86_IO_APIC) += earlyquirk.o endif -obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup.o +obj-$(CONFIG_ACPI) += sleep.o wakeup.o ifneq ($(CONFIG_ACPI_PROCESSOR),) obj-y += cstate.o processor.o Index: linux-rt-rebase.q/arch/i386/kernel/setup.c =================================================================== --- linux-rt-rebase.q.orig/arch/i386/kernel/setup.c +++ linux-rt-rebase.q/arch/i386/kernel/setup.c @@ -422,7 +422,7 @@ void __init setup_bootmem_allocator(void */ reserve_bootmem(PAGE_SIZE, PAGE_SIZE); #endif -#ifdef CONFIG_ACPI_SLEEP +#ifdef CONFIG_ACPI /* * Reserve low memory region for sleep support. */ Index: linux-rt-rebase.q/arch/i386/mm/init.c =================================================================== --- linux-rt-rebase.q.orig/arch/i386/mm/init.c +++ linux-rt-rebase.q/arch/i386/mm/init.c @@ -432,7 +432,7 @@ static void __init pagetable_init (void) paravirt_pagetable_setup_done(pgd_base); } -#if defined(CONFIG_SOFTWARE_SUSPEND) || defined(CONFIG_ACPI_SLEEP) +#if defined(CONFIG_SOFTWARE_SUSPEND) || defined(CONFIG_ACPI) /* * Swap suspend & friends need this for resume because things like the intel-agp * driver might have split up a kernel 4MB mapping. 
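
A side note on the query_ist() hunk above: the four BIOS outputs now land in
named fields of boot_params.ist_info instead of an anonymous speedstep_info[]
array, so callers no longer need to remember which index means what. Below is
a minimal sketch of the consuming side; the __u32 types are an assumption (the
authoritative definition is the one this patch puts in include/asm-i386/ist.h,
per the diffstat), and ist_present() is a hypothetical helper, not part of the
patch:

struct ist_info {
	__u32 signature;	/* filled from %eax by the 0xe980 BIOS call */
	__u32 command;		/* filled from %ebx */
	__u32 event;		/* filled from %ecx */
	__u32 perf_level;	/* filled from %edx */
};

/*
 * Hypothetical helper: treat a zero signature as "the BIOS call filled
 * in nothing". That convention is an assumption made for this sketch,
 * not something the patch itself relies on.
 */
static int ist_present(const struct ist_info *info)
{
	return info->signature != 0;
}
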
Index: linux-rt-rebase.q/arch/ia64/ia32/ia32_support.c =================================================================== --- linux-rt-rebase.q.orig/arch/ia64/ia32/ia32_support.c +++ linux-rt-rebase.q/arch/ia64/ia32/ia32_support.c @@ -249,11 +249,11 @@ ia32_init (void) #if PAGE_SHIFT > IA32_PAGE_SHIFT { - extern struct kmem_cache *partial_page_cachep; + extern struct kmem_cache *ia64_partial_page_cachep; - partial_page_cachep = kmem_cache_create("partial_page_cache", - sizeof(struct partial_page), - 0, SLAB_PANIC, NULL); + ia64_partial_page_cachep = kmem_cache_create("ia64_partial_page_cache", + sizeof(struct ia64_partial_page), + 0, SLAB_PANIC, NULL); } #endif return 0; Index: linux-rt-rebase.q/arch/ia64/ia32/ia32priv.h =================================================================== --- linux-rt-rebase.q.orig/arch/ia64/ia32/ia32priv.h +++ linux-rt-rebase.q/arch/ia64/ia32/ia32priv.h @@ -25,8 +25,8 @@ * partially mapped pages provide precise accounting of which 4k sub pages * are mapped and which ones are not, thereby improving IA-32 compatibility. */ -struct partial_page { - struct partial_page *next; /* linked list, sorted by address */ +struct ia64_partial_page { + struct ia64_partial_page *next; /* linked list, sorted by address */ struct rb_node pp_rb; /* 64K is the largest "normal" page supported by ia64 ABI. So 4K*64 * should suffice.*/ @@ -34,17 +34,17 @@ struct partial_page { unsigned int base; }; -struct partial_page_list { - struct partial_page *pp_head; /* list head, points to the lowest +struct ia64_partial_page_list { + struct ia64_partial_page *pp_head; /* list head, points to the lowest * addressed partial page */ struct rb_root ppl_rb; - struct partial_page *pp_hint; /* pp_hint->next is the last + struct ia64_partial_page *pp_hint; /* pp_hint->next is the last * accessed partial page */ atomic_t pp_count; /* reference count */ }; #if PAGE_SHIFT > IA32_PAGE_SHIFT -struct partial_page_list* ia32_init_pp_list (void); +struct ia64_partial_page_list* ia32_init_pp_list (void); #else # define ia32_init_pp_list() 0 #endif Index: linux-rt-rebase.q/arch/ia64/ia32/sys_ia32.c =================================================================== --- linux-rt-rebase.q.orig/arch/ia64/ia32/sys_ia32.c +++ linux-rt-rebase.q/arch/ia64/ia32/sys_ia32.c @@ -253,17 +253,17 @@ mmap_subpage (struct file *file, unsigne return ret; } -/* SLAB cache for partial_page structures */ -struct kmem_cache *partial_page_cachep; +/* SLAB cache for ia64_partial_page structures */ +struct kmem_cache *ia64_partial_page_cachep; /* - * init partial_page_list. + * init ia64_partial_page_list. * return 0 means kmalloc fail. */ -struct partial_page_list* +struct ia64_partial_page_list* ia32_init_pp_list(void) { - struct partial_page_list *p; + struct ia64_partial_page_list *p; if ((p = kmalloc(sizeof(*p), GFP_KERNEL)) == NULL) return p; @@ -280,12 +280,12 @@ ia32_init_pp_list(void) * Else, return 0 and provide @pprev, @rb_link, @rb_parent to * be used by later __ia32_insert_pp(). 
*/ -static struct partial_page * -__ia32_find_pp(struct partial_page_list *ppl, unsigned int start, - struct partial_page **pprev, struct rb_node ***rb_link, +static struct ia64_partial_page * +__ia32_find_pp(struct ia64_partial_page_list *ppl, unsigned int start, + struct ia64_partial_page **pprev, struct rb_node ***rb_link, struct rb_node **rb_parent) { - struct partial_page *pp; + struct ia64_partial_page *pp; struct rb_node **__rb_link, *__rb_parent, *rb_prev; pp = ppl->pp_hint; @@ -297,7 +297,7 @@ __ia32_find_pp(struct partial_page_list while (*__rb_link) { __rb_parent = *__rb_link; - pp = rb_entry(__rb_parent, struct partial_page, pp_rb); + pp = rb_entry(__rb_parent, struct ia64_partial_page, pp_rb); if (pp->base == start) { ppl->pp_hint = pp; @@ -314,7 +314,7 @@ __ia32_find_pp(struct partial_page_list *rb_parent = __rb_parent; *pprev = NULL; if (rb_prev) - *pprev = rb_entry(rb_prev, struct partial_page, pp_rb); + *pprev = rb_entry(rb_prev, struct ia64_partial_page, pp_rb); return NULL; } @@ -322,9 +322,9 @@ __ia32_find_pp(struct partial_page_list * insert @pp into @ppl. */ static void -__ia32_insert_pp(struct partial_page_list *ppl, struct partial_page *pp, - struct partial_page *prev, struct rb_node **rb_link, - struct rb_node *rb_parent) +__ia32_insert_pp(struct ia64_partial_page_list *ppl, + struct ia64_partial_page *pp, struct ia64_partial_page *prev, + struct rb_node **rb_link, struct rb_node *rb_parent) { /* link list */ if (prev) { @@ -334,7 +334,7 @@ __ia32_insert_pp(struct partial_page_lis ppl->pp_head = pp; if (rb_parent) pp->next = rb_entry(rb_parent, - struct partial_page, pp_rb); + struct ia64_partial_page, pp_rb); else pp->next = NULL; } @@ -350,8 +350,8 @@ __ia32_insert_pp(struct partial_page_lis * delete @pp from partial page list @ppl. 
*/ static void -__ia32_delete_pp(struct partial_page_list *ppl, struct partial_page *pp, - struct partial_page *prev) +__ia32_delete_pp(struct ia64_partial_page_list *ppl, + struct ia64_partial_page *pp, struct ia64_partial_page *prev) { if (prev) { prev->next = pp->next; @@ -363,15 +363,15 @@ __ia32_delete_pp(struct partial_page_lis ppl->pp_hint = pp->next; } rb_erase(&pp->pp_rb, &ppl->ppl_rb); - kmem_cache_free(partial_page_cachep, pp); + kmem_cache_free(ia64_partial_page_cachep, pp); } -static struct partial_page * -__pp_prev(struct partial_page *pp) +static struct ia64_partial_page * +__pp_prev(struct ia64_partial_page *pp) { struct rb_node *prev = rb_prev(&pp->pp_rb); if (prev) - return rb_entry(prev, struct partial_page, pp_rb); + return rb_entry(prev, struct ia64_partial_page, pp_rb); else return NULL; } @@ -383,7 +383,7 @@ __pp_prev(struct partial_page *pp) static void __ia32_delete_pp_range(unsigned int start, unsigned int end) { - struct partial_page *pp, *prev; + struct ia64_partial_page *pp, *prev; struct rb_node **rb_link, *rb_parent; if (start >= end) @@ -401,7 +401,7 @@ __ia32_delete_pp_range(unsigned int star } while (pp && pp->base < end) { - struct partial_page *tmp = pp->next; + struct ia64_partial_page *tmp = pp->next; __ia32_delete_pp(current->thread.ppl, pp, prev); pp = tmp; } @@ -414,7 +414,7 @@ __ia32_delete_pp_range(unsigned int star static int __ia32_set_pp(unsigned int start, unsigned int end, int flags) { - struct partial_page *pp, *prev; + struct ia64_partial_page *pp, *prev; struct rb_node ** rb_link, *rb_parent; unsigned int pstart, start_bit, end_bit, i; @@ -450,8 +450,8 @@ __ia32_set_pp(unsigned int start, unsign return 0; } - /* new a partial_page */ - pp = kmem_cache_alloc(partial_page_cachep, GFP_KERNEL); + /* new a ia64_partial_page */ + pp = kmem_cache_alloc(ia64_partial_page_cachep, GFP_KERNEL); if (!pp) return -ENOMEM; pp->base = pstart; @@ -504,7 +504,7 @@ ia32_set_pp(unsigned int start, unsigned static int __ia32_unset_pp(unsigned int start, unsigned int end) { - struct partial_page *pp, *prev; + struct ia64_partial_page *pp, *prev; struct rb_node ** rb_link, *rb_parent; unsigned int pstart, start_bit, end_bit, i; struct vm_area_struct *vma; @@ -532,8 +532,8 @@ __ia32_unset_pp(unsigned int start, unsi return -ENOMEM; } - /* new a partial_page */ - pp = kmem_cache_alloc(partial_page_cachep, GFP_KERNEL); + /* new a ia64_partial_page */ + pp = kmem_cache_alloc(ia64_partial_page_cachep, GFP_KERNEL); if (!pp) return -ENOMEM; pp->base = pstart; @@ -605,7 +605,7 @@ ia32_unset_pp(unsigned int *startp, unsi static int __ia32_compare_pp(unsigned int start, unsigned int end) { - struct partial_page *pp, *prev; + struct ia64_partial_page *pp, *prev; struct rb_node ** rb_link, *rb_parent; unsigned int pstart, start_bit, end_bit, size; unsigned int first_bit, next_zero_bit; /* the first range in bitmap */ @@ -682,13 +682,13 @@ ia32_compare_pp(unsigned int *startp, un } static void -__ia32_drop_pp_list(struct partial_page_list *ppl) +__ia32_drop_pp_list(struct ia64_partial_page_list *ppl) { - struct partial_page *pp = ppl->pp_head; + struct ia64_partial_page *pp = ppl->pp_head; while (pp) { - struct partial_page *next = pp->next; - kmem_cache_free(partial_page_cachep, pp); + struct ia64_partial_page *next = pp->next; + kmem_cache_free(ia64_partial_page_cachep, pp); pp = next; } @@ -696,9 +696,9 @@ __ia32_drop_pp_list(struct partial_page_ } void -ia32_drop_partial_page_list(struct task_struct *task) +ia32_drop_ia64_partial_page_list(struct task_struct *task) { - 
struct partial_page_list* ppl = task->thread.ppl; + struct ia64_partial_page_list* ppl = task->thread.ppl; if (ppl && atomic_dec_and_test(&ppl->pp_count)) __ia32_drop_pp_list(ppl); @@ -708,9 +708,9 @@ ia32_drop_partial_page_list(struct task_ * Copy current->thread.ppl to ppl (already initialized). */ static int -__ia32_copy_pp_list(struct partial_page_list *ppl) +__ia32_copy_pp_list(struct ia64_partial_page_list *ppl) { - struct partial_page *pp, *tmp, *prev; + struct ia64_partial_page *pp, *tmp, *prev; struct rb_node **rb_link, *rb_parent; ppl->pp_head = NULL; @@ -721,7 +721,7 @@ __ia32_copy_pp_list(struct partial_page_ prev = NULL; for (pp = current->thread.ppl->pp_head; pp; pp = pp->next) { - tmp = kmem_cache_alloc(partial_page_cachep, GFP_KERNEL); + tmp = kmem_cache_alloc(ia64_partial_page_cachep, GFP_KERNEL); if (!tmp) return -ENOMEM; *tmp = *pp; @@ -734,7 +734,8 @@ __ia32_copy_pp_list(struct partial_page_ } int -ia32_copy_partial_page_list(struct task_struct *p, unsigned long clone_flags) +ia32_copy_ia64_partial_page_list(struct task_struct *p, + unsigned long clone_flags) { int retval = 0; Index: linux-rt-rebase.q/arch/ia64/kernel/acpi.c =================================================================== --- linux-rt-rebase.q.orig/arch/ia64/kernel/acpi.c +++ linux-rt-rebase.q/arch/ia64/kernel/acpi.c @@ -67,6 +67,8 @@ EXPORT_SYMBOL(pm_power_off); unsigned int acpi_cpei_override; unsigned int acpi_cpei_phys_cpuid; +unsigned long acpi_wakeup_address = 0; + const char __init * acpi_get_sysname(void) { @@ -986,4 +988,21 @@ int acpi_unregister_ioapic(acpi_handle h EXPORT_SYMBOL(acpi_unregister_ioapic); +/* + * acpi_save_state_mem() - save kernel state + * + * TBD when when IA64 starts to support suspend... + */ +int acpi_save_state_mem(void) { return 0; } + +/* + * acpi_restore_state() + */ +void acpi_restore_state_mem(void) {} + +/* + * do_suspend_lowlevel() + */ +void do_suspend_lowlevel(void) {} + #endif /* CONFIG_ACPI */ Index: linux-rt-rebase.q/arch/ia64/kernel/head.S =================================================================== --- linux-rt-rebase.q.orig/arch/ia64/kernel/head.S +++ linux-rt-rebase.q/arch/ia64/kernel/head.S @@ -178,7 +178,7 @@ swapper_pg_dir: halt_msg: stringz "Halting kernel\n" - .text + .section .text.head,"ax" .global start_ap @@ -392,6 +392,8 @@ self: hint @pause br.sptk.many self // endless loop END(_start) + .text + GLOBAL_ENTRY(ia64_save_debug_regs) alloc r16=ar.pfs,1,0,0,0 mov r20=ar.lc // preserve ar.lc Index: linux-rt-rebase.q/arch/ia64/kernel/irq_ia64.c =================================================================== --- linux-rt-rebase.q.orig/arch/ia64/kernel/irq_ia64.c +++ linux-rt-rebase.q/arch/ia64/kernel/irq_ia64.c @@ -85,8 +85,8 @@ DEFINE_PER_CPU(int[IA64_NUM_VECTORS], ve [0 ... IA64_NUM_VECTORS - 1] = IA64_SPURIOUS_INT_VECTOR }; -static cpumask_t vector_table[IA64_MAX_DEVICE_VECTORS] = { - [0 ... IA64_MAX_DEVICE_VECTORS - 1] = CPU_MASK_NONE +static cpumask_t vector_table[IA64_NUM_VECTORS] = { + [0 ... 
IA64_NUM_VECTORS - 1] = CPU_MASK_NONE }; static int irq_status[NR_IRQS] = { @@ -123,17 +123,18 @@ static inline int find_unassigned_irq(vo static inline int find_unassigned_vector(cpumask_t domain) { cpumask_t mask; - int pos; + int pos, vector; cpus_and(mask, domain, cpu_online_map); if (cpus_empty(mask)) return -EINVAL; for (pos = 0; pos < IA64_NUM_DEVICE_VECTORS; pos++) { - cpus_and(mask, domain, vector_table[pos]); + vector = IA64_FIRST_DEVICE_VECTOR + pos; + cpus_and(mask, domain, vector_table[vector]); if (!cpus_empty(mask)) continue; - return IA64_FIRST_DEVICE_VECTOR + pos; + return vector; } return -ENOSPC; } @@ -141,7 +142,7 @@ static inline int find_unassigned_vector static int __bind_irq_vector(int irq, int vector, cpumask_t domain) { cpumask_t mask; - int cpu, pos; + int cpu; struct irq_cfg *cfg = &irq_cfg[irq]; cpus_and(mask, domain, cpu_online_map); @@ -156,8 +157,7 @@ static int __bind_irq_vector(int irq, in cfg->vector = vector; cfg->domain = domain; irq_status[irq] = IRQ_USED; - pos = vector - IA64_FIRST_DEVICE_VECTOR; - cpus_or(vector_table[pos], vector_table[pos], domain); + cpus_or(vector_table[vector], vector_table[vector], domain); return 0; } @@ -174,7 +174,7 @@ int bind_irq_vector(int irq, int vector, static void __clear_irq_vector(int irq) { - int vector, cpu, pos; + int vector, cpu; cpumask_t mask; cpumask_t domain; struct irq_cfg *cfg = &irq_cfg[irq]; @@ -189,8 +189,7 @@ static void __clear_irq_vector(int irq) cfg->vector = IRQ_VECTOR_UNASSIGNED; cfg->domain = CPU_MASK_NONE; irq_status[irq] = IRQ_UNUSED; - pos = vector - IA64_FIRST_DEVICE_VECTOR; - cpus_andnot(vector_table[pos], vector_table[pos], domain); + cpus_andnot(vector_table[vector], vector_table[vector], domain); } static void clear_irq_vector(int irq) @@ -212,9 +211,6 @@ assign_irq_vector (int irq) vector = -ENOSPC; spin_lock_irqsave(&vector_lock, flags); - if (irq < 0) { - goto out; - } for_each_online_cpu(cpu) { domain = vector_allocation_domain(cpu); vector = find_unassigned_vector(domain); @@ -223,6 +219,8 @@ assign_irq_vector (int irq) } if (vector < 0) goto out; + if (irq == AUTO_ASSIGN) + irq = vector; BUG_ON(__bind_irq_vector(irq, vector, domain)); out: spin_unlock_irqrestore(&vector_lock, flags); Index: linux-rt-rebase.q/arch/ia64/kernel/machvec.c =================================================================== --- linux-rt-rebase.q.orig/arch/ia64/kernel/machvec.c +++ linux-rt-rebase.q/arch/ia64/kernel/machvec.c @@ -13,14 +13,6 @@ struct ia64_machine_vector ia64_mv; EXPORT_SYMBOL(ia64_mv); -static __initdata const char *mvec_name; -static __init int setup_mvec(char *s) -{ - mvec_name = s; - return 0; -} -early_param("machvec", setup_mvec); - static struct ia64_machine_vector * __init lookup_machvec (const char *name) { @@ -41,7 +33,7 @@ machvec_init (const char *name) struct ia64_machine_vector *mv; if (!name) - name = mvec_name ? mvec_name : acpi_get_sysname(); + name = acpi_get_sysname(); mv = lookup_machvec(name); if (!mv) panic("generic kernel failed to find machine vector for" @@ -51,6 +43,23 @@ machvec_init (const char *name) printk(KERN_INFO "booting generic kernel on platform %s\n", name); } +void __init +machvec_init_from_cmdline(const char *cmdline) +{ + char str[64]; + const char *start; + char *end; + + if (! 
(start = strstr(cmdline, "machvec=")) ) + return machvec_init(NULL); + + strlcpy(str, start + strlen("machvec="), sizeof(str)); + if ( (end = strchr(str, ' ')) ) + *end = '\0'; + + return machvec_init(str); +} + #endif /* CONFIG_IA64_GENERIC */ void Index: linux-rt-rebase.q/arch/ia64/kernel/process.c =================================================================== --- linux-rt-rebase.q.orig/arch/ia64/kernel/process.c +++ linux-rt-rebase.q/arch/ia64/kernel/process.c @@ -499,7 +499,8 @@ copy_thread (int nr, unsigned long clone /* Copy partially mapped page list */ if (!retval) - retval = ia32_copy_partial_page_list(p, clone_flags); + retval = ia32_copy_ia64_partial_page_list(p, + clone_flags); } #endif @@ -728,7 +729,7 @@ flush_thread (void) ia64_drop_fpu(current); #ifdef CONFIG_IA32_SUPPORT if (IS_IA32_PROCESS(task_pt_regs(current))) { - ia32_drop_partial_page_list(current); + ia32_drop_ia64_partial_page_list(current); current->thread.task_size = IA32_PAGE_OFFSET; set_fs(USER_DS); } @@ -754,7 +755,7 @@ exit_thread (void) pfm_release_debug_registers(current); #endif if (IS_IA32_PROCESS(task_pt_regs(current))) - ia32_drop_partial_page_list(current); + ia32_drop_ia64_partial_page_list(current); } unsigned long Index: linux-rt-rebase.q/arch/ia64/kernel/setup.c =================================================================== --- linux-rt-rebase.q.orig/arch/ia64/kernel/setup.c +++ linux-rt-rebase.q/arch/ia64/kernel/setup.c @@ -491,12 +491,17 @@ setup_arch (char **cmdline_p) efi_init(); io_port_init(); - parse_early_param(); - #ifdef CONFIG_IA64_GENERIC - machvec_init(NULL); + /* machvec needs to be parsed from the command line + * before parse_early_param() is called to ensure + * that ia64_mv is initialised before any command line + * settings may cause console setup to occur + */ + machvec_init_from_cmdline(*cmdline_p); #endif + parse_early_param(); + if (early_console_setup(*cmdline_p) == 0) mark_bsp_online(); Index: linux-rt-rebase.q/arch/ia64/kernel/smp.c =================================================================== --- linux-rt-rebase.q.orig/arch/ia64/kernel/smp.c +++ linux-rt-rebase.q/arch/ia64/kernel/smp.c @@ -468,7 +468,7 @@ smp_send_stop (void) send_IPI_allbutself(IPI_CPU_STOP); } -int __init +int setup_profiling_timer (unsigned int multiplier) { return -EINVAL; Index: linux-rt-rebase.q/arch/ia64/kernel/smpboot.c =================================================================== --- linux-rt-rebase.q.orig/arch/ia64/kernel/smpboot.c +++ linux-rt-rebase.q/arch/ia64/kernel/smpboot.c @@ -487,7 +487,7 @@ struct create_idle { int cpu; }; -void +void __cpuinit do_fork_idle(struct work_struct *work) { struct create_idle *c_idle = @@ -497,7 +497,7 @@ do_fork_idle(struct work_struct *work) complete(&c_idle->done); } -static int __devinit +static int __cpuinit do_boot_cpu (int sapicid, int cpu) { int timeout; @@ -808,7 +808,7 @@ set_cpu_sibling_map(int cpu) } } -int __devinit +int __cpuinit __cpu_up (unsigned int cpu) { int ret; Index: linux-rt-rebase.q/arch/ia64/kernel/vmlinux.lds.S =================================================================== --- linux-rt-rebase.q.orig/arch/ia64/kernel/vmlinux.lds.S +++ linux-rt-rebase.q/arch/ia64/kernel/vmlinux.lds.S @@ -50,6 +50,8 @@ SECTIONS KPROBES_TEXT *(.gnu.linkonce.t*) } + .text.head : AT(ADDR(.text.head) - LOAD_OFFSET) + { *(.text.head) } .text2 : AT(ADDR(.text2) - LOAD_OFFSET) { *(.text2) } #ifdef CONFIG_SMP Index: linux-rt-rebase.q/arch/ia64/pci/pci.c =================================================================== --- 
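
machvec_init_from_cmdline() has to run before parse_early_param() because early console setup may already dereference ia64_mv, as the new comment in setup_arch() explains; scanning the raw command line by hand is the price of that ordering. A userspace rendition of the same token extraction, with snprintf standing in for the kernel's strlcpy:

    #include <stdio.h>
    #include <string.h>

    static void parse_machvec(const char *cmdline, char *out, size_t len)
    {
        const char *start = strstr(cmdline, "machvec=");
        char *end;

        if (!start) {
            out[0] = '\0';                /* caller falls back to acpi_get_sysname() */
            return;
        }
        snprintf(out, len, "%s", start + strlen("machvec="));
        if ((end = strchr(out, ' ')))     /* terminate at the next option */
            *end = '\0';
    }

    int main(void)
    {
        char buf[64];

        parse_machvec("root=/dev/sda machvec=dig console=ttyS0", buf, sizeof(buf));
        printf("machvec: %s\n", buf);     /* prints "machvec: dig" */
        return 0;
    }
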
linux-rt-rebase.q.orig/arch/ia64/pci/pci.c +++ linux-rt-rebase.q/arch/ia64/pci/pci.c @@ -581,7 +581,7 @@ pcibios_align_resource (void *data, stru /* * PCI BIOS setup, always defaults to SAL interface */ -char * __init +char * __devinit pcibios_setup (char *str) { return str; Index: linux-rt-rebase.q/arch/m68knommu/Kconfig =================================================================== --- linux-rt-rebase.q.orig/arch/m68knommu/Kconfig +++ linux-rt-rebase.q/arch/m68knommu/Kconfig @@ -216,6 +216,18 @@ config XCOPILOT_BUGS help Support the bugs of Xcopilot. +config UC5272 + bool 'Arcturus Networks uC5272 dimm board support' + depends on M5272 + help + Support for the Arcturus Networks uC5272 dimm board. + +config UC5282 + bool "Arcturus Networks uC5282 board support" + depends on M528x + help + Support for the Arcturus Networks uC5282 dimm board. + config UCSIMM bool "uCsimm module support" depends on M68EZ328 @@ -342,6 +354,18 @@ config SOM5282EM depends on M528x help Support for the EMAC.Inc SOM5282EM module. + +config WILDFIRE + bool "Intec Automation Inc. WildFire board support" + depends on M528x + help + Support for the Intec Automation Inc. WildFire. + +config WILDFIREMOD + bool "Intec Automation Inc. WildFire module support" + depends on M528x + help + Support for the Intec Automation Inc. WildFire module. config ARN5307 bool "Arnewsh 5307 board support" Index: linux-rt-rebase.q/arch/m68knommu/Makefile =================================================================== --- linux-rt-rebase.q.orig/arch/m68knommu/Makefile +++ linux-rt-rebase.q/arch/m68knommu/Makefile @@ -26,6 +26,8 @@ platform-$(CONFIG_M5407) := 5407 PLATFORM := $(platform-y) board-$(CONFIG_PILOT) := pilot +board-$(CONFIG_UC5272) := UC5272 +board-$(CONFIG_UC5282) := UC5282 board-$(CONFIG_UCSIMM) := ucsimm board-$(CONFIG_UCDIMM) := ucdimm board-$(CONFIG_UCQUICC) := uCquicc Index: linux-rt-rebase.q/arch/m68knommu/kernel/dma.c =================================================================== --- linux-rt-rebase.q.orig/arch/m68knommu/kernel/dma.c +++ linux-rt-rebase.q/arch/m68knommu/kernel/dma.c @@ -8,6 +8,7 @@ #include #include #include +#include #include void *dma_alloc_coherent(struct device *dev, size_t size, Index: linux-rt-rebase.q/arch/m68knommu/kernel/setup.c =================================================================== --- linux-rt-rebase.q.orig/arch/m68knommu/kernel/setup.c +++ linux-rt-rebase.q/arch/m68knommu/kernel/setup.c @@ -132,6 +132,11 @@ void setup_arch(char **cmdline_p) config_BSP(&command_line[0], sizeof(command_line)); +#if defined(CONFIG_BOOTPARAM) + strncpy(&command_line[0], CONFIG_BOOTPARAM_STRING, sizeof(command_line)); + command_line[sizeof(command_line) - 1] = 0; +#endif + printk(KERN_INFO "\x0F\r\n\nuClinux/" CPU "\n"); #ifdef CONFIG_UCDIMM Index: linux-rt-rebase.q/arch/m68knommu/platform/5206/config.c =================================================================== --- linux-rt-rebase.q.orig/arch/m68knommu/platform/5206/config.c +++ linux-rt-rebase.q/arch/m68knommu/platform/5206/config.c @@ -98,14 +98,6 @@ int mcf_timerirqpending(int timer) void config_BSP(char *commandp, int size) { mcf_setimr(MCFSIM_IMR_MASKALL); - -#if defined(CONFIG_BOOTPARAM) - strncpy(commandp, CONFIG_BOOTPARAM_STRING, size); - commandp[size-1] = 0; -#else - memset(commandp, 0, size); -#endif - mach_sched_init = coldfire_timer_init; mach_tick = coldfire_tick; mach_gettimeoffset = coldfire_timer_offset; Index: linux-rt-rebase.q/arch/m68knommu/platform/5206e/config.c 
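
The CONFIG_BOOTPARAM handling that every ColdFire config_BSP() used to duplicate now lives once in m68knommu's setup_arch(): the BSP may still fetch a board default (for example from FLASH), and the compiled-in string then overrides it. Dropping the old #else memset() fallback assumes the buffer handed to config_BSP() is a zero-initialised static array. The consolidated flow, condensed from the hunks in this series:

    config_BSP(&command_line[0], sizeof(command_line));   /* board default, if any */
    #if defined(CONFIG_BOOTPARAM)
        /* compiled-in command line wins over the board default */
        strncpy(&command_line[0], CONFIG_BOOTPARAM_STRING, sizeof(command_line));
        command_line[sizeof(command_line) - 1] = 0;
    #endif
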
=================================================================== --- linux-rt-rebase.q.orig/arch/m68knommu/platform/5206e/config.c +++ linux-rt-rebase.q/arch/m68knommu/platform/5206e/config.c @@ -98,15 +98,10 @@ void config_BSP(char *commandp, int size { mcf_setimr(MCFSIM_IMR_MASKALL); -#if defined(CONFIG_BOOTPARAM) - strncpy(commandp, CONFIG_BOOTPARAM_STRING, size); - commandp[size-1] = 0; -#elif defined(CONFIG_NETtel) +#if defined(CONFIG_NETtel) /* Copy command line from FLASH to local buffer... */ memcpy(commandp, (char *) 0xf0004000, size); commandp[size-1] = 0; -#else - memset(commandp, 0, size); #endif /* CONFIG_NETtel */ mach_sched_init = coldfire_timer_init; Index: linux-rt-rebase.q/arch/m68knommu/platform/520x/config.c =================================================================== --- linux-rt-rebase.q.orig/arch/m68knommu/platform/520x/config.c +++ linux-rt-rebase.q/arch/m68knommu/platform/520x/config.c @@ -48,13 +48,6 @@ void mcf_autovector(unsigned int vec) void config_BSP(char *commandp, int size) { -#ifdef CONFIG_BOOTPARAM - strncpy(commandp, CONFIG_BOOTPARAM_STRING, size); - commandp[size-1] = 0; -#else - memset(commandp, 0, size); -#endif - mach_sched_init = coldfire_pit_init; mach_tick = coldfire_pit_tick; mach_gettimeoffset = coldfire_pit_offset; Index: linux-rt-rebase.q/arch/m68knommu/platform/523x/config.c =================================================================== --- linux-rt-rebase.q.orig/arch/m68knommu/platform/523x/config.c +++ linux-rt-rebase.q/arch/m68knommu/platform/523x/config.c @@ -63,14 +63,6 @@ void mcf_autovector(unsigned int vec) void config_BSP(char *commandp, int size) { mcf_disableall(); - -#ifdef CONFIG_BOOTPARAM - strncpy(commandp, CONFIG_BOOTPARAM_STRING, size); - commandp[size-1] = 0; -#else - memset(commandp, 0, size); -#endif - mach_sched_init = coldfire_pit_init; mach_tick = coldfire_pit_tick; mach_gettimeoffset = coldfire_pit_offset; Index: linux-rt-rebase.q/arch/m68knommu/platform/5249/config.c =================================================================== --- linux-rt-rebase.q.orig/arch/m68knommu/platform/5249/config.c +++ linux-rt-rebase.q/arch/m68knommu/platform/5249/config.c @@ -96,14 +96,6 @@ int mcf_timerirqpending(int timer) void config_BSP(char *commandp, int size) { mcf_setimr(MCFSIM_IMR_MASKALL); - -#if defined(CONFIG_BOOTPARAM) - strncpy(commandp, CONFIG_BOOTPARAM_STRING, size); - commandp[size-1] = 0; -#else - memset(commandp, 0, size); -#endif - mach_sched_init = coldfire_timer_init; mach_tick = coldfire_tick; mach_gettimeoffset = coldfire_timer_offset; Index: linux-rt-rebase.q/arch/m68knommu/platform/5272/config.c =================================================================== --- linux-rt-rebase.q.orig/arch/m68knommu/platform/5272/config.c +++ linux-rt-rebase.q/arch/m68knommu/platform/5272/config.c @@ -113,10 +113,7 @@ void config_BSP(char *commandp, int size mcf_disableall(); -#if defined(CONFIG_BOOTPARAM) - strncpy(commandp, CONFIG_BOOTPARAM_STRING, size); - commandp[size-1] = 0; -#elif defined(CONFIG_NETtel) || defined(CONFIG_SCALES) +#if defined(CONFIG_NETtel) || defined(CONFIG_SCALES) /* Copy command line from FLASH to local buffer... */ memcpy(commandp, (char *) 0xf0004000, size); commandp[size-1] = 0; @@ -128,8 +125,6 @@ void config_BSP(char *commandp, int size /* Copy command line from FLASH to local buffer... 
*/ memcpy(commandp, (char *) 0xf0010000, size); commandp[size-1] = 0; -#else - memset(commandp, 0, size); #endif mcf_timervector = 69; Index: linux-rt-rebase.q/arch/m68knommu/platform/527x/config.c =================================================================== --- linux-rt-rebase.q.orig/arch/m68knommu/platform/527x/config.c +++ linux-rt-rebase.q/arch/m68knommu/platform/527x/config.c @@ -63,14 +63,6 @@ void mcf_autovector(unsigned int vec) void config_BSP(char *commandp, int size) { mcf_disableall(); - -#ifdef CONFIG_BOOTPARAM - strncpy(commandp, CONFIG_BOOTPARAM_STRING, size); - commandp[size-1] = 0; -#else - memset(commandp, 0, size); -#endif - mach_sched_init = coldfire_pit_init; mach_tick = coldfire_pit_tick; mach_gettimeoffset = coldfire_pit_offset; Index: linux-rt-rebase.q/arch/m68knommu/platform/528x/config.c =================================================================== --- linux-rt-rebase.q.orig/arch/m68knommu/platform/528x/config.c +++ linux-rt-rebase.q/arch/m68knommu/platform/528x/config.c @@ -63,14 +63,6 @@ void mcf_autovector(unsigned int vec) void config_BSP(char *commandp, int size) { mcf_disableall(); - -#ifdef CONFIG_BOOTPARAM - strncpy(commandp, CONFIG_BOOTPARAM_STRING, size); - commandp[size-1] = 0; -#else - memset(commandp, 0, size); -#endif - mach_sched_init = coldfire_pit_init; mach_tick = coldfire_pit_tick; mach_gettimeoffset = coldfire_pit_offset; Index: linux-rt-rebase.q/arch/m68knommu/platform/5307/config.c =================================================================== --- linux-rt-rebase.q.orig/arch/m68knommu/platform/5307/config.c +++ linux-rt-rebase.q/arch/m68knommu/platform/5307/config.c @@ -111,10 +111,7 @@ void config_BSP(char *commandp, int size { mcf_setimr(MCFSIM_IMR_MASKALL); -#if defined(CONFIG_BOOTPARAM) - strncpy(commandp, CONFIG_BOOTPARAM_STRING, size); - commandp[size-1] = 0; -#elif defined(CONFIG_NETtel) || defined(CONFIG_eLIA) || \ +#if defined(CONFIG_NETtel) || defined(CONFIG_eLIA) || \ defined(CONFIG_DISKtel) || defined(CONFIG_SECUREEDGEMP3) || \ defined(CONFIG_CLEOPATRA) /* Copy command line from FLASH to local buffer... */ @@ -124,8 +121,6 @@ void config_BSP(char *commandp, int size mcf_timervector = 30; mcf_profilevector = 31; mcf_timerlevel = 6; -#else - memset(commandp, 0, size); #endif mach_sched_init = coldfire_timer_init; Index: linux-rt-rebase.q/arch/m68knommu/platform/5307/entry.S =================================================================== --- linux-rt-rebase.q.orig/arch/m68knommu/platform/5307/entry.S +++ linux-rt-rebase.q/arch/m68knommu/platform/5307/entry.S @@ -213,16 +213,12 @@ ENTRY(ret_from_interrupt) * Beware - when entering resume, prev (the current task) is * in a0, next (the new task) is in a1,so don't change these * registers until their contents are no longer needed. + * This is always called in supervisor mode, so don't bother to save + * and restore sr; user's process sr is actually in the stack. 
*/ ENTRY(resume) movel %a0, %d1 /* get prev thread in d1 */ - movew %sr,%d0 /* save thread status reg */ - movew %d0,%a0@(TASK_THREAD+THREAD_SR) - - oril #0x700,%d0 /* disable interrupts */ - move %d0,%sr - movel sw_usp,%d0 /* save usp */ movel %d0,%a0@(TASK_THREAD+THREAD_USP) @@ -233,7 +229,4 @@ ENTRY(resume) movel %a1@(TASK_THREAD+THREAD_USP),%a0 /* restore thread user stack */ movel %a0, sw_usp - - movew %a1@(TASK_THREAD+THREAD_SR),%d0 /* restore thread status reg */ - movew %d0, %sr rts Index: linux-rt-rebase.q/arch/m68knommu/platform/532x/config.c =================================================================== --- linux-rt-rebase.q.orig/arch/m68knommu/platform/532x/config.c +++ linux-rt-rebase.q/arch/m68knommu/platform/532x/config.c @@ -92,10 +92,7 @@ void config_BSP(char *commandp, int size { mcf_setimr(MCFSIM_IMR_MASKALL); -#if defined(CONFIG_BOOTPARAM) - strncpy(commandp, CONFIG_BOOTPARAM_STRING, size); - commandp[size-1] = 0; -#else +#if !defined(CONFIG_BOOTPARAM) /* Copy command line from FLASH to local buffer... */ memcpy(commandp, (char *) 0x4000, 4); if(strncmp(commandp, "kcl ", 4) == 0){ Index: linux-rt-rebase.q/arch/m68knommu/platform/5407/config.c =================================================================== --- linux-rt-rebase.q.orig/arch/m68knommu/platform/5407/config.c +++ linux-rt-rebase.q/arch/m68knommu/platform/5407/config.c @@ -102,13 +102,6 @@ void config_BSP(char *commandp, int size { mcf_setimr(MCFSIM_IMR_MASKALL); -#if defined(CONFIG_BOOTPARAM) - strncpy(commandp, CONFIG_BOOTPARAM_STRING, size); - commandp[size-1] = 0; -#else - memset(commandp, 0, size); -#endif - #if defined(CONFIG_CLEOPATRA) /* Different timer setup - to prevent device clash */ mcf_timervector = 30; Index: linux-rt-rebase.q/arch/m68knommu/platform/68VZ328/config.c =================================================================== --- linux-rt-rebase.q.orig/arch/m68knommu/platform/68VZ328/config.c +++ linux-rt-rebase.q/arch/m68knommu/platform/68VZ328/config.c @@ -191,13 +191,6 @@ void config_BSP(char *command, int size) { printk(KERN_INFO "68VZ328 DragonBallVZ support (c) 2001 Lineo, Inc.\n"); -#if defined(CONFIG_BOOTPARAM) - strncpy(command, CONFIG_BOOTPARAM_STRING, size); - command[size-1] = 0; -#else - memset(command, 0, size); -#endif - init_hardware(command, size); mach_sched_init = (void *) m68328_timer_init; Index: linux-rt-rebase.q/arch/mips/Makefile =================================================================== --- linux-rt-rebase.q.orig/arch/mips/Makefile +++ linux-rt-rebase.q/arch/mips/Makefile @@ -328,7 +328,7 @@ load-$(CONFIG_MIPS_SEAD) += 0xffffffff80 # MIPS SIM # core-$(CONFIG_MIPS_SIM) += arch/mips/mipssim/ -cflags-$(CONFIG_MIPS_SIM) += -Iinclude/asm-mips/mach-sim +cflags-$(CONFIG_MIPS_SIM) += -Iinclude/asm-mips/mach-mipssim load-$(CONFIG_MIPS_SIM) += 0x80100000 # Index: linux-rt-rebase.q/arch/mips/arc/console.c =================================================================== --- linux-rt-rebase.q.orig/arch/mips/arc/console.c +++ /dev/null @@ -1,31 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 1996 David S. Miller (dm@sgi.com) - * Compability with board caches, Ulf Carlsson - */ -#include -#include -#include - -/* - * IP22 boardcache is not compatible with board caches. Thus we disable it - * during romvec action. 
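
Dropping the %sr save/restore from resume() relies on the invariant spelled out in the new comment: the scheduler always enters it in supervisor mode, and the user-visible status register travels with the exception frame on the stack, so it is restored by the normal return-from-exception path rather than by the context switch. The oril #0x700 interrupt disable goes away with it, so resume() now runs at whatever interrupt level its caller holds. Illustrative frame layout only; the field order here is a sketch, not the exact m68k format:

    struct exception_frame_sketch {
        unsigned short sr;      /* user status register, saved per exception */
        unsigned long  pc;      /* user program counter                      */
        unsigned short vector;  /* format/vector word                        */
    };
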
Since r4xx0.c is always compiled and linked with your - * kernel, this shouldn't cause any harm regardless what MIPS processor you - * have. - * - * The ARC write and read functions seem to interfere with the serial lines - * in some way. You should be careful with them. - */ - -void prom_putchar(char c) -{ - ULONG cnt; - CHAR it = c; - - bc_disable(); - ArcWrite(1, &it, 1, &cnt); - bc_enable(); -} Index: linux-rt-rebase.q/arch/mips/jazz/io.c =================================================================== --- linux-rt-rebase.q.orig/arch/mips/jazz/io.c +++ /dev/null @@ -1,135 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Low level I/O functions for Jazz family machines. - * - * Copyright (C) 1997 by Ralf Baechle. - */ -#include -#include -#include -#include -#include - -/* - * Map an 16mb segment of the EISA address space to 0xe3000000; - */ -static inline void map_eisa_address(unsigned long address) -{ - /* XXX */ - /* We've got an wired entry in the TLB. We just need to modify it. - fast and clean. But since we want to get rid of wired entries - things are a little bit more complicated ... */ -} - -static unsigned char jazz_readb(unsigned long addr) -{ - unsigned char res; - - map_eisa_address(addr); - addr &= 0xffffff; - res = *(volatile unsigned char *) (JAZZ_EISA_BASE + addr); - - return res; -} - -static unsigned short jazz_readw(unsigned long addr) -{ - unsigned short res; - - map_eisa_address(addr); - addr &= 0xffffff; - res = *(volatile unsigned char *) (JAZZ_EISA_BASE + addr); - - return res; -} - -static unsigned int jazz_readl(unsigned long addr) -{ - unsigned int res; - - map_eisa_address(addr); - addr &= 0xffffff; - res = *(volatile unsigned char *) (JAZZ_EISA_BASE + addr); - - return res; -} - -static void jazz_writeb(unsigned char val, unsigned long addr) -{ - map_eisa_address(addr); - addr &= 0xffffff; - *(volatile unsigned char *) (JAZZ_EISA_BASE + addr) = val; -} - -static void jazz_writew(unsigned short val, unsigned long addr) -{ - map_eisa_address(addr); - addr &= 0xffffff; - *(volatile unsigned char *) (JAZZ_EISA_BASE + addr) = val; -} - -static void jazz_writel(unsigned int val, unsigned long addr) -{ - map_eisa_address(addr); - addr &= 0xffffff; - *(volatile unsigned char *) (JAZZ_EISA_BASE + addr) = val; -} - -static void jazz_memset_io(unsigned long addr, int val, unsigned long len) -{ - unsigned long waddr; - - waddr = JAZZ_EISA_BASE | (addr & 0xffffff); - while(len) { - unsigned long fraglen; - - fraglen = (~addr + 1) & 0xffffff; - fraglen = (fraglen < len) ? fraglen : len; - map_eisa_address(addr); - memset((char *)waddr, val, fraglen); - addr += fraglen; - waddr = waddr + fraglen - 0x1000000; - len -= fraglen; - } -} - -static void jazz_memcpy_fromio(unsigned long to, unsigned long from, unsigned long len) -{ - unsigned long waddr; - - waddr = JAZZ_EISA_BASE | (from & 0xffffff); - while(len) { - unsigned long fraglen; - - fraglen = (~from + 1) & 0xffffff; - fraglen = (fraglen < len) ? 
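
The deleted Jazz helpers stream all accesses through a single 16 MB EISA window, so each pass may only touch bytes up to the next 16 MB boundary; that is what the (~addr + 1) & 0xffffff expression in the removed code computes. A runnable check of the arithmetic:

    #include <assert.h>

    static unsigned long bytes_to_16mb_boundary(unsigned long addr)
    {
        return (~addr + 1) & 0xffffff;   /* low 24 bits of -addr */
    }

    int main(void)
    {
        assert(bytes_to_16mb_boundary(0x00ffff00) == 0x100);    /* 256 bytes left        */
        assert(bytes_to_16mb_boundary(0x00000001) == 0xffffff); /* almost a full window  */
        assert(bytes_to_16mb_boundary(0x01000000) == 0);        /* quirk: aligned gives 0 */
        return 0;
    }
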
fraglen : len; - map_eisa_address(from); - memcpy((void *)to, (void *)waddr, fraglen); - to += fraglen; - from += fraglen; - waddr = waddr + fraglen - 0x1000000; - len -= fraglen; - } -} - -static void jazz_memcpy_toio(unsigned long to, unsigned long from, unsigned long len) -{ - unsigned long waddr; - - waddr = JAZZ_EISA_BASE | (to & 0xffffff); - while(len) { - unsigned long fraglen; - - fraglen = (~to + 1) & 0xffffff; - fraglen = (fraglen < len) ? fraglen : len; - map_eisa_address(to); - memcpy((char *)to + JAZZ_EISA_BASE, (void *)from, fraglen); - to += fraglen; - from += fraglen; - waddr = waddr + fraglen - 0x1000000; - len -= fraglen; - } -} Index: linux-rt-rebase.q/arch/mips/jazz/reset.c =================================================================== --- linux-rt-rebase.q.orig/arch/mips/jazz/reset.c +++ linux-rt-rebase.q/arch/mips/jazz/reset.c @@ -6,10 +6,6 @@ */ #include #include -#include -#include -#include -#include #define KBD_STAT_IBF 0x02 /* Keyboard input buffer full */ @@ -58,12 +54,3 @@ void jazz_machine_restart(char *command) jazz_write_output (0x00); } } - -void jazz_machine_halt(void) -{ -} - -void jazz_machine_power_off(void) -{ - /* Jazz machines don't have a software power switch */ -} Index: linux-rt-rebase.q/arch/mips/jazz/setup.c =================================================================== --- linux-rt-rebase.q.orig/arch/mips/jazz/setup.c +++ linux-rt-rebase.q/arch/mips/jazz/setup.c @@ -34,8 +34,6 @@ extern asmlinkage void jazz_handle_int(void); extern void jazz_machine_restart(char *command); -extern void jazz_machine_halt(void); -extern void jazz_machine_power_off(void); void __init plat_timer_setup(struct irqaction *irq) { @@ -95,8 +93,6 @@ void __init plat_mem_setup(void) /* The RTC is outside the port address space */ _machine_restart = jazz_machine_restart; - _machine_halt = jazz_machine_halt; - pm_power_off = jazz_machine_power_off; screen_info = (struct screen_info) { 0, 0, /* orig-x, orig-y */ Index: linux-rt-rebase.q/arch/mips/jmr3927/rbhma3100/setup.c =================================================================== --- linux-rt-rebase.q.orig/arch/mips/jmr3927/rbhma3100/setup.c +++ linux-rt-rebase.q/arch/mips/jmr3927/rbhma3100/setup.c @@ -434,7 +434,7 @@ EXPORT_SYMBOL(__swizzle_addr_b); static int __init jmr3927_rtc_init(void) { - struct resource res = { + static struct resource __initdata res = { .start = JMR3927_IOC_NVRAMB_ADDR - IO_BASE, .end = JMR3927_IOC_NVRAMB_ADDR - IO_BASE + 0x800 - 1, .flags = IORESOURCE_MEM, Index: linux-rt-rebase.q/arch/mips/kernel/gdb-stub.c =================================================================== --- linux-rt-rebase.q.orig/arch/mips/kernel/gdb-stub.c +++ linux-rt-rebase.q/arch/mips/kernel/gdb-stub.c @@ -1099,12 +1099,12 @@ void adel(void) * malloc is needed by gdb client in "call func()", even a private one * will make gdb happy */ -static void * __attribute_used__ malloc(size_t size) +static void __used *malloc(size_t size) { return kmalloc(size, GFP_ATOMIC); } -static void __attribute_used__ free (void *where) +static void __used free(void *where) { kfree(where); } Index: linux-rt-rebase.q/arch/mips/kernel/head.S =================================================================== --- linux-rt-rebase.q.orig/arch/mips/kernel/head.S +++ linux-rt-rebase.q/arch/mips/kernel/head.S @@ -141,7 +141,7 @@ EXPORT(stext) # used for profiling EXPORT(_stext) -#ifdef CONFIG_BOOT_RAW +#ifndef CONFIG_BOOT_RAW /* * Give us a fighting chance of running if execution beings at the * kernel load address. 
This is needed because this platform does Index: linux-rt-rebase.q/arch/mips/kernel/linux32.c =================================================================== --- linux-rt-rebase.q.orig/arch/mips/kernel/linux32.c +++ linux-rt-rebase.q/arch/mips/kernel/linux32.c @@ -567,7 +567,7 @@ asmlinkage long sys32_fadvise64_64(int f } save_static_function(sys32_clone); -__attribute_used__ noinline static int +static int noinline __used _sys32_clone(nabi_no_regargs struct pt_regs regs) { unsigned long clone_flags; Index: linux-rt-rebase.q/arch/mips/kernel/rtlx.c =================================================================== --- linux-rt-rebase.q.orig/arch/mips/kernel/rtlx.c +++ linux-rt-rebase.q/arch/mips/kernel/rtlx.c @@ -85,7 +85,7 @@ static irqreturn_t rtlx_interrupt(int ir return IRQ_HANDLED; } -static __attribute_used__ void dump_rtlx(void) +static void __used dump_rtlx(void) { int i; Index: linux-rt-rebase.q/arch/mips/kernel/syscall.c =================================================================== --- linux-rt-rebase.q.orig/arch/mips/kernel/syscall.c +++ linux-rt-rebase.q/arch/mips/kernel/syscall.c @@ -167,14 +167,14 @@ sys_mmap2(unsigned long addr, unsigned l } save_static_function(sys_fork); -__attribute_used__ noinline static int +static int __used noinline _sys_fork(nabi_no_regargs struct pt_regs regs) { return do_fork(SIGCHLD, regs.regs[29], ®s, 0, NULL, NULL); } save_static_function(sys_clone); -__attribute_used__ noinline static int +static int __used noinline _sys_clone(nabi_no_regargs struct pt_regs regs) { unsigned long clone_flags; Index: linux-rt-rebase.q/arch/mips/kernel/vpe.c =================================================================== --- linux-rt-rebase.q.orig/arch/mips/kernel/vpe.c +++ linux-rt-rebase.q/arch/mips/kernel/vpe.c @@ -154,7 +154,6 @@ struct { }; static void release_progmem(void *ptr); -/* static __attribute_used__ void dump_vpe(struct vpe * v); */ extern void save_gp_address(unsigned int secbase, unsigned int rel); /* get the vpe associated with this minor */ @@ -1024,7 +1023,7 @@ static int vpe_elfload(struct vpe * v) return 0; } -__attribute_used__ void dump_vpe(struct vpe * v) +void __used dump_vpe(struct vpe * v) { struct tc *t; Index: linux-rt-rebase.q/arch/mips/mm/c-sb1.c =================================================================== --- linux-rt-rebase.q.orig/arch/mips/mm/c-sb1.c +++ linux-rt-rebase.q/arch/mips/mm/c-sb1.c @@ -272,7 +272,7 @@ void sb1_flush_cache_data_page(unsigned /* * Invalidate all caches on this CPU */ -static void __attribute_used__ local_sb1___flush_cache_all(void) +static void __used local_sb1___flush_cache_all(void) { __sb1_writeback_inv_dcache_all(); __sb1_flush_icache_all(); Index: linux-rt-rebase.q/arch/mips/mm/init.c =================================================================== --- linux-rt-rebase.q.orig/arch/mips/mm/init.c +++ linux-rt-rebase.q/arch/mips/mm/init.c @@ -484,7 +484,7 @@ void free_initrd_mem(unsigned long start } #endif -void free_initmem(void) +void __init_refok free_initmem(void) { prom_free_prom_memory(); free_init_pages("unused kernel memory", Index: linux-rt-rebase.q/arch/mips/sni/sniprom.c =================================================================== --- linux-rt-rebase.q.orig/arch/mips/sni/sniprom.c +++ linux-rt-rebase.q/arch/mips/sni/sniprom.c @@ -19,6 +19,7 @@ #include #include #include +#include #include /* special SNI prom calls */ @@ -71,7 +72,7 @@ const char *get_system_type(void) #define SNI_IDPROM_SIZE 0x1000 #ifdef DEBUG -static void sni_idprom_dump(void) +static void 
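
The __attribute_used__ to __used conversions across these MIPS files are more than a spelling change: __used marks symbols that have no C callers at all, such as the malloc()/free() pair gdb's "call func()" relies on in gdb-stub.c, or the save_static_function() assembly wrappers, so the optimizer must not discard them. A minimal demonstration, with __used expanded roughly as the kernel defines it for gcc:

    #define __used __attribute__((used))

    /* survives gcc -O2 with no C callers, so a debugger can
     * still "call helper(41)" against the resulting object */
    static int __used helper(int x)
    {
        return x + 1;
    }

    int main(void)
    {
        return 0;
    }
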
__init sni_idprom_dump(void) { int i; @@ -88,7 +89,7 @@ static void sni_idprom_dump(void) } #endif -static void sni_mem_init(void ) +static void __init sni_mem_init(void ) { int i, memsize; struct membank { Index: linux-rt-rebase.q/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_setup.c =================================================================== --- linux-rt-rebase.q.orig/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_setup.c +++ linux-rt-rebase.q/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_setup.c @@ -1020,7 +1020,7 @@ void __init toshiba_rbtx4927_timer_setup static int __init toshiba_rbtx4927_rtc_init(void) { - struct resource res = { + static struct resource __initdata res = { .start = 0x1c010000, .end = 0x1c010000 + 0x800 - 1, .flags = IORESOURCE_MEM, Index: linux-rt-rebase.q/arch/powerpc/Kconfig =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/Kconfig +++ linux-rt-rebase.q/arch/powerpc/Kconfig @@ -411,11 +411,6 @@ config PPC_INDIRECT_PCI default y if 40x || 44x default n -config PPC_INDIRECT_PCI_BE - bool - depends PPC_INDIRECT_PCI - default n - config EISA bool @@ -425,6 +420,10 @@ config SBUS config FSL_SOC bool +config FSL_PCI + bool + select PPC_INDIRECT_PCI + # Yes MCA RS/6000s exist but Linux-PPC does not currently support any config MCA bool Index: linux-rt-rebase.q/arch/powerpc/boot/dts/mpc8313erdb.dts =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/boot/dts/mpc8313erdb.dts +++ linux-rt-rebase.q/arch/powerpc/boot/dts/mpc8313erdb.dts @@ -178,7 +178,7 @@ #size-cells = <2>; #address-cells = <3>; reg = <8500 100>; - compatible = "83xx"; + compatible = "fsl,mpc8349-pci"; device_type = "pci"; }; Index: linux-rt-rebase.q/arch/powerpc/boot/dts/mpc832x_mds.dts =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/boot/dts/mpc832x_mds.dts +++ linux-rt-rebase.q/arch/powerpc/boot/dts/mpc832x_mds.dts @@ -154,7 +154,7 @@ #size-cells = <2>; #address-cells = <3>; reg = <8500 100>; - compatible = "83xx"; + compatible = "fsl,mpc8349-pci"; device_type = "pci"; }; Index: linux-rt-rebase.q/arch/powerpc/boot/dts/mpc832x_rdb.dts =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/boot/dts/mpc832x_rdb.dts +++ linux-rt-rebase.q/arch/powerpc/boot/dts/mpc832x_rdb.dts @@ -123,7 +123,7 @@ #size-cells = <2>; #address-cells = <3>; reg = <8500 100>; - compatible = "83xx"; + compatible = "fsl,mpc8349-pci"; device_type = "pci"; }; Index: linux-rt-rebase.q/arch/powerpc/boot/dts/mpc8349emitx.dts =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/boot/dts/mpc8349emitx.dts +++ linux-rt-rebase.q/arch/powerpc/boot/dts/mpc8349emitx.dts @@ -197,7 +197,7 @@ #size-cells = <2>; #address-cells = <3>; reg = <8500 100>; - compatible = "83xx"; + compatible = "fsl,mpc8349-pci"; device_type = "pci"; }; @@ -222,7 +222,7 @@ #size-cells = <2>; #address-cells = <3>; reg = <8600 100>; - compatible = "83xx"; + compatible = "fsl,mpc8349-pci"; device_type = "pci"; }; Index: linux-rt-rebase.q/arch/powerpc/boot/dts/mpc8349emitxgp.dts =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/boot/dts/mpc8349emitxgp.dts +++ linux-rt-rebase.q/arch/powerpc/boot/dts/mpc8349emitxgp.dts @@ -154,7 +154,7 @@ #size-cells = <2>; #address-cells = <3>; reg = <8600 100>; - compatible = "83xx"; + 
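
Replacing the catch-all "83xx"/"85xx"/"86xx" compatible values with "fsl,<chip>-pci" style strings is what lets the new FSL_PCI code (selected in arch/powerpc/Kconfig above) bind per controller type instead of per family. A hypothetical match table showing how a driver would consume the strings introduced here; the table name and the assumed <linux/mod_devicetable.h> layout are illustrative, not taken from this series:

    static const struct of_device_id fsl_pci_ids_sketch[] = {
        { .compatible = "fsl,mpc8349-pci",  },  /* 83xx PCI         */
        { .compatible = "fsl,mpc8540-pci",  },  /* 85xx PCI         */
        { .compatible = "fsl,mpc8540-pcix", },  /* 85xx PCI-X       */
        { .compatible = "fsl,mpc8548-pcie", },  /* 85xx PCI Express */
        { .compatible = "fsl,mpc8641-pcie", },  /* 86xx PCI Express */
        {},
    };
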
compatible = "fsl,mpc8349-pci"; device_type = "pci"; }; Index: linux-rt-rebase.q/arch/powerpc/boot/dts/mpc834x_mds.dts =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/boot/dts/mpc834x_mds.dts +++ linux-rt-rebase.q/arch/powerpc/boot/dts/mpc834x_mds.dts @@ -241,7 +241,7 @@ #size-cells = <2>; #address-cells = <3>; reg = <8500 100>; - compatible = "83xx"; + compatible = "fsl,mpc8349-pci"; device_type = "pci"; }; @@ -301,7 +301,7 @@ #size-cells = <2>; #address-cells = <3>; reg = <8600 100>; - compatible = "83xx"; + compatible = "fsl,mpc8349-pci"; device_type = "pci"; }; Index: linux-rt-rebase.q/arch/powerpc/boot/dts/mpc836x_mds.dts =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/boot/dts/mpc836x_mds.dts +++ linux-rt-rebase.q/arch/powerpc/boot/dts/mpc836x_mds.dts @@ -169,7 +169,7 @@ #size-cells = <2>; #address-cells = <3>; reg = <8500 100>; - compatible = "83xx"; + compatible = "fsl,mpc8349-pci"; device_type = "pci"; }; Index: linux-rt-rebase.q/arch/powerpc/boot/dts/mpc8540ads.dts =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/boot/dts/mpc8540ads.dts +++ linux-rt-rebase.q/arch/powerpc/boot/dts/mpc8540ads.dts @@ -258,7 +258,7 @@ #size-cells = <2>; #address-cells = <3>; reg = <8000 1000>; - compatible = "85xx"; + compatible = "fsl,mpc8540-pcix", "fsl,mpc8540-pci"; device_type = "pci"; }; Index: linux-rt-rebase.q/arch/powerpc/boot/dts/mpc8541cds.dts =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/boot/dts/mpc8541cds.dts +++ linux-rt-rebase.q/arch/powerpc/boot/dts/mpc8541cds.dts @@ -193,7 +193,7 @@ #size-cells = <2>; #address-cells = <3>; reg = <8000 1000>; - compatible = "85xx"; + compatible = "fsl,mpc8540-pci"; device_type = "pci"; i8259@19000 { @@ -230,7 +230,7 @@ #size-cells = <2>; #address-cells = <3>; reg = <9000 1000>; - compatible = "85xx"; + compatible = "fsl,mpc8540-pci"; device_type = "pci"; }; Index: linux-rt-rebase.q/arch/powerpc/boot/dts/mpc8544ds.dts =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/boot/dts/mpc8544ds.dts +++ linux-rt-rebase.q/arch/powerpc/boot/dts/mpc8544ds.dts @@ -137,6 +137,217 @@ interrupt-parent = <&mpic>; }; + pci@8000 { + compatible = "fsl,mpc8540-pci"; + device_type = "pci"; + interrupt-map-mask = ; + interrupt-map = < + + /* IDSEL 0x11 J17 Slot 1 */ + 8800 0 0 1 &mpic 2 1 + 8800 0 0 2 &mpic 3 1 + 8800 0 0 3 &mpic 4 1 + 8800 0 0 4 &mpic 1 1 + + /* IDSEL 0x12 J16 Slot 2 */ + + 9000 0 0 1 &mpic 3 1 + 9000 0 0 2 &mpic 4 1 + 9000 0 0 3 &mpic 2 1 + 9000 0 0 4 &mpic 1 1>; + + interrupt-parent = <&mpic>; + interrupts = <18 2>; + bus-range = <0 ff>; + ranges = <02000000 0 80000000 80000000 0 10000000 + 01000000 0 00000000 e2000000 0 00800000>; + clock-frequency = <3f940aa>; + #interrupt-cells = <1>; + #size-cells = <2>; + #address-cells = <3>; + reg = <8000 1000>; + }; + + pcie@9000 { + compatible = "fsl,mpc8548-pcie"; + device_type = "pci"; + #interrupt-cells = <1>; + #size-cells = <2>; + #address-cells = <3>; + reg = <9000 1000>; + bus-range = <0 ff>; + ranges = <02000000 0 90000000 90000000 0 10000000 + 01000000 0 00000000 e3000000 0 00800000>; + clock-frequency = <1fca055>; + interrupt-parent = <&mpic>; + interrupts = <1a 2>; + interrupt-map-mask = ; + interrupt-map = < + /* IDSEL 0x0 */ + 0000 0 0 1 &mpic 4 1 + 0000 0 0 2 &mpic 5 1 + 0000 0 0 3 &mpic 6 1 + 0000 0 0 4 
&mpic 7 1 + >; + }; + + pcie@a000 { + compatible = "fsl,mpc8548-pcie"; + device_type = "pci"; + #interrupt-cells = <1>; + #size-cells = <2>; + #address-cells = <3>; + reg = ; + bus-range = <0 ff>; + ranges = <02000000 0 a0000000 a0000000 0 10000000 + 01000000 0 00000000 e2800000 0 00800000>; + clock-frequency = <1fca055>; + interrupt-parent = <&mpic>; + interrupts = <19 2>; + interrupt-map-mask = ; + interrupt-map = < + /* IDSEL 0x0 */ + 0000 0 0 1 &mpic 0 1 + 0000 0 0 2 &mpic 1 1 + 0000 0 0 3 &mpic 2 1 + 0000 0 0 4 &mpic 3 1 + >; + }; + + pcie@b000 { + compatible = "fsl,mpc8548-pcie"; + device_type = "pci"; + #interrupt-cells = <1>; + #size-cells = <2>; + #address-cells = <3>; + reg = ; + bus-range = <0 ff>; + ranges = <02000000 0 b0000000 b0000000 0 10000000 + 01000000 0 00000000 e3800000 0 00800000>; + clock-frequency = <1fca055>; + interrupt-parent = <&mpic>; + interrupts = <1b 2>; + interrupt-map-mask = ; + interrupt-map = < + + // IDSEL 0x1a + d000 0 0 1 &i8259 6 2 + d000 0 0 2 &i8259 3 2 + d000 0 0 3 &i8259 4 2 + d000 0 0 4 &i8259 5 2 + + // IDSEL 0x1b + d800 0 0 1 &i8259 5 2 + d800 0 0 2 &i8259 0 0 + d800 0 0 3 &i8259 0 0 + d800 0 0 4 &i8259 0 0 + + // IDSEL 0x1c USB + e000 0 0 1 &i8259 9 2 + e000 0 0 2 &i8259 a 2 + e000 0 0 3 &i8259 c 2 + e000 0 0 4 &i8259 7 2 + + // IDSEL 0x1d Audio + e800 0 0 1 &i8259 9 2 + e800 0 0 2 &i8259 a 2 + e800 0 0 3 &i8259 b 2 + e800 0 0 4 &i8259 0 0 + + // IDSEL 0x1e Legacy + f000 0 0 1 &i8259 c 2 + f000 0 0 2 &i8259 0 0 + f000 0 0 3 &i8259 0 0 + f000 0 0 4 &i8259 0 0 + + // IDSEL 0x1f IDE/SATA + f800 0 0 1 &i8259 6 2 + f800 0 0 2 &i8259 0 0 + f800 0 0 3 &i8259 0 0 + f800 0 0 4 &i8259 0 0 + >; + uli1575@0 { + reg = <0 0 0 0 0>; + #size-cells = <2>; + #address-cells = <3>; + ranges = <02000000 0 b0000000 + 02000000 0 b0000000 + 0 10000000 + 01000000 0 00000000 + 01000000 0 00000000 + 0 00080000>; + + pci_bridge@0 { + reg = <0 0 0 0 0>; + #size-cells = <2>; + #address-cells = <3>; + ranges = <02000000 0 b0000000 + 02000000 0 b0000000 + 0 20000000 + 01000000 0 00000000 + 01000000 0 00000000 + 0 00100000>; + + isa@1e { + device_type = "isa"; + #interrupt-cells = <2>; + #size-cells = <1>; + #address-cells = <2>; + reg = ; + ranges = <1 0 01000000 0 0 + 00001000>; + interrupt-parent = <&i8259>; + + i8259: interrupt-controller@20 { + reg = <1 20 2 + 1 a0 2 + 1 4d0 2>; + clock-frequency = <0>; + interrupt-controller; + device_type = "interrupt-controller"; + #address-cells = <0>; + #interrupt-cells = <2>; + built-in; + compatible = "chrp,iic"; + interrupts = <9 2>; + interrupt-parent = + <&mpic>; + }; + + i8042@60 { + #size-cells = <0>; + #address-cells = <1>; + reg = <1 60 1 1 64 1>; + interrupts = <1 3 c 3>; + interrupt-parent = + <&i8259>; + + keyboard@0 { + reg = <0>; + compatible = "pnpPNP,303"; + }; + + mouse@1 { + reg = <1>; + compatible = "pnpPNP,f03"; + }; + }; + + rtc@70 { + compatible = + "pnpPNP,b00"; + reg = <1 70 2>; + }; + + gpio@400 { + reg = <1 400 80>; + }; + }; + }; + }; + + }; + mpic: pic@40000 { clock-frequency = <0>; interrupt-controller; Index: linux-rt-rebase.q/arch/powerpc/boot/dts/mpc8548cds.dts =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/boot/dts/mpc8548cds.dts +++ linux-rt-rebase.q/arch/powerpc/boot/dts/mpc8548cds.dts @@ -1,5 +1,5 @@ /* - * MPC8555 CDS Device Tree Source + * MPC8548 CDS Device Tree Source * * Copyright 2006 Freescale Semiconductor Inc. 
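
The interrupt-map entries added for the 8544/8548 boards follow the standard PCI binding: three cells of child unit address, one cell of interrupt pin, then the parent phandle and its specifier. Decoding one entry from the pci@8000 node above:

    /*
     *   8800 0 0    1     &mpic   2 1
     *   \______/    |     \___/   \_/
     *   child unit  pin   parent  parent specifier
     *
     * 0x8800 is device 0x11 shifted into the phys.hi devfn field
     * (0x11 << 11); pin 1 is INTA; the request is routed to MPIC
     * source 2, and the trailing cell is the sense/level value as
     * the MPIC binding defines it.
     */

The interrupt-map-mask keeps only the device number and pin bits, so all functions of a slot share the same routing.
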
* @@ -44,8 +44,14 @@ #size-cells = <1>; #interrupt-cells = <2>; device_type = "soc"; - ranges = <0 e0000000 00100000>; - reg = ; // CCSRBAR 1M + ranges = <00001000 e0001000 000ff000 + 80000000 80000000 10000000 + e2000000 e2000000 00800000 + 90000000 90000000 10000000 + e2800000 e2800000 00800000 + a0000000 a0000000 20000000 + e3000000 e3000000 01000000>; + reg = ; // CCSRBAR bus-frequency = <0>; memory-controller@2000 { @@ -162,8 +168,8 @@ serial@4500 { device_type = "serial"; compatible = "ns16550"; - reg = <4500 100>; // reg base, size - clock-frequency = <0>; // should we fill in in uboot? + reg = <4500 100>; // reg base, size + clock-frequency = <0>; // should we fill in in uboot? interrupts = <2a 2>; interrupt-parent = <&mpic>; }; @@ -172,7 +178,7 @@ device_type = "serial"; compatible = "ns16550"; reg = <4600 100>; // reg base, size - clock-frequency = <0>; // should we fill in in uboot? + clock-frequency = <0>; // should we fill in in uboot? interrupts = <2a 2>; interrupt-parent = <&mpic>; }; @@ -183,77 +189,154 @@ fsl,has-rstcr; }; - pci1: pci@8000 { - interrupt-map-mask = <1f800 0 0 7>; + pci@8000 { + interrupt-map-mask = ; interrupt-map = < + /* IDSEL 0x4 (PCIX Slot 2) */ + 02000 0 0 1 &mpic 0 1 + 02000 0 0 2 &mpic 1 1 + 02000 0 0 3 &mpic 2 1 + 02000 0 0 4 &mpic 3 1 + + /* IDSEL 0x5 (PCIX Slot 3) */ + 02800 0 0 1 &mpic 1 1 + 02800 0 0 2 &mpic 2 1 + 02800 0 0 3 &mpic 3 1 + 02800 0 0 4 &mpic 0 1 + + /* IDSEL 0x6 (PCIX Slot 4) */ + 03000 0 0 1 &mpic 2 1 + 03000 0 0 2 &mpic 3 1 + 03000 0 0 3 &mpic 0 1 + 03000 0 0 4 &mpic 1 1 + + /* IDSEL 0x8 (PCIX Slot 5) */ + 04000 0 0 1 &mpic 0 1 + 04000 0 0 2 &mpic 1 1 + 04000 0 0 3 &mpic 2 1 + 04000 0 0 4 &mpic 3 1 + + /* IDSEL 0xC (Tsi310 bridge) */ + 06000 0 0 1 &mpic 0 1 + 06000 0 0 2 &mpic 1 1 + 06000 0 0 3 &mpic 2 1 + 06000 0 0 4 &mpic 3 1 + + /* IDSEL 0x14 (Slot 2) */ + 0a000 0 0 1 &mpic 0 1 + 0a000 0 0 2 &mpic 1 1 + 0a000 0 0 3 &mpic 2 1 + 0a000 0 0 4 &mpic 3 1 + + /* IDSEL 0x15 (Slot 3) */ + 0a800 0 0 1 &mpic 1 1 + 0a800 0 0 2 &mpic 2 1 + 0a800 0 0 3 &mpic 3 1 + 0a800 0 0 4 &mpic 0 1 + + /* IDSEL 0x16 (Slot 4) */ + 0b000 0 0 1 &mpic 2 1 + 0b000 0 0 2 &mpic 3 1 + 0b000 0 0 3 &mpic 0 1 + 0b000 0 0 4 &mpic 1 1 + + /* IDSEL 0x18 (Slot 5) */ + 0c000 0 0 1 &mpic 0 1 + 0c000 0 0 2 &mpic 1 1 + 0c000 0 0 3 &mpic 2 1 + 0c000 0 0 4 &mpic 3 1 + + /* IDSEL 0x1C (Tsi310 bridge PCI primary) */ + 0E000 0 0 1 &mpic 0 1 + 0E000 0 0 2 &mpic 1 1 + 0E000 0 0 3 &mpic 2 1 + 0E000 0 0 4 &mpic 3 1>; - /* IDSEL 0x10 */ - 08000 0 0 1 &mpic 0 1 - 08000 0 0 2 &mpic 1 1 - 08000 0 0 3 &mpic 2 1 - 08000 0 0 4 &mpic 3 1 - - /* IDSEL 0x11 */ - 08800 0 0 1 &mpic 0 1 - 08800 0 0 2 &mpic 1 1 - 08800 0 0 3 &mpic 2 1 - 08800 0 0 4 &mpic 3 1 - - /* IDSEL 0x12 (Slot 1) */ - 09000 0 0 1 &mpic 0 1 - 09000 0 0 2 &mpic 1 1 - 09000 0 0 3 &mpic 2 1 - 09000 0 0 4 &mpic 3 1 - - /* IDSEL 0x13 (Slot 2) */ - 09800 0 0 1 &mpic 1 1 - 09800 0 0 2 &mpic 2 1 - 09800 0 0 3 &mpic 3 1 - 09800 0 0 4 &mpic 0 1 - - /* IDSEL 0x14 (Slot 3) */ - 0a000 0 0 1 &mpic 2 1 - 0a000 0 0 2 &mpic 3 1 - 0a000 0 0 3 &mpic 0 1 - 0a000 0 0 4 &mpic 1 1 - - /* IDSEL 0x15 (Slot 4) */ - 0a800 0 0 1 &mpic 3 1 - 0a800 0 0 2 &mpic 0 1 - 0a800 0 0 3 &mpic 1 1 - 0a800 0 0 4 &mpic 2 1 - - /* Bus 1 (Tundra Bridge) */ - /* IDSEL 0x12 (ISA bridge) */ - 19000 0 0 1 &mpic 0 1 - 19000 0 0 2 &mpic 1 1 - 19000 0 0 3 &mpic 2 1 - 19000 0 0 4 &mpic 3 1>; interrupt-parent = <&mpic>; interrupts = <18 2>; bus-range = <0 0>; - ranges = <02000000 0 80000000 80000000 0 20000000 - 01000000 0 00000000 e2000000 0 00100000>; + ranges = <02000000 0 
80000000 80000000 0 10000000 + 01000000 0 00000000 e2000000 0 00800000>; clock-frequency = <3f940aa>; #interrupt-cells = <1>; #size-cells = <2>; #address-cells = <3>; reg = <8000 1000>; - compatible = "85xx"; + compatible = "fsl,mpc8540-pcix", "fsl,mpc8540-pci"; device_type = "pci"; - i8259@19000 { - clock-frequency = <0>; - interrupt-controller; - device_type = "interrupt-controller"; - reg = <19000 0 0 0 1>; - #address-cells = <0>; - #interrupt-cells = <2>; - built-in; - compatible = "chrp,iic"; - big-endian; - interrupts = <1>; - interrupt-parent = <&pci1>; + pci_bridge@1c { + interrupt-map-mask = ; + interrupt-map = < + + /* IDSEL 0x00 (PrPMC Site) */ + 0000 0 0 1 &mpic 0 1 + 0000 0 0 2 &mpic 1 1 + 0000 0 0 3 &mpic 2 1 + 0000 0 0 4 &mpic 3 1 + + /* IDSEL 0x04 (VIA chip) */ + 2000 0 0 1 &mpic 0 1 + 2000 0 0 2 &mpic 1 1 + 2000 0 0 3 &mpic 2 1 + 2000 0 0 4 &mpic 3 1 + + /* IDSEL 0x05 (8139) */ + 2800 0 0 1 &mpic 1 1 + + /* IDSEL 0x06 (Slot 6) */ + 3000 0 0 1 &mpic 2 1 + 3000 0 0 2 &mpic 3 1 + 3000 0 0 3 &mpic 0 1 + 3000 0 0 4 &mpic 1 1 + + /* IDESL 0x07 (Slot 7) */ + 3800 0 0 1 &mpic 3 1 + 3800 0 0 2 &mpic 0 1 + 3800 0 0 3 &mpic 1 1 + 3800 0 0 4 &mpic 2 1>; + + reg = ; + #interrupt-cells = <1>; + #size-cells = <2>; + #address-cells = <3>; + ranges = <02000000 0 80000000 + 02000000 0 80000000 + 0 20000000 + 01000000 0 00000000 + 01000000 0 00000000 + 0 00080000>; + clock-frequency = <1fca055>; + + isa@4 { + device_type = "isa"; + #interrupt-cells = <2>; + #size-cells = <1>; + #address-cells = <2>; + reg = <2000 0 0 0 0>; + ranges = <1 0 01000000 0 0 00001000>; + interrupt-parent = <&i8259>; + + i8259: interrupt-controller@20 { + clock-frequency = <0>; + interrupt-controller; + device_type = "interrupt-controller"; + reg = <1 20 2 + 1 a0 2 + 1 4d0 2>; + #address-cells = <0>; + #interrupt-cells = <2>; + built-in; + compatible = "chrp,iic"; + interrupts = <0 1>; + interrupt-parent = <&mpic>; + }; + + rtc@70 { + compatible = "pnpPNP,b00"; + reg = <1 70 2>; + }; + }; }; }; @@ -263,20 +346,45 @@ /* IDSEL 0x15 */ a800 0 0 1 &mpic b 1 - a800 0 0 2 &mpic b 1 - a800 0 0 3 &mpic b 1 - a800 0 0 4 &mpic b 1>; + a800 0 0 2 &mpic 1 1 + a800 0 0 3 &mpic 2 1 + a800 0 0 4 &mpic 3 1>; + interrupt-parent = <&mpic>; interrupts = <19 2>; bus-range = <0 0>; - ranges = <02000000 0 a0000000 a0000000 0 20000000 - 01000000 0 00000000 e3000000 0 00100000>; + ranges = <02000000 0 90000000 90000000 0 10000000 + 01000000 0 00000000 e2800000 0 00800000>; clock-frequency = <3f940aa>; #interrupt-cells = <1>; #size-cells = <2>; #address-cells = <3>; reg = <9000 1000>; - compatible = "85xx"; + compatible = "fsl,mpc8540-pci"; + device_type = "pci"; + }; + /* PCI Express */ + pcie@a000 { + interrupt-map-mask = ; + interrupt-map = < + + /* IDSEL 0x0 (PEX) */ + 00000 0 0 1 &mpic 0 1 + 00000 0 0 2 &mpic 1 1 + 00000 0 0 3 &mpic 2 1 + 00000 0 0 4 &mpic 3 1>; + + interrupt-parent = <&mpic>; + interrupts = <1a 2>; + bus-range = <0 ff>; + ranges = <02000000 0 a0000000 a0000000 0 20000000 + 01000000 0 00000000 e3000000 0 08000000>; + clock-frequency = <1fca055>; + #interrupt-cells = <1>; + #size-cells = <2>; + #address-cells = <3>; + reg = ; + compatible = "fsl,mpc8548-pcie"; device_type = "pci"; }; Index: linux-rt-rebase.q/arch/powerpc/boot/dts/mpc8555cds.dts =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/boot/dts/mpc8555cds.dts +++ linux-rt-rebase.q/arch/powerpc/boot/dts/mpc8555cds.dts @@ -193,7 +193,7 @@ #size-cells = <2>; #address-cells = <3>; reg = <8000 1000>; - 
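
The reworked ranges properties use the usual three-field PCI layout: child (PCI) address, parent (CPU) address, size. Splitting one memory entry and one I/O entry from the nodes above:

    /*
     *   02000000 0 80000000   80000000    0 10000000
     *   \__________________/  \________/  \_________/
     *   32-bit MEM window at  CPU addr    256 MB
     *   PCI 0x80000000        0x80000000
     *
     *   01000000 0 00000000   e2000000    0 00800000
     *   I/O space: PCI port 0 mapped at CPU 0xe2000000, 8 MB
     */

The matching soc-level ranges additions earlier in this file are what make those CPU windows reachable through the parent bus in the first place.
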
compatible = "85xx"; + compatible = "fsl,mpc8540-pci"; device_type = "pci"; i8259@19000 { @@ -230,7 +230,7 @@ #size-cells = <2>; #address-cells = <3>; reg = <9000 1000>; - compatible = "85xx"; + compatible = "fsl,mpc8540-pci"; device_type = "pci"; }; Index: linux-rt-rebase.q/arch/powerpc/boot/dts/mpc8560ads.dts =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/boot/dts/mpc8560ads.dts +++ linux-rt-rebase.q/arch/powerpc/boot/dts/mpc8560ads.dts @@ -136,7 +136,7 @@ #interrupt-cells = <1>; #size-cells = <2>; #address-cells = <3>; - compatible = "85xx"; + compatible = "fsl,mpc8540-pcix", "fsl,mpc8540-pci"; device_type = "pci"; reg = <8000 1000>; clock-frequency = <3f940aa>; Index: linux-rt-rebase.q/arch/powerpc/boot/dts/mpc8568mds.dts =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/boot/dts/mpc8568mds.dts +++ linux-rt-rebase.q/arch/powerpc/boot/dts/mpc8568mds.dts @@ -170,6 +170,60 @@ interrupt-parent = <&mpic>; }; + pci@8000 { + interrupt-map-mask = ; + interrupt-map = < + /* IDSEL 0x12 AD18 */ + 9000 0 0 1 &mpic 5 1 + 9000 0 0 2 &mpic 6 1 + 9000 0 0 3 &mpic 7 1 + 9000 0 0 4 &mpic 4 1 + + /* IDSEL 0x13 AD19 */ + 9800 0 0 1 &mpic 6 1 + 9800 0 0 2 &mpic 7 1 + 9800 0 0 3 &mpic 4 1 + 9800 0 0 4 &mpic 5 1>; + + interrupt-parent = <&mpic>; + interrupts = <18 2>; + bus-range = <0 ff>; + ranges = <02000000 0 80000000 80000000 0 20000000 + 01000000 0 00000000 e2000000 0 00800000>; + clock-frequency = <3f940aa>; + #interrupt-cells = <1>; + #size-cells = <2>; + #address-cells = <3>; + reg = <8000 1000>; + compatible = "fsl,mpc8540-pci"; + device_type = "pci"; + }; + + /* PCI Express */ + pcie@a000 { + interrupt-map-mask = ; + interrupt-map = < + + /* IDSEL 0x0 (PEX) */ + 00000 0 0 1 &mpic 0 1 + 00000 0 0 2 &mpic 1 1 + 00000 0 0 3 &mpic 2 1 + 00000 0 0 4 &mpic 3 1>; + + interrupt-parent = <&mpic>; + interrupts = <1a 2>; + bus-range = <0 ff>; + ranges = <02000000 0 a0000000 a0000000 0 20000000 + 01000000 0 00000000 e3000000 0 08000000>; + clock-frequency = <1fca055>; + #interrupt-cells = <1>; + #size-cells = <2>; + #address-cells = <3>; + reg = ; + compatible = "fsl,mpc8548-pcie"; + device_type = "pci"; + }; + serial@4600 { device_type = "serial"; compatible = "ns16550"; Index: linux-rt-rebase.q/arch/powerpc/boot/dts/mpc8641_hpcn.dts =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/boot/dts/mpc8641_hpcn.dts +++ linux-rt-rebase.q/arch/powerpc/boot/dts/mpc8641_hpcn.dts @@ -211,8 +211,8 @@ interrupt-parent = <&mpic>; }; - pci@8000 { - compatible = "86xx"; + pcie@8000 { + compatible = "fsl,mpc8641-pcie"; device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; @@ -399,8 +399,8 @@ }; - pci@9000 { - compatible = "86xx"; + pcie@9000 { + compatible = "fsl,mpc8641-pcie"; device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; Index: linux-rt-rebase.q/arch/powerpc/configs/mpc8544_ds_defconfig =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/configs/mpc8544_ds_defconfig +++ linux-rt-rebase.q/arch/powerpc/configs/mpc8544_ds_defconfig @@ -1,9 +1,26 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.22-rc7 -# Sun Jul 1 23:56:58 2007 +# Linux kernel version: 2.6.22 +# Fri Jul 20 14:09:13 2007 # # CONFIG_PPC64 is not set + +# +# Processor support +# +# CONFIG_6xx is not set +CONFIG_PPC_85xx=y +# CONFIG_PPC_8xx is not set +# CONFIG_40x is not set 
+# CONFIG_44x is not set +# CONFIG_E200 is not set +CONFIG_85xx=y +CONFIG_E500=y +CONFIG_BOOKE=y +CONFIG_FSL_BOOKE=y +# CONFIG_PHYS_64BIT is not set +# CONFIG_SPE is not set +# CONFIG_PPC_MM_SLICES is not set CONFIG_PPC32=y CONFIG_PPC_MERGE=y CONFIG_MMU=y @@ -14,6 +31,7 @@ CONFIG_ARCH_HAS_ILOG2_U32=y CONFIG_GENERIC_HWEIGHT=y CONFIG_GENERIC_CALIBRATE_DELAY=y CONFIG_GENERIC_FIND_NEXT_BIT=y +# CONFIG_ARCH_NO_VIRT_TO_BUS is not set CONFIG_PPC=y CONFIG_EARLY_PRINTK=y CONFIG_GENERIC_NVRAM=y @@ -25,28 +43,8 @@ CONFIG_PPC_UDBG_16550=y CONFIG_AUDIT_ARCH=y CONFIG_GENERIC_BUG=y CONFIG_DEFAULT_UIMAGE=y - -# -# Processor support -# -# CONFIG_CLASSIC32 is not set -# CONFIG_PPC_82xx is not set -# CONFIG_PPC_83xx is not set -CONFIG_PPC_85xx=y -# CONFIG_PPC_86xx is not set -# CONFIG_PPC_8xx is not set -# CONFIG_40x is not set -# CONFIG_44x is not set -# CONFIG_E200 is not set -CONFIG_85xx=y -CONFIG_E500=y # CONFIG_PPC_DCR_NATIVE is not set # CONFIG_PPC_DCR_MMIO is not set -CONFIG_BOOKE=y -CONFIG_FSL_BOOKE=y -# CONFIG_PHYS_64BIT is not set -# CONFIG_SPE is not set -# CONFIG_PPC_MM_SLICES is not set CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" # @@ -63,13 +61,12 @@ CONFIG_LOCALVERSION="" CONFIG_LOCALVERSION_AUTO=y CONFIG_SWAP=y CONFIG_SYSVIPC=y -CONFIG_IPC_NS=y CONFIG_SYSVIPC_SYSCTL=y CONFIG_POSIX_MQUEUE=y CONFIG_BSD_PROCESS_ACCT=y # CONFIG_BSD_PROCESS_ACCT_V3 is not set # CONFIG_TASKSTATS is not set -# CONFIG_UTS_NS is not set +# CONFIG_USER_NS is not set CONFIG_AUDIT=y # CONFIG_AUDITSYSCALL is not set CONFIG_IKCONFIG=y @@ -86,7 +83,7 @@ CONFIG_SYSCTL_SYSCALL=y CONFIG_KALLSYMS=y CONFIG_KALLSYMS_ALL=y # CONFIG_KALLSYMS_EXTRA_PASS is not set -# CONFIG_HOTPLUG is not set +CONFIG_HOTPLUG=y CONFIG_PRINTK=y CONFIG_BUG=y CONFIG_ELF_CORE=y @@ -105,24 +102,17 @@ CONFIG_SLAB=y CONFIG_RT_MUTEXES=y # CONFIG_TINY_SHMEM is not set CONFIG_BASE_SMALL=0 - -# -# Loadable module support -# CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y CONFIG_MODVERSIONS=y # CONFIG_MODULE_SRCVERSION_ALL is not set CONFIG_KMOD=y - -# -# Block layer -# CONFIG_BLOCK=y CONFIG_LBD=y # CONFIG_BLK_DEV_IO_TRACE is not set # CONFIG_LSF is not set +# CONFIG_BLK_DEV_BSG is not set # # IO Schedulers @@ -153,7 +143,7 @@ CONFIG_MPC8544_DS=y CONFIG_MPC85xx=y CONFIG_MPIC=y # CONFIG_MPIC_WEIRD is not set -# CONFIG_PPC_I8259 is not set +CONFIG_PPC_I8259=y # CONFIG_PPC_RTAS is not set # CONFIG_MMIO_NVRAM is not set # CONFIG_PPC_MPC106 is not set @@ -191,6 +181,8 @@ CONFIG_FLAT_NODE_MEM_MAP=y CONFIG_SPLIT_PTLOCK_CPUS=4 # CONFIG_RESOURCES_64BIT is not set CONFIG_ZONE_DMA_FLAG=1 +CONFIG_BOUNCE=y +CONFIG_VIRT_TO_BUS=y CONFIG_PROC_DEVICETREE=y CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="root=/dev/sda3 rw console=ttyS0,115200" @@ -205,15 +197,21 @@ CONFIG_ISA_DMA_API=y # CONFIG_ZONE_DMA=y CONFIG_PPC_INDIRECT_PCI=y -CONFIG_PPC_INDIRECT_PCI_BE=y CONFIG_FSL_SOC=y -# CONFIG_PCI is not set -# CONFIG_PCI_DOMAINS is not set -# CONFIG_ARCH_SUPPORTS_MSI is not set +CONFIG_FSL_PCI=y +CONFIG_PCI=y +CONFIG_PCI_DOMAINS=y +CONFIG_PCI_SYSCALL=y +# CONFIG_PCIEPORTBUS is not set +CONFIG_ARCH_SUPPORTS_MSI=y +# CONFIG_PCI_MSI is not set +# CONFIG_PCI_DEBUG is not set # # PCCARD (PCMCIA/CardBus) support # +# CONFIG_PCCARD is not set +# CONFIG_HOTPLUG_PCI is not set # # Advanced setup @@ -254,7 +252,6 @@ CONFIG_ASK_IP_FIB_HASH=y CONFIG_IP_FIB_HASH=y CONFIG_IP_MULTIPLE_TABLES=y CONFIG_IP_ROUTE_MULTIPATH=y -# CONFIG_IP_ROUTE_MULTIPATH_CACHED is not set CONFIG_IP_ROUTE_VERBOSE=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y @@ -330,6 +327,7 @@ CONFIG_FIB_RULES=y # 
CONFIG_MAC80211 is not set # CONFIG_IEEE80211 is not set # CONFIG_RFKILL is not set +# CONFIG_NET_9P is not set # # Device Drivers @@ -340,45 +338,35 @@ CONFIG_FIB_RULES=y # CONFIG_STANDALONE=y CONFIG_PREVENT_FIRMWARE_BUILD=y +# CONFIG_FW_LOADER is not set # CONFIG_DEBUG_DRIVER is not set # CONFIG_DEBUG_DEVRES is not set # CONFIG_SYS_HYPERVISOR is not set - -# -# Connector - unified userspace <-> kernelspace linker -# # CONFIG_CONNECTOR is not set # CONFIG_MTD is not set - -# -# Parallel port support -# # CONFIG_PARPORT is not set - -# -# Plug and Play support -# -# CONFIG_PNPACPI is not set - -# -# Block devices -# +CONFIG_BLK_DEV=y # CONFIG_BLK_DEV_FD is not set +# CONFIG_BLK_CPQ_DA is not set +# CONFIG_BLK_CPQ_CISS_DA is not set +# CONFIG_BLK_DEV_DAC960 is not set +# CONFIG_BLK_DEV_UMEM is not set # CONFIG_BLK_DEV_COW_COMMON is not set CONFIG_BLK_DEV_LOOP=y # CONFIG_BLK_DEV_CRYPTOLOOP is not set CONFIG_BLK_DEV_NBD=y +# CONFIG_BLK_DEV_SX8 is not set CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_COUNT=2 CONFIG_BLK_DEV_RAM_SIZE=16384 CONFIG_BLK_DEV_RAM_BLOCKSIZE=1024 # CONFIG_CDROM_PKTCDVD is not set # CONFIG_ATA_OVER_ETH is not set - -# -# Misc devices -# -# CONFIG_BLINK is not set +CONFIG_MISC_DEVICES=y +# CONFIG_PHANTOM is not set +# CONFIG_EEPROM_93CX6 is not set +# CONFIG_SGI_IOC4 is not set +# CONFIG_TIFM_CORE is not set # CONFIG_IDE is not set # @@ -386,6 +374,7 @@ CONFIG_BLK_DEV_RAM_BLOCKSIZE=1024 # # CONFIG_RAID_ATTRS is not set CONFIG_SCSI=y +CONFIG_SCSI_DMA=y # CONFIG_SCSI_TGT is not set # CONFIG_SCSI_NETLINK is not set CONFIG_SCSI_PROC_FS=y @@ -422,25 +411,120 @@ CONFIG_SCSI_WAIT_SCAN=m # SCSI low-level drivers # # CONFIG_ISCSI_TCP is not set +# CONFIG_BLK_DEV_3W_XXXX_RAID is not set +# CONFIG_SCSI_3W_9XXX is not set +# CONFIG_SCSI_ACARD is not set +# CONFIG_SCSI_AACRAID is not set +# CONFIG_SCSI_AIC7XXX is not set +# CONFIG_SCSI_AIC7XXX_OLD is not set +# CONFIG_SCSI_AIC79XX is not set +# CONFIG_SCSI_AIC94XX is not set +# CONFIG_SCSI_DPT_I2O is not set +# CONFIG_SCSI_ARCMSR is not set +# CONFIG_MEGARAID_NEWGEN is not set +# CONFIG_MEGARAID_LEGACY is not set +# CONFIG_MEGARAID_SAS is not set +# CONFIG_SCSI_HPTIOP is not set +# CONFIG_SCSI_BUSLOGIC is not set +# CONFIG_SCSI_DMX3191D is not set +# CONFIG_SCSI_EATA is not set +# CONFIG_SCSI_FUTURE_DOMAIN is not set +# CONFIG_SCSI_GDTH is not set +# CONFIG_SCSI_IPS is not set +# CONFIG_SCSI_INITIO is not set +# CONFIG_SCSI_INIA100 is not set +# CONFIG_SCSI_STEX is not set +# CONFIG_SCSI_SYM53C8XX_2 is not set +# CONFIG_SCSI_IPR is not set +# CONFIG_SCSI_QLOGIC_1280 is not set +# CONFIG_SCSI_QLA_FC is not set +# CONFIG_SCSI_QLA_ISCSI is not set +# CONFIG_SCSI_LPFC is not set +# CONFIG_SCSI_DC395x is not set +# CONFIG_SCSI_DC390T is not set +# CONFIG_SCSI_NSP32 is not set # CONFIG_SCSI_DEBUG is not set +# CONFIG_SCSI_SRP is not set CONFIG_ATA=y # CONFIG_ATA_NONSTANDARD is not set +# CONFIG_SATA_AHCI is not set +# CONFIG_SATA_SVW is not set +# CONFIG_ATA_PIIX is not set +# CONFIG_SATA_MV is not set +# CONFIG_SATA_NV is not set +# CONFIG_PDC_ADMA is not set +# CONFIG_SATA_QSTOR is not set +# CONFIG_SATA_PROMISE is not set +# CONFIG_SATA_SX4 is not set +# CONFIG_SATA_SIL is not set +# CONFIG_SATA_SIL24 is not set +# CONFIG_SATA_SIS is not set +# CONFIG_SATA_ULI is not set +# CONFIG_SATA_VIA is not set +# CONFIG_SATA_VITESSE is not set +# CONFIG_SATA_INIC162X is not set +# CONFIG_PATA_ALI is not set +# CONFIG_PATA_AMD is not set +# CONFIG_PATA_ARTOP is not set +# CONFIG_PATA_ATIIXP is not set +# CONFIG_PATA_CMD640_PCI is not set +# 
CONFIG_PATA_CMD64X is not set +# CONFIG_PATA_CS5520 is not set +# CONFIG_PATA_CS5530 is not set +# CONFIG_PATA_CYPRESS is not set +# CONFIG_PATA_EFAR is not set +# CONFIG_ATA_GENERIC is not set +# CONFIG_PATA_HPT366 is not set +# CONFIG_PATA_HPT37X is not set +# CONFIG_PATA_HPT3X2N is not set +# CONFIG_PATA_HPT3X3 is not set +# CONFIG_PATA_IT821X is not set +# CONFIG_PATA_IT8213 is not set +# CONFIG_PATA_JMICRON is not set +# CONFIG_PATA_TRIFLEX is not set +# CONFIG_PATA_MARVELL is not set +# CONFIG_PATA_MPIIX is not set +# CONFIG_PATA_OLDPIIX is not set +# CONFIG_PATA_NETCELL is not set +# CONFIG_PATA_NS87410 is not set +# CONFIG_PATA_OPTI is not set +# CONFIG_PATA_OPTIDMA is not set +# CONFIG_PATA_PDC_OLD is not set +# CONFIG_PATA_RADISYS is not set +# CONFIG_PATA_RZ1000 is not set +# CONFIG_PATA_SC1200 is not set +# CONFIG_PATA_SERVERWORKS is not set +# CONFIG_PATA_PDC2027X is not set +# CONFIG_PATA_SIL680 is not set +# CONFIG_PATA_SIS is not set +# CONFIG_PATA_VIA is not set +# CONFIG_PATA_WINBOND is not set # CONFIG_PATA_PLATFORM is not set +# CONFIG_MD is not set # -# Multi-device support (RAID and LVM) +# Fusion MPT device support # -# CONFIG_MD is not set -# CONFIG_MACINTOSH_DRIVERS is not set +# CONFIG_FUSION is not set +# CONFIG_FUSION_SPI is not set +# CONFIG_FUSION_FC is not set +# CONFIG_FUSION_SAS is not set # -# Network device support +# IEEE 1394 (FireWire) support # +# CONFIG_FIREWIRE is not set +# CONFIG_IEEE1394 is not set +# CONFIG_I2O is not set +# CONFIG_MACINTOSH_DRIVERS is not set CONFIG_NETDEVICES=y +# CONFIG_NETDEVICES_MULTIQUEUE is not set # CONFIG_DUMMY is not set # CONFIG_BONDING is not set +# CONFIG_MACVLAN is not set # CONFIG_EQUALIZER is not set # CONFIG_TUN is not set +# CONFIG_ARCNET is not set CONFIG_PHYLIB=y # @@ -454,17 +538,44 @@ CONFIG_PHYLIB=y CONFIG_VITESSE_PHY=y # CONFIG_SMSC_PHY is not set # CONFIG_BROADCOM_PHY is not set +# CONFIG_ICPLUS_PHY is not set # CONFIG_FIXED_PHY is not set - -# -# Ethernet (10 or 100Mbit) -# CONFIG_NET_ETHERNET=y CONFIG_MII=y +# CONFIG_HAPPYMEAL is not set +# CONFIG_SUNGEM is not set +# CONFIG_CASSINI is not set +# CONFIG_NET_VENDOR_3COM is not set +# CONFIG_NET_TULIP is not set +# CONFIG_HP100 is not set +# CONFIG_NET_PCI is not set CONFIG_NETDEV_1000=y +# CONFIG_ACENIC is not set +# CONFIG_DL2K is not set +# CONFIG_E1000 is not set +# CONFIG_NS83820 is not set +# CONFIG_HAMACHI is not set +# CONFIG_YELLOWFIN is not set +# CONFIG_R8169 is not set +# CONFIG_SIS190 is not set +# CONFIG_SKGE is not set +# CONFIG_SKY2 is not set +# CONFIG_VIA_VELOCITY is not set +# CONFIG_TIGON3 is not set +# CONFIG_BNX2 is not set CONFIG_GIANFAR=y CONFIG_GFAR_NAPI=y +# CONFIG_QLA3XXX is not set +# CONFIG_ATL1 is not set CONFIG_NETDEV_10000=y +# CONFIG_CHELSIO_T1 is not set +# CONFIG_CHELSIO_T3 is not set +# CONFIG_IXGB is not set +# CONFIG_S2IO is not set +# CONFIG_MYRI10GE is not set +# CONFIG_NETXEN_NIC is not set +# CONFIG_MLX4_CORE is not set +# CONFIG_TR is not set # # Wireless LAN @@ -472,21 +583,16 @@ CONFIG_NETDEV_10000=y # CONFIG_WLAN_PRE80211 is not set # CONFIG_WLAN_80211 is not set # CONFIG_WAN is not set +# CONFIG_FDDI is not set +# CONFIG_HIPPI is not set # CONFIG_PPP is not set # CONFIG_SLIP is not set +# CONFIG_NET_FC is not set # CONFIG_SHAPER is not set # CONFIG_NETCONSOLE is not set # CONFIG_NETPOLL is not set # CONFIG_NET_POLL_CONTROLLER is not set - -# -# ISDN subsystem -# # CONFIG_ISDN is not set - -# -# Telephony Support -# # CONFIG_PHONE is not set # @@ -521,6 +627,7 @@ CONFIG_INPUT=y CONFIG_SERIO=y 
CONFIG_SERIO_I8042=y CONFIG_SERIO_SERPORT=y +# CONFIG_SERIO_PCIPS2 is not set CONFIG_SERIO_LIBPS2=y # CONFIG_SERIO_RAW is not set # CONFIG_GAMEPORT is not set @@ -539,6 +646,7 @@ CONFIG_HW_CONSOLE=y # CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_PCI=y CONFIG_SERIAL_8250_NR_UARTS=4 CONFIG_SERIAL_8250_RUNTIME_UARTS=4 # CONFIG_SERIAL_8250_EXTENDED is not set @@ -550,14 +658,11 @@ CONFIG_SERIAL_8250_SHARE_IRQ=y # CONFIG_SERIAL_UARTLITE is not set CONFIG_SERIAL_CORE=y CONFIG_SERIAL_CORE_CONSOLE=y +# CONFIG_SERIAL_JSM is not set # CONFIG_SERIAL_OF_PLATFORM is not set CONFIG_UNIX98_PTYS=y CONFIG_LEGACY_PTYS=y CONFIG_LEGACY_PTY_COUNT=256 - -# -# IPMI -# # CONFIG_IPMI_HANDLER is not set # CONFIG_WATCHDOG is not set # CONFIG_HW_RANDOM is not set @@ -565,12 +670,12 @@ CONFIG_NVRAM=y CONFIG_GEN_RTC=y CONFIG_GEN_RTC_X=y # CONFIG_R3964 is not set +# CONFIG_APPLICOM is not set +# CONFIG_AGP is not set +# CONFIG_DRM is not set # CONFIG_RAW_DRIVER is not set - -# -# TPM devices -# # CONFIG_TCG_TPM is not set +CONFIG_DEVPORT=y # CONFIG_I2C is not set # @@ -578,11 +683,8 @@ CONFIG_GEN_RTC_X=y # # CONFIG_SPI is not set # CONFIG_SPI_MASTER is not set - -# -# Dallas's 1-wire bus -# # CONFIG_W1 is not set +# CONFIG_POWER_SUPPLY is not set # CONFIG_HWMON is not set # @@ -655,19 +757,14 @@ CONFIG_DUMMY_CONSOLE=y # Sound # # CONFIG_SOUND is not set - -# -# HID Devices -# +CONFIG_HID_SUPPORT=y CONFIG_HID=y # CONFIG_HID_DEBUG is not set - -# -# USB support -# -# CONFIG_USB_ARCH_HAS_HCD is not set -# CONFIG_USB_ARCH_HAS_OHCI is not set -# CONFIG_USB_ARCH_HAS_EHCI is not set +CONFIG_USB_SUPPORT=y +CONFIG_USB_ARCH_HAS_HCD=y +CONFIG_USB_ARCH_HAS_OHCI=y +CONFIG_USB_ARCH_HAS_EHCI=y +# CONFIG_USB is not set # # NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' @@ -691,14 +788,7 @@ CONFIG_HID=y # # LED Triggers # - -# -# InfiniBand support -# - -# -# EDAC - error detection and reporting (RAS) (EXPERIMENTAL) -# +# CONFIG_INFINIBAND is not set # # Real Time Clock @@ -719,19 +809,13 @@ CONFIG_RTC_INTF_DEV=y # CONFIG_RTC_DRV_TEST is not set # -# I2C RTC drivers -# - -# -# SPI RTC drivers -# - -# # Platform RTC drivers # +# CONFIG_RTC_DRV_CMOS is not set # CONFIG_RTC_DRV_DS1553 is not set # CONFIG_RTC_DRV_DS1742 is not set # CONFIG_RTC_DRV_M48T86 is not set +# CONFIG_RTC_DRV_M48T59 is not set # CONFIG_RTC_DRV_V3020 is not set # @@ -752,6 +836,11 @@ CONFIG_RTC_INTF_DEV=y # # +# Userspace I/O +# +# CONFIG_UIO is not set + +# # File systems # CONFIG_EXT2_FS=y @@ -859,7 +948,6 @@ CONFIG_RPCSEC_GSS_KRB5=y # CONFIG_NCP_FS is not set # CONFIG_CODA_FS is not set # CONFIG_AFS_FS is not set -# CONFIG_9P_FS is not set # # Partition Types @@ -941,6 +1029,7 @@ CONFIG_BITREVERSE=y # CONFIG_CRC16 is not set # CONFIG_CRC_ITU_T is not set CONFIG_CRC32=y +# CONFIG_CRC7 is not set CONFIG_LIBCRC32C=m CONFIG_ZLIB_INFLATE=y CONFIG_PLIST=y @@ -965,6 +1054,7 @@ CONFIG_ENABLE_MUST_CHECK=y CONFIG_DEBUG_KERNEL=y # CONFIG_DEBUG_SHIRQ is not set CONFIG_DETECT_SOFTLOCKUP=y +CONFIG_SCHED_DEBUG=y # CONFIG_SCHEDSTATS is not set # CONFIG_TIMER_STATS is not set # CONFIG_DEBUG_SLAB is not set @@ -996,10 +1086,6 @@ CONFIG_FORCED_INLINING=y # # CONFIG_KEYS is not set # CONFIG_SECURITY is not set - -# -# Cryptographic options -# CONFIG_CRYPTO=y CONFIG_CRYPTO_ALGAPI=y CONFIG_CRYPTO_BLKCIPHER=y @@ -1038,7 +1124,4 @@ CONFIG_CRYPTO_DES=y # CONFIG_CRYPTO_CRC32C is not set # CONFIG_CRYPTO_CAMELLIA is not set # CONFIG_CRYPTO_TEST is not set - -# -# Hardware crypto devices -# +CONFIG_CRYPTO_HW=y Index: 
linux-rt-rebase.q/arch/powerpc/configs/mpc8568mds_defconfig =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/configs/mpc8568mds_defconfig +++ linux-rt-rebase.q/arch/powerpc/configs/mpc8568mds_defconfig @@ -1,9 +1,26 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.22-rc7 -# Sun Jul 1 23:56:59 2007 +# Linux kernel version: 2.6.22 +# Fri Jul 20 13:55:04 2007 # # CONFIG_PPC64 is not set + +# +# Processor support +# +# CONFIG_6xx is not set +CONFIG_PPC_85xx=y +# CONFIG_PPC_8xx is not set +# CONFIG_40x is not set +# CONFIG_44x is not set +# CONFIG_E200 is not set +CONFIG_85xx=y +CONFIG_E500=y +CONFIG_BOOKE=y +CONFIG_FSL_BOOKE=y +# CONFIG_PHYS_64BIT is not set +CONFIG_SPE=y +# CONFIG_PPC_MM_SLICES is not set CONFIG_PPC32=y CONFIG_PPC_MERGE=y CONFIG_MMU=y @@ -14,6 +31,7 @@ CONFIG_ARCH_HAS_ILOG2_U32=y CONFIG_GENERIC_HWEIGHT=y CONFIG_GENERIC_CALIBRATE_DELAY=y CONFIG_GENERIC_FIND_NEXT_BIT=y +# CONFIG_ARCH_NO_VIRT_TO_BUS is not set CONFIG_PPC=y CONFIG_EARLY_PRINTK=y CONFIG_GENERIC_NVRAM=y @@ -25,28 +43,8 @@ CONFIG_PPC_UDBG_16550=y CONFIG_AUDIT_ARCH=y CONFIG_GENERIC_BUG=y CONFIG_DEFAULT_UIMAGE=y - -# -# Processor support -# -# CONFIG_CLASSIC32 is not set -# CONFIG_PPC_82xx is not set -# CONFIG_PPC_83xx is not set -CONFIG_PPC_85xx=y -# CONFIG_PPC_86xx is not set -# CONFIG_PPC_8xx is not set -# CONFIG_40x is not set -# CONFIG_44x is not set -# CONFIG_E200 is not set -CONFIG_85xx=y -CONFIG_E500=y # CONFIG_PPC_DCR_NATIVE is not set # CONFIG_PPC_DCR_MMIO is not set -CONFIG_BOOKE=y -CONFIG_FSL_BOOKE=y -# CONFIG_PHYS_64BIT is not set -CONFIG_SPE=y -# CONFIG_PPC_MM_SLICES is not set CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" # @@ -63,12 +61,11 @@ CONFIG_LOCALVERSION="" CONFIG_LOCALVERSION_AUTO=y CONFIG_SWAP=y CONFIG_SYSVIPC=y -# CONFIG_IPC_NS is not set CONFIG_SYSVIPC_SYSCTL=y # CONFIG_POSIX_MQUEUE is not set # CONFIG_BSD_PROCESS_ACCT is not set # CONFIG_TASKSTATS is not set -# CONFIG_UTS_NS is not set +# CONFIG_USER_NS is not set # CONFIG_AUDIT is not set # CONFIG_IKCONFIG is not set CONFIG_LOG_BUF_SHIFT=14 @@ -100,24 +97,17 @@ CONFIG_SLAB=y CONFIG_RT_MUTEXES=y # CONFIG_TINY_SHMEM is not set CONFIG_BASE_SMALL=0 - -# -# Loadable module support -# CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_MODULE_FORCE_UNLOAD is not set # CONFIG_MODVERSIONS is not set # CONFIG_MODULE_SRCVERSION_ALL is not set # CONFIG_KMOD is not set - -# -# Block layer -# CONFIG_BLOCK=y # CONFIG_LBD is not set # CONFIG_BLK_DEV_IO_TRACE is not set # CONFIG_LSF is not set +# CONFIG_BLK_DEV_BSG is not set # # IO Schedulers @@ -186,6 +176,8 @@ CONFIG_FLAT_NODE_MEM_MAP=y CONFIG_SPLIT_PTLOCK_CPUS=4 # CONFIG_RESOURCES_64BIT is not set CONFIG_ZONE_DMA_FLAG=1 +CONFIG_BOUNCE=y +CONFIG_VIRT_TO_BUS=y CONFIG_PROC_DEVICETREE=y # CONFIG_CMDLINE_BOOL is not set # CONFIG_PM is not set @@ -201,14 +193,20 @@ CONFIG_ZONE_DMA=y CONFIG_PPC_INDIRECT_PCI=y CONFIG_PPC_INDIRECT_PCI_BE=y CONFIG_FSL_SOC=y -# CONFIG_PCI is not set -# CONFIG_PCI_DOMAINS is not set -# CONFIG_ARCH_SUPPORTS_MSI is not set +CONFIG_FSL_PCI=y +CONFIG_PCI=y +CONFIG_PCI_DOMAINS=y +CONFIG_PCI_SYSCALL=y +# CONFIG_PCIEPORTBUS is not set +CONFIG_ARCH_SUPPORTS_MSI=y +# CONFIG_PCI_MSI is not set +# CONFIG_PCI_DEBUG is not set # # PCCARD (PCMCIA/CardBus) support # # CONFIG_PCCARD is not set +# CONFIG_HOTPLUG_PCI is not set # # Advanced setup @@ -309,6 +307,7 @@ CONFIG_DEFAULT_TCP_CONG="cubic" # CONFIG_MAC80211 is not set # CONFIG_IEEE80211 is not set # CONFIG_RFKILL is not set +# 
CONFIG_NET_9P is not set # # Device Drivers @@ -323,42 +322,31 @@ CONFIG_PREVENT_FIRMWARE_BUILD=y # CONFIG_DEBUG_DRIVER is not set # CONFIG_DEBUG_DEVRES is not set # CONFIG_SYS_HYPERVISOR is not set - -# -# Connector - unified userspace <-> kernelspace linker -# # CONFIG_CONNECTOR is not set # CONFIG_MTD is not set - -# -# Parallel port support -# # CONFIG_PARPORT is not set - -# -# Plug and Play support -# -# CONFIG_PNPACPI is not set - -# -# Block devices -# +CONFIG_BLK_DEV=y # CONFIG_BLK_DEV_FD is not set +# CONFIG_BLK_CPQ_DA is not set +# CONFIG_BLK_CPQ_CISS_DA is not set +# CONFIG_BLK_DEV_DAC960 is not set +# CONFIG_BLK_DEV_UMEM is not set # CONFIG_BLK_DEV_COW_COMMON is not set CONFIG_BLK_DEV_LOOP=y # CONFIG_BLK_DEV_CRYPTOLOOP is not set # CONFIG_BLK_DEV_NBD is not set +# CONFIG_BLK_DEV_SX8 is not set CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_COUNT=16 CONFIG_BLK_DEV_RAM_SIZE=32768 CONFIG_BLK_DEV_RAM_BLOCKSIZE=1024 # CONFIG_CDROM_PKTCDVD is not set # CONFIG_ATA_OVER_ETH is not set - -# -# Misc devices -# -# CONFIG_BLINK is not set +CONFIG_MISC_DEVICES=y +# CONFIG_PHANTOM is not set +# CONFIG_EEPROM_93CX6 is not set +# CONFIG_SGI_IOC4 is not set +# CONFIG_TIFM_CORE is not set # CONFIG_IDE is not set # @@ -366,6 +354,7 @@ CONFIG_BLK_DEV_RAM_BLOCKSIZE=1024 # # CONFIG_RAID_ATTRS is not set CONFIG_SCSI=y +CONFIG_SCSI_DMA=y # CONFIG_SCSI_TGT is not set # CONFIG_SCSI_NETLINK is not set CONFIG_SCSI_PROC_FS=y @@ -402,23 +391,65 @@ CONFIG_SCSI_WAIT_SCAN=m # SCSI low-level drivers # # CONFIG_ISCSI_TCP is not set +# CONFIG_BLK_DEV_3W_XXXX_RAID is not set +# CONFIG_SCSI_3W_9XXX is not set +# CONFIG_SCSI_ACARD is not set +# CONFIG_SCSI_AACRAID is not set +# CONFIG_SCSI_AIC7XXX is not set +# CONFIG_SCSI_AIC7XXX_OLD is not set +# CONFIG_SCSI_AIC79XX is not set +# CONFIG_SCSI_AIC94XX is not set +# CONFIG_SCSI_DPT_I2O is not set +# CONFIG_SCSI_ARCMSR is not set +# CONFIG_MEGARAID_NEWGEN is not set +# CONFIG_MEGARAID_LEGACY is not set +# CONFIG_MEGARAID_SAS is not set +# CONFIG_SCSI_HPTIOP is not set +# CONFIG_SCSI_BUSLOGIC is not set +# CONFIG_SCSI_DMX3191D is not set +# CONFIG_SCSI_EATA is not set +# CONFIG_SCSI_FUTURE_DOMAIN is not set +# CONFIG_SCSI_GDTH is not set +# CONFIG_SCSI_IPS is not set +# CONFIG_SCSI_INITIO is not set +# CONFIG_SCSI_INIA100 is not set +# CONFIG_SCSI_STEX is not set +# CONFIG_SCSI_SYM53C8XX_2 is not set +# CONFIG_SCSI_QLOGIC_1280 is not set +# CONFIG_SCSI_QLA_FC is not set +# CONFIG_SCSI_QLA_ISCSI is not set +# CONFIG_SCSI_LPFC is not set +# CONFIG_SCSI_DC395x is not set +# CONFIG_SCSI_DC390T is not set +# CONFIG_SCSI_NSP32 is not set # CONFIG_SCSI_DEBUG is not set +# CONFIG_SCSI_SRP is not set # CONFIG_ATA is not set +# CONFIG_MD is not set # -# Multi-device support (RAID and LVM) +# Fusion MPT device support # -# CONFIG_MD is not set -# CONFIG_MACINTOSH_DRIVERS is not set +# CONFIG_FUSION is not set +# CONFIG_FUSION_SPI is not set +# CONFIG_FUSION_FC is not set +# CONFIG_FUSION_SAS is not set # -# Network device support +# IEEE 1394 (FireWire) support # +# CONFIG_FIREWIRE is not set +# CONFIG_IEEE1394 is not set +# CONFIG_I2O is not set +# CONFIG_MACINTOSH_DRIVERS is not set CONFIG_NETDEVICES=y +# CONFIG_NETDEVICES_MULTIQUEUE is not set # CONFIG_DUMMY is not set # CONFIG_BONDING is not set +# CONFIG_MACVLAN is not set # CONFIG_EQUALIZER is not set # CONFIG_TUN is not set +# CONFIG_ARCNET is not set CONFIG_PHYLIB=y # @@ -432,17 +463,44 @@ CONFIG_MARVELL_PHY=y # CONFIG_VITESSE_PHY is not set # CONFIG_SMSC_PHY is not set # CONFIG_BROADCOM_PHY is not set +# CONFIG_ICPLUS_PHY is 
not set # CONFIG_FIXED_PHY is not set - -# -# Ethernet (10 or 100Mbit) -# CONFIG_NET_ETHERNET=y CONFIG_MII=y +# CONFIG_HAPPYMEAL is not set +# CONFIG_SUNGEM is not set +# CONFIG_CASSINI is not set +# CONFIG_NET_VENDOR_3COM is not set +# CONFIG_NET_TULIP is not set +# CONFIG_HP100 is not set +# CONFIG_NET_PCI is not set CONFIG_NETDEV_1000=y +# CONFIG_ACENIC is not set +# CONFIG_DL2K is not set +# CONFIG_E1000 is not set +# CONFIG_NS83820 is not set +# CONFIG_HAMACHI is not set +# CONFIG_YELLOWFIN is not set +# CONFIG_R8169 is not set +# CONFIG_SIS190 is not set +# CONFIG_SKGE is not set +# CONFIG_SKY2 is not set +# CONFIG_VIA_VELOCITY is not set +# CONFIG_TIGON3 is not set +# CONFIG_BNX2 is not set CONFIG_GIANFAR=y CONFIG_GFAR_NAPI=y +# CONFIG_QLA3XXX is not set +# CONFIG_ATL1 is not set CONFIG_NETDEV_10000=y +# CONFIG_CHELSIO_T1 is not set +# CONFIG_CHELSIO_T3 is not set +# CONFIG_IXGB is not set +# CONFIG_S2IO is not set +# CONFIG_MYRI10GE is not set +# CONFIG_NETXEN_NIC is not set +# CONFIG_MLX4_CORE is not set +# CONFIG_TR is not set # # Wireless LAN @@ -450,21 +508,16 @@ CONFIG_NETDEV_10000=y # CONFIG_WLAN_PRE80211 is not set # CONFIG_WLAN_80211 is not set # CONFIG_WAN is not set +# CONFIG_FDDI is not set +# CONFIG_HIPPI is not set # CONFIG_PPP is not set # CONFIG_SLIP is not set +# CONFIG_NET_FC is not set # CONFIG_SHAPER is not set # CONFIG_NETCONSOLE is not set # CONFIG_NETPOLL is not set # CONFIG_NET_POLL_CONTROLLER is not set - -# -# ISDN subsystem -# # CONFIG_ISDN is not set - -# -# Telephony Support -# # CONFIG_PHONE is not set # @@ -510,6 +563,7 @@ CONFIG_INPUT=y # CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_PCI=y CONFIG_SERIAL_8250_NR_UARTS=4 CONFIG_SERIAL_8250_RUNTIME_UARTS=4 # CONFIG_SERIAL_8250_EXTENDED is not set @@ -521,14 +575,11 @@ CONFIG_SERIAL_8250_SHARE_IRQ=y # CONFIG_SERIAL_UARTLITE is not set CONFIG_SERIAL_CORE=y CONFIG_SERIAL_CORE_CONSOLE=y +# CONFIG_SERIAL_JSM is not set # CONFIG_SERIAL_OF_PLATFORM is not set CONFIG_UNIX98_PTYS=y CONFIG_LEGACY_PTYS=y CONFIG_LEGACY_PTY_COUNT=256 - -# -# IPMI -# # CONFIG_IPMI_HANDLER is not set CONFIG_WATCHDOG=y # CONFIG_WATCHDOG_NOWAYOUT is not set @@ -538,17 +589,23 @@ CONFIG_WATCHDOG=y # # CONFIG_SOFT_WATCHDOG is not set # CONFIG_BOOKE_WDT is not set + +# +# PCI-based Watchdog Cards +# +# CONFIG_PCIPCWATCHDOG is not set +# CONFIG_WDTPCI is not set CONFIG_HW_RANDOM=y # CONFIG_NVRAM is not set CONFIG_GEN_RTC=y # CONFIG_GEN_RTC_X is not set # CONFIG_R3964 is not set +# CONFIG_APPLICOM is not set +# CONFIG_AGP is not set +# CONFIG_DRM is not set # CONFIG_RAW_DRIVER is not set - -# -# TPM devices -# # CONFIG_TCG_TPM is not set +CONFIG_DEVPORT=y CONFIG_I2C=y CONFIG_I2C_BOARDINFO=y CONFIG_I2C_CHARDEV=y @@ -563,23 +620,43 @@ CONFIG_I2C_CHARDEV=y # # I2C Hardware Bus support # +# CONFIG_I2C_ALI1535 is not set +# CONFIG_I2C_ALI1563 is not set +# CONFIG_I2C_ALI15X3 is not set +# CONFIG_I2C_AMD756 is not set +# CONFIG_I2C_AMD8111 is not set +# CONFIG_I2C_I801 is not set +# CONFIG_I2C_I810 is not set +# CONFIG_I2C_PIIX4 is not set CONFIG_I2C_MPC=y +# CONFIG_I2C_NFORCE2 is not set # CONFIG_I2C_OCORES is not set # CONFIG_I2C_PARPORT_LIGHT is not set +# CONFIG_I2C_PROSAVAGE is not set +# CONFIG_I2C_SAVAGE4 is not set # CONFIG_I2C_SIMTEC is not set +# CONFIG_I2C_SIS5595 is not set +# CONFIG_I2C_SIS630 is not set +# CONFIG_I2C_SIS96X is not set +# CONFIG_I2C_TAOS_EVM is not set # CONFIG_I2C_STUB is not set +# CONFIG_I2C_VIA is not set +# CONFIG_I2C_VIAPRO is not set +# CONFIG_I2C_VOODOO3 is not set # # Miscellaneous I2C 
Chip support # # CONFIG_SENSORS_DS1337 is not set # CONFIG_SENSORS_DS1374 is not set +# CONFIG_DS1682 is not set # CONFIG_SENSORS_EEPROM is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_SENSORS_PCA9539 is not set # CONFIG_SENSORS_PCF8591 is not set # CONFIG_SENSORS_M41T00 is not set # CONFIG_SENSORS_MAX6875 is not set +# CONFIG_SENSORS_TSL2550 is not set # CONFIG_I2C_DEBUG_CORE is not set # CONFIG_I2C_DEBUG_ALGO is not set # CONFIG_I2C_DEBUG_BUS is not set @@ -590,11 +667,8 @@ CONFIG_I2C_MPC=y # # CONFIG_SPI is not set # CONFIG_SPI_MASTER is not set - -# -# Dallas's 1-wire bus -# # CONFIG_W1 is not set +# CONFIG_POWER_SUPPLY is not set CONFIG_HWMON=y # CONFIG_HWMON_VID is not set # CONFIG_SENSORS_ABITUGURU is not set @@ -628,10 +702,13 @@ CONFIG_HWMON=y # CONFIG_SENSORS_MAX6650 is not set # CONFIG_SENSORS_PC87360 is not set # CONFIG_SENSORS_PC87427 is not set +# CONFIG_SENSORS_SIS5595 is not set # CONFIG_SENSORS_SMSC47M1 is not set # CONFIG_SENSORS_SMSC47M192 is not set # CONFIG_SENSORS_SMSC47B397 is not set +# CONFIG_SENSORS_VIA686A is not set # CONFIG_SENSORS_VT1211 is not set +# CONFIG_SENSORS_VT8231 is not set # CONFIG_SENSORS_W83781D is not set # CONFIG_SENSORS_W83791D is not set # CONFIG_SENSORS_W83792D is not set @@ -670,19 +747,14 @@ CONFIG_DAB=y # Sound # # CONFIG_SOUND is not set - -# -# HID Devices -# +CONFIG_HID_SUPPORT=y CONFIG_HID=y # CONFIG_HID_DEBUG is not set - -# -# USB support -# -# CONFIG_USB_ARCH_HAS_HCD is not set -# CONFIG_USB_ARCH_HAS_OHCI is not set -# CONFIG_USB_ARCH_HAS_EHCI is not set +CONFIG_USB_SUPPORT=y +CONFIG_USB_ARCH_HAS_HCD=y +CONFIG_USB_ARCH_HAS_OHCI=y +CONFIG_USB_ARCH_HAS_EHCI=y +# CONFIG_USB is not set # # NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' @@ -706,14 +778,7 @@ CONFIG_HID=y # # LED Triggers # - -# -# InfiniBand support -# - -# -# EDAC - error detection and reporting (RAS) (EXPERIMENTAL) -# +# CONFIG_INFINIBAND is not set # # Real Time Clock @@ -734,6 +799,11 @@ CONFIG_HID=y # # +# Userspace I/O +# +# CONFIG_UIO is not set + +# # File systems # CONFIG_EXT2_FS=y @@ -829,7 +899,6 @@ CONFIG_RPCSEC_GSS_KRB5=y # CONFIG_NCP_FS is not set # CONFIG_CODA_FS is not set # CONFIG_AFS_FS is not set -# CONFIG_9P_FS is not set # # Partition Types @@ -868,6 +937,7 @@ CONFIG_BITREVERSE=y # CONFIG_CRC16 is not set # CONFIG_CRC_ITU_T is not set CONFIG_CRC32=y +# CONFIG_CRC7 is not set # CONFIG_LIBCRC32C is not set CONFIG_PLIST=y CONFIG_HAS_IOMEM=y @@ -892,6 +962,7 @@ CONFIG_ENABLE_MUST_CHECK=y CONFIG_DEBUG_KERNEL=y # CONFIG_DEBUG_SHIRQ is not set CONFIG_DETECT_SOFTLOCKUP=y +CONFIG_SCHED_DEBUG=y # CONFIG_SCHEDSTATS is not set # CONFIG_TIMER_STATS is not set # CONFIG_DEBUG_SLAB is not set @@ -915,7 +986,7 @@ CONFIG_FORCED_INLINING=y CONFIG_DEBUGGER=y # CONFIG_XMON is not set # CONFIG_BDI_SWITCH is not set -CONFIG_BOOTX_TEXT=y +# CONFIG_BOOTX_TEXT is not set CONFIG_PPC_EARLY_DEBUG=y # CONFIG_PPC_EARLY_DEBUG_LPAR is not set # CONFIG_PPC_EARLY_DEBUG_G5 is not set @@ -932,10 +1003,6 @@ CONFIG_PPC_EARLY_DEBUG=y # # CONFIG_KEYS is not set # CONFIG_SECURITY is not set - -# -# Cryptographic options -# CONFIG_CRYPTO=y CONFIG_CRYPTO_ALGAPI=y CONFIG_CRYPTO_BLKCIPHER=y @@ -973,7 +1040,4 @@ CONFIG_CRYPTO_DES=y # CONFIG_CRYPTO_CRC32C is not set # CONFIG_CRYPTO_CAMELLIA is not set # CONFIG_CRYPTO_TEST is not set - -# -# Hardware crypto devices -# +CONFIG_CRYPTO_HW=y Index: linux-rt-rebase.q/arch/powerpc/kernel/pci_32.c =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/kernel/pci_32.c +++ 
linux-rt-rebase.q/arch/powerpc/kernel/pci_32.c @@ -415,15 +415,13 @@ probe_resource(struct pci_bus *parent, s return 0; } -static void __init -update_bridge_base(struct pci_bus *bus, int i) +void __init +update_bridge_resource(struct pci_dev *dev, struct resource *res) { - struct resource *res = bus->resource[i]; u8 io_base_lo, io_limit_lo; u16 mem_base, mem_limit; u16 cmd; unsigned long start, end, off; - struct pci_dev *dev = bus->self; struct pci_controller *hose = dev->sysdata; if (!hose) { @@ -467,12 +465,20 @@ update_bridge_base(struct pci_bus *bus, pci_write_config_word(dev, PCI_PREF_MEMORY_LIMIT, mem_limit); } else { - DBG(KERN_ERR "PCI: ugh, bridge %s res %d has flags=%lx\n", - pci_name(dev), i, res->flags); + DBG(KERN_ERR "PCI: ugh, bridge %s res has flags=%lx\n", + pci_name(dev), res->flags); } pci_write_config_word(dev, PCI_COMMAND, cmd); } +static void __init +update_bridge_base(struct pci_bus *bus, int i) +{ + struct resource *res = bus->resource[i]; + struct pci_dev *dev = bus->self; + update_bridge_resource(dev, res); +} + static inline void alloc_resource(struct pci_dev *dev, int idx) { struct resource *pr, *r = &dev->resource[idx]; @@ -1468,3 +1474,10 @@ EARLY_PCI_OP(read, dword, u32 *) EARLY_PCI_OP(write, byte, u8) EARLY_PCI_OP(write, word, u16) EARLY_PCI_OP(write, dword, u32) + +extern int pci_bus_find_capability (struct pci_bus *bus, unsigned int devfn, int cap); +int early_find_capability(struct pci_controller *hose, int bus, int devfn, + int cap) +{ + return pci_bus_find_capability(fake_pci_bus(hose, bus), devfn, cap); +} Index: linux-rt-rebase.q/arch/powerpc/platforms/82xx/mpc82xx_ads.c =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/platforms/82xx/mpc82xx_ads.c +++ linux-rt-rebase.q/arch/powerpc/platforms/82xx/mpc82xx_ads.c @@ -553,7 +553,8 @@ static void __init mpc82xx_add_bridge(st setup_indirect_pci(hose, r.start + offsetof(pci_cpm2_t, pci_cfg_addr), - r.start + offsetof(pci_cpm2_t, pci_cfg_data)); + r.start + offsetof(pci_cpm2_t, pci_cfg_data), + 0); pci_process_bridge_OF_ranges(hose, np, 1); } Index: linux-rt-rebase.q/arch/powerpc/platforms/83xx/pci.c =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/platforms/83xx/pci.c +++ linux-rt-rebase.q/arch/powerpc/platforms/83xx/pci.c @@ -74,11 +74,11 @@ int __init mpc83xx_add_bridge(struct dev */ /* PCI 1 */ if ((rsrc.start & 0xfffff) == 0x8500) { - setup_indirect_pci(hose, immr + 0x8300, immr + 0x8304); + setup_indirect_pci(hose, immr + 0x8300, immr + 0x8304, 0); } /* PCI 2 */ if ((rsrc.start & 0xfffff) == 0x8600) { - setup_indirect_pci(hose, immr + 0x8380, immr + 0x8384); + setup_indirect_pci(hose, immr + 0x8380, immr + 0x8384, 0); primary = 0; } Index: linux-rt-rebase.q/arch/powerpc/platforms/85xx/Kconfig =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/platforms/85xx/Kconfig +++ linux-rt-rebase.q/arch/powerpc/platforms/85xx/Kconfig @@ -18,6 +18,7 @@ config MPC8560_ADS config MPC85xx_CDS bool "Freescale MPC85xx CDS" select DEFAULT_UIMAGE + select PPC_I8259 help This option enables support for the MPC85xx CDS board @@ -30,6 +31,7 @@ config MPC85xx_MDS config MPC8544_DS bool "Freescale MPC8544 DS" + select PPC_I8259 select DEFAULT_UIMAGE help This option enables support for the MPC8544 DS board @@ -50,9 +52,9 @@ config MPC8560 config MPC85xx bool select PPC_UDBG_16550 - select PPC_INDIRECT_PCI - select PPC_INDIRECT_PCI_BE + select 
PPC_INDIRECT_PCI if PCI
 select MPIC
+ select FSL_PCI if PCI
 select SERIAL_8250_SHARE_IRQ if SERIAL_8250
 default y if MPC8540_ADS || MPC85xx_CDS || MPC8560_ADS \
 || MPC85xx_MDS || MPC8544_DS
Index: linux-rt-rebase.q/arch/powerpc/platforms/85xx/Makefile
===================================================================
--- linux-rt-rebase.q.orig/arch/powerpc/platforms/85xx/Makefile
+++ linux-rt-rebase.q/arch/powerpc/platforms/85xx/Makefile
@@ -1,7 +1,7 @@
 #
 # Makefile for the PowerPC 85xx linux kernel.
 #
-obj-$(CONFIG_PPC_85xx) += misc.o pci.o
+obj-$(CONFIG_PPC_85xx) += misc.o
 obj-$(CONFIG_MPC8540_ADS) += mpc85xx_ads.o
 obj-$(CONFIG_MPC8560_ADS) += mpc85xx_ads.o
 obj-$(CONFIG_MPC85xx_CDS) += mpc85xx_cds.o
Index: linux-rt-rebase.q/arch/powerpc/platforms/85xx/mpc8544_ds.c
===================================================================
--- linux-rt-rebase.q.orig/arch/powerpc/platforms/85xx/mpc8544_ds.c
+++ linux-rt-rebase.q/arch/powerpc/platforms/85xx/mpc8544_ds.c
@@ -2,6 +2,8 @@
  * MPC8544 DS Board Setup
  *
  * Author Xianghua Xiao (x.xiao@freescale.com)
+ * Roy Zang
+ * - Add PCI/PCI Express support
  * Copyright 2007 Freescale Semiconductor Inc.
  *
  * This program is free software; you can redistribute it and/or modify it
@@ -12,13 +14,16 @@
 #include
 #include
+#include
 #include
 #include
 #include
+#include
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -27,6 +32,7 @@
 #include
 #include
+#include
 #include "mpc85xx.h"
 #undef DEBUG
@@ -37,6 +43,17 @@
 #define DBG(fmt, args...)
 #endif
+#ifdef CONFIG_PPC_I8259
+static void mpc8544_8259_cascade(unsigned int irq, struct irq_desc *desc)
+{
+ unsigned int cascade_irq = i8259_irq();
+
+ if (cascade_irq != NO_IRQ) {
+ generic_handle_irq(cascade_irq);
+ }
+ desc->chip->eoi(irq);
+}
+#endif /* CONFIG_PPC_I8259 */
 void __init mpc8544_ds_pic_init(void)
 {
@@ -96,19 +113,240 @@ void __init mpc8544_ds_pic_init(void)
 #endif /* CONFIG_PPC_I8259 */
 }
+#ifdef CONFIG_PCI
+enum pirq { PIRQA = 8, PIRQB, PIRQC, PIRQD, PIRQE, PIRQF, PIRQG, PIRQH };
+
+/*
+ * Value in table -- IRQ number
+ */
+const unsigned char uli1575_irq_route_table[16] = {
+ 0, /* 0: Reserved */
+ 0x8,
+ 0, /* 2: Reserved */
+ 0x2,
+ 0x4,
+ 0x5,
+ 0x7,
+ 0x6,
+ 0, /* 8: Reserved */
+ 0x1,
+ 0x3,
+ 0x9,
+ 0xb,
+ 0, /* 13: Reserved */
+ 0xd,
+ 0xf,
+};
+
+static int __devinit
+get_pci_irq_from_of(struct pci_controller *hose, int slot, int pin)
+{
+ struct of_irq oirq;
+ u32 laddr[3];
+ struct device_node *hosenode = hose ?
hose->arch_data : NULL;
+
+ if (!hosenode)
+ return -EINVAL;
+
+ laddr[0] = (hose->first_busno << 16) | (PCI_DEVFN(slot, 0) << 8);
+ laddr[1] = laddr[2] = 0;
+ of_irq_map_raw(hosenode, &pin, 1, laddr, &oirq);
+ DBG("mpc8544_ds: pci irq addr %x, slot %d, pin %d, irq %d\n",
+ laddr[0], slot, pin, oirq.specifier[0]);
+ return oirq.specifier[0];
+}
+
+/*8259*/
+static void __devinit quirk_uli1575(struct pci_dev *dev)
+{
+ unsigned short temp;
+ struct pci_controller *hose = pci_bus_to_host(dev->bus);
+ unsigned char irq2pin[16];
+ unsigned long pirq_map_word = 0;
+ u32 irq;
+ int i;
+
+ /*
+ * ULI1575 interrupts route setup
+ */
+ memset(irq2pin, 0, 16); /* Initialize default value 0 */
+
+ irq2pin[6] = PIRQA + 3; /* enable mapping for IRQ6 to PIRQD, used by SATA */
+
+ /*
+ * PIRQE -> PIRQH mapping set manually
+ *
+ * IRQ pin IRQ#
+ * PIRQE ---- 9
+ * PIRQF ---- 10
+ * PIRQG ---- 11
+ * PIRQH ---- 12
+ */
+ for (i = 0; i < 4; i++)
+ irq2pin[i + 9] = PIRQE + i;
+
+ /* Set IRQ-PIRQ Mapping to ULI1575 */
+ for (i = 0; i < 16; i++)
+ if (irq2pin[i])
+ pirq_map_word |= (uli1575_irq_route_table[i] & 0xf)
+ << ((irq2pin[i] - PIRQA) * 4);
+
+ pirq_map_word |= 1 << 26; /* disable INTx in EP mode */
+
+ /* ULI1575 IRQ mapping conf register default value is 0xb9317542 */
+ DBG("Setup ULI1575 IRQ mapping configuration register value = 0x%x\n",
+ (int)pirq_map_word);
+ pci_write_config_dword(dev, 0x48, pirq_map_word);
+
+#define ULI1575_SET_DEV_IRQ(slot, pin, reg) \
+ do { \
+ int irq; \
+ irq = get_pci_irq_from_of(hose, slot, pin); \
+ if (irq > 0 && irq < 16) \
+ pci_write_config_byte(dev, reg, irq2pin[irq]); \
+ else \
+ printk(KERN_WARNING "ULI1575 device " \
+ "(slot %d, pin %d) irq %d is invalid.\n", \
+ slot, pin, irq); \
+ } while(0)
+
+ /* USB 1.1 OHCI controller 1, slot 28, pin 1 */
+ ULI1575_SET_DEV_IRQ(28, 1, 0x86);
+
+ /* USB 1.1 OHCI controller 2, slot 28, pin 2 */
+ ULI1575_SET_DEV_IRQ(28, 2, 0x87);
+
+ /* USB 1.1 OHCI controller 3, slot 28, pin 3 */
+ ULI1575_SET_DEV_IRQ(28, 3, 0x88);
+
+ /* USB 2.0 controller, slot 28, pin 4 */
+ irq = get_pci_irq_from_of(hose, 28, 4);
+ if (irq >= 0 && irq <= 15)
+ pci_write_config_dword(dev, 0x74, uli1575_irq_route_table[irq]);
+
+ /* Audio controller, slot 29, pin 1 */
+ ULI1575_SET_DEV_IRQ(29, 1, 0x8a);
+
+ /* Modem controller, slot 29, pin 2 */
+ ULI1575_SET_DEV_IRQ(29, 2, 0x8b);
+
+ /* HD audio controller, slot 29, pin 3 */
+ ULI1575_SET_DEV_IRQ(29, 3, 0x8c);
+
+ /* SMB interrupt: slot 30, pin 1 */
+ ULI1575_SET_DEV_IRQ(30, 1, 0x8e);
+
+ /* PMU ACPI SCI interrupt: slot 30, pin 2 */
+ ULI1575_SET_DEV_IRQ(30, 2, 0x8f);
+
+ /* Serial ATA interrupt: slot 31, pin 1 */
+ ULI1575_SET_DEV_IRQ(31, 1, 0x8d);
+
+ /* Primary PATA IDE IRQ: 14
+ * Secondary PATA IDE IRQ: 15
+ */
+ pci_write_config_byte(dev, 0x44, 0x30 | uli1575_irq_route_table[14]);
+ pci_write_config_byte(dev, 0x75, uli1575_irq_route_table[15]);
+
+ /* Set IRQ14 and IRQ15 to legacy IRQs */
+ pci_read_config_word(dev, 0x46, &temp);
+ temp |= 0xc000;
+ pci_write_config_word(dev, 0x46, temp);
+
+ /* Set i8259 interrupt trigger
+ * IRQ 3: Level
+ * IRQ 4: Level
+ * IRQ 5: Level
+ * IRQ 6: Level
+ * IRQ 7: Level
+ * IRQ 9: Level
+ * IRQ 10: Level
+ * IRQ 11: Level
+ * IRQ 12: Level
+ * IRQ 14: Edge
+ * IRQ 15: Edge
+ */
+ outb(0xfa, 0x4d0);
+ outb(0x1e, 0x4d1);
+
+#undef ULI1575_SET_DEV_IRQ
+}
+
+/* SATA */
+static void __devinit quirk_uli5288(struct pci_dev *dev)
+{
+ unsigned char c;
+
+ pci_read_config_byte(dev, 0x83, &c);
+ c |= 0x80; /* read/write lock */
+ pci_write_config_byte(dev, 0x83, c);
+
+
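+ /* Bit 7 of config register 0x83 apparently acts as a write-enable for
+ * the chip's otherwise read-only header bytes: with it set, the class
+ * code at 0x09/0x0a below can be rewritten so the SATA controller
+ * enumerates as a legacy IDE-class device, and the same bit is
+ * cleared again once the override is in place. */
+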
pci_write_config_byte(dev, 0x09, 0x01); /* Base class code: storage */ + pci_write_config_byte(dev, 0x0a, 0x06); /* IDE disk */ + + pci_read_config_byte(dev, 0x83, &c); + c &= 0x7f; + pci_write_config_byte(dev, 0x83, c); + + pci_read_config_byte(dev, 0x84, &c); + c |= 0x01; /* emulated PATA mode enabled */ + pci_write_config_byte(dev, 0x84, c); +} + +/* PATA */ +static void __devinit quirk_uli5229(struct pci_dev *dev) +{ + unsigned short temp; + pci_write_config_word(dev, 0x04, 0x0405); /* MEM IO MSI */ + pci_read_config_word(dev, 0x4a, &temp); + temp |= 0x1000; /* Enable Native IRQ 14/15 */ + pci_write_config_word(dev, 0x4a, temp); +} + +/*Bridge*/ +static void __devinit early_uli5249(struct pci_dev *dev) +{ + unsigned char temp; + pci_write_config_word(dev, 0x04, 0x0007); /* mem access */ + pci_read_config_byte(dev, 0x7c, &temp); + pci_write_config_byte(dev, 0x7c, 0x80); /* R/W lock control */ + pci_write_config_byte(dev, 0x09, 0x01); /* set as pci-pci bridge */ + pci_write_config_byte(dev, 0x7c, temp); /* restore pci bus debug control */ + dev->class |= 0x1; +} + +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AL, 0x1575, quirk_uli1575); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AL, 0x5288, quirk_uli5288); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AL, 0x5229, quirk_uli5229); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AL, 0x5249, early_uli5249); +#endif /* CONFIG_PCI */ /* * Setup the architecture */ static void __init mpc8544_ds_setup_arch(void) { +#ifdef CONFIG_PCI + struct device_node *np; +#endif + if (ppc_md.progress) ppc_md.progress("mpc8544_ds_setup_arch()", 0); +#ifdef CONFIG_PCI + for (np = NULL; (np = of_find_node_by_type(np, "pci")) != NULL;) { + struct resource rsrc; + of_address_to_resource(np, 0, &rsrc); + if ((rsrc.start & 0xfffff) == 0xb000) + fsl_add_bridge(np, 1); + else + fsl_add_bridge(np, 0); + } +#endif + printk("MPC8544 DS board from Freescale Semiconductor\n"); } - /* * Called very early, device-tree isn't unflattened */ @@ -124,6 +362,7 @@ define_machine(mpc8544_ds) { .probe = mpc8544_ds_probe, .setup_arch = mpc8544_ds_setup_arch, .init_IRQ = mpc8544_ds_pic_init, + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, .get_irq = mpic_get_irq, .restart = mpc85xx_restart, .calibrate_decr = generic_calibrate_decr, Index: linux-rt-rebase.q/arch/powerpc/platforms/85xx/mpc85xx.h =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/platforms/85xx/mpc85xx.h +++ linux-rt-rebase.q/arch/powerpc/platforms/85xx/mpc85xx.h @@ -15,4 +15,3 @@ */ extern void mpc85xx_restart(char *); -extern int mpc85xx_add_bridge(struct device_node *dev); Index: linux-rt-rebase.q/arch/powerpc/platforms/85xx/mpc85xx_ads.c =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/platforms/85xx/mpc85xx_ads.c +++ linux-rt-rebase.q/arch/powerpc/platforms/85xx/mpc85xx_ads.c @@ -29,6 +29,7 @@ #include #include +#include #include "mpc85xx.h" #ifdef CONFIG_CPM2 @@ -217,7 +218,7 @@ static void __init mpc85xx_ads_setup_arc #ifdef CONFIG_PCI for (np = NULL; (np = of_find_node_by_type(np, "pci")) != NULL;) - mpc85xx_add_bridge(np); + fsl_add_bridge(np, 1); ppc_md.pci_exclude_device = mpc85xx_exclude_device; #endif } Index: linux-rt-rebase.q/arch/powerpc/platforms/85xx/mpc85xx_cds.c =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/platforms/85xx/mpc85xx_cds.c +++ linux-rt-rebase.q/arch/powerpc/platforms/85xx/mpc85xx_cds.c @@ -24,6 +24,7 @@ #include #include #include 
+#include
 #include
@@ -45,6 +46,7 @@
 #include
 #include
+#include
 #include "mpc85xx.h"
 static int cds_pci_slot = 2;
@@ -58,8 +60,6 @@ static volatile u8 *cadmus;
 static int mpc85xx_exclude_device(struct pci_controller *hose,
 u_char bus, u_char devfn)
 {
- if ((bus == hose->first_busno) && PCI_SLOT(devfn) == 0)
- return PCIBIOS_DEVICE_NOT_FOUND;
 /* We explicitly do not go past the Tundra 320 Bridge */
 if ((bus == 1) && (PCI_SLOT(devfn) == ARCADIA_2ND_BRIDGE_IDSEL))
 return PCIBIOS_DEVICE_NOT_FOUND;
@@ -69,6 +69,37 @@ static int mpc85xx_exclude_device(struct
 return PCIBIOS_SUCCESSFUL;
 }
+static void mpc85xx_cds_restart(char *cmd)
+{
+ struct pci_dev *dev;
+ u_char tmp;
+
+ if ((dev = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686,
+ NULL))) {
+
+ /* Use the VIA Super Southbridge to force a PCI reset */
+ pci_read_config_byte(dev, 0x47, &tmp);
+ pci_write_config_byte(dev, 0x47, tmp | 1);
+
+ /* Flush the outbound PCI write queues */
+ pci_read_config_byte(dev, 0x47, &tmp);
+
+ /*
+ * At this point, the hardware reset should have triggered.
+ * However, if it doesn't work for some mysterious reason,
+ * just fall through to the default reset below.
+ */
+
+ pci_dev_put(dev);
+ }
+
+ /*
+ * If we can't find the VIA chip (maybe the P2P bridge is disabled)
+ * or the VIA chip reset didn't work, just use the default reset.
+ */
+ mpc85xx_restart(NULL);
+}
+
 static void __init mpc85xx_cds_pci_irq_fixup(struct pci_dev *dev)
 {
 u_char c;
@@ -98,7 +129,7 @@ static void __init mpc85xx_cds_pci_irq_f
 /* There are two USB controllers.
 * Identify them by function number
 */
- if (PCI_FUNC(dev->devfn))
+ if (PCI_FUNC(dev->devfn) == 3)
 dev->irq = 11;
 else
 dev->irq = 10;
@@ -109,17 +140,41 @@ static void __init mpc85xx_cds_pci_irq_f
 }
 }
+static void __devinit skip_fake_bridge(struct pci_dev *dev)
+{
+ /* Make it an error to skip the fake bridge
+ * in pci_setup_device() in probe.c */
+ dev->hdr_type = 0x7f;
+}
+DECLARE_PCI_FIXUP_EARLY(0x1957, 0x3fff, skip_fake_bridge);
+DECLARE_PCI_FIXUP_EARLY(0x3fff, 0x1957, skip_fake_bridge);
+DECLARE_PCI_FIXUP_EARLY(0xff3f, 0x5719, skip_fake_bridge);
+
 #ifdef CONFIG_PPC_I8259
-#warning The i8259 PIC support is currently broken
-static void mpc85xx_8259_cascade(unsigned int irq, struct irq_desc *desc)
+static void mpc85xx_8259_cascade_handler(unsigned int irq,
+ struct irq_desc *desc)
 {
 unsigned int cascade_irq = i8259_irq();
 if (cascade_irq != NO_IRQ)
+ /* handle an interrupt from the 8259 */
 generic_handle_irq(cascade_irq);
- desc->chip->eoi(irq);
+ /* check for any interrupts from the shared IRQ line */
+ handle_fasteoi_irq(irq, desc);
+}
+
+static irqreturn_t mpc85xx_8259_cascade_action(int irq, void *dev_id)
+{
+ return IRQ_HANDLED;
 }
+
+static struct irqaction mpc85xxcds_8259_irqaction = {
+ .handler = mpc85xx_8259_cascade_action,
+ .flags = IRQF_SHARED,
+ .mask = CPU_MASK_NONE,
+ .name = "8259 cascade",
+};
 #endif /* PPC_I8259 */
 #endif /* CONFIG_PCI */
@@ -128,10 +183,6 @@ static void __init mpc85xx_cds_pic_init(
 struct mpic *mpic;
 struct resource r;
 struct device_node *np = NULL;
-#ifdef CONFIG_PPC_I8259
- struct device_node *cascade_node = NULL;
- int cascade_irq;
-#endif
 np = of_find_node_by_type(np, "open-pic");
@@ -155,8 +206,19 @@ static void __init mpc85xx_cds_pic_init(
 of_node_put(np);
 mpic_init(mpic);
+}
+
+#if defined(CONFIG_PPC_I8259) && defined(CONFIG_PCI)
+static int mpc85xx_cds_8259_attach(void)
+{
+ int ret;
+ struct device_node *np = NULL;
+ struct device_node *cascade_node = NULL;
+ int cascade_irq;
+
+ if (!machine_is(mpc85xx_cds))
+
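+ /* This attach routine runs as a device_initcall() on every supported
+ * board, so anything other than the CDS bails out here before
+ * touching the cascaded i8259. */
+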
return 0; -#ifdef CONFIG_PPC_I8259 /* Initialize the i8259 controller */ for_each_node_by_type(np, "interrupt-controller") if (of_device_is_compatible(np, "chrp,iic")) { @@ -166,22 +228,39 @@ static void __init mpc85xx_cds_pic_init( if (cascade_node == NULL) { printk(KERN_DEBUG "Could not find i8259 PIC\n"); - return; + return -ENODEV; } cascade_irq = irq_of_parse_and_map(cascade_node, 0); if (cascade_irq == NO_IRQ) { printk(KERN_ERR "Failed to map cascade interrupt\n"); - return; + return -ENXIO; } i8259_init(cascade_node, 0); of_node_put(cascade_node); - set_irq_chained_handler(cascade_irq, mpc85xx_8259_cascade); -#endif /* CONFIG_PPC_I8259 */ + /* + * Hook the interrupt to make sure desc->action is never NULL. + * This is required to ensure that the interrupt does not get + * disabled when the last user of the shared IRQ line frees their + * interrupt. + */ + if ((ret = setup_irq(cascade_irq, &mpc85xxcds_8259_irqaction))) { + printk(KERN_ERR "Failed to setup cascade interrupt\n"); + return ret; + } + + /* Success. Connect our low-level cascade handler. */ + set_irq_handler(cascade_irq, mpc85xx_8259_cascade_handler); + + return 0; } +device_initcall(mpc85xx_cds_8259_attach); + +#endif /* CONFIG_PPC_I8259 */ + /* * Setup the architecture */ @@ -218,9 +297,14 @@ static void __init mpc85xx_cds_setup_arc } #ifdef CONFIG_PCI - for (np = NULL; (np = of_find_node_by_type(np, "pci")) != NULL;) - mpc85xx_add_bridge(np); - + for (np = NULL; (np = of_find_node_by_type(np, "pci")) != NULL;) { + struct resource rsrc; + of_address_to_resource(np, 0, &rsrc); + if ((rsrc.start & 0xfffff) == 0x8000) + fsl_add_bridge(np, 1); + else + fsl_add_bridge(np, 0); + } ppc_md.pci_irq_fixup = mpc85xx_cds_pci_irq_fixup; ppc_md.pci_exclude_device = mpc85xx_exclude_device; #endif @@ -265,7 +349,12 @@ define_machine(mpc85xx_cds) { .init_IRQ = mpc85xx_cds_pic_init, .show_cpuinfo = mpc85xx_cds_show_cpuinfo, .get_irq = mpic_get_irq, +#ifdef CONFIG_PCI + .restart = mpc85xx_cds_restart, +#else .restart = mpc85xx_restart, +#endif .calibrate_decr = generic_calibrate_decr, .progress = udbg_progress, + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, }; Index: linux-rt-rebase.q/arch/powerpc/platforms/85xx/mpc85xx_mds.c =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/platforms/85xx/mpc85xx_mds.c +++ linux-rt-rebase.q/arch/powerpc/platforms/85xx/mpc85xx_mds.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include #include @@ -94,9 +95,8 @@ static void __init mpc85xx_mds_setup_arc } #ifdef CONFIG_PCI - for (np = NULL; (np = of_find_node_by_type(np, "pci")) != NULL;) { - mpc85xx_add_bridge(np); - } + for (np = NULL; (np = of_find_node_by_type(np, "pci")) != NULL;) + fsl_add_bridge(np, 1); of_node_put(np); #endif @@ -208,4 +208,5 @@ define_machine(mpc85xx_mds) { .restart = mpc85xx_restart, .calibrate_decr = generic_calibrate_decr, .progress = udbg_progress, + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, }; Index: linux-rt-rebase.q/arch/powerpc/platforms/85xx/pci.c =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/platforms/85xx/pci.c +++ /dev/null @@ -1,91 +0,0 @@ -/* - * FSL SoC setup code - * - * Maintained by Kumar Gala (see MAINTAINERS for contact information) - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any 
later version. - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#undef DEBUG - -#ifdef DEBUG -#define DBG(x...) printk(x) -#else -#define DBG(x...) -#endif - -#ifdef CONFIG_PCI -int __init mpc85xx_add_bridge(struct device_node *dev) -{ - int len; - struct pci_controller *hose; - struct resource rsrc; - const int *bus_range; - int primary = 1, has_address = 0; - phys_addr_t immr = get_immrbase(); - - DBG("Adding PCI host bridge %s\n", dev->full_name); - - /* Fetch host bridge registers address */ - has_address = (of_address_to_resource(dev, 0, &rsrc) == 0); - - /* Get bus range if any */ - bus_range = of_get_property(dev, "bus-range", &len); - if (bus_range == NULL || len < 2 * sizeof(int)) { - printk(KERN_WARNING "Can't get bus-range for %s, assume" - " bus 0\n", dev->full_name); - } - - pci_assign_all_buses = 1; - hose = pcibios_alloc_controller(dev); - if (!hose) - return -ENOMEM; - - hose->first_busno = bus_range ? bus_range[0] : 0; - hose->last_busno = bus_range ? bus_range[1] : 0xff; - - /* PCI 1 */ - if ((rsrc.start & 0xfffff) == 0x8000) { - setup_indirect_pci(hose, immr + 0x8000, immr + 0x8004); - } - /* PCI 2 */ - if ((rsrc.start & 0xfffff) == 0x9000) { - setup_indirect_pci(hose, immr + 0x9000, immr + 0x9004); - primary = 0; - } - - printk(KERN_INFO "Found MPC85xx PCI host bridge at 0x%016llx. " - "Firmware bus number: %d->%d\n", - (unsigned long long)rsrc.start, hose->first_busno, - hose->last_busno); - - DBG(" ->Hose at 0x%p, cfg_addr=0x%p,cfg_data=0x%p\n", - hose, hose->cfg_addr, hose->cfg_data); - - /* Interpret the "ranges" property */ - /* This also maps the I/O region and sets isa_io/mem_base */ - pci_process_bridge_OF_ranges(hose, dev, primary); - - return 0; -} - -#endif Index: linux-rt-rebase.q/arch/powerpc/platforms/86xx/Kconfig =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/platforms/86xx/Kconfig +++ linux-rt-rebase.q/arch/powerpc/platforms/86xx/Kconfig @@ -14,8 +14,7 @@ endchoice config MPC8641 bool - select PPC_INDIRECT_PCI - select PPC_INDIRECT_PCI_BE + select FSL_PCI if PCI select PPC_UDBG_16550 select MPIC default y if MPC8641_HPCN Index: linux-rt-rebase.q/arch/powerpc/platforms/86xx/Makefile =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/platforms/86xx/Makefile +++ linux-rt-rebase.q/arch/powerpc/platforms/86xx/Makefile @@ -4,4 +4,3 @@ obj-$(CONFIG_SMP) += mpc86xx_smp.o obj-$(CONFIG_MPC8641_HPCN) += mpc86xx_hpcn.o -obj-$(CONFIG_PCI) += pci.o Index: linux-rt-rebase.q/arch/powerpc/platforms/86xx/mpc86xx.h =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/platforms/86xx/mpc86xx.h +++ linux-rt-rebase.q/arch/powerpc/platforms/86xx/mpc86xx.h @@ -15,11 +15,6 @@ * mpc86xx_* files. Mostly for use by mpc86xx_setup(). 
*/ -extern int mpc86xx_add_bridge(struct device_node *dev); - -extern int mpc86xx_exclude_device(struct pci_controller *hose, - u_char bus, u_char devfn); - extern void __init mpc86xx_smp_init(void); #endif /* __MPC86XX_H__ */ Index: linux-rt-rebase.q/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c +++ linux-rt-rebase.q/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c @@ -31,6 +31,7 @@ #include +#include #include #include "mpc86xx.h" @@ -344,8 +345,14 @@ mpc86xx_hpcn_setup_arch(void) } #ifdef CONFIG_PCI - for (np = NULL; (np = of_find_node_by_type(np, "pci")) != NULL;) - mpc86xx_add_bridge(np); + for (np = NULL; (np = of_find_node_by_type(np, "pci")) != NULL;) { + struct resource rsrc; + of_address_to_resource(np, 0, &rsrc); + if ((rsrc.start & 0xfffff) == 0x8000) + fsl_add_bridge(np, 1); + else + fsl_add_bridge(np, 0); + } #endif printk("MPC86xx HPCN board from Freescale Semiconductor\n"); @@ -424,7 +431,6 @@ mpc86xx_time_init(void) return 0; } - define_machine(mpc86xx_hpcn) { .name = "MPC86xx HPCN", .probe = mpc86xx_hpcn_probe, @@ -436,4 +442,5 @@ define_machine(mpc86xx_hpcn) { .time_init = mpc86xx_time_init, .calibrate_decr = generic_calibrate_decr, .progress = udbg_progress, + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, }; Index: linux-rt-rebase.q/arch/powerpc/platforms/86xx/pci.c =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/platforms/86xx/pci.c +++ /dev/null @@ -1,238 +0,0 @@ -/* - * MPC86XX pci setup code - * - * Recode: ZHANG WEI - * Initial author: Xianghua Xiao - * - * Copyright 2006 Freescale Semiconductor Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "mpc86xx.h" - -#undef DEBUG - -#ifdef DEBUG -#define DBG(fmt, args...) printk(KERN_ERR "%s: " fmt, __FUNCTION__, ## args) -#else -#define DBG(fmt, args...) 
-#endif - -struct pcie_outbound_window_regs { - uint pexotar; /* 0x.0 - PCI Express outbound translation address register */ - uint pexotear; /* 0x.4 - PCI Express outbound translation extended address register */ - uint pexowbar; /* 0x.8 - PCI Express outbound window base address register */ - char res1[4]; - uint pexowar; /* 0x.10 - PCI Express outbound window attributes register */ - char res2[12]; -}; - -struct pcie_inbound_window_regs { - uint pexitar; /* 0x.0 - PCI Express inbound translation address register */ - char res1[4]; - uint pexiwbar; /* 0x.8 - PCI Express inbound window base address register */ - uint pexiwbear; /* 0x.c - PCI Express inbound window base extended address register */ - uint pexiwar; /* 0x.10 - PCI Express inbound window attributes register */ - char res2[12]; -}; - -static void __init setup_pcie_atmu(struct pci_controller *hose, struct resource *rsrc) -{ - volatile struct ccsr_pex *pcie; - volatile struct pcie_outbound_window_regs *pcieow; - volatile struct pcie_inbound_window_regs *pcieiw; - int i = 0; - - DBG("PCIE memory map start 0x%x, size 0x%x\n", rsrc->start, - rsrc->end - rsrc->start + 1); - pcie = ioremap(rsrc->start, rsrc->end - rsrc->start + 1); - - /* Disable all windows (except pexowar0 since its ignored) */ - pcie->pexowar1 = 0; - pcie->pexowar2 = 0; - pcie->pexowar3 = 0; - pcie->pexowar4 = 0; - pcie->pexiwar1 = 0; - pcie->pexiwar2 = 0; - pcie->pexiwar3 = 0; - - pcieow = (struct pcie_outbound_window_regs *)&pcie->pexotar1; - pcieiw = (struct pcie_inbound_window_regs *)&pcie->pexitar1; - - /* Setup outbound MEM window */ - for(i = 0; i < 3; i++) - if (hose->mem_resources[i].flags & IORESOURCE_MEM){ - DBG("PCIE MEM resource start 0x%08x, size 0x%08x.\n", - hose->mem_resources[i].start, - hose->mem_resources[i].end - - hose->mem_resources[i].start + 1); - pcieow->pexotar = (hose->mem_resources[i].start) >> 12 - & 0x000fffff; - pcieow->pexotear = 0; - pcieow->pexowbar = (hose->mem_resources[i].start) >> 12 - & 0x000fffff; - /* Enable, Mem R/W */ - pcieow->pexowar = 0x80044000 | - (__ilog2(hose->mem_resources[i].end - - hose->mem_resources[i].start + 1) - - 1); - pcieow++; - } - - /* Setup outbound IO window */ - if (hose->io_resource.flags & IORESOURCE_IO){ - DBG("PCIE IO resource start 0x%08x, size 0x%08x, phy base 0x%08x.\n", - hose->io_resource.start, - hose->io_resource.end - hose->io_resource.start + 1, - hose->io_base_phys); - pcieow->pexotar = (hose->io_resource.start) >> 12 & 0x000fffff; - pcieow->pexotear = 0; - pcieow->pexowbar = (hose->io_base_phys) >> 12 & 0x000fffff; - /* Enable, IO R/W */ - pcieow->pexowar = 0x80088000 | (__ilog2(hose->io_resource.end - - hose->io_resource.start + 1) - 1); - } - - /* Setup 2G inbound Memory Window @ 0 */ - pcieiw->pexitar = 0x00000000; - pcieiw->pexiwbar = 0x00000000; - /* Enable, Prefetch, Local Mem, Snoop R/W, 2G */ - pcieiw->pexiwar = 0xa0f5501e; -} - -static void __init -mpc86xx_setup_pcie(struct pci_controller *hose, u32 pcie_offset, u32 pcie_size) -{ - u16 cmd; - - DBG("PCIE host controller register offset 0x%08x, size 0x%08x.\n", - pcie_offset, pcie_size); - - early_read_config_word(hose, 0, 0, PCI_COMMAND, &cmd); - cmd |= PCI_COMMAND_SERR | PCI_COMMAND_MASTER | PCI_COMMAND_MEMORY - | PCI_COMMAND_IO; - early_write_config_word(hose, 0, 0, PCI_COMMAND, cmd); - - early_write_config_byte(hose, 0, 0, PCI_LATENCY_TIMER, 0x80); -} - -static void __devinit quirk_fsl_pcie_transparent(struct pci_dev *dev) -{ - struct resource *res; - int i, res_idx = PCI_BRIDGE_RESOURCES; - struct pci_controller *hose; - 
- /* - * Make the bridge be transparent. - */ - dev->transparent = 1; - - hose = pci_bus_to_host(dev->bus); - if (!hose) { - printk(KERN_ERR "Can't find hose for bus %d\n", - dev->bus->number); - return; - } - - if (hose->io_resource.flags) { - res = &dev->resource[res_idx++]; - res->start = hose->io_resource.start; - res->end = hose->io_resource.end; - res->flags = hose->io_resource.flags; - } - - for (i = 0; i < 3; i++) { - res = &dev->resource[res_idx + i]; - res->start = hose->mem_resources[i].start; - res->end = hose->mem_resources[i].end; - res->flags = hose->mem_resources[i].flags; - } -} - - -DECLARE_PCI_FIXUP_EARLY(0x1957, 0x7010, quirk_fsl_pcie_transparent); -DECLARE_PCI_FIXUP_EARLY(0x1957, 0x7011, quirk_fsl_pcie_transparent); - -#define PCIE_LTSSM 0x404 /* PCIe Link Training and Status */ -#define PCIE_LTSSM_L0 0x16 /* L0 state */ - -int __init mpc86xx_add_bridge(struct device_node *dev) -{ - int len; - struct pci_controller *hose; - struct resource rsrc; - const int *bus_range; - int has_address = 0; - int primary = 0; - u16 val; - - DBG("Adding PCIE host bridge %s\n", dev->full_name); - - /* Fetch host bridge registers address */ - has_address = (of_address_to_resource(dev, 0, &rsrc) == 0); - - /* Get bus range if any */ - bus_range = of_get_property(dev, "bus-range", &len); - if (bus_range == NULL || len < 2 * sizeof(int)) - printk(KERN_WARNING "Can't get bus-range for %s, assume" - " bus 0\n", dev->full_name); - - pci_assign_all_buses = 1; - hose = pcibios_alloc_controller(dev); - if (!hose) - return -ENOMEM; - - hose->indirect_type = PPC_INDIRECT_TYPE_EXT_REG | - PPC_INDIRECT_TYPE_SURPRESS_PRIMARY_BUS; - - hose->first_busno = bus_range ? bus_range[0] : 0x0; - hose->last_busno = bus_range ? bus_range[1] : 0xff; - - setup_indirect_pci(hose, rsrc.start, rsrc.start + 0x4); - - /* Probe the hose link training status */ - early_read_config_word(hose, 0, 0, PCIE_LTSSM, &val); - if (val < PCIE_LTSSM_L0) - return -ENXIO; - - /* Setup the PCIE host controller. */ - mpc86xx_setup_pcie(hose, rsrc.start, rsrc.end - rsrc.start + 1); - - if ((rsrc.start & 0xfffff) == 0x8000) - primary = 1; - - printk(KERN_INFO "Found MPC86xx PCIE host bridge at 0x%08lx. 
" - "Firmware bus number: %d->%d\n", - (unsigned long) rsrc.start, - hose->first_busno, hose->last_busno); - - DBG(" ->Hose at 0x%p, cfg_addr=0x%p,cfg_data=0x%p\n", - hose, hose->cfg_addr, hose->cfg_data); - - /* Interpret the "ranges" property */ - /* This also maps the I/O region and sets isa_io/mem_base */ - pci_process_bridge_OF_ranges(hose, dev, primary); - - /* Setup PEX window registers */ - setup_pcie_atmu(hose, &rsrc); - - return 0; -} Index: linux-rt-rebase.q/arch/powerpc/platforms/cell/spufs/spufs.h =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/platforms/cell/spufs/spufs.h +++ linux-rt-rebase.q/arch/powerpc/platforms/cell/spufs/spufs.h @@ -40,13 +40,10 @@ enum { struct spu_context_ops; struct spu_gang; -enum { - SPU_SCHED_WAS_ACTIVE, /* was active upon spu_acquire_saved() */ -}; - /* ctx->sched_flags */ enum { SPU_SCHED_NOTIFY_ACTIVE, + SPU_SCHED_WAS_ACTIVE, /* was active upon spu_acquire_saved() */ }; struct spu_context { Index: linux-rt-rebase.q/arch/powerpc/platforms/chrp/pci.c =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/platforms/chrp/pci.c +++ linux-rt-rebase.q/arch/powerpc/platforms/chrp/pci.c @@ -181,7 +181,7 @@ setup_python(struct pci_controller *hose } iounmap(reg); - setup_indirect_pci(hose, r.start + 0xf8000, r.start + 0xf8010); + setup_indirect_pci(hose, r.start + 0xf8000, r.start + 0xf8010, 0); } /* Marvell Discovery II based Pegasos 2 */ @@ -277,13 +277,14 @@ chrp_find_bridges(void) hose->cfg_data = p; gg2_pci_config_base = p; } else if (is_pegasos == 1) { - setup_indirect_pci(hose, 0xfec00cf8, 0xfee00cfc); + setup_indirect_pci(hose, 0xfec00cf8, 0xfee00cfc, 0); } else if (is_pegasos == 2) { setup_peg2(hose, dev); } else if (!strncmp(model, "IBM,CPC710", 10)) { setup_indirect_pci(hose, r.start + 0x000f8000, - r.start + 0x000f8010); + r.start + 0x000f8010, + 0); if (index == 0) { dma = of_get_property(dev, "system-dma-base", &len); Index: linux-rt-rebase.q/arch/powerpc/platforms/embedded6xx/linkstation.c =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/platforms/embedded6xx/linkstation.c +++ linux-rt-rebase.q/arch/powerpc/platforms/embedded6xx/linkstation.c @@ -73,7 +73,7 @@ static int __init linkstation_add_bridge return -ENOMEM; hose->first_busno = bus_range ? bus_range[0] : 0; hose->last_busno = bus_range ? bus_range[1] : 0xff; - setup_indirect_pci(hose, 0xfec00000, 0xfee00000); + setup_indirect_pci(hose, 0xfec00000, 0xfee00000, 0); /* Interpret the "ranges" property */ /* This also maps the I/O region and sets isa_io/mem_base */ Index: linux-rt-rebase.q/arch/powerpc/sysdev/Makefile =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/sysdev/Makefile +++ linux-rt-rebase.q/arch/powerpc/sysdev/Makefile @@ -12,6 +12,7 @@ obj-$(CONFIG_PPC_PMI) += pmi.o obj-$(CONFIG_U3_DART) += dart_iommu.o obj-$(CONFIG_MMIO_NVRAM) += mmio_nvram.o obj-$(CONFIG_FSL_SOC) += fsl_soc.o +obj-$(CONFIG_FSL_PCI) += fsl_pci.o obj-$(CONFIG_TSI108_BRIDGE) += tsi108_pci.o tsi108_dev.o obj-$(CONFIG_QUICC_ENGINE) += qe_lib/ mv64x60-$(CONFIG_PCI) += mv64x60_pci.o Index: linux-rt-rebase.q/arch/powerpc/sysdev/fsl_pci.c =================================================================== --- /dev/null +++ linux-rt-rebase.q/arch/powerpc/sysdev/fsl_pci.c @@ -0,0 +1,257 @@ +/* + * MPC85xx/86xx PCI/PCIE support routing. 
+ *
+ * Copyright 2007 Freescale Semiconductor, Inc
+ *
+ * Initial author: Xianghua Xiao
+ * Recode: ZHANG WEI
+ * Rewrite the routing for Freescale PCI and PCI Express
+ * Roy Zang
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+/* atmu setup for fsl pci/pcie controller */
+void __init setup_pci_atmu(struct pci_controller *hose, struct resource *rsrc)
+{
+ struct ccsr_pci __iomem *pci;
+ int i;
+
+ pr_debug("PCI memory map start 0x%x, size 0x%x\n", rsrc->start,
+ rsrc->end - rsrc->start + 1);
+ pci = ioremap(rsrc->start, rsrc->end - rsrc->start + 1);
+
+ /* Disable all windows (except powar0 since it's ignored) */
+ for(i = 1; i < 5; i++)
+ out_be32(&pci->pow[i].powar, 0);
+ for(i = 0; i < 3; i++)
+ out_be32(&pci->piw[i].piwar, 0);
+
+ /* Setup outbound MEM window */
+ for(i = 0; i < 3; i++)
+ if (hose->mem_resources[i].flags & IORESOURCE_MEM){
+ pr_debug("PCI MEM resource start 0x%08x, size 0x%08x.\n",
+ hose->mem_resources[i].start,
+ hose->mem_resources[i].end
+ - hose->mem_resources[i].start + 1);
+ out_be32(&pci->pow[i+1].potar,
+ (hose->mem_resources[i].start >> 12)
+ & 0x000fffff);
+ out_be32(&pci->pow[i+1].potear, 0);
+ out_be32(&pci->pow[i+1].powbar,
+ (hose->mem_resources[i].start >> 12)
+ & 0x000fffff);
+ /* Enable, Mem R/W */
+ out_be32(&pci->pow[i+1].powar, 0x80044000
+ | (__ilog2(hose->mem_resources[i].end
+ - hose->mem_resources[i].start + 1) - 1));
+ }
+
+ /* Setup outbound IO window */
+ if (hose->io_resource.flags & IORESOURCE_IO){
+ pr_debug("PCI IO resource start 0x%08x, size 0x%08x, phy base 0x%08x.\n",
+ hose->io_resource.start,
+ hose->io_resource.end - hose->io_resource.start + 1,
+ hose->io_base_phys);
+ out_be32(&pci->pow[i+1].potar, (hose->io_resource.start >> 12)
+ & 0x000fffff);
+ out_be32(&pci->pow[i+1].potear, 0);
+ out_be32(&pci->pow[i+1].powbar, (hose->io_base_phys >> 12)
+ & 0x000fffff);
+ /* Enable, IO R/W */
+ out_be32(&pci->pow[i+1].powar, 0x80088000
+ | (__ilog2(hose->io_resource.end
+ - hose->io_resource.start + 1) - 1));
+ }
+
+ /* Setup 2G inbound Memory Window @ 1 */
+ out_be32(&pci->piw[2].pitar, 0x00000000);
+ out_be32(&pci->piw[2].piwbar, 0x00000000);
+ out_be32(&pci->piw[2].piwar, PIWAR_2G);
+}
+
+void __init setup_pci_cmd(struct pci_controller *hose)
+{
+ u16 cmd;
+ int cap_x;
+
+ early_read_config_word(hose, 0, 0, PCI_COMMAND, &cmd);
+ cmd |= PCI_COMMAND_SERR | PCI_COMMAND_MASTER | PCI_COMMAND_MEMORY
+ | PCI_COMMAND_IO;
+ early_write_config_word(hose, 0, 0, PCI_COMMAND, cmd);
+
+ cap_x = early_find_capability(hose, 0, 0, PCI_CAP_ID_PCIX);
+ if (cap_x) {
+ int pci_x_cmd = cap_x + PCI_X_CMD;
+ cmd = PCI_X_CMD_MAX_SPLIT | PCI_X_CMD_MAX_READ
+ | PCI_X_CMD_ERO | PCI_X_CMD_DPERR_E;
+ early_write_config_word(hose, 0, 0, pci_x_cmd, cmd);
+ } else {
+ early_write_config_byte(hose, 0, 0, PCI_LATENCY_TIMER, 0x80);
+ }
+}
+
+static void __devinit quirk_fsl_pcie_transparent(struct pci_dev *dev)
+{
+ struct resource *res;
+ int i, res_idx = PCI_BRIDGE_RESOURCES;
+ struct pci_controller *hose;
+
+ /* if this isn't a PCIe bridge, don't bother */
+ if (!pci_find_capability(dev, PCI_CAP_ID_EXP))
+ return;
+
+ /*
+ * Make the bridge be transparent.
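+ * A transparent bridge claims no resource windows of its own; the
+ * fixup below clears the virtual P2P bridge registers, then copies
+ * the hose's I/O and memory apertures into the bridge's
+ * PCI_BRIDGE_RESOURCES slots via update_bridge_resource().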
+ */ + dev->transparent = 1; + + hose = pci_bus_to_host(dev->bus); + if (!hose) { + printk(KERN_ERR "Can't find hose for bus %d\n", + dev->bus->number); + return; + } + + /* Clear out any of the virtual P2P bridge registers */ + pci_write_config_word(dev, PCI_IO_BASE_UPPER16, 0); + pci_write_config_word(dev, PCI_IO_LIMIT_UPPER16, 0); + pci_write_config_byte(dev, PCI_IO_BASE, 0x10); + pci_write_config_byte(dev, PCI_IO_LIMIT, 0); + pci_write_config_word(dev, PCI_MEMORY_BASE, 0x10); + pci_write_config_word(dev, PCI_MEMORY_LIMIT, 0); + pci_write_config_word(dev, PCI_PREF_BASE_UPPER32, 0x0); + pci_write_config_word(dev, PCI_PREF_LIMIT_UPPER32, 0x0); + pci_write_config_word(dev, PCI_PREF_MEMORY_BASE, 0x10); + pci_write_config_word(dev, PCI_PREF_MEMORY_LIMIT, 0); + + if (hose->io_resource.flags) { + res = &dev->resource[res_idx++]; + res->start = hose->io_resource.start; + res->end = hose->io_resource.end; + res->flags = hose->io_resource.flags; + update_bridge_resource(dev, res); + } + + for (i = 0; i < 3; i++) { + res = &dev->resource[res_idx + i]; + res->start = hose->mem_resources[i].start; + res->end = hose->mem_resources[i].end; + res->flags = hose->mem_resources[i].flags; + update_bridge_resource(dev, res); + } +} + +int __init fsl_pcie_check_link(struct pci_controller *hose) +{ + u16 val; + early_read_config_word(hose, 0, 0, PCIE_LTSSM, &val); + if (val < PCIE_LTSSM_L0) + return 1; + return 0; +} + +void fsl_pcibios_fixup_bus(struct pci_bus *bus) +{ + struct pci_controller *hose = (struct pci_controller *) bus->sysdata; + int i; + + /* deal with bogus pci_bus when we don't have anything connected on PCIe */ + if (hose->indirect_type & PPC_INDIRECT_TYPE_NO_PCIE_LINK) { + if (bus->parent) { + for (i = 0; i < 4; ++i) + bus->resource[i] = bus->parent->resource[i]; + } + } +} + +int __init fsl_add_bridge(struct device_node *dev, int is_primary) +{ + int len; + struct pci_controller *hose; + struct resource rsrc; + const int *bus_range; + + pr_debug("Adding PCI host bridge %s\n", dev->full_name); + + /* Fetch host bridge registers address */ + if (of_address_to_resource(dev, 0, &rsrc)) { + printk(KERN_WARNING "Can't get pci register base!"); + return -ENOMEM; + } + + /* Get bus range if any */ + bus_range = of_get_property(dev, "bus-range", &len); + if (bus_range == NULL || len < 2 * sizeof(int)) + printk(KERN_WARNING "Can't get bus-range for %s, assume" + " bus 0\n", dev->full_name); + + pci_assign_all_buses = 1; + hose = pcibios_alloc_controller(dev); + if (!hose) + return -ENOMEM; + + hose->first_busno = bus_range ? bus_range[0] : 0x0; + hose->last_busno = bus_range ? bus_range[1] : 0xff; + + setup_indirect_pci(hose, rsrc.start, rsrc.start + 0x4, + PPC_INDIRECT_TYPE_BIG_ENDIAN); + setup_pci_cmd(hose); + + /* check PCI express link status */ + if (early_find_capability(hose, 0, 0, PCI_CAP_ID_EXP)) { + hose->indirect_type = PPC_INDIRECT_TYPE_EXT_REG | + PPC_INDIRECT_TYPE_SURPRESS_PRIMARY_BUS; + if (fsl_pcie_check_link(hose)) + hose->indirect_type |= PPC_INDIRECT_TYPE_NO_PCIE_LINK; + } + + printk(KERN_INFO "Found FSL PCI host bridge at 0x%016llx." 
+ "Firmware bus number: %d->%d\n", + (unsigned long long)rsrc.start, hose->first_busno, + hose->last_busno); + + pr_debug(" ->Hose at 0x%p, cfg_addr=0x%p,cfg_data=0x%p\n", + hose, hose->cfg_addr, hose->cfg_data); + + /* Interpret the "ranges" property */ + /* This also maps the I/O region and sets isa_io/mem_base */ + pci_process_bridge_OF_ranges(hose, dev, is_primary); + + /* Setup PEX window registers */ + setup_pci_atmu(hose, &rsrc); + + return 0; +} + +DECLARE_PCI_FIXUP_EARLY(0x1957, PCI_DEVICE_ID_MPC8548E, quirk_fsl_pcie_transparent); +DECLARE_PCI_FIXUP_EARLY(0x1957, PCI_DEVICE_ID_MPC8548, quirk_fsl_pcie_transparent); +DECLARE_PCI_FIXUP_EARLY(0x1957, PCI_DEVICE_ID_MPC8543E, quirk_fsl_pcie_transparent); +DECLARE_PCI_FIXUP_EARLY(0x1957, PCI_DEVICE_ID_MPC8543, quirk_fsl_pcie_transparent); +DECLARE_PCI_FIXUP_EARLY(0x1957, PCI_DEVICE_ID_MPC8547E, quirk_fsl_pcie_transparent); +DECLARE_PCI_FIXUP_EARLY(0x1957, PCI_DEVICE_ID_MPC8545E, quirk_fsl_pcie_transparent); +DECLARE_PCI_FIXUP_EARLY(0x1957, PCI_DEVICE_ID_MPC8545, quirk_fsl_pcie_transparent); +DECLARE_PCI_FIXUP_EARLY(0x1957, PCI_DEVICE_ID_MPC8568E, quirk_fsl_pcie_transparent); +DECLARE_PCI_FIXUP_EARLY(0x1957, PCI_DEVICE_ID_MPC8568, quirk_fsl_pcie_transparent); +DECLARE_PCI_FIXUP_EARLY(0x1957, PCI_DEVICE_ID_MPC8567E, quirk_fsl_pcie_transparent); +DECLARE_PCI_FIXUP_EARLY(0x1957, PCI_DEVICE_ID_MPC8567, quirk_fsl_pcie_transparent); +DECLARE_PCI_FIXUP_EARLY(0x1957, PCI_DEVICE_ID_MPC8544E, quirk_fsl_pcie_transparent); +DECLARE_PCI_FIXUP_EARLY(0x1957, PCI_DEVICE_ID_MPC8544, quirk_fsl_pcie_transparent); +DECLARE_PCI_FIXUP_EARLY(0x1957, PCI_DEVICE_ID_MPC8641, quirk_fsl_pcie_transparent); +DECLARE_PCI_FIXUP_EARLY(0x1957, PCI_DEVICE_ID_MPC8641D, quirk_fsl_pcie_transparent); Index: linux-rt-rebase.q/arch/powerpc/sysdev/fsl_pci.h =================================================================== --- /dev/null +++ linux-rt-rebase.q/arch/powerpc/sysdev/fsl_pci.h @@ -0,0 +1,88 @@ +/* + * MPC85xx/86xx PCI Express structure define + * + * Copyright 2007 Freescale Semiconductor, Inc + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ * + */ + +#ifdef __KERNEL__ +#ifndef __POWERPC_FSL_PCI_H +#define __POWERPC_FSL_PCI_H + +#define PCIE_LTSSM 0x0404 /* PCIE Link Training and Status */ +#define PCIE_LTSSM_L0 0x16 /* L0 state */ +#define PIWAR_2G 0xa0f5501e /* Enable, Prefetch, Local Mem, Snoop R/W, 2G */ + +/* PCI/PCI Express outbound window reg */ +struct pci_outbound_window_regs { + __be32 potar; /* 0x.0 - Outbound translation address register */ + __be32 potear; /* 0x.4 - Outbound translation extended address register */ + __be32 powbar; /* 0x.8 - Outbound window base address register */ + u8 res1[4]; + __be32 powar; /* 0x.10 - Outbound window attributes register */ + u8 res2[12]; +}; + +/* PCI/PCI Express inbound window reg */ +struct pci_inbound_window_regs { + __be32 pitar; /* 0x.0 - Inbound translation address register */ + u8 res1[4]; + __be32 piwbar; /* 0x.8 - Inbound window base address register */ + __be32 piwbear; /* 0x.c - Inbound window base extended address register */ + __be32 piwar; /* 0x.10 - Inbound window attributes register */ + u8 res2[12]; +}; + +/* PCI/PCI Express IO block registers for 85xx/86xx */ +struct ccsr_pci { + __be32 config_addr; /* 0x.000 - PCI/PCIE Configuration Address Register */ + __be32 config_data; /* 0x.004 - PCI/PCIE Configuration Data Register */ + __be32 int_ack; /* 0x.008 - PCI Interrupt Acknowledge Register */ + __be32 pex_otb_cpl_tor; /* 0x.00c - PCIE Outbound completion timeout register */ + __be32 pex_conf_tor; /* 0x.010 - PCIE configuration timeout register */ + u8 res2[12]; + __be32 pex_pme_mes_dr; /* 0x.020 - PCIE PME and message detect register */ + __be32 pex_pme_mes_disr; /* 0x.024 - PCIE PME and message disable register */ + __be32 pex_pme_mes_ier; /* 0x.028 - PCIE PME and message interrupt enable register */ + __be32 pex_pmcr; /* 0x.02c - PCIE power management command register */ + u8 res3[3024]; + +/* PCI/PCI Express outbound window 0-4 + * Window 0 is the default window and is the only window enabled upon reset. + * The default outbound register set is used when a transaction misses + * in all of the other outbound windows. + */ + struct pci_outbound_window_regs pow[5]; + + u8 res14[256]; + +/* PCI/PCI Express inbound window 3-1 + * inbound window 1 supports only a 32-bit base address and does not + * define an inbound window base extended address register. 
+ */
+	struct pci_inbound_window_regs piw[3];
+
+	__be32 pex_err_dr;		/* 0x.e00 - PCI/PCIE error detect register */
+	u8	res21[4];
+	__be32 pex_err_en;		/* 0x.e08 - PCI/PCIE error interrupt enable register */
+	u8	res22[4];
+	__be32 pex_err_disr;		/* 0x.e10 - PCI/PCIE error disable register */
+	u8	res23[12];
+	__be32 pex_err_cap_stat;	/* 0x.e20 - PCI/PCIE error capture status register */
+	u8	res24[4];
+	__be32 pex_err_cap_r0;		/* 0x.e28 - PCIE error capture register 0 */
+	__be32 pex_err_cap_r1;		/* 0x.e2c - PCIE error capture register 1 */
+	__be32 pex_err_cap_r2;		/* 0x.e30 - PCIE error capture register 2 */
+	__be32 pex_err_cap_r3;		/* 0x.e34 - PCIE error capture register 3 */
+};
+
+extern int fsl_add_bridge(struct device_node *dev, int is_primary);
+extern void fsl_pcibios_fixup_bus(struct pci_bus *bus);
+
+#endif /* __POWERPC_FSL_PCI_H */
+#endif /* __KERNEL__ */
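
Aside: the reserved-byte padding in struct ccsr_pci can be sanity-checked by
mirroring the layout with plain integer types and asserting the documented
offsets. A sketch (not part of the patch; C11 _Static_assert, uint32_t
standing in for __be32, all struct names below invented):

#include <stddef.h>
#include <stdint.h>

struct pow_regs {	/* mirrors pci_outbound_window_regs, 32 bytes */
	uint32_t potar, potear, powbar;
	uint8_t  res1[4];
	uint32_t powar;
	uint8_t  res2[12];
};

struct piw_regs {	/* mirrors pci_inbound_window_regs, 32 bytes */
	uint32_t pitar;
	uint8_t  res1[4];
	uint32_t piwbar, piwbear, piwar;
	uint8_t  res2[12];
};

struct ccsr_pci_layout {	/* mirrors struct ccsr_pci above */
	uint32_t config_addr, config_data, int_ack;
	uint32_t pex_otb_cpl_tor, pex_conf_tor;
	uint8_t  res2[12];
	uint32_t pex_pme_mes_dr, pex_pme_mes_disr, pex_pme_mes_ier, pex_pmcr;
	uint8_t  res3[3024];
	struct pow_regs pow[5];		/* should land at 0xc00 */
	uint8_t  res14[256];
	struct piw_regs piw[3];		/* should land at 0xda0 */
	uint32_t pex_err_dr;		/* should land at 0xe00 */
};

_Static_assert(offsetof(struct ccsr_pci_layout, pow) == 0xc00,
	       "outbound windows misplaced");
_Static_assert(offsetof(struct ccsr_pci_layout, piw) == 0xda0,
	       "inbound windows misplaced");
_Static_assert(offsetof(struct ccsr_pci_layout, pex_err_dr) == 0xe00,
	       "error registers misplaced");

Index: linux-rt-rebase.q/arch/powerpc/sysdev/fsl_pcie.h
===================================================================
--- linux-rt-rebase.q.orig/arch/powerpc/sysdev/fsl_pcie.h
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * MPC85xx/86xx PCI Express structure define
- *
- * Copyright 2007 Freescale Semiconductor, Inc
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
- */
-
-#ifdef __KERNEL__
-#ifndef __POWERPC_FSL_PCIE_H
-#define __POWERPC_FSL_PCIE_H
-
-/* PCIE Express IO block registers in 85xx/86xx */
-
-struct ccsr_pex {
-	__be32 __iomem pex_config_addr;	/* 0x.000 - PCI Express Configuration Address Register */
-	__be32 __iomem pex_config_data;	/* 0x.004 - PCI Express Configuration Data Register */
-	u8 __iomem res1[4];
-	__be32 __iomem pex_otb_cpl_tor;	/* 0x.00c - PCI Express Outbound completion timeout register */
-	__be32 __iomem pex_conf_tor;	/* 0x.010 - PCI Express configuration timeout register */
-	u8 __iomem res2[12];
-	__be32 __iomem pex_pme_mes_dr;	/* 0x.020 - PCI Express PME and message detect register */
-	__be32 __iomem pex_pme_mes_disr;	/* 0x.024 - PCI Express PME and message disable register */
-	__be32 __iomem pex_pme_mes_ier;	/* 0x.028 - PCI Express PME and message interrupt enable register */
-	__be32 __iomem pex_pmcr;	/* 0x.02c - PCI Express power management command register */
-	u8 __iomem res3[3024];
-	__be32 __iomem pexotar0;	/* 0x.c00 - PCI Express outbound translation address register 0 */
-	__be32 __iomem pexotear0;	/* 0x.c04 - PCI Express outbound translation extended address register 0*/
-	u8 __iomem res4[8];
-	__be32 __iomem pexowar0;	/* 0x.c10 - PCI Express outbound window attributes register 0*/
-	u8 __iomem res5[12];
-	__be32 __iomem pexotar1;	/* 0x.c20 - PCI Express outbound translation address register 1 */
-	__be32 __iomem pexotear1;	/* 0x.c24 - PCI Express outbound translation extended address register 1*/
-	__be32 __iomem pexowbar1;	/* 0x.c28 - PCI Express outbound window base address register 1*/
-	u8 __iomem res6[4];
-	__be32 __iomem pexowar1;	/* 0x.c30 - PCI Express outbound window attributes register 1*/
-	u8 __iomem res7[12];
-	__be32 __iomem pexotar2;	/* 0x.c40 - PCI Express outbound translation address register 2 */
-	__be32 __iomem pexotear2;	/* 0x.c44 - PCI Express outbound translation extended address register 2*/
-	__be32 __iomem pexowbar2;	/* 0x.c48 - PCI Express outbound window base address register 2*/
-	u8 __iomem res8[4];
-	__be32 __iomem pexowar2;	/* 0x.c50 - PCI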
Express outbound window attributes register 2*/ - u8 __iomem res9[12]; - __be32 __iomem pexotar3; /* 0x.c60 - PCI Express outbound translation address register 3 */ - __be32 __iomem pexotear3; /* 0x.c64 - PCI Express outbound translation extended address register 3*/ - __be32 __iomem pexowbar3; /* 0x.c68 - PCI Express outbound window base address register 3*/ - u8 __iomem res10[4]; - __be32 __iomem pexowar3; /* 0x.c70 - PCI Express outbound window attributes register 3*/ - u8 __iomem res11[12]; - __be32 __iomem pexotar4; /* 0x.c80 - PCI Express outbound translation address register 4 */ - __be32 __iomem pexotear4; /* 0x.c84 - PCI Express outbound translation extended address register 4*/ - __be32 __iomem pexowbar4; /* 0x.c88 - PCI Express outbound window base address register 4*/ - u8 __iomem res12[4]; - __be32 __iomem pexowar4; /* 0x.c90 - PCI Express outbound window attributes register 4*/ - u8 __iomem res13[12]; - u8 __iomem res14[256]; - __be32 __iomem pexitar3; /* 0x.da0 - PCI Express inbound translation address register 3 */ - u8 __iomem res15[4]; - __be32 __iomem pexiwbar3; /* 0x.da8 - PCI Express inbound window base address register 3 */ - __be32 __iomem pexiwbear3; /* 0x.dac - PCI Express inbound window base extended address register 3 */ - __be32 __iomem pexiwar3; /* 0x.db0 - PCI Express inbound window attributes register 3 */ - u8 __iomem res16[12]; - __be32 __iomem pexitar2; /* 0x.dc0 - PCI Express inbound translation address register 2 */ - u8 __iomem res17[4]; - __be32 __iomem pexiwbar2; /* 0x.dc8 - PCI Express inbound window base address register 2 */ - __be32 __iomem pexiwbear2; /* 0x.dcc - PCI Express inbound window base extended address register 2 */ - __be32 __iomem pexiwar2; /* 0x.dd0 - PCI Express inbound window attributes register 2 */ - u8 __iomem res18[12]; - __be32 __iomem pexitar1; /* 0x.de0 - PCI Express inbound translation address register 2 */ - u8 __iomem res19[4]; - __be32 __iomem pexiwbar1; /* 0x.de8 - PCI Express inbound window base address register 2 */ - __be32 __iomem pexiwbear1; /* 0x.dec - PCI Express inbound window base extended address register 2 */ - __be32 __iomem pexiwar1; /* 0x.df0 - PCI Express inbound window attributes register 2 */ - u8 __iomem res20[12]; - __be32 __iomem pex_err_dr; /* 0x.e00 - PCI Express error detect register */ - u8 __iomem res21[4]; - __be32 __iomem pex_err_en; /* 0x.e08 - PCI Express error interrupt enable register */ - u8 __iomem res22[4]; - __be32 __iomem pex_err_disr; /* 0x.e10 - PCI Express error disable register */ - u8 __iomem res23[12]; - __be32 __iomem pex_err_cap_stat; /* 0x.e20 - PCI Express error capture status register */ - u8 __iomem res24[4]; - __be32 __iomem pex_err_cap_r0; /* 0x.e28 - PCI Express error capture register 0 */ - __be32 __iomem pex_err_cap_r1; /* 0x.e2c - PCI Express error capture register 0 */ - __be32 __iomem pex_err_cap_r2; /* 0x.e30 - PCI Express error capture register 0 */ - __be32 __iomem pex_err_cap_r3; /* 0x.e34 - PCI Express error capture register 0 */ -}; - -#endif /* __POWERPC_FSL_PCIE_H */ -#endif /* __KERNEL__ */ Index: linux-rt-rebase.q/arch/powerpc/sysdev/grackle.c =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/sysdev/grackle.c +++ linux-rt-rebase.q/arch/powerpc/sysdev/grackle.c @@ -55,7 +55,7 @@ static inline void grackle_set_loop_snoo void __init setup_grackle(struct pci_controller *hose) { - setup_indirect_pci(hose, 0xfec00000, 0xfee00000); + setup_indirect_pci(hose, 0xfec00000, 0xfee00000, 0); if 
 (machine_is_compatible("PowerMac1,1"))
 		pci_assign_all_buses = 1;
 	if (machine_is_compatible("AAPL,PowerBook1998"))
Index: linux-rt-rebase.q/arch/powerpc/sysdev/indirect_pci.c
===================================================================
--- linux-rt-rebase.q.orig/arch/powerpc/sysdev/indirect_pci.c
+++ linux-rt-rebase.q/arch/powerpc/sysdev/indirect_pci.c
@@ -20,12 +20,6 @@
 #include
 #include
 
-#ifdef CONFIG_PPC_INDIRECT_PCI_BE
-#define PCI_CFG_OUT out_be32
-#else
-#define PCI_CFG_OUT out_le32
-#endif
-
 static int
 indirect_read_config(struct pci_bus *bus, unsigned int devfn, int offset,
 		     int len, u32 *val)
@@ -35,10 +29,17 @@ indirect_read_config(struct pci_bus
 	u8 cfg_type = 0;
 	u32 bus_no, reg;
 
+	if (hose->indirect_type & PPC_INDIRECT_TYPE_NO_PCIE_LINK) {
+		if (bus->number != hose->first_busno)
+			return PCIBIOS_DEVICE_NOT_FOUND;
+		if (devfn != 0)
+			return PCIBIOS_DEVICE_NOT_FOUND;
+	}
+
 	if (ppc_md.pci_exclude_device)
 		if (ppc_md.pci_exclude_device(hose, bus->number, devfn))
 			return PCIBIOS_DEVICE_NOT_FOUND;
-	
+
 	if (hose->indirect_type & PPC_INDIRECT_TYPE_SET_CFG_TYPE)
 		if (bus->number != hose->first_busno)
 			cfg_type = 1;
@@ -51,9 +52,12 @@ indirect_read_config(struct pci_bus
 	else
 		reg = offset & 0xfc;
 
-	PCI_CFG_OUT(hose->cfg_addr,
-		    (0x80000000 | (bus_no << 16)
-		     | (devfn << 8) | reg | cfg_type));
+	if (hose->indirect_type & PPC_INDIRECT_TYPE_BIG_ENDIAN)
+		out_be32(hose->cfg_addr, (0x80000000 | (bus_no << 16) |
+			 (devfn << 8) | reg | cfg_type));
+	else
+		out_le32(hose->cfg_addr, (0x80000000 | (bus_no << 16) |
+			 (devfn << 8) | reg | cfg_type));
 
 	/*
 	 * Note: the caller has already checked that offset is
@@ -83,6 +87,13 @@ indirect_write_config(struct pci_bus
 	u8 cfg_type = 0;
 	u32 bus_no, reg;
 
+	if (hose->indirect_type & PPC_INDIRECT_TYPE_NO_PCIE_LINK) {
+		if (bus->number != hose->first_busno)
+			return PCIBIOS_DEVICE_NOT_FOUND;
+		if (devfn != 0)
+			return PCIBIOS_DEVICE_NOT_FOUND;
+	}
+
 	if (ppc_md.pci_exclude_device)
 		if (ppc_md.pci_exclude_device(hose, bus->number, devfn))
 			return PCIBIOS_DEVICE_NOT_FOUND;
@@ -99,9 +110,12 @@ indirect_write_config(struct pci_bus
 	else
 		reg = offset & 0xfc;
 
-	PCI_CFG_OUT(hose->cfg_addr,
-		    (0x80000000 | (bus_no << 16)
-		     | (devfn << 8) | reg | cfg_type));
+	if (hose->indirect_type & PPC_INDIRECT_TYPE_BIG_ENDIAN)
+		out_be32(hose->cfg_addr, (0x80000000 | (bus_no << 16) |
+			 (devfn << 8) | reg | cfg_type));
+	else
+		out_le32(hose->cfg_addr, (0x80000000 | (bus_no << 16) |
+			 (devfn << 8) | reg | cfg_type));
 
 	/* suppress setting of PCI_PRIMARY_BUS */
 	if (hose->indirect_type & PPC_INDIRECT_TYPE_SURPRESS_PRIMARY_BUS)
@@ -135,24 +149,15 @@ static struct pci_ops indirect_pci_ops =
 };
 
 void __init
-setup_indirect_pci_nomap(struct pci_controller* hose, void __iomem * cfg_addr,
-			 void __iomem * cfg_data)
-{
-	hose->cfg_addr = cfg_addr;
-	hose->cfg_data = cfg_data;
-	hose->ops = &indirect_pci_ops;
-}
-
-void __init
-setup_indirect_pci(struct pci_controller* hose, u32 cfg_addr, u32 cfg_data)
+setup_indirect_pci(struct pci_controller* hose, u32 cfg_addr, u32 cfg_data, u32 flags)
 {
 	unsigned long base = cfg_addr & PAGE_MASK;
-	void __iomem *mbase, *addr, *data;
+	void __iomem *mbase;
 
 	mbase = ioremap(base, PAGE_SIZE);
-	addr = mbase + (cfg_addr & ~PAGE_MASK);
+	hose->cfg_addr = mbase + (cfg_addr & ~PAGE_MASK);
 	if ((cfg_data & PAGE_MASK) != base)
 		mbase = ioremap(cfg_data & PAGE_MASK, PAGE_SIZE);
-	data = mbase + (cfg_data & ~PAGE_MASK);
-	setup_indirect_pci_nomap(hose, addr, data);
+	hose->cfg_data = mbase + (cfg_data & ~PAGE_MASK);
+	hose->ops = &indirect_pci_ops;
 }
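
Aside: a standalone sketch (not part of the patch) of the CONFIG_ADDRESS
dword that both the out_be32() and out_le32() branches above write, for the
ordinary case (cfg_type 0, no extended registers); cfg_dword() is an
invented name.

/*
 * Illustration only -- NOT from the patch.  Builds the indirect config
 * cycle address: 0x80000000 | bus << 16 | devfn << 8 | (offset & 0xfc).
 */
#include <assert.h>
#include <stdint.h>

static uint32_t cfg_dword(unsigned int bus, unsigned int dev,
			  unsigned int fn, unsigned int offset)
{
	unsigned int devfn = (dev << 3) | fn;	/* standard PCI devfn packing */

	return 0x80000000u | (bus << 16) | (devfn << 8) | (offset & 0xfc);
}

int main(void)
{
	/* bus 3, device 4, function 1, config offset 0x24 (BAR5) */
	assert(cfg_dword(3, 4, 1, 0x24) == 0x80032124u);
	return 0;
}

Index: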
linux-rt-rebase.q/arch/powerpc/sysdev/mv64x60_pci.c =================================================================== --- linux-rt-rebase.q.orig/arch/powerpc/sysdev/mv64x60_pci.c +++ linux-rt-rebase.q/arch/powerpc/sysdev/mv64x60_pci.c @@ -144,7 +144,7 @@ static int __init mv64x60_add_bridge(str hose->first_busno = bus_range ? bus_range[0] : 0; hose->last_busno = bus_range ? bus_range[1] : 0xff; - setup_indirect_pci(hose, rsrc.start, rsrc.start + 4); + setup_indirect_pci(hose, rsrc.start, rsrc.start + 4, 0); hose->self_busno = hose->first_busno; printk(KERN_INFO "Found MV64x60 PCI host bridge at 0x%016llx. " Index: linux-rt-rebase.q/arch/sparc/defconfig =================================================================== --- linux-rt-rebase.q.orig/arch/sparc/defconfig +++ linux-rt-rebase.q/arch/sparc/defconfig @@ -600,7 +600,7 @@ CONFIG_LEGACY_PTY_COUNT=256 # CONFIG_IPMI_HANDLER is not set # CONFIG_WATCHDOG is not set CONFIG_HW_RANDOM=m -CONFIG_RTC=m +CONFIG_JS_RTC=m # CONFIG_R3964 is not set # CONFIG_APPLICOM is not set # CONFIG_DRM is not set Index: linux-rt-rebase.q/arch/sparc/kernel/sparc_ksyms.c =================================================================== --- linux-rt-rebase.q.orig/arch/sparc/kernel/sparc_ksyms.c +++ linux-rt-rebase.q/arch/sparc/kernel/sparc_ksyms.c @@ -161,6 +161,8 @@ EXPORT_SYMBOL(BTFIXUP_CALL(mmu_get_scsi_ EXPORT_SYMBOL(BTFIXUP_CALL(mmu_release_scsi_sgl)); EXPORT_SYMBOL(BTFIXUP_CALL(mmu_release_scsi_one)); +EXPORT_SYMBOL(BTFIXUP_CALL(pgprot_noncached)); + #ifdef CONFIG_SBUS EXPORT_SYMBOL(sbus_root); EXPORT_SYMBOL(dma_chain); @@ -260,6 +262,7 @@ EXPORT_SYMBOL(__memmove); /* Moving data to/from userspace. */ EXPORT_SYMBOL(__copy_user); EXPORT_SYMBOL(__strncpy_from_user); +EXPORT_SYMBOL(__strnlen_user); /* Networking helper routines. */ EXPORT_SYMBOL(__csum_partial_copy_sparc_generic); Index: linux-rt-rebase.q/arch/sparc/kernel/vmlinux.lds.S =================================================================== --- linux-rt-rebase.q.orig/arch/sparc/kernel/vmlinux.lds.S +++ linux-rt-rebase.q/arch/sparc/kernel/vmlinux.lds.S @@ -35,6 +35,8 @@ SECTIONS __ex_table : { *(__ex_table) } __stop___ex_table = .; + NOTES + . = ALIGN(4096); __init_begin = .; _sinittext = .; Index: linux-rt-rebase.q/arch/sparc/lib/memset.S =================================================================== --- linux-rt-rebase.q.orig/arch/sparc/lib/memset.S +++ linux-rt-rebase.q/arch/sparc/lib/memset.S @@ -162,7 +162,7 @@ __bzero: 8: add %o0, 1, %o0 subcc %o1, 1, %o1 - bne,a 8b + bne 8b EX(stb %g3, [%o0 - 1], add %o1, 1) 0: retl Index: linux-rt-rebase.q/arch/sparc/prom/printf.c =================================================================== --- linux-rt-rebase.q.orig/arch/sparc/prom/printf.c +++ linux-rt-rebase.q/arch/sparc/prom/printf.c @@ -13,6 +13,7 @@ */ #include +#include #include #include @@ -44,3 +45,4 @@ prom_printf(char *fmt, ...) 
prom_write(ppbuf, i); } +EXPORT_SYMBOL(prom_printf); Index: linux-rt-rebase.q/arch/sparc64/defconfig =================================================================== --- linux-rt-rebase.q.orig/arch/sparc64/defconfig +++ linux-rt-rebase.q/arch/sparc64/defconfig @@ -1,11 +1,12 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.22 -# Thu Jul 19 21:30:37 2007 +# Linux kernel version: 2.6.23-rc1 +# Sun Jul 22 19:24:37 2007 # CONFIG_SPARC=y CONFIG_SPARC64=y CONFIG_GENERIC_TIME=y +CONFIG_GENERIC_CMOS_UPDATE=y CONFIG_GENERIC_CLOCKEVENTS=y CONFIG_64BIT=y CONFIG_MMU=y @@ -17,6 +18,7 @@ CONFIG_ARCH_MAY_HAVE_PC_FDC=y # CONFIG_ARCH_HAS_ILOG2_U64 is not set CONFIG_AUDIT_ARCH=y CONFIG_ARCH_NO_VIRT_TO_BUS=y +CONFIG_OF=y CONFIG_SPARC64_PAGE_SIZE_8KB=y # CONFIG_SPARC64_PAGE_SIZE_64KB is not set # CONFIG_SPARC64_PAGE_SIZE_512KB is not set @@ -314,6 +316,7 @@ CONFIG_FW_LOADER=y # CONFIG_SYS_HYPERVISOR is not set CONFIG_CONNECTOR=m # CONFIG_MTD is not set +CONFIG_OF_DEVICE=y # CONFIG_PARPORT is not set CONFIG_BLK_DEV=y # CONFIG_BLK_DEV_FD is not set @@ -433,10 +436,7 @@ CONFIG_SCSI_FC_ATTRS=y CONFIG_SCSI_ISCSI_ATTRS=m # CONFIG_SCSI_SAS_ATTRS is not set # CONFIG_SCSI_SAS_LIBSAS is not set - -# -# SCSI low-level drivers -# +CONFIG_SCSI_LOWLEVEL=y CONFIG_ISCSI_TCP=m # CONFIG_BLK_DEV_3W_XXXX_RAID is not set # CONFIG_SCSI_3W_9XXX is not set @@ -701,7 +701,6 @@ CONFIG_UNIX98_PTYS=y # CONFIG_IPMI_HANDLER is not set # CONFIG_WATCHDOG is not set # CONFIG_HW_RANDOM is not set -CONFIG_RTC=y # CONFIG_R3964 is not set # CONFIG_APPLICOM is not set # CONFIG_DRM is not set @@ -844,6 +843,7 @@ CONFIG_HWMON=y # # CONFIG_DISPLAY_SUPPORT is not set # CONFIG_VGASTATE is not set +# CONFIG_VIDEO_OUTPUT_CONTROL is not set CONFIG_FB=y # CONFIG_FIRMWARE_EDID is not set CONFIG_FB_DDC=y @@ -937,7 +937,6 @@ CONFIG_SND_MIXER_OSS=m CONFIG_SND_PCM_OSS=m CONFIG_SND_PCM_OSS_PLUGINS=y CONFIG_SND_SEQUENCER_OSS=y -# CONFIG_SND_RTCTIMER is not set # CONFIG_SND_DYNAMIC_MINORS is not set CONFIG_SND_SUPPORT_OLD_API=y CONFIG_SND_VERBOSE_PROCFS=y @@ -1034,6 +1033,10 @@ CONFIG_SND_SUN_CS4231=m # CONFIG_SND_SOC is not set # +# SoC Audio support for SuperH +# + +# # Open Sound System # # CONFIG_SOUND_PRIME is not set @@ -1157,19 +1160,7 @@ CONFIG_USB_STORAGE=m # # CONFIG_USB_GADGET is not set # CONFIG_MMC is not set - -# -# LED devices -# # CONFIG_NEW_LEDS is not set - -# -# LED drivers -# - -# -# LED Triggers -# # CONFIG_INFINIBAND is not set # @@ -1199,7 +1190,6 @@ CONFIG_USB_STORAGE=m # Misc Linux/SPARC drivers # CONFIG_SUN_OPENPROMIO=m -CONFIG_SUN_MOSTEK_RTC=y # CONFIG_OBP_FLASH is not set # CONFIG_SUN_BPP is not set # CONFIG_BBC_I2C is not set Index: linux-rt-rebase.q/arch/sparc64/kernel/head.S =================================================================== --- linux-rt-rebase.q.orig/arch/sparc64/kernel/head.S +++ linux-rt-rebase.q/arch/sparc64/kernel/head.S @@ -1,15 +1,15 @@ -/* $Id: head.S,v 1.87 2002/02/09 19:49:31 davem Exp $ - * head.S: Initial boot code for the Sparc64 port of Linux. +/* head.S: Initial boot code for the Sparc64 port of Linux. * - * Copyright (C) 1996,1997 David S. Miller (davem@caip.rutgers.edu) + * Copyright (C) 1996, 1997, 2007 David S. 
Miller (davem@davemloft.net) * Copyright (C) 1996 David Sitsky (David.Sitsky@anu.edu.au) - * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz) + * Copyright (C) 1997, 1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz) * Copyright (C) 1997 Miguel de Icaza (miguel@nuclecu.unam.mx) */ #include #include #include +#include #include #include #include @@ -374,6 +374,7 @@ jump_to_sun4u_init: jmpl %g2 + %g0, %g0 nop + .section .text.init.refok sun4u_init: BRANCH_IF_SUN4V(g1, sun4v_init) @@ -529,6 +530,8 @@ tlb_fixup_done: nop /* Not reached... */ + .previous + /* This is meant to allow the sharing of this code between * boot processor invocation (via setup_tba() below) and * secondary processor startup (via trampoline.S). The Index: linux-rt-rebase.q/arch/sparc64/kernel/vmlinux.lds.S =================================================================== --- linux-rt-rebase.q.orig/arch/sparc64/kernel/vmlinux.lds.S +++ linux-rt-rebase.q/arch/sparc64/kernel/vmlinux.lds.S @@ -45,6 +45,8 @@ SECTIONS __ex_table : { *(__ex_table) } __stop___ex_table = .; + NOTES + . = ALIGN(PAGE_SIZE); __init_begin = .; .init.text : { Index: linux-rt-rebase.q/arch/um/drivers/mconsole_kern.c =================================================================== --- linux-rt-rebase.q.orig/arch/um/drivers/mconsole_kern.c +++ linux-rt-rebase.q/arch/um/drivers/mconsole_kern.c @@ -499,7 +499,7 @@ static struct mc_device mem_mc = { .remove = mem_remove, }; -static int mem_mc_init(void) +static int __init mem_mc_init(void) { if(can_drop_memory()) mconsole_register_dev(&mem_mc); @@ -798,7 +798,7 @@ void mconsole_stack(struct mc_request *r */ static char *notify_socket = NULL; -static int mconsole_init(void) +static int __init mconsole_init(void) { /* long to avoid size mismatch warnings from gcc */ long sock; Index: linux-rt-rebase.q/arch/um/drivers/net_kern.c =================================================================== --- linux-rt-rebase.q.orig/arch/um/drivers/net_kern.c +++ linux-rt-rebase.q/arch/um/drivers/net_kern.c @@ -623,7 +623,7 @@ static int eth_setup_common(char *str, i return found; } -static int eth_setup(char *str) +static int __init eth_setup(char *str) { struct eth_init *new; char *error; Index: linux-rt-rebase.q/arch/um/drivers/ubd_kern.c =================================================================== --- linux-rt-rebase.q.orig/arch/um/drivers/ubd_kern.c +++ linux-rt-rebase.q/arch/um/drivers/ubd_kern.c @@ -469,7 +469,7 @@ __uml_help(fakehd, " Change the ubd device name to \"hd\".\n\n" ); -static void do_ubd_request(request_queue_t * q); +static void do_ubd_request(struct request_queue * q); /* Only changed by ubd_init, which is an initcall. 
*/ int thread_fd = -1; @@ -1081,7 +1081,7 @@ static void prepare_request(struct reque } /* Called with dev->lock held */ -static void do_ubd_request(request_queue_t *q) +static void do_ubd_request(struct request_queue *q) { struct io_thread_req *io_req; struct request *req; Index: linux-rt-rebase.q/arch/um/kernel/mem.c =================================================================== --- linux-rt-rebase.q.orig/arch/um/kernel/mem.c +++ linux-rt-rebase.q/arch/um/kernel/mem.c @@ -62,7 +62,7 @@ static void setup_highmem(unsigned long } #endif -void mem_init(void) +void __init mem_init(void) { /* clear the zero-page */ memset((void *) empty_zero_page, 0, PAGE_SIZE); Index: linux-rt-rebase.q/arch/um/kernel/physmem.c =================================================================== --- linux-rt-rebase.q.orig/arch/um/kernel/physmem.c +++ linux-rt-rebase.q/arch/um/kernel/physmem.c @@ -28,7 +28,8 @@ unsigned long high_physmem; extern unsigned long long physmem_size; -int init_maps(unsigned long physmem, unsigned long iomem, unsigned long highmem) +int __init init_maps(unsigned long physmem, unsigned long iomem, + unsigned long highmem) { struct page *p, *map; unsigned long phys_len, phys_pages, highmem_len, highmem_pages; @@ -47,13 +48,7 @@ int init_maps(unsigned long physmem, uns total_pages = phys_pages + iomem_pages + highmem_pages; total_len = phys_len + iomem_len + highmem_len; - if(kmalloc_ok){ - map = kmalloc(total_len, GFP_KERNEL); - if(map == NULL) - map = vmalloc(total_len); - } - else map = alloc_bootmem_low_pages(total_len); - + map = alloc_bootmem_low_pages(total_len); if(map == NULL) return -ENOMEM; @@ -98,8 +93,8 @@ void map_memory(unsigned long virt, unsi extern int __syscall_stub_start; -void setup_physmem(unsigned long start, unsigned long reserve_end, - unsigned long len, unsigned long long highmem) +void __init setup_physmem(unsigned long start, unsigned long reserve_end, + unsigned long len, unsigned long long highmem) { unsigned long reserve = reserve_end - start; int pfn = PFN_UP(__pa(reserve_end)); Index: linux-rt-rebase.q/arch/um/kernel/skas/process.c =================================================================== --- linux-rt-rebase.q.orig/arch/um/kernel/skas/process.c +++ linux-rt-rebase.q/arch/um/kernel/skas/process.c @@ -145,7 +145,7 @@ void init_idle_skas(void) extern void start_kernel(void); -static int start_kernel_proc(void *unused) +static int __init start_kernel_proc(void *unused) { int pid; @@ -165,7 +165,7 @@ extern int userspace_pid[]; extern char cpu0_irqstack[]; -int start_uml_skas(void) +int __init start_uml_skas(void) { stack_protections((unsigned long) &cpu0_irqstack); set_sigstack(cpu0_irqstack, THREAD_SIZE); Index: linux-rt-rebase.q/arch/um/os-Linux/aio.c =================================================================== --- linux-rt-rebase.q.orig/arch/um/os-Linux/aio.c +++ linux-rt-rebase.q/arch/um/os-Linux/aio.c @@ -14,6 +14,7 @@ #include "init.h" #include "user.h" #include "mode.h" +#include "kern_constants.h" struct aio_thread_req { enum aio_type type; @@ -65,47 +66,33 @@ static long io_getevents(aio_context_t c static int do_aio(aio_context_t ctx, enum aio_type type, int fd, char *buf, int len, unsigned long long offset, struct aio_context *aio) { - struct iocb iocb, *iocbp = &iocb; + struct iocb *iocbp = & ((struct iocb) { + .aio_data = (unsigned long) aio, + .aio_fildes = fd, + .aio_buf = (unsigned long) buf, + .aio_nbytes = len, + .aio_offset = offset + }); char c; - int err; - - iocb = ((struct iocb) { .aio_data = (unsigned long) aio, - 
.aio_reqprio = 0, - .aio_fildes = fd, - .aio_buf = (unsigned long) buf, - .aio_nbytes = len, - .aio_offset = offset, - .aio_reserved1 = 0, - .aio_reserved2 = 0, - .aio_reserved3 = 0 }); - switch(type){ + switch (type) { case AIO_READ: - iocb.aio_lio_opcode = IOCB_CMD_PREAD; - err = io_submit(ctx, 1, &iocbp); + iocbp->aio_lio_opcode = IOCB_CMD_PREAD; break; case AIO_WRITE: - iocb.aio_lio_opcode = IOCB_CMD_PWRITE; - err = io_submit(ctx, 1, &iocbp); + iocbp->aio_lio_opcode = IOCB_CMD_PWRITE; break; case AIO_MMAP: - iocb.aio_lio_opcode = IOCB_CMD_PREAD; - iocb.aio_buf = (unsigned long) &c; - iocb.aio_nbytes = sizeof(c); - err = io_submit(ctx, 1, &iocbp); + iocbp->aio_lio_opcode = IOCB_CMD_PREAD; + iocbp->aio_buf = (unsigned long) &c; + iocbp->aio_nbytes = sizeof(c); break; default: - printk("Bogus op in do_aio - %d\n", type); - err = -EINVAL; - break; + printk(UM_KERN_ERR "Bogus op in do_aio - %d\n", type); + return -EINVAL; } - if(err > 0) - err = 0; - else - err = -errno; - - return err; + return (io_submit(ctx, 1, &iocbp) > 0) ? 0 : -errno; } /* Initialized in an initcall and unchanged thereafter */ Index: linux-rt-rebase.q/arch/um/os-Linux/process.c =================================================================== --- linux-rt-rebase.q.orig/arch/um/os-Linux/process.c +++ linux-rt-rebase.q/arch/um/os-Linux/process.c @@ -194,7 +194,7 @@ int os_unmap_memory(void *addr, int len) #define MADV_REMOVE KERNEL_MADV_REMOVE #endif -int __init os_drop_memory(void *addr, int length) +int os_drop_memory(void *addr, int length) { int err; Index: linux-rt-rebase.q/arch/um/os-Linux/user_syms.c =================================================================== --- linux-rt-rebase.q.orig/arch/um/os-Linux/user_syms.c +++ linux-rt-rebase.q/arch/um/os-Linux/user_syms.c @@ -19,10 +19,7 @@ extern void *memmove(void *, const void extern void *memset(void *, int, size_t); extern int printf(const char *, ...); -/* If they're not defined, the export is included in lib/string.c.*/ -#ifdef __HAVE_ARCH_STRLEN -EXPORT_SYMBOL(strlen); -#endif +/* If it's not defined, the export is included in lib/string.c.*/ #ifdef __HAVE_ARCH_STRSTR EXPORT_SYMBOL(strstr); #endif Index: linux-rt-rebase.q/arch/um/sys-i386/Makefile =================================================================== --- linux-rt-rebase.q.orig/arch/um/sys-i386/Makefile +++ linux-rt-rebase.q/arch/um/sys-i386/Makefile @@ -4,7 +4,7 @@ obj-y = bug.o bugs.o checksum.o delay.o obj-$(CONFIG_MODE_SKAS) += stub.o stub_segv.o -subarch-obj-y = lib/bitops.o lib/semaphore.o +subarch-obj-y = lib/bitops.o lib/semaphore.o lib/string.o subarch-obj-$(CONFIG_HIGHMEM) += mm/highmem.o subarch-obj-$(CONFIG_MODULES) += kernel/module.o Index: linux-rt-rebase.q/arch/x86_64/kernel/acpi/Makefile =================================================================== --- linux-rt-rebase.q.orig/arch/x86_64/kernel/acpi/Makefile +++ linux-rt-rebase.q/arch/x86_64/kernel/acpi/Makefile @@ -1,6 +1,6 @@ obj-y := boot.o boot-y := ../../../i386/kernel/acpi/boot.o -obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup.o +obj-y += sleep.o wakeup.o ifneq ($(CONFIG_ACPI_PROCESSOR),) obj-y += processor.o Index: linux-rt-rebase.q/arch/x86_64/kernel/acpi/sleep.c =================================================================== --- linux-rt-rebase.q.orig/arch/x86_64/kernel/acpi/sleep.c +++ linux-rt-rebase.q/arch/x86_64/kernel/acpi/sleep.c @@ -51,8 +51,6 @@ Low-Level Sleep Support -------------------------------------------------------------------------- */ -#ifdef CONFIG_ACPI_SLEEP - /* address in low memory of 
the wakeup routine. */ unsigned long acpi_wakeup_address = 0; unsigned long acpi_realmode_flags; @@ -117,8 +115,6 @@ static int __init acpi_sleep_setup(char __setup("acpi_sleep=", acpi_sleep_setup); -#endif /*CONFIG_ACPI_SLEEP */ - void acpi_pci_link_exit(void) { } Index: linux-rt-rebase.q/arch/x86_64/kernel/head.S =================================================================== --- linux-rt-rebase.q.orig/arch/x86_64/kernel/head.S +++ linux-rt-rebase.q/arch/x86_64/kernel/head.S @@ -120,7 +120,7 @@ ident_complete: addq %rbp, trampoline_level4_pgt + 0(%rip) addq %rbp, trampoline_level4_pgt + (511*8)(%rip) #endif -#ifdef CONFIG_ACPI_SLEEP +#ifdef CONFIG_ACPI addq %rbp, wakeup_level4_pgt + 0(%rip) addq %rbp, wakeup_level4_pgt + (511*8)(%rip) #endif Index: linux-rt-rebase.q/arch/x86_64/kernel/setup.c =================================================================== --- linux-rt-rebase.q.orig/arch/x86_64/kernel/setup.c +++ linux-rt-rebase.q/arch/x86_64/kernel/setup.c @@ -333,7 +333,7 @@ void __init setup_arch(char **cmdline_p) reserve_bootmem_generic(SMP_TRAMPOLINE_BASE, 2*PAGE_SIZE); #endif -#ifdef CONFIG_ACPI_SLEEP +#ifdef CONFIG_ACPI /* * Reserve low memory region for sleep support. */ Index: linux-rt-rebase.q/block/as-iosched.c =================================================================== --- linux-rt-rebase.q.orig/block/as-iosched.c +++ linux-rt-rebase.q/block/as-iosched.c @@ -796,7 +796,7 @@ static void update_write_batch(struct as * as_completed_request is to be called when a request has completed and * returned something to the requesting process, be it an error or data. */ -static void as_completed_request(request_queue_t *q, struct request *rq) +static void as_completed_request(struct request_queue *q, struct request *rq) { struct as_data *ad = q->elevator->elevator_data; @@ -853,7 +853,8 @@ out: * reference unless it replaces the request at somepart of the elevator * (ie. the dispatch queue) */ -static void as_remove_queued_request(request_queue_t *q, struct request *rq) +static void as_remove_queued_request(struct request_queue *q, + struct request *rq) { const int data_dir = rq_is_sync(rq); struct as_data *ad = q->elevator->elevator_data; @@ -978,7 +979,7 @@ static void as_move_to_dispatch(struct a * read/write expire, batch expire, etc, and moves it to the dispatch * queue. Returns 1 if a request was found, 0 otherwise. 
*/ -static int as_dispatch_request(request_queue_t *q, int force) +static int as_dispatch_request(struct request_queue *q, int force) { struct as_data *ad = q->elevator->elevator_data; const int reads = !list_empty(&ad->fifo_list[REQ_SYNC]); @@ -1139,7 +1140,7 @@ fifo_expired: /* * add rq to rbtree and fifo */ -static void as_add_request(request_queue_t *q, struct request *rq) +static void as_add_request(struct request_queue *q, struct request *rq) { struct as_data *ad = q->elevator->elevator_data; int data_dir; @@ -1167,7 +1168,7 @@ static void as_add_request(request_queue RQ_SET_STATE(rq, AS_RQ_QUEUED); } -static void as_activate_request(request_queue_t *q, struct request *rq) +static void as_activate_request(struct request_queue *q, struct request *rq) { WARN_ON(RQ_STATE(rq) != AS_RQ_DISPATCHED); RQ_SET_STATE(rq, AS_RQ_REMOVED); @@ -1175,7 +1176,7 @@ static void as_activate_request(request_ atomic_dec(&RQ_IOC(rq)->aic->nr_dispatched); } -static void as_deactivate_request(request_queue_t *q, struct request *rq) +static void as_deactivate_request(struct request_queue *q, struct request *rq) { WARN_ON(RQ_STATE(rq) != AS_RQ_REMOVED); RQ_SET_STATE(rq, AS_RQ_DISPATCHED); @@ -1189,7 +1190,7 @@ static void as_deactivate_request(reques * is not empty - it is used in the block layer to check for plugging and * merging opportunities */ -static int as_queue_empty(request_queue_t *q) +static int as_queue_empty(struct request_queue *q) { struct as_data *ad = q->elevator->elevator_data; @@ -1198,7 +1199,7 @@ static int as_queue_empty(request_queue_ } static int -as_merge(request_queue_t *q, struct request **req, struct bio *bio) +as_merge(struct request_queue *q, struct request **req, struct bio *bio) { struct as_data *ad = q->elevator->elevator_data; sector_t rb_key = bio->bi_sector + bio_sectors(bio); @@ -1216,7 +1217,8 @@ as_merge(request_queue_t *q, struct requ return ELEVATOR_NO_MERGE; } -static void as_merged_request(request_queue_t *q, struct request *req, int type) +static void as_merged_request(struct request_queue *q, struct request *req, + int type) { struct as_data *ad = q->elevator->elevator_data; @@ -1234,7 +1236,7 @@ static void as_merged_request(request_qu } } -static void as_merged_requests(request_queue_t *q, struct request *req, +static void as_merged_requests(struct request_queue *q, struct request *req, struct request *next) { /* @@ -1285,7 +1287,7 @@ static void as_work_handler(struct work_ spin_unlock_irqrestore(q->queue_lock, flags); } -static int as_may_queue(request_queue_t *q, int rw) +static int as_may_queue(struct request_queue *q, int rw) { int ret = ELV_MQUEUE_MAY; struct as_data *ad = q->elevator->elevator_data; @@ -1318,7 +1320,7 @@ static void as_exit_queue(elevator_t *e) /* * initialize elevator private data (as_data). 
*/ -static void *as_init_queue(request_queue_t *q) +static void *as_init_queue(struct request_queue *q) { struct as_data *ad; Index: linux-rt-rebase.q/block/blktrace.c =================================================================== --- linux-rt-rebase.q.orig/block/blktrace.c +++ linux-rt-rebase.q/block/blktrace.c @@ -231,7 +231,7 @@ static void blk_trace_cleanup(struct blk kfree(bt); } -static int blk_trace_remove(request_queue_t *q) +static int blk_trace_remove(struct request_queue *q) { struct blk_trace *bt; @@ -312,7 +312,7 @@ static struct rchan_callbacks blk_relay_ /* * Setup everything required to start tracing */ -static int blk_trace_setup(request_queue_t *q, struct block_device *bdev, +static int blk_trace_setup(struct request_queue *q, struct block_device *bdev, char __user *arg) { struct blk_user_trace_setup buts; @@ -401,7 +401,7 @@ err: return ret; } -static int blk_trace_startstop(request_queue_t *q, int start) +static int blk_trace_startstop(struct request_queue *q, int start) { struct blk_trace *bt; int ret; @@ -444,7 +444,7 @@ static int blk_trace_startstop(request_q **/ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) { - request_queue_t *q; + struct request_queue *q; int ret, start = 0; q = bdev_get_queue(bdev); @@ -479,7 +479,7 @@ int blk_trace_ioctl(struct block_device * @q: the request queue associated with the device * **/ -void blk_trace_shutdown(request_queue_t *q) +void blk_trace_shutdown(struct request_queue *q) { if (q->blk_trace) { blk_trace_startstop(q, 0); Index: linux-rt-rebase.q/block/bsg.c =================================================================== --- linux-rt-rebase.q.orig/block/bsg.c +++ linux-rt-rebase.q/block/bsg.c @@ -37,7 +37,7 @@ #define BSG_VERSION "0.4" struct bsg_device { - request_queue_t *queue; + struct request_queue *queue; spinlock_t lock; struct list_head busy_list; struct list_head done_list; @@ -180,7 +180,7 @@ unlock: return ret; } -static int blk_fill_sgv4_hdr_rq(request_queue_t *q, struct request *rq, +static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq, struct sg_io_v4 *hdr, int has_write_perm) { memset(rq->cmd, 0, BLK_MAX_CDB); /* ATAPI hates garbage after CDB */ @@ -214,7 +214,7 @@ static int blk_fill_sgv4_hdr_rq(request_ * Check if sg_io_v4 from user is allowed and valid */ static int -bsg_validate_sgv4_hdr(request_queue_t *q, struct sg_io_v4 *hdr, int *rw) +bsg_validate_sgv4_hdr(struct request_queue *q, struct sg_io_v4 *hdr, int *rw) { int ret = 0; @@ -250,7 +250,7 @@ bsg_validate_sgv4_hdr(request_queue_t *q static struct request * bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr) { - request_queue_t *q = bd->queue; + struct request_queue *q = bd->queue; struct request *rq, *next_rq = NULL; int ret, rw; unsigned int dxfer_len; @@ -345,7 +345,7 @@ static void bsg_rq_end_io(struct request * do final setup of a 'bc' and submit the matching 'rq' to the block * layer for io */ -static void bsg_add_command(struct bsg_device *bd, request_queue_t *q, +static void bsg_add_command(struct bsg_device *bd, struct request_queue *q, struct bsg_command *bc, struct request *rq) { rq->sense = bc->sense; @@ -611,7 +611,7 @@ static int __bsg_write(struct bsg_device bc = NULL; ret = 0; while (nr_commands) { - request_queue_t *q = bd->queue; + struct request_queue *q = bd->queue; bc = bsg_alloc_command(bd); if (IS_ERR(bc)) { Index: linux-rt-rebase.q/block/cfq-iosched.c =================================================================== --- 
linux-rt-rebase.q.orig/block/cfq-iosched.c +++ linux-rt-rebase.q/block/cfq-iosched.c @@ -71,7 +71,7 @@ struct cfq_rb_root { * Per block device queue structure */ struct cfq_data { - request_queue_t *queue; + struct request_queue *queue; /* * rr list of queues with requests and the count of them @@ -197,7 +197,7 @@ CFQ_CFQQ_FNS(slice_new); CFQ_CFQQ_FNS(sync); #undef CFQ_CFQQ_FNS -static void cfq_dispatch_insert(request_queue_t *, struct request *); +static void cfq_dispatch_insert(struct request_queue *, struct request *); static struct cfq_queue *cfq_get_queue(struct cfq_data *, int, struct task_struct *, gfp_t); static struct cfq_io_context *cfq_cic_rb_lookup(struct cfq_data *, @@ -237,7 +237,7 @@ static inline void cfq_schedule_dispatch kblockd_schedule_work(&cfqd->unplug_work); } -static int cfq_queue_empty(request_queue_t *q) +static int cfq_queue_empty(struct request_queue *q) { struct cfq_data *cfqd = q->elevator->elevator_data; @@ -623,7 +623,7 @@ cfq_find_rq_fmerge(struct cfq_data *cfqd return NULL; } -static void cfq_activate_request(request_queue_t *q, struct request *rq) +static void cfq_activate_request(struct request_queue *q, struct request *rq) { struct cfq_data *cfqd = q->elevator->elevator_data; @@ -641,7 +641,7 @@ static void cfq_activate_request(request cfqd->last_position = rq->hard_sector + rq->hard_nr_sectors; } -static void cfq_deactivate_request(request_queue_t *q, struct request *rq) +static void cfq_deactivate_request(struct request_queue *q, struct request *rq) { struct cfq_data *cfqd = q->elevator->elevator_data; @@ -665,7 +665,8 @@ static void cfq_remove_request(struct re } } -static int cfq_merge(request_queue_t *q, struct request **req, struct bio *bio) +static int cfq_merge(struct request_queue *q, struct request **req, + struct bio *bio) { struct cfq_data *cfqd = q->elevator->elevator_data; struct request *__rq; @@ -679,7 +680,7 @@ static int cfq_merge(request_queue_t *q, return ELEVATOR_NO_MERGE; } -static void cfq_merged_request(request_queue_t *q, struct request *req, +static void cfq_merged_request(struct request_queue *q, struct request *req, int type) { if (type == ELEVATOR_FRONT_MERGE) { @@ -690,7 +691,7 @@ static void cfq_merged_request(request_q } static void -cfq_merged_requests(request_queue_t *q, struct request *rq, +cfq_merged_requests(struct request_queue *q, struct request *rq, struct request *next) { /* @@ -703,7 +704,7 @@ cfq_merged_requests(request_queue_t *q, cfq_remove_request(next); } -static int cfq_allow_merge(request_queue_t *q, struct request *rq, +static int cfq_allow_merge(struct request_queue *q, struct request *rq, struct bio *bio) { struct cfq_data *cfqd = q->elevator->elevator_data; @@ -913,7 +914,7 @@ static void cfq_arm_slice_timer(struct c /* * Move request from internal lists to the request queue dispatch list. 
*/ -static void cfq_dispatch_insert(request_queue_t *q, struct request *rq) +static void cfq_dispatch_insert(struct request_queue *q, struct request *rq) { struct cfq_data *cfqd = q->elevator->elevator_data; struct cfq_queue *cfqq = RQ_CFQQ(rq); @@ -1093,7 +1094,7 @@ static int cfq_forced_dispatch(struct cf return dispatched; } -static int cfq_dispatch_requests(request_queue_t *q, int force) +static int cfq_dispatch_requests(struct request_queue *q, int force) { struct cfq_data *cfqd = q->elevator->elevator_data; struct cfq_queue *cfqq; @@ -1214,7 +1215,7 @@ static void cfq_exit_single_io_context(s struct cfq_data *cfqd = cic->key; if (cfqd) { - request_queue_t *q = cfqd->queue; + struct request_queue *q = cfqd->queue; spin_lock_irq(q->queue_lock); __cfq_exit_single_io_context(cfqd, cic); @@ -1775,7 +1776,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, s } } -static void cfq_insert_request(request_queue_t *q, struct request *rq) +static void cfq_insert_request(struct request_queue *q, struct request *rq) { struct cfq_data *cfqd = q->elevator->elevator_data; struct cfq_queue *cfqq = RQ_CFQQ(rq); @@ -1789,7 +1790,7 @@ static void cfq_insert_request(request_q cfq_rq_enqueued(cfqd, cfqq, rq); } -static void cfq_completed_request(request_queue_t *q, struct request *rq) +static void cfq_completed_request(struct request_queue *q, struct request *rq) { struct cfq_queue *cfqq = RQ_CFQQ(rq); struct cfq_data *cfqd = cfqq->cfqd; @@ -1868,7 +1869,7 @@ static inline int __cfq_may_queue(struct return ELV_MQUEUE_MAY; } -static int cfq_may_queue(request_queue_t *q, int rw) +static int cfq_may_queue(struct request_queue *q, int rw) { struct cfq_data *cfqd = q->elevator->elevator_data; struct task_struct *tsk = current; @@ -1922,7 +1923,7 @@ static void cfq_put_request(struct reque * Allocate cfq data structures associated with this request. */ static int -cfq_set_request(request_queue_t *q, struct request *rq, gfp_t gfp_mask) +cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask) { struct cfq_data *cfqd = q->elevator->elevator_data; struct task_struct *tsk = current; @@ -1974,7 +1975,7 @@ static void cfq_kick_queue(struct work_s { struct cfq_data *cfqd = container_of(work, struct cfq_data, unplug_work); - request_queue_t *q = cfqd->queue; + struct request_queue *q = cfqd->queue; unsigned long flags; spin_lock_irqsave(q->queue_lock, flags); @@ -2072,7 +2073,7 @@ static void cfq_put_async_queues(struct static void cfq_exit_queue(elevator_t *e) { struct cfq_data *cfqd = e->elevator_data; - request_queue_t *q = cfqd->queue; + struct request_queue *q = cfqd->queue; cfq_shutdown_timer_wq(cfqd); @@ -2098,7 +2099,7 @@ static void cfq_exit_queue(elevator_t *e kfree(cfqd); } -static void *cfq_init_queue(request_queue_t *q) +static void *cfq_init_queue(struct request_queue *q) { struct cfq_data *cfqd; Index: linux-rt-rebase.q/block/deadline-iosched.c =================================================================== --- linux-rt-rebase.q.orig/block/deadline-iosched.c +++ linux-rt-rebase.q/block/deadline-iosched.c @@ -106,7 +106,7 @@ deadline_add_request(struct request_queu /* * remove rq from rbtree and fifo. 
*/ -static void deadline_remove_request(request_queue_t *q, struct request *rq) +static void deadline_remove_request(struct request_queue *q, struct request *rq) { struct deadline_data *dd = q->elevator->elevator_data; @@ -115,7 +115,7 @@ static void deadline_remove_request(requ } static int -deadline_merge(request_queue_t *q, struct request **req, struct bio *bio) +deadline_merge(struct request_queue *q, struct request **req, struct bio *bio) { struct deadline_data *dd = q->elevator->elevator_data; struct request *__rq; @@ -144,8 +144,8 @@ out: return ret; } -static void deadline_merged_request(request_queue_t *q, struct request *req, - int type) +static void deadline_merged_request(struct request_queue *q, + struct request *req, int type) { struct deadline_data *dd = q->elevator->elevator_data; @@ -159,7 +159,7 @@ static void deadline_merged_request(requ } static void -deadline_merged_requests(request_queue_t *q, struct request *req, +deadline_merged_requests(struct request_queue *q, struct request *req, struct request *next) { /* @@ -185,7 +185,7 @@ deadline_merged_requests(request_queue_t static inline void deadline_move_to_dispatch(struct deadline_data *dd, struct request *rq) { - request_queue_t *q = rq->q; + struct request_queue *q = rq->q; deadline_remove_request(q, rq); elv_dispatch_add_tail(q, rq); @@ -236,7 +236,7 @@ static inline int deadline_check_fifo(st * deadline_dispatch_requests selects the best request according to * read/write expire, fifo_batch, etc */ -static int deadline_dispatch_requests(request_queue_t *q, int force) +static int deadline_dispatch_requests(struct request_queue *q, int force) { struct deadline_data *dd = q->elevator->elevator_data; const int reads = !list_empty(&dd->fifo_list[READ]); @@ -335,7 +335,7 @@ dispatch_request: return 1; } -static int deadline_queue_empty(request_queue_t *q) +static int deadline_queue_empty(struct request_queue *q) { struct deadline_data *dd = q->elevator->elevator_data; @@ -356,7 +356,7 @@ static void deadline_exit_queue(elevator /* * initialize elevator private data (deadline_data). 
 */
-static void *deadline_init_queue(request_queue_t *q)
+static void *deadline_init_queue(struct request_queue *q)
 {
 	struct deadline_data *dd;
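
Aside: the elevator.c diff below continues the same mechanical conversion
seen throughout the block layer here: the request_queue_t typedef is spelled
out as struct request_queue, with no behavioural change. A standalone sketch
(not part of the patch; the stand-in member and function names are invented):

/*
 * Illustration only -- NOT from the patch.  The typedef'd and explicit
 * spellings name the same type, so the hunks below can touch parameter
 * lists without changing any behaviour.
 */
struct request_queue {
	int nr_pending;		/* stand-in member; the real struct is in blkdev.h */
};

typedef struct request_queue request_queue_t;	/* old spelling, being retired */

static int queue_empty_old(request_queue_t *q)      { return q->nr_pending == 0; }
static int queue_empty_new(struct request_queue *q) { return q->nr_pending == 0; }

int main(void)
{
	struct request_queue q = { .nr_pending = 0 };

	return queue_empty_old(&q) == queue_empty_new(&q) ? 0 : 1;
}

Index: linux-rt-rebase.q/block/elevator.c
===================================================================
--- linux-rt-rebase.q.orig/block/elevator.c
+++ linux-rt-rebase.q/block/elevator.c
@@ -56,7 +56,7 @@ static const int elv_hash_shift = 6;
  */
 static int elv_iosched_allow_merge(struct request *rq, struct bio *bio)
 {
-	request_queue_t *q = rq->q;
+	struct request_queue *q = rq->q;
 	elevator_t *e = q->elevator;
 
 	if (e->ops->elevator_allow_merge_fn)
@@ -141,12 +141,13 @@ static struct elevator_type *elevator_ge
 	return e;
 }
 
-static void *elevator_init_queue(request_queue_t *q, struct elevator_queue *eq)
+static void *elevator_init_queue(struct request_queue *q,
+				 struct elevator_queue *eq)
 {
 	return eq->ops->elevator_init_fn(q);
 }
 
-static void elevator_attach(request_queue_t *q, struct elevator_queue *eq,
+static void elevator_attach(struct request_queue *q, struct elevator_queue *eq,
 			    void *data)
 {
 	q->elevator = eq;
@@ -172,7 +173,8 @@ __setup("elevator=", elevator_setup);
 
 static struct kobj_type elv_ktype;
 
-static elevator_t *elevator_alloc(request_queue_t *q, struct elevator_type *e)
+static elevator_t *elevator_alloc(struct request_queue *q,
+				  struct elevator_type *e)
 {
 	elevator_t *eq;
 	int i;
@@ -212,7 +214,7 @@ static void elevator_release(struct kobj
 	kfree(e);
 }
 
-int elevator_init(request_queue_t *q, char *name)
+int elevator_init(struct request_queue *q, char *name)
 {
 	struct elevator_type *e = NULL;
 	struct elevator_queue *eq;
@@ -264,7 +266,7 @@ void elevator_exit(elevator_t *e)
 
 EXPORT_SYMBOL(elevator_exit);
 
-static void elv_activate_rq(request_queue_t *q, struct request *rq)
+static void elv_activate_rq(struct request_queue *q, struct request *rq)
 {
 	elevator_t *e = q->elevator;
 
@@ -272,7 +274,7 @@ static void elv_activate_rq(request_queu
 	e->ops->elevator_activate_req_fn(q, rq);
 }
 
-static void elv_deactivate_rq(request_queue_t *q, struct request *rq)
+static void elv_deactivate_rq(struct request_queue *q, struct request *rq)
 {
 	elevator_t *e = q->elevator;
 
@@ -285,13 +287,13 @@ static inline void __elv_rqhash_del(stru
 	hlist_del_init(&rq->hash);
 }
 
-static void elv_rqhash_del(request_queue_t *q, struct request *rq)
+static void elv_rqhash_del(struct request_queue *q, struct request *rq)
 {
 	if (ELV_ON_HASH(rq))
 		__elv_rqhash_del(rq);
 }
 
-static void elv_rqhash_add(request_queue_t *q, struct request *rq)
+static void elv_rqhash_add(struct request_queue *q, struct request *rq)
 {
 	elevator_t *e = q->elevator;
 
@@ -299,13 +301,13 @@ static void elv_rqhash_add(request_queue
 	hlist_add_head(&rq->hash, &e->hash[ELV_HASH_FN(rq_hash_key(rq))]);
 }
 
-static void elv_rqhash_reposition(request_queue_t *q, struct request *rq)
+static void elv_rqhash_reposition(struct request_queue *q, struct request *rq)
 {
 	__elv_rqhash_del(rq);
 	elv_rqhash_add(q, rq);
 }
 
-static struct request *elv_rqhash_find(request_queue_t *q, sector_t offset)
+static struct request *elv_rqhash_find(struct request_queue *q, sector_t offset)
 {
 	elevator_t *e = q->elevator;
 	struct hlist_head *hash_list = &e->hash[ELV_HASH_FN(offset)];
@@ -391,7 +393,7 @@ EXPORT_SYMBOL(elv_rb_find);
 * entry.  rq is sorted into the dispatch queue.  To be used by
 * specific elevators.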
*/ -void elv_dispatch_sort(request_queue_t *q, struct request *rq) +void elv_dispatch_sort(struct request_queue *q, struct request *rq) { sector_t boundary; struct list_head *entry; @@ -449,7 +451,7 @@ void elv_dispatch_add_tail(struct reques EXPORT_SYMBOL(elv_dispatch_add_tail); -int elv_merge(request_queue_t *q, struct request **req, struct bio *bio) +int elv_merge(struct request_queue *q, struct request **req, struct bio *bio) { elevator_t *e = q->elevator; struct request *__rq; @@ -481,7 +483,7 @@ int elv_merge(request_queue_t *q, struct return ELEVATOR_NO_MERGE; } -void elv_merged_request(request_queue_t *q, struct request *rq, int type) +void elv_merged_request(struct request_queue *q, struct request *rq, int type) { elevator_t *e = q->elevator; @@ -494,7 +496,7 @@ void elv_merged_request(request_queue_t q->last_merge = rq; } -void elv_merge_requests(request_queue_t *q, struct request *rq, +void elv_merge_requests(struct request_queue *q, struct request *rq, struct request *next) { elevator_t *e = q->elevator; @@ -509,7 +511,7 @@ void elv_merge_requests(request_queue_t q->last_merge = rq; } -void elv_requeue_request(request_queue_t *q, struct request *rq) +void elv_requeue_request(struct request_queue *q, struct request *rq) { /* * it already went through dequeue, we need to decrement the @@ -526,7 +528,7 @@ void elv_requeue_request(request_queue_t elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE); } -static void elv_drain_elevator(request_queue_t *q) +static void elv_drain_elevator(struct request_queue *q) { static int printed; while (q->elevator->ops->elevator_dispatch_fn(q, 1)) @@ -540,7 +542,7 @@ static void elv_drain_elevator(request_q } } -void elv_insert(request_queue_t *q, struct request *rq, int where) +void elv_insert(struct request_queue *q, struct request *rq, int where) { struct list_head *pos; unsigned ordseq; @@ -638,7 +640,7 @@ void elv_insert(request_queue_t *q, stru } } -void __elv_add_request(request_queue_t *q, struct request *rq, int where, +void __elv_add_request(struct request_queue *q, struct request *rq, int where, int plug) { if (q->ordcolor) @@ -676,7 +678,7 @@ void __elv_add_request(request_queue_t * EXPORT_SYMBOL(__elv_add_request); -void elv_add_request(request_queue_t *q, struct request *rq, int where, +void elv_add_request(struct request_queue *q, struct request *rq, int where, int plug) { unsigned long flags; @@ -688,7 +690,7 @@ void elv_add_request(request_queue_t *q, EXPORT_SYMBOL(elv_add_request); -static inline struct request *__elv_next_request(request_queue_t *q) +static inline struct request *__elv_next_request(struct request_queue *q) { struct request *rq; @@ -704,7 +706,7 @@ static inline struct request *__elv_next } } -struct request *elv_next_request(request_queue_t *q) +struct request *elv_next_request(struct request_queue *q) { struct request *rq; int ret; @@ -770,7 +772,7 @@ struct request *elv_next_request(request EXPORT_SYMBOL(elv_next_request); -void elv_dequeue_request(request_queue_t *q, struct request *rq) +void elv_dequeue_request(struct request_queue *q, struct request *rq) { BUG_ON(list_empty(&rq->queuelist)); BUG_ON(ELV_ON_HASH(rq)); @@ -788,7 +790,7 @@ void elv_dequeue_request(request_queue_t EXPORT_SYMBOL(elv_dequeue_request); -int elv_queue_empty(request_queue_t *q) +int elv_queue_empty(struct request_queue *q) { elevator_t *e = q->elevator; @@ -803,7 +805,7 @@ int elv_queue_empty(request_queue_t *q) EXPORT_SYMBOL(elv_queue_empty); -struct request *elv_latter_request(request_queue_t *q, struct request *rq) +struct request 
*elv_latter_request(struct request_queue *q, struct request *rq) { elevator_t *e = q->elevator; @@ -812,7 +814,7 @@ struct request *elv_latter_request(reque return NULL; } -struct request *elv_former_request(request_queue_t *q, struct request *rq) +struct request *elv_former_request(struct request_queue *q, struct request *rq) { elevator_t *e = q->elevator; @@ -821,7 +823,7 @@ struct request *elv_former_request(reque return NULL; } -int elv_set_request(request_queue_t *q, struct request *rq, gfp_t gfp_mask) +int elv_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask) { elevator_t *e = q->elevator; @@ -832,7 +834,7 @@ int elv_set_request(request_queue_t *q, return 0; } -void elv_put_request(request_queue_t *q, struct request *rq) +void elv_put_request(struct request_queue *q, struct request *rq) { elevator_t *e = q->elevator; @@ -840,7 +842,7 @@ void elv_put_request(request_queue_t *q, e->ops->elevator_put_req_fn(rq); } -int elv_may_queue(request_queue_t *q, int rw) +int elv_may_queue(struct request_queue *q, int rw) { elevator_t *e = q->elevator; @@ -850,7 +852,7 @@ int elv_may_queue(request_queue_t *q, in return ELV_MQUEUE_MAY; } -void elv_completed_request(request_queue_t *q, struct request *rq) +void elv_completed_request(struct request_queue *q, struct request *rq) { elevator_t *e = q->elevator; @@ -1006,7 +1008,7 @@ EXPORT_SYMBOL_GPL(elv_unregister); * need for the new one. this way we have a chance of going back to the old * one, if the new one fails init for some reason. */ -static int elevator_switch(request_queue_t *q, struct elevator_type *new_e) +static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) { elevator_t *old_elevator, *e; void *data; @@ -1078,7 +1080,8 @@ fail_register: return 0; } -ssize_t elv_iosched_store(request_queue_t *q, const char *name, size_t count) +ssize_t elv_iosched_store(struct request_queue *q, const char *name, + size_t count) { char elevator_name[ELV_NAME_MAX]; size_t len; @@ -1107,7 +1110,7 @@ ssize_t elv_iosched_store(request_queue_ return count; } -ssize_t elv_iosched_show(request_queue_t *q, char *name) +ssize_t elv_iosched_show(struct request_queue *q, char *name) { elevator_t *e = q->elevator; struct elevator_type *elv = e->elevator_type; @@ -1127,7 +1130,8 @@ ssize_t elv_iosched_show(request_queue_t return len; } -struct request *elv_rb_former_request(request_queue_t *q, struct request *rq) +struct request *elv_rb_former_request(struct request_queue *q, + struct request *rq) { struct rb_node *rbprev = rb_prev(&rq->rb_node); @@ -1139,7 +1143,8 @@ struct request *elv_rb_former_request(re EXPORT_SYMBOL(elv_rb_former_request); -struct request *elv_rb_latter_request(request_queue_t *q, struct request *rq) +struct request *elv_rb_latter_request(struct request_queue *q, + struct request *rq) { struct rb_node *rbnext = rb_next(&rq->rb_node); Index: linux-rt-rebase.q/block/ll_rw_blk.c =================================================================== --- linux-rt-rebase.q.orig/block/ll_rw_blk.c +++ linux-rt-rebase.q/block/ll_rw_blk.c @@ -40,7 +40,7 @@ static void blk_unplug_work(struct work_ static void blk_unplug_timeout(unsigned long data); static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io); static void init_request_from_bio(struct request *req, struct bio *bio); -static int __make_request(request_queue_t *q, struct bio *bio); +static int __make_request(struct request_queue *q, struct bio *bio); static struct io_context *current_io_context(gfp_t gfp_flags, int node); /* @@ -121,7 
+121,7 @@ static void blk_queue_congestion_thresho struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev) { struct backing_dev_info *ret = NULL; - request_queue_t *q = bdev_get_queue(bdev); + struct request_queue *q = bdev_get_queue(bdev); if (q) ret = &q->backing_dev_info; @@ -140,7 +140,7 @@ EXPORT_SYMBOL(blk_get_backing_dev_info); * cdb from the request data for instance. * */ -void blk_queue_prep_rq(request_queue_t *q, prep_rq_fn *pfn) +void blk_queue_prep_rq(struct request_queue *q, prep_rq_fn *pfn) { q->prep_rq_fn = pfn; } @@ -163,14 +163,14 @@ EXPORT_SYMBOL(blk_queue_prep_rq); * no merge_bvec_fn is defined for a queue, and only the fixed limits are * honored. */ -void blk_queue_merge_bvec(request_queue_t *q, merge_bvec_fn *mbfn) +void blk_queue_merge_bvec(struct request_queue *q, merge_bvec_fn *mbfn) { q->merge_bvec_fn = mbfn; } EXPORT_SYMBOL(blk_queue_merge_bvec); -void blk_queue_softirq_done(request_queue_t *q, softirq_done_fn *fn) +void blk_queue_softirq_done(struct request_queue *q, softirq_done_fn *fn) { q->softirq_done_fn = fn; } @@ -199,7 +199,7 @@ EXPORT_SYMBOL(blk_queue_softirq_done); * __bio_kmap_atomic() to get a temporary kernel mapping, or by calling * blk_queue_bounce() to create a buffer in normal memory. **/ -void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn) +void blk_queue_make_request(struct request_queue * q, make_request_fn * mfn) { /* * set defaults @@ -235,7 +235,7 @@ void blk_queue_make_request(request_queu EXPORT_SYMBOL(blk_queue_make_request); -static void rq_init(request_queue_t *q, struct request *rq) +static void rq_init(struct request_queue *q, struct request *rq) { INIT_LIST_HEAD(&rq->queuelist); INIT_LIST_HEAD(&rq->donelist); @@ -272,7 +272,7 @@ static void rq_init(request_queue_t *q, * feature should call this function and indicate so. * **/ -int blk_queue_ordered(request_queue_t *q, unsigned ordered, +int blk_queue_ordered(struct request_queue *q, unsigned ordered, prepare_flush_fn *prepare_flush_fn) { if (ordered & (QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH) && @@ -311,7 +311,7 @@ EXPORT_SYMBOL(blk_queue_ordered); * to the block layer by defining it through this call. 
* **/ -void blk_queue_issue_flush_fn(request_queue_t *q, issue_flush_fn *iff) +void blk_queue_issue_flush_fn(struct request_queue *q, issue_flush_fn *iff) { q->issue_flush_fn = iff; } @@ -321,7 +321,7 @@ EXPORT_SYMBOL(blk_queue_issue_flush_fn); /* * Cache flushing for ordered writes handling */ -inline unsigned blk_ordered_cur_seq(request_queue_t *q) +inline unsigned blk_ordered_cur_seq(struct request_queue *q) { if (!q->ordseq) return 0; @@ -330,7 +330,7 @@ inline unsigned blk_ordered_cur_seq(requ unsigned blk_ordered_req_seq(struct request *rq) { - request_queue_t *q = rq->q; + struct request_queue *q = rq->q; BUG_ON(q->ordseq == 0); @@ -357,7 +357,7 @@ unsigned blk_ordered_req_seq(struct requ return QUEUE_ORDSEQ_DONE; } -void blk_ordered_complete_seq(request_queue_t *q, unsigned seq, int error) +void blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error) { struct request *rq; int uptodate; @@ -401,7 +401,7 @@ static void post_flush_end_io(struct req blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error); } -static void queue_flush(request_queue_t *q, unsigned which) +static void queue_flush(struct request_queue *q, unsigned which) { struct request *rq; rq_end_io_fn *end_io; @@ -425,7 +425,7 @@ static void queue_flush(request_queue_t elv_insert(q, rq, ELEVATOR_INSERT_FRONT); } -static inline struct request *start_ordered(request_queue_t *q, +static inline struct request *start_ordered(struct request_queue *q, struct request *rq) { q->bi_size = 0; @@ -476,7 +476,7 @@ static inline struct request *start_orde return rq; } -int blk_do_ordered(request_queue_t *q, struct request **rqp) +int blk_do_ordered(struct request_queue *q, struct request **rqp) { struct request *rq = *rqp; int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq); @@ -527,7 +527,7 @@ int blk_do_ordered(request_queue_t *q, s static int flush_dry_bio_endio(struct bio *bio, unsigned int bytes, int error) { - request_queue_t *q = bio->bi_private; + struct request_queue *q = bio->bi_private; /* * This is dry run, restore bio_sector and size. We'll finish @@ -551,7 +551,7 @@ static int flush_dry_bio_endio(struct bi static int ordered_bio_endio(struct request *rq, struct bio *bio, unsigned int nbytes, int error) { - request_queue_t *q = rq->q; + struct request_queue *q = rq->q; bio_end_io_t *endio; void *private; @@ -588,7 +588,7 @@ static int ordered_bio_endio(struct requ * blk_queue_bounce_limit to have lower memory pages allocated as bounce * buffers for doing I/O to pages residing above @page. **/ -void blk_queue_bounce_limit(request_queue_t *q, u64 dma_addr) +void blk_queue_bounce_limit(struct request_queue *q, u64 dma_addr) { unsigned long bounce_pfn = dma_addr >> PAGE_SHIFT; int dma = 0; @@ -624,7 +624,7 @@ EXPORT_SYMBOL(blk_queue_bounce_limit); * Enables a low level driver to set an upper limit on the size of * received requests. **/ -void blk_queue_max_sectors(request_queue_t *q, unsigned int max_sectors) +void blk_queue_max_sectors(struct request_queue *q, unsigned int max_sectors) { if ((max_sectors << 9) < PAGE_CACHE_SIZE) { max_sectors = 1 << (PAGE_CACHE_SHIFT - 9); @@ -651,7 +651,8 @@ EXPORT_SYMBOL(blk_queue_max_sectors); * physical data segments in a request. This would be the largest sized * scatter list the driver could handle. 
**/ -void blk_queue_max_phys_segments(request_queue_t *q, unsigned short max_segments) +void blk_queue_max_phys_segments(struct request_queue *q, + unsigned short max_segments) { if (!max_segments) { max_segments = 1; @@ -674,7 +675,8 @@ EXPORT_SYMBOL(blk_queue_max_phys_segment * address/length pairs the host adapter can actually give at once * to the device. **/ -void blk_queue_max_hw_segments(request_queue_t *q, unsigned short max_segments) +void blk_queue_max_hw_segments(struct request_queue *q, + unsigned short max_segments) { if (!max_segments) { max_segments = 1; @@ -695,7 +697,7 @@ EXPORT_SYMBOL(blk_queue_max_hw_segments) * Enables a low level driver to set an upper limit on the size of a * coalesced segment **/ -void blk_queue_max_segment_size(request_queue_t *q, unsigned int max_size) +void blk_queue_max_segment_size(struct request_queue *q, unsigned int max_size) { if (max_size < PAGE_CACHE_SIZE) { max_size = PAGE_CACHE_SIZE; @@ -718,7 +720,7 @@ EXPORT_SYMBOL(blk_queue_max_segment_size * even internal read-modify-write operations). Usually the default * of 512 covers most hardware. **/ -void blk_queue_hardsect_size(request_queue_t *q, unsigned short size) +void blk_queue_hardsect_size(struct request_queue *q, unsigned short size) { q->hardsect_size = size; } @@ -735,7 +737,7 @@ EXPORT_SYMBOL(blk_queue_hardsect_size); * @t: the stacking driver (top) * @b: the underlying device (bottom) **/ -void blk_queue_stack_limits(request_queue_t *t, request_queue_t *b) +void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b) { /* zero is "infinity" */ t->max_sectors = min_not_zero(t->max_sectors,b->max_sectors); @@ -756,7 +758,7 @@ EXPORT_SYMBOL(blk_queue_stack_limits); * @q: the request queue for the device * @mask: the memory boundary mask **/ -void blk_queue_segment_boundary(request_queue_t *q, unsigned long mask) +void blk_queue_segment_boundary(struct request_queue *q, unsigned long mask) { if (mask < PAGE_CACHE_SIZE - 1) { mask = PAGE_CACHE_SIZE - 1; @@ -778,7 +780,7 @@ EXPORT_SYMBOL(blk_queue_segment_boundary * this is used when building direct io requests for the queue. * **/ -void blk_queue_dma_alignment(request_queue_t *q, int mask) +void blk_queue_dma_alignment(struct request_queue *q, int mask) { q->dma_alignment = mask; } @@ -796,7 +798,7 @@ EXPORT_SYMBOL(blk_queue_dma_alignment); * * no locks need be held. **/ -struct request *blk_queue_find_tag(request_queue_t *q, int tag) +struct request *blk_queue_find_tag(struct request_queue *q, int tag) { return blk_map_queue_find_tag(q->queue_tags, tag); } @@ -840,7 +842,7 @@ static int __blk_free_tags(struct blk_qu * blk_cleanup_queue() will take care of calling this function, if tagging * has been used. So there's no need to call this directly. **/ -static void __blk_queue_free_tags(request_queue_t *q) +static void __blk_queue_free_tags(struct request_queue *q) { struct blk_queue_tag *bqt = q->queue_tags; @@ -877,7 +879,7 @@ EXPORT_SYMBOL(blk_free_tags); * This is used to disable tagged queuing to a device, yet leave * queue in function. 
**/ -void blk_queue_free_tags(request_queue_t *q) +void blk_queue_free_tags(struct request_queue *q) { clear_bit(QUEUE_FLAG_QUEUED, &q->queue_flags); } @@ -885,7 +887,7 @@ void blk_queue_free_tags(request_queue_t EXPORT_SYMBOL(blk_queue_free_tags); static int -init_tag_map(request_queue_t *q, struct blk_queue_tag *tags, int depth) +init_tag_map(struct request_queue *q, struct blk_queue_tag *tags, int depth) { struct request **tag_index; unsigned long *tag_map; @@ -955,7 +957,7 @@ EXPORT_SYMBOL(blk_init_tags); * @depth: the maximum queue depth supported * @tags: the tag to use **/ -int blk_queue_init_tags(request_queue_t *q, int depth, +int blk_queue_init_tags(struct request_queue *q, int depth, struct blk_queue_tag *tags) { int rc; @@ -996,7 +998,7 @@ EXPORT_SYMBOL(blk_queue_init_tags); * Notes: * Must be called with the queue lock held. **/ -int blk_queue_resize_tags(request_queue_t *q, int new_depth) +int blk_queue_resize_tags(struct request_queue *q, int new_depth) { struct blk_queue_tag *bqt = q->queue_tags; struct request **tag_index; @@ -1059,7 +1061,7 @@ EXPORT_SYMBOL(blk_queue_resize_tags); * Notes: * queue lock must be held. **/ -void blk_queue_end_tag(request_queue_t *q, struct request *rq) +void blk_queue_end_tag(struct request_queue *q, struct request *rq) { struct blk_queue_tag *bqt = q->queue_tags; int tag = rq->tag; @@ -1111,7 +1113,7 @@ EXPORT_SYMBOL(blk_queue_end_tag); * Notes: * queue lock must be held. **/ -int blk_queue_start_tag(request_queue_t *q, struct request *rq) +int blk_queue_start_tag(struct request_queue *q, struct request *rq) { struct blk_queue_tag *bqt = q->queue_tags; int tag; @@ -1158,7 +1160,7 @@ EXPORT_SYMBOL(blk_queue_start_tag); * Notes: * queue lock must be held. **/ -void blk_queue_invalidate_tags(request_queue_t *q) +void blk_queue_invalidate_tags(struct request_queue *q) { struct blk_queue_tag *bqt = q->queue_tags; struct list_head *tmp, *n; @@ -1205,7 +1207,7 @@ void blk_dump_rq_flags(struct request *r EXPORT_SYMBOL(blk_dump_rq_flags); -void blk_recount_segments(request_queue_t *q, struct bio *bio) +void blk_recount_segments(struct request_queue *q, struct bio *bio) { struct bio_vec *bv, *bvprv = NULL; int i, nr_phys_segs, nr_hw_segs, seg_size, hw_seg_size, cluster; @@ -1267,7 +1269,7 @@ new_hw_segment: } EXPORT_SYMBOL(blk_recount_segments); -static int blk_phys_contig_segment(request_queue_t *q, struct bio *bio, +static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio, struct bio *nxt) { if (!(q->queue_flags & (1 << QUEUE_FLAG_CLUSTER))) @@ -1288,7 +1290,7 @@ static int blk_phys_contig_segment(reque return 0; } -static int blk_hw_contig_segment(request_queue_t *q, struct bio *bio, +static int blk_hw_contig_segment(struct request_queue *q, struct bio *bio, struct bio *nxt) { if (unlikely(!bio_flagged(bio, BIO_SEG_VALID))) @@ -1308,7 +1310,8 @@ static int blk_hw_contig_segment(request * map a request to scatterlist, return number of sg entries setup. 
Caller * must make sure sg can hold rq->nr_phys_segments entries */ -int blk_rq_map_sg(request_queue_t *q, struct request *rq, struct scatterlist *sg) +int blk_rq_map_sg(struct request_queue *q, struct request *rq, + struct scatterlist *sg) { struct bio_vec *bvec, *bvprv; struct bio *bio; @@ -1361,7 +1364,7 @@ EXPORT_SYMBOL(blk_rq_map_sg); * specific ones if so desired */ -static inline int ll_new_mergeable(request_queue_t *q, +static inline int ll_new_mergeable(struct request_queue *q, struct request *req, struct bio *bio) { @@ -1382,7 +1385,7 @@ static inline int ll_new_mergeable(reque return 1; } -static inline int ll_new_hw_segment(request_queue_t *q, +static inline int ll_new_hw_segment(struct request_queue *q, struct request *req, struct bio *bio) { @@ -1406,7 +1409,7 @@ static inline int ll_new_hw_segment(requ return 1; } -int ll_back_merge_fn(request_queue_t *q, struct request *req, struct bio *bio) +int ll_back_merge_fn(struct request_queue *q, struct request *req, struct bio *bio) { unsigned short max_sectors; int len; @@ -1444,7 +1447,7 @@ int ll_back_merge_fn(request_queue_t *q, } EXPORT_SYMBOL(ll_back_merge_fn); -static int ll_front_merge_fn(request_queue_t *q, struct request *req, +static int ll_front_merge_fn(struct request_queue *q, struct request *req, struct bio *bio) { unsigned short max_sectors; @@ -1483,7 +1486,7 @@ static int ll_front_merge_fn(request_que return ll_new_hw_segment(q, req, bio); } -static int ll_merge_requests_fn(request_queue_t *q, struct request *req, +static int ll_merge_requests_fn(struct request_queue *q, struct request *req, struct request *next) { int total_phys_segments; @@ -1539,7 +1542,7 @@ static int ll_merge_requests_fn(request_ * This is called with interrupts off and no requests on the queue and * with the queue lock held. */ -void blk_plug_device(request_queue_t *q) +void blk_plug_device(struct request_queue *q) { WARN_ON(!irqs_disabled()); @@ -1562,7 +1565,7 @@ EXPORT_SYMBOL(blk_plug_device); * remove the queue from the plugged list, if present. called with * queue lock held and interrupts disabled. */ -int blk_remove_plug(request_queue_t *q) +int blk_remove_plug(struct request_queue *q) { WARN_ON(!irqs_disabled()); @@ -1578,7 +1581,7 @@ EXPORT_SYMBOL(blk_remove_plug); /* * remove the plug and let it rip.. */ -void __generic_unplug_device(request_queue_t *q) +void __generic_unplug_device(struct request_queue *q) { if (unlikely(blk_queue_stopped(q))) return; @@ -1592,7 +1595,7 @@ EXPORT_SYMBOL(__generic_unplug_device); /** * generic_unplug_device - fire a request queue - * @q: The &request_queue_t in question + * @q: The &struct request_queue in question * * Description: * Linux uses plugging to build bigger requests queues before letting @@ -1601,7 +1604,7 @@ EXPORT_SYMBOL(__generic_unplug_device); * gets unplugged, the request_fn defined for the queue is invoked and * transfers started. 
**/ -void generic_unplug_device(request_queue_t *q) +void generic_unplug_device(struct request_queue *q) { spin_lock_irq(q->queue_lock); __generic_unplug_device(q); @@ -1612,7 +1615,7 @@ EXPORT_SYMBOL(generic_unplug_device); static void blk_backing_dev_unplug(struct backing_dev_info *bdi, struct page *page) { - request_queue_t *q = bdi->unplug_io_data; + struct request_queue *q = bdi->unplug_io_data; /* * devices don't necessarily have an ->unplug_fn defined @@ -1627,7 +1630,8 @@ static void blk_backing_dev_unplug(struc static void blk_unplug_work(struct work_struct *work) { - request_queue_t *q = container_of(work, request_queue_t, unplug_work); + struct request_queue *q = + container_of(work, struct request_queue, unplug_work); blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL, q->rq.count[READ] + q->rq.count[WRITE]); @@ -1637,7 +1641,7 @@ static void blk_unplug_work(struct work_ static void blk_unplug_timeout(unsigned long data) { - request_queue_t *q = (request_queue_t *)data; + struct request_queue *q = (struct request_queue *)data; blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_TIMER, NULL, q->rq.count[READ] + q->rq.count[WRITE]); @@ -1647,14 +1651,14 @@ static void blk_unplug_timeout(unsigned /** * blk_start_queue - restart a previously stopped queue - * @q: The &request_queue_t in question + * @q: The &struct request_queue in question * * Description: * blk_start_queue() will clear the stop flag on the queue, and call * the request_fn for the queue if it was in a stopped state when * entered. Also see blk_stop_queue(). Queue lock must be held. **/ -void blk_start_queue(request_queue_t *q) +void blk_start_queue(struct request_queue *q) { WARN_ON(!irqs_disabled()); @@ -1677,7 +1681,7 @@ EXPORT_SYMBOL(blk_start_queue); /** * blk_stop_queue - stop a queue - * @q: The &request_queue_t in question + * @q: The &struct request_queue in question * * Description: * The Linux block layer assumes that a block driver will consume all @@ -1689,7 +1693,7 @@ EXPORT_SYMBOL(blk_start_queue); * the driver has signalled it's ready to go again. This happens by calling * blk_start_queue() to restart queue operations. Queue lock must be held. 
**/ -void blk_stop_queue(request_queue_t *q) +void blk_stop_queue(struct request_queue *q) { blk_remove_plug(q); set_bit(QUEUE_FLAG_STOPPED, &q->queue_flags); } @@ -1746,7 +1750,7 @@ void blk_run_queue(struct request_queue EXPORT_SYMBOL(blk_run_queue); /** - * blk_cleanup_queue: - release a &request_queue_t when it is no longer needed + * blk_cleanup_queue: - release a &struct request_queue when it is no longer needed * @kobj: the kobj belonging to the request queue to be released * * Description: @@ -1762,7 +1766,8 @@ EXPORT_SYMBOL(blk_run_queue); **/ static void blk_release_queue(struct kobject *kobj) { - request_queue_t *q = container_of(kobj, struct request_queue, kobj); + struct request_queue *q = + container_of(kobj, struct request_queue, kobj); struct request_list *rl = &q->rq; blk_sync_queue(q); @@ -1778,13 +1783,13 @@ static void blk_release_queue(struct kob kmem_cache_free(requestq_cachep, q); } -void blk_put_queue(request_queue_t *q) +void blk_put_queue(struct request_queue *q) { kobject_put(&q->kobj); } EXPORT_SYMBOL(blk_put_queue); -void blk_cleanup_queue(request_queue_t * q) +void blk_cleanup_queue(struct request_queue * q) { mutex_lock(&q->sysfs_lock); set_bit(QUEUE_FLAG_DEAD, &q->queue_flags); @@ -1798,7 +1803,7 @@ void blk_cleanup_queue(request_queue_t * EXPORT_SYMBOL(blk_cleanup_queue); -static int blk_init_free_list(request_queue_t *q) +static int blk_init_free_list(struct request_queue *q) { struct request_list *rl = &q->rq; @@ -1817,7 +1822,7 @@ static int blk_init_free_list(request_qu return 0; } -request_queue_t *blk_alloc_queue(gfp_t gfp_mask) +struct request_queue *blk_alloc_queue(gfp_t gfp_mask) { return blk_alloc_queue_node(gfp_mask, -1); } @@ -1825,9 +1830,9 @@ EXPORT_SYMBOL(blk_alloc_queue); static struct kobj_type queue_ktype; -request_queue_t *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) +struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) { - request_queue_t *q; + struct request_queue *q; q = kmem_cache_alloc_node(requestq_cachep, gfp_mask | __GFP_ZERO, node_id); @@ -1882,16 +1887,16 @@ EXPORT_SYMBOL(blk_alloc_queue_node); * when the block device is deactivated (such as at module unload). 
**/ -request_queue_t *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock) +struct request_queue *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock) { return blk_init_queue_node(rfn, lock, -1); } EXPORT_SYMBOL(blk_init_queue); -request_queue_t * +struct request_queue * blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) { - request_queue_t *q = blk_alloc_queue_node(GFP_KERNEL, node_id); + struct request_queue *q = blk_alloc_queue_node(GFP_KERNEL, node_id); if (!q) return NULL; @@ -1940,7 +1945,7 @@ blk_init_queue_node(request_fn_proc *rfn } EXPORT_SYMBOL(blk_init_queue_node); -int blk_get_queue(request_queue_t *q) +int blk_get_queue(struct request_queue *q) { if (likely(!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) { kobject_get(&q->kobj); @@ -1952,7 +1957,7 @@ int blk_get_queue(request_queue_t *q) EXPORT_SYMBOL(blk_get_queue); -static inline void blk_free_request(request_queue_t *q, struct request *rq) +static inline void blk_free_request(struct request_queue *q, struct request *rq) { if (rq->cmd_flags & REQ_ELVPRIV) elv_put_request(q, rq); @@ -1960,7 +1965,7 @@ static inline void blk_free_request(requ } static struct request * -blk_alloc_request(request_queue_t *q, int rw, int priv, gfp_t gfp_mask) +blk_alloc_request(struct request_queue *q, int rw, int priv, gfp_t gfp_mask) { struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask); @@ -1988,7 +1993,7 @@ blk_alloc_request(request_queue_t *q, in * ioc_batching returns true if the ioc is a valid batching request and * should be given priority access to a request. */ -static inline int ioc_batching(request_queue_t *q, struct io_context *ioc) +static inline int ioc_batching(struct request_queue *q, struct io_context *ioc) { if (!ioc) return 0; @@ -2009,7 +2014,7 @@ static inline int ioc_batching(request_q * is the behaviour we want though - once it gets a wakeup it should be given * a nice run. */ -static void ioc_set_batching(request_queue_t *q, struct io_context *ioc) +static void ioc_set_batching(struct request_queue *q, struct io_context *ioc) { if (!ioc || ioc_batching(q, ioc)) return; @@ -2018,7 +2023,7 @@ static void ioc_set_batching(request_que ioc->last_waited = jiffies; } -static void __freed_request(request_queue_t *q, int rw) +static void __freed_request(struct request_queue *q, int rw) { struct request_list *rl = &q->rq; @@ -2037,7 +2042,7 @@ static void __freed_request(request_queu * A request has just been released. Account for it, update the full and * congestion status, wake up any waiters. Called under q->queue_lock. */ -static void freed_request(request_queue_t *q, int rw, int priv) +static void freed_request(struct request_queue *q, int rw, int priv) { struct request_list *rl = &q->rq; @@ -2057,7 +2062,7 @@ static void freed_request(request_queue_ * Returns NULL on failure, with queue_lock held. * Returns !NULL on success, with queue_lock *not held*. */ -static struct request *get_request(request_queue_t *q, int rw_flags, +static struct request *get_request(struct request_queue *q, int rw_flags, struct bio *bio, gfp_t gfp_mask) { struct request *rq = NULL; @@ -2162,7 +2167,7 @@ out: * * Called with q->queue_lock held, and returns with it unlocked. 
*/ -static struct request *get_request_wait(request_queue_t *q, int rw_flags, +static struct request *get_request_wait(struct request_queue *q, int rw_flags, struct bio *bio) { const int rw = rw_flags & 0x01; @@ -2204,7 +2209,7 @@ static struct request *get_request_wait( return rq; } -struct request *blk_get_request(request_queue_t *q, int rw, gfp_t gfp_mask) +struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask) { struct request *rq; @@ -2234,7 +2239,7 @@ EXPORT_SYMBOL(blk_get_request); * * The queue lock must be held with interrupts disabled. */ -void blk_start_queueing(request_queue_t *q) +void blk_start_queueing(struct request_queue *q) { if (!blk_queue_plugged(q)) q->request_fn(q); @@ -2253,7 +2258,7 @@ EXPORT_SYMBOL(blk_start_queueing); * more, when that condition happens we need to put the request back * on the queue. Must be called with queue lock held. */ -void blk_requeue_request(request_queue_t *q, struct request *rq) +void blk_requeue_request(struct request_queue *q, struct request *rq) { blk_add_trace_rq(q, rq, BLK_TA_REQUEUE); @@ -2284,7 +2289,7 @@ EXPORT_SYMBOL(blk_requeue_request); * of the queue for things like a QUEUE_FULL message from a device, or a * host that is unable to accept a particular command. */ -void blk_insert_request(request_queue_t *q, struct request *rq, +void blk_insert_request(struct request_queue *q, struct request *rq, int at_head, void *data) { int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; @@ -2330,7 +2335,7 @@ static int __blk_rq_unmap_user(struct bi return ret; } -static int __blk_rq_map_user(request_queue_t *q, struct request *rq, +static int __blk_rq_map_user(struct request_queue *q, struct request *rq, void __user *ubuf, unsigned int len) { unsigned long uaddr; @@ -2403,8 +2408,8 @@ unmap_bio: * original bio must be passed back in to blk_rq_unmap_user() for proper * unmapping. */ -int blk_rq_map_user(request_queue_t *q, struct request *rq, void __user *ubuf, - unsigned long len) +int blk_rq_map_user(struct request_queue *q, struct request *rq, + void __user *ubuf, unsigned long len) { unsigned long bytes_read = 0; struct bio *bio = NULL; @@ -2470,7 +2475,7 @@ EXPORT_SYMBOL(blk_rq_map_user); * original bio must be passed back in to blk_rq_unmap_user() for proper * unmapping. */ -int blk_rq_map_user_iov(request_queue_t *q, struct request *rq, +int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, struct sg_iovec *iov, int iov_count, unsigned int len) { struct bio *bio; @@ -2540,7 +2545,7 @@ EXPORT_SYMBOL(blk_rq_unmap_user); * @len: length of user data * @gfp_mask: memory allocation flags */ -int blk_rq_map_kern(request_queue_t *q, struct request *rq, void *kbuf, +int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf, unsigned int len, gfp_t gfp_mask) { struct bio *bio; @@ -2577,7 +2582,7 @@ EXPORT_SYMBOL(blk_rq_map_kern); * Insert a fully prepared request at the back of the io scheduler queue * for execution. Don't wait for completion. */ -void blk_execute_rq_nowait(request_queue_t *q, struct gendisk *bd_disk, +void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, struct request *rq, int at_head, rq_end_io_fn *done) { @@ -2605,7 +2610,7 @@ EXPORT_SYMBOL_GPL(blk_execute_rq_nowait) * Insert a fully prepared request at the back of the io scheduler queue * for execution and wait for completion. 
*/ -int blk_execute_rq(request_queue_t *q, struct gendisk *bd_disk, +int blk_execute_rq(struct request_queue *q, struct gendisk *bd_disk, struct request *rq, int at_head) { DECLARE_COMPLETION_ONSTACK(wait); @@ -2648,7 +2653,7 @@ EXPORT_SYMBOL(blk_execute_rq); */ int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector) { - request_queue_t *q; + struct request_queue *q; if (bdev->bd_disk == NULL) return -ENXIO; @@ -2684,7 +2689,7 @@ static void drive_stat_acct(struct reque * queue lock is held and interrupts disabled, as we muck with the * request queue list. */ -static inline void add_request(request_queue_t * q, struct request * req) +static inline void add_request(struct request_queue * q, struct request * req) { drive_stat_acct(req, req->nr_sectors, 1); @@ -2730,7 +2735,7 @@ EXPORT_SYMBOL_GPL(disk_round_stats); /* * queue lock must be held */ -void __blk_put_request(request_queue_t *q, struct request *req) +void __blk_put_request(struct request_queue *q, struct request *req) { if (unlikely(!q)) return; @@ -2760,7 +2765,7 @@ EXPORT_SYMBOL_GPL(__blk_put_request); void blk_put_request(struct request *req) { unsigned long flags; - request_queue_t *q = req->q; + struct request_queue *q = req->q; /* * Gee, IDE calls in w/ NULL q. Fix IDE and remove the @@ -2798,7 +2803,7 @@ EXPORT_SYMBOL(blk_end_sync_rq); /* * Has to be called with the request spinlock acquired */ -static int attempt_merge(request_queue_t *q, struct request *req, +static int attempt_merge(struct request_queue *q, struct request *req, struct request *next) { if (!rq_mergeable(req) || !rq_mergeable(next)) @@ -2851,7 +2856,8 @@ static int attempt_merge(request_queue_t return 1; } -static inline int attempt_back_merge(request_queue_t *q, struct request *rq) +static inline int attempt_back_merge(struct request_queue *q, + struct request *rq) { struct request *next = elv_latter_request(q, rq); @@ -2861,7 +2867,8 @@ static inline int attempt_back_merge(req return 0; } -static inline int attempt_front_merge(request_queue_t *q, struct request *rq) +static inline int attempt_front_merge(struct request_queue *q, + struct request *rq) { struct request *prev = elv_former_request(q, rq); @@ -2905,7 +2912,7 @@ static void init_request_from_bio(struct req->start_time = jiffies; } -static int __make_request(request_queue_t *q, struct bio *bio) +static int __make_request(struct request_queue *q, struct bio *bio) { struct request *req; int el_ret, nr_sectors, barrier, err; @@ -3119,7 +3126,7 @@ static inline int should_fail_request(st */ static inline void __generic_make_request(struct bio *bio) { - request_queue_t *q; + struct request_queue *q; sector_t maxsector; sector_t old_sector; int ret, nr_sectors = bio_sectors(bio); @@ -3312,7 +3319,7 @@ static void blk_recalc_rq_segments(struc struct bio *bio, *prevbio = NULL; int nr_phys_segs, nr_hw_segs; unsigned int phys_size, hw_size; - request_queue_t *q = rq->q; + struct request_queue *q = rq->q; if (!rq->bio) return; @@ -3658,7 +3665,8 @@ void end_request(struct request *req, in EXPORT_SYMBOL(end_request); -void blk_rq_bio_prep(request_queue_t *q, struct request *rq, struct bio *bio) +void blk_rq_bio_prep(struct request_queue *q, struct request *rq, + struct bio *bio) { /* first two bits are identical in rq->cmd_flags and bio->bi_rw */ rq->cmd_flags |= (bio->bi_rw & 3); @@ -3701,7 +3709,7 @@ int __init blk_dev_init(void) sizeof(struct request), 0, SLAB_PANIC, NULL); requestq_cachep = kmem_cache_create("blkdev_queue", - sizeof(request_queue_t), 0, SLAB_PANIC, NULL); + 
sizeof(struct request_queue), 0, SLAB_PANIC, NULL); iocontext_cachep = kmem_cache_create("blkdev_ioc", sizeof(struct io_context), 0, SLAB_PANIC, NULL); @@ -4021,7 +4029,8 @@ static ssize_t queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page) { struct queue_sysfs_entry *entry = to_queue(attr); - request_queue_t *q = container_of(kobj, struct request_queue, kobj); + struct request_queue *q = + container_of(kobj, struct request_queue, kobj); ssize_t res; if (!entry->show) @@ -4041,7 +4050,7 @@ queue_attr_store(struct kobject *kobj, s const char *page, size_t length) { struct queue_sysfs_entry *entry = to_queue(attr); - request_queue_t *q = container_of(kobj, struct request_queue, kobj); + struct request_queue *q = container_of(kobj, struct request_queue, kobj); ssize_t res; @@ -4072,7 +4081,7 @@ int blk_register_queue(struct gendisk *d { int ret; - request_queue_t *q = disk->queue; + struct request_queue *q = disk->queue; if (!q || !q->request_fn) return -ENXIO; @@ -4097,7 +4106,7 @@ int blk_register_queue(struct gendisk *d void blk_unregister_queue(struct gendisk *disk) { - request_queue_t *q = disk->queue; + struct request_queue *q = disk->queue; if (q && q->request_fn) { elv_unregister_queue(q); Index: linux-rt-rebase.q/block/noop-iosched.c =================================================================== --- linux-rt-rebase.q.orig/block/noop-iosched.c +++ linux-rt-rebase.q/block/noop-iosched.c @@ -11,13 +11,13 @@ struct noop_data { struct list_head queue; }; -static void noop_merged_requests(request_queue_t *q, struct request *rq, +static void noop_merged_requests(struct request_queue *q, struct request *rq, struct request *next) { list_del_init(&next->queuelist); } -static int noop_dispatch(request_queue_t *q, int force) +static int noop_dispatch(struct request_queue *q, int force) { struct noop_data *nd = q->elevator->elevator_data; @@ -31,14 +31,14 @@ static int noop_dispatch(request_queue_t return 0; } -static void noop_add_request(request_queue_t *q, struct request *rq) +static void noop_add_request(struct request_queue *q, struct request *rq) { struct noop_data *nd = q->elevator->elevator_data; list_add_tail(&rq->queuelist, &nd->queue); } -static int noop_queue_empty(request_queue_t *q) +static int noop_queue_empty(struct request_queue *q) { struct noop_data *nd = q->elevator->elevator_data; @@ -46,7 +46,7 @@ static int noop_queue_empty(request_queu } static struct request * -noop_former_request(request_queue_t *q, struct request *rq) +noop_former_request(struct request_queue *q, struct request *rq) { struct noop_data *nd = q->elevator->elevator_data; @@ -56,7 +56,7 @@ noop_former_request(request_queue_t *q, } static struct request * -noop_latter_request(request_queue_t *q, struct request *rq) +noop_latter_request(struct request_queue *q, struct request *rq) { struct noop_data *nd = q->elevator->elevator_data; @@ -65,7 +65,7 @@ noop_latter_request(request_queue_t *q, return list_entry(rq->queuelist.next, struct request, queuelist); } -static void *noop_init_queue(request_queue_t *q) +static void *noop_init_queue(struct request_queue *q) { struct noop_data *nd; Index: linux-rt-rebase.q/block/scsi_ioctl.c =================================================================== --- linux-rt-rebase.q.orig/block/scsi_ioctl.c +++ linux-rt-rebase.q/block/scsi_ioctl.c @@ -49,22 +49,22 @@ static int sg_get_version(int __user *p) return put_user(sg_version_num, p); } -static int scsi_get_idlun(request_queue_t *q, int __user *p) +static int scsi_get_idlun(struct request_queue 
*q, int __user *p) { return put_user(0, p); } -static int scsi_get_bus(request_queue_t *q, int __user *p) +static int scsi_get_bus(struct request_queue *q, int __user *p) { return put_user(0, p); } -static int sg_get_timeout(request_queue_t *q) +static int sg_get_timeout(struct request_queue *q) { return q->sg_timeout / (HZ / USER_HZ); } -static int sg_set_timeout(request_queue_t *q, int __user *p) +static int sg_set_timeout(struct request_queue *q, int __user *p) { int timeout, err = get_user(timeout, p); @@ -74,14 +74,14 @@ static int sg_set_timeout(request_queue_ return err; } -static int sg_get_reserved_size(request_queue_t *q, int __user *p) +static int sg_get_reserved_size(struct request_queue *q, int __user *p) { unsigned val = min(q->sg_reserved_size, q->max_sectors << 9); return put_user(val, p); } -static int sg_set_reserved_size(request_queue_t *q, int __user *p) +static int sg_set_reserved_size(struct request_queue *q, int __user *p) { int size, err = get_user(size, p); @@ -101,7 +101,7 @@ static int sg_set_reserved_size(request_ * will always return that we are ATAPI even for a real SCSI drive, I'm not * so sure this is worth doing anything about (why would you care??) */ -static int sg_emulated_host(request_queue_t *q, int __user *p) +static int sg_emulated_host(struct request_queue *q, int __user *p) { return put_user(1, p); } @@ -214,7 +214,7 @@ int blk_verify_command(unsigned char *cm } EXPORT_SYMBOL_GPL(blk_verify_command); -static int blk_fill_sghdr_rq(request_queue_t *q, struct request *rq, +static int blk_fill_sghdr_rq(struct request_queue *q, struct request *rq, struct sg_io_hdr *hdr, int has_write_perm) { memset(rq->cmd, 0, BLK_MAX_CDB); /* ATAPI hates garbage after CDB */ @@ -286,7 +286,7 @@ static int blk_complete_sghdr_rq(struct return r; } -static int sg_io(struct file *file, request_queue_t *q, +static int sg_io(struct file *file, struct request_queue *q, struct gendisk *bd_disk, struct sg_io_hdr *hdr) { unsigned long start_time; @@ -519,7 +519,8 @@ error: EXPORT_SYMBOL_GPL(sg_scsi_ioctl); /* Send basic block requests */ -static int __blk_send_generic(request_queue_t *q, struct gendisk *bd_disk, int cmd, int data) +static int __blk_send_generic(struct request_queue *q, struct gendisk *bd_disk, + int cmd, int data) { struct request *rq; int err; @@ -539,7 +540,8 @@ static int __blk_send_generic(request_qu return err; } -static inline int blk_send_start_stop(request_queue_t *q, struct gendisk *bd_disk, int data) +static inline int blk_send_start_stop(struct request_queue *q, + struct gendisk *bd_disk, int data) { return __blk_send_generic(q, bd_disk, GPCMD_START_STOP_UNIT, data); } Index: linux-rt-rebase.q/drivers/acorn/block/fd1772.c =================================================================== --- linux-rt-rebase.q.orig/drivers/acorn/block/fd1772.c +++ linux-rt-rebase.q/drivers/acorn/block/fd1772.c @@ -372,7 +372,7 @@ static int fd_test_drive_present(int dri static void config_types(void); static int floppy_open(struct inode *inode, struct file *filp); static int floppy_release(struct inode *inode, struct file *filp); -static void do_fd_request(request_queue_t *); +static void do_fd_request(struct request_queue *); /************************* End of Prototypes **************************/ @@ -1271,7 +1271,7 @@ static void fd1772_checkint(void) } } -static void do_fd_request(request_queue_t* q) +static void do_fd_request(struct request_queue* q) { unsigned long flags; Index: linux-rt-rebase.q/drivers/acorn/block/mfmhd.c 
=================================================================== --- linux-rt-rebase.q.orig/drivers/acorn/block/mfmhd.c +++ linux-rt-rebase.q/drivers/acorn/block/mfmhd.c @@ -924,7 +924,7 @@ static void mfm_request(void) DBG("mfm_request: Dropping out bottom\n"); } -static void do_mfm_request(request_queue_t *q) +static void do_mfm_request(struct request_queue *q) { DBG("do_mfm_request: about to mfm_request\n"); mfm_request(); } Index: linux-rt-rebase.q/drivers/acpi/Kconfig =================================================================== --- linux-rt-rebase.q.orig/drivers/acpi/Kconfig +++ linux-rt-rebase.q/drivers/acpi/Kconfig @@ -11,6 +11,9 @@ menuconfig ACPI depends on PCI depends on PM select PNP + # for sleep + select HOTPLUG_CPU if X86 && SMP + select SUSPEND_SMP if X86 && SMP default y ---help--- Advanced Configuration and Power Interface (ACPI) support for @@ -42,51 +45,26 @@ menuconfig ACPI if ACPI -config ACPI_SLEEP - bool "Sleep States" - depends on X86 && (!SMP || SUSPEND_SMP) - default y +config ACPI_PROCFS + bool "Deprecated /proc/acpi files" + depends on PROC_FS ---help--- - This option adds support for ACPI suspend states. - - With this option, you will be able to put the system "to sleep". - Sleep states are low power states for the system and devices. All - of the system operating state is saved to either memory or disk - (depending on the state), to allow the system to resume operation - quickly at your request. - - Although this option sounds really nifty, barely any of the device - drivers have been converted to the new driver model and hence few - have proper power management support. - - This option is not recommended for anyone except those doing driver - power management development. - -config ACPI_SLEEP_PROC_FS - bool - depends on ACPI_SLEEP && PROC_FS - default y + For backwards compatibility, this option allows + deprecated /proc/acpi/ files to exist, even when + they have been replaced by functions in /sys. + The deprecated files (and their replacements) include: + + /proc/acpi/sleep (/sys/power/state) + /proc/acpi/info (/sys/module/acpi/parameters/acpica_version) + /proc/acpi/dsdt (/sys/firmware/acpi/tables/DSDT) + /proc/acpi/fadt (/sys/firmware/acpi/tables/FACP) + /proc/acpi/debug_layer (/sys/module/acpi/parameters/debug_layer) + /proc/acpi/debug_level (/sys/module/acpi/parameters/debug_level) -config ACPI_SLEEP_PROC_SLEEP - bool "/proc/acpi/sleep (deprecated)" - depends on ACPI_SLEEP_PROC_FS - default n - ---help--- - Create /proc/acpi/sleep - Deprecated by /sys/power/state + This option has no effect on /proc/acpi/ files + and functions which do not yet exist in /sys. -config ACPI_PROCFS - bool "Procfs interface (deprecated)" - default y - ---help--- - The Procfs interface for ACPI is made optional for backward compatibility. - As the same functions are duplicated in the sysfs interface - and this proc interface will be removed some time later, - it's marked as deprecated. - ( /proc/acpi/debug_layer && debug_level are deprecated by - /sys/module/acpi/parameters/debug_layer && debug_level. 
- /proc/acpi/info is deprecated by - /sys/module/acpi/parameters/acpica_version ) + Say N to delete /proc/acpi/ files that have moved to /sys/ config ACPI_AC tristate "AC Adapter" Index: linux-rt-rebase.q/drivers/acpi/ac.c =================================================================== --- linux-rt-rebase.q.orig/drivers/acpi/ac.c +++ linux-rt-rebase.q/drivers/acpi/ac.c @@ -34,7 +34,6 @@ #define ACPI_AC_COMPONENT 0x00020000 #define ACPI_AC_CLASS "ac_adapter" -#define ACPI_AC_HID "ACPI0003" #define ACPI_AC_DEVICE_NAME "AC Adapter" #define ACPI_AC_FILE_STATE "state" #define ACPI_AC_NOTIFY_STATUS 0x80 @@ -56,10 +55,16 @@ static int acpi_ac_add(struct acpi_devic static int acpi_ac_remove(struct acpi_device *device, int type); static int acpi_ac_open_fs(struct inode *inode, struct file *file); +static const struct acpi_device_id ac_device_ids[] = { + {"ACPI0003", 0}, + {"", 0}, +}; +MODULE_DEVICE_TABLE(acpi, ac_device_ids); + static struct acpi_driver acpi_ac_driver = { .name = "ac", .class = ACPI_AC_CLASS, - .ids = ACPI_AC_HID, + .ids = ac_device_ids, .ops = { .add = acpi_ac_add, .remove = acpi_ac_remove, Index: linux-rt-rebase.q/drivers/acpi/acpi_memhotplug.c =================================================================== --- linux-rt-rebase.q.orig/drivers/acpi/acpi_memhotplug.c +++ linux-rt-rebase.q/drivers/acpi/acpi_memhotplug.c @@ -53,10 +53,16 @@ static int acpi_memory_device_add(struct static int acpi_memory_device_remove(struct acpi_device *device, int type); static int acpi_memory_device_start(struct acpi_device *device); +static const struct acpi_device_id memory_device_ids[] = { + {ACPI_MEMORY_DEVICE_HID, 0}, + {"", 0}, +}; +MODULE_DEVICE_TABLE(acpi, memory_device_ids); + static struct acpi_driver acpi_memory_device_driver = { .name = "acpi_memhotplug", .class = ACPI_MEMORY_DEVICE_CLASS, - .ids = ACPI_MEMORY_DEVICE_HID, + .ids = memory_device_ids, .ops = { .add = acpi_memory_device_add, .remove = acpi_memory_device_remove, Index: linux-rt-rebase.q/drivers/acpi/asus_acpi.c =================================================================== --- linux-rt-rebase.q.orig/drivers/acpi/asus_acpi.c +++ linux-rt-rebase.q/drivers/acpi/asus_acpi.c @@ -56,7 +56,6 @@ #define ACPI_HOTK_NAME "Asus Laptop ACPI Extras Driver" #define ACPI_HOTK_CLASS "hotkey" #define ACPI_HOTK_DEVICE_NAME "Hotkey" -#define ACPI_HOTK_HID "ATK0100" /* * Some events we use, same for all Asus @@ -426,14 +425,20 @@ static struct acpi_table_header *asus_in static struct asus_hotk *hotk; /* - * The hotkey driver declaration + * The hotkey driver and autoloading declaration */ static int asus_hotk_add(struct acpi_device *device); static int asus_hotk_remove(struct acpi_device *device, int type); +static const struct acpi_device_id asus_device_ids[] = { + {"ATK0100", 0}, + {"", 0}, +}; +MODULE_DEVICE_TABLE(acpi, asus_device_ids); + static struct acpi_driver asus_hotk_driver = { .name = "asus_acpi", .class = ACPI_HOTK_CLASS, - .ids = ACPI_HOTK_HID, + .ids = asus_device_ids, .ops = { .add = asus_hotk_add, .remove = asus_hotk_remove, Index: linux-rt-rebase.q/drivers/acpi/battery.c =================================================================== --- linux-rt-rebase.q.orig/drivers/acpi/battery.c +++ linux-rt-rebase.q/drivers/acpi/battery.c @@ -41,7 +41,6 @@ #define ACPI_BATTERY_COMPONENT 0x00040000 #define ACPI_BATTERY_CLASS "battery" -#define ACPI_BATTERY_HID "PNP0C0A" #define ACPI_BATTERY_DEVICE_NAME "Battery" #define ACPI_BATTERY_NOTIFY_STATUS 0x80 #define ACPI_BATTERY_NOTIFY_INFO 0x81 @@ -74,10 +73,16 @@ static int 
acpi_battery_add(struct acpi_ static int acpi_battery_remove(struct acpi_device *device, int type); static int acpi_battery_resume(struct acpi_device *device); +static const struct acpi_device_id battery_device_ids[] = { + {"PNP0C0A", 0}, + {"", 0}, +}; +MODULE_DEVICE_TABLE(acpi, battery_device_ids); + static struct acpi_driver acpi_battery_driver = { .name = "battery", .class = ACPI_BATTERY_CLASS, - .ids = ACPI_BATTERY_HID, + .ids = battery_device_ids, .ops = { .add = acpi_battery_add, .resume = acpi_battery_resume, Index: linux-rt-rebase.q/drivers/acpi/button.c =================================================================== --- linux-rt-rebase.q.orig/drivers/acpi/button.c +++ linux-rt-rebase.q/drivers/acpi/button.c @@ -66,6 +66,16 @@ MODULE_AUTHOR("Paul Diefenbaugh"); MODULE_DESCRIPTION("ACPI Button Driver"); MODULE_LICENSE("GPL"); +static const struct acpi_device_id button_device_ids[] = { + {ACPI_BUTTON_HID_LID, 0}, + {ACPI_BUTTON_HID_SLEEP, 0}, + {ACPI_BUTTON_HID_SLEEPF, 0}, + {ACPI_BUTTON_HID_POWER, 0}, + {ACPI_BUTTON_HID_POWERF, 0}, + {"", 0}, +}; +MODULE_DEVICE_TABLE(acpi, button_device_ids); + static int acpi_button_add(struct acpi_device *device); static int acpi_button_remove(struct acpi_device *device, int type); static int acpi_button_info_open_fs(struct inode *inode, struct file *file); @@ -74,7 +84,7 @@ static int acpi_button_state_open_fs(str static struct acpi_driver acpi_button_driver = { .name = "button", .class = ACPI_BUTTON_CLASS, - .ids = "button_power,button_sleep,PNP0C0D,PNP0C0C,PNP0C0E", + .ids = button_device_ids, .ops = { .add = acpi_button_add, .remove = acpi_button_remove, Index: linux-rt-rebase.q/drivers/acpi/container.c =================================================================== --- linux-rt-rebase.q.orig/drivers/acpi/container.c +++ linux-rt-rebase.q/drivers/acpi/container.c @@ -52,10 +52,18 @@ MODULE_LICENSE("GPL"); static int acpi_container_add(struct acpi_device *device); static int acpi_container_remove(struct acpi_device *device, int type); +static const struct acpi_device_id container_device_ids[] = { + {"ACPI0004", 0}, + {"PNP0A05", 0}, + {"PNP0A06", 0}, + {"", 0}, +}; +MODULE_DEVICE_TABLE(acpi, container_device_ids); + static struct acpi_driver acpi_container_driver = { .name = "container", .class = ACPI_CONTAINER_CLASS, - .ids = "ACPI0004,PNP0A05,PNP0A06", + .ids = container_device_ids, .ops = { .add = acpi_container_add, .remove = acpi_container_remove, Index: linux-rt-rebase.q/drivers/acpi/ec.c =================================================================== --- linux-rt-rebase.q.orig/drivers/acpi/ec.c +++ linux-rt-rebase.q/drivers/acpi/ec.c @@ -41,7 +41,6 @@ #include #define ACPI_EC_CLASS "embedded_controller" -#define ACPI_EC_HID "PNP0C09" #define ACPI_EC_DEVICE_NAME "Embedded Controller" #define ACPI_EC_FILE_INFO "info" @@ -82,10 +81,15 @@ static int acpi_ec_start(struct acpi_dev static int acpi_ec_stop(struct acpi_device *device, int type); static int acpi_ec_add(struct acpi_device *device); +static const struct acpi_device_id ec_device_ids[] = { + {"PNP0C09", 0}, + {"", 0}, +}; + static struct acpi_driver acpi_ec_driver = { .name = "ec", .class = ACPI_EC_CLASS, - .ids = ACPI_EC_HID, + .ids = ec_device_ids, .ops = { .add = acpi_ec_add, .remove = acpi_ec_remove, Index: linux-rt-rebase.q/drivers/acpi/events/evrgnini.c =================================================================== --- linux-rt-rebase.q.orig/drivers/acpi/events/evrgnini.c +++ linux-rt-rebase.q/drivers/acpi/events/evrgnini.c @@ -378,7 +378,7 @@ static u8 
acpi_ev_match_pci_root_bridge( static u8 acpi_ev_is_pci_root_bridge(struct acpi_namespace_node *node) { acpi_status status; - struct acpi_device_id hid; + struct acpica_device_id hid; struct acpi_compatible_id_list *cid; acpi_native_uint i; Index: linux-rt-rebase.q/drivers/acpi/fan.c =================================================================== --- linux-rt-rebase.q.orig/drivers/acpi/fan.c +++ linux-rt-rebase.q/drivers/acpi/fan.c @@ -50,10 +50,16 @@ static int acpi_fan_remove(struct acpi_d static int acpi_fan_suspend(struct acpi_device *device, pm_message_t state); static int acpi_fan_resume(struct acpi_device *device); +static const struct acpi_device_id fan_device_ids[] = { + {"PNP0C0B", 0}, + {"", 0}, +}; +MODULE_DEVICE_TABLE(acpi, fan_device_ids); + static struct acpi_driver acpi_fan_driver = { .name = "fan", .class = ACPI_FAN_CLASS, - .ids = "PNP0C0B", + .ids = fan_device_ids, .ops = { .add = acpi_fan_add, .remove = acpi_fan_remove, Index: linux-rt-rebase.q/drivers/acpi/namespace/nsxfeval.c =================================================================== --- linux-rt-rebase.q.orig/drivers/acpi/namespace/nsxfeval.c +++ linux-rt-rebase.q/drivers/acpi/namespace/nsxfeval.c @@ -440,7 +440,7 @@ acpi_ns_get_device_callback(acpi_handle acpi_status status; struct acpi_namespace_node *node; u32 flags; - struct acpi_device_id hid; + struct acpica_device_id hid; struct acpi_compatible_id_list *cid; acpi_native_uint i; Index: linux-rt-rebase.q/drivers/acpi/pci_link.c =================================================================== --- linux-rt-rebase.q.orig/drivers/acpi/pci_link.c +++ linux-rt-rebase.q/drivers/acpi/pci_link.c @@ -46,7 +46,6 @@ #define _COMPONENT ACPI_PCI_COMPONENT ACPI_MODULE_NAME("pci_link"); #define ACPI_PCI_LINK_CLASS "pci_irq_routing" -#define ACPI_PCI_LINK_HID "PNP0C0F" #define ACPI_PCI_LINK_DEVICE_NAME "PCI Interrupt Link" #define ACPI_PCI_LINK_FILE_INFO "info" #define ACPI_PCI_LINK_FILE_STATUS "state" @@ -54,10 +53,16 @@ ACPI_MODULE_NAME("pci_link"); static int acpi_pci_link_add(struct acpi_device *device); static int acpi_pci_link_remove(struct acpi_device *device, int type); +static struct acpi_device_id link_device_ids[] = { + {"PNP0C0F", 0}, + {"", 0}, +}; +MODULE_DEVICE_TABLE(acpi, link_device_ids); + static struct acpi_driver acpi_pci_link_driver = { .name = "pci_link", .class = ACPI_PCI_LINK_CLASS, - .ids = ACPI_PCI_LINK_HID, + .ids = link_device_ids, .ops = { .add = acpi_pci_link_add, .remove = acpi_pci_link_remove, Index: linux-rt-rebase.q/drivers/acpi/pci_root.c =================================================================== --- linux-rt-rebase.q.orig/drivers/acpi/pci_root.c +++ linux-rt-rebase.q/drivers/acpi/pci_root.c @@ -38,16 +38,21 @@ #define _COMPONENT ACPI_PCI_COMPONENT ACPI_MODULE_NAME("pci_root"); #define ACPI_PCI_ROOT_CLASS "pci_bridge" -#define ACPI_PCI_ROOT_HID "PNP0A03" #define ACPI_PCI_ROOT_DEVICE_NAME "PCI Root Bridge" static int acpi_pci_root_add(struct acpi_device *device); static int acpi_pci_root_remove(struct acpi_device *device, int type); static int acpi_pci_root_start(struct acpi_device *device); +static struct acpi_device_id root_device_ids[] = { + {"PNP0A03", 0}, + {"", 0}, +}; +MODULE_DEVICE_TABLE(acpi, root_device_ids); + static struct acpi_driver acpi_pci_root_driver = { .name = "pci_root", .class = ACPI_PCI_ROOT_CLASS, - .ids = ACPI_PCI_ROOT_HID, + .ids = root_device_ids, .ops = { .add = acpi_pci_root_add, .remove = acpi_pci_root_remove, Index: linux-rt-rebase.q/drivers/acpi/power.c 
=================================================================== --- linux-rt-rebase.q.orig/drivers/acpi/power.c +++ linux-rt-rebase.q/drivers/acpi/power.c @@ -59,10 +59,16 @@ static int acpi_power_remove(struct acpi static int acpi_power_resume(struct acpi_device *device); static int acpi_power_open_fs(struct inode *inode, struct file *file); +static struct acpi_device_id power_device_ids[] = { + {ACPI_POWER_HID, 0}, + {"", 0}, +}; +MODULE_DEVICE_TABLE(acpi, power_device_ids); + static struct acpi_driver acpi_power_driver = { .name = "power", .class = ACPI_POWER_CLASS, - .ids = ACPI_POWER_HID, + .ids = power_device_ids, .ops = { .add = acpi_power_add, .remove = acpi_power_remove, Index: linux-rt-rebase.q/drivers/acpi/processor_core.c =================================================================== --- linux-rt-rebase.q.orig/drivers/acpi/processor_core.c +++ linux-rt-rebase.q/drivers/acpi/processor_core.c @@ -88,10 +88,16 @@ static int acpi_processor_handle_eject(s extern int acpi_processor_tstate_has_changed(struct acpi_processor *pr); +static const struct acpi_device_id processor_device_ids[] = { + {ACPI_PROCESSOR_HID, 0}, + {"", 0}, +}; +MODULE_DEVICE_TABLE(acpi, processor_device_ids); + static struct acpi_driver acpi_processor_driver = { .name = "processor", .class = ACPI_PROCESSOR_CLASS, - .ids = ACPI_PROCESSOR_HID, + .ids = processor_device_ids, .ops = { .add = acpi_processor_add, .remove = acpi_processor_remove, Index: linux-rt-rebase.q/drivers/acpi/processor_throttling.c =================================================================== --- linux-rt-rebase.q.orig/drivers/acpi/processor_throttling.c +++ linux-rt-rebase.q/drivers/acpi/processor_throttling.c @@ -47,6 +47,9 @@ ACPI_MODULE_NAME("processor_throttling") static int acpi_processor_get_throttling(struct acpi_processor *pr); int acpi_processor_set_throttling(struct acpi_processor *pr, int state); +/* + * _TPC - Throttling Present Capabilities + */ static int acpi_processor_get_platform_limit(struct acpi_processor *pr) { acpi_status status = 0; @@ -55,8 +58,10 @@ static int acpi_processor_get_platform_l if (!pr) return -EINVAL; status = acpi_evaluate_integer(pr->handle, "_TPC", NULL, &tpc); - if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) { - ACPI_EXCEPTION((AE_INFO, status, "Evaluating _TPC")); + if (ACPI_FAILURE(status)) { + if (status != AE_NOT_FOUND) { + ACPI_EXCEPTION((AE_INFO, status, "Evaluating _TPC")); + } return -ENODEV; } pr->throttling_platform_limit = (int)tpc; @@ -68,9 +73,9 @@ int acpi_processor_tstate_has_changed(st return acpi_processor_get_platform_limit(pr); } -/* -------------------------------------------------------------------------- - _PTC, _TSS, _TSD support - -------------------------------------------------------------------------- */ +/* + * _PTC - Processor Throttling Control (and status) register location + */ static int acpi_processor_get_throttling_control(struct acpi_processor *pr) { int result = 0; @@ -81,7 +86,9 @@ static int acpi_processor_get_throttling status = acpi_evaluate_object(pr->handle, "_PTC", NULL, &buffer); if (ACPI_FAILURE(status)) { - ACPI_EXCEPTION((AE_INFO, status, "Evaluating _PTC")); + if (status != AE_NOT_FOUND) { + ACPI_EXCEPTION((AE_INFO, status, "Evaluating _PTC")); + } return -ENODEV; } @@ -132,6 +139,10 @@ static int acpi_processor_get_throttling return result; } + +/* + * _TSS - Throttling Supported States + */ static int acpi_processor_get_throttling_states(struct acpi_processor *pr) { int result = 0; @@ -144,7 +155,9 @@ static int 
acpi_processor_get_throttling status = acpi_evaluate_object(pr->handle, "_TSS", NULL, &buffer); if (ACPI_FAILURE(status)) { - ACPI_EXCEPTION((AE_INFO, status, "Evaluating _TSS")); + if (status != AE_NOT_FOUND) { + ACPI_EXCEPTION((AE_INFO, status, "Evaluating _TSS")); + } return -ENODEV; } @@ -201,6 +214,10 @@ static int acpi_processor_get_throttling return result; } + +/* + * _TSD - T-State Dependencies + */ static int acpi_processor_get_tsd(struct acpi_processor *pr) { int result = 0; @@ -213,6 +230,9 @@ static int acpi_processor_get_tsd(struct status = acpi_evaluate_object(pr->handle, "_TSD", NULL, &buffer); if (ACPI_FAILURE(status)) { + if (status != AE_NOT_FOUND) { + ACPI_EXCEPTION((AE_INFO, status, "Evaluating _TSD")); + } return -ENODEV; } @@ -525,9 +545,6 @@ int acpi_processor_get_throttling_info(s int result = 0; int step = 0; int i = 0; - int no_ptc = 0; - int no_tss = 0; - int no_tsd = 0; ACPI_DEBUG_PRINT((ACPI_DB_INFO, "pblk_address[0x%08x] duty_offset[%d] duty_width[%d]\n", @@ -538,12 +555,14 @@ int acpi_processor_get_throttling_info(s if (!pr) return -EINVAL; - /* TBD: Support ACPI 2.0 objects */ - no_ptc = acpi_processor_get_throttling_control(pr); - no_tss = acpi_processor_get_throttling_states(pr); - no_tsd = acpi_processor_get_tsd(pr); - - if (no_ptc || no_tss) { + /* + * Evaluate _PTC, _TSS and _TPC + * They must all be present or none of them can be used. + */ + if (acpi_processor_get_throttling_control(pr) || + acpi_processor_get_throttling_states(pr) || + acpi_processor_get_platform_limit(pr)) + { pr->throttling.acpi_processor_get_throttling = &acpi_processor_get_throttling_fadt; pr->throttling.acpi_processor_set_throttling = @@ -555,6 +574,8 @@ int acpi_processor_get_throttling_info(s &acpi_processor_set_throttling_ptc; } + acpi_processor_get_tsd(pr); + if (!pr->throttling.address) { ACPI_DEBUG_PRINT((ACPI_DB_INFO, "No throttling register\n")); return 0; @@ -658,18 +679,20 @@ static int acpi_processor_throttling_seq pr->throttling.state_count - 1); seq_puts(seq, "states:\n"); - if (acpi_processor_get_throttling == acpi_processor_get_throttling_fadt) + if (pr->throttling.acpi_processor_get_throttling == + acpi_processor_get_throttling_fadt) { for (i = 0; i < pr->throttling.state_count; i++) seq_printf(seq, " %cT%d: %02d%%\n", (i == pr->throttling.state ? '*' : ' '), i, (pr->throttling.states[i].performance ? pr-> throttling.states[i].performance / 10 : 0)); - else + } else { for (i = 0; i < pr->throttling.state_count; i++) seq_printf(seq, " %cT%d: %02d%%\n", (i == pr->throttling.state ? '*' : ' '), i, (int)pr->throttling.states_tss[i]. 
freqpercentage); + } end: return 0; Index: linux-rt-rebase.q/drivers/acpi/sbs.c =================================================================== --- linux-rt-rebase.q.orig/drivers/acpi/sbs.c +++ linux-rt-rebase.q/drivers/acpi/sbs.c @@ -38,7 +38,6 @@ #define ACPI_SBS_CLASS "sbs" #define ACPI_AC_CLASS "ac_adapter" #define ACPI_BATTERY_CLASS "battery" -#define ACPI_SBS_HID "ACPI0002" #define ACPI_SBS_DEVICE_NAME "Smart Battery System" #define ACPI_SBS_FILE_INFO "info" #define ACPI_SBS_FILE_STATE "state" @@ -124,10 +123,17 @@ static int acpi_sbs_add(struct acpi_devi static int acpi_sbs_remove(struct acpi_device *device, int type); static int acpi_sbs_resume(struct acpi_device *device); +static const struct acpi_device_id sbs_device_ids[] = { + {"ACPI0001", 0}, + {"ACPI0005", 0}, + {"", 0}, +}; +MODULE_DEVICE_TABLE(acpi, sbs_device_ids); + static struct acpi_driver acpi_sbs_driver = { .name = "sbs", .class = ACPI_SBS_CLASS, - .ids = "ACPI0001,ACPI0005", + .ids = sbs_device_ids, .ops = { .add = acpi_sbs_add, .remove = acpi_sbs_remove, Index: linux-rt-rebase.q/drivers/acpi/scan.c =================================================================== --- linux-rt-rebase.q.orig/drivers/acpi/scan.c +++ linux-rt-rebase.q/drivers/acpi/scan.c @@ -16,7 +16,7 @@ ACPI_MODULE_NAME("scan"); extern struct acpi_device *acpi_root; #define ACPI_BUS_CLASS "system_bus" -#define ACPI_BUS_HID "ACPI_BUS" +#define ACPI_BUS_HID "LNXSYBUS" #define ACPI_BUS_DEVICE_NAME "System Bus" static LIST_HEAD(acpi_device_list); @@ -29,6 +29,62 @@ struct acpi_device_bus_id{ unsigned int instance_no; struct list_head node; }; + +/* + * Creates hid/cid(s) string needed for modalias and uevent + * e.g. on a device with hid:IBM0001 and cid:ACPI0001 you get: + * char *modalias: "acpi:IBM0001:ACPI0001" +*/ +int create_modalias(struct acpi_device *acpi_dev, char *modalias, int size){ + + int len; + + if (!acpi_dev->flags.hardware_id) + return -ENODEV; + + len = snprintf(modalias, size, "acpi:%s:", + acpi_dev->pnp.hardware_id); + if (len < 0 || len >= size) + return -EINVAL; + size -= len; + + if (acpi_dev->flags.compatible_ids) { + struct acpi_compatible_id_list *cid_list; + int i; + int count; + + cid_list = acpi_dev->pnp.cid_list; + for (i = 0; i < cid_list->count; i++) { + count = snprintf(&modalias[len], size, "%s:", + cid_list->id[i].value); + if (count < 0 || count >= size) { + printk(KERN_ERR "acpi: %s cid[%i] exceeds event buffer size", + acpi_dev->pnp.device_name, i); + break; + } + len += count; + size -= count; + } + } + + modalias[len] = '\0'; + return len; +} + +static ssize_t +acpi_device_modalias_show(struct device *dev, struct device_attribute *attr, char *buf) { + struct acpi_device *acpi_dev = to_acpi_device(dev); + int len; + + /* Device has no HID and no CID or string is >1024 */ + len = create_modalias(acpi_dev, buf, 1024); + if (len <= 0) + return 0; + buf[len++] = '\n'; + return len; +} +static DEVICE_ATTR(modalias, 0444, acpi_device_modalias_show, NULL); + static int acpi_eject_operation(acpi_handle handle, int lockable) { struct acpi_object_list arg_list; @@ -154,6 +210,12 @@ static int acpi_device_setup_files(struc goto end; } + if (dev->flags.hardware_id || dev->flags.compatible_ids){ + result = device_create_file(&dev->dev, &dev_attr_modalias); + if(result) + goto end; + } + /* * If device has _EJ0, 'eject' file is created that is used to trigger * hot-removal function from userland. 
@@ -178,6 +240,9 @@ static void acpi_device_remove_files(str if (ACPI_SUCCESS(status)) device_remove_file(&dev->dev, &dev_attr_eject); + if (dev->flags.hardware_id || dev->flags.compatible_ids) + device_remove_file(&dev->dev, &dev_attr_modalias); + if(dev->flags.hardware_id) device_remove_file(&dev->dev, &dev_attr_hid); if(dev->handle) @@ -186,6 +251,37 @@ static void acpi_device_remove_files(str /* -------------------------------------------------------------------------- ACPI Bus operations -------------------------------------------------------------------------- */ + +int acpi_match_device_ids(struct acpi_device *device, + const struct acpi_device_id *ids) +{ + const struct acpi_device_id *id; + + if (device->flags.hardware_id) { + for (id = ids; id->id[0]; id++) { + if (!strcmp((char*)id->id, device->pnp.hardware_id)) + return 0; + } + } + + if (device->flags.compatible_ids) { + struct acpi_compatible_id_list *cid_list = device->pnp.cid_list; + int i; + + for (id = ids; id->id[0]; id++) { + /* compare multiple _CID entries against driver ids */ + for (i = 0; i < cid_list->count; i++) { + if (!strcmp((char*)id->id, + cid_list->id[i].value)) + return 0; + } + } + } + + return -ENOENT; +} +EXPORT_SYMBOL(acpi_match_device_ids); + static void acpi_device_release(struct device *dev) { struct acpi_device *acpi_dev = to_acpi_device(dev); @@ -219,37 +315,19 @@ static int acpi_bus_match(struct device struct acpi_device *acpi_dev = to_acpi_device(dev); struct acpi_driver *acpi_drv = to_acpi_driver(drv); - return !acpi_match_ids(acpi_dev, acpi_drv->ids); + return !acpi_match_device_ids(acpi_dev, acpi_drv->ids); } static int acpi_device_uevent(struct device *dev, char **envp, int num_envp, - char *buffer, int buffer_size) + char *buffer, int buffer_size) { struct acpi_device *acpi_dev = to_acpi_device(dev); - int i = 0, length = 0, ret = 0; - - if (acpi_dev->flags.hardware_id) - ret = add_uevent_var(envp, num_envp, &i, - buffer, buffer_size, &length, - "HWID=%s", acpi_dev->pnp.hardware_id); - if (ret) - return -ENOMEM; - if (acpi_dev->flags.compatible_ids) { - int j; - struct acpi_compatible_id_list *cid_list; - cid_list = acpi_dev->pnp.cid_list; - - for (j = 0; j < cid_list->count; j++) { - ret = add_uevent_var(envp, num_envp, &i, buffer, - buffer_size, &length, "COMPTID=%s", - cid_list->id[j].value); - if (ret) - return -ENOMEM; - } + strcpy(buffer, "MODALIAS="); + if (create_modalias(acpi_dev, buffer + 9, buffer_size - 9) > 0) { + envp[0] = buffer; + envp[1] = NULL; } - - envp[i] = NULL; return 0; } @@ -543,25 +621,6 @@ void acpi_bus_data_handler(acpi_handle h return; } -int acpi_match_ids(struct acpi_device *device, char *ids) -{ - if (device->flags.hardware_id) - if (strstr(ids, device->pnp.hardware_id)) - return 0; - - if (device->flags.compatible_ids) { - struct acpi_compatible_id_list *cid_list = device->pnp.cid_list; - int i; - - /* compare multiple _CID entries against driver ids */ - for (i = 0; i < cid_list->count; i++) { - if (strstr(ids, cid_list->id[i].value)) - return 0; - } - } - return -ENOENT; -} - static int acpi_bus_get_perf_flags(struct acpi_device *device) { device->performance.state = ACPI_STATE_UNKNOWN; @@ -624,6 +683,13 @@ static int acpi_bus_get_wakeup_device_fl struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; union acpi_object *package = NULL; + struct acpi_device_id button_device_ids[] = { + {"PNP0C0D", 0}, + {"PNP0C0C", 0}, + {"PNP0C0E", 0}, + {"", 0}, + }; + /* _PRW */ status = acpi_evaluate_object(device->handle, "_PRW", NULL, &buffer); @@ -643,7 +709,7 
@@ static int acpi_bus_get_wakeup_device_fl device->wakeup.flags.valid = 1; /* Power button, Lid switch always enable wakeup */ - if (!acpi_match_ids(device, "PNP0C0D,PNP0C0C,PNP0C0E")) + if (!acpi_match_device_ids(device, button_device_ids)) device->wakeup.flags.run_wake = 1; end: Index: linux-rt-rebase.q/drivers/acpi/sleep/Makefile =================================================================== --- linux-rt-rebase.q.orig/drivers/acpi/sleep/Makefile +++ linux-rt-rebase.q/drivers/acpi/sleep/Makefile @@ -1,5 +1,5 @@ obj-y := poweroff.o wakeup.o -obj-$(CONFIG_ACPI_SLEEP) += main.o -obj-$(CONFIG_ACPI_SLEEP_PROC_FS) += proc.o +obj-y += main.o +obj-$(CONFIG_X86) += proc.o EXTRA_CFLAGS += $(ACPI_CFLAGS) Index: linux-rt-rebase.q/drivers/acpi/sleep/main.c =================================================================== --- linux-rt-rebase.q.orig/drivers/acpi/sleep/main.c +++ linux-rt-rebase.q/drivers/acpi/sleep/main.c @@ -34,35 +34,55 @@ static u32 acpi_suspend_states[] = { static int init_8259A_after_S1; +extern int acpi_sleep_prepare(u32 acpi_state); +extern void acpi_power_off(void); + +static u32 acpi_target_sleep_state = ACPI_STATE_S0; + +/** + * acpi_pm_set_target - Set the target system sleep state to the state + * associated with the given @pm_state, if supported. + */ + +static int acpi_pm_set_target(suspend_state_t pm_state) +{ + u32 acpi_state = acpi_suspend_states[pm_state]; + int error = 0; + + if (sleep_states[acpi_state]) { + acpi_target_sleep_state = acpi_state; + } else { + printk(KERN_ERR "ACPI does not support this state: %d\n", + pm_state); + error = -ENOSYS; + } + return error; +} + /** * acpi_pm_prepare - Do preliminary suspend work. - * @pm_state: suspend state we're entering. + * @pm_state: ignored * - * Make sure we support the state. If we do, and we need it, set the - * firmware waking vector and do arch-specific nastiness to get the - * wakeup code to the waking vector. + * If necessary, set the firmware waking vector and do arch-specific + * nastiness to get the wakeup code to the waking vector. */ -extern int acpi_sleep_prepare(u32 acpi_state); -extern void acpi_power_off(void); - static int acpi_pm_prepare(suspend_state_t pm_state) { - u32 acpi_state = acpi_suspend_states[pm_state]; + int error = acpi_sleep_prepare(acpi_target_sleep_state); - if (!sleep_states[acpi_state]) { - printk("acpi_pm_prepare does not support %d \n", pm_state); - return -EPERM; - } - return acpi_sleep_prepare(acpi_state); + if (error) + acpi_target_sleep_state = ACPI_STATE_S0; + + return error; } /** * acpi_pm_enter - Actually enter a sleep state. - * @pm_state: State we're entering. + * @pm_state: ignored * - * Flush caches and go to sleep. For STR or STD, we have to call - * arch-specific assembly, which in turn call acpi_enter_sleep_state(). + * Flush caches and go to sleep. For STR we have to call arch-specific + * assembly, which in turn calls acpi_enter_sleep_state(). * It's unfortunate, but it works. Please fix if you're feeling frisky. */ @@ -70,31 +90,31 @@ static int acpi_pm_enter(suspend_state_t { acpi_status status = AE_OK; unsigned long flags = 0; - u32 acpi_state = acpi_suspend_states[pm_state]; + u32 acpi_state = acpi_target_sleep_state; ACPI_FLUSH_CPU_CACHE(); /* Do arch specific saving of state.
*/ - if (pm_state > PM_SUSPEND_STANDBY) { + if (acpi_state == ACPI_STATE_S3) { int error = acpi_save_state_mem(); - if (error) + + if (error) { + acpi_target_sleep_state = ACPI_STATE_S0; return error; + } } local_irq_save(flags); acpi_enable_wakeup_device(acpi_state); - switch (pm_state) { - case PM_SUSPEND_STANDBY: + switch (acpi_state) { + case ACPI_STATE_S1: barrier(); status = acpi_enter_sleep_state(acpi_state); break; - case PM_SUSPEND_MEM: + case ACPI_STATE_S3: do_suspend_lowlevel(); break; - - default: - return -EINVAL; } /* ACPI 3.0 specs (P62) says that it's the responsabilty @@ -107,12 +127,8 @@ static int acpi_pm_enter(suspend_state_t local_irq_restore(flags); printk(KERN_DEBUG "Back to C!\n"); - /* restore processor state - * We should only be here if we're coming back from STR or STD. - * And, in the case of the latter, the memory image should have already - * been loaded from disk. - */ - if (pm_state > PM_SUSPEND_STANDBY) + /* restore processor state */ + if (acpi_state == ACPI_STATE_S3) acpi_restore_state_mem(); return ACPI_SUCCESS(status) ? 0 : -EFAULT; @@ -120,7 +136,7 @@ static int acpi_pm_enter(suspend_state_t /** * acpi_pm_finish - Finish up suspend sequence. - * @pm_state: State we're coming out of. + * @pm_state: ignored * * This is called after we wake back up (or if entering the sleep state * failed). @@ -128,7 +144,7 @@ static int acpi_pm_enter(suspend_state_t static int acpi_pm_finish(suspend_state_t pm_state) { - u32 acpi_state = acpi_suspend_states[pm_state]; + u32 acpi_state = acpi_target_sleep_state; acpi_leave_sleep_state(acpi_state); acpi_disable_wakeup_device(acpi_state); @@ -136,10 +152,14 @@ static int acpi_pm_finish(suspend_state_ /* reset firmware waking vector */ acpi_set_firmware_waking_vector((acpi_physical_address) 0); + acpi_target_sleep_state = ACPI_STATE_S0; + +#ifdef CONFIG_X86 if (init_8259A_after_S1) { printk("Broken toshiba laptop -> kicking interrupts\n"); init_8259A(0); } +#endif return 0; } @@ -176,6 +196,7 @@ static int acpi_pm_state_valid(suspend_s static struct pm_ops acpi_pm_ops = { .valid = acpi_pm_state_valid, + .set_target = acpi_pm_set_target, .prepare = acpi_pm_prepare, .enter = acpi_pm_enter, .finish = acpi_pm_finish, @@ -235,6 +256,81 @@ static struct hibernation_ops acpi_hiber }; #endif /* CONFIG_SOFTWARE_SUSPEND */ +/** + * acpi_pm_device_sleep_state - return preferred power state of ACPI device + * in the system sleep state given by %acpi_target_sleep_state + * @dev: device to examine + * @wake: if set, the device should be able to wake up the system + * @d_min_p: used to store the upper limit of allowed states range + * Return value: preferred power state of the device on success, -ENODEV on + * failure (ie. if there's no 'struct acpi_device' for @dev) + * + * Find the lowest power (highest number) ACPI device power state that + * device @dev can be in while the system is in the sleep state represented + * by %acpi_target_sleep_state. If @wake is nonzero, the device should be + * able to wake up the system from this sleep state. If @d_min_p is set, + * the highest power (lowest number) device power state of @dev allowed + * in this system sleep state is stored at the location pointed to by it. + * + * The caller must ensure that @dev is valid before using this function. + * The caller is also responsible for figuring out if the device is + * supposed to be able to wake up the system and passing this information + * via @wake. 
+ */ + +int acpi_pm_device_sleep_state(struct device *dev, int wake, int *d_min_p) +{ + acpi_handle handle = DEVICE_ACPI_HANDLE(dev); + struct acpi_device *adev; + char acpi_method[] = "_SxD"; + unsigned long d_min, d_max; + + if (!handle || ACPI_FAILURE(acpi_bus_get_device(handle, &adev))) { + printk(KERN_ERR "ACPI handle has no context!\n"); + return -ENODEV; + } + + acpi_method[2] = '0' + acpi_target_sleep_state; + /* + * If the sleep state is S0, we will return D3, but if the device has + * _S0W, we will use the value from _S0W + */ + d_min = ACPI_STATE_D0; + d_max = ACPI_STATE_D3; + + /* + * If present, _SxD methods return the minimum D-state (highest power + * state) we can use for the corresponding S-states. Otherwise, the + * minimum D-state is D0 (ACPI 3.x). + * + * NOTE: We rely on acpi_evaluate_integer() not clobbering the integer + * provided -- that's our fault recovery, we ignore retval. + */ + if (acpi_target_sleep_state > ACPI_STATE_S0) + acpi_evaluate_integer(handle, acpi_method, NULL, &d_min); + + /* + * If _PRW says we can wake up the system from the target sleep state, + * the D-state returned by _SxD is sufficient for that (we assume a + * wakeup-aware driver if wake is set). Still, if _SxW exists + * (ACPI 3.x), it should return the maximum (lowest power) D-state that + * can wake the system. _S0W may be valid, too. + */ + if (acpi_target_sleep_state == ACPI_STATE_S0 || + (wake && adev->wakeup.state.enabled && + adev->wakeup.sleep_state <= acpi_target_sleep_state)) { + acpi_method[3] = 'W'; + acpi_evaluate_integer(handle, acpi_method, NULL, &d_max); + /* Sanity check */ + if (d_max < d_min) + d_min = d_max; + } + + if (d_min_p) + *d_min_p = d_min; + return d_max; +} + /* * Toshiba fails to preserve interrupts over S1, reinitialization * of 8259 is needed after S1 resume. Index: linux-rt-rebase.q/drivers/acpi/sleep/poweroff.c =================================================================== --- linux-rt-rebase.q.orig/drivers/acpi/sleep/poweroff.c +++ linux-rt-rebase.q/drivers/acpi/sleep/poweroff.c @@ -18,7 +18,6 @@ int acpi_sleep_prepare(u32 acpi_state) { -#ifdef CONFIG_ACPI_SLEEP /* do we have a wakeup address for S2 and S3? */ if (acpi_state == ACPI_STATE_S3) { if (!acpi_wakeup_address) { @@ -31,7 +30,6 @@ int acpi_sleep_prepare(u32 acpi_state) } ACPI_FLUSH_CPU_CACHE(); acpi_enable_wakeup_device_prep(acpi_state); -#endif acpi_gpe_sleep_prepare(acpi_state); acpi_enter_sleep_state_prep(acpi_state); return 0; Index: linux-rt-rebase.q/drivers/acpi/sleep/proc.c =================================================================== --- linux-rt-rebase.q.orig/drivers/acpi/sleep/proc.c +++ linux-rt-rebase.q/drivers/acpi/sleep/proc.c @@ -14,8 +14,16 @@ #include "sleep.h" #define _COMPONENT ACPI_SYSTEM_COMPONENT + +/* + * this file provides support for: + * /proc/acpi/sleep + * /proc/acpi/alarm + * /proc/acpi/wakeup + */ + ACPI_MODULE_NAME("sleep") -#ifdef CONFIG_ACPI_SLEEP_PROC_SLEEP +#ifdef CONFIG_ACPI_PROCFS static int acpi_system_sleep_seq_show(struct seq_file *seq, void *offset) { int i; @@ -68,7 +76,7 @@ acpi_system_write_sleep(struct file *fil Done: return error ? 
error : count; } -#endif /* CONFIG_ACPI_SLEEP_PROC_SLEEP */ +#endif /* CONFIG_ACPI_PROCFS */ #if defined(CONFIG_RTC_DRV_CMOS) || defined(CONFIG_RTC_DRV_CMOS_MODULE) /* use /sys/class/rtc/rtcX/wakealarm instead; it's not ACPI-specific */ @@ -463,7 +471,7 @@ static const struct file_operations acpi .release = single_release, }; -#ifdef CONFIG_ACPI_SLEEP_PROC_SLEEP +#ifdef CONFIG_ACPI_PROCFS static const struct file_operations acpi_system_sleep_fops = { .open = acpi_system_sleep_open_fs, .read = seq_read, @@ -471,7 +479,7 @@ static const struct file_operations acpi .llseek = seq_lseek, .release = single_release, }; -#endif /* CONFIG_ACPI_SLEEP_PROC_SLEEP */ +#endif /* CONFIG_ACPI_PROCFS */ #ifdef HAVE_ACPI_LEGACY_ALARM static const struct file_operations acpi_system_alarm_fops = { @@ -498,14 +506,14 @@ static int __init acpi_sleep_proc_init(v if (acpi_disabled) return 0; -#ifdef CONFIG_ACPI_SLEEP_PROC_SLEEP +#ifdef CONFIG_ACPI_PROCFS /* 'sleep' [R/W] */ entry = create_proc_entry("sleep", S_IFREG | S_IRUGO | S_IWUSR, acpi_root_dir); if (entry) entry->proc_fops = &acpi_system_sleep_fops; -#endif +#endif /* CONFIG_ACPI_PROCFS */ #ifdef HAVE_ACPI_LEGACY_ALARM /* 'alarm' [R/W] */ Index: linux-rt-rebase.q/drivers/acpi/sleep/wakeup.c =================================================================== --- linux-rt-rebase.q.orig/drivers/acpi/sleep/wakeup.c +++ linux-rt-rebase.q/drivers/acpi/sleep/wakeup.c @@ -17,7 +17,6 @@ ACPI_MODULE_NAME("wakeup_devices") extern struct list_head acpi_wakeup_device_list; extern spinlock_t acpi_device_lock; -#ifdef CONFIG_ACPI_SLEEP /** * acpi_enable_wakeup_device_prep - prepare wakeup devices * @sleep_state: ACPI state @@ -180,7 +179,6 @@ static int __init acpi_wakeup_device_ini } late_initcall(acpi_wakeup_device_init); -#endif /* * Disable all wakeup GPEs before entering requested sleep state. 
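The acpi_pm_device_sleep_state() helper introduced in sleep/main.c above is meant to be called from a driver's suspend path once the system has committed to a target sleep state. A sketch of the calling convention under the signature added here ("foo_suspend" and "foo_set_power" are invented names, and device_may_wakeup() is only assumed to be the usual source of the @wake argument):

static int foo_suspend(struct device *dev)
{
	int d_min, d_state;

	/* deepest (lowest-power) D-state usable in the target S-state;
	 * @wake is nonzero if the device must be able to wake the system */
	d_state = acpi_pm_device_sleep_state(dev, device_may_wakeup(dev),
					     &d_min);
	if (d_state < 0)
		d_state = ACPI_STATE_D3;	/* no ACPI data, be conservative */

	/* d_min is the shallowest state _SxD permits; anything between
	 * d_min and d_state inclusive is a valid choice */
	return foo_set_power(dev, d_state);
}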
Index: linux-rt-rebase.q/drivers/acpi/thermal.c =================================================================== --- linux-rt-rebase.q.orig/drivers/acpi/thermal.c +++ linux-rt-rebase.q/drivers/acpi/thermal.c @@ -92,10 +92,16 @@ static int acpi_thermal_polling_open_fs( static ssize_t acpi_thermal_write_polling(struct file *, const char __user *, size_t, loff_t *); +static const struct acpi_device_id thermal_device_ids[] = { + {ACPI_THERMAL_HID, 0}, + {"", 0}, +}; +MODULE_DEVICE_TABLE(acpi, thermal_device_ids); + static struct acpi_driver acpi_thermal_driver = { .name = "thermal", .class = ACPI_THERMAL_CLASS, - .ids = ACPI_THERMAL_HID, + .ids = thermal_device_ids, .ops = { .add = acpi_thermal_add, .remove = acpi_thermal_remove, Index: linux-rt-rebase.q/drivers/acpi/utilities/uteval.c =================================================================== --- linux-rt-rebase.q.orig/drivers/acpi/utilities/uteval.c +++ linux-rt-rebase.q/drivers/acpi/utilities/uteval.c @@ -407,7 +407,7 @@ acpi_ut_copy_id_string(char *destination acpi_status acpi_ut_execute_HID(struct acpi_namespace_node *device_node, - struct acpi_device_id *hid) + struct acpica_device_id *hid) { union acpi_operand_object *obj_desc; acpi_status status; @@ -609,7 +609,7 @@ acpi_ut_execute_CID(struct acpi_namespac acpi_status acpi_ut_execute_UID(struct acpi_namespace_node *device_node, - struct acpi_device_id *uid) + struct acpica_device_id *uid) { union acpi_operand_object *obj_desc; acpi_status status; Index: linux-rt-rebase.q/drivers/acpi/video.c =================================================================== --- linux-rt-rebase.q.orig/drivers/acpi/video.c +++ linux-rt-rebase.q/drivers/acpi/video.c @@ -74,10 +74,16 @@ MODULE_LICENSE("GPL"); static int acpi_video_bus_add(struct acpi_device *device); static int acpi_video_bus_remove(struct acpi_device *device, int type); +static const struct acpi_device_id video_device_ids[] = { + {ACPI_VIDEO_HID, 0}, + {"", 0}, +}; +MODULE_DEVICE_TABLE(acpi, video_device_ids); + static struct acpi_driver acpi_video_bus = { .name = "video", .class = ACPI_VIDEO_CLASS, - .ids = ACPI_VIDEO_HID, + .ids = video_device_ids, .ops = { .add = acpi_video_bus_add, .remove = acpi_video_bus_remove, Index: linux-rt-rebase.q/drivers/ata/ata_piix.c =================================================================== --- linux-rt-rebase.q.orig/drivers/ata/ata_piix.c +++ linux-rt-rebase.q/drivers/ata/ata_piix.c @@ -91,6 +91,7 @@ #include #include #include +#include #define DRV_NAME "ata_piix" #define DRV_VERSION "2.11" @@ -140,6 +141,9 @@ enum { RV = -3, /* reserved */ PIIX_AHCI_DEVICE = 6, + + /* host->flags bits */ + PIIX_HOST_BROKEN_SUSPEND = (1 << 24), }; struct piix_map_db { @@ -159,6 +163,10 @@ static void piix_set_piomode (struct ata static void piix_set_dmamode (struct ata_port *ap, struct ata_device *adev); static void ich_set_dmamode (struct ata_port *ap, struct ata_device *adev); static int ich_pata_cable_detect(struct ata_port *ap); +#ifdef CONFIG_PM +static int piix_pci_device_suspend(struct pci_dev *pdev, pm_message_t mesg); +static int piix_pci_device_resume(struct pci_dev *pdev); +#endif static unsigned int in_module_init = 1; @@ -255,8 +263,8 @@ static struct pci_driver piix_pci_driver .probe = piix_init_one, .remove = ata_pci_remove_one, #ifdef CONFIG_PM - .suspend = ata_pci_device_suspend, - .resume = ata_pci_device_resume, + .suspend = piix_pci_device_suspend, + .resume = piix_pci_device_resume, #endif }; @@ -881,6 +889,107 @@ static void ich_set_dmamode (struct ata_ do_pata_set_dmamode(ap, 
adev, 1); } +#ifdef CONFIG_PM +static struct dmi_system_id piix_broken_suspend_dmi_table[] = { + { + .ident = "TECRA M5", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"), + DMI_MATCH(DMI_PRODUCT_NAME, "TECRA M5"), + }, + }, + { + .ident = "Satellite U200", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"), + DMI_MATCH(DMI_PRODUCT_NAME, "Satellite U200"), + }, + }, + { + .ident = "Satellite U205", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"), + DMI_MATCH(DMI_PRODUCT_NAME, "Satellite U205"), + }, + }, + { + .ident = "Portege M500", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"), + DMI_MATCH(DMI_PRODUCT_NAME, "PORTEGE M500"), + }, + }, + { } +}; + +static int piix_pci_device_suspend(struct pci_dev *pdev, pm_message_t mesg) +{ + struct ata_host *host = dev_get_drvdata(&pdev->dev); + unsigned long flags; + int rc = 0; + + rc = ata_host_suspend(host, mesg); + if (rc) + return rc; + + /* Some braindamaged ACPI suspend implementations expect the + * controller to be awake on entry; otherwise, it burns cpu + * cycles and power trying to do something to the sleeping + * beauty. + */ + if (dmi_check_system(piix_broken_suspend_dmi_table) && + mesg.event == PM_EVENT_SUSPEND) { + pci_save_state(pdev); + + /* mark its power state as "unknown", since we don't + * know if e.g. the BIOS will change its device state + * when we suspend. + */ + if (pdev->current_state == PCI_D0) + pdev->current_state = PCI_UNKNOWN; + + /* tell resume that it's waking up from broken suspend */ + spin_lock_irqsave(&host->lock, flags); + host->flags |= PIIX_HOST_BROKEN_SUSPEND; + spin_unlock_irqrestore(&host->lock, flags); + } else + ata_pci_device_do_suspend(pdev, mesg); + + return 0; +} + +static int piix_pci_device_resume(struct pci_dev *pdev) +{ + struct ata_host *host = dev_get_drvdata(&pdev->dev); + unsigned long flags; + int rc; + + if (host->flags & PIIX_HOST_BROKEN_SUSPEND) { + spin_lock_irqsave(&host->lock, flags); + host->flags &= ~PIIX_HOST_BROKEN_SUSPEND; + spin_unlock_irqrestore(&host->lock, flags); + + pci_set_power_state(pdev, PCI_D0); + pci_restore_state(pdev); + + /* PCI device wasn't disabled during suspend. Use + * __pci_reenable_device() to avoid affecting the + * enable count. + */ + rc = __pci_reenable_device(pdev); + if (rc) + dev_printk(KERN_ERR, &pdev->dev, "failed to enable " + "device after resume (%d)\n", rc); + } else + rc = ata_pci_device_do_resume(pdev); + + if (rc == 0) + ata_host_resume(host); + + return rc; +} +#endif + #define AHCI_PCI_BAR 5 #define AHCI_GLOBAL_CTL 0x04 #define AHCI_ENABLE (1 << 31) Index: linux-rt-rebase.q/drivers/ata/libata-scsi.c =================================================================== --- linux-rt-rebase.q.orig/drivers/ata/libata-scsi.c +++ linux-rt-rebase.q/drivers/ata/libata-scsi.c @@ -768,7 +768,7 @@ static void ata_scsi_dev_config(struct s * Decrement max hw segments accordingly. 
*/ if (dev->class == ATA_DEV_ATAPI) { - request_queue_t *q = sdev->request_queue; + struct request_queue *q = sdev->request_queue; blk_queue_max_hw_segments(q, q->max_hw_segments - 1); } Index: linux-rt-rebase.q/drivers/ata/pata_ali.c =================================================================== --- linux-rt-rebase.q.orig/drivers/ata/pata_ali.c +++ linux-rt-rebase.q/drivers/ata/pata_ali.c @@ -45,7 +45,7 @@ static struct dmi_system_id cable_dmi_ta .ident = "HP Pavilion N5430", .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"), - DMI_MATCH(DMI_BOARD_NAME, "OmniBook N32N-736"), + DMI_MATCH(DMI_BOARD_VERSION, "OmniBook N32N-736"), }, }, { } Index: linux-rt-rebase.q/drivers/ata/pata_hpt37x.c =================================================================== --- linux-rt-rebase.q.orig/drivers/ata/pata_hpt37x.c +++ linux-rt-rebase.q/drivers/ata/pata_hpt37x.c @@ -26,7 +26,7 @@ #include #define DRV_NAME "pata_hpt37x" -#define DRV_VERSION "0.6.6" +#define DRV_VERSION "0.6.7" struct hpt_clock { u8 xfer_speed; @@ -1103,17 +1103,17 @@ static int hpt37x_init_one(struct pci_de /* Select the DPLL clock. */ pci_write_config_byte(dev, 0x5b, 0x21); - pci_write_config_dword(dev, 0x5C, (f_high << 16) | f_low); + pci_write_config_dword(dev, 0x5C, (f_high << 16) | f_low | 0x100); for(adjust = 0; adjust < 8; adjust++) { if (hpt37x_calibrate_dpll(dev)) break; /* See if it'll settle at a fractionally different clock */ - if ((adjust & 3) == 3) { - f_low --; - f_high ++; - } - pci_write_config_dword(dev, 0x5C, (f_high << 16) | f_low); + if (adjust & 1) + f_low -= adjust >> 1; + else + f_high += adjust >> 1; + pci_write_config_dword(dev, 0x5C, (f_high << 16) | f_low | 0x100); } if (adjust == 8) { printk(KERN_WARNING "hpt37x: DPLL did not stabilize.\n"); Index: linux-rt-rebase.q/drivers/block/amiflop.c =================================================================== --- linux-rt-rebase.q.orig/drivers/block/amiflop.c +++ linux-rt-rebase.q/drivers/block/amiflop.c @@ -1422,7 +1422,7 @@ static void redo_fd_request(void) goto repeat; } -static void do_fd_request(request_queue_t * q) +static void do_fd_request(struct request_queue * q) { redo_fd_request(); } Index: linux-rt-rebase.q/drivers/block/aoe/aoe.h =================================================================== --- linux-rt-rebase.q.orig/drivers/block/aoe/aoe.h +++ linux-rt-rebase.q/drivers/block/aoe/aoe.h @@ -138,7 +138,7 @@ struct aoedev { u16 maxbcnt; struct work_struct work;/* disk create work struct */ struct gendisk *gd; - request_queue_t blkq; + struct request_queue blkq; struct hd_geometry geo; sector_t ssize; struct timer_list timer; Index: linux-rt-rebase.q/drivers/block/aoe/aoeblk.c =================================================================== --- linux-rt-rebase.q.orig/drivers/block/aoe/aoeblk.c +++ linux-rt-rebase.q/drivers/block/aoe/aoeblk.c @@ -125,7 +125,7 @@ aoeblk_release(struct inode *inode, stru } static int -aoeblk_make_request(request_queue_t *q, struct bio *bio) +aoeblk_make_request(struct request_queue *q, struct bio *bio) { struct aoedev *d; struct buf *buf; Index: linux-rt-rebase.q/drivers/block/ataflop.c =================================================================== --- linux-rt-rebase.q.orig/drivers/block/ataflop.c +++ linux-rt-rebase.q/drivers/block/ataflop.c @@ -1466,7 +1466,7 @@ repeat: } -void do_fd_request(request_queue_t * q) +void do_fd_request(struct request_queue * q) { unsigned long flags; Index: linux-rt-rebase.q/drivers/block/cciss.c 
=================================================================== --- linux-rt-rebase.q.orig/drivers/block/cciss.c +++ linux-rt-rebase.q/drivers/block/cciss.c @@ -139,7 +139,7 @@ static struct board_type products[] = { static ctlr_info_t *hba[MAX_CTLR]; -static void do_cciss_request(request_queue_t *q); +static void do_cciss_request(struct request_queue *q); static irqreturn_t do_cciss_intr(int irq, void *dev_id); static int cciss_open(struct inode *inode, struct file *filep); static int cciss_release(struct inode *inode, struct file *filep); @@ -1584,7 +1584,7 @@ static int deregister_disk(struct gendis */ if (h->gendisk[0] != disk) { if (disk) { - request_queue_t *q = disk->queue; + struct request_queue *q = disk->queue; if (disk->flags & GENHD_FL_UP) del_gendisk(disk); if (q) { @@ -2511,7 +2511,7 @@ after_error_processing: /* * Get a request and submit it to the controller. */ -static void do_cciss_request(request_queue_t *q) +static void do_cciss_request(struct request_queue *q) { ctlr_info_t *h = q->queuedata; CommandList_struct *c; @@ -3380,7 +3380,7 @@ static int __devinit cciss_init_one(stru do { drive_info_struct *drv = &(hba[i]->drv[j]); struct gendisk *disk = hba[i]->gendisk[j]; - request_queue_t *q; + struct request_queue *q; /* Check if the disk was allocated already */ if (!disk){ @@ -3523,7 +3523,7 @@ static void __devexit cciss_remove_one(s for (j = 0; j < CISS_MAX_LUN; j++) { struct gendisk *disk = hba[i]->gendisk[j]; if (disk) { - request_queue_t *q = disk->queue; + struct request_queue *q = disk->queue; if (disk->flags & GENHD_FL_UP) del_gendisk(disk); Index: linux-rt-rebase.q/drivers/block/cpqarray.c =================================================================== --- linux-rt-rebase.q.orig/drivers/block/cpqarray.c +++ linux-rt-rebase.q/drivers/block/cpqarray.c @@ -161,7 +161,7 @@ static int ida_ioctl(struct inode *inode static int ida_getgeo(struct block_device *bdev, struct hd_geometry *geo); static int ida_ctlr_ioctl(ctlr_info_t *h, int dsk, ida_ioctl_t *io); -static void do_ida_request(request_queue_t *q); +static void do_ida_request(struct request_queue *q); static void start_io(ctlr_info_t *h); static inline void addQ(cmdlist_t **Qptr, cmdlist_t *c); @@ -391,7 +391,7 @@ static void __devexit cpqarray_remove_on /* pdev is NULL for eisa */ static int __init cpqarray_register_ctlr( int i, struct pci_dev *pdev) { - request_queue_t *q; + struct request_queue *q; int j; /* @@ -886,7 +886,7 @@ static inline cmdlist_t *removeQ(cmdlist * are in here (either via the dummy do_ida_request functions or by being * called from the interrupt handler */ -static void do_ida_request(request_queue_t *q) +static void do_ida_request(struct request_queue *q) { ctlr_info_t *h = q->queuedata; cmdlist_t *c; Index: linux-rt-rebase.q/drivers/block/floppy.c =================================================================== --- linux-rt-rebase.q.orig/drivers/block/floppy.c +++ linux-rt-rebase.q/drivers/block/floppy.c @@ -251,7 +251,7 @@ static int irqdma_allocated; static struct request *current_req; static struct request_queue *floppy_queue; -static void do_fd_request(request_queue_t * q); +static void do_fd_request(struct request_queue * q); #ifndef fd_get_dma_residue #define fd_get_dma_residue() get_dma_residue(FLOPPY_DMA) @@ -2981,7 +2981,7 @@ static void process_fd_request(void) schedule_bh(redo_fd_request); } -static void do_fd_request(request_queue_t * q) +static void do_fd_request(struct request_queue * q) { if (max_buffer_sectors == 0) { printk("VFS: do_fd_request called on 
non-open device\n"); Index: linux-rt-rebase.q/drivers/block/lguest_blk.c =================================================================== --- linux-rt-rebase.q.orig/drivers/block/lguest_blk.c +++ linux-rt-rebase.q/drivers/block/lguest_blk.c @@ -137,7 +137,7 @@ static void do_read(struct blockdev *bd, lguest_send_dma(bd->phys_addr, &ping); } -static void do_lgb_request(request_queue_t *q) +static void do_lgb_request(struct request_queue *q) { struct blockdev *bd; struct request *req; Index: linux-rt-rebase.q/drivers/block/loop.c =================================================================== --- linux-rt-rebase.q.orig/drivers/block/loop.c +++ linux-rt-rebase.q/drivers/block/loop.c @@ -529,7 +529,7 @@ static struct bio *loop_get_bio(struct l return bio; } -static int loop_make_request(request_queue_t *q, struct bio *old_bio) +static int loop_make_request(struct request_queue *q, struct bio *old_bio) { struct loop_device *lo = q->queuedata; int rw = bio_rw(old_bio); @@ -558,7 +558,7 @@ out: /* * kick off io on the underlying address space */ -static void loop_unplug(request_queue_t *q) +static void loop_unplug(struct request_queue *q) { struct loop_device *lo = q->queuedata; Index: linux-rt-rebase.q/drivers/block/nbd.c =================================================================== --- linux-rt-rebase.q.orig/drivers/block/nbd.c +++ linux-rt-rebase.q/drivers/block/nbd.c @@ -100,7 +100,7 @@ static const char *nbdcmd_to_ascii(int c static void nbd_end_request(struct request *req) { int uptodate = (req->errors == 0) ? 1 : 0; - request_queue_t *q = req->q; + struct request_queue *q = req->q; unsigned long flags; dprintk(DBG_BLKDEV, "%s: request %p: %s\n", req->rq_disk->disk_name, @@ -410,7 +410,7 @@ static void nbd_clear_que(struct nbd_dev * { printk( "Warning: Ignoring result!\n"); nbd_end_request( req ); } */ -static void do_nbd_request(request_queue_t * q) +static void do_nbd_request(struct request_queue * q) { struct request *req; Index: linux-rt-rebase.q/drivers/block/paride/pcd.c =================================================================== --- linux-rt-rebase.q.orig/drivers/block/paride/pcd.c +++ linux-rt-rebase.q/drivers/block/paride/pcd.c @@ -183,7 +183,7 @@ static int pcd_packet(struct cdrom_devic static int pcd_detect(void); static void pcd_probe_capabilities(void); static void do_pcd_read_drq(void); -static void do_pcd_request(request_queue_t * q); +static void do_pcd_request(struct request_queue * q); static void do_pcd_read(void); struct pcd_unit { @@ -713,7 +713,7 @@ static int pcd_detect(void) /* I/O request processing */ static struct request_queue *pcd_queue; -static void do_pcd_request(request_queue_t * q) +static void do_pcd_request(struct request_queue * q) { if (pcd_busy) return; Index: linux-rt-rebase.q/drivers/block/paride/pd.c =================================================================== --- linux-rt-rebase.q.orig/drivers/block/paride/pd.c +++ linux-rt-rebase.q/drivers/block/paride/pd.c @@ -698,7 +698,7 @@ static enum action pd_identify(struct pd /* end of io request engine */ -static void do_pd_request(request_queue_t * q) +static void do_pd_request(struct request_queue * q) { if (pd_req) return; Index: linux-rt-rebase.q/drivers/block/paride/pf.c =================================================================== --- linux-rt-rebase.q.orig/drivers/block/paride/pf.c +++ linux-rt-rebase.q/drivers/block/paride/pf.c @@ -202,7 +202,7 @@ module_param_array(drive3, int, NULL, 0) #define ATAPI_WRITE_10 0x2a static int pf_open(struct inode *inode, struct 
file *file); -static void do_pf_request(request_queue_t * q); +static void do_pf_request(struct request_queue * q); static int pf_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg); static int pf_getgeo(struct block_device *bdev, struct hd_geometry *geo); @@ -760,7 +760,7 @@ static void pf_end_request(int uptodate) } } -static void do_pf_request(request_queue_t * q) +static void do_pf_request(struct request_queue * q) { if (pf_busy) return; Index: linux-rt-rebase.q/drivers/block/pktcdvd.c =================================================================== --- linux-rt-rebase.q.orig/drivers/block/pktcdvd.c +++ linux-rt-rebase.q/drivers/block/pktcdvd.c @@ -752,7 +752,7 @@ static inline struct bio *pkt_get_list_f */ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *cgc) { - request_queue_t *q = bdev_get_queue(pd->bdev); + struct request_queue *q = bdev_get_queue(pd->bdev); struct request *rq; int ret = 0; @@ -979,7 +979,7 @@ static void pkt_iosched_process_queue(st * Special care is needed if the underlying block device has a small * max_phys_segments value. */ -static int pkt_set_segment_merging(struct pktcdvd_device *pd, request_queue_t *q) +static int pkt_set_segment_merging(struct pktcdvd_device *pd, struct request_queue *q) { if ((pd->settings.size << 9) / CD_FRAMESIZE <= q->max_phys_segments) { /* @@ -2314,7 +2314,7 @@ static int pkt_open_dev(struct pktcdvd_d { int ret; long lba; - request_queue_t *q; + struct request_queue *q; /* * We need to re-open the cdrom device without O_NONBLOCK to be able @@ -2477,7 +2477,7 @@ static int pkt_end_io_read_cloned(struct return 0; } -static int pkt_make_request(request_queue_t *q, struct bio *bio) +static int pkt_make_request(struct request_queue *q, struct bio *bio) { struct pktcdvd_device *pd; char b[BDEVNAME_SIZE]; @@ -2626,7 +2626,7 @@ end_io: -static int pkt_merge_bvec(request_queue_t *q, struct bio *bio, struct bio_vec *bvec) +static int pkt_merge_bvec(struct request_queue *q, struct bio *bio, struct bio_vec *bvec) { struct pktcdvd_device *pd = q->queuedata; sector_t zone = ZONE(bio->bi_sector, pd); @@ -2647,7 +2647,7 @@ static int pkt_merge_bvec(request_queue_ static void pkt_init_queue(struct pktcdvd_device *pd) { - request_queue_t *q = pd->disk->queue; + struct request_queue *q = pd->disk->queue; blk_queue_make_request(q, pkt_make_request); blk_queue_hardsect_size(q, CD_FRAMESIZE); Index: linux-rt-rebase.q/drivers/block/ps2esdi.c =================================================================== --- linux-rt-rebase.q.orig/drivers/block/ps2esdi.c +++ linux-rt-rebase.q/drivers/block/ps2esdi.c @@ -64,7 +64,7 @@ static void reset_ctrl(void); static int ps2esdi_geninit(void); -static void do_ps2esdi_request(request_queue_t * q); +static void do_ps2esdi_request(struct request_queue * q); static void ps2esdi_readwrite(int cmd, struct request *req); @@ -473,7 +473,7 @@ static void __init ps2esdi_get_device_cf } /* strategy routine that handles most of the IO requests */ -static void do_ps2esdi_request(request_queue_t * q) +static void do_ps2esdi_request(struct request_queue * q) { struct request *req; /* since, this routine is called with interrupts cleared - they Index: linux-rt-rebase.q/drivers/block/ps3disk.c =================================================================== --- linux-rt-rebase.q.orig/drivers/block/ps3disk.c +++ linux-rt-rebase.q/drivers/block/ps3disk.c @@ -190,7 +190,7 @@ static int ps3disk_submit_flush_request( } static void ps3disk_do_request(struct 
ps3_storage_device *dev, - request_queue_t *q) + struct request_queue *q) { struct request *req; @@ -211,7 +211,7 @@ static void ps3disk_do_request(struct ps } } -static void ps3disk_request(request_queue_t *q) +static void ps3disk_request(struct request_queue *q) { struct ps3_storage_device *dev = q->queuedata; struct ps3disk_private *priv = dev->sbd.core.driver_data; @@ -404,7 +404,7 @@ static int ps3disk_identify(struct ps3_s return 0; } -static void ps3disk_prepare_flush(request_queue_t *q, struct request *req) +static void ps3disk_prepare_flush(struct request_queue *q, struct request *req) { struct ps3_storage_device *dev = q->queuedata; @@ -414,7 +414,7 @@ static void ps3disk_prepare_flush(reques req->cmd_type = REQ_TYPE_FLUSH; } -static int ps3disk_issue_flush(request_queue_t *q, struct gendisk *gendisk, +static int ps3disk_issue_flush(struct request_queue *q, struct gendisk *gendisk, sector_t *sector) { struct ps3_storage_device *dev = q->queuedata; Index: linux-rt-rebase.q/drivers/block/rd.c =================================================================== --- linux-rt-rebase.q.orig/drivers/block/rd.c +++ linux-rt-rebase.q/drivers/block/rd.c @@ -264,7 +264,7 @@ static int rd_blkdev_pagecache_IO(int rw * 19-JAN-1998 Richard Gooch Added devfs support * */ -static int rd_make_request(request_queue_t *q, struct bio *bio) +static int rd_make_request(struct request_queue *q, struct bio *bio) { struct block_device *bdev = bio->bi_bdev; struct address_space * mapping = bdev->bd_inode->i_mapping; Index: linux-rt-rebase.q/drivers/block/sunvdc.c =================================================================== --- linux-rt-rebase.q.orig/drivers/block/sunvdc.c +++ linux-rt-rebase.q/drivers/block/sunvdc.c @@ -444,7 +444,7 @@ out: return err; } -static void do_vdc_request(request_queue_t *q) +static void do_vdc_request(struct request_queue *q) { while (1) { struct request *req = elv_next_request(q); Index: linux-rt-rebase.q/drivers/block/swim3.c =================================================================== --- linux-rt-rebase.q.orig/drivers/block/swim3.c +++ linux-rt-rebase.q/drivers/block/swim3.c @@ -225,7 +225,7 @@ static unsigned short write_postamble[] static void swim3_select(struct floppy_state *fs, int sel); static void swim3_action(struct floppy_state *fs, int action); static int swim3_readbit(struct floppy_state *fs, int bit); -static void do_fd_request(request_queue_t * q); +static void do_fd_request(struct request_queue * q); static void start_request(struct floppy_state *fs); static void set_timeout(struct floppy_state *fs, int nticks, void (*proc)(unsigned long)); @@ -290,7 +290,7 @@ static int swim3_readbit(struct floppy_s return (stat & DATA) == 0; } -static void do_fd_request(request_queue_t * q) +static void do_fd_request(struct request_queue * q) { int i; for(i=0;iwait_q_prod % CARM_MAX_WAIT_Q; @@ -768,7 +768,7 @@ static inline void carm_push_q (struct c BUG_ON(host->wait_q_prod == host->wait_q_cons); /* overrun */ } -static inline request_queue_t *carm_pop_q(struct carm_host *host) +static inline struct request_queue *carm_pop_q(struct carm_host *host) { unsigned int idx; @@ -783,7 +783,7 @@ static inline request_queue_t *carm_pop_ static inline void carm_round_robin(struct carm_host *host) { - request_queue_t *q = carm_pop_q(host); + struct request_queue *q = carm_pop_q(host); if (q) { blk_start_queue(q); VPRINTK("STARTED QUEUE %p\n", q); @@ -802,7 +802,7 @@ static inline void carm_end_rq(struct ca } } -static void carm_oob_rq_fn(request_queue_t *q) +static void 
carm_oob_rq_fn(struct request_queue *q) { struct carm_host *host = q->queuedata; struct carm_request *crq; @@ -833,7 +833,7 @@ static void carm_oob_rq_fn(request_queue } } -static void carm_rq_fn(request_queue_t *q) +static void carm_rq_fn(struct request_queue *q) { struct carm_port *port = q->queuedata; struct carm_host *host = port->host; @@ -1494,7 +1494,7 @@ static int carm_init_disks(struct carm_h for (i = 0; i < CARM_MAX_PORTS; i++) { struct gendisk *disk; - request_queue_t *q; + struct request_queue *q; struct carm_port *port; port = &host->port[i]; @@ -1538,7 +1538,7 @@ static void carm_free_disks(struct carm_ for (i = 0; i < CARM_MAX_PORTS; i++) { struct gendisk *disk = host->port[i].disk; if (disk) { - request_queue_t *q = disk->queue; + struct request_queue *q = disk->queue; if (disk->flags & GENHD_FL_UP) del_gendisk(disk); @@ -1571,7 +1571,7 @@ static int carm_init_one (struct pci_dev struct carm_host *host; unsigned int pci_dac; int rc; - request_queue_t *q; + struct request_queue *q; unsigned int i; if (!printed_version++) Index: linux-rt-rebase.q/drivers/block/ub.c =================================================================== --- linux-rt-rebase.q.orig/drivers/block/ub.c +++ linux-rt-rebase.q/drivers/block/ub.c @@ -503,7 +503,7 @@ static void ub_cleanup(struct ub_dev *sc { struct list_head *p; struct ub_lun *lun; - request_queue_t *q; + struct request_queue *q; while (!list_empty(&sc->luns)) { p = sc->luns.next; @@ -619,7 +619,7 @@ static struct ub_scsi_cmd *ub_cmdq_pop(s * The request function is our main entry point */ -static void ub_request_fn(request_queue_t *q) +static void ub_request_fn(struct request_queue *q) { struct ub_lun *lun = q->queuedata; struct request *rq; @@ -2273,7 +2273,7 @@ err_core: static int ub_probe_lun(struct ub_dev *sc, int lnum) { struct ub_lun *lun; - request_queue_t *q; + struct request_queue *q; struct gendisk *disk; int rc; Index: linux-rt-rebase.q/drivers/block/umem.c =================================================================== --- linux-rt-rebase.q.orig/drivers/block/umem.c +++ linux-rt-rebase.q/drivers/block/umem.c @@ -114,7 +114,7 @@ struct cardinfo { */ struct bio *bio, *currentbio, **biotail; - request_queue_t *queue; + struct request_queue *queue; struct mm_page { dma_addr_t page_dma; @@ -357,7 +357,7 @@ static inline void reset_page(struct mm_ page->biotail = & page->bio; } -static void mm_unplug_device(request_queue_t *q) +static void mm_unplug_device(struct request_queue *q) { struct cardinfo *card = q->queuedata; unsigned long flags; @@ -541,7 +541,7 @@ static void process_page(unsigned long d -- mm_make_request ----------------------------------------------------------------------------------- */ -static int mm_make_request(request_queue_t *q, struct bio *bio) +static int mm_make_request(struct request_queue *q, struct bio *bio) { struct cardinfo *card = q->queuedata; pr_debug("mm_make_request %llu %u\n", Index: linux-rt-rebase.q/drivers/block/viodasd.c =================================================================== --- linux-rt-rebase.q.orig/drivers/block/viodasd.c +++ linux-rt-rebase.q/drivers/block/viodasd.c @@ -400,7 +400,7 @@ error_ret: /* * This is the external request processing routine */ -static void do_viodasd_request(request_queue_t *q) +static void do_viodasd_request(struct request_queue *q) { struct request *req; Index: linux-rt-rebase.q/drivers/block/xd.c =================================================================== --- linux-rt-rebase.q.orig/drivers/block/xd.c +++ 
linux-rt-rebase.q/drivers/block/xd.c @@ -298,7 +298,7 @@ static u_char __init xd_detect (u_char * } /* do_xd_request: handle an incoming request */ -static void do_xd_request (request_queue_t * q) +static void do_xd_request (struct request_queue * q) { struct request *req; Index: linux-rt-rebase.q/drivers/block/xd.h =================================================================== --- linux-rt-rebase.q.orig/drivers/block/xd.h +++ linux-rt-rebase.q/drivers/block/xd.h @@ -104,7 +104,7 @@ static int xd_manual_geo_init (char *com static u_char xd_detect (u_char *controller, unsigned int *address); static u_char xd_initdrives (void (*init_drive)(u_char drive)); -static void do_xd_request (request_queue_t * q); +static void do_xd_request (struct request_queue * q); static int xd_ioctl (struct inode *inode,struct file *file,unsigned int cmd,unsigned long arg); static int xd_readwrite (u_char operation,XD_INFO *disk,char *buffer,u_int block,u_int count); static void xd_recalibrate (u_char drive); Index: linux-rt-rebase.q/drivers/block/xen-blkfront.c =================================================================== --- linux-rt-rebase.q.orig/drivers/block/xen-blkfront.c +++ linux-rt-rebase.q/drivers/block/xen-blkfront.c @@ -241,7 +241,7 @@ static inline void flush_requests(struct * do_blkif_request * read a block; request is in a request queue */ -static void do_blkif_request(request_queue_t *rq) +static void do_blkif_request(struct request_queue *rq) { struct blkfront_info *info = NULL; struct request *req; @@ -287,7 +287,7 @@ wait: static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) { - request_queue_t *rq; + struct request_queue *rq; rq = blk_init_queue(do_blkif_request, &blkif_io_lock); if (rq == NULL) Index: linux-rt-rebase.q/drivers/block/xsysace.c =================================================================== --- linux-rt-rebase.q.orig/drivers/block/xsysace.c +++ linux-rt-rebase.q/drivers/block/xsysace.c @@ -458,7 +458,7 @@ static inline void ace_fsm_yieldirq(stru } /* Get the next read/write request; ending requests that we don't handle */ -struct request *ace_get_next_request(request_queue_t * q) +struct request *ace_get_next_request(struct request_queue * q) { struct request *req; @@ -825,7 +825,7 @@ static irqreturn_t ace_interrupt(int irq /* --------------------------------------------------------------------- * Block ops */ -static void ace_request(request_queue_t * q) +static void ace_request(struct request_queue * q) { struct request *req; struct ace_device *ace; Index: linux-rt-rebase.q/drivers/block/z2ram.c =================================================================== --- linux-rt-rebase.q.orig/drivers/block/z2ram.c +++ linux-rt-rebase.q/drivers/block/z2ram.c @@ -67,7 +67,7 @@ static DEFINE_SPINLOCK(z2ram_lock); static struct block_device_operations z2_fops; static struct gendisk *z2ram_gendisk; -static void do_z2_request(request_queue_t *q) +static void do_z2_request(struct request_queue *q) { struct request *req; while ((req = elv_next_request(q)) != NULL) { Index: linux-rt-rebase.q/drivers/cdrom/cdrom.c =================================================================== --- linux-rt-rebase.q.orig/drivers/cdrom/cdrom.c +++ linux-rt-rebase.q/drivers/cdrom/cdrom.c @@ -2094,7 +2094,7 @@ out: static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf, int lba, int nframes) { - request_queue_t *q = cdi->disk->queue; + struct request_queue *q = cdi->disk->queue; struct request *rq; struct bio *bio; unsigned int len; Index: 
linux-rt-rebase.q/drivers/cdrom/viocd.c =================================================================== --- linux-rt-rebase.q.orig/drivers/cdrom/viocd.c +++ linux-rt-rebase.q/drivers/cdrom/viocd.c @@ -398,7 +398,7 @@ static void viocd_end_request(struct req static int rwreq; -static void do_viocd_request(request_queue_t *q) +static void do_viocd_request(struct request_queue *q) { struct request *req; Index: linux-rt-rebase.q/drivers/char/Kconfig =================================================================== --- linux-rt-rebase.q.orig/drivers/char/Kconfig +++ linux-rt-rebase.q/drivers/char/Kconfig @@ -726,7 +726,7 @@ config NVRAM config RTC tristate "Enhanced Real Time Clock Support" - depends on !PPC && !PARISC && !IA64 && !M68K && !SPARC64 && (!SPARC32 || PCI) && !FRV && !ARM && !SUPERH && !S390 + depends on !PPC && !PARISC && !IA64 && !M68K && !SPARC && !FRV && !ARM && !SUPERH && !S390 ---help--- If you say Y here and create a character special file /dev/rtc with major number 10 and minor number 135 using mknod ("man mknod"), you @@ -750,6 +750,28 @@ config RTC To compile this driver as a module, choose M here: the module will be called rtc. +config JS_RTC + tristate "Enhanced Real Time Clock Support" + depends on SPARC32 && PCI + ---help--- + If you say Y here and create a character special file /dev/rtc with + major number 10 and minor number 135 using mknod ("man mknod"), you + will get access to the real time clock (or hardware clock) built + into your computer. + + Every PC has such a clock built in. It can be used to generate + signals from as low as 1Hz up to 8192Hz, and can also be used + as a 24 hour alarm. It reports status information via the file + /proc/driver/rtc and its behaviour is set by various ioctls on + /dev/rtc. + + If you think you have a use for such a device (such as periodic data + sampling), then say Y here, and read + for details. + + To compile this driver as a module, choose M here: the + module will be called js-rtc. 
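Nearly all of the remaining hunks are one mechanical cleanup: the request_queue_t typedef is spelled out as struct request_queue throughout the tree, in preparation for removing the typedef from the block layer headers. The transformation changes type names only, never behaviour; for a typical block driver it looks like this (hypothetical "foo" driver, shown in the same diff style as the series):

-static void do_foo_request(request_queue_t *q)
+static void do_foo_request(struct request_queue *q)
 {
 	struct request *req;

 	while ((req = elv_next_request(q)) != NULL) {
 		/* service the request exactly as before */
 		end_request(req, 1);
 	}
 }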
+ config SGI_DS1286 tristate "SGI DS1286 RTC support" depends on SGI_IP22 Index: linux-rt-rebase.q/drivers/char/Makefile =================================================================== --- linux-rt-rebase.q.orig/drivers/char/Makefile +++ linux-rt-rebase.q/drivers/char/Makefile @@ -109,6 +109,9 @@ obj-$(CONFIG_TCG_TPM) += tpm/ obj-$(CONFIG_PS3_FLASH) += ps3flash.o +obj-$(CONFIG_JS_RTC) += js-rtc.o +js-rtc-y = rtc.o + # Files generated that shall be removed upon make clean clean-files := consolemap_deftbl.c defkeymap.c Index: linux-rt-rebase.q/drivers/char/hpet.c =================================================================== --- linux-rt-rebase.q.orig/drivers/char/hpet.c +++ linux-rt-rebase.q/drivers/char/hpet.c @@ -1007,9 +1007,15 @@ static int hpet_acpi_remove(struct acpi_ return -EINVAL; } +static const struct acpi_device_id hpet_device_ids[] = { + {"PNP0103", 0}, + {"", 0}, +}; +MODULE_DEVICE_TABLE(acpi, hpet_device_ids); + static struct acpi_driver hpet_acpi_driver = { .name = "hpet", - .ids = "PNP0103", + .ids = hpet_device_ids, .ops = { .add = hpet_acpi_add, .remove = hpet_acpi_remove, Index: linux-rt-rebase.q/drivers/ide/ide-cd.c =================================================================== --- linux-rt-rebase.q.orig/drivers/ide/ide-cd.c +++ linux-rt-rebase.q/drivers/ide/ide-cd.c @@ -3071,7 +3071,7 @@ static inline void ide_cdrom_add_setting /* * standard prep_rq_fn that builds 10 byte cmds */ -static int ide_cdrom_prep_fs(request_queue_t *q, struct request *rq) +static int ide_cdrom_prep_fs(struct request_queue *q, struct request *rq) { int hard_sect = queue_hardsect_size(q); long block = (long)rq->hard_sector / (hard_sect >> 9); @@ -3137,7 +3137,7 @@ static int ide_cdrom_prep_pc(struct requ return BLKPREP_OK; } -static int ide_cdrom_prep_fn(request_queue_t *q, struct request *rq) +static int ide_cdrom_prep_fn(struct request_queue *q, struct request *rq) { if (blk_fs_request(rq)) return ide_cdrom_prep_fs(q, rq); Index: linux-rt-rebase.q/drivers/ide/ide-disk.c =================================================================== --- linux-rt-rebase.q.orig/drivers/ide/ide-disk.c +++ linux-rt-rebase.q/drivers/ide/ide-disk.c @@ -679,7 +679,7 @@ static ide_proc_entry_t idedisk_proc[] = }; #endif /* CONFIG_IDE_PROC_FS */ -static void idedisk_prepare_flush(request_queue_t *q, struct request *rq) +static void idedisk_prepare_flush(struct request_queue *q, struct request *rq) { ide_drive_t *drive = q->queuedata; @@ -697,7 +697,7 @@ static void idedisk_prepare_flush(reques rq->buffer = rq->cmd; } -static int idedisk_issue_flush(request_queue_t *q, struct gendisk *disk, +static int idedisk_issue_flush(struct request_queue *q, struct gendisk *disk, sector_t *error_sector) { ide_drive_t *drive = q->queuedata; Index: linux-rt-rebase.q/drivers/ide/ide-io.c =================================================================== --- linux-rt-rebase.q.orig/drivers/ide/ide-io.c +++ linux-rt-rebase.q/drivers/ide/ide-io.c @@ -1327,7 +1327,7 @@ static void ide_do_request (ide_hwgroup_ /* * Passes the stuff to ide_do_request */ -void do_ide_request(request_queue_t *q) +void do_ide_request(struct request_queue *q) { ide_drive_t *drive = q->queuedata; Index: linux-rt-rebase.q/drivers/ide/ide-probe.c =================================================================== --- linux-rt-rebase.q.orig/drivers/ide/ide-probe.c +++ linux-rt-rebase.q/drivers/ide/ide-probe.c @@ -945,7 +945,7 @@ static void save_match(ide_hwif_t *hwif, */ static int ide_init_queue(ide_drive_t *drive) { - request_queue_t *q; + 
struct request_queue *q; ide_hwif_t *hwif = HWIF(drive); int max_sectors = 256; int max_sg_entries = PRD_ENTRIES; Index: linux-rt-rebase.q/drivers/ide/legacy/hd.c =================================================================== --- linux-rt-rebase.q.orig/drivers/ide/legacy/hd.c +++ linux-rt-rebase.q/drivers/ide/legacy/hd.c @@ -652,7 +652,7 @@ repeat: } } -static void do_hd_request (request_queue_t * q) +static void do_hd_request (struct request_queue * q) { disable_irq(HD_IRQ); hd_request(); Index: linux-rt-rebase.q/drivers/input/misc/atlas_btns.c =================================================================== --- linux-rt-rebase.q.orig/drivers/input/misc/atlas_btns.c +++ linux-rt-rebase.q/drivers/input/misc/atlas_btns.c @@ -31,7 +31,6 @@ #define ACPI_ATLAS_NAME "Atlas ACPI" #define ACPI_ATLAS_CLASS "Atlas" -#define ACPI_ATLAS_BUTTON_HID "ASIM0000" static struct input_dev *input_dev; @@ -130,10 +129,16 @@ static int atlas_acpi_button_remove(stru return status; } +static const struct acpi_device_id atlas_device_ids[] = { + {"ASIM0000", 0}, + {"", 0}, +}; +MODULE_DEVICE_TABLE(acpi, atlas_device_ids); + static struct acpi_driver atlas_acpi_driver = { .name = ACPI_ATLAS_NAME, .class = ACPI_ATLAS_CLASS, - .ids = ACPI_ATLAS_BUTTON_HID, + .ids = atlas_device_ids, .ops = { .add = atlas_acpi_button_add, .remove = atlas_acpi_button_remove, Index: linux-rt-rebase.q/drivers/kvm/kvm_main.c =================================================================== --- linux-rt-rebase.q.orig/drivers/kvm/kvm_main.c +++ linux-rt-rebase.q/drivers/kvm/kvm_main.c @@ -297,9 +297,6 @@ static struct kvm *kvm_create_vm(void) kvm_io_bus_init(&kvm->pio_bus); spin_lock_init(&kvm->lock); INIT_LIST_HEAD(&kvm->active_mmu_pages); - spin_lock(&kvm_lock); - list_add(&kvm->vm_list, &vm_list); - spin_unlock(&kvm_lock); kvm_io_bus_init(&kvm->mmio_bus); for (i = 0; i < KVM_MAX_VCPUS; ++i) { struct kvm_vcpu *vcpu = &kvm->vcpus[i]; @@ -309,6 +306,9 @@ static struct kvm *kvm_create_vm(void) vcpu->kvm = kvm; vcpu->mmu.root_hpa = INVALID_PAGE; } + spin_lock(&kvm_lock); + list_add(&kvm->vm_list, &vm_list); + spin_unlock(&kvm_lock); return kvm; } @@ -1070,18 +1070,16 @@ static int emulator_write_phys(struct kv return 0; mark_page_dirty(vcpu->kvm, gpa >> PAGE_SHIFT); virt = kmap_atomic(page, KM_USER0); - if (memcmp(virt + offset_in_page(gpa), val, bytes)) { - kvm_mmu_pte_write(vcpu, gpa, virt + offset, val, bytes); - memcpy(virt + offset_in_page(gpa), val, bytes); - } + kvm_mmu_pte_write(vcpu, gpa, virt + offset, val, bytes); + memcpy(virt + offset_in_page(gpa), val, bytes); kunmap_atomic(virt, KM_USER0); return 1; } -static int emulator_write_emulated(unsigned long addr, - const void *val, - unsigned int bytes, - struct x86_emulate_ctxt *ctxt) +static int emulator_write_emulated_onepage(unsigned long addr, + const void *val, + unsigned int bytes, + struct x86_emulate_ctxt *ctxt) { struct kvm_vcpu *vcpu = ctxt->vcpu; struct kvm_io_device *mmio_dev; @@ -1113,6 +1111,26 @@ static int emulator_write_emulated(unsig return X86EMUL_CONTINUE; } +static int emulator_write_emulated(unsigned long addr, + const void *val, + unsigned int bytes, + struct x86_emulate_ctxt *ctxt) +{ + /* Crossing a page boundary? 
*/ + if (((addr + bytes - 1) ^ addr) & PAGE_MASK) { + int rc, now; + + now = -addr & ~PAGE_MASK; + rc = emulator_write_emulated_onepage(addr, val, now, ctxt); + if (rc != X86EMUL_CONTINUE) + return rc; + addr += now; + val += now; + bytes -= now; + } + return emulator_write_emulated_onepage(addr, val, bytes, ctxt); +} + static int emulator_cmpxchg_emulated(unsigned long addr, const void *old, const void *new, @@ -2414,9 +2432,9 @@ static void cpuid_fix_nx_cap(struct kvm_ break; } } - if (entry && (entry->edx & EFER_NX) && !(efer & EFER_NX)) { + if (entry && (entry->edx & (1 << 20)) && !(efer & EFER_NX)) { entry->edx &= ~(1 << 20); - printk(KERN_INFO ": guest NX capability removed\n"); + printk(KERN_INFO "kvm: guest NX capability removed\n"); } } Index: linux-rt-rebase.q/drivers/kvm/x86_emulate.c =================================================================== --- linux-rt-rebase.q.orig/drivers/kvm/x86_emulate.c +++ linux-rt-rebase.q/drivers/kvm/x86_emulate.c @@ -1178,6 +1178,8 @@ pop_instruction: twobyte_insn: switch (b) { case 0x01: /* lgdt, lidt, lmsw */ + /* Disable writeback. */ + no_wb = 1; switch (modrm_reg) { u16 size; unsigned long address; Index: linux-rt-rebase.q/drivers/md/dm-table.c =================================================================== --- linux-rt-rebase.q.orig/drivers/md/dm-table.c +++ linux-rt-rebase.q/drivers/md/dm-table.c @@ -526,7 +526,7 @@ static int __table_get_device(struct dm_ void dm_set_device_limits(struct dm_target *ti, struct block_device *bdev) { - request_queue_t *q = bdev_get_queue(bdev); + struct request_queue *q = bdev_get_queue(bdev); struct io_restrictions *rs = &ti->limits; /* @@ -979,7 +979,7 @@ int dm_table_any_congested(struct dm_tab devices = dm_table_get_devices(t); for (d = devices->next; d != devices; d = d->next) { struct dm_dev *dd = list_entry(d, struct dm_dev, list); - request_queue_t *q = bdev_get_queue(dd->bdev); + struct request_queue *q = bdev_get_queue(dd->bdev); r |= bdi_congested(&q->backing_dev_info, bdi_bits); } @@ -992,7 +992,7 @@ void dm_table_unplug_all(struct dm_table for (d = devices->next; d != devices; d = d->next) { struct dm_dev *dd = list_entry(d, struct dm_dev, list); - request_queue_t *q = bdev_get_queue(dd->bdev); + struct request_queue *q = bdev_get_queue(dd->bdev); if (q->unplug_fn) q->unplug_fn(q); @@ -1011,7 +1011,7 @@ int dm_table_flush_all(struct dm_table * for (d = devices->next; d != devices; d = d->next) { struct dm_dev *dd = list_entry(d, struct dm_dev, list); - request_queue_t *q = bdev_get_queue(dd->bdev); + struct request_queue *q = bdev_get_queue(dd->bdev); int err; if (!q->issue_flush_fn) Index: linux-rt-rebase.q/drivers/md/dm.c =================================================================== --- linux-rt-rebase.q.orig/drivers/md/dm.c +++ linux-rt-rebase.q/drivers/md/dm.c @@ -80,7 +80,7 @@ struct mapped_device { unsigned long flags; - request_queue_t *queue; + struct request_queue *queue; struct gendisk *disk; char name[16]; @@ -792,7 +792,7 @@ static void __split_bio(struct mapped_de * The request function that just remaps the bio built up by * dm_merge_bvec. 
*/ -static int dm_request(request_queue_t *q, struct bio *bio) +static int dm_request(struct request_queue *q, struct bio *bio) { int r; int rw = bio_data_dir(bio); @@ -844,7 +844,7 @@ static int dm_request(request_queue_t *q return 0; } -static int dm_flush_all(request_queue_t *q, struct gendisk *disk, +static int dm_flush_all(struct request_queue *q, struct gendisk *disk, sector_t *error_sector) { struct mapped_device *md = q->queuedata; @@ -859,7 +859,7 @@ static int dm_flush_all(request_queue_t return ret; } -static void dm_unplug_all(request_queue_t *q) +static void dm_unplug_all(struct request_queue *q) { struct mapped_device *md = q->queuedata; struct dm_table *map = dm_get_table(md); @@ -1110,7 +1110,7 @@ static void __set_size(struct mapped_dev static int __bind(struct mapped_device *md, struct dm_table *t) { - request_queue_t *q = md->queue; + struct request_queue *q = md->queue; sector_t size; size = dm_table_get_size(t); Index: linux-rt-rebase.q/drivers/md/faulty.c =================================================================== --- linux-rt-rebase.q.orig/drivers/md/faulty.c +++ linux-rt-rebase.q/drivers/md/faulty.c @@ -167,7 +167,7 @@ static void add_sector(conf_t *conf, sec conf->nfaults = n+1; } -static int make_request(request_queue_t *q, struct bio *bio) +static int make_request(struct request_queue *q, struct bio *bio) { mddev_t *mddev = q->queuedata; conf_t *conf = (conf_t*)mddev->private; Index: linux-rt-rebase.q/drivers/md/linear.c =================================================================== --- linux-rt-rebase.q.orig/drivers/md/linear.c +++ linux-rt-rebase.q/drivers/md/linear.c @@ -55,7 +55,7 @@ static inline dev_info_t *which_dev(mdde * * Return amount of bytes we can take at this offset */ -static int linear_mergeable_bvec(request_queue_t *q, struct bio *bio, struct bio_vec *biovec) +static int linear_mergeable_bvec(struct request_queue *q, struct bio *bio, struct bio_vec *biovec) { mddev_t *mddev = q->queuedata; dev_info_t *dev0; @@ -79,20 +79,20 @@ static int linear_mergeable_bvec(request return maxsectors << 9; } -static void linear_unplug(request_queue_t *q) +static void linear_unplug(struct request_queue *q) { mddev_t *mddev = q->queuedata; linear_conf_t *conf = mddev_to_conf(mddev); int i; for (i=0; i < mddev->raid_disks; i++) { - request_queue_t *r_queue = bdev_get_queue(conf->disks[i].rdev->bdev); + struct request_queue *r_queue = bdev_get_queue(conf->disks[i].rdev->bdev); if (r_queue->unplug_fn) r_queue->unplug_fn(r_queue); } } -static int linear_issue_flush(request_queue_t *q, struct gendisk *disk, +static int linear_issue_flush(struct request_queue *q, struct gendisk *disk, sector_t *error_sector) { mddev_t *mddev = q->queuedata; @@ -101,7 +101,7 @@ static int linear_issue_flush(request_qu for (i=0; i < mddev->raid_disks && ret == 0; i++) { struct block_device *bdev = conf->disks[i].rdev->bdev; - request_queue_t *r_queue = bdev_get_queue(bdev); + struct request_queue *r_queue = bdev_get_queue(bdev); if (!r_queue->issue_flush_fn) ret = -EOPNOTSUPP; @@ -118,7 +118,7 @@ static int linear_congested(void *data, int i, ret = 0; for (i = 0; i < mddev->raid_disks && !ret ; i++) { - request_queue_t *q = bdev_get_queue(conf->disks[i].rdev->bdev); + struct request_queue *q = bdev_get_queue(conf->disks[i].rdev->bdev); ret |= bdi_congested(&q->backing_dev_info, bits); } return ret; @@ -330,7 +330,7 @@ static int linear_stop (mddev_t *mddev) return 0; } -static int linear_make_request (request_queue_t *q, struct bio *bio) +static int linear_make_request 
(struct request_queue *q, struct bio *bio) { const int rw = bio_data_dir(bio); mddev_t *mddev = q->queuedata; Index: linux-rt-rebase.q/drivers/md/md.c =================================================================== --- linux-rt-rebase.q.orig/drivers/md/md.c +++ linux-rt-rebase.q/drivers/md/md.c @@ -211,7 +211,7 @@ static DEFINE_SPINLOCK(all_mddevs_lock); ) -static int md_fail_request (request_queue_t *q, struct bio *bio) +static int md_fail_request (struct request_queue *q, struct bio *bio) { bio_io_error(bio, bio->bi_size); return 0; } Index: linux-rt-rebase.q/drivers/md/multipath.c =================================================================== --- linux-rt-rebase.q.orig/drivers/md/multipath.c +++ linux-rt-rebase.q/drivers/md/multipath.c @@ -125,7 +125,7 @@ static void unplug_slaves(mddev_t *mddev mdk_rdev_t *rdev = rcu_dereference(conf->multipaths[i].rdev); if (rdev && !test_bit(Faulty, &rdev->flags) && atomic_read(&rdev->nr_pending)) { - request_queue_t *r_queue = bdev_get_queue(rdev->bdev); + struct request_queue *r_queue = bdev_get_queue(rdev->bdev); atomic_inc(&rdev->nr_pending); rcu_read_unlock(); @@ -140,13 +140,13 @@ static void unplug_slaves(mddev_t *mddev rcu_read_unlock(); } -static void multipath_unplug(request_queue_t *q) +static void multipath_unplug(struct request_queue *q) { unplug_slaves(q->queuedata); } -static int multipath_make_request (request_queue_t *q, struct bio * bio) +static int multipath_make_request (struct request_queue *q, struct bio * bio) { mddev_t *mddev = q->queuedata; multipath_conf_t *conf = mddev_to_conf(mddev); @@ -199,7 +199,7 @@ static void multipath_status (struct seq seq_printf (seq, "]"); } -static int multipath_issue_flush(request_queue_t *q, struct gendisk *disk, +static int multipath_issue_flush(struct request_queue *q, struct gendisk *disk, sector_t *error_sector) { mddev_t *mddev = q->queuedata; @@ -211,7 +211,7 @@ static int multipath_issue_flush(request mdk_rdev_t *rdev = rcu_dereference(conf->multipaths[i].rdev); if (rdev && !test_bit(Faulty, &rdev->flags)) { struct block_device *bdev = rdev->bdev; - request_queue_t *r_queue = bdev_get_queue(bdev); + struct request_queue *r_queue = bdev_get_queue(bdev); if (!r_queue->issue_flush_fn) ret = -EOPNOTSUPP; @@ -238,7 +238,7 @@ static int multipath_congested(void *dat for (i = 0; i < mddev->raid_disks ; i++) { mdk_rdev_t *rdev = rcu_dereference(conf->multipaths[i].rdev); if (rdev && !test_bit(Faulty, &rdev->flags)) { - request_queue_t *q = bdev_get_queue(rdev->bdev); + struct request_queue *q = bdev_get_queue(rdev->bdev); ret |= bdi_congested(&q->backing_dev_info, bits); /* Just like multipath_map, we just check the Index: linux-rt-rebase.q/drivers/md/raid0.c =================================================================== --- linux-rt-rebase.q.orig/drivers/md/raid0.c +++ linux-rt-rebase.q/drivers/md/raid0.c @@ -25,7 +25,7 @@ #define MD_DRIVER #define MD_PERSONALITY -static void raid0_unplug(request_queue_t *q) +static void raid0_unplug(struct request_queue *q) { mddev_t *mddev = q->queuedata; raid0_conf_t *conf = mddev_to_conf(mddev); @@ -33,14 +33,14 @@ static void raid0_unplug(request_queue_t int i; for (i=0; i<mddev->raid_disks; i++) { - request_queue_t *r_queue = bdev_get_queue(devlist[i]->bdev); + struct request_queue *r_queue = bdev_get_queue(devlist[i]->bdev); if (r_queue->unplug_fn) r_queue->unplug_fn(r_queue); } } -static int raid0_issue_flush(request_queue_t *q, struct gendisk *disk, +static int raid0_issue_flush(struct request_queue *q, struct gendisk *disk, sector_t 
*error_sector) { mddev_t *mddev = q->queuedata; @@ -50,7 +50,7 @@ static int raid0_issue_flush(request_que for (i=0; i<mddev->raid_disks && ret == 0; i++) { struct block_device *bdev = devlist[i]->bdev; - request_queue_t *r_queue = bdev_get_queue(bdev); + struct request_queue *r_queue = bdev_get_queue(bdev); if (!r_queue->issue_flush_fn) ret = -EOPNOTSUPP; @@ -68,7 +68,7 @@ static int raid0_congested(void *data, i int i, ret = 0; for (i = 0; i < mddev->raid_disks && !ret ; i++) { - request_queue_t *q = bdev_get_queue(devlist[i]->bdev); + struct request_queue *q = bdev_get_queue(devlist[i]->bdev); ret |= bdi_congested(&q->backing_dev_info, bits); } @@ -268,7 +268,7 @@ static int create_strip_zones (mddev_t * * * Return amount of bytes we can accept at this offset */ -static int raid0_mergeable_bvec(request_queue_t *q, struct bio *bio, struct bio_vec *biovec) +static int raid0_mergeable_bvec(struct request_queue *q, struct bio *bio, struct bio_vec *biovec) { mddev_t *mddev = q->queuedata; sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev); @@ -408,7 +408,7 @@ static int raid0_stop (mddev_t *mddev) return 0; } -static int raid0_make_request (request_queue_t *q, struct bio *bio) +static int raid0_make_request (struct request_queue *q, struct bio *bio) { mddev_t *mddev = q->queuedata; unsigned int sect_in_chunk, chunksize_bits, chunk_size, chunk_sects; Index: linux-rt-rebase.q/drivers/md/raid1.c =================================================================== --- linux-rt-rebase.q.orig/drivers/md/raid1.c +++ linux-rt-rebase.q/drivers/md/raid1.c @@ -552,7 +552,7 @@ static void unplug_slaves(mddev_t *mddev for (i=0; i<mddev->raid_disks; i++) { mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev); if (rdev && !test_bit(Faulty, &rdev->flags) && atomic_read(&rdev->nr_pending)) { - request_queue_t *r_queue = bdev_get_queue(rdev->bdev); + struct request_queue *r_queue = bdev_get_queue(rdev->bdev); atomic_inc(&rdev->nr_pending); rcu_read_unlock(); @@ -567,7 +567,7 @@ static void unplug_slaves(mddev_t *mddev rcu_read_unlock(); } -static void raid1_unplug(request_queue_t *q) +static void raid1_unplug(struct request_queue *q) { mddev_t *mddev = q->queuedata; @@ -575,7 +575,7 @@ static void raid1_unplug(request_queue_t md_wakeup_thread(mddev->thread); } -static int raid1_issue_flush(request_queue_t *q, struct gendisk *disk, +static int raid1_issue_flush(struct request_queue *q, struct gendisk *disk, sector_t *error_sector) { mddev_t *mddev = q->queuedata; @@ -587,7 +587,7 @@ static int raid1_issue_flush(request_que mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev); if (rdev && !test_bit(Faulty, &rdev->flags)) { struct block_device *bdev = rdev->bdev; - request_queue_t *r_queue = bdev_get_queue(bdev); + struct request_queue *r_queue = bdev_get_queue(bdev); if (!r_queue->issue_flush_fn) ret = -EOPNOTSUPP; @@ -615,7 +615,7 @@ static int raid1_congested(void *data, i for (i = 0; i < mddev->raid_disks; i++) { mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev); if (rdev && !test_bit(Faulty, &rdev->flags)) { - request_queue_t *q = bdev_get_queue(rdev->bdev); + struct request_queue *q = bdev_get_queue(rdev->bdev); /* Note the '|| 1' - when read_balance prefers * non-congested targets, it can be removed @@ -765,7 +765,7 @@ do_sync_io: return NULL; } -static int make_request(request_queue_t *q, struct bio * bio) +static int make_request(struct request_queue *q, struct bio * bio) { mddev_t *mddev = q->queuedata; conf_t *conf = mddev_to_conf(mddev); Index: linux-rt-rebase.q/drivers/md/raid10.c 
=================================================================== --- linux-rt-rebase.q.orig/drivers/md/raid10.c +++ linux-rt-rebase.q/drivers/md/raid10.c @@ -453,7 +453,7 @@ static sector_t raid10_find_virt(conf_t * If near_copies == raid_disk, there are no striping issues, * but in that case, the function isn't called at all. */ -static int raid10_mergeable_bvec(request_queue_t *q, struct bio *bio, +static int raid10_mergeable_bvec(struct request_queue *q, struct bio *bio, struct bio_vec *bio_vec) { mddev_t *mddev = q->queuedata; @@ -595,7 +595,7 @@ static void unplug_slaves(mddev_t *mddev for (i=0; i<mddev->raid_disks; i++) { mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev); if (rdev && !test_bit(Faulty, &rdev->flags) && atomic_read(&rdev->nr_pending)) { - request_queue_t *r_queue = bdev_get_queue(rdev->bdev); + struct request_queue *r_queue = bdev_get_queue(rdev->bdev); atomic_inc(&rdev->nr_pending); rcu_read_unlock(); @@ -610,7 +610,7 @@ static void unplug_slaves(mddev_t *mddev rcu_read_unlock(); } -static void raid10_unplug(request_queue_t *q) +static void raid10_unplug(struct request_queue *q) { mddev_t *mddev = q->queuedata; @@ -618,7 +618,7 @@ static void raid10_unplug(request_queue_ md_wakeup_thread(mddev->thread); } -static int raid10_issue_flush(request_queue_t *q, struct gendisk *disk, +static int raid10_issue_flush(struct request_queue *q, struct gendisk *disk, sector_t *error_sector) { mddev_t *mddev = q->queuedata; @@ -630,7 +630,7 @@ static int raid10_issue_flush(request_qu mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev); if (rdev && !test_bit(Faulty, &rdev->flags)) { struct block_device *bdev = rdev->bdev; - request_queue_t *r_queue = bdev_get_queue(bdev); + struct request_queue *r_queue = bdev_get_queue(bdev); if (!r_queue->issue_flush_fn) ret = -EOPNOTSUPP; @@ -658,7 +658,7 @@ static int raid10_congested(void *data, for (i = 0; i < mddev->raid_disks && ret == 0; i++) { mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev); if (rdev && !test_bit(Faulty, &rdev->flags)) { - request_queue_t *q = bdev_get_queue(rdev->bdev); + struct request_queue *q = bdev_get_queue(rdev->bdev); ret |= bdi_congested(&q->backing_dev_info, bits); } @@ -772,7 +772,7 @@ static void unfreeze_array(conf_t *conf) spin_unlock_irq(&conf->resync_lock); } -static int make_request(request_queue_t *q, struct bio * bio) +static int make_request(struct request_queue *q, struct bio * bio) { mddev_t *mddev = q->queuedata; conf_t *conf = mddev_to_conf(mddev); Index: linux-rt-rebase.q/drivers/md/raid5.c =================================================================== --- linux-rt-rebase.q.orig/drivers/md/raid5.c +++ linux-rt-rebase.q/drivers/md/raid5.c @@ -289,7 +289,7 @@ static struct stripe_head *__find_stripe } static void unplug_slaves(mddev_t *mddev); -static void raid5_unplug_device(request_queue_t *q); +static void raid5_unplug_device(struct request_queue *q); static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector, int disks, int pd_idx, int noblock) @@ -3182,7 +3182,7 @@ static void unplug_slaves(mddev_t *mddev for (i=0; i<conf->raid_disks; i++) { mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev); if (rdev && !test_bit(Faulty, &rdev->flags) && atomic_read(&rdev->nr_pending)) { - request_queue_t *r_queue = bdev_get_queue(rdev->bdev); + struct request_queue *r_queue = bdev_get_queue(rdev->bdev); atomic_inc(&rdev->nr_pending); rcu_read_unlock(); @@ -3197,7 +3197,7 @@ static void unplug_slaves(mddev_t *mddev rcu_read_unlock(); } -static void 
raid5_unplug_device(request_queue_t *q) +static void raid5_unplug_device(struct request_queue *q) { mddev_t *mddev = q->queuedata; raid5_conf_t *conf = mddev_to_conf(mddev); @@ -3216,7 +3216,7 @@ static void raid5_unplug_device(request_ unplug_slaves(mddev); } -static int raid5_issue_flush(request_queue_t *q, struct gendisk *disk, +static int raid5_issue_flush(struct request_queue *q, struct gendisk *disk, sector_t *error_sector) { mddev_t *mddev = q->queuedata; @@ -3228,7 +3228,7 @@ static int raid5_issue_flush(request_que mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev); if (rdev && !test_bit(Faulty, &rdev->flags)) { struct block_device *bdev = rdev->bdev; - request_queue_t *r_queue = bdev_get_queue(bdev); + struct request_queue *r_queue = bdev_get_queue(bdev); if (!r_queue->issue_flush_fn) ret = -EOPNOTSUPP; @@ -3267,7 +3267,7 @@ static int raid5_congested(void *data, i /* We want read requests to align with chunks where possible, * but write requests don't need to. */ -static int raid5_mergeable_bvec(request_queue_t *q, struct bio *bio, struct bio_vec *biovec) +static int raid5_mergeable_bvec(struct request_queue *q, struct bio *bio, struct bio_vec *biovec) { mddev_t *mddev = q->queuedata; sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev); @@ -3377,7 +3377,7 @@ static int raid5_align_endio(struct bio static int bio_fits_rdev(struct bio *bi) { - request_queue_t *q = bdev_get_queue(bi->bi_bdev); + struct request_queue *q = bdev_get_queue(bi->bi_bdev); if ((bi->bi_size>>9) > q->max_sectors) return 0; @@ -3396,7 +3396,7 @@ static int bio_fits_rdev(struct bio *bi) } -static int chunk_aligned_read(request_queue_t *q, struct bio * raid_bio) +static int chunk_aligned_read(struct request_queue *q, struct bio * raid_bio) { mddev_t *mddev = q->queuedata; raid5_conf_t *conf = mddev_to_conf(mddev); @@ -3466,7 +3466,7 @@ static int chunk_aligned_read(request_qu } -static int make_request(request_queue_t *q, struct bio * bi) +static int make_request(struct request_queue *q, struct bio * bi) { mddev_t *mddev = q->queuedata; raid5_conf_t *conf = mddev_to_conf(mddev); Index: linux-rt-rebase.q/drivers/message/i2o/i2o_block.c =================================================================== --- linux-rt-rebase.q.orig/drivers/message/i2o/i2o_block.c +++ linux-rt-rebase.q/drivers/message/i2o/i2o_block.c @@ -159,7 +159,7 @@ static int i2o_block_device_flush(struct * Returns 0 on success or negative error code on failure. */ -static int i2o_block_issue_flush(request_queue_t * queue, struct gendisk *disk, +static int i2o_block_issue_flush(struct request_queue * queue, struct gendisk *disk, sector_t * error_sector) { struct i2o_block_device *i2o_blk_dev = queue->queuedata; @@ -445,7 +445,7 @@ static void i2o_block_end_request(struct { struct i2o_block_request *ireq = req->special; struct i2o_block_device *dev = ireq->i2o_blk_dev; - request_queue_t *q = req->q; + struct request_queue *q = req->q; unsigned long flags; if (end_that_request_chunk(req, uptodate, nr_bytes)) { Index: linux-rt-rebase.q/drivers/misc/asus-laptop.c =================================================================== --- linux-rt-rebase.q.orig/drivers/misc/asus-laptop.c +++ linux-rt-rebase.q/drivers/misc/asus-laptop.c @@ -53,7 +53,6 @@ #define ASUS_HOTK_NAME "Asus Laptop Support" #define ASUS_HOTK_CLASS "hotkey" #define ASUS_HOTK_DEVICE_NAME "Hotkey" -#define ASUS_HOTK_HID "ATK0100" #define ASUS_HOTK_FILE "asus-laptop" #define ASUS_HOTK_PREFIX "\\_SB.ATKD." 
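The hunk below makes the same conversion as the atlas_btns.c change above, and as the sony-laptop.c and thinkpad_acpi.c changes further down: the single HID string in .ids is replaced by a NULL-terminated struct acpi_device_id table, and MODULE_DEVICE_TABLE(acpi, ...) emits the alias information used to autoload the module for matching devices. A minimal sketch of the pattern, in which the driver name, the callbacks and the HID "XYZ0000" are placeholders:

#include <linux/acpi.h>
#include <linux/module.h>

static int example_add(struct acpi_device *device);
static int example_remove(struct acpi_device *device, int type);

/* one entry per supported HID, terminated by an empty entry */
static const struct acpi_device_id example_device_ids[] = {
	{"XYZ0000", 0},		/* hypothetical hardware ID */
	{"", 0},
};
MODULE_DEVICE_TABLE(acpi, example_device_ids);

static struct acpi_driver example_driver = {
	.name	= "example",
	.class	= "example",
	.ids	= example_device_ids,	/* table instead of a single HID string */
	.ops	= {
		.add	= example_add,
		.remove	= example_remove,
	},
};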
@@ -197,12 +196,18 @@ static struct asus_hotk *hotk; /* * The hotkey driver declaration */ +static const struct acpi_device_id asus_device_ids[] = { + {"ATK0100", 0}, + {"", 0}, +}; +MODULE_DEVICE_TABLE(acpi, asus_device_ids); + static int asus_hotk_add(struct acpi_device *device); static int asus_hotk_remove(struct acpi_device *device, int type); static struct acpi_driver asus_hotk_driver = { .name = ASUS_HOTK_NAME, .class = ASUS_HOTK_CLASS, - .ids = ASUS_HOTK_HID, + .ids = asus_device_ids, .ops = { .add = asus_hotk_add, .remove = asus_hotk_remove, @@ -1067,19 +1072,16 @@ static void asus_backlight_exit(void) } #define ASUS_LED_UNREGISTER(object) \ - if(object##_led.class_dev \ - && !IS_ERR(object##_led.class_dev)) \ - led_classdev_unregister(&object##_led) + led_classdev_unregister(&object##_led) static void asus_led_exit(void) { + destroy_workqueue(led_workqueue); ASUS_LED_UNREGISTER(mled); ASUS_LED_UNREGISTER(tled); ASUS_LED_UNREGISTER(pled); ASUS_LED_UNREGISTER(rled); ASUS_LED_UNREGISTER(gled); - - destroy_workqueue(led_workqueue); } static void __exit asus_laptop_exit(void) @@ -1135,29 +1137,42 @@ static int asus_led_init(struct device * rv = ASUS_LED_REGISTER(mled, dev); if (rv) - return rv; + goto out; rv = ASUS_LED_REGISTER(tled, dev); if (rv) - return rv; + goto out1; rv = ASUS_LED_REGISTER(rled, dev); if (rv) - return rv; + goto out2; rv = ASUS_LED_REGISTER(pled, dev); if (rv) - return rv; + goto out3; rv = ASUS_LED_REGISTER(gled, dev); if (rv) - return rv; + goto out4; led_workqueue = create_singlethread_workqueue("led_workqueue"); if (!led_workqueue) - return -ENOMEM; + goto out5; return 0; +out5: + rv = -ENOMEM; + ASUS_LED_UNREGISTER(gled); +out4: + ASUS_LED_UNREGISTER(pled); +out3: + ASUS_LED_UNREGISTER(rled); +out2: + ASUS_LED_UNREGISTER(tled); +out1: + ASUS_LED_UNREGISTER(mled); +out: + return rv; } static int __init asus_laptop_init(void) Index: linux-rt-rebase.q/drivers/misc/sony-laptop.c =================================================================== --- linux-rt-rebase.q.orig/drivers/misc/sony-laptop.c +++ linux-rt-rebase.q/drivers/misc/sony-laptop.c @@ -1124,10 +1124,22 @@ static int sony_nc_remove(struct acpi_de return 0; } +static const struct acpi_device_id sony_device_ids[] = { + {SONY_NC_HID, 0}, + {SONY_PIC_HID, 0}, + {"", 0}, +}; +MODULE_DEVICE_TABLE(acpi, sony_device_ids); + +static const struct acpi_device_id sony_nc_device_ids[] = { + {SONY_NC_HID, 0}, + {"", 0}, +}; + static struct acpi_driver sony_nc_driver = { .name = SONY_NC_DRIVER_NAME, .class = SONY_NC_CLASS, - .ids = SONY_NC_HID, + .ids = sony_nc_device_ids, .owner = THIS_MODULE, .ops = { .add = sony_nc_add, @@ -2470,10 +2482,15 @@ static int sony_pic_resume(struct acpi_d return 0; } +static const struct acpi_device_id sony_pic_device_ids[] = { + {SONY_PIC_HID, 0}, + {"", 0}, +}; + static struct acpi_driver sony_pic_driver = { .name = SONY_PIC_DRIVER_NAME, .class = SONY_PIC_CLASS, - .ids = SONY_PIC_HID, + .ids = sony_pic_device_ids, .owner = THIS_MODULE, .ops = { .add = sony_pic_add, Index: linux-rt-rebase.q/drivers/misc/thinkpad_acpi.c =================================================================== --- linux-rt-rebase.q.orig/drivers/misc/thinkpad_acpi.c +++ linux-rt-rebase.q/drivers/misc/thinkpad_acpi.c @@ -411,12 +411,13 @@ static int __init register_tpacpi_subdri sprintf(ibm->acpi->driver->name, "%s_%s", IBM_NAME, ibm->name); ibm->acpi->driver->ids = ibm->acpi->hid; + ibm->acpi->driver->ops.add = &tpacpi_device_add; rc = acpi_bus_register_driver(ibm->acpi->driver); if (rc < 0) { 
printk(IBM_ERR "acpi_bus_register_driver(%s) failed: %d\n", - ibm->acpi->hid, rc); + ibm->name, rc); kfree(ibm->acpi->driver); ibm->acpi->driver = NULL; } else if (!rc) @@ -1316,8 +1317,13 @@ errexit: return res; } +static const struct acpi_device_id ibm_htk_device_ids[] = { + {IBM_HKEY_HID, 0}, + {"", 0}, +}; + static struct tp_acpi_drv_struct ibm_hotkey_acpidriver = { - .hid = IBM_HKEY_HID, + .hid = ibm_htk_device_ids, .notify = hotkey_notify, .handle = &hkey_handle, .type = ACPI_DEVICE_NOTIFY, @@ -2080,6 +2086,11 @@ IBM_HANDLE(dock, root, "\\_SB.GDCK", /* /* don't list other alternatives as we install a notify handler on the 570 */ IBM_HANDLE(pci, root, "\\_SB.PCI"); /* 570 */ +static const struct acpi_device_id ibm_pci_device_ids[] = { + {PCI_ROOT_HID_STRING, 0}, + {"", 0}, +}; + static struct tp_acpi_drv_struct ibm_dock_acpidriver[2] = { { .notify = dock_notify, @@ -2090,7 +2101,7 @@ static struct tp_acpi_drv_struct ibm_doc /* THIS ONE MUST NEVER BE USED FOR DRIVER AUTOLOADING. * We just use it to get notifications of dock hotplug * in very old thinkpads */ - .hid = PCI_ROOT_HID_STRING, + .hid = ibm_pci_device_ids, .notify = dock_notify, .handle = &pci_handle, .type = ACPI_SYSTEM_NOTIFY, @@ -2149,7 +2160,8 @@ static int __init dock_init2(struct ibm_ static void dock_notify(struct ibm_struct *ibm, u32 event) { int docked = dock_docked(); - int pci = ibm->acpi->hid && strstr(ibm->acpi->hid, PCI_ROOT_HID_STRING); + int pci = ibm->acpi->hid && ibm->acpi->device && + acpi_match_device_ids(ibm->acpi->device, ibm_pci_device_ids); if (event == 1 && !pci) /* 570 */ acpi_bus_generate_event(ibm->acpi->device, event, 1); /* button */ Index: linux-rt-rebase.q/drivers/misc/thinkpad_acpi.h =================================================================== --- linux-rt-rebase.q.orig/drivers/misc/thinkpad_acpi.h +++ linux-rt-rebase.q/drivers/misc/thinkpad_acpi.h @@ -193,7 +193,7 @@ static void thinkpad_acpi_module_exit(vo struct ibm_struct; struct tp_acpi_drv_struct { - char *hid; + const struct acpi_device_id *hid; struct acpi_driver *driver; void (*notify) (struct ibm_struct *, u32); Index: linux-rt-rebase.q/drivers/mmc/card/queue.c =================================================================== --- linux-rt-rebase.q.orig/drivers/mmc/card/queue.c +++ linux-rt-rebase.q/drivers/mmc/card/queue.c @@ -83,7 +83,7 @@ static int mmc_queue_thread(void *d) * on any queue on this host, and attempt to issue it. This may * not be the queue we were asked to process. 
*/ -static void mmc_request(request_queue_t *q) +static void mmc_request(struct request_queue *q) { struct mmc_queue *mq = q->queuedata; struct request *req; @@ -211,7 +211,7 @@ int mmc_init_queue(struct mmc_queue *mq, void mmc_cleanup_queue(struct mmc_queue *mq) { - request_queue_t *q = mq->queue; + struct request_queue *q = mq->queue; unsigned long flags; /* Mark that we should start throwing out stragglers */ @@ -252,7 +252,7 @@ EXPORT_SYMBOL(mmc_cleanup_queue); */ void mmc_queue_suspend(struct mmc_queue *mq) { - request_queue_t *q = mq->queue; + struct request_queue *q = mq->queue; unsigned long flags; if (!(mq->flags & MMC_QUEUE_SUSPENDED)) { @@ -272,7 +272,7 @@ void mmc_queue_suspend(struct mmc_queue */ void mmc_queue_resume(struct mmc_queue *mq) { - request_queue_t *q = mq->queue; + struct request_queue *q = mq->queue; unsigned long flags; if (mq->flags & MMC_QUEUE_SUSPENDED) { Index: linux-rt-rebase.q/drivers/net/82596.c =================================================================== --- linux-rt-rebase.q.orig/drivers/net/82596.c +++ linux-rt-rebase.q/drivers/net/82596.c @@ -57,6 +57,7 @@ #include #include #include +#include static char version[] __initdata = "82596.c $Revision: 1.5 $\n"; Index: linux-rt-rebase.q/drivers/net/Makefile =================================================================== --- linux-rt-rebase.q.orig/drivers/net/Makefile +++ linux-rt-rebase.q/drivers/net/Makefile @@ -18,7 +18,7 @@ gianfar_driver-objs := gianfar.o \ gianfar_sysfs.o obj-$(CONFIG_UCC_GETH) += ucc_geth_driver.o -ucc_geth_driver-objs := ucc_geth.o ucc_geth_mii.o +ucc_geth_driver-objs := ucc_geth.o ucc_geth_mii.o ucc_geth_ethtool.o # # link order important here Index: linux-rt-rebase.q/drivers/net/acenic.c =================================================================== --- linux-rt-rebase.q.orig/drivers/net/acenic.c +++ linux-rt-rebase.q/drivers/net/acenic.c @@ -3128,12 +3128,6 @@ static int __devinit read_eeprom_byte(st int result = 0; short i; - if (!dev) { - printk(KERN_ERR "No device!\n"); - result = -ENODEV; - goto out; - } - /* * Don't take interrupts on this CPU will bit banging * the %#%#@$ I2C device Index: linux-rt-rebase.q/drivers/net/atl1/atl1_hw.h =================================================================== --- linux-rt-rebase.q.orig/drivers/net/atl1/atl1_hw.h +++ linux-rt-rebase.q/drivers/net/atl1/atl1_hw.h @@ -680,11 +680,6 @@ void atl1_check_options(struct atl1_adap #define AUTONEG_ADVERTISE_10_100_ALL 0x000F /* All 10/100 speeds */ #define AUTONEG_ADVERTISE_10_ALL 0x0003 /* 10Mbps Full & Half speeds */ -/* The size (in bytes) of a ethernet packet */ -#define ENET_HEADER_SIZE 14 -#define MAXIMUM_ETHERNET_FRAME_SIZE 1518 /* with FCS */ -#define MINIMUM_ETHERNET_FRAME_SIZE 64 /* with FCS */ -#define ETHERNET_FCS_SIZE 4 #define MAX_JUMBO_FRAME_SIZE 0x2800 #define PHY_AUTO_NEG_TIME 45 /* 4.5 Seconds */ @@ -929,8 +924,8 @@ enum atl1_dma_req_block { atl1_dma_req_128 = 0, atl1_dma_req_256 = 1, atl1_dma_req_512 = 2, - atl1_dam_req_1024 = 3, - atl1_dam_req_2048 = 4, + atl1_dma_req_1024 = 3, + atl1_dma_req_2048 = 4, atl1_dma_req_4096 = 5 }; Index: linux-rt-rebase.q/drivers/net/atl1/atl1_main.c =================================================================== --- linux-rt-rebase.q.orig/drivers/net/atl1/atl1_main.c +++ linux-rt-rebase.q/drivers/net/atl1/atl1_main.c @@ -59,6 +59,7 @@ #include #include #include +#include #include #include #include @@ -120,8 +121,8 @@ static int __devinit atl1_sw_init(struct struct atl1_hw *hw = &adapter->hw; struct net_device *netdev = 
adapter->netdev; - hw->max_frame_size = netdev->mtu + ENET_HEADER_SIZE + ETHERNET_FCS_SIZE; - hw->min_frame_size = MINIMUM_ETHERNET_FRAME_SIZE; + hw->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN; + hw->min_frame_size = ETH_ZLEN + ETH_FCS_LEN; adapter->wol = 0; adapter->rx_buffer_len = (hw->max_frame_size + 7) & ~7; @@ -314,7 +315,7 @@ err_nomem: return -ENOMEM; } -void atl1_init_ring_ptrs(struct atl1_adapter *adapter) +static void atl1_init_ring_ptrs(struct atl1_adapter *adapter) { struct atl1_tpd_ring *tpd_ring = &adapter->tpd_ring; struct atl1_rfd_ring *rfd_ring = &adapter->rfd_ring; @@ -688,9 +689,9 @@ static int atl1_change_mtu(struct net_de { struct atl1_adapter *adapter = netdev_priv(netdev); int old_mtu = netdev->mtu; - int max_frame = new_mtu + ENET_HEADER_SIZE + ETHERNET_FCS_SIZE; + int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN; - if ((max_frame < MINIMUM_ETHERNET_FRAME_SIZE) || + if ((max_frame < ETH_ZLEN + ETH_FCS_LEN) || (max_frame > MAX_JUMBO_FRAME_SIZE)) { dev_warn(&adapter->pdev->dev, "invalid MTU setting\n"); return -EINVAL; @@ -908,8 +909,8 @@ static u32 atl1_configure(struct atl1_ad /* config DMA Engine */ value = ((((u32) hw->dmar_block) & DMA_CTRL_DMAR_BURST_LEN_MASK) << DMA_CTRL_DMAR_BURST_LEN_SHIFT) | - ((((u32) hw->dmaw_block) & DMA_CTRL_DMAR_BURST_LEN_MASK) - << DMA_CTRL_DMAR_BURST_LEN_SHIFT) | DMA_CTRL_DMAR_EN | + ((((u32) hw->dmaw_block) & DMA_CTRL_DMAW_BURST_LEN_MASK) + << DMA_CTRL_DMAW_BURST_LEN_SHIFT) | DMA_CTRL_DMAR_EN | DMA_CTRL_DMAW_EN; value |= (u32) hw->dma_ord; if (atl1_rcb_128 == hw->rcb_value) @@ -917,7 +918,10 @@ static u32 atl1_configure(struct atl1_ad iowrite32(value, hw->hw_addr + REG_DMA_CTRL); /* config CMB / SMB */ - value = hw->cmb_rrd | ((u32) hw->cmb_tpd << 16); + value = (hw->cmb_tpd > adapter->tpd_ring.count) ? 
+ hw->cmb_tpd : adapter->tpd_ring.count; + value <<= 16; + value |= hw->cmb_rrd; iowrite32(value, hw->hw_addr + REG_CMB_WRITE_TH); value = hw->cmb_rx_timer | ((u32) hw->cmb_tx_timer << 16); iowrite32(value, hw->hw_addr + REG_CMB_WRITE_TIMER); @@ -1334,7 +1338,7 @@ rrd_ok: skb = buffer_info->skb; length = le16_to_cpu(rrd->xsz.xsum_sz.pkt_size); - skb_put(skb, length - ETHERNET_FCS_SIZE); + skb_put(skb, length - ETH_FCS_LEN); /* Receive Checksum Offload */ atl1_rx_checksum(adapter, rrd, skb); @@ -1422,7 +1426,7 @@ static void atl1_intr_tx(struct atl1_ada netif_wake_queue(adapter->netdev); } -static u16 tpd_avail(struct atl1_tpd_ring *tpd_ring) +static u16 atl1_tpd_avail(struct atl1_tpd_ring *tpd_ring) { u16 next_to_clean = atomic_read(&tpd_ring->next_to_clean); u16 next_to_use = atomic_read(&tpd_ring->next_to_use); @@ -1453,7 +1457,7 @@ static int atl1_tso(struct atl1_adapter tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, 0, IPPROTO_TCP, 0); ipofst = skb_network_offset(skb); - if (ipofst != ENET_HEADER_SIZE) /* 802.3 frame */ + if (ipofst != ETH_HLEN) /* 802.3 frame */ tso->tsopl |= 1 << TSO_PARAM_ETHTYPE_SHIFT; tso->tsopl |= (iph->ihl & @@ -1708,7 +1712,7 @@ static int atl1_xmit_frame(struct sk_buf return NETDEV_TX_LOCKED; } - if (tpd_avail(&adapter->tpd_ring) < count) { + if (atl1_tpd_avail(&adapter->tpd_ring) < count) { /* not enough descriptors */ netif_stop_queue(netdev); spin_unlock_irqrestore(&adapter->lock, flags); Index: linux-rt-rebase.q/drivers/net/defxx.c =================================================================== --- linux-rt-rebase.q.orig/drivers/net/defxx.c +++ linux-rt-rebase.q/drivers/net/defxx.c @@ -200,6 +200,7 @@ /* Include files */ #include +#include #include #include #include @@ -240,8 +241,6 @@ static char version[] __devinitdata = */ #define NEW_SKB_SIZE (PI_RCV_DATA_K_SIZE_MAX+128) -#define __unused __attribute__ ((unused)) - #ifdef CONFIG_PCI #define DFX_BUS_PCI(dev) (dev->bus == &pci_bus_type) #else @@ -375,7 +374,7 @@ static inline void dfx_outl(DFX_board_t static void dfx_port_write_long(DFX_board_t *bp, int offset, u32 data) { - struct device __unused *bdev = bp->bus_dev; + struct device __maybe_unused *bdev = bp->bus_dev; int dfx_bus_tc = DFX_BUS_TC(bdev); int dfx_use_mmio = DFX_MMIO || dfx_bus_tc; @@ -399,7 +398,7 @@ static inline void dfx_inl(DFX_board_t * static void dfx_port_read_long(DFX_board_t *bp, int offset, u32 *data) { - struct device __unused *bdev = bp->bus_dev; + struct device __maybe_unused *bdev = bp->bus_dev; int dfx_bus_tc = DFX_BUS_TC(bdev); int dfx_use_mmio = DFX_MMIO || dfx_bus_tc; @@ -866,7 +865,7 @@ static void __devinit dfx_bus_uninit(str static void __devinit dfx_bus_config_check(DFX_board_t *bp) { - struct device __unused *bdev = bp->bus_dev; + struct device __maybe_unused *bdev = bp->bus_dev; int dfx_bus_eisa = DFX_BUS_EISA(bdev); int status; /* return code from adapter port control call */ u32 host_data; /* LW data returned from port control call */ @@ -3624,8 +3623,8 @@ static void __devexit dfx_unregister(str } -static int __devinit __unused dfx_dev_register(struct device *); -static int __devexit __unused dfx_dev_unregister(struct device *); +static int __devinit __maybe_unused dfx_dev_register(struct device *); +static int __devexit __maybe_unused dfx_dev_unregister(struct device *); #ifdef CONFIG_PCI static int __devinit dfx_pci_register(struct pci_dev *, @@ -3699,7 +3698,7 @@ static struct tc_driver dfx_tc_driver = }; #endif /* CONFIG_TC */ -static int __devinit __unused dfx_dev_register(struct device 
*dev) +static int __devinit __maybe_unused dfx_dev_register(struct device *dev) { int status; @@ -3709,7 +3708,7 @@ static int __devinit __unused dfx_dev_re return status; } -static int __devexit __unused dfx_dev_unregister(struct device *dev) +static int __devexit __maybe_unused dfx_dev_unregister(struct device *dev) { put_device(dev); dfx_unregister(dev); Index: linux-rt-rebase.q/drivers/net/ehea/ehea.h =================================================================== --- linux-rt-rebase.q.orig/drivers/net/ehea/ehea.h +++ linux-rt-rebase.q/drivers/net/ehea/ehea.h @@ -39,7 +39,7 @@ #include #define DRV_NAME "ehea" -#define DRV_VERSION "EHEA_0071" +#define DRV_VERSION "EHEA_0072" /* eHEA capability flags */ #define DLPAR_PORT_ADD_REM 1 Index: linux-rt-rebase.q/drivers/net/ehea/ehea_main.c =================================================================== --- linux-rt-rebase.q.orig/drivers/net/ehea/ehea_main.c +++ linux-rt-rebase.q/drivers/net/ehea/ehea_main.c @@ -589,6 +589,23 @@ static int ehea_poll(struct net_device * return 1; } +#ifdef CONFIG_NET_POLL_CONTROLLER +static void ehea_netpoll(struct net_device *dev) +{ + struct ehea_port *port = netdev_priv(dev); + + netif_rx_schedule(port->port_res[0].d_netdev); +} +#endif + +static int ehea_poll_firstqueue(struct net_device *dev, int *budget) +{ + struct ehea_port *port = netdev_priv(dev); + struct net_device *d_dev = port->port_res[0].d_netdev; + + return ehea_poll(d_dev, budget); +} + static irqreturn_t ehea_recv_irq_handler(int irq, void *param) { struct ehea_port_res *pr = param; @@ -2626,7 +2643,10 @@ struct ehea_port *ehea_setup_single_port memcpy(dev->dev_addr, &port->mac_addr, ETH_ALEN); dev->open = ehea_open; - dev->poll = ehea_poll; + dev->poll = ehea_poll_firstqueue; +#ifdef CONFIG_NET_POLL_CONTROLLER + dev->poll_controller = ehea_netpoll; +#endif dev->weight = 64; dev->stop = ehea_stop; dev->hard_start_xmit = ehea_start_xmit; Index: linux-rt-rebase.q/drivers/net/forcedeth.c =================================================================== --- linux-rt-rebase.q.orig/drivers/net/forcedeth.c +++ linux-rt-rebase.q/drivers/net/forcedeth.c @@ -5546,6 +5546,22 @@ static struct pci_device_id pci_tbl[] = PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_27), .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_HIGH_DMA|DEV_HAS_POWER_CNTRL|DEV_HAS_MSI|DEV_HAS_PAUSEFRAME_TX|DEV_HAS_STATISTICS_V2|DEV_HAS_TEST_EXTENDED|DEV_HAS_MGMT_UNIT, }, + { /* MCP73 Ethernet Controller */ + PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_28), + .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_HIGH_DMA|DEV_HAS_POWER_CNTRL|DEV_HAS_MSI|DEV_HAS_PAUSEFRAME_TX|DEV_HAS_STATISTICS_V2|DEV_HAS_TEST_EXTENDED|DEV_HAS_MGMT_UNIT, + }, + { /* MCP73 Ethernet Controller */ + PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_29), + .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_HIGH_DMA|DEV_HAS_POWER_CNTRL|DEV_HAS_MSI|DEV_HAS_PAUSEFRAME_TX|DEV_HAS_STATISTICS_V2|DEV_HAS_TEST_EXTENDED|DEV_HAS_MGMT_UNIT, + }, + { /* MCP73 Ethernet Controller */ + PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_30), + .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_HIGH_DMA|DEV_HAS_POWER_CNTRL|DEV_HAS_MSI|DEV_HAS_PAUSEFRAME_TX|DEV_HAS_STATISTICS_V2|DEV_HAS_TEST_EXTENDED|DEV_HAS_MGMT_UNIT, + }, + { /* MCP73 Ethernet Controller */ + PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_31), + .driver_data = 
DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_HIGH_DMA|DEV_HAS_POWER_CNTRL|DEV_HAS_MSI|DEV_HAS_PAUSEFRAME_TX|DEV_HAS_STATISTICS_V2|DEV_HAS_TEST_EXTENDED|DEV_HAS_MGMT_UNIT, + }, {0,}, }; Index: linux-rt-rebase.q/drivers/net/netxen/netxen_nic.h =================================================================== --- linux-rt-rebase.q.orig/drivers/net/netxen/netxen_nic.h +++ linux-rt-rebase.q/drivers/net/netxen/netxen_nic.h @@ -1179,8 +1179,7 @@ dma_watchdog_shutdown_poll_result(struct NETXEN_CAM_RAM(NETXEN_CAM_RAM_DMA_WATCHDOG_CTRL), &ctrl, 4)) printk(KERN_ERR "failed to read dma watchdog status\n"); - return ((netxen_get_dma_watchdog_enabled(ctrl) == 0) && - (netxen_get_dma_watchdog_disabled(ctrl) == 0)); + return (netxen_get_dma_watchdog_enabled(ctrl) == 0); } static inline int Index: linux-rt-rebase.q/drivers/net/netxen/netxen_nic_main.c =================================================================== --- linux-rt-rebase.q.orig/drivers/net/netxen/netxen_nic_main.c +++ linux-rt-rebase.q/drivers/net/netxen/netxen_nic_main.c @@ -46,7 +46,7 @@ MODULE_DESCRIPTION("NetXen Multi port (1 MODULE_LICENSE("GPL"); MODULE_VERSION(NETXEN_NIC_LINUX_VERSIONID); -char netxen_nic_driver_name[] = "netxen-nic"; +char netxen_nic_driver_name[] = "netxen_nic"; static char netxen_nic_driver_string[] = "NetXen Network Driver version " NETXEN_NIC_LINUX_VERSIONID; @@ -640,6 +640,10 @@ netxen_nic_probe(struct pci_dev *pdev, c NETXEN_CRB_NORMALIZE(adapter, NETXEN_ROMUSB_GLB_PEGTUNE_DONE)); /* Handshake with the card before we register the devices. */ + writel(0, NETXEN_CRB_NORMALIZE(adapter, CRB_CMDPEG_STATE)); + netxen_pinit_from_rom(adapter, 0); + msleep(1); + netxen_load_firmware(adapter); netxen_phantom_init(adapter, NETXEN_NIC_PEG_TUNE); } @@ -782,19 +786,18 @@ static void __devexit netxen_nic_remove( if (adapter->portnum == 0) { if (init_firmware_done) { - dma_watchdog_shutdown_request(adapter); - msleep(100); i = 100; - while ((dma_watchdog_shutdown_poll_result(adapter) != 1) && i) { - printk(KERN_INFO "dma_watchdog_shutdown_poll still in progress\n"); + do { + if (dma_watchdog_shutdown_request(adapter) == 1) + break; msleep(100); - i--; - } - - if (i == 0) { - printk(KERN_ERR "dma_watchdog_shutdown_request failed\n"); - return; - } + if (dma_watchdog_shutdown_poll_result(adapter) == 1) + break; + } while (--i); + + if (i == 0) + printk(KERN_ERR "%s: dma_watchdog_shutdown failed\n", + netdev->name); /* clear the register for future unloads/loads */ writel(0, NETXEN_CRB_NORMALIZE(adapter, NETXEN_CAM_RAM(0x1fc))); @@ -803,11 +806,9 @@ static void __devexit netxen_nic_remove( /* leave the hw in the same state as reboot */ writel(0, NETXEN_CRB_NORMALIZE(adapter, CRB_CMDPEG_STATE)); - if (netxen_pinit_from_rom(adapter, 0)) - return; + netxen_pinit_from_rom(adapter, 0); msleep(1); - if (netxen_load_firmware(adapter)) - return; + netxen_load_firmware(adapter); netxen_phantom_init(adapter, NETXEN_NIC_PEG_TUNE); } @@ -816,22 +817,21 @@ static void __devexit netxen_nic_remove( printk(KERN_INFO "State: 0x%0x\n", readl(NETXEN_CRB_NORMALIZE(adapter, CRB_CMDPEG_STATE))); - dma_watchdog_shutdown_request(adapter); - msleep(100); i = 100; - while ((dma_watchdog_shutdown_poll_result(adapter) != 1) && i) { - printk(KERN_INFO "dma_watchdog_shutdown_poll still in progress\n"); + do { + if (dma_watchdog_shutdown_request(adapter) == 1) + break; msleep(100); - i--; - } + if (dma_watchdog_shutdown_poll_result(adapter) == 1) + break; + } while (--i); if (i) { netxen_free_adapter_offload(adapter); } else { - printk(KERN_ERR "failed to 
dma shutdown\n"); - return; + printk(KERN_ERR "%s: dma_watchdog_shutdown failed\n", + netdev->name); } - } iounmap(adapter->ahw.db_base); Index: linux-rt-rebase.q/drivers/net/phy/vitesse.c =================================================================== --- linux-rt-rebase.q.orig/drivers/net/phy/vitesse.c +++ linux-rt-rebase.q/drivers/net/phy/vitesse.c @@ -109,7 +109,7 @@ static int vsc824x_config_intr(struct ph */ err = phy_read(phydev, MII_VSC8244_ISTAT); - if (err) + if (err < 0) return err; err = phy_write(phydev, MII_VSC8244_IMASK, 0); Index: linux-rt-rebase.q/drivers/net/ps3_gelic_net.c =================================================================== --- linux-rt-rebase.q.orig/drivers/net/ps3_gelic_net.c +++ linux-rt-rebase.q/drivers/net/ps3_gelic_net.c @@ -290,7 +290,8 @@ static void gelic_net_release_rx_chain(s descr->buf_addr = 0; dev_kfree_skb_any(descr->skb); descr->skb = NULL; - descr->dmac_cmd_status = GELIC_NET_DESCR_NOT_IN_USE; + gelic_net_set_descr_status(descr, + GELIC_NET_DESCR_NOT_IN_USE); } descr = descr->next; } while (descr != card->rx_chain.head); @@ -374,7 +375,7 @@ static void gelic_net_release_tx_descr(s descr->skb = NULL; /* set descr status */ - descr->dmac_cmd_status = GELIC_NET_DMAC_CMDSTAT_NOT_IN_USE; + gelic_net_set_descr_status(descr, GELIC_NET_DESCR_NOT_IN_USE); } /** @@ -403,26 +404,29 @@ static void gelic_net_release_tx_chain(s "%s: forcing end of tx descriptor " \ "with status %x\n", __func__, status); - card->netdev_stats.tx_dropped++; + card->netdev->stats.tx_dropped++; break; case GELIC_NET_DESCR_COMPLETE: - card->netdev_stats.tx_packets++; - card->netdev_stats.tx_bytes += - tx_chain->tail->skb->len; + if (tx_chain->tail->skb) { + card->netdev->stats.tx_packets++; + card->netdev->stats.tx_bytes += + tx_chain->tail->skb->len; + } break; case GELIC_NET_DESCR_CARDOWNED: /* pending tx request */ default: /* any other value (== GELIC_NET_DESCR_NOT_IN_USE) */ - goto out; + if (!stop) + goto out; } gelic_net_release_tx_descr(card, tx_chain->tail); - release = 1; + release ++; } out: - if (!stop && release) + if (!stop && (2 < release)) netif_wake_queue(card->netdev); } @@ -659,19 +663,21 @@ static int gelic_net_prepare_tx_descr_v( { dma_addr_t buf[2]; unsigned int vlan_len; + struct gelic_net_descr *sec_descr = descr->next; if (skb->len < GELIC_NET_VLAN_POS) return -EINVAL; - memcpy(&descr->vlan, skb->data, GELIC_NET_VLAN_POS); + vlan_len = GELIC_NET_VLAN_POS; + memcpy(&descr->vlan, skb->data, vlan_len); if (card->vlan_index != -1) { + /* internal vlan tag used */ descr->vlan.h_vlan_proto = htons(ETH_P_8021Q); /* vlan 0x8100*/ descr->vlan.h_vlan_TCI = htons(card->vlan_id[card->vlan_index]); - vlan_len = GELIC_NET_VLAN_POS + VLAN_HLEN; /* VLAN_HLEN=4 */ - } else - vlan_len = GELIC_NET_VLAN_POS; /* no vlan tag */ + vlan_len += VLAN_HLEN; /* added for above two lines */ + } - /* first descr */ + /* map data area */ buf[0] = dma_map_single(ctodev(card), &descr->vlan, vlan_len, DMA_TO_DEVICE); @@ -682,20 +688,6 @@ static int gelic_net_prepare_tx_descr_v( return -ENOMEM; } - descr->buf_addr = buf[0]; - descr->buf_size = vlan_len; - descr->skb = skb; /* not used */ - descr->data_status = 0; - gelic_net_set_txdescr_cmdstat(descr, skb, 1); /* not the frame end */ - - /* second descr */ - card->tx_chain.head = card->tx_chain.head->next; - descr->next_descr_addr = descr->next->bus_addr; - descr = descr->next; - if (gelic_net_get_descr_status(descr) != GELIC_NET_DESCR_NOT_IN_USE) - /* XXX will be removed */ - dev_err(ctodev(card), "descr is not free!\n"); - 
buf[1] = dma_map_single(ctodev(card), skb->data + GELIC_NET_VLAN_POS, skb->len - GELIC_NET_VLAN_POS, DMA_TO_DEVICE); @@ -710,13 +702,24 @@ static int gelic_net_prepare_tx_descr_v( return -ENOMEM; } - descr->buf_addr = buf[1]; - descr->buf_size = skb->len - GELIC_NET_VLAN_POS; - descr->skb = skb; + /* first descr */ + descr->buf_addr = buf[0]; + descr->buf_size = vlan_len; + descr->skb = NULL; /* not used */ descr->data_status = 0; - descr->next_descr_addr = 0; /* terminate hw descr */ - gelic_net_set_txdescr_cmdstat(descr, skb, 0); + descr->next_descr_addr = descr->next->bus_addr; + gelic_net_set_txdescr_cmdstat(descr, skb, 1); /* not the frame end */ + + /* second descr */ + sec_descr->buf_addr = buf[1]; + sec_descr->buf_size = skb->len - GELIC_NET_VLAN_POS; + sec_descr->skb = skb; + sec_descr->data_status = 0; + sec_descr->next_descr_addr = 0; /* terminate hw descr */ + gelic_net_set_txdescr_cmdstat(sec_descr, skb, 0); + /* bump free descriptor pointer */ + card->tx_chain.head = sec_descr->next; return 0; } @@ -729,7 +732,7 @@ static int gelic_net_prepare_tx_descr_v( static int gelic_net_kick_txdma(struct gelic_net_card *card, struct gelic_net_descr *descr) { - int status = -ENXIO; + int status = 0; int count = 10; if (card->tx_dma_progress) @@ -763,47 +766,62 @@ static int gelic_net_kick_txdma(struct g static int gelic_net_xmit(struct sk_buff *skb, struct net_device *netdev) { struct gelic_net_card *card = netdev_priv(netdev); - struct gelic_net_descr *descr = NULL; + struct gelic_net_descr *descr; int result; unsigned long flags; spin_lock_irqsave(&card->tx_dma_lock, flags); gelic_net_release_tx_chain(card, 0); - if (!skb) - goto kick; + descr = gelic_net_get_next_tx_descr(card); if (!descr) { + /* + * no more descriptors free + */ netif_stop_queue(netdev); spin_unlock_irqrestore(&card->tx_dma_lock, flags); return NETDEV_TX_BUSY; } - result = gelic_net_prepare_tx_descr_v(card, descr, skb); - - if (result) - goto error; - card->tx_chain.head = card->tx_chain.head->next; - - if (descr->prev) - descr->prev->next_descr_addr = descr->bus_addr; -kick: + result = gelic_net_prepare_tx_descr_v(card, descr, skb); + if (result) { + /* + * DMA map failed. As chances are that failure + * would continue, just release the skb and return + */ + card->netdev->stats.tx_dropped++; + dev_kfree_skb_any(skb); + spin_unlock_irqrestore(&card->tx_dma_lock, flags); + return NETDEV_TX_OK; + } + /* + * link this prepared descriptor to the previous one + * to achieve high performance + */ + descr->prev->next_descr_addr = descr->bus_addr; /* * as hardware descriptor is modified in the above lines, * ensure that the hardware sees it */ wmb(); - if (gelic_net_kick_txdma(card, card->tx_chain.tail)) - goto error; + if (gelic_net_kick_txdma(card, descr)) { + /* + * kick failed. 
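+	 * The DMA start request was not accepted, so drop the frame and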
+ * release the descriptors which were just prepared */ + card->netdev->stats.tx_dropped++; + gelic_net_release_tx_descr(card, descr); + gelic_net_release_tx_descr(card, descr->next); + card->tx_chain.tail = descr->next->next; + dev_info(ctodev(card), "%s: kick failure\n", __func__); + } else { + /* OK, DMA started/reserved */ + netdev->trans_start = jiffies; + } - netdev->trans_start = jiffies; spin_unlock_irqrestore(&card->tx_dma_lock, flags); return NETDEV_TX_OK; - -error: - card->netdev_stats.tx_dropped++; - spin_unlock_irqrestore(&card->tx_dma_lock, flags); - return NETDEV_TX_LOCKED; } /** @@ -854,8 +872,8 @@ static void gelic_net_pass_skb_up(struct skb->ip_summed = CHECKSUM_NONE; /* update netdevice statistics */ - card->netdev_stats.rx_packets++; - card->netdev_stats.rx_bytes += skb->len; + card->netdev->stats.rx_packets++; + card->netdev->stats.rx_bytes += skb->len; /* pass skb up to stack */ netif_receive_skb(skb); @@ -895,38 +913,67 @@ static int gelic_net_decode_one_descr(st (status == GELIC_NET_DESCR_FORCE_END)) { dev_info(ctodev(card), "dropping RX descriptor with state %x\n", status); - card->netdev_stats.rx_dropped++; + card->netdev->stats.rx_dropped++; goto refill; } - if ((status != GELIC_NET_DESCR_COMPLETE) && - (status != GELIC_NET_DESCR_FRAME_END)) { + if (status == GELIC_NET_DESCR_BUFFER_FULL) { + /* + * Buffer full would occur if and only if + * the frame length was longer than the size of this + * descriptor's buffer. If the frame length was equal + * to or shorter than the buffer's size, the FRAME_END + * condition would occur instead. + * Either way this frame was longer than the MTU, + * so just drop it. + */ + dev_info(ctodev(card), "overlength frame\n"); + goto refill; + } + /* + * descriptors other than FRAME_END should + * be treated as an error here. + */ + if (status != GELIC_NET_DESCR_FRAME_END) { dev_dbg(ctodev(card), "RX descriptor with state %x\n", status); goto refill; } /* ok, we've got a packet in descr */ - gelic_net_pass_skb_up(descr, card); /* 1: skb_up sccess */ - + gelic_net_pass_skb_up(descr, card); refill: - descr->next_descr_addr = 0; /* unlink the descr */ + /* + * So that the DMAC can always see the end + * of the descriptor chain, to avoid + * unwanted DMAC overrun. + */ + descr->next_descr_addr = 0; /* change the descriptor state: */ gelic_net_set_descr_status(descr, GELIC_NET_DESCR_NOT_IN_USE); - /* refill one desc - * FIXME: this can fail, but for now, just leave this - * descriptor without skb + /* + * this call can fail, but for now, just leave this + * descriptor without skb */ gelic_net_prepare_rx_descr(card, descr); + chain->head = descr; chain->tail = descr->next; + + /* + * Set this descriptor as the end of the chain. + */ descr->prev->next_descr_addr = descr->bus_addr; + /* + * If the DMAC met the end of the descriptor chain, it stopped 
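+	 * (at the terminating descriptor whose next_descr_addr is 0);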
+ * thus re-enable it + */ if (dmac_chain_ended) { - gelic_net_enable_rxdmac(card); - dev_dbg(ctodev(card), "reenable rx dma\n"); + card->rx_dma_restart_required = 1; + dev_dbg(ctodev(card), "reenable rx dma scheduled\n"); } return 1; @@ -968,20 +1015,6 @@ static int gelic_net_poll(struct net_dev } else return 1; } - -/** - * gelic_net_get_stats - get interface statistics - * @netdev: interface device structure - * - * returns the interface statistics residing in the gelic_net_card struct - */ -static struct net_device_stats *gelic_net_get_stats(struct net_device *netdev) -{ - struct gelic_net_card *card = netdev_priv(netdev); - - return &card->netdev_stats; -} - /** * gelic_net_change_mtu - changes the MTU of an interface * @netdev: interface device structure @@ -1016,6 +1049,11 @@ static irqreturn_t gelic_net_interrupt(i if (!status) return IRQ_NONE; + if (card->rx_dma_restart_required) { + card->rx_dma_restart_required = 0; + gelic_net_enable_rxdmac(card); + } + if (status & GELIC_NET_RXINT) { gelic_net_rx_irq_off(card); netif_rx_schedule(netdev); @@ -1024,9 +1062,10 @@ static irqreturn_t gelic_net_interrupt(i if (status & GELIC_NET_TXINT) { spin_lock_irqsave(&card->tx_dma_lock, flags); card->tx_dma_progress = 0; + gelic_net_release_tx_chain(card, 0); + /* kick outstanding tx descriptor if any */ + gelic_net_kick_txdma(card, card->tx_chain.tail); spin_unlock_irqrestore(&card->tx_dma_lock, flags); - /* start pending DMA */ - gelic_net_xmit(NULL, netdev); } return IRQ_HANDLED; } @@ -1068,7 +1107,7 @@ static int gelic_net_open_device(struct } result = request_irq(card->netdev->irq, gelic_net_interrupt, - IRQF_DISABLED, "gelic network", card->netdev); + IRQF_DISABLED, card->netdev->name, card->netdev); if (result) { dev_info(ctodev(card), "%s:%d: request_irq failed (%d)\n", @@ -1107,7 +1146,7 @@ static int gelic_net_open(struct net_dev card->descr, GELIC_NET_TX_DESCRIPTORS)) goto alloc_tx_failed; if (gelic_net_init_chain(card, &card->rx_chain, - card->descr + GELIC_NET_RX_DESCRIPTORS, + card->descr + GELIC_NET_TX_DESCRIPTORS, GELIC_NET_RX_DESCRIPTORS)) goto alloc_rx_failed; @@ -1129,7 +1168,6 @@ static int gelic_net_open(struct net_dev netif_start_queue(netdev); netif_carrier_on(netdev); - netif_poll_enable(netdev); return 0; @@ -1141,7 +1179,6 @@ alloc_tx_failed: return -ENOMEM; } -#ifdef GELIC_NET_ETHTOOL static void gelic_net_get_drvinfo (struct net_device *netdev, struct ethtool_drvinfo *info) { @@ -1261,7 +1298,6 @@ static struct ethtool_ops gelic_net_etht .get_rx_csum = gelic_net_get_rx_csum, .set_rx_csum = gelic_net_set_rx_csum, }; -#endif /** * gelic_net_tx_timeout_task - task scheduled by the watchdog timeout @@ -1320,7 +1356,6 @@ static void gelic_net_setup_netdev_ops(s netdev->open = &gelic_net_open; netdev->stop = &gelic_net_stop; netdev->hard_start_xmit = &gelic_net_xmit; - netdev->get_stats = &gelic_net_get_stats; netdev->set_multicast_list = &gelic_net_set_multi; netdev->change_mtu = &gelic_net_change_mtu; /* tx watchdog */ @@ -1329,9 +1364,7 @@ static void gelic_net_setup_netdev_ops(s /* NAPI */ netdev->poll = &gelic_net_poll; netdev->weight = GELIC_NET_NAPI_WEIGHT; -#ifdef GELIC_NET_ETHTOOL netdev->ethtool_ops = &gelic_net_ethtool_ops; -#endif } /** Index: linux-rt-rebase.q/drivers/net/ps3_gelic_net.h =================================================================== --- linux-rt-rebase.q.orig/drivers/net/ps3_gelic_net.h +++ linux-rt-rebase.q/drivers/net/ps3_gelic_net.h @@ -28,21 +28,12 @@ #ifndef _GELIC_NET_H #define _GELIC_NET_H -#define GELIC_NET_DRV_NAME "Gelic Network 
Driver" -#define GELIC_NET_DRV_VERSION "1.0" - -#define GELIC_NET_ETHTOOL /* use ethtool */ - -/* ioctl */ -#define GELIC_NET_GET_MODE (SIOCDEVPRIVATE + 0) -#define GELIC_NET_SET_MODE (SIOCDEVPRIVATE + 1) - /* descriptors */ #define GELIC_NET_RX_DESCRIPTORS 128 /* num of descriptors */ #define GELIC_NET_TX_DESCRIPTORS 128 /* num of descriptors */ -#define GELIC_NET_MAX_MTU 2308 -#define GELIC_NET_MIN_MTU 64 +#define GELIC_NET_MAX_MTU VLAN_ETH_FRAME_LEN +#define GELIC_NET_MIN_MTU VLAN_ETH_ZLEN #define GELIC_NET_RXBUF_ALIGN 128 #define GELIC_NET_RX_CSUM_DEFAULT 1 /* hw chksum */ #define GELIC_NET_WATCHDOG_TIMEOUT 5*HZ @@ -90,7 +81,8 @@ enum gelic_net_int1_status { */ #define GELIC_NET_RXVLNPKT 0x00200000 /* VLAN packet */ /* bit 20..16 reserved */ -#define GELIC_NET_RXRECNUM 0x0000ff00 /* reception receipt number */ +#define GELIC_NET_RXRRECNUM 0x0000ff00 /* reception receipt number */ +#define GELIC_NET_RXRRECNUM_SHIFT 8 /* bit 7..0 reserved */ #define GELIC_NET_TXDESC_TAIL 0 @@ -133,19 +125,19 @@ enum gelic_net_int1_status { * interrupt status */ #define GELIC_NET_DMAC_CMDSTAT_CHAIN_END 0x00000002 /* RXDCEIS:DMA stopped */ -#define GELIC_NET_DMAC_CMDSTAT_NOT_IN_USE 0xb0000000 #define GELIC_NET_DESCR_IND_PROC_SHIFT 28 #define GELIC_NET_DESCR_IND_PROC_MASKO 0x0fffffff enum gelic_net_descr_status { - GELIC_NET_DESCR_COMPLETE = 0x00, /* used in rx and tx */ + GELIC_NET_DESCR_COMPLETE = 0x00, /* used in tx */ + GELIC_NET_DESCR_BUFFER_FULL = 0x00, /* used in rx */ GELIC_NET_DESCR_RESPONSE_ERROR = 0x01, /* used in rx and tx */ GELIC_NET_DESCR_PROTECTION_ERROR = 0x02, /* used in rx and tx */ GELIC_NET_DESCR_FRAME_END = 0x04, /* used in rx */ GELIC_NET_DESCR_FORCE_END = 0x05, /* used in rx and tx */ GELIC_NET_DESCR_CARDOWNED = 0x0a, /* used in rx and tx */ - GELIC_NET_DESCR_NOT_IN_USE /* any other value */ + GELIC_NET_DESCR_NOT_IN_USE = 0x0b /* any other value */ }; /* for lv1_net_control */ #define GELIC_NET_GET_MAC_ADDRESS 0x0000000000000001 @@ -216,10 +208,10 @@ struct gelic_net_card { struct gelic_net_descr_chain tx_chain; struct gelic_net_descr_chain rx_chain; + int rx_dma_restart_required; /* gurad dmac descriptor chain*/ spinlock_t chain_lock; - struct net_device_stats netdev_stats; int rx_csum; /* guard tx_dma_progress */ spinlock_t tx_dma_lock; Index: linux-rt-rebase.q/drivers/net/ucc_geth.c =================================================================== --- linux-rt-rebase.q.orig/drivers/net/ucc_geth.c +++ linux-rt-rebase.q/drivers/net/ucc_geth.c @@ -43,10 +43,6 @@ #undef DEBUG -#define DRV_DESC "QE UCC Gigabit Ethernet Controller" -#define DRV_NAME "ucc_geth" -#define DRV_VERSION "1.1" - #define ugeth_printk(level, format, arg...) \ printk(level format "\n", ## arg) @@ -64,9 +60,19 @@ #else #define ugeth_vdbg(fmt, args...) 
do { } while (0) #endif /* UGETH_VERBOSE_DEBUG */ +#define UGETH_MSG_DEFAULT (NETIF_MSG_IFUP << 1 ) - 1 +void uec_set_ethtool_ops(struct net_device *netdev); + static DEFINE_SPINLOCK(ugeth_lock); +static struct { + u32 msg_enable; +} debug = { -1 }; + +module_param_named(debug, debug.msg_enable, int, 0); +MODULE_PARM_DESC(debug, "Debug verbosity level (0=none, ..., 0xffff=all)"); + static struct ucc_geth_info ugeth_primary_info = { .uf_info = { .bd_mem_part = MEM_PART_SYSTEM, @@ -104,6 +110,7 @@ static struct ucc_geth_info ugeth_primar .maxRetransmission = 0xf, .collisionWindow = 0x37, .receiveFlowControl = 1, + .transmitFlowControl = 1, .maxGroupAddrInHash = 4, .maxIndAddrInHash = 4, .prel = 7, @@ -139,7 +146,9 @@ static struct ucc_geth_info ugeth_primar .numStationAddresses = UCC_GETH_NUM_OF_STATION_ADDRESSES_1, .largestexternallookupkeysize = QE_FLTR_LARGEST_EXTERNAL_TABLE_LOOKUP_KEY_SIZE_NONE, - .statisticsMode = UCC_GETH_STATISTICS_GATHERING_MODE_NONE, + .statisticsMode = UCC_GETH_STATISTICS_GATHERING_MODE_HARDWARE | + UCC_GETH_STATISTICS_GATHERING_MODE_FIRMWARE_TX | + UCC_GETH_STATISTICS_GATHERING_MODE_FIRMWARE_RX, .vlanOperationTagged = UCC_GETH_VLAN_OPERATION_TAGGED_NOP, .vlanOperationNonTagged = UCC_GETH_VLAN_OPERATION_NON_TAGGED_NOP, .rxQoSMode = UCC_GETH_QOS_MODE_DEFAULT, @@ -281,7 +290,8 @@ static int fill_init_enet_entries(struct for (i = 0; i < num_entries; i++) { if ((snum = qe_get_snum()) < 0) { - ugeth_err("fill_init_enet_entries: Can not get SNUM."); + if (netif_msg_ifup(ugeth)) + ugeth_err("fill_init_enet_entries: Can not get SNUM."); return snum; } if ((i == 0) && skip_page_for_first_entry) @@ -291,8 +301,8 @@ static int fill_init_enet_entries(struct init_enet_offset = qe_muram_alloc(thread_size, thread_alignment); if (IS_ERR_VALUE(init_enet_offset)) { - ugeth_err - ("fill_init_enet_entries: Can not allocate DPRAM memory."); + if (netif_msg_ifup(ugeth)) + ugeth_err("fill_init_enet_entries: Can not allocate DPRAM memory."); qe_put_snum((u8) snum); return -ENOMEM; } @@ -1200,7 +1210,7 @@ static int init_inter_frame_gap_params(u return 0; } -static int init_flow_control_params(u32 automatic_flow_control_mode, +int init_flow_control_params(u32 automatic_flow_control_mode, int rx_flow_control_enable, int tx_flow_control_enable, u16 pause_period, @@ -1486,9 +1496,9 @@ static int adjust_enet_interface(struct ret_val = init_preamble_length(ug_info->prel, &ug_regs->maccfg2); if (ret_val != 0) { - ugeth_err - ("%s: Preamble length must be between 3 and 7 inclusive.", - __FUNCTION__); + if (netif_msg_probe(ugeth)) + ugeth_err("%s: Preamble length must be between 3 and 7 inclusive.", + __FUNCTION__); return ret_val; } @@ -1726,7 +1736,8 @@ static int ugeth_enable(struct ucc_geth_ /* check if the UCC number is in range. */ if (ugeth->ug_info->uf_info.ucc_num >= UCC_MAX_NUM) { - ugeth_err("%s: ucc_num out of range.", __FUNCTION__); + if (netif_msg_probe(ugeth)) + ugeth_err("%s: ucc_num out of range.", __FUNCTION__); return -EINVAL; } @@ -1754,7 +1765,8 @@ static int ugeth_disable(struct ucc_geth /* check if the UCC number is in range. 
*/ if (ugeth->ug_info->uf_info.ucc_num >= UCC_MAX_NUM) { - ugeth_err("%s: ucc_num out of range.", __FUNCTION__); + if (netif_msg_probe(ugeth)) + ugeth_err("%s: ucc_num out of range.", __FUNCTION__); return -EINVAL; } @@ -2306,7 +2318,9 @@ static int ucc_struct_init(struct ucc_ge if (!((uf_info->bd_mem_part == MEM_PART_SYSTEM) || (uf_info->bd_mem_part == MEM_PART_MURAM))) { - ugeth_err("%s: Bad memory partition value.", __FUNCTION__); + if (netif_msg_probe(ugeth)) + ugeth_err("%s: Bad memory partition value.", + __FUNCTION__); return -EINVAL; } @@ -2315,9 +2329,10 @@ static int ucc_struct_init(struct ucc_ge if ((ug_info->bdRingLenRx[i] < UCC_GETH_RX_BD_RING_SIZE_MIN) || (ug_info->bdRingLenRx[i] % UCC_GETH_RX_BD_RING_SIZE_ALIGNMENT)) { - ugeth_err - ("%s: Rx BD ring length must be multiple of 4," - " no smaller than 8.", __FUNCTION__); + if (netif_msg_probe(ugeth)) + ugeth_err + ("%s: Rx BD ring length must be multiple of 4, no smaller than 8.", + __FUNCTION__); return -EINVAL; } } @@ -2325,9 +2340,10 @@ static int ucc_struct_init(struct ucc_ge /* Tx BD lengths */ for (i = 0; i < ug_info->numQueuesTx; i++) { if (ug_info->bdRingLenTx[i] < UCC_GETH_TX_BD_RING_SIZE_MIN) { - ugeth_err - ("%s: Tx BD ring length must be no smaller than 2.", - __FUNCTION__); + if (netif_msg_probe(ugeth)) + ugeth_err + ("%s: Tx BD ring length must be no smaller than 2.", + __FUNCTION__); return -EINVAL; } } @@ -2335,31 +2351,35 @@ static int ucc_struct_init(struct ucc_ge /* mrblr */ if ((uf_info->max_rx_buf_length == 0) || (uf_info->max_rx_buf_length % UCC_GETH_MRBLR_ALIGNMENT)) { - ugeth_err - ("%s: max_rx_buf_length must be non-zero multiple of 128.", - __FUNCTION__); + if (netif_msg_probe(ugeth)) + ugeth_err + ("%s: max_rx_buf_length must be non-zero multiple of 128.", + __FUNCTION__); return -EINVAL; } /* num Tx queues */ if (ug_info->numQueuesTx > NUM_TX_QUEUES) { - ugeth_err("%s: number of tx queues too large.", __FUNCTION__); + if (netif_msg_probe(ugeth)) + ugeth_err("%s: number of tx queues too large.", __FUNCTION__); return -EINVAL; } /* num Rx queues */ if (ug_info->numQueuesRx > NUM_RX_QUEUES) { - ugeth_err("%s: number of rx queues too large.", __FUNCTION__); + if (netif_msg_probe(ugeth)) + ugeth_err("%s: number of rx queues too large.", __FUNCTION__); return -EINVAL; } /* l2qt */ for (i = 0; i < UCC_GETH_VLAN_PRIORITY_MAX; i++) { if (ug_info->l2qt[i] >= ug_info->numQueuesRx) { - ugeth_err - ("%s: VLAN priority table entry must not be" - " larger than number of Rx queues.", - __FUNCTION__); + if (netif_msg_probe(ugeth)) + ugeth_err + ("%s: VLAN priority table entry must not be" + " larger than number of Rx queues.", + __FUNCTION__); return -EINVAL; } } @@ -2367,26 +2387,29 @@ static int ucc_struct_init(struct ucc_ge /* l3qt */ for (i = 0; i < UCC_GETH_IP_PRIORITY_MAX; i++) { if (ug_info->l3qt[i] >= ug_info->numQueuesRx) { - ugeth_err - ("%s: IP priority table entry must not be" - " larger than number of Rx queues.", - __FUNCTION__); + if (netif_msg_probe(ugeth)) + ugeth_err + ("%s: IP priority table entry must not be" + " larger than number of Rx queues.", + __FUNCTION__); return -EINVAL; } } if (ug_info->cam && !ug_info->ecamptr) { - ugeth_err("%s: If cam mode is chosen, must supply cam ptr.", - __FUNCTION__); + if (netif_msg_probe(ugeth)) + ugeth_err("%s: If cam mode is chosen, must supply cam ptr.", + __FUNCTION__); return -EINVAL; } if ((ug_info->numStationAddresses != UCC_GETH_NUM_OF_STATION_ADDRESSES_1) && ug_info->rxExtendedFiltering) { - ugeth_err("%s: Number of station addresses greater than 1 " 
- "not allowed in extended parsing mode.", - __FUNCTION__); + if (netif_msg_probe(ugeth)) + ugeth_err("%s: Number of station addresses greater than 1 " + "not allowed in extended parsing mode.", + __FUNCTION__); return -EINVAL; } @@ -2399,7 +2422,8 @@ static int ucc_struct_init(struct ucc_ge uf_info->uccm_mask |= (UCCE_TXBF_SINGLE_MASK << i); /* Initialize the general fast UCC block. */ if (ucc_fast_init(uf_info, &ugeth->uccf)) { - ugeth_err("%s: Failed to init uccf.", __FUNCTION__); + if (netif_msg_probe(ugeth)) + ugeth_err("%s: Failed to init uccf.", __FUNCTION__); ucc_geth_memclean(ugeth); return -ENOMEM; } @@ -2452,7 +2476,9 @@ static int ucc_geth_startup(struct ucc_g numThreadsRxNumerical = 8; break; default: - ugeth_err("%s: Bad number of Rx threads value.", __FUNCTION__); + if (netif_msg_ifup(ugeth)) + ugeth_err("%s: Bad number of Rx threads value.", + __FUNCTION__); ucc_geth_memclean(ugeth); return -EINVAL; break; @@ -2475,7 +2501,9 @@ static int ucc_geth_startup(struct ucc_g numThreadsTxNumerical = 8; break; default: - ugeth_err("%s: Bad number of Tx threads value.", __FUNCTION__); + if (netif_msg_ifup(ugeth)) + ugeth_err("%s: Bad number of Tx threads value.", + __FUNCTION__); ucc_geth_memclean(ugeth); return -EINVAL; break; @@ -2507,7 +2535,7 @@ static int ucc_geth_startup(struct ucc_g /* For more details see the hardware spec. */ init_flow_control_params(ug_info->aufc, ug_info->receiveFlowControl, - 1, + ug_info->transmitFlowControl, ug_info->pausePeriod, ug_info->extensionField, &uf_regs->upsmr, @@ -2527,8 +2555,9 @@ static int ucc_geth_startup(struct ucc_g ug_info->backToBackInterFrameGap, &ug_regs->ipgifg); if (ret_val != 0) { - ugeth_err("%s: IPGIFG initialization parameter too large.", - __FUNCTION__); + if (netif_msg_ifup(ugeth)) + ugeth_err("%s: IPGIFG initialization parameter too large.", + __FUNCTION__); ucc_geth_memclean(ugeth); return ret_val; } @@ -2544,7 +2573,8 @@ static int ucc_geth_startup(struct ucc_g ug_info->collisionWindow, &ug_regs->hafdup); if (ret_val != 0) { - ugeth_err("%s: Half Duplex initialization parameter too large.", + if (netif_msg_ifup(ugeth)) + ugeth_err("%s: Half Duplex initialization parameter too large.", __FUNCTION__); ucc_geth_memclean(ugeth); return ret_val; @@ -2597,9 +2627,10 @@ static int ucc_geth_startup(struct ucc_g tx_bd_ring_offset[j]); } if (!ugeth->p_tx_bd_ring[j]) { - ugeth_err - ("%s: Can not allocate memory for Tx bd rings.", - __FUNCTION__); + if (netif_msg_ifup(ugeth)) + ugeth_err + ("%s: Can not allocate memory for Tx bd rings.", + __FUNCTION__); ucc_geth_memclean(ugeth); return -ENOMEM; } @@ -2632,9 +2663,10 @@ static int ucc_geth_startup(struct ucc_g rx_bd_ring_offset[j]); } if (!ugeth->p_rx_bd_ring[j]) { - ugeth_err - ("%s: Can not allocate memory for Rx bd rings.", - __FUNCTION__); + if (netif_msg_ifup(ugeth)) + ugeth_err + ("%s: Can not allocate memory for Rx bd rings.", + __FUNCTION__); ucc_geth_memclean(ugeth); return -ENOMEM; } @@ -2648,8 +2680,9 @@ static int ucc_geth_startup(struct ucc_g GFP_KERNEL); if (ugeth->tx_skbuff[j] == NULL) { - ugeth_err("%s: Could not allocate tx_skbuff", - __FUNCTION__); + if (netif_msg_ifup(ugeth)) + ugeth_err("%s: Could not allocate tx_skbuff", + __FUNCTION__); ucc_geth_memclean(ugeth); return -ENOMEM; } @@ -2679,8 +2712,9 @@ static int ucc_geth_startup(struct ucc_g GFP_KERNEL); if (ugeth->rx_skbuff[j] == NULL) { - ugeth_err("%s: Could not allocate rx_skbuff", - __FUNCTION__); + if (netif_msg_ifup(ugeth)) + ugeth_err("%s: Could not allocate rx_skbuff", + __FUNCTION__); 
ucc_geth_memclean(ugeth); return -ENOMEM; } @@ -2711,9 +2745,10 @@ static int ucc_geth_startup(struct ucc_g qe_muram_alloc(sizeof(struct ucc_geth_tx_global_pram), UCC_GETH_TX_GLOBAL_PRAM_ALIGNMENT); if (IS_ERR_VALUE(ugeth->tx_glbl_pram_offset)) { - ugeth_err - ("%s: Can not allocate DPRAM memory for p_tx_glbl_pram.", - __FUNCTION__); + if (netif_msg_ifup(ugeth)) + ugeth_err + ("%s: Can not allocate DPRAM memory for p_tx_glbl_pram.", + __FUNCTION__); ucc_geth_memclean(ugeth); return -ENOMEM; } @@ -2733,9 +2768,10 @@ static int ucc_geth_startup(struct ucc_g 32 * (numThreadsTxNumerical == 1), UCC_GETH_THREAD_DATA_ALIGNMENT); if (IS_ERR_VALUE(ugeth->thread_dat_tx_offset)) { - ugeth_err - ("%s: Can not allocate DPRAM memory for p_thread_data_tx.", - __FUNCTION__); + if (netif_msg_ifup(ugeth)) + ugeth_err + ("%s: Can not allocate DPRAM memory for p_thread_data_tx.", + __FUNCTION__); ucc_geth_memclean(ugeth); return -ENOMEM; } @@ -2761,9 +2797,10 @@ static int ucc_geth_startup(struct ucc_g sizeof(struct ucc_geth_send_queue_qd), UCC_GETH_SEND_QUEUE_QUEUE_DESCRIPTOR_ALIGNMENT); if (IS_ERR_VALUE(ugeth->send_q_mem_reg_offset)) { - ugeth_err - ("%s: Can not allocate DPRAM memory for p_send_q_mem_reg.", - __FUNCTION__); + if (netif_msg_ifup(ugeth)) + ugeth_err + ("%s: Can not allocate DPRAM memory for p_send_q_mem_reg.", + __FUNCTION__); ucc_geth_memclean(ugeth); return -ENOMEM; } @@ -2804,9 +2841,10 @@ static int ucc_geth_startup(struct ucc_g qe_muram_alloc(sizeof(struct ucc_geth_scheduler), UCC_GETH_SCHEDULER_ALIGNMENT); if (IS_ERR_VALUE(ugeth->scheduler_offset)) { - ugeth_err - ("%s: Can not allocate DPRAM memory for p_scheduler.", - __FUNCTION__); + if (netif_msg_ifup(ugeth)) + ugeth_err + ("%s: Can not allocate DPRAM memory for p_scheduler.", + __FUNCTION__); ucc_geth_memclean(ugeth); return -ENOMEM; } @@ -2852,9 +2890,11 @@ static int ucc_geth_startup(struct ucc_g (struct ucc_geth_tx_firmware_statistics_pram), UCC_GETH_TX_STATISTICS_ALIGNMENT); if (IS_ERR_VALUE(ugeth->tx_fw_statistics_pram_offset)) { - ugeth_err - ("%s: Can not allocate DPRAM memory for" - " p_tx_fw_statistics_pram.", __FUNCTION__); + if (netif_msg_ifup(ugeth)) + ugeth_err + ("%s: Can not allocate DPRAM memory for" + " p_tx_fw_statistics_pram.", + __FUNCTION__); ucc_geth_memclean(ugeth); return -ENOMEM; } @@ -2891,9 +2931,10 @@ static int ucc_geth_startup(struct ucc_g qe_muram_alloc(sizeof(struct ucc_geth_rx_global_pram), UCC_GETH_RX_GLOBAL_PRAM_ALIGNMENT); if (IS_ERR_VALUE(ugeth->rx_glbl_pram_offset)) { - ugeth_err - ("%s: Can not allocate DPRAM memory for p_rx_glbl_pram.", - __FUNCTION__); + if (netif_msg_ifup(ugeth)) + ugeth_err + ("%s: Can not allocate DPRAM memory for p_rx_glbl_pram.", + __FUNCTION__); ucc_geth_memclean(ugeth); return -ENOMEM; } @@ -2912,9 +2953,10 @@ static int ucc_geth_startup(struct ucc_g sizeof(struct ucc_geth_thread_data_rx), UCC_GETH_THREAD_DATA_ALIGNMENT); if (IS_ERR_VALUE(ugeth->thread_dat_rx_offset)) { - ugeth_err - ("%s: Can not allocate DPRAM memory for p_thread_data_rx.", - __FUNCTION__); + if (netif_msg_ifup(ugeth)) + ugeth_err + ("%s: Can not allocate DPRAM memory for p_thread_data_rx.", + __FUNCTION__); ucc_geth_memclean(ugeth); return -ENOMEM; } @@ -2935,9 +2977,10 @@ static int ucc_geth_startup(struct ucc_g (struct ucc_geth_rx_firmware_statistics_pram), UCC_GETH_RX_STATISTICS_ALIGNMENT); if (IS_ERR_VALUE(ugeth->rx_fw_statistics_pram_offset)) { - ugeth_err - ("%s: Can not allocate DPRAM memory for" - " p_rx_fw_statistics_pram.", __FUNCTION__); + if (netif_msg_ifup(ugeth)) + ugeth_err + ("%s: 
Can not allocate DPRAM memory for" + " p_rx_fw_statistics_pram.", __FUNCTION__); ucc_geth_memclean(ugeth); return -ENOMEM; } @@ -2957,9 +3000,10 @@ static int ucc_geth_startup(struct ucc_g sizeof(struct ucc_geth_rx_interrupt_coalescing_entry) + 4, UCC_GETH_RX_INTERRUPT_COALESCING_ALIGNMENT); if (IS_ERR_VALUE(ugeth->rx_irq_coalescing_tbl_offset)) { - ugeth_err - ("%s: Can not allocate DPRAM memory for" - " p_rx_irq_coalescing_tbl.", __FUNCTION__); + if (netif_msg_ifup(ugeth)) + ugeth_err + ("%s: Can not allocate DPRAM memory for" + " p_rx_irq_coalescing_tbl.", __FUNCTION__); ucc_geth_memclean(ugeth); return -ENOMEM; } @@ -3025,9 +3069,10 @@ static int ucc_geth_startup(struct ucc_g sizeof(struct ucc_geth_rx_prefetched_bds)), UCC_GETH_RX_BD_QUEUES_ALIGNMENT); if (IS_ERR_VALUE(ugeth->rx_bd_qs_tbl_offset)) { - ugeth_err - ("%s: Can not allocate DPRAM memory for p_rx_bd_qs_tbl.", - __FUNCTION__); + if (netif_msg_ifup(ugeth)) + ugeth_err + ("%s: Can not allocate DPRAM memory for p_rx_bd_qs_tbl.", + __FUNCTION__); ucc_geth_memclean(ugeth); return -ENOMEM; } @@ -3102,8 +3147,9 @@ static int ucc_geth_startup(struct ucc_g /* initialize extended filtering */ if (ug_info->rxExtendedFiltering) { if (!ug_info->extendedFilteringChainPointer) { - ugeth_err("%s: Null Extended Filtering Chain Pointer.", - __FUNCTION__); + if (netif_msg_ifup(ugeth)) + ugeth_err("%s: Null Extended Filtering Chain Pointer.", + __FUNCTION__); ucc_geth_memclean(ugeth); return -EINVAL; } @@ -3114,9 +3160,10 @@ static int ucc_geth_startup(struct ucc_g qe_muram_alloc(sizeof(struct ucc_geth_exf_global_pram), UCC_GETH_RX_EXTENDED_FILTERING_GLOBAL_PARAMETERS_ALIGNMENT); if (IS_ERR_VALUE(ugeth->exf_glbl_param_offset)) { - ugeth_err - ("%s: Can not allocate DPRAM memory for" - " p_exf_glbl_param.", __FUNCTION__); + if (netif_msg_ifup(ugeth)) + ugeth_err + ("%s: Can not allocate DPRAM memory for" + " p_exf_glbl_param.", __FUNCTION__); ucc_geth_memclean(ugeth); return -ENOMEM; } @@ -3161,9 +3208,10 @@ static int ucc_geth_startup(struct ucc_g */ if (!(ugeth->p_init_enet_param_shadow = kmalloc(sizeof(struct ucc_geth_init_pram), GFP_KERNEL))) { - ugeth_err - ("%s: Can not allocate memory for" - " p_UccInitEnetParamShadows.", __FUNCTION__); + if (netif_msg_ifup(ugeth)) + ugeth_err + ("%s: Can not allocate memory for" + " p_UccInitEnetParamShadows.", __FUNCTION__); ucc_geth_memclean(ugeth); return -ENOMEM; } @@ -3196,8 +3244,9 @@ static int ucc_geth_startup(struct ucc_g QE_FLTR_LARGEST_EXTERNAL_TABLE_LOOKUP_KEY_SIZE_8_BYTES) && (ug_info->largestexternallookupkeysize != QE_FLTR_LARGEST_EXTERNAL_TABLE_LOOKUP_KEY_SIZE_16_BYTES)) { - ugeth_err("%s: Invalid largest External Lookup Key Size.", - __FUNCTION__); + if (netif_msg_ifup(ugeth)) + ugeth_err("%s: Invalid largest External Lookup Key Size.", + __FUNCTION__); ucc_geth_memclean(ugeth); return -EINVAL; } @@ -3222,8 +3271,9 @@ static int ucc_geth_startup(struct ucc_g /* Rx needs one extra for terminator */ , size, UCC_GETH_THREAD_RX_PRAM_ALIGNMENT, ug_info->riscRx, 1)) != 0) { - ugeth_err("%s: Can not fill p_init_enet_param_shadow.", - __FUNCTION__); + if (netif_msg_ifup(ugeth)) + ugeth_err("%s: Can not fill p_init_enet_param_shadow.", + __FUNCTION__); ucc_geth_memclean(ugeth); return ret_val; } @@ -3237,8 +3287,9 @@ static int ucc_geth_startup(struct ucc_g sizeof(struct ucc_geth_thread_tx_pram), UCC_GETH_THREAD_TX_PRAM_ALIGNMENT, ug_info->riscTx, 0)) != 0) { - ugeth_err("%s: Can not fill p_init_enet_param_shadow.", - __FUNCTION__); + if (netif_msg_ifup(ugeth)) + ugeth_err("%s: Can not fill 
p_init_enet_param_shadow.", + __FUNCTION__); ucc_geth_memclean(ugeth); return ret_val; } @@ -3246,8 +3297,9 @@ static int ucc_geth_startup(struct ucc_g /* Load Rx bds with buffers */ for (i = 0; i < ug_info->numQueuesRx; i++) { if ((ret_val = rx_bd_buffer_set(ugeth, (u8) i)) != 0) { - ugeth_err("%s: Can not fill Rx bds with buffers.", - __FUNCTION__); + if (netif_msg_ifup(ugeth)) + ugeth_err("%s: Can not fill Rx bds with buffers.", + __FUNCTION__); ucc_geth_memclean(ugeth); return ret_val; } @@ -3256,9 +3308,10 @@ static int ucc_geth_startup(struct ucc_g /* Allocate InitEnet command parameter structure */ init_enet_pram_offset = qe_muram_alloc(sizeof(struct ucc_geth_init_pram), 4); if (IS_ERR_VALUE(init_enet_pram_offset)) { - ugeth_err - ("%s: Can not allocate DPRAM memory for p_init_enet_pram.", - __FUNCTION__); + if (netif_msg_ifup(ugeth)) + ugeth_err + ("%s: Can not allocate DPRAM memory for p_init_enet_pram.", + __FUNCTION__); ucc_geth_memclean(ugeth); return -ENOMEM; } @@ -3428,8 +3481,9 @@ static int ucc_geth_rx(struct ucc_geth_p if (!skb || (!(bd_status & (R_F | R_L))) || (bd_status & R_ERRORS_FATAL)) { - ugeth_vdbg("%s, %d: ERROR!!! skb - 0x%08x", - __FUNCTION__, __LINE__, (u32) skb); + if (netif_msg_rx_err(ugeth)) + ugeth_err("%s, %d: ERROR!!! skb - 0x%08x", + __FUNCTION__, __LINE__, (u32) skb); if (skb) dev_kfree_skb_any(skb); @@ -3458,7 +3512,8 @@ static int ucc_geth_rx(struct ucc_geth_p skb = get_new_skb(ugeth, bd); if (!skb) { - ugeth_warn("%s: No Rx Data Buffer", __FUNCTION__); + if (netif_msg_rx_err(ugeth)) + ugeth_warn("%s: No Rx Data Buffer", __FUNCTION__); ugeth->stats.rx_dropped++; break; } @@ -3649,28 +3704,32 @@ static int ucc_geth_open(struct net_devi /* Test station address */ if (dev->dev_addr[0] & ENET_GROUP_ADDR) { - ugeth_err("%s: Multicast address used for station address" - " - is this what you wanted?", __FUNCTION__); + if (netif_msg_ifup(ugeth)) + ugeth_err("%s: Multicast address used for station address" + " - is this what you wanted?", __FUNCTION__); return -EINVAL; } err = ucc_struct_init(ugeth); if (err) { - ugeth_err("%s: Cannot configure internal struct, aborting.", dev->name); + if (netif_msg_ifup(ugeth)) + ugeth_err("%s: Cannot configure internal struct, aborting.", dev->name); return err; } err = ucc_geth_startup(ugeth); if (err) { - ugeth_err("%s: Cannot configure net device, aborting.", - dev->name); + if (netif_msg_ifup(ugeth)) + ugeth_err("%s: Cannot configure net device, aborting.", + dev->name); return err; } err = adjust_enet_interface(ugeth); if (err) { - ugeth_err("%s: Cannot configure net device, aborting.", - dev->name); + if (netif_msg_ifup(ugeth)) + ugeth_err("%s: Cannot configure net device, aborting.", + dev->name); return err; } @@ -3687,7 +3746,8 @@ static int ucc_geth_open(struct net_devi err = init_phy(dev); if (err) { - ugeth_err("%s: Cannot initialize PHY, aborting.", dev->name); + if (netif_msg_ifup(ugeth)) + ugeth_err("%s: Cannot initialize PHY, aborting.", dev->name); return err; } @@ -3697,15 +3757,17 @@ static int ucc_geth_open(struct net_devi request_irq(ugeth->ug_info->uf_info.irq, ucc_geth_irq_handler, 0, "UCC Geth", dev); if (err) { - ugeth_err("%s: Cannot get IRQ for net device, aborting.", - dev->name); + if (netif_msg_ifup(ugeth)) + ugeth_err("%s: Cannot get IRQ for net device, aborting.", + dev->name); ucc_geth_stop(ugeth); return err; } err = ugeth_enable(ugeth, COMM_DIR_RX_AND_TX); if (err) { - ugeth_err("%s: Cannot enable net device, aborting.", dev->name); + if (netif_msg_ifup(ugeth)) + ugeth_err("%s: Cannot 
enable net device, aborting.", dev->name); ucc_geth_stop(ugeth); return err; } @@ -3732,8 +3794,6 @@ static int ucc_geth_close(struct net_dev return 0; } -const struct ethtool_ops ucc_geth_ethtool_ops = { }; - static phy_interface_t to_phy_interface(const char *phy_connection_type) { if (strcasecmp(phy_connection_type, "mii") == 0) @@ -3790,6 +3850,13 @@ static int ucc_geth_probe(struct of_devi return -ENODEV; ug_info = &ugeth_info[ucc_num]; + if (ug_info == NULL) { + if (netif_msg_probe(&debug)) + ugeth_err("%s: [%d] Missing additional data!", + __FUNCTION__, ucc_num); + return -ENODEV; + } + ug_info->uf_info.ucc_num = ucc_num; prop = of_get_property(np, "rx-clock", NULL); @@ -3868,15 +3935,10 @@ static int ucc_geth_probe(struct of_devi ug_info->mdio_bus = res.start; - printk(KERN_INFO "ucc_geth: UCC%1d at 0x%8x (irq = %d) \n", - ug_info->uf_info.ucc_num + 1, ug_info->uf_info.regs, - ug_info->uf_info.irq); - - if (ug_info == NULL) { - ugeth_err("%s: [%d] Missing additional data!", __FUNCTION__, - ucc_num); - return -ENODEV; - } + if (netif_msg_probe(&debug)) + printk(KERN_INFO "ucc_geth: UCC%1d at 0x%8x (irq = %d) \n", + ug_info->uf_info.ucc_num + 1, ug_info->uf_info.regs, + ug_info->uf_info.irq); /* Create an ethernet device instance */ dev = alloc_etherdev(sizeof(*ugeth)); @@ -3896,6 +3958,7 @@ static int ucc_geth_probe(struct of_devi SET_NETDEV_DEV(dev, device); /* Fill in the dev structure */ + uec_set_ethtool_ops(dev); dev->open = ucc_geth_open; dev->hard_start_xmit = ucc_geth_start_xmit; dev->tx_timeout = ucc_geth_timeout; @@ -3909,16 +3972,16 @@ static int ucc_geth_probe(struct of_devi // dev->change_mtu = ucc_geth_change_mtu; dev->mtu = 1500; dev->set_multicast_list = ucc_geth_set_multi; - dev->ethtool_ops = &ucc_geth_ethtool_ops; - ugeth->msg_enable = (NETIF_MSG_IFUP << 1 ) - 1; + ugeth->msg_enable = netif_msg_init(debug.msg_enable, UGETH_MSG_DEFAULT); ugeth->phy_interface = phy_interface; ugeth->max_speed = max_speed; err = register_netdev(dev); if (err) { - ugeth_err("%s: Cannot register net device, aborting.", - dev->name); + if (netif_msg_probe(ugeth)) + ugeth_err("%s: Cannot register net device, aborting.", + dev->name); free_netdev(dev); return err; } @@ -3972,7 +4035,8 @@ static int __init ucc_geth_init(void) if (ret) return ret; - printk(KERN_INFO "ucc_geth: " DRV_DESC "\n"); + if (netif_msg_drv(&debug)) + printk(KERN_INFO "ucc_geth: " DRV_DESC "\n"); for (i = 0; i < 8; i++) memcpy(&(ugeth_info[i]), &ugeth_primary_info, sizeof(ugeth_primary_info)); Index: linux-rt-rebase.q/drivers/net/ucc_geth.h =================================================================== --- linux-rt-rebase.q.orig/drivers/net/ucc_geth.h +++ linux-rt-rebase.q/drivers/net/ucc_geth.h @@ -30,6 +30,10 @@ #include "ucc_geth_mii.h" +#define DRV_DESC "QE UCC Gigabit Ethernet Controller" +#define DRV_NAME "ucc_geth" +#define DRV_VERSION "1.1" + #define NUM_TX_QUEUES 8 #define NUM_RX_QUEUES 8 #define NUM_BDS_IN_PREFETCHED_BDS 4 @@ -896,6 +900,7 @@ struct ucc_geth_hardware_statistics { #define UCC_GETH_TX_VTAG_TABLE_ENTRY_MAX 8 #define UCC_GETH_RX_BD_RING_SIZE_MIN 8 #define UCC_GETH_TX_BD_RING_SIZE_MIN 2 +#define UCC_GETH_BD_RING_SIZE_MAX 0xffff #define UCC_GETH_SIZE_OF_BD QE_SIZEOF_BD @@ -1135,6 +1140,7 @@ struct ucc_geth_info { int bro; int ecm; int receiveFlowControl; + int transmitFlowControl; u8 maxGroupAddrInHash; u8 maxIndAddrInHash; u8 prel; Index: linux-rt-rebase.q/drivers/net/ucc_geth_ethtool.c =================================================================== --- /dev/null +++ 
linux-rt-rebase.q/drivers/net/ucc_geth_ethtool.c
@@ -0,0 +1,388 @@
+/*
+ * Copyright (c) 2007 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Description: QE UCC Gigabit Ethernet Ethtool API Set
+ *
+ * Author: Li Yang <leoli@freescale.com>
+ *
+ * Limitation:
+ * Can only get/set settings of the first queue.
+ * Need to re-open the interface manually after changing some parameters.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/stddef.h>
+#include <linux/interrupt.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/fsl_devices.h>
+#include <linux/ethtool.h>
+#include <linux/mii.h>
+#include <linux/phy.h>
+
+#include <asm/io.h>
+#include <asm/immap_qe.h>
+#include <asm/qe.h>
+#include <asm/ucc.h>
+#include <asm/ucc_fast.h>
+
+#include "ucc_geth.h"
+#include "ucc_geth_mii.h"
+
+static char hw_stat_gstrings[][ETH_GSTRING_LEN] = {
+	"tx-64-frames",
+	"tx-65-127-frames",
+	"tx-128-255-frames",
+	"rx-64-frames",
+	"rx-65-127-frames",
+	"rx-128-255-frames",
+	"tx-bytes-ok",
+	"tx-pause-frames",
+	"tx-multicast-frames",
+	"tx-broadcast-frames",
+	"rx-frames",
+	"rx-bytes-ok",
+	"rx-bytes-all",
+	"rx-multicast-frames",
+	"rx-broadcast-frames",
+	"stats-counter-carry",
+	"stats-counter-mask",
+	"rx-dropped-frames",
+};
+
+static char tx_fw_stat_gstrings[][ETH_GSTRING_LEN] = {
+	"tx-single-collision",
+	"tx-multiple-collision",
+	"tx-late-collsion",
+	"tx-aborted-frames",
+	"tx-lost-frames",
+	"tx-carrier-sense-errors",
+	"tx-frames-ok",
+	"tx-excessive-differ-frames",
+	"tx-256-511-frames",
+	"tx-1024-1518-frames",
+	"tx-jumbo-frames",
+};
+
+static char rx_fw_stat_gstrings[][ETH_GSTRING_LEN] = {
+	"rx-crc-errors",
+	"rx-alignment-errors",
+	"rx-in-range-length-errors",
+	"rx-out-of-range-length-errors",
+	"rx-too-long-frames",
+	"rx-runt",
+	"rx-very-long-event",
+	"rx-symbol-errors",
+	"rx-busy-drop-frames",
+	"reserved",
+	"reserved",
+	"rx-mismatch-drop-frames",
+	"rx-small-than-64",
+	"rx-256-511-frames",
+	"rx-512-1023-frames",
+	"rx-1024-1518-frames",
+	"rx-jumbo-frames",
+	"rx-mac-error-loss",
+	"rx-pause-frames",
+	"reserved",
+	"rx-vlan-removed",
+	"rx-vlan-replaced",
+	"rx-vlan-inserted",
+	"rx-ip-checksum-errors",
+};
+
+#define UEC_HW_STATS_LEN	ARRAY_SIZE(hw_stat_gstrings)
+#define UEC_TX_FW_STATS_LEN	ARRAY_SIZE(tx_fw_stat_gstrings)
+#define UEC_RX_FW_STATS_LEN	ARRAY_SIZE(rx_fw_stat_gstrings)
+
+extern int init_flow_control_params(u32 automatic_flow_control_mode,
+		int rx_flow_control_enable,
+		int tx_flow_control_enable, u16 pause_period,
+		u16 extension_field, volatile u32 *upsmr_register,
+		volatile u32 *uempr_register, volatile u32 *maccfg1_register);
+
+static int
+uec_get_settings(struct net_device *netdev, struct ethtool_cmd *ecmd)
+{
+	struct ucc_geth_private *ugeth = netdev_priv(netdev);
+	struct phy_device *phydev = ugeth->phydev;
+	struct ucc_geth_info *ug_info = ugeth->ug_info;
+
+	if (!phydev)
+		return -ENODEV;
+
+	ecmd->maxtxpkt = 1;
+	ecmd->maxrxpkt = ug_info->interruptcoalescingmaxvalue[0];
+
+	return phy_ethtool_gset(phydev, ecmd);
+}
+
+static int
+uec_set_settings(struct net_device *netdev, struct ethtool_cmd *ecmd)
+{
+	struct ucc_geth_private *ugeth = netdev_priv(netdev);
+	struct phy_device *phydev = ugeth->phydev;
+
+	if (!phydev)
+		return -ENODEV;
+
+	return phy_ethtool_sset(phydev, ecmd);
+}
+
+static void
+uec_get_pauseparam(struct net_device *netdev,
+		struct ethtool_pauseparam *pause)
+{
+	struct ucc_geth_private *ugeth = 
netdev_priv(netdev); + + pause->autoneg = ugeth->phydev->autoneg; + + if (ugeth->ug_info->receiveFlowControl) + pause->rx_pause = 1; + if (ugeth->ug_info->transmitFlowControl) + pause->tx_pause = 1; +} + +static int +uec_set_pauseparam(struct net_device *netdev, + struct ethtool_pauseparam *pause) +{ + struct ucc_geth_private *ugeth = netdev_priv(netdev); + int ret = 0; + + ugeth->ug_info->receiveFlowControl = pause->rx_pause; + ugeth->ug_info->transmitFlowControl = pause->tx_pause; + + if (ugeth->phydev->autoneg) { + if (netif_running(netdev)) { + /* FIXME: automatically restart */ + printk(KERN_INFO + "Please re-open the interface.\n"); + } + } else { + struct ucc_geth_info *ug_info = ugeth->ug_info; + + ret = init_flow_control_params(ug_info->aufc, + ug_info->receiveFlowControl, + ug_info->transmitFlowControl, + ug_info->pausePeriod, + ug_info->extensionField, + &ugeth->uccf->uf_regs->upsmr, + &ugeth->ug_regs->uempr, + &ugeth->ug_regs->maccfg1); + } + + return ret; +} + +static uint32_t +uec_get_msglevel(struct net_device *netdev) +{ + struct ucc_geth_private *ugeth = netdev_priv(netdev); + return ugeth->msg_enable; +} + +static void +uec_set_msglevel(struct net_device *netdev, uint32_t data) +{ + struct ucc_geth_private *ugeth = netdev_priv(netdev); + ugeth->msg_enable = data; +} + +static int +uec_get_regs_len(struct net_device *netdev) +{ + return sizeof(struct ucc_geth); +} + +static void +uec_get_regs(struct net_device *netdev, + struct ethtool_regs *regs, void *p) +{ + int i; + struct ucc_geth_private *ugeth = netdev_priv(netdev); + u32 __iomem *ug_regs = (u32 __iomem *)ugeth->ug_regs; + u32 *buff = p; + + for (i = 0; i < sizeof(struct ucc_geth) / sizeof(u32); i++) + buff[i] = in_be32(&ug_regs[i]); +} + +static void +uec_get_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring) +{ + struct ucc_geth_private *ugeth = netdev_priv(netdev); + struct ucc_geth_info *ug_info = ugeth->ug_info; + int queue = 0; + + ring->rx_max_pending = UCC_GETH_BD_RING_SIZE_MAX; + ring->rx_mini_max_pending = UCC_GETH_BD_RING_SIZE_MAX; + ring->rx_jumbo_max_pending = UCC_GETH_BD_RING_SIZE_MAX; + ring->tx_max_pending = UCC_GETH_BD_RING_SIZE_MAX; + + ring->rx_pending = ug_info->bdRingLenRx[queue]; + ring->rx_mini_pending = ug_info->bdRingLenRx[queue]; + ring->rx_jumbo_pending = ug_info->bdRingLenRx[queue]; + ring->tx_pending = ug_info->bdRingLenTx[queue]; +} + +static int +uec_set_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring) +{ + struct ucc_geth_private *ugeth = netdev_priv(netdev); + struct ucc_geth_info *ug_info = ugeth->ug_info; + int queue = 0, ret = 0; + + if (ring->rx_pending < UCC_GETH_RX_BD_RING_SIZE_MIN) { + printk("%s: RxBD ring size must be no smaller than %d.\n", + netdev->name, UCC_GETH_RX_BD_RING_SIZE_MIN); + return -EINVAL; + } + if (ring->rx_pending % UCC_GETH_RX_BD_RING_SIZE_ALIGNMENT) { + printk("%s: RxBD ring size must be multiple of %d.\n", + netdev->name, UCC_GETH_RX_BD_RING_SIZE_ALIGNMENT); + return -EINVAL; + } + if (ring->tx_pending < UCC_GETH_TX_BD_RING_SIZE_MIN) { + printk("%s: TxBD ring size must be no smaller than %d.\n", + netdev->name, UCC_GETH_TX_BD_RING_SIZE_MIN); + return -EINVAL; + } + + ug_info->bdRingLenRx[queue] = ring->rx_pending; + ug_info->bdRingLenTx[queue] = ring->tx_pending; + + if (netif_running(netdev)) { + /* FIXME: restart automatically */ + printk(KERN_INFO + "Please re-open the interface.\n"); + } + + return ret; +} + +static int uec_get_stats_count(struct net_device *netdev) +{ + struct ucc_geth_private *ugeth = 
netdev_priv(netdev);
+	u32 stats_mode = ugeth->ug_info->statisticsMode;
+	int len = 0;
+
+	if (stats_mode & UCC_GETH_STATISTICS_GATHERING_MODE_HARDWARE)
+		len += UEC_HW_STATS_LEN;
+	if (stats_mode & UCC_GETH_STATISTICS_GATHERING_MODE_FIRMWARE_TX)
+		len += UEC_TX_FW_STATS_LEN;
+	if (stats_mode & UCC_GETH_STATISTICS_GATHERING_MODE_FIRMWARE_RX)
+		len += UEC_RX_FW_STATS_LEN;
+
+	return len;
+}
+
+static void uec_get_strings(struct net_device *netdev, u32 stringset, u8 *buf)
+{
+	struct ucc_geth_private *ugeth = netdev_priv(netdev);
+	u32 stats_mode = ugeth->ug_info->statisticsMode;
+
+	if (stats_mode & UCC_GETH_STATISTICS_GATHERING_MODE_HARDWARE) {
+		memcpy(buf, hw_stat_gstrings, UEC_HW_STATS_LEN *
+				ETH_GSTRING_LEN);
+		buf += UEC_HW_STATS_LEN * ETH_GSTRING_LEN;
+	}
+	if (stats_mode & UCC_GETH_STATISTICS_GATHERING_MODE_FIRMWARE_TX) {
+		memcpy(buf, tx_fw_stat_gstrings, UEC_TX_FW_STATS_LEN *
+				ETH_GSTRING_LEN);
+		buf += UEC_TX_FW_STATS_LEN * ETH_GSTRING_LEN;
+	}
+	if (stats_mode & UCC_GETH_STATISTICS_GATHERING_MODE_FIRMWARE_RX)
+		memcpy(buf, rx_fw_stat_gstrings, UEC_RX_FW_STATS_LEN *
+				ETH_GSTRING_LEN);
+}
+
+static void uec_get_ethtool_stats(struct net_device *netdev,
+		struct ethtool_stats *stats, uint64_t *data)
+{
+	struct ucc_geth_private *ugeth = netdev_priv(netdev);
+	u32 stats_mode = ugeth->ug_info->statisticsMode;
+	u32 __iomem *base;
+	int i, j = 0;
+
+	if (stats_mode & UCC_GETH_STATISTICS_GATHERING_MODE_HARDWARE) {
+		base = (u32 __iomem *)&ugeth->ug_regs->tx64;
+		for (i = 0; i < UEC_HW_STATS_LEN; i++)
+			data[j++] = (u64)in_be32(&base[i]);
+	}
+	if (stats_mode & UCC_GETH_STATISTICS_GATHERING_MODE_FIRMWARE_TX) {
+		base = (u32 __iomem *)ugeth->p_tx_fw_statistics_pram;
+		for (i = 0; i < UEC_TX_FW_STATS_LEN; i++)
+			data[j++] = (u64)in_be32(&base[i]);
+	}
+	if (stats_mode & UCC_GETH_STATISTICS_GATHERING_MODE_FIRMWARE_RX) {
+		base = (u32 __iomem *)ugeth->p_rx_fw_statistics_pram;
+		for (i = 0; i < UEC_RX_FW_STATS_LEN; i++)
+			data[j++] = (u64)in_be32(&base[i]);
+	}
+}
+
+static int uec_nway_reset(struct net_device *netdev)
+{
+	struct ucc_geth_private *ugeth = netdev_priv(netdev);
+
+	return phy_start_aneg(ugeth->phydev);
+}
+
+/* Report driver information */
+static void
+uec_get_drvinfo(struct net_device *netdev,
+		struct ethtool_drvinfo *drvinfo)
+{
+	strncpy(drvinfo->driver, DRV_NAME, 32);
+	strncpy(drvinfo->version, DRV_VERSION, 32);
+	strncpy(drvinfo->fw_version, "N/A", 32);
+	strncpy(drvinfo->bus_info, "QUICC ENGINE", 32);
+	drvinfo->n_stats = uec_get_stats_count(netdev);
+	drvinfo->testinfo_len = 0;
+	drvinfo->eedump_len = 0;
+	drvinfo->regdump_len = uec_get_regs_len(netdev);
+}
+
+static const struct ethtool_ops uec_ethtool_ops = {
+	.get_settings = uec_get_settings,
+	.set_settings = uec_set_settings,
+	.get_drvinfo = uec_get_drvinfo,
+	.get_regs_len = uec_get_regs_len,
+	.get_regs = uec_get_regs,
+	.get_msglevel = uec_get_msglevel,
+	.set_msglevel = uec_set_msglevel,
+	.nway_reset = uec_nway_reset,
+	.get_link = ethtool_op_get_link,
+	.get_ringparam = uec_get_ringparam,
+	.set_ringparam = uec_set_ringparam,
+	.get_pauseparam = uec_get_pauseparam,
+	.set_pauseparam = uec_set_pauseparam,
+	.get_sg = ethtool_op_get_sg,
+	.set_sg = ethtool_op_set_sg,
+	.get_tso = ethtool_op_get_tso,
+	.get_stats_count = uec_get_stats_count,
+	.get_strings = uec_get_strings,
+	.get_ethtool_stats = uec_get_ethtool_stats,
+	.get_perm_addr = ethtool_op_get_perm_addr,
+};
+
+void uec_set_ethtool_ops(struct net_device *netdev)
+{
+	SET_ETHTOOL_OPS(netdev, &uec_ethtool_ops);
+}
Index: 
linux-rt-rebase.q/drivers/net/ucc_geth_mii.c =================================================================== --- linux-rt-rebase.q.orig/drivers/net/ucc_geth_mii.c +++ linux-rt-rebase.q/drivers/net/ucc_geth_mii.c @@ -54,8 +54,8 @@ #define vdbg(format, arg...) do {} while(0) #endif -#define DRV_DESC "QE UCC Ethernet Controller MII Bus" -#define DRV_NAME "fsl-uec_mdio" +#define MII_DRV_DESC "QE UCC Ethernet Controller MII Bus" +#define MII_DRV_NAME "fsl-uec_mdio" /* Write value to the PHY for this device to the register at regnum, */ /* waiting until the write is done before it returns. All PHY */ @@ -261,7 +261,7 @@ static struct of_device_id uec_mdio_matc }; static struct of_platform_driver uec_mdio_driver = { - .name = DRV_NAME, + .name = MII_DRV_NAME, .probe = uec_mdio_probe, .remove = uec_mdio_remove, .match_table = uec_mdio_match, Index: linux-rt-rebase.q/drivers/pci/pci-acpi.c =================================================================== --- linux-rt-rebase.q.orig/drivers/pci/pci-acpi.c +++ linux-rt-rebase.q/drivers/pci/pci-acpi.c @@ -245,16 +245,33 @@ EXPORT_SYMBOL(pci_osc_control_set); * currently we simply return _SxD, if present. */ -static int acpi_pci_choose_state(struct pci_dev *pdev, pm_message_t state) +static pci_power_t acpi_pci_choose_state(struct pci_dev *pdev, + pm_message_t state) { - /* TBD */ + int acpi_state; - return -ENODEV; + acpi_state = acpi_pm_device_sleep_state(&pdev->dev, + device_may_wakeup(&pdev->dev), NULL); + if (acpi_state < 0) + return PCI_POWER_ERROR; + + switch (acpi_state) { + case ACPI_STATE_D0: + return PCI_D0; + case ACPI_STATE_D1: + return PCI_D1; + case ACPI_STATE_D2: + return PCI_D2; + case ACPI_STATE_D3: + return PCI_D3hot; + } + return PCI_POWER_ERROR; } static int acpi_pci_set_power_state(struct pci_dev *dev, pci_power_t state) { acpi_handle handle = DEVICE_ACPI_HANDLE(&dev->dev); + acpi_handle tmp; static int state_conv[] = { [0] = 0, [1] = 1, @@ -266,6 +283,9 @@ static int acpi_pci_set_power_state(stru if (!handle) return -ENODEV; + /* If the ACPI device has _EJ0, ignore the device */ + if (ACPI_SUCCESS(acpi_get_handle(handle, "_EJ0", &tmp))) + return 0; return acpi_bus_set_power(handle, acpi_state); } Index: linux-rt-rebase.q/drivers/pci/pci.c =================================================================== --- linux-rt-rebase.q.orig/drivers/pci/pci.c +++ linux-rt-rebase.q/drivers/pci/pci.c @@ -499,7 +499,7 @@ pci_set_power_state(struct pci_dev *dev, return 0; } -int (*platform_pci_choose_state)(struct pci_dev *dev, pm_message_t state); +pci_power_t (*platform_pci_choose_state)(struct pci_dev *dev, pm_message_t state); /** * pci_choose_state - Choose the power state of a PCI device @@ -513,15 +513,15 @@ int (*platform_pci_choose_state)(struct pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state) { - int ret; + pci_power_t ret; if (!pci_find_capability(dev, PCI_CAP_ID_PM)) return PCI_D0; if (platform_pci_choose_state) { ret = platform_pci_choose_state(dev, state); - if (ret >= 0) - state.event = ret; + if (ret != PCI_POWER_ERROR) + return ret; } switch (state.event) { @@ -1604,6 +1604,7 @@ early_param("pci", pci_setup); device_initcall(pci_init); EXPORT_SYMBOL_GPL(pci_restore_bars); +EXPORT_SYMBOL(__pci_reenable_device); EXPORT_SYMBOL(pci_enable_device_bars); EXPORT_SYMBOL(pci_enable_device); EXPORT_SYMBOL(pcim_enable_device); Index: linux-rt-rebase.q/drivers/pci/pci.h =================================================================== --- linux-rt-rebase.q.orig/drivers/pci/pci.h +++ 
linux-rt-rebase.q/drivers/pci/pci.h
@@ -1,6 +1,5 @@
 /* Functions internal to the PCI core code */
 
-extern int __must_check __pci_reenable_device(struct pci_dev *);
 extern int pci_uevent(struct device *dev, char **envp, int num_envp,
 		      char *buffer, int buffer_size);
 extern int pci_create_sysfs_dev_files(struct pci_dev *pdev);
@@ -13,7 +12,7 @@ extern int pci_bus_alloc_resource(struct
 					   resource_size_t, resource_size_t),
 		       void *alignf_data);
 /* Firmware callbacks */
-extern int (*platform_pci_choose_state)(struct pci_dev *dev, pm_message_t state);
+extern pci_power_t (*platform_pci_choose_state)(struct pci_dev *dev, pm_message_t state);
 extern int (*platform_pci_set_power_state)(struct pci_dev *dev, pci_power_t state);
 
 extern int pci_user_read_config_byte(struct pci_dev *dev, int where, u8 *val);
Index: linux-rt-rebase.q/drivers/pnp/driver.c
===================================================================
--- linux-rt-rebase.q.orig/drivers/pnp/driver.c
+++ linux-rt-rebase.q/drivers/pnp/driver.c
@@ -167,6 +167,8 @@ static int pnp_bus_suspend(struct device
 			return error;
 	}
 
+	if (pnp_dev->protocol && pnp_dev->protocol->suspend)
+		pnp_dev->protocol->suspend(pnp_dev, state);
 	return 0;
 }
 
@@ -179,6 +181,9 @@ static int pnp_bus_resume(struct device
 	if (!pnp_drv)
 		return 0;
 
+	if (pnp_dev->protocol && pnp_dev->protocol->resume)
+		pnp_dev->protocol->resume(pnp_dev);
+
 	if (!(pnp_drv->flags & PNP_DRIVER_RES_DO_NOT_CHANGE)) {
 		error = pnp_start_dev(pnp_dev);
 		if (error)
Index: linux-rt-rebase.q/drivers/pnp/pnpacpi/core.c
===================================================================
--- linux-rt-rebase.q.orig/drivers/pnp/pnpacpi/core.c
+++ linux-rt-rebase.q/drivers/pnp/pnpacpi/core.c
@@ -21,7 +21,10 @@
 #include <linux/acpi.h>
 #include <linux/pnp.h>
+#include <linux/mod_devicetable.h>
 #include <acpi/acpi_bus.h>
+#include <acpi/actypes.h>
+
 #include "pnpacpi.h"
 
 static int num = 0;
@@ -33,15 +36,17 @@ static int num = 0;
  * have irqs (PIC, Timer) because we call acpi_register_gsi.
  * Finaly only devices that have a CRS method need to be in this list.
  */
-static char __initdata excluded_id_list[] =
-	"PNP0C09," /* EC */
-	"PNP0C0F," /* Link device */
-	"PNP0000," /* PIC */
-	"PNP0100," /* Timer */
-	;
+static __initdata struct acpi_device_id excluded_id_list[] = {
+	{"PNP0C09", 0},	/* EC */
+	{"PNP0C0F", 0},	/* Link device */
+	{"PNP0000", 0},	/* PIC */
+	{"PNP0100", 0},	/* Timer */
+	{"", 0},
+};
+
 static inline int is_exclusive_device(struct acpi_device *dev)
 {
-	return (!acpi_match_ids(dev, excluded_id_list));
+	return (!acpi_match_device_ids(dev, excluded_id_list));
 }
 
 /*
@@ -119,11 +124,25 @@ static int pnpacpi_disable_resources(str
 	return ACPI_FAILURE(status) ? 
-ENODEV : 0; } +static int pnpacpi_suspend(struct pnp_dev *dev, pm_message_t state) +{ + return acpi_bus_set_power((acpi_handle)dev->data, + acpi_pm_device_sleep_state(&dev->dev, + device_may_wakeup(&dev->dev), NULL)); +} + +static int pnpacpi_resume(struct pnp_dev *dev) +{ + return acpi_bus_set_power((acpi_handle)dev->data, ACPI_STATE_D0); +} + static struct pnp_protocol pnpacpi_protocol = { .name = "Plug and Play ACPI", .get = pnpacpi_get_resources, .set = pnpacpi_set_resources, .disable = pnpacpi_disable_resources, + .suspend = pnpacpi_suspend, + .resume = pnpacpi_resume, }; static int __init pnpacpi_add_device(struct acpi_device *device) Index: linux-rt-rebase.q/drivers/rtc/class.c =================================================================== --- linux-rt-rebase.q.orig/drivers/rtc/class.c +++ linux-rt-rebase.q/drivers/rtc/class.c @@ -46,6 +46,7 @@ static int rtc_suspend(struct device *de { struct rtc_device *rtc = to_rtc_device(dev); struct rtc_time tm; + struct timespec ts = current_kernel_time(); if (strncmp(rtc->dev.bus_id, CONFIG_RTC_HCTOSYS_DEVICE, @@ -57,8 +58,8 @@ static int rtc_suspend(struct device *de /* RTC precision is 1 second; adjust delta for avg 1/2 sec err */ set_normalized_timespec(&delta, - xtime.tv_sec - oldtime, - xtime.tv_nsec - (NSEC_PER_SEC >> 1)); + ts.tv_sec - oldtime, + ts.tv_nsec - (NSEC_PER_SEC >> 1)); return 0; } Index: linux-rt-rebase.q/drivers/s390/block/dasd.c =================================================================== --- linux-rt-rebase.q.orig/drivers/s390/block/dasd.c +++ linux-rt-rebase.q/drivers/s390/block/dasd.c @@ -1187,7 +1187,7 @@ dasd_end_request_cb(struct dasd_ccw_req static void __dasd_process_blk_queue(struct dasd_device * device) { - request_queue_t *queue; + struct request_queue *queue; struct request *req; struct dasd_ccw_req *cqr; int nr_queued; @@ -1740,7 +1740,7 @@ dasd_cancel_req(struct dasd_ccw_req *cqr * Dasd request queue function. Called from ll_rw_blk.c */ static void -do_dasd_request(request_queue_t * queue) +do_dasd_request(struct request_queue * queue) { struct dasd_device *device; Index: linux-rt-rebase.q/drivers/s390/block/dasd_int.h =================================================================== --- linux-rt-rebase.q.orig/drivers/s390/block/dasd_int.h +++ linux-rt-rebase.q/drivers/s390/block/dasd_int.h @@ -293,7 +293,7 @@ struct dasd_uid { struct dasd_device { /* Block device stuff. */ struct gendisk *gdp; - request_queue_t *request_queue; + struct request_queue *request_queue; spinlock_t request_queue_lock; struct block_device *bdev; unsigned int devindex; Index: linux-rt-rebase.q/drivers/s390/block/dcssblk.c =================================================================== --- linux-rt-rebase.q.orig/drivers/s390/block/dcssblk.c +++ linux-rt-rebase.q/drivers/s390/block/dcssblk.c @@ -621,7 +621,7 @@ out: } static int -dcssblk_make_request(request_queue_t *q, struct bio *bio) +dcssblk_make_request(struct request_queue *q, struct bio *bio) { struct dcssblk_dev_info *dev_info; struct bio_vec *bvec; Index: linux-rt-rebase.q/drivers/s390/block/xpram.c =================================================================== --- linux-rt-rebase.q.orig/drivers/s390/block/xpram.c +++ linux-rt-rebase.q/drivers/s390/block/xpram.c @@ -191,7 +191,7 @@ static unsigned long __init xpram_highes /* * Block device make request function. 
*/ -static int xpram_make_request(request_queue_t *q, struct bio *bio) +static int xpram_make_request(struct request_queue *q, struct bio *bio) { xpram_device_t *xdev = bio->bi_bdev->bd_disk->private_data; struct bio_vec *bvec; Index: linux-rt-rebase.q/drivers/s390/char/tape.h =================================================================== --- linux-rt-rebase.q.orig/drivers/s390/char/tape.h +++ linux-rt-rebase.q/drivers/s390/char/tape.h @@ -188,7 +188,7 @@ struct tape_blk_data { struct tape_device * device; /* Block device request queue. */ - request_queue_t * request_queue; + struct request_queue * request_queue; spinlock_t request_queue_lock; /* Task to move entries from block request to CCS request queue. */ Index: linux-rt-rebase.q/drivers/s390/char/tape_block.c =================================================================== --- linux-rt-rebase.q.orig/drivers/s390/char/tape_block.c +++ linux-rt-rebase.q/drivers/s390/char/tape_block.c @@ -147,7 +147,7 @@ static void tapeblock_requeue(struct work_struct *work) { struct tape_blk_data * blkdat; struct tape_device * device; - request_queue_t * queue; + struct request_queue * queue; int nr_queued; struct request * req; struct list_head * l; @@ -194,7 +194,7 @@ tapeblock_requeue(struct work_struct *wo * Tape request queue function. Called from ll_rw_blk.c */ static void -tapeblock_request_fn(request_queue_t *queue) +tapeblock_request_fn(struct request_queue *queue) { struct tape_device *device; Index: linux-rt-rebase.q/drivers/s390/net/ctcmain.c =================================================================== --- linux-rt-rebase.q.orig/drivers/s390/net/ctcmain.c +++ linux-rt-rebase.q/drivers/s390/net/ctcmain.c @@ -674,7 +674,7 @@ ch_action_txdone(fsm_instance * fi, int int first = 1; int i; unsigned long duration; - struct timespec done_stamp = xtime; + struct timespec done_stamp = current_kernel_time(); DBF_TEXT(trace, 4, __FUNCTION__); @@ -730,7 +730,7 @@ ch_action_txdone(fsm_instance * fi, int spin_unlock(&ch->collect_lock); ch->ccw[1].count = ch->trans_skb->len; fsm_addtimer(&ch->timer, CTC_TIMEOUT_5SEC, CH_EVENT_TIMER, ch); - ch->prof.send_stamp = xtime; + ch->prof.send_stamp = current_kernel_time(); rc = ccw_device_start(ch->cdev, &ch->ccw[0], (unsigned long) ch, 0xff, 0); ch->prof.doios_multi++; @@ -2281,7 +2281,7 @@ transmit_skb(struct channel *ch, struct fsm_newstate(ch->fsm, CH_STATE_TX); fsm_addtimer(&ch->timer, CTC_TIMEOUT_5SEC, CH_EVENT_TIMER, ch); spin_lock_irqsave(get_ccwdev_lock(ch->cdev), saveflags); - ch->prof.send_stamp = xtime; + ch->prof.send_stamp = current_kernel_time(); rc = ccw_device_start(ch->cdev, &ch->ccw[ccw_idx], (unsigned long) ch, 0xff, 0); spin_unlock_irqrestore(get_ccwdev_lock(ch->cdev), saveflags); Index: linux-rt-rebase.q/drivers/s390/net/netiucv.c =================================================================== --- linux-rt-rebase.q.orig/drivers/s390/net/netiucv.c +++ linux-rt-rebase.q/drivers/s390/net/netiucv.c @@ -753,7 +753,7 @@ static void conn_action_txdone(fsm_insta header.next = 0; memcpy(skb_put(conn->tx_buff, NETIUCV_HDRLEN), &header, NETIUCV_HDRLEN); - conn->prof.send_stamp = xtime; + conn->prof.send_stamp = current_kernel_time(); txmsg.class = 0; txmsg.tag = 0; rc = iucv_message_send(conn->path, &txmsg, 0, 0, @@ -1185,7 +1185,7 @@ static int netiucv_transmit_skb(struct i memcpy(skb_put(nskb, NETIUCV_HDRLEN), &header, NETIUCV_HDRLEN); fsm_newstate(conn->fsm, CONN_STATE_TX); - conn->prof.send_stamp = xtime; + conn->prof.send_stamp = current_kernel_time(); msg.tag = 1; msg.class = 0; 
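
A recurring pair of conversions runs through the block and s390 hunks above: the
deprecated request_queue_t typedef is spelled out as struct request_queue, and
direct reads of the xtime variable are replaced with current_kernel_time(),
which samples the wall clock under the timekeeping seqlock instead of racing an
unlocked global. A minimal sketch of the resulting pattern; the function and
its body are hypothetical, for illustration only, not taken from any patch in
this series:

	#include <linux/blkdev.h>	/* struct request_queue */
	#include <linux/time.h>		/* current_kernel_time() */

	/* was: static void sketch_request_fn(request_queue_t *q) */
	static void sketch_request_fn(struct request_queue *q)
	{
		/* was: struct timespec stamp = xtime;  (unlocked read) */
		struct timespec stamp = current_kernel_time();

		/* ... dequeue requests from q and record stamp ... */
	}
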
Index: linux-rt-rebase.q/drivers/sbus/char/Kconfig =================================================================== --- linux-rt-rebase.q.orig/drivers/sbus/char/Kconfig +++ linux-rt-rebase.q/drivers/sbus/char/Kconfig @@ -15,6 +15,7 @@ config SUN_OPENPROMIO config SUN_MOSTEK_RTC tristate "Mostek real time clock support" + depends on SPARC32 help The Mostek RTC chip is used on all known Sun computers except some JavaStations. For a JavaStation you need to say Y both here Index: linux-rt-rebase.q/drivers/sbus/char/jsflash.c =================================================================== --- linux-rt-rebase.q.orig/drivers/sbus/char/jsflash.c +++ linux-rt-rebase.q/drivers/sbus/char/jsflash.c @@ -185,7 +185,7 @@ static void jsfd_read(char *buf, unsigne } } -static void jsfd_do_request(request_queue_t *q) +static void jsfd_do_request(struct request_queue *q) { struct request *req; Index: linux-rt-rebase.q/drivers/scsi/aacraid/linit.c =================================================================== --- linux-rt-rebase.q.orig/drivers/scsi/aacraid/linit.c +++ linux-rt-rebase.q/drivers/scsi/aacraid/linit.c @@ -636,6 +636,8 @@ static int aac_cfg_open(struct inode *in static int aac_cfg_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) { + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; return aac_do_ioctl(file->private_data, cmd, (void __user *)arg); } @@ -689,6 +691,8 @@ static int aac_compat_ioctl(struct scsi_ static long aac_compat_cfg_ioctl(struct file *file, unsigned cmd, unsigned long arg) { + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; return aac_compat_do_ioctl((struct aac_dev *)file->private_data, cmd, arg); } #endif Index: linux-rt-rebase.q/drivers/scsi/scsi_lib.c =================================================================== --- linux-rt-rebase.q.orig/drivers/scsi/scsi_lib.c +++ linux-rt-rebase.q/drivers/scsi/scsi_lib.c @@ -654,7 +654,7 @@ void scsi_run_host_queues(struct Scsi_Ho static struct scsi_cmnd *scsi_end_request(struct scsi_cmnd *cmd, int uptodate, int bytes, int requeue) { - request_queue_t *q = cmd->device->request_queue; + struct request_queue *q = cmd->device->request_queue; struct request *req = cmd->request; unsigned long flags; @@ -818,7 +818,7 @@ void scsi_io_completion(struct scsi_cmnd { int result = cmd->result; int this_count = cmd->request_bufflen; - request_queue_t *q = cmd->device->request_queue; + struct request_queue *q = cmd->device->request_queue; struct request *req = cmd->request; int clear_errors = 1; struct scsi_sense_hdr sshdr; @@ -1038,7 +1038,7 @@ static int scsi_init_io(struct scsi_cmnd return BLKPREP_KILL; } -static int scsi_issue_flush_fn(request_queue_t *q, struct gendisk *disk, +static int scsi_issue_flush_fn(struct request_queue *q, struct gendisk *disk, sector_t *error_sector) { struct scsi_device *sdev = q->queuedata; @@ -1340,7 +1340,7 @@ static inline int scsi_host_queue_ready( /* * Kill a request for a dead device */ -static void scsi_kill_request(struct request *req, request_queue_t *q) +static void scsi_kill_request(struct request *req, struct request_queue *q) { struct scsi_cmnd *cmd = req->special; struct scsi_device *sdev = cmd->device; @@ -2119,7 +2119,7 @@ EXPORT_SYMBOL(scsi_target_resume); int scsi_internal_device_block(struct scsi_device *sdev) { - request_queue_t *q = sdev->request_queue; + struct request_queue *q = sdev->request_queue; unsigned long flags; int err = 0; @@ -2159,7 +2159,7 @@ EXPORT_SYMBOL_GPL(scsi_internal_device_b int scsi_internal_device_unblock(struct scsi_device *sdev) 
{ - request_queue_t *q = sdev->request_queue; + struct request_queue *q = sdev->request_queue; int err; unsigned long flags; Index: linux-rt-rebase.q/drivers/scsi/sd.c =================================================================== --- linux-rt-rebase.q.orig/drivers/scsi/sd.c +++ linux-rt-rebase.q/drivers/scsi/sd.c @@ -814,7 +814,7 @@ static int sd_issue_flush(struct device return ret; } -static void sd_prepare_flush(request_queue_t *q, struct request *rq) +static void sd_prepare_flush(struct request_queue *q, struct request *rq) { memset(rq->cmd, 0, sizeof(rq->cmd)); rq->cmd_type = REQ_TYPE_BLOCK_PC; @@ -1285,7 +1285,7 @@ got_data: */ int hard_sector = sector_size; sector_t sz = (sdkp->capacity/2) * (hard_sector/256); - request_queue_t *queue = sdp->request_queue; + struct request_queue *queue = sdp->request_queue; sector_t mb = sz; blk_queue_hardsect_size(queue, hard_sector); Index: linux-rt-rebase.q/drivers/scsi/sr.c =================================================================== --- linux-rt-rebase.q.orig/drivers/scsi/sr.c +++ linux-rt-rebase.q/drivers/scsi/sr.c @@ -624,7 +624,7 @@ static void get_sectorsize(struct scsi_c unsigned char *buffer; int the_result, retries = 3; int sector_size; - request_queue_t *queue; + struct request_queue *queue; buffer = kmalloc(512, GFP_KERNEL | GFP_DMA); if (!buffer) Index: linux-rt-rebase.q/drivers/serial/68328serial.c =================================================================== --- linux-rt-rebase.q.orig/drivers/serial/68328serial.c +++ linux-rt-rebase.q/drivers/serial/68328serial.c @@ -33,7 +33,6 @@ #include #include #include -#include #include #include @@ -401,9 +400,9 @@ irqreturn_t rs_interrupt(int irq, void * return IRQ_HANDLED; } -static void do_softint(void *private) +static void do_softint(struct work_struct *work) { - struct m68k_serial *info = (struct m68k_serial *) private; + struct m68k_serial *info = container_of(work, struct m68k_serial, tqueue); struct tty_struct *tty; tty = info->tty; @@ -425,9 +424,9 @@ static void do_softint(void *private) * do_serial_hangup() -> tty->hangup() -> rs_hangup() * */ -static void do_serial_hangup(void *private) +static void do_serial_hangup(struct work_struct *work) { - struct m68k_serial *info = (struct m68k_serial *) private; + struct m68k_serial *info = container_of(work, struct m68k_serial, tqueue_hangup); struct tty_struct *tty; tty = info->tty; @@ -1324,59 +1323,6 @@ static void show_serial_version(void) printk("MC68328 serial driver version 1.00\n"); } -#ifdef CONFIG_PM_LEGACY -/* Serial Power management - * The console (currently fixed at line 0) is a special case for power - * management because the kernel is so chatty. The console will be - * explicitly disabled my our power manager as the last minute, so we won't - * mess with it here. 
- */ -static struct pm_dev *serial_pm[NR_PORTS]; - -static int serial_pm_callback(struct pm_dev *dev, pm_request_t request, void *data) -{ - struct m68k_serial *info = (struct m68k_serial *)dev->data; - - if(info == NULL) - return -1; - - /* special case for line 0 - pm restores it */ - if(info->line == 0) - return 0; - - switch (request) { - case PM_SUSPEND: - shutdown(info); - break; - - case PM_RESUME: - startup(info); - break; - } - return 0; -} - -void shutdown_console(void) -{ - struct m68k_serial *info = &m68k_soft[0]; - - /* HACK: wait a bit for any pending printk's to be dumped */ - { - int i = 10000; - while(i--); - } - - shutdown(info); -} - -void startup_console(void) -{ - struct m68k_serial *info = &m68k_soft[0]; - startup(info); -} -#endif /* CONFIG_PM_LEGACY */ - - static const struct tty_operations rs_ops = { .open = rs_open, .close = rs_close, @@ -1444,8 +1390,8 @@ rs68328_init(void) info->event = 0; info->count = 0; info->blocked_open = 0; - INIT_WORK(&info->tqueue, do_softint, info); - INIT_WORK(&info->tqueue_hangup, do_serial_hangup, info); + INIT_WORK(&info->tqueue, do_softint); + INIT_WORK(&info->tqueue_hangup, do_serial_hangup); init_waitqueue_head(&info->open_wait); init_waitqueue_head(&info->close_wait); info->line = i; @@ -1467,11 +1413,6 @@ rs68328_init(void) IRQ_FLG_STD, "M68328_UART", NULL)) panic("Unable to attach 68328 serial interrupt\n"); -#ifdef CONFIG_PM_LEGACY - serial_pm[i] = pm_register(PM_SYS_DEV, PM_SYS_COM, serial_pm_callback); - if (serial_pm[i]) - serial_pm[i]->data = info; -#endif } local_irq_restore(flags); return 0; Index: linux-rt-rebase.q/drivers/serial/8250.c =================================================================== --- linux-rt-rebase.q.orig/drivers/serial/8250.c +++ linux-rt-rebase.q/drivers/serial/8250.c @@ -2650,8 +2650,9 @@ static int __devinit serial8250_probe(st ret = serial8250_register_port(&port); if (ret < 0) { dev_err(&dev->dev, "unable to register port at index %d " - "(IO%lx MEM%lx IRQ%d): %d\n", i, - p->iobase, p->mapbase, p->irq, ret); + "(IO%lx MEM%llx IRQ%d): %d\n", i, + p->iobase, (unsigned long long)p->mapbase, + p->irq, ret); } } return 0; Index: linux-rt-rebase.q/drivers/serial/8250_early.c =================================================================== --- linux-rt-rebase.q.orig/drivers/serial/8250_early.c +++ linux-rt-rebase.q/drivers/serial/8250_early.c @@ -151,8 +151,9 @@ static int __init parse_options(struct e #else port->membase = ioremap(port->mapbase, 64); if (!port->membase) { - printk(KERN_ERR "%s: Couldn't ioremap 0x%lx\n", - __FUNCTION__, port->mapbase); + printk(KERN_ERR "%s: Couldn't ioremap 0x%llx\n", + __FUNCTION__, + (unsigned long long)port->mapbase); return -ENOMEM; } #endif @@ -175,9 +176,10 @@ static int __init parse_options(struct e device->baud); } - printk(KERN_INFO "Early serial console at %s 0x%lx (options '%s')\n", + printk(KERN_INFO "Early serial console at %s 0x%llx (options '%s')\n", mmio ? "MMIO" : "I/O port", - mmio ? port->mapbase : (unsigned long) port->iobase, + mmio ? 
(unsigned long long) port->mapbase
+	       : (unsigned long long) port->iobase,
 	       device->options);
 	return 0;
 }
Index: linux-rt-rebase.q/drivers/serial/serial_core.c
===================================================================
--- linux-rt-rebase.q.orig/drivers/serial/serial_core.c
+++ linux-rt-rebase.q/drivers/serial/serial_core.c
@@ -626,7 +626,7 @@ static int uart_get_info(struct uart_sta
 	tmp.hub6	    = port->hub6;
 	tmp.io_type         = port->iotype;
 	tmp.iomem_reg_shift = port->regshift;
-	tmp.iomem_base      = (void *)port->mapbase;
+	tmp.iomem_base      = (void *)(unsigned long)port->mapbase;
 
 	if (copy_to_user(retinfo, &tmp, sizeof(*retinfo)))
 		return -EFAULT;
@@ -1666,10 +1666,11 @@ static int uart_line_info(char *buf, str
 		return 0;
 
 	mmio = port->iotype >= UPIO_MEM;
-	ret = sprintf(buf, "%d: uart:%s %s%08lX irq:%d",
+	ret = sprintf(buf, "%d: uart:%s %s%08llX irq:%d",
 		      port->line, uart_type(port),
 		      mmio ? "mmio:0x" : "port:",
-		      mmio ? port->mapbase : (unsigned long) port->iobase,
+		      mmio ? (unsigned long long)port->mapbase
+		           : (unsigned long long) port->iobase,
 		      port->irq);
 
 	if (port->type == PORT_UNKNOWN) {
@@ -2069,7 +2070,7 @@ uart_report_port(struct uart_driver *drv
 	case UPIO_TSI:
 	case UPIO_DWAPB:
 		snprintf(address, sizeof(address),
-			 "MMIO 0x%lx", port->mapbase);
+			 "MMIO 0x%llx", (unsigned long long)port->mapbase);
 		break;
 	default:
 		strlcpy(address, "*unknown*", sizeof(address));
Index: linux-rt-rebase.q/drivers/video/Kconfig
===================================================================
--- linux-rt-rebase.q.orig/drivers/video/Kconfig
+++ linux-rt-rebase.q/drivers/video/Kconfig
@@ -1571,7 +1571,14 @@ config FB_PM3
 
 config FB_AU1100
 	bool "Au1100 LCD Driver"
-	depends on (FB = y) && EXPERIMENTAL && PCI && MIPS && MIPS_PB1100=y
+	depends on (FB = y) && MIPS && SOC_AU1100
+	select FB_CFB_FILLRECT
+	select FB_CFB_COPYAREA
+	select FB_CFB_IMAGEBLIT
+	help
+	  This is the framebuffer driver for the AMD Au1100 SOC. It can drive
+	  various panels and CRTs by passing in kernel cmd line option
+	  au1100fb:panel=<name>.
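
The help text above documents a boot-time option. For illustration only: the
au1100fb:panel= syntax comes from the help text, fbdev options are
conventionally passed through the video= parameter, and the panel name below is
a hypothetical placeholder (valid names come from the driver's panel table):

	# kernel command line, substituting a panel name the driver knows
	video=au1100fb:panel=PANEL_NAME
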
config FB_AU1200 bool "Au1200 LCD Driver" Index: linux-rt-rebase.q/fs/bio.c =================================================================== --- linux-rt-rebase.q.orig/fs/bio.c +++ linux-rt-rebase.q/fs/bio.c @@ -230,7 +230,7 @@ void bio_put(struct bio *bio) } } -inline int bio_phys_segments(request_queue_t *q, struct bio *bio) +inline int bio_phys_segments(struct request_queue *q, struct bio *bio) { if (unlikely(!bio_flagged(bio, BIO_SEG_VALID))) blk_recount_segments(q, bio); @@ -238,7 +238,7 @@ inline int bio_phys_segments(request_que return bio->bi_phys_segments; } -inline int bio_hw_segments(request_queue_t *q, struct bio *bio) +inline int bio_hw_segments(struct request_queue *q, struct bio *bio) { if (unlikely(!bio_flagged(bio, BIO_SEG_VALID))) blk_recount_segments(q, bio); @@ -257,7 +257,7 @@ inline int bio_hw_segments(request_queue */ void __bio_clone(struct bio *bio, struct bio *bio_src) { - request_queue_t *q = bdev_get_queue(bio_src->bi_bdev); + struct request_queue *q = bdev_get_queue(bio_src->bi_bdev); memcpy(bio->bi_io_vec, bio_src->bi_io_vec, bio_src->bi_max_vecs * sizeof(struct bio_vec)); @@ -303,7 +303,7 @@ struct bio *bio_clone(struct bio *bio, g */ int bio_get_nr_vecs(struct block_device *bdev) { - request_queue_t *q = bdev_get_queue(bdev); + struct request_queue *q = bdev_get_queue(bdev); int nr_pages; nr_pages = ((q->max_sectors << 9) + PAGE_SIZE - 1) >> PAGE_SHIFT; @@ -315,7 +315,7 @@ int bio_get_nr_vecs(struct block_device return nr_pages; } -static int __bio_add_page(request_queue_t *q, struct bio *bio, struct page +static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page *page, unsigned int len, unsigned int offset, unsigned short max_sectors) { @@ -425,7 +425,7 @@ static int __bio_add_page(request_queue_ * smaller than PAGE_SIZE, so it is always possible to add a single * page to an empty bio. This should only be used by REQ_PC bios. */ -int bio_add_pc_page(request_queue_t *q, struct bio *bio, struct page *page, +int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page *page, unsigned int len, unsigned int offset) { return __bio_add_page(q, bio, page, len, offset, q->max_hw_sectors); @@ -523,7 +523,7 @@ int bio_uncopy_user(struct bio *bio) * to/from kernel pages as necessary. Must be paired with * call bio_uncopy_user() on io completion. */ -struct bio *bio_copy_user(request_queue_t *q, unsigned long uaddr, +struct bio *bio_copy_user(struct request_queue *q, unsigned long uaddr, unsigned int len, int write_to_vm) { unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; @@ -600,7 +600,7 @@ out_bmd: return ERR_PTR(ret); } -static struct bio *__bio_map_user_iov(request_queue_t *q, +static struct bio *__bio_map_user_iov(struct request_queue *q, struct block_device *bdev, struct sg_iovec *iov, int iov_count, int write_to_vm) @@ -712,7 +712,7 @@ static struct bio *__bio_map_user_iov(re /** * bio_map_user - map user address into bio - * @q: the request_queue_t for the bio + * @q: the struct request_queue for the bio * @bdev: destination block device * @uaddr: start of user address * @len: length in bytes @@ -721,7 +721,7 @@ static struct bio *__bio_map_user_iov(re * Map the user space address into a bio suitable for io to a block * device. Returns an error pointer in case of error. 
*/ -struct bio *bio_map_user(request_queue_t *q, struct block_device *bdev, +struct bio *bio_map_user(struct request_queue *q, struct block_device *bdev, unsigned long uaddr, unsigned int len, int write_to_vm) { struct sg_iovec iov; @@ -734,7 +734,7 @@ struct bio *bio_map_user(request_queue_t /** * bio_map_user_iov - map user sg_iovec table into bio - * @q: the request_queue_t for the bio + * @q: the struct request_queue for the bio * @bdev: destination block device * @iov: the iovec. * @iov_count: number of elements in the iovec @@ -743,7 +743,7 @@ struct bio *bio_map_user(request_queue_t * Map the user space address into a bio suitable for io to a block * device. Returns an error pointer in case of error. */ -struct bio *bio_map_user_iov(request_queue_t *q, struct block_device *bdev, +struct bio *bio_map_user_iov(struct request_queue *q, struct block_device *bdev, struct sg_iovec *iov, int iov_count, int write_to_vm) { @@ -808,7 +808,7 @@ static int bio_map_kern_endio(struct bio } -static struct bio *__bio_map_kern(request_queue_t *q, void *data, +static struct bio *__bio_map_kern(struct request_queue *q, void *data, unsigned int len, gfp_t gfp_mask) { unsigned long kaddr = (unsigned long)data; @@ -847,7 +847,7 @@ static struct bio *__bio_map_kern(reques /** * bio_map_kern - map kernel address into bio - * @q: the request_queue_t for the bio + * @q: the struct request_queue for the bio * @data: pointer to buffer to map * @len: length in bytes * @gfp_mask: allocation flags for bio allocation @@ -855,7 +855,7 @@ static struct bio *__bio_map_kern(reques * Map the kernel address into a bio suitable for io to a block * device. Returns an error pointer in case of error. */ -struct bio *bio_map_kern(request_queue_t *q, void *data, unsigned int len, +struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len, gfp_t gfp_mask) { struct bio *bio; Index: linux-rt-rebase.q/fs/ocfs2/file.c =================================================================== --- linux-rt-rebase.q.orig/fs/ocfs2/file.c +++ linux-rt-rebase.q/fs/ocfs2/file.c @@ -2153,7 +2153,7 @@ static int ocfs2_splice_write_actor(stru src = buf->ops->map(pipe, buf, 1); dst = kmap_atomic(page, KM_USER1); memcpy(dst + offset, src + buf->offset, count); - kunmap_atomic(page, KM_USER1); + kunmap_atomic(dst, KM_USER1); buf->ops->unmap(pipe, buf, src); copied = ocfs2_write_end(file, file->f_mapping, sd->pos, count, count, Index: linux-rt-rebase.q/fs/open.c =================================================================== --- linux-rt-rebase.q.orig/fs/open.c +++ linux-rt-rebase.q/fs/open.c @@ -403,7 +403,7 @@ asmlinkage long sys_fallocate(int fd, in if (inode->i_op && inode->i_op->fallocate) ret = inode->i_op->fallocate(inode, mode, offset, len); else - ret = -ENOSYS; + ret = -EOPNOTSUPP; out_fput: fput(file); Index: linux-rt-rebase.q/include/acpi/acpi_bus.h =================================================================== --- linux-rt-rebase.q.orig/include/acpi/acpi_bus.h +++ linux-rt-rebase.q/include/acpi/acpi_bus.h @@ -131,7 +131,7 @@ struct acpi_device_ops { struct acpi_driver { char name[80]; char class[80]; - char *ids; /* Supported Hardware IDs */ + const struct acpi_device_id *ids; /* Supported Hardware IDs */ struct acpi_device_ops ops; struct device_driver drv; struct module *owner; @@ -341,7 +341,8 @@ int acpi_bus_add(struct acpi_device **ch int acpi_bus_trim(struct acpi_device *start, int rmdevice); int acpi_bus_start(struct acpi_device *device); acpi_status acpi_bus_get_ejd(acpi_handle handle, acpi_handle * 
ejd); -int acpi_match_ids(struct acpi_device *device, char *ids); +int acpi_match_device_ids(struct acpi_device *device, + const struct acpi_device_id *ids); int acpi_create_dir(struct acpi_device *); void acpi_remove_dir(struct acpi_device *); @@ -365,6 +366,8 @@ acpi_handle acpi_get_child(acpi_handle, acpi_handle acpi_get_pci_rootbridge_handle(unsigned int, unsigned int); #define DEVICE_ACPI_HANDLE(dev) ((acpi_handle)((dev)->archdata.acpi_handle)) +int acpi_pm_device_sleep_state(struct device *, int, int *); + #endif /* CONFIG_ACPI */ #endif /*__ACPI_BUS_H__*/ Index: linux-rt-rebase.q/include/acpi/acpi_drivers.h =================================================================== --- linux-rt-rebase.q.orig/include/acpi/acpi_drivers.h +++ linux-rt-rebase.q/include/acpi/acpi_drivers.h @@ -34,16 +34,21 @@ #define ACPI_BUS_COMPONENT 0x00010000 #define ACPI_SYSTEM_COMPONENT 0x02000000 -/* _HID definitions */ +/* + * _HID definitions + * HIDs must conform to ACPI spec(6.1.4) + * Linux specific HIDs do not apply to this and begin with LNX: + */ -#define ACPI_POWER_HID "power_resource" +#define ACPI_POWER_HID "LNXPOWER" #define ACPI_PROCESSOR_HID "ACPI0007" -#define ACPI_SYSTEM_HID "acpi_system" -#define ACPI_THERMAL_HID "thermal" -#define ACPI_BUTTON_HID_POWERF "button_power" -#define ACPI_BUTTON_HID_SLEEPF "button_sleep" -#define ACPI_VIDEO_HID "video" -#define ACPI_BAY_HID "bay" +#define ACPI_SYSTEM_HID "LNXSYSTM" +#define ACPI_THERMAL_HID "LNXTHERM" +#define ACPI_BUTTON_HID_POWERF "LNXPWRBN" +#define ACPI_BUTTON_HID_SLEEPF "LNXSLPBN" +#define ACPI_VIDEO_HID "LNXVIDEO" +#define ACPI_BAY_HID "LNXIOBAY" + /* -------------------------------------------------------------------------- PCI -------------------------------------------------------------------------- */ @@ -142,10 +147,6 @@ static inline void unregister_hotplug_do /*-------------------------------------------------------------------------- Suspend/Resume -------------------------------------------------------------------------- */ -#ifdef CONFIG_ACPI_SLEEP extern int acpi_sleep_init(void); -#else -#define acpi_sleep_init() do {} while (0) -#endif #endif /*__ACPI_DRIVERS_H__*/ Index: linux-rt-rebase.q/include/acpi/actypes.h =================================================================== --- linux-rt-rebase.q.orig/include/acpi/actypes.h +++ linux-rt-rebase.q/include/acpi/actypes.h @@ -809,7 +809,7 @@ acpi_status(*acpi_walk_callback) (acpi_h /* Common string version of device HIDs and UIDs */ -struct acpi_device_id { +struct acpica_device_id { char value[ACPI_DEVICE_ID_LENGTH]; }; @@ -859,8 +859,8 @@ struct acpi_device_info { u32 valid; /* Indicates which fields below are valid */ u32 current_status; /* _STA value */ acpi_integer address; /* _ADR value if any */ - struct acpi_device_id hardware_id; /* _HID value if any */ - struct acpi_device_id unique_id; /* _UID value if any */ + struct acpica_device_id hardware_id; /* _HID value if any */ + struct acpica_device_id unique_id; /* _UID value if any */ u8 highest_dstates[4]; /* _sx_d values: 0xFF indicates not valid */ struct acpi_compatible_id_list compatibility_id; /* List of _CIDs if any */ }; Index: linux-rt-rebase.q/include/acpi/acutils.h =================================================================== --- linux-rt-rebase.q.orig/include/acpi/acutils.h +++ linux-rt-rebase.q/include/acpi/acutils.h @@ -354,7 +354,7 @@ acpi_ut_evaluate_numeric_object(char *ob acpi_status acpi_ut_execute_HID(struct acpi_namespace_node *device_node, - struct acpi_device_id *hid); + struct 
acpica_device_id *hid);
 
 acpi_status
 acpi_ut_execute_CID(struct acpi_namespace_node *device_node,
@@ -366,7 +366,7 @@ acpi_ut_execute_STA(struct acpi_namespac
 
 acpi_status
 acpi_ut_execute_UID(struct acpi_namespace_node *device_node,
-		    struct acpi_device_id *uid);
+		    struct acpica_device_id *uid);
 
 acpi_status
 acpi_ut_execute_sxds(struct acpi_namespace_node *device_node, u8 * highest);
Index: linux-rt-rebase.q/include/asm-arm/arch-omap/mailbox.h
===================================================================
--- linux-rt-rebase.q.orig/include/asm-arm/arch-omap/mailbox.h
+++ linux-rt-rebase.q/include/asm-arm/arch-omap/mailbox.h
@@ -37,7 +37,7 @@ struct omap_mbox_ops {
 
 struct omap_mbox_queue {
 	spinlock_t		lock;
-	request_queue_t		*queue;
+	struct request_queue	*queue;
 	struct work_struct	work;
 	int	(*callback)(void *);
 	struct omap_mbox	*mbox;
Index: linux-rt-rebase.q/include/asm-i386/acpi.h
===================================================================
--- linux-rt-rebase.q.orig/include/asm-i386/acpi.h
+++ linux-rt-rebase.q/include/asm-i386/acpi.h
@@ -121,19 +121,6 @@ static inline void acpi_disable_pci(void
 }
 extern int acpi_irq_balance_set(char *str);
 
-#else	/* !CONFIG_ACPI */
-
-#define acpi_lapic 0
-#define acpi_ioapic 0
-static inline void acpi_noirq_set(void) { }
-static inline void acpi_disable_pci(void) { }
-static inline void disable_acpi(void) { }
-
-#endif	/* !CONFIG_ACPI */
-
-
-#ifdef CONFIG_ACPI_SLEEP
-
 /* routines for saving/restoring kernel state */
 extern int acpi_save_state_mem(void);
 extern void acpi_restore_state_mem(void);
@@ -143,7 +130,15 @@ extern unsigned long acpi_wakeup_address
 
 /* early initialization routine */
 extern void acpi_reserve_bootmem(void);
-#endif /*CONFIG_ACPI_SLEEP*/
+#else	/* !CONFIG_ACPI */
+
+#define acpi_lapic 0
+#define acpi_ioapic 0
+static inline void acpi_noirq_set(void) { }
+static inline void acpi_disable_pci(void) { }
+static inline void disable_acpi(void) { }
+
+#endif	/* !CONFIG_ACPI */
 
 #define ARCH_HAS_POWER_INIT 1
Index: linux-rt-rebase.q/include/asm-i386/bootparam.h
===================================================================
--- linux-rt-rebase.q.orig/include/asm-i386/bootparam.h
+++ linux-rt-rebase.q/include/asm-i386/bootparam.h
@@ -4,8 +4,9 @@
 #include <linux/types.h>
 #include <linux/screen_info.h>
 #include <linux/apm_bios.h>
-#include <asm/e820.h>
 #include <linux/edd.h>
+#include <asm/e820.h>
+#include <asm/ist.h>
 #include <video/edid.h>
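The serial_core.c hunks earlier in this series all apply one idiom:
port->mapbase is a resource_size_t whose width depends on the kernel
configuration, so it must be widened explicitly and printed with %llx/%llX
rather than the old %lx formats.  A minimal stand-alone sketch of that idiom
(plain user-space C; the resource_size_t typedef below is an assumption
standing in for the kernel's):

#include <stdio.h>
#include <stdint.h>

/*
 * Stand-in for the kernel's resource_size_t, which is 32 or 64 bits
 * wide depending on configuration; assume the wide variant here.
 */
typedef uint64_t resource_size_t;

int main(void)
{
	resource_size_t mapbase = 0xfe000000;

	/*
	 * Widen explicitly at the call site and use %llx, so the format
	 * string stays correct no matter how wide resource_size_t is.
	 * Passing mapbase straight to %lx would be wrong whenever
	 * resources are 64-bit but long is 32-bit.
	 */
	printf("MMIO 0x%llx\n", (unsigned long long)mapbase);
	return 0;
}

On builds where resource_size_t is already 32-bit the cast is a harmless
widening; on 64-bit-resource builds it is what prevents truncation.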