patches/0000755001303100130310000000000011014364606012426 5ustar rostedtrostedtpatches/preempt-realtime-fs-block.patch0000644001303100130310000003226711014364535020434 0ustar rostedtrostedt--- block/blk-core.c | 6 ++-- fs/aio.c | 6 +++- fs/block_dev.c | 34 +++++++++++++++++++++------ fs/dcache.c | 5 ++-- fs/dnotify.c | 2 - fs/exec.c | 8 +++++- fs/file.c | 5 ++-- fs/lockd/svc.c | 8 +----- fs/pipe.c | 12 +++++++++ fs/proc/proc_misc.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++ fs/proc/task_mmu.c | 4 ++- fs/xfs/linux-2.6/mrlock.h | 4 +-- fs/xfs/xfs_mount.h | 2 - include/linux/genhd.h | 9 +++++-- 14 files changed, 130 insertions(+), 31 deletions(-) Index: linux-2.6.25.4-rt2/block/blk-core.c =================================================================== --- linux-2.6.25.4-rt2.orig/block/blk-core.c 2008-05-19 16:54:59.000000000 -0400 +++ linux-2.6.25.4-rt2/block/blk-core.c 2008-05-19 16:55:57.000000000 -0400 @@ -211,7 +211,7 @@ EXPORT_SYMBOL(blk_dump_rq_flags); */ void blk_plug_device(struct request_queue *q) { - WARN_ON(!irqs_disabled()); + WARN_ON_NONRT(!irqs_disabled()); /* * don't plug a stopped queue, it must be paired with blk_start_queue() @@ -233,7 +233,7 @@ EXPORT_SYMBOL(blk_plug_device); */ int blk_remove_plug(struct request_queue *q) { - WARN_ON(!irqs_disabled()); + WARN_ON_NONRT(!irqs_disabled()); if (!test_and_clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) return 0; @@ -331,7 +331,7 @@ EXPORT_SYMBOL(blk_unplug); **/ void blk_start_queue(struct request_queue *q) { - WARN_ON(!irqs_disabled()); + WARN_ON_NONRT(!irqs_disabled()); clear_bit(QUEUE_FLAG_STOPPED, &q->queue_flags); Index: linux-2.6.25.4-rt2/fs/aio.c =================================================================== --- linux-2.6.25.4-rt2.orig/fs/aio.c 2008-05-19 16:54:59.000000000 -0400 +++ linux-2.6.25.4-rt2/fs/aio.c 2008-05-19 16:55:57.000000000 -0400 @@ -581,13 +581,15 @@ static void use_mm(struct mm_struct *mm) tsk->flags |= PF_BORROWED_MM; active_mm = tsk->active_mm; atomic_inc(&mm->mm_count); - tsk->mm = mm; - tsk->active_mm = mm; + local_irq_disable(); // FIXME /* * Note that on UML this *requires* PF_BORROWED_MM to be set, otherwise * it won't work. Update it accordingly if you change it here */ switch_mm(active_mm, mm, tsk); + tsk->mm = mm; + tsk->active_mm = mm; + local_irq_enable(); task_unlock(tsk); mmdrop(active_mm); Index: linux-2.6.25.4-rt2/fs/block_dev.c =================================================================== --- linux-2.6.25.4-rt2.orig/fs/block_dev.c 2008-05-19 16:54:59.000000000 -0400 +++ linux-2.6.25.4-rt2/fs/block_dev.c 2008-05-19 16:55:57.000000000 -0400 @@ -1029,14 +1029,32 @@ static int __blkdev_get(struct block_dev * For now, block device ->open() routine must _not_ * examine anything in 'inode' argument except ->i_rdev. 
*/ - struct file fake_file = {}; - struct dentry fake_dentry = {}; - fake_file.f_mode = mode; - fake_file.f_flags = flags; - fake_file.f_path.dentry = &fake_dentry; - fake_dentry.d_inode = bdev->bd_inode; - - return do_open(bdev, &fake_file, for_part); + struct file *fake_file; + struct dentry *fake_dentry; + int err = -ENOMEM; + + fake_file = kmalloc(sizeof(*fake_file), GFP_KERNEL); + if (!fake_file) + goto out; + memset(fake_file, 0, sizeof(*fake_file)); + + fake_dentry = kmalloc(sizeof(*fake_dentry), GFP_KERNEL); + if (!fake_dentry) + goto out_free_file; + memset(fake_dentry, 0, sizeof(*fake_dentry)); + + fake_file->f_mode = mode; + fake_file->f_flags = flags; + fake_file->f_path.dentry = fake_dentry; + fake_dentry->d_inode = bdev->bd_inode; + + err = do_open(bdev, fake_file, for_part); + + kfree(fake_dentry); +out_free_file: + kfree(fake_file); +out: + return err; } int blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags) Index: linux-2.6.25.4-rt2/fs/dcache.c =================================================================== --- linux-2.6.25.4-rt2.orig/fs/dcache.c 2008-05-19 16:54:59.000000000 -0400 +++ linux-2.6.25.4-rt2/fs/dcache.c 2008-05-19 16:55:57.000000000 -0400 @@ -693,8 +693,9 @@ void shrink_dcache_for_umount(struct sup { struct dentry *dentry; - if (down_read_trylock(&sb->s_umount)) - BUG(); +// -rt: this might succeed there ... +// if (down_read_trylock(&sb->s_umount)) +// BUG(); dentry = sb->s_root; sb->s_root = NULL; Index: linux-2.6.25.4-rt2/fs/dnotify.c =================================================================== --- linux-2.6.25.4-rt2.orig/fs/dnotify.c 2008-05-19 16:54:59.000000000 -0400 +++ linux-2.6.25.4-rt2/fs/dnotify.c 2008-05-19 16:55:57.000000000 -0400 @@ -173,7 +173,7 @@ void dnotify_parent(struct dentry *dentr spin_lock(&dentry->d_lock); parent = dentry->d_parent; - if (parent->d_inode->i_dnotify_mask & event) { + if (unlikely(parent->d_inode->i_dnotify_mask & event)) { dget(parent); spin_unlock(&dentry->d_lock); __inode_dir_notify(parent->d_inode, event); Index: linux-2.6.25.4-rt2/fs/exec.c =================================================================== --- linux-2.6.25.4-rt2.orig/fs/exec.c 2008-05-19 16:54:59.000000000 -0400 +++ linux-2.6.25.4-rt2/fs/exec.c 2008-05-19 16:55:57.000000000 -0400 @@ -48,6 +48,7 @@ #include #include #include +#include #include #include #include @@ -730,11 +731,16 @@ static int exec_mmap(struct mm_struct *m } } task_lock(tsk); + + local_irq_disable(); active_mm = tsk->active_mm; + activate_mm(active_mm, mm); tsk->mm = mm; tsk->active_mm = mm; - activate_mm(active_mm, mm); + local_irq_enable(); + task_unlock(tsk); + arch_pick_mmap_layout(mm); if (old_mm) { up_read(&old_mm->mmap_sem); Index: linux-2.6.25.4-rt2/fs/file.c =================================================================== --- linux-2.6.25.4-rt2.orig/fs/file.c 2008-05-19 16:54:59.000000000 -0400 +++ linux-2.6.25.4-rt2/fs/file.c 2008-05-19 16:55:57.000000000 -0400 @@ -98,14 +98,15 @@ void free_fdtable_rcu(struct rcu_head *r kfree(fdt->open_fds); kfree(fdt); } else { - fddef = &get_cpu_var(fdtable_defer_list); + + fddef = &per_cpu(fdtable_defer_list, raw_smp_processor_id()); + spin_lock(&fddef->lock); fdt->next = fddef->next; fddef->next = fdt; /* vmallocs are handled from the workqueue context */ schedule_work(&fddef->wq); spin_unlock(&fddef->lock); - put_cpu_var(fdtable_defer_list); } } Index: linux-2.6.25.4-rt2/fs/lockd/svc.c =================================================================== --- linux-2.6.25.4-rt2.orig/fs/lockd/svc.c 
2008-05-19 16:54:59.000000000 -0400 +++ linux-2.6.25.4-rt2/fs/lockd/svc.c 2008-05-19 16:55:57.000000000 -0400 @@ -344,16 +344,12 @@ lockd_down(void) * Wait for the lockd process to exit, but since we're holding * the lockd semaphore, we can't wait around forever ... */ - clear_thread_flag(TIF_SIGPENDING); - interruptible_sleep_on_timeout(&lockd_exit, HZ); - if (nlmsvc_pid) { + if (wait_event_interruptible_timeout(lockd_exit, + nlmsvc_pid == 0, HZ) <= 0) { printk(KERN_WARNING "lockd_down: lockd failed to exit, clearing pid\n"); nlmsvc_pid = 0; } - spin_lock_irq(¤t->sighand->siglock); - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); out: mutex_unlock(&nlmsvc_mutex); } Index: linux-2.6.25.4-rt2/fs/pipe.c =================================================================== --- linux-2.6.25.4-rt2.orig/fs/pipe.c 2008-05-19 16:54:59.000000000 -0400 +++ linux-2.6.25.4-rt2/fs/pipe.c 2008-05-19 16:55:57.000000000 -0400 @@ -385,8 +385,14 @@ redo: wake_up_interruptible_sync(&pipe->wait); kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); } + /* + * Hack: we turn off atime updates for -RT kernels. + * Who uses them on pipes anyway? + */ +#ifndef CONFIG_PREEMPT_RT if (ret > 0) file_accessed(filp); +#endif return ret; } @@ -558,8 +564,14 @@ out: wake_up_interruptible_sync(&pipe->wait); kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); } + /* + * Hack: we turn off atime updates for -RT kernels. + * Who uses them on pipes anyway? + */ +#ifndef CONFIG_PREEMPT_RT if (ret > 0) file_update_time(filp); +#endif return ret; } Index: linux-2.6.25.4-rt2/fs/proc/proc_misc.c =================================================================== --- linux-2.6.25.4-rt2.orig/fs/proc/proc_misc.c 2008-05-19 16:55:45.000000000 -0400 +++ linux-2.6.25.4-rt2/fs/proc/proc_misc.c 2008-05-19 16:55:57.000000000 -0400 @@ -103,6 +103,27 @@ static int loadavg_read_proc(char *page, return proc_calc_metrics(page, start, off, count, eof, len); } +#ifdef CONFIG_PREEMPT_RT +static int loadavg_rt_read_proc(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + extern unsigned long avenrun_rt[]; + extern unsigned long rt_nr_running(void); + int a, b, c; + int len; + + a = avenrun_rt[0] + (FIXED_1/200); + b = avenrun_rt[1] + (FIXED_1/200); + c = avenrun_rt[2] + (FIXED_1/200); + len = sprintf(page,"%d.%02d %d.%02d %d.%02d %ld/%d %d\n", + LOAD_INT(a), LOAD_FRAC(a), + LOAD_INT(b), LOAD_FRAC(b), + LOAD_INT(c), LOAD_FRAC(c), + rt_nr_running(), nr_threads, current->nsproxy->pid_ns->last_pid); + return proc_calc_metrics(page, start, off, count, eof, len); +} +#endif + static int uptime_read_proc(char *page, char **start, off_t off, int count, int *eof, void *data) { @@ -560,6 +581,38 @@ static int show_stat(struct seq_file *p, nr_iowait()); kfree(per_irq_sum); +#ifdef CONFIG_PREEMPT_RT + { + unsigned long nr_uninterruptible_cpu(int cpu); + extern int pi_initialized; + unsigned long rt_nr_running(void); + unsigned long rt_nr_running_cpu(int cpu); + unsigned long rt_nr_uninterruptible(void); + unsigned long rt_nr_uninterruptible_cpu(int cpu); + + int i; + + seq_printf(p, "pi_init: %d\n", pi_initialized); + seq_printf(p, "nr_running(): %ld\n", + nr_running()); + seq_printf(p, "nr_uninterruptible(): %ld\n", + nr_uninterruptible()); + for_each_online_cpu(i) + seq_printf(p, "nr_uninterruptible(%d): %ld\n", + i, nr_uninterruptible_cpu(i)); + seq_printf(p, "rt_nr_running(): %ld\n", + rt_nr_running()); + for_each_online_cpu(i) + seq_printf(p, "rt_nr_running(%d): %ld\n", + i, rt_nr_running_cpu(i)); + seq_printf(p, 
"nr_rt_uninterruptible(): %ld\n", + rt_nr_uninterruptible()); + for_each_online_cpu(i) + seq_printf(p, "nr_rt_uninterruptible(%d): %ld\n", + i, rt_nr_uninterruptible_cpu(i)); + } +#endif + return 0; } @@ -839,6 +892,9 @@ void __init proc_misc_init(void) int (*read_proc)(char*,char**,off_t,int,int*,void*); } *p, simple_ones[] = { {"loadavg", loadavg_read_proc}, +#ifdef CONFIG_PREEMPT_RT + {"loadavgrt", loadavg_rt_read_proc}, +#endif {"uptime", uptime_read_proc}, {"meminfo", meminfo_read_proc}, {"version", version_read_proc}, Index: linux-2.6.25.4-rt2/fs/proc/task_mmu.c =================================================================== --- linux-2.6.25.4-rt2.orig/fs/proc/task_mmu.c 2008-05-19 16:54:59.000000000 -0400 +++ linux-2.6.25.4-rt2/fs/proc/task_mmu.c 2008-05-19 16:55:57.000000000 -0400 @@ -173,8 +173,10 @@ static void *m_start(struct seq_file *m, vma = NULL; if ((unsigned long)l < mm->map_count) { vma = mm->mmap; - while (l-- && vma) + while (l-- && vma) { vma = vma->vm_next; + cond_resched(); + } goto out; } Index: linux-2.6.25.4-rt2/fs/xfs/linux-2.6/mrlock.h =================================================================== --- linux-2.6.25.4-rt2.orig/fs/xfs/linux-2.6/mrlock.h 2008-05-19 16:54:59.000000000 -0400 +++ linux-2.6.25.4-rt2/fs/xfs/linux-2.6/mrlock.h 2008-05-19 16:55:57.000000000 -0400 @@ -23,8 +23,8 @@ enum { MR_NONE, MR_ACCESS, MR_UPDATE }; typedef struct { - struct rw_semaphore mr_lock; - int mr_writer; + struct compat_rw_semaphore mr_lock; + int mr_writer; } mrlock_t; #define mrinit(mrp, name) \ Index: linux-2.6.25.4-rt2/fs/xfs/xfs_mount.h =================================================================== --- linux-2.6.25.4-rt2.orig/fs/xfs/xfs_mount.h 2008-05-19 16:54:59.000000000 -0400 +++ linux-2.6.25.4-rt2/fs/xfs/xfs_mount.h 2008-05-19 16:55:57.000000000 -0400 @@ -282,7 +282,7 @@ typedef struct xfs_mount { uint m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */ uint m_in_maxlevels; /* XFS_IN_MAXLEVELS */ struct xfs_perag *m_perag; /* per-ag accounting info */ - struct rw_semaphore m_peraglock; /* lock for m_perag (pointer) */ + struct compat_rw_semaphore m_peraglock; /* lock for m_perag (pointer) */ struct mutex m_growlock; /* growfs mutex */ int m_fixedfsid[2]; /* unchanged for life of FS */ uint m_dmevmask; /* DMI events for this FS */ Index: linux-2.6.25.4-rt2/include/linux/genhd.h =================================================================== --- linux-2.6.25.4-rt2.orig/include/linux/genhd.h 2008-05-19 16:54:59.000000000 -0400 +++ linux-2.6.25.4-rt2/include/linux/genhd.h 2008-05-19 16:55:57.000000000 -0400 @@ -164,8 +164,13 @@ static inline struct hd_struct *get_part } #ifdef CONFIG_SMP -#define __disk_stat_add(gendiskp, field, addnd) \ - (per_cpu_ptr(gendiskp->dkstats, smp_processor_id())->field += addnd) +#define __disk_stat_add(gendiskp, field, addnd) \ +do { \ + preempt_disable(); \ + (per_cpu_ptr(gendiskp->dkstats, \ + smp_processor_id())->field += addnd); \ + preempt_enable(); \ +} while (0) #define disk_stat_read(gendiskp, field) \ ({ \ patches/trace-histograms.patch0000644001303100130310000005531211014364470016730 0ustar rostedtrostedtCritical latency timings histogram This patch adds hooks into the latency tracer to give us histograms of interrupts off, preemption off and wakeup timings. This code was based off of work done by Yi Yang But heavily modified to work with the new tracer, and some clean ups by Steven Rostedt This adds the following to /debugfs/tracing latency_hist/ - root dir for historgrams. 
Under latency_hist there is (depending on what's configured): interrupt_off_latency/ - latency histograms of interrupts off. preempt_interrupts_off_latency/ - latency histograms of preemption and/or interrupts off. preempt_off_latency/ - latency histograms of preemption off. wakeup_latency/ - latency histograms of wakeup timings. Under each of the above is a file labeled: CPU# for each possible CPU where # is the CPU number. reset - writing into this file will reset the histogram back to zeros and start again. Signed-off-by: Steven Rostedt --- kernel/trace/Kconfig | 21 + kernel/trace/Makefile | 4 kernel/trace/trace_hist.c | 548 ++++++++++++++++++++++++++++++++++++++ kernel/trace/trace_hist.h | 39 ++ kernel/trace/trace_irqsoff.c | 17 + kernel/trace/trace_sched_switch.c | 2 kernel/trace/trace_sched_wakeup.c | 5 7 files changed, 635 insertions(+), 1 deletion(-) Index: linux-2.6.25.4-rt2/kernel/trace/Kconfig =================================================================== --- linux-2.6.25.4-rt2.orig/kernel/trace/Kconfig 2008-05-19 16:55:20.000000000 -0400 +++ linux-2.6.25.4-rt2/kernel/trace/Kconfig 2008-05-19 16:55:20.000000000 -0400 @@ -139,3 +139,24 @@ config FTRACE_STARTUP_TEST a series of tests are made to verify that the tracer is functioning properly. It will do tests on all the configured tracers of ftrace. + +config INTERRUPT_OFF_HIST + bool "Interrupts off critical timings histogram" + depends on IRQSOFF_TRACER + help + This option uses the infrastructure of the critical + irqs off timings to create a histogram of latencies. + +config PREEMPT_OFF_HIST + bool "Preempt off critical timings histogram" + depends on PREEMPT_TRACER + help + This option uses the infrastructure of the critical + preemption off timings to create a histogram of latencies. + +config WAKEUP_LATENCY_HIST + bool "Interrupts off critical timings histogram" + depends on SCHED_TRACER + help + This option uses the infrastructure of the wakeup tracer + to create a histogram of latencies.
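A usage sketch, not part of the patch itself: the changelog above describes per-CPU histogram files plus a per-directory "reset" control under /debugfs/tracing/latency_hist/. Assuming debugfs is mounted at /debugfs (as the changelog assumes), a minimal userspace helper that dumps one histogram and then clears it could look like the following; the mount point and the string written to "reset" are assumptions, only the directory and file layout comes from the patch.

#include <stdio.h>

int main(void)
{
	char buf[256];
	FILE *f;

	/* Dump the interrupts-off latency histogram for CPU0. */
	f = fopen("/debugfs/tracing/latency_hist/interrupt_off_latency/CPU0", "r");
	if (!f) {
		perror("CPU0");
		return 1;
	}
	while (fgets(buf, sizeof(buf), f))
		fputs(buf, stdout);
	fclose(f);

	/* Any write to "reset" zeroes the histogram and restarts collection. */
	f = fopen("/debugfs/tracing/latency_hist/interrupt_off_latency/reset", "w");
	if (!f) {
		perror("reset");
		return 1;
	}
	fputs("1\n", f);
	fclose(f);
	return 0;
}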
Index: linux-2.6.25.4-rt2/kernel/trace/Makefile =================================================================== --- linux-2.6.25.4-rt2.orig/kernel/trace/Makefile 2008-05-19 16:55:20.000000000 -0400 +++ linux-2.6.25.4-rt2/kernel/trace/Makefile 2008-05-19 16:55:20.000000000 -0400 @@ -22,4 +22,8 @@ obj-$(CONFIG_SCHED_TRACER) += trace_sche obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o obj-$(CONFIG_EVENT_TRACER) += trace_events.o +obj-$(CONFIG_INTERRUPT_OFF_HIST) += trace_hist.o +obj-$(CONFIG_PREEMPT_OFF_HIST) += trace_hist.o +obj-$(CONFIG_WAKEUP_LATENCY_HIST) += trace_hist.o + libftrace-y := ftrace.o Index: linux-2.6.25.4-rt2/kernel/trace/trace_irqsoff.c =================================================================== --- linux-2.6.25.4-rt2.orig/kernel/trace/trace_irqsoff.c 2008-05-19 16:55:20.000000000 -0400 +++ linux-2.6.25.4-rt2/kernel/trace/trace_irqsoff.c 2008-05-19 16:55:20.000000000 -0400 @@ -17,6 +17,7 @@ #include #include "trace.h" +#include "trace_hist.h" static struct trace_array *irqsoff_trace __read_mostly; static int tracer_enabled __read_mostly; @@ -255,10 +256,14 @@ void start_critical_timings(void) { if (preempt_trace() || irq_trace()) start_critical_timing(CALLER_ADDR0, CALLER_ADDR1); + + tracing_hist_preempt_start(); } void stop_critical_timings(void) { + tracing_hist_preempt_stop(TRACE_STOP); + if (preempt_trace() || irq_trace()) stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1); } @@ -267,6 +272,8 @@ void stop_critical_timings(void) #ifdef CONFIG_PROVE_LOCKING void time_hardirqs_on(unsigned long a0, unsigned long a1) { + tracing_hist_preempt_stop(1); + if (!preempt_trace() && irq_trace()) stop_critical_timing(a0, a1); } @@ -275,6 +282,8 @@ void time_hardirqs_off(unsigned long a0, { if (!preempt_trace() && irq_trace()) start_critical_timing(a0, a1); + + tracing_hist_preempt_start(); } #else /* !CONFIG_PROVE_LOCKING */ @@ -317,11 +326,15 @@ void trace_hardirqs_off(void) { if (!preempt_trace() && irq_trace()) start_critical_timing(CALLER_ADDR0, CALLER_ADDR1); + + tracing_hist_preempt_start(); } EXPORT_SYMBOL(trace_hardirqs_off); void trace_hardirqs_on_caller(unsigned long caller_addr) { + tracing_hist_preempt_stop(1); + if (!preempt_trace() && irq_trace()) stop_critical_timing(CALLER_ADDR0, caller_addr); } @@ -331,6 +344,8 @@ void trace_hardirqs_off_caller(unsigned { if (!preempt_trace() && irq_trace()) start_critical_timing(CALLER_ADDR0, caller_addr); + + tracing_hist_preempt_start(); } EXPORT_SYMBOL(trace_hardirqs_off_caller); @@ -340,12 +355,14 @@ EXPORT_SYMBOL(trace_hardirqs_off_caller) #ifdef CONFIG_PREEMPT_TRACER void trace_preempt_on(unsigned long a0, unsigned long a1) { + tracing_hist_preempt_stop(0); stop_critical_timing(a0, a1); } void trace_preempt_off(unsigned long a0, unsigned long a1) { start_critical_timing(a0, a1); + tracing_hist_preempt_start(); } #endif /* CONFIG_PREEMPT_TRACER */ Index: linux-2.6.25.4-rt2/kernel/trace/trace_sched_wakeup.c =================================================================== --- linux-2.6.25.4-rt2.orig/kernel/trace/trace_sched_wakeup.c 2008-05-19 16:55:17.000000000 -0400 +++ linux-2.6.25.4-rt2/kernel/trace/trace_sched_wakeup.c 2008-05-19 16:55:20.000000000 -0400 @@ -17,6 +17,7 @@ #include #include "trace.h" +#include "trace_hist.h" static struct trace_array *wakeup_trace; static int __read_mostly tracer_enabled; @@ -55,7 +56,9 @@ wakeup_sched_switch(struct task_struct * long disabled; int cpu; - if (unlikely(!tracer_enabled)) + tracing_hist_wakeup_stop(next); + + if (!tracer_enabled) return; /* Index: 
linux-2.6.25.4-rt2/kernel/trace/trace_hist.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.25.4-rt2/kernel/trace/trace_hist.c 2008-05-19 16:55:20.000000000 -0400 @@ -0,0 +1,548 @@ +/* + * kernel/trace/trace_hist.c + * + * Add support for histograms of preemption-off latency and + * interrupt-off latency and wakeup latency, it depends on + * Real-Time Preemption Support. + * + * Copyright (C) 2005 MontaVista Software, Inc. + * Yi Yang + * + * Converted to work with the new latency tracer. + * Copyright (C) 2008 Red Hat, Inc. + * Steven Rostedt + * + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "trace.h" +#include "trace_hist.h" + +enum { + INTERRUPT_LATENCY = 0, + PREEMPT_LATENCY, + PREEMPT_INTERRUPT_LATENCY, + WAKEUP_LATENCY, +}; + +#define MAX_ENTRY_NUM 10240 + +struct hist_data { + atomic_t hist_mode; /* 0 log, 1 don't log */ + unsigned long min_lat; + unsigned long avg_lat; + unsigned long max_lat; + unsigned long long beyond_hist_bound_samples; + unsigned long long accumulate_lat; + unsigned long long total_samples; + unsigned long long hist_array[MAX_ENTRY_NUM]; +}; + +static char *latency_hist_dir_root = "latency_hist"; + +#ifdef CONFIG_INTERRUPT_OFF_HIST +static DEFINE_PER_CPU(struct hist_data, interrupt_off_hist); +static char *interrupt_off_hist_dir = "interrupt_off_latency"; +#endif + +#ifdef CONFIG_PREEMPT_OFF_HIST +static DEFINE_PER_CPU(struct hist_data, preempt_off_hist); +static char *preempt_off_hist_dir = "preempt_off_latency"; +#endif + +#if defined(CONFIG_PREEMPT_OFF_HIST) && defined(CONFIG_INTERRUPT_OFF_HIST) +static DEFINE_PER_CPU(struct hist_data, preempt_irqs_off_hist); +static char *preempt_irqs_off_hist_dir = "preempt_interrupts_off_latency"; +#endif + +#ifdef CONFIG_WAKEUP_LATENCY_HIST +static DEFINE_PER_CPU(struct hist_data, wakeup_latency_hist); +static char *wakeup_latency_hist_dir = "wakeup_latency"; +#endif + +static inline u64 u64_div(u64 x, u64 y) +{ + do_div(x, y); + return x; +} + +void latency_hist(int latency_type, int cpu, unsigned long latency) +{ + struct hist_data *my_hist; + + if ((cpu < 0) || (cpu >= NR_CPUS) || (latency_type < INTERRUPT_LATENCY) + || (latency_type > WAKEUP_LATENCY) || (latency < 0)) + return; + + switch (latency_type) { +#ifdef CONFIG_INTERRUPT_OFF_HIST + case INTERRUPT_LATENCY: + my_hist = &per_cpu(interrupt_off_hist, cpu); + break; +#endif + +#ifdef CONFIG_PREEMPT_OFF_HIST + case PREEMPT_LATENCY: + my_hist = &per_cpu(preempt_off_hist, cpu); + break; +#endif + +#if defined(CONFIG_PREEMPT_OFF_HIST) && defined(CONFIG_INTERRUPT_OFF_HIST) + case PREEMPT_INTERRUPT_LATENCY: + my_hist = &per_cpu(preempt_irqs_off_hist, cpu); + break; +#endif + +#ifdef CONFIG_WAKEUP_LATENCY_HIST + case WAKEUP_LATENCY: + my_hist = &per_cpu(wakeup_latency_hist, cpu); + break; +#endif + default: + return; + } + + if (atomic_read(&my_hist->hist_mode) == 0) + return; + + if (latency >= MAX_ENTRY_NUM) + my_hist->beyond_hist_bound_samples++; + else + my_hist->hist_array[latency]++; + + if (latency < my_hist->min_lat) + my_hist->min_lat = latency; + else if (latency > my_hist->max_lat) + my_hist->max_lat = latency; + + my_hist->total_samples++; + my_hist->accumulate_lat += latency; + my_hist->avg_lat = (unsigned long) u64_div(my_hist->accumulate_lat, + my_hist->total_samples); + return; +} + +static void *l_start(struct seq_file *m, loff_t *pos) +{ + loff_t *index_ptr = kmalloc(sizeof(loff_t), GFP_KERNEL); + loff_t index = *pos; 
+ struct hist_data *my_hist = m->private; + + if (!index_ptr) + return NULL; + + if (index == 0) { + atomic_dec(&my_hist->hist_mode); + seq_printf(m, "#Minimum latency: %lu microseconds.\n" + "#Average latency: %lu microseconds.\n" + "#Maximum latency: %lu microseconds.\n" + "#Total samples: %llu\n" + "#There are %llu samples greater or equal" + " than %d microseconds\n" + "#usecs\t%16s\n" + , my_hist->min_lat + , my_hist->avg_lat + , my_hist->max_lat + , my_hist->total_samples + , my_hist->beyond_hist_bound_samples + , MAX_ENTRY_NUM, "samples"); + } + if (index >= MAX_ENTRY_NUM) + return NULL; + + *index_ptr = index; + return index_ptr; +} + +static void *l_next(struct seq_file *m, void *p, loff_t *pos) +{ + loff_t *index_ptr = p; + struct hist_data *my_hist = m->private; + + if (++*pos >= MAX_ENTRY_NUM) { + atomic_inc(&my_hist->hist_mode); + return NULL; + } + *index_ptr = *pos; + return index_ptr; +} + +static void l_stop(struct seq_file *m, void *p) +{ + kfree(p); +} + +static int l_show(struct seq_file *m, void *p) +{ + int index = *(loff_t *) p; + struct hist_data *my_hist = m->private; + + seq_printf(m, "%5d\t%16llu\n", index, my_hist->hist_array[index]); + return 0; +} + +static struct seq_operations latency_hist_seq_op = { + .start = l_start, + .next = l_next, + .stop = l_stop, + .show = l_show +}; + +static int latency_hist_open(struct inode *inode, struct file *file) +{ + int ret; + + ret = seq_open(file, &latency_hist_seq_op); + if (!ret) { + struct seq_file *seq = file->private_data; + seq->private = inode->i_private; + } + return ret; +} + +static struct file_operations latency_hist_fops = { + .open = latency_hist_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +static void hist_reset(struct hist_data *hist) +{ + atomic_dec(&hist->hist_mode); + + memset(hist->hist_array, 0, sizeof(hist->hist_array)); + hist->beyond_hist_bound_samples = 0UL; + hist->min_lat = 0xFFFFFFFFUL; + hist->max_lat = 0UL; + hist->total_samples = 0UL; + hist->accumulate_lat = 0UL; + hist->avg_lat = 0UL; + + atomic_inc(&hist->hist_mode); +} + +ssize_t latency_hist_reset(struct file *file, const char __user *a, + size_t size, loff_t *off) +{ + int cpu; + struct hist_data *hist; + int latency_type = (long)file->private_data; + + switch (latency_type) { + +#ifdef CONFIG_WAKEUP_LATENCY_HIST + case WAKEUP_LATENCY: + for_each_online_cpu(cpu) { + hist = &per_cpu(wakeup_latency_hist, cpu); + hist_reset(hist); + } + break; +#endif + +#ifdef CONFIG_PREEMPT_OFF_HIST + case PREEMPT_LATENCY: + for_each_online_cpu(cpu) { + hist = &per_cpu(preempt_off_hist, cpu); + hist_reset(hist); + } + break; +#endif + +#ifdef CONFIG_INTERRUPT_OFF_HIST + case INTERRUPT_LATENCY: + for_each_online_cpu(cpu) { + hist = &per_cpu(interrupt_off_hist, cpu); + hist_reset(hist); + } + break; +#endif + } + + return size; +} + +static struct file_operations latency_hist_reset_fops = { + .open = tracing_open_generic, + .write = latency_hist_reset, +}; + +#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST) +#ifdef CONFIG_INTERRUPT_OFF_HIST +static DEFINE_PER_CPU(cycles_t, hist_irqsoff_start); +static DEFINE_PER_CPU(int, hist_irqsoff_tracing); +#endif +#ifdef CONFIG_PREEMPT_OFF_HIST +static DEFINE_PER_CPU(cycles_t, hist_preemptoff_start); +static DEFINE_PER_CPU(int, hist_preemptoff_tracing); +#endif +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST) +static DEFINE_PER_CPU(cycles_t, hist_preemptirqsoff_start); +static DEFINE_PER_CPU(int, hist_preemptirqsoff_tracing); 
+#endif + +void tracing_hist_preempt_start(void) +{ + cycle_t uninitialized_var(start); + int start_set = 0; + int cpu; + + /* cpu is only used if we are in atomic */ + cpu = raw_smp_processor_id(); + +#ifdef CONFIG_INTERRUPT_OFF_HIST + if (irqs_disabled() && + !per_cpu(hist_irqsoff_tracing, cpu)) { + per_cpu(hist_irqsoff_tracing, cpu) = 1; + start_set++; + start = ftrace_now(cpu); + per_cpu(hist_irqsoff_start, cpu) = start; + } +#endif + +#ifdef CONFIG_PREEMPT_OFF_HIST + if (preempt_count() && + !per_cpu(hist_preemptoff_tracing, cpu)) { + per_cpu(hist_preemptoff_tracing, cpu) = 1; + if (1 || !(start_set++)) + start = ftrace_now(cpu); + per_cpu(hist_preemptoff_start, cpu) = start; + + } +#endif + +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST) + if ((preempt_count() || irqs_disabled()) && + !per_cpu(hist_preemptirqsoff_tracing, cpu)) { + per_cpu(hist_preemptirqsoff_tracing, cpu) = 1; + if (1 || !(start_set)) + start = ftrace_now(cpu); + per_cpu(hist_preemptirqsoff_start, cpu) = start; + } +#endif +} + +void tracing_hist_preempt_stop(int irqs_on) +{ + long latency; + cycle_t start; + cycle_t uninitialized_var(stop); + int stop_set = 0; + int cpu; + + /* irqs_on == TRACE_STOP if we must stop tracing. */ + + /* cpu is only used if we are in atomic */ + cpu = raw_smp_processor_id(); + +#ifdef CONFIG_INTERRUPT_OFF_HIST + if (irqs_on && + per_cpu(hist_irqsoff_tracing, cpu)) { + stop = ftrace_now(cpu); + stop_set++; + start = per_cpu(hist_irqsoff_start, cpu); + latency = (long)nsecs_to_usecs(stop - start); + if (latency > 1000000) { + printk("%d: latency = %ld (%lu)\n", __LINE__, latency, latency); + printk("%d: start=%Ld stop=%Ld\n", __LINE__, start, stop); + } + barrier(); + per_cpu(hist_irqsoff_tracing, cpu) = 0; + latency_hist(INTERRUPT_LATENCY, cpu, latency); + } +#endif + +#ifdef CONFIG_PREEMPT_OFF_HIST + if ((!irqs_on || irqs_on == TRACE_STOP) && + per_cpu(hist_preemptoff_tracing, cpu)) { + WARN_ON(!preempt_count()); + if (1 || !(stop_set++)) + stop = ftrace_now(cpu); + start = per_cpu(hist_preemptoff_start, cpu); + latency = (long)nsecs_to_usecs(stop - start); + if (latency > 1000000) { + printk("%d: latency = %ld (%lu)\n", __LINE__, latency, latency); + printk("%d: start=%Ld stop=%Ld\n", __LINE__, start, stop); + } + barrier(); + per_cpu(hist_preemptoff_tracing, cpu) = 0; + latency_hist(PREEMPT_LATENCY, cpu, latency); + } +#endif + +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST) + if (((!irqs_on && !irqs_disabled()) || + (irqs_on && !preempt_count()) || + (irqs_on == TRACE_STOP)) && + per_cpu(hist_preemptirqsoff_tracing, cpu)) { + WARN_ON(!preempt_count() && !irqs_disabled()); + if (1 || !stop_set) + stop = ftrace_now(cpu); + start = per_cpu(hist_preemptirqsoff_start, cpu); + latency = (long)nsecs_to_usecs(stop - start); + if (latency > 1000000) { + printk("%d: latency = %ld (%lu)\n", __LINE__, latency, latency); + printk("%d: start=%Ld stop=%Ld\n", __LINE__, start, stop); + } + barrier(); + per_cpu(hist_preemptirqsoff_tracing, cpu) = 0; + latency_hist(PREEMPT_INTERRUPT_LATENCY, cpu, latency); + } +#endif +} +#endif + +#ifdef CONFIG_WAKEUP_LATENCY_HIST +int tracing_wakeup_hist __read_mostly = 1; + +static unsigned wakeup_prio = (unsigned)-1 ; +static struct task_struct *wakeup_task; +static cycle_t wakeup_start; +static DEFINE_SPINLOCK(wakeup_lock); + +void tracing_hist_wakeup_start(struct task_struct *p, + struct task_struct *curr) +{ + unsigned long flags; + + if (likely(!rt_task(p)) || + p->prio >= wakeup_prio || + p->prio >= 
curr->prio) + return; + + spin_lock_irqsave(&wakeup_lock, flags); + if (wakeup_task) + put_task_struct(wakeup_task); + + get_task_struct(p); + wakeup_task = p; + wakeup_prio = p->prio; + wakeup_start = ftrace_now(raw_smp_processor_id()); + spin_unlock_irqrestore(&wakeup_lock, flags); +} + +void tracing_hist_wakeup_stop(struct task_struct *next) +{ + unsigned long flags; + long latency; + cycle_t stop; + + if (next != wakeup_task) + return; + + stop = ftrace_now(raw_smp_processor_id()); + + spin_lock_irqsave(&wakeup_lock, flags); + if (wakeup_task != next) + goto out; + + latency = (long)nsecs_to_usecs(stop - wakeup_start); + + latency_hist(WAKEUP_LATENCY, smp_processor_id(), latency); + + put_task_struct(wakeup_task); + wakeup_task = NULL; + wakeup_prio = (unsigned)-1; + out: + spin_unlock_irqrestore(&wakeup_lock, flags); + +} +#endif + +static __init int latency_hist_init(void) +{ + struct dentry *latency_hist_root = NULL; + struct dentry *dentry; + struct dentry *entry; + int i = 0, len = 0; + struct hist_data *my_hist; + char name[64]; + + dentry = tracing_init_dentry(); + + latency_hist_root = + debugfs_create_dir(latency_hist_dir_root, dentry); + +#ifdef CONFIG_INTERRUPT_OFF_HIST + dentry = debugfs_create_dir(interrupt_off_hist_dir, + latency_hist_root); + for_each_possible_cpu(i) { + len = sprintf(name, "CPU%d", i); + name[len] = '\0'; + entry = debugfs_create_file(name, 0444, dentry, + &per_cpu(interrupt_off_hist, i), + &latency_hist_fops); + my_hist = &per_cpu(interrupt_off_hist, i); + atomic_set(&my_hist->hist_mode, 1); + my_hist->min_lat = 0xFFFFFFFFUL; + } + entry = debugfs_create_file("reset", 0444, dentry, + (void *)INTERRUPT_LATENCY, + &latency_hist_reset_fops); +#endif + +#ifdef CONFIG_PREEMPT_OFF_HIST + dentry = debugfs_create_dir(preempt_off_hist_dir, + latency_hist_root); + for_each_possible_cpu(i) { + len = sprintf(name, "CPU%d", i); + name[len] = '\0'; + entry = debugfs_create_file(name, 0444, dentry, + &per_cpu(preempt_off_hist, i), + &latency_hist_fops); + my_hist = &per_cpu(preempt_off_hist, i); + atomic_set(&my_hist->hist_mode, 1); + my_hist->min_lat = 0xFFFFFFFFUL; + } + entry = debugfs_create_file("reset", 0444, dentry, + (void *)PREEMPT_LATENCY, + &latency_hist_reset_fops); +#endif + +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST) + dentry = debugfs_create_dir(preempt_irqs_off_hist_dir, + latency_hist_root); + for_each_possible_cpu(i) { + len = sprintf(name, "CPU%d", i); + name[len] = '\0'; + entry = debugfs_create_file(name, 0444, dentry, + &per_cpu(preempt_off_hist, i), + &latency_hist_fops); + my_hist = &per_cpu(preempt_irqs_off_hist, i); + atomic_set(&my_hist->hist_mode, 1); + my_hist->min_lat = 0xFFFFFFFFUL; + } + entry = debugfs_create_file("reset", 0444, dentry, + (void *)PREEMPT_INTERRUPT_LATENCY, + &latency_hist_reset_fops); +#endif + +#ifdef CONFIG_WAKEUP_LATENCY_HIST + dentry = debugfs_create_dir(wakeup_latency_hist_dir, + latency_hist_root); + for_each_possible_cpu(i) { + len = sprintf(name, "CPU%d", i); + name[len] = '\0'; + entry = debugfs_create_file(name, 0444, dentry, + &per_cpu(wakeup_latency_hist, i), + &latency_hist_fops); + my_hist = &per_cpu(wakeup_latency_hist, i); + atomic_set(&my_hist->hist_mode, 1); + my_hist->min_lat = 0xFFFFFFFFUL; + } + entry = debugfs_create_file("reset", 0444, dentry, + (void *)WAKEUP_LATENCY, + &latency_hist_reset_fops); +#endif + return 0; + +} + +__initcall(latency_hist_init); Index: linux-2.6.25.4-rt2/kernel/trace/trace_hist.h 
=================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.25.4-rt2/kernel/trace/trace_hist.h 2008-05-19 16:55:20.000000000 -0400 @@ -0,0 +1,39 @@ +/* + * kernel/trace/trace_hist.h + * + * Add support for histograms of preemption-off latency and + * interrupt-off latency and wakeup latency, it depends on + * Real-Time Preemption Support. + * + * Copyright (C) 2005 MontaVista Software, Inc. + * Yi Yang + * + * Converted to work with the new latency tracer. + * Copyright (C) 2008 Red Hat, Inc. + * Steven Rostedt + * + */ +#ifndef _LIB_TRACING_TRACER_HIST_H_ +#define _LIB_TRACING_TRACER_HIST_H_ + +#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST) +# define TRACE_STOP 2 +void tracing_hist_preempt_start(void); +void tracing_hist_preempt_stop(int irqs_on); +#else +# define tracing_hist_preempt_start() do { } while (0) +# define tracing_hist_preempt_stop(irqs_off) do { } while (0) +#endif + +#ifdef CONFIG_WAKEUP_LATENCY_HIST +void tracing_hist_wakeup_start(struct task_struct *p, + struct task_struct *curr); +void tracing_hist_wakeup_stop(struct task_struct *next); +extern int tracing_wakeup_hist; +#else +# define tracing_hist_wakeup_start(p, curr) do { } while (0) +# define tracing_hist_wakeup_stop(next) do { } while (0) +# define tracing_wakeup_hist 0 +#endif + +#endif /* ifndef _LIB_TRACING_TRACER_HIST_H_ */ Index: linux-2.6.25.4-rt2/kernel/trace/trace_sched_switch.c =================================================================== --- linux-2.6.25.4-rt2.orig/kernel/trace/trace_sched_switch.c 2008-05-19 16:55:20.000000000 -0400 +++ linux-2.6.25.4-rt2/kernel/trace/trace_sched_switch.c 2008-05-19 16:55:20.000000000 -0400 @@ -13,6 +13,7 @@ #include #include "trace.h" +#include "trace_hist.h" static struct trace_array *ctx_trace; static int __read_mostly tracer_enabled; @@ -96,6 +97,7 @@ ftrace_wake_up_task(void *__rq, struct t struct task_struct *curr) { trace_event_wakeup(wakee, curr); + tracing_hist_wakeup_start(wakee, curr); wakeup_func(__rq, wakee, curr); patches/no-warning-for-irqs-disabled-in-local-bh-enable.patch0000644001303100130310000000211511014364573024344 0ustar rostedtrostedtFrom: Kevin Hilman Subject: [PATCH/RFC -rt] local_bh_enable() is safe for irqs_disabled() In local_bh_enable() there is a WARN_ON(irqs_disabled()), but looking at the rest of the code, it seems it expects to be used with interrupts off, so is this warning really necessary? I hit this warning in the ads7846 touchscreen driver timer function where enable_irq() may be called with interrupts disabled. Since enable_irq now calls local_bh_disable/enable for IRQ resend, this warning is triggered. 
Patch against 2.6.23.9-rt12 Signed-off-by: Kevin Hilman --- kernel/softirq.c | 1 - 1 file changed, 1 deletion(-) Index: linux-2.6.25.4-rt2/kernel/softirq.c =================================================================== --- linux-2.6.25.4-rt2.orig/kernel/softirq.c 2008-05-19 16:56:14.000000000 -0400 +++ linux-2.6.25.4-rt2/kernel/softirq.c 2008-05-19 16:56:27.000000000 -0400 @@ -209,7 +209,6 @@ void local_bh_enable(void) WARN_ON_ONCE(in_irq()); #endif - WARN_ON_ONCE(irqs_disabled()); #ifdef CONFIG_TRACE_IRQFLAGS local_irq_save(flags); patches/user-no-irq-disable.patch0000644001303100130310000000134411014364572017235 0ustar rostedtrostedt--- kernel/user.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) Index: linux-2.6.25.4-rt2/kernel/user.c =================================================================== --- linux-2.6.25.4-rt2.orig/kernel/user.c 2008-05-19 16:55:56.000000000 -0400 +++ linux-2.6.25.4-rt2/kernel/user.c 2008-05-19 16:56:26.000000000 -0400 @@ -265,14 +265,14 @@ static void remove_user_sysfs_dir(struct */ uids_mutex_lock(); - local_irq_save(flags); + local_irq_save_nort(flags); if (atomic_dec_and_lock(&up->__count, &uidhash_lock)) { uid_hash_remove(up); remove_user = 1; spin_unlock_irqrestore(&uidhash_lock, flags); } else { - local_irq_restore(flags); + local_irq_restore_nort(flags); } if (!remove_user) patches/disable-lpptest-on-nonlinux.patch0000644001303100130310000000273411014364561021033 0ustar rostedtrostedt Sadly people keep wanting to build kernels on non-Linux hosts (cygwin & solaris) and testlpp really doesn't like to build on those. I have a separate patch to testlpp.c that fixes this, but it really makes no sense to build the tool to run on your cygwin host as it's meant to be run on Linux with the testlpp module loaded. Even this patch isn't really the right solution b/c you really want to cross-build it (you may be cross-building for another architecture from Linux): you want cross-compile, not host compile, but there's no really easy way to cross-compile a userland binary from the kernel build w/o some makefile ugliness AFAICT. Is there some sort of -rt userland package this could move to instead of being in the kernel itself...?
Signed-off-by: Deepak Saxena --- scripts/Makefile | 3 +++ 1 file changed, 3 insertions(+) Index: linux-2.6.25.4-rt2/scripts/Makefile =================================================================== --- linux-2.6.25.4-rt2.orig/scripts/Makefile 2008-05-19 16:55:30.000000000 -0400 +++ linux-2.6.25.4-rt2/scripts/Makefile 2008-05-19 16:56:17.000000000 -0400 @@ -12,9 +12,12 @@ hostprogs-$(CONFIG_LOGO) += pnmt hostprogs-$(CONFIG_VT) += conmakehash hostprogs-$(CONFIG_PROM_CONSOLE) += conmakehash hostprogs-$(CONFIG_IKCONFIG) += bin2c +HOST_OS := $(shell uname) +ifeq ($(HOST_OS),Linux) ifdef CONFIG_LPPTEST hostprogs-y += testlpp endif +endif always := $(hostprogs-y) $(hostprogs-m) patches/kmap-atomic-i386-fix.patch0000644001303100130310000000320311014364553017123 0ustar rostedtrostedt--- arch/x86/mm/highmem_32.c | 2 +- include/asm-x86/highmem.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) Index: linux-2.6.25.4-rt2/arch/x86/mm/highmem_32.c =================================================================== --- linux-2.6.25.4-rt2.orig/arch/x86/mm/highmem_32.c 2008-05-19 16:56:10.000000000 -0400 +++ linux-2.6.25.4-rt2/arch/x86/mm/highmem_32.c 2008-05-19 16:56:10.000000000 -0400 @@ -3,9 +3,9 @@ void *kmap(struct page *page) { - might_sleep(); if (!PageHighMem(page)) return page_address(page); + might_sleep(); return kmap_high(page); } Index: linux-2.6.25.4-rt2/include/asm-x86/highmem.h =================================================================== --- linux-2.6.25.4-rt2.orig/include/asm-x86/highmem.h 2008-05-19 16:55:54.000000000 -0400 +++ linux-2.6.25.4-rt2/include/asm-x86/highmem.h 2008-05-19 16:56:10.000000000 -0400 @@ -88,10 +88,10 @@ struct page *kmap_atomic_to_page(void *p * on PREEMPT_RT kmap_atomic() is a wrapper that uses kmap(): */ #ifdef CONFIG_PREEMPT_RT -# define kmap_atomic_prot(page, type, prot) kmap(page) -# define kmap_atomic(page, type) kmap(page) +# define kmap_atomic_prot(page, type, prot) ({ pagefault_disable(); kmap(page); }) +# define kmap_atomic(page, type) ({ pagefault_disable(); kmap(page); }) # define kmap_atomic_pfn(pfn, type) kmap(pfn_to_page(pfn)) -# define kunmap_atomic(kvaddr, type) kunmap_virt(kvaddr) +# define kunmap_atomic(kvaddr, type) do { pagefault_enable(); kunmap_virt(kvaddr); } while(0) # define kmap_atomic_to_page(kvaddr) kmap_to_page(kvaddr) #else # define kmap_atomic_prot(page, type, prot) __kmap_atomic_prot(page, type, prot) patches/rcu-preempt-boost-default.patch0000644001303100130310000000124711014364463020465 0ustar rostedtrostedt--- kernel/Kconfig.preempt | 1 + 1 file changed, 1 insertion(+) Index: linux-2.6.25.4-rt2/kernel/Kconfig.preempt =================================================================== --- linux-2.6.25.4-rt2.orig/kernel/Kconfig.preempt 2008-05-19 16:55:15.000000000 -0400 +++ linux-2.6.25.4-rt2/kernel/Kconfig.preempt 2008-05-19 16:55:15.000000000 -0400 @@ -69,6 +69,7 @@ config PREEMPT_RCU config PREEMPT_RCU_BOOST bool "Enable priority boosting of RCU read-side critical sections" depends on PREEMPT_RCU + default y if PREEMPT_RT help This option permits priority boosting of RCU read-side critical sections tat have been preempted and a RT process is waiting patches/preempt-realtime-arm-rawlock-in-mmu_context-h.patch0000644001303100130310000000335711014364523024337 0ustar rostedtrostedtFrom khilman@mvista.com Fri Aug 31 05:09:03 2007 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.1.7-deb (2006-10-05) on debian X-Spam-Level: X-Spam-Status: No, score=0.0 required=5.0 tests=AWL autolearn=unavailable 
version=3.1.7-deb Received: from paris.hilman.org (deeprooted.net [216.254.16.51]) by mail.tglx.de (Postfix) with ESMTP id 1F21965C003 for ; Fri, 31 Aug 2007 05:09:03 +0200 (CEST) Received: by paris.hilman.org (Postfix, from userid 1000) id C5837E4C5FE; Thu, 30 Aug 2007 20:09:02 -0700 (PDT) Message-Id: <20070831030841.799694742@mvista.com> User-Agent: quilt/0.45-1 Date: Thu, 30 Aug 2007 20:08:41 -0700 From: Kevin Hilman To: Ingo Molnar , Thomas Gleixner Cc: LKML , RT-Users Subject: [PATCH 2.6.23-rc2-rt2] ARM: use raw lock in __new_context X-Evolution-Source: imap://tglx%40linutronix.de@localhost:8993/ Content-Transfer-Encoding: 8bit Mime-Version: 1.0 The ARM CPU ASID lock should be raw as it's used by schedule() when creating a new context. Signed-off-by: Kevin Hilman --- arch/arm/mm/context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) Index: linux-2.6.25.4-rt2/arch/arm/mm/context.c =================================================================== --- linux-2.6.25.4-rt2.orig/arch/arm/mm/context.c 2008-05-19 16:55:02.000000000 -0400 +++ linux-2.6.25.4-rt2/arch/arm/mm/context.c 2008-05-19 16:55:47.000000000 -0400 @@ -14,7 +14,7 @@ #include #include -static DEFINE_SPINLOCK(cpu_asid_lock); +static DEFINE_RAW_SPINLOCK(cpu_asid_lock); unsigned int cpu_last_asid = ASID_FIRST_VERSION; /* patches/version.patch0000644001303100130310000000131111014364606015130 0ustar rostedtrostedtSubject: add -rt extra-version From: Ingo Molnar add -rt extra-version. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner Signed-off-by: Steven Rostedt --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) Index: linux-2.6.25.4-rt2/Makefile =================================================================== --- linux-2.6.25.4-rt2.orig/Makefile 2008-05-19 16:55:17.000000000 -0400 +++ linux-2.6.25.4-rt2/Makefile 2008-05-19 16:56:38.000000000 -0400 @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 25 -EXTRAVERSION = .4 +EXTRAVERSION = .4-rt2 NAME = Funky Weasel is Jiggy wit it # *DOCUMENTATION* patches/rcu-various-fixups.patch0000644001303100130310000000425011014364462017243 0ustar rostedtrostedt--- security/selinux/avc.c | 9 +++++++++ security/selinux/netif.c | 2 ++ 2 files changed, 11 insertions(+) Index: linux-2.6.25.4-rt2/security/selinux/avc.c =================================================================== --- linux-2.6.25.4-rt2.orig/security/selinux/avc.c 2008-05-19 16:55:13.000000000 -0400 +++ linux-2.6.25.4-rt2/security/selinux/avc.c 2008-05-19 16:55:14.000000000 -0400 @@ -312,6 +312,7 @@ static inline int avc_reclaim_node(void) if (!spin_trylock_irqsave(&avc_cache.slots_lock[hvalue], flags)) continue; + rcu_read_lock(); list_for_each_entry(node, &avc_cache.slots[hvalue], list) { if (atomic_dec_and_test(&node->ae.used)) { /* Recently Unused */ @@ -319,11 +320,13 @@ static inline int avc_reclaim_node(void) avc_cache_stats_incr(reclaims); ecx++; if (ecx >= AVC_CACHE_RECLAIM) { + rcu_read_unlock(); spin_unlock_irqrestore(&avc_cache.slots_lock[hvalue], flags); goto out; } } } + rcu_read_unlock(); spin_unlock_irqrestore(&avc_cache.slots_lock[hvalue], flags); } out: @@ -821,8 +824,14 @@ int avc_ss_reset(u32 seqno) for (i = 0; i < AVC_CACHE_SLOTS; i++) { spin_lock_irqsave(&avc_cache.slots_lock[i], flag); + /* + * On -rt the outer spinlock does not prevent RCU + * from being performed: + */ + rcu_read_lock(); list_for_each_entry(node, &avc_cache.slots[i], list) avc_node_delete(node); + rcu_read_unlock(); spin_unlock_irqrestore(&avc_cache.slots_lock[i], flag); } Index: 
linux-2.6.25.4-rt2/security/selinux/netif.c =================================================================== --- linux-2.6.25.4-rt2.orig/security/selinux/netif.c 2008-05-19 16:55:13.000000000 -0400 +++ linux-2.6.25.4-rt2/security/selinux/netif.c 2008-05-19 16:55:14.000000000 -0400 @@ -259,11 +259,13 @@ static void sel_netif_flush(void) int idx; struct sel_netif *netif; + rcu_read_lock(); spin_lock_bh(&sel_netif_lock); for (idx = 0; idx < SEL_NETIF_HASH_SIZE; idx++) list_for_each_entry(netif, &sel_netif_hash[idx], list) sel_netif_destroy(netif); spin_unlock_bh(&sel_netif_lock); + rcu_read_unlock(); } static int sel_netif_avc_callback(u32 event, u32 ssid, u32 tsid, patches/sched_rt-fixup.patch0000644001303100130310000000176511014364603016401 0ustar rostedtrostedtFrom: Peter Zijlstra --- kernel/sched.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) Index: linux-2.6.25.4-rt2/kernel/sched.c =================================================================== --- linux-2.6.25.4-rt2.orig/kernel/sched.c 2008-05-19 16:56:32.000000000 -0400 +++ linux-2.6.25.4-rt2/kernel/sched.c 2008-05-19 16:56:35.000000000 -0400 @@ -654,7 +654,8 @@ static __read_mostly int scheduler_runni * part of the period that we allow rt tasks to run in us. * default: 0.95s */ -int sysctl_sched_rt_runtime = 950000; +/* int sysctl_sched_rt_runtime = 950000; */ +int sysctl_sched_rt_runtime = -1; static inline u64 global_rt_period(void) { @@ -7655,8 +7656,7 @@ void __init sched_init(void) #endif #ifdef CONFIG_RT_GROUP_SCHED - init_task_group.rt_runtime = - sysctl_sched_rt_runtime * NSEC_PER_USEC; + init_task_group.rt_runtime = global_rt_runtime(); INIT_LIST_HEAD(&rq->leaf_rt_rq_list); init_tg_rt_entry(rq, &init_task_group, &per_cpu(init_rt_rq, i), patches/rcu-new-7.patch0000644001303100130310000002050611014364464015200 0ustar rostedtrostedtFrom paulmck@linux.vnet.ibm.com Thu Sep 27 15:32:09 2007 Date: Mon, 10 Sep 2007 11:39:46 -0700 From: Paul E. McKenney To: linux-kernel@vger.kernel.org Cc: linux-rt-users@vger.kernel.org, mingo@elte.hu, akpm@linux-foundation.org, dipankar@in.ibm.com, josht@linux.vnet.ibm.com, tytso@us.ibm.com, dvhltc@us.ibm.com, tglx@linutronix.de, a.p.zijlstra@chello.nl, bunk@kernel.org, ego@in.ibm.com, oleg@tv-sign.ru, srostedt@redhat.com Subject: [PATCH RFC 7/9] RCU: rcutorture testing for RCU priority boosting Work in progress, not for inclusion. Still uses xtime because this patch is still against 2.6.22. This patch modifies rcutorture to also torture RCU priority boosting. The torturing involves forcing RCU read-side critical sections (already performed as part of the torturing of RCU) to run for extremely long time periods, increasing the probability of their being preempted and thus needing priority boosting. The fact that rcutorture's "nreaders" module parameter defaults to twice the number of CPUs helps ensure lots of the needed preemption. To cause the torturing to be fully effective in -mm, run in presence of CPU-hotplug operations. Signed-off-by: Paul E. 
McKenney --- kernel/rcutorture.c | 91 ++++++++++++++++++++++++++++++++++++++-------- kernel/time/timekeeping.c | 2 + 2 files changed, 79 insertions(+), 14 deletions(-) Index: linux-2.6.25.4-rt2/kernel/rcutorture.c =================================================================== --- linux-2.6.25.4-rt2.orig/kernel/rcutorture.c 2008-05-19 16:55:13.000000000 -0400 +++ linux-2.6.25.4-rt2/kernel/rcutorture.c 2008-05-19 16:55:16.000000000 -0400 @@ -57,6 +57,7 @@ static int stat_interval; /* Interval be static int verbose; /* Print more debug info. */ static int test_no_idle_hz; /* Test RCU's support for tickless idle CPUs. */ static int shuffle_interval = 5; /* Interval between shuffles (in sec)*/ +static int preempt_torture; /* Realtime task preempts torture readers. */ static char *torture_type = "rcu"; /* What RCU implementation to torture. */ module_param(nreaders, int, 0444); @@ -71,6 +72,8 @@ module_param(test_no_idle_hz, bool, 0444 MODULE_PARM_DESC(test_no_idle_hz, "Test support for tickless idle CPUs"); module_param(shuffle_interval, int, 0444); MODULE_PARM_DESC(shuffle_interval, "Number of seconds between shuffles"); +module_param(preempt_torture, bool, 0444); +MODULE_PARM_DESC(preempt_torture, "Enable realtime preemption torture"); module_param(torture_type, charp, 0444); MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, srcu)"); @@ -191,6 +194,8 @@ struct rcu_torture_ops { int (*completed)(void); void (*deferredfree)(struct rcu_torture *p); void (*sync)(void); + long (*preemptstart)(void); + void (*preemptend)(void); int (*stats)(char *page); char *name; }; @@ -255,16 +260,75 @@ static void rcu_torture_deferred_free(st call_rcu(&p->rtort_rcu, rcu_torture_cb); } +static struct task_struct *rcu_preeempt_task; +static unsigned long rcu_torture_preempt_errors; + +static int rcu_torture_preempt(void *arg) +{ + int completedstart; + int err; + time_t gcstart; + struct sched_param sp; + + sp.sched_priority = MAX_RT_PRIO - 1; + err = sched_setscheduler(current, SCHED_RR, &sp); + if (err != 0) + printk(KERN_ALERT "rcu_torture_preempt() priority err: %d\n", + err); + current->flags |= PF_NOFREEZE; + + do { + completedstart = rcu_torture_completed(); + gcstart = xtime.tv_sec; + while ((xtime.tv_sec - gcstart < 10) && + (rcu_torture_completed() == completedstart)) + cond_resched(); + if (rcu_torture_completed() == completedstart) + rcu_torture_preempt_errors++; + schedule_timeout_interruptible(1); + } while (!kthread_should_stop()); + return 0; +} + +static long rcu_preempt_start(void) +{ + long retval = 0; + + rcu_preeempt_task = kthread_run(rcu_torture_preempt, NULL, + "rcu_torture_preempt"); + if (IS_ERR(rcu_preeempt_task)) { + VERBOSE_PRINTK_ERRSTRING("Failed to create preempter"); + retval = PTR_ERR(rcu_preeempt_task); + rcu_preeempt_task = NULL; + } + return retval; +} + +static void rcu_preempt_end(void) +{ + if (rcu_preeempt_task != NULL) { + VERBOSE_PRINTK_STRING("Stopping rcu_preempt task"); + kthread_stop(rcu_preeempt_task); + } + rcu_preeempt_task = NULL; +} + +static int rcu_preempt_stats(char *page) +{ + return sprintf(page, + "Preemption stalls: %lu\n", rcu_torture_preempt_errors); +} + static struct rcu_torture_ops rcu_ops = { - .init = NULL, - .cleanup = NULL, .readlock = rcu_torture_read_lock, .readdelay = rcu_read_delay, .readunlock = rcu_torture_read_unlock, .completed = rcu_torture_completed, .deferredfree = rcu_torture_deferred_free, .sync = synchronize_rcu, - .stats = NULL, + .preemptstart = rcu_preempt_start, + .preemptend = rcu_preempt_end, + .stats = 
rcu_preempt_stats, .name = "rcu" }; @@ -296,14 +360,12 @@ static void rcu_sync_torture_init(void) static struct rcu_torture_ops rcu_sync_ops = { .init = rcu_sync_torture_init, - .cleanup = NULL, .readlock = rcu_torture_read_lock, .readdelay = rcu_read_delay, .readunlock = rcu_torture_read_unlock, .completed = rcu_torture_completed, .deferredfree = rcu_sync_torture_deferred_free, .sync = synchronize_rcu, - .stats = NULL, .name = "rcu_sync" }; @@ -355,28 +417,23 @@ static void rcu_bh_torture_synchronize(v } static struct rcu_torture_ops rcu_bh_ops = { - .init = NULL, - .cleanup = NULL, .readlock = rcu_bh_torture_read_lock, .readdelay = rcu_read_delay, /* just reuse rcu's version. */ .readunlock = rcu_bh_torture_read_unlock, .completed = rcu_bh_torture_completed, .deferredfree = rcu_bh_torture_deferred_free, .sync = rcu_bh_torture_synchronize, - .stats = NULL, .name = "rcu_bh" }; static struct rcu_torture_ops rcu_bh_sync_ops = { .init = rcu_sync_torture_init, - .cleanup = NULL, .readlock = rcu_bh_torture_read_lock, .readdelay = rcu_read_delay, /* just reuse rcu's version. */ .readunlock = rcu_bh_torture_read_unlock, .completed = rcu_bh_torture_completed, .deferredfree = rcu_sync_torture_deferred_free, .sync = rcu_bh_torture_synchronize, - .stats = NULL, .name = "rcu_bh_sync" }; @@ -488,14 +545,12 @@ static void sched_torture_synchronize(vo static struct rcu_torture_ops sched_ops = { .init = rcu_sync_torture_init, - .cleanup = NULL, .readlock = sched_torture_read_lock, .readdelay = rcu_read_delay, /* just reuse rcu's version. */ .readunlock = sched_torture_read_unlock, .completed = sched_torture_completed, .deferredfree = rcu_sync_torture_deferred_free, .sync = sched_torture_synchronize, - .stats = NULL, .name = "sched" }; @@ -787,9 +842,10 @@ rcu_torture_print_module_parms(char *tag printk(KERN_ALERT "%s" TORTURE_FLAG "--- %s: nreaders=%d nfakewriters=%d " "stat_interval=%d verbose=%d test_no_idle_hz=%d " - "shuffle_interval = %d\n", + "shuffle_interval=%d preempt_torture=%d\n", torture_type, tag, nrealreaders, nfakewriters, - stat_interval, verbose, test_no_idle_hz, shuffle_interval); + stat_interval, verbose, test_no_idle_hz, shuffle_interval, + preempt_torture); } static void @@ -842,6 +898,8 @@ rcu_torture_cleanup(void) kthread_stop(stats_task); } stats_task = NULL; + if (preempt_torture && (cur_ops->preemptend != NULL)) + cur_ops->preemptend(); /* Wait for all RCU callbacks to fire. 
*/ rcu_barrier(); @@ -984,6 +1042,11 @@ rcu_torture_init(void) goto unwind; } } + if (preempt_torture && (cur_ops->preemptstart != NULL)) { + firsterr = cur_ops->preemptstart(); + if (firsterr != 0) + goto unwind; + } return 0; unwind: Index: linux-2.6.25.4-rt2/kernel/time/timekeeping.c =================================================================== --- linux-2.6.25.4-rt2.orig/kernel/time/timekeeping.c 2008-05-19 16:55:13.000000000 -0400 +++ linux-2.6.25.4-rt2/kernel/time/timekeeping.c 2008-05-19 16:55:16.000000000 -0400 @@ -26,6 +26,7 @@ */ __cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock); +EXPORT_SYMBOL_GPL(xtime_lock); /* * The current time @@ -45,6 +46,7 @@ __cacheline_aligned_in_smp DEFINE_SEQLOC struct timespec xtime __attribute__ ((aligned (16))); struct timespec wall_to_monotonic __attribute__ ((aligned (16))); static unsigned long total_sleep_time; /* seconds */ +EXPORT_SYMBOL_GPL(xtime); static struct timespec xtime_cache __attribute__ ((aligned (16))); void update_xtime_cache(u64 nsec) patches/nmi-profiling-base.patch0000644001303100130310000003040011014364501017120 0ustar rostedtrostedtSubject: [patch] nmi-driven profiling for /proc/profile From: Ingo Molnar nmi-driven profiling for /proc/profile Signed-off-by: Ingo Molnar --- arch/x86/kernel/crash.c | 8 ---- arch/x86/kernel/irq_64.c | 2 + arch/x86/kernel/nmi_32.c | 89 ++++++++++++++++++++++++++++++++++++++++++---- arch/x86/kernel/nmi_64.c | 64 +++++++++++++++++++++++++++++++-- include/asm-x86/apic.h | 2 + include/linux/profile.h | 1 include/linux/sched.h | 1 kernel/profile.c | 9 +++- kernel/time/tick-common.c | 1 kernel/time/tick-sched.c | 2 - 10 files changed, 155 insertions(+), 24 deletions(-) Index: linux-2.6.25.4-rt2/arch/x86/kernel/crash.c =================================================================== --- linux-2.6.25.4-rt2.orig/arch/x86/kernel/crash.c 2008-05-19 16:55:09.000000000 -0400 +++ linux-2.6.25.4-rt2/arch/x86/kernel/crash.c 2008-05-19 16:55:29.000000000 -0400 @@ -78,14 +78,6 @@ static int crash_nmi_callback(struct not return 1; } -static void smp_send_nmi_allbutself(void) -{ - cpumask_t mask = cpu_online_map; - cpu_clear(safe_smp_processor_id(), mask); - if (!cpus_empty(mask)) - send_IPI_mask(mask, NMI_VECTOR); -} - static struct notifier_block crash_nmi_nb = { .notifier_call = crash_nmi_callback, }; Index: linux-2.6.25.4-rt2/arch/x86/kernel/nmi_32.c =================================================================== --- linux-2.6.25.4-rt2.orig/arch/x86/kernel/nmi_32.c 2008-05-19 16:55:09.000000000 -0400 +++ linux-2.6.25.4-rt2/arch/x86/kernel/nmi_32.c 2008-05-19 16:55:29.000000000 -0400 @@ -25,6 +25,7 @@ #include #include +#include #include "mach_traps.h" @@ -42,7 +43,7 @@ static cpumask_t backtrace_mask = CPU_MA atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ unsigned int nmi_watchdog = NMI_DEFAULT; -static unsigned int nmi_hz = HZ; +static unsigned int nmi_hz = 1000; static DEFINE_PER_CPU(short, wd_enabled); @@ -92,7 +93,7 @@ static int __init check_nmi_watchdog(voi for_each_possible_cpu(cpu) prev_nmi_count[cpu] = nmi_count(cpu); local_irq_enable(); - mdelay((20*1000)/nmi_hz); // wait 20 ticks + mdelay((100*1000)/nmi_hz); // wait 100 ticks for_each_possible_cpu(cpu) { #ifdef CONFIG_SMP @@ -317,6 +318,46 @@ EXPORT_SYMBOL(touch_nmi_watchdog); extern void die_nmi(struct pt_regs *, const char *msg); +int nmi_show_regs[NR_CPUS]; + +void nmi_show_all_regs(void) +{ + int i; + + if (system_state == SYSTEM_BOOTING) + return; + + printk(KERN_WARNING "nmi_show_all_regs(): start on CPU#%d.\n", 
+ raw_smp_processor_id()); + dump_stack(); + + for_each_online_cpu(i) + nmi_show_regs[i] = 1; + + smp_send_nmi_allbutself(); + + for_each_online_cpu(i) { + while (nmi_show_regs[i] == 1) + barrier(); + } +} + +static DEFINE_SPINLOCK(nmi_print_lock); + +void irq_show_regs_callback(int cpu, struct pt_regs *regs) +{ + if (!nmi_show_regs[cpu]) + return; + + nmi_show_regs[cpu] = 0; + spin_lock(&nmi_print_lock); + printk(KERN_WARNING "NMI show regs on CPU#%d:\n", cpu); + printk(KERN_WARNING "apic_timer_irqs: %d\n", + per_cpu(irq_stat, cpu).apic_timer_irqs); + show_regs(regs); + spin_unlock(&nmi_print_lock); +} + __kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) { @@ -330,6 +371,8 @@ __kprobes int nmi_watchdog_tick(struct p int cpu = smp_processor_id(); int rc = 0; + __profile_tick(CPU_PROFILING, regs); + /* check for other users first */ if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) { @@ -354,6 +397,9 @@ __kprobes int nmi_watchdog_tick(struct p sum = per_cpu(irq_stat, cpu).apic_timer_irqs + per_cpu(irq_stat, cpu).irq0_irqs; + irq_show_regs_callback(cpu, regs); + + /* if the apic timer isn't firing, this cpu isn't doing much */ /* if the none of the timers isn't firing, this cpu isn't doing much */ if (!touched && last_irq_sums[cpu] == sum) { /* @@ -361,11 +407,30 @@ __kprobes int nmi_watchdog_tick(struct p * wait a few IRQs (5 seconds) before doing the oops ... */ alert_counter[cpu]++; - if (alert_counter[cpu] == 5*nmi_hz) - /* - * die_nmi will return ONLY if NOTIFY_STOP happens.. - */ - die_nmi(regs, "BUG: NMI Watchdog detected LOCKUP"); + if (alert_counter[cpu] && !(alert_counter[cpu] % (5*nmi_hz))) { + int i; + + spin_lock(&nmi_print_lock); + printk(KERN_WARNING "NMI watchdog detected lockup on " + "CPU#%d (%d/%d)\n", cpu, alert_counter[cpu], + 5*nmi_hz); + show_regs(regs); + spin_unlock(&nmi_print_lock); + + for_each_online_cpu(i) { + if (i == cpu) + continue; + nmi_show_regs[i] = 1; + while (nmi_show_regs[i] == 1) + cpu_relax(); + } + printk(KERN_WARNING "NMI watchdog running again ...\n"); + for_each_online_cpu(i) + alert_counter[i] = 0; + + + } + } else { last_irq_sums[cpu] = sum; alert_counter[cpu] = 0; @@ -463,5 +528,15 @@ void __trigger_all_cpu_backtrace(void) } } +void smp_send_nmi_allbutself(void) +{ +#ifdef CONFIG_SMP + cpumask_t mask = cpu_online_map; + cpu_clear(safe_smp_processor_id(), mask); + if (!cpus_empty(mask)) + send_IPI_mask(mask, NMI_VECTOR); +#endif +} + EXPORT_SYMBOL(nmi_active); EXPORT_SYMBOL(nmi_watchdog); Index: linux-2.6.25.4-rt2/arch/x86/kernel/irq_64.c =================================================================== --- linux-2.6.25.4-rt2.orig/arch/x86/kernel/irq_64.c 2008-05-19 16:55:19.000000000 -0400 +++ linux-2.6.25.4-rt2/arch/x86/kernel/irq_64.c 2008-05-19 16:55:29.000000000 -0400 @@ -166,6 +166,8 @@ asmlinkage unsigned int do_IRQ(struct pt unsigned vector = ~regs->orig_ax; unsigned irq; + irq_show_regs_callback(smp_processor_id(), regs); + exit_idle(); irq_enter(); irq = __get_cpu_var(vector_irq)[vector]; Index: linux-2.6.25.4-rt2/arch/x86/kernel/nmi_64.c =================================================================== --- linux-2.6.25.4-rt2.orig/arch/x86/kernel/nmi_64.c 2008-05-19 16:55:09.000000000 -0400 +++ linux-2.6.25.4-rt2/arch/x86/kernel/nmi_64.c 2008-05-19 16:55:29.000000000 -0400 @@ -20,11 +20,13 @@ #include #include #include +#include #include #include #include #include +#include int unknown_nmi_panic; int nmi_watchdog_enabled; @@ -42,7 +44,7 @@ atomic_t nmi_active = ATOMIC_INIT(0); / static int 
panic_on_timeout; unsigned int nmi_watchdog = NMI_DEFAULT; -static unsigned int nmi_hz = HZ; +static unsigned int nmi_hz = 1000; static DEFINE_PER_CPU(short, wd_enabled); @@ -297,7 +299,7 @@ void touch_nmi_watchdog(void) unsigned cpu; /* - * Tell other CPUs to reset their alert counters. We cannot + * Tell other CPUs to reset their alert counters. We cannot * do it ourselves because the alert count increase is not * atomic. */ @@ -311,6 +313,41 @@ void touch_nmi_watchdog(void) } EXPORT_SYMBOL(touch_nmi_watchdog); +int nmi_show_regs[NR_CPUS]; + +void nmi_show_all_regs(void) +{ + int i; + + if (system_state == SYSTEM_BOOTING) + return; + + smp_send_nmi_allbutself(); + + for_each_online_cpu(i) + nmi_show_regs[i] = 1; + + for_each_online_cpu(i) { + while (nmi_show_regs[i] == 1) + barrier(); + } +} + +static DEFINE_SPINLOCK(nmi_print_lock); + +void irq_show_regs_callback(int cpu, struct pt_regs *regs) +{ + if (!nmi_show_regs[cpu]) + return; + + nmi_show_regs[cpu] = 0; + spin_lock(&nmi_print_lock); + printk(KERN_WARNING "NMI show regs on CPU#%d:\n", cpu); + printk(KERN_WARNING "apic_timer_irqs: %d\n", read_pda(apic_timer_irqs)); + show_regs(regs); + spin_unlock(&nmi_print_lock); +} + int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) { int sum; @@ -318,6 +355,9 @@ int __kprobes nmi_watchdog_tick(struct p int cpu = smp_processor_id(); int rc = 0; + irq_show_regs_callback(cpu, regs); + __profile_tick(CPU_PROFILING, regs); + /* check for other users first */ if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) { @@ -354,9 +394,20 @@ int __kprobes nmi_watchdog_tick(struct p * wait a few IRQs (5 seconds) before doing the oops ... */ local_inc(&__get_cpu_var(alert_counter)); - if (local_read(&__get_cpu_var(alert_counter)) == 5*nmi_hz) + if (local_read(&__get_cpu_var(alert_counter)) == 5*nmi_hz) { + int i; + + for_each_online_cpu(i) { + if (i == cpu) + continue; + nmi_show_regs[i] = 1; + while (nmi_show_regs[i] == 1) + cpu_relax(); + } + die_nmi("NMI Watchdog detected LOCKUP on CPU %d\n", regs, panic_on_timeout); + } } else { __get_cpu_var(last_irq_sum) = sum; local_set(&__get_cpu_var(alert_counter), 0); @@ -474,5 +525,12 @@ void __trigger_all_cpu_backtrace(void) } } +void smp_send_nmi_allbutself(void) +{ +#ifdef CONFIG_SMP + send_IPI_allbutself(NMI_VECTOR); +#endif +} + EXPORT_SYMBOL(nmi_active); EXPORT_SYMBOL(nmi_watchdog); Index: linux-2.6.25.4-rt2/include/asm-x86/apic.h =================================================================== --- linux-2.6.25.4-rt2.orig/include/asm-x86/apic.h 2008-05-19 16:55:09.000000000 -0400 +++ linux-2.6.25.4-rt2/include/asm-x86/apic.h 2008-05-19 16:55:29.000000000 -0400 @@ -137,4 +137,6 @@ static inline void lapic_shutdown(void) #endif /* !CONFIG_X86_LOCAL_APIC */ +extern void smp_send_nmi_allbutself(void); + #endif /* __ASM_APIC_H */ Index: linux-2.6.25.4-rt2/include/linux/profile.h =================================================================== --- linux-2.6.25.4-rt2.orig/include/linux/profile.h 2008-05-19 16:55:09.000000000 -0400 +++ linux-2.6.25.4-rt2/include/linux/profile.h 2008-05-19 16:55:29.000000000 -0400 @@ -23,6 +23,7 @@ struct notifier_block; /* init basic kernel profiler */ void __init profile_init(void); +void __profile_tick(int type, struct pt_regs *regs); void profile_tick(int); /* Index: linux-2.6.25.4-rt2/kernel/profile.c =================================================================== --- linux-2.6.25.4-rt2.orig/kernel/profile.c 2008-05-19 16:55:09.000000000 -0400 +++ 
linux-2.6.25.4-rt2/kernel/profile.c 2008-05-19 16:55:29.000000000 -0400 @@ -408,16 +408,19 @@ void profile_hits(int type, void *__pc, #endif /* !CONFIG_SMP */ EXPORT_SYMBOL_GPL(profile_hits); -void profile_tick(int type) +void __profile_tick(int type, struct pt_regs *regs) { - struct pt_regs *regs = get_irq_regs(); - if (type == CPU_PROFILING && timer_hook) timer_hook(regs); if (!user_mode(regs) && cpu_isset(smp_processor_id(), prof_cpu_mask)) profile_hit(type, (void *)profile_pc(regs)); } +void profile_tick(int type) +{ + return __profile_tick(type, get_irq_regs()); +} + #ifdef CONFIG_PROC_FS #include #include Index: linux-2.6.25.4-rt2/kernel/time/tick-common.c =================================================================== --- linux-2.6.25.4-rt2.orig/kernel/time/tick-common.c 2008-05-19 16:55:09.000000000 -0400 +++ linux-2.6.25.4-rt2/kernel/time/tick-common.c 2008-05-19 16:55:29.000000000 -0400 @@ -68,7 +68,6 @@ static void tick_periodic(int cpu) } update_process_times(user_mode(get_irq_regs())); - profile_tick(CPU_PROFILING); } /* Index: linux-2.6.25.4-rt2/kernel/time/tick-sched.c =================================================================== --- linux-2.6.25.4-rt2.orig/kernel/time/tick-sched.c 2008-05-19 16:55:09.000000000 -0400 +++ linux-2.6.25.4-rt2/kernel/time/tick-sched.c 2008-05-19 16:55:29.000000000 -0400 @@ -476,7 +476,6 @@ static void tick_nohz_handler(struct clo } update_process_times(user_mode(regs)); - profile_tick(CPU_PROFILING); /* Do not restart, when we are in the idle loop */ if (ts->tick_stopped) @@ -583,7 +582,6 @@ static enum hrtimer_restart tick_sched_t ts->idle_jiffies++; } update_process_times(user_mode(regs)); - profile_tick(CPU_PROFILING); } /* Do not restart, when we are in the idle loop */ Index: linux-2.6.25.4-rt2/include/linux/sched.h =================================================================== --- linux-2.6.25.4-rt2.orig/include/linux/sched.h 2008-05-19 16:55:21.000000000 -0400 +++ linux-2.6.25.4-rt2/include/linux/sched.h 2008-05-19 16:55:29.000000000 -0400 @@ -271,6 +271,7 @@ static inline void show_state(void) } extern void show_regs(struct pt_regs *); +extern void irq_show_regs_callback(int cpu, struct pt_regs *regs); /* * TASK is a pointer to the task whose backtrace we want to see (or NULL for current patches/rcu-trace-fix-free.patch0000644001303100130310000000146411014364462017044 0ustar rostedtrostedt--- kernel/rcupreempt_trace.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) Index: linux-2.6.25.4-rt2/kernel/rcupreempt_trace.c =================================================================== --- linux-2.6.25.4-rt2.orig/kernel/rcupreempt_trace.c 2008-05-19 16:55:13.000000000 -0400 +++ linux-2.6.25.4-rt2/kernel/rcupreempt_trace.c 2008-05-19 16:55:14.000000000 -0400 @@ -309,11 +309,16 @@ out: static int __init rcupreempt_trace_init(void) { + int ret; + mutex_init(&rcupreempt_trace_mutex); rcupreempt_trace_buf = kmalloc(RCUPREEMPT_TRACE_BUF_SIZE, GFP_KERNEL); if (!rcupreempt_trace_buf) return 1; - return rcupreempt_debugfs_init(); + ret = rcupreempt_debugfs_init(); + if (ret) + kfree(rcupreempt_trace_buf); + return ret; } static void __exit rcupreempt_trace_cleanup(void) patches/nmi-profiling.patch0000644001303100130310000000512311014364544016223 0ustar rostedtrostedt--- arch/x86/kernel/irq_32.c | 2 ++ arch/x86/kernel/nmi_32.c | 5 ++--- arch/x86/kernel/nmi_64.c | 4 ++-- drivers/char/sysrq.c | 2 +- 4 files changed, 7 insertions(+), 6 deletions(-) Index: linux-2.6.25.4-rt2/arch/x86/kernel/irq_32.c 
=================================================================== --- linux-2.6.25.4-rt2.orig/arch/x86/kernel/irq_32.c 2008-05-19 16:55:54.000000000 -0400 +++ linux-2.6.25.4-rt2/arch/x86/kernel/irq_32.c 2008-05-19 16:56:03.000000000 -0400 @@ -79,7 +79,9 @@ unsigned int do_IRQ(struct pt_regs *regs u32 *isp; #endif +#ifdef CONFIG_X86_LOCAL_APIC irq_show_regs_callback(smp_processor_id(), regs); +#endif if (unlikely((unsigned)irq >= NR_IRQS)) { printk(KERN_EMERG "%s: cannot handle IRQ %d\n", Index: linux-2.6.25.4-rt2/arch/x86/kernel/nmi_32.c =================================================================== --- linux-2.6.25.4-rt2.orig/arch/x86/kernel/nmi_32.c 2008-05-19 16:55:54.000000000 -0400 +++ linux-2.6.25.4-rt2/arch/x86/kernel/nmi_32.c 2008-05-19 16:56:03.000000000 -0400 @@ -347,9 +347,9 @@ void nmi_show_all_regs(void) } } -static DEFINE_SPINLOCK(nmi_print_lock); +static DEFINE_RAW_SPINLOCK(nmi_print_lock); -void irq_show_regs_callback(int cpu, struct pt_regs *regs) +notrace void irq_show_regs_callback(int cpu, struct pt_regs *regs) { if (!nmi_show_regs[cpu]) return; @@ -433,7 +433,6 @@ __kprobes int nmi_watchdog_tick(struct p for_each_online_cpu(i) alert_counter[i] = 0; - } } else { Index: linux-2.6.25.4-rt2/arch/x86/kernel/nmi_64.c =================================================================== --- linux-2.6.25.4-rt2.orig/arch/x86/kernel/nmi_64.c 2008-05-19 16:55:50.000000000 -0400 +++ linux-2.6.25.4-rt2/arch/x86/kernel/nmi_64.c 2008-05-19 16:56:03.000000000 -0400 @@ -335,9 +335,9 @@ void nmi_show_all_regs(void) } } -static DEFINE_SPINLOCK(nmi_print_lock); +static DEFINE_RAW_SPINLOCK(nmi_print_lock); -void irq_show_regs_callback(int cpu, struct pt_regs *regs) +notrace void irq_show_regs_callback(int cpu, struct pt_regs *regs) { if (!nmi_show_regs[cpu]) return; Index: linux-2.6.25.4-rt2/drivers/char/sysrq.c =================================================================== --- linux-2.6.25.4-rt2.orig/drivers/char/sysrq.c 2008-05-19 16:55:59.000000000 -0400 +++ linux-2.6.25.4-rt2/drivers/char/sysrq.c 2008-05-19 16:56:03.000000000 -0400 @@ -209,7 +209,7 @@ static struct sysrq_key_op sysrq_showreg .enable_mask = SYSRQ_ENABLE_DUMP, }; -#if defined(__i386__) +#if defined(__i386__) || defined(__x86_64__) static void sysrq_handle_showallregs(int key, struct tty_struct *tty) { patches/preempt-realtime-rawlocks.patch0000644001303100130310000001134311014364540020545 0ustar rostedtrostedt--- drivers/oprofile/oprofilefs.c | 2 +- drivers/pci/access.c | 2 +- drivers/video/console/vgacon.c | 2 +- include/linux/kprobes.h | 2 +- include/linux/oprofile.h | 2 +- include/linux/percpu_counter.h | 2 +- kernel/kprobes.c | 2 +- kernel/softlockup.c | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) Index: linux-2.6.25.4-rt2/drivers/oprofile/oprofilefs.c =================================================================== --- linux-2.6.25.4-rt2.orig/drivers/oprofile/oprofilefs.c 2008-05-19 16:54:58.000000000 -0400 +++ linux-2.6.25.4-rt2/drivers/oprofile/oprofilefs.c 2008-05-19 16:56:00.000000000 -0400 @@ -21,7 +21,7 @@ #define OPROFILEFS_MAGIC 0x6f70726f -DEFINE_SPINLOCK(oprofilefs_lock); +DEFINE_RAW_SPINLOCK(oprofilefs_lock); static struct inode * oprofilefs_get_inode(struct super_block * sb, int mode) { Index: linux-2.6.25.4-rt2/drivers/pci/access.c =================================================================== --- linux-2.6.25.4-rt2.orig/drivers/pci/access.c 2008-05-19 16:54:58.000000000 -0400 +++ linux-2.6.25.4-rt2/drivers/pci/access.c 2008-05-19 16:56:00.000000000 -0400 @@ -11,7 +11,7 @@ * 
configuration space. */ -static DEFINE_SPINLOCK(pci_lock); +static DEFINE_RAW_SPINLOCK(pci_lock); /* * Wrappers for all PCI configuration access functions. They just check Index: linux-2.6.25.4-rt2/drivers/video/console/vgacon.c =================================================================== --- linux-2.6.25.4-rt2.orig/drivers/video/console/vgacon.c 2008-05-19 16:54:58.000000000 -0400 +++ linux-2.6.25.4-rt2/drivers/video/console/vgacon.c 2008-05-19 16:56:00.000000000 -0400 @@ -51,7 +51,7 @@ #include