diff -urN linux-2.4.18-preempt/Documentation/Configure.help linux/Documentation/Configure.help
--- linux-2.4.18-preempt/Documentation/Configure.help	Tue Jun 11 14:05:50 2002
+++ linux/Documentation/Configure.help	Tue Jun 11 14:38:52 2002
@@ -24023,6 +24023,16 @@
   of the BUG call as well as the EIP and oops trace.  This aids
   debugging but costs about 70-100K of memory.
 
+CONFIG_PREEMPT_LOG
+  Say Y here to include a log of kernel preemption events.  This can
+  be very helpful when debugging problems caused by preemption taking
+  place in unprotected critical regions.
+
+  See Documentation/preempt-logging.txt for more information.
+
+  If you are debugging kernel preemption and think a log of preemption
+  events would be useful, say Y here.  Otherwise say N.
+
 Include kgdb kernel debugger
 CONFIG_KGDB
   Include in-kernel hooks for kgdb, the Linux kernel source level
diff -urN linux-2.4.18-preempt/Documentation/preempt-logging.txt linux/Documentation/preempt-logging.txt
--- linux-2.4.18-preempt/Documentation/preempt-logging.txt	Wed Dec 31 16:00:00 1969
+++ linux/Documentation/preempt-logging.txt	Tue Jun 11 14:37:20 2002
@@ -0,0 +1,27 @@
+Kernel preemption debugging tool
+---------------------------------
+
+CONFIG_PREEMPT_LOG adds a kernel preemption log, which can be very
+useful in debugging unprotected critical regions.  The log records the
+instruction address where the preemption occurred, the PID of the
+process that was preempted, and a timestamp.
+
+show_preempt_log() dumps the most recent 20 events, most recent first.
+Printed for each event are the PID of the preempted process, the number
+of milliseconds since the most recent preemption and the instruction
+pointer address of where the process was preempted.  show_preempt_log()
+is added to the kernel Oops message, and can also be viewed with
+SysRq-G.  (The addresses are repeated in a format that can be fed to
+ksymoops for decoding into symbols.)
+
+save_preempt_log() can be used to take a snapshot of the preemption log
+after an "interesting" event has taken place.  For example, I used this
+to catch the preemption problem that turned out to be the page fault
+trap handler being called with interrupts enabled, leading to corruption
+of the fault address register if preemption occurred before it was
+saved.  I added a call to save_preempt_log() in fault.c at the point
+where a process is about to be killed with a SEGFAULT.  Sure enough, the
+preemption log showed a preemption on the first instruction of the trap
+handler whenever a process unexpectedly SEGVed.  (This problem was fixed
+in Linux subsequently, when it turned out to be a problem even without
+preemption.)
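A note on the save_preempt_log() example above: the call really is a
one-liner dropped in at the point of interest.  Roughly, and only as an
illustration (the snippet below paraphrases the 2.4 do_page_fault()
bad-area path from memory rather than quoting arch/i386/mm/fault.c
verbatim, and is not part of the patch):

	bad_area:
		up_read(&mm->mmap_sem);

		/* User mode access: the task is about to get SIGSEGV,
		 * so freeze the preemption log now for the later dump. */
		if (error_code & 4) {
			save_preempt_log();
			tsk->thread.cr2 = address;
			tsk->thread.error_code = error_code;
			tsk->thread.trap_no = 14;
			force_sig(SIGSEGV, tsk);
			return;
		}

Once frozen this way, the snapshot is what show_preempt_log() prints on
the next oops or SysRq-G, instead of whatever was logged afterwards.
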
diff -urN linux-2.4.18-preempt/arch/i386/config.in linux/arch/i386/config.in
--- linux-2.4.18-preempt/arch/i386/config.in	Tue Jun 11 14:05:50 2002
+++ linux/arch/i386/config.in	Tue Jun 11 14:39:45 2002
@@ -426,6 +426,7 @@
    bool '  Magic SysRq key' CONFIG_MAGIC_SYSRQ
    bool '  Spinlock debugging' CONFIG_DEBUG_SPINLOCK
    bool '  Verbose BUG() reporting (adds 70K)' CONFIG_DEBUG_BUGVERBOSE
+   dep_bool '  Preemption logging' CONFIG_PREEMPT_LOG $CONFIG_PREEMPT
 fi
 
 endmenu
diff -urN linux-2.4.18-preempt/arch/i386/kernel/entry.S linux/arch/i386/kernel/entry.S
--- linux-2.4.18-preempt/arch/i386/kernel/entry.S	Wed Jun  5 16:10:31 2002
+++ linux/arch/i386/kernel/entry.S	Tue Jun 11 13:53:41 2002
@@ -286,7 +286,10 @@
 	jnz restore_all
 	incl preempt_count(%ebx)
 	sti
+	movl EIP(%esp),%ecx
+	pushl %ecx
 	call SYMBOL_NAME(preempt_schedule)
+	addl $4,%esp
 	jmp ret_from_intr
 #else
 	jmp restore_all
diff -urN linux-2.4.18-preempt/arch/i386/kernel/traps.c linux/arch/i386/kernel/traps.c
--- linux-2.4.18-preempt/arch/i386/kernel/traps.c	Wed Jun  5 16:10:31 2002
+++ linux/arch/i386/kernel/traps.c	Tue Jun 11 15:02:57 2002
@@ -235,6 +235,8 @@
 		}
 	}
 	printk("\n");
+
+	show_preempt_log();
 }
 
 spinlock_t die_lock = SPIN_LOCK_UNLOCKED;
diff -urN linux-2.4.18-preempt/drivers/char/sysrq.c linux/drivers/char/sysrq.c
--- linux-2.4.18-preempt/drivers/char/sysrq.c	Wed Jun  5 16:09:58 2002
+++ linux/drivers/char/sysrq.c	Tue Jun 11 16:10:59 2002
@@ -277,6 +277,17 @@
 	action_msg:	"Show Memory",
 };
 
+static void sysrq_handle_preempt_log(int key, struct pt_regs *pt_regs,
+		struct kbd_struct *kbd, struct tty_struct *tty) {
+	show_preempt_log();
+}
+
+static struct sysrq_key_op sysrq_preempt_log_op = {
+	handler:	sysrq_handle_preempt_log,
+	help_msg:	"showpreemptloG",
+	action_msg:	"Show Preempt Log",
+};
+
 /* SHOW SYSRQ HANDLERS BLOCK */
 
 
@@ -357,7 +368,11 @@
 /* d */	NULL,
 /* e */	&sysrq_term_op,
 /* f */	NULL,
+#ifdef CONFIG_PREEMPT_LOG
+/* g */	&sysrq_preempt_log_op,
+#else
 /* g */	NULL,
+#endif
 /* h */	NULL,
 /* i */	&sysrq_kill_op,
 /* j */	NULL,
diff -urN linux-2.4.18-preempt/include/linux/sched.h linux/include/linux/sched.h
--- linux-2.4.18-preempt/include/linux/sched.h	Tue Jun 11 14:05:51 2002
+++ linux/include/linux/sched.h	Tue Jun 11 15:54:22 2002
@@ -155,8 +155,21 @@
 #define MAX_SCHEDULE_TIMEOUT	LONG_MAX
 extern signed long FASTCALL(schedule_timeout(signed long timeout));
 asmlinkage void schedule(void);
+
 #ifdef CONFIG_PREEMPT
+
+#ifdef CONFIG_PREEMPT_LOG
+asmlinkage void preempt_schedule(void *);
+extern void show_preempt_log(void);
+extern void save_preempt_log(void);
+extern void log_preemption(void *);
+#else
 asmlinkage void preempt_schedule(void);
+#define show_preempt_log() do { } while(0)
+#define save_preempt_log() do { } while(0)
+#define log_preemption(p) do { } while(0)
+#endif
+
 #endif
 
 extern int schedule_task(struct tq_struct *task);
diff -urN linux-2.4.18-preempt/include/linux/spinlock.h linux/include/linux/spinlock.h
--- linux-2.4.18-preempt/include/linux/spinlock.h	Wed Jun  5 16:09:09 2002
+++ linux/include/linux/spinlock.h	Tue Jun 11 15:51:05 2002
@@ -137,6 +137,12 @@
 
 #ifdef CONFIG_PREEMPT
 
+#ifdef CONFIG_PREEMPT_LOG
+#define preempt_schedule_and_log(ip) preempt_schedule(ip)
+#else
+#define preempt_schedule_and_log(ip) preempt_schedule()
+#endif
+
 #define preempt_get_count() (current->preempt_count)
 #define preempt_is_disabled() (preempt_get_count() != 0)
 
@@ -157,7 +163,7 @@
 	--current->preempt_count; \
 	barrier(); \
 	if (unlikely(current->preempt_count < current->need_resched)) \
-		preempt_schedule(); \
+		preempt_schedule_and_log(current_text_addr()); \
 } while (0)
 
 #define spin_lock(lock) \
diff -urN linux-2.4.18-preempt/kernel/sched.c linux/kernel/sched.c
--- linux-2.4.18-preempt/kernel/sched.c	Wed Jun  5 16:09:06 2002
+++ linux/kernel/sched.c	Tue Jun 11 15:56:12 2002
@@ -720,18 +720,155 @@
 }
 
 #ifdef CONFIG_PREEMPT
+
 /*
  * this is is the entry point to schedule() from in-kernel preemption.
  */
+#ifdef CONFIG_PREEMPT_LOG
+asmlinkage void preempt_schedule(void *ip)
+#else
 asmlinkage void preempt_schedule(void)
+#endif
 {
 	do {
+		log_preemption(ip);
 		current->preempt_count += PREEMPT_ACTIVE;
 		schedule();
 		current->preempt_count -= PREEMPT_ACTIVE;
 		barrier();
 	} while (current->need_resched);
 }
+
+#ifdef CONFIG_PREEMPT_LOG
+
+struct preempt_entry {
+	struct timeval tv;
+	void *ip;
+	pid_t pid;
+};
+
+#define PREEMPT_LOG_LEN 100
+
+static struct preempt_entry preempt_log[PREEMPT_LOG_LEN] = {{{ 0, 0 }, 0, 0 }};
+static struct preempt_entry saved_preempt_log[PREEMPT_LOG_LEN] =
+	{{{ 0, 0 }, 0, 0 }};
+
+static int pli = -1;
+static int saved_pli = 0;
+static int preempt_log_saved = 0;
+static spinlock_t preempt_log_lock = SPIN_LOCK_UNLOCKED;
+
+void log_preemption(void *ip)
+{
+	unsigned long flags;
+
+	if (!ip)
+		return;
+
+	/* Avoid unwanted preemption on unlock */
+	preempt_disable();
+
+	spin_lock_irqsave(&preempt_log_lock, flags);
+
+	/*
+	 * Compress the log by eliminating consecutive identical
+	 * preempted addresses.  This prevents the log from filling
+	 * up with duplicate addresses from default_idle.
+	 */
+	if (pli >= 0 && ip == preempt_log[pli].ip)
+		goto out;
+
+	if (++pli == PREEMPT_LOG_LEN)
+		pli = 0;
+	preempt_log[pli].pid = current->pid;
+	preempt_log[pli].ip = ip;
+	do_gettimeofday(&preempt_log[pli].tv);
+
+out:
+	spin_unlock_irqrestore(&preempt_log_lock, flags);
+	preempt_enable_no_resched();
+}
+
+void save_preempt_log()
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&preempt_log_lock, flags);
+	memcpy(saved_preempt_log, preempt_log, sizeof(saved_preempt_log));
+	saved_pli = pli;
+	spin_unlock_irqrestore(&preempt_log_lock, flags);
+	preempt_log_saved = 1;
+}
+
+void show_preempt_log()
+{
+	int i;
+	struct timeval *t0;
+	struct timeval now;
+	long recent;
+
+	if (!preempt_log_saved)
+		save_preempt_log();
+
+	do_gettimeofday(&now);
+
+	t0 = &saved_preempt_log[saved_pli].tv;
+
+	recent = ((now.tv_sec - t0->tv_sec) * 1000000 +
+		  (now.tv_usec - t0->tv_usec)) / 1000;
+	printk("Most recently logged preemption event happened %ldms ago\n",
+	       recent);
+
+	printk("  PID   TIME  IP\n");
+
+	for (i = 0; i < 20; i++) {
+		int n = saved_pli - i;
+		long msecs;
+		struct timeval *t;
+
+		if (n < 0)
+			n += PREEMPT_LOG_LEN;
+
+		t = &saved_preempt_log[n].tv;
+
+		msecs = ((t0->tv_sec - t->tv_sec) * 1000000 +
+			 (t0->tv_usec - t->tv_usec)) / 1000;
+
+		printk("%5d", saved_preempt_log[n].pid);
+		printk(" %6ld", msecs);
+#if (BITS_PER_LONG == 32)
+		printk(" [<%08lx>]\n",
+		       (unsigned long)(saved_preempt_log[n].ip));
+#else
+		printk(" [<%016lx>]\n",
+		       (unsigned long)(saved_preempt_log[n].ip));
+#endif
+	}
+
+	/*
+	 * Repeat the addresses while pretending to be a trace,
+	 * so that the existing version of ksymoops will helpfully
+	 * decode them.
+	 */
+	printk("Trace:");
+	for (i = 0; i < 20; i++) {
+		int n = saved_pli - i;
+
+		if (n < 0)
+			n += PREEMPT_LOG_LEN;
+
+#if (BITS_PER_LONG == 32)
+		printk(" [<%08lx>]", (unsigned long)(saved_preempt_log[n].ip));
+#else
+		printk(" [<%016lx>]", (unsigned long)(saved_preempt_log[n].ip));
+#endif
+	}
+	printk("\n");
+
+	preempt_log_saved = 0;
+}
+#endif /* CONFIG_PREEMPT_LOG */
+
 #endif /* CONFIG_PREEMPT */
 
 /*
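
For reference, the output of show_preempt_log() as produced by the
printk()s above looks roughly like this (PIDs, times and addresses are
made-up illustrative values, and the list is truncated):

	Most recently logged preemption event happened 3ms ago
	  PID   TIME  IP
	 1087      0 [<c012a6b3>]
	  942     12 [<c0119e40>]
	  942    130 [<c0106f30>]
	Trace: [<c012a6b3>] [<c0119e40>] [<c0106f30>]

The TIME column is milliseconds before the most recent logged event, so
the newest entry always reads 0.  Feeding the dump, in particular the
Trace: line, through ksymoops with the matching System.map turns the raw
addresses into symbol+offset form, which is usually enough to see which
critical region was preempted.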