diff -purN -X /home/mbligh/.diff.exclude 310-irqbal_fast/Makefile 320-kcg/Makefile --- 310-irqbal_fast/Makefile 2003-10-21 11:45:19.000000000 -0700 +++ 320-kcg/Makefile 2003-10-21 11:52:15.000000000 -0700 @@ -439,6 +439,10 @@ ifndef CONFIG_FRAME_POINTER CFLAGS += -fomit-frame-pointer endif +ifeq ($(CONFIG_MCOUNT),y) +CFLAGS += -pg +endif + ifdef CONFIG_DEBUG_INFO CFLAGS += -g endif diff -purN -X /home/mbligh/.diff.exclude 310-irqbal_fast/arch/i386/Kconfig 320-kcg/arch/i386/Kconfig --- 310-irqbal_fast/arch/i386/Kconfig 2003-10-21 11:45:19.000000000 -0700 +++ 320-kcg/arch/i386/Kconfig 2003-10-21 11:52:15.000000000 -0700 @@ -1363,6 +1363,14 @@ config X86_MPPARSE depends on X86_LOCAL_APIC && !X86_VISWS default y +config MCOUNT + bool "Generate function call graph" + depends on FRAME_POINTER + help + This option instruments the kernel to generate a deterministic + function call graph. Answering Y here will make your kernel run + ???% slower. + endmenu source "security/Kconfig" diff -purN -X /home/mbligh/.diff.exclude 310-irqbal_fast/arch/i386/boot/compressed/Makefile 320-kcg/arch/i386/boot/compressed/Makefile --- 310-irqbal_fast/arch/i386/boot/compressed/Makefile 2003-10-21 11:45:19.000000000 -0700 +++ 320-kcg/arch/i386/boot/compressed/Makefile 2003-10-21 11:52:15.000000000 -0700 @@ -10,6 +10,11 @@ EXTRA_AFLAGS := -traditional CFLAGS := $(CFLAGS_NOGCOV) LDFLAGS_vmlinux := -Ttext $(IMAGE_OFFSET) -e startup_32 +ifeq ($(CONFIG_MCOUNT),y) +$(obj)/misc.o: + $(CC) $(subst -pg,,$(CFLAGS)) -c $(src)/misc.c -o $(obj)/misc.o +endif + $(obj)/vmlinux: $(obj)/head.o $(obj)/misc.o $(obj)/piggy.o FORCE $(call if_changed,ld) @: diff -purN -X /home/mbligh/.diff.exclude 310-irqbal_fast/arch/i386/kernel/i386_ksyms.c 320-kcg/arch/i386/kernel/i386_ksyms.c --- 310-irqbal_fast/arch/i386/kernel/i386_ksyms.c 2003-10-14 15:50:11.000000000 -0700 +++ 320-kcg/arch/i386/kernel/i386_ksyms.c 2003-10-21 11:52:15.000000000 -0700 @@ -186,6 +186,11 @@ extern void * memcpy(void *,const void * EXPORT_SYMBOL_NOVERS(memcpy); EXPORT_SYMBOL_NOVERS(memset); +#ifdef CONFIG_MCOUNT +extern void mcount(void); +EXPORT_SYMBOL_NOVERS(mcount); +#endif + #ifdef CONFIG_HAVE_DEC_LOCK EXPORT_SYMBOL(atomic_dec_and_lock); #endif diff -purN -X /home/mbligh/.diff.exclude 310-irqbal_fast/arch/i386/kernel/semaphore.c 320-kcg/arch/i386/kernel/semaphore.c --- 310-irqbal_fast/arch/i386/kernel/semaphore.c 2002-12-09 18:45:55.000000000 -0800 +++ 320-kcg/arch/i386/kernel/semaphore.c 2003-10-21 11:52:15.000000000 -0700 @@ -191,6 +191,7 @@ asm( ".align 4\n" ".globl __down_failed\n" "__down_failed:\n\t" +// "call mcount_stext_lock\n\t" #if defined(CONFIG_FRAME_POINTER) "pushl %ebp\n\t" "movl %esp,%ebp\n\t" @@ -214,6 +215,7 @@ asm( ".align 4\n" ".globl __down_failed_interruptible\n" "__down_failed_interruptible:\n\t" +// "call mcount_stext_lock\n\t" #if defined(CONFIG_FRAME_POINTER) "pushl %ebp\n\t" "movl %esp,%ebp\n\t" @@ -235,6 +237,7 @@ asm( ".align 4\n" ".globl __down_failed_trylock\n" "__down_failed_trylock:\n\t" +// "call mcount_stext_lock\n\t" #if defined(CONFIG_FRAME_POINTER) "pushl %ebp\n\t" "movl %esp,%ebp\n\t" @@ -256,6 +259,7 @@ asm( ".align 4\n" ".globl __up_wakeup\n" "__up_wakeup:\n\t" +// "call mcount_stext_lock\n\t" "pushl %eax\n\t" "pushl %edx\n\t" "pushl %ecx\n\t" diff -purN -X /home/mbligh/.diff.exclude 310-irqbal_fast/arch/i386/lib/Makefile 320-kcg/arch/i386/lib/Makefile --- 310-irqbal_fast/arch/i386/lib/Makefile 2003-06-19 14:41:15.000000000 -0700 +++ 320-kcg/arch/i386/lib/Makefile 2003-10-21 11:52:15.000000000 -0700 @@ -10,3 +10,4 @@ lib-y = 
checksum.o delay.o \ lib-$(CONFIG_X86_USE_3DNOW) += mmx.o lib-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o lib-$(CONFIG_DEBUG_IOVIRT) += iodebug.o +lib-$(CONFIG_MCOUNT) += mcount.o diff -purN -X /home/mbligh/.diff.exclude 310-irqbal_fast/arch/i386/lib/mcount.S 320-kcg/arch/i386/lib/mcount.S --- 310-irqbal_fast/arch/i386/lib/mcount.S 1969-12-31 16:00:00.000000000 -0800 +++ 320-kcg/arch/i386/lib/mcount.S 2003-10-21 11:52:15.000000000 -0700 @@ -0,0 +1,66 @@ +/* + * Copyright (C) 2000 SGI + * + * Written by Dimitris Michailidis dimitris@sgi.com + * + * This file implements mcount(), which is used to collect profiling data. + * We provide several variants to accomodate different types of callers at + * the lowest possible overhead. + */ + +#include +#include + +#define MCOUNT_HEAD \ + pushl %ecx /* We must protect the arguments of FASTCALLs */; \ + movl mcount_hook, %ecx; \ + testl %ecx, %ecx; \ + jz 1f; \ + pushl %eax; \ + pushl %edx; \ + movl 12(%esp), %edx /* mcount()'s parent */ + +#define MCOUNT_TAIL \ + call *%ecx; \ + popl %edx; \ + popl %eax; \ +1: popl %ecx + +/* + * This is the main variant and is called by C code. GCC's -pg option + * automatically instruments every C function with a call to this. + */ +ENTRY(mcount) +#if defined(CONFIG_MCOUNT) + MCOUNT_HEAD +#ifdef CONFIG_FRAME_POINTER + movl 4(%ebp), %eax /* mcount()'s parent's parent */ +#endif + MCOUNT_TAIL +#endif + ret + +/* + * This variant is used by assembly functions. Must be inserted by hand. + */ +ENTRY(mcount_asm) +#if defined(CONFIG_MCOUNT) + MCOUNT_HEAD + movl 16(%esp), %eax /* mcount()'s parent's parent */ + MCOUNT_TAIL +#endif + ret +/* + * This variant is used by assembly functions in section .stext.lock. + * Must be inserted by hand. + */ +ENTRY(mcount_stext_lock) +#if defined(CONFIG_MCOUNT) + MCOUNT_HEAD + movl 16(%esp), %eax /* mcount()'s parent's parent */ + addl 1(%eax), %eax /* this and the next lines are magic */ + leal 5(%eax), %eax + MCOUNT_TAIL +#endif + ret + diff -purN -X /home/mbligh/.diff.exclude 310-irqbal_fast/fs/proc/proc_misc.c 320-kcg/fs/proc/proc_misc.c --- 310-irqbal_fast/fs/proc/proc_misc.c 2003-10-21 11:19:06.000000000 -0700 +++ 320-kcg/fs/proc/proc_misc.c 2003-10-21 11:52:15.000000000 -0700 @@ -51,6 +51,10 @@ #include #include +#ifdef CONFIG_MCOUNT +#include +#endif + #define LOAD_INT(x) ((x) >> FSHIFT) #define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100) /* @@ -854,4 +858,13 @@ void __init proc_misc_init(void) entry->proc_fops = &ppc_htab_operations; } #endif +#ifdef CONFIG_MCOUNT + { + extern struct file_operations mcount_operations; + extern struct proc_dir_entry *mcount_pde; + mcount_pde = create_proc_entry("mcount", S_IRUGO|S_IWUSR, NULL); + if (mcount_pde) + mcount_pde->proc_fops = &mcount_operations; + } +#endif } diff -purN -X /home/mbligh/.diff.exclude 310-irqbal_fast/include/linux/mcount.h 320-kcg/include/linux/mcount.h --- 310-irqbal_fast/include/linux/mcount.h 1969-12-31 16:00:00.000000000 -0800 +++ 320-kcg/include/linux/mcount.h 2003-10-21 11:52:15.000000000 -0700 @@ -0,0 +1,77 @@ +/* + * include/linux/mcount.h + * + * Implementation of kernel mcount handler and supporting functions. 
+ * + * Code based on kernprof http://oss.sgi.com/projects/kernprof/ + * Copyright (C) SGI 1999, 2000, 2001 + * Written by Dimitris Michailidis (dimitris@engr.sgi.com) + * Modified by John Hawkes (hawkes@engr.sgi.com) + * Contributions from Niels Christiansen (nchr@us.ibm.com) + * Adapted for stand-alone call graphing by Adam Litke (agl@us.ibm.com) + */ + +#ifndef __MCOUNT_H +#define __MCOUNT_H + +#include +#include +#include + +#define DFL_PC_RES 4 /* default PC resolution for this platform */ +#define CG_MAX_ARCS (1 << (8 * sizeof(short))) +#define FUNCTIONPC(func) (*(unsigned long *)&(func)) + +#define pc_out_of_range(pc) \ + ((pc) < (unsigned long) &_stext || (pc) >= (unsigned long) &_etext) + +#if CPU != 386 +/* Like the above but also returns the result */ +static __inline__ int atomic_add_return(int i, atomic_t *v) +{ + register int oldval; + __asm__ __volatile__( + LOCK "xaddl %2,%0" + :"=m" (v->counter), "=r" (oldval) + :"1" (i), "m" (v->counter) : "memory"); + return oldval + i; +} +#endif + +struct prof_mem_map +{ + unsigned long kernel_buckets; /* number of kernel buckets */ + //unsigned long module_buckets; /* number of module buckets */ + unsigned long nr_cpus; /* number of processors whether profiled or not */ + unsigned long cg_from_size; /* size of one cg_from array */ + unsigned long cg_to_size; /* size of one cg_to array */ + unsigned long cg_to_offset; /* offset of cg_to array */ + unsigned long kernel_start; /* lowest text address in kernel */ + unsigned long kernel_end; /* highest text address in kernel */ + //unsigned long module_start; /* lowest text address in all modules */ + //unsigned long module_end; /* highest text address in all modules */ +}; + +struct cg_arc_dest { + unsigned long address; + atomic_t count; + unsigned short link; + unsigned short pad; +}; + +void cg_record_arc(unsigned long frompc, unsigned long selfpc) __attribute__((regparm(2))); + +int mcount_init(void); + +ssize_t mcount_write(struct file * file, const char * buf, + size_t count, loff_t *ppos); + +ssize_t mcount_read(struct file * file, char * buf, + size_t count, loff_t *ppos); + +static struct file_operations mcount_operations = { + write: mcount_write, + read: mcount_read, +}; + +#endif diff -purN -X /home/mbligh/.diff.exclude 310-irqbal_fast/kernel/Makefile 320-kcg/kernel/Makefile --- 310-irqbal_fast/kernel/Makefile 2003-10-21 11:45:20.000000000 -0700 +++ 320-kcg/kernel/Makefile 2003-10-21 11:53:37.000000000 -0700 @@ -27,6 +27,13 @@ obj-$(CONFIG_COMPAT) += compat.o obj-$(CONFIG_IKCONFIG) += configs.o obj-$(CONFIG_IKCONFIG_PROC) += configs.o obj-$(CONFIG_X86_EARLY_PRINTK) += early_printk.o +obj-$(CONFIG_MCOUNT) += mcount.o + +ifeq ($(CONFIG_MCOUNT),y) +$(obj)/mcount.o: + $(CC) $(subst -pg,,$(CFLAGS)) -c -o $(obj)/mcount.o $(src)/mcount.c +endif + ifneq ($(CONFIG_IA64),y) # According to Alan Modra , the -fno-omit-frame-pointer is diff -purN -X /home/mbligh/.diff.exclude 310-irqbal_fast/kernel/mcount.c 320-kcg/kernel/mcount.c --- 310-irqbal_fast/kernel/mcount.c 1969-12-31 16:00:00.000000000 -0800 +++ 320-kcg/kernel/mcount.c 2003-10-21 11:52:15.000000000 -0700 @@ -0,0 +1,197 @@ +/* + * kernel/mcount.c + * + * Implementation of kernel mcount handler and supporting functions. 
+ * + * Code based on kernprof http://oss.sgi.com/projects/kernprof/ + * Copyright (C) SGI 1999, 2000, 2001 + * Written by Dimitris Michailidis (dimitris@engr.sgi.com) + * Modified by John Hawkes (hawkes@engr.sgi.com) + * Contributions from Niels Christiansen (nchr@us.ibm.com) + * Adapted for stand-alone call graphing by Adam Litke (agl@us.ibm.com) + */ + +#include +#include +#include +#include +#include + +void UNKNOWN_KERNEL(void) {} /* Dummy functions to make profiles more */ +void UNKNOWN_MODULE(void) {} /* descriptive */ + +unsigned int mcount_shift, PC_resolution = DFL_PC_RES; + +char* memory_start = NULL; +unsigned short *cg_from_base = NULL; +struct cg_arc_dest *cg_to_base = NULL; +int cg_arc_overflow = 0; /* set when no new arcs can be added to the call graph */ +int n_buckets = 0; +size_t mem_needed; /* space needed for the call graph and the PC samples */ +extern char _stext, _etext, _sinittext, _einittext; + +void (*mcount_hook)(unsigned long, unsigned long) = NULL; +struct proc_dir_entry *mcount_pde; + +static int mcount_alloc_mem(void) +{ + unsigned long cg_from_size, cg_to_size; + size_t text_size = (unsigned long) &_etext - (unsigned long) &_stext; + struct prof_mem_map *memory_map; + + for (mcount_shift = 0; (1 << mcount_shift) < PC_resolution; mcount_shift++); + n_buckets = text_size >> mcount_shift; + cg_from_size = n_buckets * sizeof(short); + cg_to_size = CG_MAX_ARCS * sizeof(struct cg_arc_dest); + mem_needed = sizeof(struct prof_mem_map) + + ((cg_from_size + cg_to_size) * num_online_cpus()); + if ((memory_start = vmalloc(mem_needed)) == NULL) { + return -ENOMEM; + } + memset(memory_start, 0, mem_needed); + + cg_from_base = (unsigned short *) (memory_start + sizeof(struct prof_mem_map)); + cg_to_base = (struct cg_arc_dest *) (memory_start + sizeof(struct prof_mem_map) + + (cg_from_size * num_online_cpus())); + + memory_map = (struct prof_mem_map*) memory_start; + memory_map->kernel_buckets = n_buckets; + memory_map->nr_cpus = num_online_cpus(); + memory_map->cg_from_size = cg_from_size; + memory_map->cg_to_size = cg_to_size; + memory_map->cg_to_offset = cg_from_size * num_online_cpus(); + memory_map->kernel_start = (unsigned long)&_stext; + memory_map->kernel_end = (unsigned long)&_etext; + return 0; +} + +static void mcount_free_mem(void) +{ + vfree(memory_start); + memory_start = NULL; +} + +/* Record an arc traversal in the call graph. Called by mcount(). SMP safe */ +void cg_record_arc(unsigned long frompc, unsigned long selfpc) +{ +#ifndef __HAVE_ARCH_CMPXCHG + static spinlock_t cg_record_lock = SPIN_LOCK_UNLOCKED; + unsigned long flags; +#endif + int toindex, fromindex, cpu; + unsigned short *q, *cg_from; + struct cg_arc_dest *p, *cg_to; + + cpu = smp_processor_id(); + + cg_from = &cg_from_base[n_buckets * cpu]; + cg_to = &cg_to_base[CG_MAX_ARCS * cpu]; + + if (pc_out_of_range(frompc)) + fromindex = (FUNCTIONPC(UNKNOWN_KERNEL) - (unsigned long) &_stext) + >> mcount_shift; + else + fromindex = (frompc - (unsigned long) &_stext) >> mcount_shift; + q = &cg_from[fromindex]; + + /* Easy case: the arc is already in the call graph */ + for (toindex = *q; toindex != 0; ) { + p = &cg_to[toindex]; + if (p->address == selfpc) { + atomic_inc(&p->count); + return; + } + toindex = p->link; + } + /* + * No luck. We need to add a new arc. Since cg_to[0] is unused, + * we use cg_to[0].count to keep track of the next available arc. 
+	 */
+	if (cg_arc_overflow) {
+		return;
+	}
+	toindex = atomic_add_return(1, &cg_to->count);
+	if (toindex >= CG_MAX_ARCS) {
+		/*
+		 * We have run out of space for arcs. We'll keep incrementing
+		 * the existing ones but we won't try to add any more.
+		 */
+		cg_arc_overflow = 1;
+		atomic_set(&cg_to->count, CG_MAX_ARCS - 1);
+		return;
+	}
+	/*
+	 * We have a secured slot for a new arc and all we need to do is
+	 * initialize it and add it to a hash bucket. We use compare&swap, if
+	 * possible, to avoid any spinlocks whatsoever.
+	 */
+	p = &cg_to[toindex];
+	p->address = selfpc;
+	atomic_set(&p->count, 1);
+#ifdef __HAVE_ARCH_CMPXCHG
+	do {
+		p->link = *q;
+	} while (cmpxchg(q, p->link, toindex) != p->link);
+#else
+	spin_lock_irqsave(&cg_record_lock, flags);
+	p->link = *q;
+	*q = toindex;
+	spin_unlock_irqrestore(&cg_record_lock, flags);
+#endif
+	return;
+}
+
+int mcount_start(void)
+{
+	if (!memory_start) {
+		if(mcount_alloc_mem())
+			return -ENOMEM;
+		mcount_pde->size = mem_needed;
+	}
+	mcount_hook = cg_record_arc;
+	return 0;
+}
+
+int mcount_stop(void)
+{
+	mcount_hook = NULL;
+	return 0;
+}
+
+int mcount_cleanup(void)
+{
+	mcount_stop();
+	mcount_pde->size = 0;
+	mcount_free_mem();
+	return 0;
+}
+
+ssize_t mcount_read(struct file * file, char * buf,
+		size_t count, loff_t *ppos)
+{
+	count = (count + *ppos >= mcount_pde->size) ?
+		mcount_pde->size - *ppos : count;
+	copy_to_user(buf, memory_start + *ppos, count);
+	*ppos += count;
+	return count;
+}
+
+ssize_t mcount_write(struct file * file, const char * buf,
+		size_t count, loff_t *ppos)
+{
+	int ret;
+
+	switch (buf[0]) {
+	case '0':
+		ret = mcount_cleanup();
+		break;
+	case '1':
+		ret = mcount_stop();
+		break;
+	case '2':
+		ret = mcount_start();
+		break;
+	default:
+		ret = -EINVAL;
+	}
+	return (ret == 0) ? count : ret;
+}
diff -purN -X /home/mbligh/.diff.exclude 310-irqbal_fast/scripts/readcg-0.1/Makefile 320-kcg/scripts/readcg-0.1/Makefile
--- 310-irqbal_fast/scripts/readcg-0.1/Makefile	1969-12-31 16:00:00.000000000 -0800
+++ 320-kcg/scripts/readcg-0.1/Makefile	2003-10-09 10:14:18.000000000 -0700
@@ -0,0 +1,10 @@
+# Makefile for readcg - Kernel Call Graph
+# Adam Litke (agl@us.ibm.com)
+
+CC = /usr/bin/gcc
+
+readcg: clean
+	$(CC) -Wall -o readcg readcg.c
+
+clean:
+	rm -f readcg
diff -purN -X /home/mbligh/.diff.exclude 310-irqbal_fast/scripts/readcg-0.1/README 320-kcg/scripts/readcg-0.1/README
--- 310-irqbal_fast/scripts/readcg-0.1/README	1969-12-31 16:00:00.000000000 -0800
+++ 320-kcg/scripts/readcg-0.1/README	2003-10-14 13:55:32.000000000 -0700
@@ -0,0 +1,27 @@
+README - readcg-0.1
+
+10-14-03: Initial Release
+
+readcg is the userspace utility for printing kernel call graphs.
+This tool is in its early stages and most certainly has bugs.
+Please email me <agl@us.ibm.com> to report bugs or to submit
+patches.
+
+QUICK START:
+
+(1.) Obtain readcg-xx.tar.gz and kcg-X.X.X-V.patch from the lse
+     project page (www.sf.net/projects/lse).
+
+(2.) Patch your kernel and build it. Make sure 'Generate
+     function call graph' in the 'Kernel Hacking' section is
+     turned on. Install your kernel and boot it.
+
+(3.) Unpack readcg and descend into the readcg directory.
+     Build the tool by typing 'make'.
+
+(4.) To generate a call graph do the following:
+     readcg -c               # Clear counters
+     readcg -e               # Enable profiling
+
+     readcg -d               # Disable profiling
+     readcg -m System.map    # Generate call graph
diff -purN -X /home/mbligh/.diff.exclude 310-irqbal_fast/scripts/readcg-0.1/readcg.c 320-kcg/scripts/readcg-0.1/readcg.c
--- 310-irqbal_fast/scripts/readcg-0.1/readcg.c	1969-12-31 16:00:00.000000000 -0800
+++ 320-kcg/scripts/readcg-0.1/readcg.c	2003-10-14 11:26:10.000000000 -0700
@@ -0,0 +1,451 @@
+/*
+ * readcg.c - control kernel call graphing
+ *
+ * Copyright (C) 1999-2002 SGI
+ *
+ * Written by Dimitris Michailidis (dimitris@engr.sgi.com)
+ * Modifications by John Hawkes (hawkes@engr.sgi.com)
+ * Contributions from Niels Christiansen (nchr@us.ibm.com)
+ * Contributions from Ethan Solomita (ethan@cs.columbia.edu)
+ * Adapted by Adam Litke (agl@us.ibm.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "readcg.h"
+
+
+static char optstring[]="edcm:";
+char *prgname;
+struct prof_mem_map *memory_map;
+struct addr_to_name_map *a2n;
+unsigned long num_entries;
+
+void err_exit(const char *s)
+{
+	perror(s);
+	exit(1);
+}
+
+/*
+ * Create an address -> name mapping using a System.map file
+ */
+void parse_map(char *map_file) {
+	FILE * map;
+	char mapline[S_LEN];
+	unsigned long address, i=0;
+	char mode[2], fn_name[NAME_LEN];
+
+	map = fopen(map_file, "r");
+
+	if (!map)
+		err_exit("Cannot open map file");
+
+	while(fgets(mapline, S_LEN, map)) {
+		sscanf(mapline, "%lx %s %s", &address, mode, fn_name);
+		if (*mode == 'T' || *mode == 't' ||
+		    *mode == 'W' || *mode == 'w')
+			++i;
+	}
+	num_entries = i+1;
+	a2n = calloc(i+1, sizeof(struct addr_to_name_map));
+	fseek(map, 0, SEEK_SET);
+
+	i = 0;
+	while(fgets(mapline, S_LEN, map) && i < num_entries) {
+		sscanf(mapline, "%lx %s %s", &address, mode, fn_name);
+		if (*mode != 'T' && *mode != 't' &&
+		    *mode != 'W' && *mode != 'w') continue;
+		a2n[i].addr = address;
+		strncpy(a2n[i].name, fn_name, NAME_LEN);
+		a2n[i].name[NAME_LEN - 1] = '\0';
+		i++;
+	}
+}
+
+/*
+ * Simple array binary search to retrieve function name given an address
+ */
+void search_map(struct addr_to_name_map *map, unsigned long address, char *name){
+	long low, high, bar;
+
+	low = 0;
+	high = num_entries;
+
+	while(low <= high) {
+		bar = low + ((high - low) / 2);
+		if (map[bar].addr == address ||
+		    (map[bar].addr < address &&
+		     map[bar+1].addr > address)) {
+			strcpy(name, map[bar].name);
+			return;
+		} else if (map[bar].addr > address)
+			high = bar - 1;
+		else
+			low = bar + 1;
+	}
+	*name = '\0';
+	return;
+}
+
+/*
+ * Read the contents of a file into a dynamically allocated buffer.
+ * Also return the size of the buffer.
+ */ +char *read_file(const char *name, size_t off, size_t *lenp) +{ + char *buf; + int fd; + off_t end; + size_t len; + + if ((fd = open(name, O_RDONLY)) < 0 || + (end = lseek(fd, off, SEEK_END)) < 0 || + lseek(fd, 0, SEEK_SET) < 0) + err_exit(name); + + len = (size_t) end; + if ((buf = malloc(len)) == NULL) + err_exit("malloc"); + + lseek(fd, off, SEEK_SET); + read(fd, buf, len); + + close(fd); + if (lenp) *lenp = len; + return buf; +} + +/* + * Read header from beginning of call graph data + */ +void get_memory_map(struct prof_mem_map *map) +{ + int fd; + off_t end; + size_t mapsz = sizeof(struct prof_mem_map); + + if ((fd = open(PROC_FILE, O_RDONLY)) < 0 || + (end = lseek(fd, 0, SEEK_END)) < 0 || + lseek(fd, 0, SEEK_SET) < 0) + err_exit(PROC_FILE); + if(read(fd, map, mapsz) != mapsz) + err_exit(PROC_FILE); + close(fd); +} + +/* + * Insert a destination address record into list + */ +struct cg_entry *insert_to_record(struct cg_entry *list, + kaddress_t addr, + unsigned long count, + int merge) +{ + struct cg_entry *cur, *prev, *new; + char fn_name[NAME_LEN]; + + new = malloc(sizeof(struct cg_entry)); + if (!new) + err_exit("malloc"); + + search_map(a2n, addr, fn_name); + strncpy(new->name, fn_name, NAME_LEN); + new->count = count; + new->next = NULL; + + if (list == NULL) { + list = new; + return list; + } + + if (new->count > list->count) { + new->next = list; + list = new; + return list; + } + + cur = list->next; + prev = list; + while (cur && new->count < cur->count) { + prev = cur; + cur = cur->next; + } + new->next = cur; + prev->next = new; + return list; +} + +/* + * Return the correct from address record if it exists, else create one + */ +struct cg_entry *get_from_record(struct cg_entry *list, char *name) +{ + struct cg_entry *prev, *cur; + + prev = NULL; + cur = list; + while (cur && strcmp(cur->name, name)) { + prev = cur; + cur = cur->next; + } + + if (!cur) { + cur = malloc(sizeof(struct cg_entry)); + if (!cur) + err_exit("malloc"); + + strncpy(cur->name, name, NAME_LEN); + cur->next = NULL; + cur->list = NULL; + if (prev) + prev->next = cur; + } + return cur; +} + +/* + * Print the call graph according to the following format: + * nnnn parent_to_base_function + * base_function + * nnnn child_of_base_function + */ +void output_cg_data(struct cg_entry *from_head, struct cg_entry *to_head) +{ + struct cg_entry *from; + struct cg_entry *to; + + for(from = from_head; from; from = from->next) { + printf("%s\n", "========================================"); + /* Print parent functions */ + to = get_from_record(to_head, from->name); + for (to = to->list; to; to = to->next) + printf(" %10lu %s\n", to->count, to->name); + /* Current function */ + printf("%s\n", from->name); + + /* Print children */ + for(to = from->list; to; to = to->next) { + printf(" %10lu %s\n", to->count, to->name); + } + } +} + +/* + * Get the call graph from /proc. 
The function has two phases: + * (1) Combine per-cpu graphs into a single graph + * (2) Create src->dst mapping and a reverse mapping + */ +struct cg_entry *get_gprof_call_graph() +{ + int from_index, from_len, step; + unsigned short *fromc, *froms; + struct cg_arc_dest *toc, *tos; + char *buf = NULL; + char *b; + char *p; + int cpu; + int to_index; + int i; + int curix; + address_t frompc, lowpc, highpc; + long total_count = 0L; + long cg_input = 0L; + long cg_merged = 0L; + long cg_base = 0L; + long cg_records = 0L; + long lost_ones = 0L; + char name[NAME_LEN]; + struct cg_entry *fcur = NULL; + struct cg_entry *from_head, *to_head; + + lowpc = memory_map->kernel_start; + highpc = memory_map->kernel_end; + + step = PROF_GET_PC_RES; + + buf = read_file(PROC_FILE, sizeof (struct prof_mem_map), NULL); + froms = (unsigned short *) buf; + from_len = (highpc - lowpc) / step; + p = buf + memory_map->cg_to_offset; + tos = (struct cg_arc_dest *)p; + + b = buf; + curix = tos[0].count + 1; + + for (cpu = 0; cpu < memory_map->nr_cpus; cpu++) + { + b = buf + memory_map->cg_from_size * cpu; + fromc = (unsigned short *)b; + p = buf + memory_map->cg_to_offset + memory_map->cg_to_size * cpu; + toc = (struct cg_arc_dest *)p; + for (from_index = 0; from_index < from_len; ++from_index) + { + frompc = lowpc + from_index * step; + for (to_index = fromc[from_index]; to_index != 0; to_index = toc[to_index].link) + { + cg_input++; + if (!cpu) + { + cg_base++; + continue; + } + for (i = froms[from_index]; i != 0; i = tos[i].link) + { + if (tos[i].address == toc[to_index].address) + { + cg_merged++; + tos[i].count += toc[to_index].count; + break; + } + } + if (i == 0) + { + if (curix >= CG_MAX_ARCS) + lost_ones++; + else + { + tos[curix].link = froms[from_index]; + tos[curix].address = toc[to_index].address; + tos[curix].count = toc[to_index].count; + froms[from_index] = curix++; + } + } + } + } + } + +// Merge completed + + from_head = NULL; + to_head = NULL; + for (from_index = 0; from_index < from_len; ++from_index) + { + kaddress_t src, dst; + + frompc = lowpc + from_index * step; + src = (kaddress_t)frompc; + search_map(a2n, src, name); + + for (to_index = froms[from_index]; to_index != 0; + to_index = tos[to_index].link) + { + + total_count += tos[to_index].count; + cg_records++; + dst = (kaddress_t)tos[to_index].address; + if (dst) { + char name2[NAME_LEN]; + /* Create src->dst mapping */ + search_map(a2n, src, name); + fcur = get_from_record(from_head, name); + if (!from_head) from_head = fcur; + fcur->list = insert_to_record(fcur->list, dst, tos[to_index].count, 1); + /* Create dst->src mapping */ + search_map(a2n, dst, name2); + fcur = get_from_record(to_head, name2); + if (!to_head) to_head = fcur; + fcur->list = insert_to_record(fcur->list, src, tos[to_index].count, 1); + + } + } + } + + free(buf); + from_head->list->list = to_head; /* Dirty dirty hack to return both lists */ + return from_head; +} + +/* generate call graph information from /proc/mcount */ +void output_cg_profile() +{ + struct cg_entry *from_head, *to_head; + + memory_map = malloc(sizeof(struct prof_mem_map) + 4); + if (!memory_map) + err_exit("malloc"); + + get_memory_map(memory_map); + + from_head = get_gprof_call_graph(); + to_head = from_head->list->list; /* See dirty dirty hack above */ + output_cg_data(from_head, to_head); +} + +void profile_cmd(char cmd) +{ + FILE * file; + + file = fopen(PROC_FILE, "r+"); + if (!file) + err_exit(PROC_FILE); + + if (fputc(cmd, file) != cmd) + err_exit(PROC_FILE); + + fclose(file); + exit(0); +} + 
+void usage()
+{
+	fprintf(stderr, "Usage: %s [ -e | -d | -c | -m System.map ]\n", prgname);
+	fprintf(stderr, "\t-m System.map  Print call graph\n");
+	fprintf(stderr, "\t-e             Enable profiler\n");
+	fprintf(stderr, "\t-d             Disable profiler\n");
+	fprintf(stderr, "\t-c             Clear profile data\n");
+	exit(1);
+}
+
+int main (int argc, char **argv)
+{
+	int c;
+	char *mapfile = NULL;
+	prgname = argv[0];
+
+	while ((c = getopt(argc, argv, optstring)) != -1) {
+		switch(c) {
+		case 'm':
+			mapfile = optarg;
+			break;
+		case 'e':
+			profile_cmd(MCOUNT_ENABLE);
+			break;
+		case 'd':
+			profile_cmd(MCOUNT_DISABLE);
+			break;
+		case 'c':
+			profile_cmd(MCOUNT_CLEAR);
+			break;
+		default:
+			usage();
+		}
+	}
+	if (!mapfile) usage();
+
+	parse_map(mapfile);
+	output_cg_profile();
+	return 0;
+}
diff -purN -X /home/mbligh/.diff.exclude 310-irqbal_fast/scripts/readcg-0.1/readcg.h 320-kcg/scripts/readcg-0.1/readcg.h
--- 310-irqbal_fast/scripts/readcg-0.1/readcg.h	1969-12-31 16:00:00.000000000 -0800
+++ 320-kcg/scripts/readcg-0.1/readcg.h	2003-10-09 10:30:17.000000000 -0700
@@ -0,0 +1,65 @@
+#ifndef __READCG_H
+#define __READCG_H
+
+#define VERSION "0.1"
+
+#define CG_MAX_ARCS (1 << (8 * sizeof(short)))
+#define PROF_GET_PC_RES 4
+#define S_LEN 128
+#define NAME_LEN 50
+
+//#define PTR_ALIGNED __attribute__ ((aligned (__alignof__ (char *))))
+
+#define PROC_FILE "/proc/mcount"
+#define MCOUNT_ENABLE '2'
+#define MCOUNT_DISABLE '1'
+#define MCOUNT_CLEAR '0'
+
+/* User space address */
+typedef unsigned long address_t;
+
+/* Kernel address */
+#if defined(__ppc64__)
+typedef unsigned long long kaddress_t;
+#else
+typedef address_t kaddress_t;
+#endif
+
+/*
+ * Stores memory mapping of profiler buckets
+ */
+struct prof_mem_map
+{
+	unsigned long kernel_buckets;	/* number of kernel buckets */
+	//unsigned long module_buckets;	/* number of module buckets */
+	unsigned long nr_cpus;		/* number of processors whether profiled or not */
+	unsigned long cg_from_size;	/* size of one cg_from array */
+	unsigned long cg_to_size;	/* size of one cg_to array */
+	unsigned long cg_to_offset;	/* offset of cg_to array */
+	unsigned long kernel_start;	/* lowest text address in kernel */
+	unsigned long kernel_end;	/* highest text address in kernel */
+	//unsigned long module_start;	/* lowest text address in all modules */
+	//unsigned long module_end;	/* highest text address in all modules */
+};
+
+struct addr_to_name_map {
+	unsigned long addr;
+	char name[NAME_LEN];
+};
+
+/* This must match the kernel's definition. */
+struct cg_arc_dest {
+	kaddress_t address;
+	int count;
+	unsigned short link;
+	unsigned short pad;
+};
+
+struct cg_entry {
+	char name[NAME_LEN];
+	unsigned long count;
+	struct cg_entry *list;
+	struct cg_entry *next;
+};
+
+#endif /* __READCG_H */
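
For reference, a minimal user-space sketch of how the /proc/mcount interface added by this patch is driven. This is illustrative only and not part of the patch; readcg remains the real tool. It assumes the patch is applied, CONFIG_MCOUNT=y, and root privileges. The command characters and the header layout are taken from scripts/readcg-0.1/readcg.h and include/linux/mcount.h above; the file name mcount_demo.c and the placeholder workload ("sleep 5") are made up for the example.

/* mcount_demo.c - build with: gcc -Wall -o mcount_demo mcount_demo.c */
#include <stdio.h>
#include <stdlib.h>

#define PROC_FILE      "/proc/mcount"
#define MCOUNT_ENABLE  '2'	/* triggers mcount_start()   */
#define MCOUNT_DISABLE '1'	/* triggers mcount_stop()    */
#define MCOUNT_CLEAR   '0'	/* triggers mcount_cleanup() */

/* Must match struct prof_mem_map in include/linux/mcount.h */
struct prof_mem_map {
	unsigned long kernel_buckets;
	unsigned long nr_cpus;
	unsigned long cg_from_size;
	unsigned long cg_to_size;
	unsigned long cg_to_offset;
	unsigned long kernel_start;
	unsigned long kernel_end;
};

static void mcount_cmd(char cmd)
{
	FILE *f = fopen(PROC_FILE, "r+");

	if (!f || fputc(cmd, f) != cmd) {
		perror(PROC_FILE);
		exit(1);
	}
	fclose(f);
}

int main(int argc, char **argv)
{
	struct prof_mem_map map;
	FILE *f;

	mcount_cmd(MCOUNT_CLEAR);               /* free any old buffers      */
	mcount_cmd(MCOUNT_ENABLE);              /* allocate, install hook    */
	system(argc > 1 ? argv[1] : "sleep 5"); /* workload to profile       */
	mcount_cmd(MCOUNT_DISABLE);             /* detach the mcount hook    */

	/* The first bytes of /proc/mcount are the prof_mem_map header. */
	f = fopen(PROC_FILE, "r");
	if (!f || fread(&map, sizeof(map), 1, f) != 1) {
		perror(PROC_FILE);
		return 1;
	}
	printf("%lu buckets, %lu cpus, kernel text 0x%lx-0x%lx\n",
	       map.kernel_buckets, map.nr_cpus,
	       map.kernel_start, map.kernel_end);
	fclose(f);
	return 0;
}

The per-cpu cg_from/cg_to arrays follow this header at the offsets it records; 'readcg -m System.map' merges them across CPUs and resolves the arc addresses to symbol names.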